NeuroSploit/backend/core/validation_judge.py

"""
NeuroSploit v3 - Validation Judge
Sole authority for approving or rejecting vulnerability findings.
No finding enters the confirmed list without passing through this judge.
Pipeline:
1. Run negative controls (benign payloads → compare responses)
2. Check proof of execution (per vuln type)
3. Get AI interpretation (BEFORE verdict, not after)
4. Calculate confidence score (0-100)
5. Apply verdict (confirmed/likely/rejected)
"""
import logging
from dataclasses import dataclass, field, asdict
from typing import Callable, Dict, List, Optional, Any
from backend.core.negative_control import NegativeControlEngine, NegativeControlResult
from backend.core.proof_of_execution import ProofOfExecution, ProofResult
from backend.core.confidence_scorer import ConfidenceScorer, ConfidenceResult
from backend.core.vuln_engine.system_prompts import get_prompt_for_vuln_type
from backend.core.access_control_learner import AccessControlLearner
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------
@dataclass
class JudgmentResult:
    """Complete judgment result from the ValidationJudge."""

    approved: bool  # Should this finding be accepted?
    verdict: str  # "confirmed" | "likely" | "rejected"
    confidence_score: int  # 0-100
    confidence_breakdown: Dict[str, int] = field(default_factory=dict)
    proof_of_execution: Optional[ProofResult] = None
    negative_controls: Optional[NegativeControlResult] = None
    ai_interpretation: Optional[str] = None
    evidence_summary: str = ""  # Hardened evidence string
    rejection_reason: str = ""  # Why it was rejected (if applicable)

# ---------------------------------------------------------------------------
# Judge
# ---------------------------------------------------------------------------
class ValidationJudge:
    """Sole authority for approving/rejecting vulnerability findings.

    Orchestrates negative controls, proof of execution, AI interpretation,
    and confidence scoring into a single JudgmentResult.

    Usage:
        judge = ValidationJudge(controls, proof, scorer, llm)
        judgment = await judge.evaluate(
            vuln_type, url, param, payload, test_response, baseline,
            signals, evidence, make_request_fn
        )
        if judgment.approved:
            # Create finding with judgment.confidence_score
        else:
            # Store as rejected finding with judgment.rejection_reason
    """

    def __init__(
        self,
        negative_controls: NegativeControlEngine,
        proof_engine: ProofOfExecution,
        confidence_scorer: ConfidenceScorer,
        llm=None,
        access_control_learner: Optional[AccessControlLearner] = None,
    ):
        self.controls = negative_controls
        self.proof = proof_engine
        self.scorer = confidence_scorer
        self.llm = llm
        self.acl_learner = access_control_learner

    async def evaluate(
        self,
        vuln_type: str,
        url: str,
        param: str,
        payload: str,
        test_response: Dict,
        baseline: Optional[Dict],
        signals: List[str],
        evidence: str,
        make_request_fn: Callable,
        method: str = "GET",
        injection_point: str = "parameter",
    ) -> JudgmentResult:
        """Full evaluation pipeline.

        Args:
            vuln_type: Vulnerability type (e.g., "ssrf", "xss_reflected")
            url: Target URL
            param: Parameter being tested
            payload: The attack payload used
            test_response: HTTP response dict from the attack
            baseline: Optional baseline response for comparison
            signals: Signal names from multi_signal_verify (e.g., ["baseline_diff"])
            evidence: Raw evidence string from verification
            make_request_fn: Async fn(url, method, params) → response dict
            method: HTTP method used
            injection_point: Where the payload was injected

        Returns:
            JudgmentResult with verdict, score, proof, controls, evidence
        """
        # Step 1: Run negative controls
        control_result = await self._run_controls(
            url, param, method, vuln_type, test_response,
            make_request_fn, baseline, injection_point
        )

        # Step 2: Check proof of execution
        proof_result = self.proof.check(
            vuln_type, payload, test_response, baseline
        )

        # Step 3: AI interpretation (BEFORE verdict)
        ai_interp = await self._get_ai_interpretation(
            vuln_type, payload, test_response
        )

        # Step 4: Calculate confidence score
        confidence = self.scorer.calculate(
            signals, proof_result, control_result, ai_interp
        )

        # Step 4b: Apply access control learning adjustment
        if self.acl_learner:
            try:
                body = test_response.get("body", "") if isinstance(test_response, dict) else ""
                status = test_response.get("status", 0) if isinstance(test_response, dict) else 0
                hints = self.acl_learner.get_evaluation_hints(vuln_type, body, status)
                if hints and hints.get("likely_false_positive") and hints.get("fp_signals", 0) >= 2:
                    fp_rate = self.acl_learner.get_false_positive_rate(vuln_type)
                    if fp_rate > 0.7:
                        # High historical FP rate + matching FP pattern → penalize
                        penalty = -20
                        confidence.score = max(0, confidence.score + penalty)
                        confidence.breakdown["acl_learning_penalty"] = penalty
                        confidence.detail += f"; ACL learning penalty ({penalty}pts, FP rate: {fp_rate:.0%})"
                        # Recalculate verdict
                        if confidence.score >= self.scorer.THRESHOLD_CONFIRMED:
                            confidence.verdict = "confirmed"
                        elif confidence.score >= self.scorer.THRESHOLD_LIKELY:
                            confidence.verdict = "likely"
                        else:
                            confidence.verdict = "rejected"
            except Exception:
                pass

        # Step 5: Build judgment
        approved = confidence.verdict != "rejected"

        # Build evidence summary
        evidence_summary = self._build_evidence_summary(
            evidence, proof_result, control_result, confidence, ai_interp
        )

        # Build rejection reason if applicable
        rejection_reason = ""
        if not approved:
            rejection_reason = self._build_rejection_reason(
                vuln_type, param, proof_result, control_result,
                confidence, ai_interp
            )

        return JudgmentResult(
            approved=approved,
            verdict=confidence.verdict,
            confidence_score=confidence.score,
            confidence_breakdown=confidence.breakdown,
            proof_of_execution=proof_result,
            negative_controls=control_result,
            ai_interpretation=ai_interp,
            evidence_summary=evidence_summary,
            rejection_reason=rejection_reason,
        )

    async def _run_controls(
        self,
        url: str,
        param: str,
        method: str,
        vuln_type: str,
        attack_response: Dict,
        make_request_fn: Callable,
        baseline: Optional[Dict],
        injection_point: str,
    ) -> Optional[NegativeControlResult]:
        """Run negative controls with error handling."""
        try:
            return await self.controls.run_controls(
                url, param, method, vuln_type, attack_response,
                make_request_fn, baseline, injection_point
            )
        except Exception as e:
            logger.debug(f"Negative controls failed: {e}")
            return None

    async def _get_ai_interpretation(
        self,
        vuln_type: str,
        payload: str,
        response: Dict,
    ) -> Optional[str]:
        """Get AI interpretation of the response (BEFORE verdict)."""
        if not self.llm or not self.llm.is_available():
            return None
        try:
            body = response.get("body", "")[:1000]
            status = response.get("status", 0)

            # Inject access control learning hints for relevant vuln types
            acl_hint = ""
            if self.acl_learner:
                hints = self.acl_learner.get_evaluation_hints(vuln_type, body, status)
                if hints and hints.get("matching_patterns", 0) > 0:
                    fp_label = "LIKELY FALSE POSITIVE" if hints["likely_false_positive"] else "POSSIBLY REAL"
                    acl_hint = (
                        f"\n\n**Learned Pattern Hints:** {fp_label} "
                        f"(pattern: {hints['pattern_type']}, "
                        f"FP signals: {hints['fp_signals']}, TP signals: {hints['tp_signals']})\n"
                        f"IMPORTANT: For access control vulns (BOLA/BFLA/IDOR), do NOT rely on "
                        f"HTTP status codes. Compare actual response DATA — check if different "
                        f"user's private data is returned vs. denial/empty/own-data patterns."
                    )

            prompt = f"""Briefly analyze this HTTP response after testing for {vuln_type.upper()}.
Payload sent: {payload[:200]}
Response status: {status}
Response excerpt:
```
{body}
```
{acl_hint}
Answer in 1-2 sentences: Was the payload processed/executed? Or was it ignored/filtered/blocked? Be specific about what happened."""
            system = get_prompt_for_vuln_type(vuln_type, "interpretation")
            result = await self.llm.generate(prompt, system)
            return result.strip()[:300] if result else None
        except Exception:
            return None

    def _build_evidence_summary(
        self,
        raw_evidence: str,
        proof: Optional[ProofResult],
        controls: Optional[NegativeControlResult],
        confidence: ConfidenceResult,
        ai_interp: Optional[str],
    ) -> str:
        """Build hardened evidence string with all verification components."""
        parts = []

        # Raw evidence
        if raw_evidence:
            parts.append(raw_evidence)

        # Proof of execution
        if proof:
            if proof.proven:
                parts.append(f"[PROOF] {proof.proof_type}: {proof.detail}")
            else:
                parts.append(f"[NO PROOF] {proof.detail}")

        # Negative controls
        if controls:
            parts.append(f"[CONTROLS] {controls.detail}")

        # AI interpretation
        if ai_interp:
            parts.append(f"[AI] {ai_interp}")

        # Confidence score
        parts.append(f"[CONFIDENCE] {confidence.score}/100 [{confidence.verdict}]")

        return " | ".join(parts)

    def _build_rejection_reason(
        self,
        vuln_type: str,
        param: str,
        proof: Optional[ProofResult],
        controls: Optional[NegativeControlResult],
        confidence: ConfidenceResult,
        ai_interp: Optional[str],
    ) -> str:
        """Build a clear rejection reason explaining why the finding was rejected."""
        reasons = []

        if proof and not proof.proven:
            reasons.append("no proof of execution")

        if controls and controls.same_behavior:
            reasons.append(
                f"negative controls show same behavior "
                f"({controls.controls_matching}/{controls.controls_run} controls match)"
            )

        if ai_interp:
            ineffective_kws = ["ignored", "not processed", "blocked", "filtered",
                               "sanitized", "no effect"]
            if any(kw in ai_interp.lower() for kw in ineffective_kws):
                reasons.append("AI confirms payload was ineffective")

        reason_str = "; ".join(reasons) if reasons else "confidence too low"
        return (f"Rejected {vuln_type} in {param}: {reason_str} "
                f"(score: {confidence.score}/100)")