Files
NeuroSploit/backend/core/confidence_scorer.py
2026-02-11 10:47:33 -03:00

180 lines
6.9 KiB
Python

"""
NeuroSploit v3 - Confidence Scoring Engine
Numeric 0-100 confidence scoring for vulnerability findings.
Combines proof of execution, negative control results, and signal analysis
into a single score with transparent breakdown.
Score Thresholds:
>= 90 → "confirmed" (AI Verified, high confidence)
>= 60 → "likely" (needs manual review)
< 60 → "rejected" (auto-reject, false positive)
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------
@dataclass
class ConfidenceResult:
"""Result of confidence scoring."""
score: int # 0-100
verdict: str # "confirmed" | "likely" | "rejected"
breakdown: Dict[str, int] = field(default_factory=dict) # Component scores
detail: str = "" # Human-readable explanation
# ---------------------------------------------------------------------------
# Scorer
# ---------------------------------------------------------------------------
class ConfidenceScorer:
"""Calculates numeric confidence score 0-100 for vulnerability findings.
Weights:
+0-60 Proof of execution (per vuln type — the most important signal)
+0-30 Proof of impact (severity-aware)
+0-20 Negative controls passed (response differs from benign)
-40 Only baseline diff signal (no actual proof of exploitation)
-60 Same behavior on negative controls (critical false positive indicator)
-40 AI interpretation says payload was ineffective
"""
# Threshold constants
THRESHOLD_CONFIRMED = 90
THRESHOLD_LIKELY = 60
# Weight caps
MAX_PROOF_SCORE = 60
MAX_IMPACT_SCORE = 30
MAX_CONTROLS_BONUS = 20
PENALTY_ONLY_DIFF = -40
PENALTY_SAME_BEHAVIOR = -60
PENALTY_AI_INEFFECTIVE = -40
# Keywords in AI interpretation that indicate payload was ineffective
INEFFECTIVE_KEYWORDS = [
"ignored", "not processed", "blocked", "filtered",
"sanitized", "rejected", "not executed", "was not",
"does not", "did not", "no effect", "no impact",
"benign", "safe", "harmless",
]
def calculate(
self,
signals: List[str],
proof_result, # ProofResult from proof_of_execution
control_result, # NegativeControlResult from negative_control
ai_interpretation: Optional[str] = None,
) -> ConfidenceResult:
"""Calculate confidence score from all verification components.
Args:
signals: List of signal names from multi_signal_verify
(e.g., ["baseline_diff", "payload_effect"])
proof_result: ProofResult from ProofOfExecution.check()
control_result: NegativeControlResult from NegativeControlEngine
ai_interpretation: Optional AI response interpretation text
Returns:
ConfidenceResult with score, verdict, breakdown, and detail
"""
breakdown: Dict[str, int] = {}
score = 0
# ── Component 1: Proof of Execution (0-60) ────────────────────
proof_score = min(proof_result.score, self.MAX_PROOF_SCORE) if proof_result else 0
score += proof_score
breakdown["proof_of_execution"] = proof_score
# ── Component 2: Proof of Impact (0-30) ───────────────────────
impact_score = 0
if proof_result and proof_result.proven:
if proof_result.impact_demonstrated:
impact_score = self.MAX_IMPACT_SCORE # Full impact shown
else:
impact_score = 15 # Proven but no impact demonstration
score += impact_score
breakdown["proof_of_impact"] = impact_score
# ── Component 3: Negative Controls (bonus/penalty) ─────────────
controls_score = 0
if control_result:
if control_result.same_behavior:
controls_score = self.PENALTY_SAME_BEHAVIOR # -60
else:
controls_score = min(
self.MAX_CONTROLS_BONUS,
control_result.confidence_adjustment
) # +20
score += controls_score
breakdown["negative_controls"] = controls_score
# ── Penalty: Only baseline diff signal ─────────────────────────
diff_penalty = 0
if signals and set(signals) <= {"baseline_diff", "new_errors"}:
# Only diff-based signals, no actual payload effect
if proof_score == 0:
diff_penalty = self.PENALTY_ONLY_DIFF # -40
score += diff_penalty
breakdown["diff_only_penalty"] = diff_penalty
# ── Penalty: AI says payload was ineffective ──────────────────
ai_penalty = 0
if ai_interpretation:
ai_lower = ai_interpretation.lower()
if any(kw in ai_lower for kw in self.INEFFECTIVE_KEYWORDS):
ai_penalty = self.PENALTY_AI_INEFFECTIVE # -40
score += ai_penalty
breakdown["ai_ineffective_penalty"] = ai_penalty
# ── Clamp and determine verdict ────────────────────────────────
score = max(0, min(100, score))
if score >= self.THRESHOLD_CONFIRMED:
verdict = "confirmed"
elif score >= self.THRESHOLD_LIKELY:
verdict = "likely"
else:
verdict = "rejected"
# Build detail string
detail_parts = []
if proof_result and proof_result.proven:
detail_parts.append(f"Proof: {proof_result.proof_type} ({proof_score}pts)")
else:
detail_parts.append("No proof of execution (0pts)")
if impact_score > 0:
detail_parts.append(f"Impact: +{impact_score}pts")
if control_result:
if control_result.same_behavior:
detail_parts.append(
f"NEGATIVE CONTROL FAIL: {control_result.controls_matching}/"
f"{control_result.controls_run} same behavior ({controls_score}pts)")
else:
detail_parts.append(f"Controls passed (+{controls_score}pts)")
if diff_penalty:
detail_parts.append(f"Only-diff penalty ({diff_penalty}pts)")
if ai_penalty:
detail_parts.append(f"AI-ineffective penalty ({ai_penalty}pts)")
detail = f"Score: {score}/100 [{verdict}] — " + "; ".join(detail_parts)
return ConfidenceResult(
score=score,
verdict=verdict,
breakdown=breakdown,
detail=detail,
)