"""
|
|
NeuroSploit v3 - Confidence Scoring Engine
|
|
|
|
Numeric 0-100 confidence scoring for vulnerability findings.
|
|
Combines proof of execution, negative control results, and signal analysis
|
|
into a single score with transparent breakdown.
|
|
|
|
Score Thresholds:
|
|
>= 90 → "confirmed" (AI Verified, high confidence)
|
|
>= 60 → "likely" (needs manual review)
|
|
< 60 → "rejected" (auto-reject, false positive)
|
|
"""
|
|
|
|
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Result types
# ---------------------------------------------------------------------------

@dataclass
class ConfidenceResult:
    """Result of confidence scoring."""
    score: int                                                # 0-100
    verdict: str                                              # "confirmed" | "likely" | "rejected"
    breakdown: Dict[str, int] = field(default_factory=dict)  # Component scores
    detail: str = ""                                          # Human-readable explanation


# ---------------------------------------------------------------------------
# Scorer
# ---------------------------------------------------------------------------

class ConfidenceScorer:
    """Calculates a numeric confidence score (0-100) for vulnerability findings.

    Weights:
        +0-60  Proof of execution (per vuln type — the most important signal)
        +0-30  Proof of impact (severity-aware)
        +0-20  Negative controls passed (response differs from benign)
        -40    Only baseline diff signal (no actual proof of exploitation)
        -60    Same behavior on negative controls (critical false-positive indicator)
        -40    AI interpretation says the payload was ineffective
    """

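    # Worked example of the weights above (illustrative numbers): a finding with a
    # proof-of-execution score of 55, demonstrated impact (+30), and passing negative
    # controls (+15) reaches 100 → "confirmed"; if the negative controls instead show
    # the same behavior, the total drops to 55 + 30 - 60 = 25 → "rejected".
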
    # Threshold constants
    THRESHOLD_CONFIRMED = 90
    THRESHOLD_LIKELY = 60

    # Weight caps
    MAX_PROOF_SCORE = 60
    MAX_IMPACT_SCORE = 30
    MAX_CONTROLS_BONUS = 20
    PENALTY_ONLY_DIFF = -40
    PENALTY_SAME_BEHAVIOR = -60
    PENALTY_AI_INEFFECTIVE = -40

    # Keywords in AI interpretation that indicate payload was ineffective
    INEFFECTIVE_KEYWORDS = [
        "ignored", "not processed", "blocked", "filtered",
        "sanitized", "rejected", "not executed", "was not",
        "does not", "did not", "no effect", "no impact",
        "benign", "safe", "harmless",
    ]

    def calculate(
        self,
        signals: List[str],
        proof_result,        # ProofResult from proof_of_execution
        control_result,      # NegativeControlResult from negative_control
        ai_interpretation: Optional[str] = None,
    ) -> ConfidenceResult:
        """Calculate confidence score from all verification components.

        Args:
            signals: List of signal names from multi_signal_verify
                (e.g., ["baseline_diff", "payload_effect"])
            proof_result: ProofResult from ProofOfExecution.check()
            control_result: NegativeControlResult from NegativeControlEngine
            ai_interpretation: Optional AI response interpretation text

        Returns:
            ConfidenceResult with score, verdict, breakdown, and detail
        """
        breakdown: Dict[str, int] = {}
        score = 0

        # ── Component 1: Proof of Execution (0-60) ────────────────────
        proof_score = min(proof_result.score, self.MAX_PROOF_SCORE) if proof_result else 0
        score += proof_score
        breakdown["proof_of_execution"] = proof_score

        # ── Component 2: Proof of Impact (0-30) ───────────────────────
        impact_score = 0
        if proof_result and proof_result.proven:
            if proof_result.impact_demonstrated:
                impact_score = self.MAX_IMPACT_SCORE  # Full impact shown
            else:
                impact_score = 15  # Proven but no impact demonstration
        score += impact_score
        breakdown["proof_of_impact"] = impact_score

        # ── Component 3: Negative Controls (bonus/penalty) ─────────────
        controls_score = 0
        if control_result:
            if control_result.same_behavior:
                controls_score = self.PENALTY_SAME_BEHAVIOR  # -60
            else:
                controls_score = min(
                    self.MAX_CONTROLS_BONUS,
                    control_result.confidence_adjustment,
                )  # capped at +20
        score += controls_score
        breakdown["negative_controls"] = controls_score

        # ── Penalty: Only baseline diff signal ─────────────────────────
        diff_penalty = 0
        if signals and set(signals) <= {"baseline_diff", "new_errors"}:
            # Only diff-based signals, no actual payload effect
            if proof_score == 0:
                diff_penalty = self.PENALTY_ONLY_DIFF  # -40
        score += diff_penalty
        breakdown["diff_only_penalty"] = diff_penalty

        # ── Penalty: AI says payload was ineffective ──────────────────
        ai_penalty = 0
        if ai_interpretation:
            ai_lower = ai_interpretation.lower()
            if any(kw in ai_lower for kw in self.INEFFECTIVE_KEYWORDS):
                ai_penalty = self.PENALTY_AI_INEFFECTIVE  # -40
        score += ai_penalty
        breakdown["ai_ineffective_penalty"] = ai_penalty

        # ── Clamp and determine verdict ────────────────────────────────
        score = max(0, min(100, score))

        if score >= self.THRESHOLD_CONFIRMED:
            verdict = "confirmed"
        elif score >= self.THRESHOLD_LIKELY:
            verdict = "likely"
        else:
            verdict = "rejected"

        # Build detail string
        detail_parts = []
        if proof_result and proof_result.proven:
            detail_parts.append(f"Proof: {proof_result.proof_type} ({proof_score}pts)")
        else:
            detail_parts.append("No proof of execution (0pts)")

        if impact_score > 0:
            detail_parts.append(f"Impact: +{impact_score}pts")

        if control_result:
            if control_result.same_behavior:
                detail_parts.append(
                    f"NEGATIVE CONTROL FAIL: {control_result.controls_matching}/"
                    f"{control_result.controls_run} same behavior ({controls_score}pts)")
            else:
                detail_parts.append(f"Controls passed (+{controls_score}pts)")

        if diff_penalty:
            detail_parts.append(f"Only-diff penalty ({diff_penalty}pts)")

        if ai_penalty:
            detail_parts.append(f"AI-ineffective penalty ({ai_penalty}pts)")

        detail = f"Score: {score}/100 [{verdict}] — " + "; ".join(detail_parts)

        return ConfidenceResult(
            score=score,
            verdict=verdict,
            breakdown=breakdown,
            detail=detail,
        )
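

if __name__ == "__main__":
    # Minimal usage sketch. The real ProofResult and NegativeControlResult come from
    # the proof_of_execution and negative_control modules; the stand-ins below are
    # hypothetical and only carry the attributes that calculate() reads.
    @dataclass
    class _StubProofResult:
        score: int = 55                       # raw proof-of-execution score (capped at 60)
        proven: bool = True
        impact_demonstrated: bool = True
        proof_type: str = "marker_reflected"  # hypothetical proof label

    @dataclass
    class _StubControlResult:
        same_behavior: bool = False      # negative controls behaved differently
        confidence_adjustment: int = 15  # bonus suggested by the control engine
        controls_matching: int = 0
        controls_run: int = 3

    result = ConfidenceScorer().calculate(
        signals=["baseline_diff", "payload_effect"],
        proof_result=_StubProofResult(),
        control_result=_StubControlResult(),
        ai_interpretation="Payload executed and its output appears in the response.",
    )
    print(result.detail)  # e.g. Score: 100/100 [confirmed] with the component breakdown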