mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-07-03 18:07:51 +02:00
harness: reduce false positives (robust verdicts, severity quorum, refute pass)
- Robust verdict parsing (pool::parse_verdict): whitespace-insensitive, checks explicit rejection first, counts only explicit confirmations; ambiguous → Unclear (not confirmed). Replaces the fragile exact-JSON / loose "yes" match.
- Severity-aware quorum (pool::quorum_confirmed): High/Critical now need ≥2 validators AND ≥2/3 agreement (a single vote can no longer confirm a Critical); lower severities need a strict majority (>half, was ≥half). Single-model panels fall back to majority so they aren't nuked.
- Adversarial refute pass (REFUTE_SYS): every confirmed High/Critical is re-examined by a skeptical panel that assumes false-positive; findings that can't withstand a majority of skeptics are dropped. Survives on infra failure.
- Strengthened VOTE_SYS with an explicit false-positive checklist (reflected-not-executed, version/banner guesses, self-XSS, error-as-injection, thin evidence, inflated severity); validator query now also includes impact.
- Unit tests for parse_verdict + quorum_confirmed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -60,7 +60,11 @@ fn tool_doctrine(mcp_on: bool) -> String {
|
||||
Use only what is installed; degrade gracefully. Never run destructive or DoS actions.\n\n"
|
||||
)
|
||||
}
|
||||
const VOTE_SYS: &str = "You are an adversarial security validator. Decide if the candidate finding is a REAL, reproducible, exploitable vulnerability with proof. Reply with JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}. Default to rejected when uncertain.";
|
||||
/// Validator prompt for candidate findings: lists common false-positive
/// patterns to reject and asks for a JSON reply of the form
/// `{"verdict":"confirmed"|"rejected","reason":"..."}`, rejecting by default
/// when uncertain.
// NOTE(review): presumably passed as the `sys` argument of `validate` for
// non-source-code runs — the call site is not visible here; confirm.
const VOTE_SYS: &str = "You are an adversarial security validator. Decide if the candidate finding is a REAL, reproducible, exploitable vulnerability whose EVIDENCE actually proves impact. Reject common false positives: input merely reflected but not executed; version/banner guesses with no working PoC; self-XSS; theoretical issues; an error message or stack trace mistaken for injection; missing, generic, or non-reproducible evidence; severity inflated beyond what the evidence demonstrates. Confirm only if the provided evidence (request/response) concretely proves the vulnerability. Reply with JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}. Default to rejected when uncertain.";
|
||||
/// Adversarial second pass for High/Critical findings: assume false positive
|
||||
/// until the evidence forces otherwise. A finding that can't withstand the
|
||||
/// skeptics is dropped.
|
||||
/// Prompt handed to `pool.vote` by `refute_pass`. In the reply, `confirmed`
/// means the vulnerability is real (the finding survives) and `rejected`
/// means the reviewer considers it a false positive.
const REFUTE_SYS: &str = "You are a skeptical senior reviewer trying to DISPROVE a reported vulnerability. Assume it is a FALSE POSITIVE unless the evidence forces otherwise. Scrutinize: does the evidence PROVE execution/impact, or only that input was reflected/accepted? Is there a real working PoC, or just a version/banner/theory? Could it be self-XSS, an error message, or an unreachable path? Reply JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"} where confirmed means the vulnerability is REAL and proven by the evidence. When in doubt, reject.";
|
||||
/// Validator prompt for source-code findings; `run_whitebox` passes it to
/// `validate` as the `sys` argument. Asks for the same JSON verdict shape as
/// the other validator prompts.
const CODE_VOTE_SYS: &str = "You are an adversarial source-code reviewer. Decide if the reported issue is a REAL vulnerability in the provided code (reachable, exploitable, not a false positive). Reply JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}.";
|
||||
|
||||
/// ReAct loop directive: make the agent reason → act with a tool → observe →
|
||||
@@ -225,6 +229,7 @@ pub async fn run(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: Sender<Str
|
||||
findings.extend(extra);
|
||||
findings = dedup_findings(findings);
|
||||
}
|
||||
let findings = refute_pass(findings, pool, cfg.vote_n, &tx).await;
|
||||
finish(cfg, lib, recon, transcript, findings, selected, &mut rl, tx).await
|
||||
}
|
||||
|
||||
@@ -286,6 +291,7 @@ pub async fn run_whitebox(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: S
|
||||
let candidates = dedup_findings(raw.iter().flat_map(|(_, _, f)| f.clone()).collect());
|
||||
let _ = tx.send(format!("{} candidate finding(s) (deduped) — validating", candidates.len())).await;
|
||||
let findings = validate(candidates, pool, CODE_VOTE_SYS, cfg.vote_n, &tx).await;
|
||||
let findings = refute_pass(findings, pool, cfg.vote_n, &tx).await;
|
||||
finish(cfg, lib, "{}".into(), transcript, findings, selected, &mut rl, tx).await
|
||||
}
|
||||
|
||||
@@ -428,6 +434,7 @@ pub async fn run_greybox(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: Se
|
||||
findings.extend(extra);
|
||||
findings = dedup_findings(findings);
|
||||
}
|
||||
let findings = refute_pass(findings, pool, cfg.vote_n, &tx).await;
|
||||
finish(cfg, lib, recon, transcript, findings, selected, &mut rl, tx).await
|
||||
}
|
||||
|
||||
@@ -603,11 +610,11 @@ async fn validate(candidates: Vec<Finding>, pool: &ModelPool, sys: &str, vote_n:
|
||||
let finder = finder.clone();
|
||||
async move {
|
||||
let q = format!(
|
||||
"Finding: {} | severity {} | {} | at {} | payload {} | evidence {}",
|
||||
f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence
|
||||
"Finding: {} | severity {} | {} | at {} | payload {} | evidence {} | impact {}",
|
||||
f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence, f.impact
|
||||
);
|
||||
let (yes, total) = pool.vote(sys, &q, vote_n, finder.as_deref()).await;
|
||||
f.validated = total > 0 && yes * 2 >= total;
|
||||
f.validated = crate::pool::quorum_confirmed(&f.severity, yes, total);
|
||||
f.votes = format!("{yes}/{total}");
|
||||
if f.confidence == 0.0 && total > 0 {
|
||||
f.confidence = yes as f64 / total as f64;
|
||||
@@ -622,6 +629,37 @@ async fn validate(candidates: Vec<Finding>, pool: &ModelPool, sys: &str, vote_n:
|
||||
validated.into_iter().filter(|f| f.validated).collect()
|
||||
}
|
||||
|
||||
/// Adversarial refutation pass: every confirmed **High/Critical** finding is
|
||||
/// re-examined by a skeptical panel that tries to prove it's a false positive.
|
||||
/// A finding that fails to withstand a majority of skeptics is dropped. Lower
|
||||
/// severities pass through unchanged. Runs only when a real panel exists.
|
||||
async fn refute_pass(findings: Vec<Finding>, pool: &ModelPool, vote_n: usize, tx: &Sender<String>) -> Vec<Finding> {
|
||||
let finder = pool.candidates.first().map(|m| m.label());
|
||||
let mut kept = Vec::new();
|
||||
for mut f in findings {
|
||||
let s = f.severity.to_lowercase();
|
||||
let high = s.starts_with("crit") || s.starts_with("high");
|
||||
if !high || pool.stop_exploiting() {
|
||||
kept.push(f);
|
||||
continue;
|
||||
}
|
||||
let q = format!(
|
||||
"Finding: {} | severity {} | {} | at {} | payload {} | evidence {} | impact {}",
|
||||
f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence, f.impact
|
||||
);
|
||||
let (yes, total) = pool.vote(REFUTE_SYS, &q, vote_n.max(2), finder.as_deref()).await;
|
||||
// Survive on no-response (infra failure) or a surviving majority.
|
||||
let survives = total == 0 || yes * 2 > total;
|
||||
if survives {
|
||||
if total > 0 { f.votes = format!("{} · refute {yes}/{total}", f.votes); }
|
||||
kept.push(f);
|
||||
} else {
|
||||
let _ = tx.send(format!("vote {} → dropped by adversarial refute ({yes}/{total})", f.title)).await;
|
||||
}
|
||||
}
|
||||
kept
|
||||
}
|
||||
|
||||
async fn finish(cfg: RunConfig, _lib: &Library, recon: String, transcript: String, mut findings: Vec<Finding>,
|
||||
selected: Vec<Agent>, rl: &mut RlState, tx: Sender<String>) -> RunOutput {
|
||||
// --- Grounding gate: no claim without a tool receipt (anti-hallucination) ---
|
||||
@@ -994,5 +1032,6 @@ pub async fn run_host(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: Sende
|
||||
findings.extend(extra);
|
||||
findings = dedup_findings(findings);
|
||||
}
|
||||
let findings = refute_pass(findings, pool, cfg.vote_n, &tx).await;
|
||||
finish(cfg, lib, recon, transcript, findings, selected, &mut rl, tx).await
|
||||
}
|
||||
|
||||
@@ -312,12 +312,7 @@ impl ModelPool {
|
||||
};
|
||||
if let Ok(text) = self.one("validate", m, system, user).await {
|
||||
total += 1;
|
||||
let t = text.to_lowercase();
|
||||
if t.contains("\"verdict\": \"confirmed\"")
|
||||
|| t.trim_start().starts_with("yes")
|
||||
|| t.contains("confirmed: true")
|
||||
|| t.contains("is_real\": true")
|
||||
{
|
||||
if parse_verdict(&text) == Verdict::Confirmed {
|
||||
confirmed += 1;
|
||||
}
|
||||
}
|
||||
@@ -333,3 +328,97 @@ async fn wait_cancelled(flag: &Arc<AtomicBool>) {
|
||||
tokio::time::sleep(Duration::from_millis(120)).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// A validator's verdict on a candidate finding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Verdict {
    /// The reply explicitly confirmed the finding.
    Confirmed,
    /// The reply explicitly rejected the finding.
    Rejected,
    /// No clear yes/no — treated conservatively as NOT confirmed.
    Unclear,
}

/// Parse a validator reply into a [`Verdict`].
///
/// The reply is lowercased and all whitespace is removed before matching, so
/// `{"verdict":"confirmed"}` and `{ "verdict": "confirmed" }` are equivalent.
/// Matching is by substring, in this order:
///
/// 1. any explicit rejection marker → [`Verdict::Rejected`] (rejection wins
///    even if a confirmation marker is also present);
/// 2. any explicit confirmation marker → [`Verdict::Confirmed`];
/// 3. a reply whose first word is exactly `yes` → [`Verdict::Confirmed`];
/// 4. anything else → [`Verdict::Unclear`].
///
/// The `yes` fallback requires a word boundary after `yes`, so prose that
/// merely begins with those letters (e.g. "Yesterday…") is not mistaken for a
/// confirmation.
pub fn parse_verdict(text: &str) -> Verdict {
    // Markers are written lowercase and without whitespace, matching `dense`.
    const REJECT_MARKERS: [&str; 9] = [
        "\"verdict\":\"rejected\"",
        "\"verdict\":\"reject\"",
        "verdict:rejected",
        "\"is_real\":false",
        "\"isreal\":false",
        "\"confirmed\":false",
        "\"real\":false",
        "\"exploitable\":false",
        "\"valid\":false",
    ];
    const CONFIRM_MARKERS: [&str; 8] = [
        "\"verdict\":\"confirmed\"",
        "verdict:confirmed",
        "\"is_real\":true",
        "\"isreal\":true",
        "\"confirmed\":true",
        "\"real\":true",
        "\"exploitable\":true",
        "\"valid\":true",
    ];

    let lower = text.to_lowercase();
    let dense: String = lower.chars().filter(|c| !c.is_whitespace()).collect();

    // Explicit rejection wins (conservative).
    if REJECT_MARKERS.iter().any(|k| dense.contains(k)) {
        return Verdict::Rejected;
    }
    // Explicit confirmation.
    if CONFIRM_MARKERS.iter().any(|k| dense.contains(k)) {
        return Verdict::Confirmed;
    }
    // Fallback: a leading "yes" counts only as a whole word — the character
    // after it must not continue the word ("yes," / "yes." / "yes" are fine,
    // "yesterday" is not).
    if let Some(rest) = lower.trim_start().strip_prefix("yes") {
        if !rest.starts_with(|c: char| c.is_alphanumeric()) {
            return Verdict::Confirmed;
        }
    }
    Verdict::Unclear
}
|
||||
|
||||
#[cfg(test)]
mod verdict_tests {
    use super::*;

    /// JSON replies (with and without whitespace) and plain prose map to the
    /// expected verdicts.
    #[test]
    fn parses_json_and_prose() {
        let cases = [
            (r#"{"verdict":"confirmed","reason":"x"}"#, Verdict::Confirmed),
            (r#"{ "verdict": "confirmed" }"#, Verdict::Confirmed),
            (r#"{ "verdict": "rejected" }"#, Verdict::Rejected),
            (r#"{"is_real": false}"#, Verdict::Rejected),
            ("Yes, the evidence proves RCE.", Verdict::Confirmed),
            // Ambiguous prose is not counted as a confirmation.
            ("This looks theoretical.", Verdict::Unclear),
        ];
        for (reply, expected) in cases {
            assert_eq!(parse_verdict(reply), expected, "reply: {reply}");
        }
    }

    /// A reply carrying `confirmed:false` must not be read as confirmed even
    /// when the word "confirmed" appears elsewhere in it.
    #[test]
    fn rejection_beats_confirmation_when_both_present() {
        let reply = r#"{"confirmed": false, "note": "verdict was confirmed earlier"}"#;
        assert_eq!(parse_verdict(reply), Verdict::Rejected);
    }

    /// Quorum thresholds per severity, as `(severity, yes, total, expected)`.
    #[test]
    fn quorum_is_severity_aware() {
        let cases = [
            // high/critical: need >=2 votes AND >=2/3
            ("High", 1, 2, false),
            ("High", 2, 2, true),
            ("Critical", 2, 3, true),
            ("Critical", 1, 3, false),
            // single validator: majority applies to all
            ("Critical", 1, 1, true),
            // low/medium: strict majority (more than half)
            ("Low", 1, 1, true),
            ("Medium", 1, 2, false),
            ("Low", 2, 3, true),
            ("Low", 0, 2, false),
        ];
        for (severity, yes, total, expected) in cases {
            assert_eq!(
                quorum_confirmed(severity, yes, total),
                expected,
                "{severity} {yes}/{total}"
            );
        }
    }
}
|
||||
|
||||
/// Severity-aware confirmation quorum.
///
/// Returns `true` when `yes` confirmations out of `total` votes are enough to
/// confirm a finding of the given `severity`:
///
/// * `total == 0` never confirms.
/// * Severities starting with "crit" or "high" (case-insensitive), when at
///   least two votes were cast, need `yes / total >= 2/3`.
/// * Everything else — lower severities, or any severity with a single
///   vote — needs a strict majority (`yes / total > 1/2`).
///
/// Leading characters that are not letters (whitespace, markdown markers,
/// brackets) are skipped before the severity is classified, so labels such
/// as `" High"` or `"**Critical**"` still get the stricter two-thirds rule
/// instead of silently falling back to the simple majority.
pub fn quorum_confirmed(severity: &str, yes: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    let s = severity
        .trim_start_matches(|c: char| !c.is_alphabetic())
        .to_lowercase();
    let high = s.starts_with("crit") || s.starts_with("high");
    if high && total >= 2 {
        yes * 3 >= total * 2 // ≥ two-thirds
    } else {
        yes * 2 > total // strict majority
    }
}
|
||||
|
||||
Reference in New Issue
Block a user