diff --git a/neurosploit-rs/crates/harness/src/pipeline.rs b/neurosploit-rs/crates/harness/src/pipeline.rs index 32898ae..6c6d9b8 100644 --- a/neurosploit-rs/crates/harness/src/pipeline.rs +++ b/neurosploit-rs/crates/harness/src/pipeline.rs @@ -60,7 +60,11 @@ fn tool_doctrine(mcp_on: bool) -> String { Use only what is installed; degrade gracefully. Never run destructive or DoS actions.\n\n" ) } -const VOTE_SYS: &str = "You are an adversarial security validator. Decide if the candidate finding is a REAL, reproducible, exploitable vulnerability with proof. Reply with JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}. Default to rejected when uncertain."; +const VOTE_SYS: &str = "You are an adversarial security validator. Decide if the candidate finding is a REAL, reproducible, exploitable vulnerability whose EVIDENCE actually proves impact. Reject common false positives: input merely reflected but not executed; version/banner guesses with no working PoC; self-XSS; theoretical issues; an error message or stack trace mistaken for injection; missing, generic, or non-reproducible evidence; severity inflated beyond what the evidence demonstrates. Confirm only if the provided evidence (request/response) concretely proves the vulnerability. Reply with JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}. Default to rejected when uncertain."; +/// Adversarial second pass for High/Critical findings: assume false positive +/// until the evidence forces otherwise. A finding that can't withstand the +/// skeptics is dropped. +const REFUTE_SYS: &str = "You are a skeptical senior reviewer trying to DISPROVE a reported vulnerability. Assume it is a FALSE POSITIVE unless the evidence forces otherwise. Scrutinize: does the evidence PROVE execution/impact, or only that input was reflected/accepted? Is there a real working PoC, or just a version/banner/theory? Could it be self-XSS, an error message, or an unreachable path? 
Reply JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"} where confirmed means the vulnerability is REAL and proven by the evidence. When in doubt, reject."; const CODE_VOTE_SYS: &str = "You are an adversarial source-code reviewer. Decide if the reported issue is a REAL vulnerability in the provided code (reachable, exploitable, not a false positive). Reply JSON {\"verdict\":\"confirmed\"|\"rejected\",\"reason\":\"...\"}."; /// ReAct loop directive: make the agent reason → act with a tool → observe → @@ -225,6 +229,7 @@ pub async fn run(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: Sender, pool: &ModelPool, sys: &str, vote_n: let finder = finder.clone(); async move { let q = format!( - "Finding: {} | severity {} | {} | at {} | payload {} | evidence {}", - f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence + "Finding: {} | severity {} | {} | at {} | payload {} | evidence {} | impact {}", + f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence, f.impact ); let (yes, total) = pool.vote(sys, &q, vote_n, finder.as_deref()).await; - f.validated = total > 0 && yes * 2 >= total; + f.validated = crate::pool::quorum_confirmed(&f.severity, yes, total); f.votes = format!("{yes}/{total}"); if f.confidence == 0.0 && total > 0 { f.confidence = yes as f64 / total as f64; @@ -622,6 +629,37 @@ async fn validate(candidates: Vec, pool: &ModelPool, sys: &str, vote_n: validated.into_iter().filter(|f| f.validated).collect() } +/// Adversarial refutation pass: every confirmed **High/Critical** finding is +/// re-examined by a skeptical panel that tries to prove it's a false positive. +/// A finding that fails to withstand a majority of skeptics is dropped. Lower +/// severities pass through unchanged. Runs only when a real panel exists. 
+async fn refute_pass(findings: Vec, pool: &ModelPool, vote_n: usize, tx: &Sender) -> Vec { + let finder = pool.candidates.first().map(|m| m.label()); + let mut kept = Vec::new(); + for mut f in findings { + let s = f.severity.to_lowercase(); + let high = s.starts_with("crit") || s.starts_with("high"); + if !high || pool.stop_exploiting() { + kept.push(f); + continue; + } + let q = format!( + "Finding: {} | severity {} | {} | at {} | payload {} | evidence {} | impact {}", + f.title, f.severity, f.cwe, f.endpoint, f.payload, f.evidence, f.impact + ); + let (yes, total) = pool.vote(REFUTE_SYS, &q, vote_n.max(2), finder.as_deref()).await; + // Survive on no-response (infra failure) or a surviving majority. + let survives = total == 0 || yes * 2 > total; + if survives { + if total > 0 { f.votes = format!("{} · refute {yes}/{total}", f.votes); } + kept.push(f); + } else { + let _ = tx.send(format!("vote {} → dropped by adversarial refute ({yes}/{total})", f.title)).await; + } + } + kept +} + async fn finish(cfg: RunConfig, _lib: &Library, recon: String, transcript: String, mut findings: Vec, selected: Vec, rl: &mut RlState, tx: Sender) -> RunOutput { // --- Grounding gate: no claim without a tool receipt (anti-hallucination) --- @@ -994,5 +1032,6 @@ pub async fn run_host(cfg: RunConfig, lib: &Library, pool: &ModelPool, tx: Sende findings.extend(extra); findings = dedup_findings(findings); } + let findings = refute_pass(findings, pool, cfg.vote_n, &tx).await; finish(cfg, lib, recon, transcript, findings, selected, &mut rl, tx).await } diff --git a/neurosploit-rs/crates/harness/src/pool.rs b/neurosploit-rs/crates/harness/src/pool.rs index b553f93..70c7e4a 100644 --- a/neurosploit-rs/crates/harness/src/pool.rs +++ b/neurosploit-rs/crates/harness/src/pool.rs @@ -312,12 +312,7 @@ impl ModelPool { }; if let Ok(text) = self.one("validate", m, system, user).await { total += 1; - let t = text.to_lowercase(); - if t.contains("\"verdict\": \"confirmed\"") - || 
t.trim_start().starts_with("yes") - || t.contains("confirmed: true") - || t.contains("is_real\": true") - { + if parse_verdict(&text) == Verdict::Confirmed { confirmed += 1; } } @@ -333,3 +328,97 @@ async fn wait_cancelled(flag: &Arc) { tokio::time::sleep(Duration::from_millis(120)).await; } } + +/// A validator's verdict on a candidate finding. +#[derive(Debug, PartialEq, Eq)] +pub enum Verdict { + Confirmed, + Rejected, + /// No clear yes/no — treated conservatively as NOT confirmed. + Unclear, +} + +/// Robustly parse a validator reply into a verdict. Whitespace-insensitive +/// (so `{"verdict":"confirmed"}` and `{ "verdict": "confirmed" }` both match), +/// checks explicit rejection first, and only counts an *explicit* confirmation. +/// Anything ambiguous is `Unclear` (does not count as confirmed) — biasing the +/// pipeline against false positives. +pub fn parse_verdict(text: &str) -> Verdict { + let lower = text.to_lowercase(); + let dense: String = lower.chars().filter(|c| !c.is_whitespace()).collect(); + + // Explicit rejection wins (conservative). + let rejected = [ + "\"verdict\":\"rejected\"", "\"verdict\":\"reject\"", "verdict:rejected", + "\"is_real\":false", "\"isreal\":false", "\"confirmed\":false", "\"real\":false", + "\"exploitable\":false", "\"valid\":false", + ]; + if rejected.iter().any(|k| dense.contains(k)) { + return Verdict::Rejected; + } + // Explicit confirmation. + let confirmed = [ + "\"verdict\":\"confirmed\"", "verdict:confirmed", + "\"is_real\":true", "\"isreal\":true", "\"confirmed\":true", "\"real\":true", + "\"exploitable\":true", "\"valid\":true", + ]; + if confirmed.iter().any(|k| dense.contains(k)) { + return Verdict::Confirmed; + } + // Fallback: only a leading, unambiguous "yes" counts as confirmation. 
+ if lower.trim_start().starts_with("yes") { + return Verdict::Confirmed; + } + Verdict::Unclear +} + +#[cfg(test)] +mod verdict_tests { + use super::*; + #[test] + fn parses_json_and_prose() { + assert_eq!(parse_verdict(r#"{"verdict":"confirmed","reason":"x"}"#), Verdict::Confirmed); + assert_eq!(parse_verdict(r#"{ "verdict": "confirmed" }"#), Verdict::Confirmed); + assert_eq!(parse_verdict(r#"{ "verdict": "rejected" }"#), Verdict::Rejected); + assert_eq!(parse_verdict(r#"{"is_real": false}"#), Verdict::Rejected); + assert_eq!(parse_verdict("Yes, the evidence proves RCE."), Verdict::Confirmed); + assert_eq!(parse_verdict("This looks theoretical."), Verdict::Unclear); // not counted + } + #[test] + fn rejection_beats_confirmation_when_both_present() { + // an answer that says confirmed:false must not be read as confirmed + assert_eq!(parse_verdict(r#"{"confirmed": false, "note": "verdict was confirmed earlier"}"#), Verdict::Rejected); + } + #[test] + fn quorum_is_severity_aware() { + // high/critical: need >=2 votes AND >=2/3 + assert!(!quorum_confirmed("High", 1, 2)); + assert!(quorum_confirmed("High", 2, 2)); + assert!(quorum_confirmed("Critical", 2, 3)); + assert!(!quorum_confirmed("Critical", 1, 3)); + // single validator: majority applies to all + assert!(quorum_confirmed("Critical", 1, 1)); + // low/medium: strict majority (more than half) + assert!(quorum_confirmed("Low", 1, 1)); + assert!(!quorum_confirmed("Medium", 1, 2)); + assert!(quorum_confirmed("Low", 2, 3)); + assert!(!quorum_confirmed("Low", 0, 2)); + } +} + +/// Severity-aware confirmation quorum. False High/Critical findings are the most +/// costly, so they require ≥2 validators AND ≥2/3 agreement; lower severities +/// pass on a strict majority (more than half). With only one validator available +/// (single-model panel) the majority rule applies to all severities. 
pub fn quorum_confirmed(severity: &str, yes: usize, total: usize) -> bool {
    // An empty panel (no validator answered) can never confirm a finding.
    if total == 0 {
        return false;
    }

    // Severity is matched case-insensitively by prefix, so "CRITICAL",
    // "Crit" and "high-risk" are all treated as high impact.
    let label = severity.to_lowercase();
    let high_impact = ["crit", "high"]
        .iter()
        .any(|prefix| label.starts_with(*prefix));

    // The stricter bar only applies when more than one validator voted;
    // a lone validator falls back to the ordinary majority rule.
    let needs_supermajority = high_impact && total >= 2;
    if !needs_supermajority {
        // Strict majority: more than half of the votes.
        return yes * 2 > total;
    }

    // High/Critical with a real panel: at least two-thirds must agree.
    yes * 3 >= total * 2
}