mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-06-30 07:15:30 +02:00
NeuroSploit v3.3.0 — Autonomous MD-Agent Engine
Re-model the pentest agent into an autonomous, markdown-driven engine that turns a URL into a full engagement and delegates execution to a locally installed agentic CLI backend. Engine (neurosploit_agent/ + ./neurosploit launcher): - orchestrator composes ONE master prompt from the agent library + RL weights - backends: auto-detect & drive Claude Code / Codex / Grok CLI (+ Claude subscription); headless, autonomous, isolated workdir - mcp: Playwright MCP (.mcp.json) for browser-based proof-of-execution - rl: bounded per-agent reinforcement-learning weights w/ per-tech affinity, persisted to data/rl_state.json - models: latest registry incl. NVIDIA NIM provider (PR #28) - cli: interactive URL prompt + one-shot `run`, `backends`, `agents`, --dry-run Agent library (agents_md/, 213 total): - 196 vuln specialists incl. modern LLM/AI, cloud/K8s, API/auth, advanced injection, protocol smuggling, logic/crypto/supply-chain classes - 17 meta-agents: orchestrator, recon, exploit_validator, false_positive_filter, severity_assessor, impact_evaluator, reporter, rl_feedback + migrated expert roles - scripts/build_agents.py data-driven builder; REGISTRY.md index Docs: rewritten README.md, v3.3.0 RELEASE.md, .env.example (NVIDIA NIM, xAI, engine vars). Retire legacy Python orchestration (neurosploit.py + agent classes) to legacy/. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
NeuroSploit v3.3.0 — Autonomous MD-Agent Engine.
|
||||
|
||||
A lean orchestration layer that turns a URL into an autonomous penetration test:
|
||||
it composes a master prompt from the curated `agents_md/` markdown library and
|
||||
hands execution to a locally-installed agentic CLI backend (Claude Code, Codex,
|
||||
or Grok CLI), augmented with Playwright MCP, and learns across runs via a
|
||||
reinforcement-learning reward loop.
|
||||
|
||||
This package replaces the legacy Python orchestration (`neurosploit.py` + heavy
|
||||
`core/` agents), which now lives under `legacy/` for reference.
|
||||
"""
|
||||
|
||||
# The version string is the package's only public name.
__all__ = ["__version__"]
__version__ = "3.3.0"
|
||||
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
Agent loader for NeuroSploit v3.3.0.
|
||||
|
||||
Discovers and parses the curated `agents_md/` markdown library, builds a
|
||||
searchable index, and produces an RL-weighted, recon-aware ordering of which
|
||||
specialist agents the orchestrator should run for a given target.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Repository root: the parent of the directory that holds this module.
ROOT = os.path.split(os.path.dirname(os.path.abspath(__file__)))[0]
AGENTS_DIR = os.path.join(ROOT, "agents_md")

# Maps a recon signal to the name fragments used to pre-select agents. This
# only narrows the candidate set (so that not every playbook is dumped into
# every prompt); the CLI backend makes the final, intelligent selection.
SIGNAL_HINTS: Dict[str, List[str]] = {
    "graphql": ["graphql"],
    "jwt": ["jwt"],
    "oauth": ["oauth", "oidc", "saml"],
    "ai_features": [
        "llm_", "prompt_injection", "ai_", "vector_db", "ml_model", "rag",
    ],
    "cloud": [
        "aws_", "gcp_", "azure_", "s3_", "gcs_", "cloud_", "imds", "metadata",
        "terraform", "ecr", "helm", "serverless", "k8s", "kubelet",
        "docker_socket", "container_escape",
    ],
    "rest": ["api_", "rest_", "mass_assignment", "bola", "bfla", "idor"],
    "ws": ["websocket", "ws_"],
    "upload": ["file_upload", "zip_slip", "xxe", "deserial", "pickle", "yaml"],
    "template": ["ssti", "csti", "template", "ssi", "esi"],
    "cache_proxy": [
        "cache", "smuggl", "desync", "h2c", "hop_by_hop", "proxy",
        "response_splitting",
    ],
}
|
||||
|
||||
|
||||
@dataclass
class Agent:
    """One parsed markdown playbook from the agent library."""

    # Identity and location
    name: str
    path: str
    title: str
    kind: str  # "vuln" | "meta"

    # Sections and metadata pulled out of the markdown body (empty if absent)
    user_prompt: str = ""
    system_prompt: str = ""
    cwe: str = ""
    severity: str = ""

    # A fresh list per instance, so agents never share tag storage
    tags: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _parse(path: str, kind: str) -> Agent:
    """Parse one agent markdown file into an :class:`Agent`.

    Args:
        path: Path of the ``.md`` playbook to read.
        kind: Value stored as the agent's ``kind`` ("vuln" or "meta").

    Returns:
        An ``Agent`` whose ``name`` is the file stem. ``title`` is the first
        ``# `` heading (falling back to the name), the prompts are the bodies
        of the ``## User Prompt`` / ``## System Prompt`` sections, ``cwe`` is
        the first ``CWE-<n>`` token and ``severity`` the word following
        ``Severity:``. Anything not found is an empty string.
    """
    # Read through a context manager so the handle is closed deterministically
    # instead of whenever the interpreter happens to collect it.
    with open(path, encoding="utf-8", errors="replace") as fh:
        text = fh.read()

    name = os.path.splitext(os.path.basename(path))[0]
    title_m = re.search(r"^#\s+(.+?)\s*$", text, re.M)
    title = title_m.group(1).strip() if title_m else name
    # A section runs until the next "## " heading or the end of the file.
    up = re.search(r"##\s*User Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
    sp = re.search(r"##\s*System Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
    cwe_m = re.search(r"(CWE-\d+)", text)
    sev_m = re.search(r"Severity:\s*([A-Za-z]+)", text)
    return Agent(
        name=name, path=path, title=title, kind=kind,
        user_prompt=(up.group(1).strip() if up else ""),
        system_prompt=(sp.group(1).strip() if sp else ""),
        cwe=(cwe_m.group(1) if cwe_m else ""),
        severity=(sev_m.group(1) if sev_m else ""),
        tags=[name],
    )
|
||||
|
||||
|
||||
class AgentLibrary:
    """In-memory index of the markdown agent playbooks.

    ``vulns`` and ``meta`` map an agent name (the file stem) to its parsed
    ``Agent``; they are filled from the ``vulns/`` and ``meta/``
    sub-directories of ``base``. A missing sub-directory leaves the
    corresponding mapping empty.
    """

    def __init__(self, base: str = AGENTS_DIR):
        self.base = base
        self.vulns: Dict[str, Agent] = {}
        self.meta: Dict[str, Agent] = {}
        self._load()

    def _load(self):
        """Parse every ``*.md`` file in the two library directories."""
        vdir, mdir = os.path.join(self.base, "vulns"), os.path.join(self.base, "meta")
        for d, kind, store in ((vdir, "vuln", self.vulns), (mdir, "meta", self.meta)):
            if not os.path.isdir(d):
                continue
            # Sorted so the load order does not depend on the filesystem.
            for fn in sorted(os.listdir(d)):
                if fn.endswith(".md"):
                    a = _parse(os.path.join(d, fn), kind)
                    store[a.name] = a

    # -- selection ---------------------------------------------------------
    def candidates_for(self, recon: Optional[dict]) -> List[str]:
        """Return vuln-agent names whose preconditions plausibly match recon.

        With no recon (or a generic target) we return all vuln agents and let
        the backend prioritise. With recon signals we narrow to the relevant
        subset plus a baseline of always-run web agents.
        """
        if not recon:
            return list(self.vulns.keys())
        wanted: set = set()
        signals = _signals_from_recon(recon)
        for sig in signals:
            for kw in SIGNAL_HINTS.get(sig, []):
                # Hints are substrings of agent names, not exact names.
                wanted.update(n for n in self.vulns if kw in n)
        # Always include core web classes regardless of recon.
        baseline = ["xss_reflected", "xss_stored", "xss_dom", "sqli_error", "sqli_blind",
                    "ssrf", "idor", "csrf", "open_redirect", "command_injection",
                    "lfi", "path_traversal", "auth_bypass", "security_headers",
                    "information_disclosure", "cors_misconfig"]
        wanted.update(n for n in baseline if n in self.vulns)
        return sorted(wanted) if wanted else list(self.vulns.keys())

    def ranked(self, recon: Optional[dict], weights: Dict[str, float]) -> List[str]:
        """Return the candidates ordered by descending weight.

        Agents without an entry in ``weights`` are treated as 0.5.
        """
        cands = self.candidates_for(recon)
        return sorted(cands, key=lambda n: weights.get(n, 0.5), reverse=True)

    def index_markdown(self, names: List[str], weights: Dict[str, float]) -> str:
        """Compact catalog (name — title — CWE — weight) for the master prompt."""
        rows = []
        for n in names:
            a = self.vulns.get(n)
            if not a:
                # Names that are not vuln agents are left out of the catalog.
                continue
            rows.append(f"- `{n}` — {a.title} [{a.cwe or 'CWE-?'}] (priority {weights.get(n, 0.5):.2f})")
        return "\n".join(rows)

    def render(self, name: str, target: str, recon_json: str = "{}", collaborator: str = "") -> str:
        """Return an agent's markdown with its placeholders filled in.

        ``{target}``, ``{recon_json}`` and ``{collaborator}`` are replaced in
        a single pass, so placeholder-looking text inside one of the supplied
        values is never expanded again.

        Raises:
            KeyError: if ``name`` is neither a vuln nor a meta agent.
        """
        a = self.vulns.get(name) or self.meta.get(name)
        if not a:
            raise KeyError(name)
        with open(a.path, encoding="utf-8", errors="replace") as fh:
            body = fh.read()
        values = {
            "{target}": target,
            "{recon_json}": recon_json,
            "{collaborator}": collaborator,
        }
        # A callable replacement keeps backslashes in the values literal.
        return re.sub(r"\{(?:target|recon_json|collaborator)\}",
                      lambda m: values[m.group(0)], body)

    def counts(self) -> Dict[str, int]:
        """Return the number of vuln agents, meta agents and their sum."""
        return {"vulns": len(self.vulns), "meta": len(self.meta),
                "total": len(self.vulns) + len(self.meta)}
|
||||
|
||||
|
||||
def _signals_from_recon(recon: dict) -> List[str]:
|
||||
sigs: List[str] = []
|
||||
apis = recon.get("apis", {}) or {}
|
||||
if apis.get("graphql"):
|
||||
sigs.append("graphql")
|
||||
if apis.get("rest"):
|
||||
sigs.append("rest")
|
||||
if apis.get("ws"):
|
||||
sigs.append("ws")
|
||||
if recon.get("ai_features"):
|
||||
sigs.append("ai_features")
|
||||
if (recon.get("cloud", {}) or {}).get("provider") or (recon.get("cloud", {}) or {}).get("metadata_surface"):
|
||||
sigs.append("cloud")
|
||||
auth = recon.get("auth", {}) or {}
|
||||
if auth.get("session") == "jwt":
|
||||
sigs.append("jwt")
|
||||
if auth.get("oauth"):
|
||||
sigs.append("oauth")
|
||||
tech = recon.get("tech", {}) or {}
|
||||
blob = " ".join(str(v) for v in tech.values()).lower()
|
||||
if any(t in blob for t in ("flask", "jinja", "twig", "freemarker", "velocity", "thymeleaf")):
|
||||
sigs.append("template")
|
||||
if tech.get("waf") or tech.get("http2"):
|
||||
sigs.append("cache_proxy")
|
||||
# Generic surfaces always worth a look.
|
||||
sigs += ["upload", "cache_proxy"]
|
||||
return list(dict.fromkeys(sigs))
|
||||
|
||||
|
||||
# Running the module directly prints the library counts as a quick smoke test.
if __name__ == "__main__":
    print(AgentLibrary().counts())
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Agentic CLI backends for NeuroSploit v3.3.0.
|
||||
|
||||
NeuroSploit does not embed its own agent loop — it delegates autonomous
|
||||
execution to whichever agentic coding CLI is installed locally:
|
||||
|
||||
* Claude Code (`claude`) — also the path for a Claude *subscription*
|
||||
* Codex CLI (`codex`)
|
||||
* Grok CLI (`grok`)
|
||||
|
||||
Each backend is driven headlessly: we pass the composed master prompt, a working
|
||||
directory (with `.mcp.json` for Playwright), and provider env, and let the CLI
|
||||
run the test autonomously to completion. The engine then reads the artifacts the
|
||||
run wrote to `results/`.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class Backend:
    """Base description of an agentic CLI that can run an engagement.

    Subclasses supply :meth:`build_argv`; the base class only knows how to
    probe for the executable.
    """

    key: str
    label: str
    binary: str

    def available(self) -> bool:
        """Return True when ``binary`` can be found on PATH."""
        located = shutil.which(self.binary)
        return located is not None

    def version(self) -> str:
        """Return the first line printed by ``<binary> --version``.

        Any failure (missing binary, timeout, no output) yields ``"?"``.
        """
        try:
            probe = subprocess.run([self.binary, "--version"], capture_output=True,
                                   text=True, timeout=15)
            text = probe.stdout or probe.stderr
            if not text:
                return "?"
            return text.strip().splitlines()[0]
        except Exception:
            return "?"

    def build_argv(self, prompt_file: str, workdir: str, model: str,
                   autonomous: bool, mcp_config: Optional[str]) -> List[str]:
        """Build the command line for a headless run.

        The prompt itself is never placed on the command line, which avoids
        arg-length limits and shell-escaping issues. The base class has no
        command line of its own.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass
class ClaudeBackend(Backend):
    """Backend that drives the ``claude`` CLI in headless print mode."""

    # The prompt is piped on stdin; run() reads this flag via getattr.
    stdin_prompt: bool = True

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv for a headless ``-p`` run (prompt arrives on stdin)."""
        cmd = [self.binary, "-p", "--output-format", "stream-json", "--verbose"]
        if model:
            cmd.extend(["--model", model])
        if mcp_config:
            cmd.extend(["--mcp-config", mcp_config])
        if autonomous:
            # Full autonomy for an authorized engagement in an isolated workdir.
            cmd.append("--dangerously-skip-permissions")
        return cmd
|
||||
|
||||
|
||||
@dataclass
class CodexBackend(Backend):
    """Backend that drives ``codex exec``, which runs non-interactively."""

    # The prompt is piped on stdin; run() reads this flag via getattr.
    stdin_prompt: bool = True

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv for ``codex exec`` rooted at ``workdir``."""
        cmd = [self.binary, "exec", "--cd", workdir]
        if model:
            cmd.extend(["--model", model])
        if autonomous:
            cmd.append("--dangerously-bypass-approvals-and-sandbox")
        if mcp_config:
            cmd.extend(["--config", f"mcp_config_file={mcp_config}"])
        # A trailing "-" tells the CLI to read the prompt from stdin.
        cmd.append("-")
        return cmd
|
||||
|
||||
|
||||
@dataclass
class GrokBackend(Backend):
    """Backend that drives the ``grok`` CLI in its headless/print form."""

    # The prompt is passed by file path, so nothing is piped on stdin.
    stdin_prompt: bool = False

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv that points the CLI at the prompt file and workdir."""
        cmd = [self.binary, "--prompt-file", prompt_file, "--workdir", workdir]
        if model:
            cmd.extend(["--model", model])
        if mcp_config:
            cmd.extend(["--mcp-config", mcp_config])
        if autonomous:
            cmd.append("--yolo")
        return cmd
|
||||
|
||||
|
||||
# Every supported backend, keyed by its own ``key`` attribute.
REGISTRY: Dict[str, Backend] = {
    backend.key: backend
    for backend in (
        ClaudeBackend("claude", "Claude Code", "claude"),
        CodexBackend("codex", "Codex CLI", "codex"),
        GrokBackend("grok", "Grok CLI", "grok"),
    )
}
|
||||
|
||||
|
||||
def detect() -> List[Backend]:
    """Return installed backends, in preference order."""
    found = []
    for key in ("claude", "codex", "grok"):
        candidate = REGISTRY[key]
        if candidate.available():
            found.append(candidate)
    return found
|
||||
|
||||
|
||||
def get(key: str) -> Optional[Backend]:
    """Look up a backend by registry key; unknown keys give ``None``."""
    backend = REGISTRY.get(key)
    return backend
|
||||
|
||||
|
||||
@dataclass
class RunResult:
    """Outcome of one backend invocation, as returned by ``run``."""

    backend: str     # key of the backend that was used
    returncode: int  # process exit status (0 for a dry run)
    log_path: str    # file that received the backend's output
    workdir: str     # directory the backend ran in
|
||||
|
||||
|
||||
def run(backend: Backend, prompt: str, workdir: str, model: str = "",
        autonomous: bool = True, mcp_config: Optional[str] = None,
        env: Optional[Dict[str, str]] = None, timeout: int = 7200,
        dry_run: bool = False) -> RunResult:
    """Execute a backend against the composed prompt and stream logs to disk.

    The prompt is always written to ``<workdir>/master_prompt.md``. The
    backend's combined stdout/stderr goes to ``<workdir>/backend.log``.

    Args:
        backend: Backend whose ``build_argv`` produces the command line.
        prompt: Full master prompt text.
        workdir: Directory to run in; created if missing.
        model: Model identifier forwarded to the backend ("" = backend default).
        autonomous: Whether to request the backend's no-approval mode.
        mcp_config: Optional path to an MCP configuration file.
        env: Extra environment variables layered over ``os.environ``.
        timeout: Seconds before the subprocess is aborted.
        dry_run: If true, only record the command line in the log.

    Returns:
        A ``RunResult`` carrying the exit status and artifact paths.

    Raises:
        subprocess.TimeoutExpired: if the backend exceeds ``timeout``.
        OSError: if the backend executable cannot be started.
    """
    os.makedirs(workdir, exist_ok=True)
    prompt_file = os.path.join(workdir, "master_prompt.md")
    # Close the file before the backend starts so the prompt is fully on disk.
    with open(prompt_file, "w", encoding="utf-8") as fh:
        fh.write(prompt)
    log_path = os.path.join(workdir, "backend.log")

    argv = backend.build_argv(prompt_file, workdir, model, autonomous, mcp_config)
    full_env = os.environ.copy()
    if env:
        full_env.update(env)

    if dry_run:
        with open(log_path, "w", encoding="utf-8") as logf:
            logf.write("DRY RUN\n" + " ".join(argv) + "\n")
        return RunResult(backend.key, 0, log_path, workdir)

    # Backends that do not declare stdin_prompt read the prompt file instead.
    stdin_data = prompt if getattr(backend, "stdin_prompt", False) else None
    with open(log_path, "w", encoding="utf-8") as logf:
        proc = subprocess.run(
            argv, input=stdin_data, stdout=logf, stderr=subprocess.STDOUT,
            cwd=workdir, env=full_env, text=True, timeout=timeout,
        )
    return RunResult(backend.key, proc.returncode, log_path, workdir)
|
||||
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
NeuroSploit v3.3.0 — terminal launcher.
|
||||
|
||||
Two ways in:
|
||||
|
||||
neurosploit # interactive: prompts for URL + choices
|
||||
neurosploit run https://t.example --backend claude --model claude-opus-4-8
|
||||
|
||||
The interactive flow asks for a URL, lets you pick from the agentic CLI backends
|
||||
actually installed on this machine (Claude Code / Codex / Grok, or a Claude
|
||||
subscription), picks a model, then launches the autonomous engagement.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from . import backends, models
|
||||
from .config import RunConfig
|
||||
from .orchestrator import run_engagement
|
||||
|
||||
# ASCII-art banner printed at startup by interactive() and by the `run` command.
# NOTE(review): the art's spacing looks collapsed in this copy — confirm against upstream.
BANNER = r"""
|
||||
_ _ ____ _ _ _
|
||||
| \ | | ___ _ _ _ __ ___ / ___|_ __ | | ___ (_) |_
|
||||
| \| |/ _ \ | | | '__/ _ \\___ \| '_ \| |/ _ \| | __|
|
||||
| |\ | __/ |_| | | | (_) |___) | |_) | | (_) | | |_
|
||||
|_| \_|\___|\__,_|_| \___/|____/| .__/|_|\___/|_|\__|
|
||||
v3.3.0 Autonomous MD-Agent Engine
|
||||
|_|
|
||||
"""
|
||||
|
||||
|
||||
def _progress(msg: str):
|
||||
print(f" [*] {msg}", flush=True)
|
||||
|
||||
|
||||
def _choose(prompt, options, default_idx=0):
|
||||
for i, (key, label) in enumerate(options):
|
||||
mark = "*" if i == default_idx else " "
|
||||
print(f" {mark} {i + 1}) {label}")
|
||||
raw = input(f"{prompt} [{default_idx + 1}]: ").strip()
|
||||
if not raw:
|
||||
return options[default_idx][0]
|
||||
try:
|
||||
return options[int(raw) - 1][0]
|
||||
except (ValueError, IndexError):
|
||||
print(" invalid choice, using default")
|
||||
return options[default_idx][0]
|
||||
|
||||
|
||||
def interactive() -> int:
    """Run the prompt-driven flow: target, backend, model, then the engagement.

    Returns:
        ``2`` when no backend is installed or no target is given, ``0`` when
        the engagement's return code is zero, otherwise ``1``.
    """
    print(BANNER)
    installed = backends.detect()
    if not installed:
        print(" [!] No agentic CLI backend found (claude / codex / grok).")
        print(" Install one: Claude Code, Codex CLI, or Grok CLI, then re-run.")
        return 2
    # version() spawns a subprocess, so probe each backend once and reuse it.
    versions = {b.key: b.version() for b in installed}
    detected = ", ".join(b.label + " (" + versions[b.key] + ")" for b in installed)
    print(f" Detected backends: {detected}\n")

    target = input(" Target URL: ").strip()
    if not target:
        print(" [!] A target URL is required.")
        return 2
    if not target.startswith(("http://", "https://")):
        target = "https://" + target
    scope = input(f" In-scope hosts [default: {target}]: ").strip() or target
    collaborator = input(" OOB collaborator host (optional, for blind/SSRF proof): ").strip()

    backend_key = _choose(" Choose backend",
                          [(b.key, f"{b.label} [{versions[b.key]}]") for b in installed])

    # Provider/model: map backend → sensible provider, then pick a model.
    prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
    provider = prov_for_backend.get(backend_key, "anthropic")
    # The subscription login only exists for Claude Code, so the question is
    # skipped for the other backends, where the answer would be ignored.
    if backend_key == "claude":
        sub = input(" Use Claude subscription (login) instead of an API key? [y/N]: ").strip().lower()
        if sub == "y":
            provider = "claude_subscription"
    model_opts = [(m.id, f"{m.label} ({m.context // 1000}k ctx) {m.notes}")
                  for m in models.list_models(provider)] or [("", "backend default")]
    model = _choose(" Choose model", model_opts)

    cfg = RunConfig(target=target, scope=scope, backend=backend_key,
                    provider=provider, model=model, collaborator=collaborator)
    print()
    _progress(f"Starting autonomous engagement against {target}")
    result = run_engagement(cfg, progress=_progress)
    _summary(result)
    return 0 if result["returncode"] == 0 else 1
|
||||
|
||||
|
||||
def _summary(result):
|
||||
print("\n ── Engagement complete ─────────────────────────────")
|
||||
print(f" Workdir : {result['workdir']}")
|
||||
print(f" Findings: {len(result['findings'])} validated")
|
||||
by_sev = {}
|
||||
for f in result["findings"]:
|
||||
by_sev[f.get("severity", "?")] = by_sev.get(f.get("severity", "?"), 0) + 1
|
||||
if by_sev:
|
||||
print(" By severity: " + ", ".join(f"{k}={v}" for k, v in by_sev.items()))
|
||||
print(f" Report : reports/ | Raw: {result['workdir']}/findings.json")
|
||||
print(" ────────────────────────────────────────────────────")
|
||||
|
||||
|
||||
def main(argv=None) -> int:
    """Command-line entry point.

    With no sub-command the interactive flow runs. ``backends`` lists the
    detected CLIs, ``agents`` prints library counts, and ``run`` starts an
    engagement against a URL.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit status: ``130`` if the interactive flow is interrupted,
        ``1`` if an engagement's return code is non-zero, otherwise ``0``
        (or whatever ``interactive`` returns).
    """
    parser = argparse.ArgumentParser(prog="neurosploit",
                                     description="NeuroSploit v3.3.0 autonomous MD-agent pentest engine")
    sub = parser.add_subparsers(dest="cmd")

    r = sub.add_parser("run", help="run an engagement against a URL")
    r.add_argument("url")
    r.add_argument("--backend", default=None, help="claude | codex | grok (default: first installed)")
    r.add_argument("--provider", default=None)
    r.add_argument("--model", default=None)
    r.add_argument("--scope", default="")
    r.add_argument("--collaborator", default="")
    r.add_argument("--no-rl", action="store_true")
    r.add_argument("--no-mcp", action="store_true")
    r.add_argument("--max-agents", type=int, default=0)
    r.add_argument("--dry-run", action="store_true", help="compose prompt + show command without executing the backend")

    sub.add_parser("backends", help="list detected CLI backends")
    sub.add_parser("agents", help="show agent library counts")

    args = parser.parse_args(argv)

    if args.cmd is None:
        try:
            return interactive()
        except (KeyboardInterrupt, EOFError):
            print("\n aborted.")
            return 130

    if args.cmd == "backends":
        # Probe once; the same list drives both the listing and the empty notice.
        installed = backends.detect()
        for b in installed:
            print(f" {b.key:8} {b.label:14} {b.version()}")
        if not installed:
            print(" none installed (claude / codex / grok)")
        return 0

    if args.cmd == "agents":
        # Imported here so the other commands do not load the agent library.
        from .agent_loader import AgentLibrary
        print(AgentLibrary().counts())
        return 0

    if args.cmd == "run":
        url = args.url if args.url.startswith(("http://", "https://")) else "https://" + args.url
        backend = args.backend
        if not backend:
            # Only probe the machine when the user did not name a backend.
            installed = backends.detect()
            backend = installed[0].key if installed else "claude"
        prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
        provider = args.provider or prov_for_backend.get(backend, "anthropic")
        model = args.model
        if not model:
            # Default to the provider's first listed model, if it has any.
            catalog = models.list_models(provider)
            model = catalog[0].id if catalog else ""
        cfg = RunConfig(target=url, scope=args.scope or url, backend=backend,
                        provider=provider, model=model, collaborator=args.collaborator,
                        use_rl=not args.no_rl, use_mcp=not args.no_mcp,
                        max_agents=args.max_agents, dry_run=args.dry_run)
        print(BANNER)
        result = run_engagement(cfg, progress=_progress)
        _summary(result)
        return 0 if result["returncode"] == 0 else 1

    parser.print_help()
    return 0
|
||||
|
||||
|
||||
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Configuration & paths for NeuroSploit v3.3.0."""
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
# Repository root: the parent of the directory that holds this module.
ROOT = os.path.split(os.path.dirname(os.path.abspath(__file__)))[0]
|
||||
|
||||
|
||||
def _p(*parts) -> str:
    """Join ``parts`` onto the repository root and return the path."""
    segments = (ROOT,) + parts
    return os.path.join(*segments)
|
||||
|
||||
|
||||
@dataclass
class RunConfig:
    """Settings for a single engagement run."""

    # -- what to test ------------------------------------------------------
    target: str
    scope: str = ""
    rules_of_engagement: str = "Authorized, non-destructive testing only. No DoS unless explicitly permitted. Stay strictly in scope."

    # -- who runs it -------------------------------------------------------
    backend: str = "claude"        # claude | codex | grok
    provider: str = "anthropic"    # see models.PROVIDERS
    model: str = "claude-opus-4-8"
    autonomous: bool = True

    # -- how it runs -------------------------------------------------------
    collaborator: str = ""         # OOB callback host for blind vuln proof
    use_rl: bool = True
    use_mcp: bool = True
    max_agents: int = 0            # 0 = no cap (backend prioritises)
    timeout: int = 7200
    dry_run: bool = False
    workdir: str = ""

    def resolved_workdir(self) -> str:
        """Return ``workdir`` if set, else ``results/<slug of target>`` under the root."""
        if self.workdir:
            return self.workdir
        return _p("results", _slug(self.target))
|
||||
|
||||
|
||||
def _slug(url: str) -> str:
|
||||
s = url.replace("https://", "").replace("http://", "")
|
||||
return "".join(c if c.isalnum() else "_" for c in s).strip("_")[:60] or "target"
|
||||
|
||||
|
||||
# Well-known locations, all resolved against the repository root.
PATHS = dict(
    agents=_p("agents_md"),
    results=_p("results"),
    reports=_p("reports"),
    data=_p("data"),
    logs=_p("logs"),
    rl_state=_p("data", "rl_state.json"),
)
|
||||
|
||||
|
||||
def ensure_dirs():
    """Create the results, reports, data and logs directories if missing."""
    needed = [PATHS[name] for name in ("results", "reports", "data", "logs")]
    for directory in needed:
        os.makedirs(directory, exist_ok=True)
|
||||
@@ -0,0 +1,49 @@
|
||||
"""
|
||||
MCP bridge for NeuroSploit v3.3.0.
|
||||
|
||||
Generates the MCP server configuration the agentic CLI backend loads so the
|
||||
autonomous run can drive a real browser (Playwright) and any extra MCP tooling.
|
||||
Playwright lets agents render SPAs, execute JS, capture DOM/network/screenshots
|
||||
and confirm client-side execution (XSS/CSTI) — turning "the payload reflected"
|
||||
into "the payload executed", which is what the validator agents demand.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from typing import Dict
|
||||
|
||||
# Repository root: the parent of the directory that holds this module.
ROOT = os.path.split(os.path.dirname(os.path.abspath(__file__)))[0]
|
||||
|
||||
|
||||
def playwright_server() -> Dict:
    """Return the MCP server entry that launches Playwright MCP through ``npx``.

    The entry always runs ``@playwright/mcp@latest`` in headless, isolated mode.
    """
    launch_args = ["-y", "@playwright/mcp@latest", "--headless", "--isolated"]
    return {"command": "npx", "args": launch_args}
|
||||
|
||||
|
||||
def build_mcp_config(extra: Dict[str, Dict] | None = None) -> Dict:
    """Return the MCP config document: Playwright plus any ``extra`` servers.

    Entries in ``extra`` are merged last, so an entry named ``playwright``
    replaces the built-in one.
    """
    servers = {"playwright": playwright_server()}
    for server_name, server_spec in (extra or {}).items():
        servers[server_name] = server_spec
    return {"mcpServers": servers}
|
||||
|
||||
|
||||
def write_mcp_config(workdir: str, extra: Dict[str, Dict] | None = None) -> str:
    """Write a `.mcp.json` into the run workdir and return its path.

    Claude Code auto-loads `.mcp.json` from the working directory; Codex/Grok
    accept an explicit config path (see backends.py).

    Args:
        workdir: Directory that receives the file; created if missing.
        extra: Additional MCP server entries to merge into the config.

    Returns:
        Path of the written ``.mcp.json``.
    """
    cfg = build_mcp_config(extra)
    path = os.path.join(workdir, ".mcp.json")
    os.makedirs(workdir, exist_ok=True)
    # Close the file explicitly so the JSON is complete on disk before the
    # backend process is started and reads it.
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(cfg, fh, indent=2)
    return path
|
||||
|
||||
|
||||
def playwright_available() -> bool:
    """Best-effort check: report whether ``npx`` can be found on PATH."""
    npx_path = shutil.which("npx")
    return npx_path is not None
|
||||
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Model registry for NeuroSploit v3.3.0.
|
||||
|
||||
Maps logical providers to their latest models and the env vars / base URLs the
|
||||
agentic CLI backends need. Includes the NVIDIA NIM provider added in PR #28.
|
||||
|
||||
The engine itself does not call these APIs directly — the chosen CLI backend
|
||||
(Claude Code / Codex / Grok) does. This registry is what the launcher uses to
|
||||
present choices and to export the right environment to the backend process.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Model:
    """Immutable registry entry for one selectable model."""

    id: str                  # identifier handed to the backend
    label: str               # name shown in the launcher menu
    context: int = 200_000   # shown in the launcher as "<context // 1000>k ctx"
    notes: str = ""          # short free-text hint shown next to the label
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Provider:
    """Immutable registry entry for one model provider."""

    key: str
    label: str
    # Accepted API-key env var names
    env_keys: List[str]
    # OpenAI-compatible base URL, if any
    base_url: Optional[str] = None
    # Env var the backend reads for the base URL
    base_url_env: Optional[str] = None
    models: List[Model] = field(default_factory=list)
    # True when a CLI subscription is used rather than an API key
    subscription: bool = False
|
||||
|
||||
|
||||
PROVIDERS: Dict[str, Provider] = {
    # --- Anthropic (latest Claude family; default) -------------------------
    "anthropic": Provider(
        key="anthropic",
        label="Anthropic Claude",
        env_keys=["ANTHROPIC_API_KEY"],
        models=[
            Model("claude-opus-4-8", "Claude Opus 4.8", 1_000_000,
                  "Most capable; deep multi-step pentest reasoning"),
            Model("claude-sonnet-4-6", "Claude Sonnet 4.6", 1_000_000,
                  "Balanced cost/quality default"),
            Model("claude-haiku-4-5", "Claude Haiku 4.5", 200_000,
                  "Fast/cheap recon and triage"),
        ],
    ),
    # --- OpenAI ------------------------------------------------------------
    "openai": Provider(
        key="openai",
        label="OpenAI",
        env_keys=["OPENAI_API_KEY"],
        models=[
            Model("gpt-5.1", "GPT-5.1", 400_000, "Strong general reasoning"),
            Model("o4", "o4", 200_000, "Deliberate reasoning for validation"),
        ],
    ),
    # --- xAI Grok ----------------------------------------------------------
    "xai": Provider(
        key="xai",
        label="xAI Grok",
        env_keys=["XAI_API_KEY", "GROK_API_KEY"],
        base_url="https://api.x.ai/v1",
        base_url_env="OPENAI_BASE_URL",
        models=[
            Model("grok-4", "Grok 4", 256_000, "Fast agentic execution"),
            Model("grok-4-fast", "Grok 4 Fast", 128_000, "Low-latency triage"),
        ],
    ),
    # --- NVIDIA NIM (PR #28) ----------------------------------------------
    # OpenAI-compatible endpoint at integrate.api.nvidia.com; keys are `nvapi-...`.
    "nvidia_nim": Provider(
        key="nvidia_nim",
        label="NVIDIA NIM",
        env_keys=["NVIDIA_NIM_API_KEY", "NVIDIA_API_KEY"],
        base_url="https://integrate.api.nvidia.com/v1",
        base_url_env="OPENAI_BASE_URL",
        models=[
            Model("nvidia/llama-3.3-nemotron-super-49b-v1", "Nemotron Super 49B", 128_000,
                  "NIM hosted reasoning"),
            Model("deepseek-ai/deepseek-r1", "DeepSeek-R1 (NIM)", 128_000,
                  "Strong reasoning via NIM"),
            Model("qwen/qwen2.5-coder-32b-instruct", "Qwen2.5 Coder 32B (NIM)", 128_000,
                  "Code/exploit oriented"),
        ],
    ),
    # --- Google Gemini -----------------------------------------------------
    "gemini": Provider(
        key="gemini",
        label="Google Gemini",
        env_keys=["GEMINI_API_KEY", "GOOGLE_API_KEY"],
        models=[
            Model("gemini-2.5-pro", "Gemini 2.5 Pro", 1_000_000, "Large context recon"),
            Model("gemini-2.5-flash", "Gemini 2.5 Flash", 1_000_000, "Fast/cheap"),
        ],
    ),
    # --- OpenRouter (aggregator) ------------------------------------------
    "openrouter": Provider(
        key="openrouter",
        label="OpenRouter",
        env_keys=["OPENROUTER_API_KEY"],
        base_url="https://openrouter.ai/api/v1",
        base_url_env="OPENAI_BASE_URL",
        models=[
            Model("anthropic/claude-opus-4-8", "Opus 4.8 (OpenRouter)", 1_000_000),
        ],
    ),
    # --- Local Ollama ------------------------------------------------------
    "ollama": Provider(
        key="ollama",
        label="Ollama (local)",
        env_keys=[],
        base_url="http://localhost:11434/v1",
        base_url_env="OPENAI_BASE_URL",
        models=[
            Model("qwen2.5-coder:32b", "Qwen2.5 Coder 32B (local)", 32_000),
            Model("llama3.3:70b", "Llama 3.3 70B (local)", 128_000),
        ],
    ),
    # --- Subscription via Claude Code CLI (no API key needed) -------------
    "claude_subscription": Provider(
        key="claude_subscription",
        label="Claude subscription (via Claude Code login)",
        env_keys=[],
        subscription=True,
        models=[
            Model("claude-opus-4-8", "Claude Opus 4.8 (subscription)", 1_000_000),
            Model("claude-sonnet-4-6", "Claude Sonnet 4.6 (subscription)", 1_000_000),
        ],
    ),
}

# Key of the PROVIDERS entry labelled as the default.
DEFAULT_PROVIDER = "anthropic"
|
||||
|
||||
|
||||
def get_provider(key: str) -> Optional[Provider]:
    """Look up a provider by key; unknown keys give ``None``."""
    provider = PROVIDERS.get(key)
    return provider
|
||||
|
||||
|
||||
def list_models(provider_key: str) -> List[Model]:
    """Return a new list of the provider's models, or ``[]`` for an unknown key."""
    provider = PROVIDERS.get(provider_key)
    if provider is None:
        return []
    return list(provider.models)
|
||||
|
||||
|
||||
def resolve_env(provider_key: str, model_id: str) -> Dict[str, str]:
    """Return the env vars a backend needs for this provider/model selection.

    An unknown provider gives an empty dict. Otherwise the result holds the
    provider's base URL (when it defines both a URL and the variable that
    carries it), the first of its API-key variables that is set to a
    non-empty value in the current environment, and the
    ``NEUROSPLOIT_MODEL`` / ``NEUROSPLOIT_PROVIDER`` markers.
    """
    import os

    provider = PROVIDERS.get(provider_key)
    if not provider:
        return {}

    exported: Dict[str, str] = {}
    if provider.base_url and provider.base_url_env:
        exported[provider.base_url_env] = provider.base_url

    # Only the first API-key variable that is actually set is forwarded.
    first_set = next((name for name in provider.env_keys if os.getenv(name)), None)
    if first_set is not None:
        exported[first_set] = os.environ[first_set]

    exported["NEUROSPLOIT_MODEL"] = model_id
    exported["NEUROSPLOIT_PROVIDER"] = provider_key
    return exported
|
||||
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Orchestrator for NeuroSploit v3.3.0.
|
||||
|
||||
Ties the pieces together: load the agent library, apply RL weights to pick and
|
||||
rank specialist agents for the target, compose the single master prompt (the
|
||||
`meta/orchestrator` playbook + the recon-aware agent catalog + the operating
|
||||
contract), hand it to the chosen CLI backend with Playwright MCP, then read back
|
||||
artifacts and feed the RL loop.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from . import backends, mcp, models
|
||||
from .agent_loader import AgentLibrary
|
||||
from .config import RunConfig, PATHS, ensure_dirs
|
||||
from .rl import RLEngine, outcomes_from_findings
|
||||
|
||||
|
||||
def compose_master_prompt(cfg: RunConfig, lib: AgentLibrary, rl: RLEngine,
                          recon: Optional[dict]) -> str:
    """Compose the single master prompt handed to the CLI backend.

    The prompt is made of up to three sections joined by newlines: an
    engagement header (target, scope, rules of engagement, the ranked agent
    index and the RL priors), the rendered ``orchestrator`` meta playbook when
    the library has one, and the pipeline / output contract. Sections that are
    blank after stripping are left out.

    Args:
        cfg: Run settings; this function reads ``target``, ``scope``,
            ``rules_of_engagement``, ``collaborator``, ``use_rl``,
            ``max_agents`` and ``resolved_workdir()``.
        lib: Agent library used to rank agents and render the index and the
            orchestrator playbook.
        rl: Source of per-agent weights; not consulted when ``cfg.use_rl`` is
            false.
        recon: Optional recon data, forwarded to ranking and serialised as
            JSON for the orchestrator playbook.

    Returns:
        The full prompt text.
    """
    priors = {}
    if cfg.use_rl:
        priors = rl.weights()

    candidates = lib.ranked(recon, priors)
    if cfg.max_agents > 0:
        candidates = candidates[:cfg.max_agents]
    agent_index = lib.index_markdown(candidates, priors)

    # Only the first 40 candidates are listed; agents without a stored weight
    # are shown with the neutral prior 0.5.
    shown_priors = {}
    for agent_name in candidates[:40]:
        shown_priors[agent_name] = round(priors.get(agent_name, 0.5), 2)
    rl_weights_txt = json.dumps(shown_priors, indent=0)

    orch = ""
    if "orchestrator" in lib.meta:
        orch = lib.render("orchestrator", cfg.target,
                          recon_json=json.dumps(recon or {}),
                          collaborator=cfg.collaborator)

    header = f"""# NeuroSploit v3.3.0 — Autonomous Engagement

You are running an AUTHORIZED, autonomous web penetration test.

TARGET: {cfg.target}
SCOPE: {cfg.scope or cfg.target}
RULES OF ENGAGEMENT: {cfg.rules_of_engagement}
OOB COLLABORATOR: {cfg.collaborator or '(none provided — skip OOB-only confirmations)'}
WORKDIR: {cfg.resolved_workdir()}

You have Playwright MCP (browser automation, JS execution, DOM/network capture,
screenshots) and local shell tools. Use the browser to PROVE client-side
execution; use the collaborator to PROVE blind/OOB issues.

## Specialist agent library
The `agents_md/` directory holds {lib.counts()['vulns']} vulnerability playbooks
and {lib.counts()['meta']} meta playbooks. For each specialist you choose to run,
open its file under `agents_md/vulns/<name>.md`, substitute the target and recon,
and follow its methodology and (strict) anti-false-positive System Prompt.

### Recon-ranked candidate agents (by RL priority)
{agent_index}

### RL priors (higher = historically more productive on similar targets)
{rl_weights_txt}
"""

    contract = f"""
## Required pipeline (follow in order)
1. Run `agents_md/meta/recon.md` → write `results/recon.json`.
2. Re-rank the candidate agents above using recon + RL priors; skip agents with
no applicable surface.
3. Execute each selected specialist; gather candidate findings WITH evidence.
4. For every candidate: `meta/exploit_validator.md` → `meta/false_positive_filter.md`.
Discard anything not reproducibly exploitable.
5. Score survivors: `meta/severity_assessor.md` then `meta/impact_evaluator.md`.
6. `meta/reporter.md` → write `results/findings.json` AND `reports/report.md`.
7. `meta/rl_feedback.md` → write/merge `data/rl_state.json`.

## Output contract (MANDATORY)
Write `results/findings.json` as a JSON array of objects:
{{"id","agent","title","severity","cvss","cwe","endpoint","payload","evidence","impact","remediation","confidence","validated"}}
Only include findings with `validated: true`. If you find nothing, write `[]`.
Also write `results/agents_ran.json` as a JSON array of the agent names you executed.

Stay strictly in scope. Never run destructive/DoS payloads unless ROE permits.
Report ONLY proven, reproducible findings.
"""

    # Drop sections that are empty (e.g. no orchestrator playbook in the library).
    sections = [part for part in (header, orch, contract) if part.strip()]
    return "\n".join(sections)
|
||||
|
||||
|
||||
def collect_results(workdir: str) -> Dict:
    """Read back the result artifacts the backend wrote for a run.

    ``findings.json`` and ``agents_ran.json`` are looked up first in *workdir*
    and then in ``PATHS["results"]``. For each artifact the first location
    that yields a non-empty JSON array wins.

    Collection is best-effort: a missing, unreadable, malformed or non-array
    file is skipped instead of raising, so the caller always gets lists back.

    Args:
        workdir: Directory of the current run.

    Returns:
        ``{"findings": [...], "agents_ran": [...]}``; either list is empty
        when no usable file was found.
    """
    collected: Dict[str, list] = {"findings": [], "agents_ran": []}
    artifacts = (("findings.json", "findings"), ("agents_ran.json", "agents_ran"))

    # The backend may write under results/<slug>/ or results/ — check both.
    # NOTE(review): the master prompt asks for `results/findings.json`; if the
    # backend resolves that relative to workdir the file lands in
    # <workdir>/results/, which is not searched here — confirm the backend cwd.
    for base in (workdir, PATHS["results"]):
        for filename, key in artifacts:
            if collected[key]:
                continue  # an earlier location already supplied this artifact
            path = os.path.join(base, filename)
            try:
                with open(path, encoding="utf-8") as fh:
                    data = json.load(fh)
            except (OSError, ValueError, RecursionError):
                # Missing/unreadable file, bad encoding, invalid or absurdly
                # nested JSON: treat as "nothing collected from here".
                continue
            # The output contract is a JSON array; anything else (an object,
            # a string, null) would break the callers that iterate the result.
            if isinstance(data, list):
                collected[key] = data
    return collected
|
||||
|
||||
|
||||
def run_engagement(cfg: RunConfig, recon: Optional[dict] = None,
                   progress=lambda m: None) -> Dict:
    """Run one engagement end to end and summarise what it produced.

    Steps, in order: create the working directory, load the agent library and
    the RL state, resolve the CLI backend, optionally write the Playwright MCP
    config, compose the master prompt, run the backend, read back the result
    artifacts and, unless RL is disabled or this is a dry run, update and save
    the RL state.

    Args:
        cfg: Run settings (backend, provider/model, MCP and RL switches,
            timeout, dry-run flag, target).
        recon: Optional recon data; used for prompt composition, and
            ``recon["tech"]["framework"]`` becomes the tech label of the RL
            outcomes.
        progress: Callback invoked with a one-line status message per step.

    Returns:
        A dict with the keys ``workdir``, ``returncode``, ``findings``,
        ``agents_ran`` and ``log``.

    Raises:
        RuntimeError: If the configured backend is unknown or unavailable.
    """
    ensure_dirs()
    workdir = cfg.resolved_workdir()
    os.makedirs(workdir, exist_ok=True)

    lib = AgentLibrary(PATHS["agents"])
    rl = RLEngine(PATHS["rl_state"])
    progress(f"Loaded {lib.counts()['total']} agents "
             f"({lib.counts()['vulns']} vuln / {lib.counts()['meta']} meta)")

    backend = backends.get(cfg.backend)
    if not (backend and backend.available()):
        # List what is installed so the error tells the user what to pick.
        avail = [b.key for b in backends.detect()]
        raise RuntimeError(f"Backend '{cfg.backend}' not available. Installed: {avail or 'none'}")

    mcp_cfg = None
    if cfg.use_mcp:
        if mcp.playwright_available():
            mcp_cfg = mcp.write_mcp_config(workdir)
            progress("Playwright MCP configured")
        else:
            progress("WARNING: npx not found — Playwright MCP disabled; browser-proof agents degraded")

    prompt = compose_master_prompt(cfg, lib, rl, recon)
    env = models.resolve_env(cfg.provider, cfg.model)

    progress(f"Launching {backend.label} ({cfg.model}) — autonomous={cfg.autonomous}")
    res = backends.run(
        backend, prompt, workdir,
        model=cfg.model,
        autonomous=cfg.autonomous,
        mcp_config=mcp_cfg,
        env=env,
        timeout=cfg.timeout,
        dry_run=cfg.dry_run,
    )
    progress(f"Backend exited rc={res.returncode}; log: {res.log_path}")

    out = collect_results(workdir)
    findings = out["findings"] or []
    ran = out["agents_ran"] or []
    progress(f"Collected {len(findings)} validated finding(s) from {len(ran)} agent(s)")

    if cfg.use_rl and not cfg.dry_run:
        # An absent or empty framework is passed on as None (no tech affinity update).
        tech_info = (recon or {}).get("tech", {}) or {}
        tech = tech_info.get("framework", "") or None
        outcomes = outcomes_from_findings(findings, ran, tech=tech)
        rl.update(outcomes, target=cfg.target)
        rl.save()
        progress("RL state updated → data/rl_state.json")

    return {
        "workdir": workdir,
        "returncode": res.returncode,
        "findings": findings,
        "agents_ran": ran,
        "log": res.log_path,
    }
|
||||
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
Reinforcement-learning engine for NeuroSploit v3.3.0.
|
||||
|
||||
A lightweight, persisted reward loop that biases agent selection across runs.
|
||||
It is deliberately model-free and explainable: each specialist agent carries a
|
||||
weight in [0.05, 1.0] plus per-tech-stack affinity, updated after every run from
|
||||
validated findings (positive reward) and rejected false positives (negative).
|
||||
|
||||
This mirrors `agents_md/meta/rl_feedback.md`: the markdown agent reasons about
|
||||
rewards qualitatively; this module applies them deterministically so the state
|
||||
file is reproducible and auditable.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Directory two levels above this file; the default state file lives under it.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
STATE_PATH = os.path.join(ROOT, "data", "rl_state.json")

# Reward contributed by one validated finding, keyed by lower-case severity.
SEVERITY_REWARD = {
    "critical": 1.0,
    "high": 0.7,
    "medium": 0.4,
    "low": 0.2,
    "info": 0.05,
}
FP_PENALTY = 0.3  # subtracted per rejected false positive
IDLE_PENALTY = 0.05  # ran, found nothing, cost budget
ALPHA = 0.3  # learning rate
# Bounds every stored weight is clamped to.
WMIN = 0.05
WMAX = 1.0
|
||||
|
||||
|
||||
@dataclass
class Outcome:
    """What one agent produced in one run; the input to the reward computation."""

    # Name of the agent this outcome belongs to.
    agent: str
    # Severities of validated findings, one entry per finding.
    validated: List[str]
    # Number of findings rejected as false positives.
    false_positives: int = 0
    # Whether the agent was executed at all.
    ran: bool = True
    # Set when the agent was deliberately not run; such outcomes earn reward 0.
    skipped_correctly: bool = False
    # Optional tech-stack label used for the per-tech affinity.
    tech: Optional[str] = None
|
||||
|
||||
|
||||
def _clamp(x: float) -> float:
    """Limit *x* to the closed interval ``[WMIN, WMAX]``."""
    capped = min(WMAX, x)
    return max(WMIN, capped)
|
||||
|
||||
|
||||
class RLEngine:
    """Persisted per-agent weights that bias agent selection across runs.

    ``self.state`` is a JSON-serialisable dict of the form
    ``{"version": 1, "agents": {name: record}, "updated_for": target}``, where
    each record carries ``weight``, ``runs``, ``validated_hits``,
    ``false_positives``, ``reward_last`` and ``tech_affinity``.
    """

    def __init__(self, path: str = STATE_PATH):
        """Load the state stored at *path*, or start from an empty state."""
        self.path = path
        self.state = self._load()

    def _load(self) -> dict:
        """Return the state stored at ``self.path`` or a fresh empty state.

        A missing, unreadable or malformed file yields the empty state, and
        so does a payload that is not an object with an object-valued
        ``agents`` entry, because every other method assumes that shape.
        """
        fresh = {"version": 1, "agents": {}}
        try:
            with open(self.path, encoding="utf-8") as fh:
                loaded = json.load(fh)
        except (OSError, ValueError, RecursionError):
            return fresh
        if not isinstance(loaded, dict):
            return fresh
        if not isinstance(loaded.get("agents", {}), dict):
            return fresh
        return loaded

    def weights(self) -> Dict[str, float]:
        """Return ``{agent: weight}`` for all stored agents (0.5 if unset).

        Records that are not objects are skipped.
        """
        return {
            name: rec.get("weight", 0.5)
            for name, rec in self.state.get("agents", {}).items()
            if isinstance(rec, dict)
        }

    def weight(self, agent: str, tech: Optional[str] = None) -> float:
        """Return one agent's weight, raised to its affinity for *tech* if higher.

        Unknown agents get the neutral weight 0.5.
        """
        rec = self.state.get("agents", {}).get(agent)
        if not rec:
            return 0.5
        w = rec.get("weight", 0.5)
        if tech:
            affinity = rec.get("tech_affinity") or {}
            w = max(w, affinity.get(tech, 0.0) or 0.0)
        return w

    def reward(self, o: Outcome) -> float:
        """Return the reward for one outcome, limited to ``[-1.0, 1.0]``.

        Skipped or not-run outcomes earn 0. Otherwise each validated finding
        adds its severity reward (0.2 for an unrecognised severity), each
        false positive subtracts ``FP_PENALTY``, and a run with neither
        subtracts ``IDLE_PENALTY``.
        """
        if o.skipped_correctly or not o.ran:
            return 0.0
        # str() keeps a null / non-string severity from crashing the update.
        r = sum(SEVERITY_REWARD.get(str(s).strip().lower(), 0.2) for s in o.validated)
        r -= FP_PENALTY * o.false_positives
        if not o.validated and not o.false_positives:
            r -= IDLE_PENALTY
        return max(-1.0, min(1.0, r))

    def update(self, outcomes: List[Outcome], target: str = "") -> dict:
        """Fold *outcomes* into the state and return the state dict.

        Each weight (and, when the outcome has a tech label, the matching
        tech affinity) moves a fraction ``ALPHA`` of the way toward the
        outcome's reward and is clamped to ``[WMIN, WMAX]``. Run counters
        only advance for outcomes that ran and were not skipped.
        ``state["updated_for"]`` is set to *target*.
        """
        agents = self.state.setdefault("agents", {})
        for o in outcomes:
            rec = agents.get(o.agent)
            if not isinstance(rec, dict):
                rec = agents[o.agent] = {}
            # Back-fill missing fields: the state file is also written by the
            # markdown agent, so stored records may be partial.
            for field_name, default in (
                ("weight", 0.5),
                ("runs", 0),
                ("validated_hits", 0),
                ("false_positives", 0),
                ("reward_last", 0.0),
            ):
                rec.setdefault(field_name, default)
            if not isinstance(rec.get("tech_affinity"), dict):
                rec["tech_affinity"] = {}

            r = self.reward(o)
            old = rec["weight"]
            # NOTE(review): skipped / not-run outcomes have reward 0.0, so
            # this step still pulls their weight toward 0 — confirm that a
            # "correct skip" is meant to lower the weight.
            rec["weight"] = _clamp(old + ALPHA * (r - old))
            rec["reward_last"] = round(r, 3)
            if o.ran and not o.skipped_correctly:
                rec["runs"] += 1
                rec["validated_hits"] += len(o.validated)
                rec["false_positives"] += o.false_positives
            if o.tech:
                affinity = rec["tech_affinity"]
                previous = affinity.get(o.tech, 0.5)
                affinity[o.tech] = _clamp(previous + ALPHA * (r - previous))
        self.state["updated_for"] = target
        return self.state

    def save(self):
        """Write the state to ``self.path`` as indented JSON.

        The parent directory is created when the path has one.
        """
        parent = os.path.dirname(self.path)
        if parent:  # a bare file name has no directory to create
            os.makedirs(parent, exist_ok=True)
        with open(self.path, "w", encoding="utf-8") as fh:
            json.dump(self.state, fh, indent=2)
|
||||
|
||||
|
||||
def outcomes_from_findings(findings: List[dict], ran_agents: List[str],
                           tech: Optional[str] = None) -> List[Outcome]:
    """Build per-agent Outcomes from a run's findings + the agents that ran.

    Every name in *ran_agents* gets an outcome, and so does every agent named
    by a finding. A finding with a truthy ``validated`` adds its severity
    (``"Low"`` when missing or null) to that agent's outcome; otherwise a
    ``verdict`` of ``"false_positive"`` counts as one false positive.

    Both inputs come from files written by the backend, so entries that
    cannot be attributed are skipped instead of raising: non-string or empty
    agent names, findings that are not objects, findings without an agent.

    Args:
        findings: Finding objects of the run.
        ran_agents: Names of the agents that were executed.
        tech: Optional tech-stack label copied onto every outcome.

    Returns:
        One ``Outcome`` per agent, in first-seen order.
    """
    by_agent: Dict[str, Outcome] = {}

    def outcome_for(name: str) -> Outcome:
        # Create the agent's outcome on first sight, then reuse it.
        if name not in by_agent:
            by_agent[name] = Outcome(agent=name, validated=[], false_positives=0,
                                     ran=True, tech=tech)
        return by_agent[name]

    for name in ran_agents or []:
        if isinstance(name, str) and name:
            outcome_for(name)

    for finding in findings or []:
        if not isinstance(finding, dict):
            continue
        name = finding.get("agent")
        if not isinstance(name, str) or not name:
            # Without an agent name there is no weight to credit.
            continue
        outcome = outcome_for(name)
        if finding.get("validated"):
            outcome.validated.append(str(finding.get("severity") or "Low"))
        elif finding.get("verdict") == "false_positive":
            outcome.false_positives += 1
    return list(by_agent.values())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the weights stored in the default state file.
    print(json.dumps(RLEngine().weights(), indent=2))
|
||||
Reference in New Issue
Block a user