NeuroSploit v3.3.0 — Autonomous MD-Agent Engine

Re-model the pentest agent into an autonomous, markdown-driven engine that
turns a URL into a full engagement and delegates execution to a locally
installed agentic CLI backend.

Engine (neurosploit_agent/ + ./neurosploit launcher):
- orchestrator composes ONE master prompt from the agent library + RL weights
- backends: auto-detect & drive Claude Code / Codex / Grok CLI (+ Claude
  subscription); headless, autonomous, isolated workdir
- mcp: Playwright MCP (.mcp.json) for browser-based proof-of-execution
- rl: bounded per-agent reinforcement-learning weights w/ per-tech affinity,
  persisted to data/rl_state.json
- models: latest registry incl. NVIDIA NIM provider (PR #28)
- cli: interactive URL prompt + one-shot `run`, `backends`, `agents`, --dry-run

Agent library (agents_md/, 213 total):
- 196 vuln specialists incl. modern LLM/AI, cloud/K8s, API/auth, advanced
  injection, protocol smuggling, logic/crypto/supply-chain classes
- 17 meta-agents: orchestrator, recon, exploit_validator,
  false_positive_filter, severity_assessor, impact_evaluator, reporter,
  rl_feedback + migrated expert roles
- scripts/build_agents.py data-driven builder; REGISTRY.md index

Docs: rewritten README.md, v3.3.0 RELEASE.md, .env.example (NVIDIA NIM, xAI,
engine vars).

Retire legacy Python orchestration (neurosploit.py + agent classes) to legacy/.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
CyberSecurityUP
2026-06-14 20:57:38 -03:00
parent 59f8f42d80
commit 55af0d4634
248 changed files with 18707 additions and 577 deletions
+15
View File
@@ -0,0 +1,15 @@
"""
NeuroSploit v3.3.0 — Autonomous MD-Agent Engine.
A lean orchestration layer that turns a URL into an autonomous penetration test:
it composes a master prompt from the curated `agents_md/` markdown library and
hands execution to a locally-installed agentic CLI backend (Claude Code, Codex,
or Grok CLI), augmented with Playwright MCP, and learns across runs via a
reinforcement-learning reward loop.
This package replaces the legacy Python orchestration (`neurosploit.py` + heavy
`core/` agents), which now lives under `legacy/` for reference.
"""
# Package version string; it is the only name this module exports via __all__.
__version__ = "3.3.0"
__all__ = ["__version__"]
+165
View File
@@ -0,0 +1,165 @@
"""
Agent loader for NeuroSploit v3.3.0.
Discovers and parses the curated `agents_md/` markdown library, builds a
searchable index, and produces an RL-weighted, recon-aware ordering of which
specialist agents the orchestrator should run for a given target.
"""
import os
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional
# ROOT is the parent of the directory containing this file; the markdown agent
# library is expected in its `agents_md/` subdirectory.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
AGENTS_DIR = os.path.join(ROOT, "agents_md")
# Recon-signal → keyword hints used to pre-select agents. The CLI backend does
# the final, intelligent selection; this just narrows the candidate set so we
# do not dump 200 playbooks into every prompt.
# Keys are the signal names produced by `_signals_from_recon`; each keyword is
# matched as a plain substring of an agent name (see
# `AgentLibrary.candidates_for`).
SIGNAL_HINTS: Dict[str, List[str]] = {
"graphql": ["graphql"],
"jwt": ["jwt"],
"oauth": ["oauth", "oidc", "saml"],
"ai_features": ["llm_", "prompt_injection", "ai_", "vector_db", "ml_model", "rag"],
"cloud": ["aws_", "gcp_", "azure_", "s3_", "gcs_", "cloud_", "imds", "metadata", "terraform", "ecr", "helm", "serverless", "k8s", "kubelet", "docker_socket", "container_escape"],
"rest": ["api_", "rest_", "mass_assignment", "bola", "bfla", "idor"],
"ws": ["websocket", "ws_"],
"upload": ["file_upload", "zip_slip", "xxe", "deserial", "pickle", "yaml"],
"template": ["ssti", "csti", "template", "ssi", "esi"],
"cache_proxy": ["cache", "smuggl", "desync", "h2c", "hop_by_hop", "proxy", "response_splitting"],
}
@dataclass
class Agent:
    """A single markdown playbook, as extracted by `_parse`.

    Only `name`, `path`, `title` and `kind` are required; the remaining
    fields default to empty values when the playbook does not provide them.
    """

    # Identity of the playbook.
    name: str   # file name without its extension
    path: str   # location of the markdown file on disk
    title: str  # first level-1 heading, or the name when there is none
    kind: str   # "vuln" | "meta"

    # Optional sections / metadata pulled out of the markdown body.
    user_prompt: str = ""
    system_prompt: str = ""
    cwe: str = ""
    severity: str = ""
    tags: List[str] = field(default_factory=list)
def _parse(path: str, kind: str) -> Agent:
    """Read one markdown playbook and extract its metadata.

    Args:
        path: Path of the `.md` file to read (decoded as UTF-8, with
            undecodable bytes replaced).
        kind: Category recorded on the result ("vuln" or "meta").

    Returns:
        An `Agent` whose name is the file stem. The title is the first
        level-1 heading (falling back to the name); the prompts are the
        bodies of the `## User Prompt` / `## System Prompt` sections; `cwe`
        is the first `CWE-<n>` token and `severity` the first word after
        `Severity:`. Anything not found is left as an empty string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Context manager so the handle is released even if the read fails.
    with open(path, encoding="utf-8", errors="replace") as fh:
        text = fh.read()

    name = os.path.splitext(os.path.basename(path))[0]
    title_m = re.search(r"^#\s+(.+?)\s*$", text, re.M)
    title = title_m.group(1).strip() if title_m else name
    # A section runs until the next level-2 heading or the end of the file.
    up = re.search(r"##\s*User Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
    sp = re.search(r"##\s*System Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
    cwe_m = re.search(r"(CWE-\d+)", text)
    sev_m = re.search(r"Severity:\s*([A-Za-z]+)", text)
    return Agent(
        name=name, path=path, title=title, kind=kind,
        user_prompt=(up.group(1).strip() if up else ""),
        system_prompt=(sp.group(1).strip() if sp else ""),
        cwe=(cwe_m.group(1) if cwe_m else ""),
        severity=(sev_m.group(1) if sev_m else ""),
        tags=[name],
    )
class AgentLibrary:
    """In-memory index of the markdown playbooks under an agent directory.

    Playbooks are read once, at construction time, from `<base>/vulns` and
    `<base>/meta` and stored in `self.vulns` / `self.meta`, keyed by file stem.
    A missing subdirectory simply yields an empty store.
    """

    def __init__(self, base: str = AGENTS_DIR):
        self.base = base
        self.vulns: Dict[str, Agent] = {}
        self.meta: Dict[str, Agent] = {}
        self._load()

    def _load(self):
        """Parse every `*.md` file in the two library subdirectories."""
        vdir, mdir = os.path.join(self.base, "vulns"), os.path.join(self.base, "meta")
        for d, kind, store in ((vdir, "vuln", self.vulns), (mdir, "meta", self.meta)):
            if not os.path.isdir(d):
                continue
            # Sorted so the load order does not depend on the filesystem.
            for fn in sorted(os.listdir(d)):
                if fn.endswith(".md"):
                    a = _parse(os.path.join(d, fn), kind)
                    store[a.name] = a

    # -- selection ---------------------------------------------------------
    def candidates_for(self, recon: Optional[dict]) -> List[str]:
        """Return vuln-agent names whose preconditions plausibly match recon.

        With no recon (or a generic target) we return all vuln agents and let
        the backend prioritise. With recon signals we narrow to the relevant
        subset plus a baseline of always-run web agents.
        """
        if not recon:
            return list(self.vulns.keys())
        wanted: set = set()
        signals = _signals_from_recon(recon)
        for sig in signals:
            for kw in SIGNAL_HINTS.get(sig, []):
                # Keyword hints are substring matches against agent names.
                wanted.update(n for n in self.vulns if kw in n)
        # Always include core web classes regardless of recon.
        baseline = ["xss_reflected", "xss_stored", "xss_dom", "sqli_error", "sqli_blind",
                    "ssrf", "idor", "csrf", "open_redirect", "command_injection",
                    "lfi", "path_traversal", "auth_bypass", "security_headers",
                    "information_disclosure", "cors_misconfig"]
        wanted.update(n for n in baseline if n in self.vulns)
        # If nothing matched at all, fall back to the whole library.
        return sorted(wanted) if wanted else list(self.vulns.keys())

    def ranked(self, recon: Optional[dict], weights: Dict[str, float]) -> List[str]:
        """Return the candidates ordered by descending weight.

        Agents missing from `weights` are treated as 0.5. The sort is stable,
        so ties keep the order produced by `candidates_for`.
        """
        cands = self.candidates_for(recon)
        return sorted(cands, key=lambda n: weights.get(n, 0.5), reverse=True)

    def index_markdown(self, names: List[str], weights: Dict[str, float]) -> str:
        """Compact catalog (name — title — CWE — weight) for the master prompt.

        Names that are not known vuln agents are skipped.
        """
        rows = []
        for n in names:
            a = self.vulns.get(n)
            if not a:
                continue
            rows.append(f"- `{n}` — {a.title} [{a.cwe or 'CWE-?'}] (priority {weights.get(n, 0.5):.2f})")
        return "\n".join(rows)

    def render(self, name: str, target: str, recon_json: str = "{}", collaborator: str = "") -> str:
        """Return a playbook's text with its placeholders filled in.

        Args:
            name: Agent name; vuln agents are looked up before meta agents.
            target: Replacement for `{target}`.
            recon_json: Replacement for `{recon_json}`.
            collaborator: Replacement for `{collaborator}`.

        Raises:
            KeyError: If no agent with that name is loaded.
            OSError: If the playbook file can no longer be read.
        """
        a = self.vulns.get(name) or self.meta.get(name)
        if not a:
            raise KeyError(name)
        with open(a.path, encoding="utf-8", errors="replace") as fh:
            body = fh.read()
        values = {
            "{target}": target,
            "{recon_json}": recon_json,
            "{collaborator}": collaborator,
        }
        # Single pass over the template: a placeholder-looking token inside a
        # substituted value (e.g. a target URL containing "{collaborator}") is
        # left alone instead of being expanded by a later replacement. A
        # callable replacement also keeps backslashes in the values literal.
        return re.sub(r"\{(?:target|recon_json|collaborator)\}",
                      lambda m: values[m.group(0)], body)

    def counts(self) -> Dict[str, int]:
        """Return the number of loaded vuln agents, meta agents, and their sum."""
        return {"vulns": len(self.vulns), "meta": len(self.meta),
                "total": len(self.vulns) + len(self.meta)}
def _signals_from_recon(recon: dict) -> List[str]:
    """Translate a recon dict into an ordered, de-duplicated list of signal names.

    The `apis`, `cloud`, `auth` and `tech` entries are read as dicts (a missing
    or falsy entry counts as empty). `upload` and `cache_proxy` are always part
    of the result.
    """
    def section(key: str) -> dict:
        # Missing keys and explicit None/empty values both become {}.
        return recon.get(key, {}) or {}

    apis = section("apis")
    cloud = section("cloud")
    auth = section("auth")
    tech = section("tech")

    # Every tech value flattened into one lowercase string for substring tests.
    tech_blob = " ".join(str(value) for value in tech.values()).lower()
    template_markers = ("flask", "jinja", "twig", "freemarker", "velocity", "thymeleaf")

    # (condition, signal) pairs, in the order the signals must be emitted.
    checks = [
        (apis.get("graphql"), "graphql"),
        (apis.get("rest"), "rest"),
        (apis.get("ws"), "ws"),
        (recon.get("ai_features"), "ai_features"),
        (cloud.get("provider") or cloud.get("metadata_surface"), "cloud"),
        (auth.get("session") == "jwt", "jwt"),
        (auth.get("oauth"), "oauth"),
        (any(marker in tech_blob for marker in template_markers), "template"),
        (tech.get("waf") or tech.get("http2"), "cache_proxy"),
    ]
    found = [signal for hit, signal in checks if hit]
    # Generic surfaces always worth a look.
    found.extend(("upload", "cache_proxy"))

    # Drop repeats while keeping the first occurrence of each signal.
    unique: List[str] = []
    for signal in found:
        if signal not in unique:
            unique.append(signal)
    return unique
if __name__ == "__main__":
    # Run as a script: load the default library and print how many agents it holds.
    library = AgentLibrary()
    print(library.counts())
+147
View File
@@ -0,0 +1,147 @@
"""
Agentic CLI backends for NeuroSploit v3.3.0.
NeuroSploit does not embed its own agent loop — it delegates autonomous
execution to whichever agentic coding CLI is installed locally:
* Claude Code (`claude`) — also the path for a Claude *subscription*
* Codex CLI (`codex`)
* Grok CLI (`grok`)
Each backend is driven headlessly: we pass the composed master prompt, a working
directory (with `.mcp.json` for Playwright), and provider env, and let the CLI
run the test autonomously to completion. The engine then reads the artifacts the
run wrote to `results/`.
"""
import os
import shutil
import subprocess
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass
class Backend:
    """Base description of an agentic CLI that can be driven headlessly.

    Subclasses implement `build_argv` for their own command-line syntax.
    """

    key: str     # registry key
    label: str   # human-readable name
    binary: str  # executable name, resolved through PATH

    def available(self) -> bool:
        """Return True when `binary` can be found on PATH."""
        resolved = shutil.which(self.binary)
        return resolved is not None

    def version(self) -> str:
        """Return the first line printed by `<binary> --version`, or "?".

        "?" is returned when the command prints nothing or when running it
        fails for any reason (missing binary, timeout, ...).
        """
        try:
            probe = subprocess.run([self.binary, "--version"], capture_output=True,
                                   text=True, timeout=15)
            # Fall back to stderr when stdout is empty.
            banner = probe.stdout or probe.stderr
            if not banner:
                return "?"
            return banner.strip().splitlines()[0]
        except Exception:
            return "?"

    def build_argv(self, prompt_file: str, workdir: str, model: str,
                   autonomous: bool, mcp_config: Optional[str]) -> List[str]:
        """Build the argv used to launch the backend.

        The prompt is made available as a file (`prompt_file`) to avoid
        arg-length limits and shell-escaping issues. The base class has no
        command line of its own and always raises NotImplementedError.
        """
        raise NotImplementedError
@dataclass
class ClaudeBackend(Backend):
    """Command-line builder for the `claude` binary in print (`-p`) mode."""

    # The caller pipes the prompt on stdin rather than passing a file argument.
    stdin_prompt: bool = True

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv for a headless run; `prompt_file`/`workdir` are unused here."""
        # Headless "print" mode reads the prompt from stdin (caller pipes the file).
        argv = [self.binary, "-p", "--output-format", "stream-json", "--verbose"]
        argv.extend(["--model", model] if model else [])
        argv.extend(["--mcp-config", mcp_config] if mcp_config else [])
        # Full autonomy for an authorized engagement in an isolated workdir.
        argv.extend(["--dangerously-skip-permissions"] if autonomous else [])
        return argv
@dataclass
class CodexBackend(Backend):
    """Command-line builder for `codex exec`."""

    # The trailing "-" argument makes the CLI take the prompt from stdin.
    stdin_prompt: bool = True

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv for a non-interactive run; `prompt_file` is unused here."""
        # `codex exec` runs non-interactively to completion.
        argv = [self.binary, "exec", "--cd", workdir]
        argv.extend(["--model", model] if model else [])
        argv.extend(["--dangerously-bypass-approvals-and-sandbox"] if autonomous else [])
        # NOTE(review): confirm `mcp_config_file` is a config key the installed
        # Codex CLI actually honours.
        argv.extend(["--config", f"mcp_config_file={mcp_config}"] if mcp_config else [])
        argv.append("-")  # read prompt from stdin
        return argv
@dataclass
class GrokBackend(Backend):
    """Command-line builder for the `grok` binary."""

    # The prompt is handed over as a file path, so nothing is piped on stdin.
    stdin_prompt: bool = False

    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
        """Return the argv for a headless run driven by `prompt_file`."""
        # grok-cli headless/print form.
        argv = [self.binary, "--prompt-file", prompt_file, "--workdir", workdir]
        argv.extend(["--model", model] if model else [])
        argv.extend(["--mcp-config", mcp_config] if mcp_config else [])
        argv.extend(["--yolo"] if autonomous else [])
        return argv
# Every supported backend, keyed by the short name accepted by `get()`.
# Positional arguments are (key, label, binary).
REGISTRY: Dict[str, Backend] = {
"claude": ClaudeBackend("claude", "Claude Code", "claude"),
"codex": CodexBackend("codex", "Codex CLI", "codex"),
"grok": GrokBackend("grok", "Grok CLI", "grok"),
}
def detect() -> List[Backend]:
    """Return installed backends, in preference order."""
    installed = []
    for key in ("claude", "codex", "grok"):
        candidate = REGISTRY[key]
        if candidate.available():
            installed.append(candidate)
    return installed
def get(key: str) -> Optional[Backend]:
    """Look up a backend by registry key; return None for an unknown key."""
    try:
        return REGISTRY[key]
    except KeyError:
        return None
@dataclass
class RunResult:
    """What `run` reports back after a backend invocation (or dry run)."""

    backend: str     # key of the backend that was used
    returncode: int  # exit status of the backend process
    log_path: str    # file that received the backend's stdout and stderr
    workdir: str     # directory the backend was run in
def run(backend: Backend, prompt: str, workdir: str, model: str = "",
        autonomous: bool = True, mcp_config: Optional[str] = None,
        env: Optional[Dict[str, str]] = None, timeout: int = 7200,
        dry_run: bool = False) -> RunResult:
    """Execute a backend against the composed prompt and stream logs to disk.

    The prompt is written to `<workdir>/master_prompt.md`; the backend's
    combined stdout/stderr goes to `<workdir>/backend.log`.

    Args:
        backend: Backend whose `build_argv` produces the command line.
        prompt: Full master prompt text.
        workdir: Directory used for the prompt, the log and as the process cwd
            (created if missing).
        model: Model identifier forwarded to `build_argv`.
        autonomous: Forwarded to `build_argv`.
        mcp_config: Optional MCP config path forwarded to `build_argv`.
        env: Extra environment variables layered over the current environment.
        timeout: Seconds to wait before the backend is killed.
        dry_run: When True, only record the command line in the log.

    Returns:
        A `RunResult`. The return code is 0 for a dry run, the process exit
        status for a completed run, and 124 when the run hit `timeout`.
    """
    os.makedirs(workdir, exist_ok=True)
    prompt_file = os.path.join(workdir, "master_prompt.md")
    with open(prompt_file, "w", encoding="utf-8") as fh:
        fh.write(prompt)
    log_path = os.path.join(workdir, "backend.log")
    argv = backend.build_argv(prompt_file, workdir, model, autonomous, mcp_config)
    full_env = os.environ.copy()
    if env:
        full_env.update(env)
    if dry_run:
        with open(log_path, "w", encoding="utf-8") as fh:
            fh.write("DRY RUN\n" + " ".join(argv) + "\n")
        return RunResult(backend.key, 0, log_path, workdir)
    # Backends without the stdin_prompt flag read the prompt file themselves.
    stdin_data = prompt if getattr(backend, "stdin_prompt", False) else None
    with open(log_path, "w", encoding="utf-8") as logf:
        try:
            proc = subprocess.run(
                argv, input=stdin_data, stdout=logf, stderr=subprocess.STDOUT,
                cwd=workdir, env=full_env, text=True, timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            # subprocess.run has already killed the child. Report the timeout
            # through the return code (124, as timeout(1) does) so the caller
            # can still collect whatever artifacts were written so far.
            logf.write(f"\n[neurosploit] backend timed out after {timeout}s\n")
            return RunResult(backend.key, 124, log_path, workdir)
    return RunResult(backend.key, proc.returncode, log_path, workdir)
+163
View File
@@ -0,0 +1,163 @@
"""
NeuroSploit v3.3.0 — terminal launcher.
Two ways in:
neurosploit # interactive: prompts for URL + choices
neurosploit run https://t.example --backend claude --model claude-opus-4-8
The interactive flow asks for a URL, lets you pick from the agentic CLI backends
actually installed on this machine (Claude Code / Codex / Grok, or a Claude
subscription), picks a model, then launches the autonomous engagement.
"""
import argparse
import sys
from . import backends, models
from .config import RunConfig
from .orchestrator import run_engagement
# ASCII-art banner printed at the start of interactive and `run` sessions.
# Raw string, so the backslashes in the art are kept literally.
BANNER = r"""
_ _ ____ _ _ _
| \ | | ___ _ _ _ __ ___ / ___|_ __ | | ___ (_) |_
| \| |/ _ \ | | | '__/ _ \\___ \| '_ \| |/ _ \| | __|
| |\ | __/ |_| | | | (_) |___) | |_) | | (_) | | |_
|_| \_|\___|\__,_|_| \___/|____/| .__/|_|\___/|_|\__|
v3.3.0 Autonomous MD-Agent Engine
|_|
"""
def _progress(msg: str):
    """Print one status line, prefixed with a marker, and flush immediately."""
    line = " [*] {}".format(msg)
    print(line, flush=True)
def _choose(prompt, options, default_idx=0):
    """Print a numbered menu and return the key of the option the user picks.

    Args:
        prompt: Text shown before the input field.
        options: Sequence of `(key, label)` pairs.
        default_idx: Zero-based index of the option used for blank or
            invalid input.

    Returns:
        The `key` of the chosen (or default) option.
    """
    for i, (_key, label) in enumerate(options):
        mark = "*" if i == default_idx else " "
        print(f" {mark} {i + 1}) {label}")
    raw = input(f"{prompt} [{default_idx + 1}]: ").strip()
    if not raw:
        return options[default_idx][0]
    try:
        picked = int(raw)
    except ValueError:
        picked = 0  # not a number: handled as out of range below
    # Explicit range check: without it "0" or a negative number would index
    # from the end of the list and silently select the wrong option.
    if 1 <= picked <= len(options):
        return options[picked - 1][0]
    print(" invalid choice, using default")
    return options[default_idx][0]
def interactive() -> int:
    """Prompt for target, backend and model, then run one engagement.

    Returns:
        2 when no backend is installed or no target URL was entered;
        otherwise 0 if the backend exited with status 0, else 1.
    """
    print(BANNER)
    installed = backends.detect()
    if not installed:
        print(" [!] No agentic CLI backend found (claude / codex / grok).")
        print(" Install one: Claude Code, Codex CLI, or Grok CLI, then re-run.")
        return 2
    # version() spawns `<binary> --version`; probe each backend once and reuse
    # the answer for both the summary line and the menu.
    versions = {b.key: b.version() for b in installed}
    detected = ", ".join(f"{b.label} ({versions[b.key]})" for b in installed)
    print(f" Detected backends: {detected}\n")

    target = input(" Target URL: ").strip()
    if not target:
        print(" [!] A target URL is required.")
        return 2
    # Bare host names default to HTTPS.
    if not target.startswith(("http://", "https://")):
        target = "https://" + target
    scope = input(f" In-scope hosts [default: {target}]: ").strip() or target
    collaborator = input(" OOB collaborator host (optional, for blind/SSRF proof): ").strip()
    backend_key = _choose(" Choose backend",
                          [(b.key, f"{b.label} [{versions[b.key]}]") for b in installed])

    # Provider/model: map backend → sensible provider, then pick a model.
    prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
    provider = prov_for_backend.get(backend_key, "anthropic")
    # The subscription login only exists for Claude Code, so only ask there;
    # for other backends the answer would be ignored anyway.
    if backend_key == "claude":
        sub = input(" Use Claude subscription (login) instead of an API key? [y/N]: ").strip().lower()
        if sub == "y":
            provider = "claude_subscription"
    model_opts = [(m.id, f"{m.label} ({m.context // 1000}k ctx) {m.notes}")
                  for m in models.list_models(provider)] or [("", "backend default")]
    model = _choose(" Choose model", model_opts)

    cfg = RunConfig(target=target, scope=scope, backend=backend_key,
                    provider=provider, model=model, collaborator=collaborator)
    print()
    _progress(f"Starting autonomous engagement against {target}")
    result = run_engagement(cfg, progress=_progress)
    _summary(result)
    return 0 if result["returncode"] == 0 else 1
def _summary(result):
    """Print the end-of-run recap: workdir, finding count and severity tally.

    Reads `result["workdir"]` and `result["findings"]` (a list of dicts); a
    finding without a `severity` key is counted under "?".
    """
    findings = result["findings"]
    workdir = result["workdir"]

    # Tally findings per severity, keeping first-seen order.
    tally = {}
    for finding in findings:
        sev = finding.get("severity", "?")
        tally[sev] = tally.get(sev, 0) + 1

    print("\n ── Engagement complete ─────────────────────────────")
    print(f" Workdir : {workdir}")
    print(f" Findings: {len(findings)} validated")
    if tally:
        breakdown = ", ".join(f"{k}={v}" for k, v in tally.items())
        print(" By severity: " + breakdown)
    print(f" Report : reports/ | Raw: {workdir}/findings.json")
    print(" ────────────────────────────────────────────────────")
def main(argv=None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; None lets argparse use `sys.argv[1:]`.

    Returns:
        Process exit status: 0 on success, 1 when the backend exited non-zero,
        2 when the engagement could not be started (or interactive input was
        insufficient), 130 when the user aborted.
    """
    parser = argparse.ArgumentParser(
        prog="neurosploit",
        description="NeuroSploit v3.3.0 autonomous MD-agent pentest engine")
    sub = parser.add_subparsers(dest="cmd")
    r = sub.add_parser("run", help="run an engagement against a URL")
    r.add_argument("url")
    r.add_argument("--backend", default=None, help="claude | codex | grok (default: first installed)")
    r.add_argument("--provider", default=None)
    r.add_argument("--model", default=None)
    r.add_argument("--scope", default="")
    r.add_argument("--collaborator", default="")
    r.add_argument("--no-rl", action="store_true")
    r.add_argument("--no-mcp", action="store_true")
    r.add_argument("--max-agents", type=int, default=0)
    r.add_argument("--dry-run", action="store_true", help="compose prompt + show command without executing the backend")
    sub.add_parser("backends", help="list detected CLI backends")
    sub.add_parser("agents", help="show agent library counts")
    args = parser.parse_args(argv)

    # No sub-command: fall into the interactive wizard.
    if args.cmd is None:
        try:
            return interactive()
        except (KeyboardInterrupt, EOFError):
            print("\n aborted.")
            return 130

    if args.cmd == "backends":
        installed = backends.detect()  # probe PATH once, reuse for both branches
        for b in installed:
            print(f" {b.key:8} {b.label:14} {b.version()}")
        if not installed:
            print(" none installed (claude / codex / grok)")
        return 0

    if args.cmd == "agents":
        # Imported lazily so the other sub-commands do not load the library.
        from .agent_loader import AgentLibrary
        print(AgentLibrary().counts())
        return 0

    if args.cmd == "run":
        url = args.url if args.url.startswith(("http://", "https://")) else "https://" + args.url
        # Only probe for installed backends when none was requested explicitly.
        installed = [] if args.backend else backends.detect()
        backend = args.backend or (installed[0].key if installed else "claude")
        prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
        provider = args.provider or prov_for_backend.get(backend, "anthropic")
        known_models = [] if args.model else models.list_models(provider)
        model = args.model or (known_models[0].id if known_models else "")
        cfg = RunConfig(target=url, scope=args.scope or url, backend=backend,
                        provider=provider, model=model, collaborator=args.collaborator,
                        use_rl=not args.no_rl, use_mcp=not args.no_mcp,
                        max_agents=args.max_agents, dry_run=args.dry_run)
        print(BANNER)
        try:
            result = run_engagement(cfg, progress=_progress)
        except RuntimeError as exc:
            # run_engagement raises RuntimeError when the chosen backend is not
            # installed; show the message instead of a traceback.
            print(f" [!] {exc}")
            return 2
        except KeyboardInterrupt:
            print("\n aborted.")
            return 130
        _summary(result)
        return 0 if result["returncode"] == 0 else 1

    parser.print_help()
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
+52
View File
@@ -0,0 +1,52 @@
"""Configuration & paths for NeuroSploit v3.3.0."""
import os
from dataclasses import dataclass, field
from typing import Optional
# Parent of the directory containing this file; every path built by `_p` is
# anchored here.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def _p(*parts) -> str:
    """Join `parts` onto ROOT and return the resulting path."""
    segments = (ROOT,) + parts
    return os.path.join(*segments)
@dataclass
class RunConfig:
    """All settings for one engagement; only `target` is mandatory."""

    # What to test and under which rules.
    target: str
    scope: str = ""
    rules_of_engagement: str = "Authorized, non-destructive testing only. No DoS unless explicitly permitted. Stay strictly in scope."

    # Which CLI / provider / model drives the run.
    backend: str = "claude"            # claude | codex | grok
    provider: str = "anthropic"        # see models.PROVIDERS
    model: str = "claude-opus-4-8"
    autonomous: bool = True

    # Run-time options.
    collaborator: str = ""             # OOB callback host for blind vuln proof
    use_rl: bool = True
    use_mcp: bool = True
    max_agents: int = 0                # 0 = no cap (backend prioritises)
    timeout: int = 7200
    dry_run: bool = False
    workdir: str = ""

    def resolved_workdir(self) -> str:
        """Return `workdir` if set, else `results/<slug of target>` under ROOT."""
        if self.workdir:
            return self.workdir
        return _p("results", _slug(self.target))
def _slug(url: str) -> str:
    """Turn a URL into a directory-name-safe token of at most 60 characters.

    Scheme markers are removed, every non-alphanumeric character becomes "_",
    surrounding underscores are stripped and the result is truncated. An
    empty result is replaced by "target".
    """
    bare = url
    for scheme in ("https://", "http://"):
        bare = bare.replace(scheme, "")
    safe_chars = [ch if ch.isalnum() else "_" for ch in bare]
    # Strip first, then truncate (so a truncated slug may end with "_").
    slug = "".join(safe_chars).strip("_")[:60]
    if slug:
        return slug
    return "target"
# Well-known locations, all resolved under ROOT. `rl_state` is a file path;
# the other entries are directories.
PATHS = {
"agents": _p("agents_md"),
"results": _p("results"),
"reports": _p("reports"),
"data": _p("data"),
"logs": _p("logs"),
"rl_state": _p("data", "rl_state.json"),
}
def ensure_dirs():
    """Create the results, reports, data and logs directories if missing."""
    for name in ("results", "reports", "data", "logs"):
        directory = PATHS[name]
        os.makedirs(directory, exist_ok=True)
+49
View File
@@ -0,0 +1,49 @@
"""
MCP bridge for NeuroSploit v3.3.0.
Generates the MCP server configuration the agentic CLI backend loads so the
autonomous run can drive a real browser (Playwright) and any extra MCP tooling.
Playwright lets agents render SPAs, execute JS, capture DOM/network/screenshots
and confirm client-side execution (XSS/CSTI) — turning "the payload reflected"
into "the payload executed", which is what the validator agents demand.
"""
import json
import os
import shutil
from typing import Dict
# Parent of the directory containing this file.
# NOTE(review): not referenced by the functions visible in this module.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def playwright_server() -> Dict:
    """Return the MCP server entry that launches Playwright MCP through npx.

    The command is always `npx -y @playwright/mcp@latest --headless --isolated`;
    a fresh dict is returned on every call.
    """
    launch_args = ["-y", "@playwright/mcp@latest", "--headless", "--isolated"]
    server = {"command": "npx"}
    server["args"] = launch_args
    return server
def build_mcp_config(extra: Dict[str, Dict] | None = None) -> Dict:
    """Return the MCP configuration dict: `{"mcpServers": {...}}`.

    The `playwright` server is always present; entries in `extra` are merged
    on top of it (an `extra` entry named `playwright` replaces the default).
    """
    servers = {"playwright": playwright_server(), **(extra or {})}
    return {"mcpServers": servers}
def write_mcp_config(workdir: str, extra: Dict[str, Dict] | None = None) -> str:
    """Write a `.mcp.json` into the run workdir and return its path.

    Claude Code auto-loads `.mcp.json` from the working directory; Codex/Grok
    accept an explicit config path (see backends.py).

    Args:
        workdir: Directory that receives the file (created if missing).
        extra: Additional MCP server entries, passed to `build_mcp_config`.

    Returns:
        The path of the written `.mcp.json`.
    """
    cfg = build_mcp_config(extra)
    path = os.path.join(workdir, ".mcp.json")
    os.makedirs(workdir, exist_ok=True)
    # Context manager so the file is flushed and closed before the backend
    # process is started and tries to read it.
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(cfg, fh, indent=2)
    return path
def playwright_available() -> bool:
    """Best-effort check that Playwright MCP can be launched.

    Only verifies that an `npx` executable is on PATH.
    """
    npx_path = shutil.which("npx")
    return npx_path is not None
+141
View File
@@ -0,0 +1,141 @@
"""
Model registry for NeuroSploit v3.3.0.
Maps logical providers to their latest models and the env vars / base URLs the
agentic CLI backends need. Includes the NVIDIA NIM provider added in PR #28.
The engine itself does not call these APIs directly — the chosen CLI backend
(Claude Code / Codex / Grok) does. This registry is what the launcher uses to
present choices and to export the right environment to the backend process.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass(frozen=True)
class Model:
    """Immutable description of one selectable model."""

    id: str                  # identifier handed to the backend
    label: str               # display name shown in menus
    context: int = 200_000   # context size; the launcher shows it as `context // 1000` "k ctx"
    notes: str = ""          # short free-text hint shown next to the label
@dataclass(frozen=True)
class Provider:
    """Immutable description of a model provider and how to reach it."""

    # Identity.
    key: str
    label: str

    # Credentials and endpoint.
    env_keys: List[str]                 # accepted API-key env var names
    base_url: Optional[str] = None      # OpenAI-compatible base URL, if any
    base_url_env: Optional[str] = None  # env var the backend reads for base URL

    # Offered models and access mode.
    models: List[Model] = field(default_factory=list)
    subscription: bool = False          # uses a CLI subscription rather than an API key
# Registry of every provider the launcher can offer, keyed by provider key.
# `list_models` reads each entry's `models`; `resolve_env` reads `env_keys`,
# `base_url` and `base_url_env`.
PROVIDERS: Dict[str, Provider] = {
# --- Anthropic (latest Claude family; default) -------------------------
"anthropic": Provider(
key="anthropic", label="Anthropic Claude",
env_keys=["ANTHROPIC_API_KEY"],
models=[
Model("claude-opus-4-8", "Claude Opus 4.8", 1_000_000, "Most capable; deep multi-step pentest reasoning"),
Model("claude-sonnet-4-6", "Claude Sonnet 4.6", 1_000_000, "Balanced cost/quality default"),
Model("claude-haiku-4-5", "Claude Haiku 4.5", 200_000, "Fast/cheap recon and triage"),
],
),
# --- OpenAI ------------------------------------------------------------
"openai": Provider(
key="openai", label="OpenAI",
env_keys=["OPENAI_API_KEY"],
models=[
Model("gpt-5.1", "GPT-5.1", 400_000, "Strong general reasoning"),
Model("o4", "o4", 200_000, "Deliberate reasoning for validation"),
],
),
# --- xAI Grok ----------------------------------------------------------
"xai": Provider(
key="xai", label="xAI Grok",
env_keys=["XAI_API_KEY", "GROK_API_KEY"],
base_url="https://api.x.ai/v1", base_url_env="OPENAI_BASE_URL",
models=[
Model("grok-4", "Grok 4", 256_000, "Fast agentic execution"),
Model("grok-4-fast", "Grok 4 Fast", 128_000, "Low-latency triage"),
],
),
# --- NVIDIA NIM (PR #28) ----------------------------------------------
# OpenAI-compatible endpoint at integrate.api.nvidia.com; keys are `nvapi-...`.
"nvidia_nim": Provider(
key="nvidia_nim", label="NVIDIA NIM",
env_keys=["NVIDIA_NIM_API_KEY", "NVIDIA_API_KEY"],
base_url="https://integrate.api.nvidia.com/v1", base_url_env="OPENAI_BASE_URL",
models=[
Model("nvidia/llama-3.3-nemotron-super-49b-v1", "Nemotron Super 49B", 128_000, "NIM hosted reasoning"),
Model("deepseek-ai/deepseek-r1", "DeepSeek-R1 (NIM)", 128_000, "Strong reasoning via NIM"),
Model("qwen/qwen2.5-coder-32b-instruct", "Qwen2.5 Coder 32B (NIM)", 128_000, "Code/exploit oriented"),
],
),
# --- Google Gemini -----------------------------------------------------
"gemini": Provider(
key="gemini", label="Google Gemini",
env_keys=["GEMINI_API_KEY", "GOOGLE_API_KEY"],
models=[
Model("gemini-2.5-pro", "Gemini 2.5 Pro", 1_000_000, "Large context recon"),
Model("gemini-2.5-flash", "Gemini 2.5 Flash", 1_000_000, "Fast/cheap"),
],
),
# --- OpenRouter (aggregator) ------------------------------------------
"openrouter": Provider(
key="openrouter", label="OpenRouter",
env_keys=["OPENROUTER_API_KEY"],
base_url="https://openrouter.ai/api/v1", base_url_env="OPENAI_BASE_URL",
models=[Model("anthropic/claude-opus-4-8", "Opus 4.8 (OpenRouter)", 1_000_000)],
),
# --- Local Ollama ------------------------------------------------------
"ollama": Provider(
key="ollama", label="Ollama (local)",
env_keys=[],
base_url="http://localhost:11434/v1", base_url_env="OPENAI_BASE_URL",
models=[
Model("qwen2.5-coder:32b", "Qwen2.5 Coder 32B (local)", 32_000),
Model("llama3.3:70b", "Llama 3.3 70B (local)", 128_000),
],
),
# --- Subscription via Claude Code CLI (no API key needed) -------------
"claude_subscription": Provider(
key="claude_subscription", label="Claude subscription (via Claude Code login)",
env_keys=[], subscription=True,
models=[
Model("claude-opus-4-8", "Claude Opus 4.8 (subscription)", 1_000_000),
Model("claude-sonnet-4-6", "Claude Sonnet 4.6 (subscription)", 1_000_000),
],
),
}
# Key into PROVIDERS of the provider treated as the default.
DEFAULT_PROVIDER = "anthropic"
def get_provider(key: str) -> Optional[Provider]:
    """Return the provider registered under `key`, or None if there is none."""
    try:
        return PROVIDERS[key]
    except KeyError:
        return None
def list_models(provider_key: str) -> List[Model]:
    """Return a new list of the provider's models; empty for an unknown provider."""
    provider = PROVIDERS.get(provider_key)
    if not provider:
        return []
    return list(provider.models)
def resolve_env(provider_key: str, model_id: str) -> Dict[str, str]:
    """Return the env vars a backend needs for this provider/model selection.

    For a known provider the result holds, in this order: the base-URL
    variable (when the provider defines both a URL and a variable name), the
    first of the provider's API-key variables that is set and non-empty in
    the current environment, then `NEUROSPLOIT_MODEL` and
    `NEUROSPLOIT_PROVIDER`. An unknown provider yields an empty dict.
    """
    import os

    provider = PROVIDERS.get(provider_key)
    if not provider:
        return {}

    resolved: Dict[str, str] = {}
    if provider.base_url and provider.base_url_env:
        resolved[provider.base_url_env] = provider.base_url

    # Forward only the first credential variable that actually has a value.
    first_set = next((name for name in provider.env_keys if os.getenv(name)), None)
    if first_set is not None:
        resolved[first_set] = os.environ[first_set]

    resolved["NEUROSPLOIT_MODEL"] = model_id
    resolved["NEUROSPLOIT_PROVIDER"] = provider_key
    return resolved
+150
View File
@@ -0,0 +1,150 @@
"""
Orchestrator for NeuroSploit v3.3.0.
Ties the pieces together: load the agent library, apply RL weights to pick and
rank specialist agents for the target, compose the single master prompt (the
`meta/orchestrator` playbook + the recon-aware agent catalog + the operating
contract), hand it to the chosen CLI backend with Playwright MCP, then read back
artifacts and feed the RL loop.
"""
import json
import os
from typing import Dict, List, Optional
from . import backends, mcp, models
from .agent_loader import AgentLibrary
from .config import RunConfig, PATHS, ensure_dirs
from .rl import RLEngine, outcomes_from_findings
def compose_master_prompt(cfg: RunConfig, lib: AgentLibrary, rl: RLEngine,
                          recon: Optional[dict]) -> str:
    """Assemble the single prompt handed to the CLI backend.

    The prompt is three sections joined by newlines: a header (target, scope,
    rules, the ranked agent catalog and the RL priors of the first 40 ranked
    agents), the rendered `orchestrator` meta playbook when the library has
    one, and the pipeline/output contract. Blank sections are dropped.

    Args:
        cfg: Run settings; `use_rl` decides whether RL weights are consulted
            and a positive `max_agents` caps the ranked list.
        lib: Loaded agent library.
        rl: RL engine supplying per-agent weights.
        recon: Optional recon data used for candidate selection.
    """
    weights = rl.weights() if cfg.use_rl else {}
    ranked = lib.ranked(recon, weights)
    if cfg.max_agents > 0:
        ranked = ranked[:cfg.max_agents]
    agent_index = lib.index_markdown(ranked, weights)

    # Only the first 40 ranked agents are listed in the priors block.
    priors = {n: round(weights.get(n, 0.5), 2) for n in ranked[:40]}
    rl_weights_txt = json.dumps(priors, indent=0)

    if "orchestrator" in lib.meta:
        orch = lib.render("orchestrator", cfg.target,
                          recon_json=json.dumps(recon or {}),
                          collaborator=cfg.collaborator)
    else:
        orch = ""

    library_counts = lib.counts()
    header = f"""# NeuroSploit v3.3.0 — Autonomous Engagement
You are running an AUTHORIZED, autonomous web penetration test.
TARGET: {cfg.target}
SCOPE: {cfg.scope or cfg.target}
RULES OF ENGAGEMENT: {cfg.rules_of_engagement}
OOB COLLABORATOR: {cfg.collaborator or '(none provided — skip OOB-only confirmations)'}
WORKDIR: {cfg.resolved_workdir()}
You have Playwright MCP (browser automation, JS execution, DOM/network capture,
screenshots) and local shell tools. Use the browser to PROVE client-side
execution; use the collaborator to PROVE blind/OOB issues.
## Specialist agent library
The `agents_md/` directory holds {library_counts['vulns']} vulnerability playbooks
and {library_counts['meta']} meta playbooks. For each specialist you choose to run,
open its file under `agents_md/vulns/<name>.md`, substitute the target and recon,
and follow its methodology and (strict) anti-false-positive System Prompt.
### Recon-ranked candidate agents (by RL priority)
{agent_index}
### RL priors (higher = historically more productive on similar targets)
{rl_weights_txt}
"""
    contract = f"""
## Required pipeline (follow in order)
1. Run `agents_md/meta/recon.md` → write `results/recon.json`.
2. Re-rank the candidate agents above using recon + RL priors; skip agents with
no applicable surface.
3. Execute each selected specialist; gather candidate findings WITH evidence.
4. For every candidate: `meta/exploit_validator.md` → `meta/false_positive_filter.md`.
Discard anything not reproducibly exploitable.
5. Score survivors: `meta/severity_assessor.md` then `meta/impact_evaluator.md`.
6. `meta/reporter.md` → write `results/findings.json` AND `reports/report.md`.
7. `meta/rl_feedback.md` → write/merge `data/rl_state.json`.
## Output contract (MANDATORY)
Write `results/findings.json` as a JSON array of objects:
{{"id","agent","title","severity","cvss","cwe","endpoint","payload","evidence","impact","remediation","confidence","validated"}}
Only include findings with `validated: true`. If you find nothing, write `[]`.
Also write `results/agents_ran.json` as a JSON array of the agent names you executed.
Stay strictly in scope. Never run destructive/DoS payloads unless ROE permits.
Report ONLY proven, reproducible findings.
"""
    sections = [part for part in (header, orch, contract) if part.strip()]
    return "\n".join(sections)
def _read_json_list(path: str) -> list:
    """Return the JSON array stored at `path`, or [] if it is missing, unreadable or not an array."""
    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # Missing file, I/O error, bad encoding or malformed JSON: artifacts
        # are written by the backend, so treat them as best-effort input.
        return []
    return data if isinstance(data, list) else []


def collect_results(workdir: str) -> Dict:
    """Read the findings and agents-ran artifacts left behind by a backend run.

    Looks for `findings.json` and `agents_ran.json` in, in order: `workdir`,
    `<workdir>/results` and the global results directory. For each artifact
    the first non-empty JSON array wins.

    Args:
        workdir: Working directory of the run.

    Returns:
        `{"findings": [...], "agents_ran": [...]}`; either list may be empty.
    """
    findings: list = []
    ran: list = []
    # The master prompt asks for `results/findings.json`, which a backend whose
    # cwd is the workdir resolves to <workdir>/results/; check that location in
    # addition to the two the backend may also use.
    search_dirs = (workdir, os.path.join(workdir, "results"), PATHS["results"])
    for base in search_dirs:
        if not findings:
            findings = _read_json_list(os.path.join(base, "findings.json"))
        if not ran:
            ran = _read_json_list(os.path.join(base, "agents_ran.json"))
    return {"findings": findings, "agents_ran": ran}
def run_engagement(cfg: RunConfig, recon: Optional[dict] = None,
                   progress=lambda m: None) -> Dict:
    """Run one engagement end to end and return a summary dict.

    Steps: prepare directories, load the agent library and RL state, resolve
    the backend, optionally write the Playwright MCP config, compose the
    prompt, run the backend, collect artifacts and (unless disabled or a dry
    run) update and save the RL state.

    Args:
        cfg: Run settings.
        recon: Optional recon data used for agent selection and RL tech affinity.
        progress: Callback receiving one status message per step.

    Returns:
        Dict with keys `workdir`, `returncode`, `findings`, `agents_ran`, `log`.

    Raises:
        RuntimeError: If the configured backend is unknown or not installed.
    """
    ensure_dirs()
    workdir = cfg.resolved_workdir()
    os.makedirs(workdir, exist_ok=True)

    lib = AgentLibrary(PATHS["agents"])
    rl = RLEngine(PATHS["rl_state"])
    totals = lib.counts()
    progress(f"Loaded {totals['total']} agents "
             f"({totals['vulns']} vuln / {totals['meta']} meta)")

    backend = backends.get(cfg.backend)
    if not backend or not backend.available():
        avail = [b.key for b in backends.detect()]
        raise RuntimeError(f"Backend '{cfg.backend}' not available. Installed: {avail or 'none'}")

    # MCP is optional: without npx the run continues with a warning.
    mcp_cfg = None
    if cfg.use_mcp:
        if mcp.playwright_available():
            mcp_cfg = mcp.write_mcp_config(workdir)
            progress("Playwright MCP configured")
        else:
            progress("WARNING: npx not found — Playwright MCP disabled; browser-proof agents degraded")

    prompt = compose_master_prompt(cfg, lib, rl, recon)
    env = models.resolve_env(cfg.provider, cfg.model)
    progress(f"Launching {backend.label} ({cfg.model}) — autonomous={cfg.autonomous}")
    res = backends.run(backend, prompt, workdir, model=cfg.model,
                       autonomous=cfg.autonomous, mcp_config=mcp_cfg, env=env,
                       timeout=cfg.timeout, dry_run=cfg.dry_run)
    progress(f"Backend exited rc={res.returncode}; log: {res.log_path}")

    collected = collect_results(workdir)
    findings = collected["findings"] or []
    ran = collected["agents_ran"] or []
    progress(f"Collected {len(findings)} validated finding(s) from {len(ran)} agent(s)")

    # A dry run produced no real outcomes, so it must not touch the RL state.
    if cfg.use_rl and not cfg.dry_run:
        tech_info = (recon or {}).get("tech", {}) or {}
        tech = tech_info.get("framework", "") or None
        outcomes = outcomes_from_findings(findings, ran, tech=tech)
        rl.update(outcomes, target=cfg.target)
        rl.save()
        progress("RL state updated → data/rl_state.json")

    summary = {"workdir": workdir, "returncode": res.returncode,
               "findings": findings, "agents_ran": ran, "log": res.log_path}
    return summary
+125
View File
@@ -0,0 +1,125 @@
"""
Reinforcement-learning engine for NeuroSploit v3.3.0.
A lightweight, persisted reward loop that biases agent selection across runs.
It is deliberately model-free and explainable: each specialist agent carries a
weight in [0.05, 1.0] plus per-tech-stack affinity, updated after every run from
validated findings (positive reward) and rejected false positives (negative).
This mirrors `agents_md/meta/rl_feedback.md`: the markdown agent reasons about
rewards qualitatively; this module applies them deterministically so the state
file is reproducible and auditable.
"""
import json
import os
from dataclasses import dataclass
from typing import Dict, List, Optional
# The default state file lives in `data/` under the parent of this file's directory.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
STATE_PATH = os.path.join(ROOT, "data", "rl_state.json")
# Reward contributed by one validated finding, keyed by lowercase severity.
SEVERITY_REWARD = {"critical": 1.0, "high": 0.7, "medium": 0.4, "low": 0.2, "info": 0.05}
# Subtracted from the reward once per false positive.
FP_PENALTY = 0.3
IDLE_PENALTY = 0.05 # ran, found nothing, cost budget
ALPHA = 0.3 # learning rate
# Bounds applied to every stored weight and tech affinity (see `_clamp`).
WMIN, WMAX = 0.05, 1.0
@dataclass
class Outcome:
    """What one agent achieved during one run; the input to `RLEngine.update`."""

    agent: str
    validated: List[str]             # severities of validated findings
    false_positives: int = 0         # each one costs FP_PENALTY in `reward`
    ran: bool = True                 # False → `reward` returns 0.0
    skipped_correctly: bool = False  # True → `reward` returns 0.0
    tech: Optional[str] = None       # when set, the matching tech affinity is updated too
def _clamp(x: float) -> float:
    """Limit `x` to the closed interval [WMIN, WMAX]."""
    capped = min(WMAX, x)
    return max(WMIN, capped)
class RLEngine:
    """Persisted per-agent weights, updated from run outcomes.

    State layout: `{"version": 1, "agents": {<name>: {"weight", "runs",
    "validated_hits", "false_positives", "reward_last", "tech_affinity"}}}`.
    The file may also be edited outside this class, so records are read
    defensively and missing keys are backfilled on update.
    """

    def __init__(self, path: str = STATE_PATH):
        self.path = path
        self.state = self._load()

    def _load(self) -> dict:
        """Load the state file; fall back to a fresh state if it is absent or unusable."""
        try:
            with open(self.path, encoding="utf-8") as fh:
                state = json.load(fh)
        except (OSError, ValueError):
            # Missing file, I/O error or malformed JSON.
            state = None
        if isinstance(state, dict):
            # Guarantee the container the other methods rely on.
            if not isinstance(state.get("agents"), dict):
                state["agents"] = {}
            return state
        return {"version": 1, "agents": {}}

    def weights(self) -> Dict[str, float]:
        """Return `{agent: weight}` for every stored agent (0.5 when a record has no weight)."""
        return {name: rec.get("weight", 0.5)
                for name, rec in self.state.get("agents", {}).items()
                if isinstance(rec, dict)}

    def weight(self, agent: str, tech: Optional[str] = None) -> float:
        """Return one agent's weight, raised to its affinity for `tech` when that is higher.

        Unknown agents get 0.5.
        """
        rec = self.state.get("agents", {}).get(agent)
        if not rec:
            return 0.5
        w = rec.get("weight", 0.5)
        if tech:
            w = max(w, rec.get("tech_affinity", {}).get(tech, 0.0) or 0.0)
        return w

    def reward(self, o: Outcome) -> float:
        """Score one outcome in [-1.0, 1.0].

        Skipped or not-run agents score 0.0. Otherwise: the summed severity
        rewards of validated findings (unknown severities count 0.2), minus
        FP_PENALTY per false positive, minus IDLE_PENALTY when the agent ran
        and produced neither.
        """
        if o.skipped_correctly:
            return 0.0
        if not o.ran:
            return 0.0
        # str(): severities come from backend-written JSON and may not be strings.
        r = sum(SEVERITY_REWARD.get(str(s).lower(), 0.2) for s in o.validated)
        r -= FP_PENALTY * o.false_positives
        if not o.validated and not o.false_positives:
            r -= IDLE_PENALTY
        return max(-1.0, min(1.0, r))

    def update(self, outcomes: List[Outcome], target: str = "") -> dict:
        """Apply a batch of outcomes to the in-memory state and return it.

        For every agent that actually ran, the weight (and the affinity for
        `o.tech`, if set) moves a fraction ALPHA toward the outcome's reward
        and the counters are incremented. Agents that did not run or were
        correctly skipped only get `reward_last` recorded.
        """
        agents = self.state.setdefault("agents", {})
        defaults = (("weight", 0.5), ("runs", 0), ("validated_hits", 0),
                    ("false_positives", 0), ("reward_last", 0.0))
        for o in outcomes:
            rec = agents.get(o.agent)
            if not isinstance(rec, dict):
                rec = agents[o.agent] = {}
            # Backfill missing keys so hand-edited or partial records work.
            for key, default in defaults:
                rec.setdefault(key, default)
            ta = rec.setdefault("tech_affinity", {})

            r = self.reward(o)
            rec["reward_last"] = round(r, 3)
            if not o.ran or o.skipped_correctly:
                # Their reward is a neutral 0.0; feeding that into the moving
                # average would drag the weight toward zero, so leave it.
                continue

            old = rec["weight"]
            rec["weight"] = _clamp(old + ALPHA * (r - old))
            rec["runs"] += 1
            rec["validated_hits"] += len(o.validated)
            rec["false_positives"] += o.false_positives
            if o.tech:
                prev = ta.get(o.tech, 0.5)
                ta[o.tech] = _clamp(prev + ALPHA * (r - prev))
        self.state["updated_for"] = target
        return self.state

    def save(self):
        """Write the state to `self.path` as indented JSON.

        The data is written to a sibling temporary file and then moved into
        place, so an interrupted save cannot leave a truncated state file.
        """
        parent = os.path.dirname(self.path)
        if parent:  # a bare filename has no directory to create
            os.makedirs(parent, exist_ok=True)
        tmp_path = self.path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(self.state, fh, indent=2)
        os.replace(tmp_path, self.path)
def outcomes_from_findings(findings: List[dict], ran_agents: List[str],
                           tech: Optional[str] = None) -> List[Outcome]:
    """Build per-agent Outcomes from a run's findings + the agents that ran.

    Args:
        findings: Finding dicts; `agent`, `validated`, `severity` and `verdict`
            are read. Entries that are not dicts or have no usable `agent`
            name are ignored.
        ran_agents: Names of the agents that were executed.
        tech: Tech-stack label recorded on every outcome.

    Returns:
        One `Outcome` per agent, agents from `ran_agents` first, then any
        agent that only appears in `findings`.
    """
    by_agent: Dict[str, Outcome] = {}

    def outcome_for(name: str) -> Outcome:
        # Create the record on first sight of an agent.
        if name not in by_agent:
            by_agent[name] = Outcome(agent=name, validated=[], false_positives=0,
                                     ran=True, tech=tech)
        return by_agent[name]

    for name in ran_agents or []:
        if isinstance(name, str) and name:
            outcome_for(name)

    for f in findings or []:
        # Findings come from backend-written JSON; tolerate malformed entries
        # rather than crediting them to a `None` agent or crashing.
        if not isinstance(f, dict):
            continue
        name = f.get("agent")
        if not isinstance(name, str) or not name:
            continue
        outcome = outcome_for(name)
        if f.get("validated"):
            # `or` also covers an explicit null severity.
            outcome.validated.append(f.get("severity") or "Low")
        elif f.get("verdict") == "false_positive":
            outcome.false_positives += 1
    return list(by_agent.values())
if __name__ == "__main__":
    # Run as a script: dump the stored per-agent weights as indented JSON.
    engine = RLEngine()
    print(json.dumps(engine.weights(), indent=2))