NeuroSploit v3.3.0 — Autonomous MD-Agent Engine

Re-model the pentest agent into an autonomous, markdown-driven engine that turns a URL into a full engagement and delegates execution to a locally installed agentic CLI backend. Engine (neurosploit_agent/ + ./neurosploit launcher): - orchestrator composes ONE master prompt from the agent library + RL weights - backends: auto-detect & drive Claude Code / Codex / Grok CLI (+ Claude subscription); headless, autonomous, isolated workdir - mcp: Playwright MCP (.mcp.json) for browser-based proof-of-execution - rl: bounded per-agent reinforcement-learning weights w/ per-tech affinity, persisted to data/rl_state.json - models: latest registry incl. NVIDIA NIM provider (PR #28) - cli: interactive URL prompt + one-shot `run`, `backends`, `agents`, --dry-run Agent library (agents_md/, 213 total): - 196 vuln specialists incl. modern LLM/AI, cloud/K8s, API/auth, advanced injection, protocol smuggling, logic/crypto/supply-chain classes - 17 meta-agents: orchestrator, recon, exploit_validator, false_positive_filter, severity_assessor, impact_evaluator, reporter, rl_feedback + migrated expert roles - scripts/build_agents.py data-driven builder; REGISTRY.md index Docs: rewritten README.md, v3.3.0 RELEASE.md, .env.example (NVIDIA NIM, xAI, engine vars). Retire legacy Python orchestration (neurosploit.py + agent classes) to legacy/. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 07:15:30 +02:00 · 2026-06-14 20:57:38 -03:00
parent 59f8f42d80
commit 55af0d4634
248 changed files with 18707 additions and 577 deletions
@@ -0,0 +1,15 @@
+"""
+NeuroSploit v3.3.0 — Autonomous MD-Agent Engine.
+
+A lean orchestration layer that turns a URL into an autonomous penetration test:
+it composes a master prompt from the curated `agents_md/` markdown library and
+hands execution to a locally-installed agentic CLI backend (Claude Code, Codex,
+or Grok CLI), augmented with Playwright MCP, and learns across runs via a
+reinforcement-learning reward loop.
+
+This package replaces the legacy Python orchestration (`neurosploit.py` + heavy
+`core/` agents), which now lives under `legacy/` for reference.
+"""
+
+__version__ = "3.3.0"
+__all__ = ["__version__"]
@@ -0,0 +1,165 @@
+"""
+Agent loader for NeuroSploit v3.3.0.
+
+Discovers and parses the curated `agents_md/` markdown library, builds a
+searchable index, and produces an RL-weighted, recon-aware ordering of which
+specialist agents the orchestrator should run for a given target.
+"""
+
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+AGENTS_DIR = os.path.join(ROOT, "agents_md")
+
+# Recon-signal → keyword hints used to pre-select agents. The CLI backend does
+# the final, intelligent selection; this just narrows the candidate set so we
+# do not dump 200 playbooks into every prompt.
+#
+# Keys are the signal names emitted by `_signals_from_recon`; each keyword is
+# matched as a plain substring of a vuln-agent name in
+# `AgentLibrary.candidates_for`, so short fragments such as "ssi" or "rag" can
+# also select agents whose names merely contain those letters.
+SIGNAL_HINTS: Dict[str, List[str]] = {
+    "graphql": ["graphql"],
+    "jwt": ["jwt"],
+    "oauth": ["oauth", "oidc", "saml"],
+    "ai_features": ["llm_", "prompt_injection", "ai_", "vector_db", "ml_model", "rag"],
+    "cloud": ["aws_", "gcp_", "azure_", "s3_", "gcs_", "cloud_", "imds", "metadata", "terraform", "ecr", "helm", "serverless", "k8s", "kubelet", "docker_socket", "container_escape"],
+    "rest": ["api_", "rest_", "mass_assignment", "bola", "bfla", "idor"],
+    "ws": ["websocket", "ws_"],
+    "upload": ["file_upload", "zip_slip", "xxe", "deserial", "pickle", "yaml"],
+    "template": ["ssti", "csti", "template", "ssi", "esi"],
+    "cache_proxy": ["cache", "smuggl", "desync", "h2c", "hop_by_hop", "proxy", "response_splitting"],
+}
+
+
+@dataclass
+class Agent:
+    # One parsed markdown playbook; instances are built by `_parse`.
+    name: str                      # file name without its extension
+    path: str                      # path of the markdown file that was parsed
+    title: str                     # first `# ` heading, or `name` when there is none
+    kind: str                      # "vuln" | "meta"
+    user_prompt: str = ""          # body of the `## User Prompt` section ("" if absent)
+    system_prompt: str = ""        # body of the `## System Prompt` section ("" if absent)
+    cwe: str = ""                  # first `CWE-<digits>` token found anywhere in the file
+    severity: str = ""             # letters following the first `Severity:` label
+    tags: List[str] = field(default_factory=list)  # `_parse` sets this to [name]
+
+
+def _parse(path: str, kind: str) -> Agent:
+    """Parse one markdown playbook into an `Agent`.
+
+    Args:
+        path: Markdown file to read (decoded as UTF-8, bad bytes replaced).
+        kind: Stored verbatim on the result ("vuln" or "meta").
+
+    Returns:
+        An `Agent` whose name is the file name without extension. Sections
+        or labels that are missing from the file yield empty strings.
+    """
+    # Use a context manager so the handle is closed immediately; the library
+    # loader calls this once per playbook file.
+    with open(path, encoding="utf-8", errors="replace") as fh:
+        text = fh.read()
+    name = os.path.splitext(os.path.basename(path))[0]
+    title_m = re.search(r"^#\s+(.+?)\s*$", text, re.M)
+    title = title_m.group(1).strip() if title_m else name
+    # A section body runs up to the next `## ` heading or the end of the file.
+    up = re.search(r"##\s*User Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
+    sp = re.search(r"##\s*System Prompt\s*\n(.*?)(?=\n##\s|\Z)", text, re.S)
+    # First match anywhere in the document wins for both CWE and severity.
+    cwe_m = re.search(r"(CWE-\d+)", text)
+    sev_m = re.search(r"Severity:\s*([A-Za-z]+)", text)
+    return Agent(
+        name=name, path=path, title=title, kind=kind,
+        user_prompt=(up.group(1).strip() if up else ""),
+        system_prompt=(sp.group(1).strip() if sp else ""),
+        cwe=(cwe_m.group(1) if cwe_m else ""),
+        severity=(sev_m.group(1) if sev_m else ""),
+        tags=[name],
+    )
+
+
+class AgentLibrary:
+    """In-memory index of the markdown playbooks found under `base`.
+
+    `vulns` and `meta` map an agent name to the `Agent` parsed from the
+    `vulns/` and `meta/` sub-directories respectively.
+    """
+
+    def __init__(self, base: str = AGENTS_DIR):
+        self.base = base
+        self.vulns: Dict[str, Agent] = {}
+        self.meta: Dict[str, Agent] = {}
+        self._load()
+
+    def _load(self):
+        """Parse every `*.md` file in both sub-directories; absent ones are skipped."""
+        vdir, mdir = os.path.join(self.base, "vulns"), os.path.join(self.base, "meta")
+        for d, kind, store in ((vdir, "vuln", self.vulns), (mdir, "meta", self.meta)):
+            if not os.path.isdir(d):
+                continue
+            # Sorted so the load (and dict insertion) order is deterministic.
+            for fn in sorted(os.listdir(d)):
+                if fn.endswith(".md"):
+                    a = _parse(os.path.join(d, fn), kind)
+                    store[a.name] = a
+
+    # -- selection ---------------------------------------------------------
+    def candidates_for(self, recon: Optional[dict]) -> List[str]:
+        """Return vuln-agent names whose preconditions plausibly match recon.
+
+        With no recon (or a generic target) we return all vuln agents and let
+        the backend prioritise. With recon signals we narrow to the relevant
+        subset plus a baseline of always-run web agents.
+        """
+        if not recon:
+            return list(self.vulns.keys())
+        wanted: set = set()
+        signals = _signals_from_recon(recon)
+        for sig in signals:
+            for kw in SIGNAL_HINTS.get(sig, []):
+                # Substring match against the agent (file) name.
+                wanted.update(n for n in self.vulns if kw in n)
+        # Always include core web classes regardless of recon.
+        baseline = ["xss_reflected", "xss_stored", "xss_dom", "sqli_error", "sqli_blind",
+                    "ssrf", "idor", "csrf", "open_redirect", "command_injection",
+                    "lfi", "path_traversal", "auth_bypass", "security_headers",
+                    "information_disclosure", "cors_misconfig"]
+        wanted.update(n for n in baseline if n in self.vulns)
+        # Nothing matched at all: fall back to the whole library.
+        return sorted(wanted) if wanted else list(self.vulns.keys())
+
+    def ranked(self, recon: Optional[dict], weights: Dict[str, float]) -> List[str]:
+        """Return the candidates ordered by descending weight (0.5 if unknown).
+
+        The sort is stable, so equally weighted agents keep the order in
+        which `candidates_for` returned them.
+        """
+        cands = self.candidates_for(recon)
+        return sorted(cands, key=lambda n: weights.get(n, 0.5), reverse=True)
+
+    def index_markdown(self, names: List[str], weights: Dict[str, float]) -> str:
+        """Compact catalog (name — title — CWE — weight) for the master prompt.
+
+        Names that are not vuln agents are silently omitted.
+        """
+        rows = []
+        for n in names:
+            a = self.vulns.get(n)
+            if not a:
+                continue
+            rows.append(f"- `{n}` — {a.title} [{a.cwe or 'CWE-?'}] (priority {weights.get(n, 0.5):.2f})")
+        return "\n".join(rows)
+
+    def render(self, name: str, target: str, recon_json: str = "{}", collaborator: str = "") -> str:
+        """Return the playbook text for `name` with its placeholders filled in.
+
+        The file is re-read from disk and the literal tokens `{target}`,
+        `{recon_json}` and `{collaborator}` are substituted. Vuln agents
+        shadow meta agents of the same name.
+
+        Raises:
+            KeyError: if `name` is neither a vuln nor a meta agent.
+        """
+        a = self.vulns.get(name) or self.meta.get(name)
+        if not a:
+            raise KeyError(name)
+        # Close the handle deterministically instead of relying on GC.
+        with open(a.path, encoding="utf-8", errors="replace") as fh:
+            body = fh.read()
+        return (body.replace("{target}", target)
+                    .replace("{recon_json}", recon_json)
+                    .replace("{collaborator}", collaborator))
+
+    def counts(self) -> Dict[str, int]:
+        """Return the number of vuln agents, meta agents and their sum."""
+        return {"vulns": len(self.vulns), "meta": len(self.meta),
+                "total": len(self.vulns) + len(self.meta)}
+
+
+def _signals_from_recon(recon: dict) -> List[str]:
+    """Translate a recon dict into an ordered, de-duplicated list of signal names.
+
+    Missing or falsy sections are treated as empty. "upload" and
+    "cache_proxy" are always present in the result.
+    """
+    found: List[str] = []
+
+    def section(key: str):
+        # A missing key and an explicit None/empty value both become {}.
+        return recon.get(key, {}) or {}
+
+    def flag(signal: str, condition) -> None:
+        if condition:
+            found.append(signal)
+
+    apis = section("apis")
+    flag("graphql", apis.get("graphql"))
+    flag("rest", apis.get("rest"))
+    flag("ws", apis.get("ws"))
+
+    flag("ai_features", recon.get("ai_features"))
+
+    cloud = section("cloud")
+    flag("cloud", cloud.get("provider") or cloud.get("metadata_surface"))
+
+    auth = section("auth")
+    flag("jwt", auth.get("session") == "jwt")
+    flag("oauth", auth.get("oauth"))
+
+    tech = section("tech")
+    tech_text = " ".join(map(str, tech.values())).lower()
+    engines = ("flask", "jinja", "twig", "freemarker", "velocity", "thymeleaf")
+    flag("template", any(engine in tech_text for engine in engines))
+    flag("cache_proxy", tech.get("waf") or tech.get("http2"))
+
+    # Generic surfaces always worth a look.
+    found.extend(("upload", "cache_proxy"))
+
+    # Keep the first occurrence of each signal, preserving order.
+    unique: List[str] = []
+    for signal in found:
+        if signal not in unique:
+            unique.append(signal)
+    return unique
+
+
+if __name__ == "__main__":
+    lib = AgentLibrary()
+    print(lib.counts())
@@ -0,0 +1,147 @@
+"""
+Agentic CLI backends for NeuroSploit v3.3.0.
+
+NeuroSploit does not embed its own agent loop — it delegates autonomous
+execution to whichever agentic coding CLI is installed locally:
+
+  * Claude Code  (`claude`)  — also the path for a Claude *subscription*
+  * Codex CLI    (`codex`)
+  * Grok CLI     (`grok`)
+
+Each backend is driven headlessly: we pass the composed master prompt, a working
+directory (with `.mcp.json` for Playwright), and provider env, and let the CLI
+run the test autonomously to completion. The engine then reads the artifacts the
+run wrote to `results/`.
+"""
+
+import os
+import shutil
+import subprocess
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+
+@dataclass
+class Backend:
+    """Base description of an agentic CLI; subclasses supply `build_argv`."""
+
+    key: str
+    label: str
+    binary: str
+
+    def available(self) -> bool:
+        """Report whether `binary` can be located via `shutil.which`."""
+        located = shutil.which(self.binary)
+        return located is not None
+
+    def version(self) -> str:
+        """Return the first output line of `<binary> --version`, or "?" on any failure."""
+        try:
+            probe = subprocess.run(
+                [self.binary, "--version"],
+                capture_output=True, text=True, timeout=15,
+            )
+            # Fall back to stderr when stdout is empty.
+            text = probe.stdout or probe.stderr
+            if not text:
+                return "?"
+            return text.strip().splitlines()[0]
+        except Exception:
+            # Best-effort probe: missing binary, timeout, blank output, ...
+            return "?"
+
+    # builds argv given (prompt_file, workdir, model). Prompt is passed via file
+    # to avoid arg-length limits and shell-escaping issues.
+    def build_argv(self, prompt_file: str, workdir: str, model: str,
+                   autonomous: bool, mcp_config: Optional[str]) -> List[str]:
+        """Return the command line for a headless run; must be overridden."""
+        raise NotImplementedError
+
+
+@dataclass
+class ClaudeBackend(Backend):
+    """Command-line builder for the `claude` CLI."""
+
+    # `run` pipes the prompt text to the process on stdin when this is True.
+    stdin_prompt: bool = True
+
+    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
+        """Assemble the print-mode invocation; `prompt_file` and `workdir` are unused here."""
+        command = [self.binary, "-p", "--output-format", "stream-json", "--verbose"]
+        optional = (
+            (model, ["--model", model]),
+            (mcp_config, ["--mcp-config", mcp_config]),
+            # Full autonomy for an authorized engagement in an isolated workdir.
+            (autonomous, ["--dangerously-skip-permissions"]),
+        )
+        for enabled, flags in optional:
+            if enabled:
+                command.extend(flags)
+        return command
+
+
+@dataclass
+class CodexBackend(Backend):
+    """Command-line builder for the `codex` CLI."""
+
+    # `run` pipes the prompt text to the process on stdin when this is True.
+    stdin_prompt: bool = True
+
+    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
+        """Assemble the `codex exec` invocation; `prompt_file` is unused here."""
+        # `codex exec` runs non-interactively to completion.
+        command = [self.binary, "exec", "--cd", workdir]
+        optional = (
+            (model, ["--model", model]),
+            (autonomous, ["--dangerously-bypass-approvals-and-sandbox"]),
+            (mcp_config, ["--config", f"mcp_config_file={mcp_config}"]),
+        )
+        for enabled, flags in optional:
+            if enabled:
+                command.extend(flags)
+        # A trailing "-" tells the CLI to read the prompt from stdin.
+        command.append("-")
+        return command
+
+
+@dataclass
+class GrokBackend(Backend):
+    """Command-line builder for the `grok` CLI."""
+
+    # The prompt is handed over as a file path, so `run` sends nothing on stdin.
+    stdin_prompt: bool = False
+
+    def build_argv(self, prompt_file, workdir, model, autonomous, mcp_config):
+        """Assemble the file-driven invocation."""
+        # NOTE(review): flag names below are taken as-is; confirm against the
+        # installed grok-cli version.
+        command = [self.binary, "--prompt-file", prompt_file, "--workdir", workdir]
+        optional = (
+            (model, ["--model", model]),
+            (mcp_config, ["--mcp-config", mcp_config]),
+            (autonomous, ["--yolo"]),
+        )
+        for enabled, flags in optional:
+            if enabled:
+                command.extend(flags)
+        return command
+
+
+REGISTRY: Dict[str, Backend] = {
+    "claude": ClaudeBackend("claude", "Claude Code", "claude"),
+    "codex": CodexBackend("codex", "Codex CLI", "codex"),
+    "grok": GrokBackend("grok", "Grok CLI", "grok"),
+}
+
+
+def detect() -> List[Backend]:
+    """Return installed backends, in preference order."""
+    installed: List[Backend] = []
+    for key in ("claude", "codex", "grok"):
+        candidate = REGISTRY[key]
+        if candidate.available():
+            installed.append(candidate)
+    return installed
+
+
+def get(key: str) -> Optional[Backend]:
+    """Look up a registered backend by key; unknown keys give None."""
+    backend = REGISTRY.get(key)
+    return backend
+
+
+@dataclass
+class RunResult:
+    # Outcome record returned by `run`.
+    backend: str      # key of the backend that was used
+    returncode: int   # exit status of the CLI process (0 for a dry run)
+    log_path: str     # file that received the process's stdout and stderr
+    workdir: str      # directory the run executed in
+
+
+def run(backend: Backend, prompt: str, workdir: str, model: str = "",
+        autonomous: bool = True, mcp_config: Optional[str] = None,
+        env: Optional[Dict[str, str]] = None, timeout: int = 7200,
+        dry_run: bool = False) -> RunResult:
+    """Execute a backend against the composed prompt and stream logs to disk.
+
+    The prompt is always written to `<workdir>/master_prompt.md`; process
+    output (stdout and stderr combined) goes to `<workdir>/backend.log`.
+
+    Args:
+        backend: Backend that builds the command line.
+        prompt: Full prompt text; also piped on stdin when the backend's
+            `stdin_prompt` attribute is true.
+        workdir: Working directory for the run (created if missing).
+        model, autonomous, mcp_config: Forwarded to `backend.build_argv`.
+        env: Extra variables layered over a copy of the current environment.
+        timeout: Seconds before `subprocess.run` gives up.
+        dry_run: Only record the command line in the log; nothing is executed.
+
+    Returns:
+        A `RunResult` carrying the exit status and artifact locations.
+
+    Raises:
+        subprocess.TimeoutExpired: propagated unchanged when `timeout` elapses.
+        OSError: propagated if the process cannot be started.
+    """
+    os.makedirs(workdir, exist_ok=True)
+    prompt_file = os.path.join(workdir, "master_prompt.md")
+    # Context managers guarantee the data is flushed and the handle closed
+    # before the backend process is started and reads the file.
+    with open(prompt_file, "w", encoding="utf-8") as pf:
+        pf.write(prompt)
+    log_path = os.path.join(workdir, "backend.log")
+
+    argv = backend.build_argv(prompt_file, workdir, model, autonomous, mcp_config)
+    full_env = os.environ.copy()
+    if env:
+        full_env.update(env)
+
+    if dry_run:
+        # Same encoding as the real log so non-ASCII paths cannot fail here.
+        with open(log_path, "w", encoding="utf-8") as logf:
+            logf.write("DRY RUN\n" + " ".join(argv) + "\n")
+        return RunResult(backend.key, 0, log_path, workdir)
+
+    stdin_data = prompt if getattr(backend, "stdin_prompt", False) else None
+    with open(log_path, "w", encoding="utf-8") as logf:
+        proc = subprocess.run(
+            argv, input=stdin_data, stdout=logf, stderr=subprocess.STDOUT,
+            cwd=workdir, env=full_env, text=True, timeout=timeout,
+        )
+    return RunResult(backend.key, proc.returncode, log_path, workdir)
@@ -0,0 +1,163 @@
+"""
+NeuroSploit v3.3.0 — terminal launcher.
+
+Two ways in:
+
+    neurosploit                      # interactive: prompts for URL + choices
+    neurosploit run https://t.example --backend claude --model claude-opus-4-8
+
+The interactive flow asks for a URL, lets you pick from the agentic CLI backends
+actually installed on this machine (Claude Code / Codex / Grok, or a Claude
+subscription), picks a model, then launches the autonomous engagement.
+"""
+
+import argparse
+import sys
+
+from . import backends, models
+from .config import RunConfig
+from .orchestrator import run_engagement
+
+BANNER = r"""
+   _   _                      ____        _       _ _
+  | \ | | ___ _   _ _ __ ___ / ___|_ __ | | ___ (_) |_
+  |  \| |/ _ \ | | | '__/ _ \\___ \| '_ \| |/ _ \| | __|
+  | |\  |  __/ |_| | | | (_) |___) | |_) | | (_) | | |_
+  |_| \_|\___|\__,_|_|  \___/|____/| .__/|_|\___/|_|\__|
+        v3.3.0  Autonomous MD-Agent Engine
+                                   |_|
+"""
+
+
+def _progress(msg: str):
+    """Print a status line with the `[*]` marker and flush immediately."""
+    line = "  [*] {}".format(msg)
+    print(line, flush=True)
+
+
+def _choose(prompt, options, default_idx=0):
+    """Show a numbered menu and return the key of the selected option.
+
+    Args:
+        prompt: Text shown before the `[default]` hint.
+        options: Sequence of `(key, label)` pairs.
+        default_idx: Zero-based index used for empty or invalid input.
+
+    Returns:
+        The key of the chosen option, or of the default one.
+    """
+    for i, (_key, label) in enumerate(options):
+        mark = "*" if i == default_idx else " "
+        print(f"   {mark} {i + 1}) {label}")
+    raw = input(f"{prompt} [{default_idx + 1}]: ").strip()
+    if not raw:
+        return options[default_idx][0]
+    try:
+        picked = int(raw)
+    except ValueError:
+        picked = 0
+    # Explicit range check: "0" or a negative number must not wrap around to
+    # the end of the list through Python's negative indexing.
+    if 1 <= picked <= len(options):
+        return options[picked - 1][0]
+    print("   invalid choice, using default")
+    return options[default_idx][0]
+
+
+def interactive() -> int:
+    """Prompt for target, backend and model, then run the engagement.
+
+    Returns:
+        2 when no backend is installed or no URL was entered; otherwise 0 if
+        the engagement's `returncode` is 0, else 1.
+    """
+    print(BANNER)
+    installed = backends.detect()
+    if not installed:
+        print("  [!] No agentic CLI backend found (claude / codex / grok).")
+        print("      Install one: Claude Code, Codex CLI, or Grok CLI, then re-run.")
+        return 2
+    # version() spawns a subprocess, so probe each backend only once.
+    versions = {b.key: b.version() for b in installed}
+    detected = ", ".join(b.label + " (" + versions[b.key] + ")" for b in installed)
+    print(f"  Detected backends: {detected}\n")
+
+    target = input("  Target URL: ").strip()
+    if not target:
+        print("  [!] A target URL is required.")
+        return 2
+    if not target.startswith(("http://", "https://")):
+        target = "https://" + target
+    scope = input(f"  In-scope hosts [default: {target}]: ").strip() or target
+    collaborator = input("  OOB collaborator host (optional, for blind/SSRF proof): ").strip()
+
+    backend_key = _choose("  Choose backend", [(b.key, f"{b.label}  [{versions[b.key]}]") for b in installed])
+
+    # Provider/model: map backend → sensible provider, then pick a model.
+    prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
+    provider = prov_for_backend.get(backend_key, "anthropic")
+    # The subscription path only exists for Claude Code, so the question is
+    # skipped for the other backends, where the answer would be ignored.
+    if backend_key == "claude":
+        sub = input("  Use Claude subscription (login) instead of an API key? [y/N]: ").strip().lower()
+        if sub == "y":
+            provider = "claude_subscription"
+    model_opts = [(m.id, f"{m.label}  ({m.context // 1000}k ctx)  {m.notes}")
+                  for m in models.list_models(provider)] or [("", "backend default")]
+    model = _choose("  Choose model", model_opts)
+
+    cfg = RunConfig(target=target, scope=scope, backend=backend_key,
+                    provider=provider, model=model, collaborator=collaborator)
+    print()
+    _progress(f"Starting autonomous engagement against {target}")
+    result = run_engagement(cfg, progress=_progress)
+    _summary(result)
+    return 0 if result["returncode"] == 0 else 1
+
+
+def _summary(result):
+    """Print the closing box: workdir, finding count, per-severity tally and report hint."""
+    print("\n  ── Engagement complete ─────────────────────────────")
+    print("   Workdir : {}".format(result["workdir"]))
+    found = result["findings"]
+    print("   Findings: {} validated".format(len(found)))
+    # Tally in first-seen order; findings without a severity count under "?".
+    tally = {}
+    for item in found:
+        level = item.get("severity", "?")
+        tally[level] = tally.get(level, 0) + 1
+    if tally:
+        parts = [f"{level}={count}" for level, count in tally.items()]
+        print("   By severity: " + ", ".join(parts))
+    print("   Report  : reports/  |  Raw: {}/findings.json".format(result["workdir"]))
+    print("  ────────────────────────────────────────────────────")
+
+
+def main(argv=None) -> int:
+    """Command-line entry point.
+
+    With no sub-command the interactive flow runs. `run` starts an engagement
+    from flags, `backends` lists detected CLIs and `agents` prints the agent
+    library counts.
+
+    Args:
+        argv: Argument list to parse; None lets argparse read `sys.argv`.
+
+    Returns:
+        Process exit status (130 when the interactive flow is interrupted).
+    """
+    parser = argparse.ArgumentParser(prog="neurosploit",
+                                     description="NeuroSploit v3.3.0 autonomous MD-agent pentest engine")
+    sub = parser.add_subparsers(dest="cmd")
+
+    r = sub.add_parser("run", help="run an engagement against a URL")
+    r.add_argument("url")
+    r.add_argument("--backend", default=None, help="claude | codex | grok (default: first installed)")
+    r.add_argument("--provider", default=None)
+    r.add_argument("--model", default=None)
+    r.add_argument("--scope", default="")
+    r.add_argument("--collaborator", default="")
+    r.add_argument("--no-rl", action="store_true")
+    r.add_argument("--no-mcp", action="store_true")
+    r.add_argument("--max-agents", type=int, default=0)
+    r.add_argument("--dry-run", action="store_true", help="compose prompt + show command without executing the backend")
+
+    sub.add_parser("backends", help="list detected CLI backends")
+    sub.add_parser("agents", help="show agent library counts")
+
+    args = parser.parse_args(argv)
+
+    if args.cmd is None:
+        try:
+            return interactive()
+        except (KeyboardInterrupt, EOFError):
+            print("\n  aborted.")
+            return 130
+
+    if args.cmd == "backends":
+        # Detect once and reuse the result for both the listing and the check.
+        found = backends.detect()
+        for b in found:
+            print(f"  {b.key:8} {b.label:14} {b.version()}")
+        if not found:
+            print("  none installed (claude / codex / grok)")
+        return 0
+
+    if args.cmd == "agents":
+        from .agent_loader import AgentLibrary
+        print(AgentLibrary().counts())
+        return 0
+
+    if args.cmd == "run":
+        url = args.url if args.url.startswith(("http://", "https://")) else "https://" + args.url
+        backend = args.backend
+        if not backend:
+            # First installed backend in preference order, else assume claude.
+            installed = backends.detect()
+            backend = installed[0].key if installed else "claude"
+        prov_for_backend = {"claude": "anthropic", "codex": "openai", "grok": "xai"}
+        provider = args.provider or prov_for_backend.get(backend, "anthropic")
+        model = args.model
+        if not model:
+            # Default to the provider's first listed model; "" when it has none.
+            offered = models.list_models(provider)
+            model = offered[0].id if offered else ""
+        cfg = RunConfig(target=url, scope=args.scope or url, backend=backend,
+                        provider=provider, model=model, collaborator=args.collaborator,
+                        use_rl=not args.no_rl, use_mcp=not args.no_mcp,
+                        max_agents=args.max_agents, dry_run=args.dry_run)
+        print(BANNER)
+        result = run_engagement(cfg, progress=_progress)
+        _summary(result)
+        return 0 if result["returncode"] == 0 else 1
+
+    parser.print_help()
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,52 @@
+"""Configuration & paths for NeuroSploit v3.3.0."""
+
+import os
+from dataclasses import dataclass, field
+from typing import Optional
+
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _p(*parts) -> str:
+    """Join `parts` onto the module-level ROOT directory."""
+    segments = (ROOT,) + parts
+    return os.path.join(*segments)
+
+
+@dataclass
+class RunConfig:
+    # Settings for a single engagement; only `target` is mandatory.
+    target: str
+    scope: str = ""
+    rules_of_engagement: str = "Authorized, non-destructive testing only. No DoS unless explicitly permitted. Stay strictly in scope."
+    backend: str = "claude"            # claude | codex | grok
+    provider: str = "anthropic"        # see models.PROVIDERS
+    model: str = "claude-opus-4-8"
+    autonomous: bool = True
+    collaborator: str = ""             # OOB callback host for blind vuln proof
+    use_rl: bool = True
+    use_mcp: bool = True
+    max_agents: int = 0                # 0 = no cap (backend prioritises)
+    timeout: int = 7200                # presumably seconds for the backend run — TODO confirm
+    dry_run: bool = False
+    workdir: str = field(default="")   # empty means "derive from target", see resolved_workdir
+
+    def resolved_workdir(self) -> str:
+        """Return `workdir` if set, else `<ROOT>/results/<slug of target>`."""
+        return self.workdir or _p("results", _slug(self.target))
+
+
+def _slug(url: str) -> str:
+    """Turn a URL into a name of at most 60 alphanumerics/underscores ("target" if empty)."""
+    bare = url
+    for scheme in ("https://", "http://"):
+        bare = bare.replace(scheme, "")
+    # Every non-alphanumeric character becomes an underscore.
+    cleaned = "".join(ch if ch.isalnum() else "_" for ch in bare)
+    # Trim edge underscores first, then cap the length.
+    trimmed = cleaned.strip("_")[:60]
+    if trimmed:
+        return trimmed
+    return "target"
+
+
+PATHS = {
+    "agents": _p("agents_md"),
+    "results": _p("results"),
+    "reports": _p("reports"),
+    "data": _p("data"),
+    "logs": _p("logs"),
+    "rl_state": _p("data", "rl_state.json"),
+}
+
+
+def ensure_dirs():
+    """Create the results, reports, data and logs directories if they are missing."""
+    needed = [PATHS[name] for name in ("results", "reports", "data", "logs")]
+    for directory in needed:
+        os.makedirs(directory, exist_ok=True)
@@ -0,0 +1,49 @@
+"""
+MCP bridge for NeuroSploit v3.3.0.
+
+Generates the MCP server configuration the agentic CLI backend loads so the
+autonomous run can drive a real browser (Playwright) and any extra MCP tooling.
+Playwright lets agents render SPAs, execute JS, capture DOM/network/screenshots
+and confirm client-side execution (XSS/CSTI) — turning "the payload reflected"
+into "the payload executed", which is what the validator agents demand.
+"""
+
+import json
+import os
+import shutil
+from typing import Dict
+
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def playwright_server() -> Dict:
+    """Return the MCP server entry that launches Playwright MCP through `npx`.
+
+    The entry always runs `npx -y @playwright/mcp@latest --headless --isolated`;
+    no check for a locally installed package is made here.
+    """
+    return {
+        "command": "npx",
+        "args": ["-y", "@playwright/mcp@latest", "--headless", "--isolated"],
+    }
+
+
+def build_mcp_config(extra: Dict[str, Dict] | None = None) -> Dict:
+    """Build the `mcpServers` mapping: Playwright plus any `extra` servers.
+
+    Entries in `extra` override the built-in one when the names collide.
+    """
+    merged = {"playwright": playwright_server()}
+    merged.update(extra or {})
+    return {"mcpServers": merged}
+
+
+def write_mcp_config(workdir: str, extra: Dict[str, Dict] | None = None) -> str:
+    """Write a `.mcp.json` into the run workdir and return its path.
+
+    Claude Code auto-loads `.mcp.json` from the working directory; Codex/Grok
+    accept an explicit config path (see backends.py).
+
+    Args:
+        workdir: Directory that receives the file (created if missing).
+        extra: Additional server entries merged over the Playwright default.
+    """
+    cfg = build_mcp_config(extra)
+    path = os.path.join(workdir, ".mcp.json")
+    os.makedirs(workdir, exist_ok=True)
+    # Close the handle before returning so the backend process that reads
+    # this file next never sees a partially flushed document.
+    with open(path, "w", encoding="utf-8") as fh:
+        json.dump(cfg, fh, indent=2)
+    return path
+
+
+def playwright_available() -> bool:
+    """Best-effort check: true when an `npx` executable is found via `shutil.which`."""
+    npx_path = shutil.which("npx")
+    return npx_path is not None
@@ -0,0 +1,141 @@
+"""
+Model registry for NeuroSploit v3.3.0.
+
+Maps logical providers to their latest models and the env vars / base URLs the
+agentic CLI backends need. Includes the NVIDIA NIM provider added in PR #28.
+
+The engine itself does not call these APIs directly — the chosen CLI backend
+(Claude Code / Codex / Grok) does. This registry is what the launcher uses to
+present choices and to export the right environment to the backend process.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+
+@dataclass(frozen=True)
+class Model:
+    # Immutable description of one selectable model.
+    id: str                  # identifier handed to the backend
+    label: str               # human-readable name shown in menus
+    context: int = 200_000   # context size; the launcher displays it as `context // 1000` "k ctx"
+    notes: str = ""          # short free-text hint shown next to the label
+
+
+@dataclass(frozen=True)
+class Provider:
+    # Immutable description of a model provider and the environment it needs.
+    key: str                            # registry key in PROVIDERS
+    label: str                          # human-readable provider name
+    env_keys: List[str]                 # accepted API-key env var names
+    base_url: Optional[str] = None      # OpenAI-compatible base URL, if any
+    base_url_env: Optional[str] = None  # env var the backend reads for base URL
+    models: List[Model] = field(default_factory=list)
+    subscription: bool = False          # uses a CLI subscription rather than an API key
+
+
+PROVIDERS: Dict[str, Provider] = {
+    # --- Anthropic (latest Claude family; default) -------------------------
+    "anthropic": Provider(
+        key="anthropic", label="Anthropic Claude",
+        env_keys=["ANTHROPIC_API_KEY"],
+        models=[
+            Model("claude-opus-4-8", "Claude Opus 4.8", 1_000_000, "Most capable; deep multi-step pentest reasoning"),
+            Model("claude-sonnet-4-6", "Claude Sonnet 4.6", 1_000_000, "Balanced cost/quality default"),
+            Model("claude-haiku-4-5", "Claude Haiku 4.5", 200_000, "Fast/cheap recon and triage"),
+        ],
+    ),
+    # --- OpenAI ------------------------------------------------------------
+    "openai": Provider(
+        key="openai", label="OpenAI",
+        env_keys=["OPENAI_API_KEY"],
+        models=[
+            Model("gpt-5.1", "GPT-5.1", 400_000, "Strong general reasoning"),
+            Model("o4", "o4", 200_000, "Deliberate reasoning for validation"),
+        ],
+    ),
+    # --- xAI Grok ----------------------------------------------------------
+    "xai": Provider(
+        key="xai", label="xAI Grok",
+        env_keys=["XAI_API_KEY", "GROK_API_KEY"],
+        base_url="https://api.x.ai/v1", base_url_env="OPENAI_BASE_URL",
+        models=[
+            Model("grok-4", "Grok 4", 256_000, "Fast agentic execution"),
+            Model("grok-4-fast", "Grok 4 Fast", 128_000, "Low-latency triage"),
+        ],
+    ),
+    # --- NVIDIA NIM (PR #28) ----------------------------------------------
+    # OpenAI-compatible endpoint at integrate.api.nvidia.com; keys are `nvapi-...`.
+    "nvidia_nim": Provider(
+        key="nvidia_nim", label="NVIDIA NIM",
+        env_keys=["NVIDIA_NIM_API_KEY", "NVIDIA_API_KEY"],
+        base_url="https://integrate.api.nvidia.com/v1", base_url_env="OPENAI_BASE_URL",
+        models=[
+            Model("nvidia/llama-3.3-nemotron-super-49b-v1", "Nemotron Super 49B", 128_000, "NIM hosted reasoning"),
+            Model("deepseek-ai/deepseek-r1", "DeepSeek-R1 (NIM)", 128_000, "Strong reasoning via NIM"),
+            Model("qwen/qwen2.5-coder-32b-instruct", "Qwen2.5 Coder 32B (NIM)", 128_000, "Code/exploit oriented"),
+        ],
+    ),
+    # --- Google Gemini -----------------------------------------------------
+    "gemini": Provider(
+        key="gemini", label="Google Gemini",
+        env_keys=["GEMINI_API_KEY", "GOOGLE_API_KEY"],
+        models=[
+            Model("gemini-2.5-pro", "Gemini 2.5 Pro", 1_000_000, "Large context recon"),
+            Model("gemini-2.5-flash", "Gemini 2.5 Flash", 1_000_000, "Fast/cheap"),
+        ],
+    ),
+    # --- OpenRouter (aggregator) ------------------------------------------
+    "openrouter": Provider(
+        key="openrouter", label="OpenRouter",
+        env_keys=["OPENROUTER_API_KEY"],
+        base_url="https://openrouter.ai/api/v1", base_url_env="OPENAI_BASE_URL",
+        models=[Model("anthropic/claude-opus-4-8", "Opus 4.8 (OpenRouter)", 1_000_000)],
+    ),
+    # --- Local Ollama ------------------------------------------------------
+    "ollama": Provider(
+        key="ollama", label="Ollama (local)",
+        env_keys=[],
+        base_url="http://localhost:11434/v1", base_url_env="OPENAI_BASE_URL",
+        models=[
+            Model("qwen2.5-coder:32b", "Qwen2.5 Coder 32B (local)", 32_000),
+            Model("llama3.3:70b", "Llama 3.3 70B (local)", 128_000),
+        ],
+    ),
+    # --- Subscription via Claude Code CLI (no API key needed) -------------
+    "claude_subscription": Provider(
+        key="claude_subscription", label="Claude subscription (via Claude Code login)",
+        env_keys=[], subscription=True,
+        models=[
+            Model("claude-opus-4-8", "Claude Opus 4.8 (subscription)", 1_000_000),
+            Model("claude-sonnet-4-6", "Claude Sonnet 4.6 (subscription)", 1_000_000),
+        ],
+    ),
+}
+
+DEFAULT_PROVIDER = "anthropic"
+
+
+def get_provider(key: str) -> Optional[Provider]:
+    """Look up a provider by registry key; unknown keys give None."""
+    provider = PROVIDERS.get(key)
+    return provider
+
+
+def list_models(provider_key: str) -> List[Model]:
+    """Return a fresh list of the provider's models; empty for unknown providers."""
+    provider = PROVIDERS.get(provider_key)
+    if not provider:
+        return []
+    # Copy so callers cannot mutate the registry's own list.
+    return list(provider.models)
+
+
+def resolve_env(provider_key: str, model_id: str) -> Dict[str, str]:
+    """Return the env vars a backend needs for this provider/model selection.
+
+    An unknown provider yields an empty dict. Otherwise the result holds the
+    base-URL variable (when the provider defines both URL and variable name),
+    the first of the provider's API-key variables that is set and non-empty in
+    the current environment, and the NEUROSPLOIT_MODEL / NEUROSPLOIT_PROVIDER
+    markers.
+    """
+    import os
+    provider = PROVIDERS.get(provider_key)
+    if not provider:
+        return {}
+    exported: Dict[str, str] = {}
+    if provider.base_url and provider.base_url_env:
+        exported[provider.base_url_env] = provider.base_url
+    # Only the first populated key variable is forwarded.
+    first_set = next((name for name in provider.env_keys if os.getenv(name)), None)
+    if first_set is not None:
+        exported[first_set] = os.environ[first_set]
+    exported["NEUROSPLOIT_MODEL"] = model_id
+    exported["NEUROSPLOIT_PROVIDER"] = provider_key
+    return exported
@@ -0,0 +1,150 @@
+"""
+Orchestrator for NeuroSploit v3.3.0.
+
+Ties the pieces together: load the agent library, apply RL weights to pick and
+rank specialist agents for the target, compose the single master prompt (the
+`meta/orchestrator` playbook + the recon-aware agent catalog + the operating
+contract), hand it to the chosen CLI backend with Playwright MCP, then read back
+artifacts and feed the RL loop.
+"""
+
+import json
+import os
+from typing import Dict, List, Optional
+
+from . import backends, mcp, models
+from .agent_loader import AgentLibrary
+from .config import RunConfig, PATHS, ensure_dirs
+from .rl import RLEngine, outcomes_from_findings
+
+
+def compose_master_prompt(cfg: RunConfig, lib: AgentLibrary, rl: RLEngine,
+                          recon: Optional[dict]) -> str:
+    """Assemble the single master prompt handed to the CLI backend.
+
+    The prompt is made of up to three sections joined by newlines: a header
+    (target, scope, rules of engagement, the ranked agent catalog and RL
+    priors), the rendered ``orchestrator`` playbook when the library has one,
+    and the pipeline/output contract. Sections that are blank are dropped.
+
+    Args:
+        cfg: Run configuration; ``use_rl`` gates the RL weights and a positive
+            ``max_agents`` truncates the ranked candidate list.
+        lib: Agent library used for ranking, indexing and rendering.
+        rl: RL engine supplying per-agent weights.
+        recon: Optional recon data; ``None`` is rendered as ``{}``.
+    """
+    priors = rl.weights() if cfg.use_rl else {}
+    candidates = lib.ranked(recon, priors)
+    if cfg.max_agents > 0:
+        candidates = candidates[:cfg.max_agents]
+    agent_index = lib.index_markdown(candidates, priors)
+
+    # Only the top 40 candidates get their prior printed; unknown agents show 0.5.
+    shown_priors = {name: round(priors.get(name, 0.5), 2) for name in candidates[:40]}
+    rl_weights_txt = json.dumps(shown_priors, indent=0)
+
+    if "orchestrator" in lib.meta:
+        playbook = lib.render("orchestrator", cfg.target,
+                              recon_json=json.dumps(recon or {}),
+                              collaborator=cfg.collaborator)
+    else:
+        playbook = ""
+
+    header = f"""# NeuroSploit v3.3.0 — Autonomous Engagement
+
+You are running an AUTHORIZED, autonomous web penetration test.
+
+TARGET: {cfg.target}
+SCOPE: {cfg.scope or cfg.target}
+RULES OF ENGAGEMENT: {cfg.rules_of_engagement}
+OOB COLLABORATOR: {cfg.collaborator or '(none provided — skip OOB-only confirmations)'}
+WORKDIR: {cfg.resolved_workdir()}
+
+You have Playwright MCP (browser automation, JS execution, DOM/network capture,
+screenshots) and local shell tools. Use the browser to PROVE client-side
+execution; use the collaborator to PROVE blind/OOB issues.
+
+## Specialist agent library
+The `agents_md/` directory holds {lib.counts()['vulns']} vulnerability playbooks
+and {lib.counts()['meta']} meta playbooks. For each specialist you choose to run,
+open its file under `agents_md/vulns/<name>.md`, substitute the target and recon,
+and follow its methodology and (strict) anti-false-positive System Prompt.
+
+### Recon-ranked candidate agents (by RL priority)
+{agent_index}
+
+### RL priors (higher = historically more productive on similar targets)
+{rl_weights_txt}
+"""
+
+    contract = f"""
+## Required pipeline (follow in order)
+1. Run `agents_md/meta/recon.md` → write `results/recon.json`.
+2. Re-rank the candidate agents above using recon + RL priors; skip agents with
+   no applicable surface.
+3. Execute each selected specialist; gather candidate findings WITH evidence.
+4. For every candidate: `meta/exploit_validator.md` → `meta/false_positive_filter.md`.
+   Discard anything not reproducibly exploitable.
+5. Score survivors: `meta/severity_assessor.md` then `meta/impact_evaluator.md`.
+6. `meta/reporter.md` → write `results/findings.json` AND `reports/report.md`.
+7. `meta/rl_feedback.md` → write/merge `data/rl_state.json`.
+
+## Output contract (MANDATORY)
+Write `results/findings.json` as a JSON array of objects:
+{{"id","agent","title","severity","cvss","cwe","endpoint","payload","evidence","impact","remediation","confidence","validated"}}
+Only include findings with `validated: true`. If you find nothing, write `[]`.
+Also write `results/agents_ran.json` as a JSON array of the agent names you executed.
+
+Stay strictly in scope. Never run destructive/DoS payloads unless ROE permits.
+Report ONLY proven, reproducible findings.
+"""
+    sections = [part for part in (header, playbook, contract) if part.strip()]
+    return "\n".join(sections)
+
+
+def collect_results(workdir: str) -> Dict:
+    """Read back the artifacts the backend wrote for this run.
+
+    ``findings.json`` and ``agents_ran.json`` are each searched for, in order,
+    in ``workdir``, ``<workdir>/results`` (the relative path named by the
+    prompt's output contract) and the shared ``PATHS["results"]`` directory.
+    For each artifact the first file that parses to a JSON array wins, even
+    when that array is empty, so a run that legitimately found nothing is not
+    overridden by a stale file left in a later location.
+
+    Args:
+        workdir: Directory the backend was run in.
+
+    Returns:
+        ``{"findings": [...], "agents_ran": [...]}``. A missing, unreadable or
+        malformed artifact (including valid JSON that is not an array) yields
+        an empty list; this function is best-effort and does not raise for
+        bad artifact files.
+    """
+    # The backend may write under results/<slug>/ or results/ — check both,
+    # plus results/ relative to the workdir as the output contract spells it.
+    search_dirs = (workdir, os.path.join(workdir, "results"), PATHS["results"])
+    artifacts = (("findings", "findings.json"), ("agents_ran", "agents_ran.json"))
+
+    collected: Dict[str, list] = {}
+    for key, filename in artifacts:
+        for base in search_dirs:
+            path = os.path.join(base, filename)
+            try:
+                with open(path, encoding="utf-8") as fh:
+                    data = json.load(fh)
+            except (OSError, ValueError):
+                # Absent, unreadable or not valid JSON/UTF-8: try the next place.
+                continue
+            if isinstance(data, list):
+                collected[key] = data
+                break
+    return {key: collected.get(key, []) for key, _ in artifacts}
+
+
+def run_engagement(cfg: RunConfig, recon: Optional[dict] = None,
+                   progress=lambda m: None) -> Dict:
+    """Run one engagement end to end and return a summary of its results.
+
+    Steps: create the workdir, load the agent library and RL state, check the
+    requested backend, optionally write the Playwright MCP config, compose the
+    master prompt, launch the backend, collect the artifacts it wrote and,
+    unless RL is disabled or this is a dry run, update and save the RL state.
+
+    Args:
+        cfg: Run configuration.
+        recon: Optional pre-existing recon data passed into prompt composition.
+        progress: Callback receiving human-readable status messages.
+
+    Returns:
+        Dict with ``workdir``, ``returncode``, ``findings``, ``agents_ran``
+        and ``log``.
+
+    Raises:
+        RuntimeError: The configured backend is unknown or not available.
+    """
+    ensure_dirs()
+    run_dir = cfg.resolved_workdir()
+    os.makedirs(run_dir, exist_ok=True)
+
+    lib = AgentLibrary(PATHS["agents"])
+    rl = RLEngine(PATHS["rl_state"])
+    progress(f"Loaded {lib.counts()['total']} agents "
+             f"({lib.counts()['vulns']} vuln / {lib.counts()['meta']} meta)")
+
+    backend = backends.get(cfg.backend)
+    usable = bool(backend) and backend.available()
+    if not usable:
+        installed = [b.key for b in backends.detect()]
+        raise RuntimeError(f"Backend '{cfg.backend}' not available. Installed: {installed or 'none'}")
+
+    mcp_config = None
+    if cfg.use_mcp:
+        if mcp.playwright_available():
+            mcp_config = mcp.write_mcp_config(run_dir)
+            progress("Playwright MCP configured")
+        else:
+            progress("WARNING: npx not found — Playwright MCP disabled; browser-proof agents degraded")
+
+    master_prompt = compose_master_prompt(cfg, lib, rl, recon)
+    backend_env = models.resolve_env(cfg.provider, cfg.model)
+
+    progress(f"Launching {backend.label} ({cfg.model}) — autonomous={cfg.autonomous}")
+    result = backends.run(backend, master_prompt, run_dir, model=cfg.model,
+                          autonomous=cfg.autonomous, mcp_config=mcp_config,
+                          env=backend_env, timeout=cfg.timeout, dry_run=cfg.dry_run)
+    progress(f"Backend exited rc={result.returncode}; log: {result.log_path}")
+
+    artifacts = collect_results(run_dir)
+    findings = artifacts["findings"] or []
+    ran = artifacts["agents_ran"] or []
+    progress(f"Collected {len(findings)} validated finding(s) from {len(ran)} agent(s)")
+
+    if cfg.use_rl and not cfg.dry_run:
+        # An empty or missing framework name is normalised to None.
+        recon_data = recon or {}
+        tech_info = recon_data.get("tech", {}) or {}
+        tech = tech_info.get("framework", "") or None
+        rl.update(outcomes_from_findings(findings, ran, tech=tech), target=cfg.target)
+        rl.save()
+        progress("RL state updated → data/rl_state.json")
+
+    return {
+        "workdir": run_dir,
+        "returncode": result.returncode,
+        "findings": findings,
+        "agents_ran": ran,
+        "log": result.log_path,
+    }
@@ -0,0 +1,125 @@
+"""
+Reinforcement-learning engine for NeuroSploit v3.3.0.
+
+A lightweight, persisted reward loop that biases agent selection across runs.
+It is deliberately model-free and explainable: each specialist agent carries a
+weight in [0.05, 1.0] plus per-tech-stack affinity, updated after every run from
+validated findings (positive reward) and rejected false positives (negative).
+
+This mirrors `agents_md/meta/rl_feedback.md`: the markdown agent reasons about
+rewards qualitatively; this module applies them deterministically so the state
+file is reproducible and auditable.
+"""
+
+import json
+import os
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+# Parent of the directory that contains this module.
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Default location of the persisted RL state file.
+STATE_PATH = os.path.join(ROOT, "data", "rl_state.json")
+
+# Reward added per validated finding, looked up by lower-cased severity.
+SEVERITY_REWARD = {"critical": 1.0, "high": 0.7, "medium": 0.4, "low": 0.2, "info": 0.05}
+# Subtracted from the reward once per false positive.
+FP_PENALTY = 0.3
+IDLE_PENALTY = 0.05      # ran, found nothing, cost budget
+ALPHA = 0.3             # learning rate
+# Lower and upper bound applied to every stored weight by _clamp().
+WMIN, WMAX = 0.05, 1.0
+
+
+@dataclass
+class Outcome:
+    """What one agent produced in one run; the input to the RL reward/update."""
+    # Name of the agent this outcome belongs to (key into the RL state).
+    agent: str
+    validated: List[str]        # severities of validated findings
+    # Number of this agent's findings that were rejected as false positives.
+    false_positives: int = 0
+    # False when the agent was not executed; such outcomes earn a reward of 0.
+    ran: bool = True
+    # True when skipping the agent was the right call; also earns a reward of 0.
+    skipped_correctly: bool = False
+    # Optional tech-stack label used as the key for per-tech affinity.
+    tech: Optional[str] = None
+
+
+def _clamp(x: float) -> float:
+    """Limit *x* to the closed weight range [WMIN, WMAX]."""
+    capped = min(WMAX, x)
+    return max(WMIN, capped)
+
+
+class RLEngine:
+    """Persisted per-agent weights, updated as a moving average of run rewards.
+
+    ``state`` is a JSON-serialisable dict of the form
+    ``{"version": 1, "agents": {name: record}}`` where each record holds
+    ``weight``, ``runs``, ``validated_hits``, ``false_positives``,
+    ``reward_last`` and ``tech_affinity``. The state file may be incomplete or
+    hand/agent-edited, so every reader tolerates missing fields.
+    """
+
+    def __init__(self, path: str = STATE_PATH):
+        self.path = path
+        self.state = self._load()
+
+    def _load(self) -> dict:
+        """Load the state file, falling back to an empty state.
+
+        A missing, unreadable, corrupt or wrongly-shaped file is treated as
+        "no history yet" rather than an error, so a bad state file never
+        blocks a run.
+        """
+        try:
+            with open(self.path, encoding="utf-8") as fh:
+                state = json.load(fh)
+        except (OSError, ValueError):
+            return {"version": 1, "agents": {}}
+        # Later code calls dict methods on both levels; reject other shapes.
+        if not isinstance(state, dict) or not isinstance(state.get("agents", {}), dict):
+            return {"version": 1, "agents": {}}
+        return state
+
+    def weights(self) -> Dict[str, float]:
+        """Return ``{agent: weight}`` for every known agent (default 0.5)."""
+        return {
+            name: rec.get("weight", 0.5)
+            for name, rec in self.state.get("agents", {}).items()
+            if isinstance(rec, dict)
+        }
+
+    def weight(self, agent: str, tech: Optional[str] = None) -> float:
+        """Return one agent's weight, raised to its affinity for *tech* if higher.
+
+        Unknown agents get the neutral prior 0.5.
+        """
+        rec = self.state.get("agents", {}).get(agent)
+        if not rec:
+            return 0.5
+        w = rec.get("weight", 0.5)
+        if tech:
+            affinity = rec.get("tech_affinity") or {}
+            w = max(w, affinity.get(tech, 0.0) or 0.0)
+        return w
+
+    def reward(self, o: Outcome) -> float:
+        """Score an outcome in [-1.0, 1.0].
+
+        Agents that did not run, or were correctly skipped, score 0.0.
+        Otherwise the reward is the sum of per-severity rewards for validated
+        findings, minus a penalty per false positive, minus a small idle
+        penalty when the agent produced nothing at all.
+        """
+        if o.skipped_correctly or not o.ran:
+            return 0.0
+        # str() guards against a null/non-string severity in findings.json;
+        # unknown severities are rewarded like "low".
+        r = sum(SEVERITY_REWARD.get(str(s).strip().lower(), 0.2) for s in o.validated)
+        r -= FP_PENALTY * o.false_positives
+        if not o.validated and not o.false_positives:
+            r -= IDLE_PENALTY
+        return max(-1.0, min(1.0, r))
+
+    def update(self, outcomes: List[Outcome], target: str = "") -> dict:
+        """Fold a run's outcomes into the state and return the state.
+
+        For an agent that actually ran, ``weight`` (and the affinity for
+        ``o.tech``, when given) moves a fraction ``ALPHA`` of the way towards
+        the reward and is clamped to the allowed range. Outcomes that did not
+        run or were correctly skipped carry no observation, so they leave the
+        weights untouched instead of decaying them towards 0.
+        """
+        agents = self.state.setdefault("agents", {})
+        for o in outcomes:
+            rec = agents.get(o.agent)
+            if not isinstance(rec, dict):
+                rec = agents[o.agent] = {}
+            # Fill in any field a partial or older record is missing.
+            for field_name, default in (("weight", 0.5), ("runs", 0),
+                                        ("validated_hits", 0), ("false_positives", 0),
+                                        ("reward_last", 0.0)):
+                rec.setdefault(field_name, default)
+            if not isinstance(rec.get("tech_affinity"), dict):
+                rec["tech_affinity"] = {}
+
+            r = self.reward(o)
+            rec["reward_last"] = round(r, 3)
+            rec["validated_hits"] += len(o.validated)
+            rec["false_positives"] += o.false_positives
+            if not o.ran or o.skipped_correctly:
+                continue
+
+            rec["runs"] += 1
+            old = rec["weight"]
+            rec["weight"] = _clamp(old + ALPHA * (r - old))
+            if o.tech:
+                ta = rec["tech_affinity"]
+                prev = ta.get(o.tech, 0.5)
+                ta[o.tech] = _clamp(prev + ALPHA * (r - prev))
+        self.state["updated_for"] = target
+        return self.state
+
+    def save(self):
+        """Write the state to ``self.path`` as indented JSON.
+
+        The data is written to a sibling temporary file and then moved into
+        place, so a failure part-way through cannot leave a truncated state
+        file behind.
+        """
+        parent = os.path.dirname(self.path)
+        if parent:  # a bare filename has no directory to create
+            os.makedirs(parent, exist_ok=True)
+        tmp_path = f"{self.path}.tmp"
+        try:
+            with open(tmp_path, "w", encoding="utf-8") as fh:
+                json.dump(self.state, fh, indent=2)
+            os.replace(tmp_path, self.path)
+        finally:
+            # Only still present when the write or the move failed.
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
+
+
+def outcomes_from_findings(findings: List[dict], ran_agents: List[str],
+                           tech: Optional[str] = None) -> List[Outcome]:
+    """Build per-agent Outcomes from a run's findings + the agents that ran.
+
+    Every agent in *ran_agents* gets an Outcome, and so does any agent that is
+    only mentioned by a finding. A finding with a truthy ``validated`` adds its
+    severity (``"Low"`` when absent or null); one whose ``verdict`` is
+    ``"false_positive"`` increments the false-positive count.
+
+    Both inputs come from JSON files written by the backend, so malformed
+    entries are skipped rather than trusted: non-string or empty agent names,
+    findings that are not objects, and findings without a usable ``agent``.
+    ``None`` is accepted for either list.
+    """
+    by_agent: Dict[str, Outcome] = {}
+
+    def _outcome_for(name: str) -> Outcome:
+        # Create the agent's Outcome on first sight, then reuse it.
+        if name not in by_agent:
+            by_agent[name] = Outcome(agent=name, validated=[], false_positives=0,
+                                     ran=True, tech=tech)
+        return by_agent[name]
+
+    for name in ran_agents or []:
+        if isinstance(name, str) and name:
+            _outcome_for(name)
+
+    for f in findings or []:
+        if not isinstance(f, dict):
+            continue
+        name = f.get("agent")
+        if not isinstance(name, str) or not name:
+            # Unattributable finding: there is no agent to credit or penalise.
+            continue
+        outcome = _outcome_for(name)
+        if f.get("validated"):
+            outcome.validated.append(str(f.get("severity") or "Low"))
+        elif f.get("verdict") == "false_positive":
+            outcome.false_positives += 1
+    return list(by_agent.values())
+
+
+if __name__ == "__main__":
+    # Debug entry point: print the current per-agent weights from the default state file.
+    print(json.dumps(RLEngine().weights(), indent=2))