#!/usr/bin/env python3
"""
NeuroSploit v3.4.x — recon + whitebox(code) agent builder.

Emits two new categories the Rust harness loads:
  agents_md/recon/  — information-gathering specialists
  agents_md/code/   — white-box source-code (SAST) review specialists

Usage: python3 scripts/build_agents_v34.py
"""
import os

# Parent of the directory containing this script. main() writes its output
# under ROOT/agents_md, so with the script in scripts/ (per the usage line in
# the module docstring) this is presumably the repository root.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def render(a, code=False):
    """Build the Markdown prompt document for one agent definition.

    Args:
        a: Agent mapping. The keys read are ``title``, ``for``, ``steps``
            (a sequence of ``(heading, bullets)`` pairs), ``sev``, ``cwe``,
            ``impact``, ``fix`` and ``system``.
        code: When true, use the source-review wording and ``file:line``
            locations; otherwise use the reconnaissance wording with
            asset/URL locations.

    Returns:
        The whole document as a single newline-terminated string.
    """
    # ``{target}`` and ``{recon_json}`` are written out literally as
    # placeholders; nothing in this function substitutes them.
    recon_context = "**Recon Context:**\n{recon_json}\n"

    doc = [f"# {a['title']} Agent\n", "## User Prompt"]
    if not code:
        doc.extend((
            f"You are performing reconnaissance on **{{target}}** to {a['for']}.\n",
            recon_context,
        ))
    else:
        doc.extend((
            f"You are reviewing the source code of **{{target}}** for {a['for']}.\n",
            recon_context,
            "The relevant source files are provided to you below the methodology.\n",
        ))
    doc.append("**METHODOLOGY:**\n")

    # One numbered section per methodology step, each followed by a blank line.
    for number, (heading, bullets) in enumerate(a["steps"], 1):
        doc.append(f"### {number}. {heading}")
        for bullet in bullets:
            doc.append(f"- {bullet}")
        doc.append("")

    # The report-format section takes the number right after the last step.
    report_number = len(a["steps"]) + 1
    if code:
        title_locus = endpoint_locus = "file:line"
    else:
        title_locus, endpoint_locus = "asset/endpoint", "URL/host"

    doc += [
        f"### {report_number}. Report Format",
        "For each CONFIRMED finding:",
        "```",
        "FINDING:",
        f"- Title: {a['title']} at [{title_locus}]",
        f"- Severity: {a['sev']}",
        f"- CWE: {a['cwe']}",
        f"- Endpoint: [{endpoint_locus}]",
        "- Vector: [what/where]",
        "- Payload: [PoC / vulnerable code snippet]",
        "- Evidence: [proof / exact code quoted]",
        f"- Impact: {a['impact']}",
        f"- Remediation: {a['fix']}",
        "```\n",
        "## System Prompt",
        a["system"],
    ]
    return "\n".join(doc) + "\n"


# Reconnaissance agent definitions. main() renders each entry with
# render(a, code=False) and writes it to agents_md/recon/<name>.md.
# Every entry is a dict with these keys:
#   name   - output file stem
#   title  - agent heading, also reused in the finding title line
#   for    - goal phrase completing "... reconnaissance on {target} to <for>."
#   sev, cwe, impact, fix - values placed in the report-format block
#   steps  - list of (heading, [bullet, ...]) methodology sections
#   system - text emitted under "## System Prompt"
# "{target}" inside step bullets is kept literally in the rendered file.
RECON = [
{"name":"subdomain_enum","title":"Subdomain Enumeration Specialist","for":"discover all subdomains and expand the attack surface","sev":"Info","cwe":"CWE-200","impact":"Wider attack surface, forgotten/staging hosts","fix":"Inventory and decommission stale DNS records",
 "steps":[("Passive sources",["Query crt.sh, certificate transparency, Shodan, and passive DNS","Run `subfinder -d {target}` and `amass enum -passive -d {target}`"]),
 ("Active resolution",["Resolve and probe with `httpx -title -tech-detect -status-code`","Bruteforce with a curated wordlist where in scope"]),
 ("Triage",["Flag dev/staging/admin/api hosts and dangling CNAMEs (subdomain takeover candidates)"])],
 "system":"You are a recon specialist. Report only resolvable, in-scope subdomains you actually observed, with the resolution evidence. Do not invent hosts."},
{"name":"tech_fingerprint","title":"Technology Fingerprinting Specialist","for":"identify the full technology stack and versions","sev":"Info","cwe":"CWE-200","impact":"Targeted exploitation of known-vulnerable components","fix":"Hide version banners; keep components patched",
 "steps":[("Fingerprint",["Inspect headers, cookies, error pages, favicon hash","Run `whatweb`, `nuclei -t technologies`, and Wappalyzer-style detection"]),
 ("Version map",["Map server, framework, language, CMS, JS libs and their versions"]),
 ("CVE correlation",["Correlate detected versions to known CVEs for later exploitation"])],
 "system":"You are a fingerprinting specialist. Report only technologies you positively detected with the supporting evidence (header/banner/hash). Mark version guesses as uncertain."},
{"name":"js_analysis","title":"JavaScript Analysis Specialist","for":"extract endpoints, secrets and logic from client-side JS","sev":"Low","cwe":"CWE-200","impact":"Hidden endpoints and leaked secrets in bundles","fix":"Strip secrets from client code; restrict sourcemaps",
 "steps":[("Collect",["Gather all JS bundles and sourcemaps; `katana`/`gau` for URLs"]),
 ("Extract",["Regex for API paths, fetch/axios calls, API keys (sk-, AIza, nvapi-), tokens"]),
 ("Map",["Build an endpoint + parameter inventory from the JS for downstream agents"])],
 "system":"You are a JS-recon specialist. Report only endpoints/secrets actually present in the served JS, quoting the snippet. Validated secrets only; never invent."},
{"name":"api_discovery","title":"API Surface Discovery Specialist","for":"enumerate REST/GraphQL/gRPC/WebSocket surfaces","sev":"Info","cwe":"CWE-200","impact":"Undocumented API endpoints widen attack surface","fix":"Gate non-public APIs; remove exposed schemas",
 "steps":[("Find specs",["Probe /openapi.json, /swagger, /graphql, /.well-known, /v1 /v2 prefixes"]),
 ("Enumerate",["Introspect GraphQL; enumerate REST routes; check gRPC reflection"]),
 ("Catalog",["Record methods, params, auth requirements per endpoint"])],
 "system":"You are an API-recon specialist. Report only endpoints you confirmed respond, with method and a sample response signature. No speculation."},
{"name":"secret_scanning","title":"Exposed Secret Scanning Specialist","for":"find leaked credentials and keys across exposed assets","sev":"High","cwe":"CWE-522","impact":"Credential/key exposure enabling account or cloud takeover","fix":"Rotate exposed secrets; remove from public assets",
 "steps":[("Sweep",["Scan JS, .env, .git, backups, CI logs, comments with trufflehog-style regex"]),
 ("Validate",["Confirm key format and (in scope) liveness without abusing it"]),
 ("Classify",["Tag provider and privilege of each secret"])],
 "system":"You are a secret-scanning specialist. Report only real, validly-formatted secrets you actually found, quoting location. Never abuse keys beyond a minimal validity check."},
{"name":"dns_recon","title":"DNS Reconnaissance Specialist","for":"map DNS records and infrastructure relationships","sev":"Info","cwe":"CWE-200","impact":"Infra mapping; zone/record misconfig discovery","fix":"Harden DNS; disable zone transfers",
 "steps":[("Records",["Enumerate A/AAAA/CNAME/MX/TXT/NS/SOA; check SPF/DMARC/DKIM"]),
 ("Misconfig",["Test zone transfer (AXFR), wildcard records, dangling CNAMEs"]),
 ("Relate",["Cluster shared infrastructure and providers"])],
 "system":"You are a DNS-recon specialist. Report only records you actually resolved, with the query evidence."},
{"name":"content_discovery","title":"Content & Path Discovery Specialist","for":"discover hidden files, directories and backups","sev":"Low","cwe":"CWE-538","impact":"Exposure of admin panels, backups, configs","fix":"Remove sensitive files from web root; enforce authz",
 "steps":[("Crawl",["Spider with katana; parse robots.txt/sitemap.xml/.well-known"]),
 ("Fuzz",["`ffuf` directories/files with sensible wordlists and extensions (.bak,.old,.zip,.sql)"]),
 ("Triage",["Flag admin, backup, config, and source-leak paths"])],
 "system":"You are a content-discovery specialist. Report only paths that returned a meaningful status/body, with the evidence. No 404s as findings."},
{"name":"parameter_discovery","title":"Parameter Discovery Specialist","for":"enumerate hidden request parameters and inputs","sev":"Info","cwe":"CWE-200","impact":"Hidden params enable injection/logic attacks","fix":"Validate and document all accepted parameters",
 "steps":[("Mine",["Extract params from JS, forms, history (gau), and docs"]),
 ("Bruteforce",["Use arjun/param-miner style discovery with reflection detection"]),
 ("Hand off",["Provide the param inventory to injection specialists"])],
 "system":"You are a parameter-discovery specialist. Report only parameters you confirmed the app accepts/reflects, with evidence."},
{"name":"waf_cdn_detection","title":"WAF/CDN Detection Specialist","for":"identify WAF/CDN and inform evasion strategy","sev":"Info","cwe":"CWE-200","impact":"Informs bypass strategy; reveals origin exposure","fix":"Ensure origin is not directly reachable",
 "steps":[("Detect",["Fingerprint WAF/CDN via headers, cookies, block pages, `wafw00f`"]),
 ("Origin",["Search for origin IP leaks (DNS history, SSL SANs, headers)"]),
 ("Strategy",["Note effective encodings/paths for later, in-scope testing"])],
 "system":"You are a WAF/CDN specialist. Report only positively-identified protections and any verified origin exposure, with evidence."},
{"name":"cloud_asset_discovery","title":"Cloud Asset Discovery Specialist","for":"discover cloud buckets, functions and metadata surfaces","sev":"Low","cwe":"CWE-200","impact":"Exposed cloud assets and SSRF/metadata vectors","fix":"Lock down public cloud assets; enforce IMDSv2",
 "steps":[("Discover",["Find S3/GCS/Azure references; permutate bucket names; detect cloud provider"]),
 ("Probe",["Check public list/read on storage; note SSRF-to-metadata potential"]),
 ("Catalog",["Record provider, asset, and access level"])],
 "system":"You are a cloud-recon specialist. Report only assets you confirmed exist with their observed access level. No guessed buckets."},
{"name":"graphql_discovery","title":"GraphQL Discovery Specialist","for":"map the GraphQL schema and sensitive operations","sev":"Low","cwe":"CWE-200","impact":"Schema exposure aids targeted attacks","fix":"Disable introspection/suggestions in production",
 "steps":[("Locate",["Find /graphql endpoints and test introspection"]),
 ("Map",["If introspection off, use field-suggestion (clairvoyance) to reconstruct types"]),
 ("Flag",["Mark mutations and sensitive queries for the API agents"])],
 "system":"You are a GraphQL-recon specialist. Report only schema elements you actually recovered, with the query/response evidence."},
{"name":"osint_employee","title":"OSINT & Exposure Mapping Specialist","for":"map public exposure (leaked creds, repos, docs) for the target org","sev":"Low","cwe":"CWE-200","impact":"Public exposure enabling targeted attacks","fix":"Monitor and remediate public exposure",
 "steps":[("Sources",["Search public code (GitHub), paste sites, breach indices (in scope)"]),
 ("Correlate",["Link leaked emails/creds/repos to the target's assets"]),
 ("Report",["Summarize exposure relevant to the engagement"])],
 "system":"You are an OSINT specialist operating strictly within authorized scope. Report only verifiable public exposure tied to the target, citing the source. No private data harvesting."},
]


def _code(name, title, vc, cwe, sev, patterns, fix, impact):
    return {"name": name, "title": title, "for": f"{vc} in the source code", "sev": sev, "cwe": cwe,
            "impact": impact, "fix": fix,
            "steps": [("Locate sinks/sources", patterns),
                      ("Trace dataflow", ["Trace user-controlled input from source to the dangerous sink",
                                          "Confirm the path is reachable and lacks sanitization/validation"]),
                      ("Confirm exploitability", ["Quote the exact vulnerable lines (file:line)",
                                                  "Explain the concrete exploit and why existing controls don't stop it"])],
            "system": f"You are a white-box source reviewer for {vc}. Report ONLY issues you can prove in the PROVIDED code by quoting the exact vulnerable lines (file:line) and a reachable dataflow from untrusted input. Never report sanitized, unreachable, or hypothetical code. If the snippet is insufficient, say so rather than guess."}


# White-box source-review agent definitions, each built by _code(). main()
# renders every entry with render(a, code=True) and writes it to
# agents_md/code/<name>.md.
# Positional arguments per entry, in order:
#   name, title, vulnerability-class phrase, CWE id, severity,
#   [patterns for the "Locate sinks/sources" step], remediation, impact
CODE = [
_code("code_sqli","Source SQL Injection Reviewer","SQL injection","CWE-89","Critical",
 ["String concatenation/interpolation into SQL (f-strings, +, .format) passed to execute()","Raw queries bypassing the ORM; `.raw(`, `cursor.execute(... % ...)`"],
 "Use parameterized queries / ORM bindings","Database compromise, data exfiltration"),
_code("code_command_injection","Source Command Injection Reviewer","OS command injection","CWE-78","Critical",
 ["`os.system`, `subprocess(..., shell=True)`, `exec`, backticks with user input","Unsanitized input concatenated into shell strings"],
 "Avoid shells; pass argument arrays; validate input","Remote code execution on the host"),
_code("code_path_traversal","Source Path Traversal Reviewer","path traversal / arbitrary file access","CWE-22","High",
 ["User input in file paths (open/read/sendFile) without normalization","Missing checks for `../` and absolute paths"],
 "Canonicalize and confine paths to a safe base directory","Arbitrary file read/write"),
_code("code_ssrf","Source SSRF Reviewer","server-side request forgery","CWE-918","High",
 ["User-controlled URLs passed to HTTP clients (requests/fetch/curl)","No allowlist or scheme/host validation"],
 "Allowlist destinations; block internal ranges and redirects","Internal network access, cloud metadata theft"),
_code("code_xss","Source XSS Reviewer","cross-site scripting (output encoding)","CWE-79","High",
 ["Unescaped user input rendered to HTML (innerHTML, dangerouslySetInnerHTML, `|safe`, `v-html`)","Template autoescaping disabled"],
 "Context-aware output encoding; keep autoescaping on; CSP","Session theft, account takeover"),
_code("code_insecure_deserialization","Source Insecure Deserialization Reviewer","unsafe deserialization","CWE-502","Critical",
 ["`pickle.loads`, `yaml.load` (unsafe), Java/PHP native deserialization on untrusted data","Object deserialization of request data"],
 "Use safe formats/loaders; never deserialize untrusted data","Remote code execution"),
_code("code_hardcoded_secrets","Source Hardcoded Secrets Reviewer","hardcoded credentials/keys","CWE-798","High",
 ["API keys, passwords, tokens, private keys committed in source/config","High-entropy strings assigned to credential-like names"],
 "Move secrets to a vault/env; rotate exposed values","Credential/key compromise"),
_code("code_weak_crypto","Source Weak Cryptography Reviewer","weak or misused cryptography","CWE-327","Medium",
 ["MD5/SHA1 for passwords; ECB mode; static IV/salt; hardcoded keys","Custom/rolled crypto; weak random for security tokens"],
 "Use vetted algorithms (bcrypt/argon2, AES-GCM), random IVs, CSPRNG","Data exposure, token forgery"),
_code("code_auth_flaws","Source Authentication/Authorization Reviewer","broken authentication/authorization","CWE-287","High",
 ["Missing auth checks on sensitive routes; client-trusted role flags","Comparisons of secrets without constant-time; weak session handling"],
 "Enforce server-side authz on every action; harden sessions","Privilege escalation, account takeover"),
_code("code_idor_access","Source IDOR / Access Control Reviewer","insecure direct object references","CWE-639","High",
 ["Object lookups by user-supplied id without ownership checks","Direct DB fetch on `request.id` with no scoping"],
 "Enforce per-object ownership/authorization checks","Cross-account data access"),
_code("code_xxe","Source XXE Reviewer","XML external entity processing","CWE-611","High",
 ["XML parsers with external entities/DTDs enabled on untrusted input","`resolve_entities=True`, default-config parsers"],
 "Disable DTDs/external entities; use hardened parsers","File disclosure, SSRF"),
_code("code_open_redirect","Source Open Redirect Reviewer","open redirect","CWE-601","Medium",
 ["Redirects built from user input (redirect(request.param))","No allowlist of redirect destinations"],
 "Allowlist redirect targets; use relative paths","Phishing, OAuth token theft"),
_code("code_template_injection","Source Template Injection Reviewer","server-side template injection","CWE-1336","Critical",
 ["User input concatenated into template strings then rendered","`render_template_string`, dynamic template construction"],
 "Never render user input as templates; sandbox","Remote code execution"),
_code("code_race_condition","Source Race Condition Reviewer","TOCTOU / concurrency flaws","CWE-362","Medium",
 ["Check-then-act on shared state without locking","Non-atomic balance/quota/idempotency updates"],
 "Use atomic operations, locks, or transactions","Double-spend, state corruption"),
_code("code_unsafe_eval","Source Unsafe Eval Reviewer","dynamic code evaluation","CWE-95","Critical",
 ["`eval`, `exec`, `Function()`, `setTimeout(string)` on user input","Dynamic import/require of user-controlled names"],
 "Eliminate dynamic eval; use safe parsers/dispatch tables","Remote code execution"),
_code("code_csrf_missing","Source CSRF Protection Reviewer","missing CSRF protection","CWE-352","Medium",
 ["State-changing POST/PUT/DELETE without CSRF tokens","CSRF protection globally disabled"],
 "Enable anti-CSRF tokens / SameSite cookies","Unauthorized state-changing actions"),
_code("code_insecure_random","Source Insecure Randomness Reviewer","predictable randomness for security","CWE-330","Medium",
 ["`random`/`Math.random` used for tokens, IDs, passwords, OTPs","Seeded or time-based randomness for secrets"],
 "Use a CSPRNG (secrets, crypto.randomBytes)","Token/session prediction"),
_code("code_logging_sensitive","Source Sensitive Logging Reviewer","sensitive data in logs","CWE-532","Low",
 ["Logging passwords, tokens, PII, full requests","Debug logging of secrets in production paths"],
 "Redact sensitive fields; scope debug logging","Credential/PII exposure via logs"),
_code("code_sql_orm_raw","Source ORM Raw-Query Reviewer","unsafe raw ORM queries","CWE-89","High",
 ["`.raw()`, `.extra()`, query builders with string interpolation","Raw fragments mixing user input"],
 "Use parameter binding even in raw queries","SQL injection via ORM"),
_code("code_file_upload","Source File Upload Reviewer","insecure file upload handling","CWE-434","High",
 ["No type/extension/content validation; user-controlled filenames/paths","Uploads served from executable directories"],
 "Validate type/size; randomize names; store outside webroot","Webshell upload, RCE"),
_code("code_mass_assignment","Source Mass Assignment Reviewer","mass assignment / over-binding","CWE-915","High",
 ["Binding whole request body to models (`Model(**request)`, `update_attributes`)","No allowlist of bindable fields"],
 "Allowlist bindable fields; use DTOs","Privilege escalation via hidden fields"),
_code("code_jwt_misuse","Source JWT Misuse Reviewer","JWT verification flaws","CWE-347","High",
 ["`verify=False`, alg `none` accepted, secret not validated","Algorithm not pinned; weak/hardcoded secret"],
 "Pin algorithm; verify signature; strong secret/keys","Token forgery, auth bypass"),
_code("code_cors_misconfig","Source CORS Misconfiguration Reviewer","permissive CORS","CWE-942","Medium",
 ["`Access-Control-Allow-Origin: *` with credentials; reflecting Origin","Wildcard or unchecked origin allowlists"],
 "Strict origin allowlist; never reflect Origin with credentials","Cross-origin data theft"),
_code("code_ssrf_redirect","Source SSRF-via-Redirect Reviewer","SSRF through redirect following","CWE-918","Medium",
 ["HTTP clients following redirects to user-controlled URLs","No re-validation of redirect targets against allowlist"],
 "Disable/limit redirects; re-validate each hop","Internal access via redirect"),
]


def main():
    """Write one Markdown prompt file per agent and print a summary.

    Creates ``ROOT/agents_md/recon`` and ``ROOT/agents_md/code`` if they do
    not exist, renders every ``RECON`` entry with ``code=False`` and every
    ``CODE`` entry with ``code=True``, and writes each result to
    ``<dir>/<name>.md``, overwriting any existing file. Finally prints the
    number of agents in each category.
    """
    rdir = os.path.join(ROOT, "agents_md", "recon")
    cdir = os.path.join(ROOT, "agents_md", "code")
    os.makedirs(rdir, exist_ok=True)
    os.makedirs(cdir, exist_ok=True)

    for out_dir, agents, is_code in ((rdir, RECON, False), (cdir, CODE, True)):
        for a in agents:
            path = os.path.join(out_dir, a["name"] + ".md")
            # Use a context manager so each file is flushed and closed
            # deterministically, and pin the encoding so the output does not
            # depend on the platform's locale default.
            with open(path, "w", encoding="utf-8") as fh:
                fh.write(render(a, code=is_code))

    print(f"recon agents: {len(RECON)}  |  code agents: {len(CODE)}")


# Generate the agent files only when executed as a script, not on import.
if __name__ == "__main__":
    main()