NeuroSploit v3.2.1 - AI-Everywhere Auto Pentest + Container Fix + Deep Recon Overhaul

## AI-Everywhere Auto Pentest

- Pre-stream AI master planning (_ai_master_plan) runs before parallel streams
- Stream 1 AI recon analysis (Phase 9: hidden endpoint probing, priority routing)
- Stream 2 AI payload generation (replaces hardcoded payloads with context-aware AI)
- Stream 3 AI tool output analysis (real findings vs noise classification)
- 4 new prompt builders in ai_prompts.py (master_plan, junior_ai_test, tool_analysis, recon_analysis)

## LLM-as-VulnEngine: AI Deep Testing

- New _ai_deep_test() iterative loop: OBSERVE→PLAN→EXECUTE→ANALYZE→ADAPT (3 iterations max)
- AI-first for top 15 injection types, hardcoded fallback for rest
- Per-endpoint AI testing in Phase C instead of single _ai_dynamic_test()
- New system prompt context: deep_testing + iterative_testing
- Token budget adaptive: 15 normal, 5 when <50k tokens remain

## Container Fix (Critical)

- Fixed ENTRYPOINT ["/bin/bash", "-c"] → CMD ["bash"] in Dockerfile.kali
- Root cause: Docker ran /bin/bash -c "sleep" "infinity" → missing operand → container exit
- All Kali sandbox tools (nuclei, naabu, etc.) now start and execute correctly

## Deep Recon Overhaul

- JS analysis: 10→30 files, 11 regex patterns, source map parsing, parameter extraction
- Sitemaps: recursive index following (depth 3), 8 candidates, 500 URL cap
- API discovery: 7→20 Swagger/OpenAPI paths, 1→6 GraphQL paths, request body schema extraction
- Framework detection: 9 frameworks (WordPress, Laravel, Django, Spring, Express, ASP.NET, Rails, Next.js, Flask)
- 40+ common hidden/sensitive paths checked (.env, .git, /actuator, /debug, etc.)
- API pattern fuzzing: infers endpoints from discovered patterns, batch existence checks
- HTTP method discovery via OPTIONS probing
- URL normalization and deduplication

## Frontend Fixes

- Elapsed time now works for completed scans (computed from started_at→completed_at)
- Container telemetry: exit -1 shows "ERR" (yellow), duration shows "N/A" on failure
- HTML report rewrite: professional pentest report with cover page, risk gauge, ToC, per-finding cards, print CSS

## Other

- Updated rebuild.sh summary and validation
- Bug bounty training datasets added

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-07-24 11:40:56 +02:00 · 2026-02-23 17:55:28 -03:00
parent b056f6962a
commit 79acfe04a3
12 changed files with 4289 additions and 386 deletions
@@ -2,15 +2,19 @@
 Advanced reconnaissance module for NeuroSploitv2.

 Performs deep JS analysis, sitemap/robots parsing, API enumeration,
+source map parsing, framework-specific discovery, path fuzzing,
 and technology fingerprinting using async HTTP requests.
 """

 import re
 import json
 import asyncio
+import logging
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional
-from urllib.parse import urljoin, urlparse
+from typing import Dict, List, Optional, Set, Tuple
+from urllib.parse import urljoin, urlparse, parse_qs, urlencode
+
+logger = logging.getLogger(__name__)

 try:
    import aiohttp
@@ -24,17 +28,24 @@ except ImportError:
    ET = None

 REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=10) if HAS_AIOHTTP else None
-MAX_JS_FILES = 10
-MAX_JS_SIZE = 500 * 1024  # 500 KB
-MAX_SITEMAP_URLS = 200
+MAX_JS_FILES = 30
+MAX_JS_SIZE = 1024 * 1024  # 1 MB
+MAX_SITEMAP_URLS = 500
+MAX_SITEMAP_DEPTH = 3  # Recursive sitemap index depth
+MAX_ENDPOINTS = 2000  # Global cap to prevent memory bloat

 # --- Regex patterns for JS analysis ---

-RE_API_ENDPOINT = re.compile(r'/api/v[0-9]+/[a-z_/]+')
+RE_API_ENDPOINT = re.compile(r'["\'](/api/v?\d*/[a-zA-Z0-9_/\-{}]+)["\']')
+RE_RELATIVE_PATH = re.compile(r'["\'](/[a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-{}]+){1,6})["\']')
 RE_FETCH_URL = re.compile(r'fetch\(\s*["\']([^"\']+)["\']')
-RE_AXIOS_URL = re.compile(r'axios\.(?:get|post|put|patch|delete)\(\s*["\']([^"\']+)["\']')
+RE_AXIOS_URL = re.compile(r'axios\.(?:get|post|put|patch|delete|request)\(\s*["\']([^"\']+)["\']')
 RE_AJAX_URL = re.compile(r'\$\.ajax\(\s*\{[^}]*url\s*:\s*["\']([^"\']+)["\']', re.DOTALL)
 RE_XHR_URL = re.compile(r'\.open\(\s*["\'][A-Z]+["\']\s*,\s*["\']([^"\']+)["\']')
+RE_TEMPLATE_LITERAL = re.compile(r'`(/[a-zA-Z0-9_/\-]+\$\{[^}]+\}[a-zA-Z0-9_/\-]*)`')
+RE_WINDOW_LOCATION = re.compile(r'(?:window\.location|location\.href)\s*=\s*["\']([^"\']+)["\']')
+RE_FORM_ACTION = re.compile(r'action\s*[:=]\s*["\']([^"\']+)["\']')
+RE_HREF_PATTERN = re.compile(r'href\s*[:=]\s*["\']([^"\']+)["\']')

 RE_API_KEY = re.compile(
    r'(?:sk-[a-zA-Z0-9]{20,}|pk_(?:live|test)_[a-zA-Z0-9]{20,}'
@@ -51,6 +62,20 @@ RE_INTERNAL_URL = re.compile(
 RE_REACT_ROUTE = re.compile(r'path\s*[:=]\s*["\'](/[^"\']*)["\']')
 RE_ANGULAR_ROUTE = re.compile(r'path\s*:\s*["\']([^"\']+)["\']')
 RE_VUE_ROUTE = re.compile(r'path\s*:\s*["\'](/[^"\']*)["\']')
+RE_NEXTJS_PAGE = re.compile(r'"(/[a-zA-Z0-9_/\[\]\-]+)"')
+
+# Source map patterns
+RE_SOURCEMAP_URL = re.compile(r'//[#@]\s*sourceMappingURL\s*=\s*(\S+)')
+RE_SOURCEMAP_ROUTES = re.compile(r'(?:pages|routes|views)/([a-zA-Z0-9_/\[\]\-]+)\.(?:tsx?|jsx?|vue|svelte)')
+
+# GraphQL patterns
+RE_GQL_QUERY = re.compile(r'(?:query|mutation|subscription)\s+(\w+)')
+RE_GQL_FIELD = re.compile(r'gql\s*`[^`]*`', re.DOTALL)
+
+# Parameter patterns in JS
+RE_URL_PARAM = re.compile(r'[?&]([a-zA-Z0-9_]+)=')
+RE_BODY_PARAM = re.compile(r'(?:body|data|params)\s*[:=]\s*\{([^}]+)\}', re.DOTALL)
+RE_JSON_KEY = re.compile(r'["\']([a-zA-Z_][a-zA-Z0-9_]*)["\']')


@dataclass
@@ -60,6 +85,8 @@ class JSAnalysisResult:
    api_keys: List[str] = field(default_factory=list)
    internal_urls: List[str] = field(default_factory=list)
    secrets: List[str] = field(default_factory=list)
+    parameters: Dict[str, List[str]] = field(default_factory=dict)
+    source_map_routes: List[str] = field(default_factory=list)


@dataclass
@@ -70,12 +97,38 @@ class APISchema:
    source: str = ""


+@dataclass
+class EndpointInfo:
+    """Rich endpoint descriptor with method and parameter hints."""
+    url: str
+    method: str = "GET"
+    params: List[str] = field(default_factory=list)
+    source: str = ""  # How this endpoint was discovered
+    priority: int = 5  # 1-10, higher = more interesting
+
+
+def _normalize_url(url: str) -> str:
+    """Canonicalize a URL for deduplication."""
+    parsed = urlparse(url)
+    path = parsed.path.rstrip("/") or "/"
+    # Normalize double slashes
+    while "//" in path:
+        path = path.replace("//", "/")
+    # Sort query parameters
+    if parsed.query:
+        params = parse_qs(parsed.query, keep_blank_values=True)
+        sorted_query = urlencode(sorted(params.items()), doseq=True)
+        return f"{parsed.scheme}://{parsed.netloc}{path}?{sorted_query}"
+    return f"{parsed.scheme}://{parsed.netloc}{path}"
+
+
 class DeepRecon:
    """Advanced reconnaissance: JS analysis, sitemap, robots, API enum, fingerprinting."""

    def __init__(self, session: Optional["aiohttp.ClientSession"] = None):
        self._external_session = session is not None
        self._session = session
+        self._seen_urls: Set[str] = set()

    async def _get_session(self) -> "aiohttp.ClientSession":
        if self._session is None or self._session.closed:
@@ -101,100 +154,262 @@ class DeepRecon:
        except Exception:
            return None

+    async def _head_check(self, url: str) -> Optional[int]:
+        """Quick HEAD request to check if a URL exists. Returns status or None."""
+        try:
+            session = await self._get_session()
+            async with session.head(url, ssl=False, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=5)) as resp:
+                return resp.status
+        except Exception:
+            return None
+
+    async def _check_url_alive(self, url: str, accept_codes: Set[int] = None) -> bool:
+        """Check if URL returns an acceptable status code."""
+        if accept_codes is None:
+            accept_codes = {200, 201, 301, 302, 307, 308, 401, 403}
+        status = await self._head_check(url)
+        return status is not None and status in accept_codes
+
    # ------------------------------------------------------------------
-    # JS file analysis
+    # JS file analysis (enhanced)
    # ------------------------------------------------------------------

    async def crawl_js_files(self, base_url: str, js_urls: List[str]) -> JSAnalysisResult:
        """Fetch and analyse JavaScript files for endpoints, keys, and secrets."""
        result = JSAnalysisResult()
-        urls_to_scan = js_urls[:MAX_JS_FILES]
+        urls_to_scan = list(dict.fromkeys(js_urls))[:MAX_JS_FILES]

        tasks = [self._fetch(urljoin(base_url, u), max_size=MAX_JS_SIZE) for u in urls_to_scan]
        bodies = await asyncio.gather(*tasks, return_exceptions=True)

+        # Also try to fetch source maps in parallel
+        sourcemap_tasks = []
+        sourcemap_base_urls = []
+        for url, body in zip(urls_to_scan, bodies):
+            if not isinstance(body, str):
+                continue
+            sm = RE_SOURCEMAP_URL.search(body)
+            if sm:
+                sm_url = sm.group(1)
+                if not sm_url.startswith("data:"):
+                    full_url = urljoin(urljoin(base_url, url), sm_url)
+                    sourcemap_tasks.append(self._fetch(full_url, max_size=MAX_JS_SIZE * 2))
+                    sourcemap_base_urls.append(full_url)
+
+        sourcemap_bodies = []
+        if sourcemap_tasks:
+            sourcemap_bodies = await asyncio.gather(*sourcemap_tasks, return_exceptions=True)
+
        seen_endpoints: set = set()
+        seen_params: Dict[str, Set[str]] = {}
+
        for body in bodies:
            if not isinstance(body, str):
                continue
+            self._extract_from_js(body, seen_endpoints, seen_params, result)

-            # API endpoint patterns
-            for m in RE_API_ENDPOINT.finditer(body):
-                seen_endpoints.add(m.group(0))
-            for regex in (RE_FETCH_URL, RE_AXIOS_URL, RE_AJAX_URL, RE_XHR_URL):
-                for m in regex.finditer(body):
-                    seen_endpoints.add(m.group(1))
-
-            # Route definitions (React Router, Angular, Vue Router)
-            for regex in (RE_REACT_ROUTE, RE_ANGULAR_ROUTE, RE_VUE_ROUTE):
-                for m in regex.finditer(body):
-                    seen_endpoints.add(m.group(1))
-
-            # API keys / tokens
-            for m in RE_API_KEY.finditer(body):
-                val = m.group(0)
-                if val not in result.api_keys:
-                    result.api_keys.append(val)
-                    result.secrets.append(val)
-
-            # Internal / private URLs
-            for m in RE_INTERNAL_URL.finditer(body):
-                val = m.group(0)
-                if val not in result.internal_urls:
-                    result.internal_urls.append(val)
+        # Parse source maps for original file paths → route discovery
+        for sm_body in sourcemap_bodies:
+            if not isinstance(sm_body, str):
+                continue
+            try:
+                sm_data = json.loads(sm_body)
+                sources = sm_data.get("sources", [])
+                for src in sources:
+                    m = RE_SOURCEMAP_ROUTES.search(src)
+                    if m:
+                        route = "/" + m.group(1).replace("[", "{").replace("]", "}")
+                        result.source_map_routes.append(route)
+                        seen_endpoints.add(route)
+            except (json.JSONDecodeError, ValueError):
+                # Not valid JSON source map — might still contain paths
+                for m in RE_SOURCEMAP_ROUTES.finditer(sm_body):
+                    route = "/" + m.group(1).replace("[", "{").replace("]", "}")
+                    result.source_map_routes.append(route)
+                    seen_endpoints.add(route)

        # Resolve endpoints relative to base_url
        for ep in sorted(seen_endpoints):
-            resolved = urljoin(base_url, ep) if not ep.startswith("http") else ep
-            if resolved not in result.endpoints:
+            if ep.startswith("http"):
+                resolved = ep
+            elif ep.startswith("/"):
+                resolved = urljoin(base_url, ep)
+            else:
+                continue
+            normalized = _normalize_url(resolved)
+            if normalized not in self._seen_urls:
+                self._seen_urls.add(normalized)
                result.endpoints.append(resolved)

+        # Convert param sets
+        for endpoint, params in seen_params.items():
+            result.parameters[endpoint] = sorted(params)
+
        return result

+    def _extract_from_js(
+        self, body: str, seen_endpoints: set, seen_params: Dict[str, Set[str]],
+        result: JSAnalysisResult,
+    ):
+        """Extract endpoints, params, keys, and internal URLs from a JS body."""
+        # API endpoint patterns (expanded)
+        for regex in (RE_API_ENDPOINT, RE_RELATIVE_PATH, RE_FETCH_URL, RE_AXIOS_URL,
+                      RE_AJAX_URL, RE_XHR_URL, RE_TEMPLATE_LITERAL, RE_WINDOW_LOCATION,
+                      RE_FORM_ACTION, RE_HREF_PATTERN):
+            for m in regex.finditer(body):
+                ep = m.group(1) if regex.groups else m.group(0)
+                # Filter out obvious non-endpoints
+                if self._is_valid_endpoint(ep):
+                    seen_endpoints.add(ep)
+
+        # Route definitions (React Router, Angular, Vue Router, Next.js)
+        for regex in (RE_REACT_ROUTE, RE_ANGULAR_ROUTE, RE_VUE_ROUTE, RE_NEXTJS_PAGE):
+            for m in regex.finditer(body):
+                route = m.group(1)
+                if route.startswith("/") and len(route) < 200:
+                    seen_endpoints.add(route)
+
+        # Extract URL parameters
+        for m in RE_URL_PARAM.finditer(body):
+            param_name = m.group(1)
+            # Find the URL this param belongs to (rough heuristic)
+            start = max(0, m.start() - 200)
+            context = body[start:m.start()]
+            for ep_regex in (RE_FETCH_URL, RE_API_ENDPOINT):
+                ep_match = ep_regex.search(context)
+                if ep_match:
+                    ep = ep_match.group(1) if ep_regex.groups else ep_match.group(0)
+                    if ep not in seen_params:
+                        seen_params[ep] = set()
+                    seen_params[ep].add(param_name)
+
+        # Extract JSON body parameters
+        for m in RE_BODY_PARAM.finditer(body):
+            block = m.group(1)
+            for key_m in RE_JSON_KEY.finditer(block):
+                key = key_m.group(1)
+                if len(key) <= 50 and not key.startswith("__"):
+                    if "_body_params" not in seen_params:
+                        seen_params["_body_params"] = set()
+                    seen_params["_body_params"].add(key)
+
+        # API keys / tokens
+        for m in RE_API_KEY.finditer(body):
+            val = m.group(0)
+            if val not in result.api_keys:
+                result.api_keys.append(val)
+                result.secrets.append(val)
+
+        # Internal / private URLs
+        for m in RE_INTERNAL_URL.finditer(body):
+            val = m.group(0)
+            if val not in result.internal_urls:
+                result.internal_urls.append(val)
+
+    @staticmethod
+    def _is_valid_endpoint(ep: str) -> bool:
+        """Filter out non-endpoint matches (CSS, images, data URIs, etc.)."""
+        if not ep or len(ep) > 500:
+            return False
+        if ep.startswith(("data:", "javascript:", "mailto:", "tel:", "#", "blob:")):
+            return False
+        # Skip common static assets
+        SKIP_EXT = ('.css', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff',
+                     '.woff2', '.ttf', '.eot', '.mp4', '.mp3', '.webp', '.avif',
+                     '.map', '.ts', '.tsx', '.jsx', '.scss', '.less', '.pdf')
+        lower = ep.lower()
+        if any(lower.endswith(ext) for ext in SKIP_EXT):
+            return False
+        # Must look like a path
+        if ep.startswith("/") or ep.startswith("http"):
+            return True
+        return False
+
    # ------------------------------------------------------------------
-    # Sitemap parsing
+    # Sitemap parsing (enhanced with recursive index following)
    # ------------------------------------------------------------------

    async def parse_sitemap(self, target: str) -> List[str]:
-        """Fetch and parse sitemap XML files for URLs."""
+        """Fetch and parse sitemap XML files for URLs. Follows sitemap indexes recursively."""
        target = target.rstrip("/")
        candidates = [
            f"{target}/sitemap.xml",
            f"{target}/sitemap_index.xml",
            f"{target}/sitemap1.xml",
+            f"{target}/sitemap-index.xml",
+            f"{target}/sitemaps.xml",
+            f"{target}/post-sitemap.xml",
+            f"{target}/page-sitemap.xml",
+            f"{target}/category-sitemap.xml",
        ]
-        urls: set = set()

-        for sitemap_url in candidates:
+        # Also check robots.txt for sitemap directives
+        robots_body = await self._fetch(f"{target}/robots.txt")
+        if robots_body:
+            for line in robots_body.splitlines():
+                line = line.strip()
+                if line.lower().startswith("sitemap:"):
+                    sm_url = line.split(":", 1)[1].strip()
+                    if sm_url and sm_url not in candidates:
+                        candidates.append(sm_url)
+
+        urls: set = set()
+        visited_sitemaps: set = set()
+
+        async def _parse_one(sitemap_url: str, depth: int = 0):
+            if depth > MAX_SITEMAP_DEPTH or sitemap_url in visited_sitemaps:
+                return
+            if len(urls) >= MAX_SITEMAP_URLS:
+                return
+            visited_sitemaps.add(sitemap_url)
+
            body = await self._fetch(sitemap_url)
            if not body or ET is None:
-                continue
+                return
            try:
                root = ET.fromstring(body)
            except ET.ParseError:
-                continue
-            # Handle both sitemapindex and urlset; strip namespace
+                return
+
+            sub_sitemaps = []
            for elem in root.iter():
                tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
                if tag == "loc" and elem.text:
-                    urls.add(elem.text.strip())
+                    loc = elem.text.strip()
+                    # Check if this is a sub-sitemap
+                    if loc.endswith(".xml") or "sitemap" in loc.lower():
+                        sub_sitemaps.append(loc)
+                    else:
+                        urls.add(loc)
                    if len(urls) >= MAX_SITEMAP_URLS:
-                        return sorted(urls)[:MAX_SITEMAP_URLS]
+                        return
+
+            # Recursively follow sub-sitemaps
+            for sub in sub_sitemaps[:10]:  # Limit sub-sitemap recursion
+                await _parse_one(sub, depth + 1)
+
+        # Parse all candidate sitemaps
+        for sitemap_url in candidates:
+            if len(urls) >= MAX_SITEMAP_URLS:
+                break
+            await _parse_one(sitemap_url)

        return sorted(urls)[:MAX_SITEMAP_URLS]

    # ------------------------------------------------------------------
-    # Robots.txt parsing
+    # Robots.txt parsing (enhanced with Sitemap extraction)
    # ------------------------------------------------------------------

-    async def parse_robots(self, target: str) -> List[str]:
-        """Parse robots.txt and return resolved paths (Disallow + Allow)."""
+    async def parse_robots(self, target: str) -> Tuple[List[str], List[str]]:
+        """Parse robots.txt. Returns (paths, sitemap_urls)."""
        target = target.rstrip("/")
        body = await self._fetch(f"{target}/robots.txt")
        if not body:
-            return []
+            return [], []

        paths: set = set()
+        sitemaps: list = []
+
        for line in body.splitlines():
            line = line.strip()
            if line.startswith("#") or ":" not in line:
@@ -202,14 +417,16 @@ class DeepRecon:
            directive, _, value = line.partition(":")
            directive = directive.strip().lower()
            value = value.strip()
-            if directive in ("disallow", "allow") and value:
+            if directive in ("disallow", "allow") and value and value != "/":
                resolved = urljoin(target + "/", value)
                paths.add(resolved)
+            elif directive == "sitemap" and value:
+                sitemaps.append(value)

-        return sorted(paths)
+        return sorted(paths), sitemaps

    # ------------------------------------------------------------------
-    # API enumeration (Swagger / OpenAPI / GraphQL)
+    # API enumeration (Swagger / OpenAPI / GraphQL / WADL / AsyncAPI)
    # ------------------------------------------------------------------

    _API_DOC_PATHS = [
@@ -217,61 +434,97 @@ class DeepRecon:
        "/openapi.json",
        "/api-docs",
        "/v2/api-docs",
+        "/v3/api-docs",
        "/swagger/v1/swagger.json",
+        "/swagger/v2/swagger.json",
        "/.well-known/openapi",
        "/api/swagger.json",
+        "/api/openapi.json",
+        "/api/v1/swagger.json",
+        "/api/v1/openapi.json",
+        "/api/docs",
+        "/docs/api",
+        "/doc.json",
+        "/public/swagger.json",
+        "/swagger-ui/swagger.json",
+        "/api-docs.json",
+        "/api/api-docs",
+        "/_api/docs",
+    ]
+
+    _GRAPHQL_PATHS = [
+        "/graphql",
+        "/graphiql",
+        "/api/graphql",
+        "/v1/graphql",
+        "/gql",
+        "/query",
    ]

    async def enumerate_api(self, target: str, technologies: List[str]) -> APISchema:
-        """Discover and parse API documentation (OpenAPI/Swagger, GraphQL)."""
+        """Discover and parse API documentation (OpenAPI/Swagger, GraphQL, WADL)."""
        target = target.rstrip("/")
        schema = APISchema()

-        # Try OpenAPI / Swagger endpoints
-        for path in self._API_DOC_PATHS:
-            body = await self._fetch(f"{target}{path}")
-            if not body:
+        # Try OpenAPI / Swagger endpoints (parallel batch)
+        api_tasks = [self._fetch(f"{target}{path}") for path in self._API_DOC_PATHS]
+        api_results = await asyncio.gather(*api_tasks, return_exceptions=True)
+
+        for path, body in zip(self._API_DOC_PATHS, api_results):
+            if not isinstance(body, str):
                continue
            try:
                doc = json.loads(body)
            except (json.JSONDecodeError, ValueError):
                continue

-            # Looks like a valid Swagger/OpenAPI doc
            if "paths" in doc or "openapi" in doc or "swagger" in doc:
                schema.version = doc.get("openapi", doc.get("info", {}).get("version", ""))
                schema.source = path
                for route, methods in doc.get("paths", {}).items():
+                    if not isinstance(methods, dict):
+                        continue
                    for method, detail in methods.items():
                        if method.lower() in ("get", "post", "put", "patch", "delete", "options", "head"):
-                            params = [
-                                p.get("name", "")
-                                for p in detail.get("parameters", [])
-                                if isinstance(p, dict)
-                            ]
+                            params = []
+                            if isinstance(detail, dict):
+                                for p in detail.get("parameters", []):
+                                    if isinstance(p, dict):
+                                        params.append(p.get("name", ""))
+                                # Also extract request body schema params
+                                req_body = detail.get("requestBody", {})
+                                if isinstance(req_body, dict):
+                                    content = req_body.get("content", {})
+                                    for ct, ct_detail in content.items():
+                                        if isinstance(ct_detail, dict):
+                                            props = ct_detail.get("schema", {}).get("properties", {})
+                                            if isinstance(props, dict):
+                                                params.extend(props.keys())
                            schema.endpoints.append({
                                "url": route,
                                "method": method.upper(),
-                                "params": params,
+                                "params": [p for p in params if p],
                            })
+                logger.info(f"[DeepRecon] Found API schema at {path}: {len(schema.endpoints)} endpoints")
                return schema

-        # GraphQL introspection
-        if "graphql" in [t.lower() for t in technologies] or not schema.endpoints:
-            introspection = await self._graphql_introspect(target)
+        # GraphQL introspection (try multiple paths)
+        for gql_path in self._GRAPHQL_PATHS:
+            introspection = await self._graphql_introspect(f"{target}{gql_path}")
            if introspection:
                return introspection

        return schema

-    async def _graphql_introspect(self, target: str) -> Optional[APISchema]:
-        """Attempt GraphQL introspection query."""
-        query = '{"query":"{ __schema { queryType { name } types { name fields { name args { name } } } } }"}'
+    async def _graphql_introspect(self, gql_url: str) -> Optional[APISchema]:
+        """Attempt GraphQL introspection query at a specific URL."""
+        query = '{"query":"{ __schema { queryType { name } mutationType { name } types { name kind fields { name args { name type { name } } } } } }"}'
        try:
            session = await self._get_session()
            headers = {"Content-Type": "application/json"}
            async with session.post(
-                f"{target}/graphql", data=query, headers=headers, ssl=False
+                gql_url, data=query, headers=headers, ssl=False,
+                timeout=aiohttp.ClientTimeout(total=8),
            ) as resp:
                if resp.status != 200:
                    return None
@@ -282,10 +535,13 @@ class DeepRecon:
        if "data" not in data or "__schema" not in data.get("data", {}):
            return None

-        schema = APISchema(version="graphql", source="/graphql")
+        parsed_url = urlparse(gql_url)
+        source_path = parsed_url.path
+
+        schema = APISchema(version="graphql", source=source_path)
        for type_info in data["data"]["__schema"].get("types", []):
            type_name = type_info.get("name", "")
-            if type_name.startswith("__"):
+            if type_name.startswith("__") or type_info.get("kind") in ("SCALAR", "ENUM", "INPUT_OBJECT"):
                continue
            for fld in type_info.get("fields", []) or []:
                params = [a["name"] for a in fld.get("args", []) if isinstance(a, dict)]
@@ -296,13 +552,264 @@ class DeepRecon:
                })
        return schema if schema.endpoints else None

+    # ------------------------------------------------------------------
+    # Framework-specific endpoint discovery
+    # ------------------------------------------------------------------
+
+    # Framework key -> paths worth probing for that framework.
+    # discover_framework_endpoints() appends each path to the target base URL
+    # for every framework key it matched from the detected technologies.
+    _FRAMEWORK_PATHS: Dict[str, List[str]] = {
+        "wordpress": [
+            "/wp-admin/", "/wp-login.php", "/wp-json/wp/v2/posts",
+            "/wp-json/wp/v2/users", "/wp-json/wp/v2/pages",
+            "/wp-json/wp/v2/categories", "/wp-json/wp/v2/comments",
+            "/wp-json/wp/v2/media", "/wp-json/wp/v2/tags",
+            "/wp-json/", "/wp-content/uploads/",
+            "/wp-cron.php", "/xmlrpc.php", "/?rest_route=/wp/v2/users",
+            "/wp-admin/admin-ajax.php", "/wp-admin/load-scripts.php",
+            "/wp-includes/wlwmanifest.xml",
+        ],
+        "laravel": [
+            "/api/user", "/api/login", "/api/register",
+            "/sanctum/csrf-cookie", "/telescope",
+            "/horizon", "/nova-api/", "/_debugbar/open",
+            "/storage/logs/laravel.log", "/env",
+        ],
+        "django": [
+            "/admin/", "/admin/login/", "/api/",
+            "/__debug__/", "/static/admin/",
+            "/accounts/login/", "/accounts/signup/",
+            "/api/v1/", "/api/v2/",
+        ],
+        "spring": [
+            "/actuator", "/actuator/health", "/actuator/env",
+            "/actuator/beans", "/actuator/mappings", "/actuator/info",
+            "/actuator/configprops", "/actuator/metrics",
+            "/swagger-ui.html", "/swagger-ui/index.html",
+            "/api-docs", "/v3/api-docs",
+        ],
+        "express": [
+            "/api/", "/api/v1/", "/api/health",
+            "/api/status", "/auth/login", "/auth/register",
+            "/graphql",
+        ],
+        "aspnet": [
+            "/_blazor", "/swagger", "/swagger/index.html",
+            "/api/values", "/api/health",
+            "/Identity/Account/Login", "/Identity/Account/Register",
+        ],
+        "rails": [
+            "/rails/info", "/rails/mailers",
+            "/api/v1/", "/admin/",
+            "/users/sign_in", "/users/sign_up",
+            "/assets/application.js",
+        ],
+        "nextjs": [
+            "/_next/data/", "/api/", "/api/auth/session",
+            "/api/auth/signin", "/api/auth/providers",
+            "/_next/static/chunks/",
+        ],
+        "flask": [
+            "/api/", "/api/v1/", "/admin/",
+            "/static/", "/auth/login", "/auth/register",
+            "/swagger.json",
+        ],
+    }
+
+    # Common hidden paths to check regardless of framework.
+    # discover_framework_endpoints() always probes these and tags any hit with
+    # source "common_hidden".
+    _COMMON_HIDDEN_PATHS = [
+        "/.env", "/.git/config", "/.git/HEAD",
+        "/backup/", "/backups/", "/backup.sql", "/backup.zip",
+        "/config.json", "/config.yaml", "/config.yml",
+        "/debug/", "/debug/vars", "/debug/pprof",
+        "/internal/", "/internal/health", "/internal/status",
+        "/metrics", "/prometheus", "/health", "/healthz", "/ready",
+        "/status", "/ping", "/version", "/info",
+        "/.well-known/security.txt", "/security.txt",
+        "/crossdomain.xml", "/clientaccesspolicy.xml",
+        "/server-status", "/server-info",
+        "/phpinfo.php", "/info.php",
+        "/web.config", "/WEB-INF/web.xml",
+        "/console/", "/manage/", "/management/",
+        "/api/debug", "/api/config",
+        "/trace", "/jolokia/",
+        "/cgi-bin/", "/fcgi-bin/",
+        "/.htaccess", "/.htpasswd",
+    ]
+
+    async def discover_framework_endpoints(
+        self, target: str, technologies: List[str]
+    ) -> List[EndpointInfo]:
+        """Probe framework-specific and common hidden paths under ``target``.
+
+        Frameworks are selected by substring-matching per-framework keywords
+        against the lower-cased ``technologies`` entries. Each selected
+        framework contributes its ``_FRAMEWORK_PATHS`` entries; every
+        ``_COMMON_HIDDEN_PATHS`` entry is always added. Each unique URL is
+        checked once, even when several frameworks list the same path.
+
+        Args:
+            target: Base URL; a trailing slash is stripped before paths are appended.
+            technologies: Detected technology names (any casing).
+
+        Returns:
+            One ``EndpointInfo`` (method ``"GET"``) per unique URL for which
+            ``_check_url_alive`` returned exactly ``True``.
+        """
+        target = target.rstrip("/")
+        tech_lower = [t.lower() for t in technologies]
+
+        fw_keywords: Dict[str, List[str]] = {
+            "wordpress": ["wordpress", "wp-", "woocommerce"],
+            "laravel": ["laravel", "php", "lumen"],
+            "django": ["django", "python", "wagtail"],
+            "spring": ["spring", "java", "tomcat", "wildfly", "jetty"],
+            "express": ["express", "node", "koa", "fastify"],
+            "aspnet": ["asp.net", ".net", "blazor", "iis"],
+            "rails": ["ruby", "rails", "rack"],
+            "nextjs": ["next.js", "nextjs", "react", "vercel"],
+            "flask": ["flask", "python", "gunicorn", "werkzeug"],
+        }
+
+        def _kw_in_tech(kw: str, tech: str) -> bool:
+            # A plain substring test would make "java" fire on "javascript"
+            # and pull in every Spring/actuator path for any JS-based site.
+            if kw == "java":
+                return re.search(r"java(?!script)", tech) is not None
+            return kw in tech
+
+        # Sorted so probing order and source attribution are deterministic.
+        fw_matches = sorted(
+            fw_name
+            for fw_name, keywords in fw_keywords.items()
+            if any(_kw_in_tech(kw, tech) for kw in keywords for tech in tech_lower)
+        )
+
+        # url -> (source, priority). The first registration wins, so a path
+        # shared by several frameworks (e.g. "/api/", "/admin/") is probed and
+        # reported only once, and framework entries (priority 7) take
+        # precedence over the generic "common_hidden" ones (priority 6).
+        candidates: Dict[str, Tuple[str, int]] = {}
+        for fw in fw_matches:
+            for path in self._FRAMEWORK_PATHS.get(fw, []):
+                candidates.setdefault(f"{target}{path}", (f"framework:{fw}", 7))
+        for path in self._COMMON_HIDDEN_PATHS:
+            candidates.setdefault(f"{target}{path}", ("common_hidden", 6))
+
+        # Batch existence check, all URLs concurrently. Exceptions come back
+        # as result objects and are treated as "not alive" below.
+        urls_to_check = list(candidates)
+        results = await asyncio.gather(
+            *(self._check_url_alive(url) for url in urls_to_check),
+            return_exceptions=True,
+        )
+
+        endpoints: List[EndpointInfo] = []
+        for url, alive in zip(urls_to_check, results):
+            if alive is True:
+                source, priority = candidates[url]
+                endpoints.append(EndpointInfo(
+                    url=url, method="GET", source=source, priority=priority,
+                ))
+
+        logger.info(f"[DeepRecon] Framework discovery: {len(endpoints)}/{len(urls_to_check)} alive")
+        return endpoints
+
+    # ------------------------------------------------------------------
+    # Path pattern fuzzing
+    # ------------------------------------------------------------------
+
+    async def fuzz_api_patterns(
+        self, target: str, known_endpoints: List[str]
+    ) -> List[EndpointInfo]:
+        """Infer sibling API endpoints from known ones and keep those that respond.
+
+        API bases (``/api`` or ``/api/v<N>``) and the first non-ID path segment
+        after them are collected from ``known_endpoints``. Candidates are then
+        built on the origin of ``target``: common resource names under every
+        base, plus CRUD-style suffixes for resources already seen. At most 100
+        candidates (in sorted order) are checked.
+
+        Args:
+            target: URL whose scheme and host are used for every candidate.
+            known_endpoints: Already-discovered URLs or paths.
+
+        Returns:
+            ``EndpointInfo`` entries (source ``"api_fuzzing"``, priority 6) for
+            candidates where ``_check_url_alive`` returned exactly ``True``.
+        """
+        target = target.rstrip("/")
+        target_parsed = urlparse(target)
+        target_origin = f"{target_parsed.scheme}://{target_parsed.netloc}"
+
+        inferred: Set[str] = set()
+
+        # Extract API path patterns
+        api_bases: Set[str] = set()
+        api_resources: Set[str] = set()
+
+        for ep in known_endpoints:
+            path = urlparse(ep).path
+            # Identify API base paths like /api/v1, /api/v2. The lookahead pins
+            # the base to a whole path segment; without it "/api-docs" or
+            # "/apiary/x" would count as base "/api" and yield bogus resources
+            # such as "-docs" or "ary".
+            m = re.match(r'(/api(?:/v\d+)?)(?=/|$)', path)
+            if not m:
+                continue
+            api_bases.add(m.group(1))
+            # Extract resource name: first segment that is neither a numeric ID
+            # nor a long hex/UUID-looking token.
+            rest = path[len(m.group(1)):]
+            parts = [p for p in rest.split("/") if p and not p.isdigit() and not re.match(r'^[0-9a-f-]{8,}$', p)]
+            if parts:
+                api_resources.add(parts[0])
+
+        # Common REST resource names to try
+        COMMON_RESOURCES = [
+            "users", "user", "auth", "login", "register", "logout",
+            "profile", "settings", "admin", "posts", "articles",
+            "comments", "categories", "tags", "search", "upload",
+            "files", "images", "media", "notifications", "messages",
+            "products", "orders", "payments", "invoices", "customers",
+            "dashboard", "reports", "analytics", "logs", "events",
+            "webhooks", "tokens", "sessions", "roles", "permissions",
+            "config", "health", "status", "version", "docs",
+        ]
+
+        # Common REST sub-patterns
+        CRUD_SUFFIXES = [
+            "", "/1", "/me", "/all", "/list", "/search",
+            "/count", "/export", "/import", "/bulk",
+        ]
+
+        for base in api_bases:
+            # Try common resources under each API base
+            for resource in COMMON_RESOURCES:
+                if resource not in api_resources:
+                    inferred.add(f"{target_origin}{base}/{resource}")
+            # Try CRUD variants for known resources
+            for resource in api_resources:
+                for suffix in CRUD_SUFFIXES:
+                    inferred.add(f"{target_origin}{base}/{resource}{suffix}")
+
+        # Remove already-known endpoints (compared in normalized form)
+        known_normalized = {_normalize_url(ep) for ep in known_endpoints}
+        inferred = {url for url in inferred if _normalize_url(url) not in known_normalized}
+
+        # Batch check (parallel, capped). Sorting keeps the 100-candidate
+        # selection deterministic; exceptions are returned as results and
+        # treated as "not alive".
+        to_check = sorted(inferred)[:100]
+        check_tasks = [self._check_url_alive(url) for url in to_check]
+        results = await asyncio.gather(*check_tasks, return_exceptions=True)
+
+        discovered = [
+            EndpointInfo(url=url, method="GET", source="api_fuzzing", priority=6)
+            for url, alive in zip(to_check, results)
+            if alive is True
+        ]
+
+        logger.info(f"[DeepRecon] API fuzzing: {len(discovered)}/{len(to_check)} alive")
+        return discovered
+
+    # ------------------------------------------------------------------
+    # Multi-method discovery
+    # ------------------------------------------------------------------
+
+    async def discover_methods(
+        self, target: str, endpoints: List[str], sample_size: int = 20
+    ) -> Dict[str, List[str]]:
+        """Ask the first ``sample_size`` endpoints which HTTP methods they accept.
+
+        Sends one OPTIONS request per sampled endpoint and reads the ``Allow``
+        header, falling back to ``Access-Control-Allow-Methods`` when ``Allow``
+        yields no method names. No other methods are actively probed.
+
+        Args:
+            target: Not used by this method; kept for interface compatibility.
+            endpoints: Candidate URLs; only the first ``sample_size`` are queried.
+            sample_size: Maximum number of endpoints to query.
+
+        Returns:
+            Mapping of URL to upper-cased method names. Endpoints that failed
+            or advertised no methods are omitted.
+        """
+        results: Dict[str, List[str]] = {}
+        sampled = endpoints[:sample_size]
+
+        def _parse_methods(header_value: str) -> List[str]:
+            # Skip blank tokens (e.g. from "GET, POST," or ",") so an empty
+            # string is never reported as an HTTP method.
+            return [tok.strip().upper() for tok in header_value.split(",") if tok.strip()]
+
+        async def _check_options(url: str) -> Tuple[str, List[str]]:
+            try:
+                session = await self._get_session()
+                async with session.options(
+                    url, ssl=False, timeout=aiohttp.ClientTimeout(total=5)
+                ) as resp:
+                    # Prefer Allow; also check Access-Control-Allow-Methods.
+                    for header_name in ("Allow", "Access-Control-Allow-Methods"):
+                        methods = _parse_methods(resp.headers.get(header_name, ""))
+                        if methods:
+                            return url, methods
+            except Exception as exc:
+                # Best-effort probe: one failing endpoint must not abort the
+                # batch, but leave a trace for troubleshooting.
+                logger.debug(f"[DeepRecon] OPTIONS probe failed for {url}: {exc}")
+            return url, []
+
+        tasks = [_check_options(url) for url in sampled]
+        responses = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for resp in responses:
+            # Anything that is not a (url, methods) tuple is an exception object.
+            if isinstance(resp, tuple):
+                url, methods = resp
+                if methods:
+                    results[url] = methods
+
+        return results
+
    # ------------------------------------------------------------------
    # Deep technology fingerprinting
    # ------------------------------------------------------------------

    _FINGERPRINT_FILES = [
        "/readme.txt", "/README.md", "/CHANGELOG.md", "/CHANGES.txt",
-        "/package.json", "/composer.json",
+        "/package.json", "/composer.json", "/Gemfile.lock",
+        "/requirements.txt", "/go.mod", "/pom.xml", "/build.gradle",
    ]

    _WP_PROBES = [
@@ -349,6 +856,18 @@ class DeepRecon:
                        _add(name, ver, path)
                except (json.JSONDecodeError, ValueError):
                    pass
+            elif path == "/go.mod":
+                m = re.search(r'^module\s+(\S+)', content, re.MULTILINE)
+                if m:
+                    _add(m.group(1), "go-module", path)
+                for dep_m in re.finditer(r'^\s+(\S+)\s+(v[\d.]+)', content, re.MULTILINE):
+                    _add(dep_m.group(1), dep_m.group(2), path)
+            elif path == "/requirements.txt":
+                for dep_m in re.finditer(r'^([a-zA-Z0-9_\-]+)==([\d.]+)', content, re.MULTILINE):
+                    _add(dep_m.group(1), dep_m.group(2), path)
+            elif path == "/Gemfile.lock":
+                for dep_m in re.finditer(r'^\s{4}([a-z_\-]+)\s+\(([\d.]+)\)', content, re.MULTILINE):
+                    _add(dep_m.group(1), dep_m.group(2), path)
            else:
                m = self.RE_VERSION.search(content)
                if m:
@@ -375,3 +894,84 @@ class DeepRecon:
                _add("Drupal", m.group(1), dp_path)

        return results
+
+    # ------------------------------------------------------------------
+    # Comprehensive recon pipeline
+    # ------------------------------------------------------------------
+
+    async def full_recon(
+        self, target: str, technologies: List[str],
+        js_urls: List[str], known_endpoints: List[str],
+    ) -> Dict:
+        """Run every recon phase and merge the outcome into one result dict.
+
+        Sitemap, robots, JS crawl, API enumeration and framework discovery run
+        concurrently; API pattern fuzzing and HTTP method discovery then run
+        one after the other on the merged endpoint set. A phase that fails or
+        returns an unexpected type leaves its default value in place.
+
+        Returns:
+            Dict with keys ``sitemap_urls``, ``robots_paths``, ``js_analysis``,
+            ``api_schema``, ``framework_endpoints``, ``fuzzed_endpoints``,
+            ``method_map``, ``fingerprints`` and ``all_endpoints`` (sorted,
+            capped at ``MAX_ENDPOINTS``).
+        """
+        results: Dict = {
+            "sitemap_urls": [],
+            "robots_paths": [],
+            "js_analysis": None,
+            "api_schema": None,
+            "framework_endpoints": [],
+            "fuzzed_endpoints": [],
+            "method_map": {},
+            "fingerprints": [],
+            "all_endpoints": [],
+        }
+
+        # Independent phases, awaited together. Exceptions are returned as
+        # values instead of being raised.
+        phase_coros = [
+            self.parse_sitemap(target),
+            self.parse_robots(target),
+            # asyncio.sleep(0) is a no-op stand-in when there is nothing to crawl.
+            self.crawl_js_files(target, js_urls) if js_urls else asyncio.sleep(0),
+            self.enumerate_api(target, technologies),
+            self.discover_framework_endpoints(target, technologies),
+        ]
+        sitemap_out, robots_out, js_out, api_out, fw_out = await asyncio.gather(
+            *phase_coros, return_exceptions=True
+        )
+
+        # Accept a phase's output only when it has the expected type.
+        if isinstance(robots_out, tuple):
+            results["robots_paths"] = robots_out[0]
+        for key, value, expected_type in (
+            ("sitemap_urls", sitemap_out, list),
+            ("js_analysis", js_out, JSAnalysisResult),
+            ("api_schema", api_out, APISchema),
+            ("framework_endpoints", fw_out, list),
+        ):
+            if isinstance(value, expected_type):
+                results[key] = value
+
+        # Aggregate all discovered endpoints
+        all_eps = set(known_endpoints)
+        all_eps.update(results["sitemap_urls"], results["robots_paths"])
+
+        js_analysis = results["js_analysis"]
+        if js_analysis:
+            all_eps.update(js_analysis.endpoints)
+
+        api_schema = results["api_schema"]
+        if api_schema:
+            for schema_ep in api_schema.endpoints:
+                ep_url = schema_ep.get("url", "")
+                if ep_url.startswith("/"):
+                    # Relative path: resolve against the target.
+                    all_eps.add(urljoin(target, ep_url))
+                elif ep_url.startswith("http"):
+                    all_eps.add(ep_url)
+
+        all_eps.update(fw_ep.url for fw_ep in results["framework_endpoints"])
+
+        # Now run API fuzzing with ALL known endpoints
+        try:
+            fuzzed = await self.fuzz_api_patterns(target, sorted(all_eps))
+            if isinstance(fuzzed, list):
+                results["fuzzed_endpoints"] = fuzzed
+                all_eps.update(hit.url for hit in fuzzed)
+        except Exception as e:
+            logger.warning(f"[DeepRecon] API fuzzing error: {e}")
+
+        # Discover methods for a sample
+        try:
+            results["method_map"] = await self.discover_methods(target, sorted(all_eps))
+        except Exception as e:
+            logger.warning(f"[DeepRecon] Method discovery error: {e}")
+
+        results["all_endpoints"] = sorted(all_eps)[:MAX_ENDPOINTS]
+        logger.info(f"[DeepRecon] Total endpoints discovered: {len(results['all_endpoints'])}")
+
+        return results
@@ -1624,3 +1624,476 @@ VULN_AI_PROMPTS: Dict[str, dict] = {
        "technology_hints": {"general": "OWASP API Security #3. Check: REST APIs without field selection, GraphQL without proper field-level authorization, response serializers including all model fields."}
    },
 }
+
+
+# ---------------------------------------------------------------------------
+# Deep Test Prompts — AI-driven iterative testing loop
+# ---------------------------------------------------------------------------
+
+def get_deep_test_plan_prompt(
+    vuln_type: str,
+    context: str,
+    playbook_ctx: str = "",
+    iteration: int = 1,
+    previous_results: str = "",
+) -> str:
+    """Render the PLANNING prompt used by _ai_deep_test() Step 2.
+
+    The prompt hands the LLM the endpoint context plus any per-type guidance
+    found in ``VULN_AI_PROMPTS`` and asks for concrete HTTP test cases as JSON
+    (3 in the first round, 5 in later rounds).
+
+    Args:
+        vuln_type: The vulnerability type being tested (e.g., "sqli_error")
+        context: Rich context string (endpoint, baseline, tech, WAF, params)
+        playbook_ctx: Playbook methodology context for this vuln type
+        iteration: Current iteration number (1-3)
+        previous_results: JSON string of previous test results (for iterations 2+)
+
+    Returns:
+        The complete prompt text.
+    """
+    def _section(heading: str, body: str) -> str:
+        # Heading, newline, body when there is a body; otherwise nothing.
+        return heading + chr(10) + body if body else ""
+
+    # Per-type proof requirements; the module is optional at import time.
+    try:
+        from backend.core.vuln_engine.system_prompts import VULN_TYPE_PROOF_REQUIREMENTS
+    except ImportError:
+        proof_req = ""
+    else:
+        proof_req = VULN_TYPE_PROOF_REQUIREMENTS.get(vuln_type, "")
+
+    # Per-type guidance for detection, methodology and payload choice
+    type_prompt = VULN_AI_PROMPTS.get(vuln_type, {})
+    detection_block = _section("## DETECTION STRATEGY", type_prompt.get("detection_strategy", ""))
+    methodology_block = _section("## METHODOLOGY", type_prompt.get("test_methodology", ""))
+    payload_block = _section("## PAYLOAD HINTS", type_prompt.get("payload_selection", ""))
+    proof_block = _section("## PROOF REQUIREMENTS", proof_req)
+
+    # Feedback from the previous round is only included from round 2 onwards.
+    iteration_context = ""
+    if iteration > 1 and previous_results:
+        iteration_context = f"""
+## PREVIOUS TEST RESULTS (Round {iteration - 1})
+You have already tested this endpoint. Here are the ACTUAL server responses:
+
+{previous_results}
+
+IMPORTANT: Analyze what happened. What did the server do with your input?
+- Did any payload cause an error? → Exploit that error pattern.
+- Did any payload get reflected? → Check encoding, try context escape.
+- Did any payload change the response? → Investigate what changed and why.
+- Did all payloads get blocked? → Try encoding/obfuscation bypass.
+- Did the server behave identically for all inputs? → Endpoint likely NOT vulnerable.
+
+Generate NEW test cases that build on what you learned. Do NOT repeat previous payloads.
+"""
+
+    test_count = 3 if iteration == 1 else 5
+
+    return f"""You are an expert penetration tester performing Round {iteration} of iterative {vuln_type.upper()} testing.
+
+## TARGET CONTEXT
+{context}
+
+{detection_block}
+{methodology_block}
+{payload_block}
+{playbook_ctx}
+{proof_block}
+{iteration_context}
+
+## YOUR TASK
+Generate {test_count} specific test cases for {vuln_type} on this endpoint.
+Each test must be a concrete HTTP request — not a description of what to test.
+
+Respond ONLY with JSON:
+{{
+    "reasoning": "Brief explanation of your testing strategy based on the context",
+    "tests": [
+        {{
+            "name": "Descriptive name of the test",
+            "rationale": "Why this specific test based on what you observed",
+            "method": "GET|POST|PUT|DELETE",
+            "url": "Full URL to test (use actual URLs from context)",
+            "params": {{"param_name": "payload_value"}},
+            "headers": {{"Header-Name": "value"}},
+            "body": "request body if POST/PUT (or empty string)",
+            "content_type": "application/x-www-form-urlencoded|application/json|text/xml",
+            "injection_point": "parameter|header|body|path",
+            "success_indicators": ["what to look for in response that proves vulnerability"],
+            "failure_indicators": ["what indicates NOT vulnerable"]
+        }}
+    ]
+}}
+
+RULES:
+- Use ACTUAL URLs and parameters from the context — don't invent endpoints.
+- Each test MUST have a clear rationale tied to the target's behavior.
+- Include both aggressive tests (exploit attempts) and subtle probes (behavior mapping).
+- If this is Round 2+, your tests MUST be adapted based on previous results."""
+
+
+def get_deep_test_analysis_prompt(
+    vuln_type: str,
+    test_results: str,
+    baseline: str = "",
+    iteration: int = 1,
+) -> str:
+    """Render the ANALYSIS prompt used by _ai_deep_test() Step 4.
+
+    The prompt shows the LLM the actual HTTP responses (and the baseline when
+    one is supplied) and asks for a JSON verdict, with explicit
+    anti-hallucination instructions.
+
+    Args:
+        vuln_type: The vulnerability type being tested
+        test_results: JSON string of test results with actual HTTP responses
+        baseline: Baseline response data for comparison
+        iteration: Current iteration number
+
+    Returns:
+        The complete prompt text.
+    """
+    def _section(heading: str, body: str) -> str:
+        # Heading, newline, body when there is a body; otherwise nothing.
+        return heading + chr(10) + body if body else ""
+
+    # Per-type proof requirements; the module is optional at import time.
+    try:
+        from backend.core.vuln_engine.system_prompts import VULN_TYPE_PROOF_REQUIREMENTS
+    except ImportError:
+        proof_req = ""
+    else:
+        proof_req = VULN_TYPE_PROOF_REQUIREMENTS.get(vuln_type, "")
+
+    type_prompt = VULN_AI_PROMPTS.get(vuln_type, {})
+    verification_block = _section("## VERIFICATION CRITERIA", type_prompt.get("verification_criteria", ""))
+    false_positive_block = _section(
+        "## KNOWN FALSE POSITIVE PATTERNS", type_prompt.get("false_positive_indicators", "")
+    )
+    proof_block = _section("## PROOF REQUIREMENTS", proof_req)
+
+    # Without a baseline the model is told to compare the test responses with each other.
+    baseline_block = baseline or "Not available — compare between test responses instead."
+
+    return f"""Analyze these HTTP responses for {vuln_type.upper()} vulnerability.
+This is Round {iteration} of iterative testing.
+
+## BASELINE RESPONSE (normal behavior without attack payload)
+{baseline_block}
+
+## TEST RESULTS (actual server responses)
+{test_results}
+
+{verification_block}
+{false_positive_block}
+{proof_block}
+
+## ANALYSIS INSTRUCTIONS
+
+For EACH test result, analyze:
+1. Did the response differ from baseline? How exactly? (status, body, headers, timing)
+2. Is the difference CAUSED by the payload, or is it generic application behavior?
+3. Does the response contain proof of execution (not just delivery)?
+4. Would you stake your professional reputation on this finding?
+
+ANTI-HALLUCINATION CHECK:
+- ONLY cite evidence that appears in the ACTUAL response data above.
+- Do NOT infer, assume, or speculate about what "might" happen.
+- If the evidence is ambiguous, it is NOT confirmed.
+
+Respond ONLY with JSON:
+{{
+    "analysis": [
+        {{
+            "test_name": "Name of the test analyzed",
+            "is_vulnerable": true|false,
+            "confidence": "high|medium|low",
+            "evidence": "EXACT string/pattern from the actual response that proves it",
+            "reasoning": "Why this specific evidence proves (or disproves) the vulnerability"
+        }}
+    ],
+    "overall_vulnerable": true|false,
+    "continue_testing": true|false,
+    "next_round_strategy": "What to try next if continue_testing is true (or 'done' if false)",
+    "summary": "One-line summary of findings"
+}}
+
+CRITICAL: Set "continue_testing": true ONLY if you observed promising signals that
+warrant deeper investigation. If all tests show no vulnerability indicators, set false."""
+
+
+# ---------------------------------------------------------------------------
+# Pre-Stream Master Planning Prompt — AI context before parallel streams
+# ---------------------------------------------------------------------------
+
+def get_master_plan_prompt(
+    target: str,
+    initial_response: str = "",
+    technologies: str = "",
+    endpoints_preview: str = "",
+    forms_preview: str = "",
+    waf_info: str = "",
+    playbook_context: str = "",
+) -> str:
+    """Render the master planning prompt that runs BEFORE the 3 parallel streams.
+
+    The prompt presents whatever initial recon data is available and asks the
+    AI for a strategic JSON test plan covering the recon, testing and tool
+    streams. Every recon input that is empty is replaced by a short
+    placeholder sentence.
+
+    Returns:
+        The complete prompt text.
+    """
+    def _recon_block(heading: str, body: str, placeholder: str) -> str:
+        # Heading + newline + body when data exists, else the placeholder sentence.
+        if body:
+            return heading + chr(10) + body
+        return placeholder
+
+    probe_block = _recon_block(
+        "### Response Headers & Body Fingerprint", initial_response,
+        "Initial probe not yet available.",
+    )
+    tech_block = _recon_block(
+        "### Technologies Detected", technologies, "Not yet detected.",
+    )
+    endpoints_block = _recon_block(
+        "### Endpoints Discovered", endpoints_preview, "No endpoints discovered yet.",
+    )
+    forms_block = _recon_block(
+        "### Forms Found", forms_preview, "No forms found yet.",
+    )
+    waf_block = _recon_block(
+        "### WAF Detection", waf_info, "No WAF detected.",
+    )
+
+    return f"""You are a senior penetration tester planning a comprehensive security assessment.
+
+## TARGET
+URL: {target}
+
+## INITIAL RECONNAISSANCE
+{probe_block}
+
+{tech_block}
+
+{endpoints_block}
+
+{forms_block}
+
+{waf_block}
+
+{playbook_context}
+
+## YOUR TASK
+Create a MASTER TEST PLAN for this target. This plan will guide 3 parallel testing streams:
+1. **Recon Stream** — what to look for during deeper reconnaissance
+2. **Testing Stream** — which vulnerability types to prioritize and why
+3. **Tool Stream** — which security tools would be most effective
+
+Analyze the target's technology stack, response patterns, and attack surface to produce:
+
+Respond ONLY with JSON:
+{{
+    "target_profile": "Brief description of what this application appears to be",
+    "technology_assessment": "Key technologies and their security implications",
+    "attack_surface_summary": "Primary attack vectors based on initial recon",
+    "priority_vuln_types": ["ordered list of 10-15 vuln types most likely to succeed"],
+    "high_value_endpoints": ["endpoints that deserve the most attention"],
+    "recon_guidance": {{
+        "focus_areas": ["what the recon stream should specifically look for"],
+        "hidden_surface_hints": ["directories, API patterns, or configs to probe"]
+    }},
+    "testing_strategy": {{
+        "immediate_tests": ["vuln types to test RIGHT NOW on the main URL"],
+        "tech_specific_tests": ["tests specific to the detected technology stack"],
+        "bypass_strategies": ["WAF bypass or encoding strategies if WAF detected"]
+    }},
+    "tool_recommendations": {{
+        "priority_tools": ["tools to run first and why"],
+        "tool_arguments": ["specific flags or wordlists for this target"]
+    }},
+    "risk_assessment": "Overall risk level and what makes this target interesting"
+}}
+
+RULES:
+- Base your analysis on ACTUAL data from the initial probe — don't speculate.
+- Prioritize vuln types by LIKELIHOOD of success on THIS specific target.
+- Consider the technology stack when recommending tests (e.g., Java → deserialization, PHP → LFI).
+- If WAF is detected, factor bypass strategies into every recommendation."""
+
+
+# ---------------------------------------------------------------------------
+# Junior Stream AI Payload Generation Prompt
+# ---------------------------------------------------------------------------
+
+def get_junior_ai_test_prompt(
+    url: str,
+    vuln_type: str,
+    params: list,
+    method: str = "GET",
+    tech_context: str = "",
+    master_plan_context: str = "",
+    waf_info: str = "",
+) -> str:
+    """Build prompt for AI-generated payloads in Stream 2 junior testing.
+
+    Instead of hardcoded 3 payloads, the AI generates context-aware payloads
+    tailored to the specific endpoint, parameters, and technology stack.
+
+    Args:
+        url: Endpoint under test.
+        vuln_type: Vulnerability type key; also used to look up per-type
+            guidance in ``VULN_AI_PROMPTS``.
+        params: Parameter names; only the first five are listed in the prompt.
+            Items are converted with ``str()``. An empty or ``None`` value is
+            rendered as "unknown".
+        method: HTTP method shown in the prompt.
+        tech_context: Optional technology description.
+        master_plan_context: Optional excerpt of the master plan.
+        waf_info: Optional WAF description.
+
+    Returns:
+        The complete prompt text.
+    """
+    # Get per-type detection strategy
+    type_prompt = VULN_AI_PROMPTS.get(vuln_type, {})
+    detection = type_prompt.get("detection_strategy", "")
+    payload_hints = type_prompt.get("payload_selection", "")
+
+    # str() keeps join() from raising TypeError when the caller passes
+    # non-string entries; the annotation is just ``list``.
+    params_str = ", ".join(str(p) for p in params[:5]) if params else "unknown"
+
+    return f"""You are a penetration tester performing quick, targeted {vuln_type.upper()} testing.
+
+## TARGET
+URL: {url}
+Method: {method}
+Parameters: {params_str}
+{f"Technologies: {tech_context}" if tech_context else ""}
+{f"WAF: {waf_info}" if waf_info else ""}
+{f"Master Plan Context: {master_plan_context}" if master_plan_context else ""}
+
+{f"## DETECTION STRATEGY" + chr(10) + detection if detection else ""}
+{f"## PAYLOAD HINTS" + chr(10) + payload_hints if payload_hints else ""}
+
+## YOUR TASK
+Generate 3-5 targeted {vuln_type} payloads for this specific endpoint.
+Each payload must be crafted for the actual parameters and technology stack.
+
+Respond ONLY with JSON:
+{{
+    "reasoning": "Brief strategy for testing this endpoint",
+    "tests": [
+        {{
+            "param": "parameter name to inject into",
+            "payload": "the actual payload string",
+            "method": "GET|POST",
+            "injection_point": "parameter|header|body",
+            "header_name": "header name if injection_point is header",
+            "success_indicator": "what to look for in response"
+        }}
+    ]
+}}
+
+RULES:
+- Use ACTUAL parameter names from the target.
+- Tailor payloads to the technology stack (don't send PHP payloads to Java apps).
+- If WAF is detected, use encoding/obfuscation in payloads.
+- Include at least one probe payload (behavior mapping) and one exploit payload.
+- Keep it fast — max 5 payloads."""
+
+
+# ---------------------------------------------------------------------------
+# Tool Output AI Analysis Prompt
+# ---------------------------------------------------------------------------
+
+def get_tool_analysis_prompt(
+    tool_name: str,
+    tool_output: str,
+    target: str,
+    existing_findings_summary: str = "",
+) -> str:
+    """Render the Stream 3 prompt that asks the AI to triage security tool output.
+
+    The AI is asked to separate real findings from noise, flag false
+    positives and propose follow-up tests, answering as JSON.
+
+    Args:
+        tool_name: Name of the tool whose output is analyzed.
+        tool_output: Raw tool output; only the first 4000 characters are embedded.
+        target: Target shown under the TARGET heading.
+        existing_findings_summary: Optional summary of findings already confirmed.
+
+    Returns:
+        The complete prompt text.
+    """
+    # Only the first 4000 characters of the raw output go into the prompt.
+    clipped_output = tool_output[:4000]
+
+    findings_block = ""
+    if existing_findings_summary:
+        findings_block = (
+            "## EXISTING FINDINGS (already confirmed)" + chr(10) + existing_findings_summary
+        )
+
+    return f"""You are a senior penetration tester analyzing output from the security tool "{tool_name}".
+
+## TARGET
+{target}
+
+## TOOL OUTPUT (raw stdout/stderr)
+```
+{clipped_output}
+```
+
+{findings_block}
+
+## YOUR TASK
+Analyze this tool output with expert judgment:
+
+1. **True Findings**: Identify REAL vulnerabilities from the output (not informational noise)
+2. **False Positives**: Flag findings that are likely false positives and explain why
+3. **Follow-Up Tests**: Suggest manual tests to confirm ambiguous findings
+4. **Hidden Insights**: What does this output reveal about the target that isn't obvious?
+
+Respond ONLY with JSON:
+{{
+    "real_findings": [
+        {{
+            "title": "Finding title",
+            "severity": "critical|high|medium|low|info",
+            "vulnerability_type": "vuln_type_name",
+            "endpoint": "affected URL",
+            "evidence": "exact evidence from tool output",
+            "confidence": "high|medium|low",
+            "reasoning": "why this is a real finding"
+        }}
+    ],
+    "false_positives": [
+        {{
+            "title": "What the tool flagged",
+            "reason": "why it's a false positive"
+        }}
+    ],
+    "follow_up_tests": [
+        {{
+            "test": "what to test manually",
+            "vuln_type": "vuln_type_name",
+            "endpoint": "URL to test",
+            "rationale": "why this follow-up is needed"
+        }}
+    ],
+    "target_insights": "What this tool output reveals about the target's security posture"
+}}
+
+RULES:
+- Only mark findings as "real" if the tool output contains concrete evidence.
+- Default scanner informational items (server headers, allowed methods) are NOT vulnerabilities.
+- Consider existing findings — don't flag duplicates.
+- Focus on ACTIONABLE output, not noise."""
+
+
+# ---------------------------------------------------------------------------
+# Recon AI Endpoint Analysis Prompt
+# ---------------------------------------------------------------------------
+
+def get_recon_analysis_prompt(
+    target: str,
+    endpoints: str,
+    forms: str = "",
+    technologies: str = "",
+    parameters: str = "",
+    js_files: str = "",
+    api_endpoints: str = "",
+) -> str:
+    """Render the Stream 1 prompt that asks the AI to analyze recon results.
+
+    The AI is asked to rank endpoints, suggest hidden surface, flag risky
+    parameters, map technologies to vulnerability classes and outline attack
+    chains, answering as JSON. Optional inputs that are empty contribute no
+    section heading.
+
+    Returns:
+        The complete prompt text.
+    """
+    def _optional(heading: str, body: str) -> str:
+        # Heading, newline, body when there is a body; otherwise nothing.
+        if not body:
+            return ""
+        return heading + chr(10) + body
+
+    forms_block = _optional("## FORMS", forms)
+    tech_block = _optional("## TECHNOLOGIES", technologies)
+    params_block = _optional("## PARAMETERS", parameters)
+    js_block = _optional("## JAVASCRIPT FILES", js_files)
+    api_block = _optional("## API ENDPOINTS", api_endpoints)
+
+    return f"""You are a penetration tester analyzing reconnaissance results.
+
+## TARGET
+{target}
+
+## DISCOVERED ENDPOINTS
+{endpoints}
+
+{forms_block}
+{tech_block}
+{params_block}
+{js_block}
+{api_block}
+
+## YOUR TASK
+Analyze this reconnaissance data as a penetration tester would:
+
+1. **Endpoint Prioritization**: Rank endpoints by attack potential
+2. **Hidden Surface**: Identify probable hidden endpoints or patterns
+3. **Parameter Analysis**: Flag high-risk parameters based on naming conventions
+4. **Technology Vulnerabilities**: Map technologies to known vulnerability classes
+5. **Attack Chains**: Identify potential multi-step attack paths
+
+Respond ONLY with JSON:
+{{
+    "high_priority_endpoints": [
+        {{
+            "url": "endpoint URL",
+            "risk_score": 1-10,
+            "reason": "why this endpoint is high priority",
+            "suggested_vuln_types": ["vuln types to test"]
+        }}
+    ],
+    "hidden_endpoints_to_probe": [
+        {{
+            "url": "URL pattern to try",
+            "rationale": "why this might exist"
+        }}
+    ],
+    "high_risk_parameters": [
+        {{
+            "param": "parameter name",
+            "endpoint": "where found",
+            "risk_type": "what kind of injection it's susceptible to",
+            "priority": "high|medium|low"
+        }}
+    ],
+    "tech_vuln_mapping": [
+        {{
+            "technology": "tech name",
+            "vuln_types": ["relevant vuln types"],
+            "specific_tests": ["targeted test recommendations"]
+        }}
+    ],
+    "attack_chains": [
+        {{
+            "chain": "Step 1 → Step 2 → Impact",
+            "starting_point": "where to begin"
+        }}
+    ],
+    "additional_recon_suggestions": ["What else to look for"]
+}}
+
+RULES:
+- Base ALL analysis on the actual data provided — don't invent endpoints.
+- Prioritize by LIKELIHOOD of exploitation, not theoretical severity.
+- Consider technology-specific vulnerabilities (e.g., Spring → actuator, WordPress → wp-admin).
+- Flag parameters like 'url', 'file', 'path', 'redirect', 'callback', 'template' as high-risk."""
@@ -268,6 +268,41 @@ belongs to another user or represents privileged information. When in doubt, do
 three-way comparison: (1) your data, (2) target ID as you, (3) target ID as target user."""


+PROMPT_ITERATIVE_TESTING = """## ITERATIVE TESTING (OBSERVE → ADAPT → EXPLOIT)
+
+You are testing ITERATIVELY. Each round, you see the actual server responses from your
+previous tests. Use this feedback to refine your attack.
+
+OBSERVE → HYPOTHESIZE → TEST → ANALYZE → ADAPT:
+
+1. OBSERVE: Study the response carefully — status code, headers, body content, timing.
+   What does the server actually DO with your input?
+
+2. HYPOTHESIZE: Based on observed behavior, form a specific hypothesis:
+   - "Parameter reflects input unencoded → likely XSS"
+   - "Single quote causes 500 → backend SQL parsing fails → try error-based SQLi"
+   - "Different response for id=1 vs id=2 → possible IDOR"
+   - "Response includes external URL content → SSRF confirmed, try internal targets"
+
+3. TEST: Design your next test to confirm or deny the hypothesis.
+   Target the SPECIFIC behavior you observed — don't spray generic payloads.
+
+4. ANALYZE: Did the hypothesis hold? What new information did you learn?
+   - Error message leaked DB type → now try DB-specific injection syntax
+   - WAF blocked <script> → try event handlers, SVG, or encoding bypass
+   - Parameter reflected but encoded → try double encoding or context escape
+
+5. ADAPT: Refine your approach based on all accumulated evidence.
+   Each round should be MORE targeted than the last.
+
+RULES:
+- NEVER repeat the same payload twice.
+- NEVER ignore server responses — they contain the clues.
+- ALWAYS explain your reasoning: "I observed X, therefore I'm trying Y."
+- When you find something promising, ESCALATE: probe deeper, not wider.
+- If 3 rounds produce no results, the endpoint is likely NOT vulnerable to this type."""  # Prompt text registered in PROMPT_CATALOG as "iterative_testing" for the "deep_testing" context.
+
+
 PROMPT_OFFENSIVE_MINDSET = """## OFFENSIVE MINDSET (MID-LEVEL PENTESTER)

 You are a MID-LEVEL penetration tester, not a vulnerability scanner.
@@ -442,11 +477,17 @@ PROMPT_CATALOG: Dict[str, Dict] = {
        "content": PROMPT_ACCESS_CONTROL_INTELLIGENCE,
        "contexts": ["testing", "verification", "confirmation"],
    },
+    "iterative_testing": {
+        "id": "iterative_testing",
+        "title": "Iterative Testing (Observe → Adapt → Exploit)",
+        "content": PROMPT_ITERATIVE_TESTING,
+        "contexts": ["deep_testing"],
+    },
    "offensive_mindset": {
        "id": "offensive_mindset",
        "title": "Offensive Mindset (Mid-Level Pentester)",
        "content": PROMPT_OFFENSIVE_MINDSET,
-        "contexts": ["testing", "strategy"],
+        "contexts": ["testing", "strategy", "deep_testing"],
    },
    "architecture_analysis": {
        "id": "architecture_analysis",
@@ -537,6 +578,18 @@ CONTEXT_PROMPTS: Dict[str, List[str]] = {
        "think_like_pentester",
        "anti_severity_inflation",
    ],
+    # Deep testing: AI-driven iterative testing loop (observe → plan → test → analyze → adapt)
+    "deep_testing": [
+        "anti_hallucination",
+        "anti_scanner",
+        "proof_of_execution",
+        "think_like_pentester",
+        "offensive_mindset",
+        "method_variation",
+        "iterative_testing",
+        "negative_controls",
+        "operational_humility",
+    ],
 }