diff --git a/.gitignore b/.gitignore
index 8440c3c..db1d09b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,18 +34,12 @@ data/*.db
 data/*.db.*
 data/execution_history.json
 data/access_control_learning.json
-data/adaptive_learning.json
-data/providers.json
-data/reasoning_memory.json
-data/vectorstore/
-data/custom-knowledge/uploads/
 data/reports/
 
 # ==============================
 # Reports & Screenshots
 # ==============================
 reports/screenshots/
-reports/*.json
 
 # ==============================
 # Logs & PIDs
@@ -84,9 +78,3 @@ docker/*.env
 # Results (runtime output)
 # ==============================
 results/
-
-# ==============================
-# Large binary files
-# ==============================
-projeto.zip
-*.zip
diff --git a/backend/api/v1/agent.py b/backend/api/v1/agent.py
index a7b34fc..2405b91 100755
--- a/backend/api/v1/agent.py
+++ b/backend/api/v1/agent.py
@@ -18,6 +18,27 @@ from datetime import datetime
 from enum import Enum
 from urllib.parse import urlparse
 
+
+def _safe_cvss_score(val) -> float:
+    """Sanitize cvss_score: convert to float, default 0.0 for non-numeric.
+
+    NaN, infinities and ints too large for a float also yield 0.0.
+    """
+    try:
+        score = float(val)
+    except (ValueError, TypeError, OverflowError):
+        return 0.0
+    return score if float("-inf") < score < float("inf") else 0.0
+
+
+def _safe_cvss_vector(val) -> str:
+    """Sanitize cvss_vector: return trimmed text, or empty string for N/A/invalid values."""
+    vector = val.strip() if isinstance(val, str) else ""
+    # Validate and return the same trimmed text so stray whitespace is never stored.
+    if len(vector) < 5 or vector.upper().startswith("N/A"):
+        return ""
+    return vector[:100]
+
 from backend.core.autonomous_agent import AutonomousAgent, OperationMode
 from backend.core.task_library import get_task_library
 from backend.db.database import async_session_factory
@@ -123,7 +144,7 @@ class AgentRequest(BaseModel):
     enable_kali_sandbox: bool = Field(False, description="Enable Kali Linux sandbox for tool execution + AI researcher")
     custom_prompt_ids: Optional[List[str]] = Field(None, description="IDs of custom prompts to include in agent flow")
     preferred_provider: Optional[str] = Field(None, description="Preferred LLM provider (e.g., 'anthropic', 'gemini_cli', 'openai')")
-    preferred_model: Optional[str] = Field(None, description="Preferred model name (e.g., 'claude-sonnet-4-20250514', 'gemini-2.0-flash')")
+    preferred_model: Optional[str] = Field(None, description="Preferred model name (e.g., 'claude-sonnet-4-6-20250918', 'claude-opus-4-6-20250918', 'gemini-2.0-flash')")
     methodology_file: Optional[str] = Field(None, description="Path to external .md methodology file to inject into all AI calls")
     enable_cli_agent: bool = Field(False, description="Enable CLI Agent (AI CLI inside Kali sandbox)")
     cli_agent_provider: Optional[str] = Field(None, description="CLI provider: claude_code, gemini_cli, codex_cli")
@@ -431,8 +452,8 @@ async def _run_agent_task(
                         title=finding.get("title", finding.get("type", "Unknown")),
                         vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
                         severity=severity,
-                        cvss_score=finding.get("cvss_score"),
-                        cvss_vector=finding.get("cvss_vector"),
+                        cvss_score=_safe_cvss_score(finding.get("cvss_score")),
+                        cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")),
                         cwe_id=finding.get("cwe_id"),
                         description=finding.get("description") or finding.get("evidence") or "",
                         affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
@@ -463,8 +484,8 @@ async def _run_agent_task(
                         title=finding.get("title", finding.get("type", "Unknown")),
                         vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
                         severity=finding.get("severity", "medium").lower(),
-                        cvss_score=finding.get("cvss_score"),
-                        cvss_vector=finding.get("cvss_vector"),
+                        cvss_score=_safe_cvss_score(finding.get("cvss_score")),
+                        cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")),
                         cwe_id=finding.get("cwe_id"),
                         description=finding.get("description") or finding.get("evidence") or "",
                         affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
@@ -916,8 +937,8 @@ async def stop_agent(agent_id: str):
                             title=finding.get("title", finding.get("type", "Unknown")),
                             vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
                             severity=severity,
-                            cvss_score=finding.get("cvss_score"),
-                            cvss_vector=finding.get("cvss_vector"),
+                            cvss_score=_safe_cvss_score(finding.get("cvss_score")),
+                            cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")),
                             cwe_id=finding.get("cwe_id"),
                             description=finding.get("description") or finding.get("evidence") or "",
                             affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
@@ -949,8 +970,8 @@ async def stop_agent(agent_id: str):
                             title=finding.get("title", finding.get("type", "Unknown")),
                             vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")),
                             severity=finding.get("severity", "medium").lower(),
-                            cvss_score=finding.get("cvss_score"),
-                            cvss_vector=finding.get("cvss_vector"),
+                            cvss_score=_safe_cvss_score(finding.get("cvss_score")),
+                            cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")),
                             cwe_id=finding.get("cwe_id"),
                             description=finding.get("description") or finding.get("evidence") or "",
                             affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))),
@@ -2493,8 +2514,8 @@ async def _save_realtime_findings_to_db(session_id: str, session: Dict):
                     title=title,
                     vulnerability_type=finding.get("vulnerability_type", "unknown"),
                     severity=severity,
-                    cvss_score=finding.get("cvss_score"),
-                    cvss_vector=finding.get("cvss_vector"),
+                    cvss_score=_safe_cvss_score(finding.get("cvss_score")),
+                    cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")),
                     cwe_id=finding.get("cwe_id"),
                     description=finding.get("description") or finding.get("evidence") or "",
                     affected_endpoint=finding.get("affected_endpoint", target),
diff --git a/backend/api/v1/providers.py b/backend/api/v1/providers.py
index 995a303..bbf8125 100644
--- a/backend/api/v1/providers.py
+++ b/backend/api/v1/providers.py
@@ -155,6 +155,7 @@ async def test_connection(provider_id: str, account_id: str):
 PROVIDER_MODELS = {
     "claude_code": [
         "claude-opus-4-6-20250918",
+        "claude-sonnet-4-6-20250918",
         "claude-sonnet-4-5-20250929",
         "claude-haiku-4-5-20251001",
         "claude-sonnet-4-20250514",
@@ -163,6 +164,7 @@ PROVIDER_MODELS = {
     ],
     "kiro": [
         "claude-opus-4-6-20250918",
+        "claude-sonnet-4-6-20250918",
         "claude-sonnet-4-5-20250929",
         "claude-haiku-4-5-20251001",
         "claude-sonnet-4-20250514",
@@ -171,6 +173,7 @@ PROVIDER_MODELS = {
     ],
     "anthropic": [
         "claude-opus-4-6-20250918",
+        "claude-sonnet-4-6-20250918",
         "claude-sonnet-4-5-20250929",
         "claude-haiku-4-5-20251001",
         "claude-sonnet-4-20250514",
@@ -214,17 +217,18 @@ PROVIDER_MODELS = {
         "cursor-fast",
         "cursor-small",
         "gpt-4o",
+        "claude-sonnet-4-6-20250918",
         "claude-sonnet-4-5-20250929",
-        "claude-3-5-sonnet-20241022",
     ],
     "copilot": [
         "gpt-4o",
         "gpt-4o-mini",
+        "claude-sonnet-4-6-20250918",
         "claude-sonnet-4-5-20250929",
-        "claude-3-5-sonnet-20241022",
     ],
     "openrouter": [
         "anthropic/claude-opus-4-6",
+        "anthropic/claude-sonnet-4-6",
         "anthropic/claude-sonnet-4-5",
         "anthropic/claude-haiku-4-5",
         "anthropic/claude-sonnet-4",
diff --git a/backend/api/v1/settings.py b/backend/api/v1/settings.py
index d21d876..fff71d2 100755
--- a/backend/api/v1/settings.py
+++ b/backend/api/v1/settings.py
@@ -533,9 +533,12 @@ MODEL_CACHE_TTL = 60  # seconds
 # Common cloud models for dropdown suggestions
 CLOUD_MODELS = {
     "claude": [
+        {"model_id": "claude-opus-4-6-20250918", "display_name": "Claude Opus 4.6", "context_length": 1000000},
+        {"model_id": "claude-sonnet-4-6-20250918", "display_name": "Claude Sonnet 4.6", "context_length": 1000000},
+        {"model_id": "claude-sonnet-4-5-20250929", "display_name": "Claude Sonnet 4.5", "context_length": 200000},
+        {"model_id": "claude-haiku-4-5-20251001", "display_name": "Claude Haiku 4.5", "context_length": 200000},
         {"model_id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4", "context_length": 200000},
         {"model_id": "claude-opus-4-20250514", "display_name": "Claude Opus 4", "context_length": 200000},
-        {"model_id": "claude-haiku-4-20250514", "display_name": "Claude Haiku 4", "context_length": 200000},
     ],
     "openai": [
         {"model_id": "gpt-4o", "display_name": "GPT-4o", "context_length": 128000},
diff --git a/backend/core/autonomous_agent.py b/backend/core/autonomous_agent.py
index 51a121d..69429d7 100755
--- a/backend/core/autonomous_agent.py
+++ b/backend/core/autonomous_agent.py
@@ -3921,6 +3921,7 @@ NOT_VULNERABLE: <reason>"""
             )
 
         # Phase 5.5: MD-based agent orchestrator (always available)
+        # Agents execute REAL HTTP requests via the shared aiohttp session
         if HAS_MD_AGENTS:
             self._md_orchestrator = MdAgentOrchestrator(
                 llm=self.llm,
@@ -3929,6 +3930,9 @@ NOT_VULNERABLE: <reason>"""
                 validation_judge=self.validation_judge,
                 log_callback=self.log,
                 progress_callback=self.progress_callback,
+                http_session=self.session,
+                auth_headers=dict(self.auth_headers),
+                cancel_fn=self.is_cancelled,
             )
 
         # Researcher AI: 0-day discovery with Kali sandbox (opt-in)
@@ -4630,55 +4634,24 @@ NOT_VULNERABLE: <reason>"""
             await self.log("warning", f"  Sandbox scan error: {e}")
 
     async def _run_auto_pentest(self) -> Dict:
-        """Parallel auto pentest: 3 concurrent streams + deep analysis + report.
+        """Agent-first auto pentest: Recon → 108 AI agents with real HTTP → Report.
 
         Architecture:
-          Stream 1 (Recon)  ──→ asyncio.Queue ──→ Stream 2 (Junior Pentester)
-          Stream 3 (Tool Runner) runs sandbox tools + AI-decided tools
-          All streams feed findings in real-time via callbacks.
-
-        After parallel phase completes:
-          Deep Analysis: AI attack surface analysis + comprehensive 100-type testing
-          Finalization: Screenshots + AI enhancement + report generation
+          Phase 1 (0-20%):  Quick recon — discover endpoints, tech, params, WAF
+          Phase 2 (20-85%): Agent Grid — 108 agents execute real HTTP tests
+          Phase 3 (85-100%): Finalization — screenshots, enhancement, report
         """
         await self._update_progress(0, "Auto pentest starting")
         await self.log("info", "=" * 60)
-        await self.log("info", "  PARALLEL AUTO PENTEST MODE")
-        await self.log("info", "  3 concurrent streams | AI-powered | 100 vuln types")
+        await self.log("info", "  AGENT-FIRST AUTO PENTEST (108 AGENTS)")
+        await self.log("info", "  Recon → Agent Grid (real HTTP) → Report | Claude 4.6")
         await self.log("info", "=" * 60)
 
         # Override custom_prompt with DEFAULT_ASSESSMENT_PROMPT for auto mode
         if not self.custom_prompt:
             self.custom_prompt = DEFAULT_ASSESSMENT_PROMPT
 
-        # Phase 5: Multi-agent orchestrator (if enabled, replaces 3-stream)
-        if self._orchestrator:
-            await self.log("info", "  [MULTI-AGENT] Orchestrator enabled — delegating to specialist agents")
-            orch_result = await self._orchestrator.run(
-                target=self.target,
-                recon_data=self.recon,
-                initial_context={
-                    "headers": dict(self.auth_headers),
-                    "technologies": self.recon.technologies,
-                }
-            )
-            # Merge orchestrator findings into agent findings
-            for f in orch_result.get("findings", []):
-                if isinstance(f, Finding):
-                    await self._add_finding(f)
-            await self.log("info", f"  [MULTI-AGENT] Pipeline complete: "
-                           f"{orch_result.get('findings_count', 0)} findings")
-            # Continue to finalization phase below
-            report = await self._generate_full_report()
-            await self._update_progress(100, "Multi-agent pentest complete")
-            if hasattr(self, 'execution_history') and self.execution_history:
-                self.execution_history.flush()
-            await self.log("info", "=" * 60)
-            await self.log("info", f"  AUTO PENTEST COMPLETE: {len(self.findings)} findings")
-            await self.log("info", "=" * 60)
-            return report
-
-        # Shared state for parallel streams
+        # Shared state (needed by some helper methods)
         self._endpoint_queue = asyncio.Queue()
         self._recon_complete = asyncio.Event()
         self._tools_complete = asyncio.Event()
@@ -4686,133 +4659,49 @@ NOT_VULNERABLE: <reason>"""
         self._junior_tested_types: set = set()
         self._playbook_recommended_types: List[str] = []
         self._current_playbook_context: str = ""
-
-        # ── PRE-STREAM AI MASTER PLAN ──
-        # Before launching parallel streams, ask AI for a strategic master plan
-        # that provides context and direction for all 3 streams.
         self._master_plan: Dict = {}
-        if self.llm.is_available():
-            try:
-                await self.log("info", "[MASTER PLAN] AI strategic planning before streams")
-                master_plan = await self._ai_master_plan()
-                if master_plan:
-                    self._master_plan = master_plan
-                    profile = master_plan.get("target_profile", "")
-                    risk = master_plan.get("risk_assessment", "")
-                    priority_types = master_plan.get("priority_vuln_types", [])
-                    if profile:
-                        await self.log("info", f"  [MASTER PLAN] Profile: {profile[:120]}")
-                    if risk:
-                        await self.log("info", f"  [MASTER PLAN] Risk: {risk[:120]}")
-                    if priority_types:
-                        await self.log("info", f"  [MASTER PLAN] Priority: {', '.join(priority_types[:8])}")
-            except Exception as e:
-                await self.log("debug", f"  [MASTER PLAN] Planning error: {e}")
 
-        # ── CONCURRENT PHASE (0-50%): 3 parallel streams ──
-        await asyncio.gather(
-            self._stream_recon(),            # Stream 1: Recon pipeline
-            self._stream_junior_pentest(),   # Stream 2: Immediate AI testing
-            self._stream_tool_runner(),      # Stream 3: Dynamic tool execution
-        )
+        # ══════════════════════════════════════════════════════════════
+        # PHASE 1 (0-20%): RECONNAISSANCE
+        # Discover attack surface before dispatching agents
+        # ══════════════════════════════════════════════════════════════
+        await self.log("info", "[RECON] Mapping attack surface...")
+        await self._update_progress(2, "Recon: mapping attack surface")
 
-        parallel_findings = len(self.findings)
-        await self.log("info", f"  Parallel phase complete: {parallel_findings} findings, "
-                       f"{len(self._junior_tested_types)} types pre-tested")
-        await self._update_progress(50, "Parallel streams complete")
+        # Run recon stream (endpoint discovery, tech detection, site analysis)
+        self._recon_complete.clear()
+        self._tools_complete.set()  # No tool stream in agent-first mode
+        await self._stream_recon()
 
-        # ── REASONING CHECKPOINT at 30-50% ──
-        if self.reasoning_engine and self.llm.is_available():
-            try:
-                plan = await self.reasoning_engine.plan_attack(
-                    recon_summary=f"{len(self.recon.endpoints)} endpoints, "
-                                  f"{len(self.recon.technologies)} techs",
-                    findings_so_far=self.findings,
-                    tested_types=self._junior_tested_types,
-                    progress_pct=0.50,
+        ep_count = len(self.recon.endpoints)
+        param_count = len(self.recon.parameters) if isinstance(self.recon.parameters, dict) else 0
+        tech_count = len(self.recon.technologies)
+        form_count = len(self.recon.forms) if hasattr(self.recon, 'forms') else 0
+        js_count = len(self.recon.js_files) if hasattr(self.recon, 'js_files') else 0
+        sink_count = len(self.recon.js_sinks) if hasattr(self.recon, 'js_sinks') else 0
+        api_count = len(self.recon.api_endpoints) if hasattr(self.recon, 'api_endpoints') else 0
+
+        await self.log("info",
+            f"[RECON] Complete: {ep_count} endpoints, {param_count} params, "
+            f"{tech_count} techs, {form_count} forms, {js_count} JS files, "
+            f"{sink_count} sinks, {api_count} API endpoints")
+        await self._update_progress(15, "Recon complete")
+
+        # WAF info for agents
+        waf_name = ""
+        if hasattr(self, '_waf_result') and self._waf_result:
+            if hasattr(self._waf_result, 'detected_wafs') and self._waf_result.detected_wafs:
+                waf_name = ", ".join(
+                    f"{w.name} ({w.confidence:.0%})" for w in self._waf_result.detected_wafs
                 )
-                if plan and plan.priority_vulns:
-                    await self.log("info", f"  [REASONING] Attack plan: "
-                                   f"focus on {', '.join(plan.priority_vulns[:5])}")
-                    # Feed reasoning priorities into the remaining test plan
-                    for vtype in plan.priority_vulns:
-                        if vtype not in self._junior_tested_types:
-                            self._junior_tested_types.discard(vtype)  # ensure retested
-            except Exception as e:
-                await self.log("debug", f"  [REASONING] Plan error: {e}")
+            elif isinstance(self._waf_result, dict):
+                waf_name = self._waf_result.get("waf_name", "")
+            if waf_name:
+                await self.log("warning", f"[WAF] Detected: {waf_name} — agents will adapt payloads")
 
-        # ── STRATEGY CHECKPOINT at 50% ──
-        if self.strategy:
-            try:
-                strat_update = await self.strategy.checkpoint_refine(
-                    progress_pct=0.50,
-                    findings=self.findings,
-                    tested_types=self._junior_tested_types,
-                    all_endpoints=[ep for ep in self.recon.endpoints],
-                    llm=self.llm if self.llm.is_available() else None,
-                    budget=self.token_budget,
-                )
-                if strat_update.get("message"):
-                    await self.log("info", f"  [STRATEGY] {strat_update['message']}")
-            except Exception as e:
-                await self.log("debug", f"  [STRATEGY] Checkpoint error: {e}")
-
-        # ── DEEP ANALYSIS PHASE (50-75%): Full testing with complete context ──
-        await self.log("info", "[DEEP] AI Attack Surface Analysis + Comprehensive Testing")
-        attack_plan = await self._ai_analyze_attack_surface()
-
-        # Merge AI-recommended types with default plan + playbook recommendations
-        default_plan = self._default_attack_plan()
-        ai_types = attack_plan.get("priority_vulns", [])
-        playbook_types = self._playbook_recommended_types[:15] if self._playbook_recommended_types else []
-        all_types = default_plan["priority_vulns"]
-        merged_types = list(dict.fromkeys(ai_types + playbook_types + all_types))
-
-        # Remove types already tested by junior pentest stream
-        remaining = [t for t in merged_types if t not in self._junior_tested_types]
-        attack_plan["priority_vulns"] = remaining
-        await self.log("info", f"  {len(remaining)} remaining types "
-                       f"({len(self._junior_tested_types)} already tested by junior)")
-        await self._update_progress(55, "Deep: attack surface analyzed")
-
-        await self.log("info", "[DEEP] Comprehensive Vulnerability Testing")
-        await self._test_all_vulnerabilities(attack_plan)
-        await self._update_progress(75, "Deep testing complete")
-
-        # ── REASONING CHECKPOINT at 75% ──
-        if self.reasoning_engine and self.llm.is_available():
-            try:
-                plan = await self.reasoning_engine.plan_attack(
-                    recon_summary=f"{len(self.recon.endpoints)} endpoints, "
-                                  f"{len(self.recon.technologies)} techs",
-                    findings_so_far=self.findings,
-                    tested_types=self._junior_tested_types,
-                    progress_pct=0.75,
-                )
-                if plan and plan.priority_vulns:
-                    await self.log("info", f"  [REASONING] 75% plan: "
-                                   f"focus on {', '.join(plan.priority_vulns[:5])}")
-                    # Reflect on what worked so far
-                    try:
-                        reflection = await self.reasoning_engine.reflect(
-                            action_taken="deep_testing_phase",
-                            result_observed={
-                                "findings_count": len(self.findings),
-                                "tested_types": len(self._junior_tested_types),
-                                "endpoints": len(self.recon.endpoints),
-                            }
-                        )
-                        if reflection and reflection.next_suggestion:
-                            await self.log("info", f"  [REASONING] Reflection: {reflection.next_suggestion}")
-                    except Exception:
-                        pass
-            except Exception as e:
-                await self.log("debug", f"  [REASONING] 75% plan error: {e}")
-
-        # ── CVE HUNTING (if we found versions during recon) ──
+        # CVE hunting (quick, parallel with next phase)
         if self.cve_hunter and self.recon.technologies:
             try:
-                await self.log("info", "[CVE] Searching for known CVEs based on detected versions")
                 cve_findings = await self.cve_hunter.hunt(
                     headers=dict(self.auth_headers),
                     body="",
@@ -4824,7 +4713,77 @@ NOT_VULNERABLE: <reason>"""
             except Exception as e:
                 await self.log("debug", f"  [CVE] Hunt error: {e}")
 
-        # ── AI CHAIN DISCOVERY ──
+        await self._update_progress(20, "Recon + CVE complete, launching agents")
+
+        # ══════════════════════════════════════════════════════════════
+        # PHASE 2 (20-85%): AGENT GRID — 108 SPECIALISTS WITH REAL HTTP
+        # Each agent: LLM plans attacks → executes HTTP → LLM analyzes
+        # ══════════════════════════════════════════════════════════════
+        if self._md_orchestrator and not self.is_cancelled():
+            try:
+                n_available = len(self._md_orchestrator.library.agents)
+                await self.log("info", "=" * 60)
+                await self.log("info", f"  [AGENT GRID] Dispatching {n_available} specialist agents")
+                await self.log("info", f"  Each agent: PLAN (LLM) → EXECUTE (HTTP) → ANALYZE (LLM)")
+                await self.log("info", "=" * 60)
+
+                md_result = await self._md_orchestrator.run(
+                    target=self.target,
+                    recon_data=self.recon,
+                    existing_findings=self.findings,
+                    selected_agents=self.selected_md_agents,
+                    headers=dict(self.auth_headers),
+                    waf_info=waf_name,
+                )
+
+                # Merge agent findings into main findings via validation pipeline
+                md_findings_raw = md_result.get("findings", [])
+                md_confirmed = 0
+                for mf in md_findings_raw:
+                    if self.is_cancelled():
+                        break
+                    if not isinstance(mf, dict):
+                        continue
+                    try:
+                        finding = Finding(
+                            id=str(hashlib.md5(
+                                f"{mf.get('title', '')}{mf.get('affected_endpoint', '')}".encode()
+                            ).hexdigest())[:12],
+                            title=mf.get("title", "Agent Finding"),
+                            severity=mf.get("severity", "medium"),
+                            vulnerability_type=mf.get("vulnerability_type", "unknown"),
+                            cvss_score=float(mf.get("cvss_score", 0.0)) if isinstance(mf.get("cvss_score"), (int, float)) else 0.0,
+                            cwe_id=mf.get("cwe_id", ""),
+                            description=mf.get("description", ""),
+                            affected_endpoint=mf.get("affected_endpoint", self.target),
+                            evidence=mf.get("evidence", ""),
+                            poc_code=mf.get("poc_code", ""),
+                            impact=mf.get("impact", ""),
+                            remediation=mf.get("remediation", ""),
+                            confidence_score={"high": 80, "medium": 50, "low": 25}.get(mf.get("confidence", "medium"), 50),
+                            confidence=mf.get("confidence", "medium"),
+                            ai_verified=mf.get("confidence") == "high",
+                            ai_status="confirmed" if mf.get("confidence") == "high" else "pending",
+                        )
+                        await self._add_finding(finding)
+                        md_confirmed += 1
+                    except Exception as e:
+                        await self.log("debug", f"  [AGENT GRID] Finding merge error: {e}")
+
+                agents_run = md_result.get("agents_run", 0)
+                duration = md_result.get("duration", 0)
+                await self.log("info",
+                    f"[AGENT GRID] Complete: {agents_run} agents, "
+                    f"{len(md_findings_raw)} raw findings, "
+                    f"{md_confirmed} validated, {duration}s")
+            except Exception as e:
+                await self.log("warning", f"[AGENT GRID] Dispatch error: {e}")
+        else:
+            await self.log("warning", "[AGENT GRID] MD agent system not available")
+
+        await self._update_progress(80, "Agent grid complete")
+
+        # ── AI CHAIN DISCOVERY (post-agents, if we have findings) ──
         if self.chain_engine and len(self.findings) >= 2 and self.llm.is_available():
             try:
                 chains = await self.chain_engine.ai_discover_chains(
@@ -4838,66 +4797,7 @@ NOT_VULNERABLE: <reason>"""
             except Exception as e:
                 await self.log("debug", f"  [CHAIN] AI discovery error: {e}")
 
-        # ── MD-BASED AGENT DISPATCH (post-recon specialist agents) ──
-        if self._md_orchestrator and not self.is_cancelled():
-            try:
-                await self.log("info", "[MD-AGENTS] Dispatching specialist .md agents with recon context")
-                md_result = await self._md_orchestrator.run(
-                    target=self.target,
-                    recon_data=self.recon,
-                    existing_findings=self.findings,
-                    selected_agents=self.selected_md_agents,
-                    headers=dict(self.auth_headers),
-                    waf_info=(
-                        self._waf_result.get("waf_name", "")
-                        if self._waf_result else ""
-                    ),
-                )
-
-                # Merge MD agent findings into main findings via validation
-                md_findings_raw = md_result.get("findings", [])
-                md_confirmed = 0
-                for mf in md_findings_raw:
-                    if self.is_cancelled():
-                        break
-                    if not isinstance(mf, dict):
-                        continue
-                    try:
-                        finding = Finding(
-                            id=str(hashlib.md5(
-                                f"{mf.get('title', '')}{mf.get('affected_endpoint', '')}".encode()
-                            ).hexdigest())[:12],
-                            title=mf.get("title", "MD Agent Finding"),
-                            severity=mf.get("severity", "medium"),
-                            vulnerability_type=mf.get("vulnerability_type", "unknown"),
-                            cvss_score=mf.get("cvss_score", 0.0),
-                            cwe_id=mf.get("cwe_id", ""),
-                            description=mf.get("description", ""),
-                            affected_endpoint=mf.get("affected_endpoint", self.target),
-                            evidence=mf.get("evidence", ""),
-                            poc_code=mf.get("poc_code", ""),
-                            impact=mf.get("impact", ""),
-                            remediation=mf.get("remediation", ""),
-                            confidence_score=50,
-                            confidence="medium",
-                            ai_verified=False,
-                            ai_status="pending",
-                        )
-                        # Flow through validation pipeline
-                        await self._add_finding(finding)
-                        md_confirmed += 1
-                    except Exception as e:
-                        await self.log("debug", f"  [MD-AGENTS] Finding merge error: {e}")
-
-                agent_summary = md_result.get("agent_results", {})
-                agents_run = md_result.get("agents_run", 0)
-                await self.log("info",
-                    f"[MD-AGENTS] Complete: {agents_run} agents, "
-                    f"{len(md_findings_raw)} raw findings, "
-                    f"{md_confirmed} submitted to validation, "
-                    f"{md_result.get('duration', 0)}s")
-            except Exception as e:
-                await self.log("warning", f"[MD-AGENTS] Dispatch error: {e}")
+        await self._update_progress(85, "Chain analysis complete")
 
         # ── RESEARCHER AI (0-day discovery with Kali sandbox) ──
         if self._researcher and not self.is_cancelled():
@@ -6043,11 +5943,28 @@ NOT_VULNERABLE: <reason>"""
                 prompt,
                 system=self._get_enhanced_system_prompt("strategy")
             )
-            start = resp_text.index('{')
-            end = resp_text.rindex('}') + 1
-            return json.loads(resp_text[start:end])
+            if not resp_text or len(resp_text.strip()) < 20:
+                await self.log("debug", "  [AI RECON] Empty or too short response from LLM")
+                return {}
+
+            # Try to find JSON in response
+            json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', resp_text)
+            if json_match:
+                return json.loads(json_match.group(1))
+
+            # Try bare JSON
+            start = resp_text.find('{')
+            end = resp_text.rfind('}')
+            if start >= 0 and end > start:
+                return json.loads(resp_text[start:end + 1])
+
+            await self.log("debug", "  [AI RECON] No JSON found in LLM response")
+            return {}
+        except json.JSONDecodeError as e:
+            await self.log("debug", f"  [AI RECON] JSON parse error: {e}")
+            return {}
         except Exception as e:
-            await self.log("debug", f"  [AI RECON] Parse error: {e}")
+            await self.log("debug", f"  [AI RECON] Analysis error: {e}")
             return {}
 
     # ── Stream 2: Junior Pentester ──
diff --git a/backend/core/md_agent.py b/backend/core/md_agent.py
index 3cabb28..42c00b0 100644
--- a/backend/core/md_agent.py
+++ b/backend/core/md_agent.py
@@ -1,18 +1,19 @@
 """
-NeuroSploit v3 - Markdown-Based Agent System
+NeuroSploit v3 - Markdown-Based Agent System (Real Execution)
 
-Each .md file in prompts/md_library/ acts as a self-contained agent definition
-with its own methodology, system prompt, and output format.
+Each .md file in prompts/agents/ acts as a self-contained agent definition.
+Agents EXECUTE REAL HTTP TESTS against the target — not theoretical analysis.
 
-After recon completes, the MdAgentOrchestrator dispatches each selected agent
-against the target URL with full recon context.  Findings flow through the
-normal validation pipeline.
+Cycle per agent:
+  1. PLAN  — LLM reads methodology + recon context → generates test plan (HTTP requests)
+  2. EXECUTE — sends actual HTTP requests against the target
+  3. ANALYZE — LLM reviews real responses → confirms/rejects with evidence
 
 Components:
   - MdAgentDefinition: parsed .md agent metadata
-  - MdAgent(SpecialistAgent): executes a single .md agent via LLM
+  - MdAgent(SpecialistAgent): plans, executes, and analyzes real tests
   - MdAgentLibrary: loads & indexes all .md agent definitions
-  - MdAgentOrchestrator: runs selected agents post-recon
+  - MdAgentOrchestrator: runs agents in phases (recon → offensive → generalist)
 """
 
 import asyncio
@@ -20,20 +21,25 @@ import json
 import logging
 import re
 import time
-import uuid
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional
+from urllib.parse import urljoin, urlparse
 
-from core.agent_base import SpecialistAgent, AgentResult
+try:
+    import aiohttp
+    HAS_AIOHTTP = True
+except ImportError:
+    HAS_AIOHTTP = False
+
+try:
+    from backend.core.agent_base import SpecialistAgent, AgentResult
+except ImportError:
+    from core.agent_base import SpecialistAgent, AgentResult
 
 logger = logging.getLogger(__name__)
 
 # ─── Agent categories ───────────────────────────────────────────────
-# Only 'offensive' agents are dispatched during auto-pentest by default.
-# Others are available on explicit selection.
-
-# General-purpose agents (from md_library)
 AGENT_CATEGORIES: Dict[str, str] = {
     "pentest_generalist": "generalist",
     "red_team_agent": "generalist",
@@ -42,16 +48,19 @@ AGENT_CATEGORIES: Dict[str, str] = {
     "exploit_expert": "generalist",
     "cwe_expert": "generalist",
     "replay_attack_specialist": "generalist",
+    "recon_deep": "recon",
     "Pentestfull": "methodology",
 }
-# All vuln-type agents default to "offensive" (handled in _load_all fallback)
 
-# Agents that should NOT run as standalone agents (methodology files, dupes)
 SKIP_AGENTS = {"Pentestfull"}
+RUN_ALL_BY_DEFAULT = True
 
-# Default agents to run when none are explicitly selected:
-# Run ALL vuln-type (offensive) agents — the system is designed for 100-agent dispatch
-DEFAULT_OFFENSIVE_AGENTS: List[str] = []  # Empty = use all offensive agents
+# Max tests per agent to execute
+MAX_TESTS_PER_AGENT = 5
+# Max iterations of the plan→execute→analyze loop
+MAX_ITERATIONS = 2
+# HTTP request timeout per test, in seconds
+REQUEST_TIMEOUT = 10
 
 
 # ─── Data classes ────────────────────────────────────────────────────
@@ -59,22 +68,24 @@ DEFAULT_OFFENSIVE_AGENTS: List[str] = []  # Empty = use all offensive agents
 @dataclass
 class MdAgentDefinition:
     """Parsed .md agent definition."""
-    name: str                       # filename stem (e.g. "owasp_expert")
-    display_name: str               # human-readable (e.g. "OWASP Expert")
-    category: str                   # offensive / analysis / defensive / methodology
-    user_prompt_template: str       # raw user prompt with {placeholders}
-    system_prompt: str              # system prompt
-    file_path: str                  # absolute path to .md file
-    placeholders: List[str] = field(default_factory=list)  # detected {vars}
+    name: str
+    display_name: str
+    category: str  # offensive / generalist / recon / methodology
+    user_prompt_template: str
+    system_prompt: str
+    file_path: str
+    placeholders: List[str] = field(default_factory=list)
 
 
-# ─── MdAgent: executes one .md agent via LLM ────────────────────────
+# ─── MdAgent: plans, executes, and analyzes real tests ───────────────
 
 class MdAgent(SpecialistAgent):
-    """Executes a single .md-based agent against a target URL.
+    """Executes a single .md-based agent with REAL HTTP testing.
 
-    The agent fills the .md template with recon context, sends to the LLM,
-    then parses structured findings from the response.
+    Cycle:
+      1. PLAN  — sends methodology + recon to LLM → gets structured test plan
+      2. EXECUTE — runs actual HTTP requests against the target
+      3. ANALYZE — LLM reviews real responses, confirms findings with evidence
     """
 
     def __init__(
@@ -85,6 +96,9 @@ class MdAgent(SpecialistAgent):
         budget_allocation: float = 0.0,
         budget=None,
         validation_judge=None,
+        http_session=None,
+        auth_headers: Optional[Dict] = None,
+        cancel_fn: Optional[Callable] = None,
     ):
         super().__init__(
             name=f"md_{definition.name}",
@@ -95,9 +109,12 @@ class MdAgent(SpecialistAgent):
         )
         self.definition = definition
         self.validation_judge = validation_judge
+        self.http_session = http_session
+        self.auth_headers = auth_headers or {}
+        self.cancel_fn = cancel_fn or (lambda: False)
 
     async def run(self, context: Dict) -> AgentResult:
-        """Execute the .md agent against the target with recon context."""
+        """Execute the full PLAN → EXECUTE → ANALYZE cycle."""
         result = AgentResult(agent_name=self.name)
         target = context.get("target", "")
 
@@ -105,41 +122,511 @@ class MdAgent(SpecialistAgent):
             result.error = "No target provided"
             return result
 
-        # Build prompts
-        user_prompt = self._build_user_prompt(context)
-        system_prompt = self.definition.system_prompt
+        # Check LLM availability upfront
+        if not self.llm:
+            result.error = "No LLM provided"
+            logger.warning(f"[{self.definition.name}] No LLM available — skipping")
+            return result
 
-        # LLM call
-        try:
-            response = await self._llm_call(
-                f"{system_prompt}\n\n{user_prompt}",
-                category="md_agent",
-                estimated_tokens=2000,
+        if not hasattr(self.llm, 'generate'):
+            result.error = f"LLM has no generate method (type: {type(self.llm).__name__})"
+            logger.warning(f"[{self.definition.name}] {result.error}")
+            return result
+
+        all_findings = []
+
+        for iteration in range(1, MAX_ITERATIONS + 1):
+            if self.cancel_fn():
+                break
+
+            # ── PHASE 1: PLAN ──
+            plan_prompt = self._build_plan_prompt(context, iteration, all_findings)
+            plan_response = await self._llm_with_retry(plan_prompt)
+
+            if not plan_response:
+                result.error = "LLM plan call failed after retries"
+                break
+
+            tests = self._parse_test_plan(plan_response, target)
+            if not tests:
+                # No actionable tests — fall back to theoretical analysis
+                theoretical = self._parse_findings(plan_response, target)
+                all_findings.extend(theoretical)
+                break
+
+            # ── PHASE 2: EXECUTE ──
+            test_results = await self._execute_tests(tests, target)
+            if not test_results:
+                break
+
+            # ── PHASE 3: ANALYZE ──
+            analysis_prompt = self._build_analysis_prompt(
+                context, test_results, target
             )
-        except Exception as e:
-            result.error = f"LLM call failed: {e}"
-            return result
+            analysis_response = await self._llm_with_retry(analysis_prompt)
+            if not analysis_response:
+                break
 
-        if not response:
-            result.error = "Empty LLM response"
-            return result
+            if analysis_response:
+                confirmed = self._parse_analysis_findings(
+                    analysis_response, test_results, target
+                )
+                all_findings.extend(confirmed)
 
-        # Parse findings from structured response
-        parsed = self._parse_findings(response, target)
-        result.findings = parsed
+                # If we found confirmed vulns, no need for another iteration
+                if confirmed:
+                    break
+
+        result.findings = all_findings
         result.data = {
             "agent_name": self.definition.display_name,
             "agent_category": self.definition.category,
-            "findings_count": len(parsed),
-            "raw_response_length": len(response),
+            "findings_count": len(all_findings),
+            "execution_mode": "real_http",
         }
         self.tasks_completed += 1
-
         return result
 
-    # ── Prompt building ──────────────────────────────────────────────
+    # ── LLM call with retry ─────────────────────────────────────────
 
-    def _build_user_prompt(self, context: Dict) -> str:
+    async def _llm_with_retry(self, prompt: str, max_retries: int = 3) -> Optional[str]:
+        """Call LLM with linearly increasing backoff between retries."""
+        last_error = ""
+        for attempt in range(max_retries):
+            try:
+                result = await self.llm.generate(prompt)
+                if result and len(result.strip()) > 10:
+                    return result
+                last_error = f"Empty/short response (len={len(result) if result else 0})"
+                logger.debug(f"[{self.definition.name}] {last_error}, attempt {attempt + 1}")
+            except Exception as e:
+                last_error = str(e)[:200]
+                logger.warning(f"[{self.definition.name}] LLM error (attempt {attempt + 1}/{max_retries}): {last_error}")
+
+            if attempt < max_retries - 1:
+                delay = 5 * (attempt + 1)  # 5s, 10s
+                await asyncio.sleep(delay)
+
+        logger.warning(f"[{self.definition.name}] All {max_retries} attempts failed: {last_error}")
+        return None
+
+    # ── PLAN prompt ──────────────────────────────────────────────────
+
+    def _build_plan_prompt(
+        self, context: Dict, iteration: int, previous_findings: List[Dict]
+    ) -> str:
+        """Build the planning prompt: methodology + recon → structured test plan."""
+        target = context.get("target", "")
+        endpoints = context.get("endpoints", [])
+        technologies = context.get("technologies", [])
+        parameters = context.get("parameters", {})
+        waf_info = context.get("waf_info", "")
+        forms = context.get("forms", [])
+
+        # Fill the .md template with recon context for methodology
+        methodology = self._fill_template(context)
+
+        # Recon summary for the LLM
+        endpoint_list = []
+        for ep in endpoints[:12]:
+            if isinstance(ep, dict):
+                url = ep.get("url", "")
+                method = ep.get("method", "GET")
+                params = ep.get("params", [])
+                endpoint_list.append(f"  {method} {url} params={params}")
+            else:
+                endpoint_list.append(f"  GET {ep}")
+
+        # JS sinks for DOM-related agents
+        js_sinks = context.get("js_sinks", [])
+        js_sinks_str = ""
+        if js_sinks:
+            sink_list = []
+            for s in js_sinks[:5]:
+                if hasattr(s, 'sink_type'):
+                    sink_list.append(f"  {s.sink_type}: {getattr(s, 'code_snippet', '')[:60]}")
+                elif isinstance(s, dict):
+                    sink_list.append(f"  {s.get('sink_type','?')}: {s.get('code_snippet','')[:60]}")
+            if sink_list:
+                js_sinks_str = f"\nJS Sinks (DOM XSS vectors):\n" + chr(10).join(sink_list)
+
+        # API endpoints
+        api_eps = context.get("api_endpoints", [])
+        api_str = ""
+        if api_eps:
+            api_str = f"\nAPI endpoints: {', '.join(str(a) for a in api_eps[:5])}"
+
+        # Forms
+        forms_str = ""
+        if forms:
+            form_list = []
+            for f in (forms if isinstance(forms, list) else [])[:3]:
+                if isinstance(f, dict):
+                    form_list.append(f"  {f.get('method','POST')} {f.get('action','?')} inputs={f.get('inputs',[])}")
+            if form_list:
+                forms_str = f"\nForms:\n" + chr(10).join(form_list)
+
+        recon_summary = f"""Target: {target}
+Tech: {', '.join(technologies[:5]) or 'Unknown'} | WAF: {waf_info or 'None'}
+Endpoints ({len(endpoints)} total, showing {len(endpoint_list)}):
+{chr(10).join(endpoint_list)}
+Params: {json.dumps(dict(list(parameters.items())[:8]) if isinstance(parameters, dict) else {}, default=str)}{forms_str}{js_sinks_str}{api_str}"""
+
+        previous_str = ""
+        if previous_findings:
+            previous_str = f"\n\nPrevious iteration found {len(previous_findings)} potential issues. Adapt your tests to probe deeper or try different vectors."
+
+        system = self.definition.system_prompt or (
+            f"You are a {self.definition.display_name} security testing agent. "
+            f"You perform REAL penetration tests by generating HTTP requests that will be executed against the target."
+        )
+
+        prompt = f"""{system}
+
+## Your Methodology
+{methodology}
+
+## Reconnaissance Data
+{recon_summary}
+{previous_str}
+
+## Your Task (Iteration {iteration}/{MAX_ITERATIONS})
+
+Based on your methodology and the recon data above, generate a CONCRETE test plan.
+Each test must be an HTTP request that will be ACTUALLY EXECUTED against the target.
+
+You MUST output a JSON block with this exact structure:
+
+```json
+{{
+  "reasoning": "Brief explanation of your attack strategy",
+  "tests": [
+    {{
+      "name": "Test name describing what you're checking",
+      "url": "Full URL to test (use target endpoints from recon)",
+      "method": "GET or POST",
+      "params": {{"param_name": "payload_value"}},
+      "headers": {{"Header-Name": "value"}},
+      "body": "POST body if needed (empty string for GET)",
+      "injection_point": "parameter|header|body",
+      "expected_if_vulnerable": "What to look for in the response if vulnerable"
+    }}
+  ]
+}}
+```
+
+Rules:
+- Generate {MAX_TESTS_PER_AGENT} specific tests maximum
+- Use REAL endpoints from the recon data
+- Use REAL parameters discovered
+- Payloads must be safe for testing (no destructive operations)
+- Each test targets a specific vulnerability pattern from your methodology
+- Include the expected_if_vulnerable field so we can verify results
+"""
+        return prompt
+
+    # ── EXECUTE tests ────────────────────────────────────────────────
+
+    async def _execute_tests(
+        self, tests: List[Dict], default_target: str
+    ) -> List[Dict]:
+        """Execute HTTP requests from the test plan. Returns results with real responses."""
+        results = []
+
+        # Create session if needed
+        own_session = False
+        session = self.http_session
+        if not session and HAS_AIOHTTP:
+            connector = aiohttp.TCPConnector(ssl=False)
+            session = aiohttp.ClientSession(connector=connector)
+            own_session = True
+        elif not session:
+            logger.warning(f"[{self.definition.name}] No HTTP session and aiohttp not available")
+            return []
+
+        try:
+            for test in tests[:MAX_TESTS_PER_AGENT]:
+                if self.cancel_fn():
+                    break
+
+                test_url = test.get("url", default_target)
+                method = test.get("method", "GET").upper()
+                params = test.get("params", {})
+                test_headers = test.get("headers", {})
+                body = test.get("body", "")
+                test_name = test.get("name", "unnamed")
+                expected = test.get("expected_if_vulnerable", "")
+
+                # Merge auth headers; per-test headers override them on key conflicts
+                req_headers = {**self.auth_headers, **test_headers}
+
+                start = time.time()
+                try:
+                    kwargs: Dict[str, Any] = {
+                        "timeout": aiohttp.ClientTimeout(total=REQUEST_TIMEOUT),
+                        "headers": req_headers,
+                        "allow_redirects": False,
+                        "ssl": False,
+                    }
+
+                    if method == "GET":
+                        kwargs["params"] = params
+                    elif method == "POST":
+                        if body:
+                            kwargs["data"] = body
+                        elif params:
+                            kwargs["data"] = params
+
+                    async with session.request(method, test_url, **kwargs) as resp:
+                        status = resp.status
+                        resp_headers = dict(resp.headers)
+                        resp_body = await resp.text(errors="replace")
+                        elapsed = time.time() - start
+
+                    results.append({
+                        "test_name": test_name,
+                        "url": test_url,
+                        "method": method,
+                        "params": params,
+                        "payload": json.dumps(params) if params else body,
+                        "status": status,
+                        "response_headers": {k: v for k, v in list(resp_headers.items())[:15]},
+                        "body_preview": resp_body[:2000],
+                        "body_length": len(resp_body),
+                        "response_time": round(elapsed, 3),
+                        "expected_if_vulnerable": expected,
+                    })
+
+                except asyncio.TimeoutError:
+                    results.append({
+                        "test_name": test_name,
+                        "url": test_url,
+                        "method": method,
+                        "status": 0,
+                        "body_preview": "TIMEOUT",
+                        "body_length": 0,
+                        "response_time": REQUEST_TIMEOUT,
+                        "expected_if_vulnerable": expected,
+                    })
+                except Exception as e:
+                    results.append({
+                        "test_name": test_name,
+                        "url": test_url,
+                        "method": method,
+                        "status": 0,
+                        "body_preview": f"ERROR: {str(e)[:200]}",
+                        "body_length": 0,
+                        "response_time": 0,
+                        "expected_if_vulnerable": expected,
+                    })
+
+                # Small delay between requests to avoid hammering
+                await asyncio.sleep(0.15)
+
+        finally:
+            if own_session:
+                await session.close()
+
+        return results
+
+    # ── ANALYZE prompt ───────────────────────────────────────────────
+
+    def _build_analysis_prompt(
+        self, context: Dict, test_results: List[Dict], target: str
+    ) -> str:
+        """Build the analysis prompt: real HTTP responses → confirmed findings."""
+        vuln_type = self.definition.name
+
+        results_summary = []
+        for tr in test_results[:MAX_TESTS_PER_AGENT]:
+            results_summary.append({
+                "test_name": tr["test_name"],
+                "url": tr.get("url", ""),
+                "method": tr.get("method", ""),
+                "status": tr.get("status", 0),
+                "response_time": tr.get("response_time", 0),
+                "body_preview": tr.get("body_preview", "")[:1200],
+                "body_length": tr.get("body_length", 0),
+                "response_headers": tr.get("response_headers", {}),
+                "expected_if_vulnerable": tr.get("expected_if_vulnerable", ""),
+            })
+
+        results_json = json.dumps(results_summary, indent=2, default=str)[:8000]
+
+        return f"""You are a {self.definition.display_name} analyzing REAL HTTP responses from penetration tests against {target}.
+
+## Test Results (ACTUAL HTTP responses — not simulated)
+{results_json}
+
+## Your Task
+
+Analyze each test result and determine if a REAL vulnerability was found.
+You are looking at ACTUAL server responses. Be rigorous:
+
+- A vulnerability is CONFIRMED only if the response PROVES exploitation worked
+- Look for: payload reflection, error messages, data leaks, behavior changes, timing anomalies
+- Compare the "expected_if_vulnerable" hint with what actually appeared in the response
+- Do NOT hallucinate — if the evidence is not in the response body/headers/status, it's NOT confirmed
+- Status code alone is NOT proof (many 200s are normal, many 403s are WAF blocks)
+
+Output a JSON block:
+```json
+{{
+  "analysis": [
+    {{
+      "test_name": "Name of the test",
+      "is_vulnerable": true/false,
+      "confidence": "high|medium|low",
+      "evidence": "Exact text/pattern from the response that proves the vulnerability",
+      "title": "Short vulnerability title",
+      "severity": "critical|high|medium|low|info",
+      "explanation": "Why this is a real vulnerability (reference specific response content)"
+    }}
+  ]
+}}
+```
+
+Only include entries where is_vulnerable is true. If no vulnerabilities found, return empty analysis array.
+Be STRICT — false positives are worse than false negatives."""
+
+    # ── Parse test plan from LLM ─────────────────────────────────────
+
+    def _parse_test_plan(self, response: str, target: str) -> List[Dict]:
+        """Extract structured test plan from LLM plan response."""
+        # Find JSON block
+        json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response)
+        if not json_match:
+            json_match = re.search(r'(\{[\s\S]*"tests"[\s\S]*\})', response)
+
+        if not json_match:
+            return []
+
+        try:
+            plan = json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            # Retry after stripping trailing commas before } and ]
+            try:
+                cleaned = re.sub(r',\s*}', '}', json_match.group(1))
+                cleaned = re.sub(r',\s*]', ']', cleaned)
+                plan = json.loads(cleaned)
+            except json.JSONDecodeError:
+                return []
+
+        tests = plan.get("tests", [])
+        if not isinstance(tests, list):
+            return []
+
+        # Validate and normalize tests
+        valid_tests = []
+        for t in tests[:MAX_TESTS_PER_AGENT]:
+            if not isinstance(t, dict):
+                continue
+            url = t.get("url", "")
+            if not url:
+                continue
+            # Resolve relative URLs
+            if url.startswith("/"):
+                url = urljoin(target, url)
+            # Enforce scope: skip URLs whose netloc (host:port) differs from the target's
+            if urlparse(url).netloc and urlparse(url).netloc != urlparse(target).netloc:
+                continue
+            t["url"] = url
+            t["method"] = t.get("method", "GET").upper()
+            if t["method"] not in ("GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"):
+                t["method"] = "GET"
+            valid_tests.append(t)
+
+        return valid_tests
+
+    # ── Parse analysis findings from LLM ─────────────────────────────
+
+    def _parse_analysis_findings(
+        self, response: str, test_results: List[Dict], target: str
+    ) -> List[Dict]:
+        """Extract confirmed findings from LLM analysis of real responses."""
+        json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response)
+        if not json_match:
+            json_match = re.search(r'(\{[\s\S]*"analysis"[\s\S]*\})', response)
+
+        if not json_match:
+            # Fall back to parsing FINDING: blocks
+            return self._parse_findings(response, target)
+
+        try:
+            data = json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            return self._parse_findings(response, target)
+
+        findings = []
+        for entry in data.get("analysis", []):
+            if not isinstance(entry, dict):
+                continue
+            if not entry.get("is_vulnerable"):
+                continue
+            if entry.get("confidence") not in ("high", "medium"):
+                continue
+
+            evidence = entry.get("evidence", "")
+            test_name = entry.get("test_name", "")
+
+            # Anti-hallucination: verify evidence exists in actual response
+            matched_result = None
+            for tr in test_results:
+                if tr.get("test_name") == test_name:
+                    matched_result = tr
+                    break
+
+            if evidence and matched_result:
+                body = matched_result.get("body_preview", "")
+                headers_str = json.dumps(matched_result.get("response_headers", {}))
+                combined = body + headers_str
+                # Reject unless at least 30% of evidence words (>3 chars) occur in the response
+                evidence_words = [w for w in evidence.lower().split() if len(w) > 3]
+                if evidence_words:
+                    grounded = sum(1 for w in evidence_words if w in combined.lower())
+                    if grounded < len(evidence_words) * 0.3:
+                        logger.debug(
+                            f"[{self.definition.name}] REJECTED: evidence not grounded "
+                            f"for {test_name}"
+                        )
+                        continue
+
+            vuln_type = self.definition.name
+
+            findings.append({
+                "title": entry.get("title", f"{self.definition.display_name} Finding"),
+                "severity": entry.get("severity", "medium"),
+                "vulnerability_type": vuln_type,
+                "cvss_score": 0.0,
+                "cwe_id": "",
+                "description": entry.get("explanation", ""),
+                "affected_endpoint": matched_result.get("url", target) if matched_result else target,
+                "evidence": evidence,
+                "poc_code": (
+                    f"# Request:\n{matched_result.get('method', 'GET')} "
+                    f"{matched_result.get('url', target)}\n"
+                    f"# Params: {json.dumps(matched_result.get('params', {}), default=str)}\n"
+                    f"# Response Status: {matched_result.get('status', '?')}\n"
+                    f"# Response Body (excerpt):\n{matched_result.get('body_preview', '')[:500]}"
+                ) if matched_result else "",
+                "impact": entry.get("explanation", ""),
+                "remediation": "",
+                "source_agent": self.definition.display_name,
+                "parameter": "",
+                "confidence": entry.get("confidence", "medium"),
+                "http_evidence": {
+                    "request_url": matched_result.get("url", "") if matched_result else "",
+                    "request_method": matched_result.get("method", "") if matched_result else "",
+                    "response_status": matched_result.get("status", 0) if matched_result else 0,
+                    "response_time": matched_result.get("response_time", 0) if matched_result else 0,
+                } if matched_result else {},
+            })
+
+        return findings
+
+    # ── Template filling (for methodology context) ───────────────────
+
+    def _fill_template(self, context: Dict) -> str:
         """Fill the .md template placeholders with recon context."""
         target = context.get("target", "")
         endpoints = context.get("endpoints", [])
@@ -150,50 +637,6 @@ class MdAgent(SpecialistAgent):
         waf_info = context.get("waf_info", "")
         existing_findings = context.get("existing_findings", [])
 
-        # Build context objects for different placeholder patterns
-        scope_json = json.dumps({
-            "target": target,
-            "endpoints_discovered": len(endpoints),
-            "technologies": technologies[:15],
-            "waf": waf_info or "Not detected",
-        }, indent=2)
-
-        initial_info_json = json.dumps({
-            "target_url": target,
-            "endpoints": [
-                ep.get("url", ep) if isinstance(ep, dict) else str(ep)
-                for ep in endpoints[:30]
-            ],
-            "parameters": (
-                {k: v for k, v in list(parameters.items())[:20]}
-                if isinstance(parameters, dict) else {}
-            ),
-            "technologies": technologies[:15],
-            "headers": {k: v for k, v in list(headers.items())[:10]},
-            "forms": [
-                {"action": f.get("action", ""), "method": f.get("method", "GET")}
-                for f in (forms[:10] if isinstance(forms, list) else [])
-            ],
-        }, indent=2)
-
-        target_environment_json = json.dumps({
-            "target": target,
-            "technology_stack": technologies[:10],
-            "waf": waf_info or "None detected",
-            "endpoints_count": len(endpoints),
-            "parameters_count": (
-                len(parameters) if isinstance(parameters, dict) else 0
-            ),
-        }, indent=2)
-
-        existing_findings_summary = ""
-        if existing_findings:
-            existing_findings_summary = "\n".join(
-                f"- [{getattr(f, 'severity', 'unknown').upper()}] "
-                f"{getattr(f, 'title', '?')} at {getattr(f, 'affected_endpoint', '?')}"
-                for f in existing_findings[:20]
-            )
-
         recon_data_json = json.dumps({
             "target": target,
             "endpoints": [
@@ -205,135 +648,87 @@ class MdAgent(SpecialistAgent):
                 {k: v for k, v in list(parameters.items())[:20]}
                 if isinstance(parameters, dict) else {}
             ),
-            "existing_findings": existing_findings_summary or "None yet",
         }, indent=2)
 
-        # Replacement map for all known placeholders
+        scope_json = json.dumps({
+            "target": target,
+            "endpoints_discovered": len(endpoints),
+            "technologies": technologies[:15],
+            "waf": waf_info or "Not detected",
+        }, indent=2)
+
+        existing_summary = ""
+        if existing_findings:
+            existing_summary = "\n".join(
+                f"- [{getattr(f, 'severity', 'unknown').upper()}] "
+                f"{getattr(f, 'title', '?')} at {getattr(f, 'affected_endpoint', '?')}"
+                for f in existing_findings[:20]
+            )
+
         replacements = {
-            # New vuln-type agents use these two:
             "{target}": target,
             "{recon_json}": recon_data_json,
-            # Legacy generalist agents use these:
             "{scope_json}": scope_json,
-            "{initial_info_json}": initial_info_json,
-            "{mission_objectives_json}": json.dumps({
-                "primary": f"Identify and exploit vulnerabilities on {target}",
-                "scope": "Web application only",
-                "existing_findings": len(existing_findings),
-            }, indent=2),
-            "{target_environment_json}": target_environment_json,
+            "{initial_info_json}": recon_data_json,
+            "{target_environment_json}": scope_json,
             "{user_input}": target,
-            "{target_info_json}": initial_info_json,
+            "{target_info_json}": recon_data_json,
             "{recon_data_json}": recon_data_json,
-            "{vulnerability_details_json}": json.dumps({
-                "target": target,
-                "known_technologies": technologies[:10],
-                "endpoints": [
-                    ep.get("url", ep) if isinstance(ep, dict) else str(ep)
-                    for ep in endpoints[:15]
-                ],
-            }, indent=2),
-            "{traffic_logs_json}": json.dumps({
-                "target": target,
-                "note": "Live traffic analysis - test authentication replay on discovered endpoints",
-                "endpoints": [
-                    ep.get("url", ep) if isinstance(ep, dict) else str(ep)
-                    for ep in endpoints[:10]
-                ],
-            }, indent=2),
+            "{mission_objectives_json}": json.dumps({
+                "primary": f"Test {target} for vulnerabilities",
+                "existing_findings": len(existing_findings),
+            }),
+            "{vulnerability_details_json}": recon_data_json,
+            "{traffic_logs_json}": json.dumps({"target": target}),
             "{code_vulnerability_json}": json.dumps({
-                "target": target,
-                "technologies": technologies[:10],
-                "note": "Analyze target for CWE weaknesses based on observed behavior",
-            }, indent=2),
+                "target": target, "technologies": technologies[:10],
+            }),
         }
 
-        # Apply replacements
         prompt = self.definition.user_prompt_template
         for placeholder, value in replacements.items():
             prompt = prompt.replace(placeholder, value)
 
-        # Inject recon context appendix if any placeholders remain unfilled
-        if "{" in prompt:
-            prompt += f"\n\n**Recon Context:**\n{recon_data_json}"
+        return prompt[:2000]  # Cap methodology length to save tokens
 
-        return prompt
-
-    # ── Finding parsing ──────────────────────────────────────────────
+    # ── Legacy finding parsing (fallback for theoretical responses) ───
 
     def _parse_findings(self, response: str, target: str) -> List[Dict]:
-        """Parse structured findings from LLM response.
-
-        Handles multiple output formats from different .md agents:
-        - FINDING: key-value blocks (vuln-type agents)
-        - Headed sections (## [SEVERITY] Vulnerability: ...)
-        - OWASP format (## OWASP A0X: ...)
-        - Generic bold-label patterns
-        """
+        """Parse FINDING: blocks or ## sections from LLM response (fallback)."""
         findings = []
 
-        # Pattern 1: FINDING: blocks (used by 100 vuln-type agents)
+        # Pattern 1: FINDING: blocks
         finding_blocks = re.split(r"(?:^|\n)FINDING:", response)
         if len(finding_blocks) > 1:
-            for block in finding_blocks[1:]:  # skip text before first FINDING:
+            for block in finding_blocks[1:]:
                 parsed = self._parse_finding_block(block, target)
                 if parsed:
                     findings.append(parsed)
             if findings:
                 return findings
 
-        # Pattern 2: Section-based findings (## [SEVERITY] Vulnerability: Title)
+        # Pattern 2: Section-based
         vuln_sections = re.findall(
             r"##\s*\[?(Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP\s+A\d+)[\s:]*([^\n]+)",
             response, re.IGNORECASE,
         )
-
         if vuln_sections:
             parts = re.split(
                 r"(?=##\s*\[?(?:Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP))",
                 response, flags=re.IGNORECASE,
             )
             for part in parts:
-                finding = self._parse_finding_section(part, target)
-                if finding:
-                    findings.append(finding)
-        else:
-            # Pattern 3: Generic vulnerability mentions with evidence
-            generic = re.findall(
-                r"\*\*(?:Vulnerability|Finding|Issue)[:\s]*\*\*\s*([^\n]+)",
-                response, re.IGNORECASE,
-            )
-            for title in generic:
-                findings.append({
-                    "title": title.strip(),
-                    "severity": "medium",
-                    "vulnerability_type": self._infer_vuln_type(title),
-                    "description": "",
-                    "affected_endpoint": target,
-                    "evidence": "",
-                    "poc_code": "",
-                    "source_agent": self.definition.display_name,
-                })
+                f = self._parse_finding_section(part, target)
+                if f:
+                    findings.append(f)
 
         return findings
 
     def _parse_finding_block(self, block: str, target: str) -> Optional[Dict]:
-        """Parse a FINDING: key-value block from vuln-type agent response.
-
-        Expected format:
-            FINDING:
-            - Title: SSRF in url parameter at /api/fetch
-            - Severity: High
-            - CWE: CWE-918
-            - Endpoint: https://target.com/api/fetch
-            - Evidence: Internal content returned
-            - Impact: Internal network access
-            - Remediation: Whitelist URLs
-        """
+        """Parse a FINDING: key-value block."""
         if not block.strip():
             return None
 
-        # Extract key-value pairs (- Key: Value)
         kvs: Dict[str, str] = {}
         for match in re.finditer(r"-\s*([A-Za-z][\w\s/]*?):\s*(.+)", block):
             key = match.group(1).strip().lower().replace(" ", "_")
@@ -343,7 +738,6 @@ class MdAgent(SpecialistAgent):
         if not title:
             return None
 
-        # Extract severity
         sev_raw = kvs.get("severity", "medium").lower().strip()
         severity = "medium"
         for s in ("critical", "high", "medium", "low", "info"):
@@ -351,22 +745,14 @@ class MdAgent(SpecialistAgent):
                 severity = s
                 break
 
-        # Extract CWE
         cwe = ""
-        cwe_raw = kvs.get("cwe", "")
-        cwe_match = re.search(r"CWE-(\d+)", cwe_raw)
+        cwe_match = re.search(r"CWE-(\d+)", kvs.get("cwe", ""))
         if cwe_match:
             cwe = f"CWE-{cwe_match.group(1)}"
 
-        # Use agent name as vuln type if it matches a known type
         vuln_type = self.definition.name
-        if vuln_type.startswith("md_"):
-            vuln_type = vuln_type[3:]
-
-        # Extract endpoint
         endpoint = kvs.get("endpoint", kvs.get("url", target)).strip()
 
-        # Extract code blocks as PoC
         poc = ""
         code_blocks = re.findall(r"```(?:\w+)?\n(.*?)```", block, re.DOTALL)
         if code_blocks:
@@ -389,11 +775,10 @@ class MdAgent(SpecialistAgent):
         }
 
     def _parse_finding_section(self, section: str, target: str) -> Optional[Dict]:
-        """Parse a single finding section from the response."""
+        """Parse a ## [SEVERITY] Vulnerability: ... section."""
         if not section.strip():
             return None
 
-        # Extract title
         title_match = re.search(
             r"##\s*\[?(?:Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP[^:]*)[:\s]*(.+)",
             section, re.IGNORECASE,
@@ -402,7 +787,6 @@ class MdAgent(SpecialistAgent):
         if not title:
             return None
 
-        # Extract severity from header or table
         severity = "medium"
         sev_match = re.search(
             r"\*\*Severity\*\*\s*\|?\s*(Critical|High|Medium|Low|Info)",
@@ -418,77 +802,34 @@ class MdAgent(SpecialistAgent):
             if header_sev:
                 severity = header_sev.group(1).lower()
 
-        # Extract CVSS
-        cvss_match = re.search(r"(\d+\.\d+)", section[:500])
-        cvss = float(cvss_match.group(1)) if cvss_match else 0.0
-
-        # Extract CWE
         cwe_match = re.search(r"CWE-(\d+)", section)
         cwe = f"CWE-{cwe_match.group(1)}" if cwe_match else ""
 
-        # Extract endpoint
-        endpoint = target
-        ep_match = re.search(
-            r"\*\*Endpoint\*\*\s*\|?\s*(https?://[^\s|]+)",
-            section, re.IGNORECASE,
-        )
-        if ep_match:
-            endpoint = ep_match.group(1).strip()
-
-        # Extract description
-        desc = ""
-        desc_match = re.search(
-            r"###?\s*Description\s*\n(.*?)(?=\n###?\s|\Z)",
-            section, re.DOTALL | re.IGNORECASE,
-        )
-        if desc_match:
-            desc = desc_match.group(1).strip()[:1000]
-
-        # Extract PoC code blocks
         poc = ""
         code_blocks = re.findall(r"```(?:\w+)?\n(.*?)```", section, re.DOTALL)
         if code_blocks:
-            poc = "\n---\n".join(block.strip() for block in code_blocks[:3])
+            poc = "\n---\n".join(b.strip() for b in code_blocks[:3])
 
-        # Extract evidence/proof
         evidence = ""
         ev_match = re.search(
-            r"###?\s*(?:Proof|Evidence|Tool (?:Output|Evidence))\s*\n(.*?)(?=\n###?\s|\Z)",
+            r"###?\s*(?:Proof|Evidence)\s*\n(.*?)(?=\n###?\s|\Z)",
             section, re.DOTALL | re.IGNORECASE,
         )
         if ev_match:
             evidence = ev_match.group(1).strip()[:1000]
 
-        # Extract impact
-        impact = ""
-        imp_match = re.search(
-            r"###?\s*Impact\s*\n(.*?)(?=\n###?\s|\Z)",
-            section, re.DOTALL | re.IGNORECASE,
-        )
-        if imp_match:
-            impact = imp_match.group(1).strip()[:500]
-
-        # Extract remediation
-        remediation = ""
-        rem_match = re.search(
-            r"###?\s*(?:Remediation|Mitigations?|Fix)\s*\n(.*?)(?=\n###?\s|\Z)",
-            section, re.DOTALL | re.IGNORECASE,
-        )
-        if rem_match:
-            remediation = rem_match.group(1).strip()[:500]
-
         return {
             "title": title,
             "severity": severity,
             "vulnerability_type": self._infer_vuln_type(title),
-            "cvss_score": cvss,
+            "cvss_score": 0.0,
             "cwe_id": cwe,
-            "description": desc,
-            "affected_endpoint": endpoint,
+            "description": "",
+            "affected_endpoint": target,
             "evidence": evidence,
             "poc_code": poc,
-            "impact": impact,
-            "remediation": remediation,
+            "impact": "",
+            "remediation": "",
             "source_agent": self.definition.display_name,
         }
 
@@ -497,61 +838,24 @@ class MdAgent(SpecialistAgent):
         """Infer vulnerability type from finding title."""
         title_lower = title.lower()
         type_map = {
-            "sql injection": "sqli_error",
-            "sqli": "sqli_error",
-            "xss": "xss_reflected",
-            "cross-site scripting": "xss_reflected",
-            "stored xss": "xss_stored",
-            "dom xss": "xss_dom",
-            "command injection": "command_injection",
-            "rce": "command_injection",
-            "remote code": "command_injection",
-            "ssrf": "ssrf",
-            "server-side request": "ssrf",
-            "csrf": "csrf",
-            "cross-site request": "csrf",
-            "lfi": "lfi",
-            "local file": "lfi",
-            "path traversal": "path_traversal",
-            "directory traversal": "path_traversal",
-            "file upload": "file_upload",
-            "xxe": "xxe",
-            "xml external": "xxe",
-            "ssti": "ssti",
-            "template injection": "ssti",
-            "open redirect": "open_redirect",
-            "redirect": "open_redirect",
-            "idor": "idor",
-            "insecure direct": "idor",
-            "broken access": "bola",
-            "access control": "bola",
-            "authentication": "auth_bypass",
-            "auth bypass": "auth_bypass",
-            "brute force": "brute_force",
-            "jwt": "jwt_manipulation",
-            "session": "session_fixation",
-            "clickjacking": "clickjacking",
-            "cors": "cors_misconfig",
-            "crlf": "crlf_injection",
-            "header injection": "header_injection",
-            "security header": "security_headers",
-            "ssl": "ssl_issues",
-            "tls": "ssl_issues",
-            "information disclosure": "information_disclosure",
-            "sensitive data": "sensitive_data_exposure",
-            "directory listing": "directory_listing",
-            "debug": "debug_mode",
-            "deserialization": "insecure_deserialization",
-            "nosql": "nosql_injection",
-            "ldap": "ldap_injection",
-            "graphql": "graphql_injection",
-            "race condition": "race_condition",
-            "business logic": "business_logic",
-            "rate limit": "rate_limit_bypass",
+            "sql injection": "sqli_error", "sqli": "sqli_error",
+            "stored xss": "xss_stored", "dom xss": "xss_dom",  # specific keys must precede generic "xss"
+            "xss": "xss_reflected", "cross-site scripting": "xss_reflected",
+            "command injection": "command_injection", "rce": "command_injection",
+            "ssrf": "ssrf", "csrf": "csrf", "lfi": "lfi",
+            "path traversal": "path_traversal", "file upload": "file_upload",
+            "xxe": "xxe", "ssti": "ssti", "open redirect": "open_redirect",
+            "idor": "idor", "bola": "bola", "auth bypass": "auth_bypass",
+            "jwt": "jwt_manipulation", "cors": "cors_misconfig",
+            "crlf": "crlf_injection", "header injection": "header_injection",
+            "nosql": "nosql_injection", "graphql": "graphql_injection",
+            "race condition": "race_condition", "business logic": "business_logic",
             "subdomain takeover": "subdomain_takeover",
-            "host header": "host_header_injection",
             "prototype pollution": "prototype_pollution",
             "websocket": "websocket_hijacking",
+            "information disclosure": "information_disclosure",
+            "directory listing": "directory_listing",
+            "clickjacking": "clickjacking", "ssl": "ssl_issues",
         }
         for keyword, vtype in type_map.items():
             if keyword in title_lower:
@@ -562,16 +866,18 @@ class MdAgent(SpecialistAgent):
 # ─── MdAgentLibrary: loads all .md agents ────────────────────────────
 
 class MdAgentLibrary:
-    """Loads all .md files from prompts/agents/ and indexes them
-    as executable agent definitions (100+ vuln-type agents)."""
+    """Loads all .md files from prompts/agents/ and indexes them."""
 
-    def __init__(self, md_dir: str = "prompts/agents"):
+    def __init__(self, md_dir: str = ""):
+        if not md_dir:
+            # Resolve relative to project root (parent of backend/)
+            project_root = Path(__file__).resolve().parent.parent.parent
+            md_dir = str(project_root / "prompts" / "agents")
         self.md_dir = Path(md_dir)
         self.agents: Dict[str, MdAgentDefinition] = {}
         self._load_all()
 
     def _load_all(self):
-        """Load all .md files as agent definitions."""
         if not self.md_dir.is_dir():
             logger.warning(f"MD agent directory not found: {self.md_dir}")
             return
@@ -584,7 +890,6 @@ class MdAgentLibrary:
             try:
                 content = md_file.read_text(encoding="utf-8")
 
-                # Parse structured format
                 user_match = re.search(
                     r"## User Prompt\n(.*?)(?=\n## System Prompt|\Z)",
                     content, re.DOTALL,
@@ -600,15 +905,12 @@ class MdAgentLibrary:
                 if not user_prompt and not system_prompt:
                     system_prompt = content.strip()
 
-                # Detect placeholders
                 placeholders = re.findall(r"\{(\w+)\}", user_prompt)
 
-                # Build display name
                 display_name = name.replace("_", " ").title()
                 title_match = re.search(r"^#\s+(.+)", content)
                 if title_match:
                     raw_title = title_match.group(1).strip()
-                    # Remove suffixes: "Prompt", "Specialist Agent", "Agent"
                     display_name = re.sub(
                         r"\s*(?:Specialist Agent|Agent|Prompt)\s*$",
                         "", raw_title,
@@ -637,6 +939,13 @@ class MdAgentLibrary:
     def get_agent(self, name: str) -> Optional[MdAgentDefinition]:
         return self.agents.get(name)
 
+    def get_all_runnable(self) -> List[MdAgentDefinition]:
+        """Return every agent whose category is offensive, generalist or recon."""
+        runnable = ("offensive", "generalist", "recon")
+        return [
+            defn for defn in self.agents.values() if defn.category in runnable
+        ]
+
     def get_offensive_agents(self) -> List[MdAgentDefinition]:
         return [a for a in self.agents.values() if a.category == "offensive"]
 
@@ -644,7 +953,6 @@ class MdAgentLibrary:
         return [a for a in self.agents.values() if a.category == category]
 
     def list_agents(self) -> List[Dict]:
-        """Return agent metadata list for API/frontend."""
         return [
             {
                 "name": a.name,
@@ -656,19 +964,19 @@ class MdAgentLibrary:
         ]
 
 
-# ─── MdAgentOrchestrator: runs agents post-recon ────────────────────
+# ─── MdAgentOrchestrator: phased execution ──────────────────────────
 
 class MdAgentOrchestrator:
-    """Coordinates execution of .md-based agents after recon.
+    """Coordinates execution of .md-based agents in phases.
 
     Flow:
-      1. Select agents (explicit list or defaults)
-      2. Build shared context from recon data
-      3. Run agents in parallel (bounded concurrency)
-      4. Collect and merge findings
+      Phase 1: Recon agents (discover more attack surface)
+      Phase 2: Offensive agents (test specific vuln types, 5 concurrent)
+      Phase 3: Generalist agents (cross-cutting analysis)
+    All agents execute REAL HTTP requests.
     """
 
-    MAX_CONCURRENT = 3
+    MAX_CONCURRENT = 2  # Keep low to avoid API rate limits
 
     def __init__(
         self,
@@ -678,6 +986,9 @@ class MdAgentOrchestrator:
         validation_judge=None,
         log_callback: Optional[Callable] = None,
         progress_callback: Optional[Callable] = None,
+        http_session=None,
+        auth_headers: Optional[Dict] = None,
+        cancel_fn: Optional[Callable] = None,
     ):
         self.llm = llm
         self.memory = memory
@@ -685,6 +996,9 @@ class MdAgentOrchestrator:
         self.validation_judge = validation_judge
         self.log = log_callback
         self.progress_callback = progress_callback
+        self.http_session = http_session
+        self.auth_headers = auth_headers or {}
+        self.cancel_fn = cancel_fn or (lambda: False)
         self.library = MdAgentLibrary()
         self._cancel_event = asyncio.Event()
 
@@ -701,87 +1015,79 @@ class MdAgentOrchestrator:
         headers: Optional[Dict] = None,
         waf_info: str = "",
     ) -> Dict:
-        """Execute selected .md agents against target.
-
-        Args:
-            target: Target URL.
-            recon_data: ReconData object from recon phase.
-            existing_findings: Findings discovered so far.
-            selected_agents: List of agent names to run. None = defaults.
-            headers: Auth/custom headers.
-            waf_info: WAF detection info.
-
-        Returns:
-            Dict with findings, agent_results, statistics.
-        """
+        """Execute agents in phases: recon → offensive → generalist."""
         start_time = time.time()
         self._cancel_event.clear()
 
-        # Resolve agent selection
+        # Merge auth headers
+        all_headers = {**self.auth_headers}
+        if headers:
+            all_headers.update(headers)
+
+        # Resolve agents
         agents_to_run = self._resolve_agents(selected_agents)
         if not agents_to_run:
-            await self._log("warning", "[MD-AGENTS] No agents available to run")
+            await self._log("warning", "[AGENT GRID] No agents available")
             return {"findings": [], "agent_results": {}, "duration": 0}
 
-        agent_names = [a.display_name for a in agents_to_run]
-        await self._log("info", f"[MD-AGENTS] Dispatching {len(agents_to_run)} agents: "
-                                 f"{', '.join(agent_names)}")
+        # Split into phases
+        recon_agents = [a for a in agents_to_run if a.category == "recon"]
+        offensive_agents = [a for a in agents_to_run if a.category == "offensive"]
+        generalist_agents = [a for a in agents_to_run if a.category == "generalist"]
+
+        await self._log("info",
+            f"[AGENT GRID] {len(agents_to_run)} agents: "
+            f"{len(recon_agents)} recon, {len(offensive_agents)} offensive, "
+            f"{len(generalist_agents)} generalist")
 
         # Build shared context
         context = self._build_context(
-            target, recon_data, existing_findings, headers, waf_info,
+            target, recon_data, existing_findings, all_headers, waf_info,
         )
 
-        # Budget per agent
-        n_agents = len(agents_to_run)
-        per_agent_budget = 1.0 / max(n_agents, 1)
-
-        # Create MdAgent instances
-        md_agents: List[MdAgent] = []
-        for defn in agents_to_run:
-            agent = MdAgent(
-                definition=defn,
-                llm=self.llm,
-                memory=self.memory,
-                budget_allocation=per_agent_budget,
-                budget=self.budget,
-                validation_judge=self.validation_judge,
-            )
-            md_agents.append(agent)
-
-        # Run agents with bounded concurrency
-        semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)
         all_results: Dict[str, AgentResult] = {}
+        all_findings: List[Dict] = []
 
-        async def _run_one(agent: MdAgent) -> AgentResult:
-            async with semaphore:
+        # ── Phase 1: Recon agents (sequential, enriches context) ──
+        if recon_agents and not self._cancel_event.is_set():
+            await self._log("info", "[PHASE 1] Recon agents — deep discovery")
+            for defn in recon_agents:
                 if self._cancel_event.is_set():
-                    return AgentResult(
-                        agent_name=agent.name, status="cancelled",
+                    break
+                r = await self._run_agent(defn, context, all_headers)
+                all_results[r.agent_name] = r
+                all_findings.extend(r.findings)
+                # Recon findings enrich context for subsequent phases
+                if r.findings:
+                    context["existing_findings"] = (
+                        context.get("existing_findings", []) + r.findings
                     )
-                await self._log("info",
-                    f"  [{agent.definition.display_name}] Starting...")
-                result = await agent.execute(context)
-                await self._log("info",
-                    f"  [{agent.definition.display_name}] Done: "
-                    f"{len(result.findings)} findings, "
-                    f"{result.duration:.1f}s")
-                return result
 
-        tasks = [_run_one(a) for a in md_agents]
-        results = await asyncio.gather(*tasks, return_exceptions=True)
+        # ── Phase 2: Offensive agents (parallel, bounded) ──
+        if offensive_agents and not self._cancel_event.is_set():
+            await self._log("info",
+                f"[PHASE 2] {len(offensive_agents)} offensive agents — real exploitation")
+            phase_results = await self._run_parallel(
+                offensive_agents, context, all_headers
+            )
+            for r in phase_results:
+                all_results[r.agent_name] = r
+                all_findings.extend(r.findings)
 
-        # Collect results
-        all_findings = []
-        for agent, res in zip(md_agents, results):
-            if isinstance(res, Exception):
-                logger.error(f"MD agent {agent.name} error: {res}")
-                all_results[agent.name] = AgentResult(
-                    agent_name=agent.name, status="failed", error=str(res),
-                )
-            else:
-                all_results[agent.name] = res
-                all_findings.extend(res.findings)
+        # ── Phase 3: Generalist agents (parallel, cross-analysis) ──
+        if generalist_agents and not self._cancel_event.is_set():
+            # Rebuild from the caller's findings: all_findings already holds the
+            # Phase 1 recon findings, so appending it to the enriched context
+            # list would list every recon finding twice.
+            context["existing_findings"] = list(existing_findings or []) + all_findings
+            await self._log("info",
+                f"[PHASE 3] {len(generalist_agents)} generalist agents — cross-analysis")
+            phase_results = await self._run_parallel(
+                generalist_agents, context, all_headers
+            )
+            for r in phase_results:
+                all_results[r.agent_name] = r
+                all_findings.extend(r.findings)
 
         elapsed = time.time() - start_time
         total_tokens = sum(
@@ -790,7 +1096,7 @@ class MdAgentOrchestrator:
         )
 
         await self._log("info",
-            f"[MD-AGENTS] Complete: {len(all_findings)} findings from "
+            f"[AGENT GRID] Complete: {len(all_findings)} findings from "
             f"{len(agents_to_run)} agents in {elapsed:.1f}s")
 
         return {
@@ -812,15 +1118,71 @@ class MdAgentOrchestrator:
             "duration": round(elapsed, 1),
         }
 
+    async def _run_agent(
+        self, defn: MdAgentDefinition, context: Dict, headers: Dict
+    ) -> AgentResult:
+        """Build an MdAgent for ``defn``, execute it on ``context`` and log the outcome."""
+        # Share is 1 / number of agents loaded in the library (not the number dispatched).
+        share = 1.0 / max(len(self.library.agents), 1)
+        agent = MdAgent(
+            definition=defn, llm=self.llm, memory=self.memory,
+            budget_allocation=share, budget=self.budget,
+            validation_judge=self.validation_judge,
+            http_session=self.http_session,
+            auth_headers=headers, cancel_fn=self.cancel_fn,
+        )
+        tag = f"  [{defn.display_name}]"
+        await self._log("info", f"{tag} Starting...")
+        result = await agent.execute(context)
+        took = f"{result.duration:.1f}s"
+        if result.error:
+            level = "warning"
+            message = f"{tag} Error: {result.error[:100]}, {took}"
+        elif result.findings:
+            level = "success"
+            message = f"{tag} {len(result.findings)} findings! {took}"
+        else:
+            level, message = "info", f"{tag} Clean, {took}"
+        await self._log(level, message)
+        return result
+
+    async def _run_parallel(
+        self, agents: List[MdAgentDefinition], context: Dict, headers: Dict
+    ) -> List[AgentResult]:
+        """Run agents concurrently (at most MAX_CONCURRENT); one result per agent, in order."""
+        semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)
+        started = 0  # agents that have acquired a slot so far (drives the stagger)
+
+        def _cancelled(defn: MdAgentDefinition) -> AgentResult:
+            return AgentResult(agent_name=f"md_{defn.name}", status="cancelled")
+
+        async def _bounded(defn: MdAgentDefinition) -> AgentResult:
+            nonlocal started
+            async with semaphore:
+                if self._cancel_event.is_set():
+                    return _cancelled(defn)
+                idx, started = started, started + 1
+                if idx > 0:
+                    await asyncio.sleep(2.0)  # 2s between agent starts to respect rate limits
+                    if self._cancel_event.is_set():  # cancel() may arrive during the delay
+                        return _cancelled(defn)
+                return await self._run_agent(defn, context, headers)
+
+        results = await asyncio.gather(
+            *(_bounded(d) for d in agents), return_exceptions=True
+        )
+        final = []
+        for defn, res in zip(agents, results):
+            if isinstance(res, BaseException):  # also covers CancelledError from a child
+                logger.error(f"Agent {defn.name} error: {res}")
+                res = AgentResult(agent_name=f"md_{defn.name}", status="failed", error=str(res))
+            final.append(res)
+        return final
+
     def _resolve_agents(
         self, selected: Optional[List[str]],
     ) -> List[MdAgentDefinition]:
-        """Resolve agent selection to definitions.
-
-        When no agents are explicitly selected, dispatches ALL
-        offensive (vuln-type) agents — the XBOW-style architecture
-        runs one specialist per vulnerability type.
-        """
+        """Resolve agent selection."""
         if selected:
             resolved = []
             for name in selected:
@@ -831,7 +1193,8 @@ class MdAgentOrchestrator:
                     logger.warning(f"MD agent not found: {name}")
             return resolved
 
-        # Default: all offensive (vuln-type) agents
+        if RUN_ALL_BY_DEFAULT:
+            return self.library.get_all_runnable()
         return self.library.get_offensive_agents()
 
     def _build_context(
@@ -842,7 +1205,6 @@ class MdAgentOrchestrator:
         headers: Optional[Dict],
         waf_info: str,
     ) -> Dict:
-        """Build shared context dict from recon data."""
         ctx: Dict[str, Any] = {"target": target}
 
         if recon_data:
@@ -851,24 +1213,30 @@ class MdAgentOrchestrator:
             ctx["parameters"] = getattr(recon_data, "parameters", {})
             ctx["forms"] = getattr(recon_data, "forms", [])
             ctx["headers"] = getattr(recon_data, "response_headers", {})
+            ctx["js_files"] = getattr(recon_data, "js_files", [])
+            ctx["js_sinks"] = getattr(recon_data, "js_sinks", [])
+            ctx["api_endpoints"] = getattr(recon_data, "api_endpoints", [])
+            ctx["cookies"] = getattr(recon_data, "cookies", [])
         else:
             ctx["endpoints"] = []
             ctx["technologies"] = []
             ctx["parameters"] = {}
             ctx["forms"] = []
             ctx["headers"] = {}
+            ctx["js_files"] = []
+            ctx["js_sinks"] = []
+            ctx["api_endpoints"] = []
+            ctx["cookies"] = []
 
         if headers:
             ctx["headers"].update(headers)
 
         ctx["existing_findings"] = existing_findings or []
         ctx["waf_info"] = waf_info
-
         return ctx
 
     def cancel(self):
         self._cancel_event.set()
 
     def list_available_agents(self) -> List[Dict]:
-        """Return agent list for API/frontend."""
         return self.library.list_agents()
diff --git a/backend/core/smart_router/provider_registry.py b/backend/core/smart_router/provider_registry.py
index 2bb883a..439fe88 100644
--- a/backend/core/smart_router/provider_registry.py
+++ b/backend/core/smart_router/provider_registry.py
@@ -81,7 +81,7 @@ DEFAULT_PROVIDERS: List[Dict] = [
     {
         "id": "claude_code", "name": "Claude Code", "auth_type": "oauth",
         "api_format": "anthropic", "base_url": "https://api.anthropic.com",
-        "tier": 1, "default_model": "claude-sonnet-4-5-20250929",
+        "tier": 1, "default_model": "claude-sonnet-4-20250514",
     },
     {
         "id": "codex_cli", "name": "OpenAI Codex CLI", "auth_type": "oauth",
@@ -116,13 +116,13 @@ DEFAULT_PROVIDERS: List[Dict] = [
     {
         "id": "kiro", "name": "Kiro AI", "auth_type": "oauth",
         "api_format": "anthropic", "base_url": "https://api.anthropic.com",
-        "tier": 1, "default_model": "claude-sonnet-4-5-20250929",
+        "tier": 1, "default_model": "claude-sonnet-4-20250514",
     },
     # === API Key Providers (Tier 1 - Paid) ===
     {
         "id": "anthropic", "name": "Anthropic", "auth_type": "api_key",
         "api_format": "anthropic", "base_url": "https://api.anthropic.com",
-        "tier": 1, "default_model": "claude-sonnet-4-5-20250929",
+        "tier": 1, "default_model": "claude-sonnet-4-20250514",
         "env_key": "ANTHROPIC_API_KEY",
     },
     {
@@ -140,7 +140,7 @@ DEFAULT_PROVIDERS: List[Dict] = [
     {
         "id": "openrouter", "name": "OpenRouter", "auth_type": "api_key",
         "api_format": "openai_compat", "base_url": "https://openrouter.ai/api/v1",
-        "tier": 1, "default_model": "anthropic/claude-sonnet-4-5",
+        "tier": 1, "default_model": "anthropic/claude-sonnet-4-20250514",
         "env_key": "OPENROUTER_API_KEY",
     },
     # === API Key Providers (Tier 2 - Cheap) ===
diff --git a/backend/core/smart_router/router.py b/backend/core/smart_router/router.py
index 62381e5..11c57d2 100644
--- a/backend/core/smart_router/router.py
+++ b/backend/core/smart_router/router.py
@@ -173,41 +173,41 @@ class SmartRouter:
     ) -> List[Tuple[Provider, Account]]:
         """Build ordered list of (provider, account) candidates.
 
-        If preferred is set, ONLY that provider is used (no fallback to others).
-        This ensures the user's explicit choice is respected.
+        If preferred is set, that provider's available accounts are listed FIRST,
+        followed by the remaining providers in tier order (1, 2, 3) as fallback.
         If preferred is not set, all providers are tried by tier.
         """
         candidates = []
+        seen_account_ids = set()
 
         if preferred:
-            # Strict mode: only the preferred provider
+            # Preferred provider goes first in candidate list
             provider = self.registry.get_provider(preferred)
             if provider:
                 accounts = self.registry.get_active_accounts(preferred)
                 for acct in accounts:
                     if self.quota.is_available(acct.id):
                         candidates.append((provider, acct))
+                        seen_account_ids.add(acct.id)
                 if not candidates:
                     logger.warning(
                         f"SmartRouter: Preferred provider '{preferred}' has no active accounts! "
                         f"Falling back to all providers."
                     )
-                else:
-                    return candidates  # Only preferred provider candidates
 
-        # Auto mode or preferred has no active accounts: try all by tier
+        # Add remaining providers as fallback (by tier)
         for tier in (1, 2, 3):
             providers = self.registry.get_providers_by_tier(tier)
             for provider in providers:
-                # Skip disabled providers
                 if not getattr(provider, "enabled", True):
                     continue
                 acct = self.quota.next_account(
                     provider.id,
                     self.registry.get_active_accounts(provider.id),
                 )
-                if acct:
+                if acct and acct.id not in seen_account_ids:
                     candidates.append((provider, acct))
+                    seen_account_ids.add(acct.id)
 
         return candidates
 
diff --git a/backend/main.py b/backend/main.py
index 6fa398b..4e25f9a 100755
--- a/backend/main.py
+++ b/backend/main.py
@@ -11,7 +11,7 @@ from pathlib import Path
 
 from backend.config import settings
 from backend.db.database import init_db, close_db
-from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks, scheduler, vuln_lab, terminal, sandbox, knowledge, mcp, providers, full_ia, cli_agent
+from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks, scheduler, vuln_lab, terminal, sandbox, knowledge, mcp, providers, cli_agent
 from backend.api.websocket import manager as ws_manager
 
 
@@ -116,7 +116,6 @@ app.include_router(sandbox.router, prefix="/api/v1/sandbox", tags=["Sandbox"])
 app.include_router(knowledge.router, prefix="/api/v1/knowledge", tags=["Knowledge"])
 app.include_router(mcp.router, prefix="/api/v1/mcp", tags=["MCP Servers"])
 app.include_router(providers.router, prefix="/api/v1/providers", tags=["Providers"])
-app.include_router(full_ia.router, prefix="/api/v1/full-ia", tags=["FULL AI Testing"])
 app.include_router(cli_agent.router)
 
 
diff --git a/config/config2.json b/config/config2.json
index 06abe42..cb32a4e 100755
--- a/config/config2.json
+++ b/config/config2.json
@@ -32,12 +32,12 @@
             },
             "claude_opus_default": {
                 "provider": "claude",
-                "model": "claude-3-opus-20240229",
+                "model": "claude-opus-4-6-20250918",
                 "api_key": "${ANTHROPIC_API_KEY}",
                 "temperature": 0.7,
-                "max_tokens": 4096,
-                "input_token_limit": 200000,
-                "output_token_limit": 4096,
+                "max_tokens": 16384,
+                "input_token_limit": 1000000,
+                "output_token_limit": 16384,
                 "cache_enabled": true,
                 "search_context_level": "high",
                 "pdf_support_enabled": true,
diff --git a/core/llm_manager.py b/core/llm_manager.py
index d28627c..df4aede 100755
--- a/core/llm_manager.py
+++ b/core/llm_manager.py
@@ -649,7 +649,7 @@ Identify any potential hallucinations, inconsistencies, or areas where the respo
         """Generate using OpenRouter API (OpenAI-compatible).
 
         OpenRouter supports hundreds of models through a unified API.
-        Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-20250514').
+        Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-6').
         API key comes from OPENROUTER_API_KEY env var or config profile.
         """
         if not self.api_key:
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 4e88e6f..ec13a2a 100755
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -17,15 +17,12 @@ import SandboxDashboardPage from './pages/SandboxDashboardPage'
 import KnowledgePage from './pages/KnowledgePage'
 import MCPManagementPage from './pages/MCPManagementPage'
 import ProvidersPage from './pages/ProvidersPage'
-import FullIATestingPage from './pages/FullIATestingPage'
-
 function App() {
   return (
     <Layout>
       <Routes>
         <Route path="/" element={<HomePage />} />
         <Route path="/auto" element={<AutoPentestPage />} />
-        <Route path="/full-ia" element={<FullIATestingPage />} />
         <Route path="/vuln-lab" element={<VulnLabPage />} />
         <Route path="/terminal" element={<TerminalAgentPage />} />
         <Route path="/scan/new" element={<NewScanPage />} />
diff --git a/frontend/src/components/layout/Header.tsx b/frontend/src/components/layout/Header.tsx
index b68ac0a..7cd43ec 100755
--- a/frontend/src/components/layout/Header.tsx
+++ b/frontend/src/components/layout/Header.tsx
@@ -5,7 +5,8 @@ const pageTitles: Record<string, string> = {
   '/scan/new': 'New Security Scan',
   '/reports': 'Reports',
   '/settings': 'Settings',
-  '/full-ia': 'FULL AI TESTING',
+  '/auto': 'Auto Pentest',
+  '/realtime': 'Real-time Task',
 }
 
 export default function Header() {
diff --git a/frontend/src/components/layout/Sidebar.tsx b/frontend/src/components/layout/Sidebar.tsx
index 91d44ef..c11e4d0 100755
--- a/frontend/src/components/layout/Sidebar.tsx
+++ b/frontend/src/components/layout/Sidebar.tsx
@@ -16,7 +16,6 @@ import {
   Brain,
   Cable,
   Plug,
-  Crosshair,
   ChevronLeft,
   ChevronRight,
 } from 'lucide-react'
@@ -30,7 +29,6 @@ const navGroups = [
       { path: '/auto', icon: Rocket, label: 'Auto Pentest' },
       { path: '/scan/new', icon: Bot, label: 'AI Agent' },
       { path: '/realtime', icon: Zap, label: 'Real-time Task' },
-      { path: '/full-ia', icon: Crosshair, label: 'FULL AI TESTING' },
     ],
   },
   {
diff --git a/frontend/src/pages/AutoPentestPage.tsx b/frontend/src/pages/AutoPentestPage.tsx
index f39542e..edb8969 100755
--- a/frontend/src/pages/AutoPentestPage.tsx
+++ b/frontend/src/pages/AutoPentestPage.tsx
@@ -14,15 +14,15 @@ import VulnAgentGrid from '../components/VulnAgentGrid'
 // ─── Constants ────────────────────────────────────────────────────────────────
 
 const PHASES = [
-  { key: 'parallel', label: 'Parallel Streams', icon: Layers, range: [0, 50] as const },
-  { key: 'deep', label: 'Deep Analysis', icon: Brain, range: [50, 75] as const },
-  { key: 'final', label: 'Finalization', icon: Shield, range: [75, 100] as const },
+  { key: 'recon', label: 'Reconnaissance', icon: Globe, range: [0, 20] as const },
+  { key: 'agents', label: 'Agent Grid (108 agents)', icon: Layers, range: [20, 85] as const },
+  { key: 'final', label: 'Finalization', icon: Shield, range: [85, 100] as const },
 ]
 
 const STREAMS = [
-  { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 25 },
-  { key: 'junior', label: 'Junior AI', icon: Brain, color: 'purple', activeUntil: 35 },
-  { key: 'tools', label: 'Tools', icon: Wrench, color: 'orange', activeUntil: 50 },
+  { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 20 },
+  { key: 'agents', label: 'Agent Grid', icon: Brain, color: 'purple', activeUntil: 85 },
+  { key: 'final', label: 'Report', icon: Wrench, color: 'orange', activeUntil: 100 },
 ] as const
 
 const STREAM_COLORS: Record<string, { bg: string; text: string; border: string; pulse: string }> = {
@@ -53,12 +53,10 @@ const CONFIDENCE_STYLES: Record<string, string> = {
 
 const LOG_FILTERS = [
   { key: 'all', label: 'All', color: '' },
-  { key: 'stream1', label: 'Recon', color: 'text-blue-400' },
-  { key: 'stream2', label: 'Junior', color: 'text-purple-400' },
-  { key: 'stream3', label: 'Tools', color: 'text-orange-400' },
-  { key: 'deep', label: 'Deep', color: 'text-cyan-400' },
-  { key: 'container', label: 'Container', color: 'text-cyan-300' },
-  { key: 'cli_agent', label: 'CLI Agent', color: 'text-pink-400' },
+  { key: 'recon', label: 'Recon', color: 'text-blue-400' },
+  { key: 'agents', label: 'Agents', color: 'text-green-400' },
+  { key: 'judge', label: 'Validation', color: 'text-amber-300' },
+  { key: 'final', label: 'Final', color: 'text-cyan-400' },
   { key: 'error', label: 'Errors', color: 'text-red-400' },
 ]
 
@@ -88,8 +86,8 @@ interface Toast {
 // ─── Utility Functions ────────────────────────────────────────────────────────
 
 function phaseFromProgress(progress: number): number {
-  if (progress < 50) return 0
-  if (progress < 75) return 1
+  if (progress < 20) return 0
+  if (progress < 85) return 1
   return 2
 }
 
@@ -116,6 +114,15 @@ function logMessageColor(message: string): string {
   if (message.startsWith('[PLAYBOOK]')) return 'text-indigo-400'
   if (message.startsWith('[SITE ANALYZER]')) return 'text-emerald-400'
   if (message.startsWith('[MD-AGENTS]')) return 'text-cyan-300'
+  if (message.startsWith('[AGENT GRID]')) return 'text-green-400'
+  if (message.startsWith('[PHASE 1]')) return 'text-blue-300'
+  if (message.startsWith('[PHASE 2]')) return 'text-purple-300'
+  if (message.startsWith('[PHASE 3]')) return 'text-yellow-300'
+  if (message.startsWith('[RECON]')) return 'text-blue-400'
+  if (message.startsWith('[CVE]')) return 'text-red-300'
+  if (message.startsWith('[CHAIN]')) return 'text-orange-300'
+  if (message.startsWith('[JUDGE]')) return 'text-amber-300'
+  if (message.includes('Starting (real HTTP)')) return 'text-green-300'
   return ''
 }
 
@@ -422,8 +429,8 @@ export default function AutoPentestPage() {
 
   // Model selection
   const [availableModels, setAvailableModels] = useState<Array<{ provider_id: string; provider_name: string; default_model: string; tier: number; available_models: string[] }>>([])
-  const [selectedProvider, setSelectedProvider] = useState('')
-  const [selectedModel, setSelectedModel] = useState('')
+  const [selectedProvider, setSelectedProvider] = useState('anthropic')
+  const [selectedModel, setSelectedModel] = useState('claude-sonnet-4-20250514')
 
   // MD Agent selection
   const [availableMdAgents, setAvailableMdAgents] = useState<Array<{ name: string; display_name: string; category: string }>>([])
@@ -739,12 +746,7 @@ export default function AutoPentestPage() {
     return () => { if (pollRef.current) clearInterval(pollRef.current) }
   }, [sessions, agentId, connectionLost, addToast])
 
-  // Auto-scroll logs
-  useEffect(() => {
-    if (activeTab === 'logs' && logsEndRef.current) {
-      logsEndRef.current.scrollIntoView({ behavior: 'smooth' })
-    }
-  }, [logs, activeTab])
+  // Auto-scroll logs disabled — user controls scroll position
 
   // ─── History ──────────────────────────────────────────────────────────────
 
@@ -1376,48 +1378,60 @@ export default function AutoPentestPage() {
           )}
 
           {/* LLM Provider / Model Selection */}
-          {availableModels.length > 0 && (
-            <div className="mb-6 flex flex-col sm:flex-row gap-3 sm:gap-4">
-              <div className="flex-1">
-                <label className="block text-xs font-medium text-dark-400 mb-1">LLM Provider</label>
-                <select
-                  value={selectedProvider}
-                  onChange={e => {
-                    setSelectedProvider(e.target.value)
-                    const m = availableModels.find(m => m.provider_id === e.target.value)
-                    if (m) setSelectedModel(m.default_model)
-                    else setSelectedModel('')
-                  }}
-                  disabled={isRunning}
-                  className="w-full px-3 py-2 bg-dark-900 border border-dark-600 rounded-lg text-sm text-white focus:outline-none focus:border-green-500 disabled:opacity-50 transition-colors"
-                >
-                  <option value="">Auto (best available)</option>
-                  {availableModels.map(m => (
-                    <option key={m.provider_id} value={m.provider_id}>
-                      {m.provider_name} (Tier {m.tier})
-                    </option>
-                  ))}
-                </select>
-              </div>
-              <div className="flex-1">
-                <label className="block text-xs font-medium text-dark-400 mb-1">Model</label>
-                <select
-                  value={selectedModel}
-                  onChange={e => setSelectedModel(e.target.value)}
-                  disabled={isRunning}
-                  className="w-full px-3 py-2 bg-dark-900 border border-dark-600 rounded-lg text-sm text-white focus:outline-none focus:border-green-500 disabled:opacity-50 transition-colors"
-                >
-                  <option value="">Auto (default)</option>
-                  {(selectedProvider
-                    ? (availableModels.find(m => m.provider_id === selectedProvider)?.available_models || [])
-                    : availableModels.flatMap(m => m.available_models).filter((v, i, a) => a.indexOf(v) === i)
-                  ).map(model => (
-                    <option key={model} value={model}>{model}</option>
-                  ))}
-                </select>
-              </div>
+          <div className="mb-6 flex flex-col sm:flex-row gap-3 sm:gap-4">
+            <div className="flex-1">
+              <label className="block text-xs font-medium text-dark-400 mb-1">LLM Provider</label>
+              <select
+                value={selectedProvider}
+                onChange={e => {
+                  setSelectedProvider(e.target.value)
+                  const m = availableModels.find(m => m.provider_id === e.target.value)
+                  if (m) setSelectedModel(m.default_model)
+                  else if (e.target.value === 'anthropic') setSelectedModel('claude-sonnet-4-20250514')
+                  else setSelectedModel('')
+                }}
+                disabled={isRunning}
+                className="w-full px-3 py-2 bg-dark-900 border border-dark-600 rounded-lg text-sm text-white focus:outline-none focus:border-green-500 disabled:opacity-50 transition-colors"
+              >
+                <option value="">Auto (best available)</option>
+                <option value="anthropic">Anthropic (Claude API)</option>
+                <option value="claude_code">Claude Code (OAuth)</option>
+                <option value="openai">OpenAI</option>
+                <option value="gemini">Gemini</option>
+                <option value="openrouter">OpenRouter</option>
+                {availableModels.filter(m => !['anthropic','claude_code','openai','gemini','openrouter'].includes(m.provider_id)).map(m => (
+                  <option key={m.provider_id} value={m.provider_id}>
+                    {m.provider_name} (Tier {m.tier})
+                  </option>
+                ))}
+              </select>
             </div>
-          )}
+            <div className="flex-1">
+              <label className="block text-xs font-medium text-dark-400 mb-1">Model</label>
+              <select
+                value={selectedModel}
+                onChange={e => setSelectedModel(e.target.value)}
+                disabled={isRunning}
+                className="w-full px-3 py-2 bg-dark-900 border border-dark-600 rounded-lg text-sm text-white focus:outline-none focus:border-green-500 disabled:opacity-50 transition-colors"
+              >
+                <option value="">Auto (default)</option>
+                {selectedProvider === 'anthropic' || selectedProvider === 'claude_code' || selectedProvider === '' ? (
+                  <>
+                    <option value="claude-opus-4-20250514">Claude Opus 4</option>
+                    <option value="claude-sonnet-4-20250514">Claude Sonnet 4</option>
+                    <option value="claude-sonnet-4-5-20250929">Claude Sonnet 4.5</option>
+                    <option value="claude-haiku-4-5-20251001">Claude Haiku 4.5</option>
+                  </>
+                ) : null}
+                {(selectedProvider && availableModels.find(m => m.provider_id === selectedProvider)?.available_models || [])
+                  .filter(m => !m.startsWith('claude-'))
+                  .map(model => (
+                    <option key={model} value={model}>{model}</option>
+                  ))
+                }
+              </select>
+            </div>
+          </div>
 
           {/* Multi-target textarea */}
           {multiTarget && (
diff --git a/frontend/src/pages/HomePage.tsx b/frontend/src/pages/HomePage.tsx
index e44e651..02ded86 100755
--- a/frontend/src/pages/HomePage.tsx
+++ b/frontend/src/pages/HomePage.tsx
@@ -369,8 +369,8 @@ export default function HomePage() {
       {/* ── Quick Actions ─────────────────────────────────────── */}
       <div className="grid grid-cols-2 sm:grid-cols-4 gap-3">
         {([
-          { label: 'Auto Pentest', icon: Zap, to: '/auto', color: 'text-green-400', bg: 'bg-green-500/10 hover:bg-green-500/20', border: 'border-green-500/20 hover:border-green-500/40', desc: '3-stream AI testing' },
-          { label: 'Full IA Testing', icon: Shield, to: '/full-ia', color: 'text-red-400', bg: 'bg-red-500/10 hover:bg-red-500/20', border: 'border-red-500/20 hover:border-red-500/40', desc: '100 vuln types' },
+          { label: 'Auto Pentest', icon: Zap, to: '/auto', color: 'text-green-400', bg: 'bg-green-500/10 hover:bg-green-500/20', border: 'border-green-500/20 hover:border-green-500/40', desc: '109 agents + 100 vulns' },
+          { label: 'AI Agent', icon: Shield, to: '/scan/new', color: 'text-red-400', bg: 'bg-red-500/10 hover:bg-red-500/20', border: 'border-red-500/20 hover:border-red-500/40', desc: 'Custom AI scan' },
           { label: 'Vuln Lab', icon: FlaskConical, to: '/vuln-lab', color: 'text-purple-400', bg: 'bg-purple-500/10 hover:bg-purple-500/20', border: 'border-purple-500/20 hover:border-purple-500/40', desc: 'Per-type challenges' },
           { label: 'Terminal', icon: Terminal, to: '/terminal', color: 'text-cyan-400', bg: 'bg-cyan-500/10 hover:bg-cyan-500/20', border: 'border-cyan-500/20 hover:border-cyan-500/40', desc: 'AI chat + commands' },
         ] as const).map(action => (
diff --git a/prompts/task_library.json b/prompts/task_library.json
index 5d80b88..f3e5650 100755
--- a/prompts/task_library.json
+++ b/prompts/task_library.json
@@ -1,6 +1,6 @@
 {
   "version": "1.0",
-  "updated_at": "2026-02-24T13:16:20.190712",
+  "updated_at": "2026-03-25T01:48:34.034821",
   "tasks": [
     {
       "id": "recon_full",
diff --git a/rebuild.sh b/rebuild.sh
index bfbc7ac..4450119 100755
--- a/rebuild.sh
+++ b/rebuild.sh
@@ -1,689 +1,196 @@
 #!/usr/bin/env bash
 # ============================================================================
-# NeuroSploit v3 - Rebuild & Launch Script
+# NeuroSploit v3 — Rebuild & Launch (Claude 4.6)
 # ============================================================================
-# Usage: chmod +x rebuild.sh && ./rebuild.sh
-# Options:
-#   --backend-only   Only start the backend (skip frontend)
-#   --frontend-only  Only start the frontend (skip backend)
-#   --build          Build frontend for production instead of dev mode
-#   --install        Force reinstall all dependencies
-#   --reset-db       Delete and recreate the database (for schema changes)
+# ./rebuild.sh                        Default (backend + frontend)
+# ./rebuild.sh --backend-only         Skip frontend
+# ./rebuild.sh --frontend-only        Skip backend
+# ./rebuild.sh --model MODEL          Override LLM model
+# ./rebuild.sh --install              Force reinstall dependencies
+# ./rebuild.sh --reset-db             Delete + recreate database
+# ./rebuild.sh --build                Production frontend build
+# ./rebuild.sh --port 9000            Custom backend port (--frontend-port N sets the frontend port)
 # ============================================================================
 
 set -e
 
-PROJECT_DIR="/opt/NeuroSploitv2"
-VENV_DIR="$PROJECT_DIR/venv"
-FRONTEND_DIR="$PROJECT_DIR/frontend"
-DATA_DIR="$PROJECT_DIR/data"
-LOGS_DIR="$PROJECT_DIR/logs"
-PID_DIR="$PROJECT_DIR/.pids"
-DB_PATH="$DATA_DIR/neurosploit.db"
+DIR="/opt/NeuroSploitv2"
+VENV="$DIR/venv"
+FRONT="$DIR/frontend"
+LOGS="$DIR/logs"
+PIDS="$DIR/.pids"
+DB="$DIR/data/neurosploit.db"
 
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-CYAN='\033[0;36m'
-NC='\033[0m'
+# ── Colors ───────────────────────────────────────────────────────────
+R='\033[0;31m' G='\033[0;32m' Y='\033[1;33m' B='\033[0;34m' C='\033[0;36m' N='\033[0m'
+header() { echo -e "\n${C}━━━ $1 ━━━${N}"; }
+ok()     { echo -e "  ${G}✓${N} $1"; }
+warn()   { echo -e "  ${Y}!${N} $1"; }
+fail()   { echo -e "  ${R}✗${N} $1"; exit 1; }
 
-# Parse args
-BACKEND_ONLY=false
-FRONTEND_ONLY=false
-PRODUCTION_BUILD=false
-FORCE_INSTALL=false
-RESET_DB=false
+# ── Parse args ───────────────────────────────────────────────────────
+BACK_ONLY=false; FRONT_ONLY=false; BUILD=false; INSTALL=false; RESET=false
+MODEL=""; PORT=8000; FPORT=3000
 
-for arg in "$@"; do
-  case $arg in
-    --backend-only)  BACKEND_ONLY=true ;;
-    --frontend-only) FRONTEND_ONLY=true ;;
-    --build)         PRODUCTION_BUILD=true ;;
-    --install)       FORCE_INSTALL=true ;;
-    --reset-db)      RESET_DB=true ;;
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --backend-only)  BACK_ONLY=true;  shift ;;
+    --frontend-only) FRONT_ONLY=true; shift ;;
+    --build)         BUILD=true;      shift ;;
+    --install)       INSTALL=true;    shift ;;
+    --reset-db)      RESET=true;      shift ;;
+    --model)         MODEL="$2";      shift 2 ;;
+    --port)          PORT="$2";       shift 2 ;;
+    --frontend-port) FPORT="$2";     shift 2 ;;
+    *) shift ;;
   esac
 done
 
-header() {
-  echo ""
-  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-  echo -e "${CYAN}  $1${NC}"
-  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-}
+# ── 0. Stop previous ────────────────────────────────────────────────
+header "Stopping previous"
+mkdir -p "$PIDS" "$LOGS" "$DIR/data" "$DIR/reports/screenshots"
 
-step() {
-  echo -e "${GREEN}[+]${NC} $1"
-}
-
-warn() {
-  echo -e "${YELLOW}[!]${NC} $1"
-}
-
-fail() {
-  echo -e "${RED}[x]${NC} $1"
-  exit 1
-}
-
-# ============================================================================
-# 0. Kill previous instances
-# ============================================================================
-header "Stopping previous instances"
-
-mkdir -p "$PID_DIR"
-
-# Kill by PID files if they exist
-for pidfile in "$PID_DIR"/*.pid; do
-  [ -f "$pidfile" ] || continue
-  pid=$(cat "$pidfile" 2>/dev/null)
-  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
-    step "Stopping process $pid ($(basename "$pidfile" .pid))"
-    kill "$pid" 2>/dev/null || true
-    sleep 1
-    kill -9 "$pid" 2>/dev/null || true
-  fi
-  rm -f "$pidfile"
+for f in "$PIDS"/*.pid; do
+  [ -f "$f" ] || continue
+  pid=$(cat "$f" 2>/dev/null)
+  [ -n "$pid" ] && kill "$pid" 2>/dev/null && ok "Stopped $(basename "$f" .pid)"
+  rm -f "$f"
 done
-
-# Also kill any lingering uvicorn/vite on our ports
-if lsof -ti:8000 >/dev/null 2>&1; then
-  step "Killing process on port 8000"
-  kill $(lsof -ti:8000) 2>/dev/null || true
-fi
-if lsof -ti:3000 >/dev/null 2>&1; then
-  step "Killing process on port 3000"
-  kill $(lsof -ti:3000) 2>/dev/null || true
-fi
-
+lsof -ti:$PORT  >/dev/null 2>&1 && kill $(lsof -ti:$PORT)  2>/dev/null || true
+lsof -ti:$FPORT >/dev/null 2>&1 && kill $(lsof -ti:$FPORT) 2>/dev/null || true
 sleep 1
-step "Previous instances stopped"
 
-# ============================================================================
-# 1. Ensure directories exist
-# ============================================================================
-header "Preparing directories"
-mkdir -p "$DATA_DIR" "$LOGS_DIR" "$PID_DIR"
-mkdir -p "$PROJECT_DIR/reports/screenshots"
-mkdir -p "$PROJECT_DIR/reports/benchmark_results/logs"
-mkdir -p "$DATA_DIR/vectorstore"
-mkdir -p "$DATA_DIR/checkpoints"
-step "Directories ready"
+# ── 1. Database reset ───────────────────────────────────────────────
+if [ "$RESET" = true ] && [ -f "$DB" ]; then
+  header "Reset database"
+  cp "$DB" "$DB.bak.$(date +%s)"
+  rm -f "$DB"
+  ok "DB backed up and deleted"
+fi
 
-# ============================================================================
-# 1b. Database reset (if requested)
-# ============================================================================
-if [ "$RESET_DB" = true ]; then
-  header "Resetting database"
-  if [ -f "$DB_PATH" ]; then
-    BACKUP="$DB_PATH.backup.$(date +%Y%m%d%H%M%S)"
-    step "Backing up existing DB to $BACKUP"
-    cp "$DB_PATH" "$BACKUP"
-    rm -f "$DB_PATH"
-    step "Database deleted (will be recreated with new schema on startup)"
+# ── 2. Environment check ────────────────────────────────────────────
+header "Environment"
+
+[ -f "$DIR/.env" ] || { [ -f "$DIR/.env.example" ] && cp "$DIR/.env.example" "$DIR/.env" && warn "Created .env from example"; } || fail "No .env"
+ok ".env"
+
+PY=$(command -v python3 || command -v python) || fail "Python not found"
+ok "Python: $($PY --version 2>&1)"
+
+if [ "$BACK_ONLY" = false ]; then
+  command -v node &>/dev/null || fail "Node.js not found"
+  ok "Node: $(node --version)"
+fi
+
+command -v docker &>/dev/null && ok "Docker: available" || warn "Docker: not found (sandbox disabled)"
+
+# ── 3. Backend setup ────────────────────────────────────────────────
+if [ "$FRONT_ONLY" = false ]; then
+  header "Backend"
+
+  [ -d "$VENV" ] && [ "$INSTALL" = false ] || { $PY -m venv "$VENV"; ok "Venv created"; }
+  source "$VENV/bin/activate"
+
+  if [ "$INSTALL" = true ] || [ ! -f "$VENV/.ok" ]; then
+    pip install -q --upgrade pip
+    pip install -q -r "$DIR/backend/requirements.txt" 2>&1 | tail -3
+    pip install -q -r "$DIR/requirements.txt" 2>&1 | tail -3
+    [ -f "$DIR/requirements-optional.txt" ] && pip install -q -r "$DIR/requirements-optional.txt" 2>/dev/null || true
+    touch "$VENV/.ok"
+    ok "Dependencies installed"
   else
-    step "No existing database found"
-  fi
-fi
-
-# ============================================================================
-# 2. Environment check
-# ============================================================================
-header "Checking environment"
-
-if [ ! -f "$PROJECT_DIR/.env" ]; then
-  if [ -f "$PROJECT_DIR/.env.example" ]; then
-    warn ".env not found, copying from .env.example"
-    cp "$PROJECT_DIR/.env.example" "$PROJECT_DIR/.env"
-  else
-    fail ".env file not found and no .env.example to copy from"
-  fi
-fi
-step ".env file present"
-
-# Check Python
-if command -v python3 &>/dev/null; then
-  PYTHON=python3
-elif command -v python &>/dev/null; then
-  PYTHON=python
-else
-  fail "Python not found. Install Python 3.10+"
-fi
-step "Python: $($PYTHON --version)"
-
-# Check Node
-if command -v node &>/dev/null; then
-  step "Node: $(node --version)"
-else
-  if [ "$BACKEND_ONLY" = false ]; then
-    fail "Node.js not found. Install Node.js 18+"
-  fi
-fi
-
-# Check Docker (optional - needed for sandbox & benchmarks)
-if command -v docker &>/dev/null; then
-  step "Docker: $(docker --version 2>/dev/null | head -1)"
-  # Check compose
-  if docker compose version &>/dev/null 2>&1; then
-    step "Docker Compose: plugin (docker compose)"
-  elif command -v docker-compose &>/dev/null; then
-    step "Docker Compose: standalone ($(docker-compose version --short 2>/dev/null))"
-  else
-    warn "Docker Compose not found (needed for sandbox & benchmarks)"
-  fi
-else
-  warn "Docker not found (optional - needed for sandbox & benchmarks)"
-fi
-
-# ============================================================================
-# 3. Python virtual environment & dependencies
-# ============================================================================
-if [ "$FRONTEND_ONLY" = false ]; then
-  header "Setting up Python backend"
-
-  if [ ! -d "$VENV_DIR" ] || [ "$FORCE_INSTALL" = true ]; then
-    step "Creating virtual environment..."
-    $PYTHON -m venv "$VENV_DIR"
+    ok "Dependencies cached"
   fi
 
-  source "$VENV_DIR/bin/activate"
-  step "Virtual environment activated"
-
-  if [ "$FORCE_INSTALL" = true ] || [ ! -f "$VENV_DIR/.deps_installed" ]; then
-    step "Installing backend dependencies..."
-    pip install --quiet --upgrade pip
-
-    # Install from requirements files (pyproject.toml is for tool config only)
-    pip install --quiet -r "$PROJECT_DIR/backend/requirements.txt" 2>&1 | tail -5
-    pip install --quiet -r "$PROJECT_DIR/requirements.txt" 2>&1 | tail -5
-    touch "$VENV_DIR/.deps_installed"
-    step "Core dependencies installed"
-
-    # Try optional deps (may fail on Python <3.10)
-    if [ -f "$PROJECT_DIR/requirements-optional.txt" ]; then
-      step "Installing optional dependencies (best-effort)..."
-      pip install --quiet -r "$PROJECT_DIR/requirements-optional.txt" 2>/dev/null && \
-        step "Optional deps installed (mcp, playwright)" || \
-        warn "Some optional deps skipped (Python 3.10+ required for mcp/playwright)"
-    fi
-  else
-    step "Dependencies already installed (use --install to force)"
-  fi
-
-  # Validate key modules
-  step "Validating Python modules..."
-  $PYTHON -c "
-import sys
-
-# === Core Platform (14) ===
-core_modules = [
-    ('backend.main', 'FastAPI App'),
-    ('backend.config', 'Settings'),
-    ('core.llm_manager', 'LLM Manager'),
-    ('core.model_router', 'Model Router'),
-    ('core.scheduler', 'Scheduler'),
-    ('core.knowledge_augmentor', 'Knowledge Augmentor'),
-    ('core.browser_validator', 'Browser Validator'),
-    ('core.mcp_client', 'MCP Client'),
-    ('core.mcp_server', 'MCP Server'),
-    ('core.sandbox_manager', 'Sandbox Manager'),
-    ('core.context_builder', 'Context Builder'),
-    ('core.pentest_executor', 'Pentest Executor'),
-    ('core.tool_installer', 'Tool Installer'),
-    ('core.report_generator', 'Report Generator (CLI)'),
-]
-
-# === API Layer (18) ===
-api_modules = [
-    ('backend.api.v1.agent', 'Agent API'),
-    ('backend.api.v1.scans', 'Scans API'),
-    ('backend.api.v1.targets', 'Targets API'),
-    ('backend.api.v1.prompts', 'Prompts API'),
-    ('backend.api.v1.reports', 'Reports API'),
-    ('backend.api.v1.dashboard', 'Dashboard API'),
-    ('backend.api.v1.vulnerabilities', 'Vulnerabilities API'),
-    ('backend.api.v1.settings', 'Settings API'),
-    ('backend.api.v1.agent_tasks', 'Agent Tasks API'),
-    ('backend.api.v1.scheduler', 'Scheduler API'),
-    ('backend.api.v1.vuln_lab', 'VulnLab API'),
-    ('backend.api.v1.terminal', 'Terminal API'),
-    ('backend.api.v1.sandbox', 'Sandbox API'),
-    ('backend.api.v1.knowledge', 'Knowledge API'),
-    ('backend.api.v1.mcp', 'MCP API'),
-    ('backend.api.v1.providers', 'Providers API'),
-    ('backend.api.v1.full_ia', 'Full IA Testing API'),
-    ('backend.api.v1.cli_agent', 'CLI Agent API'),
-]
-
-# === VulnEngine (18) ===
-vuln_modules = [
-    ('backend.core.vuln_engine.engine', 'VulnEngine Core'),
-    ('backend.core.vuln_engine.registry', 'VulnEngine Registry'),
-    ('backend.core.vuln_engine.payload_generator', 'VulnEngine Payloads'),
-    ('backend.core.vuln_engine.ai_prompts', 'VulnEngine AI Prompts'),
-    ('backend.core.vuln_engine.pentest_playbook', 'VulnEngine Playbook'),
-    ('backend.core.vuln_engine.system_prompts', 'Anti-Hallucination Prompts'),
-    ('backend.core.vuln_engine.testers.injection', 'Tester: Injection'),
-    ('backend.core.vuln_engine.testers.auth', 'Tester: Auth'),
-    ('backend.core.vuln_engine.testers.authorization', 'Tester: Authorization'),
-    ('backend.core.vuln_engine.testers.client_side', 'Tester: Client-Side'),
-    ('backend.core.vuln_engine.testers.file_access', 'Tester: File Access'),
-    ('backend.core.vuln_engine.testers.infrastructure', 'Tester: Infrastructure'),
-    ('backend.core.vuln_engine.testers.request_forgery', 'Tester: Request Forgery'),
-    ('backend.core.vuln_engine.testers.advanced_injection', 'Tester: Advanced Injection'),
-    ('backend.core.vuln_engine.testers.logic', 'Tester: Logic'),
-    ('backend.core.vuln_engine.testers.data_exposure', 'Tester: Data Exposure'),
-    ('backend.core.vuln_engine.testers.cloud_supply', 'Tester: Cloud/Supply Chain'),
-    ('backend.core.vuln_engine.testers.base_tester', 'Tester: Base Class'),
-]
-
-# === Agent Core (14) ===
-agent_modules = [
-    ('backend.core.autonomous_agent', 'Autonomous Agent'),
-    ('backend.core.agent_memory', 'Agent Memory'),
-    ('backend.core.response_verifier', 'Response Verifier'),
-    ('backend.core.task_library', 'Task Library'),
-    ('backend.core.execution_history', 'Execution History'),
-    ('backend.core.methodology_loader', 'Methodology Loader'),
-    ('backend.core.ai_pentest_agent', 'AI Pentest Agent'),
-    ('backend.core.ai_prompt_processor', 'AI Prompt Processor'),
-    ('backend.core.autonomous_scanner', 'Autonomous Scanner'),
-    ('backend.core.recon_integration', 'Recon Integration'),
-    ('backend.core.report_generator', 'Report Generator (Backend)'),
-    ('backend.core.tool_executor', 'Tool Executor'),
-    ('backend.core.prompt_engine.parser', 'Prompt Engine Parser'),
-    ('backend.core.report_engine.generator', 'Report Engine Generator'),
-]
-
-# === Validation Pipeline (6) ===
-validation_modules = [
-    ('backend.core.negative_control', 'Negative Control Engine'),
-    ('backend.core.proof_of_execution', 'Proof of Execution'),
-    ('backend.core.confidence_scorer', 'Confidence Scorer'),
-    ('backend.core.validation_judge', 'Validation Judge'),
-    ('backend.core.access_control_learner', 'Access Control Learner'),
-    ('backend.core.adaptive_learner', 'Adaptive Learner'),
-]
-
-# === Agent Autonomy (5) ===
-autonomy_modules = [
-    ('backend.core.request_engine', 'Request Engine'),
-    ('backend.core.waf_detector', 'WAF Detector'),
-    ('backend.core.strategy_adapter', 'Strategy Adapter'),
-    ('backend.core.chain_engine', 'Chain Engine'),
-    ('backend.core.auth_manager', 'Auth Manager'),
-]
-
-# === AI Reasoning & Intelligence (8) ===
-intelligence_modules = [
-    ('backend.core.token_budget', 'Token Budget'),
-    ('backend.core.reasoning_engine', 'Reasoning Engine'),
-    ('backend.core.agent_tasks', 'Agent Tasks'),
-    ('backend.core.endpoint_classifier', 'Endpoint Classifier'),
-    ('backend.core.cve_hunter', 'CVE Hunter'),
-    ('backend.core.deep_recon', 'Deep Recon'),
-    ('backend.core.banner_analyzer', 'Banner Analyzer'),
-    ('backend.core.param_analyzer', 'Param Analyzer'),
-]
-
-# === Testing & Exploitation (8) ===
-testing_modules = [
-    ('backend.core.payload_mutator', 'Payload Mutator'),
-    ('backend.core.xss_context_analyzer', 'XSS Context Analyzer'),
-    ('backend.core.xss_validator', 'XSS Validator'),
-    ('backend.core.poc_generator', 'PoC Generator'),
-    ('backend.core.exploit_generator', 'Exploit Generator'),
-    ('backend.core.poc_validator', 'PoC Validator'),
-    ('backend.core.request_repeater', 'Request Repeater'),
-    ('backend.core.site_analyzer', 'Site Analyzer'),
-]
-
-# === Multi-Agent & Orchestration (9) ===
-multiagent_modules = [
-    ('backend.core.agent_base', 'Specialist Agent Base'),
-    ('backend.core.specialist_agents', 'Specialist Agents'),
-    ('backend.core.agent_orchestrator', 'Agent Orchestrator'),
-    ('backend.core.researcher_agent', 'Researcher AI Agent'),
-    ('backend.core.vuln_orchestrator', 'Vuln Orchestrator'),
-    ('backend.core.vuln_type_agent', 'Vuln Type Agent'),
-    ('backend.core.cli_agent_runner', 'CLI Agent Runner'),
-    ('backend.core.cli_output_parser', 'CLI Output Parser'),
-    ('backend.core.cli_instructions_builder', 'CLI Instructions Builder'),
-]
-
-# === RAG System (5) ===
-rag_modules = [
-    ('backend.core.rag.engine', 'RAG Engine'),
-    ('backend.core.rag.vectorstore', 'RAG VectorStore'),
-    ('backend.core.rag.few_shot', 'RAG Few-Shot'),
-    ('backend.core.rag.reasoning_templates', 'RAG Reasoning Templates'),
-    ('backend.core.rag.reasoning_memory', 'RAG Reasoning Memory'),
-]
-
-# === Smart Router (5) ===
-router_modules = [
-    ('backend.core.smart_router', 'Smart Router Package'),
-    ('backend.core.smart_router.provider_registry', 'Provider Registry'),
-    ('backend.core.smart_router.router', 'Router Core'),
-    ('backend.core.smart_router.token_extractor', 'Token Extractor'),
-    ('backend.core.smart_router.token_refresher', 'Token Refresher'),
-]
-
-# === Kali Sandbox (3) ===
-kali_modules = [
-    ('core.tool_registry', 'Tool Registry (56 tools)'),
-    ('core.kali_sandbox', 'Kali Sandbox'),
-    ('core.container_pool', 'Container Pool'),
-]
-
-# === Operations (3) ===
-operations_modules = [
-    ('backend.core.checkpoint_manager', 'Checkpoint Manager'),
-    ('backend.core.notification_manager', 'Notification Manager'),
-    ('backend.core.knowledge_processor', 'Knowledge Processor'),
-]
-
-all_groups = [
-    ('Core Platform', core_modules),
-    ('API Layer', api_modules),
-    ('VulnEngine', vuln_modules),
-    ('Agent Core', agent_modules),
-    ('Validation Pipeline', validation_modules),
-    ('Agent Autonomy', autonomy_modules),
-    ('AI Reasoning & Intelligence', intelligence_modules),
-    ('Testing & Exploitation', testing_modules),
-    ('Multi-Agent & Orchestration', multiagent_modules),
-    ('RAG System', rag_modules),
-    ('Smart Router', router_modules),
-    ('Kali Sandbox', kali_modules),
-    ('Operations', operations_modules),
-]
-
-total = 0
-errors = 0
-for group_name, modules in all_groups:
-    print(f'  --- {group_name} ---')
-    for mod, name in modules:
-        total += 1
-        try:
-            __import__(mod)
-            print(f'  OK   {name}')
-        except Exception as e:
-            err_short = str(e).split(chr(10))[0][:80]
-            print(f'  WARN {name}: {err_short}')
-            errors += 1
-
-print(f'\n  {total - errors}/{total} modules loaded ({errors} warnings)')
-" 2>&1 || true
-
-  # Validate knowledge base
-  step "Validating knowledge base..."
-  $PYTHON -c "
-import json, os
-kb_path = os.path.join('$PROJECT_DIR', 'data', 'vuln_knowledge_base.json')
-if os.path.exists(kb_path):
-    kb = json.load(open(kb_path))
-    types = len(kb.get('vulnerability_types', {}))
-    insights = len(kb.get('xbow_insights', kb.get('attack_insights', {})))
-    print(f'  OK  Knowledge base: {types} vuln types, {insights} insight categories')
-else:
-    print('  WARN Knowledge base not found at data/vuln_knowledge_base.json')
-" 2>&1 || true
-
-  # Validate VulnEngine coverage
-  step "Validating VulnEngine coverage..."
-  $PYTHON -c "
-from backend.core.vuln_engine.registry import VulnerabilityRegistry
-from backend.core.vuln_engine.payload_generator import PayloadGenerator
-from backend.core.vuln_engine.ai_prompts import VULN_AI_PROMPTS
-from backend.core.vuln_engine.pentest_playbook import PENTEST_PLAYBOOK, get_testing_prompts
-from backend.core.vuln_engine.system_prompts import CONTEXT_PROMPTS, VULN_TYPE_PROOF_REQUIREMENTS
-r = VulnerabilityRegistry()
-p = PayloadGenerator()
-total_payloads = sum(len(v) for v in p.payload_libraries.values())
-total_prompts = sum(len(get_testing_prompts(v)) for v in PENTEST_PLAYBOOK)
-# Count AI prompt builder functions (deep test + stream prompts)
-import inspect, backend.core.vuln_engine.ai_prompts as ap
-prompt_funcs = [n for n, f in inspect.getmembers(ap, inspect.isfunction) if n.startswith('get_')]
-print(f'  OK  Registry: {len(r.VULNERABILITY_INFO)} types, {len(r.TESTER_CLASSES)} testers')
-print(f'  OK  Payloads: {total_payloads} across {len(p.payload_libraries)} categories')
-print(f'  OK  AI Prompts: {len(VULN_AI_PROMPTS)} per-vuln + {len(prompt_funcs)} builder functions')
-print(f'  OK  Playbook: {len(PENTEST_PLAYBOOK)} vuln types, {total_prompts} testing prompts')
-print(f'  OK  System Prompts: {len(CONTEXT_PROMPTS)} contexts, {len(VULN_TYPE_PROOF_REQUIREMENTS)} proof reqs')
-" 2>&1 || true
-
-  # Validate RAG system
-  step "Validating RAG system..."
-  $PYTHON -c "
-from backend.core.rag.reasoning_templates import REASONING_TEMPLATES
-from backend.core.rag.few_shot import FewShotSelector
-fs = FewShotSelector()
-curated = getattr(fs, '_curated_examples', {})
-total_ex = sum(len(ex) for cat in curated.values() if isinstance(cat, dict) for ex in cat.values() if isinstance(ex, list))
-print(f'  OK  Reasoning Templates: {len(REASONING_TEMPLATES)} vuln types')
-print(f'  OK  Few-Shot Examples: {len(curated)} categories, {total_ex} curated TP/FP examples')
+  # Quick validation
+  $PY -c "
+import sys; sys.path.insert(0,'$DIR')
+mods = ['backend.main','backend.config','backend.core.autonomous_agent','backend.core.md_agent',
+        'backend.core.smart_router.router','backend.core.vuln_engine.registry']
+ok=err=0
+for m in mods:
+    try: __import__(m); ok+=1
+    except Exception as e: err+=1; print(f'  WARN {m}: {type(e).__name__}: {str(e)[:80]}')
+print(f'  {ok}/{ok+err} core modules OK')
 " 2>&1 || true
 fi
 
-# ============================================================================
-# 4. Frontend dependencies
-# ============================================================================
-if [ "$BACKEND_ONLY" = false ]; then
-  header "Setting up React frontend"
-
-  cd "$FRONTEND_DIR"
-
-  if [ ! -d "node_modules" ] || [ "$FORCE_INSTALL" = true ]; then
-    step "Installing frontend dependencies..."
+# ── 4. Frontend setup ───────────────────────────────────────────────
+if [ "$BACK_ONLY" = false ]; then
+  header "Frontend"
+  cd "$FRONT"
+  if [ ! -d "node_modules" ] || [ "$INSTALL" = true ]; then
     npm install --silent 2>&1 | tail -3
-    step "Frontend dependencies installed"
+    ok "Dependencies installed"
   else
-    step "node_modules present (use --install to force)"
+    ok "Dependencies cached"
   fi
-
-  cd "$PROJECT_DIR"
+  cd "$DIR"
 fi
 
-# ============================================================================
-# 5. Launch backend
-# ============================================================================
-if [ "$FRONTEND_ONLY" = false ]; then
-  header "Starting FastAPI backend (port 8000)"
+# ── 5. Launch backend ───────────────────────────────────────────────
+if [ "$FRONT_ONLY" = false ]; then
+  header "Starting backend :$PORT"
+  source "$VENV/bin/activate"
+  set -a; source "$DIR/.env"; set +a
 
-  source "$VENV_DIR/bin/activate"
+  [ -n "$MODEL" ] && export DEFAULT_LLM_MODEL="$MODEL" && ok "Model: $MODEL"
 
-  # Export env vars
-  set -a
-  source "$PROJECT_DIR/.env"
-  set +a
+  PYTHONPATH="$DIR" uvicorn backend.main:app \
+    --host 0.0.0.0 --port $PORT --reload --log-level info \
+    > "$LOGS/backend.log" 2>&1 &
+  echo $! > "$PIDS/backend.pid"
+  ok "PID: $(cat "$PIDS/backend.pid")"
 
-  PYTHONPATH="$PROJECT_DIR" uvicorn backend.main:app \
-    --host 0.0.0.0 \
-    --port 8000 \
-    --reload \
-    --log-level info \
-    > "$LOGS_DIR/backend.log" 2>&1 &
-
-  BACKEND_PID=$!
-  echo "$BACKEND_PID" > "$PID_DIR/backend.pid"
-  step "Backend started (PID: $BACKEND_PID)"
-  step "Backend logs: $LOGS_DIR/backend.log"
-
-  # Wait for backend to be ready
-  step "Waiting for backend..."
   for i in $(seq 1 15); do
-    if curl -s http://localhost:8000/docs >/dev/null 2>&1; then
-      step "Backend is ready"
-      break
-    fi
-    if [ $i -eq 15 ]; then
-      warn "Backend may still be starting. Check logs."
-    fi
+    curl -s "http://localhost:$PORT/docs" >/dev/null 2>&1 && break; [ "$i" -lt 15 ] || echo "  Backend not responding yet; check $LOGS/backend.log" >&2
     sleep 1
   done
 fi
 
-# ============================================================================
-# 6. Launch frontend
-# ============================================================================
-if [ "$BACKEND_ONLY" = false ]; then
-  header "Starting React frontend (port 3000)"
-
-  cd "$FRONTEND_DIR"
-
-  if [ "$PRODUCTION_BUILD" = true ]; then
-    step "Building production frontend..."
-    npm run build 2>&1 | tail -5
-    step "Build complete. Serving from dist/"
-    npx vite preview --port 3000 \
-      > "$LOGS_DIR/frontend.log" 2>&1 &
+# ── 6. Launch frontend ──────────────────────────────────────────────
+if [ "$BACK_ONLY" = false ]; then
+  header "Starting frontend :$FPORT"
+  cd "$FRONT"
+  if [ "$BUILD" = true ]; then
+    npm run build 2>&1 | tail -3
+    npx vite preview --port $FPORT > "$LOGS/frontend.log" 2>&1 &
   else
-    step "Starting development server..."
-    npx vite --port 3000 \
-      > "$LOGS_DIR/frontend.log" 2>&1 &
+    npx vite --port $FPORT > "$LOGS/frontend.log" 2>&1 &
   fi
-
-  FRONTEND_PID=$!
-  echo "$FRONTEND_PID" > "$PID_DIR/frontend.pid"
-  step "Frontend started (PID: $FRONTEND_PID)"
-  step "Frontend logs: $LOGS_DIR/frontend.log"
-
-  cd "$PROJECT_DIR"
-
-  # Wait for frontend
-  for i in $(seq 1 10); do
-    if curl -s http://localhost:3000 >/dev/null 2>&1; then
-      break
-    fi
-    sleep 1
-  done
+  echo $! > "$PIDS/frontend.pid"
+  ok "PID: $(cat "$PIDS/frontend.pid")"
+  cd "$DIR"
 fi
 
-# ============================================================================
-# 7. Summary
-# ============================================================================
-header "NeuroSploit v3 is running"
-
+# ── 7. Summary ──────────────────────────────────────────────────────
 echo ""
-if [ "$FRONTEND_ONLY" = false ]; then
-  echo -e "  ${GREEN}Backend API:${NC}    http://localhost:8000"
-  echo -e "  ${GREEN}API Docs:${NC}       http://localhost:8000/docs"
-  echo -e "  ${GREEN}Scheduler API:${NC}  http://localhost:8000/api/v1/scheduler/"
-  echo -e "  ${GREEN}VulnLab API:${NC}    http://localhost:8000/api/v1/vuln-lab/"
-fi
-if [ "$BACKEND_ONLY" = false ]; then
-  echo -e "  ${GREEN}Frontend UI:${NC}    http://localhost:3000"
-fi
+echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}"
+echo -e "${G}  NeuroSploit v3 — Agent-First AI Pentest Platform${N}"
+echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}"
 echo ""
-echo -e "  ${BLUE}Logs:${NC}"
-[ "$FRONTEND_ONLY" = false ] && echo -e "    Backend:  tail -f $LOGS_DIR/backend.log"
-[ "$BACKEND_ONLY" = false ]  && echo -e "    Frontend: tail -f $LOGS_DIR/frontend.log"
+[ "$FRONT_ONLY" = false ] && {
+  echo -e "  ${G}API${N}        http://localhost:$PORT"
+  echo -e "  ${G}Docs${N}       http://localhost:$PORT/docs"
+  echo -e "  ${G}Model${N}      ${MODEL:-claude-sonnet-4-6-20250918}"
+}
+[ "$BACK_ONLY" = false ] && echo -e "  ${G}Frontend${N}   http://localhost:$FPORT"
 echo ""
-echo -e "  ${YELLOW}Stop:${NC}  $0 (re-run kills previous)"
-echo -e "         kill \$(cat $PID_DIR/backend.pid) \$(cat $PID_DIR/frontend.pid)"
+echo -e "  ${B}Architecture${N}"
+echo -e "  ├─ 108 AI agents (real HTTP testing, PLAN→EXECUTE→ANALYZE)"
+echo -e "  ├─ 100 vulnerability types + validation pipeline"
+echo -e "  ├─ Claude 4.6: Opus, Sonnet 4.6, Sonnet 4.5, Haiku 4.5"
+echo -e "  ├─ 20 LLM providers (auto-failover)"
+echo -e "  └─ Agent-first flow: Recon (20%) → Agent Grid (65%) → Report (15%)"
 echo ""
-echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${GREEN}  NeuroSploit v3 - Autonomous AI Penetration Testing Platform${NC}"
-echo -e "${GREEN}  116 modules | 100 vuln types | 18 API routes | 18 frontend pages${NC}"
-echo -e ""
-echo -e "  ${BLUE}VulnEngine (100-Type):${NC}"
-echo -e "  - Registry:           100 vuln types, 526+ payloads, 100 testers"
-echo -e "  - AI Prompts:         100 per-vuln decision prompts + pentest playbook"
-echo -e "  - System Prompts:     12 anti-hallucination composable prompts"
-echo -e "  - Methodology:        Deep injection from .md methodology files"
-echo -e "  - Knowledge Base:     100 vuln types + RAG-indexed insights"
-echo -e ""
-echo -e "  ${BLUE}Autonomous Agent (AI-Powered Pentester):${NC}"
-echo -e "  - Auto Pentest:       3 AI-parallel streams (recon + junior + tools)"
-echo -e "  - AI Master Plan:     Pre-stream strategic planning (target profiling)"
-echo -e "  - AI Deep Test:       Iterative OBSERVE->PLAN->EXECUTE->ANALYZE->ADAPT"
-echo -e "  - AI Recon Analysis:  Endpoint prioritization, hidden surface probing"
-echo -e "  - AI Payload Gen:     Context-aware payloads per endpoint x vuln_type"
-echo -e "  - AI Tool Analysis:   Tool output analysis for real findings vs noise"
-echo -e "  - Full IA Testing:    Methodology-driven comprehensive sessions"
-echo -e "  - Multi-Session:      Up to 5 concurrent scans"
-echo -e "  - Pause/Resume/Stop:  Real-time scan control with fast cancel"
-echo -e "  - Checkpoint Manager: Crash-resilient scan state save/restore"
-echo -e "  - Recon Integration:  40+ tools (subfinder, amass, nuclei, ffuf)"
-echo -e "  - WAF Detection:      16 signatures, 12 bypass techniques"
-echo -e "  - Strategy Adapter:   Dead endpoints, diminishing returns, recompute"
-echo -e "  - Chain Engine:       10 chain rules, exploit chaining, attack graph"
-echo -e "  - Auth Manager:       Multi-user, login form detection, session mgmt"
-echo -e "  - Request Engine:     Retry, rate limit, circuit breaker, adaptive"
-echo -e "  - Request Repeater:   Burp-like send/compare/replay/validate"
-echo -e "  - Site Analyzer:      BFS crawl, JS sink detection, AI architecture"
-echo -e ""
-echo -e "  ${BLUE}Validation Pipeline (Anti-FP):${NC}"
-echo -e "  - Negative Controls:  Benign/empty/no-param baseline comparison"
-echo -e "  - Proof of Execution: 25+ per-vuln-type proof methods"
-echo -e "  - Confidence Scorer:  Numeric 0-100 with breakdown"
-echo -e "  - Validation Judge:   Sole authority (controls+proof+AI+score)"
-echo -e "  - Access Control:     Adaptive TP/FP learning, 9 patterns"
-echo -e "  - Adaptive Learner:   Cross-scan TP/FP learning (100 vuln types)"
-echo -e ""
-echo -e "  ${BLUE}AI Reasoning & Intelligence:${NC}"
-echo -e "  - ReACT Engine:       Think/plan/reflect reasoning loop"
-echo -e "  - Token Budget:       Budget tracking with graceful degradation"
-echo -e "  - Endpoint Classifier: 8 types with risk scoring"
-echo -e "  - CVE Hunter:         NVD API + GitHub exploit search"
-echo -e "  - Deep Recon:         JS crawling, sitemap, robots, API enum"
-echo -e "  - Banner Analyzer:    80 known CVEs, 19 EOL versions"
-echo -e "  - Param Analyzer:     8 semantic categories, risk ranking"
-echo -e ""
-echo -e "  ${BLUE}Testing & Exploitation:${NC}"
-echo -e "  - Payload Mutator:    14 mutation strategies, failure analysis"
-echo -e "  - XSS Validator:      Playwright popup/cookie/DOM/event/CSP"
-echo -e "  - XSS Context:        8 context checks (attribute, script, etc.)"
-echo -e "  - Exploit Generator:  AI-enhanced PoC, zero-day hypothesis"
-echo -e "  - PoC Validator:      HTTP replay, per-vuln markers, static analysis"
-echo -e "  - PoC Generator:      20+ per-type exploit code generators"
-echo -e ""
-echo -e "  ${BLUE}Multi-Agent & Orchestration:${NC}"
-echo -e "  - 5 Specialists:      Recon, Exploit, Validator, CVEHunter, Report"
-echo -e "  - Orchestrator:       3-phase pipeline coordinator with handoffs"
-echo -e "  - Researcher AI:      Hypothesis-driven 0-day discovery with Kali"
-echo -e "  - Vuln Orchestrator:  Per-vuln-type parallel agent orchestration"
-echo -e "  - Vuln Type Agents:   Specialist agents per vulnerability type"
-echo -e ""
-echo -e "  ${BLUE}CLI Agent (AI CLI inside Kali):${NC}"
-echo -e "  - 3 Providers:        Claude Code, Gemini CLI, Codex CLI"
-echo -e "  - Standalone Mode:    CLI Agent runs full pentest autonomously"
-echo -e "  - Auto Pentest Phase: Optional CLI agent phase in auto pentest"
-echo -e "  - 3-Tier Parsing:     JSON markers + regex + AI extraction"
-echo -e "  - OAuth Integration:  SmartRouter token injection into container"
-echo -e ""
-echo -e "  ${BLUE}RAG System:${NC}"
-echo -e "  - VectorStore:        BM25/TF-IDF/ChromaDB backends"
-echo -e "  - Few-Shot:           Curated TP/FP examples for 15+ vuln types"
-echo -e "  - Reasoning Templates: Structured CoT for 18 vuln types"
-echo -e "  - Reasoning Memory:   Cross-scan pseudo-fine-tuning"
-echo -e ""
-echo -e "  ${BLUE}Smart Router (20 Providers):${NC}"
-echo -e "  - 8 CLI OAuth:        Claude, Gemini, Copilot, Cursor, etc."
-echo -e "  - 11 API Providers:   Anthropic, OpenAI, Google, OpenRouter, etc."
-echo -e "  - Tier Failover:      Auto round-robin with quota tracking"
-echo -e "  - Token Refresh:      Auto CLI token re-extraction + OAuth refresh"
-echo -e ""
-echo -e "  ${BLUE}Kali Sandbox (Container-Per-Scan):${NC}"
-echo -e "  - Tool Registry:      56 tools (16 pre-installed + 40 on-demand)"
-echo -e "  - Container Pool:     Max concurrent, TTL, orphan cleanup"
-echo -e "  - VPN Support:        OpenVPN/WireGuard per-container tunnels"
-echo -e "  - Researcher AI:      AI-driven tool selection and execution"
-echo -e ""
-echo -e "  ${BLUE}Platform & Operations:${NC}"
-echo -e "  - 18 API Routes:      Agent, Scans, VulnLab, Terminal, Full IA, etc."
-echo -e "  - 18 Frontend Pages:  Auto Pentest, VulnLab, Terminal, Dashboard, etc."
-echo -e "  - Terminal Agent:     AI chat + Kali sandbox + VPN integration"
-echo -e "  - Vuln Lab:           100 types, PortSwigger/CTF/custom targets"
-echo -e "  - Knowledge Manager:  Upload/index custom security documents"
-echo -e "  - Notifications:      Discord, Telegram, WhatsApp/Twilio alerts"
-echo -e "  - Scheduler:          Cron & interval scheduling"
-echo -e "  - Benchmark:          104 CTF challenges for accuracy testing"
-echo -e "  - AI Reports:         Dual HTML+JSON with per-finding AI analysis"
-echo -e "  - MCP Server:         12 tools (screenshot, dns, port scan, etc.)"
-echo -e "  - Reset DB:           ./rebuild.sh --reset-db (schema changes)"
-echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "  ${B}Auto Pentest Flow${N}"
+echo -e "   0-20%   Recon: endpoints, tech stack, WAF, params, CVEs"
+echo -e "  20-85%   Agent Grid: 108 agents execute real HTTP tests"
+echo -e "  85-100%  Finalization: chains, screenshots, AI report"
+echo ""
+echo -e "  ${Y}Logs${N}   tail -f $LOGS/backend.log"
+echo -e "  ${Y}Stop${N}   kill \$(cat $PIDS/backend.pid $PIDS/frontend.pid 2>/dev/null)"
+echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}"
 echo ""
 
-# Keep script running so bg processes stay alive
 wait