diff --git a/.gitignore b/.gitignore index 8440c3c..db1d09b 100644 --- a/.gitignore +++ b/.gitignore @@ -34,18 +34,12 @@ data/*.db data/*.db.* data/execution_history.json data/access_control_learning.json -data/adaptive_learning.json -data/providers.json -data/reasoning_memory.json -data/vectorstore/ -data/custom-knowledge/uploads/ data/reports/ # ============================== # Reports & Screenshots # ============================== reports/screenshots/ -reports/*.json # ============================== # Logs & PIDs @@ -84,9 +78,3 @@ docker/*.env # Results (runtime output) # ============================== results/ - -# ============================== -# Large binary files -# ============================== -projeto.zip -*.zip diff --git a/backend/api/v1/agent.py b/backend/api/v1/agent.py index a7b34fc..2405b91 100755 --- a/backend/api/v1/agent.py +++ b/backend/api/v1/agent.py @@ -18,6 +18,27 @@ from datetime import datetime from enum import Enum from urllib.parse import urlparse + +def _safe_cvss_score(val) -> float: + """Sanitize cvss_score: convert to float, default 0.0 for non-numeric.""" + if val is None: + return 0.0 + if isinstance(val, (int, float)): + return float(val) + try: + return float(val) + except (ValueError, TypeError): + return 0.0 + + +def _safe_cvss_vector(val) -> str: + """Sanitize cvss_vector: return empty string for N/A or invalid values.""" + if not val or not isinstance(val, str): + return "" + if val.strip().upper().startswith("N/A") or len(val.strip()) < 5: + return "" + return val[:100] + from backend.core.autonomous_agent import AutonomousAgent, OperationMode from backend.core.task_library import get_task_library from backend.db.database import async_session_factory @@ -123,7 +144,7 @@ class AgentRequest(BaseModel): enable_kali_sandbox: bool = Field(False, description="Enable Kali Linux sandbox for tool execution + AI researcher") custom_prompt_ids: Optional[List[str]] = Field(None, description="IDs of custom prompts to include in agent 
flow") preferred_provider: Optional[str] = Field(None, description="Preferred LLM provider (e.g., 'anthropic', 'gemini_cli', 'openai')") - preferred_model: Optional[str] = Field(None, description="Preferred model name (e.g., 'claude-sonnet-4-20250514', 'gemini-2.0-flash')") + preferred_model: Optional[str] = Field(None, description="Preferred model name (e.g., 'claude-sonnet-4-6-20250918', 'claude-opus-4-6-20250918', 'gemini-2.0-flash')") methodology_file: Optional[str] = Field(None, description="Path to external .md methodology file to inject into all AI calls") enable_cli_agent: bool = Field(False, description="Enable CLI Agent (AI CLI inside Kali sandbox)") cli_agent_provider: Optional[str] = Field(None, description="CLI provider: claude_code, gemini_cli, codex_cli") @@ -431,8 +452,8 @@ async def _run_agent_task( title=finding.get("title", finding.get("type", "Unknown")), vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")), severity=severity, - cvss_score=finding.get("cvss_score"), - cvss_vector=finding.get("cvss_vector"), + cvss_score=_safe_cvss_score(finding.get("cvss_score")), + cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")), cwe_id=finding.get("cwe_id"), description=finding.get("description") or finding.get("evidence") or "", affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))), @@ -463,8 +484,8 @@ async def _run_agent_task( title=finding.get("title", finding.get("type", "Unknown")), vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")), severity=finding.get("severity", "medium").lower(), - cvss_score=finding.get("cvss_score"), - cvss_vector=finding.get("cvss_vector"), + cvss_score=_safe_cvss_score(finding.get("cvss_score")), + cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")), cwe_id=finding.get("cwe_id"), description=finding.get("description") or finding.get("evidence") or "", 
affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))), @@ -916,8 +937,8 @@ async def stop_agent(agent_id: str): title=finding.get("title", finding.get("type", "Unknown")), vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")), severity=severity, - cvss_score=finding.get("cvss_score"), - cvss_vector=finding.get("cvss_vector"), + cvss_score=_safe_cvss_score(finding.get("cvss_score")), + cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")), cwe_id=finding.get("cwe_id"), description=finding.get("description") or finding.get("evidence") or "", affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))), @@ -949,8 +970,8 @@ async def stop_agent(agent_id: str): title=finding.get("title", finding.get("type", "Unknown")), vulnerability_type=finding.get("vulnerability_type", finding.get("type", "unknown")), severity=finding.get("severity", "medium").lower(), - cvss_score=finding.get("cvss_score"), - cvss_vector=finding.get("cvss_vector"), + cvss_score=_safe_cvss_score(finding.get("cvss_score")), + cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")), cwe_id=finding.get("cwe_id"), description=finding.get("description") or finding.get("evidence") or "", affected_endpoint=finding.get("affected_endpoint", finding.get("endpoint", finding.get("url", target))), @@ -2493,8 +2514,8 @@ async def _save_realtime_findings_to_db(session_id: str, session: Dict): title=title, vulnerability_type=finding.get("vulnerability_type", "unknown"), severity=severity, - cvss_score=finding.get("cvss_score"), - cvss_vector=finding.get("cvss_vector"), + cvss_score=_safe_cvss_score(finding.get("cvss_score")), + cvss_vector=_safe_cvss_vector(finding.get("cvss_vector")), cwe_id=finding.get("cwe_id"), description=finding.get("description") or finding.get("evidence") or "", affected_endpoint=finding.get("affected_endpoint", target), diff --git a/backend/api/v1/providers.py 
b/backend/api/v1/providers.py index 995a303..bbf8125 100644 --- a/backend/api/v1/providers.py +++ b/backend/api/v1/providers.py @@ -155,6 +155,7 @@ async def test_connection(provider_id: str, account_id: str): PROVIDER_MODELS = { "claude_code": [ "claude-opus-4-6-20250918", + "claude-sonnet-4-6-20250918", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001", "claude-sonnet-4-20250514", @@ -163,6 +164,7 @@ PROVIDER_MODELS = { ], "kiro": [ "claude-opus-4-6-20250918", + "claude-sonnet-4-6-20250918", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001", "claude-sonnet-4-20250514", @@ -171,6 +173,7 @@ PROVIDER_MODELS = { ], "anthropic": [ "claude-opus-4-6-20250918", + "claude-sonnet-4-6-20250918", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001", "claude-sonnet-4-20250514", @@ -214,17 +217,18 @@ PROVIDER_MODELS = { "cursor-fast", "cursor-small", "gpt-4o", + "claude-sonnet-4-6-20250918", "claude-sonnet-4-5-20250929", - "claude-3-5-sonnet-20241022", ], "copilot": [ "gpt-4o", "gpt-4o-mini", + "claude-sonnet-4-6-20250918", "claude-sonnet-4-5-20250929", - "claude-3-5-sonnet-20241022", ], "openrouter": [ "anthropic/claude-opus-4-6", + "anthropic/claude-sonnet-4-6", "anthropic/claude-sonnet-4-5", "anthropic/claude-haiku-4-5", "anthropic/claude-sonnet-4", diff --git a/backend/api/v1/settings.py b/backend/api/v1/settings.py index d21d876..fff71d2 100755 --- a/backend/api/v1/settings.py +++ b/backend/api/v1/settings.py @@ -533,9 +533,12 @@ MODEL_CACHE_TTL = 60 # seconds # Common cloud models for dropdown suggestions CLOUD_MODELS = { "claude": [ + {"model_id": "claude-opus-4-6-20250918", "display_name": "Claude Opus 4.6", "context_length": 1000000}, + {"model_id": "claude-sonnet-4-6-20250918", "display_name": "Claude Sonnet 4.6", "context_length": 1000000}, + {"model_id": "claude-sonnet-4-5-20250929", "display_name": "Claude Sonnet 4.5", "context_length": 200000}, + {"model_id": "claude-haiku-4-5-20251001", "display_name": "Claude Haiku 4.5", "context_length": 
200000}, {"model_id": "claude-sonnet-4-20250514", "display_name": "Claude Sonnet 4", "context_length": 200000}, {"model_id": "claude-opus-4-20250514", "display_name": "Claude Opus 4", "context_length": 200000}, - {"model_id": "claude-haiku-4-20250514", "display_name": "Claude Haiku 4", "context_length": 200000}, ], "openai": [ {"model_id": "gpt-4o", "display_name": "GPT-4o", "context_length": 128000}, diff --git a/backend/core/autonomous_agent.py b/backend/core/autonomous_agent.py index 51a121d..69429d7 100755 --- a/backend/core/autonomous_agent.py +++ b/backend/core/autonomous_agent.py @@ -3921,6 +3921,7 @@ NOT_VULNERABLE: """ ) # Phase 5.5: MD-based agent orchestrator (always available) + # Agents execute REAL HTTP requests via the shared aiohttp session if HAS_MD_AGENTS: self._md_orchestrator = MdAgentOrchestrator( llm=self.llm, @@ -3929,6 +3930,9 @@ NOT_VULNERABLE: """ validation_judge=self.validation_judge, log_callback=self.log, progress_callback=self.progress_callback, + http_session=self.session, + auth_headers=dict(self.auth_headers), + cancel_fn=self.is_cancelled, ) # Researcher AI: 0-day discovery with Kali sandbox (opt-in) @@ -4630,55 +4634,24 @@ NOT_VULNERABLE: """ await self.log("warning", f" Sandbox scan error: {e}") async def _run_auto_pentest(self) -> Dict: - """Parallel auto pentest: 3 concurrent streams + deep analysis + report. + """Agent-first auto pentest: Recon → 108 AI agents with real HTTP → Report. Architecture: - Stream 1 (Recon) ──→ asyncio.Queue ──→ Stream 2 (Junior Pentester) - Stream 3 (Tool Runner) runs sandbox tools + AI-decided tools - All streams feed findings in real-time via callbacks. 
- - After parallel phase completes: - Deep Analysis: AI attack surface analysis + comprehensive 100-type testing - Finalization: Screenshots + AI enhancement + report generation + Phase 1 (0-20%): Quick recon — discover endpoints, tech, params, WAF + Phase 2 (20-85%): Agent Grid — 108 agents execute real HTTP tests + Phase 3 (85-100%): Finalization — screenshots, enhancement, report """ await self._update_progress(0, "Auto pentest starting") await self.log("info", "=" * 60) - await self.log("info", " PARALLEL AUTO PENTEST MODE") - await self.log("info", " 3 concurrent streams | AI-powered | 100 vuln types") + await self.log("info", " AGENT-FIRST AUTO PENTEST (108 AGENTS)") + await self.log("info", " Recon → Agent Grid (real HTTP) → Report | Claude 4.6") await self.log("info", "=" * 60) # Override custom_prompt with DEFAULT_ASSESSMENT_PROMPT for auto mode if not self.custom_prompt: self.custom_prompt = DEFAULT_ASSESSMENT_PROMPT - # Phase 5: Multi-agent orchestrator (if enabled, replaces 3-stream) - if self._orchestrator: - await self.log("info", " [MULTI-AGENT] Orchestrator enabled — delegating to specialist agents") - orch_result = await self._orchestrator.run( - target=self.target, - recon_data=self.recon, - initial_context={ - "headers": dict(self.auth_headers), - "technologies": self.recon.technologies, - } - ) - # Merge orchestrator findings into agent findings - for f in orch_result.get("findings", []): - if isinstance(f, Finding): - await self._add_finding(f) - await self.log("info", f" [MULTI-AGENT] Pipeline complete: " - f"{orch_result.get('findings_count', 0)} findings") - # Continue to finalization phase below - report = await self._generate_full_report() - await self._update_progress(100, "Multi-agent pentest complete") - if hasattr(self, 'execution_history') and self.execution_history: - self.execution_history.flush() - await self.log("info", "=" * 60) - await self.log("info", f" AUTO PENTEST COMPLETE: {len(self.findings)} findings") - await 
self.log("info", "=" * 60) - return report - - # Shared state for parallel streams + # Shared state (needed by some helper methods) self._endpoint_queue = asyncio.Queue() self._recon_complete = asyncio.Event() self._tools_complete = asyncio.Event() @@ -4686,133 +4659,49 @@ NOT_VULNERABLE: """ self._junior_tested_types: set = set() self._playbook_recommended_types: List[str] = [] self._current_playbook_context: str = "" - - # ── PRE-STREAM AI MASTER PLAN ── - # Before launching parallel streams, ask AI for a strategic master plan - # that provides context and direction for all 3 streams. self._master_plan: Dict = {} - if self.llm.is_available(): - try: - await self.log("info", "[MASTER PLAN] AI strategic planning before streams") - master_plan = await self._ai_master_plan() - if master_plan: - self._master_plan = master_plan - profile = master_plan.get("target_profile", "") - risk = master_plan.get("risk_assessment", "") - priority_types = master_plan.get("priority_vuln_types", []) - if profile: - await self.log("info", f" [MASTER PLAN] Profile: {profile[:120]}") - if risk: - await self.log("info", f" [MASTER PLAN] Risk: {risk[:120]}") - if priority_types: - await self.log("info", f" [MASTER PLAN] Priority: {', '.join(priority_types[:8])}") - except Exception as e: - await self.log("debug", f" [MASTER PLAN] Planning error: {e}") - # ── CONCURRENT PHASE (0-50%): 3 parallel streams ── - await asyncio.gather( - self._stream_recon(), # Stream 1: Recon pipeline - self._stream_junior_pentest(), # Stream 2: Immediate AI testing - self._stream_tool_runner(), # Stream 3: Dynamic tool execution - ) + # ══════════════════════════════════════════════════════════════ + # PHASE 1 (0-20%): RECONNAISSANCE + # Discover attack surface before dispatching agents + # ══════════════════════════════════════════════════════════════ + await self.log("info", "[RECON] Mapping attack surface...") + await self._update_progress(2, "Recon: mapping attack surface") - parallel_findings = 
len(self.findings) - await self.log("info", f" Parallel phase complete: {parallel_findings} findings, " - f"{len(self._junior_tested_types)} types pre-tested") - await self._update_progress(50, "Parallel streams complete") + # Run recon stream (endpoint discovery, tech detection, site analysis) + self._recon_complete.clear() + self._tools_complete.set() # No tool stream in agent-first mode + await self._stream_recon() - # ── REASONING CHECKPOINT at 30-50% ── - if self.reasoning_engine and self.llm.is_available(): - try: - plan = await self.reasoning_engine.plan_attack( - recon_summary=f"{len(self.recon.endpoints)} endpoints, " - f"{len(self.recon.technologies)} techs", - findings_so_far=self.findings, - tested_types=self._junior_tested_types, - progress_pct=0.50, + ep_count = len(self.recon.endpoints) + param_count = len(self.recon.parameters) if isinstance(self.recon.parameters, dict) else 0 + tech_count = len(self.recon.technologies) + form_count = len(self.recon.forms) if hasattr(self.recon, 'forms') else 0 + js_count = len(self.recon.js_files) if hasattr(self.recon, 'js_files') else 0 + sink_count = len(self.recon.js_sinks) if hasattr(self.recon, 'js_sinks') else 0 + api_count = len(self.recon.api_endpoints) if hasattr(self.recon, 'api_endpoints') else 0 + + await self.log("info", + f"[RECON] Complete: {ep_count} endpoints, {param_count} params, " + f"{tech_count} techs, {form_count} forms, {js_count} JS files, " + f"{sink_count} sinks, {api_count} API endpoints") + await self._update_progress(15, "Recon complete") + + # WAF info for agents + waf_name = "" + if hasattr(self, '_waf_result') and self._waf_result: + if hasattr(self._waf_result, 'detected_wafs') and self._waf_result.detected_wafs: + waf_name = ", ".join( + f"{w.name} ({w.confidence:.0%})" for w in self._waf_result.detected_wafs ) - if plan and plan.priority_vulns: - await self.log("info", f" [REASONING] Attack plan: " - f"focus on {', '.join(plan.priority_vulns[:5])}") - # Feed reasoning priorities 
into the remaining test plan - for vtype in plan.priority_vulns: - if vtype not in self._junior_tested_types: - self._junior_tested_types.discard(vtype) # ensure retested - except Exception as e: - await self.log("debug", f" [REASONING] Plan error: {e}") + elif isinstance(self._waf_result, dict): + waf_name = self._waf_result.get("waf_name", "") + if waf_name: + await self.log("warning", f"[WAF] Detected: {waf_name} — agents will adapt payloads") - # ── STRATEGY CHECKPOINT at 50% ── - if self.strategy: - try: - strat_update = await self.strategy.checkpoint_refine( - progress_pct=0.50, - findings=self.findings, - tested_types=self._junior_tested_types, - all_endpoints=[ep for ep in self.recon.endpoints], - llm=self.llm if self.llm.is_available() else None, - budget=self.token_budget, - ) - if strat_update.get("message"): - await self.log("info", f" [STRATEGY] {strat_update['message']}") - except Exception as e: - await self.log("debug", f" [STRATEGY] Checkpoint error: {e}") - - # ── DEEP ANALYSIS PHASE (50-75%): Full testing with complete context ── - await self.log("info", "[DEEP] AI Attack Surface Analysis + Comprehensive Testing") - attack_plan = await self._ai_analyze_attack_surface() - - # Merge AI-recommended types with default plan + playbook recommendations - default_plan = self._default_attack_plan() - ai_types = attack_plan.get("priority_vulns", []) - playbook_types = self._playbook_recommended_types[:15] if self._playbook_recommended_types else [] - all_types = default_plan["priority_vulns"] - merged_types = list(dict.fromkeys(ai_types + playbook_types + all_types)) - - # Remove types already tested by junior pentest stream - remaining = [t for t in merged_types if t not in self._junior_tested_types] - attack_plan["priority_vulns"] = remaining - await self.log("info", f" {len(remaining)} remaining types " - f"({len(self._junior_tested_types)} already tested by junior)") - await self._update_progress(55, "Deep: attack surface analyzed") - - await 
self.log("info", "[DEEP] Comprehensive Vulnerability Testing") - await self._test_all_vulnerabilities(attack_plan) - await self._update_progress(75, "Deep testing complete") - - # ── REASONING CHECKPOINT at 75% ── - if self.reasoning_engine and self.llm.is_available(): - try: - plan = await self.reasoning_engine.plan_attack( - recon_summary=f"{len(self.recon.endpoints)} endpoints, " - f"{len(self.recon.technologies)} techs", - findings_so_far=self.findings, - tested_types=self._junior_tested_types, - progress_pct=0.75, - ) - if plan and plan.priority_vulns: - await self.log("info", f" [REASONING] 75% plan: " - f"focus on {', '.join(plan.priority_vulns[:5])}") - # Reflect on what worked so far - try: - reflection = await self.reasoning_engine.reflect( - action_taken="deep_testing_phase", - result_observed={ - "findings_count": len(self.findings), - "tested_types": len(self._junior_tested_types), - "endpoints": len(self.recon.endpoints), - } - ) - if reflection and reflection.next_suggestion: - await self.log("info", f" [REASONING] Reflection: {reflection.next_suggestion}") - except Exception: - pass - except Exception as e: - await self.log("debug", f" [REASONING] 75% plan error: {e}") - - # ── CVE HUNTING (if we found versions during recon) ── + # CVE hunting (quick, parallel with next phase) if self.cve_hunter and self.recon.technologies: try: - await self.log("info", "[CVE] Searching for known CVEs based on detected versions") cve_findings = await self.cve_hunter.hunt( headers=dict(self.auth_headers), body="", @@ -4824,7 +4713,77 @@ NOT_VULNERABLE: """ except Exception as e: await self.log("debug", f" [CVE] Hunt error: {e}") - # ── AI CHAIN DISCOVERY ── + await self._update_progress(20, "Recon + CVE complete, launching agents") + + # ══════════════════════════════════════════════════════════════ + # PHASE 2 (20-85%): AGENT GRID — 108 SPECIALISTS WITH REAL HTTP + # Each agent: LLM plans attacks → executes HTTP → LLM analyzes + # 
══════════════════════════════════════════════════════════════ + if self._md_orchestrator and not self.is_cancelled(): + try: + n_available = len(self._md_orchestrator.library.agents) + await self.log("info", "=" * 60) + await self.log("info", f" [AGENT GRID] Dispatching {n_available} specialist agents") + await self.log("info", f" Each agent: PLAN (LLM) → EXECUTE (HTTP) → ANALYZE (LLM)") + await self.log("info", "=" * 60) + + md_result = await self._md_orchestrator.run( + target=self.target, + recon_data=self.recon, + existing_findings=self.findings, + selected_agents=self.selected_md_agents, + headers=dict(self.auth_headers), + waf_info=waf_name, + ) + + # Merge agent findings into main findings via validation pipeline + md_findings_raw = md_result.get("findings", []) + md_confirmed = 0 + for mf in md_findings_raw: + if self.is_cancelled(): + break + if not isinstance(mf, dict): + continue + try: + finding = Finding( + id=str(hashlib.md5( + f"{mf.get('title', '')}{mf.get('affected_endpoint', '')}".encode() + ).hexdigest())[:12], + title=mf.get("title", "Agent Finding"), + severity=mf.get("severity", "medium"), + vulnerability_type=mf.get("vulnerability_type", "unknown"), + cvss_score=float(mf.get("cvss_score", 0.0)) if isinstance(mf.get("cvss_score"), (int, float)) else 0.0, + cwe_id=mf.get("cwe_id", ""), + description=mf.get("description", ""), + affected_endpoint=mf.get("affected_endpoint", self.target), + evidence=mf.get("evidence", ""), + poc_code=mf.get("poc_code", ""), + impact=mf.get("impact", ""), + remediation=mf.get("remediation", ""), + confidence_score={"high": 80, "medium": 50, "low": 25}.get(mf.get("confidence", "medium"), 50), + confidence=mf.get("confidence", "medium"), + ai_verified=mf.get("confidence") == "high", + ai_status="confirmed" if mf.get("confidence") == "high" else "pending", + ) + await self._add_finding(finding) + md_confirmed += 1 + except Exception as e: + await self.log("debug", f" [AGENT GRID] Finding merge error: {e}") + + 
agents_run = md_result.get("agents_run", 0) + duration = md_result.get("duration", 0) + await self.log("info", + f"[AGENT GRID] Complete: {agents_run} agents, " + f"{len(md_findings_raw)} raw findings, " + f"{md_confirmed} validated, {duration}s") + except Exception as e: + await self.log("warning", f"[AGENT GRID] Dispatch error: {e}") + else: + await self.log("warning", "[AGENT GRID] MD agent system not available") + + await self._update_progress(80, "Agent grid complete") + + # ── AI CHAIN DISCOVERY (post-agents, if we have findings) ── if self.chain_engine and len(self.findings) >= 2 and self.llm.is_available(): try: chains = await self.chain_engine.ai_discover_chains( @@ -4838,66 +4797,7 @@ NOT_VULNERABLE: """ except Exception as e: await self.log("debug", f" [CHAIN] AI discovery error: {e}") - # ── MD-BASED AGENT DISPATCH (post-recon specialist agents) ── - if self._md_orchestrator and not self.is_cancelled(): - try: - await self.log("info", "[MD-AGENTS] Dispatching specialist .md agents with recon context") - md_result = await self._md_orchestrator.run( - target=self.target, - recon_data=self.recon, - existing_findings=self.findings, - selected_agents=self.selected_md_agents, - headers=dict(self.auth_headers), - waf_info=( - self._waf_result.get("waf_name", "") - if self._waf_result else "" - ), - ) - - # Merge MD agent findings into main findings via validation - md_findings_raw = md_result.get("findings", []) - md_confirmed = 0 - for mf in md_findings_raw: - if self.is_cancelled(): - break - if not isinstance(mf, dict): - continue - try: - finding = Finding( - id=str(hashlib.md5( - f"{mf.get('title', '')}{mf.get('affected_endpoint', '')}".encode() - ).hexdigest())[:12], - title=mf.get("title", "MD Agent Finding"), - severity=mf.get("severity", "medium"), - vulnerability_type=mf.get("vulnerability_type", "unknown"), - cvss_score=mf.get("cvss_score", 0.0), - cwe_id=mf.get("cwe_id", ""), - description=mf.get("description", ""), - 
affected_endpoint=mf.get("affected_endpoint", self.target), - evidence=mf.get("evidence", ""), - poc_code=mf.get("poc_code", ""), - impact=mf.get("impact", ""), - remediation=mf.get("remediation", ""), - confidence_score=50, - confidence="medium", - ai_verified=False, - ai_status="pending", - ) - # Flow through validation pipeline - await self._add_finding(finding) - md_confirmed += 1 - except Exception as e: - await self.log("debug", f" [MD-AGENTS] Finding merge error: {e}") - - agent_summary = md_result.get("agent_results", {}) - agents_run = md_result.get("agents_run", 0) - await self.log("info", - f"[MD-AGENTS] Complete: {agents_run} agents, " - f"{len(md_findings_raw)} raw findings, " - f"{md_confirmed} submitted to validation, " - f"{md_result.get('duration', 0)}s") - except Exception as e: - await self.log("warning", f"[MD-AGENTS] Dispatch error: {e}") + await self._update_progress(85, "Chain analysis complete") # ── RESEARCHER AI (0-day discovery with Kali sandbox) ── if self._researcher and not self.is_cancelled(): @@ -6043,11 +5943,28 @@ NOT_VULNERABLE: """ prompt, system=self._get_enhanced_system_prompt("strategy") ) - start = resp_text.index('{') - end = resp_text.rindex('}') + 1 - return json.loads(resp_text[start:end]) + if not resp_text or len(resp_text.strip()) < 20: + await self.log("debug", " [AI RECON] Empty or too short response from LLM") + return {} + + # Try to find JSON in response + json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', resp_text) + if json_match: + return json.loads(json_match.group(1)) + + # Try bare JSON + start = resp_text.find('{') + end = resp_text.rfind('}') + if start >= 0 and end > start: + return json.loads(resp_text[start:end + 1]) + + await self.log("debug", " [AI RECON] No JSON found in LLM response") + return {} + except json.JSONDecodeError as e: + await self.log("debug", f" [AI RECON] JSON parse error: {e}") + return {} except Exception as e: - await self.log("debug", f" [AI RECON] Parse error: {e}") 
+ await self.log("debug", f" [AI RECON] Analysis error: {e}") return {} # ── Stream 2: Junior Pentester ── diff --git a/backend/core/md_agent.py b/backend/core/md_agent.py index 3cabb28..42c00b0 100644 --- a/backend/core/md_agent.py +++ b/backend/core/md_agent.py @@ -1,18 +1,19 @@ """ -NeuroSploit v3 - Markdown-Based Agent System +NeuroSploit v3 - Markdown-Based Agent System (Real Execution) -Each .md file in prompts/md_library/ acts as a self-contained agent definition -with its own methodology, system prompt, and output format. +Each .md file in prompts/agents/ acts as a self-contained agent definition. +Agents EXECUTE REAL HTTP TESTS against the target — not theoretical analysis. -After recon completes, the MdAgentOrchestrator dispatches each selected agent -against the target URL with full recon context. Findings flow through the -normal validation pipeline. +Cycle per agent: + 1. PLAN — LLM reads methodology + recon context → generates test plan (HTTP requests) + 2. EXECUTE — sends actual HTTP requests against the target + 3. 
ANALYZE — LLM reviews real responses → confirms/rejects with evidence Components: - MdAgentDefinition: parsed .md agent metadata - - MdAgent(SpecialistAgent): executes a single .md agent via LLM + - MdAgent(SpecialistAgent): plans, executes, and analyzes real tests - MdAgentLibrary: loads & indexes all .md agent definitions - - MdAgentOrchestrator: runs selected agents post-recon + - MdAgentOrchestrator: runs agents in phases (recon → offensive → generalist) """ import asyncio @@ -20,20 +21,25 @@ import json import logging import re import time -import uuid from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, List, Optional +from urllib.parse import urljoin, urlparse -from core.agent_base import SpecialistAgent, AgentResult +try: + import aiohttp + HAS_AIOHTTP = True +except ImportError: + HAS_AIOHTTP = False + +try: + from backend.core.agent_base import SpecialistAgent, AgentResult +except ImportError: + from core.agent_base import SpecialistAgent, AgentResult logger = logging.getLogger(__name__) # ─── Agent categories ─────────────────────────────────────────────── -# Only 'offensive' agents are dispatched during auto-pentest by default. -# Others are available on explicit selection. 
- -# General-purpose agents (from md_library) AGENT_CATEGORIES: Dict[str, str] = { "pentest_generalist": "generalist", "red_team_agent": "generalist", @@ -42,16 +48,19 @@ AGENT_CATEGORIES: Dict[str, str] = { "exploit_expert": "generalist", "cwe_expert": "generalist", "replay_attack_specialist": "generalist", + "recon_deep": "recon", "Pentestfull": "methodology", } -# All vuln-type agents default to "offensive" (handled in _load_all fallback) -# Agents that should NOT run as standalone agents (methodology files, dupes) SKIP_AGENTS = {"Pentestfull"} +RUN_ALL_BY_DEFAULT = True -# Default agents to run when none are explicitly selected: -# Run ALL vuln-type (offensive) agents — the system is designed for 100-agent dispatch -DEFAULT_OFFENSIVE_AGENTS: List[str] = [] # Empty = use all offensive agents +# Max tests per agent to execute +MAX_TESTS_PER_AGENT = 5 +# Max iterations of the plan→execute→analyze loop +MAX_ITERATIONS = 2 +# HTTP request timeout per test +REQUEST_TIMEOUT = 10 # ─── Data classes ──────────────────────────────────────────────────── @@ -59,22 +68,24 @@ DEFAULT_OFFENSIVE_AGENTS: List[str] = [] # Empty = use all offensive agents @dataclass class MdAgentDefinition: """Parsed .md agent definition.""" - name: str # filename stem (e.g. "owasp_expert") - display_name: str # human-readable (e.g. 
"OWASP Expert") - category: str # offensive / analysis / defensive / methodology - user_prompt_template: str # raw user prompt with {placeholders} - system_prompt: str # system prompt - file_path: str # absolute path to .md file - placeholders: List[str] = field(default_factory=list) # detected {vars} + name: str + display_name: str + category: str # offensive / generalist / recon / methodology + user_prompt_template: str + system_prompt: str + file_path: str + placeholders: List[str] = field(default_factory=list) -# ─── MdAgent: executes one .md agent via LLM ──────────────────────── +# ─── MdAgent: plans, executes, and analyzes real tests ─────────────── class MdAgent(SpecialistAgent): - """Executes a single .md-based agent against a target URL. + """Executes a single .md-based agent with REAL HTTP testing. - The agent fills the .md template with recon context, sends to the LLM, - then parses structured findings from the response. + Cycle: + 1. PLAN — sends methodology + recon to LLM → gets structured test plan + 2. EXECUTE — runs actual HTTP requests against the target + 3. 
ANALYZE — LLM reviews real responses, confirms findings with evidence """ def __init__( @@ -85,6 +96,9 @@ class MdAgent(SpecialistAgent): budget_allocation: float = 0.0, budget=None, validation_judge=None, + http_session=None, + auth_headers: Optional[Dict] = None, + cancel_fn: Optional[Callable] = None, ): super().__init__( name=f"md_{definition.name}", @@ -95,9 +109,12 @@ class MdAgent(SpecialistAgent): ) self.definition = definition self.validation_judge = validation_judge + self.http_session = http_session + self.auth_headers = auth_headers or {} + self.cancel_fn = cancel_fn or (lambda: False) async def run(self, context: Dict) -> AgentResult: - """Execute the .md agent against the target with recon context.""" + """Execute the full PLAN → EXECUTE → ANALYZE cycle.""" result = AgentResult(agent_name=self.name) target = context.get("target", "") @@ -105,41 +122,511 @@ class MdAgent(SpecialistAgent): result.error = "No target provided" return result - # Build prompts - user_prompt = self._build_user_prompt(context) - system_prompt = self.definition.system_prompt + # Check LLM availability upfront + if not self.llm: + result.error = "No LLM provided" + logger.warning(f"[{self.definition.name}] No LLM available — skipping") + return result - # LLM call - try: - response = await self._llm_call( - f"{system_prompt}\n\n{user_prompt}", - category="md_agent", - estimated_tokens=2000, + if not hasattr(self.llm, 'generate'): + result.error = f"LLM has no generate method (type: {type(self.llm).__name__})" + logger.warning(f"[{self.definition.name}] {result.error}") + return result + + all_findings = [] + + for iteration in range(1, MAX_ITERATIONS + 1): + if self.cancel_fn(): + break + + # ── PHASE 1: PLAN ── + plan_prompt = self._build_plan_prompt(context, iteration, all_findings) + plan_response = await self._llm_with_retry(plan_prompt) + + if not plan_response: + result.error = "LLM plan call failed after retries" + break + + tests = self._parse_test_plan(plan_response, 
target) + if not tests: + # No actionable tests — fall back to theoretical analysis + theoretical = self._parse_findings(plan_response, target) + all_findings.extend(theoretical) + break + + # ── PHASE 2: EXECUTE ── + test_results = await self._execute_tests(tests, target) + if not test_results: + break + + # ── PHASE 3: ANALYZE ── + analysis_prompt = self._build_analysis_prompt( + context, test_results, target ) - except Exception as e: - result.error = f"LLM call failed: {e}" - return result + analysis_response = await self._llm_with_retry(analysis_prompt) + if not analysis_response: + break - if not response: - result.error = "Empty LLM response" - return result + if analysis_response: + confirmed = self._parse_analysis_findings( + analysis_response, test_results, target + ) + all_findings.extend(confirmed) - # Parse findings from structured response - parsed = self._parse_findings(response, target) - result.findings = parsed + # If we found confirmed vulns, no need for another iteration + if confirmed: + break + + result.findings = all_findings result.data = { "agent_name": self.definition.display_name, "agent_category": self.definition.category, - "findings_count": len(parsed), - "raw_response_length": len(response), + "findings_count": len(all_findings), + "execution_mode": "real_http", } self.tasks_completed += 1 - return result - # ── Prompt building ────────────────────────────────────────────── + # ── LLM call with retry ───────────────────────────────────────── - def _build_user_prompt(self, context: Dict) -> str: + async def _llm_with_retry(self, prompt: str, max_retries: int = 3) -> Optional[str]: + """Call LLM with exponential backoff retry.""" + last_error = "" + for attempt in range(max_retries): + try: + result = await self.llm.generate(prompt) + if result and len(result.strip()) > 10: + return result + last_error = f"Empty/short response (len={len(result) if result else 0})" + logger.debug(f"[{self.definition.name}] {last_error}, attempt {attempt 
+ 1}") + except Exception as e: + last_error = str(e)[:200] + logger.warning(f"[{self.definition.name}] LLM error (attempt {attempt + 1}/{max_retries}): {last_error}") + + if attempt < max_retries - 1: + delay = 5 * (attempt + 1) # 5s, 10s + await asyncio.sleep(delay) + + logger.warning(f"[{self.definition.name}] All {max_retries} attempts failed: {last_error}") + return None + + # ── PLAN prompt ────────────────────────────────────────────────── + + def _build_plan_prompt( + self, context: Dict, iteration: int, previous_findings: List[Dict] + ) -> str: + """Build the planning prompt: methodology + recon → structured test plan.""" + target = context.get("target", "") + endpoints = context.get("endpoints", []) + technologies = context.get("technologies", []) + parameters = context.get("parameters", {}) + waf_info = context.get("waf_info", "") + forms = context.get("forms", []) + + # Fill the .md template with recon context for methodology + methodology = self._fill_template(context) + + # Recon summary for the LLM + endpoint_list = [] + for ep in endpoints[:12]: + if isinstance(ep, dict): + url = ep.get("url", "") + method = ep.get("method", "GET") + params = ep.get("params", []) + endpoint_list.append(f" {method} {url} params={params}") + else: + endpoint_list.append(f" GET {ep}") + + # JS sinks for DOM-related agents + js_sinks = context.get("js_sinks", []) + js_sinks_str = "" + if js_sinks: + sink_list = [] + for s in js_sinks[:5]: + if hasattr(s, 'sink_type'): + sink_list.append(f" {s.sink_type}: {getattr(s, 'code_snippet', '')[:60]}") + elif isinstance(s, dict): + sink_list.append(f" {s.get('sink_type','?')}: {s.get('code_snippet','')[:60]}") + if sink_list: + js_sinks_str = f"\nJS Sinks (DOM XSS vectors):\n" + chr(10).join(sink_list) + + # API endpoints + api_eps = context.get("api_endpoints", []) + api_str = "" + if api_eps: + api_str = f"\nAPI endpoints: {', '.join(str(a) for a in api_eps[:5])}" + + # Forms + forms_str = "" + if forms: + form_list = [] + for 
f in (forms if isinstance(forms, list) else [])[:3]: + if isinstance(f, dict): + form_list.append(f" {f.get('method','POST')} {f.get('action','?')} inputs={f.get('inputs',[])}") + if form_list: + forms_str = f"\nForms:\n" + chr(10).join(form_list) + + recon_summary = f"""Target: {target} +Tech: {', '.join(technologies[:5]) or 'Unknown'} | WAF: {waf_info or 'None'} +Endpoints ({len(endpoints)} total, showing {len(endpoint_list)}): +{chr(10).join(endpoint_list)} +Params: {json.dumps(dict(list(parameters.items())[:8]) if isinstance(parameters, dict) else {}, default=str)}{forms_str}{js_sinks_str}{api_str}""" + + previous_str = "" + if previous_findings: + previous_str = f"\n\nPrevious iteration found {len(previous_findings)} potential issues. Adapt your tests to probe deeper or try different vectors." + + system = self.definition.system_prompt or ( + f"You are a {self.definition.display_name} security testing agent. " + f"You perform REAL penetration tests by generating HTTP requests that will be executed against the target." + ) + + prompt = f"""{system} + +## Your Methodology +{methodology} + +## Reconnaissance Data +{recon_summary} +{previous_str} + +## Your Task (Iteration {iteration}/{MAX_ITERATIONS}) + +Based on your methodology and the recon data above, generate a CONCRETE test plan. +Each test must be an HTTP request that will be ACTUALLY EXECUTED against the target. 
+ +You MUST output a JSON block with this exact structure: + +```json +{{ + "reasoning": "Brief explanation of your attack strategy", + "tests": [ + {{ + "name": "Test name describing what you're checking", + "url": "Full URL to test (use target endpoints from recon)", + "method": "GET or POST", + "params": {{"param_name": "payload_value"}}, + "headers": {{"Header-Name": "value"}}, + "body": "POST body if needed (empty string for GET)", + "injection_point": "parameter|header|body", + "expected_if_vulnerable": "What to look for in the response if vulnerable" + }} + ] +}} +``` + +Rules: +- Generate {MAX_TESTS_PER_AGENT} specific tests maximum +- Use REAL endpoints from the recon data +- Use REAL parameters discovered +- Payloads must be safe for testing (no destructive operations) +- Each test targets a specific vulnerability pattern from your methodology +- Include the expected_if_vulnerable field so we can verify results +""" + return prompt + + # ── EXECUTE tests ──────────────────────────────────────────────── + + async def _execute_tests( + self, tests: List[Dict], default_target: str + ) -> List[Dict]: + """Execute HTTP requests from the test plan. 
Returns results with real responses.""" + results = [] + + # Create session if needed + own_session = False + session = self.http_session + if not session and HAS_AIOHTTP: + connector = aiohttp.TCPConnector(ssl=False) + session = aiohttp.ClientSession(connector=connector) + own_session = True + elif not session: + logger.warning(f"[{self.definition.name}] No HTTP session and aiohttp not available") + return [] + + try: + for test in tests[:MAX_TESTS_PER_AGENT]: + if self.cancel_fn(): + break + + test_url = test.get("url", default_target) + method = test.get("method", "GET").upper() + params = test.get("params", {}) + test_headers = test.get("headers", {}) + body = test.get("body", "") + test_name = test.get("name", "unnamed") + expected = test.get("expected_if_vulnerable", "") + + # Merge auth headers + req_headers = {**self.auth_headers, **test_headers} + + start = time.time() + try: + kwargs: Dict[str, Any] = { + "timeout": aiohttp.ClientTimeout(total=REQUEST_TIMEOUT), + "headers": req_headers, + "allow_redirects": False, + "ssl": False, + } + + if method == "GET": + kwargs["params"] = params + elif method == "POST": + if body: + kwargs["data"] = body + elif params: + kwargs["data"] = params + + async with session.request(method, test_url, **kwargs) as resp: + status = resp.status + resp_headers = dict(resp.headers) + resp_body = await resp.text(errors="replace") + elapsed = time.time() - start + + results.append({ + "test_name": test_name, + "url": test_url, + "method": method, + "params": params, + "payload": json.dumps(params) if params else body, + "status": status, + "response_headers": {k: v for k, v in list(resp_headers.items())[:15]}, + "body_preview": resp_body[:2000], + "body_length": len(resp_body), + "response_time": round(elapsed, 3), + "expected_if_vulnerable": expected, + }) + + except asyncio.TimeoutError: + results.append({ + "test_name": test_name, + "url": test_url, + "method": method, + "status": 0, + "body_preview": "TIMEOUT", + 
"body_length": 0, + "response_time": REQUEST_TIMEOUT, + "expected_if_vulnerable": expected, + }) + except Exception as e: + results.append({ + "test_name": test_name, + "url": test_url, + "method": method, + "status": 0, + "body_preview": f"ERROR: {str(e)[:200]}", + "body_length": 0, + "response_time": 0, + "expected_if_vulnerable": expected, + }) + + # Small delay between requests to avoid hammering + await asyncio.sleep(0.15) + + finally: + if own_session: + await session.close() + + return results + + # ── ANALYZE prompt ─────────────────────────────────────────────── + + def _build_analysis_prompt( + self, context: Dict, test_results: List[Dict], target: str + ) -> str: + """Build the analysis prompt: real HTTP responses → confirmed findings.""" + vuln_type = self.definition.name + + results_summary = [] + for tr in test_results[:MAX_TESTS_PER_AGENT]: + results_summary.append({ + "test_name": tr["test_name"], + "url": tr.get("url", ""), + "method": tr.get("method", ""), + "status": tr.get("status", 0), + "response_time": tr.get("response_time", 0), + "body_preview": tr.get("body_preview", "")[:1200], + "body_length": tr.get("body_length", 0), + "response_headers": tr.get("response_headers", {}), + "expected_if_vulnerable": tr.get("expected_if_vulnerable", ""), + }) + + results_json = json.dumps(results_summary, indent=2, default=str)[:8000] + + return f"""You are a {self.definition.display_name} analyzing REAL HTTP responses from penetration tests against {target}. + +## Test Results (ACTUAL HTTP responses — not simulated) +{results_json} + +## Your Task + +Analyze each test result and determine if a REAL vulnerability was found. +You are looking at ACTUAL server responses. 
Be rigorous: + +- A vulnerability is CONFIRMED only if the response PROVES exploitation worked +- Look for: payload reflection, error messages, data leaks, behavior changes, timing anomalies +- Compare the "expected_if_vulnerable" hint with what actually appeared in the response +- Do NOT hallucinate — if the evidence is not in the response body/headers/status, it's NOT confirmed +- Status code alone is NOT proof (many 200s are normal, many 403s are WAF blocks) + +Output a JSON block: +```json +{{ + "analysis": [ + {{ + "test_name": "Name of the test", + "is_vulnerable": true/false, + "confidence": "high|medium|low", + "evidence": "Exact text/pattern from the response that proves the vulnerability", + "title": "Short vulnerability title", + "severity": "critical|high|medium|low|info", + "explanation": "Why this is a real vulnerability (reference specific response content)" + }} + ] +}} +``` + +Only include entries where is_vulnerable is true. If no vulnerabilities found, return empty analysis array. 
+Be STRICT — false positives are worse than false negatives.""" + + # ── Parse test plan from LLM ───────────────────────────────────── + + def _parse_test_plan(self, response: str, target: str) -> List[Dict]: + """Extract structured test plan from LLM plan response.""" + # Find JSON block + json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response) + if not json_match: + json_match = re.search(r'(\{[\s\S]*"tests"[\s\S]*\})', response) + + if not json_match: + return [] + + try: + plan = json.loads(json_match.group(1)) + except json.JSONDecodeError: + # Try to fix common JSON issues + try: + cleaned = re.sub(r',\s*}', '}', json_match.group(1)) + cleaned = re.sub(r',\s*]', ']', cleaned) + plan = json.loads(cleaned) + except json.JSONDecodeError: + return [] + + tests = plan.get("tests", []) + if not isinstance(tests, list): + return [] + + # Validate and normalize tests + valid_tests = [] + for t in tests[:MAX_TESTS_PER_AGENT]: + if not isinstance(t, dict): + continue + url = t.get("url", "") + if not url: + continue + # Resolve relative URLs + if url.startswith("/"): + url = urljoin(target, url) + # Ensure URL is within scope (same host) + if urlparse(url).netloc and urlparse(url).netloc != urlparse(target).netloc: + continue + t["url"] = url + t["method"] = t.get("method", "GET").upper() + if t["method"] not in ("GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"): + t["method"] = "GET" + valid_tests.append(t) + + return valid_tests + + # ── Parse analysis findings from LLM ───────────────────────────── + + def _parse_analysis_findings( + self, response: str, test_results: List[Dict], target: str + ) -> List[Dict]: + """Extract confirmed findings from LLM analysis of real responses.""" + json_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response) + if not json_match: + json_match = re.search(r'(\{[\s\S]*"analysis"[\s\S]*\})', response) + + if not json_match: + # Fall back to parsing FINDING: blocks + return 
self._parse_findings(response, target) + + try: + data = json.loads(json_match.group(1)) + except json.JSONDecodeError: + return self._parse_findings(response, target) + + findings = [] + for entry in data.get("analysis", []): + if not isinstance(entry, dict): + continue + if not entry.get("is_vulnerable"): + continue + if entry.get("confidence") not in ("high", "medium"): + continue + + evidence = entry.get("evidence", "") + test_name = entry.get("test_name", "") + + # Anti-hallucination: verify evidence exists in actual response + matched_result = None + for tr in test_results: + if tr.get("test_name") == test_name: + matched_result = tr + break + + if evidence and matched_result: + body = matched_result.get("body_preview", "") + headers_str = json.dumps(matched_result.get("response_headers", {})) + combined = body + headers_str + # Check evidence is grounded in actual response + evidence_words = [w for w in evidence.lower().split() if len(w) > 3] + if evidence_words: + grounded = sum(1 for w in evidence_words if w in combined.lower()) + if grounded < len(evidence_words) * 0.3: + logger.debug( + f"[{self.definition.name}] REJECTED: evidence not grounded " + f"for {test_name}" + ) + continue + + vuln_type = self.definition.name + + findings.append({ + "title": entry.get("title", f"{self.definition.display_name} Finding"), + "severity": entry.get("severity", "medium"), + "vulnerability_type": vuln_type, + "cvss_score": 0.0, + "cwe_id": "", + "description": entry.get("explanation", ""), + "affected_endpoint": matched_result.get("url", target) if matched_result else target, + "evidence": evidence, + "poc_code": ( + f"# Request:\n{matched_result.get('method', 'GET')} " + f"{matched_result.get('url', target)}\n" + f"# Params: {json.dumps(matched_result.get('params', {}), default=str)}\n" + f"# Response Status: {matched_result.get('status', '?')}\n" + f"# Response Body (excerpt):\n{matched_result.get('body_preview', '')[:500]}" + ) if matched_result else "", + "impact": 
entry.get("explanation", ""), + "remediation": "", + "source_agent": self.definition.display_name, + "parameter": "", + "confidence": entry.get("confidence", "medium"), + "http_evidence": { + "request_url": matched_result.get("url", "") if matched_result else "", + "request_method": matched_result.get("method", "") if matched_result else "", + "response_status": matched_result.get("status", 0) if matched_result else 0, + "response_time": matched_result.get("response_time", 0) if matched_result else 0, + } if matched_result else {}, + }) + + return findings + + # ── Template filling (for methodology context) ─────────────────── + + def _fill_template(self, context: Dict) -> str: """Fill the .md template placeholders with recon context.""" target = context.get("target", "") endpoints = context.get("endpoints", []) @@ -150,50 +637,6 @@ class MdAgent(SpecialistAgent): waf_info = context.get("waf_info", "") existing_findings = context.get("existing_findings", []) - # Build context objects for different placeholder patterns - scope_json = json.dumps({ - "target": target, - "endpoints_discovered": len(endpoints), - "technologies": technologies[:15], - "waf": waf_info or "Not detected", - }, indent=2) - - initial_info_json = json.dumps({ - "target_url": target, - "endpoints": [ - ep.get("url", ep) if isinstance(ep, dict) else str(ep) - for ep in endpoints[:30] - ], - "parameters": ( - {k: v for k, v in list(parameters.items())[:20]} - if isinstance(parameters, dict) else {} - ), - "technologies": technologies[:15], - "headers": {k: v for k, v in list(headers.items())[:10]}, - "forms": [ - {"action": f.get("action", ""), "method": f.get("method", "GET")} - for f in (forms[:10] if isinstance(forms, list) else []) - ], - }, indent=2) - - target_environment_json = json.dumps({ - "target": target, - "technology_stack": technologies[:10], - "waf": waf_info or "None detected", - "endpoints_count": len(endpoints), - "parameters_count": ( - len(parameters) if isinstance(parameters, 
dict) else 0 - ), - }, indent=2) - - existing_findings_summary = "" - if existing_findings: - existing_findings_summary = "\n".join( - f"- [{getattr(f, 'severity', 'unknown').upper()}] " - f"{getattr(f, 'title', '?')} at {getattr(f, 'affected_endpoint', '?')}" - for f in existing_findings[:20] - ) - recon_data_json = json.dumps({ "target": target, "endpoints": [ @@ -205,135 +648,87 @@ class MdAgent(SpecialistAgent): {k: v for k, v in list(parameters.items())[:20]} if isinstance(parameters, dict) else {} ), - "existing_findings": existing_findings_summary or "None yet", }, indent=2) - # Replacement map for all known placeholders + scope_json = json.dumps({ + "target": target, + "endpoints_discovered": len(endpoints), + "technologies": technologies[:15], + "waf": waf_info or "Not detected", + }, indent=2) + + existing_summary = "" + if existing_findings: + existing_summary = "\n".join( + f"- [{getattr(f, 'severity', 'unknown').upper()}] " + f"{getattr(f, 'title', '?')} at {getattr(f, 'affected_endpoint', '?')}" + for f in existing_findings[:20] + ) + replacements = { - # New vuln-type agents use these two: "{target}": target, "{recon_json}": recon_data_json, - # Legacy generalist agents use these: "{scope_json}": scope_json, - "{initial_info_json}": initial_info_json, - "{mission_objectives_json}": json.dumps({ - "primary": f"Identify and exploit vulnerabilities on {target}", - "scope": "Web application only", - "existing_findings": len(existing_findings), - }, indent=2), - "{target_environment_json}": target_environment_json, + "{initial_info_json}": recon_data_json, + "{target_environment_json}": scope_json, "{user_input}": target, - "{target_info_json}": initial_info_json, + "{target_info_json}": recon_data_json, "{recon_data_json}": recon_data_json, - "{vulnerability_details_json}": json.dumps({ - "target": target, - "known_technologies": technologies[:10], - "endpoints": [ - ep.get("url", ep) if isinstance(ep, dict) else str(ep) - for ep in endpoints[:15] - ], - 
}, indent=2), - "{traffic_logs_json}": json.dumps({ - "target": target, - "note": "Live traffic analysis - test authentication replay on discovered endpoints", - "endpoints": [ - ep.get("url", ep) if isinstance(ep, dict) else str(ep) - for ep in endpoints[:10] - ], - }, indent=2), + "{mission_objectives_json}": json.dumps({ + "primary": f"Test {target} for vulnerabilities", + "existing_findings": len(existing_findings), + }), + "{vulnerability_details_json}": recon_data_json, + "{traffic_logs_json}": json.dumps({"target": target}), "{code_vulnerability_json}": json.dumps({ - "target": target, - "technologies": technologies[:10], - "note": "Analyze target for CWE weaknesses based on observed behavior", - }, indent=2), + "target": target, "technologies": technologies[:10], + }), } - # Apply replacements prompt = self.definition.user_prompt_template for placeholder, value in replacements.items(): prompt = prompt.replace(placeholder, value) - # Inject recon context appendix if any placeholders remain unfilled - if "{" in prompt: - prompt += f"\n\n**Recon Context:**\n{recon_data_json}" + return prompt[:2000] # Cap methodology length to save tokens - return prompt - - # ── Finding parsing ────────────────────────────────────────────── + # ── Legacy finding parsing (fallback for theoretical responses) ─── def _parse_findings(self, response: str, target: str) -> List[Dict]: - """Parse structured findings from LLM response. - - Handles multiple output formats from different .md agents: - - FINDING: key-value blocks (vuln-type agents) - - Headed sections (## [SEVERITY] Vulnerability: ...) - - OWASP format (## OWASP A0X: ...) 
- - Generic bold-label patterns - """ + """Parse FINDING: blocks or ## sections from LLM response (fallback).""" findings = [] - # Pattern 1: FINDING: blocks (used by 100 vuln-type agents) + # Pattern 1: FINDING: blocks finding_blocks = re.split(r"(?:^|\n)FINDING:", response) if len(finding_blocks) > 1: - for block in finding_blocks[1:]: # skip text before first FINDING: + for block in finding_blocks[1:]: parsed = self._parse_finding_block(block, target) if parsed: findings.append(parsed) if findings: return findings - # Pattern 2: Section-based findings (## [SEVERITY] Vulnerability: Title) + # Pattern 2: Section-based vuln_sections = re.findall( r"##\s*\[?(Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP\s+A\d+)[\s:]*([^\n]+)", response, re.IGNORECASE, ) - if vuln_sections: parts = re.split( r"(?=##\s*\[?(?:Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP))", response, flags=re.IGNORECASE, ) for part in parts: - finding = self._parse_finding_section(part, target) - if finding: - findings.append(finding) - else: - # Pattern 3: Generic vulnerability mentions with evidence - generic = re.findall( - r"\*\*(?:Vulnerability|Finding|Issue)[:\s]*\*\*\s*([^\n]+)", - response, re.IGNORECASE, - ) - for title in generic: - findings.append({ - "title": title.strip(), - "severity": "medium", - "vulnerability_type": self._infer_vuln_type(title), - "description": "", - "affected_endpoint": target, - "evidence": "", - "poc_code": "", - "source_agent": self.definition.display_name, - }) + f = self._parse_finding_section(part, target) + if f: + findings.append(f) return findings def _parse_finding_block(self, block: str, target: str) -> Optional[Dict]: - """Parse a FINDING: key-value block from vuln-type agent response. 
- - Expected format: - FINDING: - - Title: SSRF in url parameter at /api/fetch - - Severity: High - - CWE: CWE-918 - - Endpoint: https://target.com/api/fetch - - Evidence: Internal content returned - - Impact: Internal network access - - Remediation: Whitelist URLs - """ + """Parse a FINDING: key-value block.""" if not block.strip(): return None - # Extract key-value pairs (- Key: Value) kvs: Dict[str, str] = {} for match in re.finditer(r"-\s*([A-Za-z][\w\s/]*?):\s*(.+)", block): key = match.group(1).strip().lower().replace(" ", "_") @@ -343,7 +738,6 @@ class MdAgent(SpecialistAgent): if not title: return None - # Extract severity sev_raw = kvs.get("severity", "medium").lower().strip() severity = "medium" for s in ("critical", "high", "medium", "low", "info"): @@ -351,22 +745,14 @@ class MdAgent(SpecialistAgent): severity = s break - # Extract CWE cwe = "" - cwe_raw = kvs.get("cwe", "") - cwe_match = re.search(r"CWE-(\d+)", cwe_raw) + cwe_match = re.search(r"CWE-(\d+)", kvs.get("cwe", "")) if cwe_match: cwe = f"CWE-{cwe_match.group(1)}" - # Use agent name as vuln type if it matches a known type vuln_type = self.definition.name - if vuln_type.startswith("md_"): - vuln_type = vuln_type[3:] - - # Extract endpoint endpoint = kvs.get("endpoint", kvs.get("url", target)).strip() - # Extract code blocks as PoC poc = "" code_blocks = re.findall(r"```(?:\w+)?\n(.*?)```", block, re.DOTALL) if code_blocks: @@ -389,11 +775,10 @@ class MdAgent(SpecialistAgent): } def _parse_finding_section(self, section: str, target: str) -> Optional[Dict]: - """Parse a single finding section from the response.""" + """Parse a ## [SEVERITY] Vulnerability: ... 
section.""" if not section.strip(): return None - # Extract title title_match = re.search( r"##\s*\[?(?:Critical|High|Medium|Low|Info)\]?\s*(?:Vulnerability|Attack|OWASP[^:]*)[:\s]*(.+)", section, re.IGNORECASE, @@ -402,7 +787,6 @@ class MdAgent(SpecialistAgent): if not title: return None - # Extract severity from header or table severity = "medium" sev_match = re.search( r"\*\*Severity\*\*\s*\|?\s*(Critical|High|Medium|Low|Info)", @@ -418,77 +802,34 @@ class MdAgent(SpecialistAgent): if header_sev: severity = header_sev.group(1).lower() - # Extract CVSS - cvss_match = re.search(r"(\d+\.\d+)", section[:500]) - cvss = float(cvss_match.group(1)) if cvss_match else 0.0 - - # Extract CWE cwe_match = re.search(r"CWE-(\d+)", section) cwe = f"CWE-{cwe_match.group(1)}" if cwe_match else "" - # Extract endpoint - endpoint = target - ep_match = re.search( - r"\*\*Endpoint\*\*\s*\|?\s*(https?://[^\s|]+)", - section, re.IGNORECASE, - ) - if ep_match: - endpoint = ep_match.group(1).strip() - - # Extract description - desc = "" - desc_match = re.search( - r"###?\s*Description\s*\n(.*?)(?=\n###?\s|\Z)", - section, re.DOTALL | re.IGNORECASE, - ) - if desc_match: - desc = desc_match.group(1).strip()[:1000] - - # Extract PoC code blocks poc = "" code_blocks = re.findall(r"```(?:\w+)?\n(.*?)```", section, re.DOTALL) if code_blocks: - poc = "\n---\n".join(block.strip() for block in code_blocks[:3]) + poc = "\n---\n".join(b.strip() for b in code_blocks[:3]) - # Extract evidence/proof evidence = "" ev_match = re.search( - r"###?\s*(?:Proof|Evidence|Tool (?:Output|Evidence))\s*\n(.*?)(?=\n###?\s|\Z)", + r"###?\s*(?:Proof|Evidence)\s*\n(.*?)(?=\n###?\s|\Z)", section, re.DOTALL | re.IGNORECASE, ) if ev_match: evidence = ev_match.group(1).strip()[:1000] - # Extract impact - impact = "" - imp_match = re.search( - r"###?\s*Impact\s*\n(.*?)(?=\n###?\s|\Z)", - section, re.DOTALL | re.IGNORECASE, - ) - if imp_match: - impact = imp_match.group(1).strip()[:500] - - # Extract remediation - 
remediation = "" - rem_match = re.search( - r"###?\s*(?:Remediation|Mitigations?|Fix)\s*\n(.*?)(?=\n###?\s|\Z)", - section, re.DOTALL | re.IGNORECASE, - ) - if rem_match: - remediation = rem_match.group(1).strip()[:500] - return { "title": title, "severity": severity, "vulnerability_type": self._infer_vuln_type(title), - "cvss_score": cvss, + "cvss_score": 0.0, "cwe_id": cwe, - "description": desc, - "affected_endpoint": endpoint, + "description": "", + "affected_endpoint": target, "evidence": evidence, "poc_code": poc, - "impact": impact, - "remediation": remediation, + "impact": "", + "remediation": "", "source_agent": self.definition.display_name, } @@ -497,61 +838,24 @@ class MdAgent(SpecialistAgent): """Infer vulnerability type from finding title.""" title_lower = title.lower() type_map = { - "sql injection": "sqli_error", - "sqli": "sqli_error", - "xss": "xss_reflected", - "cross-site scripting": "xss_reflected", - "stored xss": "xss_stored", - "dom xss": "xss_dom", - "command injection": "command_injection", - "rce": "command_injection", - "remote code": "command_injection", - "ssrf": "ssrf", - "server-side request": "ssrf", - "csrf": "csrf", - "cross-site request": "csrf", - "lfi": "lfi", - "local file": "lfi", - "path traversal": "path_traversal", - "directory traversal": "path_traversal", - "file upload": "file_upload", - "xxe": "xxe", - "xml external": "xxe", - "ssti": "ssti", - "template injection": "ssti", - "open redirect": "open_redirect", - "redirect": "open_redirect", - "idor": "idor", - "insecure direct": "idor", - "broken access": "bola", - "access control": "bola", - "authentication": "auth_bypass", - "auth bypass": "auth_bypass", - "brute force": "brute_force", - "jwt": "jwt_manipulation", - "session": "session_fixation", - "clickjacking": "clickjacking", - "cors": "cors_misconfig", - "crlf": "crlf_injection", - "header injection": "header_injection", - "security header": "security_headers", - "ssl": "ssl_issues", - "tls": "ssl_issues", - 
"information disclosure": "information_disclosure", - "sensitive data": "sensitive_data_exposure", - "directory listing": "directory_listing", - "debug": "debug_mode", - "deserialization": "insecure_deserialization", - "nosql": "nosql_injection", - "ldap": "ldap_injection", - "graphql": "graphql_injection", - "race condition": "race_condition", - "business logic": "business_logic", - "rate limit": "rate_limit_bypass", + "sql injection": "sqli_error", "sqli": "sqli_error", + "xss": "xss_reflected", "cross-site scripting": "xss_reflected", + "stored xss": "xss_stored", "dom xss": "xss_dom", + "command injection": "command_injection", "rce": "command_injection", + "ssrf": "ssrf", "csrf": "csrf", "lfi": "lfi", + "path traversal": "path_traversal", "file upload": "file_upload", + "xxe": "xxe", "ssti": "ssti", "open redirect": "open_redirect", + "idor": "idor", "bola": "bola", "auth bypass": "auth_bypass", + "jwt": "jwt_manipulation", "cors": "cors_misconfig", + "crlf": "crlf_injection", "header injection": "header_injection", + "nosql": "nosql_injection", "graphql": "graphql_injection", + "race condition": "race_condition", "business logic": "business_logic", "subdomain takeover": "subdomain_takeover", - "host header": "host_header_injection", "prototype pollution": "prototype_pollution", "websocket": "websocket_hijacking", + "information disclosure": "information_disclosure", + "directory listing": "directory_listing", + "clickjacking": "clickjacking", "ssl": "ssl_issues", } for keyword, vtype in type_map.items(): if keyword in title_lower: @@ -562,16 +866,18 @@ class MdAgent(SpecialistAgent): # ─── MdAgentLibrary: loads all .md agents ──────────────────────────── class MdAgentLibrary: - """Loads all .md files from prompts/agents/ and indexes them - as executable agent definitions (100+ vuln-type agents).""" + """Loads all .md files from prompts/agents/ and indexes them.""" - def __init__(self, md_dir: str = "prompts/agents"): + def __init__(self, md_dir: str = ""): + 
if not md_dir: + # Resolve relative to project root (parent of backend/) + project_root = Path(__file__).resolve().parent.parent.parent + md_dir = str(project_root / "prompts" / "agents") self.md_dir = Path(md_dir) self.agents: Dict[str, MdAgentDefinition] = {} self._load_all() def _load_all(self): - """Load all .md files as agent definitions.""" if not self.md_dir.is_dir(): logger.warning(f"MD agent directory not found: {self.md_dir}") return @@ -584,7 +890,6 @@ class MdAgentLibrary: try: content = md_file.read_text(encoding="utf-8") - # Parse structured format user_match = re.search( r"## User Prompt\n(.*?)(?=\n## System Prompt|\Z)", content, re.DOTALL, @@ -600,15 +905,12 @@ class MdAgentLibrary: if not user_prompt and not system_prompt: system_prompt = content.strip() - # Detect placeholders placeholders = re.findall(r"\{(\w+)\}", user_prompt) - # Build display name display_name = name.replace("_", " ").title() title_match = re.search(r"^#\s+(.+)", content) if title_match: raw_title = title_match.group(1).strip() - # Remove suffixes: "Prompt", "Specialist Agent", "Agent" display_name = re.sub( r"\s*(?:Specialist Agent|Agent|Prompt)\s*$", "", raw_title, @@ -637,6 +939,13 @@ class MdAgentLibrary: def get_agent(self, name: str) -> Optional[MdAgentDefinition]: return self.agents.get(name) + def get_all_runnable(self) -> List[MdAgentDefinition]: + """Return ALL agents that can be dispatched.""" + return [ + a for a in self.agents.values() + if a.category in ("offensive", "generalist", "recon") + ] + def get_offensive_agents(self) -> List[MdAgentDefinition]: return [a for a in self.agents.values() if a.category == "offensive"] @@ -644,7 +953,6 @@ class MdAgentLibrary: return [a for a in self.agents.values() if a.category == category] def list_agents(self) -> List[Dict]: - """Return agent metadata list for API/frontend.""" return [ { "name": a.name, @@ -656,19 +964,19 @@ class MdAgentLibrary: ] -# ─── MdAgentOrchestrator: runs agents post-recon ──────────────────── +# 
─── MdAgentOrchestrator: phased execution ────────────────────────── class MdAgentOrchestrator: - """Coordinates execution of .md-based agents after recon. + """Coordinates execution of .md-based agents in phases. Flow: - 1. Select agents (explicit list or defaults) - 2. Build shared context from recon data - 3. Run agents in parallel (bounded concurrency) - 4. Collect and merge findings + Phase 1: Recon agents (discover more attack surface) + Phase 2: Offensive agents (test specific vuln types, at most MAX_CONCURRENT at a time) + Phase 3: Generalist agents (cross-cutting analysis) + All agents execute REAL HTTP requests. """ - MAX_CONCURRENT = 3 + MAX_CONCURRENT = 2 # Keep low to avoid API rate limits def __init__( self, @@ -678,6 +986,9 @@ class MdAgentOrchestrator: validation_judge=None, log_callback: Optional[Callable] = None, progress_callback: Optional[Callable] = None, + http_session=None, + auth_headers: Optional[Dict] = None, + cancel_fn: Optional[Callable] = None, ): self.llm = llm self.memory = memory @@ -685,6 +996,9 @@ class MdAgentOrchestrator: self.validation_judge = validation_judge self.log = log_callback self.progress_callback = progress_callback + self.http_session = http_session + self.auth_headers = auth_headers or {} + self.cancel_fn = cancel_fn or (lambda: False) self.library = MdAgentLibrary() self._cancel_event = asyncio.Event() @@ -701,87 +1015,79 @@ class MdAgentOrchestrator: headers: Optional[Dict] = None, waf_info: str = "", ) -> Dict: - """Execute selected .md agents against target. - - Args: - target: Target URL. - recon_data: ReconData object from recon phase. - existing_findings: Findings discovered so far. - selected_agents: List of agent names to run. None = defaults. - headers: Auth/custom headers. - waf_info: WAF detection info. - - Returns: - Dict with findings, agent_results, statistics. 
- """ + """Execute agents in phases: recon → offensive → generalist.""" start_time = time.time() self._cancel_event.clear() - # Resolve agent selection + # Merge auth headers + all_headers = {**self.auth_headers} + if headers: + all_headers.update(headers) + + # Resolve agents agents_to_run = self._resolve_agents(selected_agents) if not agents_to_run: - await self._log("warning", "[MD-AGENTS] No agents available to run") + await self._log("warning", "[AGENT GRID] No agents available") return {"findings": [], "agent_results": {}, "duration": 0} - agent_names = [a.display_name for a in agents_to_run] - await self._log("info", f"[MD-AGENTS] Dispatching {len(agents_to_run)} agents: " - f"{', '.join(agent_names)}") + # Split into phases + recon_agents = [a for a in agents_to_run if a.category == "recon"] + offensive_agents = [a for a in agents_to_run if a.category == "offensive"] + generalist_agents = [a for a in agents_to_run if a.category == "generalist"] + + await self._log("info", + f"[AGENT GRID] {len(agents_to_run)} agents: " + f"{len(recon_agents)} recon, {len(offensive_agents)} offensive, " + f"{len(generalist_agents)} generalist") # Build shared context context = self._build_context( - target, recon_data, existing_findings, headers, waf_info, + target, recon_data, existing_findings, all_headers, waf_info, ) - # Budget per agent - n_agents = len(agents_to_run) - per_agent_budget = 1.0 / max(n_agents, 1) - - # Create MdAgent instances - md_agents: List[MdAgent] = [] - for defn in agents_to_run: - agent = MdAgent( - definition=defn, - llm=self.llm, - memory=self.memory, - budget_allocation=per_agent_budget, - budget=self.budget, - validation_judge=self.validation_judge, - ) - md_agents.append(agent) - - # Run agents with bounded concurrency - semaphore = asyncio.Semaphore(self.MAX_CONCURRENT) all_results: Dict[str, AgentResult] = {} + all_findings: List[Dict] = [] - async def _run_one(agent: MdAgent) -> AgentResult: - async with semaphore: + # ── Phase 1: Recon 
agents (sequential, enriches context) ── + if recon_agents and not self._cancel_event.is_set(): + await self._log("info", "[PHASE 1] Recon agents — deep discovery") + for defn in recon_agents: if self._cancel_event.is_set(): - return AgentResult( - agent_name=agent.name, status="cancelled", + break + r = await self._run_agent(defn, context, all_headers) + all_results[r.agent_name] = r + all_findings.extend(r.findings) + # Recon findings enrich context for subsequent phases + if r.findings: + context["existing_findings"] = ( + context.get("existing_findings", []) + r.findings ) - await self._log("info", - f" [{agent.definition.display_name}] Starting...") - result = await agent.execute(context) - await self._log("info", - f" [{agent.definition.display_name}] Done: " - f"{len(result.findings)} findings, " - f"{result.duration:.1f}s") - return result - tasks = [_run_one(a) for a in md_agents] - results = await asyncio.gather(*tasks, return_exceptions=True) + # ── Phase 2: Offensive agents (parallel, bounded) ── + if offensive_agents and not self._cancel_event.is_set(): + await self._log("info", + f"[PHASE 2] {len(offensive_agents)} offensive agents — real exploitation") + phase_results = await self._run_parallel( + offensive_agents, context, all_headers + ) + for r in phase_results: + all_results[r.agent_name] = r + all_findings.extend(r.findings) - # Collect results - all_findings = [] - for agent, res in zip(md_agents, results): - if isinstance(res, Exception): - logger.error(f"MD agent {agent.name} error: {res}") - all_results[agent.name] = AgentResult( - agent_name=agent.name, status="failed", error=str(res), - ) - else: - all_results[agent.name] = res - all_findings.extend(res.findings) + # ── Phase 3: Generalist agents (parallel, cross-analysis) ── + if generalist_agents and not self._cancel_event.is_set(): + # Update context with all findings so far + context["existing_findings"] = ( + context.get("existing_findings", []) + all_findings + ) + await 
self._log("info", + f"[PHASE 3] {len(generalist_agents)} generalist agents — cross-analysis") + phase_results = await self._run_parallel( + generalist_agents, context, all_headers + ) + for r in phase_results: + all_results[r.agent_name] = r + all_findings.extend(r.findings) elapsed = time.time() - start_time total_tokens = sum( @@ -790,7 +1096,7 @@ class MdAgentOrchestrator: ) await self._log("info", - f"[MD-AGENTS] Complete: {len(all_findings)} findings from " + f"[AGENT GRID] Complete: {len(all_findings)} findings from " f"{len(agents_to_run)} agents in {elapsed:.1f}s") return { @@ -812,15 +1118,71 @@ class MdAgentOrchestrator: "duration": round(elapsed, 1), } + async def _run_agent( + self, defn: MdAgentDefinition, context: Dict, headers: Dict + ) -> AgentResult: + """Run a single agent.""" + agent = MdAgent( + definition=defn, + llm=self.llm, + memory=self.memory, + budget_allocation=1.0 / max(len(self.library.agents), 1), + budget=self.budget, + validation_judge=self.validation_judge, + http_session=self.http_session, + auth_headers=headers, + cancel_fn=self.cancel_fn, + ) + await self._log("info", f" [{defn.display_name}] Starting...") + result = await agent.execute(context) + if result.error: + await self._log("warning", + f" [{defn.display_name}] Error: {result.error[:100]}, {result.duration:.1f}s") + elif result.findings: + await self._log("success", + f" [{defn.display_name}] {len(result.findings)} findings! 
{result.duration:.1f}s") + else: + await self._log("info", + f" [{defn.display_name}] Clean, {result.duration:.1f}s") + return result + + async def _run_parallel( + self, agents: List[MdAgentDefinition], context: Dict, headers: Dict + ) -> List[AgentResult]: + """Run agents in parallel with bounded concurrency.""" + semaphore = asyncio.Semaphore(self.MAX_CONCURRENT) + + agent_index = [0] # mutable counter for staggering + + async def _bounded(defn: MdAgentDefinition) -> AgentResult: + async with semaphore: + if self._cancel_event.is_set(): + return AgentResult(agent_name=f"md_{defn.name}", status="cancelled") + # Stagger API calls: small delay based on position + idx = agent_index[0] + agent_index[0] += 1 + if idx > 0: + await asyncio.sleep(2.0) # 2s between each agent start to respect rate limits + return await self._run_agent(defn, context, headers) + + tasks = [_bounded(d) for d in agents] + results = await asyncio.gather(*tasks, return_exceptions=True) + + final = [] + for defn, res in zip(agents, results): + if isinstance(res, Exception): + logger.error(f"Agent {defn.name} error: {res}") + final.append(AgentResult( + agent_name=f"md_{defn.name}", status="failed", error=str(res) + )) + else: + final.append(res) + return final + def _resolve_agents( self, selected: Optional[List[str]], ) -> List[MdAgentDefinition]: - """Resolve agent selection to definitions. - - When no agents are explicitly selected, dispatches ALL - offensive (vuln-type) agents — the XBOW-style architecture - runs one specialist per vulnerability type. 
- """ + """Resolve agent selection.""" if selected: resolved = [] for name in selected: @@ -831,7 +1193,8 @@ class MdAgentOrchestrator: logger.warning(f"MD agent not found: {name}") return resolved - # Default: all offensive (vuln-type) agents + if RUN_ALL_BY_DEFAULT: + return self.library.get_all_runnable() return self.library.get_offensive_agents() def _build_context( @@ -842,7 +1205,6 @@ class MdAgentOrchestrator: headers: Optional[Dict], waf_info: str, ) -> Dict: - """Build shared context dict from recon data.""" ctx: Dict[str, Any] = {"target": target} if recon_data: @@ -851,24 +1213,30 @@ class MdAgentOrchestrator: ctx["parameters"] = getattr(recon_data, "parameters", {}) ctx["forms"] = getattr(recon_data, "forms", []) ctx["headers"] = getattr(recon_data, "response_headers", {}) + ctx["js_files"] = getattr(recon_data, "js_files", []) + ctx["js_sinks"] = getattr(recon_data, "js_sinks", []) + ctx["api_endpoints"] = getattr(recon_data, "api_endpoints", []) + ctx["cookies"] = getattr(recon_data, "cookies", []) else: ctx["endpoints"] = [] ctx["technologies"] = [] ctx["parameters"] = {} ctx["forms"] = [] ctx["headers"] = {} + ctx["js_files"] = [] + ctx["js_sinks"] = [] + ctx["api_endpoints"] = [] + ctx["cookies"] = [] if headers: ctx["headers"].update(headers) ctx["existing_findings"] = existing_findings or [] ctx["waf_info"] = waf_info - return ctx def cancel(self): self._cancel_event.set() def list_available_agents(self) -> List[Dict]: - """Return agent list for API/frontend.""" return self.library.list_agents() diff --git a/backend/core/smart_router/provider_registry.py b/backend/core/smart_router/provider_registry.py index 2bb883a..439fe88 100644 --- a/backend/core/smart_router/provider_registry.py +++ b/backend/core/smart_router/provider_registry.py @@ -81,7 +81,7 @@ DEFAULT_PROVIDERS: List[Dict] = [ { "id": "claude_code", "name": "Claude Code", "auth_type": "oauth", "api_format": "anthropic", "base_url": "https://api.anthropic.com", - "tier": 1, 
"default_model": "claude-sonnet-4-5-20250929", + "tier": 1, "default_model": "claude-sonnet-4-20250514", }, { "id": "codex_cli", "name": "OpenAI Codex CLI", "auth_type": "oauth", @@ -116,13 +116,13 @@ DEFAULT_PROVIDERS: List[Dict] = [ { "id": "kiro", "name": "Kiro AI", "auth_type": "oauth", "api_format": "anthropic", "base_url": "https://api.anthropic.com", - "tier": 1, "default_model": "claude-sonnet-4-5-20250929", + "tier": 1, "default_model": "claude-sonnet-4-20250514", }, # === API Key Providers (Tier 1 - Paid) === { "id": "anthropic", "name": "Anthropic", "auth_type": "api_key", "api_format": "anthropic", "base_url": "https://api.anthropic.com", - "tier": 1, "default_model": "claude-sonnet-4-5-20250929", + "tier": 1, "default_model": "claude-sonnet-4-20250514", "env_key": "ANTHROPIC_API_KEY", }, { @@ -140,7 +140,7 @@ DEFAULT_PROVIDERS: List[Dict] = [ { "id": "openrouter", "name": "OpenRouter", "auth_type": "api_key", "api_format": "openai_compat", "base_url": "https://openrouter.ai/api/v1", - "tier": 1, "default_model": "anthropic/claude-sonnet-4-5", + "tier": 1, "default_model": "anthropic/claude-sonnet-4-20250514", "env_key": "OPENROUTER_API_KEY", }, # === API Key Providers (Tier 2 - Cheap) === diff --git a/backend/core/smart_router/router.py b/backend/core/smart_router/router.py index 62381e5..11c57d2 100644 --- a/backend/core/smart_router/router.py +++ b/backend/core/smart_router/router.py @@ -173,41 +173,41 @@ class SmartRouter: ) -> List[Tuple[Provider, Account]]: """Build ordered list of (provider, account) candidates. - If preferred is set, ONLY that provider is used (no fallback to others). - This ensures the user's explicit choice is respected. + If preferred is set, that provider is tried FIRST, then falls back + to other providers of the same tier if all accounts fail. If preferred is not set, all providers are tried by tier. 
""" candidates = [] + seen_account_ids = set() if preferred: - # Strict mode: only the preferred provider + # Preferred provider goes first in candidate list provider = self.registry.get_provider(preferred) if provider: accounts = self.registry.get_active_accounts(preferred) for acct in accounts: if self.quota.is_available(acct.id): candidates.append((provider, acct)) + seen_account_ids.add(acct.id) if not candidates: logger.warning( f"SmartRouter: Preferred provider '{preferred}' has no active accounts! " f"Falling back to all providers." ) - else: - return candidates # Only preferred provider candidates - # Auto mode or preferred has no active accounts: try all by tier + # Add remaining providers as fallback (by tier) for tier in (1, 2, 3): providers = self.registry.get_providers_by_tier(tier) for provider in providers: - # Skip disabled providers if not getattr(provider, "enabled", True): continue acct = self.quota.next_account( provider.id, self.registry.get_active_accounts(provider.id), ) - if acct: + if acct and acct.id not in seen_account_ids: candidates.append((provider, acct)) + seen_account_ids.add(acct.id) return candidates diff --git a/backend/main.py b/backend/main.py index 6fa398b..4e25f9a 100755 --- a/backend/main.py +++ b/backend/main.py @@ -11,7 +11,7 @@ from pathlib import Path from backend.config import settings from backend.db.database import init_db, close_db -from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks, scheduler, vuln_lab, terminal, sandbox, knowledge, mcp, providers, full_ia, cli_agent +from backend.api.v1 import scans, targets, prompts, reports, dashboard, vulnerabilities, settings as settings_router, agent, agent_tasks, scheduler, vuln_lab, terminal, sandbox, knowledge, mcp, providers, cli_agent from backend.api.websocket import manager as ws_manager @@ -116,7 +116,6 @@ app.include_router(sandbox.router, prefix="/api/v1/sandbox", tags=["Sandbox"]) 
app.include_router(knowledge.router, prefix="/api/v1/knowledge", tags=["Knowledge"]) app.include_router(mcp.router, prefix="/api/v1/mcp", tags=["MCP Servers"]) app.include_router(providers.router, prefix="/api/v1/providers", tags=["Providers"]) -app.include_router(full_ia.router, prefix="/api/v1/full-ia", tags=["FULL AI Testing"]) app.include_router(cli_agent.router) diff --git a/config/config2.json b/config/config2.json index 06abe42..cb32a4e 100755 --- a/config/config2.json +++ b/config/config2.json @@ -32,12 +32,12 @@ }, "claude_opus_default": { "provider": "claude", - "model": "claude-3-opus-20240229", + "model": "claude-opus-4-6-20250918", "api_key": "${ANTHROPIC_API_KEY}", "temperature": 0.7, - "max_tokens": 4096, - "input_token_limit": 200000, - "output_token_limit": 4096, + "max_tokens": 16384, + "input_token_limit": 1000000, + "output_token_limit": 16384, "cache_enabled": true, "search_context_level": "high", "pdf_support_enabled": true, diff --git a/core/llm_manager.py b/core/llm_manager.py index d28627c..df4aede 100755 --- a/core/llm_manager.py +++ b/core/llm_manager.py @@ -649,7 +649,7 @@ Identify any potential hallucinations, inconsistencies, or areas where the respo """Generate using OpenRouter API (OpenAI-compatible). OpenRouter supports hundreds of models through a unified API. - Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-20250514'). + Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-6'). API key comes from OPENROUTER_API_KEY env var or config profile. 
""" if not self.api_key: diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 4e88e6f..ec13a2a 100755 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -17,15 +17,12 @@ import SandboxDashboardPage from './pages/SandboxDashboardPage' import KnowledgePage from './pages/KnowledgePage' import MCPManagementPage from './pages/MCPManagementPage' import ProvidersPage from './pages/ProvidersPage' -import FullIATestingPage from './pages/FullIATestingPage' - function App() { return ( } /> } /> - } /> } /> } /> } /> diff --git a/frontend/src/components/layout/Header.tsx b/frontend/src/components/layout/Header.tsx index b68ac0a..7cd43ec 100755 --- a/frontend/src/components/layout/Header.tsx +++ b/frontend/src/components/layout/Header.tsx @@ -5,7 +5,8 @@ const pageTitles: Record = { '/scan/new': 'New Security Scan', '/reports': 'Reports', '/settings': 'Settings', - '/full-ia': 'FULL AI TESTING', + '/auto': 'Auto Pentest', + '/realtime': 'Real-time Task', } export default function Header() { diff --git a/frontend/src/components/layout/Sidebar.tsx b/frontend/src/components/layout/Sidebar.tsx index 91d44ef..c11e4d0 100755 --- a/frontend/src/components/layout/Sidebar.tsx +++ b/frontend/src/components/layout/Sidebar.tsx @@ -16,7 +16,6 @@ import { Brain, Cable, Plug, - Crosshair, ChevronLeft, ChevronRight, } from 'lucide-react' @@ -30,7 +29,6 @@ const navGroups = [ { path: '/auto', icon: Rocket, label: 'Auto Pentest' }, { path: '/scan/new', icon: Bot, label: 'AI Agent' }, { path: '/realtime', icon: Zap, label: 'Real-time Task' }, - { path: '/full-ia', icon: Crosshair, label: 'FULL AI TESTING' }, ], }, { diff --git a/frontend/src/pages/AutoPentestPage.tsx b/frontend/src/pages/AutoPentestPage.tsx index f39542e..edb8969 100755 --- a/frontend/src/pages/AutoPentestPage.tsx +++ b/frontend/src/pages/AutoPentestPage.tsx @@ -14,15 +14,15 @@ import VulnAgentGrid from '../components/VulnAgentGrid' // ─── Constants ──────────────────────────────────────────────────────────────── 
const PHASES = [ - { key: 'parallel', label: 'Parallel Streams', icon: Layers, range: [0, 50] as const }, - { key: 'deep', label: 'Deep Analysis', icon: Brain, range: [50, 75] as const }, - { key: 'final', label: 'Finalization', icon: Shield, range: [75, 100] as const }, + { key: 'recon', label: 'Reconnaissance', icon: Globe, range: [0, 20] as const }, + { key: 'agents', label: 'Agent Grid (108 agents)', icon: Layers, range: [20, 85] as const }, + { key: 'final', label: 'Finalization', icon: Shield, range: [85, 100] as const }, ] const STREAMS = [ - { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 25 }, - { key: 'junior', label: 'Junior AI', icon: Brain, color: 'purple', activeUntil: 35 }, - { key: 'tools', label: 'Tools', icon: Wrench, color: 'orange', activeUntil: 50 }, + { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 20 }, + { key: 'agents', label: 'Agent Grid', icon: Brain, color: 'purple', activeUntil: 85 }, + { key: 'final', label: 'Report', icon: Wrench, color: 'orange', activeUntil: 100 }, ] as const const STREAM_COLORS: Record = { @@ -53,12 +53,10 @@ const CONFIDENCE_STYLES: Record = { const LOG_FILTERS = [ { key: 'all', label: 'All', color: '' }, - { key: 'stream1', label: 'Recon', color: 'text-blue-400' }, - { key: 'stream2', label: 'Junior', color: 'text-purple-400' }, - { key: 'stream3', label: 'Tools', color: 'text-orange-400' }, - { key: 'deep', label: 'Deep', color: 'text-cyan-400' }, - { key: 'container', label: 'Container', color: 'text-cyan-300' }, - { key: 'cli_agent', label: 'CLI Agent', color: 'text-pink-400' }, + { key: 'recon', label: 'Recon', color: 'text-blue-400' }, + { key: 'agents', label: 'Agents', color: 'text-green-400' }, + { key: 'judge', label: 'Validation', color: 'text-amber-300' }, + { key: 'final', label: 'Final', color: 'text-cyan-400' }, { key: 'error', label: 'Errors', color: 'text-red-400' }, ] @@ -88,8 +86,8 @@ interface Toast { // ─── Utility Functions 
──────────────────────────────────────────────────────── function phaseFromProgress(progress: number): number { - if (progress < 50) return 0 - if (progress < 75) return 1 + if (progress < 20) return 0 + if (progress < 85) return 1 return 2 } @@ -116,6 +114,15 @@ function logMessageColor(message: string): string { if (message.startsWith('[PLAYBOOK]')) return 'text-indigo-400' if (message.startsWith('[SITE ANALYZER]')) return 'text-emerald-400' if (message.startsWith('[MD-AGENTS]')) return 'text-cyan-300' + if (message.startsWith('[AGENT GRID]')) return 'text-green-400' + if (message.startsWith('[PHASE 1]')) return 'text-blue-300' + if (message.startsWith('[PHASE 2]')) return 'text-purple-300' + if (message.startsWith('[PHASE 3]')) return 'text-yellow-300' + if (message.startsWith('[RECON]')) return 'text-blue-400' + if (message.startsWith('[CVE]')) return 'text-red-300' + if (message.startsWith('[CHAIN]')) return 'text-orange-300' + if (message.startsWith('[JUDGE]')) return 'text-amber-300' + if (message.includes('Starting (real HTTP)')) return 'text-green-300' return '' } @@ -422,8 +429,8 @@ export default function AutoPentestPage() { // Model selection const [availableModels, setAvailableModels] = useState>([]) - const [selectedProvider, setSelectedProvider] = useState('') - const [selectedModel, setSelectedModel] = useState('') + const [selectedProvider, setSelectedProvider] = useState('anthropic') + const [selectedModel, setSelectedModel] = useState('claude-sonnet-4-20250514') // MD Agent selection const [availableMdAgents, setAvailableMdAgents] = useState>([]) @@ -739,12 +746,7 @@ export default function AutoPentestPage() { return () => { if (pollRef.current) clearInterval(pollRef.current) } }, [sessions, agentId, connectionLost, addToast]) - // Auto-scroll logs - useEffect(() => { - if (activeTab === 'logs' && logsEndRef.current) { - logsEndRef.current.scrollIntoView({ behavior: 'smooth' }) - } - }, [logs, activeTab]) + // Auto-scroll logs disabled — user 
controls scroll position // ─── History ────────────────────────────────────────────────────────────── @@ -1376,48 +1378,60 @@ export default function AutoPentestPage() { )} {/* LLM Provider / Model Selection */} - {availableModels.length > 0 && ( -
-
- - -
-
- - -
+
+
+ +
- )} +
+ + +
+
{/* Multi-target textarea */} {multiTarget && ( diff --git a/frontend/src/pages/HomePage.tsx b/frontend/src/pages/HomePage.tsx index e44e651..02ded86 100755 --- a/frontend/src/pages/HomePage.tsx +++ b/frontend/src/pages/HomePage.tsx @@ -369,8 +369,8 @@ export default function HomePage() { {/* ── Quick Actions ─────────────────────────────────────── */}
{([ - { label: 'Auto Pentest', icon: Zap, to: '/auto', color: 'text-green-400', bg: 'bg-green-500/10 hover:bg-green-500/20', border: 'border-green-500/20 hover:border-green-500/40', desc: '3-stream AI testing' }, - { label: 'Full IA Testing', icon: Shield, to: '/full-ia', color: 'text-red-400', bg: 'bg-red-500/10 hover:bg-red-500/20', border: 'border-red-500/20 hover:border-red-500/40', desc: '100 vuln types' }, + { label: 'Auto Pentest', icon: Zap, to: '/auto', color: 'text-green-400', bg: 'bg-green-500/10 hover:bg-green-500/20', border: 'border-green-500/20 hover:border-green-500/40', desc: '109 agents + 100 vulns' }, + { label: 'AI Agent', icon: Shield, to: '/scan/new', color: 'text-red-400', bg: 'bg-red-500/10 hover:bg-red-500/20', border: 'border-red-500/20 hover:border-red-500/40', desc: 'Custom AI scan' }, { label: 'Vuln Lab', icon: FlaskConical, to: '/vuln-lab', color: 'text-purple-400', bg: 'bg-purple-500/10 hover:bg-purple-500/20', border: 'border-purple-500/20 hover:border-purple-500/40', desc: 'Per-type challenges' }, { label: 'Terminal', icon: Terminal, to: '/terminal', color: 'text-cyan-400', bg: 'bg-cyan-500/10 hover:bg-cyan-500/20', border: 'border-cyan-500/20 hover:border-cyan-500/40', desc: 'AI chat + commands' }, ] as const).map(action => ( diff --git a/prompts/task_library.json b/prompts/task_library.json index 5d80b88..f3e5650 100755 --- a/prompts/task_library.json +++ b/prompts/task_library.json @@ -1,6 +1,6 @@ { "version": "1.0", - "updated_at": "2026-02-24T13:16:20.190712", + "updated_at": "2026-03-25T01:48:34.034821", "tasks": [ { "id": "recon_full", diff --git a/rebuild.sh b/rebuild.sh index bfbc7ac..4450119 100755 --- a/rebuild.sh +++ b/rebuild.sh @@ -1,689 +1,196 @@ #!/usr/bin/env bash # ============================================================================ -# NeuroSploit v3 - Rebuild & Launch Script +# NeuroSploit v3 — Rebuild & Launch (Claude 4.6) # ============================================================================ -# 
Usage: chmod +x rebuild.sh && ./rebuild.sh -# Options: -# --backend-only Only start the backend (skip frontend) -# --frontend-only Only start the frontend (skip backend) -# --build Build frontend for production instead of dev mode -# --install Force reinstall all dependencies -# --reset-db Delete and recreate the database (for schema changes) +# ./rebuild.sh Default (backend + frontend) +# ./rebuild.sh --backend-only Skip frontend +# ./rebuild.sh --frontend-only Skip backend +# ./rebuild.sh --model MODEL Override LLM model +# ./rebuild.sh --install Force reinstall dependencies +# ./rebuild.sh --reset-db Delete + recreate database +# ./rebuild.sh --build Production frontend build +# ./rebuild.sh --port 9000 Custom backend port # ============================================================================ set -e -PROJECT_DIR="/opt/NeuroSploitv2" -VENV_DIR="$PROJECT_DIR/venv" -FRONTEND_DIR="$PROJECT_DIR/frontend" -DATA_DIR="$PROJECT_DIR/data" -LOGS_DIR="$PROJECT_DIR/logs" -PID_DIR="$PROJECT_DIR/.pids" -DB_PATH="$DATA_DIR/neurosploit.db" +DIR="/opt/NeuroSploitv2" +VENV="$DIR/venv" +FRONT="$DIR/frontend" +LOGS="$DIR/logs" +PIDS="$DIR/.pids" +DB="$DIR/data/neurosploit.db" -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -CYAN='\033[0;36m' -NC='\033[0m' +# ── Colors ─────────────────────────────────────────────────────────── +R='\033[0;31m' G='\033[0;32m' Y='\033[1;33m' B='\033[0;34m' C='\033[0;36m' N='\033[0m' +header() { echo -e "\n${C}━━━ $1 ━━━${N}"; } +ok() { echo -e " ${G}✓${N} $1"; } +warn() { echo -e " ${Y}!${N} $1"; } +fail() { echo -e " ${R}✗${N} $1"; exit 1; } -# Parse args -BACKEND_ONLY=false -FRONTEND_ONLY=false -PRODUCTION_BUILD=false -FORCE_INSTALL=false -RESET_DB=false +# ── Parse args ─────────────────────────────────────────────────────── +BACK_ONLY=false; FRONT_ONLY=false; BUILD=false; INSTALL=false; RESET=false +MODEL=""; PORT=8000; FPORT=3000 -for arg in "$@"; do - case $arg in - --backend-only) BACKEND_ONLY=true 
;; - --frontend-only) FRONTEND_ONLY=true ;; - --build) PRODUCTION_BUILD=true ;; - --install) FORCE_INSTALL=true ;; - --reset-db) RESET_DB=true ;; +while [[ $# -gt 0 ]]; do + case $1 in + --backend-only) BACK_ONLY=true; shift ;; + --frontend-only) FRONT_ONLY=true; shift ;; + --build) BUILD=true; shift ;; + --install) INSTALL=true; shift ;; + --reset-db) RESET=true; shift ;; + --model) MODEL="$2"; shift 2 ;; + --port) PORT="$2"; shift 2 ;; + --frontend-port) FPORT="$2"; shift 2 ;; + *) shift ;; esac done -header() { - echo "" - echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" - echo -e "${CYAN} $1${NC}" - echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -} +# ── 0. Stop previous ──────────────────────────────────────────────── +header "Stopping previous" +mkdir -p "$PIDS" "$LOGS" "$DIR/data" "$DIR/reports/screenshots" -step() { - echo -e "${GREEN}[+]${NC} $1" -} - -warn() { - echo -e "${YELLOW}[!]${NC} $1" -} - -fail() { - echo -e "${RED}[x]${NC} $1" - exit 1 -} - -# ============================================================================ -# 0. 
Kill previous instances -# ============================================================================ -header "Stopping previous instances" - -mkdir -p "$PID_DIR" - -# Kill by PID files if they exist -for pidfile in "$PID_DIR"/*.pid; do - [ -f "$pidfile" ] || continue - pid=$(cat "$pidfile" 2>/dev/null) - if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then - step "Stopping process $pid ($(basename "$pidfile" .pid))" - kill "$pid" 2>/dev/null || true - sleep 1 - kill -9 "$pid" 2>/dev/null || true - fi - rm -f "$pidfile" +for f in "$PIDS"/*.pid; do + [ -f "$f" ] || continue + pid=$(cat "$f" 2>/dev/null) + [ -n "$pid" ] && kill "$pid" 2>/dev/null && ok "Stopped $(basename "$f" .pid)" + rm -f "$f" done - -# Also kill any lingering uvicorn/vite on our ports -if lsof -ti:8000 >/dev/null 2>&1; then - step "Killing process on port 8000" - kill $(lsof -ti:8000) 2>/dev/null || true -fi -if lsof -ti:3000 >/dev/null 2>&1; then - step "Killing process on port 3000" - kill $(lsof -ti:3000) 2>/dev/null || true -fi - +lsof -ti:$PORT >/dev/null 2>&1 && kill $(lsof -ti:$PORT) 2>/dev/null || true +lsof -ti:$FPORT >/dev/null 2>&1 && kill $(lsof -ti:$FPORT) 2>/dev/null || true sleep 1 -step "Previous instances stopped" -# ============================================================================ -# 1. Ensure directories exist -# ============================================================================ -header "Preparing directories" -mkdir -p "$DATA_DIR" "$LOGS_DIR" "$PID_DIR" -mkdir -p "$PROJECT_DIR/reports/screenshots" -mkdir -p "$PROJECT_DIR/reports/benchmark_results/logs" -mkdir -p "$DATA_DIR/vectorstore" -mkdir -p "$DATA_DIR/checkpoints" -step "Directories ready" +# ── 1. Database reset ─────────────────────────────────────────────── +if [ "$RESET" = true ] && [ -f "$DB" ]; then + header "Reset database" + cp "$DB" "$DB.bak.$(date +%s)" + rm -f "$DB" + ok "DB backed up and deleted" +fi -# ============================================================================ -# 1b. 
Database reset (if requested) -# ============================================================================ -if [ "$RESET_DB" = true ]; then - header "Resetting database" - if [ -f "$DB_PATH" ]; then - BACKUP="$DB_PATH.backup.$(date +%Y%m%d%H%M%S)" - step "Backing up existing DB to $BACKUP" - cp "$DB_PATH" "$BACKUP" - rm -f "$DB_PATH" - step "Database deleted (will be recreated with new schema on startup)" +# ── 2. Environment check ──────────────────────────────────────────── +header "Environment" + +[ -f "$DIR/.env" ] || { [ -f "$DIR/.env.example" ] && cp "$DIR/.env.example" "$DIR/.env" && warn "Created .env from example"; } || fail "No .env" +ok ".env" + +PY=$(command -v python3 || command -v python) || fail "Python not found" +ok "Python: $($PY --version 2>&1)" + +if [ "$BACK_ONLY" = false ]; then + command -v node &>/dev/null || fail "Node.js not found" + ok "Node: $(node --version)" +fi + +command -v docker &>/dev/null && ok "Docker: available" || warn "Docker: not found (sandbox disabled)" + +# ── 3. Backend setup ──────────────────────────────────────────────── +if [ "$FRONT_ONLY" = false ]; then + header "Backend" + + [ -d "$VENV" ] && [ "$INSTALL" = false ] || { $PY -m venv "$VENV"; ok "Venv created"; } + source "$VENV/bin/activate" + + if [ "$INSTALL" = true ] || [ ! -f "$VENV/.ok" ]; then + pip install -q --upgrade pip + pip install -q -r "$DIR/backend/requirements.txt" 2>&1 | tail -3 + pip install -q -r "$DIR/requirements.txt" 2>&1 | tail -3 + [ -f "$DIR/requirements-optional.txt" ] && pip install -q -r "$DIR/requirements-optional.txt" 2>/dev/null || true + touch "$VENV/.ok" + ok "Dependencies installed" else - step "No existing database found" - fi -fi - -# ============================================================================ -# 2. Environment check -# ============================================================================ -header "Checking environment" - -if [ ! 
-f "$PROJECT_DIR/.env" ]; then - if [ -f "$PROJECT_DIR/.env.example" ]; then - warn ".env not found, copying from .env.example" - cp "$PROJECT_DIR/.env.example" "$PROJECT_DIR/.env" - else - fail ".env file not found and no .env.example to copy from" - fi -fi -step ".env file present" - -# Check Python -if command -v python3 &>/dev/null; then - PYTHON=python3 -elif command -v python &>/dev/null; then - PYTHON=python -else - fail "Python not found. Install Python 3.10+" -fi -step "Python: $($PYTHON --version)" - -# Check Node -if command -v node &>/dev/null; then - step "Node: $(node --version)" -else - if [ "$BACKEND_ONLY" = false ]; then - fail "Node.js not found. Install Node.js 18+" - fi -fi - -# Check Docker (optional - needed for sandbox & benchmarks) -if command -v docker &>/dev/null; then - step "Docker: $(docker --version 2>/dev/null | head -1)" - # Check compose - if docker compose version &>/dev/null 2>&1; then - step "Docker Compose: plugin (docker compose)" - elif command -v docker-compose &>/dev/null; then - step "Docker Compose: standalone ($(docker-compose version --short 2>/dev/null))" - else - warn "Docker Compose not found (needed for sandbox & benchmarks)" - fi -else - warn "Docker not found (optional - needed for sandbox & benchmarks)" -fi - -# ============================================================================ -# 3. Python virtual environment & dependencies -# ============================================================================ -if [ "$FRONTEND_ONLY" = false ]; then - header "Setting up Python backend" - - if [ ! -d "$VENV_DIR" ] || [ "$FORCE_INSTALL" = true ]; then - step "Creating virtual environment..." - $PYTHON -m venv "$VENV_DIR" + ok "Dependencies cached" fi - source "$VENV_DIR/bin/activate" - step "Virtual environment activated" - - if [ "$FORCE_INSTALL" = true ] || [ ! -f "$VENV_DIR/.deps_installed" ]; then - step "Installing backend dependencies..." 
- pip install --quiet --upgrade pip - - # Install from requirements files (pyproject.toml is for tool config only) - pip install --quiet -r "$PROJECT_DIR/backend/requirements.txt" 2>&1 | tail -5 - pip install --quiet -r "$PROJECT_DIR/requirements.txt" 2>&1 | tail -5 - touch "$VENV_DIR/.deps_installed" - step "Core dependencies installed" - - # Try optional deps (may fail on Python <3.10) - if [ -f "$PROJECT_DIR/requirements-optional.txt" ]; then - step "Installing optional dependencies (best-effort)..." - pip install --quiet -r "$PROJECT_DIR/requirements-optional.txt" 2>/dev/null && \ - step "Optional deps installed (mcp, playwright)" || \ - warn "Some optional deps skipped (Python 3.10+ required for mcp/playwright)" - fi - else - step "Dependencies already installed (use --install to force)" - fi - - # Validate key modules - step "Validating Python modules..." - $PYTHON -c " -import sys - -# === Core Platform (14) === -core_modules = [ - ('backend.main', 'FastAPI App'), - ('backend.config', 'Settings'), - ('core.llm_manager', 'LLM Manager'), - ('core.model_router', 'Model Router'), - ('core.scheduler', 'Scheduler'), - ('core.knowledge_augmentor', 'Knowledge Augmentor'), - ('core.browser_validator', 'Browser Validator'), - ('core.mcp_client', 'MCP Client'), - ('core.mcp_server', 'MCP Server'), - ('core.sandbox_manager', 'Sandbox Manager'), - ('core.context_builder', 'Context Builder'), - ('core.pentest_executor', 'Pentest Executor'), - ('core.tool_installer', 'Tool Installer'), - ('core.report_generator', 'Report Generator (CLI)'), -] - -# === API Layer (18) === -api_modules = [ - ('backend.api.v1.agent', 'Agent API'), - ('backend.api.v1.scans', 'Scans API'), - ('backend.api.v1.targets', 'Targets API'), - ('backend.api.v1.prompts', 'Prompts API'), - ('backend.api.v1.reports', 'Reports API'), - ('backend.api.v1.dashboard', 'Dashboard API'), - ('backend.api.v1.vulnerabilities', 'Vulnerabilities API'), - ('backend.api.v1.settings', 'Settings API'), - 
('backend.api.v1.agent_tasks', 'Agent Tasks API'), - ('backend.api.v1.scheduler', 'Scheduler API'), - ('backend.api.v1.vuln_lab', 'VulnLab API'), - ('backend.api.v1.terminal', 'Terminal API'), - ('backend.api.v1.sandbox', 'Sandbox API'), - ('backend.api.v1.knowledge', 'Knowledge API'), - ('backend.api.v1.mcp', 'MCP API'), - ('backend.api.v1.providers', 'Providers API'), - ('backend.api.v1.full_ia', 'Full IA Testing API'), - ('backend.api.v1.cli_agent', 'CLI Agent API'), -] - -# === VulnEngine (18) === -vuln_modules = [ - ('backend.core.vuln_engine.engine', 'VulnEngine Core'), - ('backend.core.vuln_engine.registry', 'VulnEngine Registry'), - ('backend.core.vuln_engine.payload_generator', 'VulnEngine Payloads'), - ('backend.core.vuln_engine.ai_prompts', 'VulnEngine AI Prompts'), - ('backend.core.vuln_engine.pentest_playbook', 'VulnEngine Playbook'), - ('backend.core.vuln_engine.system_prompts', 'Anti-Hallucination Prompts'), - ('backend.core.vuln_engine.testers.injection', 'Tester: Injection'), - ('backend.core.vuln_engine.testers.auth', 'Tester: Auth'), - ('backend.core.vuln_engine.testers.authorization', 'Tester: Authorization'), - ('backend.core.vuln_engine.testers.client_side', 'Tester: Client-Side'), - ('backend.core.vuln_engine.testers.file_access', 'Tester: File Access'), - ('backend.core.vuln_engine.testers.infrastructure', 'Tester: Infrastructure'), - ('backend.core.vuln_engine.testers.request_forgery', 'Tester: Request Forgery'), - ('backend.core.vuln_engine.testers.advanced_injection', 'Tester: Advanced Injection'), - ('backend.core.vuln_engine.testers.logic', 'Tester: Logic'), - ('backend.core.vuln_engine.testers.data_exposure', 'Tester: Data Exposure'), - ('backend.core.vuln_engine.testers.cloud_supply', 'Tester: Cloud/Supply Chain'), - ('backend.core.vuln_engine.testers.base_tester', 'Tester: Base Class'), -] - -# === Agent Core (14) === -agent_modules = [ - ('backend.core.autonomous_agent', 'Autonomous Agent'), - ('backend.core.agent_memory', 'Agent 
Memory'), - ('backend.core.response_verifier', 'Response Verifier'), - ('backend.core.task_library', 'Task Library'), - ('backend.core.execution_history', 'Execution History'), - ('backend.core.methodology_loader', 'Methodology Loader'), - ('backend.core.ai_pentest_agent', 'AI Pentest Agent'), - ('backend.core.ai_prompt_processor', 'AI Prompt Processor'), - ('backend.core.autonomous_scanner', 'Autonomous Scanner'), - ('backend.core.recon_integration', 'Recon Integration'), - ('backend.core.report_generator', 'Report Generator (Backend)'), - ('backend.core.tool_executor', 'Tool Executor'), - ('backend.core.prompt_engine.parser', 'Prompt Engine Parser'), - ('backend.core.report_engine.generator', 'Report Engine Generator'), -] - -# === Validation Pipeline (6) === -validation_modules = [ - ('backend.core.negative_control', 'Negative Control Engine'), - ('backend.core.proof_of_execution', 'Proof of Execution'), - ('backend.core.confidence_scorer', 'Confidence Scorer'), - ('backend.core.validation_judge', 'Validation Judge'), - ('backend.core.access_control_learner', 'Access Control Learner'), - ('backend.core.adaptive_learner', 'Adaptive Learner'), -] - -# === Agent Autonomy (5) === -autonomy_modules = [ - ('backend.core.request_engine', 'Request Engine'), - ('backend.core.waf_detector', 'WAF Detector'), - ('backend.core.strategy_adapter', 'Strategy Adapter'), - ('backend.core.chain_engine', 'Chain Engine'), - ('backend.core.auth_manager', 'Auth Manager'), -] - -# === AI Reasoning & Intelligence (8) === -intelligence_modules = [ - ('backend.core.token_budget', 'Token Budget'), - ('backend.core.reasoning_engine', 'Reasoning Engine'), - ('backend.core.agent_tasks', 'Agent Tasks'), - ('backend.core.endpoint_classifier', 'Endpoint Classifier'), - ('backend.core.cve_hunter', 'CVE Hunter'), - ('backend.core.deep_recon', 'Deep Recon'), - ('backend.core.banner_analyzer', 'Banner Analyzer'), - ('backend.core.param_analyzer', 'Param Analyzer'), -] - -# === Testing & Exploitation 
(8) === -testing_modules = [ - ('backend.core.payload_mutator', 'Payload Mutator'), - ('backend.core.xss_context_analyzer', 'XSS Context Analyzer'), - ('backend.core.xss_validator', 'XSS Validator'), - ('backend.core.poc_generator', 'PoC Generator'), - ('backend.core.exploit_generator', 'Exploit Generator'), - ('backend.core.poc_validator', 'PoC Validator'), - ('backend.core.request_repeater', 'Request Repeater'), - ('backend.core.site_analyzer', 'Site Analyzer'), -] - -# === Multi-Agent & Orchestration (9) === -multiagent_modules = [ - ('backend.core.agent_base', 'Specialist Agent Base'), - ('backend.core.specialist_agents', 'Specialist Agents'), - ('backend.core.agent_orchestrator', 'Agent Orchestrator'), - ('backend.core.researcher_agent', 'Researcher AI Agent'), - ('backend.core.vuln_orchestrator', 'Vuln Orchestrator'), - ('backend.core.vuln_type_agent', 'Vuln Type Agent'), - ('backend.core.cli_agent_runner', 'CLI Agent Runner'), - ('backend.core.cli_output_parser', 'CLI Output Parser'), - ('backend.core.cli_instructions_builder', 'CLI Instructions Builder'), -] - -# === RAG System (5) === -rag_modules = [ - ('backend.core.rag.engine', 'RAG Engine'), - ('backend.core.rag.vectorstore', 'RAG VectorStore'), - ('backend.core.rag.few_shot', 'RAG Few-Shot'), - ('backend.core.rag.reasoning_templates', 'RAG Reasoning Templates'), - ('backend.core.rag.reasoning_memory', 'RAG Reasoning Memory'), -] - -# === Smart Router (5) === -router_modules = [ - ('backend.core.smart_router', 'Smart Router Package'), - ('backend.core.smart_router.provider_registry', 'Provider Registry'), - ('backend.core.smart_router.router', 'Router Core'), - ('backend.core.smart_router.token_extractor', 'Token Extractor'), - ('backend.core.smart_router.token_refresher', 'Token Refresher'), -] - -# === Kali Sandbox (3) === -kali_modules = [ - ('core.tool_registry', 'Tool Registry (56 tools)'), - ('core.kali_sandbox', 'Kali Sandbox'), - ('core.container_pool', 'Container Pool'), -] - -# === Operations 
(3) === -operations_modules = [ - ('backend.core.checkpoint_manager', 'Checkpoint Manager'), - ('backend.core.notification_manager', 'Notification Manager'), - ('backend.core.knowledge_processor', 'Knowledge Processor'), -] - -all_groups = [ - ('Core Platform', core_modules), - ('API Layer', api_modules), - ('VulnEngine', vuln_modules), - ('Agent Core', agent_modules), - ('Validation Pipeline', validation_modules), - ('Agent Autonomy', autonomy_modules), - ('AI Reasoning & Intelligence', intelligence_modules), - ('Testing & Exploitation', testing_modules), - ('Multi-Agent & Orchestration', multiagent_modules), - ('RAG System', rag_modules), - ('Smart Router', router_modules), - ('Kali Sandbox', kali_modules), - ('Operations', operations_modules), -] - -total = 0 -errors = 0 -for group_name, modules in all_groups: - print(f' --- {group_name} ---') - for mod, name in modules: - total += 1 - try: - __import__(mod) - print(f' OK {name}') - except Exception as e: - err_short = str(e).split(chr(10))[0][:80] - print(f' WARN {name}: {err_short}') - errors += 1 - -print(f'\n {total - errors}/{total} modules loaded ({errors} warnings)') -" 2>&1 || true - - # Validate knowledge base - step "Validating knowledge base..." - $PYTHON -c " -import json, os -kb_path = os.path.join('$PROJECT_DIR', 'data', 'vuln_knowledge_base.json') -if os.path.exists(kb_path): - kb = json.load(open(kb_path)) - types = len(kb.get('vulnerability_types', {})) - insights = len(kb.get('xbow_insights', kb.get('attack_insights', {}))) - print(f' OK Knowledge base: {types} vuln types, {insights} insight categories') -else: - print(' WARN Knowledge base not found at data/vuln_knowledge_base.json') -" 2>&1 || true - - # Validate VulnEngine coverage - step "Validating VulnEngine coverage..." 
- $PYTHON -c " -from backend.core.vuln_engine.registry import VulnerabilityRegistry -from backend.core.vuln_engine.payload_generator import PayloadGenerator -from backend.core.vuln_engine.ai_prompts import VULN_AI_PROMPTS -from backend.core.vuln_engine.pentest_playbook import PENTEST_PLAYBOOK, get_testing_prompts -from backend.core.vuln_engine.system_prompts import CONTEXT_PROMPTS, VULN_TYPE_PROOF_REQUIREMENTS -r = VulnerabilityRegistry() -p = PayloadGenerator() -total_payloads = sum(len(v) for v in p.payload_libraries.values()) -total_prompts = sum(len(get_testing_prompts(v)) for v in PENTEST_PLAYBOOK) -# Count AI prompt builder functions (deep test + stream prompts) -import inspect, backend.core.vuln_engine.ai_prompts as ap -prompt_funcs = [n for n, f in inspect.getmembers(ap, inspect.isfunction) if n.startswith('get_')] -print(f' OK Registry: {len(r.VULNERABILITY_INFO)} types, {len(r.TESTER_CLASSES)} testers') -print(f' OK Payloads: {total_payloads} across {len(p.payload_libraries)} categories') -print(f' OK AI Prompts: {len(VULN_AI_PROMPTS)} per-vuln + {len(prompt_funcs)} builder functions') -print(f' OK Playbook: {len(PENTEST_PLAYBOOK)} vuln types, {total_prompts} testing prompts') -print(f' OK System Prompts: {len(CONTEXT_PROMPTS)} contexts, {len(VULN_TYPE_PROOF_REQUIREMENTS)} proof reqs') -" 2>&1 || true - - # Validate RAG system - step "Validating RAG system..." 
- $PYTHON -c " -from backend.core.rag.reasoning_templates import REASONING_TEMPLATES -from backend.core.rag.few_shot import FewShotSelector -fs = FewShotSelector() -curated = getattr(fs, '_curated_examples', {}) -total_ex = sum(len(ex) for cat in curated.values() if isinstance(cat, dict) for ex in cat.values() if isinstance(ex, list)) -print(f' OK Reasoning Templates: {len(REASONING_TEMPLATES)} vuln types') -print(f' OK Few-Shot Examples: {len(curated)} categories, {total_ex} curated TP/FP examples') + # Quick validation + $PY -c " +import sys; sys.path.insert(0,'$DIR') +mods = ['backend.main','backend.config','backend.core.autonomous_agent','backend.core.md_agent', + 'backend.core.smart_router.router','backend.core.vuln_engine.registry'] +ok=err=0 +for m in mods: + try: __import__(m); ok+=1 + except: err+=1 +print(f' {ok}/{ok+err} core modules OK') " 2>&1 || true fi -# ============================================================================ -# 4. Frontend dependencies -# ============================================================================ -if [ "$BACKEND_ONLY" = false ]; then - header "Setting up React frontend" - - cd "$FRONTEND_DIR" - - if [ ! -d "node_modules" ] || [ "$FORCE_INSTALL" = true ]; then - step "Installing frontend dependencies..." +# ── 4. Frontend setup ─────────────────────────────────────────────── +if [ "$BACK_ONLY" = false ]; then + header "Frontend" + cd "$FRONT" + if [ ! -d "node_modules" ] || [ "$INSTALL" = true ]; then npm install --silent 2>&1 | tail -3 - step "Frontend dependencies installed" + ok "Dependencies installed" else - step "node_modules present (use --install to force)" + ok "Dependencies cached" fi - - cd "$PROJECT_DIR" + cd "$DIR" fi -# ============================================================================ -# 5. Launch backend -# ============================================================================ -if [ "$FRONTEND_ONLY" = false ]; then - header "Starting FastAPI backend (port 8000)" +# ── 5. 
Launch backend ─────────────────────────────────────────────── +if [ "$FRONT_ONLY" = false ]; then + header "Starting backend :$PORT" + source "$VENV/bin/activate" + set -a; source "$DIR/.env"; set +a - source "$VENV_DIR/bin/activate" + [ -n "$MODEL" ] && export DEFAULT_LLM_MODEL="$MODEL" && ok "Model: $MODEL" - # Export env vars - set -a - source "$PROJECT_DIR/.env" - set +a + PYTHONPATH="$DIR" uvicorn backend.main:app \ + --host 0.0.0.0 --port $PORT --reload --log-level info \ + > "$LOGS/backend.log" 2>&1 & + echo $! > "$PIDS/backend.pid" + ok "PID: $(cat "$PIDS/backend.pid")" - PYTHONPATH="$PROJECT_DIR" uvicorn backend.main:app \ - --host 0.0.0.0 \ - --port 8000 \ - --reload \ - --log-level info \ - > "$LOGS_DIR/backend.log" 2>&1 & - - BACKEND_PID=$! - echo "$BACKEND_PID" > "$PID_DIR/backend.pid" - step "Backend started (PID: $BACKEND_PID)" - step "Backend logs: $LOGS_DIR/backend.log" - - # Wait for backend to be ready - step "Waiting for backend..." for i in $(seq 1 15); do - if curl -s http://localhost:8000/docs >/dev/null 2>&1; then - step "Backend is ready" - break - fi - if [ $i -eq 15 ]; then - warn "Backend may still be starting. Check logs." - fi + curl -s "http://localhost:$PORT/docs" >/dev/null 2>&1 && break sleep 1 done fi -# ============================================================================ -# 6. Launch frontend -# ============================================================================ -if [ "$BACKEND_ONLY" = false ]; then - header "Starting React frontend (port 3000)" - - cd "$FRONTEND_DIR" - - if [ "$PRODUCTION_BUILD" = true ]; then - step "Building production frontend..." - npm run build 2>&1 | tail -5 - step "Build complete. Serving from dist/" - npx vite preview --port 3000 \ - > "$LOGS_DIR/frontend.log" 2>&1 & +# ── 6. 
Launch frontend ────────────────────────────────────────────── +if [ "$BACK_ONLY" = false ]; then + header "Starting frontend :$FPORT" + cd "$FRONT" + if [ "$BUILD" = true ]; then + npm run build 2>&1 | tail -3 + npx vite preview --port $FPORT > "$LOGS/frontend.log" 2>&1 & else - step "Starting development server..." - npx vite --port 3000 \ - > "$LOGS_DIR/frontend.log" 2>&1 & + npx vite --port $FPORT > "$LOGS/frontend.log" 2>&1 & fi - - FRONTEND_PID=$! - echo "$FRONTEND_PID" > "$PID_DIR/frontend.pid" - step "Frontend started (PID: $FRONTEND_PID)" - step "Frontend logs: $LOGS_DIR/frontend.log" - - cd "$PROJECT_DIR" - - # Wait for frontend - for i in $(seq 1 10); do - if curl -s http://localhost:3000 >/dev/null 2>&1; then - break - fi - sleep 1 - done + echo $! > "$PIDS/frontend.pid" + ok "PID: $(cat "$PIDS/frontend.pid")" + cd "$DIR" fi -# ============================================================================ -# 7. Summary -# ============================================================================ -header "NeuroSploit v3 is running" - +# ── 7. 
Summary ────────────────────────────────────────────────────── echo "" -if [ "$FRONTEND_ONLY" = false ]; then - echo -e " ${GREEN}Backend API:${NC} http://localhost:8000" - echo -e " ${GREEN}API Docs:${NC} http://localhost:8000/docs" - echo -e " ${GREEN}Scheduler API:${NC} http://localhost:8000/api/v1/scheduler/" - echo -e " ${GREEN}VulnLab API:${NC} http://localhost:8000/api/v1/vuln-lab/" -fi -if [ "$BACKEND_ONLY" = false ]; then - echo -e " ${GREEN}Frontend UI:${NC} http://localhost:3000" -fi +echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}" +echo -e "${G} NeuroSploit v3 — Agent-First AI Pentest Platform${N}" +echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}" echo "" -echo -e " ${BLUE}Logs:${NC}" -[ "$FRONTEND_ONLY" = false ] && echo -e " Backend: tail -f $LOGS_DIR/backend.log" -[ "$BACKEND_ONLY" = false ] && echo -e " Frontend: tail -f $LOGS_DIR/frontend.log" +[ "$FRONT_ONLY" = false ] && { + echo -e " ${G}API${N} http://localhost:$PORT" + echo -e " ${G}Docs${N} http://localhost:$PORT/docs" + echo -e " ${G}Model${N} ${MODEL:-claude-sonnet-4-6-20250918}" +} +[ "$BACK_ONLY" = false ] && echo -e " ${G}Frontend${N} http://localhost:$FPORT" echo "" -echo -e " ${YELLOW}Stop:${NC} $0 (re-run kills previous)" -echo -e " kill \$(cat $PID_DIR/backend.pid) \$(cat $PID_DIR/frontend.pid)" +echo -e " ${B}Architecture${N}" +echo -e " ├─ 108 AI agents (real HTTP testing, PLAN→EXECUTE→ANALYZE)" +echo -e " ├─ 100 vulnerability types + validation pipeline" +echo -e " ├─ Claude 4.6: Opus, Sonnet 4.6, Sonnet 4.5, Haiku 4.5" +echo -e " ├─ 20 LLM providers (auto-failover)" +echo -e " └─ Agent-first flow: Recon (20%) → Agent Grid (65%) → Report (15%)" echo "" -echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${GREEN} NeuroSploit v3 - Autonomous AI Penetration Testing Platform${NC}" -echo -e "${GREEN} 116 modules | 100 vuln types | 18 API routes | 18 frontend 
pages${NC}" -echo -e "" -echo -e " ${BLUE}VulnEngine (100-Type):${NC}" -echo -e " - Registry: 100 vuln types, 526+ payloads, 100 testers" -echo -e " - AI Prompts: 100 per-vuln decision prompts + pentest playbook" -echo -e " - System Prompts: 12 anti-hallucination composable prompts" -echo -e " - Methodology: Deep injection from .md methodology files" -echo -e " - Knowledge Base: 100 vuln types + RAG-indexed insights" -echo -e "" -echo -e " ${BLUE}Autonomous Agent (AI-Powered Pentester):${NC}" -echo -e " - Auto Pentest: 3 AI-parallel streams (recon + junior + tools)" -echo -e " - AI Master Plan: Pre-stream strategic planning (target profiling)" -echo -e " - AI Deep Test: Iterative OBSERVE->PLAN->EXECUTE->ANALYZE->ADAPT" -echo -e " - AI Recon Analysis: Endpoint prioritization, hidden surface probing" -echo -e " - AI Payload Gen: Context-aware payloads per endpoint x vuln_type" -echo -e " - AI Tool Analysis: Tool output analysis for real findings vs noise" -echo -e " - Full IA Testing: Methodology-driven comprehensive sessions" -echo -e " - Multi-Session: Up to 5 concurrent scans" -echo -e " - Pause/Resume/Stop: Real-time scan control with fast cancel" -echo -e " - Checkpoint Manager: Crash-resilient scan state save/restore" -echo -e " - Recon Integration: 40+ tools (subfinder, amass, nuclei, ffuf)" -echo -e " - WAF Detection: 16 signatures, 12 bypass techniques" -echo -e " - Strategy Adapter: Dead endpoints, diminishing returns, recompute" -echo -e " - Chain Engine: 10 chain rules, exploit chaining, attack graph" -echo -e " - Auth Manager: Multi-user, login form detection, session mgmt" -echo -e " - Request Engine: Retry, rate limit, circuit breaker, adaptive" -echo -e " - Request Repeater: Burp-like send/compare/replay/validate" -echo -e " - Site Analyzer: BFS crawl, JS sink detection, AI architecture" -echo -e "" -echo -e " ${BLUE}Validation Pipeline (Anti-FP):${NC}" -echo -e " - Negative Controls: Benign/empty/no-param baseline comparison" -echo -e " - Proof of 
Execution: 25+ per-vuln-type proof methods" -echo -e " - Confidence Scorer: Numeric 0-100 with breakdown" -echo -e " - Validation Judge: Sole authority (controls+proof+AI+score)" -echo -e " - Access Control: Adaptive TP/FP learning, 9 patterns" -echo -e " - Adaptive Learner: Cross-scan TP/FP learning (100 vuln types)" -echo -e "" -echo -e " ${BLUE}AI Reasoning & Intelligence:${NC}" -echo -e " - ReACT Engine: Think/plan/reflect reasoning loop" -echo -e " - Token Budget: Budget tracking with graceful degradation" -echo -e " - Endpoint Classifier: 8 types with risk scoring" -echo -e " - CVE Hunter: NVD API + GitHub exploit search" -echo -e " - Deep Recon: JS crawling, sitemap, robots, API enum" -echo -e " - Banner Analyzer: 80 known CVEs, 19 EOL versions" -echo -e " - Param Analyzer: 8 semantic categories, risk ranking" -echo -e "" -echo -e " ${BLUE}Testing & Exploitation:${NC}" -echo -e " - Payload Mutator: 14 mutation strategies, failure analysis" -echo -e " - XSS Validator: Playwright popup/cookie/DOM/event/CSP" -echo -e " - XSS Context: 8 context checks (attribute, script, etc.)" -echo -e " - Exploit Generator: AI-enhanced PoC, zero-day hypothesis" -echo -e " - PoC Validator: HTTP replay, per-vuln markers, static analysis" -echo -e " - PoC Generator: 20+ per-type exploit code generators" -echo -e "" -echo -e " ${BLUE}Multi-Agent & Orchestration:${NC}" -echo -e " - 5 Specialists: Recon, Exploit, Validator, CVEHunter, Report" -echo -e " - Orchestrator: 3-phase pipeline coordinator with handoffs" -echo -e " - Researcher AI: Hypothesis-driven 0-day discovery with Kali" -echo -e " - Vuln Orchestrator: Per-vuln-type parallel agent orchestration" -echo -e " - Vuln Type Agents: Specialist agents per vulnerability type" -echo -e "" -echo -e " ${BLUE}CLI Agent (AI CLI inside Kali):${NC}" -echo -e " - 3 Providers: Claude Code, Gemini CLI, Codex CLI" -echo -e " - Standalone Mode: CLI Agent runs full pentest autonomously" -echo -e " - Auto Pentest Phase: Optional CLI agent 
phase in auto pentest" -echo -e " - 3-Tier Parsing: JSON markers + regex + AI extraction" -echo -e " - OAuth Integration: SmartRouter token injection into container" -echo -e "" -echo -e " ${BLUE}RAG System:${NC}" -echo -e " - VectorStore: BM25/TF-IDF/ChromaDB backends" -echo -e " - Few-Shot: Curated TP/FP examples for 15+ vuln types" -echo -e " - Reasoning Templates: Structured CoT for 18 vuln types" -echo -e " - Reasoning Memory: Cross-scan pseudo-fine-tuning" -echo -e "" -echo -e " ${BLUE}Smart Router (20 Providers):${NC}" -echo -e " - 8 CLI OAuth: Claude, Gemini, Copilot, Cursor, etc." -echo -e " - 11 API Providers: Anthropic, OpenAI, Google, OpenRouter, etc." -echo -e " - Tier Failover: Auto round-robin with quota tracking" -echo -e " - Token Refresh: Auto CLI token re-extraction + OAuth refresh" -echo -e "" -echo -e " ${BLUE}Kali Sandbox (Container-Per-Scan):${NC}" -echo -e " - Tool Registry: 56 tools (16 pre-installed + 40 on-demand)" -echo -e " - Container Pool: Max concurrent, TTL, orphan cleanup" -echo -e " - VPN Support: OpenVPN/WireGuard per-container tunnels" -echo -e " - Researcher AI: AI-driven tool selection and execution" -echo -e "" -echo -e " ${BLUE}Platform & Operations:${NC}" -echo -e " - 18 API Routes: Agent, Scans, VulnLab, Terminal, Full IA, etc." -echo -e " - 18 Frontend Pages: Auto Pentest, VulnLab, Terminal, Dashboard, etc." 
-echo -e " - Terminal Agent: AI chat + Kali sandbox + VPN integration" -echo -e " - Vuln Lab: 100 types, PortSwigger/CTF/custom targets" -echo -e " - Knowledge Manager: Upload/index custom security documents" -echo -e " - Notifications: Discord, Telegram, WhatsApp/Twilio alerts" -echo -e " - Scheduler: Cron & interval scheduling" -echo -e " - Benchmark: 104 CTF challenges for accuracy testing" -echo -e " - AI Reports: Dual HTML+JSON with per-finding AI analysis" -echo -e " - MCP Server: 12 tools (screenshot, dns, port scan, etc.)" -echo -e " - Reset DB: ./rebuild.sh --reset-db (schema changes)" -echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" +echo -e " ${B}Auto Pentest Flow${N}" +echo -e " 0-20% Recon: endpoints, tech stack, WAF, params, CVEs" +echo -e " 20-85% Agent Grid: 108 agents execute real HTTP tests" +echo -e " 85-100% Finalization: chains, screenshots, AI report" +echo "" +echo -e " ${Y}Logs${N} tail -f $LOGS/backend.log" +echo -e " ${Y}Stop${N} kill \$(cat $PIDS/backend.pid $PIDS/frontend.pid 2>/dev/null)" +echo -e "${C}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${N}" echo "" -# Keep script running so bg processes stay alive wait