diff --git a/backend/api/v1/agent.py b/backend/api/v1/agent.py index 6b943de..0ba5409 100755 --- a/backend/api/v1/agent.py +++ b/backend/api/v1/agent.py @@ -105,6 +105,7 @@ class AgentMode(str, Enum): ANALYZE_ONLY = "analyze_only" # Analysis without testing AUTO_PENTEST = "auto_pentest" # One-click full auto pentest CLI_AGENT = "cli_agent" # AI CLI tool inside Kali sandbox + FULL_LLM_PENTEST = "full_llm_pentest" # LLM drives the entire pentest cycle class AgentRequest(BaseModel): @@ -251,6 +252,7 @@ async def run_agent(request: AgentRequest, background_tasks: BackgroundTasks): "analyze_only": "Analysis only, no active testing", "auto_pentest": "One-click auto pentest: Full recon + 100 vuln types + AI report", "cli_agent": "CLI Agent: AI CLI tool (Claude/Gemini/Codex) inside Kali sandbox", + "full_llm_pentest": "Full LLM Pentest: AI drives the entire pentest cycle autonomously", } return AgentResponse( @@ -379,6 +381,7 @@ async def _run_agent_task( AgentMode.ANALYZE_ONLY: OperationMode.ANALYZE_ONLY, AgentMode.AUTO_PENTEST: OperationMode.AUTO_PENTEST, AgentMode.CLI_AGENT: OperationMode.CLI_AGENT, + AgentMode.FULL_LLM_PENTEST: OperationMode.FULL_LLM_PENTEST, } op_mode = mode_map.get(mode, OperationMode.FULL_AUTO) diff --git a/backend/core/autonomous_agent.py b/backend/core/autonomous_agent.py index 2f0a698..fd156b0 100755 --- a/backend/core/autonomous_agent.py +++ b/backend/core/autonomous_agent.py @@ -255,6 +255,7 @@ class OperationMode(Enum): ANALYZE_ONLY = "analyze_only" AUTO_PENTEST = "auto_pentest" CLI_AGENT = "cli_agent" + FULL_LLM_PENTEST = "full_llm_pentest" class FindingSeverity(Enum): @@ -4005,6 +4006,8 @@ NOT_VULNERABLE: """ return await self._run_auto_pentest() elif self.mode == OperationMode.CLI_AGENT: return await self._run_cli_agent_mode() + elif self.mode == OperationMode.FULL_LLM_PENTEST: + return await self._run_full_llm_pentest() else: return await self._run_full_auto() except Exception as e: @@ -5008,6 +5011,457 @@ NOT_VULNERABLE: """ await self._update_progress(100, "CLI Agent pentest complete") return report + # ═══════════════════════════════════════════════════════════════════════════ + # FULL LLM PENTEST MODE — AI drives the entire pentest cycle + # ═══════════════════════════════════════════════════════════════════════════ + + async def _run_full_llm_pentest(self) -> Dict[str, Any]: + """Full LLM Pentest: the AI drives every step of the pentest. + + The LLM acts as a senior penetration tester. It plans HTTP requests, + the system executes them, and the LLM analyzes real responses to + identify vulnerabilities. Pure AI-driven, no hardcoded payloads. + + Loop: LLM plans → System executes HTTP → LLM analyzes → repeat + """ + await self._update_progress(0, "Full LLM Pentest starting") + await self.log("info", "=" * 60) + await self.log("info", " FULL LLM PENTEST MODE") + await self.log("info", " AI drives the entire pentest cycle") + await self.log("info", "=" * 60) + + if not self.llm.is_available(): + await self.log("error", "LLM not available! This mode requires an active LLM provider.") + await self.log("error", "Configure ANTHROPIC_API_KEY, OPENAI_API_KEY, or another provider.") + return self._generate_error_report("LLM not available for Full LLM Pentest mode") + + # Import prompts + from backend.core.vuln_engine.ai_prompts import ( + get_full_llm_pentest_system_prompt, + get_full_llm_pentest_round_prompt, + get_full_llm_pentest_report_prompt, + ) + + # Load methodology prompt + methodology = self.custom_prompt or "" + if not methodology: + try: + prompt_path = Path("/opt/Prompts-PenTest/pentestcompleto_en.md") + if not prompt_path.exists(): + prompt_path = Path("/opt/Prompts-PenTest/pentestcompleto.md") + if prompt_path.exists(): + methodology = prompt_path.read_text(encoding="utf-8") + except Exception: + pass + + # Build system prompt + system_prompt = get_full_llm_pentest_system_prompt(methodology) + await self.log("info", f" System prompt: {len(system_prompt)} chars") + await self.log("info", f" Methodology: {'loaded' if methodology else 'none'} ({len(methodology)} chars)") + + # State tracking + MAX_ROUNDS = 30 + MAX_ACTIONS_PER_ROUND = 10 + total_requests = 0 + discovered_info_parts: List[str] = [] + all_round_results: List[str] = [] # accumulates round-by-round results + llm_findings: List[Dict] = [] + + await self._update_progress(2, "Full LLM Pentest: Round 1") + + for round_num in range(1, MAX_ROUNDS + 1): + if self.is_cancelled(): + await self.log("warning", "[LLM PENTEST] Cancelled by user") + break + + # Calculate progress: rounds map to 0-85% + progress = min(85, int((round_num / MAX_ROUNDS) * 85)) + phase_label = ( + "Recon" if round_num <= 8 else + "Testing" if round_num <= 25 else + "Post-Exploitation" if round_num <= 28 else + "Reporting" + ) + await self._update_progress(progress, f"Full LLM Pentest: {phase_label} (Round {round_num}/{MAX_ROUNDS})") + + # Build round prompt with accumulated context + # Keep only recent results to manage token budget (last 5 rounds) + recent_results = "\n\n".join(all_round_results[-5:]) if all_round_results else "" + discovered_summary = "\n".join(discovered_info_parts[-30:]) if discovered_info_parts else "" + + round_prompt = get_full_llm_pentest_round_prompt( + target=self.target, + round_num=round_num, + max_rounds=MAX_ROUNDS, + previous_results=recent_results, + discovered_info=discovered_summary, + findings_so_far=len(self.findings), + ) + + # Call LLM + await self.log("info", f"[LLM PENTEST] Round {round_num}: Asking AI to plan ({phase_label})") + try: + llm_response = await self.llm.generate( + prompt=round_prompt, + system=system_prompt, + max_tokens=8192, + ) + except Exception as e: + await self.log("error", f"[LLM PENTEST] LLM call failed: {e}") + # Try to continue with next round + all_round_results.append(f"Round {round_num}: LLM call failed — {str(e)[:100]}") + continue + + # Parse LLM response as JSON + parsed = self._parse_llm_json(llm_response) + if not parsed: + await self.log("warning", f"[LLM PENTEST] Round {round_num}: Failed to parse LLM JSON response") + all_round_results.append(f"Round {round_num}: LLM returned invalid JSON") + continue + + reasoning = parsed.get("reasoning", "") + actions = parsed.get("actions", []) + findings = parsed.get("findings", []) + phase = parsed.get("phase", "unknown") + done = parsed.get("done", False) + summary = parsed.get("summary", "") + + if reasoning: + await self.log("info", f"[LLM PENTEST] AI reasoning: {reasoning[:200]}") + + # Execute HTTP actions + round_result_parts = [f"=== Round {round_num} ({phase}) ==="] + if reasoning: + round_result_parts.append(f"Reasoning: {reasoning}") + + actions_to_exec = actions[:MAX_ACTIONS_PER_ROUND] + await self.log("info", f"[LLM PENTEST] Executing {len(actions_to_exec)} HTTP requests") + + for i, action in enumerate(actions_to_exec): + if self.is_cancelled(): + break + + result = await self._execute_llm_action(action, i + 1) + total_requests += 1 + + if result: + # Add to round results for LLM context + result_summary = self._summarize_response(action, result) + round_result_parts.append(result_summary) + + # Track discovered info + purpose = action.get("purpose", "") + url = action.get("url", "") + status = result.get("status", 0) + if status == 200: + discovered_info_parts.append( + f"- {action.get('method', 'GET')} {url} → {status} " + f"({len(result.get('body', ''))} bytes) — {purpose}" + ) + elif status in (301, 302, 303, 307, 308): + location = result.get("headers", {}).get("Location", result.get("headers", {}).get("location", "")) + discovered_info_parts.append(f"- {url} → redirect to {location}") + elif status == 404: + discovered_info_parts.append(f"- {url} → 404 (not found)") + elif status == 403: + discovered_info_parts.append(f"- {url} → 403 (forbidden)") + else: + discovered_info_parts.append(f"- {url} → {status}") + else: + round_result_parts.append( + f"Request {i+1}: {action.get('method', 'GET')} {action.get('url', '?')} → FAILED (connection error/timeout)" + ) + + all_round_results.append("\n".join(round_result_parts)) + + # Process findings from this round + for finding_data in findings: + await self._process_llm_pentest_finding(finding_data, round_num) + + # Check if LLM says we're done + if done: + await self.log("success", f"[LLM PENTEST] AI completed pentest after {round_num} rounds") + if summary: + await self.log("info", f"[LLM PENTEST] Summary: {summary[:300]}") + break + + await self.log("info", f"[LLM PENTEST] Round {round_num} complete: " + f"{len(actions_to_exec)} requests, {len(findings)} findings, " + f"total: {total_requests} requests, {len(self.findings)} confirmed findings") + + # ── FINALIZATION ── + await self._update_progress(88, "Full LLM Pentest: Generating report") + await self.log("info", f"[LLM PENTEST] Testing complete: {total_requests} total requests, " + f"{len(self.findings)} confirmed findings") + + # Generate AI-enhanced report + report = await self._generate_full_report() + + # Also try to get an AI narrative report + if self.llm.is_available() and self.findings: + try: + findings_json = json.dumps([ + { + "title": f.title, + "severity": f.severity, + "vulnerability_type": f.vulnerability_type, + "affected_endpoint": f.affected_endpoint, + "parameter": f.parameter, + "payload": f.payload, + "evidence": f.evidence[:500] if f.evidence else "", + "description": f.description, + "impact": f.impact, + "cvss_score": f.cvss_score, + "cwe_id": f.cwe_id, + "poc_code": f.poc_code, + "remediation": f.remediation, + "confidence_score": f.confidence_score, + } + for f in self.findings + ], indent=2) + + report_prompt = get_full_llm_pentest_report_prompt( + target=self.target, + findings_json=findings_json, + total_rounds=min(round_num, MAX_ROUNDS), + total_requests=total_requests, + ) + ai_report_text = await self.llm.generate( + prompt=report_prompt, + system="You are a professional penetration testing report writer.", + max_tokens=16384, + ) + if ai_report_text: + report["ai_narrative_report"] = ai_report_text + await self.log("success", "[LLM PENTEST] AI narrative report generated") + except Exception as e: + await self.log("debug", f"[LLM PENTEST] Report generation error: {e}") + + await self._update_progress(100, "Full LLM Pentest complete") + await self.log("info", "=" * 60) + await self.log("info", f" FULL LLM PENTEST COMPLETE: {len(self.findings)} findings") + await self.log("info", f" Total HTTP requests: {total_requests}") + await self.log("info", "=" * 60) + return report + + async def _execute_llm_action(self, action: Dict, action_num: int) -> Optional[Dict]: + """Execute a single HTTP action planned by the LLM. + + The action dict has: method, url, headers, body, content_type, purpose + Returns the response dict or None on failure. + """ + method = (action.get("method") or "GET").upper() + url = action.get("url", "") + custom_headers = action.get("headers") or {} + body = action.get("body") + content_type = action.get("content_type", "") + purpose = action.get("purpose", "") + + if not url: + return None + + # Ensure URL is absolute + if not url.startswith("http"): + url = urljoin(self.target, url) + + # Build request headers + headers = dict(self.auth_headers) if self.auth_headers else {} + headers.update(custom_headers) + if content_type and "Content-Type" not in headers and "content-type" not in headers: + headers["Content-Type"] = content_type + + # Log the request + await self.log("info", f"[LLM PENTEST] → {method} {url[:120]} ({purpose[:60]})") + + try: + timeout = aiohttp.ClientTimeout(total=15) + + if self.request_engine: + # Use request engine for retry/rate limiting + data = None + params = None + if method == "GET": + # Parse params from URL + pass # URL already has params + else: + if body: + if content_type and "json" in content_type: + try: + data = json.loads(body) if isinstance(body, str) else body + except (json.JSONDecodeError, TypeError): + data = body + else: + data = body + else: + data = None + + result = await self.request_engine.request( + url, method=method, + headers=headers if headers else None, + data=data, + allow_redirects=True, + ) + if result: + resp_dict = { + "status": result.status, + "body": result.body[:50000] if result.body else "", + "headers": result.headers, + "url": result.url, + } + status_str = f"{result.status}" + body_len = len(result.body) if result.body else 0 + await self.log("info", f"[LLM PENTEST] ← {status_str} ({body_len} bytes)") + return resp_dict + else: + # Direct session fallback + req_kwargs: Dict[str, Any] = { + "allow_redirects": True, + "timeout": timeout, + "headers": headers, + } + if method != "GET" and body: + if content_type and "json" in content_type: + try: + req_kwargs["json"] = json.loads(body) if isinstance(body, str) else body + except (json.JSONDecodeError, TypeError): + req_kwargs["data"] = body + else: + req_kwargs["data"] = body + + async with self.session.request(method, url, **req_kwargs) as resp: + resp_body = await resp.text() + resp_dict = { + "status": resp.status, + "body": resp_body[:50000], + "headers": dict(resp.headers), + "url": str(resp.url), + } + await self.log("info", f"[LLM PENTEST] ← {resp.status} ({len(resp_body)} bytes)") + return resp_dict + + except asyncio.TimeoutError: + await self.log("debug", f"[LLM PENTEST] Timeout: {url[:80]}") + except Exception as e: + await self.log("debug", f"[LLM PENTEST] Request error: {str(e)[:80]}") + return None + + def _summarize_response(self, action: Dict, result: Dict) -> str: + """Create a compact summary of an HTTP response for the LLM context.""" + method = action.get("method", "GET") + url = action.get("url", "?") + purpose = action.get("purpose", "") + status = result.get("status", 0) + headers = result.get("headers", {}) + body = result.get("body", "") + + # Extract key headers + key_headers = {} + for h in ["Server", "server", "Content-Type", "content-type", + "X-Powered-By", "x-powered-by", "Set-Cookie", "set-cookie", + "Location", "location", "X-Frame-Options", "x-frame-options", + "Content-Security-Policy", "content-security-policy", + "WWW-Authenticate", "www-authenticate"]: + val = headers.get(h) + if val: + key_headers[h] = val[:200] + + # Truncate body for context (keep meaningful content) + body_preview = body[:3000] if body else "" + + lines = [ + f"Request: {method} {url}", + f"Purpose: {purpose}", + f"Status: {status}", + f"Headers: {json.dumps(key_headers, default=str)}", + f"Body ({len(body)} bytes):", + body_preview, + ] + return "\n".join(lines) + + def _parse_llm_json(self, text: str) -> Optional[Dict]: + """Parse JSON from LLM response, handling markdown code blocks.""" + if not text: + return None + + # Try direct parse + text_stripped = text.strip() + try: + return json.loads(text_stripped) + except (json.JSONDecodeError, ValueError): + pass + + # Try extracting from markdown code block + import re + patterns = [ + r'```json\s*\n(.*?)\n\s*```', + r'```\s*\n(.*?)\n\s*```', + r'\{[\s\S]*\}', + ] + for pattern in patterns[:2]: + match = re.search(pattern, text, re.DOTALL) + if match: + try: + return json.loads(match.group(1)) + except (json.JSONDecodeError, ValueError): + continue + + # Try finding the outermost JSON object + # Find first { and last } + first_brace = text.find('{') + last_brace = text.rfind('}') + if first_brace >= 0 and last_brace > first_brace: + try: + return json.loads(text[first_brace:last_brace + 1]) + except (json.JSONDecodeError, ValueError): + pass + + return None + + async def _process_llm_pentest_finding(self, finding_data: Dict, round_num: int): + """Process a finding reported by the LLM in Full LLM Pentest mode. + + Creates a Finding object and routes it through the validation pipeline. + """ + title = finding_data.get("title", "LLM Finding") + severity = finding_data.get("severity", "medium").lower() + if severity not in ("critical", "high", "medium", "low", "info"): + severity = "medium" + + vuln_type = finding_data.get("vulnerability_type", "unknown") + evidence = finding_data.get("evidence", "") + + # Skip findings without evidence (anti-hallucination) + if not evidence or len(evidence) < 10: + await self.log("debug", f"[LLM PENTEST] Skipping finding without evidence: {title}") + return + + finding = Finding( + id=hashlib.md5( + f"{title}|{finding_data.get('affected_endpoint', '')}|{finding_data.get('payload', '')}|{round_num}".encode() + ).hexdigest()[:12], + title=title, + severity=severity, + vulnerability_type=vuln_type, + cvss_score=finding_data.get("cvss_score", 0.0), + cwe_id=finding_data.get("cwe_id", ""), + description=finding_data.get("description", ""), + affected_endpoint=finding_data.get("affected_endpoint", self.target), + parameter=finding_data.get("parameter", ""), + payload=finding_data.get("payload", ""), + evidence=evidence, + impact=finding_data.get("impact", ""), + poc_code=finding_data.get("poc_code", ""), + remediation=finding_data.get("remediation", ""), + ai_verified=True, + confidence_score=70, # Initial score, ValidationJudge will refine + ai_status="confirmed", + ) + + # Route through validation pipeline (_judge_finding handles + # negative controls, proof of execution, confidence scoring) + await self._add_finding(finding) + await self.log("success", f"[LLM PENTEST] Finding: {severity.upper()} — {title}") + # ── Pre-Stream AI Master Plan ── async def _ai_master_plan(self) -> Dict: diff --git a/backend/core/vuln_engine/ai_prompts.py b/backend/core/vuln_engine/ai_prompts.py index 978c0f0..c93e99a 100755 --- a/backend/core/vuln_engine/ai_prompts.py +++ b/backend/core/vuln_engine/ai_prompts.py @@ -2097,3 +2097,222 @@ RULES: - Prioritize by LIKELIHOOD of exploitation, not theoretical severity. - Consider technology-specific vulnerabilities (e.g., Spring → actuator, WordPress → wp-admin). - Flag parameters like 'url', 'file', 'path', 'redirect', 'callback', 'template' as high-risk.""" + + +# ═══════════════════════════════════════════════════════════════════════════════ +# FULL LLM PENTEST PROMPTS — AI drives the entire pentest cycle +# ═══════════════════════════════════════════════════════════════════════════════ + +def get_full_llm_pentest_system_prompt(methodology: str = "") -> str: + """System prompt for the Full LLM Pentest mode. + + The LLM acts as a senior penetration tester and plans HTTP requests + for the system to execute. The LLM sees real responses and adapts. + """ + methodology_section = "" + if methodology: + # Truncate very large methodology to fit context + max_len = 60000 + if len(methodology) > max_len: + methodology = methodology[:max_len] + "\n\n[... methodology truncated for context ...]" + methodology_section = f""" +## PENTEST METHODOLOGY + +Follow this methodology throughout your testing: + +{methodology} +""" + + return f"""You are a Senior Offensive Security Consultant performing an authorized penetration test in a controlled lab environment. + +ALL ACTIVITIES ARE AUTHORIZED. The target is a deliberately vulnerable lab environment. + +## YOUR ROLE + +You are the testing engine. You think like a human pentester using Burp Suite / curl. +You PLAN what HTTP requests to make, and the system EXECUTES them for you. +You then ANALYZE the real responses and ADAPT your strategy. + +## HOW THIS WORKS + +Each round you output a JSON object with: +1. **reasoning**: What you observed, what you learned, what to try next +2. **actions**: HTTP requests you want the system to execute (max 10 per round) +3. **findings**: Vulnerabilities you confirmed based on REAL response evidence +4. **phase**: Current phase (recon, testing, post_exploitation, reporting) +5. **done**: true when you've completed the full pentest cycle + +The system executes your HTTP requests and returns the actual responses. +You then analyze those responses and plan your next actions. + +## PHASES + +### Phase 1: RECON (rounds 1-8) +- Fingerprint technologies (server headers, cookies, response patterns) +- Discover endpoints (crawl links, check robots.txt, sitemap.xml) +- Map input vectors (forms, parameters, headers, cookies) +- Identify authentication mechanisms +- Check for common files (.env, .git, admin panels) + +### Phase 2: TESTING (rounds 9-25) +Test each discovered endpoint for: +- SQL Injection (error-based, boolean-based, time-based, UNION-based) +- Cross-Site Scripting (reflected, stored, DOM-based) +- Local/Remote File Inclusion (LFI/RFI) +- Command Injection (OS command injection via various delimiters) +- Authentication bypass +- SSRF, CSRF, IDOR, XXE +- Security misconfigurations +- Sensitive data exposure +- Directory traversal + +### Phase 3: POST-EXPLOITATION (rounds 26-28) +- Extract data from confirmed vulnerabilities +- Chain vulnerabilities for maximum impact +- Test privilege escalation paths +- Verify data exposure scope + +### Phase 4: REPORTING (round 29-30) +- Compile all findings with evidence +- Set done=true + +{methodology_section} + +## CRITICAL RULES + +1. **REAL EVIDENCE ONLY**: Never claim a vulnerability without evidence from an actual response. + - SQLi: Show the SQL error message or extracted data from the response body + - XSS: Show the reflected payload in the response body unescaped + - LFI: Show file contents (e.g., /etc/passwd content) in the response + - Command Injection: Show command output in the response + +2. **NO HALLUCINATION**: If a test fails (payload is filtered, no error), say so honestly. + Do NOT fabricate evidence. The system will verify your claims. + +3. **ADAPT**: If WAF blocks payloads, try encoding, case variation, alternative syntax. + If an endpoint 404s, move to the next one. Don't repeat failed tests. + +4. **BE SPECIFIC**: Include exact URLs, parameters, payloads, and expected vs actual behavior. + +5. **PROGRESS**: Don't repeat the same tests. Track what you've already tested. + +## OUTPUT FORMAT (strict JSON) + +```json +{{ + "phase": "recon|testing|post_exploitation|reporting", + "reasoning": "Detailed explanation of what you observed and why you're taking these actions", + "actions": [ + {{ + "method": "GET|POST|PUT|DELETE|OPTIONS|HEAD|PATCH", + "url": "https://target.com/path?param=value", + "headers": {{"Header-Name": "value"}}, + "body": "form or raw body data (for POST/PUT)", + "content_type": "application/x-www-form-urlencoded|application/json|multipart/form-data", + "purpose": "What this request tests" + }} + ], + "findings": [ + {{ + "title": "SQL Injection in /login username parameter", + "severity": "critical|high|medium|low|info", + "vulnerability_type": "sql_injection|xss_reflected|xss_stored|lfi|rfi|command_injection|ssrf|csrf|idor|xxe|auth_bypass|open_redirect|directory_listing|info_disclosure|security_misconfiguration", + "affected_endpoint": "/login", + "parameter": "username", + "payload": "' OR 1=1--", + "evidence": "Response contained: You have an error in your SQL syntax...", + "description": "The username parameter is vulnerable to SQL injection...", + "impact": "An attacker could bypass authentication and extract all database contents", + "cvss_score": 9.8, + "cwe_id": "CWE-89", + "poc_code": "curl -X POST 'https://target/login' -d 'username=%27+OR+1%3D1--&password=test'", + "remediation": "Use parameterized queries / prepared statements" + }} + ], + "done": false, + "summary": "Only set when done=true. Full executive summary of the pentest." +}} +``` + +IMPORTANT: Output ONLY valid JSON. No markdown, no text before or after the JSON object.""" + + +def get_full_llm_pentest_round_prompt( + target: str, + round_num: int, + max_rounds: int, + previous_results: str, + discovered_info: str, + findings_so_far: int, +) -> str: + """Build the round prompt for each iteration of the Full LLM Pentest loop.""" + + phase_hint = "" + if round_num <= 8: + phase_hint = "You should be in the RECON phase. Focus on discovering endpoints, technologies, and input vectors." + elif round_num <= 25: + phase_hint = "You should be in the TESTING phase. Test discovered endpoints for vulnerabilities." + elif round_num <= 28: + phase_hint = "You should be in the POST-EXPLOITATION phase. Chain vulnerabilities and extract data." + else: + phase_hint = "You should be in the REPORTING phase. Compile final findings and set done=true." + + return f"""## ROUND {round_num}/{max_rounds} + +Target: {target} +Findings so far: {findings_so_far} +{phase_hint} + +{"WARNING: This is your LAST round. Set done=true and include your final summary." if round_num >= max_rounds else ""} + +## WHAT YOU KNOW SO FAR + +{discovered_info if discovered_info else "Nothing discovered yet. Start with basic recon."} + +## PREVIOUS ROUND RESULTS + +{previous_results if previous_results else "This is the first round. No previous results."} + +Plan your next actions. Remember: +- Max 10 HTTP requests per round +- Be strategic — don't waste requests on unlikely paths +- Build on what you've learned from previous responses +- Report findings as soon as you have REAL evidence + +Output your response as a single JSON object.""" + + +def get_full_llm_pentest_report_prompt( + target: str, + findings_json: str, + total_rounds: int, + total_requests: int, +) -> str: + """Prompt for the LLM to generate the final pentest report.""" + return f"""Generate a professional penetration test report for the following engagement. + +## Engagement Details +- Target: {target} +- Testing Rounds: {total_rounds} +- Total HTTP Requests: {total_requests} +- Methodology: AI-Driven Full Pentest (LLM as Testing Engine) + +## Confirmed Findings + +{findings_json} + +## Report Structure + +Generate a comprehensive report with: + +1. **Executive Summary** — Business impact (non-technical language), overall risk rating, key findings +2. **Scope and Methodology** — What was tested, approach taken, standards followed (OWASP, PTES) +3. **Detailed Findings** — For each vulnerability: title, severity, description, evidence, impact, remediation, OWASP/CWE references +4. **Risk Prioritization Table** — All findings sorted by severity with CVSS scores +5. **Remediation Roadmap** — Short-term fixes, medium-term improvements, long-term recommendations +6. **Conclusion** + +Write in professional English suitable for C-level stakeholders and technical teams. +Be precise, structured, and security-focused. + +Output the report as a markdown document.""" diff --git a/frontend/src/pages/FullIATestingPage.tsx b/frontend/src/pages/FullIATestingPage.tsx index 88c5ea3..cb21689 100644 --- a/frontend/src/pages/FullIATestingPage.tsx +++ b/frontend/src/pages/FullIATestingPage.tsx @@ -4,7 +4,7 @@ import { Crosshair, Shield, ChevronDown, ChevronUp, Loader2, AlertTriangle, CheckCircle2, Globe, Lock, Bug, FileText, ScrollText, X, ExternalLink, Download, Sparkles, - Brain, Wrench, Layers, Trash2, Clock, Search, + Brain, Trash2, Clock, Search, Activity, Terminal } from 'lucide-react' import { PieChart, Pie, Cell, Tooltip as RechartsTooltip, ResponsiveContainer } from 'recharts' @@ -14,22 +14,12 @@ import type { AgentStatus, AgentFinding, AgentLog, ToolExecution, ContainerStatu // ─── Constants ──────────────────────────────────────────────────────────────── const PHASES = [ - { key: 'parallel', label: 'Parallel Streams', icon: Layers, range: [0, 50] as const }, - { key: 'deep', label: 'Deep Analysis', icon: Brain, range: [50, 75] as const }, - { key: 'final', label: 'Finalization', icon: Shield, range: [75, 100] as const }, + { key: 'recon', label: 'AI Recon', icon: Globe, range: [0, 25] as const }, + { key: 'testing', label: 'AI Testing', icon: Bug, range: [25, 70] as const }, + { key: 'postexploit', label: 'Post-Exploitation', icon: Brain, range: [70, 85] as const }, + { key: 'report', label: 'Report', icon: Shield, range: [85, 100] as const }, ] -const STREAMS = [ - { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 25 }, - { key: 'junior', label: 'Junior AI', icon: Brain, color: 'purple', activeUntil: 35 }, - { key: 'tools', label: 'Tools', icon: Wrench, color: 'orange', activeUntil: 50 }, -] as const - -const STREAM_COLORS: Record = { - blue: { bg: 'bg-blue-500/20', text: 'text-blue-400', border: 'border-blue-500/40', pulse: 'bg-blue-400' }, - purple: { bg: 'bg-purple-500/20', text: 'text-purple-400', border: 'border-purple-500/40', pulse: 'bg-purple-400' }, - orange: { bg: 'bg-orange-500/20', text: 'text-orange-400', border: 'border-orange-500/40', pulse: 'bg-orange-400' }, -} const SEVERITY_COLORS: Record = { critical: 'bg-red-500', high: 'bg-orange-500', medium: 'bg-yellow-500', @@ -53,11 +43,8 @@ const CONFIDENCE_STYLES: Record = { const LOG_FILTERS = [ { key: 'all', label: 'All', color: '' }, - { key: 'stream1', label: 'Recon', color: 'text-blue-400' }, - { key: 'stream2', label: 'Junior', color: 'text-purple-400' }, - { key: 'stream3', label: 'Tools', color: 'text-orange-400' }, - { key: 'deep', label: 'Deep', color: 'text-cyan-400' }, - { key: 'container', label: 'Container', color: 'text-cyan-300' }, + { key: 'llm', label: 'LLM Pentest', color: 'text-red-400' }, + { key: 'ai', label: 'AI Decisions', color: 'text-purple-400' }, { key: 'error', label: 'Errors', color: 'text-red-400' }, ] @@ -70,9 +57,10 @@ const MAX_TOASTS = 5 // ─── Utility Functions ──────────────────────────────────────────────────────── function phaseFromProgress(progress: number): number { - if (progress < 50) return 0 - if (progress < 75) return 1 - return 2 + if (progress < 25) return 0 + if (progress < 70) return 1 + if (progress < 85) return 2 + return 3 } function formatElapsed(totalSeconds: number): string { @@ -83,6 +71,7 @@ function formatElapsed(totalSeconds: number): string { } function logMessageColor(message: string): string { + if (message.startsWith('[LLM PENTEST]')) return 'text-red-400' if (message.startsWith('[STREAM 1]')) return 'text-blue-400' if (message.startsWith('[STREAM 2]')) return 'text-purple-400' if (message.startsWith('[STREAM 3]')) return 'text-orange-400' @@ -101,11 +90,8 @@ function logMessageColor(message: string): string { function matchLogFilter(log: AgentLog, filter: string): boolean { if (filter === 'all') return true - if (filter === 'stream1') return log.message.startsWith('[STREAM 1]') - if (filter === 'stream2') return log.message.startsWith('[STREAM 2]') - if (filter === 'stream3') return log.message.startsWith('[STREAM 3]') - if (filter === 'deep') return log.message.startsWith('[DEEP]') - if (filter === 'container') return log.message.startsWith('[CONTAINER]') + if (filter === 'llm') return log.message.startsWith('[LLM PENTEST]') + if (filter === 'ai') return log.source === 'llm' || log.message.includes('[AI]') || log.message.includes('[LLM]') if (filter === 'error') return log.level === 'error' || log.level === 'warning' return true } @@ -139,33 +125,6 @@ interface Toast { // ─── Sub-Components ─────────────────────────────────────────────────────────── -function StreamBadge({ stream, progress, isRunning }: { - stream: typeof STREAMS[number]; progress: number; isRunning: boolean -}) { - const active = isRunning && progress < stream.activeUntil - const done = progress >= stream.activeUntil - const colors = STREAM_COLORS[stream.color] - const Icon = stream.icon - - return ( -
- {active && ( - - - - - )} - {done && } - {!active && !done && } - {stream.label} -
- ) -} - function LiveStatsDashboard({ status, elapsedSeconds, toolExecutions }: { status: AgentStatus; elapsedSeconds: number; toolExecutions: ToolExecution[] }) { @@ -487,13 +446,20 @@ export default function FullIATestingPage() { // ─── Elapsed Time Ticker ────────────────────────────────────────────────── useEffect(() => { - if (!isRunning || !status?.started_at) return + if (!status?.started_at) return const startTime = new Date(status.started_at).getTime() - const tick = () => setElapsedSeconds(Math.floor((Date.now() - startTime) / 1000)) - tick() - const id = setInterval(tick, 1000) - return () => clearInterval(id) - }, [isRunning, status?.started_at]) + if (isRunning) { + const tick = () => setElapsedSeconds(Math.floor((Date.now() - startTime) / 1000)) + tick() + const id = setInterval(tick, 1000) + return () => clearInterval(id) + } else { + const endTime = status.completed_at + ? new Date(status.completed_at).getTime() + : Date.now() + setElapsedSeconds(Math.max(0, Math.floor((endTime - startTime) / 1000))) + } + }, [isRunning, status?.started_at, status?.completed_at]) // ─── Polling ────────────────────────────────────────────────────────────── @@ -593,8 +559,9 @@ export default function FullIATestingPage() { try { const resp = await agentApi.autoPentest(primaryTarget, { + mode: 'full_llm_pentest', prompt: promptContent, - enable_kali_sandbox: true, + enable_kali_sandbox: false, auth_type: authType || undefined, auth_value: authValue || undefined, preferred_provider: selectedProvider || undefined, @@ -603,7 +570,7 @@ export default function FullIATestingPage() { setAgentId(resp.agent_id) setIsRunning(true) - addToast('FULL AI pentest started', 'info') + addToast('Full LLM Pentest started', 'info') localStorage.setItem(SESSION_KEY, JSON.stringify({ agentId: resp.agent_id, target: primaryTarget, @@ -705,9 +672,9 @@ export default function FullIATestingPage() {
-

FULL AI TESTING

+

FULL LLM PENTEST

- Complete AI-driven penetration test. Recon, exploitation, post-exploitation with Kali sandbox. + The LLM drives the entire pentest cycle. AI plans HTTP requests, system executes, AI analyzes and adapts.

{promptContent && ( )} @@ -885,9 +852,9 @@ export default function FullIATestingPage() { status?.status === 'error' ? 'bg-red-500' : 'bg-gray-500' }`} />

- {isRunning ? 'FULL AI Pentest Running' : - status?.status === 'completed' ? 'Pentest Complete' : - status?.status === 'error' ? 'Pentest Failed' : 'Pentest Stopped'} + {isRunning ? 'Full LLM Pentest Running' : + status?.status === 'completed' ? 'LLM Pentest Complete' : + status?.status === 'error' ? 'LLM Pentest Failed' : 'LLM Pentest Stopped'}

{target} @@ -946,7 +913,7 @@ export default function FullIATestingPage() { {/* Phase Indicators */} -
+
{PHASES.map((phase, idx) => { const Icon = phase.icon const isActive = idx === currentPhaseIdx && isRunning @@ -971,13 +938,6 @@ export default function FullIATestingPage() { {phase.range[0]}-{phase.range[1]}%
- {idx === 0 && ( -
- {STREAMS.map(stream => ( - - ))} -
- )}
) })} @@ -1272,7 +1232,7 @@ export default function FullIATestingPage() { {isRunning ? ( - FULL AI pentest in progress... Findings will appear as discovered. + Full LLM Pentest in progress... AI is planning and executing tests. ) : ( 'No findings' @@ -1304,7 +1264,7 @@ export default function FullIATestingPage() { )}

- {status.status === 'completed' ? 'FULL AI Pentest Complete' : 'Pentest Stopped'} + {status.status === 'completed' ? 'Full LLM Pentest Complete' : 'LLM Pentest Stopped'}