From e5857d00c1701eeea28095853a396d04baec829a Mon Sep 17 00:00:00 2001 From: CyberSecurityUP Date: Tue, 24 Feb 2026 00:28:26 -0300 Subject: [PATCH] NeuroSploit v3.2.2 - Full LLM Pentest Mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New feature: Full LLM Pentest mode where the AI drives the entire penetration test cycle autonomously. The LLM plans HTTP requests, the system executes them, and the LLM analyzes real responses to identify vulnerabilities — like a human pentester using Burp Suite. - New OperationMode.FULL_LLM_PENTEST + AgentMode enum - _run_full_llm_pentest(): 30-round ReACT loop (plan→execute→analyze→adapt) - 3 new prompt functions in ai_prompts.py (system, round, report) - Anti-hallucination: findings without real evidence are rejected - All findings routed through ValidationJudge pipeline - FullIATestingPage updated: 4-phase UI (Recon→Testing→PostExploit→Report) - No Kali sandbox required — uses system HTTP client directly - Methodology injection from pentestcompleto_en.md (118KB) Co-Authored-By: Claude Opus 4.6 --- backend/api/v1/agent.py | 3 + backend/core/autonomous_agent.py | 454 +++++++++++++++++++++++ backend/core/vuln_engine/ai_prompts.py | 219 +++++++++++ frontend/src/pages/FullIATestingPage.tsx | 130 +++---- 4 files changed, 721 insertions(+), 85 deletions(-) diff --git a/backend/api/v1/agent.py b/backend/api/v1/agent.py index 6b943de..0ba5409 100755 --- a/backend/api/v1/agent.py +++ b/backend/api/v1/agent.py @@ -105,6 +105,7 @@ class AgentMode(str, Enum): ANALYZE_ONLY = "analyze_only" # Analysis without testing AUTO_PENTEST = "auto_pentest" # One-click full auto pentest CLI_AGENT = "cli_agent" # AI CLI tool inside Kali sandbox + FULL_LLM_PENTEST = "full_llm_pentest" # LLM drives the entire pentest cycle class AgentRequest(BaseModel): @@ -251,6 +252,7 @@ async def run_agent(request: AgentRequest, background_tasks: BackgroundTasks): "analyze_only": "Analysis only, no active testing", "auto_pentest": "One-click auto pentest: Full recon + 100 vuln types + AI report", "cli_agent": "CLI Agent: AI CLI tool (Claude/Gemini/Codex) inside Kali sandbox", + "full_llm_pentest": "Full LLM Pentest: AI drives the entire pentest cycle autonomously", } return AgentResponse( @@ -379,6 +381,7 @@ async def _run_agent_task( AgentMode.ANALYZE_ONLY: OperationMode.ANALYZE_ONLY, AgentMode.AUTO_PENTEST: OperationMode.AUTO_PENTEST, AgentMode.CLI_AGENT: OperationMode.CLI_AGENT, + AgentMode.FULL_LLM_PENTEST: OperationMode.FULL_LLM_PENTEST, } op_mode = mode_map.get(mode, OperationMode.FULL_AUTO) diff --git a/backend/core/autonomous_agent.py b/backend/core/autonomous_agent.py index 2f0a698..fd156b0 100755 --- a/backend/core/autonomous_agent.py +++ b/backend/core/autonomous_agent.py @@ -255,6 +255,7 @@ class OperationMode(Enum): ANALYZE_ONLY = "analyze_only" AUTO_PENTEST = "auto_pentest" CLI_AGENT = "cli_agent" + FULL_LLM_PENTEST = "full_llm_pentest" class FindingSeverity(Enum): @@ -4005,6 +4006,8 @@ NOT_VULNERABLE: """ return await self._run_auto_pentest() elif self.mode == OperationMode.CLI_AGENT: return await self._run_cli_agent_mode() + elif self.mode == OperationMode.FULL_LLM_PENTEST: + return await self._run_full_llm_pentest() else: return await self._run_full_auto() except Exception as e: @@ -5008,6 +5011,457 @@ NOT_VULNERABLE: """ await self._update_progress(100, "CLI Agent pentest complete") return report + # ═══════════════════════════════════════════════════════════════════════════ + # FULL LLM PENTEST MODE — AI drives the entire pentest cycle + # ═══════════════════════════════════════════════════════════════════════════ + + async def _run_full_llm_pentest(self) -> Dict[str, Any]: + """Full LLM Pentest: the AI drives every step of the pentest. + + The LLM acts as a senior penetration tester. It plans HTTP requests, + the system executes them, and the LLM analyzes real responses to + identify vulnerabilities. Pure AI-driven, no hardcoded payloads. + + Loop: LLM plans → System executes HTTP → LLM analyzes → repeat + """ + await self._update_progress(0, "Full LLM Pentest starting") + await self.log("info", "=" * 60) + await self.log("info", " FULL LLM PENTEST MODE") + await self.log("info", " AI drives the entire pentest cycle") + await self.log("info", "=" * 60) + + if not self.llm.is_available(): + await self.log("error", "LLM not available! This mode requires an active LLM provider.") + await self.log("error", "Configure ANTHROPIC_API_KEY, OPENAI_API_KEY, or another provider.") + return self._generate_error_report("LLM not available for Full LLM Pentest mode") + + # Import prompts + from backend.core.vuln_engine.ai_prompts import ( + get_full_llm_pentest_system_prompt, + get_full_llm_pentest_round_prompt, + get_full_llm_pentest_report_prompt, + ) + + # Load methodology prompt + methodology = self.custom_prompt or "" + if not methodology: + try: + prompt_path = Path("/opt/Prompts-PenTest/pentestcompleto_en.md") + if not prompt_path.exists(): + prompt_path = Path("/opt/Prompts-PenTest/pentestcompleto.md") + if prompt_path.exists(): + methodology = prompt_path.read_text(encoding="utf-8") + except Exception: + pass + + # Build system prompt + system_prompt = get_full_llm_pentest_system_prompt(methodology) + await self.log("info", f" System prompt: {len(system_prompt)} chars") + await self.log("info", f" Methodology: {'loaded' if methodology else 'none'} ({len(methodology)} chars)") + + # State tracking + MAX_ROUNDS = 30 + MAX_ACTIONS_PER_ROUND = 10 + total_requests = 0 + discovered_info_parts: List[str] = [] + all_round_results: List[str] = [] # accumulates round-by-round results + llm_findings: List[Dict] = [] + + await self._update_progress(2, "Full LLM Pentest: Round 1") + + for round_num in range(1, MAX_ROUNDS + 1): + if self.is_cancelled(): + await self.log("warning", "[LLM PENTEST] Cancelled by user") + break + + # Calculate progress: rounds map to 0-85% + progress = min(85, int((round_num / MAX_ROUNDS) * 85)) + phase_label = ( + "Recon" if round_num <= 8 else + "Testing" if round_num <= 25 else + "Post-Exploitation" if round_num <= 28 else + "Reporting" + ) + await self._update_progress(progress, f"Full LLM Pentest: {phase_label} (Round {round_num}/{MAX_ROUNDS})") + + # Build round prompt with accumulated context + # Keep only recent results to manage token budget (last 5 rounds) + recent_results = "\n\n".join(all_round_results[-5:]) if all_round_results else "" + discovered_summary = "\n".join(discovered_info_parts[-30:]) if discovered_info_parts else "" + + round_prompt = get_full_llm_pentest_round_prompt( + target=self.target, + round_num=round_num, + max_rounds=MAX_ROUNDS, + previous_results=recent_results, + discovered_info=discovered_summary, + findings_so_far=len(self.findings), + ) + + # Call LLM + await self.log("info", f"[LLM PENTEST] Round {round_num}: Asking AI to plan ({phase_label})") + try: + llm_response = await self.llm.generate( + prompt=round_prompt, + system=system_prompt, + max_tokens=8192, + ) + except Exception as e: + await self.log("error", f"[LLM PENTEST] LLM call failed: {e}") + # Try to continue with next round + all_round_results.append(f"Round {round_num}: LLM call failed — {str(e)[:100]}") + continue + + # Parse LLM response as JSON + parsed = self._parse_llm_json(llm_response) + if not parsed: + await self.log("warning", f"[LLM PENTEST] Round {round_num}: Failed to parse LLM JSON response") + all_round_results.append(f"Round {round_num}: LLM returned invalid JSON") + continue + + reasoning = parsed.get("reasoning", "") + actions = parsed.get("actions", []) + findings = parsed.get("findings", []) + phase = parsed.get("phase", "unknown") + done = parsed.get("done", False) + summary = parsed.get("summary", "") + + if reasoning: + await self.log("info", f"[LLM PENTEST] AI reasoning: {reasoning[:200]}") + + # Execute HTTP actions + round_result_parts = [f"=== Round {round_num} ({phase}) ==="] + if reasoning: + round_result_parts.append(f"Reasoning: {reasoning}") + + actions_to_exec = actions[:MAX_ACTIONS_PER_ROUND] + await self.log("info", f"[LLM PENTEST] Executing {len(actions_to_exec)} HTTP requests") + + for i, action in enumerate(actions_to_exec): + if self.is_cancelled(): + break + + result = await self._execute_llm_action(action, i + 1) + total_requests += 1 + + if result: + # Add to round results for LLM context + result_summary = self._summarize_response(action, result) + round_result_parts.append(result_summary) + + # Track discovered info + purpose = action.get("purpose", "") + url = action.get("url", "") + status = result.get("status", 0) + if status == 200: + discovered_info_parts.append( + f"- {action.get('method', 'GET')} {url} → {status} " + f"({len(result.get('body', ''))} bytes) — {purpose}" + ) + elif status in (301, 302, 303, 307, 308): + location = result.get("headers", {}).get("Location", result.get("headers", {}).get("location", "")) + discovered_info_parts.append(f"- {url} → redirect to {location}") + elif status == 404: + discovered_info_parts.append(f"- {url} → 404 (not found)") + elif status == 403: + discovered_info_parts.append(f"- {url} → 403 (forbidden)") + else: + discovered_info_parts.append(f"- {url} → {status}") + else: + round_result_parts.append( + f"Request {i+1}: {action.get('method', 'GET')} {action.get('url', '?')} → FAILED (connection error/timeout)" + ) + + all_round_results.append("\n".join(round_result_parts)) + + # Process findings from this round + for finding_data in findings: + await self._process_llm_pentest_finding(finding_data, round_num) + + # Check if LLM says we're done + if done: + await self.log("success", f"[LLM PENTEST] AI completed pentest after {round_num} rounds") + if summary: + await self.log("info", f"[LLM PENTEST] Summary: {summary[:300]}") + break + + await self.log("info", f"[LLM PENTEST] Round {round_num} complete: " + f"{len(actions_to_exec)} requests, {len(findings)} findings, " + f"total: {total_requests} requests, {len(self.findings)} confirmed findings") + + # ── FINALIZATION ── + await self._update_progress(88, "Full LLM Pentest: Generating report") + await self.log("info", f"[LLM PENTEST] Testing complete: {total_requests} total requests, " + f"{len(self.findings)} confirmed findings") + + # Generate AI-enhanced report + report = await self._generate_full_report() + + # Also try to get an AI narrative report + if self.llm.is_available() and self.findings: + try: + findings_json = json.dumps([ + { + "title": f.title, + "severity": f.severity, + "vulnerability_type": f.vulnerability_type, + "affected_endpoint": f.affected_endpoint, + "parameter": f.parameter, + "payload": f.payload, + "evidence": f.evidence[:500] if f.evidence else "", + "description": f.description, + "impact": f.impact, + "cvss_score": f.cvss_score, + "cwe_id": f.cwe_id, + "poc_code": f.poc_code, + "remediation": f.remediation, + "confidence_score": f.confidence_score, + } + for f in self.findings + ], indent=2) + + report_prompt = get_full_llm_pentest_report_prompt( + target=self.target, + findings_json=findings_json, + total_rounds=min(round_num, MAX_ROUNDS), + total_requests=total_requests, + ) + ai_report_text = await self.llm.generate( + prompt=report_prompt, + system="You are a professional penetration testing report writer.", + max_tokens=16384, + ) + if ai_report_text: + report["ai_narrative_report"] = ai_report_text + await self.log("success", "[LLM PENTEST] AI narrative report generated") + except Exception as e: + await self.log("debug", f"[LLM PENTEST] Report generation error: {e}") + + await self._update_progress(100, "Full LLM Pentest complete") + await self.log("info", "=" * 60) + await self.log("info", f" FULL LLM PENTEST COMPLETE: {len(self.findings)} findings") + await self.log("info", f" Total HTTP requests: {total_requests}") + await self.log("info", "=" * 60) + return report + + async def _execute_llm_action(self, action: Dict, action_num: int) -> Optional[Dict]: + """Execute a single HTTP action planned by the LLM. + + The action dict has: method, url, headers, body, content_type, purpose + Returns the response dict or None on failure. + """ + method = (action.get("method") or "GET").upper() + url = action.get("url", "") + custom_headers = action.get("headers") or {} + body = action.get("body") + content_type = action.get("content_type", "") + purpose = action.get("purpose", "") + + if not url: + return None + + # Ensure URL is absolute + if not url.startswith("http"): + url = urljoin(self.target, url) + + # Build request headers + headers = dict(self.auth_headers) if self.auth_headers else {} + headers.update(custom_headers) + if content_type and "Content-Type" not in headers and "content-type" not in headers: + headers["Content-Type"] = content_type + + # Log the request + await self.log("info", f"[LLM PENTEST] → {method} {url[:120]} ({purpose[:60]})") + + try: + timeout = aiohttp.ClientTimeout(total=15) + + if self.request_engine: + # Use request engine for retry/rate limiting + data = None + params = None + if method == "GET": + # Parse params from URL + pass # URL already has params + else: + if body: + if content_type and "json" in content_type: + try: + data = json.loads(body) if isinstance(body, str) else body + except (json.JSONDecodeError, TypeError): + data = body + else: + data = body + else: + data = None + + result = await self.request_engine.request( + url, method=method, + headers=headers if headers else None, + data=data, + allow_redirects=True, + ) + if result: + resp_dict = { + "status": result.status, + "body": result.body[:50000] if result.body else "", + "headers": result.headers, + "url": result.url, + } + status_str = f"{result.status}" + body_len = len(result.body) if result.body else 0 + await self.log("info", f"[LLM PENTEST] ← {status_str} ({body_len} bytes)") + return resp_dict + else: + # Direct session fallback + req_kwargs: Dict[str, Any] = { + "allow_redirects": True, + "timeout": timeout, + "headers": headers, + } + if method != "GET" and body: + if content_type and "json" in content_type: + try: + req_kwargs["json"] = json.loads(body) if isinstance(body, str) else body + except (json.JSONDecodeError, TypeError): + req_kwargs["data"] = body + else: + req_kwargs["data"] = body + + async with self.session.request(method, url, **req_kwargs) as resp: + resp_body = await resp.text() + resp_dict = { + "status": resp.status, + "body": resp_body[:50000], + "headers": dict(resp.headers), + "url": str(resp.url), + } + await self.log("info", f"[LLM PENTEST] ← {resp.status} ({len(resp_body)} bytes)") + return resp_dict + + except asyncio.TimeoutError: + await self.log("debug", f"[LLM PENTEST] Timeout: {url[:80]}") + except Exception as e: + await self.log("debug", f"[LLM PENTEST] Request error: {str(e)[:80]}") + return None + + def _summarize_response(self, action: Dict, result: Dict) -> str: + """Create a compact summary of an HTTP response for the LLM context.""" + method = action.get("method", "GET") + url = action.get("url", "?") + purpose = action.get("purpose", "") + status = result.get("status", 0) + headers = result.get("headers", {}) + body = result.get("body", "") + + # Extract key headers + key_headers = {} + for h in ["Server", "server", "Content-Type", "content-type", + "X-Powered-By", "x-powered-by", "Set-Cookie", "set-cookie", + "Location", "location", "X-Frame-Options", "x-frame-options", + "Content-Security-Policy", "content-security-policy", + "WWW-Authenticate", "www-authenticate"]: + val = headers.get(h) + if val: + key_headers[h] = val[:200] + + # Truncate body for context (keep meaningful content) + body_preview = body[:3000] if body else "" + + lines = [ + f"Request: {method} {url}", + f"Purpose: {purpose}", + f"Status: {status}", + f"Headers: {json.dumps(key_headers, default=str)}", + f"Body ({len(body)} bytes):", + body_preview, + ] + return "\n".join(lines) + + def _parse_llm_json(self, text: str) -> Optional[Dict]: + """Parse JSON from LLM response, handling markdown code blocks.""" + if not text: + return None + + # Try direct parse + text_stripped = text.strip() + try: + return json.loads(text_stripped) + except (json.JSONDecodeError, ValueError): + pass + + # Try extracting from markdown code block + import re + patterns = [ + r'```json\s*\n(.*?)\n\s*```', + r'```\s*\n(.*?)\n\s*```', + r'\{[\s\S]*\}', + ] + for pattern in patterns[:2]: + match = re.search(pattern, text, re.DOTALL) + if match: + try: + return json.loads(match.group(1)) + except (json.JSONDecodeError, ValueError): + continue + + # Try finding the outermost JSON object + # Find first { and last } + first_brace = text.find('{') + last_brace = text.rfind('}') + if first_brace >= 0 and last_brace > first_brace: + try: + return json.loads(text[first_brace:last_brace + 1]) + except (json.JSONDecodeError, ValueError): + pass + + return None + + async def _process_llm_pentest_finding(self, finding_data: Dict, round_num: int): + """Process a finding reported by the LLM in Full LLM Pentest mode. + + Creates a Finding object and routes it through the validation pipeline. + """ + title = finding_data.get("title", "LLM Finding") + severity = finding_data.get("severity", "medium").lower() + if severity not in ("critical", "high", "medium", "low", "info"): + severity = "medium" + + vuln_type = finding_data.get("vulnerability_type", "unknown") + evidence = finding_data.get("evidence", "") + + # Skip findings without evidence (anti-hallucination) + if not evidence or len(evidence) < 10: + await self.log("debug", f"[LLM PENTEST] Skipping finding without evidence: {title}") + return + + finding = Finding( + id=hashlib.md5( + f"{title}|{finding_data.get('affected_endpoint', '')}|{finding_data.get('payload', '')}|{round_num}".encode() + ).hexdigest()[:12], + title=title, + severity=severity, + vulnerability_type=vuln_type, + cvss_score=finding_data.get("cvss_score", 0.0), + cwe_id=finding_data.get("cwe_id", ""), + description=finding_data.get("description", ""), + affected_endpoint=finding_data.get("affected_endpoint", self.target), + parameter=finding_data.get("parameter", ""), + payload=finding_data.get("payload", ""), + evidence=evidence, + impact=finding_data.get("impact", ""), + poc_code=finding_data.get("poc_code", ""), + remediation=finding_data.get("remediation", ""), + ai_verified=True, + confidence_score=70, # Initial score, ValidationJudge will refine + ai_status="confirmed", + ) + + # Route through validation pipeline (_judge_finding handles + # negative controls, proof of execution, confidence scoring) + await self._add_finding(finding) + await self.log("success", f"[LLM PENTEST] Finding: {severity.upper()} — {title}") + # ── Pre-Stream AI Master Plan ── async def _ai_master_plan(self) -> Dict: diff --git a/backend/core/vuln_engine/ai_prompts.py b/backend/core/vuln_engine/ai_prompts.py index 978c0f0..c93e99a 100755 --- a/backend/core/vuln_engine/ai_prompts.py +++ b/backend/core/vuln_engine/ai_prompts.py @@ -2097,3 +2097,222 @@ RULES: - Prioritize by LIKELIHOOD of exploitation, not theoretical severity. - Consider technology-specific vulnerabilities (e.g., Spring → actuator, WordPress → wp-admin). - Flag parameters like 'url', 'file', 'path', 'redirect', 'callback', 'template' as high-risk.""" + + +# ═══════════════════════════════════════════════════════════════════════════════ +# FULL LLM PENTEST PROMPTS — AI drives the entire pentest cycle +# ═══════════════════════════════════════════════════════════════════════════════ + +def get_full_llm_pentest_system_prompt(methodology: str = "") -> str: + """System prompt for the Full LLM Pentest mode. + + The LLM acts as a senior penetration tester and plans HTTP requests + for the system to execute. The LLM sees real responses and adapts. + """ + methodology_section = "" + if methodology: + # Truncate very large methodology to fit context + max_len = 60000 + if len(methodology) > max_len: + methodology = methodology[:max_len] + "\n\n[... methodology truncated for context ...]" + methodology_section = f""" +## PENTEST METHODOLOGY + +Follow this methodology throughout your testing: + +{methodology} +""" + + return f"""You are a Senior Offensive Security Consultant performing an authorized penetration test in a controlled lab environment. + +ALL ACTIVITIES ARE AUTHORIZED. The target is a deliberately vulnerable lab environment. + +## YOUR ROLE + +You are the testing engine. You think like a human pentester using Burp Suite / curl. +You PLAN what HTTP requests to make, and the system EXECUTES them for you. +You then ANALYZE the real responses and ADAPT your strategy. + +## HOW THIS WORKS + +Each round you output a JSON object with: +1. **reasoning**: What you observed, what you learned, what to try next +2. **actions**: HTTP requests you want the system to execute (max 10 per round) +3. **findings**: Vulnerabilities you confirmed based on REAL response evidence +4. **phase**: Current phase (recon, testing, post_exploitation, reporting) +5. **done**: true when you've completed the full pentest cycle + +The system executes your HTTP requests and returns the actual responses. +You then analyze those responses and plan your next actions. + +## PHASES + +### Phase 1: RECON (rounds 1-8) +- Fingerprint technologies (server headers, cookies, response patterns) +- Discover endpoints (crawl links, check robots.txt, sitemap.xml) +- Map input vectors (forms, parameters, headers, cookies) +- Identify authentication mechanisms +- Check for common files (.env, .git, admin panels) + +### Phase 2: TESTING (rounds 9-25) +Test each discovered endpoint for: +- SQL Injection (error-based, boolean-based, time-based, UNION-based) +- Cross-Site Scripting (reflected, stored, DOM-based) +- Local/Remote File Inclusion (LFI/RFI) +- Command Injection (OS command injection via various delimiters) +- Authentication bypass +- SSRF, CSRF, IDOR, XXE +- Security misconfigurations +- Sensitive data exposure +- Directory traversal + +### Phase 3: POST-EXPLOITATION (rounds 26-28) +- Extract data from confirmed vulnerabilities +- Chain vulnerabilities for maximum impact +- Test privilege escalation paths +- Verify data exposure scope + +### Phase 4: REPORTING (round 29-30) +- Compile all findings with evidence +- Set done=true + +{methodology_section} + +## CRITICAL RULES + +1. **REAL EVIDENCE ONLY**: Never claim a vulnerability without evidence from an actual response. + - SQLi: Show the SQL error message or extracted data from the response body + - XSS: Show the reflected payload in the response body unescaped + - LFI: Show file contents (e.g., /etc/passwd content) in the response + - Command Injection: Show command output in the response + +2. **NO HALLUCINATION**: If a test fails (payload is filtered, no error), say so honestly. + Do NOT fabricate evidence. The system will verify your claims. + +3. **ADAPT**: If WAF blocks payloads, try encoding, case variation, alternative syntax. + If an endpoint 404s, move to the next one. Don't repeat failed tests. + +4. **BE SPECIFIC**: Include exact URLs, parameters, payloads, and expected vs actual behavior. + +5. **PROGRESS**: Don't repeat the same tests. Track what you've already tested. + +## OUTPUT FORMAT (strict JSON) + +```json +{{ + "phase": "recon|testing|post_exploitation|reporting", + "reasoning": "Detailed explanation of what you observed and why you're taking these actions", + "actions": [ + {{ + "method": "GET|POST|PUT|DELETE|OPTIONS|HEAD|PATCH", + "url": "https://target.com/path?param=value", + "headers": {{"Header-Name": "value"}}, + "body": "form or raw body data (for POST/PUT)", + "content_type": "application/x-www-form-urlencoded|application/json|multipart/form-data", + "purpose": "What this request tests" + }} + ], + "findings": [ + {{ + "title": "SQL Injection in /login username parameter", + "severity": "critical|high|medium|low|info", + "vulnerability_type": "sql_injection|xss_reflected|xss_stored|lfi|rfi|command_injection|ssrf|csrf|idor|xxe|auth_bypass|open_redirect|directory_listing|info_disclosure|security_misconfiguration", + "affected_endpoint": "/login", + "parameter": "username", + "payload": "' OR 1=1--", + "evidence": "Response contained: You have an error in your SQL syntax...", + "description": "The username parameter is vulnerable to SQL injection...", + "impact": "An attacker could bypass authentication and extract all database contents", + "cvss_score": 9.8, + "cwe_id": "CWE-89", + "poc_code": "curl -X POST 'https://target/login' -d 'username=%27+OR+1%3D1--&password=test'", + "remediation": "Use parameterized queries / prepared statements" + }} + ], + "done": false, + "summary": "Only set when done=true. Full executive summary of the pentest." +}} +``` + +IMPORTANT: Output ONLY valid JSON. No markdown, no text before or after the JSON object.""" + + +def get_full_llm_pentest_round_prompt( + target: str, + round_num: int, + max_rounds: int, + previous_results: str, + discovered_info: str, + findings_so_far: int, +) -> str: + """Build the round prompt for each iteration of the Full LLM Pentest loop.""" + + phase_hint = "" + if round_num <= 8: + phase_hint = "You should be in the RECON phase. Focus on discovering endpoints, technologies, and input vectors." + elif round_num <= 25: + phase_hint = "You should be in the TESTING phase. Test discovered endpoints for vulnerabilities." + elif round_num <= 28: + phase_hint = "You should be in the POST-EXPLOITATION phase. Chain vulnerabilities and extract data." + else: + phase_hint = "You should be in the REPORTING phase. Compile final findings and set done=true." + + return f"""## ROUND {round_num}/{max_rounds} + +Target: {target} +Findings so far: {findings_so_far} +{phase_hint} + +{"WARNING: This is your LAST round. Set done=true and include your final summary." if round_num >= max_rounds else ""} + +## WHAT YOU KNOW SO FAR + +{discovered_info if discovered_info else "Nothing discovered yet. Start with basic recon."} + +## PREVIOUS ROUND RESULTS + +{previous_results if previous_results else "This is the first round. No previous results."} + +Plan your next actions. Remember: +- Max 10 HTTP requests per round +- Be strategic — don't waste requests on unlikely paths +- Build on what you've learned from previous responses +- Report findings as soon as you have REAL evidence + +Output your response as a single JSON object.""" + + +def get_full_llm_pentest_report_prompt( + target: str, + findings_json: str, + total_rounds: int, + total_requests: int, +) -> str: + """Prompt for the LLM to generate the final pentest report.""" + return f"""Generate a professional penetration test report for the following engagement. + +## Engagement Details +- Target: {target} +- Testing Rounds: {total_rounds} +- Total HTTP Requests: {total_requests} +- Methodology: AI-Driven Full Pentest (LLM as Testing Engine) + +## Confirmed Findings + +{findings_json} + +## Report Structure + +Generate a comprehensive report with: + +1. **Executive Summary** — Business impact (non-technical language), overall risk rating, key findings +2. **Scope and Methodology** — What was tested, approach taken, standards followed (OWASP, PTES) +3. **Detailed Findings** — For each vulnerability: title, severity, description, evidence, impact, remediation, OWASP/CWE references +4. **Risk Prioritization Table** — All findings sorted by severity with CVSS scores +5. **Remediation Roadmap** — Short-term fixes, medium-term improvements, long-term recommendations +6. **Conclusion** + +Write in professional English suitable for C-level stakeholders and technical teams. +Be precise, structured, and security-focused. + +Output the report as a markdown document.""" diff --git a/frontend/src/pages/FullIATestingPage.tsx b/frontend/src/pages/FullIATestingPage.tsx index 88c5ea3..cb21689 100644 --- a/frontend/src/pages/FullIATestingPage.tsx +++ b/frontend/src/pages/FullIATestingPage.tsx @@ -4,7 +4,7 @@ import { Crosshair, Shield, ChevronDown, ChevronUp, Loader2, AlertTriangle, CheckCircle2, Globe, Lock, Bug, FileText, ScrollText, X, ExternalLink, Download, Sparkles, - Brain, Wrench, Layers, Trash2, Clock, Search, + Brain, Trash2, Clock, Search, Activity, Terminal } from 'lucide-react' import { PieChart, Pie, Cell, Tooltip as RechartsTooltip, ResponsiveContainer } from 'recharts' @@ -14,22 +14,12 @@ import type { AgentStatus, AgentFinding, AgentLog, ToolExecution, ContainerStatu // ─── Constants ──────────────────────────────────────────────────────────────── const PHASES = [ - { key: 'parallel', label: 'Parallel Streams', icon: Layers, range: [0, 50] as const }, - { key: 'deep', label: 'Deep Analysis', icon: Brain, range: [50, 75] as const }, - { key: 'final', label: 'Finalization', icon: Shield, range: [75, 100] as const }, + { key: 'recon', label: 'AI Recon', icon: Globe, range: [0, 25] as const }, + { key: 'testing', label: 'AI Testing', icon: Bug, range: [25, 70] as const }, + { key: 'postexploit', label: 'Post-Exploitation', icon: Brain, range: [70, 85] as const }, + { key: 'report', label: 'Report', icon: Shield, range: [85, 100] as const }, ] -const STREAMS = [ - { key: 'recon', label: 'Recon', icon: Globe, color: 'blue', activeUntil: 25 }, - { key: 'junior', label: 'Junior AI', icon: Brain, color: 'purple', activeUntil: 35 }, - { key: 'tools', label: 'Tools', icon: Wrench, color: 'orange', activeUntil: 50 }, -] as const - -const STREAM_COLORS: Record = { - blue: { bg: 'bg-blue-500/20', text: 'text-blue-400', border: 'border-blue-500/40', pulse: 'bg-blue-400' }, - purple: { bg: 'bg-purple-500/20', text: 'text-purple-400', border: 'border-purple-500/40', pulse: 'bg-purple-400' }, - orange: { bg: 'bg-orange-500/20', text: 'text-orange-400', border: 'border-orange-500/40', pulse: 'bg-orange-400' }, -} const SEVERITY_COLORS: Record = { critical: 'bg-red-500', high: 'bg-orange-500', medium: 'bg-yellow-500', @@ -53,11 +43,8 @@ const CONFIDENCE_STYLES: Record = { const LOG_FILTERS = [ { key: 'all', label: 'All', color: '' }, - { key: 'stream1', label: 'Recon', color: 'text-blue-400' }, - { key: 'stream2', label: 'Junior', color: 'text-purple-400' }, - { key: 'stream3', label: 'Tools', color: 'text-orange-400' }, - { key: 'deep', label: 'Deep', color: 'text-cyan-400' }, - { key: 'container', label: 'Container', color: 'text-cyan-300' }, + { key: 'llm', label: 'LLM Pentest', color: 'text-red-400' }, + { key: 'ai', label: 'AI Decisions', color: 'text-purple-400' }, { key: 'error', label: 'Errors', color: 'text-red-400' }, ] @@ -70,9 +57,10 @@ const MAX_TOASTS = 5 // ─── Utility Functions ──────────────────────────────────────────────────────── function phaseFromProgress(progress: number): number { - if (progress < 50) return 0 - if (progress < 75) return 1 - return 2 + if (progress < 25) return 0 + if (progress < 70) return 1 + if (progress < 85) return 2 + return 3 } function formatElapsed(totalSeconds: number): string { @@ -83,6 +71,7 @@ function formatElapsed(totalSeconds: number): string { } function logMessageColor(message: string): string { + if (message.startsWith('[LLM PENTEST]')) return 'text-red-400' if (message.startsWith('[STREAM 1]')) return 'text-blue-400' if (message.startsWith('[STREAM 2]')) return 'text-purple-400' if (message.startsWith('[STREAM 3]')) return 'text-orange-400' @@ -101,11 +90,8 @@ function logMessageColor(message: string): string { function matchLogFilter(log: AgentLog, filter: string): boolean { if (filter === 'all') return true - if (filter === 'stream1') return log.message.startsWith('[STREAM 1]') - if (filter === 'stream2') return log.message.startsWith('[STREAM 2]') - if (filter === 'stream3') return log.message.startsWith('[STREAM 3]') - if (filter === 'deep') return log.message.startsWith('[DEEP]') - if (filter === 'container') return log.message.startsWith('[CONTAINER]') + if (filter === 'llm') return log.message.startsWith('[LLM PENTEST]') + if (filter === 'ai') return log.source === 'llm' || log.message.includes('[AI]') || log.message.includes('[LLM]') if (filter === 'error') return log.level === 'error' || log.level === 'warning' return true } @@ -139,33 +125,6 @@ interface Toast { // ─── Sub-Components ─────────────────────────────────────────────────────────── -function StreamBadge({ stream, progress, isRunning }: { - stream: typeof STREAMS[number]; progress: number; isRunning: boolean -}) { - const active = isRunning && progress < stream.activeUntil - const done = progress >= stream.activeUntil - const colors = STREAM_COLORS[stream.color] - const Icon = stream.icon - - return ( -
- {active && ( - - - - - )} - {done && } - {!active && !done && } - {stream.label} -
- ) -} - function LiveStatsDashboard({ status, elapsedSeconds, toolExecutions }: { status: AgentStatus; elapsedSeconds: number; toolExecutions: ToolExecution[] }) { @@ -487,13 +446,20 @@ export default function FullIATestingPage() { // ─── Elapsed Time Ticker ────────────────────────────────────────────────── useEffect(() => { - if (!isRunning || !status?.started_at) return + if (!status?.started_at) return const startTime = new Date(status.started_at).getTime() - const tick = () => setElapsedSeconds(Math.floor((Date.now() - startTime) / 1000)) - tick() - const id = setInterval(tick, 1000) - return () => clearInterval(id) - }, [isRunning, status?.started_at]) + if (isRunning) { + const tick = () => setElapsedSeconds(Math.floor((Date.now() - startTime) / 1000)) + tick() + const id = setInterval(tick, 1000) + return () => clearInterval(id) + } else { + const endTime = status.completed_at + ? new Date(status.completed_at).getTime() + : Date.now() + setElapsedSeconds(Math.max(0, Math.floor((endTime - startTime) / 1000))) + } + }, [isRunning, status?.started_at, status?.completed_at]) // ─── Polling ────────────────────────────────────────────────────────────── @@ -593,8 +559,9 @@ export default function FullIATestingPage() { try { const resp = await agentApi.autoPentest(primaryTarget, { + mode: 'full_llm_pentest', prompt: promptContent, - enable_kali_sandbox: true, + enable_kali_sandbox: false, auth_type: authType || undefined, auth_value: authValue || undefined, preferred_provider: selectedProvider || undefined, @@ -603,7 +570,7 @@ export default function FullIATestingPage() { setAgentId(resp.agent_id) setIsRunning(true) - addToast('FULL AI pentest started', 'info') + addToast('Full LLM Pentest started', 'info') localStorage.setItem(SESSION_KEY, JSON.stringify({ agentId: resp.agent_id, target: primaryTarget, @@ -705,9 +672,9 @@ export default function FullIATestingPage() {
-

FULL AI TESTING

+

FULL LLM PENTEST

- Complete AI-driven penetration test. Recon, exploitation, post-exploitation with Kali sandbox. + The LLM drives the entire pentest cycle. AI plans HTTP requests, system executes, AI analyzes and adapts.

{promptContent && ( )} @@ -885,9 +852,9 @@ export default function FullIATestingPage() { status?.status === 'error' ? 'bg-red-500' : 'bg-gray-500' }`} />

- {isRunning ? 'FULL AI Pentest Running' : - status?.status === 'completed' ? 'Pentest Complete' : - status?.status === 'error' ? 'Pentest Failed' : 'Pentest Stopped'} + {isRunning ? 'Full LLM Pentest Running' : + status?.status === 'completed' ? 'LLM Pentest Complete' : + status?.status === 'error' ? 'LLM Pentest Failed' : 'LLM Pentest Stopped'}

{target} @@ -946,7 +913,7 @@ export default function FullIATestingPage() { {/* Phase Indicators */} -
+
{PHASES.map((phase, idx) => { const Icon = phase.icon const isActive = idx === currentPhaseIdx && isRunning @@ -971,13 +938,6 @@ export default function FullIATestingPage() { {phase.range[0]}-{phase.range[1]}%
- {idx === 0 && ( -
- {STREAMS.map(stream => ( - - ))} -
- )}
) })} @@ -1272,7 +1232,7 @@ export default function FullIATestingPage() { {isRunning ? ( - FULL AI pentest in progress... Findings will appear as discovered. + Full LLM Pentest in progress... AI is planning and executing tests. ) : ( 'No findings' @@ -1304,7 +1264,7 @@ export default function FullIATestingPage() { )}

- {status.status === 'completed' ? 'FULL AI Pentest Complete' : 'Pentest Stopped'} + {status.status === 'completed' ? 'Full LLM Pentest Complete' : 'LLM Pentest Stopped'}