NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
2026-06-05 20:36:34 +02:00 · 2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions
@@ -0,0 +1,450 @@
+"""
+NeuroSploit v3 - Prompt Parser
+
+Parses user prompts to extract:
+1. Vulnerability types to test
+2. Testing scope and depth
+3. Special instructions
+4. Output format preferences
+
+This enables dynamic, prompt-driven testing instead of hardcoded vulnerability types.
+"""
+import re
+from typing import List, Dict, Optional, Tuple
+from backend.schemas.prompt import (
+    PromptParseResult,
+    VulnerabilityTypeExtracted,
+    TestingScope
+)
+
+
+class PromptParser:
+    """
+    Parses penetration testing prompts to extract structured testing instructions.
+
+    Instead of requiring specific LLM calls for every parse, this uses pattern matching
+    and keyword analysis for fast, deterministic extraction.
+    """
+
+    # Vulnerability keyword mappings
+    VULNERABILITY_KEYWORDS = {
+        # XSS variants
+        "xss_reflected": [
+            "xss", "cross-site scripting", "reflected xss", "reflected cross-site",
+            "script injection", "html injection"
+        ],
+        "xss_stored": [
+            "stored xss", "persistent xss", "stored cross-site", "persistent cross-site"
+        ],
+        "xss_dom": [
+            "dom xss", "dom-based xss", "dom based", "client-side xss"
+        ],
+
+        # SQL Injection variants
+        "sqli_error": [
+            "sql injection", "sqli", "sql error", "error-based sql"
+        ],
+        "sqli_union": [
+            "union sql", "union injection", "union-based", "union based"
+        ],
+        "sqli_blind": [
+            "blind sql", "blind injection", "boolean sql", "boolean-based"
+        ],
+        "sqli_time": [
+            "time-based sql", "time based sql", "time-based injection"
+        ],
+
+        # Other injections
+        "nosql_injection": [
+            "nosql", "mongodb injection", "nosql injection"
+        ],
+        "command_injection": [
+            "command injection", "os command", "shell injection", "rce",
+            "remote code execution", "code execution"
+        ],
+        "ssti": [
+            "ssti", "template injection", "server-side template", "jinja injection",
+            "twig injection"
+        ],
+        "ldap_injection": [
+            "ldap injection", "ldap"
+        ],
+        "xpath_injection": [
+            "xpath injection", "xpath"
+        ],
+        "header_injection": [
+            "header injection", "http header"
+        ],
+        "crlf_injection": [
+            "crlf", "carriage return", "header splitting"
+        ],
+
+        # File access
+        "lfi": [
+            "lfi", "local file inclusion", "file inclusion", "path traversal",
+            "directory traversal", "../"
+        ],
+        "rfi": [
+            "rfi", "remote file inclusion"
+        ],
+        "path_traversal": [
+            "path traversal", "directory traversal", "dot dot slash"
+        ],
+        "file_upload": [
+            "file upload", "upload vulnerability", "unrestricted upload",
+            "malicious upload"
+        ],
+        "xxe": [
+            "xxe", "xml external entity", "xml injection"
+        ],
+
+        # Request forgery
+        "ssrf": [
+            "ssrf", "server-side request forgery", "server side request",
+            "internal request"
+        ],
+        "ssrf_cloud": [
+            "cloud metadata", "169.254.169.254", "metadata service", "aws metadata",
+            "gcp metadata"
+        ],
+        "csrf": [
+            "csrf", "cross-site request forgery", "xsrf"
+        ],
+
+        # Authentication
+        "auth_bypass": [
+            "authentication bypass", "auth bypass", "login bypass", "broken auth"
+        ],
+        "session_fixation": [
+            "session fixation", "session hijacking"
+        ],
+        "jwt_manipulation": [
+            "jwt", "json web token", "token manipulation", "jwt bypass"
+        ],
+        "weak_password": [
+            "weak password", "password policy", "credential"
+        ],
+        "brute_force": [
+            "brute force", "credential stuffing", "password spray"
+        ],
+
+        # Authorization
+        "idor": [
+            "idor", "insecure direct object", "direct object reference"
+        ],
+        "bola": [
+            "bola", "broken object level", "api authorization"
+        ],
+        "privilege_escalation": [
+            "privilege escalation", "privesc", "priv esc", "elevation"
+        ],
+
+        # API Security
+        "rate_limiting": [
+            "rate limit", "rate limiting", "throttling"
+        ],
+        "mass_assignment": [
+            "mass assignment", "parameter pollution"
+        ],
+        "excessive_data": [
+            "excessive data", "data exposure", "over-fetching"
+        ],
+        "graphql_introspection": [
+            "graphql introspection", "graphql schema"
+        ],
+        "graphql_injection": [
+            "graphql injection", "graphql attack"
+        ],
+
+        # Client-side
+        "cors_misconfig": [
+            "cors", "cross-origin", "cors misconfiguration"
+        ],
+        "clickjacking": [
+            "clickjacking", "click jacking", "ui redressing", "x-frame-options"
+        ],
+        "open_redirect": [
+            "open redirect", "url redirect", "redirect vulnerability"
+        ],
+
+        # Information disclosure
+        "error_disclosure": [
+            "error message", "stack trace", "debug information"
+        ],
+        "sensitive_data": [
+            "sensitive data", "pii exposure", "data leak"
+        ],
+        "debug_endpoints": [
+            "debug endpoint", "admin panel", "hidden endpoint"
+        ],
+
+        # Infrastructure
+        "security_headers": [
+            "security headers", "http headers", "csp", "content-security-policy",
+            "hsts", "x-content-type"
+        ],
+        "ssl_issues": [
+            "ssl", "tls", "certificate", "https"
+        ],
+        "http_methods": [
+            "http methods", "options method", "trace method", "put method"
+        ],
+
+        # Logic flaws
+        "race_condition": [
+            "race condition", "toctou", "time of check"
+        ],
+        "business_logic": [
+            "business logic", "logic flaw", "workflow"
+        ]
+    }
+
+    # Category mappings
+    VULNERABILITY_CATEGORIES = {
+        "injection": [
+            "xss_reflected", "xss_stored", "xss_dom", "sqli_error", "sqli_union",
+            "sqli_blind", "sqli_time", "nosql_injection", "command_injection",
+            "ssti", "ldap_injection", "xpath_injection", "header_injection", "crlf_injection"
+        ],
+        "file_access": ["lfi", "rfi", "path_traversal", "file_upload", "xxe"],
+        "request_forgery": ["ssrf", "ssrf_cloud", "csrf"],
+        "authentication": [
+            "auth_bypass", "session_fixation", "jwt_manipulation",
+            "weak_password", "brute_force"
+        ],
+        "authorization": ["idor", "bola", "privilege_escalation"],
+        "api_security": [
+            "rate_limiting", "mass_assignment", "excessive_data",
+            "graphql_introspection", "graphql_injection"
+        ],
+        "client_side": ["cors_misconfig", "clickjacking", "open_redirect"],
+        "information_disclosure": ["error_disclosure", "sensitive_data", "debug_endpoints"],
+        "infrastructure": ["security_headers", "ssl_issues", "http_methods"],
+        "logic_flaws": ["race_condition", "business_logic"]
+    }
+
+    # Depth keywords
+    DEPTH_KEYWORDS = {
+        "quick": ["quick", "fast", "basic", "simple", "light"],
+        "standard": ["standard", "normal", "default"],
+        "thorough": ["thorough", "comprehensive", "complete", "full", "deep"],
+        "exhaustive": ["exhaustive", "extensive", "all", "everything", "maximum"]
+    }
+
+    def __init__(self):
+        # Compile regex patterns for efficiency
+        self._compile_patterns()
+
+    def _compile_patterns(self):
+        """Compile regex patterns for keyword matching"""
+        self.vuln_patterns = {}
+        for vuln_type, keywords in self.VULNERABILITY_KEYWORDS.items():
+            pattern = r'\b(' + '|'.join(re.escape(kw) for kw in keywords) + r')\b'
+            self.vuln_patterns[vuln_type] = re.compile(pattern, re.IGNORECASE)
+
+    async def parse(self, prompt: str) -> PromptParseResult:
+        """
+        Parse a prompt to extract testing instructions.
+
+        Args:
+            prompt: User's penetration testing prompt
+
+        Returns:
+            PromptParseResult with extracted vulnerabilities and scope
+        """
+        prompt_lower = prompt.lower()
+
+        # Extract vulnerability types
+        vulnerabilities = self._extract_vulnerabilities(prompt, prompt_lower)
+
+        # If no specific vulnerabilities mentioned but comprehensive keywords found,
+        # add all vulnerabilities
+        if not vulnerabilities:
+            if any(kw in prompt_lower for kw in ["all vulnerabilities", "comprehensive", "full pentest", "everything"]):
+                vulnerabilities = self._get_all_vulnerabilities(prompt)
+
+        # Extract testing scope
+        scope = self._extract_scope(prompt_lower)
+
+        # Extract special instructions
+        special_instructions = self._extract_special_instructions(prompt)
+
+        # Extract target filters
+        target_filters = self._extract_target_filters(prompt)
+
+        # Extract output preferences
+        output_preferences = self._extract_output_preferences(prompt_lower)
+
+        return PromptParseResult(
+            vulnerabilities_to_test=vulnerabilities,
+            testing_scope=scope,
+            special_instructions=special_instructions,
+            target_filters=target_filters,
+            output_preferences=output_preferences
+        )
+
+    def _extract_vulnerabilities(self, prompt: str, prompt_lower: str) -> List[VulnerabilityTypeExtracted]:
+        """Extract vulnerability types from prompt"""
+        vulnerabilities = []
+        found_types = set()
+
+        for vuln_type, pattern in self.vuln_patterns.items():
+            matches = pattern.findall(prompt_lower)
+            if matches:
+                # Calculate confidence based on number of matches and context
+                confidence = min(0.9, 0.5 + len(matches) * 0.1)
+
+                # Get category
+                category = self._get_category(vuln_type)
+
+                # Extract context (surrounding text)
+                context = self._extract_context(prompt, matches[0])
+
+                if vuln_type not in found_types:
+                    found_types.add(vuln_type)
+                    vulnerabilities.append(VulnerabilityTypeExtracted(
+                        type=vuln_type,
+                        category=category,
+                        confidence=confidence,
+                        context=context
+                    ))
+
+        return vulnerabilities
+
+    def _get_all_vulnerabilities(self, prompt: str) -> List[VulnerabilityTypeExtracted]:
+        """Get all vulnerability types for comprehensive testing"""
+        vulnerabilities = []
+        for vuln_type in self.VULNERABILITY_KEYWORDS.keys():
+            category = self._get_category(vuln_type)
+            vulnerabilities.append(VulnerabilityTypeExtracted(
+                type=vuln_type,
+                category=category,
+                confidence=0.7,
+                context="Comprehensive testing requested"
+            ))
+        return vulnerabilities
+
+    def _get_category(self, vuln_type: str) -> str:
+        """Get category for a vulnerability type"""
+        for category, types in self.VULNERABILITY_CATEGORIES.items():
+            if vuln_type in types:
+                return category
+        return "other"
+
+    def _extract_context(self, prompt: str, keyword: str, window: int = 50) -> str:
+        """Extract context around a keyword"""
+        idx = prompt.lower().find(keyword.lower())
+        if idx == -1:
+            return ""
+        start = max(0, idx - window)
+        end = min(len(prompt), idx + len(keyword) + window)
+        return prompt[start:end].strip()
+
+    def _extract_scope(self, prompt_lower: str) -> TestingScope:
+        """Extract testing scope from prompt"""
+        # Determine depth
+        depth = "standard"
+        for level, keywords in self.DEPTH_KEYWORDS.items():
+            if any(kw in prompt_lower for kw in keywords):
+                depth = level
+                break
+
+        # Check for recon
+        include_recon = not any(
+            kw in prompt_lower for kw in ["no recon", "skip recon", "without recon"]
+        )
+
+        # Extract time limits
+        time_limit = None
+        time_match = re.search(r'(\d+)\s*(minute|min|hour|hr)', prompt_lower)
+        if time_match:
+            value = int(time_match.group(1))
+            unit = time_match.group(2)
+            if 'hour' in unit or 'hr' in unit:
+                time_limit = value * 60
+            else:
+                time_limit = value
+
+        # Extract request limits
+        max_requests = None
+        req_match = re.search(r'(\d+)\s*(request|req)', prompt_lower)
+        if req_match:
+            max_requests = int(req_match.group(1))
+
+        return TestingScope(
+            include_recon=include_recon,
+            depth=depth,
+            max_requests_per_endpoint=max_requests,
+            time_limit_minutes=time_limit
+        )
+
+    def _extract_special_instructions(self, prompt: str) -> List[str]:
+        """Extract special instructions from prompt"""
+        instructions = []
+
+        # Look for explicit instructions
+        instruction_patterns = [
+            r'focus on[:\s]+([^.]+)',
+            r'prioritize[:\s]+([^.]+)',
+            r'especially[:\s]+([^.]+)',
+            r'important[:\s]+([^.]+)',
+            r'make sure to[:\s]+([^.]+)',
+            r'don\'t forget to[:\s]+([^.]+)'
+        ]
+
+        for pattern in instruction_patterns:
+            matches = re.findall(pattern, prompt, re.IGNORECASE)
+            instructions.extend(matches)
+
+        return instructions
+
+    def _extract_target_filters(self, prompt: str) -> Dict:
+        """Extract target filtering preferences"""
+        filters = {
+            "include_patterns": [],
+            "exclude_patterns": [],
+            "focus_on_parameters": []
+        }
+
+        # Look for include patterns
+        include_match = re.findall(r'only\s+test\s+([^.]+)', prompt, re.IGNORECASE)
+        if include_match:
+            filters["include_patterns"].extend(include_match)
+
+        # Look for exclude patterns
+        exclude_match = re.findall(r'(?:skip|exclude|ignore)\s+([^.]+)', prompt, re.IGNORECASE)
+        if exclude_match:
+            filters["exclude_patterns"].extend(exclude_match)
+
+        # Look for parameter focus
+        param_match = re.findall(r'parameter[s]?\s+(?:like|named|called)\s+(\w+)', prompt, re.IGNORECASE)
+        if param_match:
+            filters["focus_on_parameters"].extend(param_match)
+
+        return filters
+
+    def _extract_output_preferences(self, prompt_lower: str) -> Dict:
+        """Extract output and reporting preferences"""
+        preferences = {
+            "severity_threshold": "all",
+            "include_poc": True,
+            "include_remediation": True
+        }
+
+        # Severity threshold
+        if "critical only" in prompt_lower or "only critical" in prompt_lower:
+            preferences["severity_threshold"] = "critical"
+        elif "high and above" in prompt_lower or "high severity" in prompt_lower:
+            preferences["severity_threshold"] = "high"
+        elif "medium and above" in prompt_lower:
+            preferences["severity_threshold"] = "medium"
+
+        # PoC preference
+        if "no poc" in prompt_lower or "without poc" in prompt_lower:
+            preferences["include_poc"] = False
+
+        # Remediation preference
+        if "no remediation" in prompt_lower or "without remediation" in prompt_lower:
+            preferences["include_remediation"] = False
+
+        return preferences