mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-03-20 09:13:37 +00:00
NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform
116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
This commit is contained in:
457
backend/core/cli_output_parser.py
Normal file
457
backend/core/cli_output_parser.py
Normal file
@@ -0,0 +1,457 @@
|
||||
"""
|
||||
CLI Output Parser - 3-tier finding extraction from CLI agent output.
|
||||
|
||||
Tier 1: JSON marker blocks (===FINDING_START=== / ===FINDING_END===)
|
||||
Tier 2: Regex patterns for known tool output formats (nuclei, nmap, sqlmap)
|
||||
Tier 3: AI-assisted extraction via LLM for unstructured text
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Optional, Set
|
||||
|
||||
logger = logging.getLogger(__name__)

# JSON finding markers used in CLI instructions; agents are instructed to wrap
# each structured finding between these sentinels so Tier 1 can extract them.
FINDING_START = "===FINDING_START==="
FINDING_END = "===FINDING_END==="

# Progress markers emitted by the CLI agent to report pentest phase/state.
PHASE_PATTERN = re.compile(r'\[PHASE\]\s*(.+)', re.IGNORECASE)
COMPLETE_PATTERN = re.compile(r'\[COMPLETE\]', re.IGNORECASE)
# NOTE(review): PROGRESS_PATTERN is not referenced in this module — presumably
# consumed by a caller elsewhere; confirm before removing.
PROGRESS_PATTERN = re.compile(r'\[PROGRESS\]\s*(\d+)%?\s*(.*)', re.IGNORECASE)

# Severity keywords for regex extraction (maps aliases to canonical levels).
SEVERITY_MAP = {
    "critical": "critical", "crit": "critical",
    "high": "high",
    "medium": "medium", "med": "medium",
    "low": "low",
    "info": "info", "informational": "info",
}

# Nuclei JSONL output pattern: one JSON object per line that contains both the
# "template-id" and "matched-at" keys ('.' does not cross newlines, so the
# MULTILINE anchors keep each match on a single line).
NUCLEI_JSON_PATTERN = re.compile(r'^\{.*"template-id".*"matched-at".*\}$', re.MULTILINE)

# Generic vulnerability patterns in CLI output.
# NOTE: the capture-group layout differs per pattern — pattern 1 captures
# (title, severity?), pattern 2 captures (parameter,), and pattern 3 captures
# (severity, template-id, url). Consumers must not assume a uniform layout.
VULN_PATTERNS = [
    # [VULNERABILITY] Title - Severity
    re.compile(
        r'\[(?:VULNERABILITY|VULN|FINDING|ALERT)\]\s*(.+?)(?:\s*[-–]\s*(critical|high|medium|low|info))?$',
        re.IGNORECASE | re.MULTILINE
    ),
    # SQLMap style: Parameter 'X' is vulnerable
    re.compile(
        r"(?:Parameter|Param)\s+['\"]?(\w+)['\"]?\s+(?:is|appears)\s+(?:vulnerable|injectable)",
        re.IGNORECASE
    ),
    # Nuclei text: [severity] [template-id] URL
    re.compile(
        r'\[(critical|high|medium|low|info)\]\s*\[([^\]]+)\]\s*(https?://\S+)',
        re.IGNORECASE
    ),
]
|
||||
|
||||
|
||||
@dataclass
class ParsedFinding:
    """A single finding extracted from CLI agent output.

    Produced by any of the three parser tiers (JSON marker blocks, regex
    patterns, AI extraction) and normalized into the platform's finding
    schema via :meth:`to_dict`.
    """
    title: str
    severity: str = "medium"          # critical|high|medium|low|info
    vulnerability_type: str = ""      # canonical type key; inferred from title if empty
    endpoint: str = ""                # affected URL
    parameter: str = ""               # affected parameter, if any
    evidence: str = ""                # raw proof (response snippet, tool output line)
    poc_code: str = ""                # command/request that reproduces the finding
    request: str = ""
    response: str = ""
    impact: str = ""
    cvss_score: Optional[float] = None
    source: str = "cli_agent"

    def to_dict(self) -> Dict:
        """Serialize into the finding schema consumed by the rest of the platform.

        The vulnerability type is inferred from the title when not set
        explicitly; ``cvss_score`` is included only when a score was parsed.
        """
        d = {
            "title": self.title,
            "severity": self.severity,
            "vulnerability_type": self.vulnerability_type or self._infer_vuln_type(),
            "affected_endpoint": self.endpoint,
            "parameter": self.parameter,
            "evidence": self.evidence,
            "poc_code": self.poc_code,
            "request": self.request,
            "response": self.response,
            "impact": self.impact,
            "source": self.source,
            "ai_status": "confirmed",
            "ai_verified": True,
            "confidence_score": 70,
        }
        # `is not None` (not truthiness) so a legitimate CVSS score of 0.0 is kept.
        if self.cvss_score is not None:
            d["cvss_score"] = self.cvss_score
        return d

    def _infer_vuln_type(self) -> str:
        """Infer vulnerability type from title keywords.

        Keywords are matched longest-first so specific phrases beat the
        substrings they contain (e.g. "stored xss" must win over "xss" —
        otherwise the xss_stored/xss_dom entries would be unreachable).
        """
        title_lower = self.title.lower()
        type_map = {
            "sql injection": "sqli_error", "sqli": "sqli_error",
            "xss": "xss_reflected", "cross-site scripting": "xss_reflected",
            "stored xss": "xss_stored", "dom xss": "xss_dom",
            "command injection": "command_injection", "rce": "command_injection",
            "ssrf": "ssrf", "server-side request": "ssrf",
            "lfi": "lfi", "local file": "lfi", "path traversal": "path_traversal",
            "rfi": "rfi", "remote file": "rfi",
            "xxe": "xxe", "xml external": "xxe",
            "ssti": "ssti", "template injection": "ssti",
            "csrf": "csrf", "cross-site request": "csrf",
            "idor": "idor", "insecure direct": "idor",
            "open redirect": "open_redirect",
            "file upload": "file_upload",
            "directory listing": "directory_listing",
            "information disclosure": "information_disclosure",
            "sensitive data": "sensitive_data_exposure",
            "security header": "security_headers",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "nosql": "nosql_injection",
            "ldap": "ldap_injection",
            "jwt": "jwt_manipulation",
            "auth bypass": "auth_bypass",
            "brute force": "brute_force",
            "rate limit": "rate_limit_bypass",
            "clickjacking": "clickjacking",
            "http smuggling": "http_smuggling",
            "cache poison": "cache_poisoning",
            "deserialization": "insecure_deserialization",
            "prototype pollution": "prototype_pollution",
            "graphql": "graphql_injection",
            "host header": "host_header_injection",
            "race condition": "race_condition",
            "business logic": "business_logic",
        }
        # Longest keyword first: most specific match wins.
        for keyword in sorted(type_map, key=len, reverse=True):
            if keyword in title_lower:
                return type_map[keyword]
        return "unknown"
|
||||
|
||||
|
||||
class CLIOutputParser:
    """3-tier output parser for CLI agent findings.

    Tier 1 extracts JSON blocks wrapped in FINDING_START/FINDING_END markers
    (buffering a partial trailing block across streamed chunks), Tier 2
    applies regex patterns for known tool formats (plus Nuclei JSONL), and
    any chunk that matched nothing is accumulated for Tier 3 AI extraction
    (see ai_extract_findings). The parser also tracks [PHASE]/[COMPLETE]
    progress markers and deduplicates findings by (title, endpoint, severity).
    """

    # Cap on carried-over partial JSON so a missing FINDING_END marker cannot
    # make the buffer grow without bound over a long-running scan.
    _MAX_BUFFER_CHARS = 64 * 1024

    def __init__(self):
        self._seen_finding_hashes: Set[str] = set()
        self._buffer = ""  # Accumulates a partial JSON marker block across chunks
        self._unparsed_chunks: List[str] = []
        self._total_findings = 0
        self._phases_seen: List[str] = []
        self._is_complete = False

    def parse_chunk(self, text: str) -> List[ParsedFinding]:
        """Parse a chunk of CLI output. Returns newly extracted, deduplicated findings."""
        if not text or not text.strip():
            return []

        findings: List[ParsedFinding] = []

        # Track progress markers
        for m in PHASE_PATTERN.finditer(text):
            phase = m.group(1).strip()
            if phase not in self._phases_seen:
                self._phases_seen.append(phase)
                logger.info(f"[CLI-PARSER] Phase: {phase}")

        if COMPLETE_PATTERN.search(text):
            self._is_complete = True

        # Tier 1: JSON marker blocks (prepend the buffered partial block, if any)
        combined = self._buffer + text
        tier1 = self._extract_json_markers(combined)
        findings.extend(tier1)

        # Tier 2: Regex patterns for known tool output formats
        tier2 = self._extract_regex_findings(text)
        findings.extend(tier2)

        # Tier 2b: Nuclei JSONL
        tier2b = self._extract_nuclei_jsonl(text)
        findings.extend(tier2b)

        # Track unparsed text for later AI extraction (Tier 3); skip trivially
        # short chunks that cannot contain a meaningful finding.
        if not tier1 and not tier2 and not tier2b:
            if len(text.strip()) > 50:
                self._unparsed_chunks.append(text)

        # Deduplicate across the whole session by (title, endpoint, severity)
        unique = []
        for f in findings:
            h = f"{f.title}|{f.endpoint}|{f.severity}"
            if h not in self._seen_finding_hashes:
                self._seen_finding_hashes.add(h)
                unique.append(f)
                self._total_findings += 1

        return unique

    def get_unparsed_text(self, clear: bool = True) -> str:
        """Return accumulated unparsed text for Tier 3 AI extraction.

        Args:
            clear: When True (default), the internal accumulator is emptied
                so subsequent calls only see new chunks.
        """
        text = "\n".join(self._unparsed_chunks)
        if clear:
            self._unparsed_chunks = []
        return text

    @property
    def is_complete(self) -> bool:
        """True once a [COMPLETE] marker has been observed."""
        return self._is_complete

    @property
    def phases(self) -> List[str]:
        """Phase names seen so far, in first-seen order."""
        return self._phases_seen

    @property
    def total_findings(self) -> int:
        """Count of unique findings extracted across all chunks."""
        return self._total_findings

    def _extract_json_markers(self, text: str) -> List[ParsedFinding]:
        """Tier 1: Extract findings from ===FINDING_START=== / ===FINDING_END=== blocks.

        A trailing block with no end marker yet is saved to ``self._buffer``
        so the next chunk can complete it (subject to _MAX_BUFFER_CHARS).
        """
        findings = []
        remaining_buffer = ""

        # Find all complete blocks
        parts = text.split(FINDING_START)
        for i, part in enumerate(parts):
            if i == 0:
                continue  # Text before the first marker

            if FINDING_END in part:
                json_text, _after = part.split(FINDING_END, 1)
                json_text = json_text.strip()
                try:
                    data = json.loads(json_text)
                    f = self._json_to_finding(data)
                    if f:
                        findings.append(f)
                except json.JSONDecodeError:
                    # LLM agents frequently emit almost-valid JSON; try to repair it
                    fixed = self._try_fix_json(json_text)
                    if fixed:
                        f = self._json_to_finding(fixed)
                        if f:
                            findings.append(f)
                    else:
                        logger.debug(f"[CLI-PARSER] Invalid JSON in marker block: {json_text[:100]}")
            else:
                # Incomplete block - save to buffer for the next chunk
                remaining_buffer = FINDING_START + part

        # If the partial block exceeds the cap, the end marker is clearly never
        # coming — drop it rather than leak memory for the rest of the scan.
        if len(remaining_buffer) > self._MAX_BUFFER_CHARS:
            logger.debug("[CLI-PARSER] Discarding oversized partial finding block")
            remaining_buffer = ""
        self._buffer = remaining_buffer
        return findings

    def _extract_regex_findings(self, text: str) -> List[ParsedFinding]:
        """Tier 2: Extract findings using regex patterns.

        Each pattern in VULN_PATTERNS has its own capture-group layout, so
        each is handled explicitly rather than through a generic
        (title, severity, endpoint) assumption.
        """
        findings = []

        # Pattern 1: "[VULNERABILITY] Title - Severity" -> groups (title, severity?)
        for match in VULN_PATTERNS[0].finditer(text):
            title = (match.group(1) or "").strip()
            # Skip very short or generic titles
            if len(title) < 5 or title.lower() in ("n/a", "none", "test"):
                continue
            sev = (match.group(2) or "").lower().strip()
            findings.append(ParsedFinding(
                title=title,
                severity=SEVERITY_MAP.get(sev, "medium"),
                evidence=match.group(0),
            ))

        # Pattern 2: SQLMap "Parameter 'X' is vulnerable" -> groups (parameter,).
        # The single group is the parameter name, not a title, so build a
        # descriptive title around it (a bare name like "id" would otherwise be
        # discarded by the length filter).
        for match in VULN_PATTERNS[1].finditer(text):
            param = match.group(1).strip()
            findings.append(ParsedFinding(
                title=f"SQL Injection - parameter '{param}'",
                severity="high",
                vulnerability_type="sqli_error",
                parameter=param,
                evidence=match.group(0),
            ))

        # Pattern 3: Nuclei text "[severity] [template-id] URL" -> groups
        # (severity, template-id, url) — NOT (title, severity, endpoint).
        for match in VULN_PATTERNS[2].finditer(text):
            sev, template_id, url = match.groups()
            findings.append(ParsedFinding(
                title=f"[Nuclei] {template_id.strip()}",
                severity=SEVERITY_MAP.get(sev.lower(), "medium"),
                vulnerability_type=self._nuclei_to_vuln_type(template_id),
                endpoint=url.strip(),
                evidence=match.group(0),
            ))

        return findings

    def _extract_nuclei_jsonl(self, text: str) -> List[ParsedFinding]:
        """Tier 2b: Extract findings from Nuclei JSONL output (one object per line)."""
        findings = []

        for match in NUCLEI_JSON_PATTERN.finditer(text):
            try:
                data = json.loads(match.group(0))
                template_id = data.get("template-id", "")
                matched_at = data.get("matched-at", "")
                info = data.get("info", {})
                severity = info.get("severity", "medium").lower()
                name = info.get("name", template_id)
                description = info.get("description", "")

                findings.append(ParsedFinding(
                    title=f"[Nuclei] {name}",
                    severity=SEVERITY_MAP.get(severity, "medium"),
                    vulnerability_type=self._nuclei_to_vuln_type(template_id),
                    endpoint=matched_at,
                    evidence=f"Template: {template_id}\n{description}",
                    poc_code=f"nuclei -t {template_id} -u {matched_at}",
                ))
            except json.JSONDecodeError:
                # A line that merely looked like Nuclei JSON; ignore it
                continue

        return findings

    def _json_to_finding(self, data: Dict) -> Optional[ParsedFinding]:
        """Convert a JSON dict to ParsedFinding. Returns None if no title."""
        title = data.get("title", "").strip()
        if not title:
            return None

        # Normalize severity through the alias map, then validate the result
        severity = data.get("severity", "medium").lower()
        severity = SEVERITY_MAP.get(severity, severity)
        if severity not in ("critical", "high", "medium", "low", "info"):
            severity = "medium"

        return ParsedFinding(
            title=title,
            severity=severity,
            vulnerability_type=data.get("vulnerability_type", ""),
            endpoint=data.get("endpoint", data.get("affected_endpoint", "")),
            parameter=data.get("parameter", ""),
            evidence=data.get("evidence", ""),
            poc_code=data.get("poc_code", data.get("poc", "")),
            request=data.get("request", ""),
            response=data.get("response", ""),
            impact=data.get("impact", ""),
            cvss_score=data.get("cvss_score"),
        )

    @staticmethod
    def _try_fix_json(text: str) -> Optional[Dict]:
        """Try to repair common JSON issues (trailing commas, missing braces).

        Returns the parsed dict on success, None if the text is unrecoverable.
        """
        # Remove trailing commas before closing brackets
        fixed = re.sub(r',\s*}', '}', text)
        fixed = re.sub(r',\s*]', ']', fixed)
        try:
            return json.loads(fixed)
        except json.JSONDecodeError:
            pass
        # Try wrapping bare key/value pairs in braces
        if not fixed.startswith('{'):
            try:
                return json.loads('{' + fixed + '}')
            except json.JSONDecodeError:
                pass
        return None

    @staticmethod
    def _nuclei_to_vuln_type(template_id: str) -> str:
        """Map a nuclei template ID (substring match) to a vulnerability type key."""
        tid = template_id.lower()
        mappings = {
            "sqli": "sqli_error", "sql-injection": "sqli_error",
            "xss": "xss_reflected", "cross-site-scripting": "xss_reflected",
            "ssrf": "ssrf", "server-side-request": "ssrf",
            "lfi": "lfi", "local-file": "lfi",
            "rfi": "rfi", "remote-file": "rfi",
            "rce": "command_injection", "command-injection": "command_injection",
            "ssti": "ssti", "template-injection": "ssti",
            "xxe": "xxe", "xml-external": "xxe",
            "redirect": "open_redirect",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "csrf": "csrf",
            "header-injection": "header_injection",
            "directory-listing": "directory_listing",
            "info-disclosure": "information_disclosure",
            "exposure": "sensitive_data_exposure",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "default-login": "default_credentials",
            "misconfig": "security_headers",
        }
        for key, vtype in mappings.items():
            if key in tid:
                return vtype
        return "unknown"
|
||||
|
||||
|
||||
# AI-assisted extraction prompt template (Tier 3). Filled in with the raw CLI
# output via str.format(output=...); the literal braces in the JSON example are
# doubled ({{ }}) so .format() leaves them intact.
AI_EXTRACT_PROMPT = """Analyze this penetration testing CLI output and extract any CONFIRMED vulnerability findings.

IMPORTANT: Only extract findings where there is clear evidence of a vulnerability (error messages,
data leakage, successful exploitation). Do NOT extract theoretical or untested issues.

CLI Output:
{output}

For each confirmed finding, provide:
- title: concise vulnerability name
- severity: critical|high|medium|low|info
- vulnerability_type: e.g., sqli_error, xss_reflected, ssrf, command_injection, etc.
- endpoint: the affected URL
- parameter: affected parameter (if applicable)
- evidence: the actual proof (HTTP response, error, data leaked)
- poc_code: the command or request that confirmed it

Respond ONLY with valid JSON:
{{"findings": [{{"title": "...", "severity": "...", "vulnerability_type": "...", "endpoint": "...", "parameter": "...", "evidence": "...", "poc_code": "..."}}]}}

If no confirmed findings, respond: {{"findings": []}}"""
|
||||
|
||||
|
||||
async def ai_extract_findings(text: str, llm, max_chars: int = 8000) -> List[ParsedFinding]:
    """Tier 3: AI-assisted extraction of findings from unstructured CLI output.

    Args:
        text: Accumulated unparsed CLI output (see CLIOutputParser.get_unparsed_text).
        llm: Provider exposing an async ``generate(prompt=..., system=..., max_tokens=...)``
            method that returns the model response as a string.
        max_chars: The output is truncated to this many characters before prompting.

    Returns:
        Findings the model marked as confirmed; empty list on any failure —
        extraction is best-effort and must never break the scan pipeline.
    """
    # Too little text to contain a real finding; also skips the LLM call entirely.
    if not text or len(text.strip()) < 100:
        return []

    # Truncate to keep the prompt within the model's context budget
    if len(text) > max_chars:
        text = text[:max_chars] + "\n... [truncated]"

    prompt = AI_EXTRACT_PROMPT.format(output=text)

    try:
        response = await llm.generate(
            prompt=prompt,
            system="You are a security finding extractor. Extract only confirmed vulnerabilities with real evidence.",
            max_tokens=2000,
        )

        if not response:
            return []

        # Extract the JSON object from the (possibly chatty) model response
        json_match = re.search(r'\{.*"findings".*\}', response, re.DOTALL)
        if not json_match:
            return []

        data = json.loads(json_match.group(0))
        findings_data = data.get("findings", [])

        findings = []
        for fd in findings_data:
            if not fd.get("title"):
                continue
            # Normalize/validate severity the same way the Tier 1 parser does,
            # instead of trusting the model's casing and vocabulary verbatim.
            severity = SEVERITY_MAP.get(str(fd.get("severity", "medium")).lower(), "medium")
            findings.append(ParsedFinding(
                title=fd["title"],
                severity=severity,
                vulnerability_type=fd.get("vulnerability_type", ""),
                endpoint=fd.get("endpoint", fd.get("affected_endpoint", "")),
                parameter=fd.get("parameter", ""),
                evidence=fd.get("evidence", ""),
                poc_code=fd.get("poc_code", ""),
            ))

        logger.info(f"[CLI-PARSER] AI extracted {len(findings)} findings")
        return findings

    except Exception as e:
        # Best-effort tier: log and degrade to "no findings" rather than crash.
        logger.warning(f"[CLI-PARSER] AI extraction failed: {e}")
        return []
|
||||
Reference in New Issue
Block a user