Mirror of https://github.com/CyberSecurityUP/NeuroSploit.git (synced 2026-03-20 09:13:37 +00:00)

116 modules | 100 vuln types | 18 API routes | 18 frontend pages

Major features:
- VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts
- Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop
- CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers
- Validation Pipeline: negative controls, proof of execution, confidence scoring, judge
- AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon
- Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln-type agents
- RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates
- Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh
- Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install
- Full AI Testing: methodology-driven comprehensive pentest sessions
- Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts
- Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
458 lines | 16 KiB | Python

"""
|
||
CLI Output Parser - 3-tier finding extraction from CLI agent output.
|
||
|
||
Tier 1: JSON marker blocks (===FINDING_START=== / ===FINDING_END===)
|
||
Tier 2: Regex patterns for known tool output formats (nuclei, nmap, sqlmap)
|
||
Tier 3: AI-assisted extraction via LLM for unstructured text
|
||
"""
|
||
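# Typical streaming usage -- a minimal sketch, assuming an async caller and an
# `llm` client exposing the generate() coroutine that ai_extract_findings()
# below expects:
#
#     parser = CLIOutputParser()
#     async for chunk in cli_stream:                 # hypothetical chunk source
#         for finding in parser.parse_chunk(chunk):  # tiers 1 and 2
#             save(finding.to_dict())                # hypothetical sink
#     extra = await ai_extract_findings(parser.get_unparsed_text(), llm)  # tier 3
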
import json
import re
import logging
from dataclasses import dataclass
from typing import List, Dict, Optional, Set

logger = logging.getLogger(__name__)

# JSON finding markers used in CLI instructions
FINDING_START = "===FINDING_START==="
FINDING_END = "===FINDING_END==="

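# Example of the block format the CLI agent is instructed to emit
# (illustrative values, not real output):
#   ===FINDING_START===
#   {"title": "SQL injection in 'id'", "severity": "high",
#    "endpoint": "http://example.target/item?id=1", "parameter": "id",
#    "evidence": "MySQL syntax error in response", "poc_code": "sqlmap -u ..."}
#   ===FINDING_END===
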
# Progress markers
PHASE_PATTERN = re.compile(r'\[PHASE\]\s*(.+)', re.IGNORECASE)
COMPLETE_PATTERN = re.compile(r'\[COMPLETE\]', re.IGNORECASE)
PROGRESS_PATTERN = re.compile(r'\[PROGRESS\]\s*(\d+)%?\s*(.*)', re.IGNORECASE)

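# Lines these markers are meant to match, e.g. (illustrative):
#   [PHASE] Reconnaissance
#   [PROGRESS] 40% enumerating endpoints
#   [COMPLETE]
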
# Severity keywords for regex extraction
SEVERITY_MAP = {
    "critical": "critical", "crit": "critical",
    "high": "high",
    "medium": "medium", "med": "medium",
    "low": "low",
    "info": "info", "informational": "info",
}

# Nuclei JSONL output pattern
NUCLEI_JSON_PATTERN = re.compile(r'^\{.*"template-id".*"matched-at".*\}$', re.MULTILINE)

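# A nuclei JSONL result line looks roughly like this (fields abridged; shown
# wrapped here, but real output keeps each result on a single line, which is
# what the ^...$ anchors above rely on):
#   {"template-id": "git-config", "matched-at": "http://example.target/.git/config",
#    "info": {"name": "Git Config Disclosure", "severity": "medium", "description": "..."}}
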
# Generic vulnerability patterns in CLI output
VULN_PATTERNS = [
    # [VULNERABILITY] Title - Severity
    re.compile(
        r'\[(?:VULNERABILITY|VULN|FINDING|ALERT)\]\s*(.+?)(?:\s*[-–]\s*(critical|high|medium|low|info))?$',
        re.IGNORECASE | re.MULTILINE
    ),
    # SQLMap style: Parameter 'X' is vulnerable
    re.compile(
        r"(?:Parameter|Param)\s+['\"]?(\w+)['\"]?\s+(?:is|appears)\s+(?:vulnerable|injectable)",
        re.IGNORECASE
    ),
    # Nuclei text: [severity] [template-id] URL
    re.compile(
        r'\[(critical|high|medium|low|info)\]\s*\[([^\]]+)\]\s*(https?://\S+)',
        re.IGNORECASE
    ),
]

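# Lines the three patterns above match, respectively (illustrative):
#   [VULNERABILITY] Reflected XSS in search box - high
#   Parameter 'id' is vulnerable to boolean-based blind SQL injection
#   [high] [git-config] http://example.target/.git/config

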
@dataclass
class ParsedFinding:
    """A finding extracted from CLI output."""
    title: str
    severity: str = "medium"
    vulnerability_type: str = ""
    endpoint: str = ""
    parameter: str = ""
    evidence: str = ""
    poc_code: str = ""
    request: str = ""
    response: str = ""
    impact: str = ""
    cvss_score: Optional[float] = None
    source: str = "cli_agent"

    def to_dict(self) -> Dict:
        d = {
            "title": self.title,
            "severity": self.severity,
            "vulnerability_type": self.vulnerability_type or self._infer_vuln_type(),
            "affected_endpoint": self.endpoint,
            "parameter": self.parameter,
            "evidence": self.evidence,
            "poc_code": self.poc_code,
            "request": self.request,
            "response": self.response,
            "impact": self.impact,
            "source": self.source,
            # Parser-extracted findings enter the pipeline pre-marked as
            # confirmed, with a fixed baseline confidence score.
            "ai_status": "confirmed",
            "ai_verified": True,
            "confidence_score": 70,
        }
        if self.cvss_score:
            d["cvss_score"] = self.cvss_score
        return d

    def _infer_vuln_type(self) -> str:
        """Infer vulnerability type from title keywords."""
        title_lower = self.title.lower()
        # Matching is first-substring-wins, so more specific keys must precede
        # keys they contain: "nosql" before "sql injection" (which "nosql
        # injection" would otherwise match), "stored xss"/"dom xss" before
        # "xss", and "brute force" before "rce" ("force" contains "rce").
        type_map = {
            "nosql": "nosql_injection",
            "sql injection": "sqli_error", "sqli": "sqli_error",
            "stored xss": "xss_stored", "dom xss": "xss_dom",
            "xss": "xss_reflected", "cross-site scripting": "xss_reflected",
            "brute force": "brute_force",
            "command injection": "command_injection", "rce": "command_injection",
            "ssrf": "ssrf", "server-side request": "ssrf",
            "lfi": "lfi", "local file": "lfi", "path traversal": "path_traversal",
            "rfi": "rfi", "remote file": "rfi",
            "xxe": "xxe", "xml external": "xxe",
            "ssti": "ssti", "template injection": "ssti",
            "csrf": "csrf", "cross-site request": "csrf",
            "idor": "idor", "insecure direct": "idor",
            "open redirect": "open_redirect",
            "file upload": "file_upload",
            "directory listing": "directory_listing",
            "information disclosure": "information_disclosure",
            "sensitive data": "sensitive_data_exposure",
            "security header": "security_headers",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "ldap": "ldap_injection",
            "jwt": "jwt_manipulation",
            "auth bypass": "auth_bypass",
            "rate limit": "rate_limit_bypass",
            "clickjacking": "clickjacking",
            "http smuggling": "http_smuggling",
            "cache poison": "cache_poisoning",
            "deserialization": "insecure_deserialization",
            "prototype pollution": "prototype_pollution",
            "graphql": "graphql_injection",
            "host header": "host_header_injection",
            "race condition": "race_condition",
            "business logic": "business_logic",
        }
        for keyword, vtype in type_map.items():
            if keyword in title_lower:
                return vtype
        return "unknown"


class CLIOutputParser:
    """3-tier output parser for CLI agent findings."""

    def __init__(self):
        self._seen_finding_hashes: Set[str] = set()
        self._buffer = ""  # Accumulates partial JSON blocks across chunks
        self._unparsed_chunks: List[str] = []
        self._total_findings = 0
        self._phases_seen: List[str] = []
        self._is_complete = False

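    # Chunks may split a JSON block mid-stream; text after an unmatched
    # ===FINDING_START=== is buffered and retried on the next call, e.g.
    # (hypothetical split):
    #   parser.parse_chunk('===FINDING_START===\n{"title": ')          # -> []
    #   parser.parse_chunk('"SQLi", "severity": "high"}\n===FINDING_END===')
    #   # -> [ParsedFinding(title="SQLi", severity="high", ...)]
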
    def parse_chunk(self, text: str) -> List[ParsedFinding]:
        """Parse a chunk of CLI output. Returns newly extracted findings."""
        if not text or not text.strip():
            return []

        findings: List[ParsedFinding] = []

        # Track progress markers
        for m in PHASE_PATTERN.finditer(text):
            phase = m.group(1).strip()
            if phase not in self._phases_seen:
                self._phases_seen.append(phase)
                logger.info(f"[CLI-PARSER] Phase: {phase}")

        if COMPLETE_PATTERN.search(text):
            self._is_complete = True

        # Tier 1: JSON marker blocks
        combined = self._buffer + text
        tier1 = self._extract_json_markers(combined)
        findings.extend(tier1)

        # Tier 2: Regex patterns
        tier2 = self._extract_regex_findings(text)
        findings.extend(tier2)

        # Tier 2b: Nuclei JSONL
        tier2b = self._extract_nuclei_jsonl(text)
        findings.extend(tier2b)

        # Track unparsed text for later AI extraction
        if not tier1 and not tier2 and not tier2b:
            if len(text.strip()) > 50:
                self._unparsed_chunks.append(text)

        # Deduplicate
        unique = []
        for f in findings:
            h = f"{f.title}|{f.endpoint}|{f.severity}"
            if h not in self._seen_finding_hashes:
                self._seen_finding_hashes.add(h)
                unique.append(f)
                self._total_findings += 1

        return unique

    def get_unparsed_text(self, clear: bool = True) -> str:
        """Get accumulated unparsed text for AI extraction."""
        text = "\n".join(self._unparsed_chunks)
        if clear:
            self._unparsed_chunks = []
        return text

    @property
    def is_complete(self) -> bool:
        return self._is_complete

    @property
    def phases(self) -> List[str]:
        return self._phases_seen

    @property
    def total_findings(self) -> int:
        return self._total_findings

    def _extract_json_markers(self, text: str) -> List[ParsedFinding]:
        """Tier 1: Extract findings from ===FINDING_START=== / ===FINDING_END=== blocks."""
        findings = []
        remaining_buffer = ""

        # Find all complete blocks
        parts = text.split(FINDING_START)
        for i, part in enumerate(parts):
            if i == 0:
                continue  # Text before first marker

            if FINDING_END in part:
                json_text, _ = part.split(FINDING_END, 1)
                json_text = json_text.strip()
                try:
                    data = json.loads(json_text)
                    f = self._json_to_finding(data)
                    if f:
                        findings.append(f)
                except json.JSONDecodeError:
                    # Try to fix common JSON issues
                    fixed = self._try_fix_json(json_text)
                    if fixed:
                        f = self._json_to_finding(fixed)
                        if f:
                            findings.append(f)
                    else:
                        logger.debug(f"[CLI-PARSER] Invalid JSON in marker block: {json_text[:100]}")
            else:
                # Incomplete block - save to buffer for next chunk
                remaining_buffer = FINDING_START + part

        self._buffer = remaining_buffer
        return findings

    def _extract_regex_findings(self, text: str) -> List[ParsedFinding]:
        """Tier 2: Extract findings using regex patterns."""
        findings = []

        for pattern in VULN_PATTERNS:
            for match in pattern.finditer(text):
                groups = match.groups()
                if not groups or not groups[0]:
                    continue
                first = groups[0].strip()

                if pattern.groups == 1:
                    # SQLMap-style pattern: the single group is the injectable
                    # parameter name, usually too short to pass the title-length
                    # filter below, so build a descriptive title from it. Rated
                    # high since sqlmap only reports confirmed injections.
                    findings.append(ParsedFinding(
                        title=f"Injectable parameter '{first}'",
                        severity="high",
                        parameter=first,
                        evidence=match.group(0),
                    ))
                    continue

                if first.lower() in SEVERITY_MAP and len(groups) >= 3:
                    # Nuclei text pattern captures (severity, template-id, URL),
                    # not (title, severity, endpoint) like the generic pattern.
                    severity = SEVERITY_MAP[first.lower()]
                    title = (groups[1] or "").strip()
                    endpoint = (groups[2] or "").strip()
                else:
                    title = first
                    severity = "medium"
                    endpoint = ""
                    if len(groups) >= 2 and groups[1]:
                        severity = SEVERITY_MAP.get(groups[1].lower().strip(), "medium")
                    if len(groups) >= 3 and groups[2]:
                        endpoint = groups[2].strip()

                # Skip very short or generic titles
                if len(title) < 5 or title.lower() in ("n/a", "none", "test"):
                    continue

                findings.append(ParsedFinding(
                    title=title,
                    severity=severity,
                    endpoint=endpoint,
                    evidence=match.group(0),
                ))

        return findings

    def _extract_nuclei_jsonl(self, text: str) -> List[ParsedFinding]:
        """Tier 2b: Extract findings from Nuclei JSONL output."""
        findings = []

        for match in NUCLEI_JSON_PATTERN.finditer(text):
            try:
                data = json.loads(match.group(0))
                template_id = data.get("template-id", "")
                matched_at = data.get("matched-at", "")
                info = data.get("info", {})
                severity = info.get("severity", "medium").lower()
                name = info.get("name", template_id)
                description = info.get("description", "")

                findings.append(ParsedFinding(
                    title=f"[Nuclei] {name}",
                    severity=SEVERITY_MAP.get(severity, "medium"),
                    vulnerability_type=self._nuclei_to_vuln_type(template_id),
                    endpoint=matched_at,
                    evidence=f"Template: {template_id}\n{description}",
                    poc_code=f"nuclei -t {template_id} -u {matched_at}",
                ))
            except json.JSONDecodeError:
                continue

        return findings

    def _json_to_finding(self, data: Dict) -> Optional[ParsedFinding]:
        """Convert a JSON dict to ParsedFinding."""
        # Defensive: marker blocks come from LLM output and may carry nulls or
        # non-string values, which must not crash parse_chunk().
        if not isinstance(data, dict):
            return None
        title = str(data.get("title") or "").strip()
        if not title:
            return None

        severity = str(data.get("severity") or "medium").lower()
        severity = SEVERITY_MAP.get(severity, severity)
        if severity not in ("critical", "high", "medium", "low", "info"):
            severity = "medium"

        return ParsedFinding(
            title=title,
            severity=severity,
            vulnerability_type=data.get("vulnerability_type", ""),
            endpoint=data.get("endpoint", data.get("affected_endpoint", "")),
            parameter=data.get("parameter", ""),
            evidence=data.get("evidence", ""),
            poc_code=data.get("poc_code", data.get("poc", "")),
            request=data.get("request", ""),
            response=data.get("response", ""),
            impact=data.get("impact", ""),
            cvss_score=data.get("cvss_score"),
        )

    @staticmethod
    def _try_fix_json(text: str) -> Optional[Dict]:
        """Try to fix common JSON issues."""
        # Remove trailing commas
        fixed = re.sub(r',\s*}', '}', text)
        fixed = re.sub(r',\s*]', ']', fixed)
        # Try to parse
        try:
            return json.loads(fixed)
        except json.JSONDecodeError:
            pass
        # Try wrapping in braces
        if not fixed.startswith('{'):
            try:
                return json.loads('{' + fixed + '}')
            except json.JSONDecodeError:
                pass
        return None

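    # Examples of what _try_fix_json recovers (illustrative):
    #   '{"title": "X", "severity": "low",}'  -> trailing comma stripped
    #   '"title": "X", "severity": "low"'     -> wrapped in braces, then parsed
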
    @staticmethod
    def _nuclei_to_vuln_type(template_id: str) -> str:
        """Map nuclei template ID to vulnerability type."""
        tid = template_id.lower()
        mappings = {
            "sqli": "sqli_error", "sql-injection": "sqli_error",
            "xss": "xss_reflected", "cross-site-scripting": "xss_reflected",
            "ssrf": "ssrf", "server-side-request": "ssrf",
            "lfi": "lfi", "local-file": "lfi",
            "rfi": "rfi", "remote-file": "rfi",
            "rce": "command_injection", "command-injection": "command_injection",
            "ssti": "ssti", "template-injection": "ssti",
            "xxe": "xxe", "xml-external": "xxe",
            "redirect": "open_redirect",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "csrf": "csrf",
            "header-injection": "header_injection",
            "directory-listing": "directory_listing",
            "info-disclosure": "information_disclosure",
            "exposure": "sensitive_data_exposure",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "default-login": "default_credentials",
            "misconfig": "security_headers",
        }
        for key, vtype in mappings.items():
            if key in tid:
                return vtype
        return "unknown"


# AI-assisted extraction prompt template
AI_EXTRACT_PROMPT = """Analyze this penetration testing CLI output and extract any CONFIRMED vulnerability findings.

IMPORTANT: Only extract findings where there is clear evidence of a vulnerability (error messages,
data leakage, successful exploitation). Do NOT extract theoretical or untested issues.

CLI Output:
{output}

For each confirmed finding, provide:
- title: concise vulnerability name
- severity: critical|high|medium|low|info
- vulnerability_type: e.g., sqli_error, xss_reflected, ssrf, command_injection, etc.
- endpoint: the affected URL
- parameter: affected parameter (if applicable)
- evidence: the actual proof (HTTP response, error, data leaked)
- poc_code: the command or request that confirmed it

Respond ONLY with valid JSON:
{{"findings": [{{"title": "...", "severity": "...", "vulnerability_type": "...", "endpoint": "...", "parameter": "...", "evidence": "...", "poc_code": "..."}}]}}

If no confirmed findings, respond: {{"findings": []}}"""


async def ai_extract_findings(text: str, llm, max_chars: int = 8000) -> List[ParsedFinding]:
    """Tier 3: AI-assisted extraction of findings from unstructured CLI output."""
    if not text or len(text.strip()) < 100:
        return []

    # Truncate to max_chars
    if len(text) > max_chars:
        text = text[:max_chars] + "\n... [truncated]"

    prompt = AI_EXTRACT_PROMPT.format(output=text)

    try:
        response = await llm.generate(
            prompt=prompt,
            system="You are a security finding extractor. Extract only confirmed vulnerabilities with real evidence.",
            max_tokens=2000,
        )

        if not response:
            return []

        # Extract JSON from response
        json_match = re.search(r'\{.*"findings".*\}', response, re.DOTALL)
        if not json_match:
            return []

        data = json.loads(json_match.group(0))
        findings_data = data.get("findings", [])

        findings = []
        for fd in findings_data:
            # Skip malformed entries (non-dicts or missing titles) rather than
            # letting one bad item abort the whole batch.
            if not isinstance(fd, dict) or not fd.get("title"):
                continue
            findings.append(ParsedFinding(
                title=fd["title"],
                severity=fd.get("severity", "medium"),
                vulnerability_type=fd.get("vulnerability_type", ""),
                endpoint=fd.get("endpoint", ""),
                parameter=fd.get("parameter", ""),
                evidence=fd.get("evidence", ""),
                poc_code=fd.get("poc_code", ""),
            ))

        logger.info(f"[CLI-PARSER] AI extracted {len(findings)} findings")
        return findings

    except Exception as e:
        logger.warning(f"[CLI-PARSER] AI extraction failed: {e}")
        return []
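

# Minimal self-check of tiers 1 and 2 -- an illustrative sample, not real scan
# output (tier 3 needs a live LLM client and is not exercised here).
if __name__ == "__main__":
    sample = (
        "[PHASE] Scanning\n"
        "===FINDING_START===\n"
        '{"title": "SQL injection in id parameter", "severity": "high", '
        '"endpoint": "http://example.target/?id=1", "parameter": "id"}\n'
        "===FINDING_END===\n"
        "[medium] [git-config] http://example.target/.git/config\n"
        "[COMPLETE]\n"
    )
    demo = CLIOutputParser()
    for f in demo.parse_chunk(sample):
        print(f"{f.severity:<8} {f.title}  {f.endpoint}")
    print("complete:", demo.is_complete, "| phases:", demo.phases)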