NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages

Major features:
- VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts
- Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop
- CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers
- Validation Pipeline: negative controls, proof of execution, confidence scoring, judge
- AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon
- Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents
- RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates
- Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh
- Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install
- Full AI Testing: methodology-driven comprehensive pentest sessions
- Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts
- Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
This commit is contained in:
CyberSecurityUP
2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions

View File

@@ -0,0 +1,457 @@
"""
CLI Output Parser - 3-tier finding extraction from CLI agent output.
Tier 1: JSON marker blocks (===FINDING_START=== / ===FINDING_END===)
Tier 2: Regex patterns for known tool output formats (nuclei, nmap, sqlmap)
Tier 3: AI-assisted extraction via LLM for unstructured text
"""
import json
import re
import logging
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Set
logger = logging.getLogger(__name__)
# JSON finding markers used in CLI instructions (Tier 1 delimiters: the CLI
# agent is told to wrap each structured finding between these two markers).
FINDING_START = "===FINDING_START==="
FINDING_END = "===FINDING_END==="
# Progress markers emitted by the CLI agent, e.g. "[PHASE] recon",
# "[COMPLETE]", "[PROGRESS] 45% scanning".
PHASE_PATTERN = re.compile(r'\[PHASE\]\s*(.+)', re.IGNORECASE)
COMPLETE_PATTERN = re.compile(r'\[COMPLETE\]', re.IGNORECASE)
PROGRESS_PATTERN = re.compile(r'\[PROGRESS\]\s*(\d+)%?\s*(.*)', re.IGNORECASE)
# Severity keywords for regex extraction: maps spellings/abbreviations to the
# canonical levels (critical, high, medium, low, info).
SEVERITY_MAP = {
    "critical": "critical", "crit": "critical",
    "high": "high",
    "medium": "medium", "med": "medium",
    "low": "low",
    "info": "info", "informational": "info",
}
# Nuclei JSONL output pattern: a single line holding a JSON object that
# contains both the "template-id" and "matched-at" keys.
NUCLEI_JSON_PATTERN = re.compile(r'^\{.*"template-id".*"matched-at".*\}$', re.MULTILINE)
# Generic vulnerability patterns in CLI output (Tier 2 regex extraction).
VULN_PATTERNS = [
    # [VULNERABILITY] Title - Severity   (the "- severity" suffix is optional)
    re.compile(
        r'\[(?:VULNERABILITY|VULN|FINDING|ALERT)\]\s*(.+?)(?:\s*[-]\s*(critical|high|medium|low|info))?$',
        re.IGNORECASE | re.MULTILINE
    ),
    # SQLMap style: Parameter 'X' is vulnerable
    re.compile(
        r"(?:Parameter|Param)\s+['\"]?(\w+)['\"]?\s+(?:is|appears)\s+(?:vulnerable|injectable)",
        re.IGNORECASE
    ),
    # Nuclei text output: [severity] [template-id] URL
    re.compile(
        r'\[(critical|high|medium|low|info)\]\s*\[([^\]]+)\]\s*(https?://\S+)',
        re.IGNORECASE
    ),
]
@dataclass
class ParsedFinding:
    """A single finding extracted from CLI agent output.

    Produced by the 3-tier extraction pipeline (JSON markers, regex
    patterns, AI extraction) and serialized via :meth:`to_dict` into the
    finding schema the rest of the platform consumes.
    """
    title: str
    severity: str = "medium"
    vulnerability_type: str = ""  # canonical vuln-type key; inferred from title when empty
    endpoint: str = ""
    parameter: str = ""
    evidence: str = ""
    poc_code: str = ""
    request: str = ""
    response: str = ""
    impact: str = ""
    cvss_score: Optional[float] = None
    source: str = "cli_agent"

    def to_dict(self) -> Dict:
        """Serialize to the platform's finding dict schema.

        CLI-extracted findings are treated as confirmed with a fixed
        baseline confidence of 70.
        """
        d = {
            "title": self.title,
            "severity": self.severity,
            "vulnerability_type": self.vulnerability_type or self._infer_vuln_type(),
            "affected_endpoint": self.endpoint,
            "parameter": self.parameter,
            "evidence": self.evidence,
            "poc_code": self.poc_code,
            "request": self.request,
            "response": self.response,
            "impact": self.impact,
            "source": self.source,
            "ai_status": "confirmed",
            "ai_verified": True,
            "confidence_score": 70,
        }
        # BUGFIX: explicit None check — a legitimate 0.0 CVSS score is falsy
        # and was previously dropped from the dict.
        if self.cvss_score is not None:
            d["cvss_score"] = self.cvss_score
        return d

    def _infer_vuln_type(self) -> str:
        """Infer vulnerability type from title keywords.

        Returns the canonical vuln-type key, or "unknown" when no keyword
        matches.
        """
        title_lower = self.title.lower()
        type_map = {
            "sql injection": "sqli_error", "sqli": "sqli_error",
            "xss": "xss_reflected", "cross-site scripting": "xss_reflected",
            "stored xss": "xss_stored", "dom xss": "xss_dom",
            "command injection": "command_injection", "rce": "command_injection",
            "ssrf": "ssrf", "server-side request": "ssrf",
            "lfi": "lfi", "local file": "lfi", "path traversal": "path_traversal",
            "rfi": "rfi", "remote file": "rfi",
            "xxe": "xxe", "xml external": "xxe",
            "ssti": "ssti", "template injection": "ssti",
            "csrf": "csrf", "cross-site request": "csrf",
            "idor": "idor", "insecure direct": "idor",
            "open redirect": "open_redirect",
            "file upload": "file_upload",
            "directory listing": "directory_listing",
            "information disclosure": "information_disclosure",
            "sensitive data": "sensitive_data_exposure",
            "security header": "security_headers",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "nosql": "nosql_injection",
            "ldap": "ldap_injection",
            "jwt": "jwt_manipulation",
            "auth bypass": "auth_bypass",
            "brute force": "brute_force",
            "rate limit": "rate_limit_bypass",
            "clickjacking": "clickjacking",
            "http smuggling": "http_smuggling",
            "cache poison": "cache_poisoning",
            "deserialization": "insecure_deserialization",
            "prototype pollution": "prototype_pollution",
            "graphql": "graphql_injection",
            "host header": "host_header_injection",
            "race condition": "race_condition",
            "business logic": "business_logic",
        }
        # BUGFIX: match the most specific (longest) keyword first. With plain
        # insertion-order iteration the generic "xss" keyword shadowed
        # "stored xss" / "dom xss", making xss_stored and xss_dom unreachable.
        for keyword in sorted(type_map, key=len, reverse=True):
            if keyword in title_lower:
                return type_map[keyword]
        return "unknown"
class CLIOutputParser:
    """3-tier output parser for CLI agent findings.

    Tier 1: JSON blocks delimited by ``FINDING_START`` / ``FINDING_END``
        markers; a partial trailing block is buffered across chunks.
    Tier 2: regex patterns for known tool output formats, plus Nuclei JSONL.
    Tier 3: text that matched nothing is accumulated and retrievable via
        :meth:`get_unparsed_text` for AI-assisted extraction
        (see ``ai_extract_findings``).
    """

    def __init__(self):
        # Dedup keys ("title|endpoint|severity") of findings already emitted.
        self._seen_finding_hashes: Set[str] = set()
        # Accumulates a partial JSON marker block across chunks.
        self._buffer = ""
        # Chunks that yielded no findings; candidates for Tier 3 AI extraction.
        self._unparsed_chunks: List[str] = []
        self._total_findings = 0
        self._phases_seen: List[str] = []
        self._is_complete = False

    def parse_chunk(self, text: str) -> List[ParsedFinding]:
        """Parse a chunk of CLI output. Returns newly extracted findings.

        Side effects: updates progress state from [PHASE]/[COMPLETE] markers,
        buffers partial JSON marker blocks, and stores unmatched text for
        later AI extraction. Findings already seen (same title/endpoint/
        severity) are suppressed across calls.
        """
        if not text or not text.strip():
            return []
        findings: List[ParsedFinding] = []
        # Track progress markers
        for m in PHASE_PATTERN.finditer(text):
            phase = m.group(1).strip()
            if phase not in self._phases_seen:
                self._phases_seen.append(phase)
                logger.info(f"[CLI-PARSER] Phase: {phase}")
        if COMPLETE_PATTERN.search(text):
            self._is_complete = True
        # Tier 1: JSON marker blocks (prepend any buffered partial block so
        # a block split across chunks is completed here).
        combined = self._buffer + text
        tier1 = self._extract_json_markers(combined)
        findings.extend(tier1)
        # Tier 2: regex patterns (raw chunk only; cross-chunk matches are
        # not attempted for regex tiers).
        tier2 = self._extract_regex_findings(text)
        findings.extend(tier2)
        # Tier 2b: Nuclei JSONL
        tier2b = self._extract_nuclei_jsonl(text)
        findings.extend(tier2b)
        # Track unparsed text for later AI extraction; very short chunks are
        # treated as noise and dropped.
        if not tier1 and not tier2 and not tier2b:
            if len(text.strip()) > 50:
                self._unparsed_chunks.append(text)
        # Deduplicate against everything seen so far.
        unique = []
        for f in findings:
            h = f"{f.title}|{f.endpoint}|{f.severity}"
            if h not in self._seen_finding_hashes:
                self._seen_finding_hashes.add(h)
                unique.append(f)
                self._total_findings += 1
        return unique

    def get_unparsed_text(self, clear: bool = True) -> str:
        """Get accumulated unparsed text for AI extraction.

        Args:
            clear: when True (default) the internal accumulator is emptied.
        """
        text = "\n".join(self._unparsed_chunks)
        if clear:
            self._unparsed_chunks = []
        return text

    @property
    def is_complete(self) -> bool:
        # True once a [COMPLETE] marker has been seen in any chunk.
        return self._is_complete

    @property
    def phases(self) -> List[str]:
        # Phases in first-seen order.
        return self._phases_seen

    @property
    def total_findings(self) -> int:
        # Count of unique findings emitted so far.
        return self._total_findings

    def _extract_json_markers(self, text: str) -> List[ParsedFinding]:
        """Tier 1: extract findings from FINDING_START / FINDING_END blocks.

        A trailing block that has started but not yet ended is saved to
        ``self._buffer`` so the next chunk can complete it.
        """
        findings = []
        remaining_buffer = ""
        # Split on the start marker; every element after the first begins a
        # (possibly incomplete) finding block.
        parts = text.split(FINDING_START)
        for i, part in enumerate(parts):
            if i == 0:
                continue  # Text before first marker
            if FINDING_END in part:
                json_text, after = part.split(FINDING_END, 1)
                json_text = json_text.strip()
                try:
                    data = json.loads(json_text)
                    f = self._json_to_finding(data)
                    if f:
                        findings.append(f)
                except json.JSONDecodeError:
                    # Try to fix common JSON issues before giving up.
                    fixed = self._try_fix_json(json_text)
                    if fixed:
                        f = self._json_to_finding(fixed)
                        if f:
                            findings.append(f)
                    else:
                        logger.debug(f"[CLI-PARSER] Invalid JSON in marker block: {json_text[:100]}")
            else:
                # Incomplete block - save to buffer for next chunk
                remaining_buffer = FINDING_START + part
        # BUGFIX: always reset the buffer. Previously it was only assigned
        # inside the incomplete-block branch, so once a buffered block
        # completed it was never cleared and the same content was re-parsed
        # (and re-prepended) on every subsequent chunk.
        self._buffer = remaining_buffer
        return findings

    def _extract_regex_findings(self, text: str) -> List[ParsedFinding]:
        """Tier 2: extract findings using the VULN_PATTERNS regexes.

        Group conventions: group 1 is the title (or parameter name), group 2
        the severity word (optional), group 3 the endpoint URL (optional).
        """
        findings = []
        for pattern in VULN_PATTERNS:
            for match in pattern.finditer(text):
                groups = match.groups()
                if len(groups) >= 1:
                    title = groups[0].strip()
                    severity = "medium"
                    endpoint = ""
                    if len(groups) >= 2 and groups[1]:
                        sev = groups[1].lower().strip()
                        severity = SEVERITY_MAP.get(sev, "medium")
                    if len(groups) >= 3 and groups[2]:
                        endpoint = groups[2].strip()
                    # Skip very short or generic titles (likely noise).
                    if len(title) < 5 or title.lower() in ("n/a", "none", "test"):
                        continue
                    findings.append(ParsedFinding(
                        title=title,
                        severity=severity,
                        endpoint=endpoint,
                        evidence=match.group(0),
                    ))
        return findings

    def _extract_nuclei_jsonl(self, text: str) -> List[ParsedFinding]:
        """Tier 2b: extract findings from Nuclei JSONL output lines."""
        findings = []
        for match in NUCLEI_JSON_PATTERN.finditer(text):
            try:
                data = json.loads(match.group(0))
                template_id = data.get("template-id", "")
                matched_at = data.get("matched-at", "")
                info = data.get("info", {})
                severity = info.get("severity", "medium").lower()
                name = info.get("name", template_id)
                description = info.get("description", "")
                findings.append(ParsedFinding(
                    title=f"[Nuclei] {name}",
                    severity=SEVERITY_MAP.get(severity, "medium"),
                    vulnerability_type=self._nuclei_to_vuln_type(template_id),
                    endpoint=matched_at,
                    evidence=f"Template: {template_id}\n{description}",
                    poc_code=f"nuclei -t {template_id} -u {matched_at}",
                ))
            except json.JSONDecodeError:
                # Line looked like nuclei JSONL but wasn't valid JSON; skip it.
                continue
        return findings

    def _json_to_finding(self, data: Dict) -> Optional[ParsedFinding]:
        """Convert a JSON dict to ParsedFinding; None when no title present.

        Severity is normalized via SEVERITY_MAP and falls back to "medium"
        for unrecognized values.
        """
        title = data.get("title", "").strip()
        if not title:
            return None
        severity = data.get("severity", "medium").lower()
        severity = SEVERITY_MAP.get(severity, severity)
        if severity not in ("critical", "high", "medium", "low", "info"):
            severity = "medium"
        return ParsedFinding(
            title=title,
            severity=severity,
            vulnerability_type=data.get("vulnerability_type", ""),
            endpoint=data.get("endpoint", data.get("affected_endpoint", "")),
            parameter=data.get("parameter", ""),
            evidence=data.get("evidence", ""),
            poc_code=data.get("poc_code", data.get("poc", "")),
            request=data.get("request", ""),
            response=data.get("response", ""),
            impact=data.get("impact", ""),
            cvss_score=data.get("cvss_score"),
        )

    @staticmethod
    def _try_fix_json(text: str) -> Optional[Dict]:
        """Try to fix common JSON issues; None when unrecoverable."""
        # Remove trailing commas before a closing brace/bracket.
        fixed = re.sub(r',\s*}', '}', text)
        fixed = re.sub(r',\s*]', ']', fixed)
        try:
            return json.loads(fixed)
        except json.JSONDecodeError:
            pass
        # The model may have emitted bare key/value pairs; try wrapping them.
        if not fixed.startswith('{'):
            try:
                return json.loads('{' + fixed + '}')
            except json.JSONDecodeError:
                pass
        return None

    @staticmethod
    def _nuclei_to_vuln_type(template_id: str) -> str:
        """Map a nuclei template ID (substring match) to a vuln-type key."""
        tid = template_id.lower()
        mappings = {
            "sqli": "sqli_error", "sql-injection": "sqli_error",
            "xss": "xss_reflected", "cross-site-scripting": "xss_reflected",
            "ssrf": "ssrf", "server-side-request": "ssrf",
            "lfi": "lfi", "local-file": "lfi",
            "rfi": "rfi", "remote-file": "rfi",
            "rce": "command_injection", "command-injection": "command_injection",
            "ssti": "ssti", "template-injection": "ssti",
            "xxe": "xxe", "xml-external": "xxe",
            "redirect": "open_redirect",
            "cors": "cors_misconfig",
            "crlf": "crlf_injection",
            "csrf": "csrf",
            "header-injection": "header_injection",
            "directory-listing": "directory_listing",
            "info-disclosure": "information_disclosure",
            "exposure": "sensitive_data_exposure",
            "ssl": "ssl_issues", "tls": "ssl_issues",
            "default-login": "default_credentials",
            "misconfig": "security_headers",
        }
        for key, vtype in mappings.items():
            if key in tid:
                return vtype
        return "unknown"
# AI-assisted extraction prompt template (Tier 3). Sent to an LLM by
# ai_extract_findings(); {output} is filled with the accumulated unparsed CLI
# text, and the doubled braces escape the literal JSON example in the prompt.
AI_EXTRACT_PROMPT = """Analyze this penetration testing CLI output and extract any CONFIRMED vulnerability findings.
IMPORTANT: Only extract findings where there is clear evidence of a vulnerability (error messages,
data leakage, successful exploitation). Do NOT extract theoretical or untested issues.
CLI Output:
{output}
For each confirmed finding, provide:
- title: concise vulnerability name
- severity: critical|high|medium|low|info
- vulnerability_type: e.g., sqli_error, xss_reflected, ssrf, command_injection, etc.
- endpoint: the affected URL
- parameter: affected parameter (if applicable)
- evidence: the actual proof (HTTP response, error, data leaked)
- poc_code: the command or request that confirmed it
Respond ONLY with valid JSON:
{{"findings": [{{"title": "...", "severity": "...", "vulnerability_type": "...", "endpoint": "...", "parameter": "...", "evidence": "...", "poc_code": "..."}}]}}
If no confirmed findings, respond: {{"findings": []}}"""
async def ai_extract_findings(text: str, llm, max_chars: int = 8000) -> List[ParsedFinding]:
    """Tier 3: AI-assisted extraction of findings from unstructured CLI output.

    Args:
        text: Accumulated unparsed output (see CLIOutputParser.get_unparsed_text).
        llm: Provider exposing an async ``generate(prompt=..., system=...,
            max_tokens=...)`` method that returns the model's text response.
        max_chars: Input is truncated to this length before prompting.

    Returns:
        List of ParsedFinding; empty when the input is too short, the LLM
        response has no usable JSON, or any error occurs (best-effort tier).
    """
    if not text or len(text.strip()) < 100:
        return []
    # Truncate so the prompt stays within the model's context budget.
    if len(text) > max_chars:
        text = text[:max_chars] + "\n... [truncated]"
    prompt = AI_EXTRACT_PROMPT.format(output=text)
    try:
        response = await llm.generate(
            prompt=prompt,
            system="You are a security finding extractor. Extract only confirmed vulnerabilities with real evidence.",
            max_tokens=2000,
        )
        if not response:
            return []
        # Pull the JSON object out of any surrounding prose/markdown.
        json_match = re.search(r'\{.*"findings".*\}', response, re.DOTALL)
        if not json_match:
            return []
        data = json.loads(json_match.group(0))
        findings_data = data.get("findings", [])
        if not isinstance(findings_data, list):
            return []
        findings = []
        for fd in findings_data:
            # Guard against malformed LLM output (non-dict items, missing title).
            if not isinstance(fd, dict) or not fd.get("title"):
                continue
            # CONSISTENCY FIX: normalize severity the same way the Tier 1 path
            # does — the model may return e.g. "High" or an invented level,
            # which previously passed through unvalidated.
            severity = str(fd.get("severity", "medium")).lower().strip()
            if severity not in ("critical", "high", "medium", "low", "info"):
                severity = "medium"
            findings.append(ParsedFinding(
                title=fd["title"],
                severity=severity,
                vulnerability_type=fd.get("vulnerability_type", ""),
                endpoint=fd.get("endpoint", ""),
                parameter=fd.get("parameter", ""),
                evidence=fd.get("evidence", ""),
                poc_code=fd.get("poc_code", ""),
            ))
        logger.info(f"[CLI-PARSER] AI extracted {len(findings)} findings")
        return findings
    except Exception as e:
        # Best-effort tier: never let AI extraction crash the parse pipeline.
        logger.warning(f"[CLI-PARSER] AI extraction failed: {e}")
        return []