NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages

Major features:
- VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts
- Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop
- CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers
- Validation Pipeline: negative controls, proof of execution, confidence scoring, judge
- AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon
- Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents
- RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates
- Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh
- Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install
- Full AI Testing: methodology-driven comprehensive pentest sessions
- Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts
- Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
This commit is contained in:
CyberSecurityUP
2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions

View File

@@ -0,0 +1,552 @@
"""
Methodology Loader - Parses external pentest methodology .md files and indexes them
for smart injection into all LLM call sites in the autonomous agent.
Supports FASE-based methodology documents (like pentestcompleto.md) as well as
generic markdown documents. Maps sections to vulnerability types and agent contexts
for targeted injection with per-context character budgets.
"""
import logging
import os
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
# ─── FASE → Vulnerability Type Mapping ───────────────────────────────────────
# Maps each FASE section to the agent's vulnerability type identifiers.
# These match the 100 types in vuln_engine/registry.py.
# FASEs mapped to an empty list are broad/strategic phases: they are injected
# via FASE_CONTEXT_MAP (below) rather than through per-vulnerability lookups.
FASE_VULN_TYPE_MAP: Dict[str, List[str]] = {
    "fase_0": [],  # Recon - broad, no specific vuln types
    "fase_1": [],  # Architecture analysis - broad strategy
    "fase_2": [  # Authentication & session management
        "jwt_manipulation", "session_fixation", "broken_auth", "auth_bypass",
        "insecure_password_reset", "account_takeover", "cookie_manipulation",
        "captcha_bypass", "session_hijacking",
    ],
    "fase_3": [  # Authorization / access control
        "idor", "bola", "bfla", "privilege_escalation", "forced_browsing",
        "auth_bypass", "mass_assignment",
    ],
    "fase_4": [  # Business logic & workflow abuse
        "race_condition", "business_logic", "workflow_bypass",
        "payment_manipulation", "insufficient_anti_automation",
    ],
    "fase_5": [],  # CVE/Zero-day - applies to all types via strategy context
    "fase_6": [  # Cloud & infrastructure misconfiguration
        "ssrf", "cloud_misconfig", "s3_bucket_misconfiguration",
        "cloud_metadata_exposure", "serverless_misconfiguration",
        "kubernetes_misconfig", "iam_misconfig",
    ],
    "fase_7": [],  # OWASP WSTG reference - strategy context
    "fase_8": [  # REST API security
        "bola", "bfla", "mass_assignment", "excessive_data_exposure",
        "api_abuse", "api_rate_limiting", "rest_api_versioning",
        "broken_auth", "ssrf",
    ],
    "fase_9": [  # GraphQL / WebSocket / gRPC
        "graphql_injection", "graphql_introspection", "graphql_dos",
        "websocket_security", "grpc_security",
    ],
    "fase_10": [  # Injection attacks
        "sqli_error", "sqli_union", "sqli_blind", "sqli_time", "sqli_oob",
        "nosql_injection", "ssti", "ldap_injection", "xpath_injection",
        "crlf_injection", "header_injection", "parameter_pollution",
        "command_injection", "email_injection", "expression_language_injection",
        "log_injection", "orm_injection", "ssi_injection", "xslt_injection",
        "csv_injection",
    ],
    "fase_11": [  # Client-side attacks (XSS and friends)
        "xss_reflected", "xss_stored", "xss_dom", "cors_misconfig",
        "csp_bypass", "clickjacking", "open_redirect", "prototype_pollution",
        "html_injection", "css_injection", "dom_clobbering", "postmessage_abuse",
        "dangling_markup",
    ],
    "fase_12": [  # HTTP protocol-level attacks
        "http_request_smuggling", "cache_poisoning", "cache_deception",
        "http2_smuggling", "connection_pool_poisoning", "http_method_tampering",
    ],
    "fase_13": [  # File handling
        "file_upload", "lfi", "rfi", "path_traversal", "zip_slip",
    ],
    "fase_14": [  # Server-side request forgery
        "ssrf", "dns_rebinding", "blind_ssrf",
    ],
    "fase_15": [  # Authentication attack techniques
        "broken_auth", "insecure_password_reset", "brute_force",
        "account_enumeration", "captcha_bypass", "session_fixation",
        "account_takeover", "mfa_bypass",
    ],
    "fase_16": [  # Mass assignment & rate limiting
        "mass_assignment", "rate_limit_bypass", "api_rate_limiting",
        "brute_force",
    ],
    "fase_17": [  # Information disclosure & misconfiguration
        "information_disclosure", "subdomain_takeover", "directory_listing",
        "default_credentials", "security_headers", "ssl_tls",
        "debug_endpoints", "backup_files", "source_code_exposure",
        "sensitive_data_exposure",
    ],
    "fase_18": [  # Deserialization
        "insecure_deserialization",
    ],
    "fase_19": [  # Denial of service
        "denial_of_service", "graphql_dos", "redos", "xml_bomb",
    ],
    "fase_20": [  # XML external entity
        "xxe",
    ],
}
# ─── FASE → Agent Context Mapping ────────────────────────────────────────────
# Maps each FASE to the agent contexts where it should be injected.
# Context names used here are the agent's LLM call sites: "strategy",
# "testing", "verification", "confirmation", and "reporting".
FASE_CONTEXT_MAP: Dict[str, List[str]] = {
    "fase_0": ["strategy"],
    "fase_1": ["strategy"],
    "fase_2": ["testing", "verification", "confirmation"],
    "fase_3": ["testing", "verification", "confirmation"],
    "fase_4": ["testing", "confirmation", "strategy"],
    "fase_5": ["strategy", "testing"],
    "fase_6": ["testing", "verification"],
    "fase_7": ["strategy"],
    "fase_8": ["testing", "verification", "confirmation"],
    "fase_9": ["testing", "verification"],
    "fase_10": ["testing", "verification", "confirmation"],
    "fase_11": ["testing", "verification", "confirmation"],
    "fase_12": ["testing", "verification"],
    "fase_13": ["testing", "verification", "confirmation"],
    "fase_14": ["testing", "verification"],
    "fase_15": ["testing", "verification", "confirmation"],
    "fase_16": ["testing", "confirmation"],
    "fase_17": ["testing", "reporting"],
    "fase_18": ["testing", "verification", "confirmation"],
    "fase_19": ["testing"],
    "fase_20": ["testing", "verification", "confirmation"],
}
# ─── Keyword → Vuln Type Mapping (for non-FASE documents) ───────────────────
# Keys are lowercase substrings searched for in lowercased document text
# (see MethodologyLoader._detect_vuln_types_by_keywords); values are the
# vuln type identifiers attributed to a section when the keyword appears.
KEYWORD_VULN_MAP: Dict[str, List[str]] = {
    "sql injection": ["sqli_error", "sqli_union", "sqli_blind", "sqli_time"],
    "xss": ["xss_reflected", "xss_stored", "xss_dom"],
    "cross-site scripting": ["xss_reflected", "xss_stored", "xss_dom"],
    "ssrf": ["ssrf", "blind_ssrf"],
    "server-side request forgery": ["ssrf", "blind_ssrf"],
    "xxe": ["xxe"],
    "xml external entity": ["xxe"],
    "ssti": ["ssti"],
    "template injection": ["ssti"],
    "idor": ["idor", "bola"],
    "broken access": ["bola", "bfla", "idor"],
    "deserialization": ["insecure_deserialization"],
    "file upload": ["file_upload"],
    "lfi": ["lfi", "path_traversal"],
    "local file inclusion": ["lfi", "path_traversal"],
    "rfi": ["rfi"],
    "remote file inclusion": ["rfi"],
    "command injection": ["command_injection"],
    "cors": ["cors_misconfig"],
    "csrf": ["csrf"],
    "clickjacking": ["clickjacking"],
    "open redirect": ["open_redirect"],
    "jwt": ["jwt_manipulation"],
    "oauth": ["broken_auth", "auth_bypass"],
    "race condition": ["race_condition"],
    "prototype pollution": ["prototype_pollution"],
    "request smuggling": ["http_request_smuggling"],
    "cache poisoning": ["cache_poisoning"],
    "graphql": ["graphql_injection", "graphql_introspection", "graphql_dos"],
    "websocket": ["websocket_security"],
    "nosql": ["nosql_injection"],
    "ldap": ["ldap_injection"],
    "crlf": ["crlf_injection"],
    "mass assignment": ["mass_assignment"],
    "rate limit": ["rate_limit_bypass", "api_rate_limiting"],
}
@dataclass
class MethodologySection:
    """A parsed section from a methodology document."""
    # Section identifier: "fase_N" for FASE documents, "section_N" for
    # generic markdown, or "db_prompt_N" for database-loaded prompts.
    fase_id: str
    # Heading text, e.g. "FASE 10: ...".
    title: str
    # Full markdown body of the section.
    content: str
    # "### heading" title → body text, extracted from content.
    sub_sections: Dict[str, str] = field(default_factory=dict)
    # Vulnerability type identifiers this section covers.
    vuln_types: List[str] = field(default_factory=list)
    # Agent contexts (e.g. "testing", "strategy") where this is injected.
    contexts: List[str] = field(default_factory=list)
    @property
    def char_count(self) -> int:
        """Number of characters in the section body."""
        return len(self.content)
class MethodologyIndex:
    """Indexed methodology for fast retrieval by vuln_type and context.

    Sections are stored by fase_id; two inverted indexes map vulnerability
    types and agent contexts back to the fase_ids that cover them, so LLM
    call sites can pull only the relevant methodology text.
    """

    def __init__(self):
        # fase_id → section
        self.sections: Dict[str, "MethodologySection"] = {}
        self.vuln_type_index: Dict[str, List[str]] = {}  # vuln_type → [fase_ids]
        self.context_index: Dict[str, List[str]] = {}  # context → [fase_ids]

    def add_section(self, section: "MethodologySection") -> None:
        """Register a section and update both inverted indexes.

        NOTE(review): re-adding a fase_id replaces the stored section but
        leaves the previously appended index entries in place.
        """
        self.sections[section.fase_id] = section
        for vt in section.vuln_types:
            self.vuln_type_index.setdefault(vt, []).append(section.fase_id)
        for ctx in section.contexts:
            self.context_index.setdefault(ctx, []).append(section.fase_id)

    @staticmethod
    def _fase_order(fase_id: str) -> tuple:
        """Numeric-aware sort key so "fase_2" precedes "fase_10".

        Plain lexicographic sorting would emit fase_10..fase_19 before
        fase_2, scrambling the methodology's document order. Ids ending in
        digits sort by that number; other ids sort after, alphabetically.
        """
        m = re.search(r"(\d+)$", fase_id)
        if m:
            return (0, int(m.group(1)), fase_id)
        return (1, 0, fase_id)

    def get_for_vuln_and_context(
        self,
        vuln_type: str,
        context: str,
        max_chars: int = 2000,
    ) -> str:
        """Get methodology text relevant to both vuln_type and context.

        Prefers sub-sections that mention the vuln_type for precision and
        truncates the combined output to the max_chars budget. Returns ""
        when nothing relevant is indexed.
        """
        if not self.sections:
            return ""
        candidate_fase_ids: set = set()
        # Find FASEs matching vuln_type
        if vuln_type:
            # Direct match
            for fid in self.vuln_type_index.get(vuln_type, []):
                candidate_fase_ids.add(fid)
            # Fuzzy match: retry with common variant suffixes stripped
            # (e.g. xss_reflected → xss)
            base_vt = vuln_type.replace("_reflected", "").replace("_stored", "").replace("_dom", "")
            base_vt = base_vt.replace("_error", "").replace("_union", "").replace("_blind", "").replace("_time", "")
            if base_vt != vuln_type:
                for fid in self.vuln_type_index.get(base_vt, []):
                    candidate_fase_ids.add(fid)
        # Filter by context
        if context:
            context_fases = set(self.context_index.get(context, []))
            if candidate_fase_ids:
                # Intersect for precision
                filtered = candidate_fase_ids & context_fases
                if filtered:
                    candidate_fase_ids = filtered
                # If intersection is empty, keep vuln_type matches (they're more specific)
            else:
                # No vuln_type specified: use all context matches
                candidate_fase_ids = context_fases
        if not candidate_fase_ids:
            return ""
        # Build output in document order, preferring targeted sub-sections.
        # (Fix: sort numerically via _fase_order, not lexicographically.)
        parts: List[str] = []
        total = 0
        for fase_id in sorted(candidate_fase_ids, key=self._fase_order):
            section = self.sections.get(fase_id)
            if not section:
                continue
            remaining = max_chars - total
            if remaining < 100:
                break
            # Try to find a targeted sub-section first
            best_sub = self._find_best_subsection(section, vuln_type)
            if best_sub:
                title, content = best_sub
                text = f"### {title}\n{content}"
            else:
                # Use full section content, truncated
                text = f"### {section.title}\n{section.content}"
            if len(text) > remaining:
                text = text[:remaining]
            if len(text) < 50:
                continue  # Skip tiny fragments
            parts.append(text)
            total += len(text)
        return "\n\n".join(parts)

    def _find_best_subsection(
        self, section: "MethodologySection", vuln_type: str
    ) -> Optional[tuple]:
        """Find the sub-section most relevant to a vuln_type.

        A title match (score 10) beats a match in the first 500 chars of
        the body (score 5). Returns (title, content) or None.
        """
        if not vuln_type or not section.sub_sections:
            return None
        # Normalize for matching
        vt_variants = set()
        vt_lower = vuln_type.lower()
        vt_variants.add(vt_lower)
        vt_variants.add(vt_lower.replace("_", " "))
        vt_variants.add(vt_lower.replace("_", "-"))
        # Common name mappings
        name_map = {
            "sqli": "sql injection",
            "xss_reflected": "reflected xss",
            "xss_stored": "stored xss",
            "xss_dom": "dom xss",
            "lfi": "lfi",
            "rfi": "rfi",
            "ssrf": "ssrf",
            "ssti": "ssti",
            "xxe": "xxe",
            "nosql_injection": "nosql",
            "crlf_injection": "crlf",
            "cors_misconfig": "cors",
            "insecure_deserialization": "deserialization",
            "http_request_smuggling": "request smuggling",
            "cache_poisoning": "cache poisoning",
            "prototype_pollution": "prototype pollution",
        }
        mapped = name_map.get(vt_lower)
        if mapped:
            vt_variants.add(mapped)
        best_score = 0
        best = None
        for sub_title, sub_content in section.sub_sections.items():
            title_lower = sub_title.lower()
            # Hoisted: lowercase the body prefix once per sub-section,
            # not once per variant.
            content_prefix = sub_content[:500].lower()
            score = 0
            for variant in vt_variants:
                if variant in title_lower:
                    score = 10  # Title match is strongest
                    break
                if variant in content_prefix:
                    score = max(score, 5)  # Content match
            if score > best_score:
                best_score = score
                best = (sub_title, sub_content)
        return best
class MethodologyLoader:
    """Loads and indexes methodology documents from files or DB prompts.

    Produces MethodologyIndex instances; all loading paths are best-effort
    and return an empty index rather than raising on bad input.
    """
    def load_from_file(self, file_path: str) -> MethodologyIndex:
        """Load a .md methodology file and build an index.

        Args:
            file_path: Path to the markdown methodology document.
        Returns:
            A populated MethodologyIndex, or an empty one when the file is
            missing or unreadable (logged, never raised).
        """
        if not os.path.exists(file_path):
            logger.warning(f"Methodology file not found: {file_path}")
            return MethodologyIndex()
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as e:
            # Best-effort: a broken file must not crash the agent.
            logger.error(f"Failed to read methodology file: {e}")
            return MethodologyIndex()
        sections = self._parse_markdown_sections(content)
        index = MethodologyIndex()
        for section in sections:
            index.add_section(section)
        logger.info(
            f"[METHODOLOGY] Loaded {len(sections)} sections from {file_path} "
            f"({sum(s.char_count for s in sections)} chars, "
            f"{len(index.vuln_type_index)} vuln types mapped)"
        )
        return index
    def load_from_db_prompts(self, prompts: List[Dict]) -> MethodologyIndex:
        """Index database-loaded custom prompts into a MethodologyIndex.

        Each prompt dict is read for "content" (required), plus optional
        "name" and "parsed_vulnerabilities" (list of {"type": ...} dicts).
        Prompts with empty content are skipped.
        """
        index = MethodologyIndex()
        for i, p in enumerate(prompts):
            content = p.get("content", "")
            if not content:
                continue
            parsed_vulns = p.get("parsed_vulnerabilities", [])
            # Try FASE-based parsing first
            sections = self._parse_markdown_sections(content)
            if not sections:
                # Treat entire content as one section
                vuln_types = [
                    v.get("type", "") for v in parsed_vulns if v.get("type")
                ]
                if not vuln_types:
                    # No pre-parsed types in the DB row: fall back to
                    # keyword scanning of the raw content.
                    vuln_types = self._detect_vuln_types_by_keywords(content)
                # Unstructured prompts are injected into every context.
                section = MethodologySection(
                    fase_id=f"db_prompt_{i}",
                    title=p.get("name", f"Custom Prompt {i}"),
                    content=content,
                    sub_sections={},
                    vuln_types=vuln_types,
                    contexts=["testing", "strategy", "confirmation",
                              "verification", "reporting"],
                )
                sections = [section]
            for section in sections:
                index.add_section(section)
        logger.info(
            f"[METHODOLOGY] Indexed {len(index.sections)} sections from "
            f"{len(prompts)} DB prompts"
        )
        return index
    def merge_indices(self, *indices: MethodologyIndex) -> MethodologyIndex:
        """Merge multiple MethodologyIndex objects into one.

        Earlier indices win on fase_id collisions (first one kept).
        """
        merged = MethodologyIndex()
        for idx in indices:
            for section in idx.sections.values():
                # Avoid duplicate fase_ids
                if section.fase_id not in merged.sections:
                    merged.add_section(section)
        return merged
    def _parse_markdown_sections(self, content: str) -> List[MethodologySection]:
        """Parse a markdown document into indexed sections.
        Looks for FASE headings first, falls back to generic ## headings.
        """
        sections = self._parse_fase_sections(content)
        if sections:
            return sections
        # Fallback: parse generic ## headings
        return self._parse_generic_sections(content)
    def _parse_fase_sections(self, content: str) -> List[MethodologySection]:
        """Parse FASE-structured methodology documents.

        Returns [] when no FASE headings are found, which signals the
        caller to fall back to generic ## parsing.
        """
        # Match ## FASE N: or # FASE N: or ## 🔐 FASE N: (with emoji)
        fase_pattern = re.compile(
            r'^(#{1,2})\s*(?:[^\w]*\s*)?FASE\s+(\d+)\s*[:\-]?\s*(.*?)$',
            re.MULTILINE | re.IGNORECASE,
        )
        matches = list(fase_pattern.finditer(content))
        if not matches:
            return []
        sections: List[MethodologySection] = []
        # Also capture pre-FASE content (e.g. recon steps before FASE 1),
        # but only when it is substantial (> 200 chars before the first
        # heading); short preambles are ignored.
        if matches[0].start() > 200:
            pre_content = content[:matches[0].start()].strip()
            if pre_content:
                pre_subs = self._extract_sub_sections(pre_content)
                sections.append(MethodologySection(
                    fase_id="fase_0",
                    title="Recon & Preparation",
                    content=pre_content,
                    sub_sections=pre_subs,
                    vuln_types=FASE_VULN_TYPE_MAP.get("fase_0", []),
                    contexts=FASE_CONTEXT_MAP.get("fase_0", ["strategy"]),
                ))
        for i, match in enumerate(matches):
            fase_num = match.group(2)
            fase_title = f"FASE {fase_num}: {match.group(3).strip()}"
            # Section body runs from the end of this heading to the start
            # of the next FASE heading (or end of document).
            start = match.end()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
            body = content[start:end].strip()
            fase_id = f"fase_{fase_num}"
            sub_sections = self._extract_sub_sections(body)
            vuln_types = FASE_VULN_TYPE_MAP.get(fase_id, [])
            contexts = FASE_CONTEXT_MAP.get(fase_id, ["testing"])
            # If not in our hardcoded map, try keyword detection
            if not vuln_types:
                vuln_types = self._detect_vuln_types_by_keywords(body)
            sections.append(MethodologySection(
                fase_id=fase_id,
                title=fase_title,
                content=body,
                sub_sections=sub_sections,
                vuln_types=vuln_types,
                contexts=contexts,
            ))
        return sections
    def _parse_generic_sections(self, content: str) -> List[MethodologySection]:
        """Parse generic ## heading structured documents.

        Vuln types are inferred from the heading plus the first 1000 chars
        of each body; contexts default to testing + strategy.
        """
        heading_pattern = re.compile(r'^##\s+(.*?)$', re.MULTILINE)
        matches = list(heading_pattern.finditer(content))
        if not matches:
            return []
        sections: List[MethodologySection] = []
        for i, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
            body = content[start:end].strip()
            vuln_types = self._detect_vuln_types_by_keywords(
                title + " " + body[:1000]
            )
            sub_sections = self._extract_sub_sections(body)
            sections.append(MethodologySection(
                fase_id=f"section_{i}",
                title=title,
                content=body,
                sub_sections=sub_sections,
                vuln_types=vuln_types,
                contexts=["testing", "strategy"],
            ))
        return sections
    def _extract_sub_sections(self, body: str) -> Dict[str, str]:
        """Extract ### sub-sections from a section body.

        Returns a mapping of sub-heading title → body text; sub-sections
        with empty bodies are dropped.
        """
        sub_pattern = re.compile(r'^###\s+(.*?)$', re.MULTILINE)
        sub_matches = list(sub_pattern.finditer(body))
        sub_sections: Dict[str, str] = {}
        for j, sub in enumerate(sub_matches):
            sub_title = sub.group(1).strip()
            sub_start = sub.end()
            sub_end = (
                sub_matches[j + 1].start()
                if j + 1 < len(sub_matches)
                else len(body)
            )
            sub_content = body[sub_start:sub_end].strip()
            if sub_content:
                sub_sections[sub_title] = sub_content
        return sub_sections
    def _detect_vuln_types_by_keywords(self, text: str) -> List[str]:
        """Detect vuln types from text content via keyword matching.

        Case-insensitive substring search over KEYWORD_VULN_MAP; the
        result is de-duplicated while preserving first-seen order.
        """
        text_lower = text.lower()
        found: List[str] = []
        seen: set = set()
        for keyword, types in KEYWORD_VULN_MAP.items():
            if keyword in text_lower:
                for vt in types:
                    if vt not in seen:
                        found.append(vt)
                        seen.add(vt)
        return found