mirror of
https://github.com/CyberSecurityUP/NeuroSploit.git
synced 2026-03-20 09:13:37 +00:00
116 modules | 100 vuln types | 18 API routes | 18 frontend pages Major features: - VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts - Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop - CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers - Validation Pipeline: negative controls, proof of execution, confidence scoring, judge - AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon - Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents - RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates - Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh - Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install - Full IA Testing: methodology-driven comprehensive pentest sessions - Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts - Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
553 lines
20 KiB
Python
553 lines
20 KiB
Python
"""
|
|
Methodology Loader - Parses external pentest methodology .md files and indexes them
|
|
for smart injection into all LLM call sites in the autonomous agent.
|
|
|
|
Supports FASE-based methodology documents (like pentestcompleto.md) as well as
|
|
generic markdown documents. Maps sections to vulnerability types and agent contexts
|
|
for targeted injection with per-context character budgets.
|
|
"""
|
|
|
|
import logging
import os
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ─── FASE → Vulnerability Type Mapping ───────────────────────────────────────
# Maps each FASE section to the agent's vulnerability type identifiers.
# These match the 100 types in vuln_engine/registry.py.
#
# An empty list marks a FASE that is not tied to specific vulnerability types.

FASE_VULN_TYPE_MAP: Dict[str, List[str]] = {
    "fase_0": [],  # Recon - broad, no specific vuln types
    "fase_1": [],  # Architecture analysis - broad strategy
    "fase_2": [
        "jwt_manipulation", "session_fixation", "broken_auth", "auth_bypass",
        "insecure_password_reset", "account_takeover", "cookie_manipulation",
        "captcha_bypass", "session_hijacking",
    ],
    "fase_3": [
        "idor", "bola", "bfla", "privilege_escalation", "forced_browsing",
        "auth_bypass", "mass_assignment",
    ],
    "fase_4": [
        "race_condition", "business_logic", "workflow_bypass",
        "payment_manipulation", "insufficient_anti_automation",
    ],
    "fase_5": [],  # CVE/Zero-day - applies to all types via strategy context
    "fase_6": [
        "ssrf", "cloud_misconfig", "s3_bucket_misconfiguration",
        "cloud_metadata_exposure", "serverless_misconfiguration",
        "kubernetes_misconfig", "iam_misconfig",
    ],
    "fase_7": [],  # OWASP WSTG reference - strategy context
    "fase_8": [
        "bola", "bfla", "mass_assignment", "excessive_data_exposure",
        "api_abuse", "api_rate_limiting", "rest_api_versioning",
        "broken_auth", "ssrf",
    ],
    "fase_9": [
        "graphql_injection", "graphql_introspection", "graphql_dos",
        "websocket_security", "grpc_security",
    ],
    "fase_10": [
        "sqli_error", "sqli_union", "sqli_blind", "sqli_time", "sqli_oob",
        "nosql_injection", "ssti", "ldap_injection", "xpath_injection",
        "crlf_injection", "header_injection", "parameter_pollution",
        "command_injection", "email_injection", "expression_language_injection",
        "log_injection", "orm_injection", "ssi_injection", "xslt_injection",
        "csv_injection",
    ],
    "fase_11": [
        "xss_reflected", "xss_stored", "xss_dom", "cors_misconfig",
        "csp_bypass", "clickjacking", "open_redirect", "prototype_pollution",
        "html_injection", "css_injection", "dom_clobbering", "postmessage_abuse",
        "dangling_markup",
    ],
    "fase_12": [
        "http_request_smuggling", "cache_poisoning", "cache_deception",
        "http2_smuggling", "connection_pool_poisoning", "http_method_tampering",
    ],
    "fase_13": [
        "file_upload", "lfi", "rfi", "path_traversal", "zip_slip",
    ],
    "fase_14": [
        "ssrf", "dns_rebinding", "blind_ssrf",
    ],
    "fase_15": [
        "broken_auth", "insecure_password_reset", "brute_force",
        "account_enumeration", "captcha_bypass", "session_fixation",
        "account_takeover", "mfa_bypass",
    ],
    "fase_16": [
        "mass_assignment", "rate_limit_bypass", "api_rate_limiting",
        "brute_force",
    ],
    "fase_17": [
        "information_disclosure", "subdomain_takeover", "directory_listing",
        "default_credentials", "security_headers", "ssl_tls",
        "debug_endpoints", "backup_files", "source_code_exposure",
        "sensitive_data_exposure",
    ],
    "fase_18": [
        "insecure_deserialization",
    ],
    "fase_19": [
        "denial_of_service", "graphql_dos", "redos", "xml_bomb",
    ],
    "fase_20": [
        "xxe",
    ],
}


# ─── FASE → Agent Context Mapping ────────────────────────────────────────────
# Maps each FASE to the agent contexts where it should be injected.
# Context names used below: strategy, testing, verification, confirmation,
# reporting.

FASE_CONTEXT_MAP: Dict[str, List[str]] = {
    "fase_0": ["strategy"],
    "fase_1": ["strategy"],
    "fase_2": ["testing", "verification", "confirmation"],
    "fase_3": ["testing", "verification", "confirmation"],
    "fase_4": ["testing", "confirmation", "strategy"],
    "fase_5": ["strategy", "testing"],
    "fase_6": ["testing", "verification"],
    "fase_7": ["strategy"],
    "fase_8": ["testing", "verification", "confirmation"],
    "fase_9": ["testing", "verification"],
    "fase_10": ["testing", "verification", "confirmation"],
    "fase_11": ["testing", "verification", "confirmation"],
    "fase_12": ["testing", "verification"],
    "fase_13": ["testing", "verification", "confirmation"],
    "fase_14": ["testing", "verification"],
    "fase_15": ["testing", "verification", "confirmation"],
    "fase_16": ["testing", "confirmation"],
    "fase_17": ["testing", "reporting"],
    "fase_18": ["testing", "verification", "confirmation"],
    "fase_19": ["testing"],
    "fase_20": ["testing", "verification", "confirmation"],
}


# ─── Keyword → Vuln Type Mapping (for non-FASE documents) ───────────────────
# Keys are lowercase phrases searched for in lowercased document text; values
# are the vulnerability type identifiers a hit contributes.

KEYWORD_VULN_MAP: Dict[str, List[str]] = {
    # "sqli_oob" is included so the out-of-band variant listed under fase_10
    # is also reachable from generic (non-FASE) documents.
    "sql injection": [
        "sqli_error", "sqli_union", "sqli_blind", "sqli_time", "sqli_oob",
    ],
    "xss": ["xss_reflected", "xss_stored", "xss_dom"],
    "cross-site scripting": ["xss_reflected", "xss_stored", "xss_dom"],
    "ssrf": ["ssrf", "blind_ssrf"],
    "server-side request forgery": ["ssrf", "blind_ssrf"],
    "xxe": ["xxe"],
    "xml external entity": ["xxe"],
    "ssti": ["ssti"],
    "template injection": ["ssti"],
    "idor": ["idor", "bola"],
    "broken access": ["bola", "bfla", "idor"],
    "deserialization": ["insecure_deserialization"],
    "file upload": ["file_upload"],
    "lfi": ["lfi", "path_traversal"],
    "local file inclusion": ["lfi", "path_traversal"],
    "rfi": ["rfi"],
    "remote file inclusion": ["rfi"],
    "command injection": ["command_injection"],
    "cors": ["cors_misconfig"],
    "csrf": ["csrf"],
    "clickjacking": ["clickjacking"],
    "open redirect": ["open_redirect"],
    "jwt": ["jwt_manipulation"],
    "oauth": ["broken_auth", "auth_bypass"],
    "race condition": ["race_condition"],
    "prototype pollution": ["prototype_pollution"],
    "request smuggling": ["http_request_smuggling"],
    "cache poisoning": ["cache_poisoning"],
    "graphql": ["graphql_injection", "graphql_introspection", "graphql_dos"],
    "websocket": ["websocket_security"],
    "nosql": ["nosql_injection"],
    "ldap": ["ldap_injection"],
    "crlf": ["crlf_injection"],
    "mass assignment": ["mass_assignment"],
    "rate limit": ["rate_limit_bypass", "api_rate_limiting"],
}


@dataclass
class MethodologySection:
    """A parsed section from a methodology document."""

    # Identifier used as the key in MethodologyIndex.sections
    # (e.g. "fase_3", "section_0", "db_prompt_1").
    fase_id: str
    # Heading text shown when the section is rendered.
    title: str
    # Full section body (everything between this heading and the next).
    content: str
    # "###" sub-heading title -> sub-section body.
    sub_sections: Dict[str, str] = field(default_factory=dict)
    # Vulnerability type identifiers this section is indexed under.
    vuln_types: List[str] = field(default_factory=list)
    # Agent contexts this section is indexed under.
    contexts: List[str] = field(default_factory=list)

    @property
    def char_count(self) -> int:
        """Return the length of ``content`` in characters."""
        return len(self.content)


class MethodologyIndex:
    """Indexed methodology for fast retrieval by vuln_type and context.

    Sections are stored by ``fase_id`` and cross-referenced through two
    inverted indices: vulnerability type -> fase ids, and agent context ->
    fase ids.
    """

    # Suffixes that distinguish variants of one vulnerability family
    # (e.g. "xss_reflected" -> "xss", "sqli_blind" -> "sqli").
    _VARIANT_SUFFIXES = (
        "_reflected", "_stored", "_dom",
        "_error", "_union", "_blind", "_time", "_oob",
    )

    # Text placed between rendered sections; its length counts against the
    # character budget.
    _SEPARATOR = "\n\n"
    # Stop adding sections once fewer than this many characters remain.
    _MIN_REMAINING = 100
    # Rendered fragments shorter than this are skipped.
    _MIN_FRAGMENT = 50
    # Only this many leading characters of a sub-section body are searched.
    _SUBSECTION_PREVIEW = 500

    # Human-readable names under which a vuln type commonly appears in
    # sub-section headings.
    _NAME_MAP = {
        "sqli": "sql injection",
        "xss_reflected": "reflected xss",
        "xss_stored": "stored xss",
        "xss_dom": "dom xss",
        "lfi": "lfi",
        "rfi": "rfi",
        "ssrf": "ssrf",
        "ssti": "ssti",
        "xxe": "xxe",
        "nosql_injection": "nosql",
        "crlf_injection": "crlf",
        "cors_misconfig": "cors",
        "insecure_deserialization": "deserialization",
        "http_request_smuggling": "request smuggling",
        "cache_poisoning": "cache poisoning",
        "prototype_pollution": "prototype pollution",
    }

    def __init__(self) -> None:
        self.sections: Dict[str, MethodologySection] = {}
        self.vuln_type_index: Dict[str, List[str]] = {}  # vuln_type → [fase_ids]
        self.context_index: Dict[str, List[str]] = {}  # context → [fase_ids]

    def add_section(self, section: "MethodologySection") -> None:
        """Store ``section`` and register it in both inverted indices.

        Adding a section whose ``fase_id`` is already present replaces the
        old one: its previous index entries are removed first so the indices
        never point at vuln types or contexts the stored section no longer
        declares, and never list the same id twice.
        """
        fase_id = section.fase_id
        if fase_id in self.sections:
            self._unindex(fase_id)
        self.sections[fase_id] = section

        for vt in section.vuln_types:
            bucket = self.vuln_type_index.setdefault(vt, [])
            if fase_id not in bucket:
                bucket.append(fase_id)
        for ctx in section.contexts:
            bucket = self.context_index.setdefault(ctx, [])
            if fase_id not in bucket:
                bucket.append(fase_id)

    def _unindex(self, fase_id: str) -> None:
        """Remove ``fase_id`` from both inverted indices, dropping empty keys."""
        for index in (self.vuln_type_index, self.context_index):
            for key in list(index):
                kept = [fid for fid in index[key] if fid != fase_id]
                if kept:
                    index[key] = kept
                else:
                    del index[key]

    @classmethod
    def _base_vuln_type(cls, vuln_type: str) -> str:
        """Return ``vuln_type`` without a trailing variant suffix, if any."""
        for suffix in cls._VARIANT_SUFFIXES:
            if vuln_type.endswith(suffix) and len(vuln_type) > len(suffix):
                return vuln_type[: -len(suffix)]
        return vuln_type

    @staticmethod
    def _natural_key(fase_id: str) -> list:
        """Sort key that orders embedded numbers numerically.

        ``re.split`` with a capturing group always alternates text and digit
        runs starting with text, so keys compare str-with-str and
        int-with-int position by position.
        """
        return [
            int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split(r"(\d+)", fase_id)
        ]

    def get_for_vuln_and_context(
        self,
        vuln_type: str,
        context: str,
        max_chars: int = 2000,
    ) -> str:
        """Get methodology text relevant to both vuln_type and context.

        Candidate sections are those indexed under ``vuln_type`` (or its base
        family name). When ``context`` is given, candidates are narrowed to
        that context if the intersection is non-empty; if there are no
        vuln-type candidates at all, every section of the context is used.

        Prefers sub-sections that mention the vuln_type for precision.
        Sections are emitted in natural ``fase_id`` order (fase_2 before
        fase_10) and the returned text, separators included, never exceeds
        ``max_chars``.

        Args:
            vuln_type: Vulnerability type identifier; may be empty.
            context: Agent context name; may be empty.
            max_chars: Character budget for the returned text.

        Returns:
            The selected sections rendered as ``### title`` blocks joined by
            blank lines, or ``""`` when nothing matches.
        """
        if not self.sections:
            return ""

        candidates: set = set()

        # Find FASEs matching vuln_type: direct hit first, then family name.
        if vuln_type:
            candidates.update(self.vuln_type_index.get(vuln_type, ()))
            base_vt = self._base_vuln_type(vuln_type)
            if base_vt != vuln_type:
                candidates.update(self.vuln_type_index.get(base_vt, ()))

        # Filter by context
        if context:
            in_context = set(self.context_index.get(context, ()))
            if candidates:
                # Intersect for precision; if the intersection is empty keep
                # the vuln_type matches (they're more specific).
                narrowed = candidates & in_context
                if narrowed:
                    candidates = narrowed
            else:
                # No vuln_type match available: use all context matches
                candidates = in_context

        if not candidates:
            return ""

        # Build output, preferring targeted sub-sections
        parts: List[str] = []
        used = 0

        for fase_id in sorted(candidates, key=self._natural_key):
            section = self.sections.get(fase_id)
            if section is None:
                continue

            # The joiner inserted before every part but the first is charged
            # to the budget so the final string stays within max_chars.
            joiner_cost = len(self._SEPARATOR) if parts else 0
            remaining = max_chars - used - joiner_cost
            if remaining < self._MIN_REMAINING:
                break

            best_sub = self._find_best_subsection(section, vuln_type)
            if best_sub:
                heading, body = best_sub
            else:
                # Use full section content, truncated below
                heading, body = section.title, section.content

            text = f"### {heading}\n{body}"[:remaining]
            if len(text) < self._MIN_FRAGMENT:
                continue  # Skip tiny fragments

            parts.append(text)
            used += joiner_cost + len(text)

        return self._SEPARATOR.join(parts)

    def _find_best_subsection(
        self, section: "MethodologySection", vuln_type: str
    ) -> Optional[Tuple[str, str]]:
        """Find the sub-section most relevant to a vuln_type.

        A sub-section scores 10 when any spelling of the vuln type occurs in
        its title, 5 when one occurs in the first 500 characters of its body,
        and 0 otherwise. The first sub-section with the highest non-zero
        score wins.

        Returns:
            ``(title, content)`` of the winning sub-section, or ``None`` when
            ``vuln_type`` is empty, the section has no sub-sections, or
            nothing matches.
        """
        if not vuln_type or not section.sub_sections:
            return None

        # Normalize for matching
        vt_lower = vuln_type.lower()
        variants = {
            vt_lower,
            vt_lower.replace("_", " "),
            vt_lower.replace("_", "-"),
        }

        # Common name mappings. Fall back to the family name so that e.g.
        # "sqli_error" picks up the "sqli" -> "sql injection" entry.
        mapped = self._NAME_MAP.get(vt_lower) or self._NAME_MAP.get(
            self._base_vuln_type(vt_lower)
        )
        if mapped:
            variants.add(mapped)

        best_score = 0
        best = None

        for sub_title, sub_content in section.sub_sections.items():
            title_lower = sub_title.lower()
            preview_lower = sub_content[: self._SUBSECTION_PREVIEW].lower()

            if any(v in title_lower for v in variants):
                score = 10  # Title match is strongest
            elif any(v in preview_lower for v in variants):
                score = 5  # Content match
            else:
                score = 0

            if score > best_score:
                best_score = score
                best = (sub_title, sub_content)

        return best


class MethodologyLoader:
    """Loads and indexes methodology documents from files or DB prompts."""

    # Match "## FASE N:", "# FASE N -" or "## 🔐 FASE N" (decorated headings).
    # Only spaces/tabs are allowed between the parts so a match can never
    # spill onto the following line (``\s`` would also match newlines and
    # turn the first body line of an untitled heading into its title).
    # NOTE: "#" is itself a non-word character, so deeper headings such as
    # "### FASE N" are also accepted by the decoration class.
    _FASE_HEADING = re.compile(
        r'^(#{1,2})[^\w\n]*FASE[ \t]+(\d+)[ \t]*[:\-\u2013\u2014]?[ \t]*(.*?)[ \t]*$',
        re.MULTILINE | re.IGNORECASE,
    )
    # Exactly-two-hash headings ("###" is excluded because a blank must
    # follow the second "#").
    _GENERIC_HEADING = re.compile(r'^##[ \t]+(.*?)[ \t]*$', re.MULTILINE)
    # Exactly-three-hash headings.
    _SUB_HEADING = re.compile(r'^###[ \t]+(.*?)[ \t]*$', re.MULTILINE)

    def load_from_file(self, file_path: str) -> "MethodologyIndex":
        """Load a .md methodology file and build an index.

        Returns an empty index (after logging) when the path is empty, does
        not exist, or cannot be read as UTF-8 text.
        """
        if not file_path or not os.path.exists(file_path):
            logger.warning(f"Methodology file not found: {file_path}")
            return MethodologyIndex()

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, ValueError) as e:
            # OSError: unreadable path / directory; ValueError covers
            # UnicodeDecodeError for files that are not valid UTF-8.
            logger.error(f"Failed to read methodology file: {e}")
            return MethodologyIndex()

        sections = self._parse_markdown_sections(content)
        index = MethodologyIndex()
        for section in sections:
            index.add_section(section)

        logger.info(
            f"[METHODOLOGY] Loaded {len(sections)} sections from {file_path} "
            f"({sum(s.char_count for s in sections)} chars, "
            f"{len(index.vuln_type_index)} vuln types mapped)"
        )
        return index

    def load_from_db_prompts(self, prompts: List[Dict]) -> "MethodologyIndex":
        """Index database-loaded custom prompts into a MethodologyIndex.

        Each prompt dict is read for ``content``, ``name`` and
        ``parsed_vulnerabilities`` (a list of dicts with a ``type`` key).
        Prompts with markdown headings are split into sections; any other
        prompt becomes a single ``db_prompt_<i>`` section available in every
        context. A section whose id is already taken by an earlier prompt is
        re-keyed as ``db_prompt_<i>_<id>`` instead of overwriting it.
        """
        index = MethodologyIndex()
        prompts = prompts or []

        for i, p in enumerate(prompts):
            content = p.get("content", "")
            if not content:
                continue

            # Try FASE-based parsing first (falls back to "##" headings)
            sections = self._parse_markdown_sections(content)

            if not sections:
                # Treat entire content as one section
                parsed_vulns = p.get("parsed_vulnerabilities") or []
                vuln_types = [
                    v.get("type", "")
                    for v in parsed_vulns
                    if isinstance(v, dict) and v.get("type")
                ]
                if not vuln_types:
                    vuln_types = self._detect_vuln_types_by_keywords(content)

                sections = [MethodologySection(
                    fase_id=f"db_prompt_{i}",
                    title=p.get("name") or f"Custom Prompt {i}",
                    content=content,
                    sub_sections={},
                    vuln_types=vuln_types,
                    contexts=["testing", "strategy", "confirmation",
                              "verification", "reporting"],
                )]

            for section in sections:
                # Parsed ids ("fase_N", "section_N") repeat across prompts;
                # keep both by re-keying the later one.
                if section.fase_id in index.sections:
                    section.fase_id = self._unique_section_id(
                        index, i, section.fase_id
                    )
                index.add_section(section)

        logger.info(
            f"[METHODOLOGY] Indexed {len(index.sections)} sections from "
            f"{len(prompts)} DB prompts"
        )
        return index

    @staticmethod
    def _unique_section_id(index: "MethodologyIndex", prompt_no: int,
                           fase_id: str) -> str:
        """Return a prompt-scoped id for ``fase_id`` not yet used in ``index``."""
        candidate = f"db_prompt_{prompt_no}_{fase_id}"
        counter = 2
        while candidate in index.sections:
            candidate = f"db_prompt_{prompt_no}_{fase_id}_{counter}"
            counter += 1
        return candidate

    def merge_indices(self, *indices: "MethodologyIndex") -> "MethodologyIndex":
        """Merge multiple MethodologyIndex objects into one.

        Earlier indices win: a section whose ``fase_id`` is already in the
        merged result is skipped. Section objects are shared, not copied.
        """
        merged = MethodologyIndex()
        for idx in indices:
            for section in idx.sections.values():
                # Avoid duplicate fase_ids
                if section.fase_id not in merged.sections:
                    merged.add_section(section)
        return merged

    def _parse_markdown_sections(self, content: str) -> List["MethodologySection"]:
        """Parse a markdown document into indexed sections.

        Looks for FASE headings first, falls back to generic ## headings.
        """
        sections = self._parse_fase_sections(content)
        if sections:
            return sections

        # Fallback: parse generic ## headings
        return self._parse_generic_sections(content)

    def _parse_fase_sections(self, content: str) -> List["MethodologySection"]:
        """Parse FASE-structured methodology documents.

        Each FASE heading starts a section that runs to the next FASE heading
        (or the end of the document). Section ids are ``fase_<N>`` with ``N``
        normalized as an integer, so "FASE 01" maps to ``fase_1``. Returns an
        empty list when the document has no FASE headings.
        """
        matches = list(self._FASE_HEADING.finditer(content))
        if not matches:
            return []

        sections: List[MethodologySection] = []

        # Also capture pre-FASE content (e.g., recon steps before FASE 1) as
        # a synthetic fase_0 — unless the document has its own FASE 0, which
        # would otherwise replace the synthetic section once indexed.
        first_start = matches[0].start()
        has_explicit_zero = any(int(m.group(2)) == 0 for m in matches)
        if first_start > 200 and not has_explicit_zero:
            pre_content = content[:first_start].strip()
            if pre_content:
                sections.append(MethodologySection(
                    fase_id="fase_0",
                    title="Recon & Preparation",
                    content=pre_content,
                    sub_sections=self._extract_sub_sections(pre_content),
                    vuln_types=list(FASE_VULN_TYPE_MAP.get("fase_0", [])),
                    contexts=list(FASE_CONTEXT_MAP.get("fase_0", ["strategy"])),
                ))

        for i, match in enumerate(matches):
            fase_num = match.group(2)
            fase_title = f"FASE {fase_num}: {match.group(3).strip()}"
            start = match.end()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
            body = content[start:end].strip()

            fase_id = f"fase_{int(fase_num)}"
            # Copy the mapped lists so a section never aliases (and can never
            # mutate) the module-level maps.
            vuln_types = list(FASE_VULN_TYPE_MAP.get(fase_id, []))
            contexts = list(FASE_CONTEXT_MAP.get(fase_id, ["testing"]))

            # No mapped types (FASE absent from the map, or mapped to an
            # empty list): try keyword detection on the body.
            if not vuln_types:
                vuln_types = self._detect_vuln_types_by_keywords(body)

            sections.append(MethodologySection(
                fase_id=fase_id,
                title=fase_title,
                content=body,
                sub_sections=self._extract_sub_sections(body),
                vuln_types=vuln_types,
                contexts=contexts,
            ))

        return sections

    def _parse_generic_sections(self, content: str) -> List["MethodologySection"]:
        """Parse generic ## heading structured documents.

        Sections are keyed ``section_<i>`` in document order, tagged with the
        "testing" and "strategy" contexts, and their vuln types are detected
        from the title plus the first 1000 characters of the body. Text
        before the first "##" heading is not included in any section.
        """
        matches = list(self._GENERIC_HEADING.finditer(content))
        if not matches:
            return []

        sections: List[MethodologySection] = []

        for i, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
            body = content[start:end].strip()

            vuln_types = self._detect_vuln_types_by_keywords(
                title + " " + body[:1000]
            )

            sections.append(MethodologySection(
                fase_id=f"section_{i}",
                title=title,
                content=body,
                sub_sections=self._extract_sub_sections(body),
                vuln_types=vuln_types,
                contexts=["testing", "strategy"],
            ))

        return sections

    def _extract_sub_sections(self, body: str) -> Dict[str, str]:
        """Extract ### sub-sections from a section body.

        Returns a mapping of sub-heading title to stripped body text.
        Sub-sections with an empty body are omitted; when a title occurs more
        than once the bodies are concatenated (blank-line separated) rather
        than the later one replacing the earlier.
        """
        sub_matches = list(self._SUB_HEADING.finditer(body))
        sub_sections: Dict[str, str] = {}

        for j, sub in enumerate(sub_matches):
            sub_title = sub.group(1).strip()
            sub_start = sub.end()
            sub_end = (
                sub_matches[j + 1].start()
                if j + 1 < len(sub_matches)
                else len(body)
            )
            sub_content = body[sub_start:sub_end].strip()
            if not sub_content:
                continue

            if sub_title in sub_sections:
                sub_sections[sub_title] += "\n\n" + sub_content
            else:
                sub_sections[sub_title] = sub_content

        return sub_sections

    def _detect_vuln_types_by_keywords(self, text: str) -> List[str]:
        """Detect vuln types from text content via keyword matching.

        A keyword counts only when it is not immediately preceded by a
        letter, so short acronyms do not fire inside unrelated words
        ("corridor" is not "idor", "selfish" is not "lfi"). Suffixes are
        still allowed ("websockets", "oauth2", "rate limiting").

        Returns:
            Vuln type identifiers in first-hit order, without duplicates.
        """
        text_lower = (text or "").lower()
        found: List[str] = []
        seen: set = set()

        for keyword, types in KEYWORD_VULN_MAP.items():
            # [^\W\d_] is "any letter"; the lookbehind rejects mid-word hits.
            if not re.search(r"(?<![^\W\d_])" + re.escape(keyword), text_lower):
                continue
            for vt in types:
                if vt not in seen:
                    found.append(vt)
                    seen.add(vt)

        return found