NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages

Major features:
- VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts
- Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop
- CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers
- Validation Pipeline: negative controls, proof of execution, confidence scoring, judge
- AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon
- Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents
- RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates
- Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh
- Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install
- Full AI Testing: methodology-driven comprehensive pentest sessions
- Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts
- Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
This commit is contained in:
CyberSecurityUP
2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
+500
View File
@@ -0,0 +1,500 @@
#!/usr/bin/env python3
"""
Browser Validator - Playwright-based security finding validation.
Provides browser-based validation for security findings:
- Navigate to target URLs with payloads
- Detect security triggers (XSS dialogs, error patterns, etc.)
- Capture screenshots at each validation step
- Store evidence in structured per-finding directories
Screenshots are stored at: reports/screenshots/{finding_id}/
"""
import asyncio
import base64
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
try:
from playwright.async_api import async_playwright, Page, Browser, BrowserContext
HAS_PLAYWRIGHT = True
except ImportError:
HAS_PLAYWRIGHT = False
logger.debug("Playwright not installed. Browser validation disabled.")
# Known security trigger patterns in page content.
# Maps a vulnerability class -> substrings whose presence in the rendered page
# suggests the class fired; matched case-insensitively by validate_finding().
SECURITY_TRIGGERS = {
    # Script-injection markers (reflected/stored XSS)
    'xss': ['<script>alert(', 'onerror=', 'onload=', 'javascript:'],
    # Database error strings leaked by failed injected queries
    'sqli': ['SQL syntax', 'mysql_fetch', 'pg_query', 'ORA-', 'sqlite3.OperationalError',
             'SQLSTATE', 'syntax error at or near', 'unclosed quotation mark'],
    # Local file inclusion: contents of well-known system files
    'lfi': ['root:x:0', '/etc/passwd', '[boot loader]', 'Windows\\system.ini'],
    # Command execution: fragments of id(1) / ls -l output
    'rce': ['uid=', 'gid=', 'groups=', 'total ', 'drwx'],
    # Verbose framework errors / stack traces (information disclosure)
    'error_disclosure': ['Stack Trace', 'Traceback (most recent call last)',
                         'Exception in thread', 'Fatal error', 'Parse error'],
}
class BrowserValidator:
    """Playwright-backed validator that reproduces security findings in a real browser."""

    def __init__(self, screenshots_dir: str = "reports/screenshots"):
        """Prepare the evidence directory; the browser itself is launched by start()."""
        evidence_root = Path(screenshots_dir)
        evidence_root.mkdir(parents=True, exist_ok=True)
        self.screenshots_dir = evidence_root
        # Both handles are populated by start() and cleared by stop().
        self._playwright = None
        self.browser: Optional['Browser'] = None
async def start(self, headless: bool = True):
    """Launch browser instance.

    Raises:
        RuntimeError: if Playwright is not importable in this environment.
    """
    if not HAS_PLAYWRIGHT:
        raise RuntimeError(
            "Playwright not installed. Install with: pip install playwright && python -m playwright install chromium"
        )
    # Start the driver process first, then one shared Chromium instance
    # reused by every subsequent validate_* call.
    self._playwright = await async_playwright().start()
    self.browser = await self._playwright.chromium.launch(headless=headless)
    logger.info(f"Browser started (headless={headless})")
async def stop(self):
    """Tear down the browser and the Playwright driver (safe to call twice)."""
    browser = self.browser
    if browser:
        await browser.close()
        self.browser = None
    driver = self._playwright
    if driver:
        await driver.stop()
        self._playwright = None
    logging.getLogger(__name__).info("Browser stopped")
async def validate_finding(self, finding_id: str, url: str,
                           payload: Optional[str] = None,
                           method: str = "GET",
                           interaction_steps: Optional[List[Dict]] = None,
                           timeout: int = 30000) -> Dict:
    """Validate a security finding in a real browser.
    Args:
        finding_id: Unique identifier for the finding
        url: Target URL (may include payload in query params)
        payload: Optional payload description for logging
        method: HTTP method (currently GET-based navigation)
        interaction_steps: Optional list of browser interaction steps
        timeout: Navigation timeout in milliseconds
    Returns:
        Dict with validation result, screenshots, evidence
    """
    if not self.browser:
        return {"error": "Browser not started. Call start() first."}
    # Each finding gets its own evidence directory under screenshots_dir.
    finding_dir = self.screenshots_dir / finding_id
    finding_dir.mkdir(parents=True, exist_ok=True)
    validation = {
        "finding_id": finding_id,
        "url": url,
        "payload": payload,
        "timestamp": datetime.now().isoformat(),
        "validated": False,
        "screenshots": [],
        "console_logs": [],
        "dialog_detected": False,
        "dialog_messages": [],
        "triggers_found": [],
        "evidence": "",
        "page_title": "",
        "status_code": None,
        "error": None
    }
    # Fresh context per finding: isolated cookies/storage; self-signed TLS accepted.
    context = await self.browser.new_context(
        ignore_https_errors=True,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    page = await context.new_page()
    # Capture console messages
    console_msgs = []
    page.on("console", lambda msg: console_msgs.append({
        "type": msg.type, "text": msg.text
    }))
    # Capture JavaScript dialogs (XSS alert/prompt/confirm detection)
    dialog_messages = []
    async def handle_dialog(dialog):
        dialog_messages.append({
            "type": dialog.type,
            "message": dialog.message
        })
        # Dismiss so the page never hangs waiting for user input.
        await dialog.dismiss()
    page.on("dialog", handle_dialog)
    # Track response status
    # NOTE(review): response_status is captured here but never read back;
    # validation["status_code"] comes from page.goto() below. Dead state?
    response_status = [None]
    def on_response(response):
        if response.url == url or response.url.rstrip('/') == url.rstrip('/'):
            response_status[0] = response.status
    page.on("response", on_response)
    try:
        # Navigate to the URL
        response = await page.goto(url, wait_until="networkidle", timeout=timeout)
        if response:
            validation["status_code"] = response.status
        validation["page_title"] = await page.title()
        # Take initial screenshot
        ss_path = finding_dir / "01_initial.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        validation["screenshots"].append(str(ss_path))
        # Execute interaction steps if provided
        if interaction_steps:
            for i, step in enumerate(interaction_steps):
                # Screenshot numbering continues from 02 (01 is the initial shot).
                step_name = step.get('name', f'step_{i+2}')
                try:
                    await self._execute_step(page, step)
                    await page.wait_for_timeout(500)  # Brief pause
                    ss_path = finding_dir / f"{i+2:02d}_{step_name}.png"
                    await page.screenshot(path=str(ss_path))
                    validation["screenshots"].append(str(ss_path))
                except Exception as e:
                    # A failed step is logged but does not abort the validation.
                    logger.warning(f"Interaction step '{step_name}' failed: {e}")
        # Check for dialog detection (XSS) - strongest signal.
        if dialog_messages:
            validation["validated"] = True
            validation["dialog_detected"] = True
            validation["dialog_messages"] = dialog_messages
            validation["evidence"] = f"JavaScript dialog triggered: {dialog_messages[0]['message']}"
            ss_path = finding_dir / "xss_dialog_detected.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        # Check for security triggers in page content (case-insensitive).
        content = await page.content()
        for trigger_type, patterns in SECURITY_TRIGGERS.items():
            for pattern in patterns:
                if pattern.lower() in content.lower():
                    validation["triggers_found"].append({
                        "type": trigger_type,
                        "pattern": pattern
                    })
        # Content triggers only mark the finding validated if no dialog already did.
        if validation["triggers_found"] and not validation["validated"]:
            validation["validated"] = True
            first_trigger = validation["triggers_found"][0]
            validation["evidence"] = (
                f"Security trigger detected: {first_trigger['type']} "
                f"(pattern: {first_trigger['pattern']})"
            )
            ss_path = finding_dir / "trigger_detected.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        # Check console for errors that might indicate vulnerabilities
        error_msgs = [m for m in console_msgs if m["type"] in ("error", "warning")]
        if error_msgs:
            validation["console_logs"] = console_msgs
    except Exception as e:
        validation["error"] = str(e)
        logger.error(f"Browser validation error for {finding_id}: {e}")
        try:
            # Best-effort screenshot of whatever state the page ended up in.
            ss_path = finding_dir / "error.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        except Exception:
            pass
    finally:
        # Always release the per-finding browser context.
        await context.close()
    return validation
async def verify_stored_xss(
    self,
    finding_id: str,
    form_url: str,
    form_data: Dict[str, str],
    display_url: str,
    submit_selector: str = "button[type=submit], input[type=submit], button:not([type])",
    timeout: int = 30000,
) -> Dict:
    """Two-phase stored XSS verification using browser.
    Phase 1: Navigate to form page, fill fields with payload, submit.
    Phase 2: Navigate to display page, check for dialog (alert/confirm/prompt).
    Args:
        finding_id: Unique ID for this verification attempt
        form_url: URL containing the form to submit
        form_data: Dict mapping CSS selectors to values (payload in relevant fields)
        display_url: URL where stored content is displayed
        submit_selector: CSS selector(s) for submit button (comma-separated)
        timeout: Navigation timeout in ms
    Returns:
        Dict with verification results, dialog detection, screenshots
    """
    if not self.browser:
        return {"error": "Browser not started. Call start() first."}
    finding_dir = self.screenshots_dir / finding_id
    finding_dir.mkdir(parents=True, exist_ok=True)
    result = {
        "finding_id": finding_id,
        "form_url": form_url,
        "display_url": display_url,
        "timestamp": datetime.now().isoformat(),
        "phase1_success": False,
        "phase2_success": False,
        "xss_confirmed": False,
        "dialog_detected": False,
        "dialog_messages": [],
        "screenshots": [],
        "evidence": "",
        "error": None,
    }
    # Fresh isolated context; self-signed certificates accepted.
    context = await self.browser.new_context(
        ignore_https_errors=True,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    page = await context.new_page()
    dialog_messages = []
    async def handle_dialog(dialog):
        dialog_messages.append({
            "type": dialog.type,
            "message": dialog.message,
            # result["phase1_success"] only flips after the submit completes,
            # so this tags each dialog with the phase that produced it.
            "phase": "phase2" if result["phase1_success"] else "phase1"
        })
        await dialog.dismiss()
    page.on("dialog", handle_dialog)
    try:
        # === PHASE 1: Navigate to form and submit payload ===
        await page.goto(form_url, wait_until="networkidle", timeout=timeout)
        ss_path = finding_dir / "01_form_page.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        # Fill form fields
        for selector, value in form_data.items():
            try:
                await page.fill(selector, value)
            except Exception:
                # fill() requires a fillable element; fall back to keystrokes.
                try:
                    await page.type(selector, value)
                except Exception as fill_err:
                    logger.warning(f"Could not fill {selector}: {fill_err}")
        ss_path = finding_dir / "02_form_filled.png"
        await page.screenshot(path=str(ss_path))
        result["screenshots"].append(str(ss_path))
        # Submit: try each candidate selector until one clicks.
        submitted = False
        for sel in submit_selector.split(","):
            sel = sel.strip()
            try:
                btn = await page.query_selector(sel)
                if btn:
                    await btn.click()
                    submitted = True
                    break
            except Exception:
                continue
        if not submitted and form_data:
            # Fallback: press Enter on last filled field
            last_sel = list(form_data.keys())[-1]
            try:
                await page.press(last_sel, "Enter")
            except Exception:
                pass
        try:
            await page.wait_for_load_state("networkidle", timeout=10000)
        except Exception:
            # Some pages never go network-idle; use a fixed grace period instead.
            await page.wait_for_timeout(3000)
        ss_path = finding_dir / "03_after_submit.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        result["phase1_success"] = True
        # === PHASE 2: Navigate to display page ===
        await page.goto(display_url, wait_until="networkidle", timeout=timeout)
        await page.wait_for_timeout(1000)
        ss_path = finding_dir / "04_display_page.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        # Check for dialogs triggered on display page
        if dialog_messages:
            phase2_dialogs = [d for d in dialog_messages if d.get("phase") == "phase2"]
            if phase2_dialogs:
                result["xss_confirmed"] = True
                result["dialog_detected"] = True
                result["dialog_messages"] = dialog_messages
                result["evidence"] = (
                    f"Stored XSS CONFIRMED: JavaScript dialog triggered on display page. "
                    f"Dialog: {phase2_dialogs[0]['type']}('{phase2_dialogs[0]['message']}')"
                )
                result["phase2_success"] = True
                ss_path = finding_dir / "05_xss_confirmed.png"
                await page.screenshot(path=str(ss_path))
                result["screenshots"].append(str(ss_path))
            else:
                # Dialog fired while submitting - reflected, not stored.
                result["evidence"] = (
                    "Dialog triggered during form submission (phase1), not on display page."
                )
        # Content-based fallback if no dialog (e.g. CSP blocked execution).
        if not result["xss_confirmed"]:
            content = await page.content()
            for _, payload_val in form_data.items():
                if payload_val in content:
                    payload_lower = payload_val.lower()
                    for tag in ["<script", "onerror=", "onload=", "<svg", "<img",
                                "onfocus=", "onclick=", "ontoggle"]:
                        if tag in payload_lower:
                            result["phase2_success"] = True
                            result["evidence"] = (
                                f"Stored payload with '{tag}' found unescaped on display page. "
                                f"Dialog may be blocked by CSP."
                            )
                            break
                    # Only the first payload found in the page is considered.
                    break
    except Exception as e:
        result["error"] = str(e)
        logger.error(f"Stored XSS verification error: {e}")
        try:
            ss_path = finding_dir / "error.png"
            await page.screenshot(path=str(ss_path))
            result["screenshots"].append(str(ss_path))
        except Exception:
            pass
    finally:
        await context.close()
    return result
async def _execute_step(self, page: 'Page', step: Dict):
"""Execute a single browser interaction step."""
action = step.get("action", "")
if action == "click":
await page.click(step["selector"])
elif action == "fill":
await page.fill(step["selector"], step["value"])
elif action == "type":
await page.type(step["selector"], step["value"])
elif action == "submit":
selector = step.get("selector", "button[type=submit]")
await page.click(selector)
elif action == "wait":
await page.wait_for_timeout(step.get("ms", 2000))
elif action == "navigate":
await page.goto(step["url"], wait_until="networkidle")
elif action == "select":
await page.select_option(step["selector"], step["value"])
elif action == "check":
await page.check(step["selector"])
elif action == "press":
await page.press(step.get("selector", "body"), step["key"])
else:
logger.warning(f"Unknown interaction action: {action}")
async def batch_validate(self, findings: List[Dict],
                         headless: bool = True) -> List[Dict]:
    """Start a browser, validate each finding in order, then always stop it.

    Args:
        findings: List of dicts with 'finding_id', 'url', and optional
            'payload' / 'interaction_steps' keys.
        headless: Run browser in headless mode.
    Returns:
        One validation-result dict per finding, in input order.
    """
    outcomes: List[Dict] = []
    await self.start(headless=headless)
    try:
        for item in findings:
            outcomes.append(await self.validate_finding(
                finding_id=item['finding_id'],
                url=item['url'],
                payload=item.get('payload'),
                interaction_steps=item.get('interaction_steps'),
            ))
    finally:
        # The browser is torn down even when a validation raises.
        await self.stop()
    return outcomes
def validate_finding_sync(finding_id: str, url: str,
                          payload: Optional[str] = None,
                          screenshots_dir: str = "reports/screenshots",
                          headless: bool = True) -> Dict:
    """Synchronous wrapper for browser validation.
    For use in synchronous code paths (e.g., BaseAgent).

    Returns the validate_finding() result dict, or a {"skipped": True, ...}
    dict when Playwright is unavailable or a loop is already running.
    """
    # Degrade gracefully when the optional dependency is absent.
    if not HAS_PLAYWRIGHT:
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Playwright not installed"
        }
    async def _run():
        # One-shot lifecycle: start, validate, always stop.
        validator = BrowserValidator(screenshots_dir=screenshots_dir)
        await validator.start(headless=headless)
        try:
            return await validator.validate_finding(finding_id, url, payload)
        finally:
            await validator.stop()
    try:
        return asyncio.run(_run())
    except RuntimeError:
        # asyncio.run() raises RuntimeError when called from a running loop.
        # Already in an async context - use nest_asyncio or skip
        logger.warning("Cannot run sync validation inside async context")
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Async context conflict"
        }
def embed_screenshot(filepath: str) -> str:
    """Return the screenshot as a base64 PNG data URI, or "" if the file is missing."""
    shot = Path(filepath)
    if not shot.exists():
        return ""
    encoded = base64.b64encode(shot.read_bytes()).decode('ascii')
    return f"data:image/png;base64,{encoded}"
+209
View File
@@ -0,0 +1,209 @@
"""
NeuroSploit v3 - Container Pool
Global coordinator for per-scan Kali Linux containers.
Tracks all running sandbox containers, enforces max concurrent limits,
handles lifecycle management and orphan cleanup.
"""
import asyncio
import json
import logging
import threading
from datetime import datetime, timedelta
from typing import Dict, Optional
logger = logging.getLogger(__name__)
try:
import docker
from docker.errors import NotFound
HAS_DOCKER = True
except ImportError:
HAS_DOCKER = False
from core.kali_sandbox import KaliSandbox
class ContainerPool:
    """Global pool managing per-scan KaliSandbox instances.
    Thread-safe. One pool per process. Enforces resource limits.
    """

    def __init__(
        self,
        image: str = "neurosploit-kali:latest",
        max_concurrent: int = 5,
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        container_ttl_minutes: int = 60,
    ):
        """Record image/resource settings; containers are created lazily."""
        # Container image and per-container resource caps.
        self.image = image
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        # Pool-wide limits.
        self.max_concurrent = max_concurrent
        self.container_ttl = timedelta(minutes=container_ttl_minutes)
        # scan_id -> live sandbox; mutations are guarded by the asyncio lock.
        self._sandboxes: Dict[str, "KaliSandbox"] = {}
        self._lock = asyncio.Lock()
@classmethod
def from_config(cls) -> "ContainerPool":
    """Build a pool from the sandbox section of config/config.json.

    Any failure (missing file, bad JSON, wrong shape) falls back to a pool
    with default settings.
    """
    try:
        with open("config/config.json") as fh:
            raw = json.load(fh)
        sandbox = raw.get("sandbox", {})
        kali = sandbox.get("kali", {})
        res = sandbox.get("resources", {})
        return cls(
            image=kali.get("image", "neurosploit-kali:latest"),
            max_concurrent=kali.get("max_concurrent", 5),
            memory_limit=res.get("memory_limit", "2g"),
            cpu_limit=res.get("cpu_limit", 2.0),
            container_ttl_minutes=kali.get("container_ttl_minutes", 60),
        )
    except Exception as e:
        logger.warning(f"Could not load pool config, using defaults: {e}")
        return cls()
async def get_or_create(
    self, scan_id: str, enable_vpn: bool = False,
) -> KaliSandbox:
    """Get existing sandbox for scan_id, or create a new one.
    Raises RuntimeError if max_concurrent limit reached or creation fails.
    """
    # The lock is held for the whole call, including container creation,
    # so concurrent scans cannot race past the max_concurrent check.
    async with self._lock:
        # Return existing
        if scan_id in self._sandboxes:
            sb = self._sandboxes[scan_id]
            if sb.is_available:
                return sb
            else:
                # Stale entry (container gone): drop it and fall through to recreate.
                del self._sandboxes[scan_id]
        # Check limit
        active = sum(1 for sb in self._sandboxes.values() if sb.is_available)
        if active >= self.max_concurrent:
            raise RuntimeError(
                f"Max concurrent containers ({self.max_concurrent}) reached. "
                f"Active scans: {list(self._sandboxes.keys())}"
            )
        # Create new
        sb = KaliSandbox(
            scan_id=scan_id,
            image=self.image,
            memory_limit=self.memory_limit,
            cpu_limit=self.cpu_limit,
            enable_vpn=enable_vpn,
        )
        ok, msg = await sb.initialize()
        if not ok:
            raise RuntimeError(f"Failed to create Kali sandbox: {msg}")
        # Register only after successful initialization.
        self._sandboxes[scan_id] = sb
        logger.info(
            f"Pool: created container for scan {scan_id} "
            f"({active + 1}/{self.max_concurrent} active)"
        )
        return sb
async def destroy(self, scan_id: str):
"""Stop and remove the container for a specific scan."""
async with self._lock:
sb = self._sandboxes.pop(scan_id, None)
if sb:
await sb.stop()
logger.info(f"Pool: destroyed container for scan {scan_id}")
async def cleanup_all(self):
    """Destroy all managed containers (shutdown hook).

    Bug fix: the original awaited self.destroy() while still holding
    self._lock, but destroy() re-acquires the same asyncio.Lock, which is
    NOT reentrant - the first destroy() call deadlocked. We now snapshot
    the scan ids under the lock and destroy outside it.
    """
    async with self._lock:
        scan_ids = list(self._sandboxes.keys())
    for sid in scan_ids:
        # destroy() takes the lock itself and tolerates already-removed ids.
        await self.destroy(sid)
    logging.getLogger(__name__).info("Pool: all containers destroyed")
async def cleanup_orphans(self):
    """Find and remove neurosploit-* containers not tracked by this pool.

    Best-effort: every Docker error is swallowed so cleanup can never
    break a running scan. No-op when the docker SDK is unavailable.
    """
    if not HAS_DOCKER:
        return
    try:
        client = docker.from_env()
        # All sandbox containers this project ever labelled (running or not).
        containers = client.containers.list(
            all=True,
            filters={"label": "neurosploit.type=kali-sandbox"},
        )
        # Snapshot tracked ids under the lock; the removal loop below has no
        # awaits, so it can run without holding the asyncio lock.
        async with self._lock:
            tracked = set(self._sandboxes.keys())
        removed = 0
        for c in containers:
            scan_id = c.labels.get("neurosploit.scan_id", "")
            if scan_id not in tracked:
                # Stop first (ignore failures), then force-remove.
                try:
                    c.stop(timeout=5)
                except Exception:
                    pass
                try:
                    c.remove(force=True)
                    removed += 1
                    logger.info(f"Pool: removed orphan container {c.name}")
                except Exception:
                    pass
        if removed:
            logger.info(f"Pool: cleaned up {removed} orphan containers")
    except Exception as e:
        logger.warning(f"Pool: orphan cleanup failed: {e}")
async def cleanup_expired(self):
    """Remove containers that have exceeded their TTL.

    Bug fix: the original awaited self.destroy() inside `async with
    self._lock:`, but destroy() re-acquires the same non-reentrant
    asyncio.Lock, so the first expired container deadlocked the pool.
    Expired ids are now collected under the lock and destroyed after
    releasing it. Sandboxes without a _created_at timestamp never expire.
    """
    now = datetime.utcnow()
    async with self._lock:
        expired = [
            sid for sid, sb in self._sandboxes.items()
            if sb._created_at and (now - sb._created_at) > self.container_ttl
        ]
    for sid in expired:
        logging.getLogger(__name__).warning(
            f"Pool: container for scan {sid} exceeded TTL, destroying"
        )
        await self.destroy(sid)
def list_sandboxes(self) -> Dict[str, Dict]:
    """Snapshot of every tracked sandbox, keyed by scan id."""
    return {
        sid: {
            "scan_id": sid,
            "container_name": sb.container_name,
            "available": sb.is_available,
            "installed_tools": sorted(sb._installed_tools),
            "created_at": sb._created_at.isoformat() if sb._created_at else None,
        }
        for sid, sb in self._sandboxes.items()
    }
@property
def active_count(self) -> int:
    """Number of tracked sandboxes whose container is currently usable."""
    running = [sb for sb in self._sandboxes.values() if sb.is_available]
    return len(running)
# ---------------------------------------------------------------------------
# Global singleton pool
# ---------------------------------------------------------------------------
# Lazily-built process-wide pool instance.
_pool: Optional[ContainerPool] = None
# Guards first-time construction when get_pool() races across threads.
_pool_lock = threading.Lock()
def get_pool() -> ContainerPool:
    """Get or create the global container pool."""
    global _pool
    if _pool is None:
        # Double-checked locking: the unlocked test is the fast path; the
        # re-check under the lock prevents two threads building two pools.
        with _pool_lock:
            if _pool is None:
                _pool = ContainerPool.from_config()
    return _pool
+468
View File
@@ -0,0 +1,468 @@
#!/usr/bin/env python3
"""
Context Builder - Consolidates all recon outputs into a single file for LLM consumption
This module aggregates results from all reconnaissance tools into a single
consolidated file that will be used by the LLM to enhance testing capabilities.
"""
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Set, Optional
from urllib.parse import urlparse
import logging
logger = logging.getLogger(__name__)
class ReconContextBuilder:
    """
    Consolidates all reconnaissance data into a single context for LLM consumption.
    Generates consolidated files:
    - consolidated_context.json - Complete JSON with all data
    - consolidated_context.txt - Text version for direct LLM consumption
    """

    def __init__(self, output_dir: str = "results"):
        """Create the builder and ensure *output_dir* exists."""
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Deduplicated collections (sets).
        self.subdomains: Set[str] = set()
        self.live_hosts: Set[str] = set()
        self.urls: Set[str] = set()
        self.urls_with_params: Set[str] = set()
        self.js_files: Set[str] = set()
        self.api_endpoints: Set[str] = set()
        self.interesting_paths: Set[str] = set()
        # Order-preserving collections.
        self.target_info: Dict[str, Any] = {}
        self.open_ports: List[Dict] = []
        self.technologies: List[str] = []
        self.vulnerabilities: List[Dict] = []
        self.dns_records: List[str] = []
        self.secrets: List[str] = []
        # Per-tool raw stdout and parsed results.
        self.raw_outputs: Dict[str, str] = {}
        self.tool_results: Dict[str, Dict] = {}
def set_target(self, target: str, target_type: str = "domain"):
    """Record the primary target and seed the in-scope collections."""
    self.target_info = {
        "primary_target": target,
        "type": target_type,
        "timestamp": datetime.now().isoformat()
    }
    # Auto-add as in-scope
    if target_type == "domain":
        self.subdomains.add(target)
    elif target_type == "url":
        # A URL seeds both the live-host list and, via its netloc, the subdomains.
        host = urlparse(target).netloc
        if host:
            self.subdomains.add(host)
        self.live_hosts.add(target)
def add_subdomains(self, subdomains: List[str]):
    """Add discovered subdomains, normalized to lowercase and validated."""
    for raw in subdomains:
        candidate = raw.strip().lower()
        if candidate and self._is_valid_domain(candidate):
            self.subdomains.add(candidate)
def add_live_hosts(self, hosts: List[str]):
    """Add active HTTP hosts, skipping empty entries."""
    stripped = (h.strip() for h in hosts)
    self.live_hosts.update(h for h in stripped if h)
def add_urls(self, urls: List[str]):
    """Add discovered http(s) URLs; those carrying query params are indexed separately."""
    for raw in urls:
        url = raw.strip()
        if not url or not url.startswith(('http://', 'https://')):
            continue
        self.urls.add(url)
        # URLs with parameters are prime injection-testing candidates.
        if '?' in url and '=' in url:
            self.urls_with_params.add(url)
def add_open_ports(self, ports: List[Dict]):
    """Append port dicts, skipping exact duplicates; insertion order kept."""
    for entry in ports:
        if entry not in self.open_ports:
            self.open_ports.append(entry)
def add_technologies(self, techs: List[str]):
    """Record detected technologies, deduplicated, first-seen order preserved."""
    for name in techs:
        if name and name not in self.technologies:
            self.technologies.append(name)
def add_vulnerabilities(self, vulns: List[Dict]):
    """Append vulnerability dicts that are not already recorded."""
    for finding in vulns:
        if finding not in self.vulnerabilities:
            self.vulnerabilities.append(finding)
def add_dns_records(self, records: List[str]):
    """Record DNS entries, skipping blanks and duplicates."""
    for rec in records:
        if rec and rec not in self.dns_records:
            self.dns_records.append(rec)
def add_js_files(self, js_urls: List[str]):
    """Add discovered JavaScript file URLs.

    Bug fix: the original used a substring test (``'.js' in url``) which
    also matched non-JS resources such as ``data.json`` or ``/page.jsp``.
    Only URLs whose *path* component ends in ``.js`` are kept now; query
    strings and fragments (``app.js?v=2``) are ignored via urlparse.
    """
    for js in js_urls:
        if not js:
            continue
        path = urlparse(js).path.lower()
        if path.endswith('.js'):
            self.js_files.add(js)
def add_api_endpoints(self, endpoints: List[str]):
    """Record API endpoints, ignoring empty values."""
    self.api_endpoints.update(ep for ep in endpoints if ep)
def add_interesting_paths(self, paths: List[str]):
    """Keep only paths whose name hints at sensitive functionality."""
    # Substrings that mark a path as worth manual review.
    keywords = ['admin', 'login', 'dashboard', 'api', 'config', 'backup',
                'debug', 'test', 'dev', 'staging', 'internal', 'upload',
                'console', 'panel', 'phpinfo', 'swagger', '.git', '.env']
    for candidate in paths:
        lowered = candidate.lower()
        if any(kw in lowered for kw in keywords):
            self.interesting_paths.add(candidate)
def add_secrets(self, secrets: List[str]):
    """Append potential secrets, skipping blanks and duplicates."""
    for item in secrets:
        if item and item not in self.secrets:
            self.secrets.append(item)
def add_raw_output(self, tool_name: str, output: str):
    """Store the raw (unparsed) stdout of a tool; later calls overwrite."""
    self.raw_outputs[tool_name] = output
def add_tool_result(self, tool_name: str, result: Dict):
    """Store a tool's structured (parsed) result; later calls overwrite."""
    self.tool_results[tool_name] = result
def _is_valid_domain(self, domain: str) -> bool:
"""Check if it's a valid domain."""
if not domain or '..' in domain or domain.startswith('.'):
return False
parts = domain.split('.')
return len(parts) >= 2 and all(p for p in parts)
def _extract_params_from_urls(self) -> Dict[str, List[str]]:
"""Extract unique parameters from URLs."""
params = {}
for url in self.urls_with_params:
if '?' in url:
query = url.split('?')[1]
for pair in query.split('&'):
if '=' in pair:
param_name = pair.split('=')[0]
if param_name not in params:
params[param_name] = []
params[param_name].append(url)
return params
def _categorize_vulnerabilities(self) -> Dict[str, List[Dict]]:
"""Categorize vulnerabilities by severity."""
categories = {
'critical': [],
'high': [],
'medium': [],
'low': [],
'info': []
}
for vuln in self.vulnerabilities:
severity = vuln.get('severity', 'info').lower()
if severity in categories:
categories[severity].append(vuln)
return categories
def _build_attack_surface(self) -> Dict[str, Any]:
"""Build attack surface summary."""
return {
"total_subdomains": len(self.subdomains),
"live_hosts": len(self.live_hosts),
"total_urls": len(self.urls),
"urls_with_params": len(self.urls_with_params),
"open_ports": len(self.open_ports),
"js_files": len(self.js_files),
"api_endpoints": len(self.api_endpoints),
"interesting_paths": len(self.interesting_paths),
"technologies_detected": len(self.technologies),
"vulnerabilities_found": len(self.vulnerabilities),
"secrets_found": len(self.secrets)
}
def _build_recommendations(self) -> List[str]:
"""Generate recommendations based on findings."""
recs = []
vuln_cats = self._categorize_vulnerabilities()
if vuln_cats['critical']:
recs.append(f"CRITICAL: {len(vuln_cats['critical'])} critical vulnerabilities found - immediate action required!")
if vuln_cats['high']:
recs.append(f"HIGH: {len(vuln_cats['high'])} high severity vulnerabilities need attention.")
if self.urls_with_params:
recs.append(f"Test {len(self.urls_with_params)} URLs with parameters for SQLi, XSS, etc.")
if self.api_endpoints:
recs.append(f"Review {len(self.api_endpoints)} API endpoints for authentication/authorization issues.")
if self.secrets:
recs.append(f"SECRETS: {len(self.secrets)} potential secrets exposed - rotate credentials!")
if self.interesting_paths:
recs.append(f"Investigate {len(self.interesting_paths)} interesting paths found.")
if len(self.live_hosts) > 50:
recs.append("Large attack surface detected - consider network segmentation.")
return recs
def build(self) -> Dict[str, Any]:
    """Build the consolidated context.

    Returns a JSON-serializable dict. Large collections are truncated to
    keep the context LLM-sized (urls to 500, secrets to 50, vulnerabilities
    to 100) while total counts are preserved alongside.
    """
    logger.info("Building consolidated context for LLM...")
    context = {
        "metadata": {
            "generated_at": datetime.now().isoformat(),
            "generator": "NeuroSploit Recon",
            "version": "2.0.0"
        },
        "target": self.target_info,
        "attack_surface": self._build_attack_surface(),
        "data": {
            "subdomains": sorted(list(self.subdomains)),
            "live_hosts": sorted(list(self.live_hosts)),
            "urls": {
                # Capped at 500; total_count keeps the real figure.
                "all": list(self.urls)[:500],
                "with_params": list(self.urls_with_params),
                "total_count": len(self.urls)
            },
            "open_ports": self.open_ports,
            "technologies": self.technologies,
            "dns_records": self.dns_records,
            "js_files": list(self.js_files),
            "api_endpoints": list(self.api_endpoints),
            "interesting_paths": list(self.interesting_paths),
            "unique_params": self._extract_params_from_urls(),
            "secrets": self.secrets[:50]
        },
        "vulnerabilities": {
            "total": len(self.vulnerabilities),
            "by_severity": self._categorize_vulnerabilities(),
            "all": self.vulnerabilities[:100]
        },
        "recommendations": self._build_recommendations(),
        "tool_results": self.tool_results
    }
    return context
def build_text_context(self) -> str:
    """Build context in text format for LLM.

    Renders build() output as sectioned plain text. Long lists are
    truncated per section (subdomains 50, hosts/ports/vulns 30, techs 20,
    param URLs 50, secrets 20); API endpoints and paths are unbounded.
    """
    ctx = self.build()
    lines = [
        "=" * 80,
        "NEUROSPLOIT - CONSOLIDATED RECONNAISSANCE CONTEXT",
        "=" * 80,
        "",
        f"Primary Target: {ctx['target'].get('primary_target', 'N/A')}",
        f"Generated at: {ctx['metadata']['generated_at']}",
        "",
        "-" * 40,
        "ATTACK SURFACE",
        "-" * 40,
    ]
    for key, value in ctx['attack_surface'].items():
        lines.append(f" {key}: {value}")
    lines.extend([
        "",
        "-" * 40,
        "DISCOVERED SUBDOMAINS",
        "-" * 40,
    ])
    for sub in ctx['data']['subdomains'][:50]:
        lines.append(f" - {sub}")
    if len(ctx['data']['subdomains']) > 50:
        lines.append(f" ... and {len(ctx['data']['subdomains']) - 50} more")
    lines.extend([
        "",
        "-" * 40,
        "LIVE HOSTS (HTTP)",
        "-" * 40,
    ])
    for host in ctx['data']['live_hosts'][:30]:
        lines.append(f" - {host}")
    lines.extend([
        "",
        "-" * 40,
        "OPEN PORTS",
        "-" * 40,
    ])
    for port in ctx['data']['open_ports'][:30]:
        lines.append(f" - {port.get('port', 'N/A')}/{port.get('protocol', 'tcp')} - {port.get('service', 'unknown')}")
    lines.extend([
        "",
        "-" * 40,
        "DETECTED TECHNOLOGIES",
        "-" * 40,
    ])
    for tech in ctx['data']['technologies'][:20]:
        lines.append(f" - {tech}")
    lines.extend([
        "",
        "-" * 40,
        "URLs WITH PARAMETERS (for injection testing)",
        "-" * 40,
    ])
    for url in ctx['data']['urls']['with_params'][:50]:
        lines.append(f" - {url}")
    lines.extend([
        "",
        "-" * 40,
        "API ENDPOINTS",
        "-" * 40,
    ])
    for ep in ctx['data']['api_endpoints']:
        lines.append(f" - {ep}")
    lines.extend([
        "",
        "-" * 40,
        "INTERESTING PATHS",
        "-" * 40,
    ])
    for path in ctx['data']['interesting_paths']:
        lines.append(f" - {path}")
    lines.extend([
        "",
        "-" * 40,
        "VULNERABILITIES FOUND",
        "-" * 40,
        f"Total: {ctx['vulnerabilities']['total']}",
        f"Critical: {len(ctx['vulnerabilities']['by_severity']['critical'])}",
        f"High: {len(ctx['vulnerabilities']['by_severity']['high'])}",
        f"Medium: {len(ctx['vulnerabilities']['by_severity']['medium'])}",
        f"Low: {len(ctx['vulnerabilities']['by_severity']['low'])}",
        "",
    ])
    for vuln in ctx['vulnerabilities']['all'][:30]:
        lines.append(f" [{vuln.get('severity', 'INFO').upper()}] {vuln.get('title', 'N/A')}")
        lines.append(f" Endpoint: {vuln.get('affected_endpoint', 'N/A')}")
    # Secrets section is omitted entirely when nothing was found.
    if ctx['data']['secrets']:
        lines.extend([
            "",
            "-" * 40,
            "POTENTIAL EXPOSED SECRETS",
            "-" * 40,
        ])
        for secret in ctx['data']['secrets'][:20]:
            lines.append(f" [!] {secret[:100]}")
    lines.extend([
        "",
        "-" * 40,
        "RECOMMENDATIONS FOR LLM",
        "-" * 40,
    ])
    for rec in ctx['recommendations']:
        lines.append(f" * {rec}")
    lines.extend([
        "",
        "=" * 80,
        "END OF CONTEXT - USE THIS DATA TO ENHANCE TESTING",
        "=" * 80,
    ])
    return "\n".join(lines)
def save(self, session_id: str = None) -> Dict[str, Path]:
    """Write context_{session_id}.json and .txt under output_dir.

    Returns a dict with both file paths plus the built context itself.
    A missing session_id defaults to a timestamp.
    """
    if not session_id:
        session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    json_path = self.output_dir / f"context_{session_id}.json"
    txt_path = self.output_dir / f"context_{session_id}.txt"
    # JSON artifact (default=str serializes Paths/datetimes safely).
    context = self.build()
    with open(json_path, 'w') as f:
        json.dump(context, f, indent=2, default=str)
    # Plain-text artifact for direct LLM consumption.
    with open(txt_path, 'w') as f:
        f.write(self.build_text_context())
    logging.getLogger(__name__).info(f"Context saved to: {json_path} and {txt_path}")
    return {
        "json": json_path,
        "txt": txt_path,
        "context": context
    }
    def get_llm_prompt_context(self) -> str:
        """Return context formatted for inclusion in an LLM prompt.

        Thin alias for build_text_context(); kept as a stable public name
        for prompt-assembly callers.
        """
        return self.build_text_context()
def load_context_from_file(context_file: str) -> Optional[Dict]:
    """Load a previously saved recon context from a JSON file.

    Returns the parsed dict, or None when the file is missing, unreadable,
    or contains invalid JSON (the error is logged, not raised).
    """
    try:
        with open(context_file, 'r') as handle:
            parsed = json.load(handle)
    except Exception as e:
        logger.error(f"Error loading context: {e}")
        return None
    return parsed
def merge_contexts(contexts: List[Dict]) -> Dict:
    """Merge multiple recon contexts into one consolidated context dict.

    Each context's data categories are funneled through a fresh
    ReconContextBuilder, which handles de-duplication, then rebuilt.
    """
    combined = ReconContextBuilder()
    for ctx in contexts:
        data = ctx.get('data', {})
        combined.add_subdomains(data.get('subdomains', []))
        combined.add_live_hosts(data.get('live_hosts', []))
        # URLs live under a nested 'all' key, unlike the flat categories.
        combined.add_urls(data.get('urls', {}).get('all', []))
        for field, adder in (
            ('open_ports', combined.add_open_ports),
            ('technologies', combined.add_technologies),
            ('dns_records', combined.add_dns_records),
            ('js_files', combined.add_js_files),
            ('api_endpoints', combined.add_api_endpoints),
            ('secrets', combined.add_secrets),
        ):
            adder(data.get(field, []))
        for vuln in ctx.get('vulnerabilities', {}).get('all', []):
            combined.add_vulnerabilities([vuln])
    return combined.build()
+571
View File
@@ -0,0 +1,571 @@
"""
NeuroSploit v3 - Kali Linux Per-Scan Sandbox
Each scan gets its own Docker container based on kalilinux/kali-rolling.
Tools installed on-demand the first time they are requested.
Container destroyed when scan completes.
"""
import asyncio
import hashlib
import io
import json
import logging
import os
import re
import shlex
import tarfile
import time
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple, Set
logger = logging.getLogger(__name__)
try:
import docker
from docker.errors import DockerException, NotFound, APIError
HAS_DOCKER = True
except ImportError:
HAS_DOCKER = False
from core.sandbox_manager import (
BaseSandbox, SandboxResult,
parse_nuclei_jsonl, parse_naabu_output,
)
from core.tool_registry import ToolRegistry
class KaliSandbox(BaseSandbox):
"""Per-scan Docker container based on Kali Linux.
Lifecycle: create -> install tools on demand -> execute -> destroy.
Each instance owns exactly one container named 'neurosploit-{scan_id}'.
"""
DEFAULT_TIMEOUT = 300
MAX_OUTPUT = 2 * 1024 * 1024 # 2MB
    def __init__(
        self,
        scan_id: str,
        image: str = "neurosploit-kali:latest",
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        network_mode: str = "bridge",
        enable_vpn: bool = False,
    ):
        """Prepare (but do not start) a per-scan Kali sandbox.

        Args:
            scan_id: Unique scan identifier; becomes part of the container name.
            image: Docker image tag to run (must already be built locally).
            memory_limit: Docker memory cap string, e.g. "2g".
            cpu_limit: CPU cores granted; converted to a cpu quota at create time.
            network_mode: Docker network mode for the container.
            enable_vpn: When True, /dev/net/tun is mapped in so OpenVPN can run.
        """
        self.scan_id = scan_id
        # One container per scan: name is derived from the scan id.
        self.container_name = f"neurosploit-{scan_id}"
        self.image = image
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.network_mode = network_mode
        self.enable_vpn = enable_vpn
        # Docker client/container handles; populated by initialize().
        self._client = None
        self._container = None
        self._available = False
        # Tools confirmed present this session (cache for _ensure_tool).
        self._installed_tools: Set[str] = set()
        self._tool_registry = ToolRegistry()
        self._created_at: Optional[datetime] = None
        # VPN state managed by connect_vpn()/disconnect_vpn().
        self._vpn_connected = False
        self._vpn_config_path: Optional[str] = None
    async def initialize(self) -> Tuple[bool, str]:
        """Create and start a new Kali container for this scan.

        Returns (success, message). A still-running container with the same
        name is reused (crash recovery); a stale one is removed and recreated.
        Requires the sandbox image to exist locally.
        """
        if not HAS_DOCKER:
            return False, "Docker SDK not installed"
        try:
            self._client = docker.from_env()
            self._client.ping()
        except Exception as e:
            return False, f"Docker not available: {e}"
        # Check if container already exists (resume after crash)
        try:
            existing = self._client.containers.get(self.container_name)
            if existing.status == "running":
                self._container = existing
                self._available = True
                self._created_at = datetime.utcnow()
                return True, f"Resumed existing container {self.container_name}"
            else:
                # Exited/created leftover: discard and recreate below.
                existing.remove(force=True)
        except NotFound:
            pass
        # Check image exists
        try:
            self._client.images.get(self.image)
        except NotFound:
            return False, (
                f"Kali sandbox image '{self.image}' not found. "
                "Build with: docker build -f docker/Dockerfile.kali -t neurosploit-kali:latest docker/"
            )
        # Create container
        try:
            # Docker expresses CPU limits as quota/period; 100000us is the default period.
            cpu_quota = int(self.cpu_limit * 100000)
            run_kwargs: Dict[str, Any] = dict(
                image=self.image,
                command="sleep infinity",
                name=self.container_name,
                detach=True,
                network_mode=self.network_mode,
                mem_limit=self.memory_limit,
                cpu_period=100000,
                cpu_quota=cpu_quota,
                cap_add=["NET_RAW", "NET_ADMIN"],
                security_opt=["no-new-privileges:true"],
                labels={
                    "neurosploit.scan_id": self.scan_id,
                    "neurosploit.type": "kali-sandbox",
                },
            )
            if self.enable_vpn:
                # OpenVPN needs the TUN device mapped into the container.
                run_kwargs["devices"] = ["/dev/net/tun:/dev/net/tun"]
            self._container = self._client.containers.run(**run_kwargs)
            self._available = True
            self._created_at = datetime.utcnow()
            logger.info(f"Created Kali container {self.container_name} for scan {self.scan_id}")
            return True, f"Container {self.container_name} started"
        except Exception as e:
            return False, f"Failed to create container: {e}"
@property
def is_available(self) -> bool:
return self._available and self._container is not None
@property
def container_id(self) -> Optional[str]:
"""Short Docker container ID."""
return self._container.short_id if self._container else None
@property
def image_digest(self) -> Optional[str]:
"""Docker image digest (sha256 prefix)."""
if not self._container:
return None
try:
return self._container.image.id[:19]
except Exception:
return None
    async def stop(self):
        """Stop and remove this scan's container.

        Best-effort: a failed graceful stop still falls through to a forced
        remove, and no error propagates to the caller.
        """
        if self._container:
            try:
                # Give in-flight processes up to 10s before SIGKILL.
                self._container.stop(timeout=10)
            except Exception:
                pass
            try:
                self._container.remove(force=True)
                logger.info(f"Destroyed container {self.container_name}")
            except Exception as e:
                logger.warning(f"Error removing {self.container_name}: {e}")
            self._container = None
            self._available = False
    async def health_check(self) -> Dict:
        """Run health check on this container.

        Probes the core tool trio (nuclei/naabu/nmap) with one exec call and
        reports status, detected tools, on-demand installs, and uptime.
        "degraded" means the container responds but none of the probes matched.
        """
        if not self.is_available:
            return {"status": "unavailable", "scan_id": self.scan_id, "tools": []}
        result = await self._exec(
            "nuclei -version 2>&1; naabu -version 2>&1; nmap --version 2>&1 | head -1",
            timeout=15,
        )
        tools = []
        output = (result.stdout or "").lower()
        # A tool counts as present if its name appears in the combined version output.
        for tool in ["nuclei", "naabu", "nmap"]:
            if tool in output:
                tools.append(tool)
        uptime = 0.0
        if self._created_at:
            uptime = (datetime.utcnow() - self._created_at).total_seconds()
        return {
            "status": "healthy" if tools else "degraded",
            "scan_id": self.scan_id,
            "container": self.container_name,
            "tools": tools,
            "installed_tools": sorted(self._installed_tools),
            "uptime_seconds": uptime,
        }
# ------------------------------------------------------------------
# Low-level execution
# ------------------------------------------------------------------
async def _exec(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> SandboxResult:
"""Execute command inside this container via docker exec."""
task_id = hashlib.md5(f"{time.time()}-{command[:50]}".encode()).hexdigest()[:8]
started_at = datetime.utcnow().isoformat()
if not self.is_available:
return SandboxResult(
tool="kali", command=command, exit_code=-1,
stdout="", stderr="", duration_seconds=0,
error="Container not available",
task_id=task_id, started_at=started_at,
completed_at=datetime.utcnow().isoformat(),
)
started = time.time()
try:
exec_result = await asyncio.get_event_loop().run_in_executor(
None,
lambda: self._container.exec_run(
cmd=["bash", "-c", command],
stdout=True, stderr=True, demux=True,
),
)
duration = time.time() - started
completed_at = datetime.utcnow().isoformat()
stdout_raw, stderr_raw = exec_result.output
stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
stderr = (stderr_raw or b"").decode("utf-8", errors="replace")
if len(stdout) > self.MAX_OUTPUT:
stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
if len(stderr) > self.MAX_OUTPUT:
stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"
return SandboxResult(
tool="kali", command=command,
exit_code=exec_result.exit_code,
stdout=stdout, stderr=stderr,
duration_seconds=round(duration, 2),
task_id=task_id, started_at=started_at,
completed_at=completed_at,
)
except Exception as e:
duration = time.time() - started
return SandboxResult(
tool="kali", command=command, exit_code=-1,
stdout="", stderr="", duration_seconds=round(duration, 2),
error=str(e),
task_id=task_id, started_at=started_at,
completed_at=datetime.utcnow().isoformat(),
)
# ------------------------------------------------------------------
# On-demand tool installation
# ------------------------------------------------------------------
    async def _ensure_tool(self, tool: str) -> bool:
        """Ensure a tool is installed in this container. Returns True if available.

        Resolution order: session cache -> `which` probe inside the container
        -> install recipe from the ToolRegistry (5-minute budget). Successful
        resolutions are cached in self._installed_tools for the scan lifetime.
        """
        if tool in self._installed_tools:
            return True
        # Check if already present in the base image
        check = await self._exec(f"which {shlex.quote(tool)} 2>/dev/null", timeout=10)
        if check.exit_code == 0 and check.stdout.strip():
            self._installed_tools.add(tool)
            return True
        # Get install recipe from registry
        recipe = self._tool_registry.get_install_command(tool)
        if not recipe:
            logger.warning(f"No install recipe for '{tool}' in Kali container")
            return False
        logger.info(f"[{self.container_name}] Installing {tool}...")
        result = await self._exec(recipe, timeout=300)
        if result.exit_code == 0:
            self._installed_tools.add(tool)
            logger.info(f"[{self.container_name}] Installed {tool} successfully")
            return True
        else:
            logger.warning(
                f"[{self.container_name}] Failed to install {tool}: "
                f"{(result.stderr or result.stdout or '')[:300]}"
            )
            return False
# ------------------------------------------------------------------
# High-level tool APIs (same signatures as SandboxManager)
# ------------------------------------------------------------------
    async def run_nuclei(
        self, target, templates=None, severity=None,
        tags=None, rate_limit=150, timeout=600,
    ) -> SandboxResult:
        """Run nuclei against `target`; JSONL findings parsed into result.findings.

        templates/severity/tags map directly to the corresponding nuclei flags
        when provided.
        """
        await self._ensure_tool("nuclei")
        cmd_parts = [
            "nuclei", "-u", shlex.quote(target),
            "-jsonl", "-rate-limit", str(rate_limit),
            "-silent", "-no-color",
        ]
        if templates:
            cmd_parts.extend(["-t", shlex.quote(templates)])
        if severity:
            cmd_parts.extend(["-severity", shlex.quote(severity)])
        if tags:
            cmd_parts.extend(["-tags", shlex.quote(tags)])
        # stderr is discarded so progress noise can't pollute the JSONL stream.
        result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
        result.tool = "nuclei"
        if result.stdout:
            result.findings = parse_nuclei_jsonl(result.stdout)
        return result
    async def run_naabu(
        self, target, ports=None, top_ports=None,
        scan_type="s", rate=1000, timeout=300,
    ) -> SandboxResult:
        """Port-scan `target` with naabu; JSON output parsed into result.findings.

        An explicit `ports` spec wins over `top_ports`; with neither given,
        the top 1000 ports are scanned.
        """
        await self._ensure_tool("naabu")
        cmd_parts = [
            "naabu", "-host", shlex.quote(target),
            "-json", "-rate", str(rate), "-silent", "-no-color",
        ]
        if ports:
            cmd_parts.extend(["-p", shlex.quote(str(ports))])
        elif top_ports:
            cmd_parts.extend(["-top-ports", str(top_ports)])
        else:
            cmd_parts.extend(["-top-ports", "1000"])
        if scan_type:
            cmd_parts.extend(["-scan-type", scan_type])
        result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
        result.tool = "naabu"
        if result.stdout:
            result.findings = parse_naabu_output(result.stdout)
        return result
async def run_httpx(self, targets, timeout=120) -> SandboxResult:
await self._ensure_tool("httpx")
if isinstance(targets, str):
targets = [targets]
target_str = "\\n".join(shlex.quote(t) for t in targets)
command = (
f'echo -e "{target_str}" | httpx -silent -json '
f'-title -tech-detect -status-code -content-length '
f'-follow-redirects -no-color 2>/dev/null'
)
result = await self._exec(command, timeout=timeout)
result.tool = "httpx"
if result.stdout:
findings = []
for line in result.stdout.strip().split("\\n"):
try:
data = json.loads(line)
findings.append({
"url": data.get("url", ""),
"status_code": data.get("status_code", 0),
"title": data.get("title", ""),
"technologies": data.get("tech", []),
"content_length": data.get("content_length", 0),
"webserver": data.get("webserver", ""),
})
except (json.JSONDecodeError, ValueError):
continue
result.findings = findings
return result
async def run_subfinder(self, domain, timeout=120) -> SandboxResult:
await self._ensure_tool("subfinder")
command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
result = await self._exec(command, timeout=timeout)
result.tool = "subfinder"
if result.stdout:
subs = [s.strip() for s in result.stdout.strip().split("\\n") if s.strip()]
result.findings = [{"subdomain": s} for s in subs]
return result
async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> SandboxResult:
await self._ensure_tool("nmap")
cmd_parts = ["nmap", "-sV"]
if scripts:
cmd_parts.append("-sC")
if ports:
cmd_parts.extend(["-p", shlex.quote(str(ports))])
cmd_parts.extend(["-oN", "/dev/stdout", shlex.quote(target)])
result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
result.tool = "nmap"
return result
    async def run_tool(self, tool, args, timeout=300) -> SandboxResult:
        """Run any tool (validates whitelist, installs on demand).

        The allowed-tool list comes from config/config.json (sandbox.tools),
        with a hardcoded fallback set when the config is absent/empty.
        NOTE(review): `args` is interpolated into the shell line unquoted —
        callers are responsible for argument safety.
        """
        # Load whitelist from config
        allowed_tools = set()
        try:
            with open("config/config.json") as f:
                cfg = json.load(f)
                allowed_tools = set(cfg.get("sandbox", {}).get("tools", []))
        except Exception:
            # Missing/invalid config falls through to the default list below.
            pass
        if not allowed_tools:
            allowed_tools = {
                "nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
                "dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
                "whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
                "wfuzz", "arjun", "wafw00f", "waybackurls",
            }
        if tool not in allowed_tools:
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error=f"Tool '{tool}' not in allowed list",
            )
        if not await self._ensure_tool(tool):
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error=f"Could not install '{tool}' in Kali container",
            )
        result = await self._exec(f"{shlex.quote(tool)} {args} 2>&1", timeout=timeout)
        result.tool = tool
        return result
async def execute_raw(self, command, timeout=300) -> SandboxResult:
result = await self._exec(command, timeout=timeout)
result.tool = "raw"
return result
# ------------------------------------------------------------------
# File upload
# ------------------------------------------------------------------
async def upload_file(self, file_bytes: bytes, dest_path: str) -> bool:
"""Upload a file into the container via docker put_archive."""
if not self.is_available:
return False
tar_stream = io.BytesIO()
fname = os.path.basename(dest_path)
tarinfo = tarfile.TarInfo(name=fname)
tarinfo.size = len(file_bytes)
tarinfo.mode = 0o600
with tarfile.open(fileobj=tar_stream, mode="w") as tar:
tar.addfile(tarinfo, io.BytesIO(file_bytes))
tar_stream.seek(0)
dest_dir = os.path.dirname(dest_path) or "/"
try:
await self._exec(f"mkdir -p {shlex.quote(dest_dir)}", timeout=10)
loop = asyncio.get_event_loop()
success = await loop.run_in_executor(
None,
lambda: self._container.put_archive(dest_dir, tar_stream),
)
return bool(success)
except Exception as e:
logger.warning(f"Failed to upload file to {dest_path}: {e}")
return False
# ------------------------------------------------------------------
# VPN lifecycle
# ------------------------------------------------------------------
    async def connect_vpn(
        self,
        config_bytes: bytes,
        username: Optional[str] = None,
        password: Optional[str] = None,
    ) -> Tuple[bool, str]:
        """Upload .ovpn config and start OpenVPN inside the container.

        Args:
            config_bytes: Raw .ovpn file contents.
            username: Optional VPN username (requires password too).
            password: Optional VPN password; credentials are written to an
                auth file and wired into the config via auth-user-pass.

        Returns:
            (success, message); on success the message includes the tun0 IP.
        """
        if not self.is_available:
            return False, "Container not available"
        ovpn_path = "/etc/openvpn/client.ovpn"
        if not await self.upload_file(config_bytes, ovpn_path):
            return False, "Failed to upload .ovpn config"
        self._vpn_config_path = ovpn_path
        # Write auth file if credentials provided
        if username and password:
            auth_content = f"{username}\n{password}\n".encode()
            auth_path = "/etc/openvpn/auth.txt"
            if not await self.upload_file(auth_content, auth_path):
                return False, "Failed to upload credentials"
            await self._exec(f"chmod 600 {auth_path}", timeout=5)
            # Append auth-user-pass directive if not present
            await self._exec(
                f"grep -q 'auth-user-pass' {ovpn_path} || "
                f"echo 'auth-user-pass {auth_path}' >> {ovpn_path}",
                timeout=5,
            )
            # Replace bare auth-user-pass with path version
            await self._exec(
                f"sed -i 's|auth-user-pass$|auth-user-pass {auth_path}|' {ovpn_path}",
                timeout=5,
            )
        # Create TUN device if missing (containers don't always have it)
        await self._exec(
            "mkdir -p /dev/net && "
            "[ -c /dev/net/tun ] || mknod /dev/net/tun c 10 200; "
            "chmod 600 /dev/net/tun",
            timeout=5,
        )
        # Kill any existing OpenVPN
        await self._exec("pkill -9 openvpn 2>/dev/null", timeout=5)
        # Start OpenVPN as a daemon so the exec call returns immediately
        result = await self._exec(
            f"openvpn --config {ovpn_path} --daemon "
            f"--log /var/log/openvpn.log "
            f"--writepid /var/run/openvpn.pid",
            timeout=15,
        )
        if result.exit_code != 0:
            return False, f"OpenVPN start failed: {result.stderr or result.stdout}"
        # Wait up to 20s for tun interface to come up with an IPv4 address
        for _ in range(20):
            await asyncio.sleep(1)
            check = await self._exec("ip addr show tun0 2>/dev/null", timeout=5)
            if check.exit_code == 0 and "inet " in check.stdout:
                self._vpn_connected = True
                match = re.search(r"inet\s+(\d+\.\d+\.\d+\.\d+)", check.stdout)
                ip = match.group(1) if match else "unknown"
                return True, f"VPN connected. Tunnel IP: {ip}"
        # Timeout - surface the tail of the OpenVPN log for diagnosis
        log_result = await self._exec("tail -30 /var/log/openvpn.log 2>/dev/null", timeout=5)
        return False, f"VPN timed out. Log: {(log_result.stdout or '')[-500:]}"
async def disconnect_vpn(self) -> Tuple[bool, str]:
"""Kill OpenVPN process inside the container."""
if not self.is_available:
return False, "Container not available"
await self._exec(
"kill $(cat /var/run/openvpn.pid 2>/dev/null) 2>/dev/null; "
"pkill -9 openvpn 2>/dev/null",
timeout=10,
)
self._vpn_connected = False
return True, "VPN disconnected"
    async def get_vpn_status(self) -> Dict:
        """Check VPN status inside the container.

        The VPN counts as connected only when the openvpn process is alive
        AND tun0 has an IPv4 address; the result is cached in
        self._vpn_connected.
        """
        if not self.is_available:
            return {"connected": False, "ip": None, "interface": None}
        connected = False
        ip_addr = None
        proc_check = await self._exec("pgrep -a openvpn", timeout=5)
        if proc_check.exit_code == 0 and proc_check.stdout.strip():
            connected = True
        if connected:
            tun_check = await self._exec("ip addr show tun0 2>/dev/null", timeout=5)
            if tun_check.exit_code == 0:
                match = re.search(r"inet\s+(\d+\.\d+\.\d+\.\d+)", tun_check.stdout)
                if match:
                    ip_addr = match.group(1)
                else:
                    connected = False  # Process alive but no interface yet
        self._vpn_connected = connected
        return {"connected": connected, "ip": ip_addr, "interface": "tun0" if connected else None}
+281
View File
@@ -0,0 +1,281 @@
#!/usr/bin/env python3
"""
Knowledge Augmentor - Adversarial pattern recognition from bug bounty data.
Loads the bug bounty finetuning dataset and provides retrieval-based
context enrichment for agent prompts. This is for PATTERN RECOGNITION
and adversarial intuition -- NOT for replaying exploits.
The augmentor:
- Builds a keyword index by vulnerability type
- Retrieves relevant patterns matching current testing context
- Injects formatted reference material into agent prompts
- Explicitly instructs the model to adapt, not copy
"""
import json
import logging
from typing import Dict, List, Optional
from pathlib import Path
logger = logging.getLogger(__name__)
# Optional RAG engine for semantic search upgrade
try:
from backend.core.rag import RAGEngine
HAS_RAG_ENGINE = True
except ImportError:
HAS_RAG_ENGINE = False
RAGEngine = None
class KnowledgeAugmentor:
"""Retrieval-based knowledge augmentation from bug bounty dataset.
Supports two retrieval modes:
- RAG mode (when RAGEngine available): Semantic vector search for better relevance
- Keyword mode (default fallback): Keyword index matching (original behavior)
"""
# Vulnerability type keyword mappings
VULN_KEYWORDS = {
'xss': ['xss', 'cross-site scripting', 'reflected xss', 'stored xss', 'dom xss',
'script injection', 'html injection'],
'sqli': ['sql injection', 'sqli', 'union select', 'blind sql', 'error-based sql',
'time-based sql', 'second-order sql'],
'ssrf': ['ssrf', 'server-side request forgery', 'internal service'],
'idor': ['idor', 'insecure direct object', 'broken object level',
'bola', 'horizontal privilege'],
'rce': ['rce', 'remote code execution', 'command injection', 'os command',
'code execution', 'shell injection'],
'lfi': ['lfi', 'local file inclusion', 'path traversal', 'directory traversal',
'file read', 'file disclosure'],
'auth_bypass': ['authentication bypass', 'broken authentication', 'auth bypass',
'session fixation', 'jwt', 'token manipulation'],
'csrf': ['csrf', 'cross-site request forgery', 'state-changing'],
'open_redirect': ['open redirect', 'url redirect', 'redirect vulnerability'],
'xxe': ['xxe', 'xml external entity', 'xml injection'],
'ssti': ['ssti', 'server-side template injection', 'template injection'],
'race_condition': ['race condition', 'toctou', 'concurrency'],
'graphql': ['graphql', 'introspection', 'batching attack'],
'api': ['api', 'rest api', 'broken api', 'api key', 'rate limiting'],
'deserialization': ['deserialization', 'insecure deserialization', 'pickle',
'object injection'],
'upload': ['file upload', 'unrestricted upload', 'web shell', 'upload bypass'],
'cors': ['cors', 'cross-origin', 'origin validation'],
'subdomain_takeover': ['subdomain takeover', 'dangling dns', 'cname'],
'information_disclosure': ['information disclosure', 'sensitive data', 'data exposure',
'directory listing', 'source code disclosure'],
}
    def __init__(self, dataset_path: str = "models/bug-bounty/bugbounty_finetuning_dataset.json",
                 max_patterns: int = 3, rag_engine=None):
        """Set up a lazy-loading augmentor.

        Args:
            dataset_path: JSON finetuning dataset (list of entries with
                instruction/input/output keys).
            max_patterns: Default number of patterns per retrieval.
            rag_engine: Optional pre-built RAGEngine; when omitted, one is
                constructed if available unless ENABLE_RAG=false.
        """
        self.dataset_path = Path(dataset_path)
        self.max_patterns = max_patterns
        self.entries: List[Dict] = []
        self.index: Dict[str, List[int]] = {}  # vuln_type -> list of entry indices
        self._loaded = False  # dataset is loaded lazily on first retrieval
        # RAG engine for semantic search (optional upgrade)
        self._rag_engine = rag_engine
        if not self._rag_engine and HAS_RAG_ENGINE:
            try:
                import os
                if os.getenv("ENABLE_RAG", "true").lower() != "false":
                    self._rag_engine = RAGEngine(data_dir="data", backend=os.getenv("RAG_BACKEND", "auto"))
                    if not self._rag_engine.is_indexed:
                        self._rag_engine.index_all()
            except Exception as e:
                # RAG is a best-effort enhancement; fall back to keyword mode.
                logger.debug(f"RAG engine not available for augmentor: {e}")
                self._rag_engine = None
    def _ensure_loaded(self):
        """Lazy load and index the dataset on first use.

        Idempotent: `_loaded` is set even on failure, so a missing or broken
        dataset is not re-attempted on every retrieval call.
        """
        if self._loaded:
            return
        if not self.dataset_path.exists():
            logger.warning(f"Bug bounty dataset not found: {self.dataset_path}")
            self._loaded = True
            return
        try:
            with open(self.dataset_path, 'r', encoding='utf-8') as f:
                self.entries = json.load(f)
            logger.info(f"Loaded {len(self.entries)} entries from bug bounty dataset")
            self._build_index()
        except Exception as e:
            logger.error(f"Failed to load bug bounty dataset: {e}")
        self._loaded = True
def _build_index(self):
"""Build keyword index over the dataset entries."""
for i, entry in enumerate(self.entries):
text = (
entry.get('instruction', '') + ' ' +
entry.get('input', '') + ' ' +
entry.get('output', '')
).lower()
for vuln_type, keywords in self.VULN_KEYWORDS.items():
for kw in keywords:
if kw in text:
self.index.setdefault(vuln_type, []).append(i)
break # One match per vuln_type per entry
indexed_types = {k: len(v) for k, v in self.index.items()}
logger.info(f"Knowledge index built: {indexed_types}")
    def get_relevant_patterns(self, vulnerability_type: str,
                              technologies: Optional[List[str]] = None,
                              max_entries: Optional[int] = None) -> str:
        """Retrieve relevant bug bounty patterns for context enrichment.
        Args:
            vulnerability_type: Type of vulnerability being tested (e.g., 'xss', 'sqli')
            technologies: Optional list of detected technologies for relevance boosting
            max_entries: Override default max patterns count
        Returns:
            Formatted string for injection into LLM prompts as cognitive augmentation.
            Returns empty string if no relevant patterns found.
        """
        # Try RAG-based semantic retrieval first (much better relevance)
        if self._rag_engine:
            try:
                tech_str = ", ".join(technologies[:3]) if technologies else ""
                rag_context = self._rag_engine.get_testing_context(
                    vuln_type=vulnerability_type,
                    technology=tech_str,
                    max_chars=3000
                )
                # Only accept a non-trivial context; otherwise fall through.
                if rag_context and len(rag_context) > 50:
                    return rag_context
            except Exception as e:
                logger.debug(f"RAG retrieval failed, falling back to keyword: {e}")
        # Fallback: keyword-based retrieval (original behavior)
        self._ensure_loaded()
        limit = max_entries or self.max_patterns
        # Normalize e.g. "open redirect" / "open-redirect" -> "open_redirect".
        vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')
        # Try exact match first, then partial
        candidates = self.index.get(vuln_key, [])
        if not candidates:
            # Try partial matching
            for key, indices in self.index.items():
                if vuln_key in key or key in vuln_key:
                    candidates = indices
                    break
        if not candidates:
            return ""
        # Deduplicate (preserves first-seen order)
        candidates = list(dict.fromkeys(candidates))
        # Score by technology relevance if technologies provided
        if technologies:
            scored = []
            for idx in candidates:
                entry = self.entries[idx]
                text = (entry.get('output', '') + ' ' + entry.get('instruction', '')).lower()
                tech_score = sum(1 for t in technologies if t.lower() in text)
                scored.append((tech_score, idx))
            scored.sort(key=lambda x: x[0], reverse=True)
            candidates = [idx for _, idx in scored]
        selected = candidates[:limit]
        # Build augmentation context with explicit adapt-don't-copy framing
        augmentation = (
            "\n\n=== ADVERSARIAL PATTERN CONTEXT (Bug Bounty Knowledge) ===\n"
            "These are REFERENCE PATTERNS for understanding attack vectors and methodology.\n"
            "ADAPT the approach to the current target. Do NOT replay exact exploits.\n"
            "Use these as cognitive anchors for creative hypothesis generation.\n\n"
        )
        for i, idx in enumerate(selected, 1):
            entry = self.entries[idx]
            instruction = entry.get('instruction', '')[:300]
            output = entry.get('output', '')
            # Extract methodology-relevant sections, truncate for context budget
            methodology = self._extract_methodology(output, max_chars=1500)
            augmentation += f"--- Pattern {i} ---\n"
            augmentation += f"Context: {instruction}\n"
            augmentation += f"Methodology:\n{methodology}\n\n"
        augmentation += "=== END ADVERSARIAL PATTERN CONTEXT ===\n"
        return augmentation
def _extract_methodology(self, text: str, max_chars: int = 1500) -> str:
"""Extract the most methodology-relevant portion of a writeup."""
# Look for methodology/steps/approach sections
markers = ['### steps', '### methodology', '### approach', '### exploitation',
'## steps', '## methodology', '## approach', '## exploitation',
'steps to reproduce', 'reproduction steps', 'proof of concept']
text_lower = text.lower()
for marker in markers:
idx = text_lower.find(marker)
if idx != -1:
return text[idx:idx + max_chars]
# Fall back to first max_chars of the output
return text[:max_chars]
def get_available_types(self) -> List[str]:
"""Return list of vulnerability types that have indexed entries."""
self._ensure_loaded()
return sorted(self.index.keys())
def get_entry_count(self, vulnerability_type: str) -> int:
"""Return count of indexed entries for a vulnerability type."""
self._ensure_loaded()
vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')
return len(self.index.get(vuln_key, []))
    def _get_custom_knowledge_patterns(self, vuln_type: str) -> str:
        """Get patterns from user-uploaded custom knowledge documents.

        Best-effort: returns "" when the knowledge processor is unavailable
        or has no entries for this vulnerability type; never raises.
        """
        try:
            # Imported lazily so the augmentor works without the processor module.
            from backend.core.knowledge_processor import KnowledgeProcessor
            processor = KnowledgeProcessor()
            entries = processor.get_patterns_for_vuln(vuln_type, max_entries=3)
            if not entries:
                return ""
            context = "\n\n=== CUSTOM KNOWLEDGE (User-Uploaded Research) ===\n"
            for i, entry in enumerate(entries, 1):
                context += f"--- Source: {entry.get('source_doc', 'Unknown')} ---\n"
                if entry.get("methodology"):
                    context += f"Methodology: {entry['methodology']}\n"
                if entry.get("key_insights"):
                    context += f"Key Insight: {entry['key_insights']}\n"
                if entry.get("payloads"):
                    context += f"Payloads: {', '.join(entry['payloads'][:5])}\n"
                if entry.get("bypass_techniques"):
                    context += f"Bypasses: {', '.join(entry['bypass_techniques'][:5])}\n"
                context += "\n"
            context += "=== END CUSTOM KNOWLEDGE ===\n"
            return context
        except Exception as e:
            logger.debug(f"Custom knowledge lookup failed: {e}")
            return ""
def get_relevant_patterns_with_custom(self, vulnerability_type: str,
technologies: Optional[List[str]] = None,
max_entries: Optional[int] = None) -> str:
"""Get patterns from both bug bounty dataset AND custom uploaded knowledge."""
# Original dataset patterns
result = self.get_relevant_patterns(vulnerability_type, technologies, max_entries)
# Custom knowledge patterns
custom = self._get_custom_knowledge_patterns(vulnerability_type)
if custom:
result += custom
return result
+882
View File
@@ -0,0 +1,882 @@
#!/usr/bin/env python3
"""
LLM Manager - Unified interface for multiple LLM providers
Supports: Claude, GPT, Gemini, Ollama, and custom models
"""
import os
import json
import subprocess
import time
from typing import Dict, List, Optional, Any
import logging
import requests
from pathlib import Path
import re
# Retry configuration
MAX_RETRIES = 3
RETRY_DELAY = 1.0 # seconds
RETRY_MULTIPLIER = 2.0
logger = logging.getLogger(__name__)
class LLMManager:
"""Manage multiple LLM providers"""
    def __init__(self, config: Dict):
        """Initialize LLM manager from the app config's `llm` section.

        Resolves the active profile (provider/model/API key), token and
        feature flags, applies the MAX_OUTPUT_TOKENS environment override,
        and loads the prompt libraries (JSON + Markdown).
        """
        self.config = config.get('llm', {})
        self.default_profile_name = self.config.get('default_profile', 'gemini_pro_default')
        self.profiles = self.config.get('profiles', {})
        # Missing profile degrades to {} so all .get() defaults below apply.
        self.active_profile = self.profiles.get(self.default_profile_name, {})
        # Load active profile settings
        self.provider = self.active_profile.get('provider', 'gemini').lower()
        self.model = self.active_profile.get('model', 'gemini-pro')
        # api_key may be a literal or a ${ENV_VAR} reference.
        self.api_key = self._get_api_key(self.active_profile.get('api_key', ''))
        self.temperature = self.active_profile.get('temperature', 0.7)
        self.max_tokens = self.active_profile.get('max_tokens', 4096)
        # New LLM parameters
        self.input_token_limit = self.active_profile.get('input_token_limit', 4096)
        self.output_token_limit = self.active_profile.get('output_token_limit', 4096)
        self.cache_enabled = self.active_profile.get('cache_enabled', False)
        self.search_context_level = self.active_profile.get('search_context_level', 'medium')  # low, medium, high
        self.pdf_support_enabled = self.active_profile.get('pdf_support_enabled', False)
        self.guardrails_enabled = self.active_profile.get('guardrails_enabled', False)
        self.hallucination_mitigation_strategy = self.active_profile.get('hallucination_mitigation_strategy', None)
        # MAX_OUTPUT_TOKENS override from environment (up to 64000 for Claude)
        env_max_tokens = os.getenv('MAX_OUTPUT_TOKENS', '').strip()
        if env_max_tokens:
            try:
                override = int(env_max_tokens)
                self.max_tokens = override
                self.output_token_limit = override
                logger.info(f"MAX_OUTPUT_TOKENS override applied: {override}")
            except ValueError:
                # Non-numeric override is ignored, profile value stands.
                logger.warning(f"Invalid MAX_OUTPUT_TOKENS value: {env_max_tokens}")
        # Model router (lazy init, set externally or via config)
        self._model_router = None
        # New prompt loading
        self.json_prompts_file_path = Path("prompts/library.json")
        self.md_prompts_dir_path = Path("prompts/md_library")
        self.prompts = self._load_all_prompts()  # New method to load both
        logger.info(f"Initialized LLM Manager - Provider: {self.provider}, Model: {self.model}, Profile: {self.default_profile_name}")
def _get_api_key(self, api_key_config: str) -> str:
"""Helper to get API key from config or environment variable"""
if api_key_config.startswith('${') and api_key_config.endswith('}'):
env_var = api_key_config[2:-1]
return os.getenv(env_var, '')
return api_key_config
    def _load_all_prompts(self) -> Dict:
        """Load prompts from JSON library and Markdown files (both prompts/ and prompts/md_library/).

        Returns a dict with two namespaces:
            "json_prompts": parsed contents of prompts/library.json
            "md_prompts": {file-stem: {"user_prompt": ..., "system_prompt": ...}}
        """
        all_prompts = {
            "json_prompts": {},
            "md_prompts": {}
        }
        # Load from JSON library
        if self.json_prompts_file_path.exists():
            try:
                with open(self.json_prompts_file_path, 'r') as f:
                    all_prompts["json_prompts"] = json.load(f)
                logger.info(f"Loaded prompts from JSON library: {self.json_prompts_file_path}")
            except Exception as e:
                logger.error(f"Error loading prompts from {self.json_prompts_file_path}: {e}")
        else:
            logger.warning(f"JSON prompts file not found at {self.json_prompts_file_path}. Some AI functionalities might be limited.")
        # Load from both prompts/ root and prompts/md_library/
        prompts_root = Path("prompts")
        md_dirs = [prompts_root, self.md_prompts_dir_path]
        for md_dir in md_dirs:
            if md_dir.is_dir():
                for md_file in md_dir.glob("*.md"):
                    try:
                        content = md_file.read_text()
                        prompt_name = md_file.stem  # Use filename as prompt name
                        # First match wins: prompts/ is scanned before
                        # prompts/md_library/, so root files shadow
                        # same-named md_library files.
                        if prompt_name in all_prompts["md_prompts"]:
                            continue
                        # Try structured format first (## User Prompt / ## System Prompt)
                        user_prompt_match = re.search(r"## User Prompt\n(.*?)(?=\n## System Prompt|\Z)", content, re.DOTALL)
                        system_prompt_match = re.search(r"## System Prompt\n(.*?)(?=\n## User Prompt|\Z)", content, re.DOTALL)
                        user_prompt = user_prompt_match.group(1).strip() if user_prompt_match else ""
                        system_prompt = system_prompt_match.group(1).strip() if system_prompt_match else ""
                        # If no structured format, use entire content as system_prompt
                        if not user_prompt and not system_prompt:
                            system_prompt = content.strip()
                            user_prompt = ""  # Will be filled with user input at runtime
                            logger.debug(f"Loaded {md_file.name} as full-content prompt")
                        if user_prompt or system_prompt:
                            all_prompts["md_prompts"][prompt_name] = {
                                "user_prompt": user_prompt,
                                "system_prompt": system_prompt
                            }
                            logger.debug(f"Loaded prompt: {prompt_name}")
                    except Exception as e:
                        logger.error(f"Error loading prompt from {md_file.name}: {e}")
        logger.info(f"Loaded {len(all_prompts['md_prompts'])} prompts from Markdown files.")
        return all_prompts
def get_prompt(self, library_type: str, category: str, name: str, default: str = "") -> str:
    """Look up a single prompt string from the loaded prompt libraries.

    `library_type` can be "json_prompts" or "md_prompts".
    `category` can be a JSON top-level key (e.g., 'exploitation') or an MD filename (e.g., 'red_team_agent').
    `name` can be a JSON sub-key (e.g., 'ai_exploit_planning_user') or 'user_prompt'/'system_prompt' for MD.
    Returns `default` when any level of the lookup is missing.
    """
    library = self.prompts.get(library_type, {})
    category_entries = library.get(category, {})
    return category_entries.get(name, default)
def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate a response from the configured LLM provider.

    Dispatches to the provider-specific `_generate_*` method, then runs the
    optional guardrail pass and hallucination-mitigation pass on the result.
    Any exception from the provider call is converted into an "Error: ..."
    string rather than propagated.
    """
    # Provider name -> backend method name; resolved lazily via getattr so
    # only the selected backend is looked up.
    backend_names = {
        'claude': '_generate_claude',
        'gpt': '_generate_gpt',
        'gemini': '_generate_gemini',
        'ollama': '_generate_ollama',
        'gemini-cli': '_generate_gemini_cli',
        'lmstudio': '_generate_lmstudio',
        'openrouter': '_generate_openrouter',
    }
    try:
        backend_attr = backend_names.get(self.provider)
        if backend_attr is None:
            raise ValueError(f"Unsupported provider: {self.provider}")
        raw_response = getattr(self, backend_attr)(prompt, system_prompt)
    except Exception as e:
        logger.error(f"Error generating raw response: {e}")
        return f"Error: {str(e)}"
    if self.guardrails_enabled:
        raw_response = self._apply_guardrails(raw_response)  # Apply guardrails here
    if self.hallucination_mitigation_strategy and self.hallucination_mitigation_strategy in ["grounding", "self_reflection", "consistency_check"]:
        logger.debug(f"Applying hallucination mitigation strategy: {self.hallucination_mitigation_strategy}")
        return self._mitigate_hallucination(raw_response, prompt, system_prompt)
    return raw_response
def routed_generate(self, prompt: str, system_prompt: Optional[str] = None, task_type: str = "default") -> str:
    """Generate with optional model routing based on task type.

    When model routing is enabled and the router produces a result for the
    task_type, that result is returned; otherwise this falls back to the
    default generate(). Task types: reasoning, analysis, generation,
    validation, default.
    """
    router = self._model_router
    if not router:
        return self.generate(prompt, system_prompt)
    routed = router.generate(prompt, system_prompt, task_type)
    if routed is not None:
        return routed
    # Router declined (returned None) -- fall back to the default path.
    return self.generate(prompt, system_prompt)
def _apply_guardrails(self, response: str) -> str:
"""Applies basic guardrails to the LLM response."""
if not self.guardrails_enabled:
return response
logger.debug("Applying guardrails...")
# Example: Simple keyword filtering
harmful_keywords = ["malicious_exploit_command", "destroy_system", "wipe_data", "unauthorized_access"] # Placeholder keywords
for keyword in harmful_keywords:
if keyword in response.lower():
logger.warning(f"Guardrail triggered: Found potentially harmful keyword '{keyword}'. Response will be sanitized or flagged.")
# A more robust solution would involve redaction, re-prompting, or flagging for human review.
# For this example, we'll replace the keyword.
response = response.replace(keyword, "[REDACTED_HARMFUL_CONTENT]")
response = response.replace(keyword.upper(), "[REDACTED_HARMFUL_CONTENT]")
# Example: Length check (if response is excessively long and not expected)
# Using output_token_limit for a more accurate comparison
if len(response.split()) > self.output_token_limit * 1.5: # Roughly estimate tokens by word count
logger.warning("Guardrail triggered: Response is excessively long. Truncating or flagging.")
response = " ".join(response.split()[:int(self.output_token_limit * 1.5)]) + "\n[RESPONSE TRUNCATED BY GUARDRAIL]"
# Ethical check (can be another LLM call, but for simplicity, a fixed instruction)
# This is more about ensuring the tone and content align with ethical hacking principles.
# This is a very simplistic example. A real ethical check would be more nuanced.
# For now, just a log or a general check for explicit unethical instructions.
if any(bad_phrase in response.lower() for bad_phrase in ["perform illegal activity", "bypass security illegally"]):
logger.warning("Guardrail triggered: Response contains potentially unethical instructions. Flagging for review.")
response = "[UNETHICAL CONTENT FLAGGED FOR REVIEW]\n" + response
return response
def _mitigate_hallucination(self, raw_response: str, original_prompt: str, original_system_prompt: Optional[str]) -> str:
    """Applies configured hallucination mitigation strategy.

    Supported strategies (selected via self.hallucination_mitigation_strategy):
    - "grounding": re-prompts the model to fact-check the response against the
      original prompt context.
    - "self_reflection": re-prompts the model to critique and, if needed,
      correct its own answer.
    - "consistency_check": generates the response 3 times; identical answers
      are returned directly, otherwise a synthesis call merges them.

    Any unrecognized strategy returns `raw_response` unchanged.

    The strategy attribute is temporarily set to None around the nested
    self.generate() calls so this pass cannot recurse into itself; it is
    restored in the `finally` block even if a nested call raises.
    """
    strategy = self.hallucination_mitigation_strategy
    # Temporarily disable mitigation to prevent infinite recursion when calling self.generate internally
    original_mitigation_state = self.hallucination_mitigation_strategy
    self.hallucination_mitigation_strategy = None
    try:
        if strategy == "grounding":
            # Second-pass prompt: ask the model to verify its own answer
            # strictly against the original prompt context.
            verification_prompt = f"""Review the following response:
---
{raw_response}
---
Based *only* on the context provided in the original prompt (user: '{original_prompt}', system: '{original_system_prompt or "None"}'), is this response factual and directly supported by the context? If not, correct it to be factual. If the response is completely unsourced or makes claims beyond the context, state 'UNSOURCED'."""
            logger.debug("Applying grounding strategy: Re-prompting for factual verification.")
            return self.generate(verification_prompt, "You are a fact-checker whose sole purpose is to verify LLM output against provided context.")
        elif strategy == "self_reflection":
            reflection_prompt = f"""Critically review the following response for accuracy, logical consistency, and adherence to the original prompt's instructions:
Original Prompt (User): {original_prompt}
Original Prompt (System): {original_system_prompt or "None"}
Generated Response: {raw_response}
Identify any potential hallucinations, inconsistencies, or areas where the response might have deviated from facts or instructions. If you find issues, provide a corrected and more reliable version of the response. If the response is good, state 'ACCURATE'."""
            logger.debug("Applying self-reflection strategy: Re-prompting for self-critique.")
            return self.generate(reflection_prompt, "You are an AI assistant designed to critically evaluate and improve other AI-generated content.")
        elif strategy == "consistency_check":
            logger.debug("Applying consistency check strategy: Generating multiple responses for comparison.")
            responses = []
            for i in range(3):  # Generate 3 responses for consistency check
                logger.debug(f"Generating response {i+1} for consistency check.")
                res = self.generate(original_prompt, original_system_prompt)
                responses.append(res)
            # All three identical -> confident answer; otherwise synthesize.
            if len(set(responses)) == 1:
                return responses[0]
            else:
                logger.warning("Consistency check found varying responses. Attempting to synthesize a consistent answer.")
                synthesis_prompt = (
                    f"Synthesize a single, consistent, and factual response from the following AI-generated options. "
                    f"Prioritize factual accuracy and avoid information present in only one response if contradictory. "
                    f"If there's significant disagreement, state the core disagreement.\n\n"
                    f"Options:\n" + "\n---\n".join(responses)
                )
                return self.generate(synthesis_prompt, "You are a highly analytical AI assistant tasked with synthesizing consistent information from multiple sources.")
        return raw_response  # Fallback if strategy not recognized or implemented
    finally:
        self.hallucination_mitigation_strategy = original_mitigation_state  # Restore original state
def _generate_claude(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate using Claude API with requests (bypasses httpx/SSL issues on macOS).

    Retries rate limits (429), server errors (5xx), and transport-level
    request failures with exponential backoff; fails fast on authentication
    (401) and other client errors.

    Args:
        prompt: User prompt text.
        system_prompt: Optional system prompt (sent as the "system" field).

    Returns:
        The text of the first content block of the reply.

    Raises:
        ValueError: Missing/invalid API key or non-retryable API error.
        ConnectionError: All retry attempts exhausted.
    """
    if not self.api_key:
        raise ValueError("ANTHROPIC_API_KEY not set. Please set the environment variable or configure in config.yaml")
    url = "https://api.anthropic.com/v1/messages"
    headers = {
        "x-api-key": self.api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json"
    }
    data = {
        "model": self.model,
        "max_tokens": self.max_tokens,
        "temperature": self.temperature,
        "messages": [{"role": "user", "content": prompt}]
    }
    if system_prompt:
        data["system"] = system_prompt

    def _sleep_before_retry(attempt: int, bump: int = 0) -> None:
        # Exponential backoff; `bump` shifts the exponent (rate limits back
        # off one step harder than server/transport errors, as before).
        sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + bump))
        logger.info(f"Retrying in {sleep_time:.1f}s...")
        time.sleep(sleep_time)

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            logger.debug(f"Claude API request attempt {attempt + 1}/{MAX_RETRIES}")
            response = requests.post(url, headers=headers, json=data, timeout=120)
            if response.status_code == 200:
                result = response.json()
                return result["content"][0]["text"]
            elif response.status_code == 401:
                logger.error("Claude API authentication failed. Check your ANTHROPIC_API_KEY")
                raise ValueError(f"Invalid API key: {response.text}")
            elif response.status_code == 429:
                last_error = f"Rate limit: {response.text}"
                logger.warning(f"Claude API rate limit hit (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt, bump=1)
            elif response.status_code >= 500:
                last_error = f"Server error {response.status_code}: {response.text}"
                logger.warning(f"Claude API server error (attempt {attempt + 1}/{MAX_RETRIES}): {response.status_code}")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt)
            else:
                logger.error(f"Claude API error: {response.status_code} - {response.text}")
                raise ValueError(f"API error {response.status_code}: {response.text}")
        # Timeout and ConnectionError are subclasses of RequestException, so a
        # single handler replaces the three previously-identical except blocks;
        # the retry/backoff behavior is unchanged.
        except requests.exceptions.RequestException as e:
            last_error = e
            logger.warning(f"Claude API request error (attempt {attempt + 1}/{MAX_RETRIES}): {e}")
            if attempt < MAX_RETRIES - 1:
                _sleep_before_retry(attempt)
    raise ConnectionError(f"Failed to connect to Claude API after {MAX_RETRIES} attempts: {last_error}")
def _generate_gpt(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate using OpenAI GPT API with requests (bypasses SDK issues).

    Retries rate limits (429), server errors (5xx), and transport-level
    request failures with exponential backoff; fails fast on authentication
    (401) and other client errors.

    Raises:
        ValueError: Missing/invalid API key or non-retryable API error.
        ConnectionError: All retry attempts exhausted.
    """
    if not self.api_key:
        raise ValueError("OPENAI_API_KEY not set. Please set the environment variable or configure in config.yaml")
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json"
    }
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    data = {
        "model": self.model,
        "messages": messages,
        "temperature": self.temperature,
        "max_tokens": self.max_tokens
    }

    def _sleep_before_retry(attempt: int, bump: int = 0) -> None:
        # Exponential backoff; rate limits back off one step harder (bump=1).
        sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + bump))
        logger.info(f"Retrying in {sleep_time:.1f}s...")
        time.sleep(sleep_time)

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            logger.debug(f"OpenAI API request attempt {attempt + 1}/{MAX_RETRIES}")
            response = requests.post(url, headers=headers, json=data, timeout=120)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            elif response.status_code == 401:
                logger.error("OpenAI API authentication failed. Check your OPENAI_API_KEY")
                raise ValueError(f"Invalid API key: {response.text}")
            elif response.status_code == 429:
                last_error = f"Rate limit: {response.text}"
                logger.warning(f"OpenAI API rate limit hit (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt, bump=1)
            elif response.status_code >= 500:
                last_error = f"Server error {response.status_code}: {response.text}"
                logger.warning(f"OpenAI API server error (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt)
            else:
                logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
                raise ValueError(f"API error {response.status_code}: {response.text}")
        # Timeout and ConnectionError are subclasses of RequestException, so a
        # single handler replaces the three previously-identical except blocks.
        except requests.exceptions.RequestException as e:
            last_error = e
            logger.warning(f"OpenAI API request error (attempt {attempt + 1}/{MAX_RETRIES}): {e}")
            if attempt < MAX_RETRIES - 1:
                _sleep_before_retry(attempt)
    raise ConnectionError(f"Failed to connect to OpenAI API after {MAX_RETRIES} attempts: {last_error}")
def _generate_gemini(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate using Google Gemini API with requests (bypasses SDK issues).

    Retries rate limits (429), server errors (5xx), and transport-level
    request failures with exponential backoff; fails fast on authentication
    errors (401/403) and other client errors.

    Raises:
        ValueError: Missing/invalid API key or non-retryable API error.
        ConnectionError: All retry attempts exhausted.
    """
    if not self.api_key:
        raise ValueError("GOOGLE_API_KEY not set. Please set the environment variable or configure in config.yaml")
    # Use v1beta for generateContent endpoint
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model}:generateContent?key={self.api_key}"
    headers = {
        "Content-Type": "application/json"
    }
    # Gemini has no dedicated system field here; prepend it to the prompt.
    full_prompt = prompt
    if system_prompt:
        full_prompt = f"{system_prompt}\n\n{prompt}"
    data = {
        "contents": [{"parts": [{"text": full_prompt}]}],
        "generationConfig": {
            "temperature": self.temperature,
            "maxOutputTokens": self.max_tokens
        }
    }

    def _sleep_before_retry(attempt: int, bump: int = 0) -> None:
        # Exponential backoff; rate limits back off one step harder (bump=1).
        sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + bump))
        logger.info(f"Retrying in {sleep_time:.1f}s...")
        time.sleep(sleep_time)

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            logger.debug(f"Gemini API request attempt {attempt + 1}/{MAX_RETRIES}")
            response = requests.post(url, headers=headers, json=data, timeout=120)
            if response.status_code == 200:
                result = response.json()
                # NOTE(review): a safety-blocked reply may lack "candidates";
                # the resulting KeyError is surfaced to the caller (generate()
                # converts it to an error string) -- same as before.
                return result["candidates"][0]["content"]["parts"][0]["text"]
            elif response.status_code == 401 or response.status_code == 403:
                logger.error("Gemini API authentication failed. Check your GOOGLE_API_KEY")
                raise ValueError(f"Invalid API key: {response.text}")
            elif response.status_code == 429:
                last_error = f"Rate limit: {response.text}"
                logger.warning(f"Gemini API rate limit hit (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt, bump=1)
            elif response.status_code >= 500:
                last_error = f"Server error {response.status_code}: {response.text}"
                logger.warning(f"Gemini API server error (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    _sleep_before_retry(attempt)
            else:
                logger.error(f"Gemini API error: {response.status_code} - {response.text}")
                raise ValueError(f"API error {response.status_code}: {response.text}")
        # Timeout and ConnectionError are subclasses of RequestException, so a
        # single handler replaces the three previously-identical except blocks.
        except requests.exceptions.RequestException as e:
            last_error = e
            logger.warning(f"Gemini API request error (attempt {attempt + 1}/{MAX_RETRIES}): {e}")
            if attempt < MAX_RETRIES - 1:
                _sleep_before_retry(attempt)
    raise ConnectionError(f"Failed to connect to Gemini API after {MAX_RETRIES} attempts: {last_error}")
def _generate_gemini_cli(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate a completion by piping the prompt through the local `gemini` CLI tool."""
    combined = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
    try:
        # Use gemini CLI tool
        proc = subprocess.run(
            ['gemini', 'chat', '-m', self.model],
            input=combined.encode(),
            capture_output=True,
            timeout=120
        )
        if proc.returncode == 0:
            return proc.stdout.decode().strip()
        error = proc.stderr.decode().strip()
        logger.error(f"Gemini CLI error: {error}")
        return f"Error: {error}"
    except subprocess.TimeoutExpired:
        logger.error("Gemini CLI timeout")
        return "Error: Request timeout"
    except Exception as e:
        logger.error(f"Gemini CLI error: {e}")
        return f"Error: {str(e)}"
def _generate_ollama(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate a completion from a local Ollama server (non-streaming)."""
    payload = {
        "model": self.model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": self.temperature,
            "num_predict": self.max_tokens
        }
    }
    if system_prompt:
        payload["system"] = system_prompt
    try:
        resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=120)
        resp.raise_for_status()
        return resp.json()["response"]
    except Exception as e:
        logger.error(f"Ollama error: {e}")
        return f"Error: {str(e)}"
def _generate_lmstudio(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """
    Generate using LM Studio's OpenAI-compatible local server.

    LM Studio exposes its API at http://localhost:1234/v1; each error class
    (no server, timeout, malformed reply) maps to a distinct error string.
    """
    endpoint = "http://localhost:1234/v1/chat/completions"
    chat_messages = []
    if system_prompt:
        chat_messages.append({"role": "system", "content": system_prompt})
    chat_messages.append({"role": "user", "content": prompt})
    payload = {
        "model": self.model,  # LM Studio auto-detects loaded model
        "messages": chat_messages,
        "temperature": self.temperature,
        "max_tokens": self.max_tokens,
        "stream": False
    }
    try:
        logger.debug(f"Sending request to LM Studio at {endpoint}")
        resp = requests.post(endpoint, json=payload, timeout=120)
        resp.raise_for_status()
        body = resp.json()
        return body["choices"][0]["message"]["content"]
    except requests.exceptions.ConnectionError:
        logger.error("LM Studio connection error. Ensure LM Studio server is running on http://localhost:1234")
        return "Error: Cannot connect to LM Studio. Please ensure LM Studio server is running on port 1234."
    except requests.exceptions.Timeout:
        logger.error("LM Studio request timeout")
        return "Error: LM Studio request timeout after 120 seconds"
    except KeyError as e:
        logger.error(f"LM Studio response format error: {e}")
        return f"Error: Unexpected response format from LM Studio: {str(e)}"
    except Exception as e:
        logger.error(f"LM Studio error: {e}")
        return f"Error: {str(e)}"
def _generate_openrouter(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate using OpenRouter API (OpenAI-compatible).

    OpenRouter supports hundreds of models through a unified API.
    Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-20250514').
    API key comes from OPENROUTER_API_KEY env var or config profile.

    Retries 429, 5xx, timeouts, AND connection errors with exponential
    backoff (connection errors previously aborted on the first attempt,
    unlike every other provider method); fails fast on 401 and other
    client errors.

    Raises:
        ValueError: Missing/invalid API key, non-retryable API error, or
            retries exhausted (kept as ValueError for caller compatibility).
    """
    if not self.api_key:
        raise ValueError("OPENROUTER_API_KEY not set. Please set the environment variable or configure in config.json")
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/neurosploit",
        "X-Title": "NeuroSploit"
    }
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    data = {
        "model": self.model,
        "messages": messages,
        "temperature": self.temperature,
        "max_tokens": self.max_tokens
    }
    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            logger.debug(f"OpenRouter API request attempt {attempt + 1}/{MAX_RETRIES} (model: {self.model})")
            response = requests.post(url, headers=headers, json=data, timeout=180)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            elif response.status_code == 401:
                raise ValueError(f"Invalid OpenRouter API key: {response.text}")
            elif response.status_code == 429:
                last_error = f"Rate limit: {response.text}"
                logger.warning(f"OpenRouter rate limit (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + 1)))
            elif response.status_code >= 500:
                last_error = f"Server error {response.status_code}: {response.text}"
                logger.warning(f"OpenRouter server error (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY * (RETRY_MULTIPLIER ** attempt))
            else:
                raise ValueError(f"OpenRouter API error {response.status_code}: {response.text}")
        # RequestException covers Timeout and ConnectionError; connection
        # failures are now retried with backoff instead of aborting on the
        # first attempt, matching the other provider methods.
        except requests.exceptions.RequestException as e:
            last_error = e
            logger.warning(f"OpenRouter request error (attempt {attempt + 1}/{MAX_RETRIES}): {e}")
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY * (RETRY_MULTIPLIER ** attempt))
    raise ValueError(f"OpenRouter API failed after {MAX_RETRIES} retries: {last_error}")
def analyze_vulnerability(self, vulnerability_data: Dict) -> Dict:
    """Analyze vulnerability and suggest exploits.

    Args:
        vulnerability_data: Raw vulnerability details to analyze.

    Returns:
        The model's reply parsed as JSON, or {"raw_response": <text>} when
        the reply is not valid JSON.
    """
    # This prompt will be fetched from library.json later
    prompt = self.get_prompt("json_prompts", "exploitation", "analyze_vulnerability_user", default=f"""
Analyze the following vulnerability data and provide exploitation recommendations:
Vulnerability: {json.dumps(vulnerability_data, indent=2)}
Provide:
1. Severity assessment (Critical/High/Medium/Low)
2. Exploitation difficulty
3. Potential impact
4. Recommended exploit techniques
5. Detection evasion strategies
6. Post-exploitation actions
Response in JSON format.
""")
    system_prompt = self.get_prompt("json_prompts", "exploitation", "analyze_vulnerability_system", default="""You are an expert penetration tester and security researcher.
Analyze vulnerabilities and provide detailed, actionable exploitation strategies.
Consider OWASP, CWE, and MITRE ATT&CK frameworks.
Always include ethical considerations and legal boundaries.""")
    response = self.generate(prompt, system_prompt)
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        # Was a bare `except:` which swallowed even KeyboardInterrupt;
        # only a JSON parse failure should fall back to the raw text.
        return {"raw_response": response}
def generate_payload(self, target_info: Dict, vulnerability_type: str) -> str:
    """Ask the LLM for an exploit payload for the given target/vulnerability pair.

    Prompts are taken from library.json when present; otherwise the inline
    defaults below are used. Returns the raw model output.
    """
    system_prompt = self.get_prompt("json_prompts", "exploitation", "generate_payload_system", default="""You are an expert exploit developer.
Generate sophisticated, tested payloads that are effective yet responsible.
Always include safety mechanisms and ethical guidelines.""")
    user_prompt = self.get_prompt("json_prompts", "exploitation", "generate_payload_user", default=f"""
Generate an exploit payload for the following scenario:
Target Information:
{json.dumps(target_info, indent=2)}
Vulnerability Type: {vulnerability_type}
Requirements:
1. Generate a working payload
2. Include obfuscation techniques
3. Add error handling
4. Ensure minimal detection footprint
5. Include cleanup procedures
Provide the payload code with detailed comments.
""")
    return self.generate(user_prompt, system_prompt)
def suggest_privilege_escalation(self, system_info: Dict) -> List[str]:
    """Suggest privilege escalation techniques.

    Args:
        system_info: Collected host/system details.

    Returns:
        The model's 'techniques' list, or [] when the reply is not valid
        JSON or is not a JSON object.
    """
    # This prompt will be fetched from library.json later
    prompt = self.get_prompt("json_prompts", "privesc", "suggest_privilege_escalation_user", default=f"""
Based on the following system information, suggest privilege escalation techniques:
System Info:
{json.dumps(system_info, indent=2)}
Provide:
1. Top 5 privilege escalation vectors
2. Required tools and commands
3. Detection likelihood
4. Success probability
5. Alternative approaches
Response in JSON format with prioritized list.
""")
    system_prompt = self.get_prompt("json_prompts", "privesc", "suggest_privilege_escalation_system", default="""You are a privilege escalation specialist.
Analyze system configurations and suggest effective escalation paths.
Consider Windows, Linux, and Active Directory environments.""")
    response = self.generate(prompt, system_prompt)
    try:
        result = json.loads(response)
    except json.JSONDecodeError:
        # Was a bare `except:` which swallowed even KeyboardInterrupt.
        return []
    # Non-dict JSON (e.g. a bare list) previously fell into the bare except
    # via AttributeError; handle it explicitly instead.
    if isinstance(result, dict):
        return result.get('techniques', [])
    return []
def analyze_network_topology(self, scan_results: Dict) -> Dict:
    """Analyze network topology and suggest attack paths.

    Args:
        scan_results: Aggregated network scan output.

    Returns:
        The model's reply parsed as JSON, or {"raw_response": <text>} when
        the reply is not valid JSON.
    """
    # This prompt will be fetched from library.json later
    prompt = self.get_prompt("json_prompts", "network_recon", "analyze_network_topology_user", default=f"""
Analyze the network topology and suggest attack paths:
Scan Results:
{json.dumps(scan_results, indent=2)}
Provide:
1. Network architecture overview
2. Critical assets identification
3. Attack surface analysis
4. Recommended attack paths (prioritized)
5. Lateral movement opportunities
6. Persistence locations
Response in JSON format.
""")
    system_prompt = self.get_prompt("json_prompts", "network_recon", "analyze_network_topology_system", default="""You are a network penetration testing expert.
Analyze network structures and identify optimal attack vectors.
Consider defense-in-depth and detection mechanisms.""")
    response = self.generate(prompt, system_prompt)
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        # Was a bare `except:` which swallowed even KeyboardInterrupt;
        # only a JSON parse failure should fall back to the raw text.
        return {"raw_response": response}
def analyze_web_vulnerability(self, vulnerability_type: str, vulnerability_data: Dict) -> Dict:
    """Analyze a specific web vulnerability using the appropriate prompt from library.json.

    Args:
        vulnerability_type: One of the supported types (case-insensitive):
            ssrf, sql_injection, xss, lfi, broken_object, broken_auth.
        vulnerability_data: Evidence/data to inject into the prompt template.

    Returns:
        Parsed JSON analysis, {"raw_response": ...} on unparseable output,
        or {"error": ...} for missing templates / unsupported types.
    """
    # Supported type -> placeholder name used by its user-prompt template.
    # A data-driven mapping replaces the previous six-branch if/elif chain.
    placeholder_by_type = {
        "ssrf": "http_data_json",
        "sql_injection": "input_data_json",
        "xss": "xss_data_json",
        "lfi": "lfi_data_json",
        "broken_object": "api_data_json",
        "broken_auth": "auth_data_json",
    }
    vuln_key = vulnerability_type.lower()
    user_prompt_name = f"{vuln_key}_user"
    system_prompt_name = f"{vuln_key}_system"
    # Dynamically fetch user prompt, passing vulnerability_data
    user_prompt_template = self.get_prompt("json_prompts", "vulnerability_testing", user_prompt_name)
    if not user_prompt_template:
        logger.warning(f"No user prompt found for vulnerability type: {vulnerability_type}")
        return {"error": f"No user prompt template for {vulnerability_type}"}
    placeholder = placeholder_by_type.get(vuln_key)
    if placeholder is None:
        logger.warning(f"Unsupported vulnerability type for analysis: {vulnerability_type}")
        return {"error": f"Unsupported vulnerability type: {vulnerability_type}"}
    # Fill the template's type-specific placeholder with the serialized data.
    prompt = user_prompt_template.format(**{placeholder: json.dumps(vulnerability_data, indent=2)})
    system_prompt = self.get_prompt("json_prompts", "vulnerability_testing", system_prompt_name)
    if not system_prompt:
        logger.warning(f"No system prompt found for vulnerability type: {vulnerability_type}")
        # Use a generic system prompt if a specific one isn't found
        system_prompt = "You are an expert web security tester. Analyze the provided data for vulnerabilities and offer exploitation steps and remediation."
    response = self.generate(prompt, system_prompt)
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        logger.error(f"Failed to decode JSON response for {vulnerability_type} analysis: {response}")
        return {"raw_response": response}
    except Exception as e:
        logger.error(f"Error during {vulnerability_type} analysis: {e}")
        return {"error": str(e), "raw_response": response}
+244
View File
@@ -0,0 +1,244 @@
#!/usr/bin/env python3
"""
MCP Client - Model Context Protocol tool connectivity.
Provides a standard interface for connecting to MCP servers and
executing tools. Supports both stdio and SSE transports.
Coexists with existing subprocess-based tool execution:
- MCP is tried first when enabled
- Falls back silently to subprocess if MCP unavailable
"""
import asyncio
import json
import logging
from typing import Dict, List, Optional, Any
from pathlib import Path
logger = logging.getLogger(__name__)
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
HAS_MCP = True
except ImportError:
HAS_MCP = False
logger.debug("MCP package not installed. MCP tool connectivity disabled.")
try:
from mcp.client.sse import sse_client
HAS_MCP_SSE = True
except ImportError:
HAS_MCP_SSE = False
class MCPToolClient:
"""Client for connecting to MCP servers and executing tools."""
def __init__(self, config: Dict):
    """Set up the client from application config.

    MCP is considered active only when both the config flag is on and
    the `mcp` package imported successfully (HAS_MCP).
    """
    settings = config.get('mcp_servers', {})
    config_wants_mcp = settings.get('enabled', False)
    self.enabled = config_wants_mcp and HAS_MCP
    self.servers_config = settings.get('servers', {})
    self._sessions: Dict[str, Any] = {}  # server_name -> (session, cleanup)
    self._available_tools: Dict[str, List[Dict]] = {}  # server_name -> tools list
    if self.enabled:
        logger.info(f"MCP client initialized with {len(self.servers_config)} server(s)")
    else:
        if not HAS_MCP and config_wants_mcp:
            logger.warning("MCP enabled in config but mcp package not installed. "
                           "Install with: pip install mcp>=1.0.0")
async def connect(self, server_name: str) -> bool:
    """Establish a connection to a configured MCP server.

    Idempotent: returns True immediately when a session already exists.
    Returns False when MCP is disabled, the server is unknown, the
    transport is unsupported, or the connection attempt fails.
    """
    if not self.enabled:
        return False
    if server_name in self._sessions:
        # Session already established for this server.
        return True
    server_config = self.servers_config.get(server_name)
    if not server_config:
        logger.error(f"MCP server '{server_name}' not found in config")
        return False
    transport = server_config.get('transport', 'stdio')
    try:
        if transport == 'sse':
            return await self._connect_sse(server_name, server_config)
        if transport == 'stdio':
            return await self._connect_stdio(server_name, server_config)
        logger.error(f"Unsupported MCP transport: {transport}")
        return False
    except Exception as e:
        logger.error(f"Failed to connect to MCP server '{server_name}': {e}")
        return False
async def _connect_stdio(self, server_name: str, config: Dict) -> bool:
"""Connect to a stdio-based MCP server."""
if not HAS_MCP:
return False
command = config.get('command', '')
args = config.get('args', [])
if not command:
logger.error(f"MCP server '{server_name}' has no command specified")
return False
server_params = StdioServerParameters(
command=command,
args=args,
env=config.get('env')
)
try:
# Create the stdio client connection
read_stream, write_stream = await asyncio.wait_for(
self._start_stdio_process(server_params),
timeout=30
)
session = ClientSession(read_stream, write_stream)
await session.initialize()
# Cache available tools
tools_result = await session.list_tools()
self._available_tools[server_name] = [
{"name": t.name, "description": t.description}
for t in tools_result.tools
]
self._sessions[server_name] = session
logger.info(f"Connected to MCP server '{server_name}' via stdio "
f"({len(self._available_tools[server_name])} tools available)")
return True
except Exception as e:
logger.error(f"Stdio connection to '{server_name}' failed: {e}")
return False
async def _start_stdio_process(self, params: 'StdioServerParameters'):
    """Start a stdio MCP server process and return its (read, write) streams.

    Bug fix: the previous implementation did
    ``async with stdio_client(params) as (r, w): return r, w`` — the
    ``return`` triggers ``__aexit__`` before the function returns, so the
    caller received streams whose transport was already closed. The
    transport context is now entered on an AsyncExitStack owned by this
    client (created lazily so __init__ needs no change) and released in
    disconnect_all().
    """
    from contextlib import AsyncExitStack
    if getattr(self, '_exit_stack', None) is None:
        self._exit_stack = AsyncExitStack()
    read_stream, write_stream = await self._exit_stack.enter_async_context(
        stdio_client(params)
    )
    return read_stream, write_stream
async def _connect_sse(self, server_name: str, config: Dict) -> bool:
    """Connect to an SSE-based MCP server.

    Bug fix: the previous implementation initialized the session inside
    ``async with sse_client(url) as (read, write):`` and then cached it —
    but leaving the ``with`` block closed the underlying streams, so the
    cached session was dead for all later calls. The transport context is
    now kept open on an AsyncExitStack owned by this client (released in
    disconnect_all()).

    Returns True on success, False otherwise.
    """
    if not HAS_MCP_SSE:
        logger.error("MCP SSE transport not available")
        return False
    url = config.get('url', '')
    if not url:
        logger.error(f"MCP server '{server_name}' has no URL specified")
        return False
    try:
        from contextlib import AsyncExitStack
        if getattr(self, '_exit_stack', None) is None:
            # Lazily created so __init__ does not need changes.
            self._exit_stack = AsyncExitStack()
        read, write = await self._exit_stack.enter_async_context(sse_client(url))
        session = ClientSession(read, write)
        await session.initialize()
        tools_result = await session.list_tools()
        self._available_tools[server_name] = [
            {"name": t.name, "description": t.description}
            for t in tools_result.tools
        ]
        self._sessions[server_name] = session
        logger.info(f"Connected to MCP server '{server_name}' via SSE "
                    f"({len(self._available_tools[server_name])} tools available)")
        return True
    except Exception as e:
        logger.error(f"SSE connection to '{server_name}' failed: {e}")
        return False
async def call_tool(self, server_name: str, tool_name: str,
                    arguments: Optional[Dict] = None) -> Optional[str]:
    """Invoke *tool_name* on *server_name*, connecting on demand.

    Returns the tool's text output, "" when the call produced no content,
    or None when the client is disabled or the connection/call failed.
    """
    if not self.enabled:
        return None
    session = self._sessions.get(server_name)
    if session is None:
        if not await self.connect(server_name):
            return None
        session = self._sessions.get(server_name)
    try:
        result = await session.call_tool(tool_name, arguments or {})
        if not result.content:
            return ""
        # Prefer textual content items; otherwise fall back to the repr.
        text_parts = [item.text for item in result.content if hasattr(item, 'text')]
        return '\n'.join(text_parts) if text_parts else str(result.content)
    except Exception as e:
        logger.error(f"MCP tool call failed ({server_name}/{tool_name}): {e}")
        return None
async def list_tools(self, server_name: str = None) -> Dict[str, List[Dict]]:
    """Return a {server: [tool dicts]} mapping.

    With a truthy *server_name*, only that server is reported (an unknown
    name yields an empty list); otherwise a copy of the full catalogue of
    all connected servers is returned.
    """
    if server_name:
        return {server_name: self._available_tools.get(server_name, [])}
    return dict(self._available_tools)
def find_tool_server(self, tool_name: str) -> Optional[str]:
    """Return the first server (in registration order) advertising *tool_name*.

    Returns None when no connected server provides the tool.
    """
    matches = (
        server
        for server, tools in self._available_tools.items()
        if any(tool["name"] == tool_name for tool in tools)
    )
    return next(matches, None)
async def try_tool(self, tool_name: str, arguments: Optional[Dict] = None) -> Optional[str]:
    """Execute *tool_name* on whichever server provides it.

    Connection attempts are first made for every configured server that
    is not yet connected, then the tool is dispatched. Returns None
    silently when the client is disabled or no server offers the tool,
    so callers can fall back to local implementations.
    """
    if not self.enabled:
        return None
    # Give every configured-but-unconnected server a connection attempt.
    pending = [name for name in self.servers_config if name not in self._sessions]
    for name in pending:
        await self.connect(name)
    server = self.find_tool_server(tool_name)
    if server is None:
        return None  # Silent fallback
    return await self.call_tool(server, tool_name, arguments)
async def disconnect_all(self):
    """Tear down all MCP sessions and release cached tool metadata.

    Improvement: also closes the AsyncExitStack (created lazily by the
    connect helpers to keep stdio/SSE transport streams alive), so the
    underlying subprocesses/connections are shut down too. A no-op when
    the stack was never created, so this change stands on its own.
    """
    for server_name, session in self._sessions.items():
        try:
            if hasattr(session, 'close'):
                await session.close()
        except Exception as e:
            logger.debug(f"Error closing MCP session '{server_name}': {e}")
    # Release transport contexts held open for the sessions' streams.
    exit_stack = getattr(self, '_exit_stack', None)
    if exit_stack is not None:
        try:
            await exit_stack.aclose()
        except Exception as e:
            logger.debug(f"Error closing MCP transport contexts: {e}")
        self._exit_stack = None
    self._sessions.clear()
    self._available_tools.clear()
    logger.info("Disconnected from all MCP servers")
+626
View File
@@ -0,0 +1,626 @@
#!/usr/bin/env python3
"""
NeuroSploit MCP Server — Exposes pentest tools via Model Context Protocol.
Tools:
- screenshot_capture: Playwright browser screenshots
- payload_delivery: HTTP payload sending with full response capture
- dns_lookup: DNS record enumeration
- port_scan: TCP port scanning
- technology_detect: HTTP header-based tech fingerprinting
- subdomain_enumerate: Subdomain discovery via DNS brute-force
- save_finding: Persist a finding to agent memory
- get_vuln_prompt: Retrieve AI decision prompt for a vuln type
- execute_nuclei: Run Nuclei scanner in Docker sandbox (8000+ templates)
- execute_naabu: Run Naabu port scanner in Docker sandbox
- sandbox_health: Check sandbox container status
- sandbox_exec: Execute any allowed tool in the sandbox
Usage:
python3 -m core.mcp_server # stdio transport (default)
MCP_TRANSPORT=sse python3 -m core.mcp_server # SSE transport
"""
import asyncio
import json
import os
import socket
import logging
from typing import Dict, Any, Optional, List
logger = logging.getLogger(__name__)
# Guard MCP import — server only works where mcp package is available
try:
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
HAS_MCP = True
except ImportError:
HAS_MCP = False
logger.warning("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
# Guard Playwright import
try:
from core.browser_validator import BrowserValidator
HAS_PLAYWRIGHT = True
except ImportError:
HAS_PLAYWRIGHT = False
# AI prompts access
try:
from backend.core.vuln_engine.ai_prompts import get_prompt, build_testing_prompt
HAS_AI_PROMPTS = True
except ImportError:
HAS_AI_PROMPTS = False
# Security sandbox access
try:
from core.sandbox_manager import get_sandbox, SandboxManager
HAS_SANDBOX = True
except ImportError:
HAS_SANDBOX = False
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
async def _screenshot_capture(url: str, selector: Optional[str] = None) -> Dict:
    """Capture a screenshot of a URL using Playwright.

    Returns {"url", "screenshot_base64", "status"} on success, or an
    {"error", "screenshot"} dict when Playwright is missing or the
    capture fails.
    """
    if not HAS_PLAYWRIGHT:
        return {"error": "Playwright not available", "screenshot": None}
    try:
        validator = BrowserValidator()
        capture = await validator.capture_screenshot(url, selector=selector)
        return {
            "url": url,
            "screenshot_base64": capture.get("screenshot", ""),
            "status": "ok",
        }
    except Exception as e:
        return {"error": str(e), "screenshot": None}
async def _payload_delivery(
    endpoint: str,
    method: str = "GET",
    payload: str = "",
    content_type: str = "application/x-www-form-urlencoded",
    headers: Optional[Dict] = None,
    param: str = "q",
) -> Dict:
    """Deliver *payload* to *endpoint* and capture the raw response.

    GET places the payload in query-string parameter *param*; other
    methods send it as a form field, or as a JSON body when
    *content_type* is application/json. Redirects are never followed so
    the immediate response is observed. Returns status/headers/body
    (truncated to 5000 chars)/body_length, or {"error": ...} on failure.
    """
    import aiohttp
    try:
        request_headers = {"Content-Type": content_type}
        if headers:
            request_headers.update(headers)
        verb = method.upper()
        kwargs: Dict[str, Any] = {
            "headers": request_headers,
            "timeout": 15,
            "allow_redirects": False,
        }
        if verb == "GET":
            # Payload travels in the query string for GET requests.
            kwargs["params"] = {param: payload}
        elif content_type == "application/json":
            kwargs["json"] = json.loads(payload)
        else:
            kwargs["data"] = {param: payload}
        async with aiohttp.ClientSession() as session:
            async with session.request(verb, endpoint, **kwargs) as resp:
                body = await resp.text()
                return {
                    "status": resp.status,
                    "headers": dict(resp.headers),
                    "body": body[:5000],
                    "body_length": len(body),
                }
    except Exception as e:
        return {"error": str(e)}
async def _dns_lookup(domain: str, record_type: str = "A") -> Dict:
    """Resolve DNS records via `dig`, with a socket fallback for A records.

    Returns {"domain", "type", "records"} on success, or a dict with an
    "error" key when resolution fails or the record type is unsupported
    by the fallback path.
    """
    import subprocess
    try:
        proc = subprocess.run(
            ["dig", "+short", domain, record_type],
            capture_output=True, text=True, timeout=10
        )
        lines = proc.stdout.strip().split("\n")
        records = [line.strip() for line in lines if line.strip()]
        return {"domain": domain, "type": record_type, "records": records}
    except FileNotFoundError:
        # dig is missing: the socket module can still answer plain A queries.
        if record_type.upper() != "A":
            return {"error": "dig command not available and only A records supported via fallback"}
        try:
            infos = socket.getaddrinfo(domain, None, socket.AF_INET)
            addresses = list(set(info[4][0] for info in infos))
            return {"domain": domain, "type": "A", "records": addresses}
        except socket.gaierror as e:
            return {"domain": domain, "type": "A", "error": str(e)}
    except Exception as e:
        return {"error": str(e)}
async def _port_scan(host: str, ports: str = "80,443,8080,8443,3000,5000") -> Dict:
"""Scan TCP ports on a host."""
port_list = [int(p.strip()) for p in ports.split(",") if p.strip().isdigit()]
results = {}
async def check_port(port: int):
try:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(host, port), timeout=3
)
writer.close()
await writer.wait_closed()
return port, "open"
except (asyncio.TimeoutError, ConnectionRefusedError, OSError):
return port, "closed"
tasks = [check_port(p) for p in port_list[:100]]
for coro in asyncio.as_completed(tasks):
port, state = await coro
results[str(port)] = state
open_ports = [p for p, s in results.items() if s == "open"]
return {"host": host, "ports": results, "open_ports": open_ports}
async def _technology_detect(url: str) -> Dict:
    """Fingerprint server software and frameworks from one HTTP response.

    Combines header inspection (Server, X-Powered-By) with body substring
    markers for common frameworks. Returns the detected technology list
    plus the fingerprint-relevant response headers, or {"error": ...}.
    """
    import aiohttp
    # Body substrings that betray well-known frameworks.
    framework_markers = {
        "React": ["react", "_next/static", "__NEXT_DATA__"],
        "Vue.js": ["vue.js", "__vue__", "v-cloak"],
        "Angular": ["ng-version", "angular"],
        "jQuery": ["jquery"],
        "WordPress": ["wp-content", "wp-includes"],
        "Laravel": ["laravel_session", "csrf-token"],
        "Django": ["csrfmiddlewaretoken", "django"],
        "Rails": ["csrf-param", "action_dispatch"],
        "Spring": ["jsessionid"],
        "Express": ["connect.sid"],
    }
    fingerprint_headers = ("server", "x-powered-by", "x-aspnet-version",
                           "x-generator", "x-drupal-cache", "x-framework")
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=10, allow_redirects=True) as resp:
                response_headers = dict(resp.headers)
                body = await resp.text()
        techs = []
        for header_name in ("Server", "X-Powered-By"):
            value = response_headers.get(header_name, "")
            if value:
                techs.append(f"{header_name}: {value}")
        body_lower = body.lower()
        for tech, markers in framework_markers.items():
            if any(marker.lower() in body_lower for marker in markers):
                techs.append(tech)
        return {
            "url": url,
            "technologies": techs,
            "headers": {
                k: v for k, v in response_headers.items()
                if k.lower() in fingerprint_headers
            },
        }
    except Exception as e:
        return {"error": str(e)}
async def _subdomain_enumerate(domain: str) -> Dict:
    """Probe a fixed list of common subdomain prefixes via DNS resolution.

    A prefix counts as found when `{prefix}.{domain}` resolves to an
    IPv4 address. Returns the resolvable subdomains and their count.
    """
    prefixes = [
        "www", "api", "admin", "app", "dev", "staging", "test", "mail",
        "ftp", "cdn", "blog", "shop", "docs", "status", "dashboard",
        "portal", "m", "mobile", "beta", "demo", "v2", "internal",
    ]

    async def resolves(prefix: str):
        candidate = f"{prefix}.{domain}"
        try:
            socket.getaddrinfo(candidate, None, socket.AF_INET)
            return candidate
        except socket.gaierror:
            return None

    resolved = await asyncio.gather(*(resolves(p) for p in prefixes))
    found = [name for name in resolved if name]
    return {"domain": domain, "subdomains": found, "count": len(found)}
async def _save_finding(finding_json: str) -> Dict:
"""Persist a finding (JSON string). Returns confirmation."""
try:
finding = json.loads(finding_json)
# Validate required fields
required = ["title", "severity", "vulnerability_type", "affected_endpoint"]
missing = [f for f in required if f not in finding]
if missing:
return {"error": f"Missing required fields: {missing}"}
return {"status": "saved", "finding_id": finding.get("id", "unknown"), "title": finding["title"]}
except json.JSONDecodeError as e:
return {"error": f"Invalid JSON: {e}"}
async def _get_vuln_prompt(vuln_type: str, target: str = "", endpoint: str = "", param: str = "", tech: str = "") -> Dict:
    """Fetch the AI decision prompt (plus fully built prompt) for a vuln type.

    Returns an error dict when the prompts module is unavailable, no
    prompt exists for *vuln_type*, or prompt construction fails.
    """
    if not HAS_AI_PROMPTS:
        return {"error": "AI prompts module not available"}
    try:
        context = {
            "TARGET_URL": target,
            "ENDPOINT": endpoint,
            "PARAMETER": param,
            "TECHNOLOGY": tech,
        }
        prompt_data = get_prompt(vuln_type, context)
        if not prompt_data:
            return {"error": f"No prompt found for vuln type: {vuln_type}"}
        return {
            "vuln_type": vuln_type,
            "prompt": prompt_data,
            "full_prompt": build_testing_prompt(vuln_type, target, endpoint, param, tech),
        }
    except Exception as e:
        return {"error": str(e)}
# ---------------------------------------------------------------------------
# Sandbox tool implementations (Docker-based real tools)
# ---------------------------------------------------------------------------
async def _execute_nuclei(
    target: str,
    templates: Optional[str] = None,
    severity: Optional[str] = None,
    tags: Optional[str] = None,
    rate_limit: int = 150,
) -> Dict:
    """Run the Nuclei scanner against *target* inside the Docker sandbox.

    Returns structured findings plus exit code, duration and the first
    3000 chars of raw output; error dicts cover the missing-sandbox and
    stopped-container cases.
    """
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available. Install docker SDK: pip install docker"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running. Build with: cd docker && docker compose -f docker-compose.sandbox.yml up -d"}
        scan = await sandbox.run_nuclei(
            target=target,
            templates=templates,
            severity=severity,
            tags=tags,
            rate_limit=rate_limit,
        )
        truncated_output = scan.stdout[:3000] if scan.stdout else ""
        return {
            "tool": "nuclei",
            "target": target,
            "exit_code": scan.exit_code,
            "findings": scan.findings,
            "findings_count": len(scan.findings),
            "duration_seconds": scan.duration_seconds,
            "raw_output": truncated_output,
            "error": scan.error,
        }
    except Exception as e:
        return {"error": str(e)}
async def _execute_naabu(
    target: str,
    ports: Optional[str] = None,
    top_ports: Optional[int] = None,
    rate: int = 1000,
) -> Dict:
    """Run the Naabu port scanner against *target* inside the Docker sandbox.

    Returns the sorted open ports, per-finding details, exit code and
    duration, or an error dict when the sandbox is unavailable.
    """
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running"}
        scan = await sandbox.run_naabu(
            target=target,
            ports=ports,
            top_ports=top_ports,
            rate=rate,
        )
        discovered = [finding["port"] for finding in scan.findings]
        return {
            "tool": "naabu",
            "target": target,
            "exit_code": scan.exit_code,
            "open_ports": sorted(discovered),
            "port_count": len(discovered),
            "findings": scan.findings,
            "duration_seconds": scan.duration_seconds,
            "error": scan.error,
        }
    except Exception as e:
        return {"error": str(e)}
async def _sandbox_health() -> Dict:
    """Check sandbox container health and available tools.

    Returns the sandbox's own health_check() payload when reachable;
    otherwise a {"status", "reason"} dict explaining why it is
    unavailable (module not installed, or the lookup/health call failed).
    """
    if not HAS_SANDBOX:
        return {"status": "unavailable", "reason": "Sandbox module not installed"}
    try:
        sandbox = await get_sandbox()
        return await sandbox.health_check()
    except Exception as e:
        # Any failure (container lookup or health probe) is reported, not raised.
        return {"status": "error", "reason": str(e)}
async def _sandbox_exec(tool: str, args: str, timeout: int = 300) -> Dict:
    """Execute an allow-listed security tool inside the Docker sandbox.

    Captures exit code, stdout (first 5000 chars), stderr (first 2000
    chars) and duration; returns an error dict when the sandbox module
    or container is unavailable.
    """
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running"}
        run = await sandbox.run_tool(tool=tool, args=args, timeout=timeout)
        return {
            "tool": tool,
            "exit_code": run.exit_code,
            "stdout": run.stdout[:5000] if run.stdout else "",
            "stderr": run.stderr[:2000] if run.stderr else "",
            "duration_seconds": run.duration_seconds,
            "error": run.error,
        }
    except Exception as e:
        return {"error": str(e)}
# ---------------------------------------------------------------------------
# MCP Server Definition
# ---------------------------------------------------------------------------
# MCP tool catalogue: one JSON-Schema entry per exposed tool. Kept in sync
# with TOOL_HANDLERS below — every declared property here is forwarded by
# the corresponding handler lambda.
# Fix: the payload_delivery handler accepts a "headers" argument, but the
# schema previously omitted it, so MCP clients could never supply custom
# request headers. The property is now declared.
TOOLS = [
    {
        "name": "screenshot_capture",
        "description": "Capture a browser screenshot of a URL using Playwright",
        "inputSchema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to screenshot"},
                "selector": {"type": "string", "description": "Optional CSS selector to capture"},
            },
            "required": ["url"],
        },
    },
    {
        "name": "payload_delivery",
        "description": "Send an HTTP request with a payload and capture the full response",
        "inputSchema": {
            "type": "object",
            "properties": {
                "endpoint": {"type": "string", "description": "Target URL"},
                "method": {"type": "string", "description": "HTTP method", "default": "GET"},
                "payload": {"type": "string", "description": "Payload value"},
                "content_type": {"type": "string", "default": "application/x-www-form-urlencoded"},
                "headers": {"type": "object", "description": "Optional extra request headers"},
                "param": {"type": "string", "description": "Parameter name", "default": "q"},
            },
            "required": ["endpoint", "payload"],
        },
    },
    {
        "name": "dns_lookup",
        "description": "Perform DNS lookups for a domain",
        "inputSchema": {
            "type": "object",
            "properties": {
                "domain": {"type": "string", "description": "Domain to look up"},
                "record_type": {"type": "string", "default": "A", "description": "DNS record type"},
            },
            "required": ["domain"],
        },
    },
    {
        "name": "port_scan",
        "description": "Scan TCP ports on a host",
        "inputSchema": {
            "type": "object",
            "properties": {
                "host": {"type": "string", "description": "Target host"},
                "ports": {"type": "string", "default": "80,443,8080,8443,3000,5000", "description": "Comma-separated ports"},
            },
            "required": ["host"],
        },
    },
    {
        "name": "technology_detect",
        "description": "Detect technologies from HTTP response headers and body",
        "inputSchema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to analyze"},
            },
            "required": ["url"],
        },
    },
    {
        "name": "subdomain_enumerate",
        "description": "Enumerate subdomains via common prefix brute-force",
        "inputSchema": {
            "type": "object",
            "properties": {
                "domain": {"type": "string", "description": "Base domain to enumerate"},
            },
            "required": ["domain"],
        },
    },
    {
        "name": "save_finding",
        "description": "Persist a vulnerability finding (JSON string)",
        "inputSchema": {
            "type": "object",
            "properties": {
                "finding_json": {"type": "string", "description": "Finding as JSON string"},
            },
            "required": ["finding_json"],
        },
    },
    {
        "name": "get_vuln_prompt",
        "description": "Retrieve the AI decision prompt for a vulnerability type",
        "inputSchema": {
            "type": "object",
            "properties": {
                "vuln_type": {"type": "string", "description": "Vulnerability type key"},
                "target": {"type": "string", "description": "Target URL"},
                "endpoint": {"type": "string", "description": "Specific endpoint"},
                "param": {"type": "string", "description": "Parameter name"},
                "tech": {"type": "string", "description": "Detected technology"},
            },
            "required": ["vuln_type"],
        },
    },
    # --- Sandbox tools (Docker-based real security tools) ---
    {
        "name": "execute_nuclei",
        "description": "Run Nuclei vulnerability scanner (8000+ templates) in Docker sandbox. Returns structured findings with severity, CVE, CWE.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "target": {"type": "string", "description": "Target URL to scan"},
                "templates": {"type": "string", "description": "Specific template path (e.g. 'cves/2024/', 'vulnerabilities/xss/')"},
                "severity": {"type": "string", "description": "Filter: critical,high,medium,low,info"},
                "tags": {"type": "string", "description": "Filter by tags: xss,sqli,lfi,ssrf,rce"},
                "rate_limit": {"type": "integer", "description": "Requests per second (default 150)", "default": 150},
            },
            "required": ["target"],
        },
    },
    {
        "name": "execute_naabu",
        "description": "Run Naabu port scanner in Docker sandbox. Fast SYN-based scanning with configurable port ranges.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "target": {"type": "string", "description": "IP address or hostname to scan"},
                "ports": {"type": "string", "description": "Ports to scan (e.g. '80,443,8080' or '1-65535')"},
                "top_ports": {"type": "integer", "description": "Scan top N ports (e.g. 100, 1000)"},
                "rate": {"type": "integer", "description": "Packets per second (default 1000)", "default": 1000},
            },
            "required": ["target"],
        },
    },
    {
        "name": "sandbox_health",
        "description": "Check Docker sandbox status and available security tools",
        "inputSchema": {
            "type": "object",
            "properties": {},
        },
    },
    {
        "name": "sandbox_exec",
        "description": "Execute any allowed security tool in the Docker sandbox (nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, sqlmap, nikto, etc.)",
        "inputSchema": {
            "type": "object",
            "properties": {
                "tool": {"type": "string", "description": "Tool name (e.g. nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, gobuster, dalfox, nikto, sqlmap, curl)"},
                "args": {"type": "string", "description": "Command-line arguments for the tool"},
                "timeout": {"type": "integer", "description": "Max execution time in seconds (default 300)", "default": 300},
            },
            "required": ["tool", "args"],
        },
    },
]
# Tool dispatcher
# Maps each MCP tool name to an async implementation. Every value is a
# lambda that takes the raw `arguments` dict from the MCP call and forwards
# the expected positional parameters, supplying defaults for the optional
# ones (mirroring the defaults declared in the TOOLS schemas).
TOOL_HANDLERS = {
    "screenshot_capture": lambda args: _screenshot_capture(args["url"], args.get("selector")),
    "payload_delivery": lambda args: _payload_delivery(
        args["endpoint"], args.get("method", "GET"), args.get("payload", ""),
        args.get("content_type", "application/x-www-form-urlencoded"),
        args.get("headers"), args.get("param", "q")
    ),
    "dns_lookup": lambda args: _dns_lookup(args["domain"], args.get("record_type", "A")),
    "port_scan": lambda args: _port_scan(args["host"], args.get("ports", "80,443,8080,8443,3000,5000")),
    "technology_detect": lambda args: _technology_detect(args["url"]),
    "subdomain_enumerate": lambda args: _subdomain_enumerate(args["domain"]),
    "save_finding": lambda args: _save_finding(args["finding_json"]),
    "get_vuln_prompt": lambda args: _get_vuln_prompt(
        args["vuln_type"], args.get("target", ""), args.get("endpoint", ""),
        args.get("param", ""), args.get("tech", "")
    ),
    # Sandbox tools
    "execute_nuclei": lambda args: _execute_nuclei(
        args["target"], args.get("templates"), args.get("severity"),
        args.get("tags"), args.get("rate_limit", 150)
    ),
    "execute_naabu": lambda args: _execute_naabu(
        args["target"], args.get("ports"), args.get("top_ports"),
        args.get("rate", 1000)
    ),
    # sandbox_health takes no arguments; the dict is ignored.
    "sandbox_health": lambda args: _sandbox_health(),
    "sandbox_exec": lambda args: _sandbox_exec(
        args["tool"], args["args"], args.get("timeout", 300)
    ),
}
def create_mcp_server() -> "Server":
    """Build the MCP server instance exposing every pentest tool.

    Registers a list_tools handler backed by the static TOOLS catalogue
    and a call_tool handler that dispatches through TOOL_HANDLERS,
    serializing every result (including errors) to a JSON text content.

    Raises:
        RuntimeError: when the mcp package is not installed.
    """
    if not HAS_MCP:
        raise RuntimeError("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
    server = Server("neurosploit-tools")

    @server.list_tools()
    async def _list_tools() -> list:
        # Advertise the static catalogue as typed Tool objects.
        return [Tool(**spec) for spec in TOOLS]

    @server.call_tool()
    async def _call_tool(name: str, arguments: dict) -> list:
        handler = TOOL_HANDLERS.get(name)
        if handler is None:
            payload = {"error": f"Unknown tool: {name}"}
        else:
            try:
                payload = await handler(arguments)
            except Exception as e:
                payload = {"error": str(e)}
        return [TextContent(type="text", text=json.dumps(payload, default=str))]

    return server
async def main():
    """Entry point: serve the MCP tools over the configured transport.

    Only stdio is implemented here; any other MCP_TRANSPORT value is
    rejected with an error log.
    """
    server = create_mcp_server()
    transport = os.getenv("MCP_TRANSPORT", "stdio")
    if transport != "stdio":
        logger.error(f"Unsupported transport: {transport}. Use 'stdio'.")
        return
    async with stdio_server() as (read_stream, write_stream):
        await server.run(read_stream, write_stream, server.create_initialization_options())


if __name__ == "__main__":
    asyncio.run(main())
+81
View File
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Model Router - Task-type-based LLM routing.
Routes requests to different LLM profiles based on task type:
- reasoning: Complex logic and decision-making
- analysis: Data analysis and pattern recognition
- generation: Content and payload generation
- validation: Result verification and confirmation
Enabled/disabled via config. When disabled, callers fall back to their default provider.
"""
import os
import logging
from typing import Dict, Optional, Callable
logger = logging.getLogger(__name__)
class ModelRouter:
    """Routes LLM requests to different profiles based on task type."""

    def __init__(self, config: Dict, llm_manager_factory: Callable):
        """
        Args:
            config: Full application config dict (must contain 'model_routing' and 'llm' keys)
            llm_manager_factory: Callable that takes a profile name and returns an LLMManager instance
        """
        routing_cfg = config.get('model_routing', {})
        self.enabled = routing_cfg.get('enabled', False)
        # The environment variable, when set, wins over the config file.
        override = os.getenv('ENABLE_MODEL_ROUTING', '').strip().lower()
        if override == 'true':
            self.enabled = True
        elif override == 'false':
            self.enabled = False
        self.routes = routing_cfg.get('routes', {})
        self.llm_manager_factory = llm_manager_factory
        self._managers = {}  # Cache of LLMManager instances, keyed by profile
        log = logging.getLogger(__name__)
        if self.enabled:
            log.info(f"Model routing enabled with routes: {list(self.routes.keys())}")
        else:
            log.debug("Model routing disabled")

    def _resolve_profile(self, task_type: str) -> Optional[str]:
        """Map a task type to its configured profile, falling back to 'default'."""
        return self.routes.get(task_type, self.routes.get('default'))

    def generate(self, prompt: str, system_prompt: Optional[str] = None,
                 task_type: str = "default") -> Optional[str]:
        """Route a generation request to the appropriate LLM profile.

        Returns None if routing is disabled or no route matches,
        allowing callers to fall back to their default provider.
        """
        log = logging.getLogger(__name__)
        if not self.enabled:
            return None
        profile = self._resolve_profile(task_type)
        if not profile:
            log.debug(f"No route for task_type '{task_type}', falling back to default")
            return None
        try:
            manager = self._managers.get(profile)
            if manager is None:
                manager = self.llm_manager_factory(profile)
                self._managers[profile] = manager
            log.debug(f"Routing task_type '{task_type}' to profile '{profile}' "
                      f"(provider: {manager.provider}, model: {manager.model})")
            return manager.generate(prompt, system_prompt)
        except Exception as e:
            log.error(f"Model routing error for profile '{profile}': {e}")
            return None

    def get_profile_for_task(self, task_type: str) -> Optional[str]:
        """Get the profile name that would handle a given task type."""
        if not self.enabled:
            return None
        return self._resolve_profile(task_type)
+626
View File
@@ -0,0 +1,626 @@
#!/usr/bin/env python3
"""
Pentest Executor - Executes real pentest tools and captures outputs for PoC generation
"""
import subprocess
import shutil
import json
import re
import os
import logging
import socket
import urllib.parse
from typing import Dict, List, Optional, Any
from datetime import datetime
from dataclasses import dataclass, field, asdict
logger = logging.getLogger(__name__)
@dataclass
class Vulnerability:
    """Represents a discovered vulnerability with PoC"""
    title: str
    severity: str  # Critical, High, Medium, Low, Info
    cvss_score: float  # CVSS base score
    cvss_vector: str  # CVSS vector string; may be empty when unknown
    description: str
    affected_endpoint: str  # URL or endpoint where the issue was observed
    impact: str  # Human-readable impact statement
    poc_request: str  # Request (or curl command) reproducing the issue
    poc_response: str  # Response evidence captured for the PoC
    poc_payload: str  # Payload used to trigger the issue
    remediation: str  # Suggested fix
    references: List[str] = field(default_factory=list)  # Advisory/reference links
    cwe_id: str = ""  # e.g. "CWE-79"; empty when not classified
    tool_output: str = ""  # Raw output of the tool that found the issue
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())  # Discovery time (ISO 8601)
@dataclass
class ScanResult:
    """Contains all scan results and findings"""
    target: str  # Normalized target URL/IP
    scan_started: str  # ISO-8601 start time
    scan_completed: str = ""  # ISO-8601 completion time; empty while running
    tools_executed: List[Dict] = field(default_factory=list)  # One outcome dict per tool run
    vulnerabilities: List[Vulnerability] = field(default_factory=list)
    open_ports: List[Dict] = field(default_factory=list)  # Entries with port/protocol/service/version
    technologies: List[str] = field(default_factory=list)  # Detected tech stack labels
    raw_outputs: Dict[str, str] = field(default_factory=dict)  # Tool name -> raw stdout
class PentestExecutor:
"""Executes real pentest tools and captures outputs"""
def __init__(self, target: str, config: Dict = None, recon_context: Dict = None):
    """Set up an executor for *target*.

    Args:
        target: URL, domain or IP; normalized via _normalize_target().
        config: Optional configuration dict.
        recon_context: Optional consolidated recon context used to
            pre-populate technologies, ports and known vulnerabilities.
    """
    self.target = self._normalize_target(target)
    self.config = config or {}
    self.recon_context = recon_context  # Consolidated recon context (may be None)
    self.scan_result = ScanResult(
        target=self.target,
        scan_started=datetime.now().isoformat()
    )
    self.timeout = 300  # 5 minutes default timeout
    # If a recon context was supplied, pre-populate scan data from it.
    if self.recon_context:
        self._load_from_recon_context()
def _load_from_recon_context(self):
    """Pre-populate scan results from the consolidated recon context."""
    if not self.recon_context:
        return
    data = self.recon_context.get('data', {})
    # Carry over detected technologies.
    techs = data.get('technologies', [])
    self.scan_result.technologies.extend(techs)
    # Carry over open ports, skipping duplicates.
    ports = data.get('open_ports', [])
    for port in ports:
        if port not in self.scan_result.open_ports:
            self.scan_result.open_ports.append(port)
    # Convert vulnerabilities already found during recon into Vulnerability records.
    vulns = self.recon_context.get('vulnerabilities', {}).get('all', [])
    for v in vulns:
        vuln = Vulnerability(
            title=v.get('title', v.get('name', 'Unknown')),
            severity=v.get('severity', 'Info').capitalize(),
            cvss_score=self._severity_to_cvss(v.get('severity', 'info')),
            cvss_vector="",
            description=v.get('description', ''),
            affected_endpoint=v.get('affected_endpoint', v.get('url', self.target)),
            impact=f"{v.get('severity', 'info')} severity vulnerability",
            poc_request=v.get('curl_command', ''),
            poc_response="",
            poc_payload="",
            remediation="Apply vendor patches and security best practices"
        )
        self.scan_result.vulnerabilities.append(vuln)
    # NOTE: log message intentionally kept in Portuguese (runtime string).
    logger.info(f"Carregados do recon: {len(techs)} techs, {len(ports)} portas, {len(vulns)} vulns")
@classmethod
def load_context_from_file(cls, context_file: str) -> Optional[Dict]:
"""Carrega contexto de recon de um arquivo JSON."""
try:
with open(context_file, 'r') as f:
return json.load(f)
except Exception as e:
logger.error(f"Erro ao carregar contexto: {e}")
return None
def get_urls_with_params(self) -> List[str]:
"""Retorna URLs com parametros do contexto para testes de injecao."""
if not self.recon_context:
return []
data = self.recon_context.get('data', {})
urls = data.get('urls', {})
if isinstance(urls, dict):
return urls.get('with_params', [])
return []
def get_api_endpoints(self) -> List[str]:
"""Retorna endpoints de API do contexto."""
if not self.recon_context:
return []
data = self.recon_context.get('data', {})
return data.get('api_endpoints', [])
def get_interesting_paths(self) -> List[str]:
"""Retorna caminhos interessantes do contexto."""
if not self.recon_context:
return []
data = self.recon_context.get('data', {})
return data.get('interesting_paths', [])
def get_live_hosts(self) -> List[str]:
"""Retorna hosts ativos do contexto."""
if not self.recon_context:
return []
data = self.recon_context.get('data', {})
return data.get('live_hosts', [])
def get_context_for_llm(self) -> str:
    """Format the recon context as a text block for inclusion in an LLM prompt.

    Section headers are Portuguese runtime strings (consumed downstream
    as-is). Output is bounded: at most 10 technologies, 20 parameterized
    URLs, 10 API endpoints and 10 known vulnerabilities are listed.
    Returns "" when no recon context is available.
    """
    if not self.recon_context:
        return ""
    lines = [
        "=== CONTEXTO DE RECON CONSOLIDADO ===",
        f"Alvo: {self.recon_context.get('target', {}).get('primary_target', 'N/A')}",
        "",
        "SUPERFICIE DE ATAQUE:",
    ]
    attack_surface = self.recon_context.get('attack_surface', {})
    for key, value in attack_surface.items():
        lines.append(f"  - {key}: {value}")
    lines.append("\nTECNOLOGIAS DETECTADAS:")
    for tech in self.scan_result.technologies[:10]:
        lines.append(f"  - {tech}")
    lines.append("\nURLs COM PARAMETROS (para testes de injecao):")
    for url in self.get_urls_with_params()[:20]:
        lines.append(f"  - {url}")
    lines.append("\nENDPOINTS DE API:")
    for ep in self.get_api_endpoints()[:10]:
        lines.append(f"  - {ep}")
    lines.append("\nVULNERABILIDADES JA ENCONTRADAS:")
    for vuln in self.scan_result.vulnerabilities[:10]:
        lines.append(f"  - [{vuln.severity}] {vuln.title}")
    return "\n".join(lines)
def _normalize_target(self, target: str) -> str:
"""Normalize target URL/IP"""
target = target.strip()
if not target.startswith(('http://', 'https://')):
# Check if it's an IP
try:
socket.inet_aton(target.split('/')[0].split(':')[0])
return target # It's an IP
except socket.error:
# Assume it's a domain
return f"https://{target}"
return target
def _get_domain(self) -> str:
"""Extract domain from target"""
parsed = urllib.parse.urlparse(self.target)
return parsed.netloc or parsed.path.split('/')[0]
def _get_ip(self) -> Optional[str]:
    """Resolve the target's host to an IPv4 address, or None on failure."""
    host = self._get_domain().split(':')[0]
    try:
        return socket.gethostbyname(host)
    except socket.error:
        return None
def _run_command(self, cmd: List[str], timeout: int = None) -> Dict:
"""Run a command and capture output"""
timeout = timeout or self.timeout
tool_name = cmd[0] if cmd else "unknown"
result = {
"tool": tool_name,
"command": " ".join(cmd),
"success": False,
"stdout": "",
"stderr": "",
"exit_code": -1,
"timestamp": datetime.now().isoformat()
}
# Check if tool exists
if not shutil.which(cmd[0]):
result["stderr"] = f"Tool '{cmd[0]}' not found. Please install it using 'install_tools' command."
logger.warning(f"Tool not found: {cmd[0]}")
return result
try:
print(f"[*] Executing: {' '.join(cmd)}")
logger.info(f"Executing: {' '.join(cmd)}")
proc = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout
)
result["stdout"] = proc.stdout
result["stderr"] = proc.stderr
result["exit_code"] = proc.returncode
result["success"] = proc.returncode == 0
except subprocess.TimeoutExpired:
result["stderr"] = f"Command timed out after {timeout} seconds"
logger.warning(f"Timeout: {' '.join(cmd)}")
except Exception as e:
result["stderr"] = str(e)
logger.error(f"Error executing {cmd[0]}: {e}")
self.scan_result.tools_executed.append(result)
self.scan_result.raw_outputs[tool_name] = result["stdout"]
return result
def run_nmap_scan(self, ports: str = "1-1000", extra_args: List[str] = None) -> Dict:
    """Run an nmap version/script scan against the target host."""
    cmd = ["nmap", "-sV", "-sC", "-p", ports, "--open", self._get_domain()]
    cmd.extend(extra_args or [])
    outcome = self._run_command(cmd)
    # Only parse ports when nmap exited cleanly.
    if outcome["success"]:
        self._parse_nmap_output(outcome["stdout"])
    return outcome
def _parse_nmap_output(self, output: str):
"""Parse nmap output for open ports"""
port_pattern = r"(\d+)/(\w+)\s+open\s+(\S+)\s*(.*)"
for match in re.finditer(port_pattern, output):
port_info = {
"port": int(match.group(1)),
"protocol": match.group(2),
"service": match.group(3),
"version": match.group(4).strip()
}
self.scan_result.open_ports.append(port_info)
print(f" [+] Found: {port_info['port']}/{port_info['protocol']} - {port_info['service']} {port_info['version']}")
def run_nikto_scan(self) -> Dict:
    """Run a nikto web scan and parse findings from its text output."""
    outcome = self._run_command(
        ["nikto", "-h", self.target, "-Format", "txt", "-nointeractive"],
        timeout=600
    )
    # Nikto can return non-zero even with useful output, so parse stdout too.
    if outcome["success"] or outcome["stdout"]:
        self._parse_nikto_output(outcome["stdout"])
    return outcome
def _parse_nikto_output(self, output: str):
    """Translate known nikto finding patterns into Vulnerability records.

    Each output line is checked against every pattern; a line that matches
    several patterns yields one record per match (original behavior).
    """
    vuln_patterns = [
        (r"OSVDB-\d+:.*", "Medium"),
        (r"\+ (/[^\s]+).*SQL injection", "High"),
        (r"\+ (/[^\s]+).*XSS", "High"),
        (r"\+ The X-XSS-Protection header", "Low"),
        (r"\+ The X-Content-Type-Options header", "Low"),
        (r"\+ Server leaks", "Medium"),
        (r"\+ Retrieved x-powered-by header", "Info"),
    ]
    for line in output.split('\n'):
        stripped = line.strip()
        for pattern, severity in vuln_patterns:
            if not re.search(pattern, line, re.IGNORECASE):
                continue
            self.scan_result.vulnerabilities.append(Vulnerability(
                title=stripped[:100],
                severity=severity,
                cvss_score=self._severity_to_cvss(severity),
                cvss_vector="",
                description=stripped,
                affected_endpoint=self.target,
                impact=f"{severity} severity finding detected by Nikto",
                poc_request=f"GET {self.target} HTTP/1.1",
                poc_response="See tool output",
                poc_payload="N/A - Passive scan",
                remediation="Review and fix the identified issue",
                tool_output=line
            ))
def run_nuclei_scan(self, templates: str = None) -> Dict:
    """Run a nuclei template scan (silent JSONL mode) and parse findings."""
    cmd = ["nuclei", "-u", self.target, "-silent", "-nc", "-j"]
    if templates:
        cmd += ["-t", templates]
    outcome = self._run_command(cmd, timeout=600)
    # Nuclei emits findings on stdout regardless of exit code.
    if outcome["stdout"]:
        self._parse_nuclei_output(outcome["stdout"])
    return outcome
def _parse_nuclei_output(self, output: str):
    """Convert nuclei JSONL lines into Vulnerability records."""
    for raw in output.strip().split('\n'):
        if not raw.strip():
            continue
        try:
            finding = json.loads(raw)
        except json.JSONDecodeError:
            # Skip any non-JSON noise interleaved in the stream.
            continue
        info = finding.get("info", {})
        classification = info.get("classification", {})
        severity = info.get("severity", "unknown").capitalize()
        endpoint = finding.get("matched-at", self.target)
        vuln = Vulnerability(
            title=info.get("name", "Unknown"),
            severity=severity,
            cvss_score=self._severity_to_cvss(severity),
            cvss_vector=classification.get("cvss-metrics", ""),
            description=info.get("description", ""),
            affected_endpoint=endpoint,
            impact=info.get("impact", f"{severity} severity vulnerability"),
            poc_request=finding.get("curl-command", f"curl -X GET '{endpoint}'"),
            poc_response=finding.get("response", "")[:500] if finding.get("response") else "See tool output",
            poc_payload=finding.get("matcher-name", "Template-based detection"),
            remediation=info.get("remediation", "Apply vendor patches"),
            references=info.get("reference", []),
            cwe_id=str(classification.get("cwe-id", "")),
            tool_output=json.dumps(finding, indent=2)
        )
        self.scan_result.vulnerabilities.append(vuln)
        print(f"  [!] {severity}: {vuln.title} at {vuln.affected_endpoint}")
def run_sqlmap_scan(self, param: str = None) -> Dict:
    """Run sqlmap against the target, optionally limited to one parameter.

    Args:
        param: Specific parameter name to test. Passed via sqlmap's ``-p``
            flag; the previous ``--param`` spelling is not a valid sqlmap
            option and made sqlmap exit with a usage error.

    Returns:
        The command-result dict from _run_command.
    """
    cmd = ["sqlmap", "-u", self.target, "--batch", "--level=2", "--risk=2",
           "--random-agent", "--threads=5", "--output-dir=/tmp/sqlmap_output"]
    if param:
        cmd.extend(["-p", param])  # fixed: sqlmap's test-parameter flag is -p
    result = self._run_command(cmd, timeout=600)
    if result["stdout"]:
        self._parse_sqlmap_output(result["stdout"])
    return result
def _parse_sqlmap_output(self, output: str):
    """Create a Critical SQLi finding when sqlmap reports an injection."""
    lowered = output.lower()
    if "is vulnerable" not in lowered and "injection" not in lowered:
        return
    # Classify the injection technique; more specific wordings win.
    if "union" in lowered:
        vuln_type = "UNION-based"
    elif "time-based" in lowered:
        vuln_type = "Time-based blind"
    elif "blind" in lowered:
        vuln_type = "Blind"
    else:
        vuln_type = "Error-based"
    # Pull the concrete payload sqlmap printed, if any.
    payload_match = re.search(r"Payload: (.+)", output)
    payload = payload_match.group(1) if payload_match else "See tool output"
    finding = Vulnerability(
        title=f"SQL Injection ({vuln_type})",
        severity="Critical",
        cvss_score=9.8,
        cvss_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
        description=f"SQL Injection vulnerability detected. Type: {vuln_type}. This allows an attacker to manipulate database queries.",
        affected_endpoint=self.target,
        impact="Complete database compromise. Attacker can read, modify, or delete data. Potential for remote code execution.",
        poc_request=f"GET {self.target}?param={payload} HTTP/1.1\nHost: {self._get_domain()}\nUser-Agent: Mozilla/5.0",
        poc_response="Database error or data disclosure in response",
        poc_payload=payload,
        remediation="Use parameterized queries/prepared statements. Implement input validation. Apply least privilege to database accounts.",
        cwe_id="CWE-89",
        references=["https://owasp.org/www-community/attacks/SQL_Injection"],
        tool_output=output[:2000]
    )
    self.scan_result.vulnerabilities.append(finding)
    print(f" [!!!] CRITICAL: SQL Injection found!")
def run_ffuf_scan(self, wordlist: str = "/usr/share/wordlists/dirb/common.txt") -> Dict:
    """Run ffuf directory/file bruteforce against the target.

    ffuf writes its results to /tmp/ffuf_output.json, which is read back
    and echoed to the console. A missing or corrupt output file is logged
    instead of being silently ignored (previously a bare ``except: pass``
    which also swallowed KeyboardInterrupt/SystemExit).

    Returns:
        The command-result dict from _run_command.
    """
    target_url = self.target.rstrip('/') + "/FUZZ"
    cmd = ["ffuf", "-u", target_url, "-w", wordlist, "-mc", "200,301,302,403",
           "-o", "/tmp/ffuf_output.json", "-of", "json", "-t", "50"]
    result = self._run_command(cmd, timeout=300)
    # Parse output file if it exists; this is best-effort reporting only.
    if os.path.exists("/tmp/ffuf_output.json"):
        try:
            with open("/tmp/ffuf_output.json", "r") as f:
                ffuf_data = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            # Keep best-effort semantics, but surface the failure.
            logger.warning(f"Could not parse ffuf output: {exc}")
        else:
            for res in ffuf_data.get("results", []):
                print(f"  [+] Found: {res.get('url')} (Status: {res.get('status')})")
    return result
def run_curl_test(self, method: str = "GET", path: str = "/", headers: Dict = None, data: str = None) -> Dict:
    """Issue a verbose curl request (TLS verification disabled) to target+path."""
    cmd = ["curl", "-v", "-s", "-k", "-X", method, self.target.rstrip('/') + path]
    for key, value in (headers or {}).items():
        cmd += ["-H", f"{key}: {value}"]
    if data:
        cmd += ["-d", data]
    return self._run_command(cmd)
def run_http_security_check(self) -> Dict:
    """Fetch response headers via curl and flag missing security headers."""
    outcome = self._run_command(["curl", "-s", "-I", "-k", self.target])
    if outcome["success"]:
        self._parse_security_headers(outcome["stdout"])
    return outcome
def _parse_security_headers(self, headers: str):
    """Record one finding per recommended security header absent in the response."""
    expected = {
        "X-Frame-Options": ("Missing X-Frame-Options", "Medium", "Clickjacking protection"),
        "X-Content-Type-Options": ("Missing X-Content-Type-Options", "Low", "MIME type sniffing protection"),
        "X-XSS-Protection": ("Missing X-XSS-Protection", "Low", "XSS filter"),
        "Strict-Transport-Security": ("Missing HSTS Header", "Medium", "HTTPS enforcement"),
        "Content-Security-Policy": ("Missing Content-Security-Policy", "Medium", "XSS/injection protection"),
    }
    # Case-insensitive presence check over the raw header block.
    haystack = headers.lower()
    for name, (title, severity, purpose) in expected.items():
        if name.lower() in haystack:
            continue
        self.scan_result.vulnerabilities.append(Vulnerability(
            title=title,
            severity=severity,
            cvss_score=self._severity_to_cvss(severity),
            cvss_vector="",
            description=f"The {name} header is not set. This header provides {purpose}.",
            affected_endpoint=self.target,
            impact=f"Missing {purpose} could lead to attacks",
            poc_request=f"curl -I {self.target}",
            poc_response=headers[:500],
            poc_payload="N/A - Header check",
            remediation=f"Add the {name} header to all HTTP responses",
            cwe_id="CWE-693"
        ))
def run_whatweb_scan(self) -> Dict:
    """Run whatweb fingerprinting and record detected technologies."""
    outcome = self._run_command(["whatweb", "-a", "3", "--color=never", self.target])
    if outcome["stdout"]:
        # whatweb prints technologies in square brackets, e.g. [Apache].
        detected = re.findall(r'\[([^\]]+)\]', outcome["stdout"])
        self.scan_result.technologies.extend(detected[:20])
        print(f"  [+] Technologies: {', '.join(detected[:10])}")
    return outcome
def _severity_to_cvss(self, severity: str) -> float:
"""Convert severity to CVSS score"""
mapping = {
"critical": 9.5,
"high": 7.5,
"medium": 5.5,
"low": 3.0,
"info": 0.0,
"unknown": 0.0
}
return mapping.get(severity.lower(), 0.0)
def run_full_scan(self) -> ScanResult:
    """Execute the full pentest pipeline: recon, scanning, targeted tests.

    Returns the populated ScanResult with scan_completed stamped.
    """
    sep = "=" * 60
    rule = "-" * 40
    print(f"\n{sep}")
    print(f"[*] Starting Full Pentest Scan on: {self.target}")
    print(f"{sep}\n")
    # Phase 1: Reconnaissance
    print("[Phase 1] Reconnaissance")
    print(rule)
    print("[*] Running port scan...")
    self.run_nmap_scan()
    print("\n[*] Running technology detection...")
    self.run_whatweb_scan()
    print("\n[*] Checking security headers...")
    self.run_http_security_check()
    # Phase 2: Broad vulnerability scanning
    print(f"\n[Phase 2] Vulnerability Scanning")
    print(rule)
    print("[*] Running Nuclei scan...")
    self.run_nuclei_scan()
    print("\n[*] Running Nikto scan...")
    self.run_nikto_scan()
    # Phase 3: Targeted vulnerability tests
    print(f"\n[Phase 3] Specific Vulnerability Tests")
    print(rule)
    print("[*] Testing for SQL Injection...")
    self.run_sqlmap_scan()
    print("\n[*] Running directory enumeration...")
    self.run_ffuf_scan()
    # Stamp completion and print the summary footer.
    self.scan_result.scan_completed = datetime.now().isoformat()
    print(f"\n{sep}")
    print(f"[*] Scan Complete!")
    print(f"    - Tools Executed: {len(self.scan_result.tools_executed)}")
    print(f"    - Vulnerabilities Found: {len(self.scan_result.vulnerabilities)}")
    print(f"    - Open Ports: {len(self.scan_result.open_ports)}")
    print(f"{sep}\n")
    return self.scan_result
def run_quick_scan(self) -> ScanResult:
    """Run a reduced scan: top 100 ports, header check, and nuclei only."""
    sep = "=" * 60
    print(f"\n{sep}")
    print(f"[*] Starting Quick Scan on: {self.target}")
    print(f"{sep}\n")
    print("[*] Running port scan (top 100 ports)...")
    self.run_nmap_scan(ports="1-100")
    print("\n[*] Checking security headers...")
    self.run_http_security_check()
    print("\n[*] Running Nuclei scan...")
    self.run_nuclei_scan()
    self.scan_result.scan_completed = datetime.now().isoformat()
    print(f"\n{sep}")
    print(f"[*] Quick Scan Complete!")
    print(f"    - Vulnerabilities Found: {len(self.scan_result.vulnerabilities)}")
    print(f"{sep}\n")
    return self.scan_result
def get_findings_summary(self) -> Dict:
    """Summarize scan results: per-severity counts, port/tool totals, tech list."""
    breakdown = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Info": 0}
    for finding in self.scan_result.vulnerabilities:
        label = finding.severity.capitalize()
        # Severities outside the known buckets are silently excluded.
        if label in breakdown:
            breakdown[label] += 1
    return {
        "target": self.target,
        "total_vulnerabilities": len(self.scan_result.vulnerabilities),
        "severity_breakdown": breakdown,
        "open_ports": len(self.scan_result.open_ports),
        "technologies": self.scan_result.technologies,
        "tools_executed": len(self.scan_result.tools_executed),
    }
def to_dict(self) -> Dict:
    """Serialize the full scan state into a plain dictionary for reporting."""
    sr = self.scan_result
    return {
        "target": sr.target,
        "scan_started": sr.scan_started,
        "scan_completed": sr.scan_completed,
        "tools_executed": sr.tools_executed,
        # Vulnerability dataclasses become plain dicts for JSON/HTML output.
        "vulnerabilities": [asdict(v) for v in sr.vulnerabilities],
        "open_ports": sr.open_ports,
        "technologies": sr.technologies,
        "summary": self.get_findings_summary(),
    }
+698
View File
@@ -0,0 +1,698 @@
#!/usr/bin/env python3
"""
Professional Pentest Report Generator
Generates detailed reports with PoCs, CVSS scores, requests/responses
"""
import base64
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any
import html
import logging
logger = logging.getLogger(__name__)
class ReportGenerator:
    """Generates professional penetration testing reports"""
    def __init__(self, scan_results: Dict, llm_analysis: str = ""):
        # Raw scan output (the dict produced by the scanner's to_dict()).
        self.scan_results = scan_results
        # Optional markdown analysis text produced by the LLM (may be empty).
        self.llm_analysis = llm_analysis
        # Creation timestamp; used to name report/result artifacts on disk.
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
def _get_severity_color(self, severity: str) -> str:
"""Get color for severity level"""
colors = {
"critical": "#dc3545",
"high": "#fd7e14",
"medium": "#ffc107",
"low": "#17a2b8",
"info": "#6c757d"
}
return colors.get(severity.lower(), "#6c757d")
def _get_severity_badge(self, severity: str) -> str:
"""Get HTML badge for severity"""
color = self._get_severity_color(severity)
return f'<span class="badge" style="background-color: {color}; color: white; padding: 5px 10px; border-radius: 4px;">{severity.upper()}</span>'
def _escape_html(self, text: str) -> str:
"""Escape HTML characters"""
if not text:
return ""
return html.escape(str(text))
def _format_code_block(self, code: str, language: str = "") -> str:
"""Format code block with syntax highlighting"""
escaped = self._escape_html(code)
return f'<pre><code class="language-{language}">{escaped}</code></pre>'
def embed_screenshot(self, filepath: str) -> str:
    """Read a screenshot file and return it as a base64 PNG data URI.

    Returns an empty string when the file is missing or unreadable, so a
    broken screenshot never breaks report generation.
    """
    shot = Path(filepath)
    if not shot.exists():
        return ""
    try:
        encoded = base64.b64encode(shot.read_bytes()).decode('ascii')
    except Exception:
        return ""
    return f"data:image/png;base64,{encoded}"
def build_screenshots_html(self, finding_id: str, screenshots_dir: str = "reports/screenshots") -> str:
    """Build an HTML grid of up to three embedded screenshots for a finding.

    Looks in {screenshots_dir}/{finding_id}/ for *.png files and embeds
    each as a base64 data URI; returns '' when nothing usable is found.
    """
    folder = Path(screenshots_dir) / finding_id
    if not folder.exists():
        return ""
    shots = sorted(folder.glob("*.png"))[:3]
    if not shots:
        return ""
    cards = ""
    for shot in shots:
        uri = self.embed_screenshot(str(shot))
        if not uri:
            continue
        # Caption derived from the file name: "step_one.png" -> "Step One".
        label = shot.stem.replace('_', ' ').title()
        cards += f"""
                <div class="screenshot-card">
                    <img src="{uri}" alt="{label}" />
                    <div class="screenshot-caption">{label}</div>
                </div>"""
    return f'<div class="screenshot-grid">{cards}</div>' if cards else ""
def generate_executive_summary(self) -> str:
    """Generate executive summary section"""
    summary = self.scan_results.get("summary", {})
    severity = summary.get("severity_breakdown", {})
    total = summary.get("total_vulnerabilities", 0)
    critical = severity.get("Critical", 0)
    high = severity.get("High", 0)
    medium = severity.get("Medium", 0)
    low = severity.get("Low", 0)
    # Overall risk is driven by the single worst severity bucket present.
    risk_level = "Critical" if critical > 0 else "High" if high > 0 else "Medium" if medium > 0 else "Low"
    # NOTE(review): bar widths scale 20% per finding and are not capped at
    # 100%, so 6+ findings of one severity overflow the chart — confirm intended.
    return f"""
    <div class="card executive-summary">
        <div class="card-header">
            <h2>Executive Summary</h2>
        </div>
        <div class="card-body">
            <div class="row">
                <div class="col-md-6">
                    <h4>Assessment Overview</h4>
                    <table class="table">
                        <tr><td><strong>Target:</strong></td><td>{self._escape_html(self.scan_results.get('target', 'N/A'))}</td></tr>
                        <tr><td><strong>Scan Started:</strong></td><td>{self.scan_results.get('scan_started', 'N/A')}</td></tr>
                        <tr><td><strong>Scan Completed:</strong></td><td>{self.scan_results.get('scan_completed', 'N/A')}</td></tr>
                        <tr><td><strong>Overall Risk Level:</strong></td><td>{self._get_severity_badge(risk_level)}</td></tr>
                    </table>
                </div>
                <div class="col-md-6">
                    <h4>Findings Summary</h4>
                    <div class="severity-chart">
                        <div class="severity-bar critical" style="width: {critical * 20}%">{critical} Critical</div>
                        <div class="severity-bar high" style="width: {high * 20}%">{high} High</div>
                        <div class="severity-bar medium" style="width: {medium * 20}%">{medium} Medium</div>
                        <div class="severity-bar low" style="width: {low * 20}%">{low} Low</div>
                    </div>
                    <p class="mt-3"><strong>Total Vulnerabilities:</strong> {total}</p>
                    <p><strong>Open Ports Found:</strong> {summary.get('open_ports', 0)}</p>
                    <p><strong>Tools Executed:</strong> {summary.get('tools_executed', 0)}</p>
                </div>
            </div>
        </div>
    </div>
    """
def generate_vulnerability_card(self, vuln: Dict, index: int) -> str:
    """Generate HTML card for a single vulnerability"""
    severity = vuln.get("severity", "Unknown")
    color = self._get_severity_color(severity)
    # Build references list (capped at the first 5 links).
    refs_html = ""
    if vuln.get("references"):
        refs_html = "<ul>"
        for ref in vuln.get("references", [])[:5]:
            refs_html += f'<li><a href="{self._escape_html(ref)}" target="_blank">{self._escape_html(ref)}</a></li>'
        refs_html += "</ul>"
    # Optional sections (CVSS vector, references, raw tool output) are
    # emitted only when the corresponding field is non-empty.
    return f"""
    <div class="vulnerability-card" id="vuln-{index}">
        <div class="vuln-header" style="border-left: 5px solid {color};">
            <div class="vuln-title">
                <h3>{self._escape_html(vuln.get('title', 'Unknown Vulnerability'))}</h3>
                <div class="vuln-meta">
                    {self._get_severity_badge(severity)}
                    <span class="cvss-score">CVSS: {vuln.get('cvss_score', 'N/A')}</span>
                    {f'<span class="cwe-id">CWE: {vuln.get("cwe_id")}</span>' if vuln.get('cwe_id') else ''}
                </div>
            </div>
        </div>
        <div class="vuln-body">
            <div class="vuln-section">
                <h4>Description</h4>
                <p>{self._escape_html(vuln.get('description', 'No description available'))}</p>
            </div>
            <div class="vuln-section">
                <h4>Affected Endpoint</h4>
                <code class="endpoint">{self._escape_html(vuln.get('affected_endpoint', 'N/A'))}</code>
            </div>
            <div class="vuln-section">
                <h4>Impact</h4>
                <p>{self._escape_html(vuln.get('impact', 'Impact not assessed'))}</p>
            </div>
            <div class="vuln-section poc-section">
                <h4>Proof of Concept (PoC)</h4>
                <div class="poc-item">
                    <h5>Request</h5>
                    {self._format_code_block(vuln.get('poc_request', 'N/A'), 'http')}
                </div>
                <div class="poc-item">
                    <h5>Payload</h5>
                    {self._format_code_block(vuln.get('poc_payload', 'N/A'), 'text')}
                </div>
                <div class="poc-item">
                    <h5>Response</h5>
                    {self._format_code_block(vuln.get('poc_response', 'N/A')[:1000], 'http')}
                </div>
            </div>
            {f'''<div class="vuln-section">
                <h4>CVSS Vector</h4>
                <code>{self._escape_html(vuln.get('cvss_vector', 'N/A'))}</code>
            </div>''' if vuln.get('cvss_vector') else ''}
            <div class="vuln-section remediation">
                <h4>Remediation</h4>
                <p>{self._escape_html(vuln.get('remediation', 'Consult vendor documentation for patches'))}</p>
            </div>
            {f'''<div class="vuln-section">
                <h4>References</h4>
                {refs_html}
            </div>''' if refs_html else ''}
            {f'''<div class="vuln-section tool-output">
                <h4>Raw Tool Output</h4>
                {self._format_code_block(vuln.get('tool_output', '')[:2000], 'text')}
            </div>''' if vuln.get('tool_output') else ''}
        </div>
    </div>
    """
def generate_open_ports_section(self) -> str:
    """Generate open ports section"""
    ports = self.scan_results.get("open_ports", [])
    # Omit the whole section when no ports were discovered.
    if not ports:
        return ""
    rows = ""
    for port in ports:
        rows += f"""
        <tr>
            <td>{port.get('port', 'N/A')}</td>
            <td>{port.get('protocol', 'N/A')}</td>
            <td>{self._escape_html(port.get('service', 'N/A'))}</td>
            <td>{self._escape_html(port.get('version', 'N/A'))}</td>
        </tr>
        """
    return f"""
    <div class="card">
        <div class="card-header">
            <h2>Open Ports & Services</h2>
        </div>
        <div class="card-body">
            <table class="table table-striped">
                <thead>
                    <tr>
                        <th>Port</th>
                        <th>Protocol</th>
                        <th>Service</th>
                        <th>Version</th>
                    </tr>
                </thead>
                <tbody>
                    {rows}
                </tbody>
            </table>
        </div>
    </div>
    """
def generate_tools_executed_section(self) -> str:
    """Generate tools executed section"""
    tools = self.scan_results.get("tools_executed", [])
    # Omit the section entirely when no tools were run.
    if not tools:
        return ""
    rows = ""
    for tool in tools:
        status = "Success" if tool.get("success") else "Failed"
        status_class = "text-success" if tool.get("success") else "text-danger"
        # Command line is truncated to 100 chars to keep the table readable.
        rows += f"""
        <tr>
            <td>{self._escape_html(tool.get('tool', 'N/A'))}</td>
            <td><code>{self._escape_html(tool.get('command', 'N/A')[:100])}</code></td>
            <td class="{status_class}">{status}</td>
            <td>{tool.get('timestamp', 'N/A')}</td>
        </tr>
        """
    return f"""
    <div class="card">
        <div class="card-header">
            <h2>Tools Executed</h2>
        </div>
        <div class="card-body">
            <table class="table table-striped">
                <thead>
                    <tr>
                        <th>Tool</th>
                        <th>Command</th>
                        <th>Status</th>
                        <th>Timestamp</th>
                    </tr>
                </thead>
                <tbody>
                    {rows}
                </tbody>
            </table>
        </div>
    </div>
    """
def generate_llm_analysis_section(self) -> str:
    """Generate the AI analysis section; returns '' when no analysis is set.

    The optional third-party ``mistune`` package renders the LLM's markdown
    to HTML. Previously the import was unguarded, so a missing package
    crashed the entire report; now the section degrades to HTML-escaped
    preformatted text instead.
    """
    if not self.llm_analysis:
        return ""
    try:
        import mistune
        analysis_html = mistune.html(self.llm_analysis)
    except ImportError:
        # mistune is optional; fall back to escaped raw markdown.
        analysis_html = f"<pre>{html.escape(self.llm_analysis)}</pre>"
    return f"""
    <div class="card">
        <div class="card-header">
            <h2>AI Security Analysis</h2>
        </div>
        <div class="card-body llm-analysis">
            {analysis_html}
        </div>
    </div>
    """
def generate_html_report(self) -> str:
"""Generate complete HTML report"""
vulnerabilities = self.scan_results.get("vulnerabilities", [])
# Sort vulnerabilities by severity
severity_order = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3, "Info": 4}
vulnerabilities.sort(key=lambda x: severity_order.get(x.get("severity", "Info").capitalize(), 5))
vuln_cards = ""
for i, vuln in enumerate(vulnerabilities, 1):
vuln_cards += self.generate_vulnerability_card(vuln, i)
# Table of contents
toc_items = ""
for i, vuln in enumerate(vulnerabilities, 1):
severity = vuln.get("severity", "Unknown")
color = self._get_severity_color(severity)
toc_items += f'<li><a href="#vuln-{i}" style="color: {color};">[{severity.upper()}] {self._escape_html(vuln.get("title", "Unknown")[:50])}</a></li>'
html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>NeuroSploitv2 - Penetration Test Report</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css">
<style>
:root {{
--bg-dark: #0d1117;
--bg-card: #161b22;
--border-color: #30363d;
--text-primary: #c9d1d9;
--text-secondary: #8b949e;
--accent-green: #00ff00;
--critical-color: #dc3545;
--high-color: #fd7e14;
--medium-color: #ffc107;
--low-color: #17a2b8;
}}
body {{
background-color: var(--bg-dark);
color: var(--text-primary);
font-family: 'Segoe UI', system-ui, sans-serif;
}}
.container {{
max-width: 1200px;
padding: 20px;
}}
.report-header {{
text-align: center;
padding: 40px 0;
border-bottom: 2px solid var(--accent-green);
margin-bottom: 30px;
}}
.report-header h1 {{
font-size: 2.5rem;
color: var(--accent-green);
text-shadow: 0 0 10px var(--accent-green);
margin-bottom: 10px;
}}
.card {{
background-color: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 8px;
margin-bottom: 20px;
}}
.card-header {{
background-color: rgba(0, 255, 0, 0.1);
border-bottom: 1px solid var(--border-color);
padding: 15px 20px;
}}
.card-header h2 {{
margin: 0;
color: var(--accent-green);
font-size: 1.3rem;
}}
.card-body {{
padding: 20px;
}}
.table {{
color: var(--text-primary);
}}
.table th {{
border-color: var(--border-color);
color: var(--accent-green);
}}
.table td {{
border-color: var(--border-color);
}}
.vulnerability-card {{
background-color: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 8px;
margin-bottom: 25px;
overflow: hidden;
}}
.vuln-header {{
padding: 20px;
background-color: rgba(0, 0, 0, 0.3);
}}
.vuln-title h3 {{
margin: 0 0 10px 0;
font-size: 1.2rem;
}}
.vuln-meta {{
display: flex;
gap: 15px;
align-items: center;
flex-wrap: wrap;
}}
.cvss-score {{
background-color: #333;
padding: 5px 10px;
border-radius: 4px;
font-family: monospace;
}}
.cwe-id {{
background-color: #1a365d;
padding: 5px 10px;
border-radius: 4px;
font-family: monospace;
}}
.vuln-body {{
padding: 20px;
}}
.vuln-section {{
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 1px solid var(--border-color);
}}
.vuln-section:last-child {{
border-bottom: none;
margin-bottom: 0;
}}
.vuln-section h4 {{
color: var(--accent-green);
font-size: 1rem;
margin-bottom: 10px;
}}
.vuln-section h5 {{
color: var(--text-secondary);
font-size: 0.9rem;
margin: 10px 0 5px 0;
}}
.poc-section {{
background-color: rgba(0, 0, 0, 0.2);
padding: 15px;
border-radius: 8px;
}}
.poc-item {{
margin-bottom: 15px;
}}
pre {{
background-color: #1e1e1e;
padding: 15px;
border-radius: 6px;
overflow-x: auto;
margin: 0;
}}
code {{
font-family: 'Fira Code', 'Consolas', monospace;
font-size: 0.85rem;
}}
.endpoint {{
background-color: #333;
padding: 8px 12px;
border-radius: 4px;
display: inline-block;
word-break: break-all;
}}
.remediation {{
background-color: rgba(0, 255, 0, 0.05);
border-left: 3px solid var(--accent-green);
padding-left: 15px;
}}
.severity-chart {{
display: flex;
flex-direction: column;
gap: 5px;
}}
.severity-bar {{
padding: 8px 15px;
border-radius: 4px;
font-weight: bold;
min-width: 100px;
}}
.severity-bar.critical {{ background-color: var(--critical-color); }}
.severity-bar.high {{ background-color: var(--high-color); color: #000; }}
.severity-bar.medium {{ background-color: var(--medium-color); color: #000; }}
.severity-bar.low {{ background-color: var(--low-color); }}
.toc {{
background-color: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 20px;
margin-bottom: 30px;
}}
.toc h3 {{
color: var(--accent-green);
margin-bottom: 15px;
}}
.toc ul {{
list-style: none;
padding: 0;
margin: 0;
}}
.toc li {{
padding: 5px 0;
}}
.toc a {{
text-decoration: none;
}}
.toc a:hover {{
text-decoration: underline;
}}
.llm-analysis {{
line-height: 1.8;
}}
.llm-analysis h2 {{
color: var(--accent-green);
border-bottom: 1px solid var(--border-color);
padding-bottom: 10px;
}}
.footer {{
text-align: center;
padding: 30px;
border-top: 1px solid var(--border-color);
margin-top: 30px;
color: var(--text-secondary);
}}
@media print {{
body {{
background-color: white;
color: black;
}}
.vulnerability-card {{
page-break-inside: avoid;
}}
}}
</style>
</head>
<body>
<div class="container">
<div class="report-header">
<h1>NeuroSploitv2</h1>
<p class="lead">Penetration Test Report</p>
<p class="text-muted">Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
</div>
{self.generate_executive_summary()}
<div class="toc">
<h3>Table of Contents - Vulnerabilities ({len(vulnerabilities)})</h3>
<ul>
{toc_items}
</ul>
</div>
{self.generate_open_ports_section()}
{self.generate_tools_executed_section()}
<div class="card">
<div class="card-header">
<h2>Vulnerability Details</h2>
</div>
<div class="card-body">
{vuln_cards if vuln_cards else '<p class="text-muted">No vulnerabilities found during the assessment.</p>'}
</div>
</div>
{self.generate_llm_analysis_section()}
<div class="footer">
<p>Report generated by <strong>NeuroSploitv2</strong> - AI-Powered Penetration Testing Framework</p>
<p class="small">This report is confidential and intended for authorized personnel only.</p>
</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<script>hljs.highlightAll();</script>
</body>
</html>
"""
return html
def save_report(self, output_dir: str = "reports") -> str:
    """Write the HTML report (plus per-finding screenshots) to disk.

    Creates {output_dir}/report_<sanitized-target>_<timestamp>/ containing
    the HTML file and copies of any screenshot folders whose names match
    finding ids. Returns the path of the written HTML file.
    """
    import shutil
    raw_target = self.scan_results.get("target_url", self.scan_results.get("target", "unknown"))
    # Sanitize the target into a filesystem-safe folder name fragment.
    safe_name = raw_target.replace("://", "_").replace("/", "_").rstrip("_")[:40]
    report_dir = os.path.join(output_dir, f"report_{safe_name}_{self.timestamp}")
    os.makedirs(report_dir, exist_ok=True)
    report_path = os.path.join(report_dir, f"pentest_report_{self.timestamp}.html")
    with open(report_path, 'w', encoding='utf-8') as handle:
        handle.write(self.generate_html_report())
    # Mirror per-finding screenshot folders into the report directory.
    screenshots_src = os.path.join("reports", "screenshots")
    if os.path.exists(screenshots_src):
        screenshots_dest = os.path.join(report_dir, "screenshots")
        for finding in self.scan_results.get("vulnerabilities", []):
            fid = finding.get("id", "")
            if not fid:
                continue
            src_dir = os.path.join(screenshots_src, str(fid))
            if not os.path.exists(src_dir):
                continue
            dest_dir = os.path.join(screenshots_dest, str(fid))
            os.makedirs(dest_dir, exist_ok=True)
            for shot in Path(src_dir).glob("*.png"):
                shutil.copy2(str(shot), os.path.join(dest_dir, shot.name))
    logger.info(f"Report saved to: {report_path}")
    return report_path
def save_json_report(self, output_dir: str = "results") -> str:
    """Write the raw scan results as pretty-printed JSON; return the path.

    Non-serializable values are stringified via ``default=str``.
    """
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, f"pentest_results_{self.timestamp}.json")
    with open(out_path, 'w', encoding='utf-8') as handle:
        json.dump(self.scan_results, handle, indent=2, default=str)
    logger.info(f"JSON results saved to: {out_path}")
    return out_path
+761
View File
@@ -0,0 +1,761 @@
#!/usr/bin/env python3
"""
NeuroSploit Security Sandbox Manager
Manages Docker-based security tool execution in an isolated container.
Provides high-level API for running Nuclei, Naabu, and other tools.
Architecture:
- Persistent sandbox container (neurosploit-sandbox) stays running
- Tools executed via `docker exec` for sub-second startup
- Output collected from container stdout + output files
- Resource limits enforced (2GB RAM, 2 CPU)
- Network isolation with controlled egress
"""
import asyncio
import json
import logging
import os
import re
import shlex
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
# Guard Docker SDK import
try:
import docker
from docker.errors import DockerException, NotFound, APIError
HAS_DOCKER = True
except ImportError:
HAS_DOCKER = False
logger.warning("Docker SDK not installed. Install with: pip install docker")
@dataclass
class SandboxResult:
    """Result from a sandboxed tool execution."""
    tool: str                           # tool name, e.g. "nuclei"
    command: str                        # full command line that was executed
    exit_code: int                      # process exit code
    stdout: str                         # captured standard output
    stderr: str                         # captured standard error
    duration_seconds: float             # wall-clock execution time
    findings: List[Dict] = field(default_factory=list)  # parsed structured findings
    error: Optional[str] = None         # error description, if execution failed
    started_at: Optional[str] = None  # ISO 8601 timestamp
    completed_at: Optional[str] = None  # ISO 8601 timestamp
    task_id: Optional[str] = None  # Unique execution ID (hex[:8])
# ---------------------------------------------------------------------------
# Nuclei output parser
# ---------------------------------------------------------------------------
def parse_nuclei_jsonl(output: str) -> List[Dict]:
    """Parse Nuclei JSONL output into structured finding dicts.

    Non-JSON and blank lines are skipped; severities outside the known
    set are normalized to "info".
    """
    results: List[Dict] = []
    known_severities = {"critical", "high", "medium", "low", "info"}
    for raw in output.strip().split("\n"):
        if not raw.strip():
            continue
        try:
            record = json.loads(raw)
            meta = record.get("info", {})
            labels = meta.get("tags", [])
            sev = meta.get("severity", "info")
            classification = meta.get("classification", {})
            results.append({
                "title": meta.get("name", "Unknown"),
                "severity": sev if sev in known_severities else "info",
                "vulnerability_type": labels[0] if labels else "vulnerability",
                "description": meta.get("description", ""),
                "affected_endpoint": record.get("matched-at", ""),
                "evidence": record.get("matcher-name", ""),
                "template_id": record.get("template-id", ""),
                "curl_command": record.get("curl-command", ""),
                "remediation": meta.get("remediation", "Review and fix the vulnerability"),
                "references": meta.get("reference", []),
                "cwe": classification.get("cwe-id", []),
                "cvss_score": classification.get("cvss-score", 0),
            })
        except (json.JSONDecodeError, KeyError):
            continue
    return results
# ---------------------------------------------------------------------------
# Naabu output parser
# ---------------------------------------------------------------------------
def parse_naabu_output(output: str) -> List[Dict]:
    """Parse Naabu output (JSON or host:port text) into port findings.

    Deduplicates host:port pairs, preserving first-seen order. Lines that
    are valid JSON but not objects (e.g. a bare number) previously raised
    an uncaught AttributeError on ``.get``; they now fall through to the
    text-mode regex and are skipped if unmatched.
    """
    findings: List[Dict] = []
    seen = set()

    def _add(host, port):
        # Record a host/port pair exactly once.
        key = f"{host}:{port}"
        if key not in seen:
            seen.add(key)
            findings.append({
                "host": host,
                "port": port,
                "protocol": "tcp",
            })

    for line in output.strip().split("\n"):
        line = line.strip()
        if not line:
            continue
        # Naabu JSON mode: {"host":"x","ip":"y","port":80}
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            data = None
        if isinstance(data, dict):
            _add(data.get("host", data.get("ip", "")), data.get("port", 0))
            continue
        # Text mode: host:port
        match = re.match(r"^(.+?):(\d+)$", line)
        if match:
            _add(match.group(1), int(match.group(2)))
    return findings
class BaseSandbox(ABC):
    """Abstract interface for sandbox implementations (legacy shared + per-scan Kali)."""
    # Bring the sandbox backend up; returns (success, human-readable message).
    @abstractmethod
    async def initialize(self) -> Tuple[bool, str]: ...
    # True once the sandbox is ready to execute tools.
    @property
    @abstractmethod
    def is_available(self) -> bool: ...
    # Tear the sandbox down and release its resources.
    @abstractmethod
    async def stop(self): ...
    # Report sandbox status plus the tools detected inside it.
    @abstractmethod
    async def health_check(self) -> Dict: ...
    # --- Tool runners --------------------------------------------------
    # Each runner returns a SandboxResult carrying stdout/stderr/exit_code
    # and, where the tool output is parseable, structured `findings`.
    @abstractmethod
    async def run_nuclei(self, target, templates=None, severity=None,
                         tags=None, rate_limit=150, timeout=600) -> "SandboxResult": ...
    @abstractmethod
    async def run_naabu(self, target, ports=None, top_ports=None,
                        scan_type="s", rate=1000, timeout=300) -> "SandboxResult": ...
    @abstractmethod
    async def run_httpx(self, targets, timeout=120) -> "SandboxResult": ...
    @abstractmethod
    async def run_subfinder(self, domain, timeout=120) -> "SandboxResult": ...
    @abstractmethod
    async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> "SandboxResult": ...
    # Run any allow-listed tool with a raw argument string.
    @abstractmethod
    async def run_tool(self, tool, args, timeout=300) -> "SandboxResult": ...
    # Execute an arbitrary shell command (used by the interactive terminal agent).
    @abstractmethod
    async def execute_raw(self, command, timeout=300) -> "SandboxResult": ...
class SandboxManager(BaseSandbox):
    """
    Legacy shared sandbox: persistent Docker container running security tools.
    Tools are executed via `docker exec` for fast invocation.
    Used by MCP server and terminal API (no scan_id context).
    """
    SANDBOX_IMAGE = "neurosploit-sandbox:latest"
    SANDBOX_CONTAINER = "neurosploit-sandbox"
    DEFAULT_TIMEOUT = 300  # 5 minutes
    MAX_OUTPUT = 2 * 1024 * 1024  # 2MB
    # Known install commands for tools not pre-installed in the sandbox
    KNOWN_INSTALLS = {
        "wpscan": "gem install wpscan 2>&1",
        "joomscan": "pip3 install joomscan 2>&1",
        "dirsearch": "pip3 install dirsearch 2>&1",
        "commix": "pip3 install commix 2>&1",
        "wfuzz": "pip3 install wfuzz 2>&1",
        "sslyze": "pip3 install sslyze 2>&1",
        "retire": "npm install -g retire 2>&1",
        "testssl": "apt-get update -qq && apt-get install -y -qq testssl.sh 2>&1",
        "trufflehog": "pip3 install trufflehog 2>&1",
        "gitleaks": "GO111MODULE=on go install github.com/gitleaks/gitleaks/v8@latest 2>&1",
    }

    def __init__(self):
        self._client: Optional[Any] = None       # docker SDK client
        self._container: Optional[Any] = None    # running sandbox container handle
        self._available = False
        self._temp_installed: set = set()  # Tools temporarily installed for cleanup

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    async def initialize(self) -> Tuple[bool, str]:
        """Initialize Docker client and ensure sandbox is running.

        Returns:
            (success, message) — message explains the failure when success is False.
        """
        if not HAS_DOCKER:
            return False, "Docker SDK not installed"
        try:
            self._client = docker.from_env()
            self._client.ping()
        except Exception as e:
            return False, f"Docker not available: {e}"
        # Reuse an already-running sandbox; remove a stopped/stale one.
        try:
            container = self._client.containers.get(self.SANDBOX_CONTAINER)
            if container.status == "running":
                self._container = container
                self._available = True
                return True, "Sandbox already running"
            else:
                container.remove(force=True)
        except NotFound:
            pass
        # Check if image exists
        try:
            self._client.images.get(self.SANDBOX_IMAGE)
        except NotFound:
            return False, (
                f"Sandbox image '{self.SANDBOX_IMAGE}' not found. "
                "Build with: cd docker && docker compose -f docker-compose.sandbox.yml build"
            )
        # Start sandbox container with resource limits and minimal privileges
        # (NET_RAW/NET_ADMIN are needed for SYN scans, e.g. naabu/nmap).
        try:
            self._container = self._client.containers.run(
                self.SANDBOX_IMAGE,
                command="sleep infinity",
                name=self.SANDBOX_CONTAINER,
                detach=True,
                restart_policy={"Name": "unless-stopped"},
                network_mode="bridge",
                mem_limit="2g",
                cpu_period=100000,
                cpu_quota=200000,  # 2 CPUs
                cap_add=["NET_RAW", "NET_ADMIN"],
                security_opt=["no-new-privileges:true"],
            )
            self._available = True
            return True, "Sandbox started"
        except Exception as e:
            return False, f"Failed to start sandbox: {e}"

    @property
    def is_available(self) -> bool:
        """Check if sandbox is ready for tool execution."""
        return self._available and self._container is not None

    async def stop(self):
        """Stop and remove the sandbox container (best-effort)."""
        if self._container:
            try:
                self._container.stop(timeout=10)
                self._container.remove(force=True)
            except Exception:
                pass
        self._container = None
        self._available = False

    async def health_check(self) -> Dict:
        """Run health check on the sandbox container.

        Probes the core scanners' version banners and reports which ones responded.
        """
        if not self.is_available:
            return {"status": "unavailable", "tools": []}
        result = await self._exec("nuclei -version 2>&1 && naabu -version 2>&1 && nmap --version 2>&1 | head -1")
        tools = []
        if "nuclei" in result.stdout.lower():
            tools.append("nuclei")
        if "naabu" in result.stdout.lower():
            tools.append("naabu")
        if "nmap" in result.stdout.lower():
            tools.append("nmap")
        return {
            "status": "healthy" if tools else "degraded",
            "tools": tools,
            "container": self.SANDBOX_CONTAINER,
            "uptime": self._container.attrs.get("State", {}).get("StartedAt", "") if self._container else "",
        }

    # ------------------------------------------------------------------
    # Low-level execution
    # ------------------------------------------------------------------
    async def _exec(
        self, command: str, timeout: int = DEFAULT_TIMEOUT
    ) -> SandboxResult:
        """Execute a command inside the sandbox container.

        The blocking `docker exec` call runs in the default thread-pool
        executor; `timeout` is now actually enforced via asyncio.wait_for
        (previously the parameter was accepted but ignored, so a hung
        command would block forever).

        Args:
            command: Shell command, run via `bash -c`.
            timeout: Max seconds to wait for completion.
        """
        if not self.is_available:
            return SandboxResult(
                tool="sandbox", command=command, exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error="Sandbox not available",
            )
        started = datetime.utcnow()
        try:
            exec_result = await asyncio.wait_for(
                asyncio.get_running_loop().run_in_executor(
                    None,
                    lambda: self._container.exec_run(
                        cmd=["bash", "-c", command],
                        stdout=True,
                        stderr=True,
                        demux=True,
                    ),
                ),
                timeout=timeout,
            )
            duration = (datetime.utcnow() - started).total_seconds()
            stdout_raw, stderr_raw = exec_result.output
            stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
            stderr = (stderr_raw or b"").decode("utf-8", errors="replace")
            # Truncate oversized output
            if len(stdout) > self.MAX_OUTPUT:
                stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
            if len(stderr) > self.MAX_OUTPUT:
                stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"
            return SandboxResult(
                tool="sandbox",
                command=command,
                exit_code=exec_result.exit_code,
                stdout=stdout,
                stderr=stderr,
                duration_seconds=duration,
            )
        except asyncio.TimeoutError:
            # NOTE: the underlying docker exec keeps running in its worker
            # thread; we only stop waiting for it here.
            duration = (datetime.utcnow() - started).total_seconds()
            return SandboxResult(
                tool="sandbox", command=command, exit_code=-1,
                stdout="", stderr="", duration_seconds=duration,
                error=f"Command timed out after {timeout}s",
            )
        except Exception as e:
            duration = (datetime.utcnow() - started).total_seconds()
            return SandboxResult(
                tool="sandbox", command=command, exit_code=-1,
                stdout="", stderr="", duration_seconds=duration,
                error=str(e),
            )

    # ------------------------------------------------------------------
    # High-level tool APIs
    # ------------------------------------------------------------------
    async def run_nuclei(
        self,
        target: str,
        templates: Optional[str] = None,
        severity: Optional[str] = None,
        tags: Optional[str] = None,
        rate_limit: int = 150,
        timeout: int = 600,
    ) -> SandboxResult:
        """
        Run Nuclei vulnerability scanner against a target.
        Args:
            target: URL or host to scan
            templates: Specific template path/ID (e.g., "cves/2024/")
            severity: Filter by severity (critical,high,medium,low,info)
            tags: Filter by tags (e.g., "xss,sqli,lfi")
            rate_limit: Requests per second (default 150)
            timeout: Max execution time in seconds
        """
        cmd_parts = [
            "nuclei",
            "-u", shlex.quote(target),
            "-jsonl",
            "-rate-limit", str(rate_limit),
            "-silent",
            "-no-color",
        ]
        if templates:
            cmd_parts.extend(["-t", shlex.quote(templates)])
        if severity:
            cmd_parts.extend(["-severity", shlex.quote(severity)])
        if tags:
            cmd_parts.extend(["-tags", shlex.quote(tags)])
        command = " ".join(cmd_parts) + " 2>/dev/null"
        result = await self._exec(command, timeout=timeout)
        result.tool = "nuclei"
        # Parse findings
        if result.stdout:
            result.findings = parse_nuclei_jsonl(result.stdout)
        return result

    async def run_naabu(
        self,
        target: str,
        ports: Optional[str] = None,
        top_ports: Optional[int] = None,
        scan_type: str = "s",
        rate: int = 1000,
        timeout: int = 300,
    ) -> SandboxResult:
        """
        Run Naabu port scanner against a target.
        Args:
            target: IP address or hostname to scan
            ports: Specific ports (e.g., "80,443,8080" or "1-65535")
            top_ports: Use top N ports (e.g., 100, 1000)
            scan_type: SYN (s), CONNECT (c)
            rate: Packets per second
            timeout: Max execution time in seconds
        """
        cmd_parts = [
            "naabu",
            "-host", shlex.quote(target),
            "-json",
            "-rate", str(rate),
            "-silent",
            "-no-color",
        ]
        if ports:
            cmd_parts.extend(["-p", shlex.quote(ports)])
        elif top_ports:
            cmd_parts.extend(["-top-ports", str(top_ports)])
        else:
            # Sensible default when no port spec is given
            cmd_parts.extend(["-top-ports", "1000"])
        if scan_type:
            cmd_parts.extend(["-scan-type", scan_type])
        command = " ".join(cmd_parts) + " 2>/dev/null"
        result = await self._exec(command, timeout=timeout)
        result.tool = "naabu"
        # Parse port findings
        if result.stdout:
            result.findings = parse_naabu_output(result.stdout)
        return result

    async def run_httpx(
        self,
        targets: List[str],
        timeout: int = 120,
    ) -> SandboxResult:
        """
        Run HTTPX for HTTP probing and tech detection.
        Args:
            targets: List of URLs/hosts to probe
            timeout: Max execution time
        """
        # BUGFIX: feed one target per line via printf. The previous
        # `echo -e "{...}"` embedded shlex-quoted strings inside double
        # quotes, which broke for any target that actually needed quoting.
        quoted_targets = " ".join(shlex.quote(t) for t in targets)
        command = (
            f"printf '%s\\n' {quoted_targets} | httpx -silent -json "
            f"-title -tech-detect -status-code -content-length "
            f"-follow-redirects -no-color 2>/dev/null"
        )
        result = await self._exec(command, timeout=timeout)
        result.tool = "httpx"
        # Parse JSON lines
        if result.stdout:
            findings = []
            for line in result.stdout.strip().split("\n"):
                try:
                    data = json.loads(line)
                    findings.append({
                        "url": data.get("url", ""),
                        "status_code": data.get("status_code", 0),
                        "title": data.get("title", ""),
                        "technologies": data.get("tech", []),
                        "content_length": data.get("content_length", 0),
                        "webserver": data.get("webserver", ""),
                    })
                except json.JSONDecodeError:
                    continue
            result.findings = findings
        return result

    async def run_subfinder(
        self,
        domain: str,
        timeout: int = 120,
    ) -> SandboxResult:
        """
        Run Subfinder for subdomain enumeration.
        Args:
            domain: Base domain to enumerate
            timeout: Max execution time
        """
        command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
        result = await self._exec(command, timeout=timeout)
        result.tool = "subfinder"
        if result.stdout:
            subdomains = [s.strip() for s in result.stdout.strip().split("\n") if s.strip()]
            result.findings = [{"subdomain": s} for s in subdomains]
        return result

    async def run_nmap(
        self,
        target: str,
        ports: Optional[str] = None,
        scripts: bool = True,
        timeout: int = 300,
    ) -> SandboxResult:
        """
        Run Nmap network scanner.
        Args:
            target: IP/hostname to scan
            ports: Port specification
            scripts: Enable default scripts (-sC)
            timeout: Max execution time
        """
        cmd_parts = ["nmap", "-sV"]
        if scripts:
            cmd_parts.append("-sC")
        if ports:
            cmd_parts.extend(["-p", shlex.quote(ports)])
        cmd_parts.extend(["-oN", "/dev/stdout", shlex.quote(target)])
        command = " ".join(cmd_parts) + " 2>/dev/null"
        result = await self._exec(command, timeout=timeout)
        result.tool = "nmap"
        return result

    async def execute_raw(
        self,
        command: str,
        timeout: int = DEFAULT_TIMEOUT,
    ) -> SandboxResult:
        """
        Execute an arbitrary shell command inside the sandbox container.
        Used by the Terminal Agent for interactive infrastructure testing.
        Returns raw stdout/stderr/exit_code.
        Args:
            command: Shell command to execute (passed to sh -c)
            timeout: Max execution time in seconds
        """
        result = await self._exec(f"sh -c {shlex.quote(command)}", timeout=timeout)
        result.tool = "raw"
        return result

    async def run_tool(
        self,
        tool: str,
        args: str,
        timeout: int = DEFAULT_TIMEOUT,
    ) -> SandboxResult:
        """
        Run any tool available in the sandbox.
        Args:
            tool: Tool name (nuclei, naabu, nmap, httpx, etc.)
            args: Command-line arguments as string
            timeout: Max execution time
        """
        # Validate tool against an allow-list before building the shell command
        allowed_tools = {
            "nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
            "dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
            "whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
            "wfuzz", "arjun", "wafw00f", "waybackurls",
        }
        if tool not in allowed_tools:
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error=f"Tool '{tool}' not in allowed list: {sorted(allowed_tools)}",
            )
        command = f"{tool} {args} 2>&1"
        result = await self._exec(command, timeout=timeout)
        result.tool = tool
        return result

    # ------------------------------------------------------------------
    # Dynamic tool install / run / cleanup
    # ------------------------------------------------------------------
    async def install_tool(
        self, tool: str, install_cmd: str = ""
    ) -> Tuple[bool, str]:
        """
        Temporarily install a tool in the sandbox container.
        Args:
            tool: Tool name (must be in KNOWN_INSTALLS or provide install_cmd)
            install_cmd: Custom install command (overrides KNOWN_INSTALLS)
        Returns:
            (success, message) tuple
        """
        if not self.is_available:
            return False, "Sandbox not available"
        cmd = install_cmd or self.KNOWN_INSTALLS.get(tool, "")
        if not cmd:
            return False, f"No install command for '{tool}'"
        logger.info(f"Installing tool '{tool}' in sandbox...")
        result = await self._exec(cmd, timeout=120)
        success = result.exit_code == 0
        if success:
            # Track for later removal by run_and_cleanup / cleanup_temp_tools
            self._temp_installed.add(tool)
            logger.info(f"Tool '{tool}' installed successfully")
        else:
            logger.warning(f"Tool '{tool}' install failed: {result.stderr[:200]}")
        msg = result.stdout[:500] if success else result.stderr[:500]
        return success, msg

    async def run_and_cleanup(
        self,
        tool: str,
        args: str,
        cleanup: bool = True,
        timeout: int = 180,
    ) -> SandboxResult:
        """
        Install tool if needed, run it, collect output, then cleanup.
        This is the primary method for dynamic tool execution:
        1. Check if tool exists in sandbox
        2. Install if missing (from KNOWN_INSTALLS)
        3. Run the tool with given arguments
        4. Cleanup the installation if it was temporary
        Args:
            tool: Tool name
            args: Command-line arguments
            cleanup: Whether to remove temporarily installed tools
            timeout: Max execution time in seconds
        Returns:
            SandboxResult with stdout, stderr, findings
        """
        if not self.is_available:
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error="Sandbox not available",
            )
        # Check if tool exists (quote the name: it originates from callers
        # and is checked against the allow-list only later, in run_tool)
        check = await self._exec(f"which {shlex.quote(tool)} 2>/dev/null")
        if check.exit_code != 0:
            # Try to install
            ok, msg = await self.install_tool(tool)
            if not ok:
                return SandboxResult(
                    tool=tool, command=f"{tool} {args}", exit_code=-1,
                    stdout="", stderr=msg, duration_seconds=0,
                    error=f"Install failed: {msg}",
                )
        # Run tool
        result = await self.run_tool(tool, args, timeout=timeout)
        # Cleanup if temporarily installed
        if cleanup and tool in self._temp_installed:
            logger.info(f"Cleaning up temporarily installed tool: {tool}")
            await self._exec(
                f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
                f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
                f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
                f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
                timeout=30,
            )
            self._temp_installed.discard(tool)
        return result

    async def cleanup_temp_tools(self):
        """Remove all temporarily installed tools (best-effort, all package managers)."""
        if not self._temp_installed:
            return
        for tool in list(self._temp_installed):
            logger.info(f"Cleaning up temp tool: {tool}")
            await self._exec(
                f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
                f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
                f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
                f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
                timeout=30,
            )
            self._temp_installed.discard(tool)
# ---------------------------------------------------------------------------
# Global singleton
# ---------------------------------------------------------------------------
# Lazily-created shared SandboxManager instance; populated by get_sandbox().
_manager: Optional[SandboxManager] = None
# Alias for backward compatibility
LegacySandboxManager = SandboxManager
async def get_sandbox(scan_id: Optional[str] = None) -> BaseSandbox:
    """Get a sandbox instance.

    Args:
        scan_id: When given, a per-scan KaliSandbox is fetched from the
            container pool; when None, the legacy shared SandboxManager
            singleton is returned.

    Backward compatible: existing callers use get_sandbox() with no args,
    while the agent supplies scan_id for per-scan container isolation.
    If the per-scan path fails for any reason, we fall back to the shared
    sandbox rather than erroring out.
    """
    global _manager
    if scan_id is not None:
        try:
            from core.container_pool import get_pool
            return await get_pool().get_or_create(scan_id)
        except Exception as e:
            logger.warning(f"Per-scan sandbox failed ({e}), falling back to shared")
    # Shared path: create + initialize the singleton exactly once.
    # NOTE(review): a failed initialize() is still cached, so it is not
    # retried on later calls — presumably intentional to avoid hammering
    # Docker; confirm before changing.
    if _manager is None:
        _manager = SandboxManager()
        ok, msg = await _manager.initialize()
        if not ok:
            logger.warning(f"Sandbox initialization: {msg}")
    return _manager
+219
View File
@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
Scan Scheduler - Recurring task orchestration for NeuroSploit.
Supports cron expressions and interval-based scheduling for:
- Reconnaissance scans
- Vulnerability validation
- Re-analysis of previous findings
Uses APScheduler with SQLite persistence so jobs survive restarts.
"""
import json
import logging
from datetime import datetime
from typing import Dict, List, Optional
from pathlib import Path
logger = logging.getLogger(__name__)
try:
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
HAS_APSCHEDULER = True
except ImportError:
HAS_APSCHEDULER = False
logger.warning("APScheduler not installed. Scheduler disabled. Install with: pip install apscheduler>=3.10.0")
class ScanScheduler:
    """Manages recurring scan jobs via APScheduler.

    Jobs are persisted in a SQLAlchemy job store (SQLite by default), so
    schedules survive process restarts. Actual scan execution is delegated
    to an async callback registered via set_scan_callback().
    """

    def __init__(self, config: Dict, database_url: str = "sqlite:///./data/neurosploit_scheduler.db"):
        """Build the scheduler and register jobs declared in config['scheduler']['jobs'].

        Args:
            config: Application config; only the 'scheduler' section is used here.
            database_url: SQLAlchemy URL for the persistent job store.
        """
        self.config = config
        self.scheduler_config = config.get('scheduler', {})
        self.enabled = self.scheduler_config.get('enabled', False)
        self.jobs_meta: Dict[str, Dict] = {}  # job_id -> metadata (in-memory only)
        self._scan_callback = None
        if not HAS_APSCHEDULER:
            # Degrade gracefully: the API surface keeps working but schedules nothing.
            self.enabled = False
            self.scheduler = None
            return
        jobstores = {
            'default': SQLAlchemyJobStore(url=database_url)
        }
        self.scheduler = AsyncIOScheduler(jobstores=jobstores)
        # Load pre-configured jobs from config
        for job_config in self.scheduler_config.get('jobs', []):
            try:
                self.add_job(
                    job_id=job_config['id'],
                    target=job_config['target'],
                    scan_type=job_config.get('scan_type', 'quick'),
                    cron_expression=job_config.get('cron'),
                    interval_minutes=job_config.get('interval_minutes'),
                    agent_role=job_config.get('agent_role'),
                    llm_profile=job_config.get('llm_profile')
                )
            except Exception as e:
                logger.error(f"Failed to load scheduled job '{job_config.get('id', '?')}': {e}")

    def set_scan_callback(self, callback):
        """Set the callback function that executes scans.
        The callback signature should be:
            async def callback(target: str, scan_type: str,
                               agent_role: Optional[str], llm_profile: Optional[str]) -> Dict
        """
        self._scan_callback = callback

    def add_job(self, job_id: str, target: str, scan_type: str = "quick",
                cron_expression: Optional[str] = None,
                interval_minutes: Optional[int] = None,
                agent_role: Optional[str] = None,
                llm_profile: Optional[str] = None) -> Dict:
        """Schedule a recurring scan job.
        Args:
            job_id: Unique identifier for the job
            target: Target URL or IP
            scan_type: 'quick', 'full', 'recon', or 'analysis'
            cron_expression: Cron schedule (e.g., '0 */6 * * *' for every 6 hours)
            interval_minutes: Alternative to cron - run every N minutes
            agent_role: Optional agent role for AI analysis
            llm_profile: Optional LLM profile override
        Returns:
            Metadata dict for the new job, or {"error": ...} on bad input.
        """
        if not self.scheduler:
            return {"error": "Scheduler not available (APScheduler not installed)"}
        if cron_expression:
            trigger = CronTrigger.from_crontab(cron_expression)
            schedule_desc = f"cron: {cron_expression}"
        elif interval_minutes:
            trigger = IntervalTrigger(minutes=interval_minutes)
            schedule_desc = f"every {interval_minutes} minutes"
        else:
            return {"error": "Provide either cron_expression or interval_minutes"}
        # BUGFIX: pass job_id through to the execution hook. Previously the
        # hook reconstructed it as f"scan_{target}_{scan_type}", so last_run
        # and run_count were silently never updated for custom job ids.
        # replace_existing=True also migrates any persisted job that still
        # carries the old argument list when it is re-added.
        self.scheduler.add_job(
            self._execute_scheduled_scan,
            trigger=trigger,
            id=job_id,
            args=[job_id, target, scan_type, agent_role, llm_profile],
            replace_existing=True,
            name=f"scan_{target}_{scan_type}"
        )
        meta = {
            "id": job_id,
            "target": target,
            "scan_type": scan_type,
            "schedule": schedule_desc,
            "agent_role": agent_role,
            "llm_profile": llm_profile,
            "created_at": datetime.now().isoformat(),
            "last_run": None,
            "run_count": 0,
            "status": "active"
        }
        self.jobs_meta[job_id] = meta
        logger.info(f"Scheduled job '{job_id}': {target} ({scan_type}) - {schedule_desc}")
        return meta

    def remove_job(self, job_id: str) -> bool:
        """Remove a scheduled job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.remove_job(job_id)
            self.jobs_meta.pop(job_id, None)
            logger.info(f"Removed scheduled job: {job_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to remove job '{job_id}': {e}")
            return False

    def pause_job(self, job_id: str) -> bool:
        """Pause a scheduled job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.pause_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "paused"
            return True
        except Exception as e:
            logger.error(f"Failed to pause job '{job_id}': {e}")
            return False

    def resume_job(self, job_id: str) -> bool:
        """Resume a paused job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.resume_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "active"
            return True
        except Exception as e:
            logger.error(f"Failed to resume job '{job_id}': {e}")
            return False

    def list_jobs(self) -> List[Dict]:
        """List all scheduled jobs with metadata.

        Jobs restored from the persistent store have no in-memory metadata,
        so missing fields fall back to "unknown"/defaults.
        """
        jobs = []
        if self.scheduler:
            for job in self.scheduler.get_jobs():
                meta = self.jobs_meta.get(job.id, {})
                jobs.append({
                    "id": job.id,
                    "name": job.name,
                    "next_run": str(job.next_run_time) if job.next_run_time else None,
                    "target": meta.get("target", "unknown"),
                    "scan_type": meta.get("scan_type", "unknown"),
                    "schedule": meta.get("schedule", "unknown"),
                    "status": meta.get("status", "active"),
                    "last_run": meta.get("last_run"),
                    "run_count": meta.get("run_count", 0)
                })
        return jobs

    async def _execute_scheduled_scan(self, job_id: str, target: str, scan_type: str,
                                      agent_role: Optional[str],
                                      llm_profile: Optional[str]):
        """Execute a scheduled scan. Called by APScheduler with the args from add_job."""
        logger.info(f"Executing scheduled scan: {target} ({scan_type})")
        # Update run-tracking metadata for this specific job id.
        if job_id in self.jobs_meta:
            self.jobs_meta[job_id]["last_run"] = datetime.now().isoformat()
            self.jobs_meta[job_id]["run_count"] += 1
        if self._scan_callback:
            try:
                result = await self._scan_callback(target, scan_type, agent_role, llm_profile)
                logger.info(f"Scheduled scan completed: {target} ({scan_type})")
                return result
            except Exception as e:
                logger.error(f"Scheduled scan failed for {target}: {e}")
        else:
            logger.warning("No scan callback registered. Scheduled scan skipped.")

    def start(self):
        """Start the scheduler (no-op unless enabled and APScheduler is present)."""
        if self.scheduler and self.enabled:
            self.scheduler.start()
            logger.info(f"Scheduler started with {len(self.list_jobs())} jobs")

    def stop(self):
        """Stop the scheduler gracefully."""
        if self.scheduler and self.scheduler.running:
            self.scheduler.shutdown(wait=False)
            logger.info("Scheduler stopped")
+376
View File
@@ -0,0 +1,376 @@
#!/usr/bin/env python3
"""
Tool Installer - Installs required pentest tools for NeuroSploitv2
"""
import subprocess
import shutil
import os
import sys
import logging
from typing import Dict, List, Tuple
logger = logging.getLogger(__name__)
# Tool definitions with installation commands for different package managers
# Schema per tool:
#   description: short human-readable summary shown in the menu
#   check_cmd:   version command (currently informational only)
#   install:     package-manager name -> shell install command
#   binary:      executable name looked up on PATH to detect installation
PENTEST_TOOLS = {
    "nmap": {
        "description": "Network scanner and port mapper",
        "check_cmd": "nmap --version",
        "install": {
            "apt": "sudo apt-get install -y nmap",
            "yum": "sudo yum install -y nmap",
            "dnf": "sudo dnf install -y nmap",
            "brew": "brew install nmap",
            "pacman": "sudo pacman -S --noconfirm nmap"
        },
        "binary": "nmap"
    },
    "sqlmap": {
        "description": "SQL injection detection and exploitation",
        "check_cmd": "sqlmap --version",
        "install": {
            "apt": "sudo apt-get install -y sqlmap",
            "yum": "sudo pip3 install sqlmap",
            "dnf": "sudo dnf install -y sqlmap",
            "brew": "brew install sqlmap",
            "pacman": "sudo pacman -S --noconfirm sqlmap",
            "pip": "pip3 install sqlmap"
        },
        "binary": "sqlmap"
    },
    "nikto": {
        "description": "Web server vulnerability scanner",
        "check_cmd": "nikto -Version",
        "install": {
            "apt": "sudo apt-get install -y nikto",
            "yum": "sudo yum install -y nikto",
            "dnf": "sudo dnf install -y nikto",
            "brew": "brew install nikto",
            "pacman": "sudo pacman -S --noconfirm nikto"
        },
        "binary": "nikto"
    },
    "gobuster": {
        "description": "Directory/file & DNS busting tool",
        "check_cmd": "gobuster version",
        "install": {
            "apt": "sudo apt-get install -y gobuster",
            "brew": "brew install gobuster",
            "go": "go install github.com/OJ/gobuster/v3@latest"
        },
        "binary": "gobuster"
    },
    "nuclei": {
        "description": "Fast vulnerability scanner based on templates",
        "check_cmd": "nuclei -version",
        "install": {
            "go": "go install -v github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest",
            "brew": "brew install nuclei"
        },
        "binary": "nuclei"
    },
    "subfinder": {
        "description": "Subdomain discovery tool",
        "check_cmd": "subfinder -version",
        "install": {
            "go": "go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest",
            "brew": "brew install subfinder"
        },
        "binary": "subfinder"
    },
    "httpx": {
        "description": "HTTP toolkit for probing",
        "check_cmd": "httpx -version",
        "install": {
            "go": "go install -v github.com/projectdiscovery/httpx/cmd/httpx@latest",
            "brew": "brew install httpx"
        },
        "binary": "httpx"
    },
    "ffuf": {
        "description": "Fast web fuzzer",
        "check_cmd": "ffuf -V",
        "install": {
            "apt": "sudo apt-get install -y ffuf",
            "go": "go install github.com/ffuf/ffuf/v2@latest",
            "brew": "brew install ffuf"
        },
        "binary": "ffuf"
    },
    "hydra": {
        "description": "Network login cracker",
        "check_cmd": "hydra -h",
        "install": {
            "apt": "sudo apt-get install -y hydra",
            "yum": "sudo yum install -y hydra",
            "dnf": "sudo dnf install -y hydra",
            "brew": "brew install hydra",
            "pacman": "sudo pacman -S --noconfirm hydra"
        },
        "binary": "hydra"
    },
    "whatweb": {
        "description": "Web technology identifier",
        "check_cmd": "whatweb --version",
        "install": {
            "apt": "sudo apt-get install -y whatweb",
            "brew": "brew install whatweb",
            "gem": "sudo gem install whatweb"
        },
        "binary": "whatweb"
    },
    "wpscan": {
        "description": "WordPress vulnerability scanner",
        "check_cmd": "wpscan --version",
        "install": {
            "apt": "sudo apt-get install -y wpscan",
            "brew": "brew install wpscan",
            "gem": "sudo gem install wpscan"
        },
        "binary": "wpscan"
    },
    "curl": {
        "description": "HTTP client for requests",
        "check_cmd": "curl --version",
        "install": {
            "apt": "sudo apt-get install -y curl",
            "yum": "sudo yum install -y curl",
            "dnf": "sudo dnf install -y curl",
            "brew": "brew install curl",
            "pacman": "sudo pacman -S --noconfirm curl"
        },
        "binary": "curl"
    },
    "jq": {
        "description": "JSON processor for parsing outputs",
        "check_cmd": "jq --version",
        "install": {
            "apt": "sudo apt-get install -y jq",
            "yum": "sudo yum install -y jq",
            "dnf": "sudo dnf install -y jq",
            "brew": "brew install jq",
            "pacman": "sudo pacman -S --noconfirm jq"
        },
        "binary": "jq"
    },
    "dirsearch": {
        "description": "Web path discovery tool",
        "check_cmd": "dirsearch --version",
        "install": {
            "pip": "pip3 install dirsearch"
        },
        "binary": "dirsearch"
    },
    "wafw00f": {
        "description": "Web Application Firewall detection",
        "check_cmd": "wafw00f -h",
        "install": {
            "pip": "pip3 install wafw00f"
        },
        "binary": "wafw00f"
    }
}
class ToolInstaller:
    """Manages installation of pentest tools"""

    def __init__(self):
        # Resolve the host's package manager once, up front.
        self.package_manager = self._detect_package_manager()

    def _detect_package_manager(self) -> str:
        """Detect the system's package manager"""
        # Probe in order of preference; first binary found on PATH wins.
        for probe_binary, manager_name in (
            ("apt-get", "apt"),
            ("dnf", "dnf"),
            ("yum", "yum"),
            ("pacman", "pacman"),
            ("brew", "brew"),
        ):
            if shutil.which(probe_binary):
                return manager_name
        # Fallback to pip for Python tools
        return "pip"

    def check_tool_installed(self, tool_name: str) -> Tuple[bool, str]:
        """Check if a tool is installed and return its path"""
        info = PENTEST_TOOLS.get(tool_name)
        if not info:
            return False, ""
        binary = info.get("binary", tool_name)
        located = shutil.which(binary)
        if located:
            return True, located
        # Probe well-known install locations that may not be on PATH
        # (e.g. Go tools under ~/go/bin, snaps).
        for candidate in (
            f"/usr/bin/{binary}",
            f"/usr/local/bin/{binary}",
            f"/opt/{binary}/{binary}",
            os.path.expanduser(f"~/go/bin/{binary}"),
            f"/snap/bin/{binary}",
        ):
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return True, candidate
        return False, ""

    def get_tools_status(self) -> Dict[str, Dict]:
        """Get installation status of all tools"""
        status = {}
        for tool_name, tool_info in PENTEST_TOOLS.items():
            installed, path = self.check_tool_installed(tool_name)
            status[tool_name] = {
                "installed": installed,
                "path": path,
                "description": tool_info["description"],
            }
        return status

    def _resolve_install_cmd(self, tool_name: str):
        """Pick the best available install command for a tool, or None.

        Preference order: system package manager, then pip, then go/gem
        (the latter two only if their runtimes are present).
        """
        recipes = PENTEST_TOOLS[tool_name].get("install", {})
        if self.package_manager in recipes:
            return recipes[self.package_manager]
        if "pip" in recipes:
            return recipes["pip"]
        if "go" in recipes and shutil.which("go"):
            return recipes["go"]
        if "gem" in recipes and shutil.which("gem"):
            return recipes["gem"]
        return None

    def install_tool(self, tool_name: str) -> Tuple[bool, str]:
        """Install a specific tool"""
        if tool_name not in PENTEST_TOOLS:
            return False, f"Unknown tool: {tool_name}"
        cmd = self._resolve_install_cmd(tool_name)
        if cmd is None:
            return False, f"No installation method available for {tool_name} on this system"
        print(f"[*] Installing {tool_name}...")
        print(f"    Command: {cmd}")
        try:
            result = subprocess.run(
                cmd,
                shell=True,
                capture_output=True,
                text=True,
                timeout=300
            )
        except subprocess.TimeoutExpired:
            return False, "Installation timed out"
        except Exception as e:
            return False, f"Installation error: {str(e)}"
        if result.returncode != 0:
            return False, f"Installation failed: {result.stderr}"
        # Installer exited cleanly; double-check the binary actually landed.
        installed, path = self.check_tool_installed(tool_name)
        if installed:
            return True, f"Successfully installed {tool_name} at {path}"
        return True, "Installation completed but binary not found in PATH"

    def _install_missing(self, tool_names) -> Dict[str, Tuple[bool, str]]:
        """Install each named tool unless it is already present."""
        outcome = {}
        for name in tool_names:
            present, location = self.check_tool_installed(name)
            if present:
                outcome[name] = (True, f"Already installed at {location}")
            else:
                outcome[name] = self.install_tool(name)
        return outcome

    def install_all_tools(self) -> Dict[str, Tuple[bool, str]]:
        """Install all pentest tools"""
        return self._install_missing(list(PENTEST_TOOLS))

    def install_essential_tools(self) -> Dict[str, Tuple[bool, str]]:
        """Install only essential tools for basic pentesting"""
        return self._install_missing(
            ["nmap", "sqlmap", "nikto", "nuclei", "curl", "jq", "httpx", "ffuf"]
        )
def print_tools_menu():
    """Print the tools installation menu.

    Renders the numbered tool list with install status and the option
    legend, then returns (installer, tool_name_list) so the caller can
    map a numeric selection back to a tool.
    """
    installer = ToolInstaller()
    status = installer.get_tools_status()
    print("\n" + "="*70)
    print("  PENTEST TOOLS INSTALLATION MANAGER")
    print("="*70)
    print(f"\nDetected Package Manager: {installer.package_manager}")
    print("\nAvailable Tools:")
    print("-"*70)
    for i, (tool_name, info) in enumerate(status.items(), 1):
        # Removed unused `status_text` local; only the icon is displayed.
        status_icon = "[+]" if info["installed"] else "[-]"
        print(f"  {i:2}. {status_icon} {tool_name:15} - {info['description'][:40]}")
    print("-"*70)
    print("\nOptions:")
    print("  A   - Install ALL tools")
    print("  E   - Install ESSENTIAL tools only (nmap, sqlmap, nikto, nuclei, etc.)")
    print("  1-N - Install specific tool by number")
    print("  Q   - Return to main menu")
    print("-"*70)
    return installer, list(status.keys())
def run_installer_menu():
    """Run the interactive installer menu"""

    def _show_results(results):
        # One status line per tool, then wait for the user to acknowledge.
        for tool, (success, msg) in results.items():
            icon = "[+]" if success else "[!]"
            print(f"  {icon} {tool}: {msg}")
        input("\nPress Enter to continue...")

    while True:
        installer, tool_list = print_tools_menu()
        choice = input("\nSelect option: ").strip().upper()
        if choice == 'Q':
            return
        if choice == 'A':
            print("\n[*] Installing all tools...")
            _show_results(installer.install_all_tools())
        elif choice == 'E':
            print("\n[*] Installing essential tools...")
            _show_results(installer.install_essential_tools())
        else:
            # Anything else must be a 1-based tool number.
            try:
                idx = int(choice) - 1
            except ValueError:
                print("[!] Invalid input")
                continue
            if not (0 <= idx < len(tool_list)):
                print("[!] Invalid selection")
                continue
            success, msg = installer.install_tool(tool_list[idx])
            icon = "[+]" if success else "[!]"
            print(f"\n  {icon} {msg}")
            input("\nPress Enter to continue...")
# Allow running this module directly as a standalone installer CLI.
if __name__ == "__main__":
    run_installer_menu()
+110
View File
@@ -0,0 +1,110 @@
"""
NeuroSploit v3 - Tool Installation Registry for Kali Containers
Maps tool names to installation commands that work inside kalilinux/kali-rolling.
Tools grouped by method: pre-installed (base image), apt (Kali repos), go install, pip.
"""
from typing import Optional, Dict
class ToolRegistry:
    """Recipe book for installing pentest tools inside Kali containers.

    Every known tool lives in exactly one of four buckets:

    - ``PRE_INSTALLED``: baked into the Dockerfile.kali image, nothing to do.
    - ``APT_TOOLS``: installable on demand from the Kali apt repos.
    - ``GO_TOOLS``: built on demand with ``go install``.
    - ``PIP_TOOLS``: fetched on demand via pip.
    """

    # Tools pre-installed in Dockerfile.kali (no install needed).
    PRE_INSTALLED = {
        # Go tools (pre-compiled in builder stage)
        "nuclei", "naabu", "httpx", "subfinder", "katana", "dnsx",
        "uncover", "ffuf", "gobuster", "dalfox", "waybackurls",
        # APT tools (pre-installed in runtime stage)
        "nmap", "nikto", "sqlmap", "masscan", "whatweb",
        # System tools
        "curl", "wget", "git", "python3", "pip3", "go",
        "jq", "dig", "whois", "openssl", "netcat", "bash",
    }

    # APT packages available in Kali repos (installed on demand).
    APT_TOOLS: Dict[str, str] = {
        "wpscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq wpscan",
        "dirb": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dirb",
        "hydra": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hydra",
        "john": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq john",
        "hashcat": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hashcat",
        "testssl": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "testssl.sh": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "sslscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq sslscan",
        "enum4linux": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq enum4linux",
        "nbtscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nbtscan",
        "dnsrecon": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsrecon",
        "fierce": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq fierce",
        "amass": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq amass",
        "responder": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq responder",
        "medusa": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq medusa",
        "crackmapexec": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq crackmapexec",
    }

    # Go module paths installed on demand via `go install` (not pre-compiled).
    GO_TOOLS: Dict[str, str] = {
        "gau": "github.com/lc/gau/v2/cmd/gau@latest",
        "gitleaks": "github.com/gitleaks/gitleaks/v8@latest",
        "anew": "github.com/tomnomnom/anew@latest",
        "httprobe": "github.com/tomnomnom/httprobe@latest",
    }

    # Python tools installed on demand via pip.
    PIP_TOOLS: Dict[str, str] = {
        "dirsearch": "pip3 install --no-cache-dir --break-system-packages dirsearch",
        "wfuzz": "pip3 install --no-cache-dir --break-system-packages wfuzz",
        "arjun": "pip3 install --no-cache-dir --break-system-packages arjun",
        "wafw00f": "pip3 install --no-cache-dir --break-system-packages wafw00f",
        "sslyze": "pip3 install --no-cache-dir --break-system-packages sslyze",
        "commix": "pip3 install --no-cache-dir --break-system-packages commix",
        "trufflehog": "pip3 install --no-cache-dir --break-system-packages trufflehog",
        "retire": "pip3 install --no-cache-dir --break-system-packages retirejs",
    }

    def get_install_command(self, tool: str) -> Optional[str]:
        """Return the shell command that installs *tool* in a Kali container.

        Returns ``None`` when the tool is already baked into the image —
        or when no recipe exists for it (callers distinguish the two via
        :meth:`is_known`).
        """
        if tool in self.PRE_INSTALLED:
            # Already available in the base image; nothing to run.
            return None
        apt_recipe = self.APT_TOOLS.get(tool)
        if apt_recipe is not None:
            return apt_recipe
        module_path = self.GO_TOOLS.get(tool)
        if module_path is not None:
            # Build with the Go toolchain, then expose the binary on PATH.
            return (
                f"export GOPATH=/root/go && export PATH=$PATH:/root/go/bin && "
                f"go install -v {module_path} && "
                f"cp /root/go/bin/{tool} /usr/local/bin/ 2>/dev/null || true"
            )
        # pip recipe, or None when the tool is entirely unknown.
        return self.PIP_TOOLS.get(tool)

    def is_known(self, tool: str) -> bool:
        """Return True when any bucket has an entry for *tool*."""
        buckets = (self.PRE_INSTALLED, self.APT_TOOLS,
                   self.GO_TOOLS, self.PIP_TOOLS)
        return any(tool in bucket for bucket in buckets)

    def all_tools(self) -> Dict[str, str]:
        """Map every known tool name to its install-method label."""
        catalog: Dict[str, str] = {}
        # Same bucket order as the original: later buckets would win on a
        # (currently nonexistent) name collision.
        for bucket, method in (
            (self.PRE_INSTALLED, "pre-installed"),
            (self.APT_TOOLS, "apt"),
            (self.GO_TOOLS, "go"),
            (self.PIP_TOOLS, "pip"),
        ):
            for name in bucket:
                catalog[name] = method
        return catalog