Add files via upload

This commit is contained in:
Joas A Santos
2026-02-11 10:50:37 -03:00
committed by GitHub
parent 30acd5afc7
commit aac5b8f365
16 changed files with 7655 additions and 149 deletions

View File

@@ -1,35 +1,7 @@
{ {
"llm": { "llm": {
"default_profile": "ollama_whiterabbit", "default_profile": "gemini_pro_default",
"profiles": { "profiles": {
"ollama_whiterabbit": {
"provider": "ollama",
"model": "lazarevtill/Llama-3-WhiteRabbitNeo-8B-v2.0:q4_0",
"api_key": "",
"temperature": 0.8,
"max_tokens": 4096,
"input_token_limit": 8000,
"output_token_limit": 4000,
"cache_enabled": false,
"search_context_level": "high",
"pdf_support_enabled": false,
"guardrails_enabled": false,
"hallucination_mitigation_strategy": null
},
"ollama_llama3_default": {
"provider": "ollama",
"model": "llama3:8b",
"api_key": "",
"temperature": 0.7,
"max_tokens": 4096,
"input_token_limit": 8000,
"output_token_limit": 4000,
"cache_enabled": true,
"search_context_level": "medium",
"pdf_support_enabled": false,
"guardrails_enabled": true,
"hallucination_mitigation_strategy": null
},
"gemini_pro_default": { "gemini_pro_default": {
"provider": "gemini", "provider": "gemini",
"model": "gemini-pro", "model": "gemini-pro",
@@ -43,52 +15,25 @@
"pdf_support_enabled": true, "pdf_support_enabled": true,
"guardrails_enabled": true, "guardrails_enabled": true,
"hallucination_mitigation_strategy": "consistency_check" "hallucination_mitigation_strategy": "consistency_check"
},
"claude_opus_default": {
"provider": "claude",
"model": "claude-sonnet-4-20250514",
"api_key": "${ANTHROPIC_API_KEY}",
"temperature": 0.8,
"max_tokens": 8192,
"input_token_limit": 200000,
"output_token_limit": 8192,
"cache_enabled": false,
"search_context_level": "high",
"pdf_support_enabled": false,
"guardrails_enabled": false,
"hallucination_mitigation_strategy": null
},
"gpt_4o_default": {
"provider": "gpt",
"model": "gpt-4o",
"api_key": "${OPENAI_API_KEY}",
"temperature": 0.7,
"max_tokens": 4096,
"input_token_limit": 128000,
"output_token_limit": 4096,
"cache_enabled": true,
"search_context_level": "high",
"pdf_support_enabled": true,
"guardrails_enabled": true,
"hallucination_mitigation_strategy": "consistency_check"
},
"lmstudio_default": {
"provider": "lmstudio",
"model": "local-model",
"api_key": "",
"temperature": 0.7,
"max_tokens": 4096,
"input_token_limit": 32000,
"output_token_limit": 4096,
"cache_enabled": false,
"search_context_level": "medium",
"pdf_support_enabled": false,
"guardrails_enabled": true,
"hallucination_mitigation_strategy": null
} }
} }
}, },
"agent_roles": { "agent_roles": {
"pentest_generalist": {
"enabled": true,
"tools_allowed": [
"nmap",
"metasploit",
"burpsuite",
"sqlmap",
"hydra"
],
"description": "Performs comprehensive penetration tests across various domains.",
"methodology": ["OWASP-WSTG", "PTES", "OWASP-Top10-2021"],
"default_prompt": "auto_pentest",
"vuln_coverage": 100,
"ai_prompts": true
},
"bug_bounty_hunter": { "bug_bounty_hunter": {
"enabled": true, "enabled": true,
"tools_allowed": [ "tools_allowed": [
@@ -97,67 +42,11 @@
"burpsuite", "burpsuite",
"sqlmap" "sqlmap"
], ],
"description": "Focuses on web application vulnerabilities, leveraging recon and exploitation tools." "description": "Focuses on web application vulnerabilities with 100 vuln types.",
}, "methodology": ["OWASP-WSTG", "OWASP-Top10-2021"],
"blue_team_agent": { "default_prompt": "auto_pentest",
"enabled": true, "vuln_coverage": 100,
"tools_allowed": [], "ai_prompts": true
"description": "Analyzes logs and telemetry for threats, provides defensive strategies."
},
"exploit_expert": {
"enabled": true,
"tools_allowed": [
"metasploit",
"nmap"
],
"description": "Devises exploitation strategies and payloads for identified vulnerabilities."
},
"red_team_agent": {
"enabled": true,
"tools_allowed": [
"nmap",
"metasploit",
"hydra"
],
"description": "Plans and executes simulated attacks to test an organization's defenses."
},
"replay_attack_specialist": {
"enabled": true,
"tools_allowed": [
"burpsuite"
],
"description": "Identifies and leverages replay attack vectors in network traffic or authentication."
},
"pentest_generalist": {
"enabled": true,
"tools_allowed": [
"nmap",
"subfinder",
"nuclei",
"metasploit",
"burpsuite",
"sqlmap",
"hydra"
],
"description": "Performs comprehensive penetration tests across various domains."
},
"owasp_expert": {
"enabled": true,
"tools_allowed": [
"burpsuite",
"sqlmap"
],
"description": "Specializes in assessing web applications against OWASP Top 10 vulnerabilities."
},
"cwe_expert": {
"enabled": true,
"tools_allowed": [],
"description": "Analyzes code and reports for weaknesses based on MITRE CWE Top 25."
},
"malware_analyst": {
"enabled": true,
"tools_allowed": [],
"description": "Examines malware samples to understand functionality and identify IOCs."
} }
}, },
"methodologies": { "methodologies": {
@@ -171,20 +60,51 @@
"nmap": "/usr/bin/nmap", "nmap": "/usr/bin/nmap",
"metasploit": "/usr/bin/msfconsole", "metasploit": "/usr/bin/msfconsole",
"burpsuite": "/usr/bin/burpsuite", "burpsuite": "/usr/bin/burpsuite",
"sqlmap": "/usr/local/bin/sqlmap", "sqlmap": "/usr/bin/sqlmap",
"hydra": "/usr/bin/hydra", "hydra": "/usr/bin/hydra"
"nuclei": "/usr/local/bin/nuclei", },
"nikto": "/usr/bin/nikto", "mcp_servers": {
"gobuster": "/usr/bin/gobuster", "neurosploit_tools": {
"ffuf": "/usr/bin/ffuf", "transport": "stdio",
"subfinder": "/opt/homebrew/bin/subfinder", "command": "python3",
"httpx": "/usr/local/bin/httpx", "args": ["-m", "core.mcp_server"],
"whatweb": "/usr/bin/whatweb", "description": "NeuroSploit pentest tools: screenshots, payload delivery, DNS, port scan, tech detect, subdomain enum, findings, AI prompts, Nuclei scanner, Naabu port scanner, sandbox execution"
"curl": "/usr/bin/curl", }
"wpscan": "/usr/bin/wpscan", },
"dirsearch": "/usr/local/bin/dirsearch", "sandbox": {
"wafw00f": "/usr/local/bin/wafw00f", "enabled": false,
"jq": "/usr/bin/jq" "mode": "per_scan",
"image": "neurosploit-sandbox:latest",
"container_name": "neurosploit-sandbox",
"auto_start": false,
"kali": {
"enabled": true,
"image": "neurosploit-kali:latest",
"max_concurrent": 5,
"container_ttl_minutes": 60,
"auto_cleanup_orphans": true
},
"resources": {
"memory_limit": "2g",
"cpu_limit": 2.0
},
"tools": [
"nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
"dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
"whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
"wfuzz", "arjun", "wafw00f", "waybackurls"
],
"nuclei": {
"rate_limit": 150,
"timeout": 600,
"severity_filter": "critical,high,medium",
"auto_update_templates": true
},
"naabu": {
"rate": 1000,
"top_ports": 1000,
"timeout": 300
}
}, },
"output": { "output": {
"format": "json", "format": "json",

500
core/browser_validator.py Normal file
View File

@@ -0,0 +1,500 @@
#!/usr/bin/env python3
"""
Browser Validator - Playwright-based security finding validation.
Provides browser-based validation for security findings:
- Navigate to target URLs with payloads
- Detect security triggers (XSS dialogs, error patterns, etc.)
- Capture screenshots at each validation step
- Store evidence in structured per-finding directories
Screenshots are stored at: reports/screenshots/{finding_id}/
"""
import asyncio
import base64
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
try:
from playwright.async_api import async_playwright, Page, Browser, BrowserContext
HAS_PLAYWRIGHT = True
except ImportError:
HAS_PLAYWRIGHT = False
logger.debug("Playwright not installed. Browser validation disabled.")
# Known security trigger patterns in page content.
# Maps a vulnerability class -> substrings whose presence in the rendered page
# suggests the issue fired. Callers match these case-insensitively against
# page.content() (see BrowserValidator.validate_finding).
SECURITY_TRIGGERS = {
    # Script-injection markers reflected back into the page.
    'xss': ['<script>alert(', 'onerror=', 'onload=', 'javascript:'],
    # Database error strings leaked by common engines (MySQL, Postgres, Oracle,
    # SQLite, ODBC/MSSQL).
    'sqli': ['SQL syntax', 'mysql_fetch', 'pg_query', 'ORA-', 'sqlite3.OperationalError',
             'SQLSTATE', 'syntax error at or near', 'unclosed quotation mark'],
    # Local-file-inclusion: Unix passwd / Windows ini file contents.
    'lfi': ['root:x:0', '/etc/passwd', '[boot loader]', 'Windows\\system.ini'],
    # Command execution: id(1) / ls -l style output.
    'rce': ['uid=', 'gid=', 'groups=', 'total ', 'drwx'],
    # Verbose server-side error pages / stack traces.
    'error_disclosure': ['Stack Trace', 'Traceback (most recent call last)',
                         'Exception in thread', 'Fatal error', 'Parse error'],
}
class BrowserValidator:
    """Playwright-based browser validation for security findings.

    Drives a Chromium instance to reproduce findings, watch for JavaScript
    dialogs and known error patterns, and capture screenshot evidence under
    ``{screenshots_dir}/{finding_id}/``.
    """

    def __init__(self, screenshots_dir: str = "reports/screenshots"):
        # Root directory for per-finding screenshot folders; created eagerly
        # so later per-finding mkdir calls only add the leaf directory.
        self.screenshots_dir = Path(screenshots_dir)
        self.screenshots_dir.mkdir(parents=True, exist_ok=True)
        # Both populated by start() and cleared by stop().
        self.browser: Optional['Browser'] = None
        self._playwright = None
async def start(self, headless: bool = True):
    """Boot the Playwright driver and launch a Chromium instance.

    Raises:
        RuntimeError: if the optional playwright dependency is missing.
    """
    if not HAS_PLAYWRIGHT:
        raise RuntimeError(
            "Playwright not installed. Install with: pip install playwright && python -m playwright install chromium"
        )
    pw = await async_playwright().start()
    self._playwright = pw
    self.browser = await pw.chromium.launch(headless=headless)
    logger.info(f"Browser started (headless={headless})")
async def stop(self):
    """Close the browser and shut down the Playwright driver (idempotent)."""
    browser, self.browser = self.browser, self.browser
    if browser:
        await browser.close()
        self.browser = None
    driver = self._playwright
    if driver:
        await driver.stop()
        self._playwright = None
    logger.info("Browser stopped")
async def validate_finding(self, finding_id: str, url: str,
                           payload: Optional[str] = None,
                           method: str = "GET",
                           interaction_steps: Optional[List[Dict]] = None,
                           timeout: int = 30000) -> Dict:
    """Validate a security finding in a real browser.

    Navigates to *url*, optionally runs interaction steps, and looks for
    three classes of evidence: JavaScript dialogs (XSS), SECURITY_TRIGGERS
    patterns in the page content, and console errors/warnings. Screenshots
    of every stage are stored under ``{screenshots_dir}/{finding_id}/``.

    Args:
        finding_id: Unique identifier for the finding
        url: Target URL (may include payload in query params)
        payload: Optional payload description for logging
        method: HTTP method (currently GET-based navigation; accepted for
            interface compatibility but not otherwise used in this body)
        interaction_steps: Optional list of browser interaction steps
            (see _execute_step for the supported action dicts)
        timeout: Navigation timeout in milliseconds
    Returns:
        Dict with validation result, screenshots, evidence
    """
    if not self.browser:
        return {"error": "Browser not started. Call start() first."}
    # Per-finding evidence directory.
    finding_dir = self.screenshots_dir / finding_id
    finding_dir.mkdir(parents=True, exist_ok=True)
    # Result skeleton; fields are filled in as evidence is gathered.
    validation = {
        "finding_id": finding_id,
        "url": url,
        "payload": payload,
        "timestamp": datetime.now().isoformat(),
        "validated": False,
        "screenshots": [],
        "console_logs": [],
        "dialog_detected": False,
        "dialog_messages": [],
        "triggers_found": [],
        "evidence": "",
        "page_title": "",
        "status_code": None,
        "error": None
    }
    # Fresh, isolated context per validation; TLS errors ignored so
    # self-signed lab targets still load.
    context = await self.browser.new_context(
        ignore_https_errors=True,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    page = await context.new_page()
    # Capture console messages
    console_msgs = []
    page.on("console", lambda msg: console_msgs.append({
        "type": msg.type, "text": msg.text
    }))
    # Capture JavaScript dialogs (XSS alert/prompt/confirm detection);
    # dialogs are dismissed so navigation is never blocked.
    dialog_messages = []
    async def handle_dialog(dialog):
        dialog_messages.append({
            "type": dialog.type,
            "message": dialog.message
        })
        await dialog.dismiss()
    page.on("dialog", handle_dialog)
    # Track response status for the target URL.
    # NOTE(review): response_status is collected here but never copied into
    # the returned dict -- validation["status_code"] comes from page.goto().
    response_status = [None]
    def on_response(response):
        if response.url == url or response.url.rstrip('/') == url.rstrip('/'):
            response_status[0] = response.status
    page.on("response", on_response)
    try:
        # Navigate to the URL and wait for the network to go quiet.
        response = await page.goto(url, wait_until="networkidle", timeout=timeout)
        if response:
            validation["status_code"] = response.status
        validation["page_title"] = await page.title()
        # Take initial screenshot
        ss_path = finding_dir / "01_initial.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        validation["screenshots"].append(str(ss_path))
        # Execute interaction steps if provided; each step gets its own
        # numbered screenshot, and a failing step is logged but not fatal.
        if interaction_steps:
            for i, step in enumerate(interaction_steps):
                step_name = step.get('name', f'step_{i+2}')
                try:
                    await self._execute_step(page, step)
                    await page.wait_for_timeout(500)  # Brief pause
                    ss_path = finding_dir / f"{i+2:02d}_{step_name}.png"
                    await page.screenshot(path=str(ss_path))
                    validation["screenshots"].append(str(ss_path))
                except Exception as e:
                    logger.warning(f"Interaction step '{step_name}' failed: {e}")
        # Check for dialog detection (XSS): any dismissed dialog is treated
        # as conclusive evidence.
        if dialog_messages:
            validation["validated"] = True
            validation["dialog_detected"] = True
            validation["dialog_messages"] = dialog_messages
            validation["evidence"] = f"JavaScript dialog triggered: {dialog_messages[0]['message']}"
            ss_path = finding_dir / "xss_dialog_detected.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        # Check for security triggers in page content (case-insensitive).
        content = await page.content()
        for trigger_type, patterns in SECURITY_TRIGGERS.items():
            for pattern in patterns:
                if pattern.lower() in content.lower():
                    validation["triggers_found"].append({
                        "type": trigger_type,
                        "pattern": pattern
                    })
        # Triggers only mark the finding validated if a dialog did not
        # already do so; evidence cites the first match.
        if validation["triggers_found"] and not validation["validated"]:
            validation["validated"] = True
            first_trigger = validation["triggers_found"][0]
            validation["evidence"] = (
                f"Security trigger detected: {first_trigger['type']} "
                f"(pattern: {first_trigger['pattern']})"
            )
            ss_path = finding_dir / "trigger_detected.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        # Check console for errors that might indicate vulnerabilities;
        # the full console log is attached only when errors/warnings exist.
        error_msgs = [m for m in console_msgs if m["type"] in ("error", "warning")]
        if error_msgs:
            validation["console_logs"] = console_msgs
    except Exception as e:
        validation["error"] = str(e)
        logger.error(f"Browser validation error for {finding_id}: {e}")
        # Best-effort screenshot of whatever state the page is in.
        try:
            ss_path = finding_dir / "error.png"
            await page.screenshot(path=str(ss_path))
            validation["screenshots"].append(str(ss_path))
        except Exception:
            pass
    finally:
        await context.close()
    return validation
async def verify_stored_xss(
    self,
    finding_id: str,
    form_url: str,
    form_data: Dict[str, str],
    display_url: str,
    submit_selector: str = "button[type=submit], input[type=submit], button:not([type])",
    timeout: int = 30000,
) -> Dict:
    """Two-phase stored XSS verification using browser.

    Phase 1: Navigate to form page, fill fields with payload, submit.
    Phase 2: Navigate to display page, check for dialog (alert/confirm/prompt).

    Args:
        finding_id: Unique ID for this verification attempt
        form_url: URL containing the form to submit
        form_data: Dict mapping CSS selectors to values (payload in relevant fields)
        display_url: URL where stored content is displayed
        submit_selector: CSS selector(s) for submit button (comma-separated)
        timeout: Navigation timeout in ms
    Returns:
        Dict with verification results, dialog detection, screenshots
    """
    if not self.browser:
        return {"error": "Browser not started. Call start() first."}
    finding_dir = self.screenshots_dir / finding_id
    finding_dir.mkdir(parents=True, exist_ok=True)
    result = {
        "finding_id": finding_id,
        "form_url": form_url,
        "display_url": display_url,
        "timestamp": datetime.now().isoformat(),
        "phase1_success": False,
        "phase2_success": False,
        "xss_confirmed": False,
        "dialog_detected": False,
        "dialog_messages": [],
        "screenshots": [],
        "evidence": "",
        "error": None,
    }
    # Isolated context; TLS errors ignored for lab/self-signed targets.
    context = await self.browser.new_context(
        ignore_https_errors=True,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    page = await context.new_page()
    dialog_messages = []
    # Tag each dialog with the phase it fired in: only dialogs raised AFTER
    # phase1_success flips to True count as stored-XSS proof.
    async def handle_dialog(dialog):
        dialog_messages.append({
            "type": dialog.type,
            "message": dialog.message,
            "phase": "phase2" if result["phase1_success"] else "phase1"
        })
        await dialog.dismiss()
    page.on("dialog", handle_dialog)
    try:
        # === PHASE 1: Navigate to form and submit payload ===
        await page.goto(form_url, wait_until="networkidle", timeout=timeout)
        ss_path = finding_dir / "01_form_page.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        # Fill form fields; fall back from fill() to type() per selector,
        # and log (but tolerate) fields that cannot be filled at all.
        for selector, value in form_data.items():
            try:
                await page.fill(selector, value)
            except Exception:
                try:
                    await page.type(selector, value)
                except Exception as fill_err:
                    logger.warning(f"Could not fill {selector}: {fill_err}")
        ss_path = finding_dir / "02_form_filled.png"
        await page.screenshot(path=str(ss_path))
        result["screenshots"].append(str(ss_path))
        # Submit: try each candidate selector in order, click the first match.
        submitted = False
        for sel in submit_selector.split(","):
            sel = sel.strip()
            try:
                btn = await page.query_selector(sel)
                if btn:
                    await btn.click()
                    submitted = True
                    break
            except Exception:
                continue
        if not submitted and form_data:
            # Fallback: press Enter on last filled field
            last_sel = list(form_data.keys())[-1]
            try:
                await page.press(last_sel, "Enter")
            except Exception:
                pass
        # Wait for the post-submit navigation/XHR to settle; if the page
        # never goes idle, fall back to a fixed pause.
        try:
            await page.wait_for_load_state("networkidle", timeout=10000)
        except Exception:
            await page.wait_for_timeout(3000)
        ss_path = finding_dir / "03_after_submit.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        result["phase1_success"] = True
        # === PHASE 2: Navigate to display page ===
        await page.goto(display_url, wait_until="networkidle", timeout=timeout)
        await page.wait_for_timeout(1000)
        ss_path = finding_dir / "04_display_page.png"
        await page.screenshot(path=str(ss_path), full_page=True)
        result["screenshots"].append(str(ss_path))
        # Check for dialogs triggered on display page
        if dialog_messages:
            phase2_dialogs = [d for d in dialog_messages if d.get("phase") == "phase2"]
            if phase2_dialogs:
                result["xss_confirmed"] = True
                result["dialog_detected"] = True
                result["dialog_messages"] = dialog_messages
                result["evidence"] = (
                    f"Stored XSS CONFIRMED: JavaScript dialog triggered on display page. "
                    f"Dialog: {phase2_dialogs[0]['type']}('{phase2_dialogs[0]['message']}')"
                )
                result["phase2_success"] = True
                ss_path = finding_dir / "05_xss_confirmed.png"
                await page.screenshot(path=str(ss_path))
                result["screenshots"].append(str(ss_path))
            else:
                # Dialog fired while submitting -- reflected, not stored.
                result["evidence"] = (
                    "Dialog triggered during form submission (phase1), not on display page."
                )
        # Content-based fallback if no dialog: the payload landed unescaped
        # on the display page (dialog may have been blocked by CSP).
        if not result["xss_confirmed"]:
            content = await page.content()
            for _, payload_val in form_data.items():
                if payload_val in content:
                    payload_lower = payload_val.lower()
                    for tag in ["<script", "onerror=", "onload=", "<svg", "<img",
                                "onfocus=", "onclick=", "ontoggle"]:
                        if tag in payload_lower:
                            result["phase2_success"] = True
                            result["evidence"] = (
                                f"Stored payload with '{tag}' found unescaped on display page. "
                                f"Dialog may be blocked by CSP."
                            )
                            break
                    break
    except Exception as e:
        result["error"] = str(e)
        logger.error(f"Stored XSS verification error: {e}")
        # Best-effort screenshot of the failure state.
        try:
            ss_path = finding_dir / "error.png"
            await page.screenshot(path=str(ss_path))
            result["screenshots"].append(str(ss_path))
        except Exception:
            pass
    finally:
        await context.close()
    return result
async def _execute_step(self, page: 'Page', step: Dict):
"""Execute a single browser interaction step."""
action = step.get("action", "")
if action == "click":
await page.click(step["selector"])
elif action == "fill":
await page.fill(step["selector"], step["value"])
elif action == "type":
await page.type(step["selector"], step["value"])
elif action == "submit":
selector = step.get("selector", "button[type=submit]")
await page.click(selector)
elif action == "wait":
await page.wait_for_timeout(step.get("ms", 2000))
elif action == "navigate":
await page.goto(step["url"], wait_until="networkidle")
elif action == "select":
await page.select_option(step["selector"], step["value"])
elif action == "check":
await page.check(step["selector"])
elif action == "press":
await page.press(step.get("selector", "body"), step["key"])
else:
logger.warning(f"Unknown interaction action: {action}")
async def batch_validate(self, findings: List[Dict],
headless: bool = True) -> List[Dict]:
"""Validate multiple findings in sequence.
Args:
findings: List of dicts with 'finding_id', 'url', and optional 'payload'
headless: Run browser in headless mode
Returns:
List of validation results
"""
results = []
await self.start(headless=headless)
try:
for finding in findings:
result = await self.validate_finding(
finding_id=finding['finding_id'],
url=finding['url'],
payload=finding.get('payload'),
interaction_steps=finding.get('interaction_steps')
)
results.append(result)
finally:
await self.stop()
return results
def validate_finding_sync(finding_id: str, url: str,
                          payload: Optional[str] = None,
                          screenshots_dir: str = "reports/screenshots",
                          headless: bool = True) -> Dict:
    """Synchronous wrapper for browser validation.

    For use in synchronous code paths (e.g., BaseAgent). Spins up a
    throwaway BrowserValidator, runs one validation, and tears the
    browser down again.

    Fix: ``payload`` was annotated ``str = None`` (PEP 484 forbids
    implicit Optional); it is now ``Optional[str]``.

    Args:
        finding_id: Unique identifier for the finding.
        url: Target URL to validate.
        payload: Optional payload description, forwarded for logging.
        screenshots_dir: Root directory for screenshot evidence.
        headless: Run the browser without a visible window.
    Returns:
        The dict from BrowserValidator.validate_finding, or a
        ``{"skipped": True, ...}`` marker when validation cannot run.
    """
    if not HAS_PLAYWRIGHT:
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Playwright not installed"
        }

    async def _run():
        validator = BrowserValidator(screenshots_dir=screenshots_dir)
        await validator.start(headless=headless)
        try:
            return await validator.validate_finding(finding_id, url, payload)
        finally:
            await validator.stop()

    try:
        # asyncio.run refuses to start when a loop is already running in
        # this thread; that surfaces as RuntimeError below.
        return asyncio.run(_run())
    except RuntimeError:
        # Already in an async context - use nest_asyncio or skip
        logger.warning("Cannot run sync validation inside async context")
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Async context conflict"
        }
def embed_screenshot(filepath: str) -> str:
    """Return *filepath* as a base64 ``data:`` URI for inline HTML embedding.

    A missing file yields an empty string rather than raising.
    """
    target = Path(filepath)
    if not target.exists():
        return ""
    encoded = base64.b64encode(target.read_bytes()).decode('ascii')
    return "data:image/png;base64," + encoded

206
core/container_pool.py Normal file
View File

@@ -0,0 +1,206 @@
"""
NeuroSploit v3 - Container Pool
Global coordinator for per-scan Kali Linux containers.
Tracks all running sandbox containers, enforces max concurrent limits,
handles lifecycle management and orphan cleanup.
"""
import asyncio
import json
import logging
import threading
from datetime import datetime, timedelta
from typing import Dict, Optional
logger = logging.getLogger(__name__)
try:
import docker
from docker.errors import NotFound
HAS_DOCKER = True
except ImportError:
HAS_DOCKER = False
from core.kali_sandbox import KaliSandbox
class ContainerPool:
    """Global pool managing per-scan KaliSandbox instances.

    One pool per process. Enforces a max number of concurrent containers
    and handles lifecycle (create, destroy, TTL expiry, orphan cleanup).
    All mutation of the sandbox map is guarded by a single asyncio.Lock.

    Fix: ``cleanup_all`` and ``cleanup_expired`` previously awaited
    ``self.destroy(...)`` while already holding ``self._lock``;
    ``asyncio.Lock`` is not reentrant, so both calls deadlocked. The
    destroy logic now lives in ``_destroy_locked`` (caller holds the
    lock) and ``destroy`` is a locking wrapper around it.
    """

    def __init__(
        self,
        image: str = "neurosploit-kali:latest",
        max_concurrent: int = 5,
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        container_ttl_minutes: int = 60,
    ):
        """Configure pool limits.

        Args:
            image: Docker image used for new sandboxes.
            max_concurrent: Maximum simultaneously active containers.
            memory_limit: Docker memory cap per container (e.g. "2g").
            cpu_limit: CPU cores allotted per container.
            container_ttl_minutes: Lifetime after which cleanup_expired()
                destroys a container.
        """
        self.image = image
        self.max_concurrent = max_concurrent
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.container_ttl = timedelta(minutes=container_ttl_minutes)
        # scan_id -> sandbox; every mutation happens under _lock.
        self._sandboxes: Dict[str, "KaliSandbox"] = {}
        # NOTE: asyncio.Lock is NOT reentrant; helpers that expect the lock
        # to already be held are suffixed "_locked".
        self._lock = asyncio.Lock()

    @classmethod
    def from_config(cls) -> "ContainerPool":
        """Create pool from config/config.json sandbox section.

        Falls back to the constructor defaults when the file is missing
        or malformed (logged at warning level).
        """
        try:
            with open("config/config.json") as f:
                cfg = json.load(f)
            sandbox_cfg = cfg.get("sandbox", {})
            kali_cfg = sandbox_cfg.get("kali", {})
            resources = sandbox_cfg.get("resources", {})
            return cls(
                image=kali_cfg.get("image", "neurosploit-kali:latest"),
                max_concurrent=kali_cfg.get("max_concurrent", 5),
                memory_limit=resources.get("memory_limit", "2g"),
                cpu_limit=resources.get("cpu_limit", 2.0),
                container_ttl_minutes=kali_cfg.get("container_ttl_minutes", 60),
            )
        except Exception as e:
            logger.warning(f"Could not load pool config, using defaults: {e}")
            return cls()

    async def get_or_create(self, scan_id: str) -> "KaliSandbox":
        """Get existing sandbox for scan_id, or create a new one.

        Raises RuntimeError if max_concurrent limit reached or the new
        sandbox fails to initialize.
        """
        async with self._lock:
            # Return existing (dropping it first if its container died).
            if scan_id in self._sandboxes:
                sb = self._sandboxes[scan_id]
                if sb.is_available:
                    return sb
                else:
                    del self._sandboxes[scan_id]
            # Check limit against live containers only.
            active = sum(1 for sb in self._sandboxes.values() if sb.is_available)
            if active >= self.max_concurrent:
                raise RuntimeError(
                    f"Max concurrent containers ({self.max_concurrent}) reached. "
                    f"Active scans: {list(self._sandboxes.keys())}"
                )
            # Create new
            sb = KaliSandbox(
                scan_id=scan_id,
                image=self.image,
                memory_limit=self.memory_limit,
                cpu_limit=self.cpu_limit,
            )
            ok, msg = await sb.initialize()
            if not ok:
                raise RuntimeError(f"Failed to create Kali sandbox: {msg}")
            self._sandboxes[scan_id] = sb
            logger.info(
                f"Pool: created container for scan {scan_id} "
                f"({active + 1}/{self.max_concurrent} active)"
            )
            return sb

    async def _destroy_locked(self, scan_id: str):
        """Stop/remove one sandbox. Caller MUST already hold self._lock."""
        sb = self._sandboxes.pop(scan_id, None)
        if sb:
            await sb.stop()
            logger.info(f"Pool: destroyed container for scan {scan_id}")

    async def destroy(self, scan_id: str):
        """Stop and remove the container for a specific scan."""
        async with self._lock:
            await self._destroy_locked(scan_id)

    async def cleanup_all(self):
        """Destroy all managed containers (shutdown hook)."""
        async with self._lock:
            # Iterate over a snapshot; _destroy_locked mutates the dict.
            for sid in list(self._sandboxes.keys()):
                await self._destroy_locked(sid)
        logger.info("Pool: all containers destroyed")

    async def cleanup_orphans(self):
        """Find and remove neurosploit-* containers not tracked by this pool."""
        if not HAS_DOCKER:
            return
        try:
            client = docker.from_env()
            # Containers are labeled at creation time (see KaliSandbox).
            containers = client.containers.list(
                all=True,
                filters={"label": "neurosploit.type=kali-sandbox"},
            )
            async with self._lock:
                tracked = set(self._sandboxes.keys())
            removed = 0
            for c in containers:
                scan_id = c.labels.get("neurosploit.scan_id", "")
                if scan_id not in tracked:
                    # Best effort: stop may fail if already stopped.
                    try:
                        c.stop(timeout=5)
                    except Exception:
                        pass
                    try:
                        c.remove(force=True)
                        removed += 1
                        logger.info(f"Pool: removed orphan container {c.name}")
                    except Exception:
                        pass
            if removed:
                logger.info(f"Pool: cleaned up {removed} orphan containers")
        except Exception as e:
            logger.warning(f"Pool: orphan cleanup failed: {e}")

    async def cleanup_expired(self):
        """Remove containers that have exceeded their TTL."""
        now = datetime.utcnow()
        async with self._lock:
            expired = [
                sid for sid, sb in self._sandboxes.items()
                if sb._created_at and (now - sb._created_at) > self.container_ttl
            ]
            for sid in expired:
                logger.warning(f"Pool: container for scan {sid} exceeded TTL, destroying")
                await self._destroy_locked(sid)

    def list_sandboxes(self) -> Dict[str, Dict]:
        """List all tracked sandboxes with status."""
        result = {}
        for sid, sb in self._sandboxes.items():
            result[sid] = {
                "scan_id": sid,
                "container_name": sb.container_name,
                "available": sb.is_available,
                "installed_tools": sorted(sb._installed_tools),
                "created_at": sb._created_at.isoformat() if sb._created_at else None,
            }
        return result

    @property
    def active_count(self) -> int:
        # Count only sandboxes whose container is still live.
        return sum(1 for sb in self._sandboxes.values() if sb.is_available)
# ---------------------------------------------------------------------------
# Global singleton pool
# ---------------------------------------------------------------------------
_pool: Optional[ContainerPool] = None
_pool_lock = threading.Lock()


def get_pool() -> ContainerPool:
    """Return the process-wide ContainerPool, creating it on first use.

    Uses double-checked locking so the common path (pool already built)
    never takes the lock.
    """
    global _pool
    if _pool is not None:
        return _pool
    with _pool_lock:
        if _pool is None:
            _pool = ContainerPool.from_config()
    return _pool

392
core/kali_sandbox.py Normal file
View File

@@ -0,0 +1,392 @@
"""
NeuroSploit v3 - Kali Linux Per-Scan Sandbox
Each scan gets its own Docker container based on kalilinux/kali-rolling.
Tools installed on-demand the first time they are requested.
Container destroyed when scan completes.
"""
import asyncio
import json
import logging
import shlex
import time
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple, Set
logger = logging.getLogger(__name__)
try:
import docker
from docker.errors import DockerException, NotFound, APIError
HAS_DOCKER = True
except ImportError:
HAS_DOCKER = False
from core.sandbox_manager import (
BaseSandbox, SandboxResult,
parse_nuclei_jsonl, parse_naabu_output,
)
from core.tool_registry import ToolRegistry
class KaliSandbox(BaseSandbox):
    """Per-scan Docker container based on Kali Linux.

    Lifecycle: create -> install tools on demand -> execute -> destroy.
    Each instance owns exactly one container named 'neurosploit-{scan_id}'.
    """

    # Default per-command execution timeout, in seconds.
    DEFAULT_TIMEOUT = 300
    # Hard cap applied to each captured stdout/stderr stream.
    MAX_OUTPUT = 2 * 1024 * 1024  # 2MB

    def __init__(
        self,
        scan_id: str,
        image: str = "neurosploit-kali:latest",
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        network_mode: str = "bridge",
    ):
        # Identity: one container per scan, named after the scan id.
        self.scan_id = scan_id
        self.container_name = f"neurosploit-{scan_id}"
        # Container configuration, applied in initialize().
        self.image = image
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.network_mode = network_mode
        # Docker client/container handles; populated by initialize().
        self._client = None
        self._container = None
        self._available = False
        # Cache of tools confirmed present (see _ensure_tool).
        self._installed_tools: Set[str] = set()
        # Provides install recipes for on-demand tool installation.
        self._tool_registry = ToolRegistry()
        self._created_at: Optional[datetime] = None
async def initialize(self) -> Tuple[bool, str]:
    """Create and start a new Kali container for this scan.

    Returns:
        (ok, message) -- ok is False when the Docker SDK or daemon is
        unavailable, the image is missing, or creation fails.
    """
    if not HAS_DOCKER:
        return False, "Docker SDK not installed"
    try:
        self._client = docker.from_env()
        # Fail fast if the daemon is unreachable.
        self._client.ping()
    except Exception as e:
        return False, f"Docker not available: {e}"
    # Check if container already exists (resume after crash)
    try:
        existing = self._client.containers.get(self.container_name)
        if existing.status == "running":
            self._container = existing
            self._available = True
            self._created_at = datetime.utcnow()
            return True, f"Resumed existing container {self.container_name}"
        else:
            # Stale (stopped) container from a previous run -- replace it.
            existing.remove(force=True)
    except NotFound:
        pass
    # Check image exists
    try:
        self._client.images.get(self.image)
    except NotFound:
        return False, (
            f"Kali sandbox image '{self.image}' not found. "
            "Build with: docker build -f docker/Dockerfile.kali -t neurosploit-kali:latest docker/"
        )
    # Create container
    try:
        # Docker expresses CPU limits as quota/period in microseconds.
        cpu_quota = int(self.cpu_limit * 100000)
        self._container = self._client.containers.run(
            self.image,
            command="sleep infinity",  # keep-alive; work happens via exec
            name=self.container_name,
            detach=True,
            network_mode=self.network_mode,
            mem_limit=self.memory_limit,
            cpu_period=100000,
            cpu_quota=cpu_quota,
            cap_add=["NET_RAW", "NET_ADMIN"],  # raw sockets for scanners
            security_opt=["no-new-privileges:true"],
            labels={
                # Labels let ContainerPool.cleanup_orphans find strays.
                "neurosploit.scan_id": self.scan_id,
                "neurosploit.type": "kali-sandbox",
            },
        )
        self._available = True
        self._created_at = datetime.utcnow()
        logger.info(f"Created Kali container {self.container_name} for scan {self.scan_id}")
        return True, f"Container {self.container_name} started"
    except Exception as e:
        return False, f"Failed to create container: {e}"
@property
def is_available(self) -> bool:
    """True while this sandbox still owns a live container handle."""
    if self._container is None:
        return False
    return self._available
async def stop(self):
    """Stop and remove this scan's container (best effort)."""
    if self._container:
        try:
            self._container.stop(timeout=10)
        except Exception:
            # Already stopped or daemon unreachable; still try removal.
            pass
        try:
            self._container.remove(force=True)
            logger.info(f"Destroyed container {self.container_name}")
        except Exception as e:
            logger.warning(f"Error removing {self.container_name}: {e}")
        self._container = None
        self._available = False
async def health_check(self) -> Dict:
    """Probe the container and report its status plus detected core tools."""
    if not self.is_available:
        return {"status": "unavailable", "scan_id": self.scan_id, "tools": []}
    # One combined probe: each tool prints its version (or an error).
    probe = await self._exec(
        "nuclei -version 2>&1; naabu -version 2>&1; nmap --version 2>&1 | head -1",
        timeout=15,
    )
    combined = (probe.stdout or "").lower()
    tools = [name for name in ("nuclei", "naabu", "nmap") if name in combined]
    uptime = (
        (datetime.utcnow() - self._created_at).total_seconds()
        if self._created_at else 0.0
    )
    return {
        "status": "healthy" if tools else "degraded",
        "scan_id": self.scan_id,
        "container": self.container_name,
        "tools": tools,
        "installed_tools": sorted(self._installed_tools),
        "uptime_seconds": uptime,
    }
# ------------------------------------------------------------------
# Low-level execution
# ------------------------------------------------------------------
async def _exec(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> SandboxResult:
    """Execute command inside this container via docker exec.

    Runs ``bash -c <command>`` in a worker thread (the Docker SDK is
    blocking), captures stdout/stderr separately (demux), and truncates
    each stream at MAX_OUTPUT.

    NOTE(review): ``timeout`` is accepted but not referenced in this body
    -- exec_run is not given a deadline here. Confirm whether callers
    rely on it being enforced.
    """
    if not self.is_available:
        return SandboxResult(
            tool="kali", command=command, exit_code=-1,
            stdout="", stderr="", duration_seconds=0,
            error="Container not available",
        )
    started = time.time()
    try:
        # Off-load the blocking SDK call so the event loop stays free.
        exec_result = await asyncio.get_event_loop().run_in_executor(
            None,
            lambda: self._container.exec_run(
                cmd=["bash", "-c", command],
                stdout=True, stderr=True, demux=True,
            ),
        )
        duration = time.time() - started
        # demux=True yields a (stdout, stderr) tuple of bytes-or-None.
        stdout_raw, stderr_raw = exec_result.output
        stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
        stderr = (stderr_raw or b"").decode("utf-8", errors="replace")
        if len(stdout) > self.MAX_OUTPUT:
            stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
        if len(stderr) > self.MAX_OUTPUT:
            stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"
        return SandboxResult(
            tool="kali", command=command,
            exit_code=exec_result.exit_code,
            stdout=stdout, stderr=stderr,
            duration_seconds=round(duration, 2),
        )
    except Exception as e:
        duration = time.time() - started
        # Surface transport/daemon errors in the result rather than raising.
        return SandboxResult(
            tool="kali", command=command, exit_code=-1,
            stdout="", stderr="", duration_seconds=round(duration, 2),
            error=str(e),
        )
# ------------------------------------------------------------------
# On-demand tool installation
# ------------------------------------------------------------------
async def _ensure_tool(self, tool: str) -> bool:
    """Make sure *tool* is usable in this container, installing on demand.

    Returns True when the tool is present (cached, shipped in the base
    image, or freshly installed), False otherwise.
    """
    # Fast path: already verified or installed during this session.
    if tool in self._installed_tools:
        return True
    # The base image may already ship the binary.
    probe = await self._exec(f"which {shlex.quote(tool)} 2>/dev/null", timeout=10)
    if probe.exit_code == 0 and probe.stdout.strip():
        self._installed_tools.add(tool)
        return True
    # Otherwise look up an install recipe and run it.
    recipe = self._tool_registry.get_install_command(tool)
    if not recipe:
        logger.warning(f"No install recipe for '{tool}' in Kali container")
        return False
    logger.info(f"[{self.container_name}] Installing {tool}...")
    install = await self._exec(recipe, timeout=300)
    if install.exit_code != 0:
        logger.warning(
            f"[{self.container_name}] Failed to install {tool}: "
            f"{(install.stderr or install.stdout or '')[:300]}"
        )
        return False
    self._installed_tools.add(tool)
    logger.info(f"[{self.container_name}] Installed {tool} successfully")
    return True
# ------------------------------------------------------------------
# High-level tool APIs (same signatures as SandboxManager)
# ------------------------------------------------------------------
async def run_nuclei(
    self, target, templates=None, severity=None,
    tags=None, rate_limit=150, timeout=600,
) -> SandboxResult:
    """Run the Nuclei template scanner against *target* and parse findings."""
    await self._ensure_tool("nuclei")
    cmd = [
        "nuclei", "-u", shlex.quote(target),
        "-jsonl", "-rate-limit", str(rate_limit),
        "-silent", "-no-color",
    ]
    # Optional filters, in the same order nuclei expects them here.
    for flag, value in (("-t", templates), ("-severity", severity), ("-tags", tags)):
        if value:
            cmd.extend([flag, shlex.quote(value)])
    result = await self._exec(" ".join(cmd) + " 2>/dev/null", timeout=timeout)
    result.tool = "nuclei"
    if result.stdout:
        result.findings = parse_nuclei_jsonl(result.stdout)
    return result
async def run_naabu(
    self, target, ports=None, top_ports=None,
    scan_type="s", rate=1000, timeout=300,
) -> SandboxResult:
    """Run the Naabu port scanner against *target* and parse findings."""
    await self._ensure_tool("naabu")
    cmd = [
        "naabu", "-host", shlex.quote(target),
        "-json", "-rate", str(rate), "-silent", "-no-color",
    ]
    # Explicit port list wins over top-ports; default is the top 1000.
    if ports:
        cmd += ["-p", shlex.quote(str(ports))]
    else:
        cmd += ["-top-ports", str(top_ports) if top_ports else "1000"]
    if scan_type:
        cmd += ["-scan-type", scan_type]
    result = await self._exec(" ".join(cmd) + " 2>/dev/null", timeout=timeout)
    result.tool = "naabu"
    if result.stdout:
        result.findings = parse_naabu_output(result.stdout)
    return result
async def run_httpx(self, targets, timeout=120) -> SandboxResult:
    """Probe one or more URLs/hosts with httpx and parse JSON-line output.

    Args:
        targets: A single target string or a list of targets.
        timeout: Command timeout in seconds.

    Returns:
        SandboxResult whose ``findings`` is a list of dicts (url, status,
        title, technologies, content length, webserver).
    """
    await self._ensure_tool("httpx")
    if isinstance(targets, str):
        targets = [targets]
    # Join with a LITERAL backslash-n: `echo -e` expands it into real
    # newlines, feeding httpx one target per line on stdin.
    target_str = "\\n".join(shlex.quote(t) for t in targets)
    command = (
        f'echo -e "{target_str}" | httpx -silent -json '
        f'-title -tech-detect -status-code -content-length '
        f'-follow-redirects -no-color 2>/dev/null'
    )
    result = await self._exec(command, timeout=timeout)
    result.tool = "httpx"
    if result.stdout:
        findings = []
        # BUG FIX: the output contains real newlines, so split on them.
        # The previous split("\\n") looked for a literal backslash-n and
        # collapsed all JSON lines into one unparseable chunk.
        for line in result.stdout.splitlines():
            try:
                data = json.loads(line)
                findings.append({
                    "url": data.get("url", ""),
                    "status_code": data.get("status_code", 0),
                    "title": data.get("title", ""),
                    "technologies": data.get("tech", []),
                    "content_length": data.get("content_length", 0),
                    "webserver": data.get("webserver", ""),
                })
            except (json.JSONDecodeError, ValueError):
                continue
        result.findings = findings
    return result
async def run_subfinder(self, domain, timeout=120) -> SandboxResult:
    """Enumerate subdomains of *domain* with subfinder.

    Returns:
        SandboxResult whose ``findings`` is a list of
        ``{"subdomain": name}`` dicts, one per discovered subdomain.
    """
    await self._ensure_tool("subfinder")
    command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
    result = await self._exec(command, timeout=timeout)
    result.tool = "subfinder"
    if result.stdout:
        # BUG FIX: split on real newlines. The previous split("\\n")
        # searched for a literal backslash-n, so the entire output came
        # back as a single bogus "subdomain" entry.
        subs = [s.strip() for s in result.stdout.splitlines() if s.strip()]
        result.findings = [{"subdomain": s} for s in subs]
    return result
async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> SandboxResult:
    """Run an nmap service/version scan (-sV) against *target*."""
    await self._ensure_tool("nmap")
    cmd = ["nmap", "-sV"]
    if scripts:
        cmd.append("-sC")  # default NSE script set
    if ports:
        cmd += ["-p", shlex.quote(str(ports))]
    # Normal-format report written to stdout so _exec captures it.
    cmd += ["-oN", "/dev/stdout", shlex.quote(target)]
    result = await self._exec(" ".join(cmd) + " 2>/dev/null", timeout=timeout)
    result.tool = "nmap"
    return result
async def run_tool(self, tool, args, timeout=300) -> SandboxResult:
    """Run any tool (validates whitelist, installs on demand)."""
    # Resolve the whitelist: config file first, built-in defaults otherwise.
    allowed_tools = set()
    try:
        with open("config/config.json") as f:
            cfg = json.load(f)
        allowed_tools = set(cfg.get("sandbox", {}).get("tools", []))
    except Exception:
        # Best-effort: a missing or malformed config means defaults below.
        pass
    if not allowed_tools:
        allowed_tools = {
            "nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
            "dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
            "whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
            "wfuzz", "arjun", "wafw00f", "waybackurls",
        }

    def _refusal(reason: str) -> SandboxResult:
        # Uniform error result for pre-execution failures.
        return SandboxResult(
            tool=tool, command=f"{tool} {args}", exit_code=-1,
            stdout="", stderr="", duration_seconds=0,
            error=reason,
        )

    if tool not in allowed_tools:
        return _refusal(f"Tool '{tool}' not in allowed list")
    if not await self._ensure_tool(tool):
        return _refusal(f"Could not install '{tool}' in Kali container")
    result = await self._exec(f"{shlex.quote(tool)} {args} 2>&1", timeout=timeout)
    result.tool = tool
    return result
async def execute_raw(self, command, timeout=300) -> SandboxResult:
    """Run an arbitrary shell command and tag the result as 'raw'."""
    outcome = await self._exec(command, timeout=timeout)
    outcome.tool = "raw"
    return outcome

199
core/knowledge_augmentor.py Normal file
View File

@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Knowledge Augmentor - Adversarial pattern recognition from bug bounty data.
Loads the bug bounty finetuning dataset and provides retrieval-based
context enrichment for agent prompts. This is for PATTERN RECOGNITION
and adversarial intuition -- NOT for replaying exploits.
The augmentor:
- Builds a keyword index by vulnerability type
- Retrieves relevant patterns matching current testing context
- Injects formatted reference material into agent prompts
- Explicitly instructs the model to adapt, not copy
"""
import json
import logging
from typing import Dict, List, Optional
from pathlib import Path
logger = logging.getLogger(__name__)
class KnowledgeAugmentor:
    """Retrieval-based knowledge augmentation from bug bounty dataset."""

    # Vulnerability type keyword mappings:
    # canonical snake_case vuln key -> lowercase substrings that mark a
    # dataset entry as relevant to that vulnerability class during indexing.
    VULN_KEYWORDS = {
        'xss': ['xss', 'cross-site scripting', 'reflected xss', 'stored xss', 'dom xss',
                'script injection', 'html injection'],
        'sqli': ['sql injection', 'sqli', 'union select', 'blind sql', 'error-based sql',
                 'time-based sql', 'second-order sql'],
        'ssrf': ['ssrf', 'server-side request forgery', 'internal service'],
        'idor': ['idor', 'insecure direct object', 'broken object level',
                 'bola', 'horizontal privilege'],
        'rce': ['rce', 'remote code execution', 'command injection', 'os command',
                'code execution', 'shell injection'],
        'lfi': ['lfi', 'local file inclusion', 'path traversal', 'directory traversal',
                'file read', 'file disclosure'],
        'auth_bypass': ['authentication bypass', 'broken authentication', 'auth bypass',
                        'session fixation', 'jwt', 'token manipulation'],
        'csrf': ['csrf', 'cross-site request forgery', 'state-changing'],
        'open_redirect': ['open redirect', 'url redirect', 'redirect vulnerability'],
        'xxe': ['xxe', 'xml external entity', 'xml injection'],
        'ssti': ['ssti', 'server-side template injection', 'template injection'],
        'race_condition': ['race condition', 'toctou', 'concurrency'],
        'graphql': ['graphql', 'introspection', 'batching attack'],
        'api': ['api', 'rest api', 'broken api', 'api key', 'rate limiting'],
        'deserialization': ['deserialization', 'insecure deserialization', 'pickle',
                            'object injection'],
        'upload': ['file upload', 'unrestricted upload', 'web shell', 'upload bypass'],
        'cors': ['cors', 'cross-origin', 'origin validation'],
        'subdomain_takeover': ['subdomain takeover', 'dangling dns', 'cname'],
        'information_disclosure': ['information disclosure', 'sensitive data', 'data exposure',
                                   'directory listing', 'source code disclosure'],
    }

    def __init__(self, dataset_path: str = "models/bug-bounty/bugbounty_finetuning_dataset.json",
                 max_patterns: int = 3):
        """Create an augmentor.

        Args:
            dataset_path: JSON file — assumed to hold a list of records with
                'instruction', 'input' and 'output' fields (see _build_index).
            max_patterns: Default number of patterns returned per query.
        """
        self.dataset_path = Path(dataset_path)
        self.max_patterns = max_patterns
        self.entries: List[Dict] = []
        self.index: Dict[str, List[int]] = {}  # vuln_type -> list of entry indices
        self._loaded = False  # True after the first load attempt, even on failure

    def _ensure_loaded(self):
        """Lazy load and index the dataset on first use."""
        if self._loaded:
            return
        if not self.dataset_path.exists():
            logger.warning(f"Bug bounty dataset not found: {self.dataset_path}")
            self._loaded = True
            return
        try:
            with open(self.dataset_path, 'r', encoding='utf-8') as f:
                self.entries = json.load(f)
            logger.info(f"Loaded {len(self.entries)} entries from bug bounty dataset")
            self._build_index()
        except Exception as e:
            logger.error(f"Failed to load bug bounty dataset: {e}")
        # Marked loaded regardless of outcome so a bad file is not re-read.
        self._loaded = True

    def _build_index(self):
        """Build keyword index over the dataset entries."""
        for i, entry in enumerate(self.entries):
            # Search instruction + input + output as a single lowercase blob.
            text = (
                entry.get('instruction', '') + ' ' +
                entry.get('input', '') + ' ' +
                entry.get('output', '')
            ).lower()
            for vuln_type, keywords in self.VULN_KEYWORDS.items():
                for kw in keywords:
                    if kw in text:
                        self.index.setdefault(vuln_type, []).append(i)
                        break  # One match per vuln_type per entry
        indexed_types = {k: len(v) for k, v in self.index.items()}
        logger.info(f"Knowledge index built: {indexed_types}")

    def get_relevant_patterns(self, vulnerability_type: str,
                              technologies: Optional[List[str]] = None,
                              max_entries: Optional[int] = None) -> str:
        """Retrieve relevant bug bounty patterns for context enrichment.

        Args:
            vulnerability_type: Type of vulnerability being tested (e.g., 'xss', 'sqli')
            technologies: Optional list of detected technologies for relevance boosting
            max_entries: Override default max patterns count

        Returns:
            Formatted string for injection into LLM prompts as cognitive augmentation.
            Returns empty string if no relevant patterns found.
        """
        self._ensure_loaded()
        limit = max_entries or self.max_patterns
        # Normalize 'SQL Injection' / 'sql-injection' style names to index keys.
        vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')
        # Try exact match first, then partial
        candidates = self.index.get(vuln_key, [])
        if not candidates:
            # Try partial matching (substring in either direction).
            for key, indices in self.index.items():
                if vuln_key in key or key in vuln_key:
                    candidates = indices
                    break
        if not candidates:
            return ""
        # Deduplicate while preserving order.
        candidates = list(dict.fromkeys(candidates))
        # Score by technology relevance if technologies provided
        if technologies:
            scored = []
            for idx in candidates:
                entry = self.entries[idx]
                text = (entry.get('output', '') + ' ' + entry.get('instruction', '')).lower()
                # Count how many detected technologies are mentioned in the entry.
                tech_score = sum(1 for t in technologies if t.lower() in text)
                scored.append((tech_score, idx))
            scored.sort(key=lambda x: x[0], reverse=True)
            candidates = [idx for _, idx in scored]
        selected = candidates[:limit]
        # Build augmentation context
        augmentation = (
            "\n\n=== ADVERSARIAL PATTERN CONTEXT (Bug Bounty Knowledge) ===\n"
            "These are REFERENCE PATTERNS for understanding attack vectors and methodology.\n"
            "ADAPT the approach to the current target. Do NOT replay exact exploits.\n"
            "Use these as cognitive anchors for creative hypothesis generation.\n\n"
        )
        for i, idx in enumerate(selected, 1):
            entry = self.entries[idx]
            instruction = entry.get('instruction', '')[:300]
            output = entry.get('output', '')
            # Extract methodology-relevant sections, truncate for context budget
            methodology = self._extract_methodology(output, max_chars=1500)
            augmentation += f"--- Pattern {i} ---\n"
            augmentation += f"Context: {instruction}\n"
            augmentation += f"Methodology:\n{methodology}\n\n"
        augmentation += "=== END ADVERSARIAL PATTERN CONTEXT ===\n"
        return augmentation

    def _extract_methodology(self, text: str, max_chars: int = 1500) -> str:
        """Extract the most methodology-relevant portion of a writeup."""
        # Look for methodology/steps/approach sections
        markers = ['### steps', '### methodology', '### approach', '### exploitation',
                   '## steps', '## methodology', '## approach', '## exploitation',
                   'steps to reproduce', 'reproduction steps', 'proof of concept']
        text_lower = text.lower()
        for marker in markers:
            idx = text_lower.find(marker)
            if idx != -1:
                # Return from the first marker onward, capped at max_chars.
                return text[idx:idx + max_chars]
        # Fall back to first max_chars of the output
        return text[:max_chars]

    def get_available_types(self) -> List[str]:
        """Return list of vulnerability types that have indexed entries."""
        self._ensure_loaded()
        return sorted(self.index.keys())

    def get_entry_count(self, vulnerability_type: str) -> int:
        """Return count of indexed entries for a vulnerability type.

        NOTE: uses exact key match only (no partial matching, unlike
        get_relevant_patterns).
        """
        self._ensure_loaded()
        vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')
        return len(self.index.get(vuln_key, []))

View File

@@ -48,6 +48,20 @@ class LLMManager:
self.pdf_support_enabled = self.active_profile.get('pdf_support_enabled', False) self.pdf_support_enabled = self.active_profile.get('pdf_support_enabled', False)
self.guardrails_enabled = self.active_profile.get('guardrails_enabled', False) self.guardrails_enabled = self.active_profile.get('guardrails_enabled', False)
self.hallucination_mitigation_strategy = self.active_profile.get('hallucination_mitigation_strategy', None) self.hallucination_mitigation_strategy = self.active_profile.get('hallucination_mitigation_strategy', None)
# MAX_OUTPUT_TOKENS override from environment (up to 64000 for Claude)
env_max_tokens = os.getenv('MAX_OUTPUT_TOKENS', '').strip()
if env_max_tokens:
try:
override = int(env_max_tokens)
self.max_tokens = override
self.output_token_limit = override
logger.info(f"MAX_OUTPUT_TOKENS override applied: {override}")
except ValueError:
logger.warning(f"Invalid MAX_OUTPUT_TOKENS value: {env_max_tokens}")
# Model router (lazy init, set externally or via config)
self._model_router = None
# New prompt loading # New prompt loading
self.json_prompts_file_path = Path("prompts/library.json") self.json_prompts_file_path = Path("prompts/library.json")
@@ -147,6 +161,8 @@ class LLMManager:
raw_response = self._generate_gemini_cli(prompt, system_prompt) raw_response = self._generate_gemini_cli(prompt, system_prompt)
elif self.provider == 'lmstudio': elif self.provider == 'lmstudio':
raw_response = self._generate_lmstudio(prompt, system_prompt) raw_response = self._generate_lmstudio(prompt, system_prompt)
elif self.provider == 'openrouter':
raw_response = self._generate_openrouter(prompt, system_prompt)
else: else:
raise ValueError(f"Unsupported provider: {self.provider}") raise ValueError(f"Unsupported provider: {self.provider}")
except Exception as e: except Exception as e:
@@ -162,6 +178,21 @@ class LLMManager:
return raw_response return raw_response
def routed_generate(self, prompt: str, system_prompt: Optional[str] = None, task_type: str = "default") -> str:
    """Generate with optional model routing based on task type.

    When a model router is configured and produces a response for this
    task_type, that response is returned; otherwise the request falls
    back to the default generate().
    Task types: reasoning, analysis, generation, validation, default
    """
    router = self._model_router
    if router:
        routed = router.generate(prompt, system_prompt, task_type)
        if routed is not None:
            return routed
    # No router, or the router declined this task type.
    return self.generate(prompt, system_prompt)
def _apply_guardrails(self, response: str) -> str: def _apply_guardrails(self, response: str) -> str:
"""Applies basic guardrails to the LLM response.""" """Applies basic guardrails to the LLM response."""
if not self.guardrails_enabled: if not self.guardrails_enabled:
@@ -614,6 +645,77 @@ Identify any potential hallucinations, inconsistencies, or areas where the respo
logger.error(f"LM Studio error: {e}") logger.error(f"LM Studio error: {e}")
return f"Error: {str(e)}" return f"Error: {str(e)}"
def _generate_openrouter(self, prompt: str, system_prompt: Optional[str] = None) -> str:
    """Generate using OpenRouter API (OpenAI-compatible).

    OpenRouter supports hundreds of models through a unified API.
    Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-20250514').
    API key comes from OPENROUTER_API_KEY env var or config profile.

    Raises:
        ValueError: if the API key is missing/invalid, on non-retryable
            API errors, on connection failure, or after all retries fail.
    """
    if not self.api_key:
        raise ValueError("OPENROUTER_API_KEY not set. Please set the environment variable or configure in config.json")
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
        # Optional attribution headers used by OpenRouter.
        "HTTP-Referer": "https://github.com/neurosploit",
        "X-Title": "NeuroSploit"
    }
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    data = {
        "model": self.model,
        "messages": messages,
        "temperature": self.temperature,
        "max_tokens": self.max_tokens
    }
    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            logger.debug(f"OpenRouter API request attempt {attempt + 1}/{MAX_RETRIES} (model: {self.model})")
            response = requests.post(url, headers=headers, json=data, timeout=180)
            if response.status_code == 200:
                result = response.json()
                return result["choices"][0]["message"]["content"]
            elif response.status_code == 401:
                # Auth failures are permanent: fail fast, no retry.
                raise ValueError(f"Invalid OpenRouter API key: {response.text}")
            elif response.status_code == 429:
                # Rate limited: exponential backoff, then retry.
                last_error = f"Rate limit: {response.text}"
                logger.warning(f"OpenRouter rate limit (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + 1))
                    time.sleep(sleep_time)
            elif response.status_code >= 500:
                # Transient server errors: backoff and retry.
                last_error = f"Server error {response.status_code}: {response.text}"
                logger.warning(f"OpenRouter server error (attempt {attempt + 1}/{MAX_RETRIES})")
                if attempt < MAX_RETRIES - 1:
                    sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** attempt)
                    time.sleep(sleep_time)
            else:
                # Other 4xx responses are treated as non-retryable.
                raise ValueError(f"OpenRouter API error {response.status_code}: {response.text}")
        except requests.exceptions.Timeout:
            last_error = "Timeout"
            logger.warning(f"OpenRouter timeout (attempt {attempt + 1}/{MAX_RETRIES})")
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY * (RETRY_MULTIPLIER ** attempt))
        except requests.exceptions.ConnectionError as e:
            # No network path to the API: retries will not help.
            raise ValueError(f"Cannot connect to OpenRouter API: {e}")
    raise ValueError(f"OpenRouter API failed after {MAX_RETRIES} retries: {last_error}")
def analyze_vulnerability(self, vulnerability_data: Dict) -> Dict: def analyze_vulnerability(self, vulnerability_data: Dict) -> Dict:
"""Analyze vulnerability and suggest exploits""" """Analyze vulnerability and suggest exploits"""
# This prompt will be fetched from library.json later # This prompt will be fetched from library.json later

244
core/mcp_client.py Normal file
View File

@@ -0,0 +1,244 @@
#!/usr/bin/env python3
"""
MCP Client - Model Context Protocol tool connectivity.
Provides a standard interface for connecting to MCP servers and
executing tools. Supports both stdio and SSE transports.
Coexists with existing subprocess-based tool execution:
- MCP is tried first when enabled
- Falls back silently to subprocess if MCP unavailable
"""
import asyncio
import json
import logging
from typing import Dict, List, Optional, Any
from pathlib import Path
logger = logging.getLogger(__name__)
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
HAS_MCP = True
except ImportError:
HAS_MCP = False
logger.debug("MCP package not installed. MCP tool connectivity disabled.")
try:
from mcp.client.sse import sse_client
HAS_MCP_SSE = True
except ImportError:
HAS_MCP_SSE = False
class MCPToolClient:
    """Client for connecting to MCP servers and executing tools.

    Connections are held open on per-server AsyncExitStacks and released
    explicitly via disconnect_all().
    """

    def __init__(self, config: Dict):
        """Read the 'mcp_servers' section of *config*.

        MCP is considered enabled only when the config flag is set AND
        the optional `mcp` package imported successfully.
        """
        mcp_config = config.get('mcp_servers', {})
        self.enabled = mcp_config.get('enabled', False) and HAS_MCP
        self.servers_config = mcp_config.get('servers', {})
        self._sessions: Dict[str, Any] = {}  # server_name -> live ClientSession
        # server_name -> AsyncExitStack keeping the transport + session
        # context managers open for the lifetime of the connection.
        self._exit_stacks: Dict[str, Any] = {}
        self._available_tools: Dict[str, List[Dict]] = {}  # server_name -> tools list
        if self.enabled:
            logger.info(f"MCP client initialized with {len(self.servers_config)} server(s)")
        else:
            if not HAS_MCP and mcp_config.get('enabled', False):
                logger.warning("MCP enabled in config but mcp package not installed. "
                               "Install with: pip install mcp>=1.0.0")

    async def connect(self, server_name: str) -> bool:
        """Establish connection to an MCP server.

        Returns True if connection successful, False otherwise.
        """
        if not self.enabled:
            return False
        if server_name in self._sessions:
            return True  # Already connected
        server_config = self.servers_config.get(server_name)
        if not server_config:
            logger.error(f"MCP server '{server_name}' not found in config")
            return False
        transport = server_config.get('transport', 'stdio')
        try:
            if transport == 'stdio':
                return await self._connect_stdio(server_name, server_config)
            elif transport == 'sse':
                return await self._connect_sse(server_name, server_config)
            else:
                logger.error(f"Unsupported MCP transport: {transport}")
                return False
        except Exception as e:
            logger.error(f"Failed to connect to MCP server '{server_name}': {e}")
            return False

    async def _open_session(self, server_name: str, transport_cm) -> None:
        """Enter *transport_cm* and a ClientSession on an AsyncExitStack.

        BUG FIX: the previous implementation entered the transport context
        with `async with` and returned from inside (or right after) the
        block, so the underlying streams were torn down as soon as the
        connect call returned, leaving a dead session in the cache.
        Keeping both context managers on a per-server AsyncExitStack keeps
        the connection alive until disconnect_all().
        """
        from contextlib import AsyncExitStack  # stdlib; local import keeps module imports untouched
        stack = AsyncExitStack()
        try:
            # Bound the transport handshake so a hung server cannot block forever.
            read_stream, write_stream = await asyncio.wait_for(
                stack.enter_async_context(transport_cm), timeout=30
            )
            session = await stack.enter_async_context(
                ClientSession(read_stream, write_stream)
            )
            await session.initialize()
            # Cache the advertised tools for routing in find_tool_server().
            tools_result = await session.list_tools()
            self._available_tools[server_name] = [
                {"name": t.name, "description": t.description}
                for t in tools_result.tools
            ]
            self._sessions[server_name] = session
            self._exit_stacks[server_name] = stack
        except BaseException:
            # Unwind any partially-entered contexts before propagating.
            await stack.aclose()
            raise

    async def _connect_stdio(self, server_name: str, config: Dict) -> bool:
        """Connect to a stdio-based MCP server."""
        if not HAS_MCP:
            return False
        command = config.get('command', '')
        args = config.get('args', [])
        if not command:
            logger.error(f"MCP server '{server_name}' has no command specified")
            return False
        server_params = StdioServerParameters(
            command=command,
            args=args,
            env=config.get('env')
        )
        try:
            await self._open_session(server_name, stdio_client(server_params))
            logger.info(f"Connected to MCP server '{server_name}' via stdio "
                        f"({len(self._available_tools[server_name])} tools available)")
            return True
        except Exception as e:
            logger.error(f"Stdio connection to '{server_name}' failed: {e}")
            return False

    async def _connect_sse(self, server_name: str, config: Dict) -> bool:
        """Connect to an SSE-based MCP server."""
        if not HAS_MCP_SSE:
            logger.error("MCP SSE transport not available")
            return False
        url = config.get('url', '')
        if not url:
            logger.error(f"MCP server '{server_name}' has no URL specified")
            return False
        try:
            await self._open_session(server_name, sse_client(url))
            logger.info(f"Connected to MCP server '{server_name}' via SSE "
                        f"({len(self._available_tools[server_name])} tools available)")
            return True
        except Exception as e:
            logger.error(f"SSE connection to '{server_name}' failed: {e}")
            return False

    async def call_tool(self, server_name: str, tool_name: str,
                        arguments: Optional[Dict] = None) -> Optional[str]:
        """Call a tool on an MCP server.

        Returns the tool result as a string, or None if the call fails.
        """
        if not self.enabled:
            return None
        session = self._sessions.get(server_name)
        if not session:
            # Connect lazily on first use.
            connected = await self.connect(server_name)
            if not connected:
                return None
            session = self._sessions.get(server_name)
        try:
            result = await session.call_tool(tool_name, arguments or {})
            # Extract text content from result
            if result.content:
                texts = [c.text for c in result.content if hasattr(c, 'text')]
                return '\n'.join(texts) if texts else str(result.content)
            return ""
        except Exception as e:
            logger.error(f"MCP tool call failed ({server_name}/{tool_name}): {e}")
            return None

    async def list_tools(self, server_name: Optional[str] = None) -> Dict[str, List[Dict]]:
        """List available tools from MCP servers.

        If server_name is specified, lists tools for that server only.
        Otherwise lists tools from all connected servers.
        """
        if server_name:
            tools = self._available_tools.get(server_name, [])
            return {server_name: tools}
        return dict(self._available_tools)

    def find_tool_server(self, tool_name: str) -> Optional[str]:
        """Find which MCP server provides a given tool.

        Returns the server name, or None if no server has the tool.
        """
        for server_name, tools in self._available_tools.items():
            for tool in tools:
                if tool["name"] == tool_name:
                    return server_name
        return None

    async def try_tool(self, tool_name: str, arguments: Optional[Dict] = None) -> Optional[str]:
        """Try to execute a tool via any available MCP server.

        Searches all configured servers for the tool and executes it.
        Returns None silently if no server has the tool (for fallback pattern).
        """
        if not self.enabled:
            return None
        # Connect to any servers not yet connected
        for server_name in self.servers_config:
            if server_name not in self._sessions:
                await self.connect(server_name)
        server = self.find_tool_server(tool_name)
        if server:
            return await self.call_tool(server, tool_name, arguments)
        return None  # Silent fallback

    async def disconnect_all(self):
        """Disconnect from all MCP servers and release their transports."""
        # Closing the per-server exit stacks tears down both the session
        # and the underlying transport streams in the right order.
        for server_name, stack in list(self._exit_stacks.items()):
            try:
                await stack.aclose()
            except Exception as e:
                logger.debug(f"Error closing MCP session '{server_name}': {e}")
        self._exit_stacks.clear()
        self._sessions.clear()
        self._available_tools.clear()
        logger.info("Disconnected from all MCP servers")

626
core/mcp_server.py Normal file
View File

@@ -0,0 +1,626 @@
#!/usr/bin/env python3
"""
NeuroSploit MCP Server — Exposes pentest tools via Model Context Protocol.
Tools:
- screenshot_capture: Playwright browser screenshots
- payload_delivery: HTTP payload sending with full response capture
- dns_lookup: DNS record enumeration
- port_scan: TCP port scanning
- technology_detect: HTTP header-based tech fingerprinting
- subdomain_enumerate: Subdomain discovery via DNS brute-force
- save_finding: Persist a finding to agent memory
- get_vuln_prompt: Retrieve AI decision prompt for a vuln type
- execute_nuclei: Run Nuclei scanner in Docker sandbox (8000+ templates)
- execute_naabu: Run Naabu port scanner in Docker sandbox
- sandbox_health: Check sandbox container status
- sandbox_exec: Execute any allowed tool in the sandbox
Usage:
python3 -m core.mcp_server # stdio transport (default)
MCP_TRANSPORT=sse python3 -m core.mcp_server # SSE transport
"""
import asyncio
import json
import os
import socket
import logging
from typing import Dict, Any, Optional, List
logger = logging.getLogger(__name__)
# Guard MCP import — server only works where mcp package is available
try:
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
HAS_MCP = True
except ImportError:
HAS_MCP = False
logger.warning("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
# Guard Playwright import
try:
from core.browser_validator import BrowserValidator
HAS_PLAYWRIGHT = True
except ImportError:
HAS_PLAYWRIGHT = False
# AI prompts access
try:
from backend.core.vuln_engine.ai_prompts import get_prompt, build_testing_prompt
HAS_AI_PROMPTS = True
except ImportError:
HAS_AI_PROMPTS = False
# Security sandbox access
try:
from core.sandbox_manager import get_sandbox, SandboxManager
HAS_SANDBOX = True
except ImportError:
HAS_SANDBOX = False
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
async def _screenshot_capture(url: str, selector: Optional[str] = None) -> Dict:
    """Capture a screenshot of a URL using Playwright."""
    if not HAS_PLAYWRIGHT:
        return {"error": "Playwright not available", "screenshot": None}
    try:
        validator = BrowserValidator()
        shot = await validator.capture_screenshot(url, selector=selector)
        return {"url": url, "screenshot_base64": shot.get("screenshot", ""), "status": "ok"}
    except Exception as e:
        return {"error": str(e), "screenshot": None}
async def _payload_delivery(
    endpoint: str,
    method: str = "GET",
    payload: str = "",
    content_type: str = "application/x-www-form-urlencoded",
    headers: Optional[Dict] = None,
    param: str = "q",
) -> Dict:
    """Send an HTTP request with a payload and capture full response."""
    import aiohttp

    async def _summarize(resp) -> Dict:
        # Shared response serialization for both request branches.
        body = await resp.text()
        return {
            "status": resp.status,
            "headers": dict(resp.headers),
            "body": body[:5000],
            "body_length": len(body),
        }

    try:
        async with aiohttp.ClientSession() as session:
            req_headers = {"Content-Type": content_type}
            if headers:
                req_headers.update(headers)
            if method.upper() == "GET":
                # GET: payload travels as a query-string parameter.
                async with session.get(endpoint, params={param: payload}, headers=req_headers, timeout=15, allow_redirects=False) as resp:
                    return await _summarize(resp)
            # Non-GET: form-encoded body unless the content type is JSON.
            data = {param: payload} if content_type != "application/json" else None
            json_data = json.loads(payload) if content_type == "application/json" else None
            async with session.request(
                method.upper(), endpoint, data=data, json=json_data,
                headers=req_headers, timeout=15, allow_redirects=False
            ) as resp:
                return await _summarize(resp)
    except Exception as e:
        return {"error": str(e)}
async def _dns_lookup(domain: str, record_type: str = "A") -> Dict:
    """Perform DNS lookups for a domain."""
    import subprocess
    try:
        proc = subprocess.run(
            ["dig", "+short", domain, record_type],
            capture_output=True, text=True, timeout=10
        )
        found = [line.strip() for line in proc.stdout.strip().split("\n") if line.strip()]
        return {"domain": domain, "type": record_type, "records": found}
    except FileNotFoundError:
        # dig is missing: emulate A-record lookups with the socket module.
        if record_type.upper() == "A":
            try:
                infos = socket.getaddrinfo(domain, None, socket.AF_INET)
                addrs = list(set(info[4][0] for info in infos))
                return {"domain": domain, "type": "A", "records": addrs}
            except socket.gaierror as e:
                return {"domain": domain, "type": "A", "error": str(e)}
        return {"error": "dig command not available and only A records supported via fallback"}
    except Exception as e:
        return {"error": str(e)}
async def _port_scan(host: str, ports: str = "80,443,8080,8443,3000,5000") -> Dict:
"""Scan TCP ports on a host."""
port_list = [int(p.strip()) for p in ports.split(",") if p.strip().isdigit()]
results = {}
async def check_port(port: int):
try:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(host, port), timeout=3
)
writer.close()
await writer.wait_closed()
return port, "open"
except (asyncio.TimeoutError, ConnectionRefusedError, OSError):
return port, "closed"
tasks = [check_port(p) for p in port_list[:100]]
for coro in asyncio.as_completed(tasks):
port, state = await coro
results[str(port)] = state
open_ports = [p for p, s in results.items() if s == "open"]
return {"host": host, "ports": results, "open_ports": open_ports}
async def _technology_detect(url: str) -> Dict:
    """Detect technologies from HTTP response headers."""
    import aiohttp
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=10, allow_redirects=True) as resp:
                headers = dict(resp.headers)
                body = await resp.text()
        techs = []
        # Header-based fingerprints first.
        server = headers.get("Server", "")
        if server:
            techs.append(f"Server: {server}")
        powered_by = headers.get("X-Powered-By", "")
        if powered_by:
            techs.append(f"X-Powered-By: {powered_by}")
        # Framework detection from body markers.
        framework_markers = {
            "React": ["react", "_next/static", "__NEXT_DATA__"],
            "Vue.js": ["vue.js", "__vue__", "v-cloak"],
            "Angular": ["ng-version", "angular"],
            "jQuery": ["jquery"],
            "WordPress": ["wp-content", "wp-includes"],
            "Laravel": ["laravel_session", "csrf-token"],
            "Django": ["csrfmiddlewaretoken", "django"],
            "Rails": ["csrf-param", "action_dispatch"],
            "Spring": ["jsessionid"],
            "Express": ["connect.sid"],
        }
        body_lower = body.lower()
        for tech, markers in framework_markers.items():
            if any(m.lower() in body_lower for m in markers):
                techs.append(tech)
        # Only echo back headers that reveal the stack.
        interesting = ("server", "x-powered-by", "x-aspnet-version",
                       "x-generator", "x-drupal-cache", "x-framework")
        return {"url": url, "technologies": techs, "headers": {
            k: v for k, v in headers.items() if k.lower() in interesting
        }}
    except Exception as e:
        return {"error": str(e)}
async def _subdomain_enumerate(domain: str) -> Dict:
"""Enumerate subdomains via common prefixes."""
prefixes = [
"www", "api", "admin", "app", "dev", "staging", "test", "mail",
"ftp", "cdn", "blog", "shop", "docs", "status", "dashboard",
"portal", "m", "mobile", "beta", "demo", "v2", "internal",
]
found = []
async def check_subdomain(prefix: str):
subdomain = f"{prefix}.{domain}"
try:
socket.getaddrinfo(subdomain, None, socket.AF_INET)
return subdomain
except socket.gaierror:
return None
tasks = [check_subdomain(p) for p in prefixes]
results = await asyncio.gather(*tasks)
found = [r for r in results if r]
return {"domain": domain, "subdomains": found, "count": len(found)}
async def _save_finding(finding_json: str) -> Dict:
"""Persist a finding (JSON string). Returns confirmation."""
try:
finding = json.loads(finding_json)
# Validate required fields
required = ["title", "severity", "vulnerability_type", "affected_endpoint"]
missing = [f for f in required if f not in finding]
if missing:
return {"error": f"Missing required fields: {missing}"}
return {"status": "saved", "finding_id": finding.get("id", "unknown"), "title": finding["title"]}
except json.JSONDecodeError as e:
return {"error": f"Invalid JSON: {e}"}
async def _get_vuln_prompt(vuln_type: str, target: str = "", endpoint: str = "", param: str = "", tech: str = "") -> Dict:
    """Fetch the AI decision prompt for *vuln_type* with context substituted in."""
    if not HAS_AI_PROMPTS:
        return {"error": "AI prompts module not available"}
    context = {
        "TARGET_URL": target,
        "ENDPOINT": endpoint,
        "PARAMETER": param,
        "TECHNOLOGY": tech,
    }
    try:
        prompt_data = get_prompt(vuln_type, context)
        if not prompt_data:
            return {"error": f"No prompt found for vuln type: {vuln_type}"}
        return {
            "vuln_type": vuln_type,
            "prompt": prompt_data,
            "full_prompt": build_testing_prompt(vuln_type, target, endpoint, param, tech),
        }
    except Exception as e:
        return {"error": str(e)}
# ---------------------------------------------------------------------------
# Sandbox tool implementations (Docker-based real tools)
# ---------------------------------------------------------------------------
async def _execute_nuclei(
    target: str,
    templates: Optional[str] = None,
    severity: Optional[str] = None,
    tags: Optional[str] = None,
    rate_limit: int = 150,
) -> Dict:
    """Run the Nuclei scanner inside the Docker sandbox and summarize results."""
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available. Install docker SDK: pip install docker"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running. Build with: cd docker && docker compose -f docker-compose.sandbox.yml up -d"}
        scan = await sandbox.run_nuclei(
            target=target,
            templates=templates,
            severity=severity,
            tags=tags,
            rate_limit=rate_limit,
        )
        # Cap the raw output so MCP responses stay small.
        raw = scan.stdout[:3000] if scan.stdout else ""
        return {
            "tool": "nuclei",
            "target": target,
            "exit_code": scan.exit_code,
            "findings": scan.findings,
            "findings_count": len(scan.findings),
            "duration_seconds": scan.duration_seconds,
            "raw_output": raw,
            "error": scan.error,
        }
    except Exception as e:
        return {"error": str(e)}
async def _execute_naabu(
    target: str,
    ports: Optional[str] = None,
    top_ports: Optional[int] = None,
    rate: int = 1000,
) -> Dict:
    """Run the Naabu port scanner inside the Docker sandbox."""
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running"}
        scan = await sandbox.run_naabu(
            target=target,
            ports=ports,
            top_ports=top_ports,
            rate=rate,
        )
        discovered = sorted(entry["port"] for entry in scan.findings)
        return {
            "tool": "naabu",
            "target": target,
            "exit_code": scan.exit_code,
            "open_ports": discovered,
            "port_count": len(discovered),
            "findings": scan.findings,
            "duration_seconds": scan.duration_seconds,
            "error": scan.error,
        }
    except Exception as e:
        return {"error": str(e)}
async def _sandbox_health() -> Dict:
    """Report sandbox container status and the tools it exposes."""
    if not HAS_SANDBOX:
        return {"status": "unavailable", "reason": "Sandbox module not installed"}
    try:
        sandbox = await get_sandbox()
        return await sandbox.health_check()
    except Exception as e:
        return {"status": "error", "reason": str(e)}
async def _sandbox_exec(tool: str, args: str, timeout: int = 300) -> Dict:
    """Execute an allow-listed tool in the Docker sandbox, returning trimmed output."""
    if not HAS_SANDBOX:
        return {"error": "Sandbox module not available"}
    try:
        sandbox = await get_sandbox()
        if not sandbox.is_available:
            return {"error": "Sandbox container not running"}
        run = await sandbox.run_tool(tool=tool, args=args, timeout=timeout)
        # Truncate captured streams so MCP responses stay manageable.
        return {
            "tool": tool,
            "exit_code": run.exit_code,
            "stdout": run.stdout[:5000] if run.stdout else "",
            "stderr": run.stderr[:2000] if run.stderr else "",
            "duration_seconds": run.duration_seconds,
            "error": run.error,
        }
    except Exception as e:
        return {"error": str(e)}
# ---------------------------------------------------------------------------
# MCP Server Definition
# ---------------------------------------------------------------------------
# MCP tool catalog advertised via list_tools(). Each entry is the keyword
# payload for an mcp.types.Tool: name, description, and a JSON-Schema
# "inputSchema" describing the accepted arguments. Keys must stay in sync
# with TOOL_HANDLERS below.
TOOLS = [
    {
        "name": "screenshot_capture",
        "description": "Capture a browser screenshot of a URL using Playwright",
        "inputSchema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to screenshot"},
                "selector": {"type": "string", "description": "Optional CSS selector to capture"},
            },
            "required": ["url"],
        },
    },
    {
        "name": "payload_delivery",
        "description": "Send an HTTP request with a payload and capture the full response",
        "inputSchema": {
            "type": "object",
            "properties": {
                "endpoint": {"type": "string", "description": "Target URL"},
                "method": {"type": "string", "description": "HTTP method", "default": "GET"},
                "payload": {"type": "string", "description": "Payload value"},
                "content_type": {"type": "string", "default": "application/x-www-form-urlencoded"},
                "param": {"type": "string", "description": "Parameter name", "default": "q"},
            },
            "required": ["endpoint", "payload"],
        },
    },
    {
        "name": "dns_lookup",
        "description": "Perform DNS lookups for a domain",
        "inputSchema": {
            "type": "object",
            "properties": {
                "domain": {"type": "string", "description": "Domain to look up"},
                "record_type": {"type": "string", "default": "A", "description": "DNS record type"},
            },
            "required": ["domain"],
        },
    },
    {
        "name": "port_scan",
        "description": "Scan TCP ports on a host",
        "inputSchema": {
            "type": "object",
            "properties": {
                "host": {"type": "string", "description": "Target host"},
                "ports": {"type": "string", "default": "80,443,8080,8443,3000,5000", "description": "Comma-separated ports"},
            },
            "required": ["host"],
        },
    },
    {
        "name": "technology_detect",
        "description": "Detect technologies from HTTP response headers and body",
        "inputSchema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to analyze"},
            },
            "required": ["url"],
        },
    },
    {
        "name": "subdomain_enumerate",
        "description": "Enumerate subdomains via common prefix brute-force",
        "inputSchema": {
            "type": "object",
            "properties": {
                "domain": {"type": "string", "description": "Base domain to enumerate"},
            },
            "required": ["domain"],
        },
    },
    {
        "name": "save_finding",
        "description": "Persist a vulnerability finding (JSON string)",
        "inputSchema": {
            "type": "object",
            "properties": {
                "finding_json": {"type": "string", "description": "Finding as JSON string"},
            },
            "required": ["finding_json"],
        },
    },
    {
        "name": "get_vuln_prompt",
        "description": "Retrieve the AI decision prompt for a vulnerability type",
        "inputSchema": {
            "type": "object",
            "properties": {
                "vuln_type": {"type": "string", "description": "Vulnerability type key"},
                "target": {"type": "string", "description": "Target URL"},
                "endpoint": {"type": "string", "description": "Specific endpoint"},
                "param": {"type": "string", "description": "Parameter name"},
                "tech": {"type": "string", "description": "Detected technology"},
            },
            "required": ["vuln_type"],
        },
    },
    # --- Sandbox tools (Docker-based real security tools) ---
    {
        "name": "execute_nuclei",
        "description": "Run Nuclei vulnerability scanner (8000+ templates) in Docker sandbox. Returns structured findings with severity, CVE, CWE.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "target": {"type": "string", "description": "Target URL to scan"},
                "templates": {"type": "string", "description": "Specific template path (e.g. 'cves/2024/', 'vulnerabilities/xss/')"},
                "severity": {"type": "string", "description": "Filter: critical,high,medium,low,info"},
                "tags": {"type": "string", "description": "Filter by tags: xss,sqli,lfi,ssrf,rce"},
                "rate_limit": {"type": "integer", "description": "Requests per second (default 150)", "default": 150},
            },
            "required": ["target"],
        },
    },
    {
        "name": "execute_naabu",
        "description": "Run Naabu port scanner in Docker sandbox. Fast SYN-based scanning with configurable port ranges.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "target": {"type": "string", "description": "IP address or hostname to scan"},
                "ports": {"type": "string", "description": "Ports to scan (e.g. '80,443,8080' or '1-65535')"},
                "top_ports": {"type": "integer", "description": "Scan top N ports (e.g. 100, 1000)"},
                "rate": {"type": "integer", "description": "Packets per second (default 1000)", "default": 1000},
            },
            "required": ["target"],
        },
    },
    {
        "name": "sandbox_health",
        "description": "Check Docker sandbox status and available security tools",
        # No arguments: health check takes no input.
        "inputSchema": {
            "type": "object",
            "properties": {},
        },
    },
    {
        "name": "sandbox_exec",
        "description": "Execute any allowed security tool in the Docker sandbox (nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, sqlmap, nikto, etc.)",
        "inputSchema": {
            "type": "object",
            "properties": {
                "tool": {"type": "string", "description": "Tool name (e.g. nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, gobuster, dalfox, nikto, sqlmap, curl)"},
                "args": {"type": "string", "description": "Command-line arguments for the tool"},
                "timeout": {"type": "integer", "description": "Max execution time in seconds (default 300)", "default": 300},
            },
            "required": ["tool", "args"],
        },
    },
]
# Tool dispatcher
# Maps each MCP tool name to an async callable. Each lambda adapts the MCP
# `arguments` dict to the positional signature of its implementation,
# supplying the same defaults that the TOOLS schemas declare.
TOOL_HANDLERS = {
    "screenshot_capture": lambda args: _screenshot_capture(args["url"], args.get("selector")),
    "payload_delivery": lambda args: _payload_delivery(
        args["endpoint"], args.get("method", "GET"), args.get("payload", ""),
        args.get("content_type", "application/x-www-form-urlencoded"),
        args.get("headers"), args.get("param", "q")
    ),
    "dns_lookup": lambda args: _dns_lookup(args["domain"], args.get("record_type", "A")),
    "port_scan": lambda args: _port_scan(args["host"], args.get("ports", "80,443,8080,8443,3000,5000")),
    "technology_detect": lambda args: _technology_detect(args["url"]),
    "subdomain_enumerate": lambda args: _subdomain_enumerate(args["domain"]),
    "save_finding": lambda args: _save_finding(args["finding_json"]),
    "get_vuln_prompt": lambda args: _get_vuln_prompt(
        args["vuln_type"], args.get("target", ""), args.get("endpoint", ""),
        args.get("param", ""), args.get("tech", "")
    ),
    # Sandbox tools
    "execute_nuclei": lambda args: _execute_nuclei(
        args["target"], args.get("templates"), args.get("severity"),
        args.get("tags"), args.get("rate_limit", 150)
    ),
    "execute_naabu": lambda args: _execute_naabu(
        args["target"], args.get("ports"), args.get("top_ports"),
        args.get("rate", 1000)
    ),
    "sandbox_health": lambda args: _sandbox_health(),
    "sandbox_exec": lambda args: _sandbox_exec(
        args["tool"], args["args"], args.get("timeout", 300)
    ),
}
def create_mcp_server() -> "Server":
    """Build the MCP server exposing every pentest tool in TOOLS/TOOL_HANDLERS."""
    if not HAS_MCP:
        raise RuntimeError("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
    server = Server("neurosploit-tools")

    @server.list_tools()
    async def list_tools() -> list:
        # Advertise the static TOOLS catalog.
        return [Tool(**spec) for spec in TOOLS]

    @server.call_tool()
    async def call_tool(name: str, arguments: dict) -> list:
        # Dispatch to the matching handler; every outcome is serialized JSON.
        handler = TOOL_HANDLERS.get(name)
        if handler is None:
            payload = {"error": f"Unknown tool: {name}"}
        else:
            try:
                payload = await handler(arguments)
            except Exception as e:
                payload = {"error": str(e)}
        return [TextContent(type="text", text=json.dumps(payload, default=str))]

    return server
async def main():
    """Entry point: serve MCP over the configured transport (stdio only)."""
    mcp_server = create_mcp_server()
    transport = os.getenv("MCP_TRANSPORT", "stdio")
    if transport != "stdio":
        logger.error(f"Unsupported transport: {transport}. Use 'stdio'.")
        return
    async with stdio_server() as (read_stream, write_stream):
        await mcp_server.run(read_stream, write_stream, mcp_server.create_initialization_options())
# Allow running this module directly as a standalone MCP stdio server.
if __name__ == "__main__":
    asyncio.run(main())

81
core/model_router.py Normal file
View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Model Router - Task-type-based LLM routing.
Routes requests to different LLM profiles based on task type:
- reasoning: Complex logic and decision-making
- analysis: Data analysis and pattern recognition
- generation: Content and payload generation
- validation: Result verification and confirmation
Enabled/disabled via config. When disabled, callers fall back to their default provider.
"""
import os
import logging
from typing import Dict, Optional, Callable
logger = logging.getLogger(__name__)

# Env var values accepted as an explicit on/off override for routing.
# (Previously only the exact strings 'true'/'false' were recognized.)
_TRUTHY = {"true", "1", "yes", "on"}
_FALSY = {"false", "0", "no", "off"}


class ModelRouter:
    """Routes LLM requests to different profiles based on task type."""

    def __init__(self, config: Dict, llm_manager_factory: Callable):
        """
        Args:
            config: Full application config dict (must contain 'model_routing' and 'llm' keys)
            llm_manager_factory: Callable that takes a profile name and returns an LLMManager instance
        """
        routing_config = config.get('model_routing', {})
        self.enabled = routing_config.get('enabled', False)
        # Env var override wins over config; accepts true/1/yes/on and false/0/no/off.
        env_override = os.getenv('ENABLE_MODEL_ROUTING', '').strip().lower()
        if env_override in _TRUTHY:
            self.enabled = True
        elif env_override in _FALSY:
            self.enabled = False
        self.routes = routing_config.get('routes', {})
        self.llm_manager_factory = llm_manager_factory
        self._managers = {}  # Cache of LLMManager instances, keyed by profile name
        if self.enabled:
            logger.info(f"Model routing enabled with routes: {list(self.routes.keys())}")
        else:
            logger.debug("Model routing disabled")

    def generate(self, prompt: str, system_prompt: Optional[str] = None,
                 task_type: str = "default") -> Optional[str]:
        """Route a generation request to the appropriate LLM profile.

        Returns None if routing is disabled, no route matches, or the routed
        provider raises — callers then fall back to their default provider.
        """
        if not self.enabled:
            return None
        profile = self.routes.get(task_type, self.routes.get('default'))
        if not profile:
            logger.debug(f"No route for task_type '{task_type}', falling back to default")
            return None
        try:
            if profile not in self._managers:
                self._managers[profile] = self.llm_manager_factory(profile)
            manager = self._managers[profile]
            logger.debug(f"Routing task_type '{task_type}' to profile '{profile}' "
                         f"(provider: {manager.provider}, model: {manager.model})")
            return manager.generate(prompt, system_prompt)
        except Exception as e:
            # Routing is best-effort: log the failure and let the caller fall back.
            logger.error(f"Model routing error for profile '{profile}': {e}")
            return None

    def get_profile_for_task(self, task_type: str) -> Optional[str]:
        """Get the profile name that would handle a given task type."""
        if not self.enabled:
            return None
        return self.routes.get(task_type, self.routes.get('default'))

View File

@@ -4,9 +4,11 @@ Professional Pentest Report Generator
Generates detailed reports with PoCs, CVSS scores, requests/responses Generates detailed reports with PoCs, CVSS scores, requests/responses
""" """
import base64
import json import json
import os import os
from datetime import datetime from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any from typing import Dict, List, Any
import html import html
import logging import logging
@@ -49,6 +51,41 @@ class ReportGenerator:
escaped = self._escape_html(code) escaped = self._escape_html(code)
return f'<pre><code class="language-{language}">{escaped}</code></pre>' return f'<pre><code class="language-{language}">{escaped}</code></pre>'
def embed_screenshot(self, filepath: str) -> str:
"""Convert a screenshot file to a base64 data URI for HTML embedding."""
path = Path(filepath)
if not path.exists():
return ""
try:
with open(path, 'rb') as f:
data = base64.b64encode(f.read()).decode('ascii')
return f"data:image/png;base64,{data}"
except Exception:
return ""
def build_screenshots_html(self, finding_id: str, screenshots_dir: str = "reports/screenshots") -> str:
"""Build screenshot grid HTML for a finding, embedding images as base64."""
finding_dir = Path(screenshots_dir) / finding_id
if not finding_dir.exists():
return ""
screenshots = sorted(finding_dir.glob("*.png"))[:3]
if not screenshots:
return ""
cards = ""
for ss in screenshots:
data_uri = self.embed_screenshot(str(ss))
if data_uri:
caption = ss.stem.replace('_', ' ').title()
cards += f"""
<div class="screenshot-card">
<img src="{data_uri}" alt="{caption}" />
<div class="screenshot-caption">{caption}</div>
</div>"""
return f'<div class="screenshot-grid">{cards}</div>' if cards else ""
def generate_executive_summary(self) -> str: def generate_executive_summary(self) -> str:
"""Generate executive summary section""" """Generate executive summary section"""
summary = self.scan_results.get("summary", {}) summary = self.scan_results.get("summary", {})
@@ -611,17 +648,39 @@ class ReportGenerator:
return html return html
def save_report(self, output_dir: str = "reports") -> str: def save_report(self, output_dir: str = "reports") -> str:
"""Save HTML report to file""" """Save HTML report to a per-report folder with screenshots."""
os.makedirs(output_dir, exist_ok=True) import shutil
# Create per-report folder
target = self.scan_results.get("target_url", self.scan_results.get("target", "unknown"))
target_name = target.replace("://", "_").replace("/", "_").rstrip("_")[:40]
report_folder = f"report_{target_name}_{self.timestamp}"
report_dir = os.path.join(output_dir, report_folder)
os.makedirs(report_dir, exist_ok=True)
filename = f"pentest_report_{self.timestamp}.html" filename = f"pentest_report_{self.timestamp}.html"
filepath = os.path.join(output_dir, filename) filepath = os.path.join(report_dir, filename)
html_content = self.generate_html_report() html_content = self.generate_html_report()
with open(filepath, 'w', encoding='utf-8') as f: with open(filepath, 'w', encoding='utf-8') as f:
f.write(html_content) f.write(html_content)
# Copy screenshots into report folder
screenshots_src = os.path.join("reports", "screenshots")
if os.path.exists(screenshots_src):
screenshots_dest = os.path.join(report_dir, "screenshots")
vulns = self.scan_results.get("vulnerabilities", [])
for vuln in vulns:
fid = vuln.get("id", "")
if fid:
src_dir = os.path.join(screenshots_src, str(fid))
if os.path.exists(src_dir):
dest_dir = os.path.join(screenshots_dest, str(fid))
os.makedirs(dest_dir, exist_ok=True)
for ss_file in Path(src_dir).glob("*.png"):
shutil.copy2(str(ss_file), os.path.join(dest_dir, ss_file.name))
logger.info(f"Report saved to: {filepath}") logger.info(f"Report saved to: {filepath}")
return filepath return filepath

758
core/sandbox_manager.py Normal file
View File

@@ -0,0 +1,758 @@
#!/usr/bin/env python3
"""
NeuroSploit Security Sandbox Manager
Manages Docker-based security tool execution in an isolated container.
Provides high-level API for running Nuclei, Naabu, and other tools.
Architecture:
- Persistent sandbox container (neurosploit-sandbox) stays running
- Tools executed via `docker exec` for sub-second startup
- Output collected from container stdout + output files
- Resource limits enforced (2GB RAM, 2 CPU)
- Network isolation with controlled egress
"""
import asyncio
import json
import logging
import os
import re
import shlex
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
# Guard Docker SDK import: the sandbox degrades gracefully when the
# `docker` package is missing — HAS_DOCKER gates every code path below.
try:
    import docker
    from docker.errors import DockerException, NotFound, APIError
    HAS_DOCKER = True
except ImportError:
    HAS_DOCKER = False
    logger.warning("Docker SDK not installed. Install with: pip install docker")
@dataclass
class SandboxResult:
    """Result from a sandboxed tool execution."""
    tool: str  # Tool name, e.g. "nuclei", "naabu", or "sandbox" for generic execs
    command: str  # Exact shell command that was executed
    exit_code: int  # Process exit code; -1 when execution failed before/while running
    stdout: str  # Captured stdout (may be truncated by the manager)
    stderr: str  # Captured stderr (may be truncated by the manager)
    duration_seconds: float  # Wall-clock execution time
    findings: List[Dict] = field(default_factory=list)  # Parsed, tool-specific findings
    error: Optional[str] = None  # Infrastructure-level error message, if any
# ---------------------------------------------------------------------------
# Nuclei output parser
# ---------------------------------------------------------------------------
# Severities emitted by Nuclei; anything unrecognized is normalized to "info".
_NUCLEI_SEVERITIES = {"critical", "high", "medium", "low", "info"}


def parse_nuclei_jsonl(output: str) -> List[Dict]:
    """Parse Nuclei JSONL output into structured findings.

    Each non-empty line is expected to be one JSON object. Malformed lines
    and valid-JSON non-object lines (e.g. a bare number, which previously
    crashed on ``.get``) are skipped.

    Args:
        output: Raw stdout from ``nuclei -jsonl``.

    Returns:
        List of finding dicts (title, severity, endpoint, CWE, CVSS, ...).
    """
    findings = []
    for line in output.strip().split("\n"):
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(data, dict):
            # A bare JSON scalar/array has no .get(); ignore it.
            continue
        info = data.get("info", {})
        if not isinstance(info, dict):
            info = {}
        classification = info.get("classification", {})
        if not isinstance(classification, dict):
            classification = {}
        tags = info.get("tags", [])
        severity = info.get("severity", "info")
        findings.append({
            "title": info.get("name", "Unknown"),
            "severity": severity if severity in _NUCLEI_SEVERITIES else "info",
            "vulnerability_type": tags[0] if tags else "vulnerability",
            "description": info.get("description", ""),
            "affected_endpoint": data.get("matched-at", ""),
            "evidence": data.get("matcher-name", ""),
            "template_id": data.get("template-id", ""),
            "curl_command": data.get("curl-command", ""),
            "remediation": info.get("remediation", "Review and fix the vulnerability"),
            "references": info.get("reference", []),
            "cwe": classification.get("cwe-id", []),
            "cvss_score": classification.get("cvss-score", 0),
        })
    return findings
# ---------------------------------------------------------------------------
# Naabu output parser
# ---------------------------------------------------------------------------
def parse_naabu_output(output: str) -> List[Dict]:
    """Parse Naabu output (JSON lines or host:port text) into port findings.

    Deduplicates on (host, port). Valid-JSON lines that are not objects
    (e.g. a bare ``80``, which previously crashed on ``.get``) fall through
    to the text parser instead of raising.

    Args:
        output: Raw stdout from naabu (``-json`` or plain mode).

    Returns:
        List of {"host", "port", "protocol"} dicts, first occurrence wins.
    """
    findings: List[Dict] = []
    seen = set()

    def record(host, port):
        # Deduplicate while preserving first-seen order.
        key = f"{host}:{port}"
        if key not in seen:
            seen.add(key)
            findings.append({"host": host, "port": port, "protocol": "tcp"})

    for raw in output.strip().split("\n"):
        line = raw.strip()
        if not line:
            continue
        # Naabu JSON mode: {"host":"x","ip":"y","port":80}
        parsed = None
        try:
            parsed = json.loads(line)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            record(parsed.get("host", parsed.get("ip", "")), parsed.get("port", 0))
            continue
        # Text mode: host:port
        match = re.match(r"^(.+?):(\d+)$", line)
        if match:
            record(match.group(1), int(match.group(2)))
    return findings
class BaseSandbox(ABC):
    """Abstract interface for sandbox implementations (legacy shared + per-scan Kali)."""
    # --- Lifecycle ---
    @abstractmethod
    async def initialize(self) -> Tuple[bool, str]: ...
    @property
    @abstractmethod
    def is_available(self) -> bool: ...
    @abstractmethod
    async def stop(self): ...
    @abstractmethod
    async def health_check(self) -> Dict: ...
    # --- High-level wrappers for specific security tools ---
    @abstractmethod
    async def run_nuclei(self, target, templates=None, severity=None,
                         tags=None, rate_limit=150, timeout=600) -> "SandboxResult": ...
    @abstractmethod
    async def run_naabu(self, target, ports=None, top_ports=None,
                        scan_type="s", rate=1000, timeout=300) -> "SandboxResult": ...
    @abstractmethod
    async def run_httpx(self, targets, timeout=120) -> "SandboxResult": ...
    @abstractmethod
    async def run_subfinder(self, domain, timeout=120) -> "SandboxResult": ...
    @abstractmethod
    async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> "SandboxResult": ...
    # --- Generic execution ---
    @abstractmethod
    async def run_tool(self, tool, args, timeout=300) -> "SandboxResult": ...
    @abstractmethod
    async def execute_raw(self, command, timeout=300) -> "SandboxResult": ...
class SandboxManager(BaseSandbox):
    """
    Legacy shared sandbox: persistent Docker container running security tools.
    Tools are executed via `docker exec` for fast invocation.
    Used by MCP server and terminal API (no scan_id context).
    """
    # Image/container names must match docker/docker-compose.sandbox.yml.
    SANDBOX_IMAGE = "neurosploit-sandbox:latest"
    SANDBOX_CONTAINER = "neurosploit-sandbox"
    DEFAULT_TIMEOUT = 300  # 5 minutes
    MAX_OUTPUT = 2 * 1024 * 1024  # 2MB
    # Known install commands for tools not pre-installed in the sandbox
    KNOWN_INSTALLS = {
        "wpscan": "gem install wpscan 2>&1",
        "joomscan": "pip3 install joomscan 2>&1",
        "dirsearch": "pip3 install dirsearch 2>&1",
        "commix": "pip3 install commix 2>&1",
        "wfuzz": "pip3 install wfuzz 2>&1",
        "sslyze": "pip3 install sslyze 2>&1",
        "retire": "npm install -g retire 2>&1",
        "testssl": "apt-get update -qq && apt-get install -y -qq testssl.sh 2>&1",
        "trufflehog": "pip3 install trufflehog 2>&1",
        "gitleaks": "GO111MODULE=on go install github.com/gitleaks/gitleaks/v8@latest 2>&1",
    }
    def __init__(self):
        # All state is lazily populated by initialize(); until then the
        # sandbox reports itself unavailable.
        self._client: Optional[Any] = None
        self._container: Optional[Any] = None
        self._available = False
        self._temp_installed: set = set()  # Tools temporarily installed for cleanup
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
    async def initialize(self) -> Tuple[bool, str]:
        """Initialize Docker client and ensure sandbox is running.

        Sequence: verify Docker daemon reachability, reuse an already-running
        sandbox container, otherwise remove any stale one, verify the image
        exists, and finally start a fresh resource-limited container.

        Returns:
            (success, human-readable status/error message).
        """
        if not HAS_DOCKER:
            return False, "Docker SDK not installed"
        try:
            self._client = docker.from_env()
            self._client.ping()
        except Exception as e:
            return False, f"Docker not available: {e}"
        # Check if sandbox container already running
        try:
            container = self._client.containers.get(self.SANDBOX_CONTAINER)
            if container.status == "running":
                self._container = container
                self._available = True
                return True, "Sandbox already running"
            else:
                # Stale (exited/created) container — remove so the restart
                # below picks up current resource limits and options.
                container.remove(force=True)
        except NotFound:
            pass
        # Check if image exists
        try:
            self._client.images.get(self.SANDBOX_IMAGE)
        except NotFound:
            return False, (
                f"Sandbox image '{self.SANDBOX_IMAGE}' not found. "
                "Build with: cd docker && docker compose -f docker-compose.sandbox.yml build"
            )
        # Start sandbox container
        try:
            self._container = self._client.containers.run(
                self.SANDBOX_IMAGE,
                command="sleep infinity",  # keep-alive; tools run via exec
                name=self.SANDBOX_CONTAINER,
                detach=True,
                restart_policy={"Name": "unless-stopped"},
                network_mode="bridge",
                mem_limit="2g",
                cpu_period=100000,
                cpu_quota=200000,  # 2 CPUs
                cap_add=["NET_RAW", "NET_ADMIN"],  # needed for SYN scans (naabu/nmap)
                security_opt=["no-new-privileges:true"],
            )
            self._available = True
            return True, "Sandbox started"
        except Exception as e:
            return False, f"Failed to start sandbox: {e}"
@property
def is_available(self) -> bool:
"""Check if sandbox is ready for tool execution."""
return self._available and self._container is not None
async def stop(self):
"""Stop and remove the sandbox container."""
if self._container:
try:
self._container.stop(timeout=10)
self._container.remove(force=True)
except Exception:
pass
self._container = None
self._available = False
async def health_check(self) -> Dict:
"""Run health check on the sandbox container."""
if not self.is_available:
return {"status": "unavailable", "tools": []}
result = await self._exec("nuclei -version 2>&1 && naabu -version 2>&1 && nmap --version 2>&1 | head -1")
tools = []
if "nuclei" in result.stdout.lower():
tools.append("nuclei")
if "naabu" in result.stdout.lower():
tools.append("naabu")
if "nmap" in result.stdout.lower():
tools.append("nmap")
return {
"status": "healthy" if tools else "degraded",
"tools": tools,
"container": self.SANDBOX_CONTAINER,
"uptime": self._container.attrs.get("State", {}).get("StartedAt", "") if self._container else "",
}
# ------------------------------------------------------------------
# Low-level execution
# ------------------------------------------------------------------
async def _exec(
self, command: str, timeout: int = DEFAULT_TIMEOUT
) -> SandboxResult:
"""Execute a command inside the sandbox container."""
if not self.is_available:
return SandboxResult(
tool="sandbox", command=command, exit_code=-1,
stdout="", stderr="", duration_seconds=0,
error="Sandbox not available",
)
started = datetime.utcnow()
try:
exec_result = await asyncio.get_event_loop().run_in_executor(
None,
lambda: self._container.exec_run(
cmd=["bash", "-c", command],
stdout=True,
stderr=True,
demux=True,
),
)
duration = (datetime.utcnow() - started).total_seconds()
stdout_raw, stderr_raw = exec_result.output
stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
stderr = (stderr_raw or b"").decode("utf-8", errors="replace")
# Truncate oversized output
if len(stdout) > self.MAX_OUTPUT:
stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
if len(stderr) > self.MAX_OUTPUT:
stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"
return SandboxResult(
tool="sandbox",
command=command,
exit_code=exec_result.exit_code,
stdout=stdout,
stderr=stderr,
duration_seconds=duration,
)
except Exception as e:
duration = (datetime.utcnow() - started).total_seconds()
return SandboxResult(
tool="sandbox", command=command, exit_code=-1,
stdout="", stderr="", duration_seconds=duration,
error=str(e),
)
# ------------------------------------------------------------------
# High-level tool APIs
# ------------------------------------------------------------------
async def run_nuclei(
self,
target: str,
templates: Optional[str] = None,
severity: Optional[str] = None,
tags: Optional[str] = None,
rate_limit: int = 150,
timeout: int = 600,
) -> SandboxResult:
"""
Run Nuclei vulnerability scanner against a target.
Args:
target: URL or host to scan
templates: Specific template path/ID (e.g., "cves/2024/")
severity: Filter by severity (critical,high,medium,low,info)
tags: Filter by tags (e.g., "xss,sqli,lfi")
rate_limit: Requests per second (default 150)
timeout: Max execution time in seconds
"""
cmd_parts = [
"nuclei",
"-u", shlex.quote(target),
"-jsonl",
"-rate-limit", str(rate_limit),
"-silent",
"-no-color",
]
if templates:
cmd_parts.extend(["-t", shlex.quote(templates)])
if severity:
cmd_parts.extend(["-severity", shlex.quote(severity)])
if tags:
cmd_parts.extend(["-tags", shlex.quote(tags)])
command = " ".join(cmd_parts) + " 2>/dev/null"
result = await self._exec(command, timeout=timeout)
result.tool = "nuclei"
# Parse findings
if result.stdout:
result.findings = parse_nuclei_jsonl(result.stdout)
return result
async def run_naabu(
self,
target: str,
ports: Optional[str] = None,
top_ports: Optional[int] = None,
scan_type: str = "s",
rate: int = 1000,
timeout: int = 300,
) -> SandboxResult:
"""
Run Naabu port scanner against a target.
Args:
target: IP address or hostname to scan
ports: Specific ports (e.g., "80,443,8080" or "1-65535")
top_ports: Use top N ports (e.g., 100, 1000)
scan_type: SYN (s), CONNECT (c)
rate: Packets per second
timeout: Max execution time in seconds
"""
cmd_parts = [
"naabu",
"-host", shlex.quote(target),
"-json",
"-rate", str(rate),
"-silent",
"-no-color",
]
if ports:
cmd_parts.extend(["-p", shlex.quote(ports)])
elif top_ports:
cmd_parts.extend(["-top-ports", str(top_ports)])
else:
cmd_parts.extend(["-top-ports", "1000"])
if scan_type:
cmd_parts.extend(["-scan-type", scan_type])
command = " ".join(cmd_parts) + " 2>/dev/null"
result = await self._exec(command, timeout=timeout)
result.tool = "naabu"
# Parse port findings
if result.stdout:
result.findings = parse_naabu_output(result.stdout)
return result
async def run_httpx(
self,
targets: List[str],
timeout: int = 120,
) -> SandboxResult:
"""
Run HTTPX for HTTP probing and tech detection.
Args:
targets: List of URLs/hosts to probe
timeout: Max execution time
"""
target_str = "\\n".join(shlex.quote(t) for t in targets)
command = (
f'echo -e "{target_str}" | httpx -silent -json '
f'-title -tech-detect -status-code -content-length '
f'-follow-redirects -no-color 2>/dev/null'
)
result = await self._exec(command, timeout=timeout)
result.tool = "httpx"
# Parse JSON lines
if result.stdout:
findings = []
for line in result.stdout.strip().split("\n"):
try:
data = json.loads(line)
findings.append({
"url": data.get("url", ""),
"status_code": data.get("status_code", 0),
"title": data.get("title", ""),
"technologies": data.get("tech", []),
"content_length": data.get("content_length", 0),
"webserver": data.get("webserver", ""),
})
except json.JSONDecodeError:
continue
result.findings = findings
return result
async def run_subfinder(
self,
domain: str,
timeout: int = 120,
) -> SandboxResult:
"""
Run Subfinder for subdomain enumeration.
Args:
domain: Base domain to enumerate
timeout: Max execution time
"""
command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
result = await self._exec(command, timeout=timeout)
result.tool = "subfinder"
if result.stdout:
subdomains = [s.strip() for s in result.stdout.strip().split("\n") if s.strip()]
result.findings = [{"subdomain": s} for s in subdomains]
return result
async def run_nmap(
self,
target: str,
ports: Optional[str] = None,
scripts: bool = True,
timeout: int = 300,
) -> SandboxResult:
"""
Run Nmap network scanner.
Args:
target: IP/hostname to scan
ports: Port specification
scripts: Enable default scripts (-sC)
timeout: Max execution time
"""
cmd_parts = ["nmap", "-sV"]
if scripts:
cmd_parts.append("-sC")
if ports:
cmd_parts.extend(["-p", shlex.quote(ports)])
cmd_parts.extend(["-oN", "/dev/stdout", shlex.quote(target)])
command = " ".join(cmd_parts) + " 2>/dev/null"
result = await self._exec(command, timeout=timeout)
result.tool = "nmap"
return result
async def execute_raw(
self,
command: str,
timeout: int = DEFAULT_TIMEOUT,
) -> SandboxResult:
"""
Execute an arbitrary shell command inside the sandbox container.
Used by the Terminal Agent for interactive infrastructure testing.
Returns raw stdout/stderr/exit_code.
Args:
command: Shell command to execute (passed to sh -c)
timeout: Max execution time in seconds
"""
result = await self._exec(f"sh -c {shlex.quote(command)}", timeout=timeout)
result.tool = "raw"
return result
async def run_tool(
self,
tool: str,
args: str,
timeout: int = DEFAULT_TIMEOUT,
) -> SandboxResult:
"""
Run any tool available in the sandbox.
Args:
tool: Tool name (nuclei, naabu, nmap, httpx, etc.)
args: Command-line arguments as string
timeout: Max execution time
"""
# Validate tool is available
allowed_tools = {
"nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
"dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
"whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
"wfuzz", "arjun", "wafw00f", "waybackurls",
}
if tool not in allowed_tools:
return SandboxResult(
tool=tool, command=f"{tool} {args}", exit_code=-1,
stdout="", stderr="", duration_seconds=0,
error=f"Tool '{tool}' not in allowed list: {sorted(allowed_tools)}",
)
command = f"{tool} {args} 2>&1"
result = await self._exec(command, timeout=timeout)
result.tool = tool
return result
# ------------------------------------------------------------------
# Dynamic tool install / run / cleanup
# ------------------------------------------------------------------
async def install_tool(
self, tool: str, install_cmd: str = ""
) -> Tuple[bool, str]:
"""
Temporarily install a tool in the sandbox container.
Args:
tool: Tool name (must be in KNOWN_INSTALLS or provide install_cmd)
install_cmd: Custom install command (overrides KNOWN_INSTALLS)
Returns:
(success, message) tuple
"""
if not self.is_available:
return False, "Sandbox not available"
cmd = install_cmd or self.KNOWN_INSTALLS.get(tool, "")
if not cmd:
return False, f"No install command for '{tool}'"
logger.info(f"Installing tool '{tool}' in sandbox...")
result = await self._exec(cmd, timeout=120)
success = result.exit_code == 0
if success:
self._temp_installed.add(tool)
logger.info(f"Tool '{tool}' installed successfully")
else:
logger.warning(f"Tool '{tool}' install failed: {result.stderr[:200]}")
msg = result.stdout[:500] if success else result.stderr[:500]
return success, msg
async def run_and_cleanup(
self,
tool: str,
args: str,
cleanup: bool = True,
timeout: int = 180,
) -> SandboxResult:
"""
Install tool if needed, run it, collect output, then cleanup.
This is the primary method for dynamic tool execution:
1. Check if tool exists in sandbox
2. Install if missing (from KNOWN_INSTALLS)
3. Run the tool with given arguments
4. Cleanup the installation if it was temporary
Args:
tool: Tool name
args: Command-line arguments
cleanup: Whether to remove temporarily installed tools
timeout: Max execution time in seconds
Returns:
SandboxResult with stdout, stderr, findings
"""
if not self.is_available:
return SandboxResult(
tool=tool, command=f"{tool} {args}", exit_code=-1,
stdout="", stderr="", duration_seconds=0,
error="Sandbox not available",
)
# Check if tool exists
check = await self._exec(f"which {tool} 2>/dev/null")
if check.exit_code != 0:
# Try to install
ok, msg = await self.install_tool(tool)
if not ok:
return SandboxResult(
tool=tool, command=f"{tool} {args}", exit_code=-1,
stdout="", stderr=msg, duration_seconds=0,
error=f"Install failed: {msg}",
)
# Run tool
result = await self.run_tool(tool, args, timeout=timeout)
# Cleanup if temporarily installed
if cleanup and tool in self._temp_installed:
logger.info(f"Cleaning up temporarily installed tool: {tool}")
await self._exec(
f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
timeout=30,
)
self._temp_installed.discard(tool)
return result
async def cleanup_temp_tools(self):
"""Remove all temporarily installed tools."""
if not self._temp_installed:
return
for tool in list(self._temp_installed):
logger.info(f"Cleaning up temp tool: {tool}")
await self._exec(
f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
timeout=30,
)
self._temp_installed.discard(tool)
# ---------------------------------------------------------------------------
# Global singleton
# ---------------------------------------------------------------------------
# Lazily-created shared SandboxManager; populated by get_sandbox() when no
# scan_id is supplied (the legacy single-container path).
_manager: Optional[SandboxManager] = None
# Alias for backward compatibility
LegacySandboxManager = SandboxManager
async def get_sandbox(scan_id: Optional[str] = None) -> BaseSandbox:
    """Get a sandbox instance.

    Args:
        scan_id: If provided, returns a per-scan KaliSandbox from the container pool.
                 If None, returns the legacy shared SandboxManager.

    Backward compatible: all existing callers use get_sandbox() with no args.
    Agent passes scan_id for per-scan container isolation.
    """
    global _manager
    if scan_id is not None:
        try:
            from core.container_pool import get_pool
            return await get_pool().get_or_create(scan_id)
        except Exception as e:
            logger.warning(f"Per-scan sandbox failed ({e}), falling back to shared")
            # Fall through to the shared-container path below.
    # Legacy path: one shared persistent container, created on first use.
    if _manager is None:
        _manager = SandboxManager()
        ok, msg = await _manager.initialize()
        if not ok:
            logger.warning(f"Sandbox initialization: {msg}")
    return _manager

219
core/scheduler.py Normal file
View File

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
Scan Scheduler - Recurring task orchestration for NeuroSploit.
Supports cron expressions and interval-based scheduling for:
- Reconnaissance scans
- Vulnerability validation
- Re-analysis of previous findings
Uses APScheduler with SQLite persistence so jobs survive restarts.
"""
import json
import logging
from datetime import datetime
from typing import Dict, List, Optional
from pathlib import Path
logger = logging.getLogger(__name__)
try:
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
HAS_APSCHEDULER = True
except ImportError:
HAS_APSCHEDULER = False
logger.warning("APScheduler not installed. Scheduler disabled. Install with: pip install apscheduler>=3.10.0")
class ScanScheduler:
    """Manages recurring scan jobs via APScheduler.

    Job schedules are persisted in a SQLAlchemy (SQLite) job store so they
    survive restarts.  Runtime metadata (last_run, run_count, status) is kept
    in-memory in ``jobs_meta``, keyed by the caller-supplied job id.
    """

    def __init__(self, config: Dict, database_url: str = "sqlite:///./data/neurosploit_scheduler.db"):
        """Build the scheduler and register jobs declared in the config.

        Args:
            config: Full application config; the 'scheduler' section is read.
            database_url: SQLAlchemy URL for the persistent job store.
        """
        self.config = config
        self.scheduler_config = config.get('scheduler', {})
        self.enabled = self.scheduler_config.get('enabled', False)
        self.jobs_meta: Dict[str, Dict] = {}  # job_id -> metadata
        self._scan_callback = None
        if not HAS_APSCHEDULER:
            # Degrade gracefully: every public method checks self.scheduler.
            self.enabled = False
            self.scheduler = None
            return
        jobstores = {
            'default': SQLAlchemyJobStore(url=database_url)
        }
        self.scheduler = AsyncIOScheduler(jobstores=jobstores)
        # Load pre-configured jobs from config; a bad entry must not abort
        # the rest.
        for job_config in self.scheduler_config.get('jobs', []):
            try:
                self.add_job(
                    job_id=job_config['id'],
                    target=job_config['target'],
                    scan_type=job_config.get('scan_type', 'quick'),
                    cron_expression=job_config.get('cron'),
                    interval_minutes=job_config.get('interval_minutes'),
                    agent_role=job_config.get('agent_role'),
                    llm_profile=job_config.get('llm_profile')
                )
            except Exception as e:
                logger.error(f"Failed to load scheduled job '{job_config.get('id', '?')}': {e}")

    def set_scan_callback(self, callback):
        """Set the callback function that executes scans.

        The callback signature should be:
            async def callback(target: str, scan_type: str,
                               agent_role: Optional[str], llm_profile: Optional[str]) -> Dict
        """
        self._scan_callback = callback

    def add_job(self, job_id: str, target: str, scan_type: str = "quick",
                cron_expression: Optional[str] = None,
                interval_minutes: Optional[int] = None,
                agent_role: Optional[str] = None,
                llm_profile: Optional[str] = None) -> Dict:
        """Schedule a recurring scan job.

        Args:
            job_id: Unique identifier for the job
            target: Target URL or IP
            scan_type: 'quick', 'full', 'recon', or 'analysis'
            cron_expression: Cron schedule (e.g., '0 */6 * * *' for every 6 hours)
            interval_minutes: Alternative to cron - run every N minutes
            agent_role: Optional agent role for AI analysis
            llm_profile: Optional LLM profile override

        Returns:
            The job metadata dict, or an {"error": ...} dict on failure.
        """
        if not self.scheduler:
            return {"error": "Scheduler not available (APScheduler not installed)"}
        if cron_expression:
            trigger = CronTrigger.from_crontab(cron_expression)
            schedule_desc = f"cron: {cron_expression}"
        elif interval_minutes:
            trigger = IntervalTrigger(minutes=interval_minutes)
            schedule_desc = f"every {interval_minutes} minutes"
        else:
            return {"error": "Provide either cron_expression or interval_minutes"}
        self.scheduler.add_job(
            self._execute_scheduled_scan,
            trigger=trigger,
            id=job_id,
            # BUGFIX: pass the real job_id so _execute_scheduled_scan updates
            # the matching jobs_meta entry.  It previously reconstructed
            # "scan_{target}_{scan_type}" (the job *name*), so last_run and
            # run_count were never updated unless the caller happened to pick
            # that exact id.
            args=[target, scan_type, agent_role, llm_profile, job_id],
            replace_existing=True,
            name=f"scan_{target}_{scan_type}"
        )
        meta = {
            "id": job_id,
            "target": target,
            "scan_type": scan_type,
            "schedule": schedule_desc,
            "agent_role": agent_role,
            "llm_profile": llm_profile,
            "created_at": datetime.now().isoformat(),
            "last_run": None,
            "run_count": 0,
            "status": "active"
        }
        self.jobs_meta[job_id] = meta
        logger.info(f"Scheduled job '{job_id}': {target} ({scan_type}) - {schedule_desc}")
        return meta

    def remove_job(self, job_id: str) -> bool:
        """Remove a scheduled job. Returns True on success."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.remove_job(job_id)
            self.jobs_meta.pop(job_id, None)
            logger.info(f"Removed scheduled job: {job_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to remove job '{job_id}': {e}")
            return False

    def pause_job(self, job_id: str) -> bool:
        """Pause a scheduled job. Returns True on success."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.pause_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "paused"
            return True
        except Exception as e:
            logger.error(f"Failed to pause job '{job_id}': {e}")
            return False

    def resume_job(self, job_id: str) -> bool:
        """Resume a paused job. Returns True on success."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.resume_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "active"
            return True
        except Exception as e:
            logger.error(f"Failed to resume job '{job_id}': {e}")
            return False

    def list_jobs(self) -> List[Dict]:
        """List all scheduled jobs, merging APScheduler state with our metadata."""
        jobs = []
        if self.scheduler:
            for job in self.scheduler.get_jobs():
                meta = self.jobs_meta.get(job.id, {})
                jobs.append({
                    "id": job.id,
                    "name": job.name,
                    "next_run": str(job.next_run_time) if job.next_run_time else None,
                    "target": meta.get("target", "unknown"),
                    "scan_type": meta.get("scan_type", "unknown"),
                    "schedule": meta.get("schedule", "unknown"),
                    "status": meta.get("status", "active"),
                    "last_run": meta.get("last_run"),
                    "run_count": meta.get("run_count", 0)
                })
        return jobs

    async def _execute_scheduled_scan(self, target: str, scan_type: str,
                                      agent_role: Optional[str],
                                      llm_profile: Optional[str],
                                      job_id: Optional[str] = None):
        """Execute a scheduled scan. Called by APScheduler.

        Args:
            job_id: The real job id (added as a trailing arg by add_job).
                Defaults to None for jobs persisted by an older version,
                in which case we fall back to the legacy name-derived id.
        """
        if job_id is None:
            # Legacy persisted jobs carried only four args; reconstruct the
            # historical (possibly mismatching) id for best-effort bookkeeping.
            job_id = f"scan_{target}_{scan_type}"
        logger.info(f"Executing scheduled scan: {target} ({scan_type})")
        if job_id in self.jobs_meta:
            self.jobs_meta[job_id]["last_run"] = datetime.now().isoformat()
            self.jobs_meta[job_id]["run_count"] += 1
        if self._scan_callback:
            try:
                result = await self._scan_callback(target, scan_type, agent_role, llm_profile)
                logger.info(f"Scheduled scan completed: {target} ({scan_type})")
                return result
            except Exception as e:
                logger.error(f"Scheduled scan failed for {target}: {e}")
        else:
            logger.warning("No scan callback registered. Scheduled scan skipped.")

    def start(self):
        """Start the scheduler (no-op unless enabled and APScheduler present)."""
        if self.scheduler and self.enabled:
            self.scheduler.start()
            logger.info(f"Scheduler started with {len(self.list_jobs())} jobs")

    def stop(self):
        """Stop the scheduler gracefully."""
        if self.scheduler and self.scheduler.running:
            self.scheduler.shutdown(wait=False)
            logger.info("Scheduler stopped")

110
core/tool_registry.py Normal file
View File

@@ -0,0 +1,110 @@
"""
NeuroSploit v3 - Tool Installation Registry for Kali Containers
Maps tool names to installation commands that work inside kalilinux/kali-rolling.
Tools grouped by method: pre-installed (base image), apt (Kali repos), go install, pip.
"""
from typing import Optional, Dict
class ToolRegistry:
    """Registry of tool installation recipes for Kali sandbox containers."""

    # Tools pre-installed in Dockerfile.kali (no install needed)
    PRE_INSTALLED = {
        # Go tools (pre-compiled in builder stage)
        "nuclei", "naabu", "httpx", "subfinder", "katana", "dnsx",
        "uncover", "ffuf", "gobuster", "dalfox", "waybackurls",
        # APT tools (pre-installed in runtime stage)
        "nmap", "nikto", "sqlmap", "masscan", "whatweb",
        # System tools
        "curl", "wget", "git", "python3", "pip3", "go",
        "jq", "dig", "whois", "openssl", "netcat", "bash",
    }

    # APT packages available in Kali repos (on-demand, not pre-installed)
    APT_TOOLS: Dict[str, str] = {
        "wpscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq wpscan",
        "dirb": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dirb",
        "hydra": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hydra",
        "john": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq john",
        "hashcat": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hashcat",
        "testssl": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "testssl.sh": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "sslscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq sslscan",
        "enum4linux": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq enum4linux",
        "nbtscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nbtscan",
        "dnsrecon": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsrecon",
        "fierce": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq fierce",
        "amass": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq amass",
        "responder": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq responder",
        "medusa": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq medusa",
        "crackmapexec":"apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq crackmapexec",
    }

    # Go tools installed via `go install` (on-demand, not pre-compiled)
    GO_TOOLS: Dict[str, str] = {
        "gau": "github.com/lc/gau/v2/cmd/gau@latest",
        "gitleaks": "github.com/gitleaks/gitleaks/v8@latest",
        "anew": "github.com/tomnomnom/anew@latest",
        "httprobe": "github.com/tomnomnom/httprobe@latest",
    }

    # Python tools via pip
    PIP_TOOLS: Dict[str, str] = {
        "dirsearch": "pip3 install --no-cache-dir --break-system-packages dirsearch",
        "wfuzz": "pip3 install --no-cache-dir --break-system-packages wfuzz",
        "arjun": "pip3 install --no-cache-dir --break-system-packages arjun",
        "wafw00f": "pip3 install --no-cache-dir --break-system-packages wafw00f",
        "sslyze": "pip3 install --no-cache-dir --break-system-packages sslyze",
        "commix": "pip3 install --no-cache-dir --break-system-packages commix",
        "trufflehog":"pip3 install --no-cache-dir --break-system-packages trufflehog",
        "retire": "pip3 install --no-cache-dir --break-system-packages retirejs",
    }

    def get_install_command(self, tool: str) -> Optional[str]:
        """Get the install command for a tool inside a Kali container.

        Returns None if the tool is pre-installed or unknown.
        """
        if tool in self.PRE_INSTALLED:
            return None  # Already available
        apt_cmd = self.APT_TOOLS.get(tool)
        if apt_cmd is not None:
            return apt_cmd
        go_pkg = self.GO_TOOLS.get(tool)
        if go_pkg is not None:
            # Build with the module path, then copy the binary onto PATH.
            return (
                f"export GOPATH=/root/go && export PATH=$PATH:/root/go/bin && "
                f"go install -v {go_pkg} && "
                f"cp /root/go/bin/{tool} /usr/local/bin/ 2>/dev/null || true"
            )
        # pip recipe, or None when the tool is unknown.
        return self.PIP_TOOLS.get(tool)

    def is_known(self, tool: str) -> bool:
        """Check if we have a recipe for this tool."""
        groups = (self.PRE_INSTALLED, self.APT_TOOLS, self.GO_TOOLS, self.PIP_TOOLS)
        return any(tool in group for group in groups)

    def all_tools(self) -> Dict[str, str]:
        """Return all known tools and their install method."""
        # Same precedence as before: later categories overwrite earlier ones.
        catalog: Dict[str, str] = {name: "pre-installed" for name in self.PRE_INSTALLED}
        catalog.update({name: "apt" for name in self.APT_TOOLS})
        catalog.update({name: "go" for name in self.GO_TOOLS})
        catalog.update({name: "pip" for name in self.PIP_TOOLS})
        return catalog

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff