Mirror of https://github.com/CyberSecurityUP/NeuroSploit.git (synced 2026-02-12 14:02:45 +00:00)

Commit: Add files via upload
config/config.json (modified; filename inferred from the load path used in core/container_pool.py below)
@@ -1,35 +1,7 @@
 {
   "llm": {
-    "default_profile": "ollama_whiterabbit",
+    "default_profile": "gemini_pro_default",
     "profiles": {
-      "ollama_whiterabbit": {
-        "provider": "ollama",
-        "model": "lazarevtill/Llama-3-WhiteRabbitNeo-8B-v2.0:q4_0",
-        "api_key": "",
-        "temperature": 0.8,
-        "max_tokens": 4096,
-        "input_token_limit": 8000,
-        "output_token_limit": 4000,
-        "cache_enabled": false,
-        "search_context_level": "high",
-        "pdf_support_enabled": false,
-        "guardrails_enabled": false,
-        "hallucination_mitigation_strategy": null
-      },
-      "ollama_llama3_default": {
-        "provider": "ollama",
-        "model": "llama3:8b",
-        "api_key": "",
-        "temperature": 0.7,
-        "max_tokens": 4096,
-        "input_token_limit": 8000,
-        "output_token_limit": 4000,
-        "cache_enabled": true,
-        "search_context_level": "medium",
-        "pdf_support_enabled": false,
-        "guardrails_enabled": true,
-        "hallucination_mitigation_strategy": null
-      },
       "gemini_pro_default": {
         "provider": "gemini",
         "model": "gemini-pro",
@@ -43,52 +15,25 @@
         "pdf_support_enabled": true,
         "guardrails_enabled": true,
         "hallucination_mitigation_strategy": "consistency_check"
-      },
-      "claude_opus_default": {
-        "provider": "claude",
-        "model": "claude-sonnet-4-20250514",
-        "api_key": "${ANTHROPIC_API_KEY}",
-        "temperature": 0.8,
-        "max_tokens": 8192,
-        "input_token_limit": 200000,
-        "output_token_limit": 8192,
-        "cache_enabled": false,
-        "search_context_level": "high",
-        "pdf_support_enabled": false,
-        "guardrails_enabled": false,
-        "hallucination_mitigation_strategy": null
-      },
-      "gpt_4o_default": {
-        "provider": "gpt",
-        "model": "gpt-4o",
-        "api_key": "${OPENAI_API_KEY}",
-        "temperature": 0.7,
-        "max_tokens": 4096,
-        "input_token_limit": 128000,
-        "output_token_limit": 4096,
-        "cache_enabled": true,
-        "search_context_level": "high",
-        "pdf_support_enabled": true,
-        "guardrails_enabled": true,
-        "hallucination_mitigation_strategy": "consistency_check"
-      },
-      "lmstudio_default": {
-        "provider": "lmstudio",
-        "model": "local-model",
-        "api_key": "",
-        "temperature": 0.7,
-        "max_tokens": 4096,
-        "input_token_limit": 32000,
-        "output_token_limit": 4096,
-        "cache_enabled": false,
-        "search_context_level": "medium",
-        "pdf_support_enabled": false,
-        "guardrails_enabled": true,
-        "hallucination_mitigation_strategy": null
       }
     }
   },
   "agent_roles": {
+    "pentest_generalist": {
+      "enabled": true,
+      "tools_allowed": [
+        "nmap",
+        "metasploit",
+        "burpsuite",
+        "sqlmap",
+        "hydra"
+      ],
+      "description": "Performs comprehensive penetration tests across various domains.",
+      "methodology": ["OWASP-WSTG", "PTES", "OWASP-Top10-2021"],
+      "default_prompt": "auto_pentest",
+      "vuln_coverage": 100,
+      "ai_prompts": true
+    },
     "bug_bounty_hunter": {
       "enabled": true,
       "tools_allowed": [
@@ -97,67 +42,11 @@
         "burpsuite",
         "sqlmap"
       ],
-      "description": "Focuses on web application vulnerabilities, leveraging recon and exploitation tools."
-    },
-    "blue_team_agent": {
-      "enabled": true,
-      "tools_allowed": [],
-      "description": "Analyzes logs and telemetry for threats, provides defensive strategies."
-    },
-    "exploit_expert": {
-      "enabled": true,
-      "tools_allowed": [
-        "metasploit",
-        "nmap"
-      ],
-      "description": "Devises exploitation strategies and payloads for identified vulnerabilities."
-    },
-    "red_team_agent": {
-      "enabled": true,
-      "tools_allowed": [
-        "nmap",
-        "metasploit",
-        "hydra"
-      ],
-      "description": "Plans and executes simulated attacks to test an organization's defenses."
-    },
-    "replay_attack_specialist": {
-      "enabled": true,
-      "tools_allowed": [
-        "burpsuite"
-      ],
-      "description": "Identifies and leverages replay attack vectors in network traffic or authentication."
-    },
-    "pentest_generalist": {
-      "enabled": true,
-      "tools_allowed": [
-        "nmap",
-        "subfinder",
-        "nuclei",
-        "metasploit",
-        "burpsuite",
-        "sqlmap",
-        "hydra"
-      ],
-      "description": "Performs comprehensive penetration tests across various domains."
-    },
-    "owasp_expert": {
-      "enabled": true,
-      "tools_allowed": [
-        "burpsuite",
-        "sqlmap"
-      ],
-      "description": "Specializes in assessing web applications against OWASP Top 10 vulnerabilities."
-    },
-    "cwe_expert": {
-      "enabled": true,
-      "tools_allowed": [],
-      "description": "Analyzes code and reports for weaknesses based on MITRE CWE Top 25."
-    },
-    "malware_analyst": {
-      "enabled": true,
-      "tools_allowed": [],
-      "description": "Examines malware samples to understand functionality and identify IOCs."
+      "description": "Focuses on web application vulnerabilities with 100 vuln types.",
+      "methodology": ["OWASP-WSTG", "OWASP-Top10-2021"],
+      "default_prompt": "auto_pentest",
+      "vuln_coverage": 100,
+      "ai_prompts": true
     }
   },
   "methodologies": {
@@ -171,20 +60,51 @@
     "nmap": "/usr/bin/nmap",
     "metasploit": "/usr/bin/msfconsole",
     "burpsuite": "/usr/bin/burpsuite",
-    "sqlmap": "/usr/local/bin/sqlmap",
-    "hydra": "/usr/bin/hydra",
-    "nuclei": "/usr/local/bin/nuclei",
-    "nikto": "/usr/bin/nikto",
-    "gobuster": "/usr/bin/gobuster",
-    "ffuf": "/usr/bin/ffuf",
-    "subfinder": "/opt/homebrew/bin/subfinder",
-    "httpx": "/usr/local/bin/httpx",
-    "whatweb": "/usr/bin/whatweb",
-    "curl": "/usr/bin/curl",
-    "wpscan": "/usr/bin/wpscan",
-    "dirsearch": "/usr/local/bin/dirsearch",
-    "wafw00f": "/usr/local/bin/wafw00f",
-    "jq": "/usr/bin/jq"
+    "sqlmap": "/usr/bin/sqlmap",
+    "hydra": "/usr/bin/hydra"
+  },
+  "mcp_servers": {
+    "neurosploit_tools": {
+      "transport": "stdio",
+      "command": "python3",
+      "args": ["-m", "core.mcp_server"],
+      "description": "NeuroSploit pentest tools: screenshots, payload delivery, DNS, port scan, tech detect, subdomain enum, findings, AI prompts, Nuclei scanner, Naabu port scanner, sandbox execution"
+    }
+  },
+  "sandbox": {
+    "enabled": false,
+    "mode": "per_scan",
+    "image": "neurosploit-sandbox:latest",
+    "container_name": "neurosploit-sandbox",
+    "auto_start": false,
+    "kali": {
+      "enabled": true,
+      "image": "neurosploit-kali:latest",
+      "max_concurrent": 5,
+      "container_ttl_minutes": 60,
+      "auto_cleanup_orphans": true
+    },
+    "resources": {
+      "memory_limit": "2g",
+      "cpu_limit": 2.0
+    },
+    "tools": [
+      "nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
+      "dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
+      "whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
+      "wfuzz", "arjun", "wafw00f", "waybackurls"
+    ],
+    "nuclei": {
+      "rate_limit": 150,
+      "timeout": 600,
+      "severity_filter": "critical,high,medium",
+      "auto_update_templates": true
+    },
+    "naabu": {
+      "rate": 1000,
+      "top_ports": 1000,
+      "timeout": 300
+    }
   },
   "output": {
     "format": "json",
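The reworked llm and sandbox sections are consumed by the new modules in this commit (core/container_pool.py reads the sandbox.kali and sandbox.resources keys). Below is a minimal sketch of how a caller might resolve the active LLM profile from this config; the load_default_profile helper is hypothetical and only illustrates the structure, it is not part of the commit.

import json

def load_default_profile(path: str = "config/config.json") -> dict:
    """Hypothetical helper: return the profile selected by llm.default_profile."""
    with open(path) as f:
        cfg = json.load(f)
    llm = cfg["llm"]
    # After this commit the default profile is "gemini_pro_default"
    return llm["profiles"][llm["default_profile"]]

profile = load_default_profile()
print(profile["provider"], profile["model"])  # e.g. "gemini", "gemini-pro"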
core/browser_validator.py (new file, 500 lines)
@@ -0,0 +1,500 @@
#!/usr/bin/env python3
"""
Browser Validator - Playwright-based security finding validation.

Provides browser-based validation for security findings:
- Navigate to target URLs with payloads
- Detect security triggers (XSS dialogs, error patterns, etc.)
- Capture screenshots at each validation step
- Store evidence in structured per-finding directories

Screenshots are stored at: reports/screenshots/{finding_id}/
"""

import asyncio
import base64
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)

try:
    from playwright.async_api import async_playwright, Page, Browser, BrowserContext
    HAS_PLAYWRIGHT = True
except ImportError:
    HAS_PLAYWRIGHT = False
    logger.debug("Playwright not installed. Browser validation disabled.")


# Known security trigger patterns in page content
SECURITY_TRIGGERS = {
    'xss': ['<script>alert(', 'onerror=', 'onload=', 'javascript:'],
    'sqli': ['SQL syntax', 'mysql_fetch', 'pg_query', 'ORA-', 'sqlite3.OperationalError',
             'SQLSTATE', 'syntax error at or near', 'unclosed quotation mark'],
    'lfi': ['root:x:0', '/etc/passwd', '[boot loader]', 'Windows\\system.ini'],
    'rce': ['uid=', 'gid=', 'groups=', 'total ', 'drwx'],
    'error_disclosure': ['Stack Trace', 'Traceback (most recent call last)',
                         'Exception in thread', 'Fatal error', 'Parse error'],
}


class BrowserValidator:
    """Playwright-based browser validation for security findings."""

    def __init__(self, screenshots_dir: str = "reports/screenshots"):
        self.screenshots_dir = Path(screenshots_dir)
        self.screenshots_dir.mkdir(parents=True, exist_ok=True)
        self.browser: Optional['Browser'] = None
        self._playwright = None

    async def start(self, headless: bool = True):
        """Launch browser instance."""
        if not HAS_PLAYWRIGHT:
            raise RuntimeError(
                "Playwright not installed. Install with: pip install playwright && python -m playwright install chromium"
            )
        self._playwright = await async_playwright().start()
        self.browser = await self._playwright.chromium.launch(headless=headless)
        logger.info(f"Browser started (headless={headless})")

    async def stop(self):
        """Close browser and clean up."""
        if self.browser:
            await self.browser.close()
            self.browser = None
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None
        logger.info("Browser stopped")

    async def validate_finding(self, finding_id: str, url: str,
                               payload: Optional[str] = None,
                               method: str = "GET",
                               interaction_steps: Optional[List[Dict]] = None,
                               timeout: int = 30000) -> Dict:
        """Validate a security finding in a real browser.

        Args:
            finding_id: Unique identifier for the finding
            url: Target URL (may include payload in query params)
            payload: Optional payload description for logging
            method: HTTP method (currently GET-based navigation)
            interaction_steps: Optional list of browser interaction steps
            timeout: Navigation timeout in milliseconds

        Returns:
            Dict with validation result, screenshots, evidence
        """
        if not self.browser:
            return {"error": "Browser not started. Call start() first."}

        finding_dir = self.screenshots_dir / finding_id
        finding_dir.mkdir(parents=True, exist_ok=True)

        validation = {
            "finding_id": finding_id,
            "url": url,
            "payload": payload,
            "timestamp": datetime.now().isoformat(),
            "validated": False,
            "screenshots": [],
            "console_logs": [],
            "dialog_detected": False,
            "dialog_messages": [],
            "triggers_found": [],
            "evidence": "",
            "page_title": "",
            "status_code": None,
            "error": None
        }

        context = await self.browser.new_context(
            ignore_https_errors=True,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                       "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
        page = await context.new_page()

        # Capture console messages
        console_msgs = []
        page.on("console", lambda msg: console_msgs.append({
            "type": msg.type, "text": msg.text
        }))

        # Capture JavaScript dialogs (XSS alert/prompt/confirm detection)
        dialog_messages = []

        async def handle_dialog(dialog):
            dialog_messages.append({
                "type": dialog.type,
                "message": dialog.message
            })
            await dialog.dismiss()

        page.on("dialog", handle_dialog)

        # Track response status
        response_status = [None]

        def on_response(response):
            if response.url == url or response.url.rstrip('/') == url.rstrip('/'):
                response_status[0] = response.status

        page.on("response", on_response)

        try:
            # Navigate to the URL
            response = await page.goto(url, wait_until="networkidle", timeout=timeout)
            if response:
                validation["status_code"] = response.status

            validation["page_title"] = await page.title()

            # Take initial screenshot
            ss_path = finding_dir / "01_initial.png"
            await page.screenshot(path=str(ss_path), full_page=True)
            validation["screenshots"].append(str(ss_path))

            # Execute interaction steps if provided
            if interaction_steps:
                for i, step in enumerate(interaction_steps):
                    step_name = step.get('name', f'step_{i+2}')
                    try:
                        await self._execute_step(page, step)
                        await page.wait_for_timeout(500)  # Brief pause
                        ss_path = finding_dir / f"{i+2:02d}_{step_name}.png"
                        await page.screenshot(path=str(ss_path))
                        validation["screenshots"].append(str(ss_path))
                    except Exception as e:
                        logger.warning(f"Interaction step '{step_name}' failed: {e}")

            # Check for dialog detection (XSS)
            if dialog_messages:
                validation["validated"] = True
                validation["dialog_detected"] = True
                validation["dialog_messages"] = dialog_messages
                validation["evidence"] = f"JavaScript dialog triggered: {dialog_messages[0]['message']}"

                ss_path = finding_dir / "xss_dialog_detected.png"
                await page.screenshot(path=str(ss_path))
                validation["screenshots"].append(str(ss_path))

            # Check for security triggers in page content
            content = await page.content()
            for trigger_type, patterns in SECURITY_TRIGGERS.items():
                for pattern in patterns:
                    if pattern.lower() in content.lower():
                        validation["triggers_found"].append({
                            "type": trigger_type,
                            "pattern": pattern
                        })

            if validation["triggers_found"] and not validation["validated"]:
                validation["validated"] = True
                first_trigger = validation["triggers_found"][0]
                validation["evidence"] = (
                    f"Security trigger detected: {first_trigger['type']} "
                    f"(pattern: {first_trigger['pattern']})"
                )

                ss_path = finding_dir / "trigger_detected.png"
                await page.screenshot(path=str(ss_path))
                validation["screenshots"].append(str(ss_path))

            # Check console for errors that might indicate vulnerabilities
            error_msgs = [m for m in console_msgs if m["type"] in ("error", "warning")]
            if error_msgs:
                validation["console_logs"] = console_msgs

        except Exception as e:
            validation["error"] = str(e)
            logger.error(f"Browser validation error for {finding_id}: {e}")

            try:
                ss_path = finding_dir / "error.png"
                await page.screenshot(path=str(ss_path))
                validation["screenshots"].append(str(ss_path))
            except Exception:
                pass

        finally:
            await context.close()

        return validation

    async def verify_stored_xss(
        self,
        finding_id: str,
        form_url: str,
        form_data: Dict[str, str],
        display_url: str,
        submit_selector: str = "button[type=submit], input[type=submit], button:not([type])",
        timeout: int = 30000,
    ) -> Dict:
        """Two-phase stored XSS verification using browser.

        Phase 1: Navigate to form page, fill fields with payload, submit.
        Phase 2: Navigate to display page, check for dialog (alert/confirm/prompt).

        Args:
            finding_id: Unique ID for this verification attempt
            form_url: URL containing the form to submit
            form_data: Dict mapping CSS selectors to values (payload in relevant fields)
            display_url: URL where stored content is displayed
            submit_selector: CSS selector(s) for submit button (comma-separated)
            timeout: Navigation timeout in ms

        Returns:
            Dict with verification results, dialog detection, screenshots
        """
        if not self.browser:
            return {"error": "Browser not started. Call start() first."}

        finding_dir = self.screenshots_dir / finding_id
        finding_dir.mkdir(parents=True, exist_ok=True)

        result = {
            "finding_id": finding_id,
            "form_url": form_url,
            "display_url": display_url,
            "timestamp": datetime.now().isoformat(),
            "phase1_success": False,
            "phase2_success": False,
            "xss_confirmed": False,
            "dialog_detected": False,
            "dialog_messages": [],
            "screenshots": [],
            "evidence": "",
            "error": None,
        }

        context = await self.browser.new_context(
            ignore_https_errors=True,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                       "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
        page = await context.new_page()

        dialog_messages = []

        async def handle_dialog(dialog):
            dialog_messages.append({
                "type": dialog.type,
                "message": dialog.message,
                "phase": "phase2" if result["phase1_success"] else "phase1"
            })
            await dialog.dismiss()

        page.on("dialog", handle_dialog)

        try:
            # === PHASE 1: Navigate to form and submit payload ===
            await page.goto(form_url, wait_until="networkidle", timeout=timeout)

            ss_path = finding_dir / "01_form_page.png"
            await page.screenshot(path=str(ss_path), full_page=True)
            result["screenshots"].append(str(ss_path))

            # Fill form fields
            for selector, value in form_data.items():
                try:
                    await page.fill(selector, value)
                except Exception:
                    try:
                        await page.type(selector, value)
                    except Exception as fill_err:
                        logger.warning(f"Could not fill {selector}: {fill_err}")

            ss_path = finding_dir / "02_form_filled.png"
            await page.screenshot(path=str(ss_path))
            result["screenshots"].append(str(ss_path))

            # Submit
            submitted = False
            for sel in submit_selector.split(","):
                sel = sel.strip()
                try:
                    btn = await page.query_selector(sel)
                    if btn:
                        await btn.click()
                        submitted = True
                        break
                except Exception:
                    continue

            if not submitted and form_data:
                # Fallback: press Enter on last filled field
                last_sel = list(form_data.keys())[-1]
                try:
                    await page.press(last_sel, "Enter")
                except Exception:
                    pass

            try:
                await page.wait_for_load_state("networkidle", timeout=10000)
            except Exception:
                await page.wait_for_timeout(3000)

            ss_path = finding_dir / "03_after_submit.png"
            await page.screenshot(path=str(ss_path), full_page=True)
            result["screenshots"].append(str(ss_path))
            result["phase1_success"] = True

            # === PHASE 2: Navigate to display page ===
            await page.goto(display_url, wait_until="networkidle", timeout=timeout)
            await page.wait_for_timeout(1000)

            ss_path = finding_dir / "04_display_page.png"
            await page.screenshot(path=str(ss_path), full_page=True)
            result["screenshots"].append(str(ss_path))

            # Check for dialogs triggered on display page
            if dialog_messages:
                phase2_dialogs = [d for d in dialog_messages if d.get("phase") == "phase2"]
                if phase2_dialogs:
                    result["xss_confirmed"] = True
                    result["dialog_detected"] = True
                    result["dialog_messages"] = dialog_messages
                    result["evidence"] = (
                        f"Stored XSS CONFIRMED: JavaScript dialog triggered on display page. "
                        f"Dialog: {phase2_dialogs[0]['type']}('{phase2_dialogs[0]['message']}')"
                    )
                    result["phase2_success"] = True

                    ss_path = finding_dir / "05_xss_confirmed.png"
                    await page.screenshot(path=str(ss_path))
                    result["screenshots"].append(str(ss_path))
                else:
                    result["evidence"] = (
                        "Dialog triggered during form submission (phase1), not on display page."
                    )

            # Content-based fallback if no dialog
            if not result["xss_confirmed"]:
                content = await page.content()
                for _, payload_val in form_data.items():
                    if payload_val in content:
                        payload_lower = payload_val.lower()
                        for tag in ["<script", "onerror=", "onload=", "<svg", "<img",
                                    "onfocus=", "onclick=", "ontoggle"]:
                            if tag in payload_lower:
                                result["phase2_success"] = True
                                result["evidence"] = (
                                    f"Stored payload with '{tag}' found unescaped on display page. "
                                    f"Dialog may be blocked by CSP."
                                )
                                break
                        break

        except Exception as e:
            result["error"] = str(e)
            logger.error(f"Stored XSS verification error: {e}")
            try:
                ss_path = finding_dir / "error.png"
                await page.screenshot(path=str(ss_path))
                result["screenshots"].append(str(ss_path))
            except Exception:
                pass
        finally:
            await context.close()

        return result

    async def _execute_step(self, page: 'Page', step: Dict):
        """Execute a single browser interaction step."""
        action = step.get("action", "")

        if action == "click":
            await page.click(step["selector"])
        elif action == "fill":
            await page.fill(step["selector"], step["value"])
        elif action == "type":
            await page.type(step["selector"], step["value"])
        elif action == "submit":
            selector = step.get("selector", "button[type=submit]")
            await page.click(selector)
        elif action == "wait":
            await page.wait_for_timeout(step.get("ms", 2000))
        elif action == "navigate":
            await page.goto(step["url"], wait_until="networkidle")
        elif action == "select":
            await page.select_option(step["selector"], step["value"])
        elif action == "check":
            await page.check(step["selector"])
        elif action == "press":
            await page.press(step.get("selector", "body"), step["key"])
        else:
            logger.warning(f"Unknown interaction action: {action}")

    async def batch_validate(self, findings: List[Dict],
                             headless: bool = True) -> List[Dict]:
        """Validate multiple findings in sequence.

        Args:
            findings: List of dicts with 'finding_id', 'url', and optional 'payload'
            headless: Run browser in headless mode

        Returns:
            List of validation results
        """
        results = []
        await self.start(headless=headless)
        try:
            for finding in findings:
                result = await self.validate_finding(
                    finding_id=finding['finding_id'],
                    url=finding['url'],
                    payload=finding.get('payload'),
                    interaction_steps=finding.get('interaction_steps')
                )
                results.append(result)
        finally:
            await self.stop()
        return results


def validate_finding_sync(finding_id: str, url: str,
                          payload: str = None,
                          screenshots_dir: str = "reports/screenshots",
                          headless: bool = True) -> Dict:
    """Synchronous wrapper for browser validation.

    For use in synchronous code paths (e.g., BaseAgent).
    """
    if not HAS_PLAYWRIGHT:
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Playwright not installed"
        }

    async def _run():
        validator = BrowserValidator(screenshots_dir=screenshots_dir)
        await validator.start(headless=headless)
        try:
            return await validator.validate_finding(finding_id, url, payload)
        finally:
            await validator.stop()

    try:
        return asyncio.run(_run())
    except RuntimeError:
        # Already in an async context - use nest_asyncio or skip
        logger.warning("Cannot run sync validation inside async context")
        return {
            "finding_id": finding_id,
            "skipped": True,
            "reason": "Async context conflict"
        }


def embed_screenshot(filepath: str) -> str:
    """Convert a screenshot file to a base64 data URI for HTML embedding."""
    path = Path(filepath)
    if not path.exists():
        return ""
    with open(path, 'rb') as f:
        data = base64.b64encode(f.read()).decode('ascii')
    return f"data:image/png;base64,{data}"
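A minimal usage sketch for the validator above, assuming Playwright and Chromium are installed; the finding id and URL are illustrative placeholders for an authorized lab target, not values from this commit.

import asyncio
from core.browser_validator import BrowserValidator

async def main():
    validator = BrowserValidator(screenshots_dir="reports/screenshots")
    await validator.start(headless=True)
    try:
        # Placeholder finding id and reflected-XSS test URL
        result = await validator.validate_finding(
            finding_id="xss-001",
            url="http://target.local/search?q=<script>alert(1)</script>",
            payload="<script>alert(1)</script>",
        )
        print(result["validated"], result["evidence"])
        print(result["screenshots"])  # per-finding evidence under reports/screenshots/xss-001/
    finally:
        await validator.stop()

asyncio.run(main())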
core/container_pool.py (new file, 206 lines)
@@ -0,0 +1,206 @@
"""
NeuroSploit v3 - Container Pool

Global coordinator for per-scan Kali Linux containers.
Tracks all running sandbox containers, enforces max concurrent limits,
handles lifecycle management and orphan cleanup.
"""

import asyncio
import json
import logging
import threading
from datetime import datetime, timedelta
from typing import Dict, Optional

logger = logging.getLogger(__name__)

try:
    import docker
    from docker.errors import NotFound
    HAS_DOCKER = True
except ImportError:
    HAS_DOCKER = False

from core.kali_sandbox import KaliSandbox


class ContainerPool:
    """Global pool managing per-scan KaliSandbox instances.

    Thread-safe. One pool per process. Enforces resource limits.
    """

    def __init__(
        self,
        image: str = "neurosploit-kali:latest",
        max_concurrent: int = 5,
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        container_ttl_minutes: int = 60,
    ):
        self.image = image
        self.max_concurrent = max_concurrent
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.container_ttl = timedelta(minutes=container_ttl_minutes)

        self._sandboxes: Dict[str, KaliSandbox] = {}
        self._lock = asyncio.Lock()

    @classmethod
    def from_config(cls) -> "ContainerPool":
        """Create pool from config/config.json sandbox section."""
        try:
            with open("config/config.json") as f:
                cfg = json.load(f)
            sandbox_cfg = cfg.get("sandbox", {})
            kali_cfg = sandbox_cfg.get("kali", {})
            resources = sandbox_cfg.get("resources", {})

            return cls(
                image=kali_cfg.get("image", "neurosploit-kali:latest"),
                max_concurrent=kali_cfg.get("max_concurrent", 5),
                memory_limit=resources.get("memory_limit", "2g"),
                cpu_limit=resources.get("cpu_limit", 2.0),
                container_ttl_minutes=kali_cfg.get("container_ttl_minutes", 60),
            )
        except Exception as e:
            logger.warning(f"Could not load pool config, using defaults: {e}")
            return cls()

    async def get_or_create(self, scan_id: str) -> KaliSandbox:
        """Get existing sandbox for scan_id, or create a new one.

        Raises RuntimeError if max_concurrent limit reached.
        """
        async with self._lock:
            # Return existing
            if scan_id in self._sandboxes:
                sb = self._sandboxes[scan_id]
                if sb.is_available:
                    return sb
                else:
                    del self._sandboxes[scan_id]

            # Check limit
            active = sum(1 for sb in self._sandboxes.values() if sb.is_available)
            if active >= self.max_concurrent:
                raise RuntimeError(
                    f"Max concurrent containers ({self.max_concurrent}) reached. "
                    f"Active scans: {list(self._sandboxes.keys())}"
                )

            # Create new
            sb = KaliSandbox(
                scan_id=scan_id,
                image=self.image,
                memory_limit=self.memory_limit,
                cpu_limit=self.cpu_limit,
            )
            ok, msg = await sb.initialize()
            if not ok:
                raise RuntimeError(f"Failed to create Kali sandbox: {msg}")

            self._sandboxes[scan_id] = sb
            logger.info(
                f"Pool: created container for scan {scan_id} "
                f"({active + 1}/{self.max_concurrent} active)"
            )
            return sb

    async def destroy(self, scan_id: str):
        """Stop and remove the container for a specific scan."""
        async with self._lock:
            sb = self._sandboxes.pop(scan_id, None)
        if sb:
            await sb.stop()
            logger.info(f"Pool: destroyed container for scan {scan_id}")

    async def cleanup_all(self):
        """Destroy all managed containers (shutdown hook)."""
        async with self._lock:
            scan_ids = list(self._sandboxes.keys())
        for sid in scan_ids:
            await self.destroy(sid)
        logger.info("Pool: all containers destroyed")

    async def cleanup_orphans(self):
        """Find and remove neurosploit-* containers not tracked by this pool."""
        if not HAS_DOCKER:
            return

        try:
            client = docker.from_env()
            containers = client.containers.list(
                all=True,
                filters={"label": "neurosploit.type=kali-sandbox"},
            )
            async with self._lock:
                tracked = set(self._sandboxes.keys())

            removed = 0
            for c in containers:
                scan_id = c.labels.get("neurosploit.scan_id", "")
                if scan_id not in tracked:
                    try:
                        c.stop(timeout=5)
                    except Exception:
                        pass
                    try:
                        c.remove(force=True)
                        removed += 1
                        logger.info(f"Pool: removed orphan container {c.name}")
                    except Exception:
                        pass

            if removed:
                logger.info(f"Pool: cleaned up {removed} orphan containers")
        except Exception as e:
            logger.warning(f"Pool: orphan cleanup failed: {e}")

    async def cleanup_expired(self):
        """Remove containers that have exceeded their TTL."""
        now = datetime.utcnow()
        async with self._lock:
            expired = [
                sid for sid, sb in self._sandboxes.items()
                if sb._created_at and (now - sb._created_at) > self.container_ttl
            ]
        for sid in expired:
            logger.warning(f"Pool: container for scan {sid} exceeded TTL, destroying")
            await self.destroy(sid)

    def list_sandboxes(self) -> Dict[str, Dict]:
        """List all tracked sandboxes with status."""
        result = {}
        for sid, sb in self._sandboxes.items():
            result[sid] = {
                "scan_id": sid,
                "container_name": sb.container_name,
                "available": sb.is_available,
                "installed_tools": sorted(sb._installed_tools),
                "created_at": sb._created_at.isoformat() if sb._created_at else None,
            }
        return result

    @property
    def active_count(self) -> int:
        return sum(1 for sb in self._sandboxes.values() if sb.is_available)


# ---------------------------------------------------------------------------
# Global singleton pool
# ---------------------------------------------------------------------------
_pool: Optional[ContainerPool] = None
_pool_lock = threading.Lock()


def get_pool() -> ContainerPool:
    """Get or create the global container pool."""
    global _pool
    if _pool is None:
        with _pool_lock:
            if _pool is None:
                _pool = ContainerPool.from_config()
    return _pool
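A sketch of how a scan orchestrator might drive the pool; the scan id is illustrative and it assumes Docker is running and the neurosploit-kali image has been built.

import asyncio
from core.container_pool import get_pool

async def scan_lifecycle(scan_id: str = "scan-1234"):
    pool = get_pool()                       # process-wide singleton, configured from config.json
    sandbox = await pool.get_or_create(scan_id)
    try:
        health = await sandbox.health_check()
        print(health["status"], health["tools"])
    finally:
        await pool.destroy(scan_id)         # container is removed when the scan ends

asyncio.run(scan_lifecycle())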
core/kali_sandbox.py (new file, 392 lines)
@@ -0,0 +1,392 @@
"""
NeuroSploit v3 - Kali Linux Per-Scan Sandbox

Each scan gets its own Docker container based on kalilinux/kali-rolling.
Tools installed on-demand the first time they are requested.
Container destroyed when scan completes.
"""

import asyncio
import json
import logging
import shlex
import time
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple, Set

logger = logging.getLogger(__name__)

try:
    import docker
    from docker.errors import DockerException, NotFound, APIError
    HAS_DOCKER = True
except ImportError:
    HAS_DOCKER = False

from core.sandbox_manager import (
    BaseSandbox, SandboxResult,
    parse_nuclei_jsonl, parse_naabu_output,
)
from core.tool_registry import ToolRegistry


class KaliSandbox(BaseSandbox):
    """Per-scan Docker container based on Kali Linux.

    Lifecycle: create -> install tools on demand -> execute -> destroy.
    Each instance owns exactly one container named 'neurosploit-{scan_id}'.
    """

    DEFAULT_TIMEOUT = 300
    MAX_OUTPUT = 2 * 1024 * 1024  # 2MB

    def __init__(
        self,
        scan_id: str,
        image: str = "neurosploit-kali:latest",
        memory_limit: str = "2g",
        cpu_limit: float = 2.0,
        network_mode: str = "bridge",
    ):
        self.scan_id = scan_id
        self.container_name = f"neurosploit-{scan_id}"
        self.image = image
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.network_mode = network_mode

        self._client = None
        self._container = None
        self._available = False
        self._installed_tools: Set[str] = set()
        self._tool_registry = ToolRegistry()
        self._created_at: Optional[datetime] = None

    async def initialize(self) -> Tuple[bool, str]:
        """Create and start a new Kali container for this scan."""
        if not HAS_DOCKER:
            return False, "Docker SDK not installed"

        try:
            self._client = docker.from_env()
            self._client.ping()
        except Exception as e:
            return False, f"Docker not available: {e}"

        # Check if container already exists (resume after crash)
        try:
            existing = self._client.containers.get(self.container_name)
            if existing.status == "running":
                self._container = existing
                self._available = True
                self._created_at = datetime.utcnow()
                return True, f"Resumed existing container {self.container_name}"
            else:
                existing.remove(force=True)
        except NotFound:
            pass

        # Check image exists
        try:
            self._client.images.get(self.image)
        except NotFound:
            return False, (
                f"Kali sandbox image '{self.image}' not found. "
                "Build with: docker build -f docker/Dockerfile.kali -t neurosploit-kali:latest docker/"
            )

        # Create container
        try:
            cpu_quota = int(self.cpu_limit * 100000)
            self._container = self._client.containers.run(
                self.image,
                command="sleep infinity",
                name=self.container_name,
                detach=True,
                network_mode=self.network_mode,
                mem_limit=self.memory_limit,
                cpu_period=100000,
                cpu_quota=cpu_quota,
                cap_add=["NET_RAW", "NET_ADMIN"],
                security_opt=["no-new-privileges:true"],
                labels={
                    "neurosploit.scan_id": self.scan_id,
                    "neurosploit.type": "kali-sandbox",
                },
            )
            self._available = True
            self._created_at = datetime.utcnow()
            logger.info(f"Created Kali container {self.container_name} for scan {self.scan_id}")
            return True, f"Container {self.container_name} started"
        except Exception as e:
            return False, f"Failed to create container: {e}"

    @property
    def is_available(self) -> bool:
        return self._available and self._container is not None

    async def stop(self):
        """Stop and remove this scan's container."""
        if self._container:
            try:
                self._container.stop(timeout=10)
            except Exception:
                pass
            try:
                self._container.remove(force=True)
                logger.info(f"Destroyed container {self.container_name}")
            except Exception as e:
                logger.warning(f"Error removing {self.container_name}: {e}")
        self._container = None
        self._available = False

    async def health_check(self) -> Dict:
        """Run health check on this container."""
        if not self.is_available:
            return {"status": "unavailable", "scan_id": self.scan_id, "tools": []}

        result = await self._exec(
            "nuclei -version 2>&1; naabu -version 2>&1; nmap --version 2>&1 | head -1",
            timeout=15,
        )
        tools = []
        output = (result.stdout or "").lower()
        for tool in ["nuclei", "naabu", "nmap"]:
            if tool in output:
                tools.append(tool)

        uptime = 0.0
        if self._created_at:
            uptime = (datetime.utcnow() - self._created_at).total_seconds()

        return {
            "status": "healthy" if tools else "degraded",
            "scan_id": self.scan_id,
            "container": self.container_name,
            "tools": tools,
            "installed_tools": sorted(self._installed_tools),
            "uptime_seconds": uptime,
        }

    # ------------------------------------------------------------------
    # Low-level execution
    # ------------------------------------------------------------------
    async def _exec(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> SandboxResult:
        """Execute command inside this container via docker exec."""
        if not self.is_available:
            return SandboxResult(
                tool="kali", command=command, exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error="Container not available",
            )

        started = time.time()
        try:
            exec_result = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: self._container.exec_run(
                    cmd=["bash", "-c", command],
                    stdout=True, stderr=True, demux=True,
                ),
            )

            duration = time.time() - started
            stdout_raw, stderr_raw = exec_result.output
            stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
            stderr = (stderr_raw or b"").decode("utf-8", errors="replace")

            if len(stdout) > self.MAX_OUTPUT:
                stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
            if len(stderr) > self.MAX_OUTPUT:
                stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"

            return SandboxResult(
                tool="kali", command=command,
                exit_code=exec_result.exit_code,
                stdout=stdout, stderr=stderr,
                duration_seconds=round(duration, 2),
            )
        except Exception as e:
            duration = time.time() - started
            return SandboxResult(
                tool="kali", command=command, exit_code=-1,
                stdout="", stderr="", duration_seconds=round(duration, 2),
                error=str(e),
            )

    # ------------------------------------------------------------------
    # On-demand tool installation
    # ------------------------------------------------------------------
    async def _ensure_tool(self, tool: str) -> bool:
        """Ensure a tool is installed in this container. Returns True if available."""
        if tool in self._installed_tools:
            return True

        # Check if already present in the base image
        check = await self._exec(f"which {shlex.quote(tool)} 2>/dev/null", timeout=10)
        if check.exit_code == 0 and check.stdout.strip():
            self._installed_tools.add(tool)
            return True

        # Get install recipe from registry
        recipe = self._tool_registry.get_install_command(tool)
        if not recipe:
            logger.warning(f"No install recipe for '{tool}' in Kali container")
            return False

        logger.info(f"[{self.container_name}] Installing {tool}...")
        result = await self._exec(recipe, timeout=300)
        if result.exit_code == 0:
            self._installed_tools.add(tool)
            logger.info(f"[{self.container_name}] Installed {tool} successfully")
            return True
        else:
            logger.warning(
                f"[{self.container_name}] Failed to install {tool}: "
                f"{(result.stderr or result.stdout or '')[:300]}"
            )
            return False

    # ------------------------------------------------------------------
    # High-level tool APIs (same signatures as SandboxManager)
    # ------------------------------------------------------------------
    async def run_nuclei(
        self, target, templates=None, severity=None,
        tags=None, rate_limit=150, timeout=600,
    ) -> SandboxResult:
        await self._ensure_tool("nuclei")
        cmd_parts = [
            "nuclei", "-u", shlex.quote(target),
            "-jsonl", "-rate-limit", str(rate_limit),
            "-silent", "-no-color",
        ]
        if templates:
            cmd_parts.extend(["-t", shlex.quote(templates)])
        if severity:
            cmd_parts.extend(["-severity", shlex.quote(severity)])
        if tags:
            cmd_parts.extend(["-tags", shlex.quote(tags)])

        result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
        result.tool = "nuclei"
        if result.stdout:
            result.findings = parse_nuclei_jsonl(result.stdout)
        return result

    async def run_naabu(
        self, target, ports=None, top_ports=None,
        scan_type="s", rate=1000, timeout=300,
    ) -> SandboxResult:
        await self._ensure_tool("naabu")
        cmd_parts = [
            "naabu", "-host", shlex.quote(target),
            "-json", "-rate", str(rate), "-silent", "-no-color",
        ]
        if ports:
            cmd_parts.extend(["-p", shlex.quote(str(ports))])
        elif top_ports:
            cmd_parts.extend(["-top-ports", str(top_ports)])
        else:
            cmd_parts.extend(["-top-ports", "1000"])
        if scan_type:
            cmd_parts.extend(["-scan-type", scan_type])

        result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
        result.tool = "naabu"
        if result.stdout:
            result.findings = parse_naabu_output(result.stdout)
        return result

    async def run_httpx(self, targets, timeout=120) -> SandboxResult:
        await self._ensure_tool("httpx")
        if isinstance(targets, str):
            targets = [targets]
        target_str = "\\n".join(shlex.quote(t) for t in targets)
        command = (
            f'echo -e "{target_str}" | httpx -silent -json '
            f'-title -tech-detect -status-code -content-length '
            f'-follow-redirects -no-color 2>/dev/null'
        )
        result = await self._exec(command, timeout=timeout)
        result.tool = "httpx"
        if result.stdout:
            findings = []
            # Split on real newlines; httpx emits one JSON object per line
            for line in result.stdout.strip().split("\n"):
                try:
                    data = json.loads(line)
                    findings.append({
                        "url": data.get("url", ""),
                        "status_code": data.get("status_code", 0),
                        "title": data.get("title", ""),
                        "technologies": data.get("tech", []),
                        "content_length": data.get("content_length", 0),
                        "webserver": data.get("webserver", ""),
                    })
                except (json.JSONDecodeError, ValueError):
                    continue
            result.findings = findings
        return result

    async def run_subfinder(self, domain, timeout=120) -> SandboxResult:
        await self._ensure_tool("subfinder")
        command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
        result = await self._exec(command, timeout=timeout)
        result.tool = "subfinder"
        if result.stdout:
            subs = [s.strip() for s in result.stdout.strip().split("\n") if s.strip()]
            result.findings = [{"subdomain": s} for s in subs]
        return result

    async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> SandboxResult:
        await self._ensure_tool("nmap")
        cmd_parts = ["nmap", "-sV"]
        if scripts:
            cmd_parts.append("-sC")
        if ports:
            cmd_parts.extend(["-p", shlex.quote(str(ports))])
        cmd_parts.extend(["-oN", "/dev/stdout", shlex.quote(target)])
        result = await self._exec(" ".join(cmd_parts) + " 2>/dev/null", timeout=timeout)
        result.tool = "nmap"
        return result

    async def run_tool(self, tool, args, timeout=300) -> SandboxResult:
        """Run any tool (validates whitelist, installs on demand)."""
        # Load whitelist from config
        allowed_tools = set()
        try:
            with open("config/config.json") as f:
                cfg = json.load(f)
            allowed_tools = set(cfg.get("sandbox", {}).get("tools", []))
        except Exception:
            pass

        if not allowed_tools:
            allowed_tools = {
                "nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
                "dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
                "whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
                "wfuzz", "arjun", "wafw00f", "waybackurls",
            }

        if tool not in allowed_tools:
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error=f"Tool '{tool}' not in allowed list",
            )

        if not await self._ensure_tool(tool):
            return SandboxResult(
                tool=tool, command=f"{tool} {args}", exit_code=-1,
                stdout="", stderr="", duration_seconds=0,
                error=f"Could not install '{tool}' in Kali container",
            )

        result = await self._exec(f"{shlex.quote(tool)} {args} 2>&1", timeout=timeout)
        result.tool = tool
        return result

    async def execute_raw(self, command, timeout=300) -> SandboxResult:
        result = await self._exec(command, timeout=timeout)
        result.tool = "raw"
        return result
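A direct-usage sketch for the sandbox class above; normally it is obtained through ContainerPool, and the scan id and target URL here are illustrative. It assumes Docker, the neurosploit-kali image, and a SandboxResult type whose findings attribute may be unset when a tool produces no output.

import asyncio
from core.kali_sandbox import KaliSandbox

async def demo():
    sb = KaliSandbox(scan_id="demo-scan")            # illustrative scan id
    ok, msg = await sb.initialize()
    if not ok:
        print(f"sandbox unavailable: {msg}")
        return
    try:
        # nuclei is installed on first use via _ensure_tool()
        res = await sb.run_nuclei("http://target.local", severity="critical,high")
        print(res.exit_code, len(getattr(res, "findings", None) or []))
    finally:
        await sb.stop()                              # container is destroyed with the scan

asyncio.run(demo())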
core/knowledge_augmentor.py (new file, 199 lines)
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Knowledge Augmentor - Adversarial pattern recognition from bug bounty data.

Loads the bug bounty finetuning dataset and provides retrieval-based
context enrichment for agent prompts. This is for PATTERN RECOGNITION
and adversarial intuition -- NOT for replaying exploits.

The augmentor:
- Builds a keyword index by vulnerability type
- Retrieves relevant patterns matching current testing context
- Injects formatted reference material into agent prompts
- Explicitly instructs the model to adapt, not copy
"""

import json
import logging
from typing import Dict, List, Optional
from pathlib import Path

logger = logging.getLogger(__name__)


class KnowledgeAugmentor:
    """Retrieval-based knowledge augmentation from bug bounty dataset."""

    # Vulnerability type keyword mappings
    VULN_KEYWORDS = {
        'xss': ['xss', 'cross-site scripting', 'reflected xss', 'stored xss', 'dom xss',
                'script injection', 'html injection'],
        'sqli': ['sql injection', 'sqli', 'union select', 'blind sql', 'error-based sql',
                 'time-based sql', 'second-order sql'],
        'ssrf': ['ssrf', 'server-side request forgery', 'internal service'],
        'idor': ['idor', 'insecure direct object', 'broken object level',
                 'bola', 'horizontal privilege'],
        'rce': ['rce', 'remote code execution', 'command injection', 'os command',
                'code execution', 'shell injection'],
        'lfi': ['lfi', 'local file inclusion', 'path traversal', 'directory traversal',
                'file read', 'file disclosure'],
        'auth_bypass': ['authentication bypass', 'broken authentication', 'auth bypass',
                        'session fixation', 'jwt', 'token manipulation'],
        'csrf': ['csrf', 'cross-site request forgery', 'state-changing'],
        'open_redirect': ['open redirect', 'url redirect', 'redirect vulnerability'],
        'xxe': ['xxe', 'xml external entity', 'xml injection'],
        'ssti': ['ssti', 'server-side template injection', 'template injection'],
        'race_condition': ['race condition', 'toctou', 'concurrency'],
        'graphql': ['graphql', 'introspection', 'batching attack'],
        'api': ['api', 'rest api', 'broken api', 'api key', 'rate limiting'],
        'deserialization': ['deserialization', 'insecure deserialization', 'pickle',
                            'object injection'],
        'upload': ['file upload', 'unrestricted upload', 'web shell', 'upload bypass'],
        'cors': ['cors', 'cross-origin', 'origin validation'],
        'subdomain_takeover': ['subdomain takeover', 'dangling dns', 'cname'],
        'information_disclosure': ['information disclosure', 'sensitive data', 'data exposure',
                                   'directory listing', 'source code disclosure'],
    }

    def __init__(self, dataset_path: str = "models/bug-bounty/bugbounty_finetuning_dataset.json",
                 max_patterns: int = 3):
        self.dataset_path = Path(dataset_path)
        self.max_patterns = max_patterns
        self.entries: List[Dict] = []
        self.index: Dict[str, List[int]] = {}  # vuln_type -> list of entry indices
        self._loaded = False

    def _ensure_loaded(self):
        """Lazy load and index the dataset on first use."""
        if self._loaded:
            return

        if not self.dataset_path.exists():
            logger.warning(f"Bug bounty dataset not found: {self.dataset_path}")
            self._loaded = True
            return

        try:
            with open(self.dataset_path, 'r', encoding='utf-8') as f:
                self.entries = json.load(f)
            logger.info(f"Loaded {len(self.entries)} entries from bug bounty dataset")
            self._build_index()
        except Exception as e:
            logger.error(f"Failed to load bug bounty dataset: {e}")

        self._loaded = True

    def _build_index(self):
        """Build keyword index over the dataset entries."""
        for i, entry in enumerate(self.entries):
            text = (
                entry.get('instruction', '') + ' ' +
                entry.get('input', '') + ' ' +
                entry.get('output', '')
            ).lower()

            for vuln_type, keywords in self.VULN_KEYWORDS.items():
                for kw in keywords:
                    if kw in text:
                        self.index.setdefault(vuln_type, []).append(i)
                        break  # One match per vuln_type per entry

        indexed_types = {k: len(v) for k, v in self.index.items()}
        logger.info(f"Knowledge index built: {indexed_types}")

    def get_relevant_patterns(self, vulnerability_type: str,
                              technologies: Optional[List[str]] = None,
                              max_entries: Optional[int] = None) -> str:
        """Retrieve relevant bug bounty patterns for context enrichment.

        Args:
            vulnerability_type: Type of vulnerability being tested (e.g., 'xss', 'sqli')
            technologies: Optional list of detected technologies for relevance boosting
            max_entries: Override default max patterns count

        Returns:
            Formatted string for injection into LLM prompts as cognitive augmentation.
            Returns empty string if no relevant patterns found.
        """
        self._ensure_loaded()

        limit = max_entries or self.max_patterns
        vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')

        # Try exact match first, then partial
        candidates = self.index.get(vuln_key, [])
        if not candidates:
            # Try partial matching
            for key, indices in self.index.items():
                if vuln_key in key or key in vuln_key:
                    candidates = indices
                    break

        if not candidates:
            return ""

        # Deduplicate
        candidates = list(dict.fromkeys(candidates))

        # Score by technology relevance if technologies provided
        if technologies:
            scored = []
            for idx in candidates:
                entry = self.entries[idx]
                text = (entry.get('output', '') + ' ' + entry.get('instruction', '')).lower()
                tech_score = sum(1 for t in technologies if t.lower() in text)
                scored.append((tech_score, idx))
            scored.sort(key=lambda x: x[0], reverse=True)
            candidates = [idx for _, idx in scored]

        selected = candidates[:limit]

        # Build augmentation context
        augmentation = (
            "\n\n=== ADVERSARIAL PATTERN CONTEXT (Bug Bounty Knowledge) ===\n"
            "These are REFERENCE PATTERNS for understanding attack vectors and methodology.\n"
            "ADAPT the approach to the current target. Do NOT replay exact exploits.\n"
            "Use these as cognitive anchors for creative hypothesis generation.\n\n"
        )

        for i, idx in enumerate(selected, 1):
            entry = self.entries[idx]
            instruction = entry.get('instruction', '')[:300]
            output = entry.get('output', '')

            # Extract methodology-relevant sections, truncate for context budget
            methodology = self._extract_methodology(output, max_chars=1500)

            augmentation += f"--- Pattern {i} ---\n"
            augmentation += f"Context: {instruction}\n"
            augmentation += f"Methodology:\n{methodology}\n\n"
|
||||||
|
|
||||||
|
augmentation += "=== END ADVERSARIAL PATTERN CONTEXT ===\n"
|
||||||
|
return augmentation
|
||||||
|
|
||||||
|
def _extract_methodology(self, text: str, max_chars: int = 1500) -> str:
|
||||||
|
"""Extract the most methodology-relevant portion of a writeup."""
|
||||||
|
# Look for methodology/steps/approach sections
|
||||||
|
markers = ['### steps', '### methodology', '### approach', '### exploitation',
|
||||||
|
'## steps', '## methodology', '## approach', '## exploitation',
|
||||||
|
'steps to reproduce', 'reproduction steps', 'proof of concept']
|
||||||
|
|
||||||
|
text_lower = text.lower()
|
||||||
|
for marker in markers:
|
||||||
|
idx = text_lower.find(marker)
|
||||||
|
if idx != -1:
|
||||||
|
return text[idx:idx + max_chars]
|
||||||
|
|
||||||
|
# Fall back to first max_chars of the output
|
||||||
|
return text[:max_chars]
|
||||||
|
|
||||||
|
def get_available_types(self) -> List[str]:
|
||||||
|
"""Return list of vulnerability types that have indexed entries."""
|
||||||
|
self._ensure_loaded()
|
||||||
|
return sorted(self.index.keys())
|
||||||
|
|
||||||
|
def get_entry_count(self, vulnerability_type: str) -> int:
|
||||||
|
"""Return count of indexed entries for a vulnerability type."""
|
||||||
|
self._ensure_loaded()
|
||||||
|
vuln_key = vulnerability_type.lower().replace(' ', '_').replace('-', '_')
|
||||||
|
return len(self.index.get(vuln_key, []))
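# Minimal usage sketch of the class above (the vuln key and technology list are illustrative;
# get_relevant_patterns() returns "" when nothing matches, so the concatenation is always safe):
def _example_augment_prompt() -> str:
    augmentor = KnowledgeAugmentor(max_patterns=2)
    context = augmentor.get_relevant_patterns("xss", technologies=["react", "nginx"])
    return "Plan XSS tests for the login form." + context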
|
||||||
@@ -48,6 +48,20 @@ class LLMManager:
self.pdf_support_enabled = self.active_profile.get('pdf_support_enabled', False)
self.guardrails_enabled = self.active_profile.get('guardrails_enabled', False)
self.hallucination_mitigation_strategy = self.active_profile.get('hallucination_mitigation_strategy', None)
|
||||||
|
|
||||||
|
# MAX_OUTPUT_TOKENS override from environment (up to 64000 for Claude)
|
||||||
|
env_max_tokens = os.getenv('MAX_OUTPUT_TOKENS', '').strip()
|
||||||
|
if env_max_tokens:
|
||||||
|
try:
|
||||||
|
override = int(env_max_tokens)
|
||||||
|
self.max_tokens = override
|
||||||
|
self.output_token_limit = override
|
||||||
|
logger.info(f"MAX_OUTPUT_TOKENS override applied: {override}")
|
||||||
|
except ValueError:
|
||||||
|
logger.warning(f"Invalid MAX_OUTPUT_TOKENS value: {env_max_tokens}")
|
||||||
|
|
||||||
|
# Model router (lazy init, set externally or via config)
|
||||||
|
self._model_router = None
|
||||||
|
|
||||||
# New prompt loading
self.json_prompts_file_path = Path("prompts/library.json")
|
||||||
@@ -147,6 +161,8 @@ class LLMManager:
|
|||||||
raw_response = self._generate_gemini_cli(prompt, system_prompt)
elif self.provider == 'lmstudio':
raw_response = self._generate_lmstudio(prompt, system_prompt)
elif self.provider == 'openrouter':
raw_response = self._generate_openrouter(prompt, system_prompt)
else:
raise ValueError(f"Unsupported provider: {self.provider}")
except Exception as e:
|
||||||
@@ -162,6 +178,21 @@ class LLMManager:
|
|||||||
|
|
||||||
return raw_response
|
||||||
|
|
||||||
|
def routed_generate(self, prompt: str, system_prompt: Optional[str] = None, task_type: str = "default") -> str:
|
||||||
|
"""Generate with optional model routing based on task type.
|
||||||
|
|
||||||
|
If model routing is enabled and a route exists for the task_type,
|
||||||
|
a dedicated LLMManager for that profile handles the request.
|
||||||
|
Otherwise falls back to the default generate().
|
||||||
|
|
||||||
|
Task types: reasoning, analysis, generation, validation, default
|
||||||
|
"""
|
||||||
|
if self._model_router:
|
||||||
|
result = self._model_router.generate(prompt, system_prompt, task_type)
|
||||||
|
if result is not None:
|
||||||
|
return result
|
||||||
|
return self.generate(prompt, system_prompt)
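# Hedged sketch of intended use (task_type values come from the docstring above; the way the
# router is attached is illustrative, since _model_router is "set externally or via config",
# and ModelRouter is the class added in core/model_router.py in this commit; the factory is hypothetical):
#
#   manager._model_router = ModelRouter(config, llm_manager_factory=make_manager)
#   plan = manager.routed_generate("Outline an IDOR test plan", task_type="reasoning")
#   verdict = manager.routed_generate("Is this finding a true positive?", task_type="validation")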
|
||||||
|
|
||||||
def _apply_guardrails(self, response: str) -> str:
"""Applies basic guardrails to the LLM response."""
if not self.guardrails_enabled:
|
||||||
@@ -614,6 +645,77 @@ Identify any potential hallucinations, inconsistencies, or areas where the respo
|
|||||||
logger.error(f"LM Studio error: {e}")
|
logger.error(f"LM Studio error: {e}")
|
||||||
return f"Error: {str(e)}"
|
return f"Error: {str(e)}"
|
||||||
|
|
||||||
|
def _generate_openrouter(self, prompt: str, system_prompt: Optional[str] = None) -> str:
|
||||||
|
"""Generate using OpenRouter API (OpenAI-compatible).
|
||||||
|
|
||||||
|
OpenRouter supports hundreds of models through a unified API.
|
||||||
|
Models are specified as provider/model (e.g., 'anthropic/claude-sonnet-4-20250514').
|
||||||
|
API key comes from OPENROUTER_API_KEY env var or config profile.
|
||||||
|
"""
|
||||||
|
if not self.api_key:
|
||||||
|
raise ValueError("OPENROUTER_API_KEY not set. Please set the environment variable or configure in config.json")
|
||||||
|
|
||||||
|
url = "https://openrouter.ai/api/v1/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {self.api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"HTTP-Referer": "https://github.com/neurosploit",
|
||||||
|
"X-Title": "NeuroSploit"
|
||||||
|
}
|
||||||
|
|
||||||
|
messages = []
|
||||||
|
if system_prompt:
|
||||||
|
messages.append({"role": "system", "content": system_prompt})
|
||||||
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"model": self.model,
|
||||||
|
"messages": messages,
|
||||||
|
"temperature": self.temperature,
|
||||||
|
"max_tokens": self.max_tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
last_error = None
|
||||||
|
for attempt in range(MAX_RETRIES):
|
||||||
|
try:
|
||||||
|
logger.debug(f"OpenRouter API request attempt {attempt + 1}/{MAX_RETRIES} (model: {self.model})")
|
||||||
|
response = requests.post(url, headers=headers, json=data, timeout=180)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
result = response.json()
|
||||||
|
return result["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
|
elif response.status_code == 401:
|
||||||
|
raise ValueError(f"Invalid OpenRouter API key: {response.text}")
|
||||||
|
|
||||||
|
elif response.status_code == 429:
|
||||||
|
last_error = f"Rate limit: {response.text}"
|
||||||
|
logger.warning(f"OpenRouter rate limit (attempt {attempt + 1}/{MAX_RETRIES})")
|
||||||
|
if attempt < MAX_RETRIES - 1:
|
||||||
|
sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** (attempt + 1))
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
|
elif response.status_code >= 500:
|
||||||
|
last_error = f"Server error {response.status_code}: {response.text}"
|
||||||
|
logger.warning(f"OpenRouter server error (attempt {attempt + 1}/{MAX_RETRIES})")
|
||||||
|
if attempt < MAX_RETRIES - 1:
|
||||||
|
sleep_time = RETRY_DELAY * (RETRY_MULTIPLIER ** attempt)
|
||||||
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"OpenRouter API error {response.status_code}: {response.text}")
|
||||||
|
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
last_error = "Timeout"
|
||||||
|
logger.warning(f"OpenRouter timeout (attempt {attempt + 1}/{MAX_RETRIES})")
|
||||||
|
if attempt < MAX_RETRIES - 1:
|
||||||
|
time.sleep(RETRY_DELAY * (RETRY_MULTIPLIER ** attempt))
|
||||||
|
|
||||||
|
except requests.exceptions.ConnectionError as e:
|
||||||
|
raise ValueError(f"Cannot connect to OpenRouter API: {e}")
|
||||||
|
|
||||||
|
raise ValueError(f"OpenRouter API failed after {MAX_RETRIES} retries: {last_error}")
|
||||||
|
|
||||||
def analyze_vulnerability(self, vulnerability_data: Dict) -> Dict:
"""Analyze vulnerability and suggest exploits"""
# This prompt will be fetched from library.json later
|
||||||
|
|||||||
244
core/mcp_client.py
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
MCP Client - Model Context Protocol tool connectivity.
|
||||||
|
|
||||||
|
Provides a standard interface for connecting to MCP servers and
|
||||||
|
executing tools. Supports both stdio and SSE transports.
|
||||||
|
|
||||||
|
Coexists with existing subprocess-based tool execution:
|
||||||
|
- MCP is tried first when enabled
|
||||||
|
- Falls back silently to subprocess if MCP unavailable
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, List, Optional, Any
|
||||||
|
from pathlib import Path
from contextlib import AsyncExitStack
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from mcp import ClientSession, StdioServerParameters
|
||||||
|
from mcp.client.stdio import stdio_client
|
||||||
|
HAS_MCP = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_MCP = False
|
||||||
|
logger.debug("MCP package not installed. MCP tool connectivity disabled.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from mcp.client.sse import sse_client
|
||||||
|
HAS_MCP_SSE = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_MCP_SSE = False
|
||||||
|
|
||||||
|
|
||||||
|
class MCPToolClient:
|
||||||
|
"""Client for connecting to MCP servers and executing tools."""
|
||||||
|
|
||||||
|
def __init__(self, config: Dict):
|
||||||
|
mcp_config = config.get('mcp_servers', {})
|
||||||
|
self.enabled = mcp_config.get('enabled', False) and HAS_MCP
|
||||||
|
self.servers_config = mcp_config.get('servers', {})
|
||||||
|
self._sessions: Dict[str, Any] = {}  # server_name -> active ClientSession
self._exit_stack: Optional[AsyncExitStack] = None  # keeps transports and sessions open until disconnect_all()
|
||||||
|
self._available_tools: Dict[str, List[Dict]] = {} # server_name -> tools list
|
||||||
|
|
||||||
|
if self.enabled:
|
||||||
|
logger.info(f"MCP client initialized with {len(self.servers_config)} server(s)")
|
||||||
|
else:
|
||||||
|
if not HAS_MCP and mcp_config.get('enabled', False):
|
||||||
|
logger.warning("MCP enabled in config but mcp package not installed. "
|
||||||
|
"Install with: pip install mcp>=1.0.0")
|
||||||
|
|
||||||
|
async def connect(self, server_name: str) -> bool:
|
||||||
|
"""Establish connection to an MCP server.
|
||||||
|
|
||||||
|
Returns True if connection successful, False otherwise.
|
||||||
|
"""
|
||||||
|
if not self.enabled:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if server_name in self._sessions:
|
||||||
|
return True # Already connected
|
||||||
|
|
||||||
|
server_config = self.servers_config.get(server_name)
|
||||||
|
if not server_config:
|
||||||
|
logger.error(f"MCP server '{server_name}' not found in config")
|
||||||
|
return False
|
||||||
|
|
||||||
|
transport = server_config.get('transport', 'stdio')
|
||||||
|
|
||||||
|
try:
|
||||||
|
if transport == 'stdio':
|
||||||
|
return await self._connect_stdio(server_name, server_config)
|
||||||
|
elif transport == 'sse':
|
||||||
|
return await self._connect_sse(server_name, server_config)
|
||||||
|
else:
|
||||||
|
logger.error(f"Unsupported MCP transport: {transport}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to connect to MCP server '{server_name}': {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _connect_stdio(self, server_name: str, config: Dict) -> bool:
|
||||||
|
"""Connect to a stdio-based MCP server."""
|
||||||
|
if not HAS_MCP:
|
||||||
|
return False
|
||||||
|
|
||||||
|
command = config.get('command', '')
|
||||||
|
args = config.get('args', [])
|
||||||
|
|
||||||
|
if not command:
|
||||||
|
logger.error(f"MCP server '{server_name}' has no command specified")
|
||||||
|
return False
|
||||||
|
|
||||||
|
server_params = StdioServerParameters(
|
||||||
|
command=command,
|
||||||
|
args=args,
|
||||||
|
env=config.get('env')
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create the stdio client connection
|
||||||
|
read_stream, write_stream = await asyncio.wait_for(
|
||||||
|
self._start_stdio_process(server_params),
|
||||||
|
timeout=30
|
||||||
|
)
|
||||||
|
|
||||||
|
# Enter the session as an async context so its receive loop is running before initialize().
session = await self._exit_stack.enter_async_context(ClientSession(read_stream, write_stream))
await session.initialize()
|
||||||
|
|
||||||
|
# Cache available tools
|
||||||
|
tools_result = await session.list_tools()
|
||||||
|
self._available_tools[server_name] = [
|
||||||
|
{"name": t.name, "description": t.description}
|
||||||
|
for t in tools_result.tools
|
||||||
|
]
|
||||||
|
|
||||||
|
self._sessions[server_name] = session
|
||||||
|
logger.info(f"Connected to MCP server '{server_name}' via stdio "
|
||||||
|
f"({len(self._available_tools[server_name])} tools available)")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Stdio connection to '{server_name}' failed: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _start_stdio_process(self, params: 'StdioServerParameters'):
    """Start a stdio MCP server process, keeping its pipes open until disconnect_all()."""
    # An `async with` here would close the pipes as soon as this coroutine returned,
    # so the transport is entered on a long-lived exit stack instead.
    if self._exit_stack is None:
        self._exit_stack = AsyncExitStack()
    return await self._exit_stack.enter_async_context(stdio_client(params))
|
||||||
|
|
||||||
|
async def _connect_sse(self, server_name: str, config: Dict) -> bool:
|
||||||
|
"""Connect to an SSE-based MCP server."""
|
||||||
|
if not HAS_MCP_SSE:
|
||||||
|
logger.error("MCP SSE transport not available")
|
||||||
|
return False
|
||||||
|
|
||||||
|
url = config.get('url', '')
|
||||||
|
if not url:
|
||||||
|
logger.error(f"MCP server '{server_name}' has no URL specified")
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Keep the SSE transport and session open beyond this call by entering them on the
# shared exit stack instead of a local `async with`.
if self._exit_stack is None:
    self._exit_stack = AsyncExitStack()
read, write = await self._exit_stack.enter_async_context(sse_client(url))
session = await self._exit_stack.enter_async_context(ClientSession(read, write))
await session.initialize()
|
||||||
|
|
||||||
|
tools_result = await session.list_tools()
|
||||||
|
self._available_tools[server_name] = [
|
||||||
|
{"name": t.name, "description": t.description}
|
||||||
|
for t in tools_result.tools
|
||||||
|
]
|
||||||
|
|
||||||
|
self._sessions[server_name] = session
|
||||||
|
logger.info(f"Connected to MCP server '{server_name}' via SSE "
|
||||||
|
f"({len(self._available_tools[server_name])} tools available)")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"SSE connection to '{server_name}' failed: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def call_tool(self, server_name: str, tool_name: str,
|
||||||
|
arguments: Optional[Dict] = None) -> Optional[str]:
|
||||||
|
"""Call a tool on an MCP server.
|
||||||
|
|
||||||
|
Returns the tool result as a string, or None if the call fails.
|
||||||
|
"""
|
||||||
|
if not self.enabled:
|
||||||
|
return None
|
||||||
|
|
||||||
|
session = self._sessions.get(server_name)
|
||||||
|
if not session:
|
||||||
|
connected = await self.connect(server_name)
|
||||||
|
if not connected:
|
||||||
|
return None
|
||||||
|
session = self._sessions.get(server_name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await session.call_tool(tool_name, arguments or {})
|
||||||
|
# Extract text content from result
|
||||||
|
if result.content:
|
||||||
|
texts = [c.text for c in result.content if hasattr(c, 'text')]
|
||||||
|
return '\n'.join(texts) if texts else str(result.content)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"MCP tool call failed ({server_name}/{tool_name}): {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def list_tools(self, server_name: Optional[str] = None) -> Dict[str, List[Dict]]:
|
||||||
|
"""List available tools from MCP servers.
|
||||||
|
|
||||||
|
If server_name is specified, lists tools for that server only.
|
||||||
|
Otherwise lists tools from all connected servers.
|
||||||
|
"""
|
||||||
|
if server_name:
|
||||||
|
tools = self._available_tools.get(server_name, [])
|
||||||
|
return {server_name: tools}
|
||||||
|
|
||||||
|
return dict(self._available_tools)
|
||||||
|
|
||||||
|
def find_tool_server(self, tool_name: str) -> Optional[str]:
|
||||||
|
"""Find which MCP server provides a given tool.
|
||||||
|
|
||||||
|
Returns the server name, or None if no server has the tool.
|
||||||
|
"""
|
||||||
|
for server_name, tools in self._available_tools.items():
|
||||||
|
for tool in tools:
|
||||||
|
if tool["name"] == tool_name:
|
||||||
|
return server_name
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def try_tool(self, tool_name: str, arguments: Optional[Dict] = None) -> Optional[str]:
|
||||||
|
"""Try to execute a tool via any available MCP server.
|
||||||
|
|
||||||
|
Searches all configured servers for the tool and executes it.
|
||||||
|
Returns None silently if no server has the tool (for fallback pattern).
|
||||||
|
"""
|
||||||
|
if not self.enabled:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Connect to any servers not yet connected
|
||||||
|
for server_name in self.servers_config:
|
||||||
|
if server_name not in self._sessions:
|
||||||
|
await self.connect(server_name)
|
||||||
|
|
||||||
|
server = self.find_tool_server(tool_name)
|
||||||
|
if server:
|
||||||
|
return await self.call_tool(server, tool_name, arguments)
|
||||||
|
|
||||||
|
return None # Silent fallback
|
||||||
|
|
||||||
|
async def disconnect_all(self):
|
||||||
|
"""Disconnect from all MCP servers."""
|
||||||
|
for server_name, session in self._sessions.items():
|
||||||
|
try:
|
||||||
|
if hasattr(session, 'close'):
|
||||||
|
await session.close()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error closing MCP session '{server_name}': {e}")
|
||||||
|
# Closing the shared exit stack tears down the transports and sessions entered in connect().
if self._exit_stack is not None:
    await self._exit_stack.aclose()
    self._exit_stack = None
self._sessions.clear()
self._available_tools.clear()
logger.info("Disconnected from all MCP servers")
|
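# Minimal usage sketch (config shape mirrors the keys read in __init__; the server command
# points at the MCP server added in this commit, and the tool name/arguments are illustrative):
async def _example_mcp_first_lookup() -> Optional[str]:
    client = MCPToolClient({
        "mcp_servers": {
            "enabled": True,
            "servers": {
                "neurosploit": {"transport": "stdio", "command": "python3", "args": ["-m", "core.mcp_server"]},
            },
        }
    })
    result = await client.try_tool("dns_lookup", {"domain": "example.com"})
    await client.disconnect_all()
    return result  # None means: fall back to the existing subprocess-based execution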
||||||
626
core/mcp_server.py
Normal file
@@ -0,0 +1,626 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
NeuroSploit MCP Server — Exposes pentest tools via Model Context Protocol.
|
||||||
|
|
||||||
|
Tools:
|
||||||
|
- screenshot_capture: Playwright browser screenshots
|
||||||
|
- payload_delivery: HTTP payload sending with full response capture
|
||||||
|
- dns_lookup: DNS record enumeration
|
||||||
|
- port_scan: TCP port scanning
|
||||||
|
- technology_detect: HTTP header-based tech fingerprinting
|
||||||
|
- subdomain_enumerate: Subdomain discovery via DNS brute-force
|
||||||
|
- save_finding: Persist a finding to agent memory
|
||||||
|
- get_vuln_prompt: Retrieve AI decision prompt for a vuln type
|
||||||
|
- execute_nuclei: Run Nuclei scanner in Docker sandbox (8000+ templates)
|
||||||
|
- execute_naabu: Run Naabu port scanner in Docker sandbox
|
||||||
|
- sandbox_health: Check sandbox container status
|
||||||
|
- sandbox_exec: Execute any allowed tool in the sandbox
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 -m core.mcp_server # stdio transport (default)
|
||||||
|
MCP_TRANSPORT=sse python3 -m core.mcp_server   # SSE transport (main() below currently supports stdio only)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional, List
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Guard MCP import — server only works where mcp package is available
|
||||||
|
try:
|
||||||
|
from mcp.server import Server
|
||||||
|
from mcp.server.stdio import stdio_server
|
||||||
|
from mcp.types import Tool, TextContent
|
||||||
|
HAS_MCP = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_MCP = False
|
||||||
|
logger.warning("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
|
||||||
|
|
||||||
|
# Guard Playwright import
|
||||||
|
try:
|
||||||
|
from core.browser_validator import BrowserValidator
|
||||||
|
HAS_PLAYWRIGHT = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_PLAYWRIGHT = False
|
||||||
|
|
||||||
|
# AI prompts access
|
||||||
|
try:
|
||||||
|
from backend.core.vuln_engine.ai_prompts import get_prompt, build_testing_prompt
|
||||||
|
HAS_AI_PROMPTS = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_AI_PROMPTS = False
|
||||||
|
|
||||||
|
# Security sandbox access
|
||||||
|
try:
|
||||||
|
from core.sandbox_manager import get_sandbox, SandboxManager
|
||||||
|
HAS_SANDBOX = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_SANDBOX = False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tool implementations
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _screenshot_capture(url: str, selector: Optional[str] = None) -> Dict:
|
||||||
|
"""Capture a screenshot of a URL using Playwright."""
|
||||||
|
if not HAS_PLAYWRIGHT:
|
||||||
|
return {"error": "Playwright not available", "screenshot": None}
|
||||||
|
|
||||||
|
try:
|
||||||
|
bv = BrowserValidator()
|
||||||
|
result = await bv.capture_screenshot(url, selector=selector)
|
||||||
|
return {"url": url, "screenshot_base64": result.get("screenshot", ""), "status": "ok"}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e), "screenshot": None}
|
||||||
|
|
||||||
|
|
||||||
|
async def _payload_delivery(
|
||||||
|
endpoint: str,
|
||||||
|
method: str = "GET",
|
||||||
|
payload: str = "",
|
||||||
|
content_type: str = "application/x-www-form-urlencoded",
|
||||||
|
headers: Optional[Dict] = None,
|
||||||
|
param: str = "q",
|
||||||
|
) -> Dict:
|
||||||
|
"""Send an HTTP request with a payload and capture full response."""
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
req_headers = {"Content-Type": content_type}
|
||||||
|
if headers:
|
||||||
|
req_headers.update(headers)
|
||||||
|
|
||||||
|
if method.upper() == "GET":
|
||||||
|
async with session.get(endpoint, params={param: payload}, headers=req_headers, timeout=15, allow_redirects=False) as resp:
|
||||||
|
body = await resp.text()
|
||||||
|
return {
|
||||||
|
"status": resp.status,
|
||||||
|
"headers": dict(resp.headers),
|
||||||
|
"body": body[:5000],
|
||||||
|
"body_length": len(body),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
data = {param: payload} if content_type != "application/json" else None
|
||||||
|
json_data = json.loads(payload) if content_type == "application/json" else None
|
||||||
|
async with session.request(
|
||||||
|
method.upper(), endpoint, data=data, json=json_data,
|
||||||
|
headers=req_headers, timeout=15, allow_redirects=False
|
||||||
|
) as resp:
|
||||||
|
body = await resp.text()
|
||||||
|
return {
|
||||||
|
"status": resp.status,
|
||||||
|
"headers": dict(resp.headers),
|
||||||
|
"body": body[:5000],
|
||||||
|
"body_length": len(body),
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _dns_lookup(domain: str, record_type: str = "A") -> Dict:
|
||||||
|
"""Perform DNS lookups for a domain."""
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["dig", "+short", domain, record_type],
|
||||||
|
capture_output=True, text=True, timeout=10
|
||||||
|
)
|
||||||
|
records = [r.strip() for r in result.stdout.strip().split("\n") if r.strip()]
|
||||||
|
return {"domain": domain, "type": record_type, "records": records}
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Fallback to socket for A records
|
||||||
|
if record_type.upper() == "A":
|
||||||
|
try:
|
||||||
|
ips = socket.getaddrinfo(domain, None, socket.AF_INET)
|
||||||
|
records = list(set(ip[4][0] for ip in ips))
|
||||||
|
return {"domain": domain, "type": "A", "records": records}
|
||||||
|
except socket.gaierror as e:
|
||||||
|
return {"domain": domain, "type": "A", "error": str(e)}
|
||||||
|
return {"error": "dig command not available and only A records supported via fallback"}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _port_scan(host: str, ports: str = "80,443,8080,8443,3000,5000") -> Dict:
|
||||||
|
"""Scan TCP ports on a host."""
|
||||||
|
port_list = [int(p.strip()) for p in ports.split(",") if p.strip().isdigit()]
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
async def check_port(port: int):
|
||||||
|
try:
|
||||||
|
reader, writer = await asyncio.wait_for(
|
||||||
|
asyncio.open_connection(host, port), timeout=3
|
||||||
|
)
|
||||||
|
writer.close()
|
||||||
|
await writer.wait_closed()
|
||||||
|
return port, "open"
|
||||||
|
except (asyncio.TimeoutError, ConnectionRefusedError, OSError):
|
||||||
|
return port, "closed"
|
||||||
|
|
||||||
|
tasks = [check_port(p) for p in port_list[:100]]
|
||||||
|
for coro in asyncio.as_completed(tasks):
|
||||||
|
port, state = await coro
|
||||||
|
results[str(port)] = state
|
||||||
|
|
||||||
|
open_ports = [p for p, s in results.items() if s == "open"]
|
||||||
|
return {"host": host, "ports": results, "open_ports": open_ports}
|
||||||
|
|
||||||
|
|
||||||
|
async def _technology_detect(url: str) -> Dict:
|
||||||
|
"""Detect technologies from HTTP response headers."""
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(url, timeout=10, allow_redirects=True) as resp:
|
||||||
|
headers = dict(resp.headers)
|
||||||
|
body = await resp.text()
|
||||||
|
|
||||||
|
techs = []
|
||||||
|
server = headers.get("Server", "")
|
||||||
|
if server:
|
||||||
|
techs.append(f"Server: {server}")
|
||||||
|
|
||||||
|
powered_by = headers.get("X-Powered-By", "")
|
||||||
|
if powered_by:
|
||||||
|
techs.append(f"X-Powered-By: {powered_by}")
|
||||||
|
|
||||||
|
# Framework detection from body
|
||||||
|
framework_markers = {
|
||||||
|
"React": ["react", "_next/static", "__NEXT_DATA__"],
|
||||||
|
"Vue.js": ["vue.js", "__vue__", "v-cloak"],
|
||||||
|
"Angular": ["ng-version", "angular"],
|
||||||
|
"jQuery": ["jquery"],
|
||||||
|
"WordPress": ["wp-content", "wp-includes"],
|
||||||
|
"Laravel": ["laravel_session", "csrf-token"],
|
||||||
|
"Django": ["csrfmiddlewaretoken", "django"],
|
||||||
|
"Rails": ["csrf-param", "action_dispatch"],
|
||||||
|
"Spring": ["jsessionid"],
|
||||||
|
"Express": ["connect.sid"],
|
||||||
|
}
|
||||||
|
|
||||||
|
body_lower = body.lower()
|
||||||
|
for tech, markers in framework_markers.items():
|
||||||
|
if any(m.lower() in body_lower for m in markers):
|
||||||
|
techs.append(tech)
|
||||||
|
|
||||||
|
return {"url": url, "technologies": techs, "headers": {
|
||||||
|
k: v for k, v in headers.items()
|
||||||
|
if k.lower() in ("server", "x-powered-by", "x-aspnet-version",
|
||||||
|
"x-generator", "x-drupal-cache", "x-framework")
|
||||||
|
}}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _subdomain_enumerate(domain: str) -> Dict:
|
||||||
|
"""Enumerate subdomains via common prefixes."""
|
||||||
|
prefixes = [
|
||||||
|
"www", "api", "admin", "app", "dev", "staging", "test", "mail",
|
||||||
|
"ftp", "cdn", "blog", "shop", "docs", "status", "dashboard",
|
||||||
|
"portal", "m", "mobile", "beta", "demo", "v2", "internal",
|
||||||
|
]
|
||||||
|
|
||||||
|
found = []
|
||||||
|
|
||||||
|
async def check_subdomain(prefix: str):
|
||||||
|
subdomain = f"{prefix}.{domain}"
|
||||||
|
try:
|
||||||
|
socket.getaddrinfo(subdomain, None, socket.AF_INET)
|
||||||
|
return subdomain
|
||||||
|
except socket.gaierror:
|
||||||
|
return None
|
||||||
|
|
||||||
|
tasks = [check_subdomain(p) for p in prefixes]
|
||||||
|
results = await asyncio.gather(*tasks)
|
||||||
|
found = [r for r in results if r]
|
||||||
|
|
||||||
|
return {"domain": domain, "subdomains": found, "count": len(found)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _save_finding(finding_json: str) -> Dict:
|
||||||
|
"""Persist a finding (JSON string). Returns confirmation."""
|
||||||
|
try:
|
||||||
|
finding = json.loads(finding_json)
|
||||||
|
# Validate required fields
|
||||||
|
required = ["title", "severity", "vulnerability_type", "affected_endpoint"]
|
||||||
|
missing = [f for f in required if f not in finding]
|
||||||
|
if missing:
|
||||||
|
return {"error": f"Missing required fields: {missing}"}
|
||||||
|
return {"status": "saved", "finding_id": finding.get("id", "unknown"), "title": finding["title"]}
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
return {"error": f"Invalid JSON: {e}"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_vuln_prompt(vuln_type: str, target: str = "", endpoint: str = "", param: str = "", tech: str = "") -> Dict:
|
||||||
|
"""Retrieve the AI decision prompt for a vulnerability type."""
|
||||||
|
if not HAS_AI_PROMPTS:
|
||||||
|
return {"error": "AI prompts module not available"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
prompt_data = get_prompt(vuln_type, {
|
||||||
|
"TARGET_URL": target,
|
||||||
|
"ENDPOINT": endpoint,
|
||||||
|
"PARAMETER": param,
|
||||||
|
"TECHNOLOGY": tech,
|
||||||
|
})
|
||||||
|
if not prompt_data:
|
||||||
|
return {"error": f"No prompt found for vuln type: {vuln_type}"}
|
||||||
|
full_prompt = build_testing_prompt(vuln_type, target, endpoint, param, tech)
|
||||||
|
return {"vuln_type": vuln_type, "prompt": prompt_data, "full_prompt": full_prompt}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Sandbox tool implementations (Docker-based real tools)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _execute_nuclei(
|
||||||
|
target: str,
|
||||||
|
templates: Optional[str] = None,
|
||||||
|
severity: Optional[str] = None,
|
||||||
|
tags: Optional[str] = None,
|
||||||
|
rate_limit: int = 150,
|
||||||
|
) -> Dict:
|
||||||
|
"""Run Nuclei vulnerability scanner in the Docker sandbox."""
|
||||||
|
if not HAS_SANDBOX:
|
||||||
|
return {"error": "Sandbox module not available. Install docker SDK: pip install docker"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
sandbox = await get_sandbox()
|
||||||
|
if not sandbox.is_available:
|
||||||
|
return {"error": "Sandbox container not running. Build with: cd docker && docker compose -f docker-compose.sandbox.yml up -d"}
|
||||||
|
|
||||||
|
result = await sandbox.run_nuclei(
|
||||||
|
target=target,
|
||||||
|
templates=templates,
|
||||||
|
severity=severity,
|
||||||
|
tags=tags,
|
||||||
|
rate_limit=rate_limit,
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"tool": "nuclei",
|
||||||
|
"target": target,
|
||||||
|
"exit_code": result.exit_code,
|
||||||
|
"findings": result.findings,
|
||||||
|
"findings_count": len(result.findings),
|
||||||
|
"duration_seconds": result.duration_seconds,
|
||||||
|
"raw_output": result.stdout[:3000] if result.stdout else "",
|
||||||
|
"error": result.error,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _execute_naabu(
|
||||||
|
target: str,
|
||||||
|
ports: Optional[str] = None,
|
||||||
|
top_ports: Optional[int] = None,
|
||||||
|
rate: int = 1000,
|
||||||
|
) -> Dict:
|
||||||
|
"""Run Naabu port scanner in the Docker sandbox."""
|
||||||
|
if not HAS_SANDBOX:
|
||||||
|
return {"error": "Sandbox module not available"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
sandbox = await get_sandbox()
|
||||||
|
if not sandbox.is_available:
|
||||||
|
return {"error": "Sandbox container not running"}
|
||||||
|
|
||||||
|
result = await sandbox.run_naabu(
|
||||||
|
target=target,
|
||||||
|
ports=ports,
|
||||||
|
top_ports=top_ports,
|
||||||
|
rate=rate,
|
||||||
|
)
|
||||||
|
|
||||||
|
open_ports = [f["port"] for f in result.findings]
|
||||||
|
return {
|
||||||
|
"tool": "naabu",
|
||||||
|
"target": target,
|
||||||
|
"exit_code": result.exit_code,
|
||||||
|
"open_ports": sorted(open_ports),
|
||||||
|
"port_count": len(open_ports),
|
||||||
|
"findings": result.findings,
|
||||||
|
"duration_seconds": result.duration_seconds,
|
||||||
|
"error": result.error,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _sandbox_health() -> Dict:
|
||||||
|
"""Check sandbox container health and available tools."""
|
||||||
|
if not HAS_SANDBOX:
|
||||||
|
return {"status": "unavailable", "reason": "Sandbox module not installed"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
sandbox = await get_sandbox()
|
||||||
|
return await sandbox.health_check()
|
||||||
|
except Exception as e:
|
||||||
|
return {"status": "error", "reason": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def _sandbox_exec(tool: str, args: str, timeout: int = 300) -> Dict:
|
||||||
|
"""Execute any allowed tool in the Docker sandbox."""
|
||||||
|
if not HAS_SANDBOX:
|
||||||
|
return {"error": "Sandbox module not available"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
sandbox = await get_sandbox()
|
||||||
|
if not sandbox.is_available:
|
||||||
|
return {"error": "Sandbox container not running"}
|
||||||
|
|
||||||
|
result = await sandbox.run_tool(tool=tool, args=args, timeout=timeout)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"tool": tool,
|
||||||
|
"exit_code": result.exit_code,
|
||||||
|
"stdout": result.stdout[:5000] if result.stdout else "",
|
||||||
|
"stderr": result.stderr[:2000] if result.stderr else "",
|
||||||
|
"duration_seconds": result.duration_seconds,
|
||||||
|
"error": result.error,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MCP Server Definition
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
TOOLS = [
|
||||||
|
{
|
||||||
|
"name": "screenshot_capture",
|
||||||
|
"description": "Capture a browser screenshot of a URL using Playwright",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {"type": "string", "description": "URL to screenshot"},
|
||||||
|
"selector": {"type": "string", "description": "Optional CSS selector to capture"},
|
||||||
|
},
|
||||||
|
"required": ["url"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "payload_delivery",
|
||||||
|
"description": "Send an HTTP request with a payload and capture the full response",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"endpoint": {"type": "string", "description": "Target URL"},
|
||||||
|
"method": {"type": "string", "description": "HTTP method", "default": "GET"},
|
||||||
|
"payload": {"type": "string", "description": "Payload value"},
|
||||||
|
"content_type": {"type": "string", "default": "application/x-www-form-urlencoded"},
|
||||||
|
"param": {"type": "string", "description": "Parameter name", "default": "q"},
|
||||||
|
},
|
||||||
|
"required": ["endpoint", "payload"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dns_lookup",
|
||||||
|
"description": "Perform DNS lookups for a domain",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"domain": {"type": "string", "description": "Domain to look up"},
|
||||||
|
"record_type": {"type": "string", "default": "A", "description": "DNS record type"},
|
||||||
|
},
|
||||||
|
"required": ["domain"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "port_scan",
|
||||||
|
"description": "Scan TCP ports on a host",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"host": {"type": "string", "description": "Target host"},
|
||||||
|
"ports": {"type": "string", "default": "80,443,8080,8443,3000,5000", "description": "Comma-separated ports"},
|
||||||
|
},
|
||||||
|
"required": ["host"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "technology_detect",
|
||||||
|
"description": "Detect technologies from HTTP response headers and body",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {"type": "string", "description": "URL to analyze"},
|
||||||
|
},
|
||||||
|
"required": ["url"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "subdomain_enumerate",
|
||||||
|
"description": "Enumerate subdomains via common prefix brute-force",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"domain": {"type": "string", "description": "Base domain to enumerate"},
|
||||||
|
},
|
||||||
|
"required": ["domain"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "save_finding",
|
||||||
|
"description": "Persist a vulnerability finding (JSON string)",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"finding_json": {"type": "string", "description": "Finding as JSON string"},
|
||||||
|
},
|
||||||
|
"required": ["finding_json"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "get_vuln_prompt",
|
||||||
|
"description": "Retrieve the AI decision prompt for a vulnerability type",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"vuln_type": {"type": "string", "description": "Vulnerability type key"},
|
||||||
|
"target": {"type": "string", "description": "Target URL"},
|
||||||
|
"endpoint": {"type": "string", "description": "Specific endpoint"},
|
||||||
|
"param": {"type": "string", "description": "Parameter name"},
|
||||||
|
"tech": {"type": "string", "description": "Detected technology"},
|
||||||
|
},
|
||||||
|
"required": ["vuln_type"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# --- Sandbox tools (Docker-based real security tools) ---
|
||||||
|
{
|
||||||
|
"name": "execute_nuclei",
|
||||||
|
"description": "Run Nuclei vulnerability scanner (8000+ templates) in Docker sandbox. Returns structured findings with severity, CVE, CWE.",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"target": {"type": "string", "description": "Target URL to scan"},
|
||||||
|
"templates": {"type": "string", "description": "Specific template path (e.g. 'cves/2024/', 'vulnerabilities/xss/')"},
|
||||||
|
"severity": {"type": "string", "description": "Filter: critical,high,medium,low,info"},
|
||||||
|
"tags": {"type": "string", "description": "Filter by tags: xss,sqli,lfi,ssrf,rce"},
|
||||||
|
"rate_limit": {"type": "integer", "description": "Requests per second (default 150)", "default": 150},
|
||||||
|
},
|
||||||
|
"required": ["target"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "execute_naabu",
|
||||||
|
"description": "Run Naabu port scanner in Docker sandbox. Fast SYN-based scanning with configurable port ranges.",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"target": {"type": "string", "description": "IP address or hostname to scan"},
|
||||||
|
"ports": {"type": "string", "description": "Ports to scan (e.g. '80,443,8080' or '1-65535')"},
|
||||||
|
"top_ports": {"type": "integer", "description": "Scan top N ports (e.g. 100, 1000)"},
|
||||||
|
"rate": {"type": "integer", "description": "Packets per second (default 1000)", "default": 1000},
|
||||||
|
},
|
||||||
|
"required": ["target"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sandbox_health",
|
||||||
|
"description": "Check Docker sandbox status and available security tools",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sandbox_exec",
|
||||||
|
"description": "Execute any allowed security tool in the Docker sandbox (nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, sqlmap, nikto, etc.)",
|
||||||
|
"inputSchema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"tool": {"type": "string", "description": "Tool name (e.g. nuclei, naabu, nmap, httpx, subfinder, katana, ffuf, gobuster, dalfox, nikto, sqlmap, curl)"},
|
||||||
|
"args": {"type": "string", "description": "Command-line arguments for the tool"},
|
||||||
|
"timeout": {"type": "integer", "description": "Max execution time in seconds (default 300)", "default": 300},
|
||||||
|
},
|
||||||
|
"required": ["tool", "args"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Tool dispatcher
|
||||||
|
TOOL_HANDLERS = {
|
||||||
|
"screenshot_capture": lambda args: _screenshot_capture(args["url"], args.get("selector")),
|
||||||
|
"payload_delivery": lambda args: _payload_delivery(
|
||||||
|
args["endpoint"], args.get("method", "GET"), args.get("payload", ""),
|
||||||
|
args.get("content_type", "application/x-www-form-urlencoded"),
|
||||||
|
args.get("headers"), args.get("param", "q")
|
||||||
|
),
|
||||||
|
"dns_lookup": lambda args: _dns_lookup(args["domain"], args.get("record_type", "A")),
|
||||||
|
"port_scan": lambda args: _port_scan(args["host"], args.get("ports", "80,443,8080,8443,3000,5000")),
|
||||||
|
"technology_detect": lambda args: _technology_detect(args["url"]),
|
||||||
|
"subdomain_enumerate": lambda args: _subdomain_enumerate(args["domain"]),
|
||||||
|
"save_finding": lambda args: _save_finding(args["finding_json"]),
|
||||||
|
"get_vuln_prompt": lambda args: _get_vuln_prompt(
|
||||||
|
args["vuln_type"], args.get("target", ""), args.get("endpoint", ""),
|
||||||
|
args.get("param", ""), args.get("tech", "")
|
||||||
|
),
|
||||||
|
# Sandbox tools
|
||||||
|
"execute_nuclei": lambda args: _execute_nuclei(
|
||||||
|
args["target"], args.get("templates"), args.get("severity"),
|
||||||
|
args.get("tags"), args.get("rate_limit", 150)
|
||||||
|
),
|
||||||
|
"execute_naabu": lambda args: _execute_naabu(
|
||||||
|
args["target"], args.get("ports"), args.get("top_ports"),
|
||||||
|
args.get("rate", 1000)
|
||||||
|
),
|
||||||
|
"sandbox_health": lambda args: _sandbox_health(),
|
||||||
|
"sandbox_exec": lambda args: _sandbox_exec(
|
||||||
|
args["tool"], args["args"], args.get("timeout", 300)
|
||||||
|
),
|
||||||
|
}
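# Direct-dispatch sketch (the same lookup that call_tool() performs below; tool name and
# arguments are illustrative):
async def _example_dispatch() -> Dict:
    handler = TOOL_HANDLERS["dns_lookup"]
    return await handler({"domain": "example.com", "record_type": "TXT"})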
|
||||||
|
|
||||||
|
|
||||||
|
def create_mcp_server() -> "Server":
|
||||||
|
"""Create and configure the MCP server with all pentest tools."""
|
||||||
|
if not HAS_MCP:
|
||||||
|
raise RuntimeError("MCP package not installed. Install with: pip install 'mcp>=1.0.0'")
|
||||||
|
|
||||||
|
server = Server("neurosploit-tools")
|
||||||
|
|
||||||
|
@server.list_tools()
|
||||||
|
async def list_tools() -> list:
|
||||||
|
return [Tool(**t) for t in TOOLS]
|
||||||
|
|
||||||
|
@server.call_tool()
|
||||||
|
async def call_tool(name: str, arguments: dict) -> list:
|
||||||
|
handler = TOOL_HANDLERS.get(name)
|
||||||
|
if not handler:
|
||||||
|
return [TextContent(type="text", text=json.dumps({"error": f"Unknown tool: {name}"}))]
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await handler(arguments)
|
||||||
|
return [TextContent(type="text", text=json.dumps(result, default=str))]
|
||||||
|
except Exception as e:
|
||||||
|
return [TextContent(type="text", text=json.dumps({"error": str(e)}))]
|
||||||
|
|
||||||
|
return server
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
"""Run the MCP server via stdio transport."""
|
||||||
|
server = create_mcp_server()
|
||||||
|
|
||||||
|
transport = os.getenv("MCP_TRANSPORT", "stdio")
|
||||||
|
if transport == "stdio":
|
||||||
|
async with stdio_server() as (read_stream, write_stream):
|
||||||
|
await server.run(read_stream, write_stream, server.create_initialization_options())
|
||||||
|
else:
|
||||||
|
logger.error(f"Unsupported transport: {transport}. Use 'stdio'.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
81
core/model_router.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Model Router - Task-type-based LLM routing.
|
||||||
|
|
||||||
|
Routes requests to different LLM profiles based on task type:
|
||||||
|
- reasoning: Complex logic and decision-making
|
||||||
|
- analysis: Data analysis and pattern recognition
|
||||||
|
- generation: Content and payload generation
|
||||||
|
- validation: Result verification and confirmation
|
||||||
|
|
||||||
|
Enabled/disabled via config. When disabled, callers fall back to their default provider.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Optional, Callable
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ModelRouter:
|
||||||
|
"""Routes LLM requests to different profiles based on task type."""
|
||||||
|
|
||||||
|
def __init__(self, config: Dict, llm_manager_factory: Callable):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
config: Full application config dict (must contain 'model_routing' and 'llm' keys)
|
||||||
|
llm_manager_factory: Callable that takes a profile name and returns an LLMManager instance
|
||||||
|
"""
|
||||||
|
routing_config = config.get('model_routing', {})
|
||||||
|
self.enabled = routing_config.get('enabled', False)
|
||||||
|
|
||||||
|
# Allow env var override
|
||||||
|
env_override = os.getenv('ENABLE_MODEL_ROUTING', '').strip().lower()
|
||||||
|
if env_override == 'true':
|
||||||
|
self.enabled = True
|
||||||
|
elif env_override == 'false':
|
||||||
|
self.enabled = False
|
||||||
|
|
||||||
|
self.routes = routing_config.get('routes', {})
|
||||||
|
self.llm_manager_factory = llm_manager_factory
|
||||||
|
self._managers = {} # Cache LLMManager instances per profile
|
||||||
|
|
||||||
|
if self.enabled:
|
||||||
|
logger.info(f"Model routing enabled with routes: {list(self.routes.keys())}")
|
||||||
|
else:
|
||||||
|
logger.debug("Model routing disabled")
|
||||||
|
|
||||||
|
def generate(self, prompt: str, system_prompt: Optional[str] = None,
|
||||||
|
task_type: str = "default") -> Optional[str]:
|
||||||
|
"""Route a generation request to the appropriate LLM profile.
|
||||||
|
|
||||||
|
Returns None if routing is disabled or no route matches,
|
||||||
|
allowing callers to fall back to their default provider.
|
||||||
|
"""
|
||||||
|
if not self.enabled:
|
||||||
|
return None
|
||||||
|
|
||||||
|
profile = self.routes.get(task_type, self.routes.get('default'))
|
||||||
|
if not profile:
|
||||||
|
logger.debug(f"No route for task_type '{task_type}', falling back to default")
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
if profile not in self._managers:
|
||||||
|
self._managers[profile] = self.llm_manager_factory(profile)
|
||||||
|
|
||||||
|
manager = self._managers[profile]
|
||||||
|
logger.debug(f"Routing task_type '{task_type}' to profile '{profile}' "
|
||||||
|
f"(provider: {manager.provider}, model: {manager.model})")
|
||||||
|
return manager.generate(prompt, system_prompt)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Model routing error for profile '{profile}': {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_profile_for_task(self, task_type: str) -> Optional[str]:
|
||||||
|
"""Get the profile name that would handle a given task type."""
|
||||||
|
if not self.enabled:
|
||||||
|
return None
|
||||||
|
return self.routes.get(task_type, self.routes.get('default'))
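# Wiring sketch (the profile names and the factory are hypothetical; the keys
# 'model_routing.enabled' and 'model_routing.routes' are the ones read above):
def _example_router(config: Dict, make_manager: Callable) -> ModelRouter:
    routes = {"reasoning": "deep_profile", "validation": "fast_profile", "default": "fast_profile"}
    config.setdefault("model_routing", {"enabled": True, "routes": routes})
    return ModelRouter(config, llm_manager_factory=make_manager)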
|
||||||
@@ -4,9 +4,11 @@ Professional Pentest Report Generator
|
|||||||
Generates detailed reports with PoCs, CVSS scores, requests/responses
"""

import base64
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any
import html
import logging
|
||||||
@@ -49,6 +51,41 @@ class ReportGenerator:
|
|||||||
escaped = self._escape_html(code)
return f'<pre><code class="language-{language}">{escaped}</code></pre>'
|
||||||
|
|
||||||
|
def embed_screenshot(self, filepath: str) -> str:
|
||||||
|
"""Convert a screenshot file to a base64 data URI for HTML embedding."""
|
||||||
|
path = Path(filepath)
|
||||||
|
if not path.exists():
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
with open(path, 'rb') as f:
|
||||||
|
data = base64.b64encode(f.read()).decode('ascii')
|
||||||
|
return f"data:image/png;base64,{data}"
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def build_screenshots_html(self, finding_id: str, screenshots_dir: str = "reports/screenshots") -> str:
|
||||||
|
"""Build screenshot grid HTML for a finding, embedding images as base64."""
|
||||||
|
finding_dir = Path(screenshots_dir) / finding_id
|
||||||
|
if not finding_dir.exists():
|
||||||
|
return ""
|
||||||
|
|
||||||
|
screenshots = sorted(finding_dir.glob("*.png"))[:3]
|
||||||
|
if not screenshots:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
cards = ""
|
||||||
|
for ss in screenshots:
|
||||||
|
data_uri = self.embed_screenshot(str(ss))
|
||||||
|
if data_uri:
|
||||||
|
caption = ss.stem.replace('_', ' ').title()
|
||||||
|
cards += f"""
|
||||||
|
<div class="screenshot-card">
|
||||||
|
<img src="{data_uri}" alt="{caption}" />
|
||||||
|
<div class="screenshot-caption">{caption}</div>
|
||||||
|
</div>"""
|
||||||
|
|
||||||
|
return f'<div class="screenshot-grid">{cards}</div>' if cards else ""
|
||||||
|
|
||||||
def generate_executive_summary(self) -> str:
"""Generate executive summary section"""
summary = self.scan_results.get("summary", {})
|
||||||
@@ -611,17 +648,39 @@ class ReportGenerator:
|
|||||||
return html
|
||||||
|
|
||||||
def save_report(self, output_dir: str = "reports") -> str:
|
||||||
"""Save HTML report to file"""
|
"""Save HTML report to a per-report folder with screenshots."""
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
import shutil
|
||||||
|
|
||||||
|
# Create per-report folder
|
||||||
|
target = self.scan_results.get("target_url", self.scan_results.get("target", "unknown"))
|
||||||
|
target_name = target.replace("://", "_").replace("/", "_").rstrip("_")[:40]
|
||||||
|
report_folder = f"report_{target_name}_{self.timestamp}"
|
||||||
|
report_dir = os.path.join(output_dir, report_folder)
|
||||||
|
os.makedirs(report_dir, exist_ok=True)
|
||||||
|
|
||||||
filename = f"pentest_report_{self.timestamp}.html"
|
filename = f"pentest_report_{self.timestamp}.html"
|
||||||
filepath = os.path.join(output_dir, filename)
|
filepath = os.path.join(report_dir, filename)
|
||||||
|
|
||||||
html_content = self.generate_html_report()
|
||||||
|
|
||||||
with open(filepath, 'w', encoding='utf-8') as f:
f.write(html_content)
|
||||||
|
|
||||||
|
# Copy screenshots into report folder
|
||||||
|
screenshots_src = os.path.join("reports", "screenshots")
|
||||||
|
if os.path.exists(screenshots_src):
|
||||||
|
screenshots_dest = os.path.join(report_dir, "screenshots")
|
||||||
|
vulns = self.scan_results.get("vulnerabilities", [])
|
||||||
|
for vuln in vulns:
|
||||||
|
fid = vuln.get("id", "")
|
||||||
|
if fid:
|
||||||
|
src_dir = os.path.join(screenshots_src, str(fid))
|
||||||
|
if os.path.exists(src_dir):
|
||||||
|
dest_dir = os.path.join(screenshots_dest, str(fid))
|
||||||
|
os.makedirs(dest_dir, exist_ok=True)
|
||||||
|
for ss_file in Path(src_dir).glob("*.png"):
|
||||||
|
shutil.copy2(str(ss_file), os.path.join(dest_dir, ss_file.name))
|
||||||
|
|
||||||
logger.info(f"Report saved to: {filepath}")
|
logger.info(f"Report saved to: {filepath}")
|
||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
|
|||||||
758
core/sandbox_manager.py
Normal file
@@ -0,0 +1,758 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
NeuroSploit Security Sandbox Manager
|
||||||
|
|
||||||
|
Manages Docker-based security tool execution in an isolated container.
|
||||||
|
Provides high-level API for running Nuclei, Naabu, and other tools.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
- Persistent sandbox container (neurosploit-sandbox) stays running
|
||||||
|
- Tools executed via `docker exec` for sub-second startup
|
||||||
|
- Output collected from container stdout + output files
|
||||||
|
- Resource limits enforced (2GB RAM, 2 CPU)
|
||||||
|
- Network isolation with controlled egress
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Dict, Any, Optional, List, Tuple
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Guard Docker SDK import
|
||||||
|
try:
|
||||||
|
import docker
|
||||||
|
from docker.errors import DockerException, NotFound, APIError
|
||||||
|
HAS_DOCKER = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_DOCKER = False
|
||||||
|
logger.warning("Docker SDK not installed. Install with: pip install docker")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SandboxResult:
|
||||||
|
"""Result from a sandboxed tool execution."""
|
||||||
|
tool: str
|
||||||
|
command: str
|
||||||
|
exit_code: int
|
||||||
|
stdout: str
|
||||||
|
stderr: str
|
||||||
|
duration_seconds: float
|
||||||
|
findings: List[Dict] = field(default_factory=list)
|
||||||
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Nuclei output parser
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def parse_nuclei_jsonl(output: str) -> List[Dict]:
|
||||||
|
"""Parse Nuclei JSONL output into structured findings."""
|
||||||
|
findings = []
|
||||||
|
severity_map = {
|
||||||
|
"critical": "critical",
|
||||||
|
"high": "high",
|
||||||
|
"medium": "medium",
|
||||||
|
"low": "low",
|
||||||
|
"info": "info",
|
||||||
|
}
|
||||||
|
|
||||||
|
for line in output.strip().split("\n"):
|
||||||
|
if not line.strip():
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
data = json.loads(line)
|
||||||
|
info = data.get("info", {})
|
||||||
|
tags = info.get("tags", [])
|
||||||
|
findings.append({
|
||||||
|
"title": info.get("name", "Unknown"),
|
||||||
|
"severity": severity_map.get(info.get("severity", "info"), "info"),
|
||||||
|
"vulnerability_type": tags[0] if tags else "vulnerability",
|
||||||
|
"description": info.get("description", ""),
|
||||||
|
"affected_endpoint": data.get("matched-at", ""),
|
||||||
|
"evidence": data.get("matcher-name", ""),
|
||||||
|
"template_id": data.get("template-id", ""),
|
||||||
|
"curl_command": data.get("curl-command", ""),
|
||||||
|
"remediation": info.get("remediation", "Review and fix the vulnerability"),
|
||||||
|
"references": info.get("reference", []),
|
||||||
|
"cwe": info.get("classification", {}).get("cwe-id", []),
|
||||||
|
"cvss_score": info.get("classification", {}).get("cvss-score", 0),
|
||||||
|
})
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
return findings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Naabu output parser
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def parse_naabu_output(output: str) -> List[Dict]:
|
||||||
|
"""Parse Naabu output into structured port findings."""
|
||||||
|
findings = []
|
||||||
|
seen = set()
|
||||||
|
|
||||||
|
for line in output.strip().split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Naabu JSON mode: {"host":"x","ip":"y","port":80}
|
||||||
|
try:
|
||||||
|
data = json.loads(line)
|
||||||
|
host = data.get("host", data.get("ip", ""))
|
||||||
|
port = data.get("port", 0)
|
||||||
|
key = f"{host}:{port}"
|
||||||
|
if key not in seen:
|
||||||
|
seen.add(key)
|
||||||
|
findings.append({
|
||||||
|
"host": host,
|
||||||
|
"port": port,
|
||||||
|
"protocol": "tcp",
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Text mode: host:port
|
||||||
|
match = re.match(r"^(.+?):(\d+)$", line)
|
||||||
|
if match:
|
||||||
|
host, port = match.group(1), int(match.group(2))
|
||||||
|
key = f"{host}:{port}"
|
||||||
|
if key not in seen:
|
||||||
|
seen.add(key)
|
||||||
|
findings.append({
|
||||||
|
"host": host,
|
||||||
|
"port": port,
|
||||||
|
"protocol": "tcp",
|
||||||
|
})
|
||||||
|
|
||||||
|
return findings
|
||||||
|
|
||||||
|
|
||||||
|
class BaseSandbox(ABC):
|
||||||
|
"""Abstract interface for sandbox implementations (legacy shared + per-scan Kali)."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def initialize(self) -> Tuple[bool, str]: ...
|
||||||
|
|
||||||
|
@property
|
||||||
|
@abstractmethod
|
||||||
|
def is_available(self) -> bool: ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def stop(self): ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def health_check(self) -> Dict: ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_nuclei(self, target, templates=None, severity=None,
|
||||||
|
tags=None, rate_limit=150, timeout=600) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_naabu(self, target, ports=None, top_ports=None,
|
||||||
|
scan_type="s", rate=1000, timeout=300) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_httpx(self, targets, timeout=120) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_subfinder(self, domain, timeout=120) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_nmap(self, target, ports=None, scripts=True, timeout=300) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def run_tool(self, tool, args, timeout=300) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def execute_raw(self, command, timeout=300) -> "SandboxResult": ...
|
||||||
|
|
||||||
|
|
||||||
|
class SandboxManager(BaseSandbox):
|
||||||
|
"""
|
||||||
|
Legacy shared sandbox: persistent Docker container running security tools.
|
||||||
|
|
||||||
|
Tools are executed via `docker exec` for fast invocation.
|
||||||
|
Used by MCP server and terminal API (no scan_id context).
|
||||||
|
"""
|
||||||
|
|
||||||
|
SANDBOX_IMAGE = "neurosploit-sandbox:latest"
|
||||||
|
SANDBOX_CONTAINER = "neurosploit-sandbox"
|
||||||
|
DEFAULT_TIMEOUT = 300 # 5 minutes
|
||||||
|
MAX_OUTPUT = 2 * 1024 * 1024 # 2MB
|
||||||
|
|
||||||
|
# Known install commands for tools not pre-installed in the sandbox
|
||||||
|
KNOWN_INSTALLS = {
|
||||||
|
"wpscan": "gem install wpscan 2>&1",
|
||||||
|
"joomscan": "pip3 install joomscan 2>&1",
|
||||||
|
"dirsearch": "pip3 install dirsearch 2>&1",
|
||||||
|
"commix": "pip3 install commix 2>&1",
|
||||||
|
"wfuzz": "pip3 install wfuzz 2>&1",
|
||||||
|
"sslyze": "pip3 install sslyze 2>&1",
|
||||||
|
"retire": "npm install -g retire 2>&1",
|
||||||
|
"testssl": "apt-get update -qq && apt-get install -y -qq testssl.sh 2>&1",
|
||||||
|
"trufflehog": "pip3 install trufflehog 2>&1",
|
||||||
|
"gitleaks": "GO111MODULE=on go install github.com/gitleaks/gitleaks/v8@latest 2>&1",
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._client: Optional[Any] = None
|
||||||
|
self._container: Optional[Any] = None
|
||||||
|
self._available = False
|
||||||
|
self._temp_installed: set = set() # Tools temporarily installed for cleanup
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Lifecycle
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def initialize(self) -> Tuple[bool, str]:
|
||||||
|
"""Initialize Docker client and ensure sandbox is running."""
|
||||||
|
if not HAS_DOCKER:
|
||||||
|
return False, "Docker SDK not installed"
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._client = docker.from_env()
|
||||||
|
self._client.ping()
|
||||||
|
except Exception as e:
|
||||||
|
return False, f"Docker not available: {e}"
|
||||||
|
|
||||||
|
# Check if sandbox container already running
|
||||||
|
try:
|
||||||
|
container = self._client.containers.get(self.SANDBOX_CONTAINER)
|
||||||
|
if container.status == "running":
|
||||||
|
self._container = container
|
||||||
|
self._available = True
|
||||||
|
return True, "Sandbox already running"
|
||||||
|
else:
|
||||||
|
container.remove(force=True)
|
||||||
|
except NotFound:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Check if image exists
|
||||||
|
try:
|
||||||
|
self._client.images.get(self.SANDBOX_IMAGE)
|
||||||
|
except NotFound:
|
||||||
|
return False, (
|
||||||
|
f"Sandbox image '{self.SANDBOX_IMAGE}' not found. "
|
||||||
|
"Build with: cd docker && docker compose -f docker-compose.sandbox.yml build"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Start sandbox container
|
||||||
|
try:
|
||||||
|
self._container = self._client.containers.run(
|
||||||
|
self.SANDBOX_IMAGE,
|
||||||
|
command="sleep infinity",
|
||||||
|
name=self.SANDBOX_CONTAINER,
|
||||||
|
detach=True,
|
||||||
|
restart_policy={"Name": "unless-stopped"},
|
||||||
|
network_mode="bridge",
|
||||||
|
mem_limit="2g",
|
||||||
|
cpu_period=100000,
|
||||||
|
cpu_quota=200000, # 2 CPUs
|
||||||
|
cap_add=["NET_RAW", "NET_ADMIN"],
|
||||||
|
security_opt=["no-new-privileges:true"],
|
||||||
|
)
|
||||||
|
self._available = True
|
||||||
|
return True, "Sandbox started"
|
||||||
|
except Exception as e:
|
||||||
|
return False, f"Failed to start sandbox: {e}"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
"""Check if sandbox is ready for tool execution."""
|
||||||
|
return self._available and self._container is not None
|
||||||
|
|
||||||
|
async def stop(self):
|
||||||
|
"""Stop and remove the sandbox container."""
|
||||||
|
if self._container:
|
||||||
|
try:
|
||||||
|
self._container.stop(timeout=10)
|
||||||
|
self._container.remove(force=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._container = None
|
||||||
|
self._available = False
|
||||||
|
|
||||||
|
async def health_check(self) -> Dict:
|
||||||
|
"""Run health check on the sandbox container."""
|
||||||
|
if not self.is_available:
|
||||||
|
return {"status": "unavailable", "tools": []}
|
||||||
|
|
||||||
|
result = await self._exec("nuclei -version 2>&1 && naabu -version 2>&1 && nmap --version 2>&1 | head -1")
|
||||||
|
tools = []
|
||||||
|
if "nuclei" in result.stdout.lower():
|
||||||
|
tools.append("nuclei")
|
||||||
|
if "naabu" in result.stdout.lower():
|
||||||
|
tools.append("naabu")
|
||||||
|
if "nmap" in result.stdout.lower():
|
||||||
|
tools.append("nmap")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "healthy" if tools else "degraded",
|
||||||
|
"tools": tools,
|
||||||
|
"container": self.SANDBOX_CONTAINER,
|
||||||
|
"uptime": self._container.attrs.get("State", {}).get("StartedAt", "") if self._container else "",
|
||||||
|
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Low-level execution
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _exec(
|
||||||
|
self, command: str, timeout: int = DEFAULT_TIMEOUT
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""Execute a command inside the sandbox container."""
|
||||||
|
if not self.is_available:
|
||||||
|
return SandboxResult(
|
||||||
|
tool="sandbox", command=command, exit_code=-1,
|
||||||
|
stdout="", stderr="", duration_seconds=0,
|
||||||
|
error="Sandbox not available",
|
||||||
|
)
|
||||||
|
|
||||||
|
started = datetime.utcnow()
|
||||||
|
|
||||||
|
try:
|
||||||
|
exec_result = await asyncio.get_event_loop().run_in_executor(
|
||||||
|
None,
|
||||||
|
lambda: self._container.exec_run(
|
||||||
|
cmd=["bash", "-c", command],
|
||||||
|
stdout=True,
|
||||||
|
stderr=True,
|
||||||
|
demux=True,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
duration = (datetime.utcnow() - started).total_seconds()
|
||||||
|
|
||||||
|
stdout_raw, stderr_raw = exec_result.output
|
||||||
|
stdout = (stdout_raw or b"").decode("utf-8", errors="replace")
|
||||||
|
stderr = (stderr_raw or b"").decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
# Truncate oversized output
|
||||||
|
if len(stdout) > self.MAX_OUTPUT:
|
||||||
|
stdout = stdout[: self.MAX_OUTPUT] + "\n... [truncated]"
|
||||||
|
if len(stderr) > self.MAX_OUTPUT:
|
||||||
|
stderr = stderr[: self.MAX_OUTPUT] + "\n... [truncated]"
|
||||||
|
|
||||||
|
return SandboxResult(
|
||||||
|
tool="sandbox",
|
||||||
|
command=command,
|
||||||
|
exit_code=exec_result.exit_code,
|
||||||
|
stdout=stdout,
|
||||||
|
stderr=stderr,
|
||||||
|
duration_seconds=duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
duration = (datetime.utcnow() - started).total_seconds()
|
||||||
|
return SandboxResult(
|
||||||
|
tool="sandbox", command=command, exit_code=-1,
|
||||||
|
stdout="", stderr="", duration_seconds=duration,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# High-level tool APIs
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def run_nuclei(
|
||||||
|
self,
|
||||||
|
target: str,
|
||||||
|
templates: Optional[str] = None,
|
||||||
|
severity: Optional[str] = None,
|
||||||
|
tags: Optional[str] = None,
|
||||||
|
rate_limit: int = 150,
|
||||||
|
timeout: int = 600,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run Nuclei vulnerability scanner against a target.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
target: URL or host to scan
|
||||||
|
templates: Specific template path/ID (e.g., "cves/2024/")
|
||||||
|
severity: Filter by severity (critical,high,medium,low,info)
|
||||||
|
tags: Filter by tags (e.g., "xss,sqli,lfi")
|
||||||
|
rate_limit: Requests per second (default 150)
|
||||||
|
timeout: Max execution time in seconds
|
||||||
|
"""
|
||||||
|
cmd_parts = [
|
||||||
|
"nuclei",
|
||||||
|
"-u", shlex.quote(target),
|
||||||
|
"-jsonl",
|
||||||
|
"-rate-limit", str(rate_limit),
|
||||||
|
"-silent",
|
||||||
|
"-no-color",
|
||||||
|
]
|
||||||
|
|
||||||
|
if templates:
|
||||||
|
cmd_parts.extend(["-t", shlex.quote(templates)])
|
||||||
|
if severity:
|
||||||
|
cmd_parts.extend(["-severity", shlex.quote(severity)])
|
||||||
|
if tags:
|
||||||
|
cmd_parts.extend(["-tags", shlex.quote(tags)])
|
||||||
|
|
||||||
|
command = " ".join(cmd_parts) + " 2>/dev/null"
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = "nuclei"
|
||||||
|
|
||||||
|
# Parse findings
|
||||||
|
if result.stdout:
|
||||||
|
result.findings = parse_nuclei_jsonl(result.stdout)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def run_naabu(
|
||||||
|
self,
|
||||||
|
target: str,
|
||||||
|
ports: Optional[str] = None,
|
||||||
|
top_ports: Optional[int] = None,
|
||||||
|
scan_type: str = "s",
|
||||||
|
rate: int = 1000,
|
||||||
|
timeout: int = 300,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run Naabu port scanner against a target.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
target: IP address or hostname to scan
|
||||||
|
ports: Specific ports (e.g., "80,443,8080" or "1-65535")
|
||||||
|
top_ports: Use top N ports (e.g., 100, 1000)
|
||||||
|
scan_type: SYN (s), CONNECT (c)
|
||||||
|
rate: Packets per second
|
||||||
|
timeout: Max execution time in seconds
|
||||||
|
"""
|
||||||
|
cmd_parts = [
|
||||||
|
"naabu",
|
||||||
|
"-host", shlex.quote(target),
|
||||||
|
"-json",
|
||||||
|
"-rate", str(rate),
|
||||||
|
"-silent",
|
||||||
|
"-no-color",
|
||||||
|
]
|
||||||
|
|
||||||
|
if ports:
|
||||||
|
cmd_parts.extend(["-p", shlex.quote(ports)])
|
||||||
|
elif top_ports:
|
||||||
|
cmd_parts.extend(["-top-ports", str(top_ports)])
|
||||||
|
else:
|
||||||
|
cmd_parts.extend(["-top-ports", "1000"])
|
||||||
|
|
||||||
|
if scan_type:
|
||||||
|
cmd_parts.extend(["-scan-type", scan_type])
|
||||||
|
|
||||||
|
command = " ".join(cmd_parts) + " 2>/dev/null"
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = "naabu"
|
||||||
|
|
||||||
|
# Parse port findings
|
||||||
|
if result.stdout:
|
||||||
|
result.findings = parse_naabu_output(result.stdout)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def run_httpx(
|
||||||
|
self,
|
||||||
|
targets: List[str],
|
||||||
|
timeout: int = 120,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run HTTPX for HTTP probing and tech detection.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
targets: List of URLs/hosts to probe
|
||||||
|
timeout: Max execution time
|
||||||
|
"""
|
||||||
|
target_str = "\\n".join(shlex.quote(t) for t in targets)
|
||||||
|
command = (
|
||||||
|
f'echo -e "{target_str}" | httpx -silent -json '
|
||||||
|
f'-title -tech-detect -status-code -content-length '
|
||||||
|
f'-follow-redirects -no-color 2>/dev/null'
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = "httpx"
|
||||||
|
|
||||||
|
# Parse JSON lines
|
||||||
|
if result.stdout:
|
||||||
|
findings = []
|
||||||
|
for line in result.stdout.strip().split("\n"):
|
||||||
|
try:
|
||||||
|
data = json.loads(line)
|
||||||
|
findings.append({
|
||||||
|
"url": data.get("url", ""),
|
||||||
|
"status_code": data.get("status_code", 0),
|
||||||
|
"title": data.get("title", ""),
|
||||||
|
"technologies": data.get("tech", []),
|
||||||
|
"content_length": data.get("content_length", 0),
|
||||||
|
"webserver": data.get("webserver", ""),
|
||||||
|
})
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
result.findings = findings
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def run_subfinder(
|
||||||
|
self,
|
||||||
|
domain: str,
|
||||||
|
timeout: int = 120,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run Subfinder for subdomain enumeration.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
domain: Base domain to enumerate
|
||||||
|
timeout: Max execution time
|
||||||
|
"""
|
||||||
|
command = f"subfinder -d {shlex.quote(domain)} -silent -no-color 2>/dev/null"
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = "subfinder"
|
||||||
|
|
||||||
|
if result.stdout:
|
||||||
|
subdomains = [s.strip() for s in result.stdout.strip().split("\n") if s.strip()]
|
||||||
|
result.findings = [{"subdomain": s} for s in subdomains]
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def run_nmap(
|
||||||
|
self,
|
||||||
|
target: str,
|
||||||
|
ports: Optional[str] = None,
|
||||||
|
scripts: bool = True,
|
||||||
|
timeout: int = 300,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run Nmap network scanner.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
target: IP/hostname to scan
|
||||||
|
ports: Port specification
|
||||||
|
scripts: Enable default scripts (-sC)
|
||||||
|
timeout: Max execution time
|
||||||
|
"""
|
||||||
|
cmd_parts = ["nmap", "-sV"]
|
||||||
|
if scripts:
|
||||||
|
cmd_parts.append("-sC")
|
||||||
|
if ports:
|
||||||
|
cmd_parts.extend(["-p", shlex.quote(ports)])
|
||||||
|
cmd_parts.extend(["-oN", "/dev/stdout", shlex.quote(target)])
|
||||||
|
|
||||||
|
command = " ".join(cmd_parts) + " 2>/dev/null"
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = "nmap"
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def execute_raw(
|
||||||
|
self,
|
||||||
|
command: str,
|
||||||
|
timeout: int = DEFAULT_TIMEOUT,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Execute an arbitrary shell command inside the sandbox container.
|
||||||
|
|
||||||
|
Used by the Terminal Agent for interactive infrastructure testing.
|
||||||
|
Returns raw stdout/stderr/exit_code.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
command: Shell command to execute (passed to sh -c)
|
||||||
|
timeout: Max execution time in seconds
|
||||||
|
"""
|
||||||
|
result = await self._exec(f"sh -c {shlex.quote(command)}", timeout=timeout)
|
||||||
|
result.tool = "raw"
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def run_tool(
|
||||||
|
self,
|
||||||
|
tool: str,
|
||||||
|
args: str,
|
||||||
|
timeout: int = DEFAULT_TIMEOUT,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Run any tool available in the sandbox.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tool: Tool name (nuclei, naabu, nmap, httpx, etc.)
|
||||||
|
args: Command-line arguments as string
|
||||||
|
timeout: Max execution time
|
||||||
|
"""
|
||||||
|
# Validate tool is available
|
||||||
|
allowed_tools = {
|
||||||
|
"nuclei", "naabu", "nmap", "httpx", "subfinder", "katana",
|
||||||
|
"dnsx", "ffuf", "gobuster", "dalfox", "nikto", "sqlmap",
|
||||||
|
"whatweb", "curl", "dig", "whois", "masscan", "dirsearch",
|
||||||
|
"wfuzz", "arjun", "wafw00f", "waybackurls",
|
||||||
|
}
|
||||||
|
|
||||||
|
if tool not in allowed_tools:
|
||||||
|
return SandboxResult(
|
||||||
|
tool=tool, command=f"{tool} {args}", exit_code=-1,
|
||||||
|
stdout="", stderr="", duration_seconds=0,
|
||||||
|
error=f"Tool '{tool}' not in allowed list: {sorted(allowed_tools)}",
|
||||||
|
)
|
||||||
|
|
||||||
|
command = f"{tool} {args} 2>&1"
|
||||||
|
result = await self._exec(command, timeout=timeout)
|
||||||
|
result.tool = tool
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Dynamic tool install / run / cleanup
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def install_tool(
|
||||||
|
self, tool: str, install_cmd: str = ""
|
||||||
|
) -> Tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Temporarily install a tool in the sandbox container.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tool: Tool name (must be in KNOWN_INSTALLS or provide install_cmd)
|
||||||
|
install_cmd: Custom install command (overrides KNOWN_INSTALLS)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(success, message) tuple
|
||||||
|
"""
|
||||||
|
if not self.is_available:
|
||||||
|
return False, "Sandbox not available"
|
||||||
|
|
||||||
|
cmd = install_cmd or self.KNOWN_INSTALLS.get(tool, "")
|
||||||
|
if not cmd:
|
||||||
|
return False, f"No install command for '{tool}'"
|
||||||
|
|
||||||
|
logger.info(f"Installing tool '{tool}' in sandbox...")
|
||||||
|
result = await self._exec(cmd, timeout=120)
|
||||||
|
success = result.exit_code == 0
|
||||||
|
|
||||||
|
if success:
|
||||||
|
self._temp_installed.add(tool)
|
||||||
|
logger.info(f"Tool '{tool}' installed successfully")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Tool '{tool}' install failed: {result.stderr[:200]}")
|
||||||
|
|
||||||
|
msg = result.stdout[:500] if success else result.stderr[:500]
|
||||||
|
return success, msg
|
||||||
|
|
||||||
|
async def run_and_cleanup(
|
||||||
|
self,
|
||||||
|
tool: str,
|
||||||
|
args: str,
|
||||||
|
cleanup: bool = True,
|
||||||
|
timeout: int = 180,
|
||||||
|
) -> SandboxResult:
|
||||||
|
"""
|
||||||
|
Install tool if needed, run it, collect output, then cleanup.
|
||||||
|
|
||||||
|
This is the primary method for dynamic tool execution:
|
||||||
|
1. Check if tool exists in sandbox
|
||||||
|
2. Install if missing (from KNOWN_INSTALLS)
|
||||||
|
3. Run the tool with given arguments
|
||||||
|
4. Cleanup the installation if it was temporary
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tool: Tool name
|
||||||
|
args: Command-line arguments
|
||||||
|
cleanup: Whether to remove temporarily installed tools
|
||||||
|
timeout: Max execution time in seconds
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
SandboxResult with stdout, stderr, findings
|
||||||
|
"""
|
||||||
|
if not self.is_available:
|
||||||
|
return SandboxResult(
|
||||||
|
tool=tool, command=f"{tool} {args}", exit_code=-1,
|
||||||
|
stdout="", stderr="", duration_seconds=0,
|
||||||
|
error="Sandbox not available",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if tool exists
|
||||||
|
check = await self._exec(f"which {tool} 2>/dev/null")
|
||||||
|
if check.exit_code != 0:
|
||||||
|
# Try to install
|
||||||
|
ok, msg = await self.install_tool(tool)
|
||||||
|
if not ok:
|
||||||
|
return SandboxResult(
|
||||||
|
tool=tool, command=f"{tool} {args}", exit_code=-1,
|
||||||
|
stdout="", stderr=msg, duration_seconds=0,
|
||||||
|
error=f"Install failed: {msg}",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run tool
|
||||||
|
result = await self.run_tool(tool, args, timeout=timeout)
|
||||||
|
|
||||||
|
# Cleanup if temporarily installed
|
||||||
|
if cleanup and tool in self._temp_installed:
|
||||||
|
logger.info(f"Cleaning up temporarily installed tool: {tool}")
|
||||||
|
await self._exec(
|
||||||
|
f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
self._temp_installed.discard(tool)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def cleanup_temp_tools(self):
|
||||||
|
"""Remove all temporarily installed tools."""
|
||||||
|
if not self._temp_installed:
|
||||||
|
return
|
||||||
|
for tool in list(self._temp_installed):
|
||||||
|
logger.info(f"Cleaning up temp tool: {tool}")
|
||||||
|
await self._exec(
|
||||||
|
f"pip3 uninstall -y {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"gem uninstall -x {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"npm uninstall -g {shlex.quote(tool)} 2>/dev/null; "
|
||||||
|
f"rm -f $(which {shlex.quote(tool)}) 2>/dev/null",
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
self._temp_installed.discard(tool)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Global singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_manager: Optional[SandboxManager] = None
|
||||||
|
|
||||||
|
# Alias for backward compatibility
|
||||||
|
LegacySandboxManager = SandboxManager
|
||||||
|
|
||||||
|
|
||||||
|
async def get_sandbox(scan_id: Optional[str] = None) -> BaseSandbox:
|
||||||
|
"""Get a sandbox instance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scan_id: If provided, returns a per-scan KaliSandbox from the container pool.
|
||||||
|
If None, returns the legacy shared SandboxManager.
|
||||||
|
|
||||||
|
Backward compatible: all existing callers use get_sandbox() with no args.
|
||||||
|
Agent passes scan_id for per-scan container isolation.
|
||||||
|
"""
|
||||||
|
if scan_id is not None:
|
||||||
|
try:
|
||||||
|
from core.container_pool import get_pool
|
||||||
|
pool = get_pool()
|
||||||
|
return await pool.get_or_create(scan_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Per-scan sandbox failed ({e}), falling back to shared")
|
||||||
|
# Fall through to legacy
|
||||||
|
|
||||||
|
# Legacy path: shared persistent container
|
||||||
|
global _manager
|
||||||
|
if _manager is None:
|
||||||
|
_manager = SandboxManager()
|
||||||
|
ok, msg = await _manager.initialize()
|
||||||
|
if not ok:
|
||||||
|
logger.warning(f"Sandbox initialization: {msg}")
|
||||||
|
return _manager
|
||||||
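The module above is consumed through get_sandbox(). A minimal usage sketch, assuming Docker is running and the neurosploit-sandbox image has already been built; the target URL is a placeholder:

# Minimal sketch of calling the sandbox API defined above.
# Assumes the neurosploit-sandbox image is built; the target URL is a placeholder.
import asyncio

async def main():
    sandbox = await get_sandbox()              # shared legacy sandbox (no scan_id)
    print(await sandbox.health_check())        # {"status": "healthy", "tools": [...], ...}

    result = await sandbox.run_nuclei("https://example.com", severity="critical,high")
    for finding in result.findings:            # parsed by parse_nuclei_jsonl()
        print(finding["severity"], finding["title"], finding["affected_endpoint"])

    await sandbox.stop()

asyncio.run(main())

Passing a scan_id instead (get_sandbox(scan_id="...")) routes to the per-scan Kali container pool, falling back to this shared manager if the pool is unavailable.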
core/scheduler.py (new file, 219 lines)
@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
Scan Scheduler - Recurring task orchestration for NeuroSploit.

Supports cron expressions and interval-based scheduling for:
- Reconnaissance scans
- Vulnerability validation
- Re-analysis of previous findings

Uses APScheduler with SQLite persistence so jobs survive restarts.
"""

import json
import logging
from datetime import datetime
from typing import Dict, List, Optional
from pathlib import Path

logger = logging.getLogger(__name__)

try:
    from apscheduler.schedulers.asyncio import AsyncIOScheduler
    from apscheduler.triggers.cron import CronTrigger
    from apscheduler.triggers.interval import IntervalTrigger
    from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
    HAS_APSCHEDULER = True
except ImportError:
    HAS_APSCHEDULER = False
    logger.warning("APScheduler not installed. Scheduler disabled. Install with: pip install apscheduler>=3.10.0")


class ScanScheduler:
    """Manages recurring scan jobs via APScheduler."""

    def __init__(self, config: Dict, database_url: str = "sqlite:///./data/neurosploit_scheduler.db"):
        self.config = config
        self.scheduler_config = config.get('scheduler', {})
        self.enabled = self.scheduler_config.get('enabled', False)
        self.jobs_meta: Dict[str, Dict] = {}  # job_id -> metadata
        self._scan_callback = None

        if not HAS_APSCHEDULER:
            self.enabled = False
            self.scheduler = None
            return

        jobstores = {
            'default': SQLAlchemyJobStore(url=database_url)
        }
        self.scheduler = AsyncIOScheduler(jobstores=jobstores)

        # Load pre-configured jobs from config
        for job_config in self.scheduler_config.get('jobs', []):
            try:
                self.add_job(
                    job_id=job_config['id'],
                    target=job_config['target'],
                    scan_type=job_config.get('scan_type', 'quick'),
                    cron_expression=job_config.get('cron'),
                    interval_minutes=job_config.get('interval_minutes'),
                    agent_role=job_config.get('agent_role'),
                    llm_profile=job_config.get('llm_profile')
                )
            except Exception as e:
                logger.error(f"Failed to load scheduled job '{job_config.get('id', '?')}': {e}")

    def set_scan_callback(self, callback):
        """Set the callback function that executes scans.

        The callback signature should be:
            async def callback(target: str, scan_type: str,
                               agent_role: Optional[str], llm_profile: Optional[str]) -> Dict
        """
        self._scan_callback = callback

    def add_job(self, job_id: str, target: str, scan_type: str = "quick",
                cron_expression: Optional[str] = None,
                interval_minutes: Optional[int] = None,
                agent_role: Optional[str] = None,
                llm_profile: Optional[str] = None) -> Dict:
        """Schedule a recurring scan job.

        Args:
            job_id: Unique identifier for the job
            target: Target URL or IP
            scan_type: 'quick', 'full', 'recon', or 'analysis'
            cron_expression: Cron schedule (e.g., '0 */6 * * *' for every 6 hours)
            interval_minutes: Alternative to cron - run every N minutes
            agent_role: Optional agent role for AI analysis
            llm_profile: Optional LLM profile override
        """
        if not self.scheduler:
            return {"error": "Scheduler not available (APScheduler not installed)"}

        if cron_expression:
            trigger = CronTrigger.from_crontab(cron_expression)
            schedule_desc = f"cron: {cron_expression}"
        elif interval_minutes:
            trigger = IntervalTrigger(minutes=interval_minutes)
            schedule_desc = f"every {interval_minutes} minutes"
        else:
            return {"error": "Provide either cron_expression or interval_minutes"}

        self.scheduler.add_job(
            self._execute_scheduled_scan,
            trigger=trigger,
            id=job_id,
            args=[target, scan_type, agent_role, llm_profile],
            replace_existing=True,
            name=f"scan_{target}_{scan_type}"
        )

        meta = {
            "id": job_id,
            "target": target,
            "scan_type": scan_type,
            "schedule": schedule_desc,
            "agent_role": agent_role,
            "llm_profile": llm_profile,
            "created_at": datetime.now().isoformat(),
            "last_run": None,
            "run_count": 0,
            "status": "active"
        }
        self.jobs_meta[job_id] = meta

        logger.info(f"Scheduled job '{job_id}': {target} ({scan_type}) - {schedule_desc}")
        return meta

    def remove_job(self, job_id: str) -> bool:
        """Remove a scheduled job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.remove_job(job_id)
            self.jobs_meta.pop(job_id, None)
            logger.info(f"Removed scheduled job: {job_id}")
            return True
        except Exception as e:
            logger.error(f"Failed to remove job '{job_id}': {e}")
            return False

    def pause_job(self, job_id: str) -> bool:
        """Pause a scheduled job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.pause_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "paused"
            return True
        except Exception as e:
            logger.error(f"Failed to pause job '{job_id}': {e}")
            return False

    def resume_job(self, job_id: str) -> bool:
        """Resume a paused job."""
        if not self.scheduler:
            return False
        try:
            self.scheduler.resume_job(job_id)
            if job_id in self.jobs_meta:
                self.jobs_meta[job_id]["status"] = "active"
            return True
        except Exception as e:
            logger.error(f"Failed to resume job '{job_id}': {e}")
            return False

    def list_jobs(self) -> List[Dict]:
        """List all scheduled jobs with metadata."""
        jobs = []
        if self.scheduler:
            for job in self.scheduler.get_jobs():
                meta = self.jobs_meta.get(job.id, {})
                jobs.append({
                    "id": job.id,
                    "name": job.name,
                    "next_run": str(job.next_run_time) if job.next_run_time else None,
                    "target": meta.get("target", "unknown"),
                    "scan_type": meta.get("scan_type", "unknown"),
                    "schedule": meta.get("schedule", "unknown"),
                    "status": meta.get("status", "active"),
                    "last_run": meta.get("last_run"),
                    "run_count": meta.get("run_count", 0)
                })
        return jobs

    async def _execute_scheduled_scan(self, target: str, scan_type: str,
                                      agent_role: Optional[str],
                                      llm_profile: Optional[str]):
        """Execute a scheduled scan. Called by APScheduler."""
        job_id = f"scan_{target}_{scan_type}"
        logger.info(f"Executing scheduled scan: {target} ({scan_type})")

        if job_id in self.jobs_meta:
            self.jobs_meta[job_id]["last_run"] = datetime.now().isoformat()
            self.jobs_meta[job_id]["run_count"] += 1

        if self._scan_callback:
            try:
                result = await self._scan_callback(target, scan_type, agent_role, llm_profile)
                logger.info(f"Scheduled scan completed: {target} ({scan_type})")
                return result
            except Exception as e:
                logger.error(f"Scheduled scan failed for {target}: {e}")
        else:
            logger.warning("No scan callback registered. Scheduled scan skipped.")

    def start(self):
        """Start the scheduler."""
        if self.scheduler and self.enabled:
            self.scheduler.start()
            logger.info(f"Scheduler started with {len(self.list_jobs())} jobs")

    def stop(self):
        """Stop the scheduler gracefully."""
        if self.scheduler and self.scheduler.running:
            self.scheduler.shutdown(wait=False)
            logger.info("Scheduler stopped")
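A minimal sketch of wiring the scheduler into a host application; the config values and the callback body are illustrative placeholders, only the ScanScheduler methods come from the file above:

# Illustrative wiring sketch for ScanScheduler (config and callback are placeholders).
config = {"scheduler": {"enabled": True, "jobs": []}}
scheduler = ScanScheduler(config)

async def run_scan(target, scan_type, agent_role, llm_profile):
    # ...start the actual scan here and return its result dict
    return {"target": target, "scan_type": scan_type}

scheduler.set_scan_callback(run_scan)
scheduler.add_job("nightly_recon", "https://example.com",
                  scan_type="recon", cron_expression="0 2 * * *")
scheduler.start()   # AsyncIOScheduler: call from within a running asyncio event loop

Jobs persist in the SQLite job store, so a restart of the service re-arms the same schedule without re-registering jobs.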
core/tool_registry.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""
NeuroSploit v3 - Tool Installation Registry for Kali Containers

Maps tool names to installation commands that work inside kalilinux/kali-rolling.
Tools grouped by method: pre-installed (base image), apt (Kali repos), go install, pip.
"""

from typing import Optional, Dict


class ToolRegistry:
    """Registry of tool installation recipes for Kali sandbox containers."""

    # Tools pre-installed in Dockerfile.kali (no install needed)
    PRE_INSTALLED = {
        # Go tools (pre-compiled in builder stage)
        "nuclei", "naabu", "httpx", "subfinder", "katana", "dnsx",
        "uncover", "ffuf", "gobuster", "dalfox", "waybackurls",
        # APT tools (pre-installed in runtime stage)
        "nmap", "nikto", "sqlmap", "masscan", "whatweb",
        # System tools
        "curl", "wget", "git", "python3", "pip3", "go",
        "jq", "dig", "whois", "openssl", "netcat", "bash",
    }

    # APT packages available in Kali repos (on-demand, not pre-installed)
    APT_TOOLS: Dict[str, str] = {
        "wpscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq wpscan",
        "dirb": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dirb",
        "hydra": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hydra",
        "john": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq john",
        "hashcat": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq hashcat",
        "testssl": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "testssl.sh": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq testssl.sh",
        "sslscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq sslscan",
        "enum4linux": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq enum4linux",
        "nbtscan": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nbtscan",
        "dnsrecon": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq dnsrecon",
        "fierce": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq fierce",
        "amass": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq amass",
        "responder": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq responder",
        "medusa": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq medusa",
        "crackmapexec": "apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq crackmapexec",
    }

    # Go tools installed via `go install` (on-demand, not pre-compiled)
    GO_TOOLS: Dict[str, str] = {
        "gau": "github.com/lc/gau/v2/cmd/gau@latest",
        "gitleaks": "github.com/gitleaks/gitleaks/v8@latest",
        "anew": "github.com/tomnomnom/anew@latest",
        "httprobe": "github.com/tomnomnom/httprobe@latest",
    }

    # Python tools via pip
    PIP_TOOLS: Dict[str, str] = {
        "dirsearch": "pip3 install --no-cache-dir --break-system-packages dirsearch",
        "wfuzz": "pip3 install --no-cache-dir --break-system-packages wfuzz",
        "arjun": "pip3 install --no-cache-dir --break-system-packages arjun",
        "wafw00f": "pip3 install --no-cache-dir --break-system-packages wafw00f",
        "sslyze": "pip3 install --no-cache-dir --break-system-packages sslyze",
        "commix": "pip3 install --no-cache-dir --break-system-packages commix",
        "trufflehog": "pip3 install --no-cache-dir --break-system-packages trufflehog",
        "retire": "pip3 install --no-cache-dir --break-system-packages retirejs",
    }

    def get_install_command(self, tool: str) -> Optional[str]:
        """Get the install command for a tool inside a Kali container.

        Returns None if the tool is pre-installed or unknown.
        """
        if tool in self.PRE_INSTALLED:
            return None  # Already available

        if tool in self.APT_TOOLS:
            return self.APT_TOOLS[tool]

        if tool in self.GO_TOOLS:
            go_pkg = self.GO_TOOLS[tool]
            return (
                f"export GOPATH=/root/go && export PATH=$PATH:/root/go/bin && "
                f"go install -v {go_pkg} && "
                f"cp /root/go/bin/{tool} /usr/local/bin/ 2>/dev/null || true"
            )

        if tool in self.PIP_TOOLS:
            return self.PIP_TOOLS[tool]

        return None

    def is_known(self, tool: str) -> bool:
        """Check if we have a recipe for this tool."""
        return (
            tool in self.PRE_INSTALLED
            or tool in self.APT_TOOLS
            or tool in self.GO_TOOLS
            or tool in self.PIP_TOOLS
        )

    def all_tools(self) -> Dict[str, str]:
        """Return all known tools and their install method."""
        result = {}
        for t in self.PRE_INSTALLED:
            result[t] = "pre-installed"
        for t in self.APT_TOOLS:
            result[t] = "apt"
        for t in self.GO_TOOLS:
            result[t] = "go"
        for t in self.PIP_TOOLS:
            result[t] = "pip"
        return result
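A short sketch of how the registry would typically be queried before dispatching an install command into a Kali container; the tool names are just examples:

# Example lookups against the registry above (tool names are examples only).
registry = ToolRegistry()

print(registry.get_install_command("nuclei"))   # None -> pre-installed in the image
print(registry.get_install_command("hydra"))    # apt-get install recipe
print(registry.get_install_command("gau"))      # go install + copy into /usr/local/bin
print(registry.is_known("notarealtool"))        # False

print(registry.all_tools()["wpscan"])           # "apt"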
data/reports/report_20260209_001839/report_20260209_001839.html (new file, 1365 lines; diff suppressed because it is too large)
data/reports/report_20260209_001855/report_20260209_001855.html (new file, 1365 lines; diff suppressed because it is too large)
data/vuln_knowledge_base.json (new file, 1360 lines; diff suppressed because it is too large)