NeuroSploit v3.2 - Autonomous AI Penetration Testing Platform

116 modules | 100 vuln types | 18 API routes | 18 frontend pages

Major features:
- VulnEngine: 100 vuln types, 526+ payloads, 12 testers, anti-hallucination prompts
- Autonomous Agent: 3-stream auto pentest, multi-session (5 concurrent), pause/resume/stop
- CLI Agent: Claude Code / Gemini CLI / Codex CLI inside Kali containers
- Validation Pipeline: negative controls, proof of execution, confidence scoring, judge
- AI Reasoning: ReACT engine, token budget, endpoint classifier, CVE hunter, deep recon
- Multi-Agent: 5 specialists + orchestrator + researcher AI + vuln type agents
- RAG System: BM25/TF-IDF/ChromaDB vectorstore, few-shot, reasoning templates
- Smart Router: 20 providers (8 CLI OAuth + 12 API), tier failover, token refresh
- Kali Sandbox: container-per-scan, 56 tools, VPN support, on-demand install
- Full IA Testing: methodology-driven comprehensive pentest sessions
- Notifications: Discord, Telegram, WhatsApp/Twilio multi-channel alerts
- Frontend: React/TypeScript with 18 pages, real-time WebSocket updates
This commit is contained in:
CyberSecurityUP
2026-02-22 17:58:12 -03:00
commit e0935793c5
271 changed files with 132462 additions and 0 deletions

View File

@@ -0,0 +1,721 @@
"""
CLI Agent Runner - Executes AI CLI tools (Claude Code, Gemini CLI, Codex CLI) inside
Kali Linux Docker containers for autonomous penetration testing.
Architecture:
1. Detects OAuth token from SmartRouter
2. Creates per-scan Kali container via ContainerPool
3. Installs Node.js + selected CLI tool
4. Uploads methodology file + instructions
5. Runs CLI in non-interactive mode (background process)
6. Polls output file, extracts findings in real-time
7. Findings flow through existing validation pipeline
Follows ResearcherAgent pattern (lifecycle, callbacks, sandbox integration).
"""
import os
import time
import asyncio
import logging
import hashlib
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Callable, Any
logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════════════════════
# CLI Provider Definitions
# ═══════════════════════════════════════════════════════════════════════════════
@dataclass
class CLIProvider:
    """Static description of one AI CLI tool that can be installed in the container.

    Fields:
        id: Registry key of the provider.
        name: Human-readable label used in log messages.
        npm_package: Package name used for the default ``npm install -g`` and
            for the ``npx`` availability probe.
        command: Executable invoked inside the container.
        auth_env: Environment variable that carries the OAuth/API token.
        non_interactive_flags: Flags appended to ``command`` for unattended runs.
        model_flag: Flag that precedes the model name.
        needs_nodejs: Most CLI tools are npm-based, hence the ``True`` default.
        install_cmd: Complete install command; replaces the npm default when set.
        prompt_method: One of ``"stdin"``, ``"flag"`` or ``"file"``.
        extra_setup: Shell commands executed after installation, if any.
    """

    # Required settings
    id: str
    name: str
    npm_package: str
    command: str
    auth_env: str
    non_interactive_flags: str
    model_flag: str

    # Optional settings
    needs_nodejs: bool = True
    install_cmd: Optional[str] = None
    prompt_method: str = "stdin"
    extra_setup: Optional[str] = None
# Registry of the CLI tools the runner knows how to install and launch,
# keyed by the provider id that callers pass as ``cli_provider_id``.
CLI_PROVIDERS: Dict[str, CLIProvider] = {
    "claude_code": CLIProvider(
        id="claude_code",
        name="Claude Code",
        npm_package="@anthropic-ai/claude-code",
        command="claude",
        auth_env="ANTHROPIC_API_KEY",
        non_interactive_flags="--print --dangerously-skip-permissions --verbose",
        model_flag="--model",
        prompt_method="stdin",
    ),
    "gemini_cli": CLIProvider(
        id="gemini_cli",
        name="Gemini CLI",
        # Gemini CLI is published as its own npm package. Installing Claude
        # Code here never creates the `gemini` executable, yet the npx probe
        # of that package succeeds, so the failure only surfaced at launch.
        npm_package="@google/gemini-cli",
        command="gemini",
        auth_env="GEMINI_API_KEY",
        # NOTE(review): confirm `--sandbox` is the intended unattended mode
        # when the CLI already runs inside the Kali container.
        non_interactive_flags="--sandbox",
        model_flag="--model",
        prompt_method="stdin",
    ),
    "codex_cli": CLIProvider(
        id="codex_cli",
        name="OpenAI Codex CLI",
        npm_package="@openai/codex",
        command="codex",
        auth_env="OPENAI_API_KEY",
        non_interactive_flags="--full-auto --quiet",
        model_flag="--model",
        prompt_method="stdin",
    ),
}
# ═══════════════════════════════════════════════════════════════════════════════
# Result Data Classes
# ═══════════════════════════════════════════════════════════════════════════════
@dataclass
class CLIAgentResult:
    """Outcome of one CLI agent run.

    Every field has a default, so an error result can be created with just
    ``CLIAgentResult(error="...")``. ``exit_code`` stays ``-1`` until a real
    status has been obtained.
    """

    # What the agent produced
    findings: List[Dict] = field(default_factory=list)
    raw_output: str = ""

    # How the run went
    duration: float = 0.0
    exit_code: int = -1
    tools_used: List[str] = field(default_factory=list)
    phases_completed: List[str] = field(default_factory=list)
    total_output_lines: int = 0

    # Which provider ran, and why it failed (if it did)
    cli_provider: str = ""
    error: Optional[str] = None
# ═══════════════════════════════════════════════════════════════════════════════
# CLI Agent Runner
# ═══════════════════════════════════════════════════════════════════════════════
class CLIAgentRunner:
"""
Runs an AI CLI tool inside a Kali Linux container for penetration testing.

Lifecycle:
runner = CLIAgentRunner(...)
ok, msg = await runner.initialize() # Container + CLI install
result = await runner.run() # Execute + poll findings
await runner.shutdown() # Cleanup

cancel() sets a flag that the polling loop checks on each iteration.
"""
# Working directory inside the container; uploaded files and the token
# env file are written below it.
WORK_DIR = "/opt/pentest"
# File that receives the CLI's stdout/stderr and is read back while polling.
OUTPUT_LOG = "/opt/pentest/output.log"
# NOTE(review): not referenced anywhere else in the visible part of this file.
FINDINGS_LOG = "/opt/pentest/findings.jsonl"
def __init__(
    self,
    scan_id: str,
    target: str,
    cli_provider_id: str = "claude_code",
    methodology_path: Optional[str] = None,
    preferred_model: Optional[str] = None,
    log_callback: Optional[Callable] = None,
    progress_callback: Optional[Callable] = None,
    finding_callback: Optional[Callable] = None,
    auth_headers: Optional[Dict] = None,
    max_runtime: Optional[int] = None,
    token_budget: Optional[Any] = None,
    llm: Optional[Any] = None,
):
    """Store the run configuration and reset all per-run state.

    Args:
        scan_id: Identifier of the scan; also used to name the container.
        target: Target under test.
        cli_provider_id: Key into ``CLI_PROVIDERS``.
        methodology_path: Methodology file; falls back to the
            ``METHODOLOGY_FILE`` environment variable, then a built-in path.
        preferred_model: Model name handed to the CLI, if any.
        log_callback: Awaitable called as ``(level, message)``.
        progress_callback: Awaitable called as ``(pct, phase)``.
        finding_callback: Awaitable called with each finding dict.
        auth_headers: Headers forwarded to the instruction builders.
        max_runtime: Hard limit in seconds; falls back to
            ``CLI_AGENT_MAX_RUNTIME`` (default 1800).
        token_budget: Stored as-is.
        llm: Client used for AI-assisted extraction; extraction is skipped
            when it is falsy.
    """
    # --- What to run, and against what ---------------------------------
    self.scan_id = scan_id
    self.target = target
    self.cli_provider_id = cli_provider_id
    if not methodology_path:
        methodology_path = os.getenv(
            "METHODOLOGY_FILE", "/opt/Prompts-PenTest/pentestcompleto_en.md"
        )
    self.methodology_path = methodology_path
    self.preferred_model = preferred_model

    # --- Hooks and collaborators ---------------------------------------
    self.log_callback = log_callback
    self.progress_callback = progress_callback
    self.finding_callback = finding_callback
    self.auth_headers = auth_headers or {}
    self.token_budget = token_budget
    self.llm = llm

    # --- Timing knobs (all in seconds) ---------------------------------
    if not max_runtime:
        max_runtime = int(os.getenv("CLI_AGENT_MAX_RUNTIME", "1800"))
    self.max_runtime = max_runtime
    self.poll_interval = 3          # pause between output checks
    self.stale_timeout = 300        # stop after 5 min without new output
    self.ai_extract_interval = 300  # AI extraction every 5 min

    # --- Mutable run state ---------------------------------------------
    self._sandbox = None
    self._provider: Optional[CLIProvider] = None
    self._oauth_token: Optional[str] = None
    self._cli_pid: Optional[str] = None
    self._cancelled = False
    self._output_offset = 0
    self._last_output_time = 0.0
    self._start_time = 0.0
    self._all_output: List[str] = []

    # --- Output parser (imported here, as in the rest of this class) ---
    from backend.core.cli_output_parser import CLIOutputParser
    self._parser = CLIOutputParser()

    # --- Optional context set by autonomous_agent before run() ---------
    self.recon_data: Optional[Dict] = None
    self.existing_findings: Optional[List] = None
# ── Logging Helpers ────────────────────────────────────────────────────
async def _log(self, level: str, message: str):
    """Send *message* to the log callback (if any) and to the module logger.

    Args:
        level: Level name such as ``"info"`` or ``"warning"``. Names that
            the ``logging`` module does not define as a numeric level
            (for example ``"success"``) are logged at INFO.
        message: Text to log; it is prefixed with ``[CLI-AGENT]``.
    """
    text = f"[CLI-AGENT] {message}"
    if self.log_callback:
        try:
            await self.log_callback(level, text)
        except Exception as exc:
            # Best-effort: a failing callback must not abort the scan,
            # but it should not vanish without a trace either.
            logger.debug("[CLI-AGENT] log callback failed: %s", exc)

    numeric_level = getattr(logging, level.upper(), logging.INFO)
    if not isinstance(numeric_level, int):
        # getattr can hit a non-level attribute (e.g. BASIC_FORMAT is a
        # string), which logger.log() rejects.
        numeric_level = logging.INFO
    logger.log(numeric_level, text)
async def _progress(self, pct: int, phase: str):
if self.progress_callback:
try:
await self.progress_callback(pct, phase)
except Exception:
pass
# ── Lifecycle ──────────────────────────────────────────────────────────
async def initialize(self) -> Tuple[bool, str]:
    """Prepare the container so that run() can launch the CLI.

    Steps: resolve the provider, obtain a credential (SmartRouter first,
    then the provider's environment variable), create the sandbox
    container, install Node.js and the CLI, run optional extra setup,
    upload the prompt files and inject the credential.

    Returns:
        ``(True, message)`` on success or ``(False, reason)`` on failure.
        Exceptions are caught and reported through the second form.
    """
    from dataclasses import replace

    try:
        # 1. Resolve provider
        provider = CLI_PROVIDERS.get(self.cli_provider_id)
        self._provider = provider
        if not provider:
            return False, f"Unknown CLI provider: {self.cli_provider_id}"
        await self._log("info", f"Provider: {provider.name}")

        # 2. Credential: SmartRouter OAuth token first, then env API key
        self._oauth_token = self._get_oauth_token(self.cli_provider_id)
        if self._oauth_token:
            await self._log("info", "Using OAuth token from SmartRouter")
        else:
            env_key = provider.auth_env
            self._oauth_token = os.getenv(env_key, "")
            if not self._oauth_token:
                return False, (
                    f"No OAuth token or API key found for {provider.name}. "
                    f"Connect via Providers page or set {env_key} in .env"
                )
            await self._log("info", "Using API key from environment")

        # 3. Create Kali sandbox container
        await self._log("info", "Creating Kali sandbox container...")
        try:
            from core.container_pool import get_pool
            pool = get_pool()
            self._sandbox = await pool.get_or_create(
                scan_id=f"cli-agent-{self.scan_id}",
                enable_vpn=False,
            )
            await self._log("info", f"Container ready: {getattr(self._sandbox, 'container_name', 'kali')}")
        except Exception as e:
            return False, f"Failed to create Kali container: {e}"

        # 4a. Node.js (skipped by the shell one-liner when already present)
        await self._log("info", "Installing Node.js...")
        await self._progress(2, "Installing Node.js")
        result = await self._sandbox.execute_raw(
            "which node > /dev/null 2>&1 && echo 'exists' || "
            "(apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nodejs npm > /dev/null 2>&1 && echo 'installed')",
            timeout=120,
        )
        if "exists" in result.stdout:
            await self._log("info", "Node.js already available")
        elif "installed" in result.stdout:
            await self._log("info", "Node.js installed successfully")
        else:
            return False, f"Failed to install Node.js: {result.stderr[:200]}"

        # 4b. The CLI tool itself
        await self._log("info", f"Installing {provider.name} CLI...")
        await self._progress(4, f"Installing {provider.name}")
        install_cmd = provider.install_cmd or f"npm install -g {provider.npm_package}"
        install = await self._sandbox.execute_raw(install_cmd, timeout=180)
        if install.exit_code != 0:
            # Not fatal yet: the availability checks below decide.
            await self._log(
                "warning",
                f"Install command exited with {install.exit_code}: {(install.stderr or '')[:200]}",
            )

        # Verify CLI is available, directly or through npx
        verify = await self._sandbox.execute_raw(f"which {provider.command}", timeout=10)
        if verify.exit_code == 0:
            await self._log("info", f"CLI installed: {verify.stdout.strip()}")
        else:
            npx_command = f"npx --yes {provider.npm_package}"
            probe = await self._sandbox.execute_raw(f"{npx_command} --version", timeout=60)
            if probe.exit_code != 0:
                return False, f"CLI tool '{provider.command}' not found after installation"
            # The executable is not on PATH, so the launch must go through
            # npx as well. Work on a copy: CLI_PROVIDERS is a module-level
            # registry shared by every runner.
            provider = replace(provider, command=npx_command)
            self._provider = provider
            await self._log("info", "CLI available via npx")

        # 5. Extra setup if needed
        if provider.extra_setup:
            await self._sandbox.execute_raw(provider.extra_setup, timeout=60)

        # 6. Upload files
        await self._log("info", "Uploading methodology and instructions...")
        await self._progress(6, "Uploading files")
        await self._upload_files()

        # 7. Inject OAuth token as env var
        await self._inject_token()

        await self._log("info", "Initialization complete")
        await self._progress(8, "Ready to start")
        return True, "CLI Agent initialized successfully"
    except Exception as e:
        logger.exception("[CLI-AGENT] Initialization failed")
        return False, f"Initialization error: {e}"
async def run(self) -> CLIAgentResult:
    """Launch the CLI and follow its output until it finishes.

    Returns:
        The result assembled by the polling loop, or a result whose
        ``error`` is set when the runner is not initialized, the process
        could not be started, the task was cancelled or an exception
        occurred.
    """
    if not (self._sandbox and self._provider):
        return CLIAgentResult(error="Not initialized")

    self._start_time = self._last_output_time = time.time()

    try:
        label = self._provider.name
        await self._log("info", f"Starting {label} against {self.target}")
        await self._progress(10, f"{label} starting")

        pid = await self._start_cli_process()
        if not pid:
            return CLIAgentResult(error="Failed to start CLI process")
        self._cli_pid = pid
        await self._log("info", f"CLI process started (PID: {pid})")

        return await self._poll_output_loop()
    except asyncio.CancelledError:
        # Cancellation is converted into an error result after the CLI is stopped.
        await self._log("warning", "Run cancelled")
        await self._kill_cli_process()
        return CLIAgentResult(error="Cancelled")
    except Exception as e:
        logger.exception("[CLI-AGENT] Run failed")
        await self._log("error", f"Run error: {e}")
        return CLIAgentResult(error=str(e))
async def shutdown(self):
    """Stop the CLI process and destroy the scan's container.

    Errors while destroying the container are logged as warnings; the
    sandbox reference is dropped either way.
    """
    await self._kill_cli_process()
    if not self._sandbox:
        return

    container_key = f"cli-agent-{self.scan_id}"
    try:
        from core.container_pool import get_pool
        await get_pool().destroy(container_key)
        await self._log("info", "Container destroyed")
    except Exception as e:
        logger.warning(f"[CLI-AGENT] Cleanup error: {e}")
    self._sandbox = None
def cancel(self):
"""Signal cancellation."""
self._cancelled = True
# ── Container Setup (Private) ──────────────────────────────────────────
def _get_oauth_token(self, provider_id: str) -> Optional[str]:
    """Look up a credential for *provider_id* in the SmartRouter registry.

    Uses the first active account. Returns ``None`` when there is no
    registry, no active account or no credential, and also when the lookup
    raises (logged at debug level).
    """
    try:
        from backend.core.smart_router import get_registry

        registry = get_registry()
        accounts = registry.get_active_accounts(provider_id) if registry else None
        if not accounts:
            return None

        primary = accounts[0]
        token = registry.get_credential(primary.id)
        if not token:
            return None

        logger.info(f"[CLI-AGENT] Got OAuth token for {provider_id} (account: {primary.label})")
        return token
    except Exception as e:
        logger.debug(f"[CLI-AGENT] SmartRouter token retrieval failed: {e}")
        return None
async def _upload_files(self):
    """Create the work directory and upload the prompt material.

    Uploads ``methodology.md`` (when a methodology could be loaded),
    ``instructions.md`` and ``CLAUDE.md``. When recon data is present, a
    short summary of technologies, endpoints and prior findings is passed
    to the instruction builder as extra context.
    """
    from backend.core.cli_instructions_builder import (
        build_instructions, build_claude_md, load_methodology
    )

    sandbox = self._sandbox
    work_dir = self.WORK_DIR
    methodology_dest = f"{work_dir}/methodology.md"

    await sandbox.execute_raw(f"mkdir -p {work_dir}", timeout=5)

    # Methodology document
    methodology = load_methodology(self.methodology_path)
    if not methodology:
        await self._log("warning", "No methodology file available")
    else:
        await sandbox.upload_file(methodology.encode("utf-8"), methodology_dest)
        await self._log("info", f"Uploaded methodology ({len(methodology)} chars)")

    # Optional recon summary (auto_pentest integration)
    extra_context = None
    if self.recon_data:
        endpoints = self.recon_data.get("endpoints", [])[:20]
        techs = self.recon_data.get("technologies", [])
        sections = []
        if techs:
            sections.append(f"Detected technologies: {', '.join(techs)}")
        if endpoints:
            bullets = [
                f"- {ep.get('method', 'GET')} {ep.get('url', '')}"
                for ep in endpoints[:15]
            ]
            sections.append("Discovered endpoints:\n" + "\n".join(bullets))
        if self.existing_findings:
            sections.append(
                f"Already found {len(self.existing_findings)} vulnerabilities. "
                f"Focus on areas not yet tested."
            )
        extra_context = "\n".join(sections)

    headers = self.auth_headers or None

    # Instructions handed to the CLI as its prompt
    instructions = build_instructions(
        target=self.target,
        auth_headers=headers,
        methodology_path=methodology_dest,
        extra_context=extra_context,
    )
    await sandbox.upload_file(instructions.encode("utf-8"), f"{work_dir}/instructions.md")

    # CLAUDE.md (auto-read by Claude Code)
    claude_md = build_claude_md(target=self.target, auth_headers=headers)
    await sandbox.upload_file(claude_md.encode("utf-8"), f"{work_dir}/CLAUDE.md")
async def _inject_token(self):
"""Inject OAuth/API token as environment variable in container."""
if not self._oauth_token or not self._provider:
return
# Write to .bashrc so it's available to background processes
env_var = self._provider.auth_env
# Use base64 encoding to safely pass token with special chars
import base64
encoded = base64.b64encode(self._oauth_token.encode()).decode()
await self._sandbox.execute_raw(
f'echo \'export {env_var}="$(echo {encoded} | base64 -d)"\' >> /root/.bashrc',
timeout=5,
)
# Also write to a env file that can be sourced
await self._sandbox.execute_raw(
f'echo \'export {env_var}="$(echo {encoded} | base64 -d)"\' > {self.WORK_DIR}/.env',
timeout=5,
)
await self._log("info", f"Token injected as ${env_var}")
# ── Execution (Private) ────────────────────────────────────────────────
async def _start_cli_process(self) -> Optional[str]:
"""Start the CLI tool as a background process in the container."""
provider = self._provider
if not provider:
return None
# Build model flag
model_part = ""
if self.preferred_model and provider.model_flag:
model_part = f"{provider.model_flag} {self.preferred_model}"
# Build the prompt - read instructions file
prompt_input = f"cat {self.WORK_DIR}/instructions.md"
# Build CLI command based on provider
if provider.id == "claude_code":
cli_cmd = (
f"cd {self.WORK_DIR} && "
f"source {self.WORK_DIR}/.env && "
f"{provider.command} {provider.non_interactive_flags} "
f"{model_part} "
f"\"$(cat {self.WORK_DIR}/instructions.md)\""
)
elif provider.id == "codex_cli":
cli_cmd = (
f"cd {self.WORK_DIR} && "
f"source {self.WORK_DIR}/.env && "
f"{provider.command} {provider.non_interactive_flags} "
f"{model_part} "
f"\"$(cat {self.WORK_DIR}/instructions.md)\""
)
else:
# Generic fallback
cli_cmd = (
f"cd {self.WORK_DIR} && "
f"source {self.WORK_DIR}/.env && "
f"{provider.command} {provider.non_interactive_flags} "
f"{model_part} "
f"\"$(cat {self.WORK_DIR}/instructions.md)\""
)
# Run as background process with output capture
full_cmd = (
f"nohup bash -c '{cli_cmd}' "
f"> {self.OUTPUT_LOG} 2>&1 & echo $!"
)
result = await self._sandbox.execute_raw(full_cmd, timeout=15)
pid = result.stdout.strip().split('\n')[-1].strip()
if pid and pid.isdigit():
return pid
await self._log("error", f"Failed to get PID. stdout: {result.stdout[:200]}, stderr: {result.stderr[:200]}")
return None
async def _poll_output_loop(self) -> CLIAgentResult:
    """Follow the CLI output until the process ends or has to be stopped.

    Each iteration reads new output, logs notable lines, parses findings,
    and checks the stop conditions: cancellation flag, ``max_runtime``,
    ``stale_timeout`` without output, or process exit. AI-assisted
    extraction runs every ``ai_extract_interval`` seconds and once more at
    the end when an LLM client is available.

    Returns:
        A ``CLIAgentResult`` with the collected findings, the raw output
        (capped at 500000 characters), duration and exit code.
    """
    # Lines containing one of these markers are always logged at info level.
    markers = (
        '[PHASE]', '[COMPLETE]', '[FINDING]', '[VULNERABILITY]',
        'FINDING_START', 'FINDING_END', '[critical]', '[high]',
        'Confirmed', 'Vulnerability found',
    )
    last_ai_extract = time.time()
    all_findings: List[Dict] = []
    raw_output_parts: List[str] = []
    lines_seen = 0  # non-empty lines so far; drives the debug sampling

    while not self._cancelled:
        elapsed = time.time() - self._start_time

        # Check max runtime
        if elapsed > self.max_runtime:
            await self._log("warning", f"Max runtime ({self.max_runtime}s) exceeded, stopping")
            await self._kill_cli_process()
            break

        # Read new output
        new_text = await self._read_new_output()
        if new_text:
            self._last_output_time = time.time()
            raw_output_parts.append(new_text)

            for line in new_text.split('\n'):
                line_s = line.strip()
                if not line_s:
                    continue
                lines_seen += 1
                if any(kw in line_s for kw in markers):
                    await self._log("info", line_s[:300])
                elif lines_seen % 20 == 0:
                    # Sample every 20th line; the counter advances per line,
                    # so a large chunk is not logged in full.
                    await self._log("debug", line_s[:200])

            await self._record_parsed_findings(new_text, all_findings)

        # Check stale timeout (no output for too long)
        stale_elapsed = time.time() - self._last_output_time
        if stale_elapsed > self.stale_timeout:
            await self._log("warning", f"No output for {int(stale_elapsed)}s, stopping")
            await self._kill_cli_process()
            break

        # AI extraction on accumulated unparsed text (every 5 min)
        if (time.time() - last_ai_extract > self.ai_extract_interval
                and self.llm and self._parser.get_unparsed_text(clear=False)):
            last_ai_extract = time.time()
            await self._run_ai_extraction(all_findings)

        # Check if CLI process is still running
        if not await self._is_process_alive():
            await self._log("info", "CLI process has exited")
            # Pick up whatever was written between the last read and exit
            remaining = await self._read_new_output()
            if remaining:
                raw_output_parts.append(remaining)
                await self._record_parsed_findings(remaining, all_findings)
            break

        # Update progress (time-based heuristic, capped at 90%)
        pct = min(90, 10 + int((elapsed / self.max_runtime) * 80))
        phase = f"{self._provider.name} testing ({int(elapsed)}s)"
        if self._parser.phases:
            phase = f"{self._parser.phases[-1]} ({int(elapsed)}s)"
        await self._progress(pct, phase)

        await asyncio.sleep(self.poll_interval)

    # Final AI extraction on any remaining unparsed text
    if self.llm:
        await self._run_ai_extraction(all_findings)

    # Get exit code
    # NOTE(review): `wait` only reaps children of the shell that runs it; if
    # execute_raw starts a fresh shell this yields 127 instead of the CLI's
    # real status - confirm against the sandbox implementation.
    exit_code = -1
    try:
        if self._cli_pid:
            result = await self._sandbox.execute_raw(
                f"wait {self._cli_pid} 2>/dev/null; echo $?", timeout=5
            )
            code = result.stdout.strip().split('\n')[-1].strip()
            if code.isdigit():
                exit_code = int(code)
    except Exception:
        pass

    duration = time.time() - self._start_time
    # Chunks are consecutive slices of one file, so they are concatenated
    # as-is; a separator would insert line breaks the CLI never wrote.
    raw_output = "".join(raw_output_parts)

    await self._log("info",
                    f"Completed: {len(all_findings)} findings, "
                    f"{self._parser.total_findings} total parsed, "
                    f"{int(duration)}s elapsed")
    await self._progress(95, "CLI Agent complete")

    return CLIAgentResult(
        findings=all_findings,
        raw_output=raw_output[:500000],  # Cap raw output at 500KB
        duration=duration,
        exit_code=exit_code,
        phases_completed=self._parser.phases,
        total_output_lines=len(self._all_output),
        cli_provider=self.cli_provider_id,
    )

async def _record_parsed_findings(self, text: str, all_findings: List[Dict]) -> None:
    """Parse *text*, append each finding to *all_findings* and announce it.

    A missing ``affected_endpoint`` defaults to the scan target. Errors
    raised by the finding callback are logged at debug level and ignored.
    """
    for finding in self._parser.parse_chunk(text):
        finding_dict = finding.to_dict()
        finding_dict["affected_endpoint"] = finding_dict.get("affected_endpoint") or self.target
        all_findings.append(finding_dict)

        if self.finding_callback:
            try:
                await self.finding_callback(finding_dict)
            except Exception as e:
                logger.debug(f"Finding callback error: {e}")

        await self._log("success",
                        f"Finding: {finding.title} [{finding.severity.upper()}]")
async def _read_new_output(self) -> str:
    """Return the output appended to the log file since the previous call.

    Advances the byte offset by the UTF-8 length of what was read and adds
    the text, split on newlines, to ``_all_output``. Returns ``""`` when
    nothing new is available or the read fails (logged at debug level).
    """
    try:
        # dd with a byte offset (more reliable than tail -c +N)
        read_cmd = f"dd if={self.OUTPUT_LOG} bs=1 skip={self._output_offset} 2>/dev/null"
        result = await self._sandbox.execute_raw(read_cmd, timeout=10)
        chunk = result.stdout
        if chunk:
            # NOTE(review): assumes stdout decodes/re-encodes losslessly;
            # otherwise the offset can drift - confirm with execute_raw.
            self._output_offset += len(chunk.encode('utf-8'))
            self._all_output.extend(chunk.split('\n'))
            return chunk
    except Exception as e:
        logger.debug(f"[CLI-AGENT] Read output error: {e}")
    return ""
async def _is_process_alive(self) -> bool:
"""Check if the CLI process is still running."""
if not self._cli_pid:
return False
try:
result = await self._sandbox.execute_raw(
f"kill -0 {self._cli_pid} 2>/dev/null && echo alive || echo dead",
timeout=5,
)
return "alive" in result.stdout
except Exception:
return False
async def _kill_cli_process(self):
"""Kill the CLI process in the container."""
if not self._cli_pid or not self._sandbox:
return
try:
await self._sandbox.execute_raw(
f"kill {self._cli_pid} 2>/dev/null; sleep 1; kill -9 {self._cli_pid} 2>/dev/null",
timeout=10,
)
await self._log("info", f"CLI process {self._cli_pid} killed")
except Exception as e:
logger.debug(f"[CLI-AGENT] Kill error: {e}")
async def _run_ai_extraction(self, all_findings: List[Dict]):
    """Ask the LLM to extract findings from text the parser could not handle.

    New findings are appended to *all_findings* in place and passed to the
    finding callback. A finding counts as a duplicate when its title,
    affected endpoint (after defaulting to the target) and severity match
    an entry already in the list. Errors are logged at debug level.

    Args:
        all_findings: Findings collected so far; extended in place.
    """
    # Peek first: a tail shorter than 200 characters stays in the parser so
    # it can accumulate with later output instead of being thrown away.
    pending = self._parser.get_unparsed_text(clear=False)
    if not pending or len(pending) < 200:
        return
    unparsed = self._parser.get_unparsed_text(clear=True)

    def dedup_key(entry: Dict) -> str:
        return (
            f"{entry.get('title', '')}|"
            f"{entry.get('affected_endpoint', '')}|"
            f"{entry.get('severity', '')}"
        )

    try:
        from backend.core.cli_output_parser import ai_extract_findings
        ai_findings = await ai_extract_findings(unparsed, self.llm)

        # Built once and kept current, rather than rebuilt per finding
        seen = {dedup_key(f) for f in all_findings}

        for finding in ai_findings:
            finding_dict = finding.to_dict()
            # Default the endpoint BEFORE computing the key: stored entries
            # carry the defaulted value, so both sides must agree.
            finding_dict["affected_endpoint"] = finding_dict.get("affected_endpoint") or self.target

            key = dedup_key(finding_dict)
            if key in seen:
                continue
            seen.add(key)
            all_findings.append(finding_dict)

            if self.finding_callback:
                try:
                    await self.finding_callback(finding_dict)
                except Exception:
                    # Callback delivery is best-effort
                    pass

            await self._log("success",
                            f"AI-extracted: {finding.title} [{finding.severity.upper()}]")
    except Exception as e:
        logger.debug(f"[CLI-AGENT] AI extraction error: {e}")
# ── Status ──────────────────────────────────────────────────────────────
def get_status(self) -> Dict:
"""Return current runner status."""
elapsed = time.time() - self._start_time if self._start_time else 0
return {
"provider": self.cli_provider_id,
"provider_name": self._provider.name if self._provider else "",
"target": self.target,
"running": self._cli_pid is not None and not self._cancelled,
"elapsed": int(elapsed),
"findings_count": self._parser.total_findings,
"phases": self._parser.phases,
"output_lines": len(self._all_output),
"is_complete": self._parser.is_complete,
}