"""
CLI Agent Runner - Executes AI CLI tools (Claude Code, Gemini CLI, Codex CLI)
inside Kali Linux Docker containers for autonomous penetration testing.

Architecture:
    1. Detects OAuth token from SmartRouter
    2. Creates per-scan Kali container via ContainerPool
    3. Installs Node.js + selected CLI tool
    4. Uploads methodology file + instructions
    5. Runs CLI in non-interactive mode (background process)
    6. Polls output file, extracts findings in real-time
    7. Findings flow through existing validation pipeline

Follows ResearcherAgent pattern (lifecycle, callbacks, sandbox integration).
"""

import os
import time
import asyncio
import base64
import logging
import hashlib
import shlex
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Callable, Any

logger = logging.getLogger(__name__)


# ═══════════════════════════════════════════════════════════════════════════════
# CLI Provider Definitions
# ═══════════════════════════════════════════════════════════════════════════════

@dataclass
class CLIProvider:
    """Definition of a CLI tool that can run inside the Kali container."""

    id: str
    name: str
    npm_package: str
    command: str
    auth_env: str  # Env var for the OAuth/API token
    non_interactive_flags: str  # Flags for non-interactive mode
    model_flag: str  # Flag to specify model
    needs_nodejs: bool = True  # Most CLI tools are npm-based
    install_cmd: Optional[str] = None  # Override for non-npm install
    prompt_method: str = "stdin"  # "stdin", "flag", "file"
    extra_setup: Optional[str] = None  # Extra setup commands after install


CLI_PROVIDERS: Dict[str, CLIProvider] = {
    "claude_code": CLIProvider(
        id="claude_code",
        name="Claude Code",
        npm_package="@anthropic-ai/claude-code",
        command="claude",
        auth_env="ANTHROPIC_API_KEY",
        non_interactive_flags="--print --dangerously-skip-permissions --verbose",
        model_flag="--model",
        prompt_method="stdin",
    ),
    # NOTE(review): this entry installs the Claude Code package but launches
    # the `gemini` command, so the launched binary is never installed by
    # `install_cmd`. The package name and flags for the real Gemini CLI need
    # to be confirmed and filled in here.
    "gemini_cli": CLIProvider(
        id="gemini_cli",
        name="Gemini CLI",
        npm_package="@anthropic-ai/claude-code",
        command="gemini",
        auth_env="GEMINI_API_KEY",
        non_interactive_flags="--sandbox",
        model_flag="--model",
        prompt_method="stdin",
        install_cmd="npm install -g @anthropic-ai/claude-code",
    ),
    "codex_cli": CLIProvider(
        id="codex_cli",
        name="OpenAI Codex CLI",
        npm_package="@openai/codex",
        command="codex",
        auth_env="OPENAI_API_KEY",
        non_interactive_flags="--full-auto --quiet",
        model_flag="--model",
        prompt_method="stdin",
    ),
}


# ═══════════════════════════════════════════════════════════════════════════════
# Result Data Classes
# ═══════════════════════════════════════════════════════════════════════════════

@dataclass
class CLIAgentResult:
    """Result of a CLI agent run."""

    findings: List[Dict] = field(default_factory=list)
    raw_output: str = ""
    duration: float = 0.0
    exit_code: int = -1
    tools_used: List[str] = field(default_factory=list)
    phases_completed: List[str] = field(default_factory=list)
    total_output_lines: int = 0
    cli_provider: str = ""
    error: Optional[str] = None


# ═══════════════════════════════════════════════════════════════════════════════
# CLI Agent Runner
# ═══════════════════════════════════════════════════════════════════════════════

class CLIAgentRunner:
    """
    Runs an AI CLI tool inside a Kali Linux container for penetration testing.

    Lifecycle:
        runner = CLIAgentRunner(...)
        ok, msg = await runner.initialize()   # Container + CLI install
        result = await runner.run()           # Execute + poll findings
        await runner.shutdown()               # Cleanup
    """

    WORK_DIR = "/opt/pentest"
    OUTPUT_LOG = "/opt/pentest/output.log"
    FINDINGS_LOG = "/opt/pentest/findings.jsonl"
    # The background wrapper writes the CLI's exit status here. A later
    # `wait <pid>` from a different shell cannot retrieve it because the
    # process is not a child of that shell.
    EXIT_CODE_FILE = "/opt/pentest/exit_code"

    # Substrings that mark an output line as worth surfacing at info level.
    _INTERESTING_MARKERS = (
        '[PHASE]', '[COMPLETE]', '[FINDING]', '[VULNERABILITY]',
        'FINDING_START', 'FINDING_END', '[critical]', '[high]',
        'Confirmed', 'Vulnerability found',
    )

    def __init__(
        self,
        scan_id: str,
        target: str,
        cli_provider_id: str = "claude_code",
        methodology_path: Optional[str] = None,
        preferred_model: Optional[str] = None,
        log_callback: Optional[Callable] = None,
        progress_callback: Optional[Callable] = None,
        finding_callback: Optional[Callable] = None,
        auth_headers: Optional[Dict] = None,
        max_runtime: Optional[int] = None,
        token_budget: Optional[Any] = None,
        llm: Optional[Any] = None,
    ):
        """Store configuration and reset runtime state.

        Args:
            scan_id: Identifier used to name the per-scan container.
            target: Target passed to the instruction builders; also the
                default ``affected_endpoint`` for findings that lack one.
            cli_provider_id: Key into ``CLI_PROVIDERS``.
            methodology_path: Methodology file; falls back to the
                ``METHODOLOGY_FILE`` env var, then a built-in default path.
            preferred_model: Optional model name passed via the provider's
                model flag.
            log_callback: Awaitable ``(level, message)`` sink.
            progress_callback: Awaitable ``(pct, phase)`` sink.
            finding_callback: Awaitable ``(finding_dict)`` sink.
            auth_headers: Headers forwarded to the instruction builders.
            max_runtime: Hard runtime cap in seconds; falls back to the
                ``CLI_AGENT_MAX_RUNTIME`` env var (default 1800).
            token_budget: Stored as-is; not read in this module.
            llm: Handle passed to AI-assisted finding extraction.
        """
        self.scan_id = scan_id
        self.target = target
        self.cli_provider_id = cli_provider_id
        self.methodology_path = methodology_path or os.getenv(
            "METHODOLOGY_FILE", "/opt/Prompts-PenTest/pentestcompleto_en.md"
        )
        self.preferred_model = preferred_model
        self.log_callback = log_callback
        self.progress_callback = progress_callback
        self.finding_callback = finding_callback
        self.auth_headers = auth_headers or {}
        self.token_budget = token_budget
        self.llm = llm

        # Runtime config
        self.max_runtime = max_runtime or int(os.getenv("CLI_AGENT_MAX_RUNTIME", "1800"))
        self.poll_interval = 3          # seconds between output checks
        self.stale_timeout = 300        # kill if no new output for 5 min
        self.ai_extract_interval = 300  # AI extraction every 5 min

        # State
        self._sandbox = None
        self._provider: Optional[CLIProvider] = None
        self._oauth_token: Optional[str] = None
        self._cli_pid: Optional[str] = None
        # Shell invocation that was verified to work during initialize()
        # (either the bare command or an npx fallback).
        self._cli_invocation: Optional[str] = None
        self._cancelled = False
        self._output_offset = 0
        self._last_output_time = 0.0
        self._start_time = 0.0
        self._all_output: List[str] = []

        # Parser (imported lazily, as in the rest of this module's
        # project-local imports)
        from backend.core.cli_output_parser import CLIOutputParser
        self._parser = CLIOutputParser()

        # Recon data (set by autonomous_agent before run, for auto_pentest integration)
        self.recon_data: Optional[Dict] = None
        self.existing_findings: Optional[List] = None

    # ── Logging Helpers ────────────────────────────────────────────────────

    async def _log(self, level: str, message: str):
        """Send a message to the log callback (best effort) and the module logger.

        Level names that do not map to an integer ``logging`` level
        (e.g. ``"success"``) are logged at INFO.
        """
        if self.log_callback:
            try:
                await self.log_callback(level, f"[CLI-AGENT] {message}")
            except Exception as e:
                # A broken UI callback must never abort the scan.
                logger.debug(f"[CLI-AGENT] Log callback error: {e}")

        py_level = getattr(logging, level.upper(), None)
        if not isinstance(py_level, int):
            # getattr may return None or a non-level attribute (a function,
            # a class); logger.log() would raise TypeError on those.
            py_level = logging.INFO
        logger.log(py_level, f"[CLI-AGENT] {message}")

    async def _progress(self, pct: int, phase: str):
        """Report progress through the progress callback (best effort)."""
        if self.progress_callback:
            try:
                await self.progress_callback(pct, phase)
            except Exception as e:
                logger.debug(f"[CLI-AGENT] Progress callback error: {e}")

    # ── Lifecycle ──────────────────────────────────────────────────────────

    async def initialize(self) -> Tuple[bool, str]:
        """Initialize: create container, install CLI, upload files.

        Returns:
            ``(True, message)`` on success, ``(False, reason)`` otherwise.
            Exceptions are caught and reported as a failure tuple.
        """
        try:
            # 1. Resolve provider
            self._provider = CLI_PROVIDERS.get(self.cli_provider_id)
            if not self._provider:
                return False, f"Unknown CLI provider: {self.cli_provider_id}"

            await self._log("info", f"Provider: {self._provider.name}")

            # 2. Get OAuth token from SmartRouter
            self._oauth_token = self._get_oauth_token(self.cli_provider_id)
            if not self._oauth_token:
                # Try API key from env
                env_key = self._provider.auth_env
                self._oauth_token = os.getenv(env_key, "")
                if not self._oauth_token:
                    return False, (
                        f"No OAuth token or API key found for {self._provider.name}. "
                        f"Connect via Providers page or set {env_key} in .env"
                    )
                await self._log("info", "Using API key from environment")
            else:
                await self._log("info", "Using OAuth token from SmartRouter")

            # 3. Create Kali sandbox container
            await self._log("info", "Creating Kali sandbox container...")
            try:
                from core.container_pool import get_pool
                pool = get_pool()
                self._sandbox = await pool.get_or_create(
                    scan_id=f"cli-agent-{self.scan_id}",
                    enable_vpn=False,
                )
                await self._log(
                    "info",
                    f"Container ready: {getattr(self._sandbox, 'container_name', 'kali')}",
                )
            except Exception as e:
                return False, f"Failed to create Kali container: {e}"

            # 4. Install Node.js + CLI tool
            await self._log("info", "Installing Node.js...")
            await self._progress(2, "Installing Node.js")

            result = await self._sandbox.execute_raw(
                "which node > /dev/null 2>&1 && echo 'exists' || "
                "(apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nodejs npm > /dev/null 2>&1 && echo 'installed')",
                timeout=120,
            )
            if "exists" in result.stdout:
                await self._log("info", "Node.js already available")
            elif "installed" in result.stdout:
                await self._log("info", "Node.js installed successfully")
            else:
                return False, f"Failed to install Node.js: {result.stderr[:200]}"

            await self._log("info", f"Installing {self._provider.name} CLI...")
            await self._progress(4, f"Installing {self._provider.name}")

            install_cmd = self._provider.install_cmd or f"npm install -g {self._provider.npm_package}"
            install_result = await self._sandbox.execute_raw(install_cmd, timeout=180)
            if install_result.exit_code != 0:
                # Not fatal by itself: the verification below decides.
                await self._log(
                    "warning",
                    f"Install command exited with {install_result.exit_code}: "
                    f"{(install_result.stderr or '')[:200]}",
                )

            # Verify CLI is available
            verify = await self._sandbox.execute_raw(
                f"which {self._provider.command}", timeout=10
            )
            if verify.exit_code != 0:
                # Try npx fallback
                verify2 = await self._sandbox.execute_raw(
                    f"npx --yes {self._provider.npm_package} --version",
                    timeout=60,
                )
                if verify2.exit_code != 0:
                    return False, f"CLI tool '{self._provider.command}' not found after installation"
                # Remember the invocation that actually works so the launch
                # step does not call a command that is not on PATH.
                self._cli_invocation = f"npx --yes {self._provider.npm_package}"
                await self._log("info", f"CLI available via npx")
            else:
                self._cli_invocation = self._provider.command
                await self._log("info", f"CLI installed: {verify.stdout.strip()}")

            # 5. Extra setup if needed
            if self._provider.extra_setup:
                await self._sandbox.execute_raw(self._provider.extra_setup, timeout=60)

            # 6. Upload files
            await self._log("info", "Uploading methodology and instructions...")
            await self._progress(6, "Uploading files")
            await self._upload_files()

            # 7. Inject OAuth token as env var
            await self._inject_token()

            await self._log("info", "Initialization complete")
            await self._progress(8, "Ready to start")
            return True, "CLI Agent initialized successfully"

        except Exception as e:
            logger.exception("[CLI-AGENT] Initialization failed")
            return False, f"Initialization error: {e}"

    async def run(self) -> CLIAgentResult:
        """Execute the CLI agent and poll for findings.

        Returns:
            A ``CLIAgentResult``; failures and cancellation are reported via
            its ``error`` field rather than raised.
        """
        if not self._sandbox or not self._provider:
            return CLIAgentResult(error="Not initialized")

        self._start_time = time.time()
        self._last_output_time = self._start_time

        try:
            # Start CLI process
            await self._log("info", f"Starting {self._provider.name} against {self.target}")
            await self._progress(10, f"{self._provider.name} starting")

            pid = await self._start_cli_process()
            if not pid:
                return CLIAgentResult(error="Failed to start CLI process")

            self._cli_pid = pid
            await self._log("info", f"CLI process started (PID: {pid})")

            # Poll output loop
            return await self._poll_output_loop()

        except asyncio.CancelledError:
            await self._log("warning", "Run cancelled")
            await self._kill_cli_process()
            return CLIAgentResult(error="Cancelled")
        except Exception as e:
            logger.exception("[CLI-AGENT] Run failed")
            await self._log("error", f"Run error: {e}")
            return CLIAgentResult(error=str(e))

    async def shutdown(self):
        """Cleanup: kill CLI process and destroy container."""
        await self._kill_cli_process()
        if self._sandbox:
            try:
                from core.container_pool import get_pool
                await get_pool().destroy(f"cli-agent-{self.scan_id}")
                await self._log("info", "Container destroyed")
            except Exception as e:
                logger.warning(f"[CLI-AGENT] Cleanup error: {e}")
            self._sandbox = None

    def cancel(self):
        """Signal cancellation; the polling loop checks this flag each cycle."""
        self._cancelled = True

    # ── Container Setup (Private) ──────────────────────────────────────────

    def _get_oauth_token(self, provider_id: str) -> Optional[str]:
        """Retrieve OAuth token from SmartRouter ProviderRegistry.

        Returns ``None`` when the registry, an active account, or a
        credential is unavailable, or when the lookup raises.
        """
        try:
            from backend.core.smart_router import get_registry
            registry = get_registry()
            if not registry:
                return None

            accounts = registry.get_active_accounts(provider_id)
            if not accounts:
                return None

            # Use first active account
            account = accounts[0]
            credential = registry.get_credential(account.id)
            if credential:
                logger.info(
                    f"[CLI-AGENT] Got OAuth token for {provider_id} (account: {account.label})"
                )
                return credential
            return None
        except Exception as e:
            logger.debug(f"[CLI-AGENT] SmartRouter token retrieval failed: {e}")
            return None

    def _build_extra_context(self) -> Optional[str]:
        """Summarize recon data for the instructions file.

        Returns ``None`` when no recon data is set; otherwise a newline-joined
        summary of technologies, up to 15 endpoints, and the number of
        already-known findings (possibly an empty string).
        """
        if not self.recon_data:
            return None

        # Include recon context if available (auto_pentest integration)
        endpoints = (self.recon_data.get("endpoints") or [])[:20]
        techs = self.recon_data.get("technologies") or []
        extra_parts: List[str] = []

        if techs:
            extra_parts.append(
                f"Detected technologies: {', '.join(str(t) for t in techs)}"
            )
        if endpoints:
            ep_lines = []
            for e in endpoints[:15]:
                if isinstance(e, dict):
                    ep_lines.append(f"- {e.get('method', 'GET')} {e.get('url', '')}")
                else:
                    # Tolerate plain URL entries instead of crashing on .get()
                    ep_lines.append(f"- GET {e}")
            ep_list = "\n".join(ep_lines)
            extra_parts.append(f"Discovered endpoints:\n{ep_list}")
        if self.existing_findings:
            extra_parts.append(
                f"Already found {len(self.existing_findings)} vulnerabilities. "
                f"Focus on areas not yet tested."
            )
        return "\n".join(extra_parts)

    async def _upload_files(self):
        """Upload methodology file, instructions, and CLAUDE.md to container."""
        from backend.core.cli_instructions_builder import (
            build_instructions, build_claude_md, load_methodology
        )

        # Create work directory
        await self._sandbox.execute_raw(f"mkdir -p {self.WORK_DIR}", timeout=5)

        # Load and upload methodology
        methodology = load_methodology(self.methodology_path)
        if methodology:
            await self._sandbox.upload_file(
                methodology.encode("utf-8"),
                f"{self.WORK_DIR}/methodology.md",
            )
            await self._log("info", f"Uploaded methodology ({len(methodology)} chars)")
        else:
            await self._log("warning", "No methodology file available")

        # Build and upload instructions
        headers = self.auth_headers if self.auth_headers else None
        instructions = build_instructions(
            target=self.target,
            auth_headers=headers,
            methodology_path=f"{self.WORK_DIR}/methodology.md",
            extra_context=self._build_extra_context(),
        )
        await self._sandbox.upload_file(
            instructions.encode("utf-8"),
            f"{self.WORK_DIR}/instructions.md",
        )

        # Build and upload CLAUDE.md (auto-read by Claude Code)
        claude_md = build_claude_md(
            target=self.target,
            auth_headers=headers,
        )
        await self._sandbox.upload_file(
            claude_md.encode("utf-8"),
            f"{self.WORK_DIR}/CLAUDE.md",
        )

    async def _inject_token(self):
        """Inject OAuth/API token as environment variable in container."""
        if not self._oauth_token or not self._provider:
            return

        env_var = self._provider.auth_env
        # Use base64 encoding to safely pass token with special chars
        encoded = base64.b64encode(self._oauth_token.encode()).decode()
        export_line = f'export {env_var}="$(echo {encoded} | base64 -d)"'

        # Write to .bashrc so it's available to background processes
        await self._sandbox.execute_raw(
            f'echo \'{export_line}\' >> /root/.bashrc',
            timeout=5,
        )
        # Also write to a env file that can be sourced; keep it private
        # because it holds a credential.
        await self._sandbox.execute_raw(
            f'echo \'{export_line}\' > {self.WORK_DIR}/.env'
            f' && chmod 600 {self.WORK_DIR}/.env',
            timeout=5,
        )
        await self._log("info", f"Token injected as ${env_var}")

    # ── Execution (Private) ────────────────────────────────────────────────

    async def _start_cli_process(self) -> Optional[str]:
        """Start the CLI tool as a background process in the container.

        The prompt is the content of ``instructions.md``, passed as the final
        positional argument. Output is redirected to ``OUTPUT_LOG`` and the
        CLI's exit status is written to ``EXIT_CODE_FILE`` when it finishes.

        Returns:
            The PID (as a string) of the background wrapper shell, or
            ``None`` if no provider is set or no PID could be read.
        """
        provider = self._provider
        if not provider:
            return None

        # Build model flag; the model name is caller-supplied, so quote it.
        model_part = ""
        if self.preferred_model and provider.model_flag:
            model_part = f"{provider.model_flag} {shlex.quote(self.preferred_model)} "

        # Prefer the invocation verified during initialize() (may be npx).
        invocation = getattr(self, "_cli_invocation", None) or provider.command

        # All providers currently share the same launch shape.
        cli_cmd = (
            f"cd {self.WORK_DIR} && "
            f"source {self.WORK_DIR}/.env && "
            f"{invocation} {provider.non_interactive_flags} "
            f"{model_part}"
            f"\"$(cat {self.WORK_DIR}/instructions.md)\""
        )
        # Persist the exit status: it cannot be recovered later with `wait`
        # from another shell.
        script = f"{cli_cmd}; echo $? > {self.EXIT_CODE_FILE}"

        # Run as background process with output capture. shlex.quote keeps
        # the script intact even if it contains single quotes.
        launch_cmd = (
            f"rm -f {self.EXIT_CODE_FILE}; "
            f"nohup bash -c {shlex.quote(script)} "
            f"> {self.OUTPUT_LOG} 2>&1 & echo $!"
        )

        result = await self._sandbox.execute_raw(launch_cmd, timeout=15)
        pid = result.stdout.strip().split('\n')[-1].strip()

        if pid and pid.isdigit():
            return pid

        await self._log(
            "error",
            f"Failed to get PID. stdout: {result.stdout[:200]}, stderr: {result.stderr[:200]}",
        )
        return None

    async def _log_output_lines(self, text: str, routine_lines: int) -> int:
        """Log marker lines at info level and every 20th other line at debug.

        Args:
            text: Newly read output chunk.
            routine_lines: Count of non-marker lines seen so far.

        Returns:
            The updated non-marker line count.
        """
        for line in text.split('\n'):
            line_s = line.strip()
            if not line_s:
                continue
            # Always log phase markers and findings
            if any(kw in line_s for kw in self._INTERESTING_MARKERS):
                await self._log("info", line_s[:300])
                continue
            routine_lines += 1
            if routine_lines % 20 == 0:
                # Sample routine output to avoid log spam
                await self._log("debug", line_s[:200])
        return routine_lines

    async def _emit_finding(self, finding_dict: Dict, all_findings: List[Dict]):
        """Default the endpoint, record the finding, and notify the callback."""
        finding_dict["affected_endpoint"] = (
            finding_dict.get("affected_endpoint") or self.target
        )
        all_findings.append(finding_dict)
        if self.finding_callback:
            try:
                await self.finding_callback(finding_dict)
            except Exception as e:
                logger.debug(f"Finding callback error: {e}")

    async def _ingest_chunk(self, text: str, all_findings: List[Dict]):
        """Parse findings out of an output chunk and emit each one."""
        for finding in self._parser.parse_chunk(text):
            await self._emit_finding(finding.to_dict(), all_findings)
            await self._log(
                "success",
                f"Finding: {finding.title} [{finding.severity.upper()}]",
            )

    async def _read_exit_code(self) -> int:
        """Read the CLI exit status from ``EXIT_CODE_FILE``.

        Returns:
            The recorded exit code, or ``-1`` when no process was started,
            the file is missing (e.g. the process was killed), or the read
            fails.
        """
        if not self._cli_pid or not self._sandbox:
            return -1
        try:
            result = await self._sandbox.execute_raw(
                f"cat {self.EXIT_CODE_FILE} 2>/dev/null", timeout=5
            )
        except Exception as e:
            logger.debug(f"[CLI-AGENT] Exit code read error: {e}")
            return -1

        lines = (result.stdout or "").strip().split('\n')
        code = lines[-1].strip()
        return int(code) if code.isdigit() else -1

    async def _poll_output_loop(self) -> CLIAgentResult:
        """Main polling loop: read output, parse findings, check process status.

        The loop ends when the run is cancelled, the max runtime or stale
        timeout is exceeded, or the CLI process exits.
        """
        last_ai_extract = time.time()
        all_findings: List[Dict] = []
        raw_output_parts: List[str] = []
        routine_lines = 0

        while not self._cancelled:
            elapsed = time.time() - self._start_time

            # Check max runtime
            if elapsed > self.max_runtime:
                await self._log(
                    "warning",
                    f"Max runtime ({self.max_runtime}s) exceeded, stopping",
                )
                await self._kill_cli_process()
                break

            # Read new output
            new_text = await self._read_new_output()
            if new_text:
                self._last_output_time = time.time()
                raw_output_parts.append(new_text)
                routine_lines = await self._log_output_lines(new_text, routine_lines)
                await self._ingest_chunk(new_text, all_findings)

            # Check stale timeout (no output for too long)
            stale_elapsed = time.time() - self._last_output_time
            if stale_elapsed > self.stale_timeout:
                await self._log("warning", f"No output for {int(stale_elapsed)}s, stopping")
                await self._kill_cli_process()
                break

            # AI extraction on accumulated unparsed text (every 5 min)
            if (time.time() - last_ai_extract > self.ai_extract_interval
                    and self.llm
                    and self._parser.get_unparsed_text(clear=False)):
                last_ai_extract = time.time()
                await self._run_ai_extraction(all_findings)

            # Check if CLI process is still running
            if not await self._is_process_alive():
                await self._log("info", "CLI process has exited")
                # Read any remaining output
                remaining = await self._read_new_output()
                if remaining:
                    raw_output_parts.append(remaining)
                    await self._ingest_chunk(remaining, all_findings)
                break

            # Update progress (time-based heuristic)
            pct = min(90, 10 + int((elapsed / self.max_runtime) * 80))
            phase = f"{self._provider.name} testing ({int(elapsed)}s)"
            if self._parser.phases:
                phase = f"{self._parser.phases[-1]} ({int(elapsed)}s)"
            await self._progress(pct, phase)

            await asyncio.sleep(self.poll_interval)

        if self._cancelled:
            # Do not leave the CLI running after a cooperative cancel.
            await self._kill_cli_process()

        # Final AI extraction on any remaining unparsed text
        if self.llm:
            await self._run_ai_extraction(all_findings)

        exit_code = await self._read_exit_code()

        duration = time.time() - self._start_time
        raw_output = "\n".join(raw_output_parts)

        await self._log("info", f"Completed: {len(all_findings)} findings, "
                        f"{self._parser.total_findings} total parsed, "
                        f"{int(duration)}s elapsed")
        await self._progress(95, "CLI Agent complete")

        return CLIAgentResult(
            findings=all_findings,
            raw_output=raw_output[:500000],  # Cap raw output at 500KB
            duration=duration,
            exit_code=exit_code,
            phases_completed=self._parser.phases,
            total_output_lines=len(self._all_output),
            cli_provider=self.cli_provider_id,
        )

    async def _read_new_output(self) -> str:
        """Read new output from the CLI's log file since last check.

        Advances the byte offset by the UTF-8 length of what was returned and
        appends the lines to the accumulated output. Returns an empty string
        when nothing new is available or the read fails.
        """
        try:
            # Use dd to read from offset (more reliable than tail -c +N)
            result = await self._sandbox.execute_raw(
                f"dd if={self.OUTPUT_LOG} bs=1 skip={self._output_offset} 2>/dev/null",
                timeout=10,
            )
            if result.stdout:
                self._output_offset += len(result.stdout.encode('utf-8'))
                self._all_output.extend(result.stdout.split('\n'))
                return result.stdout
        except Exception as e:
            logger.debug(f"[CLI-AGENT] Read output error: {e}")
        return ""

    async def _is_process_alive(self) -> bool:
        """Check if the CLI process is still running (via ``kill -0``)."""
        if not self._cli_pid:
            return False
        try:
            result = await self._sandbox.execute_raw(
                f"kill -0 {self._cli_pid} 2>/dev/null && echo alive || echo dead",
                timeout=5,
            )
            return "alive" in result.stdout
        except Exception:
            return False

    async def _kill_cli_process(self):
        """Kill the CLI process in the container (TERM, then KILL after 1s)."""
        if not self._cli_pid or not self._sandbox:
            return
        try:
            await self._sandbox.execute_raw(
                f"kill {self._cli_pid} 2>/dev/null; sleep 1; kill -9 {self._cli_pid} 2>/dev/null",
                timeout=10,
            )
            await self._log("info", f"CLI process {self._cli_pid} killed")
        except Exception as e:
            logger.debug(f"[CLI-AGENT] Kill error: {e}")

    @staticmethod
    def _finding_key(finding_dict: Dict) -> str:
        """Build the dedup key (title|endpoint|severity) for a stored finding."""
        return (
            f"{finding_dict.get('title', '')}|"
            f"{finding_dict.get('affected_endpoint', '')}|"
            f"{finding_dict.get('severity', '')}"
        )

    async def _run_ai_extraction(self, all_findings: List[Dict]):
        """Run AI-assisted finding extraction on unparsed text.

        Skips chunks shorter than 200 characters. New findings are appended
        to ``all_findings`` (mutated in place) unless a finding with the same
        title, endpoint and severity is already present.
        """
        unparsed = self._parser.get_unparsed_text(clear=True)
        if not unparsed or len(unparsed) < 200:
            return

        try:
            from backend.core.cli_output_parser import ai_extract_findings
            ai_findings = await ai_extract_findings(unparsed, self.llm)

            # Build the set once instead of once per candidate.
            seen = {self._finding_key(f) for f in all_findings}

            for finding in ai_findings:
                finding_dict = finding.to_dict()
                # Default the endpoint before keying, because stored findings
                # were keyed after the same defaulting.
                endpoint = finding_dict.get("affected_endpoint") or self.target
                key = f"{finding.title}|{endpoint}|{finding.severity}"
                if key in seen:
                    continue
                seen.add(key)

                await self._emit_finding(finding_dict, all_findings)
                await self._log(
                    "success",
                    f"AI-extracted: {finding.title} [{finding.severity.upper()}]",
                )
        except Exception as e:
            logger.debug(f"[CLI-AGENT] AI extraction error: {e}")

    # ── Status ──────────────────────────────────────────────────────────────

    def get_status(self) -> Dict:
        """Return current runner status."""
        elapsed = time.time() - self._start_time if self._start_time else 0
        return {
            "provider": self.cli_provider_id,
            "provider_name": self._provider.name if self._provider else "",
            "target": self.target,
            "running": self._cli_pid is not None and not self._cancelled,
            "elapsed": int(elapsed),
            "findings_count": self._parser.total_findings,
            "phases": self._parser.phases,
            "output_lines": len(self._all_output),
            "is_complete": self._parser.is_complete,
        }