Files
shannon/src/ai/claude-executor.js

674 lines
27 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
import { ProgressIndicator } from '../progress-indicator.js';
import { timingResults, costResults, Timer, formatDuration } from '../utils/metrics.js';
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace } from '../utils/git-manager.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { filterJsonToolCalls, getAgentPrefix } from '../utils/output-formatter.js';
import { generateSessionLogPath } from '../session-manager.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/src/index.js';
// ES modules do not provide __filename/__dirname, so derive both from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
 * Translate an agent name into the prompt name used as the MCP_AGENT_MAPPING key.
 *
 * Fixed aliases are consulted first. Otherwise a trailing `-vuln` or `-exploit`
 * suffix is moved to the front as a prefix. Any other name is returned unchanged.
 *
 * @param {string} agentName - Agent name (e.g., 'xss-vuln', 'injection-exploit')
 * @returns {string} Prompt name (e.g., 'vuln-xss', 'exploit-injection')
 */
function agentNameToPromptName(agentName) {
  // Names that do not follow the suffix → prefix convention
  const fixedAliases = new Map([
    ['pre-recon', 'pre-recon-code'],
    ['report', 'report-executive'],
    ['recon', 'recon'],
  ]);
  if (fixedAliases.has(agentName)) {
    return fixedAliases.get(agentName);
  }

  // {type}-vuln → vuln-{type}, then {type}-exploit → exploit-{type}
  const suffixRules = [
    [/^(.+)-vuln$/, 'vuln'],
    [/^(.+)-exploit$/, 'exploit'],
  ];
  for (const [pattern, prefix] of suffixRules) {
    const parsed = agentName.match(pattern);
    if (parsed) {
      return `${prefix}-${parsed[1]}`;
    }
  }

  // Nothing matched: the agent name doubles as the prompt name
  return agentName;
}
// Checks an agent run against the validator registered under its agent name.
// Resolves to false when the run itself was unsuccessful or when validation throws,
// to true when no validator is registered for the agent, and otherwise to whatever
// the validator returned for sourceDir.
async function validateAgentOutput(result, agentName, sourceDir) {
  console.log(chalk.blue(` 🔍 Validating ${agentName} agent output`));

  try {
    // A run only counts as complete when it both succeeded and produced a result
    const runProducedResult = result.success && result.result;
    if (!runProducedResult) {
      console.log(chalk.red(` ❌ Validation failed: Agent execution was unsuccessful`));
      return false;
    }

    // Validators are keyed directly by agent name
    const checkDeliverables = AGENT_VALIDATORS[agentName];
    if (!checkDeliverables) {
      console.log(chalk.yellow(` ⚠️ No validator found for agent "${agentName}" - assuming success`));
      console.log(chalk.green(` ✅ Validation passed: Unknown agent with successful result`));
      return true;
    }

    console.log(chalk.blue(` 📋 Using validator for agent: ${agentName}`));
    console.log(chalk.blue(` 📂 Source directory: ${sourceDir}`));

    const deliverablesOk = await checkDeliverables(sourceDir);
    const verdictLine = deliverablesOk
      ? chalk.green(` ✅ Validation passed: Required files/structure present`)
      : chalk.red(` ❌ Validation failed: Missing required deliverable files`);
    console.log(verdictLine);
    return deliverablesOk;
  } catch (validationError) {
    // A validator that blows up is treated the same as a failed validation
    console.log(chalk.red(` ❌ Validation failed with error: ${validationError.message}`));
    return false;
  }
}
// Run Claude Code with SDK - Maximum Autonomy
// WARNING: This is a low-level function. Use runClaudePromptWithRetry() for agent execution to ensure:
// - Retry logic and error handling
// - Output validation
// - Prompt snapshotting for debugging
// - Git checkpoint/rollback safety
/**
 * Stream one Claude Agent SDK query for a single agent attempt, echoing progress to
 * the console, recording cost/timing in the shared metrics tables, and mirroring
 * events into the audit session when one is supplied.
 *
 * Errors raised while setting up or consuming the query are caught here and reported
 * through the return value (`success: false`) instead of being thrown.
 *
 * @param {string} prompt - Prompt text sent to the model
 * @param {string} sourceDir - Used as the SDK `cwd`, passed to the helper MCP server, and as the location of `error.log`
 * @param {string} [allowedTools='Read'] - Not read by this function; kept so existing call sites keep working
 * @param {string} [context=''] - Optional text prepended to the prompt
 * @param {string} [description='Claude analysis'] - Display label; its substrings also select the output mode
 * @param {?string} [agentName=null] - Agent name used to look up a Playwright MCP server in MCP_AGENT_MAPPING
 * @param {Function} [colorFn=chalk.cyan] - Colorizer applied to assistant output
 * @param {?Object} [sessionMetadata=null] - When it carries `webUrl` and `id`, a log file path is derived and returned
 * @param {?Object} [auditSession=null] - Receives `logEvent(...)` calls when provided
 * @param {number} [attemptNumber=1] - Attempt counter embedded in the log file name
 * @returns {Promise<Object>} On success `{ result, success: true, duration, turns, cost, partialCost, apiErrorDetected, logFile? }`;
 *   on failure `{ error, errorType, prompt, success: false, duration, cost, retryable }`
 */
async function runClaudePrompt(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null, auditSession = null, attemptNumber = 1) {
  // One slug drives the timer name, the cost/timing table keys, and the log file name
  const agentKey = description.toLowerCase().replace(/\s+/g, '-');
  const timer = new Timer(`agent-${agentKey}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
  let totalCost = 0;
  let partialCost = 0; // Track partial cost for crash safety

  // Auto-detect execution mode to adjust logging behavior
  const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent');
  const useCleanOutput = isParallelExecution ||
    description.includes('Pre-recon agent') ||
    description.includes('Recon agent') ||
    description.includes('Executive Summary and Report Cleanup');

  // Label used in the final "complete!" / "failed" line of progress-indicator agents
  const describeAgentType = () => {
    if (description.includes('Pre-recon')) return 'Pre-recon analysis';
    if (description.includes('Recon')) return 'Reconnaissance';
    if (description.includes('Report')) return 'Report generation';
    return 'Analysis';
  };

  // Setup progress indicator for clean output agents
  let progressIndicator = null;
  if (useCleanOutput) {
    const agentType = description.includes('Pre-recon') ? 'pre-reconnaissance' :
      description.includes('Recon') ? 'reconnaissance' :
      description.includes('Report') ? 'report generation' : 'analysis';
    progressIndicator = new ProgressIndicator(`Running ${agentType}...`);
  }

  // NOTE: Logging now handled by AuditSession (append-only, crash-safe)
  // Legacy log path generation kept for compatibility
  let logFilePath = null;
  if (sessionMetadata && sessionMetadata.webUrl && sessionMetadata.id) {
    const timestamp = new Date().toISOString().replace(/T/, '_').replace(/[:.]/g, '-').slice(0, 19);
    const logDir = generateSessionLogPath(sessionMetadata.webUrl, sessionMetadata.id);
    logFilePath = path.join(logDir, `${timestamp}_${agentKey}_attempt-${attemptNumber}.log`);
  } else {
    console.log(chalk.blue(` 🤖 Running Claude Code: ${description}...`));
  }

  // Declared outside the try so the catch block can report how far the run got
  let turnCount = 0;

  try {
    // Create MCP server with target directory context
    const shannonHelperServer = createShannonHelperServer(sourceDir);

    // Look up agent's assigned Playwright MCP server
    // Convert agent name (e.g., 'xss-vuln') to prompt name (e.g., 'vuln-xss')
    let playwrightMcpName = null;
    if (agentName) {
      const promptName = agentNameToPromptName(agentName);
      playwrightMcpName = MCP_AGENT_MAPPING[promptName];
      if (playwrightMcpName) {
        console.log(chalk.gray(` 🎭 Assigned ${agentName} → ${playwrightMcpName}`));
      }
    }

    // Configure MCP servers: shannon-helper (SDK) + playwright-agentN (stdio)
    const mcpServers = {
      'shannon-helper': shannonHelperServer,
    };

    // Add Playwright MCP server if this agent needs browser automation
    if (playwrightMcpName) {
      const userDataDir = `/tmp/${playwrightMcpName}`;
      mcpServers[playwrightMcpName] = {
        type: 'stdio',
        command: 'npx',
        args: ['@playwright/mcp@latest', '--isolated', '--user-data-dir', userDataDir],
        env: {
          ...process.env,
          PLAYWRIGHT_HEADLESS: 'true', // Ensure headless mode for security and CI compatibility
        },
      };
    }

    const options = {
      model: 'claude-sonnet-4-5-20250929', // Use latest Claude 4.5 Sonnet
      maxTurns: 10_000, // Maximum turns for autonomous work
      cwd: sourceDir, // Set working directory using SDK option
      permissionMode: 'bypassPermissions', // Bypass all permission checks for pentesting
      mcpServers,
    };

    // SDK Options only shown for verbose agents (not clean output)
    if (!useCleanOutput) {
      console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
    }

    let result = null;
    let apiErrorDetected = false;

    // Start progress indicator for clean output agents
    if (progressIndicator) {
      progressIndicator.start();
    }

    for await (const message of query({ prompt: fullPrompt, options })) {
      if (message.type === 'assistant') {
        turnCount++;
        const content = Array.isArray(message.message.content)
          ? message.message.content.map(c => c.text || JSON.stringify(c)).join('\n')
          : message.message.content;

        if (useCleanOutput) {
          // Clean output for all agents: filter JSON tool calls but show meaningful text
          const cleanedContent = filterJsonToolCalls(content);
          if (cleanedContent.trim()) {
            // Temporarily stop progress indicator to show output
            if (progressIndicator) {
              progressIndicator.stop();
            }
            if (isParallelExecution) {
              // Compact output for parallel agents with prefixes
              const prefix = getAgentPrefix(description);
              console.log(colorFn(`${prefix} ${cleanedContent}`));
            } else {
              // Full turn output for single agents
              console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
              console.log(colorFn(` ${cleanedContent}`));
            }
            // Restart progress indicator after output
            if (progressIndicator) {
              progressIndicator.start();
            }
          }
        } else {
          // Full streaming output - show complete messages with specialist color
          console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
          console.log(colorFn(` ${content}`));
        }

        // Log to audit system (crash-safe, append-only)
        if (auditSession) {
          await auditSession.logEvent('llm_response', {
            turn: turnCount,
            content,
            timestamp: new Date().toISOString()
          });
        }

        // Check for API error patterns in assistant message content
        if (content && typeof content === 'string') {
          const lowerContent = content.toLowerCase();
          if (lowerContent.includes('api error') || lowerContent.includes('terminated')) {
            apiErrorDetected = true;
            console.log(chalk.red(` ⚠️ API Error detected in assistant response: ${content.trim()}`));
          }
        }
      } else if (message.type === 'system' && message.subtype === 'init') {
        // Show useful system info only for verbose agents
        if (!useCleanOutput) {
          console.log(chalk.blue(` Model: ${message.model}, Permission: ${message.permissionMode}`));
          if (message.mcp_servers && message.mcp_servers.length > 0) {
            const mcpStatus = message.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
            console.log(chalk.blue(` 📦 MCP: ${mcpStatus}`));
          }
        }
      } else if (message.type === 'user') {
        // Skip user messages (these are our own inputs echoed back)
        continue;
      } else if (message.type === 'tool_use') {
        console.log(chalk.yellow(`\n 🔧 Using Tool: ${message.name}`));
        if (message.input && Object.keys(message.input).length > 0) {
          console.log(chalk.gray(` Input: ${JSON.stringify(message.input, null, 2)}`));
        }
        // Log tool start event
        if (auditSession) {
          await auditSession.logEvent('tool_start', {
            toolName: message.name,
            parameters: message.input,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === 'tool_result') {
        console.log(chalk.green(` ✅ Tool Result:`));
        if (message.content) {
          // Show tool results but truncate if too long
          const resultStr = typeof message.content === 'string' ? message.content : JSON.stringify(message.content, null, 2);
          if (resultStr.length > 500) {
            console.log(chalk.gray(` ${resultStr.slice(0, 500)}...\n [Result truncated - ${resultStr.length} total chars]`));
          } else {
            console.log(chalk.gray(` ${resultStr}`));
          }
        }
        // Log tool end event
        if (auditSession) {
          await auditSession.logEvent('tool_end', {
            result: message.content,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === 'result') {
        result = message.result;
        const cost = message.total_cost_usd || 0;

        // Completion summary shared by both output modes: duration, cost, stop reason
        console.log(chalk.magenta(`\n 🏁 COMPLETED:`));
        console.log(chalk.gray(` ⏱️ Duration: ${(message.duration_ms/1000).toFixed(1)}s, Cost: $${cost.toFixed(4)}`));
        if (message.subtype === 'error_max_turns') {
          console.log(chalk.red(` ⚠️ Stopped: Hit maximum turns limit`));
        } else if (message.subtype === 'error_during_execution') {
          console.log(chalk.red(` ❌ Stopped: Execution error`));
        }
        if (message.permission_denials && message.permission_denials.length > 0) {
          console.log(chalk.yellow(` 🚫 ${message.permission_denials.length} permission denials`));
        }

        // Verbose agents additionally echo the result text (truncated when long)
        if (!useCleanOutput && result && typeof result === 'string') {
          if (result.length > 1000) {
            console.log(chalk.magenta(` 📄 ${result.slice(0, 1000)}... [${result.length} total chars]`));
          } else {
            console.log(chalk.magenta(` 📄 ${result}`));
          }
        }

        // Track cost for all agents
        costResults.agents[agentKey] = cost;
        costResults.total += cost;

        // Store cost for return value and partial tracking
        totalCost = cost;
        partialCost = cost;
        break; // The result message ends the run; stop consuming the stream
      } else {
        // Log any other message types we might not be handling
        console.log(chalk.gray(` 💬 ${message.type}: ${JSON.stringify(message, null, 2)}`));
      }
    }

    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // API error detection is logged but not immediately failed
    // Let the retry logic handle validation first
    if (apiErrorDetected) {
      console.log(chalk.yellow(` ⚠️ API Error detected in ${description} - will validate deliverables before failing`));
    }

    // NOTE: Log writing now handled by AuditSession (crash-safe, append-only)
    // Show completion messages based on agent type
    if (progressIndicator) {
      // Agents with progress indicator
      progressIndicator.finish(`${describeAgentType()} complete! (${turnCount} turns, ${formatDuration(duration)})`);
    } else if (isParallelExecution) {
      // Compact completion for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.green(`${prefix} ✅ Complete (${turnCount} turns, ${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose completion for remaining agents
      console.log(chalk.green(` ✅ Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`));
    }

    // Return result with log file path for all agents
    const returnData = {
      result,
      success: true,
      duration,
      turns: turnCount,
      cost: totalCost,
      partialCost, // Include partial cost for crash recovery
      apiErrorDetected
    };
    if (logFilePath) {
      returnData.logFile = logFilePath;
    }
    return returnData;
  } catch (error) {
    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;
    // Classify once; the verdict is logged, persisted, and returned below
    const retryable = isRetryableError(error);

    // Log error to audit system
    if (auditSession) {
      await auditSession.logEvent('error', {
        message: error.message,
        errorType: error.constructor.name,
        stack: error.stack,
        duration,
        turns: turnCount,
        timestamp: new Date().toISOString()
      });
    }

    // Show error messages based on agent type
    if (progressIndicator) {
      // Agents with progress indicator
      progressIndicator.stop();
      console.log(chalk.red(`${describeAgentType()} failed (${formatDuration(duration)})`));
    } else if (isParallelExecution) {
      // Compact error for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.red(`${prefix} ❌ Failed (${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose error for remaining agents
      console.log(chalk.red(` ❌ Claude Code failed: ${description} (${formatDuration(duration)})`));
    }

    console.log(chalk.red(` Error Type: ${error.constructor.name}`));
    console.log(chalk.red(` Message: ${error.message}`));
    console.log(chalk.gray(` Agent: ${description}`));
    console.log(chalk.gray(` Working Directory: ${sourceDir}`));
    console.log(chalk.gray(` Retryable: ${retryable ? 'Yes' : 'No'}`));

    // Log additional context if available
    if (error.code) {
      console.log(chalk.gray(` Error Code: ${error.code}`));
    }
    if (error.status) {
      console.log(chalk.gray(` HTTP Status: ${error.status}`));
    }

    // Save detailed error to log file for debugging
    try {
      const errorLog = {
        timestamp: new Date().toISOString(),
        agent: description,
        error: {
          name: error.constructor.name,
          message: error.message,
          code: error.code,
          status: error.status,
          stack: error.stack
        },
        context: {
          sourceDir,
          prompt: fullPrompt.slice(0, 200) + '...',
          retryable
        },
        duration
      };
      const logPath = path.join(sourceDir, 'error.log');
      await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
    } catch (logError) {
      // Ignore logging errors to avoid cascading failures
      console.log(chalk.gray(` (Failed to write error log: ${logError.message})`));
    }

    // Failures are reported through the return value; callers must check `success`
    return {
      error: error.message,
      errorType: error.constructor.name,
      prompt: fullPrompt.slice(0, 100) + '...',
      success: false,
      duration,
      cost: partialCost, // Include partial cost on error
      retryable
    };
  }
}
// PREFERRED: Production-ready Claude agent execution with full orchestration
// This is the standard function for all agent execution. Provides:
// - Intelligent retry logic with exponential backoff
// - Output validation to ensure deliverables are created
// - Prompt snapshotting for debugging and reproducibility
// - Git checkpoint/rollback safety for workspace protection
// - Comprehensive error handling and logging
// - Crash-safe audit logging via AuditSession
/**
 * Run an agent up to three times, checkpointing the git workspace before each
 * attempt, validating the deliverables after each run, and rolling the workspace
 * back whenever an attempt fails.
 *
 * @param {string} prompt - Prompt text for the agent
 * @param {string} sourceDir - Workspace directory (git checkpoint/rollback target and SDK cwd)
 * @param {string} [allowedTools='Read'] - Forwarded to runClaudePrompt
 * @param {string} [context=''] - Optional text prepended to the prompt
 * @param {string} [description='Claude analysis'] - Display label for logs
 * @param {?string} [agentName=null] - Agent name; selects the validator and, with sessionMetadata, enables audit logging
 * @param {Function} [colorFn=chalk.cyan] - Colorizer forwarded to runClaudePrompt
 * @param {?Object} [sessionMetadata=null] - Session info used to create the AuditSession
 * @returns {Promise<Object>} The successful runClaudePrompt result
 * @throws {PentestError} When output validation still fails on the final attempt
 * @throws {Error} The last failure when it is non-retryable or attempts are exhausted
 */
export async function runClaudePromptWithRetry(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null) {
  const maxRetries = 3;
  let lastError;
  let retryContext = context; // Preserve context between retries

  console.log(chalk.cyan(`🚀 Starting ${description} with ${maxRetries} max attempts`));

  // Initialize audit session (crash-safe logging)
  let auditSession = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Create checkpoint before each attempt
    await createGitCheckpoint(sourceDir, description, attempt);

    // Start agent tracking in audit system (saves prompt snapshot automatically)
    if (auditSession) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }

    try {
      const result = await runClaudePrompt(prompt, sourceDir, allowedTools, retryContext, description, agentName, colorFn, sessionMetadata, auditSession, attempt);

      // runClaudePrompt reports execution failures through its return value rather
      // than throwing. Convert such a result into an Error so the catch block below
      // records it, rolls the workspace back, and applies the retry policy. Without
      // this, the loop would move on silently and finally throw `undefined`.
      if (!result.success) {
        const executionError = new Error(result.error || `${description} failed without an error message`);
        executionError.errorType = result.errorType;
        executionError.duration = result.duration;
        executionError.cost = result.cost;
        // Reuse the retryability verdict computed on the original error object
        executionError.retryableHint = result.retryable;
        throw executionError;
      }

      // Validate output after successful run
      const validationPassed = await validateAgentOutput(result, agentName, sourceDir);
      if (validationPassed) {
        // Check if API error was detected but validation passed
        if (result.apiErrorDetected) {
          console.log(chalk.yellow(`📋 Validation: Ready for exploitation despite API error warnings`));
        }

        // Record successful attempt in audit system
        if (auditSession) {
          await auditSession.endAgent(agentName, {
            attemptNumber: attempt,
            duration_ms: result.duration,
            cost_usd: result.cost || 0,
            success: true,
            checkpoint: await getGitCommitHash(sourceDir)
          });
        }

        // Commit successful changes (will include the snapshot)
        await commitGitSuccess(sourceDir, description);
        console.log(chalk.green.bold(`🎉 ${description} completed successfully on attempt ${attempt}/${maxRetries}`));
        return result;
      }

      // Agent completed but output validation failed
      console.log(chalk.yellow(`⚠️ ${description} completed but output validation failed`));

      // Record failed validation attempt in audit system
      if (auditSession) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: result.duration,
          cost_usd: result.partialCost || result.cost || 0,
          success: false,
          error: 'Output validation failed',
          isFinalAttempt: attempt === maxRetries
        });
      }

      // If API error detected AND validation failed, this is a retryable error
      if (result.apiErrorDetected) {
        console.log(chalk.yellow(`⚠️ API Error detected with validation failure - treating as retryable`));
        lastError = new Error('API Error: terminated with validation failure');
      } else {
        lastError = new Error('Output validation failed');
      }

      if (attempt < maxRetries) {
        // Rollback contaminated workspace
        await rollbackGitWorkspace(sourceDir, 'validation failure');
        continue;
      }

      // FAIL FAST - Don't continue with broken pipeline
      // NOTE(review): this throw lands in the catch below, which calls endAgent a
      // second time for the same attempt - confirm the audit log tolerates that.
      throw new PentestError(
        `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
        'validation',
        false,
        { description, sourceDir, attemptsExhausted: maxRetries }
      );
    } catch (error) {
      lastError = error;

      // Record failed attempt in audit system
      if (auditSession) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: error.duration || 0,
          cost_usd: error.cost || 0,
          success: false,
          error: error.message,
          isFinalAttempt: attempt === maxRetries
        });
      }

      // Check if error is retryable; execution failures carry a precomputed verdict
      const retryable = typeof error.retryableHint === 'boolean'
        ? error.retryableHint
        : isRetryableError(error);
      if (!retryable) {
        console.log(chalk.red(`${description} failed with non-retryable error: ${error.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw error;
      }

      if (attempt < maxRetries) {
        // Rollback for clean retry
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');
        const delay = getRetryDelay(error, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`⚠️ ${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(` Error: ${error.message}`));
        console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));

        // Preserve any partial results for next retry
        if (error.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(error.partialResults)}`;
        }
        await new Promise(resolve => setTimeout(resolve, delay));
      } else {
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(` Final error: ${error.message}`));
      }
    }
  }

  // Attempts exhausted: surface the most recent failure
  throw lastError;
}
// Resolves to the trimmed output of `git rev-parse HEAD` run inside sourceDir,
// or to null when the command fails for any reason.
async function getGitCommitHash(sourceDir) {
  let headHash = null;
  try {
    const { stdout } = await $`cd ${sourceDir} && git rev-parse HEAD`;
    headHash = stdout.trim();
  } catch {
    // Best effort: callers treat a missing hash as "no checkpoint available"
  }
  return headHash;
}