// Mirror of https://github.com/KeygraphHQ/shannon.git
// Synced 2026-02-12 17:22:50 +00:00 · 674 lines · 27 KiB · JavaScript
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { fileURLToPath } from 'url';
import { dirname } from 'path';

import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
import { ProgressIndicator } from '../progress-indicator.js';
import { timingResults, costResults, Timer, formatDuration } from '../utils/metrics.js';
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace } from '../utils/git-manager.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { filterJsonToolCalls, getAgentPrefix } from '../utils/output-formatter.js';
import { generateSessionLogPath } from '../session-manager.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/src/index.js';

// ES modules do not provide the CommonJS __filename/__dirname globals, so they are
// rebuilt here from this module's URL (import.meta.url).
// NOTE(review): __dirname is not referenced by any function visible in this file, and
// __filename is only used to derive it — confirm before removing either.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/**
 * Convert agent name to prompt name for MCP_AGENT_MAPPING lookup.
 *
 * Mapping rules, checked in this order:
 *   - 'pre-recon'       → 'pre-recon-code'
 *   - 'report'          → 'report-executive'
 *   - '{type}-vuln'     → 'vuln-{type}'
 *   - '{type}-exploit'  → 'exploit-{type}'
 *   - anything else (including 'recon') is returned unchanged.
 *
 * @param {string} agentName - Agent name (e.g., 'xss-vuln', 'injection-exploit')
 * @returns {string} Prompt name (e.g., 'vuln-xss', 'exploit-injection'). A non-string
 *   argument is returned as-is instead of raising a TypeError.
 */
function agentNameToPromptName(agentName) {
  // Nothing below can match a non-string; hand it back rather than crash on .match().
  if (typeof agentName !== 'string') return agentName;

  // Special cases whose prompt name is not a simple reordering
  if (agentName === 'pre-recon') return 'pre-recon-code';
  if (agentName === 'report') return 'report-executive';

  // Pattern: {type}-vuln → vuln-{type}, {type}-exploit → exploit-{type}
  const phaseMatch = agentName.match(/^(.+)-(vuln|exploit)$/);
  if (phaseMatch) {
    const [, type, phase] = phaseMatch;
    return `${phase}-${type}`;
  }

  // Default: return as-is
  return agentName;
}

/**
 * Decide whether an agent run produced acceptable output.
 *
 * The run must report `success` and a non-empty `result`. If AGENT_VALIDATORS has an
 * entry for the agent, that validator is awaited with the source directory and its
 * return value is passed straight back. Agents without a validator pass by default.
 * Any exception raised along the way is logged and treated as a failed validation.
 *
 * @param {{success: *, result: *}} result - Outcome object of the agent run
 * @param {string} agentName - Key used to look up the validator
 * @param {string} sourceDir - Directory handed to the validator
 * @returns {Promise<*>} `false` on failure, `true` when no validator exists, otherwise
 *   whatever the validator returned
 */
async function validateAgentOutput(result, agentName, sourceDir) {
  console.log(chalk.blue(` 🔍 Validating ${agentName} agent output`));

  try {
    // The run itself has to have finished with something to show
    const runCompleted = result.success && result.result;
    if (!runCompleted) {
      console.log(chalk.red(` ❌ Validation failed: Agent execution was unsuccessful`));
      return false;
    }

    const checkDeliverables = AGENT_VALIDATORS[agentName];
    if (!checkDeliverables) {
      console.log(chalk.yellow(` ⚠️ No validator found for agent "${agentName}" - assuming success`));
      console.log(chalk.green(` ✅ Validation passed: Unknown agent with successful result`));
      return true;
    }

    console.log(chalk.blue(` 📋 Using validator for agent: ${agentName}`));
    console.log(chalk.blue(` 📂 Source directory: ${sourceDir}`));

    const outcome = await checkDeliverables(sourceDir);
    console.log(
      outcome
        ? chalk.green(` ✅ Validation passed: Required files/structure present`)
        : chalk.red(` ❌ Validation failed: Missing required deliverable files`)
    );
    return outcome;
  } catch (validationError) {
    // A validator that blows up counts as "invalid", never as "valid"
    console.log(chalk.red(` ❌ Validation failed with error: ${validationError.message}`));
    return false;
  }
}

// Low-level runner: execute ONE Claude Agent SDK query for a single agent attempt.
// WARNING: This is a low-level function. Use runClaudePromptWithRetry() for agent execution to ensure:
// - Retry logic and error handling
// - Output validation
// - Prompt snapshotting for debugging
// - Git checkpoint/rollback safety
//
// This function is NOT pure: it prints to the console, updates the shared
// timingResults/costResults tables and appends to <sourceDir>/error.log on failure.

// Slug used as the per-agent key in the timing/cost tables and in log file names.
function slugifyDescription(description) {
  return description.toLowerCase().replace(/\s+/g, '-');
}

// Lower-case activity name shown while the progress indicator is spinning.
function progressLabelFor(description) {
  if (description.includes('Pre-recon')) return 'pre-reconnaissance';
  if (description.includes('Recon')) return 'reconnaissance';
  if (description.includes('Report')) return 'report generation';
  return 'analysis';
}

// Capitalised activity name used in the final "complete!" / "failed" line.
function outcomeLabelFor(description) {
  if (description.includes('Pre-recon')) return 'Pre-recon analysis';
  if (description.includes('Recon')) return 'Reconnaissance';
  if (description.includes('Report')) return 'Report generation';
  return 'Analysis';
}

// Build the MCP server map: shannon-helper (SDK) always, plus the agent's
// playwright-agentN (stdio) server when MCP_AGENT_MAPPING assigns one.
function buildMcpServers(sourceDir, agentName) {
  const mcpServers = {
    'shannon-helper': createShannonHelperServer(sourceDir),
  };
  if (!agentName) return mcpServers;

  // Convert agent name (e.g., 'xss-vuln') to prompt name (e.g., 'vuln-xss') for the lookup
  const playwrightMcpName = MCP_AGENT_MAPPING[agentNameToPromptName(agentName)];
  if (!playwrightMcpName) return mcpServers;

  console.log(chalk.gray(` 🎭 Assigned ${agentName} → ${playwrightMcpName}`));
  mcpServers[playwrightMcpName] = {
    type: 'stdio',
    command: 'npx',
    args: ['@playwright/mcp@latest', '--isolated', '--user-data-dir', `/tmp/${playwrightMcpName}`],
    env: {
      ...process.env,
      PLAYWRIGHT_HEADLESS: 'true', // Ensure headless mode for security and CI compatibility
    },
  };
  return mcpServers;
}

// Flatten an assistant message into one string; parts without `text` are JSON-encoded.
function flattenAssistantContent(message) {
  const { content } = message.message;
  return Array.isArray(content)
    ? content.map((part) => part.text || JSON.stringify(part)).join('\n')
    : content;
}

// Print the end-of-run summary for a "result" message. Verbose (non-clean) agents
// additionally get the result text, truncated to 1000 characters.
function printRunSummary(message, cost, result, useCleanOutput) {
  console.log(chalk.magenta(`\n 🏁 COMPLETED:`));
  console.log(chalk.gray(` ⏱️ Duration: ${(message.duration_ms / 1000).toFixed(1)}s, Cost: $${cost.toFixed(4)}`));

  if (message.subtype === 'error_max_turns') {
    console.log(chalk.red(` ⚠️ Stopped: Hit maximum turns limit`));
  } else if (message.subtype === 'error_during_execution') {
    console.log(chalk.red(` ❌ Stopped: Execution error`));
  }

  if (message.permission_denials && message.permission_denials.length > 0) {
    console.log(chalk.yellow(` 🚫 ${message.permission_denials.length} permission denials`));
  }

  if (useCleanOutput) return;

  // Show result content (if it's reasonable length)
  if (result && typeof result === 'string') {
    if (result.length > 1000) {
      console.log(chalk.magenta(` 📄 ${result.slice(0, 1000)}... [${result.length} total chars]`));
    } else {
      console.log(chalk.magenta(` 📄 ${result}`));
    }
  }
}

/**
 * Run one prompt through the Claude Agent SDK and stream its messages to the console.
 *
 * Errors raised while configuring or running the query are caught and reported through
 * the return value (`success: false`); they are not re-thrown to the caller.
 *
 * @param {string} prompt - Prompt text
 * @param {string} sourceDir - Working directory for the SDK (`cwd`) and MCP helper context
 * @param {string} [allowedTools='Read'] - Accepted for signature compatibility; this
 *   function does not forward it to the SDK options
 * @param {string} [context=''] - Text prepended to the prompt (blank-line separated)
 * @param {string} [description='Claude analysis'] - Human-readable agent label; also
 *   selects the output mode via substring checks
 * @param {?string} [agentName=null] - Agent name used to pick a Playwright MCP server
 * @param {Function} [colorFn=chalk.cyan] - Colouriser for streamed assistant text
 * @param {?object} [sessionMetadata=null] - When it has `webUrl` and `id`, a legacy log
 *   file path is generated and returned as `logFile` (nothing is written to it here)
 * @param {?object} [auditSession=null] - Receives `logEvent()` calls when provided
 * @param {number} [attemptNumber=1] - Attempt counter embedded in the log file name
 * @returns {Promise<object>} On success `{ result, success: true, duration, turns, cost,
 *   partialCost, apiErrorDetected, logFile? }`; on failure `{ error, errorType, prompt,
 *   success: false, duration, cost, retryable }`
 */
async function runClaudePrompt(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null, auditSession = null, attemptNumber = 1) {
  const agentKey = slugifyDescription(description);
  const timer = new Timer(`agent-${agentKey}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;

  // Cost reported by the SDK's "result" message; stays 0 if the run dies before that.
  let runCost = 0;

  // Auto-detect execution mode to adjust logging behavior
  const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent');
  const useCleanOutput = description.includes('Pre-recon agent') ||
    description.includes('Recon agent') ||
    description.includes('Executive Summary and Report Cleanup') ||
    isParallelExecution;

  // Clean-output agents (which include the parallel vuln/exploit agents) get a progress indicator
  const progressIndicator = useCleanOutput
    ? new ProgressIndicator(`Running ${progressLabelFor(description)}...`)
    : null;

  // NOTE: Logging now handled by AuditSession (append-only, crash-safe)
  // Legacy log path generation kept for compatibility
  let logFilePath = null;
  if (sessionMetadata && sessionMetadata.webUrl && sessionMetadata.id) {
    const timestamp = new Date().toISOString().replace(/T/, '_').replace(/[:.]/g, '-').slice(0, 19);
    const logDir = generateSessionLogPath(sessionMetadata.webUrl, sessionMetadata.id);
    logFilePath = path.join(logDir, `${timestamp}_${agentKey}_attempt-${attemptNumber}.log`);
  } else {
    console.log(chalk.blue(` 🤖 Running Claude Code: ${description}...`));
  }

  // Declared outside the try so the catch block can report how far the run got
  let turnCount = 0;

  try {
    const mcpServers = buildMcpServers(sourceDir, agentName);

    const options = {
      model: 'claude-sonnet-4-5-20250929', // Use latest Claude 4.5 Sonnet
      maxTurns: 10_000, // Maximum turns for autonomous work
      cwd: sourceDir, // Set working directory using SDK option
      permissionMode: 'bypassPermissions', // Bypass all permission checks for pentesting
      mcpServers,
    };

    // SDK Options only shown for verbose agents (not clean output)
    if (!useCleanOutput) {
      console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
    }

    let result = null;
    let apiErrorDetected = false;

    if (progressIndicator) {
      progressIndicator.start();
    }

    for await (const message of query({ prompt: fullPrompt, options })) {
      if (message.type === 'assistant') {
        turnCount++;
        const content = flattenAssistantContent(message);

        if (useCleanOutput) {
          // Clean output for all agents: filter JSON tool calls but show meaningful text
          const cleanedContent = filterJsonToolCalls(content);
          if (cleanedContent.trim()) {
            // Pause the progress indicator so it does not interleave with the output
            if (progressIndicator) {
              progressIndicator.stop();
            }

            if (isParallelExecution) {
              // Compact output for parallel agents with prefixes
              const prefix = getAgentPrefix(description);
              console.log(colorFn(`${prefix} ${cleanedContent}`));
            } else {
              // Full turn output for single agents
              console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
              console.log(colorFn(` ${cleanedContent}`));
            }

            if (progressIndicator) {
              progressIndicator.start();
            }
          }
        } else {
          // Full streaming output - show complete messages with specialist color
          console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
          console.log(colorFn(` ${content}`));
        }

        // Log to audit system (crash-safe, append-only)
        if (auditSession) {
          await auditSession.logEvent('llm_response', {
            turn: turnCount,
            content,
            timestamp: new Date().toISOString()
          });
        }

        // Check for API error patterns in assistant message content
        if (content && typeof content === 'string') {
          const lowerContent = content.toLowerCase();
          if (lowerContent.includes('api error') || lowerContent.includes('terminated')) {
            apiErrorDetected = true;
            console.log(chalk.red(` ⚠️ API Error detected in assistant response: ${content.trim()}`));
          }
        }
      } else if (message.type === 'system' && message.subtype === 'init') {
        // Show useful system info only for verbose agents
        if (!useCleanOutput) {
          console.log(chalk.blue(` ℹ️ Model: ${message.model}, Permission: ${message.permissionMode}`));
          if (message.mcp_servers && message.mcp_servers.length > 0) {
            const mcpStatus = message.mcp_servers.map((s) => `${s.name}(${s.status})`).join(', ');
            console.log(chalk.blue(` 📦 MCP: ${mcpStatus}`));
          }
        }
      } else if (message.type === 'user') {
        // Skip user messages (these are our own inputs echoed back)
        continue;
      } else if (message.type === 'tool_use') {
        console.log(chalk.yellow(`\n 🔧 Using Tool: ${message.name}`));
        if (message.input && Object.keys(message.input).length > 0) {
          console.log(chalk.gray(` Input: ${JSON.stringify(message.input, null, 2)}`));
        }

        // Log tool start event
        if (auditSession) {
          await auditSession.logEvent('tool_start', {
            toolName: message.name,
            parameters: message.input,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === 'tool_result') {
        console.log(chalk.green(` ✅ Tool Result:`));
        if (message.content) {
          // Show tool results but truncate if too long
          const resultStr = typeof message.content === 'string' ? message.content : JSON.stringify(message.content, null, 2);
          if (resultStr.length > 500) {
            console.log(chalk.gray(` ${resultStr.slice(0, 500)}...\n [Result truncated - ${resultStr.length} total chars]`));
          } else {
            console.log(chalk.gray(` ${resultStr}`));
          }
        }

        // Log tool end event
        if (auditSession) {
          await auditSession.logEvent('tool_end', {
            result: message.content,
            timestamp: new Date().toISOString()
          });
        }
      } else if (message.type === 'result') {
        result = message.result;
        const cost = message.total_cost_usd || 0;
        printRunSummary(message, cost, result, useCleanOutput);

        // Track cost for all agents
        costResults.agents[agentKey] = cost;
        costResults.total += cost;
        runCost = cost;

        // The result message is terminal; stop consuming the stream
        break;
      } else {
        // Log any other message types we might not be handling
        console.log(chalk.gray(` 💬 ${message.type}: ${JSON.stringify(message, null, 2)}`));
      }
    }

    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // API error detection is logged but not immediately failed
    // Let the retry logic handle validation first
    if (apiErrorDetected) {
      console.log(chalk.yellow(` ⚠️ API Error detected in ${description} - will validate deliverables before failing`));
    }

    // NOTE: Log writing now handled by AuditSession (crash-safe, append-only)

    // Completion message. Every clean-output agent (parallel ones included) owns a
    // progress indicator, so there are exactly two cases here.
    if (progressIndicator) {
      progressIndicator.finish(`${outcomeLabelFor(description)} complete! (${turnCount} turns, ${formatDuration(duration)})`);
    } else {
      console.log(chalk.green(` ✅ Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`));
    }

    // Return result with log file path for all agents
    const returnData = {
      result,
      success: true,
      duration,
      turns: turnCount,
      cost: runCost,
      partialCost: runCost, // Include partial cost for crash recovery
      apiErrorDetected
    };
    if (logFilePath) {
      returnData.logFile = logFilePath;
    }
    return returnData;
  } catch (error) {
    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // Log error to audit system
    if (auditSession) {
      await auditSession.logEvent('error', {
        message: error.message,
        errorType: error.constructor.name,
        stack: error.stack,
        duration,
        turns: turnCount,
        timestamp: new Date().toISOString()
      });
    }

    // Failure headline; same two cases as the completion message above
    if (progressIndicator) {
      progressIndicator.stop();
      console.log(chalk.red(`❌ ${outcomeLabelFor(description)} failed (${formatDuration(duration)})`));
    } else {
      console.log(chalk.red(` ❌ Claude Code failed: ${description} (${formatDuration(duration)})`));
    }

    const retryable = isRetryableError(error);
    console.log(chalk.red(` Error Type: ${error.constructor.name}`));
    console.log(chalk.red(` Message: ${error.message}`));
    console.log(chalk.gray(` Agent: ${description}`));
    console.log(chalk.gray(` Working Directory: ${sourceDir}`));
    console.log(chalk.gray(` Retryable: ${retryable ? 'Yes' : 'No'}`));

    // Log additional context if available
    if (error.code) {
      console.log(chalk.gray(` Error Code: ${error.code}`));
    }
    if (error.status) {
      console.log(chalk.gray(` HTTP Status: ${error.status}`));
    }

    // Save detailed error to log file for debugging
    try {
      const errorLog = {
        timestamp: new Date().toISOString(),
        agent: description,
        error: {
          name: error.constructor.name,
          message: error.message,
          code: error.code,
          status: error.status,
          stack: error.stack
        },
        context: {
          sourceDir,
          prompt: fullPrompt.slice(0, 200) + '...',
          retryable
        },
        duration
      };

      const logPath = path.join(sourceDir, 'error.log');
      await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
    } catch (logError) {
      // Best-effort only: a failed log write must not mask the original error
      console.log(chalk.gray(` (Failed to write error log: ${logError.message})`));
    }

    return {
      error: error.message,
      errorType: error.constructor.name,
      prompt: fullPrompt.slice(0, 100) + '...',
      success: false,
      duration,
      cost: runCost, // Include partial cost on error
      retryable
    };
  }
}

// PREFERRED: Production-ready Claude agent execution with full orchestration
// This is the standard function for all agent execution. Provides:
// - Intelligent retry logic with exponential backoff
// - Output validation to ensure deliverables are created
// - Prompt snapshotting for debugging and reproducibility
// - Git checkpoint/rollback safety for workspace protection
// - Comprehensive error handling and logging
// - Crash-safe audit logging via AuditSession
/**
 * Run an agent with up to three attempts, validating its output after each run.
 *
 * Each attempt creates a git checkpoint, runs runClaudePrompt() and validates the
 * deliverables. A failed attempt is recorded in the audit session (when one is active),
 * the workspace is rolled back, and retryable failures are retried after the delay
 * returned by getRetryDelay().
 *
 * @param {string} prompt - Prompt text
 * @param {string} sourceDir - Git workspace the agent operates in
 * @param {string} [allowedTools='Read'] - Passed through to runClaudePrompt()
 * @param {string} [context=''] - Context prepended to the prompt
 * @param {string} [description='Claude analysis'] - Human-readable agent label
 * @param {?string} [agentName=null] - Agent name (validator lookup, audit tracking)
 * @param {Function} [colorFn=chalk.cyan] - Colouriser passed through to runClaudePrompt()
 * @param {?object} [sessionMetadata=null] - Enables audit logging when given together
 *   with `agentName`
 * @returns {Promise<object>} The successful runClaudePrompt() result
 * @throws {PentestError} When output validation still fails on the last attempt
 * @throws {Error} The last failure otherwise; failures that runClaudePrompt() reports via
 *   `{ success: false }` are surfaced as an Error carrying `duration`, `cost` and
 *   `errorType`
 */
export async function runClaudePromptWithRetry(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null) {
  const maxRetries = 3;
  let lastError;
  let retryContext = context; // Preserve context between retries

  console.log(chalk.cyan(`🚀 Starting ${description} with ${maxRetries} max attempts`));

  // Initialize audit session (crash-safe logging)
  let auditSession = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Create checkpoint before each attempt
    await createGitCheckpoint(sourceDir, description, attempt);

    // Start agent tracking in audit system (saves prompt snapshot automatically)
    if (auditSession) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }

    // Per-attempt state shared between the try and catch blocks
    let attemptRecorded = false; // endAgent() already called for this attempt
    let reportedRetryable = null; // retryability decided by runClaudePrompt(), if any

    try {
      const result = await runClaudePrompt(prompt, sourceDir, allowedTools, retryContext, description, agentName, colorFn, sessionMetadata, auditSession, attempt);

      if (!result.success) {
        // runClaudePrompt() returns failures instead of throwing. Convert them into an
        // exception so the catch block below performs audit logging, rollback and
        // backoff. Without this the loop would spin through every attempt and finally
        // `throw undefined`.
        if (typeof result.retryable === 'boolean') {
          reportedRetryable = result.retryable;
        }
        const failure = new Error(result.error || `${description} failed without reporting an error`);
        failure.errorType = result.errorType;
        failure.duration = result.duration;
        failure.cost = result.cost;
        throw failure;
      }

      // Validate output after successful run
      const validationPassed = await validateAgentOutput(result, agentName, sourceDir);

      if (validationPassed) {
        // Check if API error was detected but validation passed
        if (result.apiErrorDetected) {
          console.log(chalk.yellow(`📋 Validation: Ready for exploitation despite API error warnings`));
        }

        // Record successful attempt in audit system
        if (auditSession) {
          await auditSession.endAgent(agentName, {
            attemptNumber: attempt,
            duration_ms: result.duration,
            cost_usd: result.cost || 0,
            success: true,
            checkpoint: await getGitCommitHash(sourceDir)
          });
        }

        // Commit successful changes (will include the snapshot)
        await commitGitSuccess(sourceDir, description);
        console.log(chalk.green.bold(`🎉 ${description} completed successfully on attempt ${attempt}/${maxRetries}`));
        return result;
      }

      // Agent completed but output validation failed
      console.log(chalk.yellow(`⚠️ ${description} completed but output validation failed`));

      // Record failed validation attempt in audit system
      if (auditSession) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: result.duration,
          cost_usd: result.partialCost || result.cost || 0,
          success: false,
          error: 'Output validation failed',
          isFinalAttempt: attempt === maxRetries
        });
        // Keeps the catch block from logging this attempt a second time (with zero
        // cost/duration) when the PentestError below passes through it.
        attemptRecorded = true;
      }

      // If API error detected AND validation failed, this is a retryable error
      if (result.apiErrorDetected) {
        console.log(chalk.yellow(`⚠️ API Error detected with validation failure - treating as retryable`));
        lastError = new Error('API Error: terminated with validation failure');
      } else {
        lastError = new Error('Output validation failed');
      }

      if (attempt < maxRetries) {
        // Rollback contaminated workspace
        await rollbackGitWorkspace(sourceDir, 'validation failure');
        continue;
      }

      // FAIL FAST - Don't continue with broken pipeline
      throw new PentestError(
        `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
        'validation',
        false,
        { description, sourceDir, attemptsExhausted: maxRetries }
      );
    } catch (error) {
      lastError = error;

      // Record failed attempt in audit system (unless already recorded above)
      if (auditSession && !attemptRecorded) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: error.duration || 0,
          cost_usd: error.cost || 0,
          success: false,
          error: error.message,
          isFinalAttempt: attempt === maxRetries
        });
      }

      // Prefer the verdict runClaudePrompt() computed on the original error object; the
      // synthetic Error created above no longer carries its code/status/type.
      const retryable = reportedRetryable ?? isRetryableError(error);
      if (!retryable) {
        console.log(chalk.red(`❌ ${description} failed with non-retryable error: ${error.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw error;
      }

      if (attempt < maxRetries) {
        // Rollback for clean retry
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');

        const delay = getRetryDelay(error, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`⚠️ ${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(` Error: ${error.message}`));
        console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));

        // Preserve any partial results for next retry
        if (error.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(error.partialResults)}`;
        }

        await new Promise((resolve) => setTimeout(resolve, delay));
      } else {
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`❌ ${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(` Final error: ${error.message}`));
      }
    }
  }

  throw lastError;
}

/**
 * Look up the commit hash that HEAD points to in the given working directory.
 *
 * @param {string} sourceDir - Directory to `cd` into before asking git
 * @returns {Promise<?string>} Trimmed output of `git rev-parse HEAD`, or `null` if the
 *   command fails for any reason
 */
async function getGitCommitHash(sourceDir) {
  try {
    const { stdout } = await $`cd ${sourceDir} && git rev-parse HEAD`;
    return stdout.trim();
  } catch {
    // Best-effort lookup: callers treat a missing hash as "no checkpoint"
    return null;
  }
}