From d3816a29faef5811d4735f3478cdac3cdd2e4bc3 Mon Sep 17 00:00:00 2001 From: ajmallesh Date: Mon, 16 Feb 2026 16:12:21 -0800 Subject: [PATCH] refactor: extract services layer, Result type, and ErrorCode classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add DI container (src/services/) with AgentExecutionService, ConfigLoaderService, and ExploitationCheckerService — pure domain logic with no Temporal dependencies - Introduce Result type and ErrorCode enum for code-based error classification in classifyErrorForTemporal, replacing scattered string matching - Consolidate billing/spending cap detection into utils/billing-detection.ts with shared pattern lists across message-handlers, claude-executor, and error-handling - Extract LogStream abstraction for append-only logging with backpressure, used by both AgentLogger and WorkflowLogger - Simplify activities.ts from inline lifecycle logic to thin wrappers delegating to services, with heartbeat and error classification - Expand config-parser with human-readable AJV errors, security validation, and rule type-specific checks --- CLAUDE.md | 1 + src/ai/claude-executor.ts | 28 +- src/ai/message-handlers.ts | 28 +- src/audit/audit-session.ts | 26 +- src/audit/log-stream.ts | 127 ++++++++ src/audit/logger.ts | 69 +--- src/audit/metrics-tracker.ts | 18 +- src/audit/workflow-logger.ts | 113 +++---- src/config-parser.ts | 348 ++++++++++++++++---- src/constants.ts | 5 +- src/error-handling.ts | 98 ++++-- src/phases/reporting.ts | 9 +- src/queue-validation.ts | 29 +- src/services/agent-execution.ts | 278 ++++++++++++++++ src/services/config-loader.ts | 75 +++++ src/services/container.ts | 117 +++++++ src/services/exploitation-checker.ts | 74 +++++ src/services/index.ts | 20 ++ src/session-manager.ts | 55 +++- src/temporal/activities.ts | 465 ++++++++------------------- src/temporal/shared.ts | 13 +- src/temporal/summary-mapper.ts | 45 +++ src/temporal/workflows.ts | 28 +- src/types/agents.ts | 65 +--- src/types/config.ts | 1 - src/types/errors.ts | 33 ++ src/types/index.ts | 2 + src/types/metrics.ts | 19 ++ src/types/result.ts | 62 ++++ src/utils/billing-detection.ts | 95 ++++++ src/utils/git-manager.ts | 25 +- 31 files changed, 1664 insertions(+), 707 deletions(-) create mode 100644 src/audit/log-stream.ts create mode 100644 src/services/agent-execution.ts create mode 100644 src/services/config-loader.ts create mode 100644 src/services/container.ts create mode 100644 src/services/exploitation-checker.ts create mode 100644 src/services/index.ts create mode 100644 src/temporal/summary-mapper.ts create mode 100644 src/types/metrics.ts create mode 100644 src/types/result.ts create mode 100644 src/utils/billing-detection.ts diff --git a/CLAUDE.md b/CLAUDE.md index 70555c0..4bb7c11 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,6 +117,7 @@ Defensive security tool only. Use only on systems you own or have explicit permi - Dense callback chains when sequential logic is clearer - Sacrificing readability for DRY — some repetition is fine if clearer - Abstractions for one-time operations +- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it ## Key Files diff --git a/src/ai/claude-executor.ts b/src/ai/claude-executor.ts index a2321d9..1b62da0 100644 --- a/src/ai/claude-executor.ts +++ b/src/ai/claude-executor.ts @@ -11,12 +11,13 @@ import chalk, { type ChalkInstance } from 'chalk'; import { query } from '@anthropic-ai/claude-agent-sdk'; import { isRetryableError, PentestError } from '../error-handling.js'; +import { isSpendingCapBehavior } from '../utils/billing-detection.js'; import { timingResults, Timer } from '../utils/metrics.js'; import { formatTimestamp } from '../utils/formatting.js'; import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js'; import { AuditSession } from '../audit/index.js'; import { createShannonHelperServer } from '../../mcp-server/dist/index.js'; -import { getPromptNameForAgent } from '../types/agents.js'; +import { AGENTS } from '../session-manager.js'; import type { AgentName } from '../types/index.js'; import { dispatchMessage } from './message-handlers.js'; @@ -65,8 +66,8 @@ function buildMcpServers( }; if (agentName) { - const promptName = getPromptNameForAgent(agentName as AgentName); - const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null; + const promptTemplate = AGENTS[agentName as AgentName].promptTemplate; + const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null; if (playwrightMcpName) { console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`)); @@ -263,22 +264,13 @@ export async function runClaudePrompt( // === SPENDING CAP SAFEGUARD === // Defense-in-depth: Detect spending cap that slipped through detectApiError(). - // When spending cap is hit, Claude returns a short message with $0 cost. - // Legitimate agent work NEVER costs $0 with only 1-2 turns. - if (turnCount <= 2 && totalCost === 0) { - const resultLower = (result || '').toLowerCase(); - const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets']; - const looksLikeBillingError = BILLING_KEYWORDS.some((kw) => - resultLower.includes(kw) + // Uses consolidated billing detection from utils/billing-detection.ts + if (isSpendingCapBehavior(turnCount, totalCost, result || '')) { + throw new PentestError( + `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`, + 'billing', + true // Retryable - Temporal will use 5-30 min backoff ); - - if (looksLikeBillingError) { - throw new PentestError( - `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`, - 'billing', - true // Retryable - Temporal will use 5-30 min backoff - ); - } } const duration = timer.stop(); diff --git a/src/ai/message-handlers.ts b/src/ai/message-handlers.ts index f5113d7..c791be8 100644 --- a/src/ai/message-handlers.ts +++ b/src/ai/message-handlers.ts @@ -7,6 +7,8 @@ // Pure functions for processing SDK message types import { PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import { matchesBillingTextPattern } from '../utils/billing-detection.js'; import { filterJsonToolCalls } from '../utils/output-formatter.js'; import { formatTimestamp } from '../utils/formatting.js'; import chalk from 'chalk'; @@ -75,25 +77,15 @@ function detectApiError(content: string): ApiErrorDetection { // When Claude Code hits its spending cap, it returns a short message like // "Spending cap reached resets 8am" instead of throwing an error. // These should retry with 5-30 min backoff so workflows can recover when cap resets. - const BILLING_PATTERNS = [ - 'spending cap', - 'spending limit', - 'cap reached', - 'budget exceeded', - 'usage limit', - ]; - - const isBillingError = BILLING_PATTERNS.some((pattern) => - lowerContent.includes(pattern) - ); - - if (isBillingError) { + if (matchesBillingTextPattern(content)) { return { detected: true, shouldThrow: new PentestError( `Billing limit reached: ${content.slice(0, 100)}`, 'billing', - true // RETRYABLE - Temporal will use 5-30 min backoff + true, // RETRYABLE - Temporal will use 5-30 min backoff + {}, + ErrorCode.SPENDING_CAP_REACHED ), }; } @@ -127,7 +119,9 @@ function handleStructuredError( shouldThrow: new PentestError( `Billing error (structured): ${content.slice(0, 100)}`, 'billing', - true // Retryable with backoff + true, // Retryable with backoff + {}, + ErrorCode.INSUFFICIENT_CREDITS ), }; case 'rate_limit': @@ -136,7 +130,9 @@ function handleStructuredError( shouldThrow: new PentestError( `Rate limit hit (structured): ${content.slice(0, 100)}`, 'network', - true // Retryable with backoff + true, // Retryable with backoff + {}, + ErrorCode.API_RATE_LIMITED ), }; case 'authentication_failed': diff --git a/src/audit/audit-session.ts b/src/audit/audit-session.ts index 7e8cc6a..26cade4 100644 --- a/src/audit/audit-session.ts +++ b/src/audit/audit-session.ts @@ -18,6 +18,8 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js'; import { formatTimestamp } from '../utils/formatting.js'; import { SessionMutex } from '../utils/concurrency.js'; import type { AgentEndResult } from '../types/index.js'; +import { PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; // Global mutex instance const sessionMutex = new SessionMutex(); @@ -40,10 +42,22 @@ export class AuditSession { // Validate required fields if (!this.sessionId) { - throw new Error('sessionMetadata.id is required'); + throw new PentestError( + 'sessionMetadata.id is required', + 'config', + false, + { field: 'sessionMetadata.id' }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } if (!this.sessionMetadata.webUrl) { - throw new Error('sessionMetadata.webUrl is required'); + throw new PentestError( + 'sessionMetadata.webUrl is required', + 'config', + false, + { field: 'sessionMetadata.webUrl' }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } // Components @@ -124,7 +138,13 @@ export class AuditSession { */ async logEvent(eventType: string, eventData: unknown): Promise { if (!this.currentLogger) { - throw new Error('No active logger. Call startAgent() first.'); + throw new PentestError( + 'No active logger. Call startAgent() first.', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } // Log to agent-specific log file (JSON format) diff --git a/src/audit/log-stream.ts b/src/audit/log-stream.ts new file mode 100644 index 0000000..b97bef5 --- /dev/null +++ b/src/audit/log-stream.ts @@ -0,0 +1,127 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * LogStream - Stream composition utility for append-only logging + * + * Encapsulates the common stream management pattern used by AgentLogger + * and WorkflowLogger: opening streams in append mode, handling backpressure, + * and proper cleanup. + */ + +import fs from 'fs'; +import path from 'path'; +import { ensureDirectory } from '../utils/file-io.js'; + +/** + * LogStream - Manages a single append-only log file stream + */ +export class LogStream { + private readonly filePath: string; + private stream: fs.WriteStream | null = null; + private _isOpen: boolean = false; + + constructor(filePath: string) { + this.filePath = filePath; + } + + /** + * Open the stream for writing (creates parent directories, opens in append mode) + */ + async open(): Promise { + if (this._isOpen) { + return; + } + + // Ensure parent directory exists + await ensureDirectory(path.dirname(this.filePath)); + + // Create write stream in append mode + this.stream = fs.createWriteStream(this.filePath, { + flags: 'a', + encoding: 'utf8', + autoClose: true, + }); + + // Handle stream errors to prevent crashes (log and mark closed) + this.stream.on('error', (err) => { + console.error(`LogStream error for ${this.filePath}:`, err.message); + this._isOpen = false; + }); + + this._isOpen = true; + } + + /** + * Write text to the stream with backpressure handling + */ + async write(text: string): Promise { + return new Promise((resolve, reject) => { + if (!this._isOpen || !this.stream) { + reject(new Error('LogStream not open')); + return; + } + + const stream = this.stream; + let drainHandler: (() => void) | null = null; + + const cleanup = () => { + if (drainHandler) { + stream.removeListener('drain', drainHandler); + drainHandler = null; + } + }; + + const needsDrain = !stream.write(text, 'utf8', (error) => { + cleanup(); + if (error) { + reject(error); + } else if (!needsDrain) { + resolve(); + } + }); + + if (needsDrain) { + drainHandler = () => { + cleanup(); + resolve(); + }; + stream.once('drain', drainHandler); + } + }); + } + + /** + * Close the stream (flush and close) + */ + async close(): Promise { + if (!this._isOpen || !this.stream) { + return; + } + + return new Promise((resolve) => { + this.stream!.end(() => { + this._isOpen = false; + this.stream = null; + resolve(); + }); + }); + } + + /** + * Check if the stream is currently open + */ + get isOpen(): boolean { + return this._isOpen; + } + + /** + * Get the file path this stream writes to + */ + get path(): string { + return this.filePath; + } +} diff --git a/src/audit/logger.ts b/src/audit/logger.ts index c8e902d..89b2347 100644 --- a/src/audit/logger.ts +++ b/src/audit/logger.ts @@ -8,10 +8,9 @@ * Append-Only Agent Logger * * Provides crash-safe, append-only logging for agent execution. - * Uses file streams with immediate flush to prevent data loss. + * Uses LogStream for stream management with backpressure handling. */ -import fs from 'fs'; import { generateLogPath, generatePromptPath, @@ -19,6 +18,7 @@ import { } from './utils.js'; import { atomicWrite } from '../utils/file-io.js'; import { formatTimestamp } from '../utils/formatting.js'; +import { LogStream } from './log-stream.js'; interface LogEvent { type: string; @@ -30,13 +30,11 @@ interface LogEvent { * AgentLogger - Manages append-only logging for a single agent execution */ export class AgentLogger { - private sessionMetadata: SessionMetadata; - private agentName: string; - private attemptNumber: number; - private timestamp: number; - private logPath: string; - private stream: fs.WriteStream | null = null; - private isOpen: boolean = false; + private readonly sessionMetadata: SessionMetadata; + private readonly agentName: string; + private readonly attemptNumber: number; + private readonly timestamp: number; + private readonly logStream: LogStream; constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) { this.sessionMetadata = sessionMetadata; @@ -44,26 +42,20 @@ export class AgentLogger { this.attemptNumber = attemptNumber; this.timestamp = Date.now(); - // Generate log file path - this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber); + // Generate log file path and create stream + const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber); + this.logStream = new LogStream(logPath); } /** * Initialize the log stream (creates file and opens stream) */ async initialize(): Promise { - if (this.isOpen) { + if (this.logStream.isOpen) { return; // Already initialized } - // Create write stream with append mode and auto-flush - this.stream = fs.createWriteStream(this.logPath, { - flags: 'a', // Append mode - encoding: 'utf8', - autoClose: true, - }); - - this.isOpen = true; + await this.logStream.open(); // Write header await this.writeHeader(); @@ -83,29 +75,7 @@ export class AgentLogger { `========================================\n`, ].join('\n'); - return this.writeRaw(header); - } - - /** - * Write raw text to log file with immediate flush - */ - private writeRaw(text: string): Promise { - return new Promise((resolve, reject) => { - if (!this.isOpen || !this.stream) { - reject(new Error('Logger not initialized')); - return; - } - - const needsDrain = !this.stream.write(text, 'utf8', (error) => { - if (error) reject(error); - }); - - if (needsDrain) { - this.stream.once('drain', resolve); - } else { - resolve(); - } - }); + return this.logStream.write(header); } /** @@ -120,23 +90,14 @@ export class AgentLogger { }; const eventLine = `${JSON.stringify(event)}\n`; - return this.writeRaw(eventLine); + return this.logStream.write(eventLine); } /** * Close the log stream */ async close(): Promise { - if (!this.isOpen || !this.stream) { - return; - } - - return new Promise((resolve) => { - this.stream!.end(() => { - this.isOpen = false; - resolve(); - }); - }); + return this.logStream.close(); } /** diff --git a/src/audit/metrics-tracker.ts b/src/audit/metrics-tracker.ts index 89cdf2b..97bfe6a 100644 --- a/src/audit/metrics-tracker.ts +++ b/src/audit/metrics-tracker.ts @@ -18,6 +18,8 @@ import { import { atomicWrite, readJson, fileExists } from '../utils/file-io.js'; import { formatTimestamp, calculatePercentage } from '../utils/formatting.js'; import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js'; +import { PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; import type { AgentName, AgentEndResult } from '../types/index.js'; interface AttemptData { @@ -159,7 +161,13 @@ export class MetricsTracker { */ async endAgent(agentName: string, result: AgentEndResult): Promise { if (!this.data) { - throw new Error('MetricsTracker not initialized'); + throw new PentestError( + 'MetricsTracker not initialized', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } // Initialize agent metrics if not exists @@ -251,7 +259,13 @@ export class MetricsTracker { checkpointHash?: string ): Promise { if (!this.data) { - throw new Error('MetricsTracker not initialized'); + throw new PentestError( + 'MetricsTracker not initialized', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } // Ensure originalWorkflowId is set (backfill if missing from old sessions) diff --git a/src/audit/workflow-logger.ts b/src/audit/workflow-logger.ts index d64ff4f..5276bb0 100644 --- a/src/audit/workflow-logger.ts +++ b/src/audit/workflow-logger.ts @@ -11,10 +11,10 @@ * Optimized for `tail -f` viewing during concurrent workflow execution. */ -import fs from 'fs'; -import path from 'path'; -import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js'; +import fs from 'fs/promises'; +import { generateWorkflowLogPath, type SessionMetadata } from './utils.js'; import { formatDuration, formatTimestamp } from '../utils/formatting.js'; +import { LogStream } from './log-stream.js'; export interface AgentLogDetails { attemptNumber?: number; @@ -42,38 +42,28 @@ export interface WorkflowSummary { * WorkflowLogger - Manages the unified workflow log file */ export class WorkflowLogger { - private sessionMetadata: SessionMetadata; - private logPath: string; - private stream: fs.WriteStream | null = null; - private initialized: boolean = false; + private readonly sessionMetadata: SessionMetadata; + private readonly logStream: LogStream; constructor(sessionMetadata: SessionMetadata) { this.sessionMetadata = sessionMetadata; - this.logPath = generateWorkflowLogPath(sessionMetadata); + const logPath = generateWorkflowLogPath(sessionMetadata); + this.logStream = new LogStream(logPath); } /** * Initialize the log stream (creates file and writes header) */ async initialize(): Promise { - if (this.initialized) { + if (this.logStream.isOpen) { return; } - // Ensure directory exists - await ensureDirectory(path.dirname(this.logPath)); - - // Create write stream with append mode - this.stream = fs.createWriteStream(this.logPath, { - flags: 'a', - encoding: 'utf8', - autoClose: true, - }); - - this.initialized = true; + // Open the stream (LogStream.open() handles directory creation) + await this.logStream.open(); // Write header only if file is new (empty) - const stats = await fs.promises.stat(this.logPath).catch(() => null); + const stats = await fs.stat(this.logStream.path).catch(() => null); if (!stats || stats.size === 0) { await this.writeHeader(); } @@ -94,29 +84,7 @@ export class WorkflowLogger { ``, ].join('\n'); - return this.writeRaw(header); - } - - /** - * Write raw text to log file with immediate flush - */ - private writeRaw(text: string): Promise { - return new Promise((resolve, reject) => { - if (!this.initialized || !this.stream) { - reject(new Error('WorkflowLogger not initialized')); - return; - } - - const needsDrain = !this.stream.write(text, 'utf8', (error) => { - if (error) reject(error); - }); - - if (needsDrain) { - this.stream.once('drain', resolve); - } else { - resolve(); - } - }); + return this.logStream.write(header); } /** @@ -138,10 +106,10 @@ export class WorkflowLogger { // Add blank line before phase start for readability if (event === 'start') { - await this.writeRaw('\n'); + await this.logStream.write('\n'); } - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -184,7 +152,7 @@ export class WorkflowLogger { } const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -194,7 +162,7 @@ export class WorkflowLogger { await this.ensureInitialized(); const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -205,7 +173,7 @@ export class WorkflowLogger { const contextStr = context ? ` (${context})` : ''; const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -301,7 +269,7 @@ export class WorkflowLogger { const params = this.formatToolParams(toolName, parameters); const paramStr = params ? `: ${params}` : ''; const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -313,7 +281,7 @@ export class WorkflowLogger { // Show full content, replacing newlines with escaped version for single-line output const escaped = content.replace(/\n/g, '\\n'); const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -324,42 +292,42 @@ export class WorkflowLogger { const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED'; - await this.writeRaw('\n'); - await this.writeRaw(`================================================================================\n`); - await this.writeRaw(`Workflow ${status}\n`); - await this.writeRaw(`────────────────────────────────────────\n`); - await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`); - await this.writeRaw(`Status: ${summary.status}\n`); - await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`); - await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`); - await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`); + await this.logStream.write('\n'); + await this.logStream.write(`================================================================================\n`); + await this.logStream.write(`Workflow ${status}\n`); + await this.logStream.write(`────────────────────────────────────────\n`); + await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`); + await this.logStream.write(`Status: ${summary.status}\n`); + await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`); + await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`); + await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`); if (summary.error) { - await this.writeRaw(`Error: ${summary.error}\n`); + await this.logStream.write(`Error: ${summary.error}\n`); } - await this.writeRaw(`\n`); - await this.writeRaw(`Agent Breakdown:\n`); + await this.logStream.write(`\n`); + await this.logStream.write(`Agent Breakdown:\n`); for (const agentName of summary.completedAgents) { const metrics = summary.agentMetrics[agentName]; if (metrics) { const duration = formatDuration(metrics.durationMs); const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A'; - await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`); + await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`); } else { - await this.writeRaw(` - ${agentName}\n`); + await this.logStream.write(` - ${agentName}\n`); } } - await this.writeRaw(`================================================================================\n`); + await this.logStream.write(`================================================================================\n`); } /** * Ensure initialized (helper for lazy initialization) */ private async ensureInitialized(): Promise { - if (!this.initialized) { + if (!this.logStream.isOpen) { await this.initialize(); } } @@ -368,15 +336,6 @@ export class WorkflowLogger { * Close the log stream */ async close(): Promise { - if (!this.initialized || !this.stream) { - return; - } - - return new Promise((resolve) => { - this.stream!.end(() => { - this.initialized = false; - resolve(); - }); - }); + return this.logStream.close(); } } diff --git a/src/config-parser.ts b/src/config-parser.ts index 0c56072..f3ee9d4 100644 --- a/src/config-parser.ts +++ b/src/config-parser.ts @@ -7,9 +7,10 @@ import { createRequire } from 'module'; import { fs } from 'zx'; import yaml from 'js-yaml'; -import { Ajv, type ValidateFunction } from 'ajv'; +import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv'; import type { FormatsPlugin } from 'ajv-formats'; import { PentestError } from './error-handling.js'; +import { ErrorCode } from './types/errors.js'; import type { Config, Rule, @@ -53,20 +54,155 @@ const DANGEROUS_PATTERNS: RegExp[] = [ /file:/i, // File URLs ]; +/** + * Format a single AJV error into a human-readable message. + * Translates AJV error keywords into plain English descriptions. + */ +function formatAjvError(error: ErrorObject): string { + const path = error.instancePath || 'root'; + const params = error.params as Record; + + switch (error.keyword) { + case 'required': { + const missingProperty = params.missingProperty as string; + return `Missing required field: "${missingProperty}" at ${path || 'root'}`; + } + + case 'type': { + const expectedType = params.type as string; + return `Invalid type at ${path}: expected ${expectedType}`; + } + + case 'enum': { + const allowedValues = params.allowedValues as unknown[]; + const formattedValues = allowedValues.map((v) => `"${v}"`).join(', '); + return `Invalid value at ${path}: must be one of [${formattedValues}]`; + } + + case 'additionalProperties': { + const additionalProperty = params.additionalProperty as string; + return `Unknown field at ${path}: "${additionalProperty}" is not allowed`; + } + + case 'minLength': { + const limit = params.limit as number; + return `Value at ${path} is too short: must have at least ${limit} character(s)`; + } + + case 'maxLength': { + const limit = params.limit as number; + return `Value at ${path} is too long: must have at most ${limit} character(s)`; + } + + case 'minimum': { + const limit = params.limit as number; + return `Value at ${path} is too small: must be >= ${limit}`; + } + + case 'maximum': { + const limit = params.limit as number; + return `Value at ${path} is too large: must be <= ${limit}`; + } + + case 'minItems': { + const limit = params.limit as number; + return `Array at ${path} has too few items: must have at least ${limit} item(s)`; + } + + case 'maxItems': { + const limit = params.limit as number; + return `Array at ${path} has too many items: must have at most ${limit} item(s)`; + } + + case 'pattern': { + const pattern = params.pattern as string; + return `Value at ${path} does not match required pattern: ${pattern}`; + } + + case 'format': { + const format = params.format as string; + return `Value at ${path} must be a valid ${format}`; + } + + case 'const': { + const allowedValue = params.allowedValue as unknown; + return `Value at ${path} must be exactly "${allowedValue}"`; + } + + case 'oneOf': { + return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`; + } + + case 'anyOf': { + return `Value at ${path} must match at least one of the allowed schemas`; + } + + case 'not': { + return `Value at ${path} matches a schema it should not match`; + } + + case 'if': { + return `Value at ${path} does not satisfy conditional schema requirements`; + } + + case 'uniqueItems': { + const i = params.i as number; + const j = params.j as number; + return `Array at ${path} contains duplicate items at positions ${j} and ${i}`; + } + + case 'propertyNames': { + const propertyName = params.propertyName as string; + return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`; + } + + case 'dependencies': + case 'dependentRequired': { + const property = params.property as string; + const missingProperty = params.missingProperty as string; + return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`; + } + + default: { + // Fallback for any unhandled keywords - use AJV's message if available + const message = error.message || `validation failed for keyword "${error.keyword}"`; + return `${path}: ${message}`; + } + } +} + +/** + * Format all AJV errors into a list of human-readable messages. + * Returns an array of formatted error strings. + */ +function formatAjvErrors(errors: ErrorObject[]): string[] { + return errors.map(formatAjvError); +} + // Parse and load YAML configuration file with enhanced safety export const parseConfig = async (configPath: string): Promise => { try { // File existence check if (!(await fs.pathExists(configPath))) { - throw new Error(`Configuration file not found: ${configPath}`); + throw new PentestError( + `Configuration file not found: ${configPath}`, + 'config', + false, + { configPath }, + ErrorCode.CONFIG_NOT_FOUND + ); } // File size check (prevent extremely large files) const stats = await fs.stat(configPath); const maxFileSize = 1024 * 1024; // 1MB if (stats.size > maxFileSize) { - throw new Error( - `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)` + throw new PentestError( + `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`, + 'config', + false, + { configPath, fileSize: stats.size, maxFileSize }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } @@ -75,7 +211,13 @@ export const parseConfig = async (configPath: string): Promise => { // Basic content validation if (!configContent.trim()) { - throw new Error('Configuration file is empty'); + throw new PentestError( + 'Configuration file is empty', + 'config', + false, + { configPath }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } // Parse YAML with safety options @@ -88,12 +230,24 @@ export const parseConfig = async (configPath: string): Promise => { }); } catch (yamlError) { const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError); - throw new Error(`YAML parsing failed: ${errMsg}`); + throw new PentestError( + `YAML parsing failed: ${errMsg}`, + 'config', + false, + { configPath, originalError: errMsg }, + ErrorCode.CONFIG_PARSE_ERROR + ); } // Additional safety check if (config === null || config === undefined) { - throw new Error('Configuration file resulted in null/undefined after parsing'); + throw new PentestError( + 'Configuration file resulted in null/undefined after parsing', + 'config', + false, + { configPath }, + ErrorCode.CONFIG_PARSE_ERROR + ); } // Validate the configuration structure and content @@ -101,20 +255,19 @@ export const parseConfig = async (configPath: string): Promise => { return config as Config; } catch (error) { - const errMsg = error instanceof Error ? error.message : String(error); - // Enhance error message with context - if ( - errMsg.startsWith('Configuration file not found') || - errMsg.startsWith('YAML parsing failed') || - errMsg.includes('must be') || - errMsg.includes('exceeds maximum') - ) { - // These are already well-formatted errors, re-throw as-is + // PentestError instances are already well-formatted, re-throw as-is + if (error instanceof PentestError) { throw error; - } else { - // Wrap other errors with context - throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`); } + // Wrap other errors with context + const errMsg = error instanceof Error ? error.message : String(error); + throw new PentestError( + `Failed to parse configuration file '${configPath}': ${errMsg}`, + 'config', + false, + { configPath, originalError: errMsg }, + ErrorCode.CONFIG_PARSE_ERROR + ); } }; @@ -122,32 +275,42 @@ export const parseConfig = async (configPath: string): Promise => { const validateConfig = (config: Config): void => { // Basic structure validation if (!config || typeof config !== 'object') { - throw new Error('Configuration must be a valid object'); + throw new PentestError( + 'Configuration must be a valid object', + 'config', + false, + {}, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } if (Array.isArray(config)) { - throw new Error('Configuration must be an object, not an array'); + throw new PentestError( + 'Configuration must be an object, not an array', + 'config', + false, + {}, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } // JSON Schema validation const isValid = validateSchema(config); if (!isValid) { const errors = validateSchema.errors || []; - const errorMessages = errors.map((err) => { - const path = err.instancePath || 'root'; - return `${path}: ${err.message}`; - }); - throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`); + const errorMessages = formatAjvErrors(errors); + throw new PentestError( + `Configuration validation failed:\n - ${errorMessages.join('\n - ')}`, + 'config', + false, + { validationErrors: errorMessages }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } // Additional security validation performSecurityValidation(config); - // Warn if deprecated fields are used - if (config.login) { - console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.'); - } - // Ensure at least some configuration is provided if (!config.rules && !config.authentication) { console.warn( @@ -166,17 +329,40 @@ const performSecurityValidation = (config: Config): void => { if (config.authentication) { const auth = config.authentication; + // Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986) + if (auth.login_url) { + for (const pattern of DANGEROUS_PATTERNS) { + if (pattern.test(auth.login_url)) { + throw new PentestError( + `authentication.login_url contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'login_url', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); + } + } + } + // Check for dangerous patterns in credentials if (auth.credentials) { for (const pattern of DANGEROUS_PATTERNS) { if (pattern.test(auth.credentials.username)) { - throw new Error( - 'authentication.credentials.username contains potentially dangerous pattern' + throw new PentestError( + `authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'credentials.username', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } if (pattern.test(auth.credentials.password)) { - throw new Error( - 'authentication.credentials.password contains potentially dangerous pattern' + throw new PentestError( + `authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'credentials.password', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } @@ -187,8 +373,12 @@ const performSecurityValidation = (config: Config): void => { auth.login_flow.forEach((step, index) => { for (const pattern of DANGEROUS_PATTERNS) { if (pattern.test(step)) { - throw new Error( - `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `login_flow[${index}]`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } @@ -216,13 +406,21 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi // Security validation for (const pattern of DANGEROUS_PATTERNS) { if (pattern.test(rule.url_path)) { - throw new Error( - `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } if (pattern.test(rule.description)) { - throw new Error( - `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `rules.${ruleType}[${index}].description`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } @@ -234,10 +432,18 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi // Validate rule based on its specific type const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => { + const field = `rules.${ruleType}[${index}].url_path`; + switch (rule.type) { case 'path': if (!rule.url_path.startsWith('/')) { - throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`); + throw new PentestError( + `${field} for type 'path' must start with '/'`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } break; @@ -245,14 +451,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'domain': // Basic domain validation - no slashes allowed if (rule.url_path.includes('/')) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters` + throw new PentestError( + `${field} for type '${rule.type}' cannot contain '/' characters`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } // Must contain at least one dot for domains if (rule.type === 'domain' && !rule.url_path.includes('.')) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name` + throw new PentestError( + `${field} for type 'domain' must be a valid domain name`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; @@ -260,8 +474,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'method': { const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']; if (!allowedMethods.includes(rule.url_path.toUpperCase())) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}` + throw new PentestError( + `${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`, + 'config', + false, + { field, ruleType: rule.type, allowedMethods }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; @@ -270,8 +488,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'header': // Header name validation (basic) if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)` + throw new PentestError( + `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; @@ -279,8 +501,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'parameter': // Parameter name validation (basic) if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)` + throw new PentestError( + `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; @@ -293,8 +519,12 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => { rules.forEach((rule, index) => { const key = `${rule.type}:${rule.url_path}`; if (seen.has(key)) { - throw new Error( - `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'` + throw new PentestError( + `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`, + 'config', + false, + { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } seen.add(key); @@ -308,8 +538,12 @@ const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): vo focusRules.forEach((rule, index) => { const key = `${rule.type}:${rule.url_path}`; if (avoidSet.has(key)) { - throw new Error( - `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid` + throw new PentestError( + `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`, + 'config', + false, + { field: `rules.focus[${index}]`, urlPath: rule.url_path }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } }); @@ -347,7 +581,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => { password: auth.credentials.password, ...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }), }, - login_flow: auth.login_flow.map((step) => step.trim()), + ...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }), success_condition: { type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'], value: auth.success_condition.value.trim(), diff --git a/src/constants.ts b/src/constants.ts index 4db8e9f..e69af39 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -7,7 +7,7 @@ import { path, fs } from 'zx'; import chalk from 'chalk'; import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js'; -import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js'; +import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js'; // Factory function for vulnerability queue validators function createVulnValidator(vulnType: VulnType): AgentValidator { @@ -32,7 +32,8 @@ function createExploitValidator(vulnType: VulnType): AgentValidator { } // MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts -export const MCP_AGENT_MAPPING: Record = Object.freeze({ +// Keys are promptTemplate values from AGENTS registry (session-manager.ts) +export const MCP_AGENT_MAPPING: Record = Object.freeze({ // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code') // NOTE: Pre-recon is pure code analysis and doesn't use browser automation, // but assigning MCP server anyway for consistency and future extensibility diff --git a/src/error-handling.ts b/src/error-handling.ts index bb8b4c5..88fa501 100644 --- a/src/error-handling.ts +++ b/src/error-handling.ts @@ -4,11 +4,16 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. -import type { - PentestErrorType, - PentestErrorContext, - PromptErrorResult, +import { + ErrorCode, + type PentestErrorType, + type PentestErrorContext, + type PromptErrorResult, } from './types/errors.js'; +import { + matchesBillingApiPattern, + matchesBillingTextPattern, +} from './utils/billing-detection.js'; // Custom error class for pentest operations export class PentestError extends Error { @@ -17,18 +22,24 @@ export class PentestError extends Error { retryable: boolean; context: PentestErrorContext; timestamp: string; + /** Optional specific error code for reliable classification */ + code?: ErrorCode; constructor( message: string, type: PentestErrorType, retryable: boolean = false, - context: PentestErrorContext = {} + context: PentestErrorContext = {}, + code?: ErrorCode ) { super(message); this.type = type; this.retryable = retryable; this.context = context; this.timestamp = new Date().toISOString(); + if (code !== undefined) { + this.code = code; + } } } @@ -102,6 +113,53 @@ export function isRetryableError(error: Error): boolean { return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern)); } +/** + * Classifies errors by ErrorCode for reliable, code-based classification. + * Used when error is a PentestError with a specific ErrorCode. + */ +function classifyByErrorCode( + code: ErrorCode, + retryableFromError: boolean +): { type: string; retryable: boolean } { + switch (code) { + // Billing errors - retryable (wait for cap reset or credits added) + case ErrorCode.SPENDING_CAP_REACHED: + case ErrorCode.INSUFFICIENT_CREDITS: + return { type: 'BillingError', retryable: true }; + + case ErrorCode.API_RATE_LIMITED: + return { type: 'RateLimitError', retryable: true }; + + // Config errors - non-retryable (need manual fix) + case ErrorCode.CONFIG_NOT_FOUND: + case ErrorCode.CONFIG_VALIDATION_FAILED: + case ErrorCode.CONFIG_PARSE_ERROR: + return { type: 'ConfigurationError', retryable: false }; + + // Prompt errors - non-retryable (need manual fix) + case ErrorCode.PROMPT_LOAD_FAILED: + return { type: 'ConfigurationError', retryable: false }; + + // Git errors - non-retryable (indicates workspace corruption) + case ErrorCode.GIT_CHECKPOINT_FAILED: + case ErrorCode.GIT_ROLLBACK_FAILED: + return { type: 'GitError', retryable: false }; + + // Validation errors - retryable (agent may succeed on retry) + case ErrorCode.OUTPUT_VALIDATION_FAILED: + case ErrorCode.DELIVERABLE_NOT_FOUND: + return { type: 'OutputValidationError', retryable: true }; + + // Agent execution - use the retryable flag from the error + case ErrorCode.AGENT_EXECUTION_FAILED: + return { type: 'AgentExecutionError', retryable: retryableFromError }; + + default: + // Unknown code - fall through to string matching + return { type: 'UnknownError', retryable: retryableFromError }; + } +} + /** * Classifies errors for Temporal workflow retry behavior. * Returns error type and whether Temporal should retry. @@ -109,31 +167,25 @@ export function isRetryableError(error: Error): boolean { * Used by activities to wrap errors in ApplicationFailure: * - Retryable errors: Temporal retries with configured backoff * - Non-retryable errors: Temporal fails immediately + * + * Classification priority: + * 1. If error is PentestError with ErrorCode, classify by code (reliable) + * 2. Fall through to string matching for external errors (SDK, network, etc.) */ export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } { + // === CODE-BASED CLASSIFICATION (Preferred for internal errors) === + if (error instanceof PentestError && error.code !== undefined) { + return classifyByErrorCode(error.code, error.retryable); + } + + // === STRING-BASED CLASSIFICATION (Fallback for external errors) === const message = (error instanceof Error ? error.message : String(error)).toLowerCase(); // === BILLING ERRORS (Retryable with long backoff) === // Anthropic returns billing as 400 invalid_request_error // Human can add credits OR wait for spending cap to reset (5-30 min backoff) - if ( - message.includes('billing_error') || - message.includes('credit balance is too low') || - message.includes('insufficient credits') || - message.includes('usage is blocked due to insufficient credits') || - message.includes('please visit plans & billing') || - message.includes('please visit plans and billing') || - message.includes('usage limit reached') || - message.includes('quota exceeded') || - message.includes('daily rate limit') || - message.includes('limit will reset') || - // Claude Code spending cap patterns (returns short message instead of error) - message.includes('spending cap') || - message.includes('spending limit') || - message.includes('cap reached') || - message.includes('budget exceeded') || - message.includes('billing limit reached') - ) { + // Check both API patterns and text patterns for comprehensive detection + if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) { return { type: 'BillingError', retryable: true }; } diff --git a/src/phases/reporting.ts b/src/phases/reporting.ts index ec9f86b..128b91a 100644 --- a/src/phases/reporting.ts +++ b/src/phases/reporting.ts @@ -7,6 +7,7 @@ import { fs, path } from 'zx'; import chalk from 'chalk'; import { PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; interface DeliverableFile { name: string; @@ -34,7 +35,13 @@ export async function assembleFinalReport(sourceDir: string): Promise { sections.push(content); console.log(chalk.green(`✅ Added ${file.name} findings`)); } else if (file.required) { - throw new Error(`Required file ${file.path} not found`); + throw new PentestError( + `Required deliverable file not found: ${file.path}`, + 'filesystem', + false, + { deliverableFile: file.path, sourceDir }, + ErrorCode.DELIVERABLE_NOT_FOUND + ); } else { console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`)); } diff --git a/src/queue-validation.ts b/src/queue-validation.ts index ce21e1d..a55d80f 100644 --- a/src/queue-validation.ts +++ b/src/queue-validation.ts @@ -6,6 +6,8 @@ import { fs, path } from 'zx'; import { PentestError } from './error-handling.js'; +import { ErrorCode } from './types/errors.js'; +import { type Result, ok, err } from './types/result.js'; import { asyncPipe } from './utils/functional.js'; export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; @@ -67,11 +69,10 @@ export interface ExploitationDecision { vulnType: VulnType; } -export interface SafeValidationResult { - success: boolean; - data?: ExploitationDecision; - error?: PentestError; -} +/** + * Result type for safe validation - explicit error handling. + */ +export type SafeValidationResult = Result; // Vulnerability type configuration as immutable data const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({ @@ -196,7 +197,8 @@ const validateExistenceRules = ( deliverablePath: pathsWithExistence.deliverable, queuePath: pathsWithExistence.queue, existence, - } + }, + ErrorCode.DELIVERABLE_NOT_FOUND ), }; } @@ -311,15 +313,18 @@ export async function validateQueueAndDeliverable( ); } -// Pure function to safely validate (returns result instead of throwing) -export const safeValidateQueueAndDeliverable = async ( +/** + * Safely validate queue and deliverable files. + * Returns Result for explicit error handling. + */ +export async function validateQueueSafe( vulnType: VulnType, sourceDir: string -): Promise => { +): Promise { try { const result = await validateQueueAndDeliverable(vulnType, sourceDir); - return { success: true, data: result }; + return ok(result); } catch (error) { - return { success: false, error: error as PentestError }; + return err(error as PentestError); } -}; +} diff --git a/src/services/agent-execution.ts b/src/services/agent-execution.ts new file mode 100644 index 0000000..ee09d5c --- /dev/null +++ b/src/services/agent-execution.ts @@ -0,0 +1,278 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Agent Execution Service + * + * Handles the full agent lifecycle: + * - Load config via ConfigLoaderService + * - Load prompt template using AGENTS[agentName].promptTemplate + * - Create git checkpoint + * - Start audit logging + * - Invoke Claude SDK via runClaudePrompt + * - Spending cap check using isSpendingCapBehavior + * - Handle failure (rollback, audit) + * - Validate output using AGENTS[agentName].deliverableFilename + * - Commit on success, log metrics + * + * No Temporal dependencies - pure domain logic. + */ + +import chalk from 'chalk'; + +import { Result, ok, err, isErr } from '../types/result.js'; +import { ErrorCode } from '../types/errors.js'; +import { PentestError } from '../error-handling.js'; +import { isSpendingCapBehavior } from '../utils/billing-detection.js'; +import { AGENTS } from '../session-manager.js'; +import { loadPrompt } from '../prompts/prompt-manager.js'; +import { + runClaudePrompt, + validateAgentOutput, + type ClaudePromptResult, +} from '../ai/claude-executor.js'; +import { + createGitCheckpoint, + commitGitSuccess, + rollbackGitWorkspace, + getGitCommitHash, +} from '../utils/git-manager.js'; +import { AuditSession } from '../audit/index.js'; +import type { AgentEndResult } from '../types/audit.js'; +import type { AgentName } from '../types/agents.js'; +import type { ConfigLoaderService } from './config-loader.js'; +import type { AgentMetrics } from '../types/metrics.js'; + +/** + * Input for agent execution. + */ +export interface AgentExecutionInput { + webUrl: string; + repoPath: string; + configPath?: string | undefined; + pipelineTestingMode?: boolean | undefined; + attemptNumber: number; +} + +/** + * Service for executing agents with full lifecycle management. + * + * NOTE: AuditSession is passed per-execution, NOT stored on the service. + * This is critical for parallel agent execution - each agent needs its own + * AuditSession instance because AuditSession uses instance state (currentAgentName) + * to track which agent is currently logging. + */ +export class AgentExecutionService { + private readonly configLoader: ConfigLoaderService; + + constructor(configLoader: ConfigLoaderService) { + this.configLoader = configLoader; + } + + /** + * Execute an agent with full lifecycle management. + * + * @param agentName - Name of the agent to execute + * @param input - Execution input parameters + * @param auditSession - Audit session for this specific agent execution + * @returns Result containing AgentEndResult on success, PentestError on failure + */ + async execute( + agentName: AgentName, + input: AgentExecutionInput, + auditSession: AuditSession + ): Promise> { + const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input; + + // 1. Load config (if provided) + const configResult = await this.configLoader.loadOptional(configPath); + if (isErr(configResult)) { + return configResult; + } + const distributedConfig = configResult.value; + + // 2. Load prompt + const promptTemplate = AGENTS[agentName].promptTemplate; + let prompt: string; + try { + prompt = await loadPrompt( + promptTemplate, + { webUrl, repoPath }, + distributedConfig, + pipelineTestingMode + ); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + return err( + new PentestError( + `Failed to load prompt for ${agentName}: ${errorMessage}`, + 'prompt', + false, + { agentName, promptTemplate, originalError: errorMessage }, + ErrorCode.PROMPT_LOAD_FAILED + ) + ); + } + + // 3. Create git checkpoint before execution + try { + await createGitCheckpoint(repoPath, agentName, attemptNumber); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + return err( + new PentestError( + `Failed to create git checkpoint for ${agentName}: ${errorMessage}`, + 'filesystem', + false, + { agentName, repoPath, originalError: errorMessage }, + ErrorCode.GIT_CHECKPOINT_FAILED + ) + ); + } + + // 4. Start audit logging + await auditSession.startAgent(agentName, prompt, attemptNumber); + + // 5. Execute agent + const result: ClaudePromptResult = await runClaudePrompt( + prompt, + repoPath, + '', // context + agentName, // description + agentName, + chalk.cyan, + auditSession + ); + + // 6. Spending cap check - defense-in-depth + if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) { + const resultText = result.result || ''; + if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) { + await rollbackGitWorkspace(repoPath, 'spending cap detected'); + const endResult: AgentEndResult = { + attemptNumber, + duration_ms: result.duration, + cost_usd: 0, + success: false, + model: result.model, + error: `Spending cap likely reached: ${resultText.slice(0, 100)}`, + }; + await auditSession.endAgent(agentName, endResult); + return err( + new PentestError( + `Spending cap likely reached: ${resultText.slice(0, 100)}`, + 'billing', + true, // Retryable with long backoff + { agentName, turns: result.turns, cost: result.cost }, + ErrorCode.SPENDING_CAP_REACHED + ) + ); + } + } + + // 7. Handle execution failure + if (!result.success) { + await rollbackGitWorkspace(repoPath, 'execution failure'); + const endResult: AgentEndResult = { + attemptNumber, + duration_ms: result.duration, + cost_usd: result.cost || 0, + success: false, + model: result.model, + error: result.error || 'Execution failed', + }; + await auditSession.endAgent(agentName, endResult); + return err( + new PentestError( + result.error || 'Agent execution failed', + 'validation', + result.retryable ?? true, + { agentName, originalError: result.error }, + ErrorCode.AGENT_EXECUTION_FAILED + ) + ); + } + + // 8. Validate output + const validationPassed = await validateAgentOutput(result, agentName, repoPath); + if (!validationPassed) { + await rollbackGitWorkspace(repoPath, 'validation failure'); + const endResult: AgentEndResult = { + attemptNumber, + duration_ms: result.duration, + cost_usd: result.cost || 0, + success: false, + model: result.model, + error: 'Output validation failed', + }; + await auditSession.endAgent(agentName, endResult); + return err( + new PentestError( + `Agent ${agentName} failed output validation`, + 'validation', + true, // Retryable - agent may succeed on retry + { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename }, + ErrorCode.OUTPUT_VALIDATION_FAILED + ) + ); + } + + // 9. Success - commit deliverables, then capture checkpoint hash + await commitGitSuccess(repoPath, agentName); + const commitHash = await getGitCommitHash(repoPath); + + const endResult: AgentEndResult = { + attemptNumber, + duration_ms: result.duration, + cost_usd: result.cost || 0, + success: true, + model: result.model, + ...(commitHash && { checkpoint: commitHash }), + }; + await auditSession.endAgent(agentName, endResult); + + return ok(endResult); + } + + /** + * Execute an agent, throwing PentestError on failure. + * + * This is the preferred method for Temporal activities, which need to + * catch errors and classify them into ApplicationFailure. Avoids requiring + * activities to import Result utilities, keeping the boundary clean. + * + * @param agentName - Name of the agent to execute + * @param input - Execution input parameters + * @param auditSession - Audit session for this specific agent execution + * @returns AgentEndResult on success + * @throws PentestError on failure + */ + async executeOrThrow( + agentName: AgentName, + input: AgentExecutionInput, + auditSession: AuditSession + ): Promise { + const result = await this.execute(agentName, input, auditSession); + if (isErr(result)) { + throw result.error; + } + return result.value; + } + + /** + * Convert AgentEndResult to AgentMetrics for workflow state. + */ + static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics { + return { + durationMs: endResult.duration_ms, + inputTokens: null, // Not currently exposed by SDK wrapper + outputTokens: null, + costUsd: endResult.cost_usd, + numTurns: result.turns ?? null, + model: result.model, + }; + } +} diff --git a/src/services/config-loader.ts b/src/services/config-loader.ts new file mode 100644 index 0000000..629ca43 --- /dev/null +++ b/src/services/config-loader.ts @@ -0,0 +1,75 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Config Loader Service + * + * Wraps parseConfig + distributeConfig with Result type for explicit error handling. + * Pure service with no Temporal dependencies. + */ + +import { parseConfig, distributeConfig } from '../config-parser.js'; +import { PentestError } from '../error-handling.js'; +import { Result, ok, err } from '../types/result.js'; +import { ErrorCode } from '../types/errors.js'; +import type { DistributedConfig } from '../types/config.js'; + +/** + * Service for loading and distributing configuration files. + * + * Provides a Result-based API for explicit error handling, + * allowing callers to decide how to handle failures. + */ +export class ConfigLoaderService { + /** + * Load and distribute a configuration file. + * + * @param configPath - Path to the YAML configuration file + * @returns Result containing DistributedConfig on success, PentestError on failure + */ + async load(configPath: string): Promise> { + try { + const config = await parseConfig(configPath); + const distributed = distributeConfig(config); + return ok(distributed); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + + // Determine appropriate error code based on error message + let errorCode = ErrorCode.CONFIG_PARSE_ERROR; + if (errorMessage.includes('not found') || errorMessage.includes('ENOENT')) { + errorCode = ErrorCode.CONFIG_NOT_FOUND; + } else if (errorMessage.includes('validation failed')) { + errorCode = ErrorCode.CONFIG_VALIDATION_FAILED; + } + + return err( + new PentestError( + `Failed to load config ${configPath}: ${errorMessage}`, + 'config', + false, + { configPath, originalError: errorMessage }, + errorCode + ) + ); + } + } + + /** + * Load config if path is provided, otherwise return null config. + * + * @param configPath - Optional path to the YAML configuration file + * @returns Result containing DistributedConfig (or null) on success, PentestError on failure + */ + async loadOptional( + configPath: string | undefined + ): Promise> { + if (!configPath) { + return ok(null); + } + return this.load(configPath); + } +} diff --git a/src/services/container.ts b/src/services/container.ts new file mode 100644 index 0000000..f0573aa --- /dev/null +++ b/src/services/container.ts @@ -0,0 +1,117 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Dependency Injection Container + * + * Provides a per-workflow container for service instances. + * Services are wired with explicit constructor injection. + * + * Usage: + * const container = getOrCreateContainer(workflowId, sessionMetadata); + * const auditSession = new AuditSession(sessionMetadata); // Per-agent + * await auditSession.initialize(workflowId); + * const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession); + */ + +import type { SessionMetadata } from '../audit/utils.js'; +import { AgentExecutionService } from './agent-execution.js'; +import { ConfigLoaderService } from './config-loader.js'; +import { ExploitationCheckerService } from './exploitation-checker.js'; + +/** + * Dependencies required to create a Container. + * + * NOTE: AuditSession is NOT stored in the container. + * Each agent execution receives its own AuditSession instance + * because AuditSession uses instance state (currentAgentName) that + * cannot be shared across parallel agents. + */ +export interface ContainerDependencies { + readonly sessionMetadata: SessionMetadata; +} + +/** + * DI Container for a single workflow. + * + * Holds all service instances for the workflow lifecycle. + * Services are instantiated once and reused across agent executions. + * + * NOTE: AuditSession is NOT stored here - it's passed per agent execution + * to support parallel agents each having their own logging context. + */ +export class Container { + readonly sessionMetadata: SessionMetadata; + readonly agentExecution: AgentExecutionService; + readonly configLoader: ConfigLoaderService; + readonly exploitationChecker: ExploitationCheckerService; + + constructor(deps: ContainerDependencies) { + this.sessionMetadata = deps.sessionMetadata; + + // Wire services with explicit constructor injection + this.configLoader = new ConfigLoaderService(); + this.exploitationChecker = new ExploitationCheckerService(); + this.agentExecution = new AgentExecutionService(this.configLoader); + } +} + +/** + * Map of workflowId to Container instance. + * Each workflow gets its own container scoped to its lifecycle. + */ +const containers = new Map(); + +/** + * Get or create a Container for a workflow. + * + * If a container already exists for the workflowId, returns it. + * Otherwise, creates a new container with the provided dependencies. + * + * @param workflowId - Unique workflow identifier + * @param sessionMetadata - Session metadata for audit paths + * @returns Container instance for the workflow + */ +export function getOrCreateContainer( + workflowId: string, + sessionMetadata: SessionMetadata +): Container { + let container = containers.get(workflowId); + + if (!container) { + container = new Container({ sessionMetadata }); + containers.set(workflowId, container); + } + + return container; +} + +/** + * Remove a Container when a workflow completes. + * + * Should be called in logWorkflowComplete to clean up resources. + * + * @param workflowId - Unique workflow identifier + */ +export function removeContainer(workflowId: string): void { + containers.delete(workflowId); +} + +/** + * Get an existing Container for a workflow, if one exists. + * + * Unlike getOrCreateContainer, this does NOT create a new container. + * Returns undefined if no container exists for the workflowId. + * + * Useful for lightweight activities that can benefit from an existing + * container but don't need to create one. + * + * @param workflowId - Unique workflow identifier + * @returns Container instance or undefined + */ +export function getContainer(workflowId: string): Container | undefined { + return containers.get(workflowId); +} diff --git a/src/services/exploitation-checker.ts b/src/services/exploitation-checker.ts new file mode 100644 index 0000000..a5c30d7 --- /dev/null +++ b/src/services/exploitation-checker.ts @@ -0,0 +1,74 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Exploitation Checker Service + * + * Pure domain logic for determining whether exploitation should run. + * Reads queue file, parses JSON, returns decision. + * + * No Temporal dependencies - this is pure business logic. + */ + +import chalk from 'chalk'; +import { + validateQueueSafe, + type VulnType, + type ExploitationDecision, +} from '../queue-validation.js'; +import { isOk } from '../types/result.js'; + +/** + * Service for checking exploitation queue decisions. + * + * Determines whether an exploit agent should run based on + * the vulnerability analysis deliverables and queue files. + */ +export class ExploitationCheckerService { + /** + * Check if exploitation should run for a given vulnerability type. + * + * Reads the vulnerability queue file and returns the decision. + * This is pure domain logic - reads queue file, parses JSON, returns decision. + * + * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz) + * @param repoPath - Path to the repository containing deliverables + * @returns ExploitationDecision indicating whether to exploit + * @throws PentestError if validation fails and is retryable + */ + async checkQueue(vulnType: VulnType, repoPath: string): Promise { + const result = await validateQueueSafe(vulnType, repoPath); + + if (isOk(result)) { + const decision = result.value; + console.log( + chalk.blue( + ` ${vulnType}: ${decision.shouldExploit ? `${decision.vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}` + ) + ); + return decision; + } + + // Validation failed - check if we should retry or skip + const error = result.error; + if (error.retryable) { + // Re-throw retryable errors so caller can handle retry + console.log(chalk.yellow(` ${vulnType}: ${error.message} (retryable)`)); + throw error; + } + + // Non-retryable error - skip exploitation gracefully + console.log( + chalk.yellow(` ${vulnType}: ${error.message}, skipping exploitation`) + ); + return { + shouldExploit: false, + shouldRetry: false, + vulnerabilityCount: 0, + vulnType, + }; + } +} diff --git a/src/services/index.ts b/src/services/index.ts new file mode 100644 index 0000000..159eea2 --- /dev/null +++ b/src/services/index.ts @@ -0,0 +1,20 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Services Module + * + * Exports DI container and service classes for Shannon agent execution. + * Services are pure domain logic with no Temporal dependencies. + */ + +export { Container, getOrCreateContainer, removeContainer } from './container.js'; +export type { ContainerDependencies } from './container.js'; + +export { ConfigLoaderService } from './config-loader.js'; +export { ExploitationCheckerService } from './exploitation-checker.js'; +export { AgentExecutionService } from './agent-execution.js'; +export type { AgentExecutionInput } from './agent-execution.js'; diff --git a/src/session-manager.ts b/src/session-manager.ts index 4e7fced..6307e01 100644 --- a/src/session-manager.ts +++ b/src/session-manager.ts @@ -7,72 +7,99 @@ import type { AgentName, AgentDefinition } from './types/index.js'; // Agent definitions according to PRD +// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES export const AGENTS: Readonly> = Object.freeze({ 'pre-recon': { name: 'pre-recon', displayName: 'Pre-recon agent', - prerequisites: [] + prerequisites: [], + promptTemplate: 'pre-recon-code', + deliverableFilename: 'code_analysis_deliverable.md', }, 'recon': { name: 'recon', displayName: 'Recon agent', - prerequisites: ['pre-recon'] + prerequisites: ['pre-recon'], + promptTemplate: 'recon', + deliverableFilename: 'recon_deliverable.md', }, 'injection-vuln': { name: 'injection-vuln', displayName: 'Injection vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-injection', + deliverableFilename: 'injection_analysis_deliverable.md', }, 'xss-vuln': { name: 'xss-vuln', displayName: 'XSS vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-xss', + deliverableFilename: 'xss_analysis_deliverable.md', }, 'auth-vuln': { name: 'auth-vuln', displayName: 'Auth vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-auth', + deliverableFilename: 'auth_analysis_deliverable.md', }, 'ssrf-vuln': { name: 'ssrf-vuln', displayName: 'SSRF vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-ssrf', + deliverableFilename: 'ssrf_analysis_deliverable.md', }, 'authz-vuln': { name: 'authz-vuln', displayName: 'Authz vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-authz', + deliverableFilename: 'authz_analysis_deliverable.md', }, 'injection-exploit': { name: 'injection-exploit', displayName: 'Injection exploit agent', - prerequisites: ['injection-vuln'] + prerequisites: ['injection-vuln'], + promptTemplate: 'exploit-injection', + deliverableFilename: 'injection_exploitation_evidence.md', }, 'xss-exploit': { name: 'xss-exploit', displayName: 'XSS exploit agent', - prerequisites: ['xss-vuln'] + prerequisites: ['xss-vuln'], + promptTemplate: 'exploit-xss', + deliverableFilename: 'xss_exploitation_evidence.md', }, 'auth-exploit': { name: 'auth-exploit', displayName: 'Auth exploit agent', - prerequisites: ['auth-vuln'] + prerequisites: ['auth-vuln'], + promptTemplate: 'exploit-auth', + deliverableFilename: 'auth_exploitation_evidence.md', }, 'ssrf-exploit': { name: 'ssrf-exploit', displayName: 'SSRF exploit agent', - prerequisites: ['ssrf-vuln'] + prerequisites: ['ssrf-vuln'], + promptTemplate: 'exploit-ssrf', + deliverableFilename: 'ssrf_exploitation_evidence.md', }, 'authz-exploit': { name: 'authz-exploit', displayName: 'Authz exploit agent', - prerequisites: ['authz-vuln'] + prerequisites: ['authz-vuln'], + promptTemplate: 'exploit-authz', + deliverableFilename: 'authz_exploitation_evidence.md', }, 'report': { name: 'report', displayName: 'Report agent', - prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'] - } + prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'], + promptTemplate: 'report-executive', + deliverableFilename: 'comprehensive_security_assessment_report.md', + }, }); // Phase names for metrics aggregation diff --git a/src/temporal/activities.ts b/src/temporal/activities.ts index 1a2252e..88abcfd 100644 --- a/src/temporal/activities.ts +++ b/src/temporal/activities.ts @@ -7,28 +7,57 @@ /** * Temporal activities for Shannon agent execution. * - * Each activity wraps a single agent execution with: + * Each activity wraps service calls with Temporal-specific concerns: * - Heartbeat loop (2s interval) to signal worker liveness - * - Git checkpoint/rollback/commit per attempt - * - Error classification for Temporal retry behavior - * - Audit session logging + * - Error classification into ApplicationFailure + * - Container lifecycle management * - * Temporal handles retries based on error classification: - * - Retryable: BillingError, TransientError (429, 5xx, network) - * - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc. + * Business logic is delegated to services in src/services/. */ import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity'; import chalk from 'chalk'; +import path from 'path'; +import fs from 'fs/promises'; + +import { classifyErrorForTemporal, PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js'; +import { ExploitationCheckerService } from '../services/exploitation-checker.js'; +import type { VulnType, ExploitationDecision } from '../queue-validation.js'; +import { AuditSession } from '../audit/index.js'; +import type { WorkflowSummary } from '../audit/workflow-logger.js'; +import type { AgentName } from '../types/agents.js'; +import { ALL_AGENTS } from '../types/agents.js'; +import type { AgentMetrics, ResumeState } from './shared.js'; +import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js'; +import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js'; +import { AGENTS } from '../session-manager.js'; +import { executeGitCommandWithRetry } from '../utils/git-manager.js'; +import type { ResumeAttempt } from '../audit/metrics-tracker.js'; // Max lengths to prevent Temporal protobuf buffer overflow const MAX_ERROR_MESSAGE_LENGTH = 2000; const MAX_STACK_TRACE_LENGTH = 1000; // Max retries for output validation errors (agent didn't save deliverables) -// Lower than default 50 since this is unlikely to self-heal const MAX_OUTPUT_VALIDATION_RETRIES = 3; +const HEARTBEAT_INTERVAL_MS = 2000; + +/** + * Input for all agent activities. + */ +export interface ActivityInput { + webUrl: string; + repoPath: string; + configPath?: string; + outputPath?: string; + pipelineTestingMode?: boolean; + workflowId: string; + sessionId: string; +} + /** * Truncate error message to prevent buffer overflow in Temporal serialization. */ @@ -48,85 +77,34 @@ function truncateStackTrace(failure: ApplicationFailure): void { } } -import { - runClaudePrompt, - validateAgentOutput, - type ClaudePromptResult, -} from '../ai/claude-executor.js'; -import { loadPrompt } from '../prompts/prompt-manager.js'; -import { parseConfig, distributeConfig } from '../config-parser.js'; -import { classifyErrorForTemporal } from '../error-handling.js'; -import { - safeValidateQueueAndDeliverable, - type VulnType, - type ExploitationDecision, -} from '../queue-validation.js'; -import { - createGitCheckpoint, - commitGitSuccess, - rollbackGitWorkspace, - getGitCommitHash, -} from '../utils/git-manager.js'; -import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js'; -import { getPromptNameForAgent } from '../types/agents.js'; -import { AuditSession } from '../audit/index.js'; -import type { WorkflowSummary } from '../audit/workflow-logger.js'; -import type { AgentName } from '../types/agents.js'; -import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js'; -import type { AgentMetrics, ResumeState } from './shared.js'; -import type { DistributedConfig } from '../types/config.js'; -import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js'; -import type { ResumeAttempt } from '../audit/metrics-tracker.js'; -import { executeGitCommandWithRetry } from '../utils/git-manager.js'; -import path from 'path'; -import fs from 'fs/promises'; - -const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing) - /** - * Input for all agent activities. - * Matches PipelineInput but with required workflowId for audit correlation. + * Build SessionMetadata from ActivityInput. */ -export interface ActivityInput { - webUrl: string; - repoPath: string; - configPath?: string; - outputPath?: string; - pipelineTestingMode?: boolean; - workflowId: string; - sessionId: string; // Workspace name (for resume) or workflowId (for new runs) +function buildSessionMetadata(input: ActivityInput): SessionMetadata { + const { webUrl, repoPath, outputPath, sessionId } = input; + return { + id: sessionId, + webUrl, + repoPath, + ...(outputPath && { outputPath }), + }; } /** - * Core activity implementation. + * Core activity implementation using services. * * Executes a single agent with: * 1. Heartbeat loop for worker liveness - * 2. Config loading (if configPath provided) - * 3. Audit session initialization - * 4. Prompt loading - * 5. Git checkpoint before execution - * 6. Agent execution (single attempt) - * 7. Output validation - * 8. Git commit on success, rollback on failure - * 9. Error classification for Temporal retry + * 2. Container creation/reuse + * 3. Service-based agent execution + * 4. Error classification for Temporal retry */ async function runAgentActivity( agentName: AgentName, input: ActivityInput ): Promise { - const { - webUrl, - repoPath, - configPath, - outputPath, - pipelineTestingMode = false, - workflowId, - } = input; - + const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input; const startTime = Date.now(); - - // Get attempt number from Temporal context (tracks retries automatically) const attemptNumber = Context.current().info.attempt; // Heartbeat loop - signals worker is alive to Temporal server @@ -136,158 +114,63 @@ async function runAgentActivity( }, HEARTBEAT_INTERVAL_MS); try { - // 1. Load config (if provided) - let distributedConfig: DistributedConfig | null = null; - if (configPath) { - try { - const config = await parseConfig(configPath); - distributedConfig = distributeConfig(config); - } catch (err) { - throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`); - } - } + // Build session metadata and get/create container + const sessionMetadata = buildSessionMetadata(input); + const container = getOrCreateContainer(workflowId, sessionMetadata); - // 2. Build session metadata for audit - // Use sessionId (workspace name) for directory, workflowId for tracking - const sessionMetadata: SessionMetadata = { - id: input.sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; - - // 3. Initialize audit session (idempotent, safe across retries) + // Create audit session for THIS agent execution + // NOTE: Each agent needs its own AuditSession because AuditSession uses + // instance state (currentAgentName) that cannot be shared across parallel agents const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(workflowId); - // 4. Load prompt - const promptName = getPromptNameForAgent(agentName); - const prompt = await loadPrompt( - promptName, - { webUrl, repoPath }, - distributedConfig, - pipelineTestingMode - ); - - // 5. Create git checkpoint before execution - await createGitCheckpoint(repoPath, agentName, attemptNumber); - await auditSession.startAgent(agentName, prompt, attemptNumber); - - // 6. Execute agent (single attempt - Temporal handles retries) - const result: ClaudePromptResult = await runClaudePrompt( - prompt, - repoPath, - '', // context - agentName, // description + // Execute agent via service (throws PentestError on failure) + const endResult = await container.agentExecution.executeOrThrow( agentName, - chalk.cyan, + { + webUrl, + repoPath, + configPath, + pipelineTestingMode, + attemptNumber, + }, auditSession ); - // 6.5. Sanity check: Detect spending cap that slipped through all detection layers - // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost - if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) { - const resultText = result.result || ''; - const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText); - - if (looksLikeBillingError) { - await rollbackGitWorkspace(repoPath, 'spending cap detected'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: 0, - success: false, - model: result.model, - error: `Spending cap likely reached: ${resultText.slice(0, 100)}`, - }); - // Throw as billing error so Temporal retries with long backoff - throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`); - } - } - - // 7. Handle execution failure - if (!result.success) { - await rollbackGitWorkspace(repoPath, 'execution failure'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: false, - model: result.model, - error: result.error || 'Execution failed', - }); - throw new Error(result.error || 'Agent execution failed'); - } - - // 8. Validate output - const validationPassed = await validateAgentOutput(result, agentName, repoPath); - if (!validationPassed) { - await rollbackGitWorkspace(repoPath, 'validation failure'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: false, - model: result.model, - error: 'Output validation failed', - }); - - // Limit output validation retries (unlikely to self-heal) - if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) { - throw ApplicationFailure.nonRetryable( - `Agent ${agentName} failed output validation after ${attemptNumber} attempts`, - 'OutputValidationError', - [{ agentName, attemptNumber, elapsed: Date.now() - startTime }] - ); - } - // Let Temporal retry (will be classified as OutputValidationError) - throw new Error(`Agent ${agentName} failed output validation`); - } - - // 9. Success - commit deliverables, then capture checkpoint hash - await commitGitSuccess(repoPath, agentName); - const commitHash = await getGitCommitHash(repoPath); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: true, - model: result.model, - ...(commitHash && { checkpoint: commitHash }), - }); - - // 10. Return metrics + // Success - return metrics return { durationMs: Date.now() - startTime, - inputTokens: null, // Not currently exposed by SDK wrapper + inputTokens: null, outputTokens: null, - costUsd: result.cost ?? null, - numTurns: result.turns ?? null, - model: result.model, + costUsd: endResult.cost_usd, + numTurns: null, + model: endResult.model, }; } catch (error) { - // Rollback git workspace before Temporal retry to ensure clean state - try { - await rollbackGitWorkspace(repoPath, 'error recovery'); - } catch (rollbackErr) { - // Log but don't fail - rollback is best-effort - console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr); - } - - // If error is already an ApplicationFailure (e.g., from our retry limit logic), - // re-throw it directly without re-classifying + // If error is already an ApplicationFailure, re-throw directly if (error instanceof ApplicationFailure) { throw error; } + // Check if output validation retry limit reached (PentestError with code) + if ( + error instanceof PentestError && + error.code === ErrorCode.OUTPUT_VALIDATION_FAILED && + attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES + ) { + throw ApplicationFailure.nonRetryable( + `Agent ${agentName} failed output validation after ${attemptNumber} attempts`, + 'OutputValidationError', + [{ agentName, attemptNumber, elapsed: Date.now() - startTime }] + ); + } + // Classify error for Temporal retry behavior const classified = classifyErrorForTemporal(error); - // Truncate message to prevent protobuf buffer overflow const rawMessage = error instanceof Error ? error.message : String(error); const message = truncateErrorMessage(rawMessage); if (classified.retryable) { - // Temporal will retry with configured backoff const failure = ApplicationFailure.create({ message, type: classified.type, @@ -296,7 +179,6 @@ async function runAgentActivity( truncateStackTrace(failure); throw failure; } else { - // Fail immediately - no retry const failure = ApplicationFailure.nonRetryable(message, classified.type, [ { agentName, attemptNumber, elapsed: Date.now() - startTime }, ]); @@ -309,7 +191,6 @@ async function runAgentActivity( } // === Individual Agent Activity Exports === -// Each function is a thin wrapper around runAgentActivity with the agent name. export async function runPreReconAgent(input: ActivityInput): Promise { return runAgentActivity('pre-recon', input); @@ -363,25 +244,24 @@ export async function runReportAgent(input: ActivityInput): Promise { const { repoPath } = input; - console.log(chalk.blue('📝 Assembling deliverables from specialist agents...')); + console.log(chalk.blue(' Assembling deliverables from specialist agents...')); try { await assembleFinalReport(repoPath); } catch (error) { const err = error as Error; - console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`)); - // Don't throw - the report agent can still create content even if no exploitation files exist + console.log(chalk.yellow(` Warning: Error assembling final report: ${err.message}`)); } } /** * Inject model metadata into the final report. - * This must be called AFTER runReportAgent to add the model information to the Executive Summary. */ export async function injectReportMetadataActivity(input: ActivityInput): Promise { const { repoPath, sessionId, outputPath } = input; @@ -392,65 +272,33 @@ export async function injectReportMetadataActivity(input: ActivityInput): Promis await injectModelIntoReport(repoPath, effectiveOutputPath); } catch (error) { const err = error as Error; - console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`)); - // Don't throw - this is a non-critical enhancement + console.log(chalk.yellow(` Warning: Error injecting model into report: ${err.message}`)); } } +// === Exploitation Queue Check === + /** * Check if exploitation should run for a given vulnerability type. - * Reads the vulnerability queue file and returns the decision. * - * This activity allows the workflow to skip exploit agents entirely - * when no vulnerabilities were found, saving API calls and time. - * - * Error handling: - * - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry - * - Non-retryable errors: skip exploitation gracefully + * Uses existing container if available (from prior agent runs), + * otherwise creates service directly (stateless, no dependencies). */ export async function checkExploitationQueue( input: ActivityInput, vulnType: VulnType ): Promise { - const { repoPath } = input; + const { repoPath, workflowId } = input; - const result = await safeValidateQueueAndDeliverable(vulnType, repoPath); + // Reuse container's service if available (from prior vuln agent runs) + const existingContainer = getContainer(workflowId); + const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService(); - if (result.success && result.data) { - const { shouldExploit, vulnerabilityCount } = result.data; - console.log( - chalk.blue( - `🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}` - ) - ); - return result.data; - } - - // Validation failed - check if we should retry or skip - const error = result.error; - if (error?.retryable) { - // Re-throw retryable errors so Temporal can retry the vuln agent - console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`)); - throw error; - } - - // Non-retryable error - skip exploitation gracefully - console.log( - chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`) - ); - return { - shouldExploit: false, - shouldRetry: false, - vulnerabilityCount: 0, - vulnType, - }; + return checker.checkQueue(vulnType, repoPath); } // === Resume Activities === -/** - * Session.json structure for resume state loading - */ interface SessionJson { session: { id: string; @@ -460,18 +308,18 @@ interface SessionJson { resumeAttempts?: ResumeAttempt[]; }; metrics: { - agents: Record; + agents: Record< + string, + { + status: 'in-progress' | 'success' | 'failed'; + checkpoint?: string; + } + >; }; } /** * Load resume state from an existing workspace. - * Validates workspace exists, URL matches, and determines which agents to skip. - * - * @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch */ export async function loadResumeState( workspaceName: string, @@ -480,7 +328,6 @@ export async function loadResumeState( ): Promise { const sessionPath = path.join('./audit-logs', workspaceName, 'session.json'); - // Validate workspace exists const exists = await fileExists(sessionPath); if (!exists) { throw ApplicationFailure.nonRetryable( @@ -489,7 +336,6 @@ export async function loadResumeState( ); } - // Load session.json let session: SessionJson; try { session = await readJson(sessionPath); @@ -501,7 +347,6 @@ export async function loadResumeState( ); } - // Validate URL matches if (session.session.webUrl !== expectedUrl) { throw ApplicationFailure.nonRetryable( `URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`, @@ -509,20 +354,17 @@ export async function loadResumeState( ); } - // Find completed agents (status === 'success' AND deliverable exists) const completedAgents: string[] = []; const agents = session.metrics.agents; for (const agentName of ALL_AGENTS) { const agentData = agents[agentName]; - - // Skip if agent never ran or didn't succeed if (!agentData || agentData.status !== 'success') { continue; } - // Validate deliverable exists - const deliverablePath = getDeliverablePath(agentName, expectedRepoPath); + const deliverableFilename = AGENTS[agentName].deliverableFilename; + const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`; const deliverableExists = await fileExists(deliverablePath); if (!deliverableExists) { @@ -532,11 +374,9 @@ export async function loadResumeState( continue; } - // Agent completed successfully and deliverable exists completedAgents.push(agentName); } - // Find latest checkpoint from completed agents const checkpoints = completedAgents .map((name) => agents[name]?.checkpoint) .filter((hash): hash is string => hash != null); @@ -548,18 +388,16 @@ export async function loadResumeState( throw ApplicationFailure.nonRetryable( `Cannot resume workspace ${workspaceName}: ` + - (successAgents.length > 0 - ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` + - `but their deliverable files are missing from disk. ` + - `Start a fresh run instead.` - : `No agents completed successfully. Start a fresh run instead.`), + (successAgents.length > 0 + ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` + + `but their deliverable files are missing from disk. ` + + `Start a fresh run instead.` + : `No agents completed successfully. Start a fresh run instead.`), 'NoCheckpointsError' ); } - // Find most recent commit among checkpoints const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints); - const originalWorkflowId = session.session.originalWorkflowId || session.session.id; console.log(chalk.cyan(`=== RESUME STATE ===`)); @@ -576,20 +414,21 @@ export async function loadResumeState( }; } -/** - * Find the most recent commit among a list of commit hashes. - * Uses git rev-list to determine which commit is newest. - */ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise { if (commitHashes.length === 1) { const hash = commitHashes[0]; if (!hash) { - throw new Error('Empty commit hash in array'); + throw new PentestError( + 'Empty commit hash in array', + 'filesystem', + false, // Non-retryable - corrupt workspace state + { phase: 'resume' }, + ErrorCode.GIT_CHECKPOINT_FAILED + ); } return hash; } - // Use git rev-list to find the most recent commit among all hashes const result = await executeGitCommandWithRetry( ['git', 'rev-list', '--max-count=1', ...commitHashes], repoPath, @@ -601,10 +440,6 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi /** * Restore git workspace to a checkpoint and clean up partial deliverables. - * - * @param repoPath - Repository path - * @param checkpointHash - Git commit hash to reset to - * @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up) */ export async function restoreGitCheckpoint( repoPath: string, @@ -613,8 +448,6 @@ export async function restoreGitCheckpoint( ): Promise { console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`)); - // Checkpoint hash points to the success commit (after commitGitSuccess), - // so git reset --hard naturally preserves all completed agent deliverables. await executeGitCommandWithRetry( ['git', 'reset', '--hard', checkpointHash], repoPath, @@ -626,9 +459,9 @@ export async function restoreGitCheckpoint( 'clean untracked files for resume' ); - // Clean up any partial deliverables from incomplete agents for (const agentName of incompleteAgents) { - const deliverablePath = getDeliverablePath(agentName, repoPath); + const deliverableFilename = AGENTS[agentName].deliverableFilename; + const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`; try { const exists = await fileExists(deliverablePath); if (exists) { @@ -645,48 +478,31 @@ export async function restoreGitCheckpoint( /** * Record a resume attempt in session.json. - * Tracks the new workflow ID, terminated workflows, and checkpoint hash. */ export async function recordResumeAttempt( input: ActivityInput, terminatedWorkflows: string[], checkpointHash: string ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; - + const sessionMetadata = buildSessionMetadata(input); const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(); - - await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash); + await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash); } +// === Phase Transition Activities === + /** * Log phase transition to the unified workflow log. - * Called at phase boundaries for per-workflow logging. */ export async function logPhaseTransition( input: ActivityInput, phase: string, event: 'start' | 'complete' ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; - + const sessionMetadata = buildSessionMetadata(input); const auditSession = new AuditSession(sessionMetadata); - await auditSession.initialize(workflowId); + await auditSession.initialize(input.workflowId); if (event === 'start') { await auditSession.logPhaseStart(phase); @@ -696,28 +512,22 @@ export async function logPhaseTransition( } /** - * Log workflow completion with full summary to the unified workflow log. - * Called at the end of the workflow to write a summary breakdown. + * Log workflow completion with full summary. + * Cleans up container when done. */ export async function logWorkflowComplete( input: ActivityInput, summary: WorkflowSummary ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; + const { repoPath, workflowId } = input; + const sessionMetadata = buildSessionMetadata(input); const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(workflowId); await auditSession.updateSessionStatus(summary.status); - // Use cumulative metrics from session.json (includes all resume attempts) - const sessionData = await auditSession.getMetrics() as { + // Use cumulative metrics from session.json + const sessionData = (await auditSession.getMetrics()) as { metrics: { total_duration_ms: number; total_cost_usd: number; @@ -725,7 +535,7 @@ export async function logWorkflowComplete( }; }; - // Fill in metrics for skipped agents (completed in previous runs) + // Fill in metrics for skipped agents const agentMetrics = { ...summary.agentMetrics }; for (const agentName of summary.completedAgents) { if (!agentMetrics[agentName]) { @@ -747,10 +557,13 @@ export async function logWorkflowComplete( }; await auditSession.logWorkflowComplete(cumulativeSummary); - // Copy all deliverables to audit-logs once at workflow end (non-fatal) + // Copy deliverables to audit-logs try { await copyDeliverablesToAudit(sessionMetadata, repoPath); } catch (copyErr) { console.error('Failed to copy deliverables to audit-logs:', copyErr); } + + // Clean up container + removeContainer(workflowId); } diff --git a/src/temporal/shared.ts b/src/temporal/shared.ts index 5280645..b8cd582 100644 --- a/src/temporal/shared.ts +++ b/src/temporal/shared.ts @@ -1,5 +1,9 @@ import { defineQuery } from '@temporalio/workflow'; +// Re-export AgentMetrics from central types location +export type { AgentMetrics } from '../types/metrics.js'; +import type { AgentMetrics } from '../types/metrics.js'; + // === Types === export interface PipelineInput { @@ -22,15 +26,6 @@ export interface ResumeState { originalWorkflowId: string; } -export interface AgentMetrics { - durationMs: number; - inputTokens: number | null; - outputTokens: number | null; - costUsd: number | null; - numTurns: number | null; - model?: string | undefined; -} - export interface PipelineSummary { totalCostUsd: number; totalDurationMs: number; // Wall-clock time (end - start) diff --git a/src/temporal/summary-mapper.ts b/src/temporal/summary-mapper.ts new file mode 100644 index 0000000..e160026 --- /dev/null +++ b/src/temporal/summary-mapper.ts @@ -0,0 +1,45 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Maps PipelineState to WorkflowSummary for audit logging. + * Pure function with no side effects. + */ + +import type { PipelineState } from './shared.js'; +import type { WorkflowSummary } from '../audit/workflow-logger.js'; + +/** + * Maps PipelineState to WorkflowSummary. + * + * This function is deterministic (no Date.now() or I/O) so it can be + * safely imported into Temporal workflows. The caller must ensure + * state.summary is set before calling (via computeSummary). + */ +export function toWorkflowSummary( + state: PipelineState, + status: 'completed' | 'failed' +): WorkflowSummary { + // state.summary must be computed before calling this mapper + const summary = state.summary; + if (!summary) { + throw new Error('toWorkflowSummary: state.summary must be set before calling'); + } + + return { + status, + totalDurationMs: summary.totalDurationMs, + totalCostUsd: summary.totalCostUsd, + completedAgents: state.completedAgents, + agentMetrics: Object.fromEntries( + Object.entries(state.agentMetrics).map(([name, m]) => [ + name, + { durationMs: m.durationMs, costUsd: m.costUsd }, + ]) + ), + ...(state.error && { error: state.error }), + }; +} diff --git a/src/temporal/workflows.ts b/src/temporal/workflows.ts index 3bc2804..c94b274 100644 --- a/src/temporal/workflows.ts +++ b/src/temporal/workflows.ts @@ -43,6 +43,7 @@ import { import type { VulnType } from '../queue-validation.js'; import type { AgentName } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; +import { toWorkflowSummary } from './summary-mapper.js'; // Retry configuration for production (long intervals for billing recovery) const PRODUCTION_RETRY = { @@ -417,18 +418,7 @@ export async function pentestPipelineWorkflow( state.summary = computeSummary(state); // Log workflow completion summary - await a.logWorkflowComplete(activityInput, { - status: 'completed', - totalDurationMs: state.summary.totalDurationMs, - totalCostUsd: state.summary.totalCostUsd, - completedAgents: state.completedAgents, - agentMetrics: Object.fromEntries( - Object.entries(state.agentMetrics).map(([name, m]) => [ - name, - { durationMs: m.durationMs, costUsd: m.costUsd }, - ]) - ), - }); + await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed')); return state; } catch (error) { @@ -438,19 +428,7 @@ export async function pentestPipelineWorkflow( state.summary = computeSummary(state); // Log workflow failure summary - await a.logWorkflowComplete(activityInput, { - status: 'failed', - totalDurationMs: state.summary.totalDurationMs, - totalCostUsd: state.summary.totalCostUsd, - completedAgents: state.completedAgents, - agentMetrics: Object.fromEntries( - Object.entries(state.agentMetrics).map(([name, m]) => [ - name, - { durationMs: m.durationMs, costUsd: m.costUsd }, - ]) - ), - error: state.error ?? undefined, - }); + await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed')); throw error; } diff --git a/src/types/agents.ts b/src/types/agents.ts index 041e0f3..c9fa9ad 100644 --- a/src/types/agents.ts +++ b/src/types/agents.ts @@ -34,21 +34,6 @@ export const ALL_AGENTS = [ */ export type AgentName = typeof ALL_AGENTS[number]; -export type PromptName = - | 'pre-recon-code' - | 'recon' - | 'vuln-injection' - | 'vuln-xss' - | 'vuln-auth' - | 'vuln-ssrf' - | 'vuln-authz' - | 'exploit-injection' - | 'exploit-xss' - | 'exploit-auth' - | 'exploit-ssrf' - | 'exploit-authz' - | 'report-executive'; - export type PlaywrightAgent = | 'playwright-agent1' | 'playwright-agent2' @@ -69,52 +54,6 @@ export interface AgentDefinition { name: AgentName; displayName: string; prerequisites: AgentName[]; -} - -/** - * Maps an agent name to its corresponding prompt file name. - */ -export function getPromptNameForAgent(agentName: AgentName): PromptName { - const mappings: Record = { - 'pre-recon': 'pre-recon-code', - 'recon': 'recon', - 'injection-vuln': 'vuln-injection', - 'xss-vuln': 'vuln-xss', - 'auth-vuln': 'vuln-auth', - 'ssrf-vuln': 'vuln-ssrf', - 'authz-vuln': 'vuln-authz', - 'injection-exploit': 'exploit-injection', - 'xss-exploit': 'exploit-xss', - 'auth-exploit': 'exploit-auth', - 'ssrf-exploit': 'exploit-ssrf', - 'authz-exploit': 'exploit-authz', - 'report': 'report-executive', - }; - - return mappings[agentName]; -} - -/** - * Maps an agent name to its deliverable file path. - * Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES - */ -export function getDeliverablePath(agentName: AgentName, repoPath: string): string { - const deliverableMap: Record = { - 'pre-recon': 'code_analysis_deliverable.md', - 'recon': 'recon_deliverable.md', - 'injection-vuln': 'injection_analysis_deliverable.md', - 'xss-vuln': 'xss_analysis_deliverable.md', - 'auth-vuln': 'auth_analysis_deliverable.md', - 'ssrf-vuln': 'ssrf_analysis_deliverable.md', - 'authz-vuln': 'authz_analysis_deliverable.md', - 'injection-exploit': 'injection_exploitation_evidence.md', - 'xss-exploit': 'xss_exploitation_evidence.md', - 'auth-exploit': 'auth_exploitation_evidence.md', - 'ssrf-exploit': 'ssrf_exploitation_evidence.md', - 'authz-exploit': 'authz_exploitation_evidence.md', - 'report': 'comprehensive_security_assessment_report.md', - }; - - const filename = deliverableMap[agentName]; - return `${repoPath}/deliverables/${filename}`; + promptTemplate: string; + deliverableFilename: string; } diff --git a/src/types/config.ts b/src/types/config.ts index 0262a59..f076003 100644 --- a/src/types/config.ts +++ b/src/types/config.ts @@ -51,7 +51,6 @@ export interface Authentication { export interface Config { rules?: Rules; authentication?: Authentication; - login?: unknown; } export interface DistributedConfig { diff --git a/src/types/errors.ts b/src/types/errors.ts index 42bf091..f67594f 100644 --- a/src/types/errors.ts +++ b/src/types/errors.ts @@ -8,6 +8,39 @@ * Error type definitions */ +/** + * Specific error codes for reliable classification. + * + * ErrorCode provides precision within the coarse 8-category PentestErrorType. + * Used by classifyErrorForTemporal for code-based classification (preferred) + * with string matching as fallback for external errors. + */ +export enum ErrorCode { + // Config errors (PentestErrorType: 'config') + CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND', + CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED', + CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR', + + // Agent execution errors (PentestErrorType: 'validation') + AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED', + OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED', + + // Billing errors (PentestErrorType: 'billing') + API_RATE_LIMITED = 'API_RATE_LIMITED', + SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED', + INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS', + + // Git errors (PentestErrorType: 'filesystem') + GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED', + GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED', + + // Prompt errors (PentestErrorType: 'prompt') + PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED', + + // Validation errors (PentestErrorType: 'validation') + DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND', +} + export type PentestErrorType = | 'config' | 'network' diff --git a/src/types/index.ts b/src/types/index.ts index 8968dd3..2061444 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -12,3 +12,5 @@ export * from './errors.js'; export * from './config.js'; export * from './agents.js'; export * from './audit.js'; +export * from './result.js'; +export * from './metrics.js'; diff --git a/src/types/metrics.ts b/src/types/metrics.ts new file mode 100644 index 0000000..18422ff --- /dev/null +++ b/src/types/metrics.ts @@ -0,0 +1,19 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Agent metrics types used across services and activities. + * Centralized here to avoid temporal/shared.ts import boundary violations. + */ + +export interface AgentMetrics { + durationMs: number; + inputTokens: number | null; + outputTokens: number | null; + costUsd: number | null; + numTurns: number | null; + model?: string | undefined; +} diff --git a/src/types/result.ts b/src/types/result.ts new file mode 100644 index 0000000..9f79b71 --- /dev/null +++ b/src/types/result.ts @@ -0,0 +1,62 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Minimal Result type for explicit error handling. + * + * A discriminated union that makes error handling explicit without adding + * heavy machinery. Used in key modules (config loading, agent execution, + * queue validation) where callers need to make decisions based on error type. + */ + +/** + * Success variant of Result + */ +export interface Ok { + readonly ok: true; + readonly value: T; +} + +/** + * Error variant of Result + */ +export interface Err { + readonly ok: false; + readonly error: E; +} + +/** + * Result type - either Ok with a value or Err with an error + */ +export type Result = Ok | Err; + +/** + * Create a success Result + */ +export function ok(value: T): Ok { + return { ok: true, value }; +} + +/** + * Create an error Result + */ +export function err(error: E): Err { + return { ok: false, error }; +} + +/** + * Type guard for Ok variant + */ +export function isOk(result: Result): result is Ok { + return result.ok === true; +} + +/** + * Type guard for Err variant + */ +export function isErr(result: Result): result is Err { + return result.ok === false; +} diff --git a/src/utils/billing-detection.ts b/src/utils/billing-detection.ts new file mode 100644 index 0000000..a5258f9 --- /dev/null +++ b/src/utils/billing-detection.ts @@ -0,0 +1,95 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Consolidated billing/spending cap detection utilities. + * + * Anthropic's spending cap behavior is inconsistent: + * - Sometimes a proper SDK error (billing_error) + * - Sometimes Claude responds with text about the cap + * - Sometimes partial billing before cutoff + * + * This module provides defense-in-depth detection with shared pattern lists + * to prevent drift between detection points. + */ + +/** + * Text patterns for SDK output sniffing (what Claude says). + * Used by message-handlers.ts and the behavioral heuristic. + */ +export const BILLING_TEXT_PATTERNS = [ + 'spending cap', + 'spending limit', + 'cap reached', + 'budget exceeded', + 'usage limit', + 'resets', +] as const; + +/** + * API patterns for error message classification (what the API returns). + * Used by classifyErrorForTemporal in error-handling.ts. + */ +export const BILLING_API_PATTERNS = [ + 'billing_error', + 'credit balance is too low', + 'insufficient credits', + 'usage is blocked due to insufficient credits', + 'please visit plans & billing', + 'please visit plans and billing', + 'usage limit reached', + 'quota exceeded', + 'daily rate limit', + 'limit will reset', + 'billing limit reached', +] as const; + +/** + * Checks if text matches any billing text pattern. + * Used for sniffing SDK output content for spending cap messages. + */ +export function matchesBillingTextPattern(text: string): boolean { + const lowerText = text.toLowerCase(); + return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern)); +} + +/** + * Checks if an error message matches any billing API pattern. + * Used for classifying API error messages. + */ +export function matchesBillingApiPattern(message: string): boolean { + const lowerMessage = message.toLowerCase(); + return BILLING_API_PATTERNS.some((pattern) => lowerMessage.includes(pattern)); +} + +/** + * Behavioral heuristic for detecting spending cap. + * + * When Claude hits a spending cap, it often returns a short message + * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns. + * + * This combines three signals: + * 1. Very low turn count (<=2) + * 2. Zero cost ($0) + * 3. Text matches billing patterns + * + * @param turns - Number of turns the agent took + * @param cost - Total cost in USD + * @param resultText - The result text from the agent + * @returns true if this looks like a spending cap hit + */ +export function isSpendingCapBehavior( + turns: number, + cost: number, + resultText: string +): boolean { + // Only check if turns <= 2 AND cost is exactly 0 + if (turns > 2 || cost !== 0) { + return false; + } + + return matchesBillingTextPattern(resultText); +} diff --git a/src/utils/git-manager.ts b/src/utils/git-manager.ts index 780bdcd..06a1557 100644 --- a/src/utils/git-manager.ts +++ b/src/utils/git-manager.ts @@ -6,6 +6,8 @@ import { $ } from 'zx'; import chalk from 'chalk'; +import { PentestError } from '../error-handling.js'; +import { ErrorCode } from '../types/errors.js'; /** * Check if a directory is a git repository. @@ -148,7 +150,13 @@ export async function executeGitCommandWithRetry( throw error; } } - throw new Error(`Git command failed after ${maxRetries} retries`); + throw new PentestError( + `Git command failed after ${maxRetries} retries`, + 'filesystem', + true, // Retryable - transient git lock issues + { maxRetries, description }, + ErrorCode.GIT_CHECKPOINT_FAILED + ); } finally { gitSemaphore.release(); } @@ -189,9 +197,18 @@ export async function rollbackGitWorkspace( ); return { success: true }; } catch (error) { - const result = toErrorResult(error); - console.log(chalk.red(` ❌ Rollback failed after retries: ${result.error?.message}`)); - return result; + const errMsg = error instanceof Error ? error.message : String(error); + console.log(chalk.red(` ❌ Rollback failed after retries: ${errMsg}`)); + return { + success: false, + error: new PentestError( + `Git rollback failed: ${errMsg}`, + 'filesystem', + false, // Non-retryable - rollback is best-effort cleanup + { sourceDir, reason }, + ErrorCode.GIT_ROLLBACK_FAILED + ), + }; } }