diff --git a/.claude/commands/debug.md b/.claude/commands/debug.md index 0f4de11..5dc2a8e 100644 --- a/.claude/commands/debug.md +++ b/.claude/commands/debug.md @@ -8,13 +8,14 @@ You are debugging an issue. Follow this structured approach to avoid spinning in - Read the full error message and stack trace - Identify the layer where the error originated: - **CLI/Args** - Input validation, path resolution - - **Config Parsing** - YAML parsing, JSON Schema validation - - **Session Management** - Mutex, session.json, lock files - - **Audit System** - Logging, metrics tracking, atomic writes - - **Claude SDK** - Agent execution, MCP servers, turn handling - - **Git Operations** - Checkpoints, rollback, commit - - **Tool Execution** - nmap, subfinder, whatweb - - **Validation** - Deliverable checks, queue validation + - **Config Parsing** - YAML parsing, JSON Schema validation (`src/config-parser.ts`) + - **Session Management** - Agent definitions (`src/session-manager.ts`), mutex (`src/utils/concurrency.ts`) + - **DI Container** - Container initialization/lookup (`src/services/container.ts`) + - **Services** - AgentExecutionService, ConfigLoaderService, ExploitationCheckerService, error-handling (`src/services/`) + - **Audit System** - Logging, metrics tracking, atomic writes (`src/audit/`) + - **Claude SDK** - Agent execution, MCP servers, turn handling (`src/ai/claude-executor.ts`) + - **Git Operations** - Checkpoints, rollback, commit (`src/services/git-manager.ts`) + - **Validation** - Deliverable checks, queue validation (`src/services/queue-validation.ts`) ## Step 2: Check Relevant Logs @@ -37,12 +38,14 @@ For Shannon, trace through these layers: 1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation 2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration -3. **Activities** → `src/temporal/activities.ts` - Agent execution with heartbeats -4. **Config** → `src/config-parser.ts` - YAML loading, schema validation -5. 
**Session** → `src/session-manager.ts` - Agent definitions, execution order -6. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking -7. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic -8. **Validation** → `src/queue-validation.ts` - Deliverable checks +3. **Activities** → `src/temporal/activities.ts` - Thin wrappers: heartbeat, error classification +4. **Container** → `src/services/container.ts` - Per-workflow DI +5. **Services** → `src/services/agent-execution.ts` - Agent lifecycle +6. **Config** → `src/config-parser.ts` via `src/services/config-loader.ts` +7. **Prompts** → `src/services/prompt-manager.ts` +8. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking +9. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic +10. **Validation** → `src/services/queue-validation.ts` - Deliverable checks ## Step 4: Identify Root Cause @@ -58,7 +61,10 @@ For Shannon, trace through these layers: | Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates | | session.json corrupted | Partial write during crash | Delete and restart, or restore from backup | | YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually | -| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `prompt-manager.ts` interpolation | +| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `src/services/prompt-manager.ts` interpolation | +| Service returns Err result | Check `ErrorCode` in Result | Trace through `classifyErrorForTemporal()` in `src/services/error-handling.ts` | +| Container not found | `getOrCreateContainer()` not called | Check activity setup code in `src/temporal/activities.ts` | +| ActivityLogger undefined | `createActivityLogger()` not called | Must be called at top of each activity function | **MCP Server Issues:** ```bash @@ -123,6 +129,8 @@ shannon 
--pipeline-testing ## Quick Reference: Error Types +`ErrorCode` enum in `src/types/errors.ts` provides finer-grained classification used by `classifyErrorForTemporal()` in `src/services/error-handling.ts`. + | PentestError Type | Meaning | Retryable? | |-------------------|---------|------------| | `config` | Configuration file issues | No | diff --git a/.claude/commands/review.md b/.claude/commands/review.md index 31b60a4..d5b6b90 100644 --- a/.claude/commands/review.md +++ b/.claude/commands/review.md @@ -19,6 +19,8 @@ git diff HEAD - [ ] **Retryable flag matches behavior** - If error will be retried, set `retryable: true` - [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object - [ ] **Never swallow errors silently** - Always log or propagate errors +- [ ] **Use ErrorCode enum** - Prefer `ErrorCode.CONFIG_INVALID` over string matching for classification +- [ ] **Result for service returns** - Services return `Result`, not throw ### Audit System & Concurrency (CRITICAL) - [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution @@ -41,6 +43,13 @@ git diff HEAD - [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice - [ ] **JSON Schema validation before use** - Config must pass AJV validation +### Services Layer & DI Container (CRITICAL) +- [ ] **Business logic in services, not activities** — Activities: heartbeat loop, error classification, container calls only. Domain logic → `src/services/` +- [ ] **Services accept ActivityLogger** — Never import `@temporalio/*` in services. Use `ActivityLogger` interface from `src/types/` +- [ ] **Result type for fallible operations** — Service methods return `Result`, unwrap with `isOk()`/`isErr()`. 
Activities call `executeOrThrow()` at the boundary +- [ ] **Container lifecycle** — `getOrCreateContainer()` at activity start, `removeContainer()` only in workflow cleanup +- [ ] **AuditSession not in container** — Must be passed per-agent call (parallel safety) + ### Session & Agent Management (CRITICAL) - [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items - [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility @@ -91,6 +100,8 @@ git diff HEAD - [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level - [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message - [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts +- [ ] **Console.log in services** — Use `ActivityLogger`. Only CLI display code (`client.ts`, `worker.ts`, `output-formatters.ts`) uses console.log +- [ ] **Temporal imports in services** — Services must stay Temporal-agnostic. If you need Temporal APIs, it belongs in activities ### Code Quality - [ ] **No dead code added** - Remove unused imports, functions, variables diff --git a/CLAUDE.md b/CLAUDE.md index 70555c0..13d6c82 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -41,18 +41,20 @@ npm run build ## Architecture ### Core Modules -- `src/session-manager.ts` — Agent definitions, execution order, parallel groups -- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic and git checkpoints +- `src/session-manager.ts` — Agent definitions (`AGENTS` record). 
Agent types in `src/types/agents.ts` - `src/config-parser.ts` — YAML config parsing with JSON Schema validation -- `src/error-handling.ts` — Categorized error types (PentestError, ConfigError, NetworkError) with retry logic -- `src/tool-checker.ts` — Validates external security tool availability before execution -- `src/queue-validation.ts` — Deliverable validation and agent prerequisites +- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic +- `src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts` +- `src/types/` — Consolidated types: `Result`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc. +- `src/utils/` — Shared utilities (file I/O, formatting, concurrency) ### Temporal Orchestration Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases). - `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`) -- `src/temporal/activities.ts` — Activity implementations with heartbeats +- `src/temporal/activities.ts` — Thin wrappers — heartbeat loop, error classification, container lifecycle. Business logic delegated to `src/services/` +- `src/temporal/activity-logger.ts` — `TemporalActivityLogger` implementation of `ActivityLogger` interface +- `src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary` - `src/temporal/worker.ts` — Worker entry point - `src/temporal/client.ts` — CLI client for starting workflows - `src/temporal/shared.ts` — Types, interfaces, query definitions @@ -66,30 +68,32 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig ### Supporting Systems - **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). 
Supports auth settings, MFA/TOTP, and per-app testing parameters -- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `prompt-manager.ts` +- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `src/services/prompt-manager.ts` - **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth -- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables +- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`audit/log-stream.ts`) shared stream primitive - **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool - **Workspaces & Resume** — Named workspaces via `WORKSPACE=` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts` ## Development Notes ### Adding a New Agent -1. Define agent in `src/session-manager.ts` (add to `AGENT_QUEUE` and parallel group) +1. Define agent in `src/session-manager.ts` (add to `AGENTS` record). 
`ALL_AGENTS`/`AgentName` types live in `src/types/agents.ts` 2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`) -3. Add activity function in `src/temporal/activities.ts` +3. Two-layer pattern: add a thin activity wrapper in `src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry 4. Register activity in `src/temporal/workflows.ts` within the appropriate phase ### Modifying Prompts - Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}` -- Shared partials in `prompts/shared/` included via `prompt-manager.ts` +- Shared partials in `prompts/shared/` included via `src/services/prompt-manager.ts` - Test with `PIPELINE_TESTING=true` for fast iteration ### Key Design Patterns - **Configuration-Driven** — YAML configs with JSON Schema validation - **Progressive Analysis** — Each phase builds on previous results - **SDK-First** — Claude Agent SDK handles autonomous analysis -- **Modular Error Handling** — Categorized errors with automatic retry (3 attempts per agent) +- **Modular Error Handling** — `ErrorCode` enum, `Result` for explicit error propagation, automatic retry (3 attempts per agent) +- **Services Boundary** — Activities are thin Temporal wrappers; `src/services/` owns business logic, accepts `ActivityLogger`, returns `Result`. No Temporal imports in services +- **DI Container** — Per-workflow in `src/services/container.ts`. `AuditSession` excluded (parallel safety) ### Security Defensive security tool only. Use only on systems you own or have explicit permission to test. @@ -111,18 +115,36 @@ Defensive security tool only. 
Use only on systems you own or have explicit permi - Use `function` keyword for top-level functions (not arrow functions) - Explicit return type annotations on exported/top-level functions - Prefer `readonly` for data that shouldn't be mutated +- `exactOptionalPropertyTypes` is enabled — use spread for optional props, not direct `undefined` assignment ### Avoid - Combining multiple concerns into a single function to "save lines" - Dense callback chains when sequential logic is clearer - Sacrificing readability for DRY — some repetition is fine if clearer - Abstractions for one-time operations +- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it + +### Comments +Comments must be **timeless** — no references to this conversation, refactoring history, or the AI. + +**Patterns used in this codebase:** +- `/** JSDoc */` — file headers (after license) and exported functions/interfaces +- `// N. Description` — numbered sequential steps inside function bodies. Use when a + function has 3+ distinct phases where at least one isn't immediately obvious from the + code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute` + (steps 1-9) and `injectModelIntoReport` (steps 1-5) +- `// === Section ===` — high-level dividers between groups of functions in long files, + or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`). 
+ Not for sequential steps inside function bodies — use numbered steps for that +- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints + +**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X") ## Key Files **Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts` -**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/audit/` +**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/services/`, `src/audit/` **Config:** `shannon` (CLI), `docker-compose.yml`, `configs/`, `prompts/` diff --git a/package-lock.json b/package-lock.json index b309592..9d2b980 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,7 +21,6 @@ "figlet": "^1.9.3", "gradient-string": "^3.0.0", "js-yaml": "^4.1.0", - "zod": "^4.3.6", "zx": "^8.0.0" }, "devDependencies": { diff --git a/package.json b/package.json index 78a50ee..8758b3f 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,6 @@ "figlet": "^1.9.3", "gradient-string": "^3.0.0", "js-yaml": "^4.1.0", - "zod": "^4.3.6", "zx": "^8.0.0" }, "devDependencies": { diff --git a/src/ai/claude-executor.ts b/src/ai/claude-executor.ts index ceab2d6..3340b56 100644 --- a/src/ai/claude-executor.ts +++ b/src/ai/claude-executor.ts @@ -7,18 +7,16 @@ // Production Claude agent execution with retry, git checkpoints, and audit logging import { fs, path } from 'zx'; -import chalk, { type ChalkInstance } from 'chalk'; import { query } from '@anthropic-ai/claude-agent-sdk'; -import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js'; -import { timingResults, Timer } from '../utils/metrics.js'; +import { isRetryableError, PentestError } from '../services/error-handling.js'; +import { isSpendingCapBehavior } from '../utils/billing-detection.js'; +import { Timer } from '../utils/metrics.js'; import { 
formatTimestamp } from '../utils/formatting.js'; -import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace, getGitCommitHash } from '../utils/git-manager.js'; -import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js'; +import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js'; import { AuditSession } from '../audit/index.js'; import { createShannonHelperServer } from '../../mcp-server/dist/index.js'; -import type { SessionMetadata } from '../audit/utils.js'; -import { getPromptNameForAgent } from '../types/agents.js'; +import { AGENTS } from '../session-manager.js'; import type { AgentName } from '../types/index.js'; import { dispatchMessage } from './message-handlers.js'; @@ -26,6 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr import { createProgressManager } from './progress-manager.js'; import { createAuditLogger } from './audit-logger.js'; import { getActualModelName } from './router-utils.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; declare global { var SHANNON_DISABLE_LOADER: boolean | undefined; @@ -58,24 +57,27 @@ type McpServer = ReturnType | StdioMcpServer; // Configures MCP servers for agent execution, with Docker-specific Chromium handling function buildMcpServers( sourceDir: string, - agentName: string | null + agentName: string | null, + logger: ActivityLogger ): Record { + // 1. Create the shannon-helper server (always present) const shannonHelperServer = createShannonHelperServer(sourceDir); const mcpServers: Record = { 'shannon-helper': shannonHelperServer, }; + // 2. 
Look up the agent's Playwright MCP mapping if (agentName) { - const promptName = getPromptNameForAgent(agentName as AgentName); - const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null; + const promptTemplate = AGENTS[agentName as AgentName].promptTemplate; + const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null; if (playwrightMcpName) { - console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`)); + logger.info(`Assigned ${agentName} -> ${playwrightMcpName}`); const userDataDir = `/tmp/${playwrightMcpName}`; - // Docker uses system Chromium; local dev uses Playwright's bundled browsers + // 3. Configure Playwright MCP args with Docker/local browser handling const isDocker = process.env.SHANNON_DOCKER === 'true'; const mcpArgs: string[] = [ @@ -84,7 +86,6 @@ function buildMcpServers( '--user-data-dir', userDataDir, ]; - // Docker: Use system Chromium; Local: Use Playwright's bundled browsers if (isDocker) { mcpArgs.push('--executable-path', '/usr/bin/chromium-browser'); mcpArgs.push('--browser', 'chromium'); @@ -107,6 +108,7 @@ function buildMcpServers( } } + // 4. Return configured servers return mcpServers; } @@ -142,23 +144,23 @@ async function writeErrorLog( }; const logPath = path.join(sourceDir, 'error.log'); await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n'); - } catch (logError) { - const logErrMsg = logError instanceof Error ? 
logError.message : String(logError); - console.log(chalk.gray(` (Failed to write error log: ${logErrMsg})`)); + } catch { + // Best-effort error log writing - don't propagate failures } } export async function validateAgentOutput( result: ClaudePromptResult, agentName: string | null, - sourceDir: string + sourceDir: string, + logger: ActivityLogger ): Promise { - console.log(chalk.blue(` Validating ${agentName} agent output`)); + logger.info(`Validating ${agentName} agent output`); try { // Check if agent completed successfully if (!result.success || !result.result) { - console.log(chalk.red(` Validation failed: Agent execution was unsuccessful`)); + logger.error('Validation failed: Agent execution was unsuccessful'); return false; } @@ -166,28 +168,27 @@ export async function validateAgentOutput( const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined; if (!validator) { - console.log(chalk.yellow(` No validator found for agent "${agentName}" - assuming success`)); - console.log(chalk.green(` Validation passed: Unknown agent with successful result`)); + logger.warn(`No validator found for agent "${agentName}" - assuming success`); + logger.info('Validation passed: Unknown agent with successful result'); return true; } - console.log(chalk.blue(` Using validator for agent: ${agentName}`)); - console.log(chalk.blue(` Source directory: ${sourceDir}`)); + logger.info(`Using validator for agent: ${agentName}`, { sourceDir }); // Apply validation function - const validationResult = await validator(sourceDir); + const validationResult = await validator(sourceDir, logger); if (validationResult) { - console.log(chalk.green(` Validation passed: Required files/structure present`)); + logger.info('Validation passed: Required files/structure present'); } else { - console.log(chalk.red(` Validation failed: Missing required deliverable files`)); + logger.error('Validation failed: Missing required deliverable files'); } return 
validationResult; } catch (error) { const errMsg = error instanceof Error ? error.message : String(error); - console.log(chalk.red(` Validation failed with error: ${errMsg}`)); + logger.error(`Validation failed with error: ${errMsg}`); return false; } } @@ -200,14 +201,14 @@ export async function runClaudePrompt( context: string = '', description: string = 'Claude analysis', agentName: string | null = null, - colorFn: ChalkInstance = chalk.cyan, - sessionMetadata: SessionMetadata | null = null, auditSession: AuditSession | null = null, - attemptNumber: number = 1 + logger: ActivityLogger ): Promise { + // 1. Initialize timing and prompt const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`); const fullPrompt = context ? `${context}\n\n${prompt}` : prompt; + // 2. Set up progress and audit infrastructure const execContext = detectExecutionContext(description); const progress = createProgressManager( { description, useCleanOutput: execContext.useCleanOutput }, @@ -215,11 +216,12 @@ export async function runClaudePrompt( ); const auditLogger = createAuditLogger(auditSession); - console.log(chalk.blue(` Running Claude Code: ${description}...`)); + logger.info(`Running Claude Code: ${description}...`); - const mcpServers = buildMcpServers(sourceDir, agentName); + // 3. Configure MCP servers + const mcpServers = buildMcpServers(sourceDir, agentName, logger); - // Build env vars to pass to SDK subprocesses + // 4. Build env vars to pass to SDK subprocesses const sdkEnv: Record = { CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000', }; @@ -230,6 +232,7 @@ export async function runClaudePrompt( sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN; } + // 5. 
Configure SDK options const options = { model: 'claude-sonnet-4-5-20250929', maxTurns: 10_000, @@ -241,7 +244,7 @@ export async function runClaudePrompt( }; if (!execContext.useCleanOutput) { - console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`)); + logger.info(`SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`); } let turnCount = 0; @@ -252,10 +255,11 @@ export async function runClaudePrompt( progress.start(); try { + // 6. Process the message stream const messageLoopResult = await processMessageStream( fullPrompt, options, - { execContext, description, colorFn, progress, auditLogger }, + { execContext, description, progress, auditLogger, logger }, timer ); @@ -266,30 +270,21 @@ export async function runClaudePrompt( const model = messageLoopResult.model; // === SPENDING CAP SAFEGUARD === - // Defense-in-depth: Detect spending cap that slipped through detectApiError(). - // When spending cap is hit, Claude returns a short message with $0 cost. - // Legitimate agent work NEVER costs $0 with only 1-2 turns. - if (turnCount <= 2 && totalCost === 0) { - const resultLower = (result || '').toLowerCase(); - const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets']; - const looksLikeBillingError = BILLING_KEYWORDS.some((kw) => - resultLower.includes(kw) + // 7. Defense-in-depth: Detect spending cap that slipped through detectApiError(). 
+ // Uses consolidated billing detection from utils/billing-detection.ts + if (isSpendingCapBehavior(turnCount, totalCost, result || '')) { + throw new PentestError( + `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`, + 'billing', + true // Retryable - Temporal will use 5-30 min backoff ); - - if (looksLikeBillingError) { - throw new PentestError( - `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`, - 'billing', - true // Retryable - Temporal will use 5-30 min backoff - ); - } } + // 8. Finalize successful result const duration = timer.stop(); - timingResults.agents[execContext.agentKey] = duration; if (apiErrorDetected) { - console.log(chalk.yellow(` API Error detected in ${description} - will validate deliverables before failing`)); + logger.warn(`API Error detected in ${description} - will validate deliverables before failing`); } progress.finish(formatCompletionMessage(execContext, description, turnCount, duration)); @@ -306,8 +301,8 @@ export async function runClaudePrompt( }; } catch (error) { + // 9. 
Handle errors — log, write error file, return failure const duration = timer.stop(); - timingResults.agents[execContext.agentKey] = duration; const err = error as Error & { code?: string; status?: number }; @@ -340,9 +335,9 @@ interface MessageLoopResult { interface MessageLoopDeps { execContext: ReturnType; description: string; - colorFn: ChalkInstance; progress: ReturnType; auditLogger: ReturnType; + logger: ActivityLogger; } async function processMessageStream( @@ -351,7 +346,7 @@ async function processMessageStream( deps: MessageLoopDeps, timer: Timer ): Promise { - const { execContext, description, colorFn, progress, auditLogger } = deps; + const { execContext, description, progress, auditLogger, logger } = deps; const HEARTBEAT_INTERVAL = 30000; let turnCount = 0; @@ -365,7 +360,7 @@ async function processMessageStream( // Heartbeat logging when loader is disabled const now = Date.now(); if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) { - console.log(chalk.blue(` [${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`)); + logger.info(`[${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`); lastHeartbeat = now; } @@ -377,7 +372,7 @@ async function processMessageStream( const dispatchResult = await dispatchMessage( message as { type: string; subtype?: string }, turnCount, - { execContext, description, colorFn, progress, auditLogger } + { execContext, description, progress, auditLogger, logger } ); if (dispatchResult.type === 'throw') { @@ -403,153 +398,3 @@ async function processMessageStream( return { turnCount, result, apiErrorDetected, cost, model }; } - -// Main entry point for agent execution. Handles retries, git checkpoints, and validation. 
-export async function runClaudePromptWithRetry( - prompt: string, - sourceDir: string, - _allowedTools: string = 'Read', - context: string = '', - description: string = 'Claude analysis', - agentName: string | null = null, - colorFn: ChalkInstance = chalk.cyan, - sessionMetadata: SessionMetadata | null = null -): Promise { - const maxRetries = 3; - let lastError: Error | undefined; - let retryContext = context; - - console.log(chalk.cyan(`Starting ${description} with ${maxRetries} max attempts`)); - - let auditSession: AuditSession | null = null; - if (sessionMetadata && agentName) { - auditSession = new AuditSession(sessionMetadata); - await auditSession.initialize(); - } - - for (let attempt = 1; attempt <= maxRetries; attempt++) { - await createGitCheckpoint(sourceDir, description, attempt); - - if (auditSession && agentName) { - const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt; - await auditSession.startAgent(agentName, fullPrompt, attempt); - } - - try { - const result = await runClaudePrompt( - prompt, sourceDir, retryContext, - description, agentName, colorFn, sessionMetadata, auditSession, attempt - ); - - if (result.success) { - const validationPassed = await validateAgentOutput(result, agentName, sourceDir); - - if (validationPassed) { - if (result.apiErrorDetected) { - console.log(chalk.yellow(`Validation: Ready for exploitation despite API error warnings`)); - } - - if (auditSession && agentName) { - const commitHash = await getGitCommitHash(sourceDir); - const endResult: { - attemptNumber: number; - duration_ms: number; - cost_usd: number; - success: true; - checkpoint?: string; - } = { - attemptNumber: attempt, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: true, - }; - if (commitHash) { - endResult.checkpoint = commitHash; - } - await auditSession.endAgent(agentName, endResult); - } - - await commitGitSuccess(sourceDir, description); - console.log(chalk.green.bold(`${description} completed 
successfully on attempt ${attempt}/${maxRetries}`)); - return result; - // Validation failure is retryable - agent might succeed on retry with cleaner workspace - } else { - console.log(chalk.yellow(`${description} completed but output validation failed`)); - - if (auditSession && agentName) { - await auditSession.endAgent(agentName, { - attemptNumber: attempt, - duration_ms: result.duration, - cost_usd: result.partialCost || result.cost || 0, - success: false, - error: 'Output validation failed', - isFinalAttempt: attempt === maxRetries - }); - } - - if (result.apiErrorDetected) { - console.log(chalk.yellow(`API Error detected with validation failure - treating as retryable`)); - lastError = new Error('API Error: terminated with validation failure'); - } else { - lastError = new Error('Output validation failed'); - } - - if (attempt < maxRetries) { - await rollbackGitWorkspace(sourceDir, 'validation failure'); - continue; - } else { - throw new PentestError( - `Agent ${description} failed output validation after ${maxRetries} attempts. 
Required deliverable files were not created.`, - 'validation', - false, - { description, sourceDir, attemptsExhausted: maxRetries } - ); - } - } - } - - } catch (error) { - const err = error as Error & { duration?: number; cost?: number; partialResults?: unknown }; - lastError = err; - - if (auditSession && agentName) { - await auditSession.endAgent(agentName, { - attemptNumber: attempt, - duration_ms: err.duration || 0, - cost_usd: err.cost || 0, - success: false, - error: err.message, - isFinalAttempt: attempt === maxRetries - }); - } - - if (!isRetryableError(err)) { - console.log(chalk.red(`${description} failed with non-retryable error: ${err.message}`)); - await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup'); - throw err; - } - - if (attempt < maxRetries) { - await rollbackGitWorkspace(sourceDir, 'retryable error cleanup'); - - const delay = getRetryDelay(err, attempt); - const delaySeconds = (delay / 1000).toFixed(1); - console.log(chalk.yellow(`${description} failed (attempt ${attempt}/${maxRetries})`)); - console.log(chalk.gray(` Error: ${err.message}`)); - console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`)); - - if (err.partialResults) { - retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(err.partialResults)}`; - } - - await new Promise(resolve => setTimeout(resolve, delay)); - } else { - await rollbackGitWorkspace(sourceDir, 'final failure cleanup'); - console.log(chalk.red(`${description} failed after ${maxRetries} attempts`)); - console.log(chalk.red(` Final error: ${err.message}`)); - } - } - } - - throw lastError; -} diff --git a/src/ai/message-handlers.ts b/src/ai/message-handlers.ts index 57cdd4e..e60ddb5 100644 --- a/src/ai/message-handlers.ts +++ b/src/ai/message-handlers.ts @@ -4,20 +4,19 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. 
-// Pure functions for processing SDK message types - -import { PentestError } from '../error-handling.js'; -import { filterJsonToolCalls } from '../utils/output-formatter.js'; +import { PentestError } from '../services/error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import { matchesBillingTextPattern } from '../utils/billing-detection.js'; +import { filterJsonToolCalls } from './output-formatters.js'; import { formatTimestamp } from '../utils/formatting.js'; -import chalk from 'chalk'; import { getActualModelName } from './router-utils.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; import { formatAssistantOutput, formatResultOutput, formatToolUseOutput, formatToolResultOutput, } from './output-formatters.js'; -import { costResults } from '../utils/metrics.js'; import type { AuditLogger } from './audit-logger.js'; import type { ProgressManager } from './progress-manager.js'; import type { @@ -35,10 +34,9 @@ import type { SystemInitMessage, ExecutionContext, } from './types.js'; -import type { ChalkInstance } from 'chalk'; // Handles both array and string content formats from SDK -export function extractMessageContent(message: AssistantMessage): string { +function extractMessageContent(message: AssistantMessage): string { const messageContent = message.message; if (Array.isArray(messageContent.content)) { @@ -51,7 +49,7 @@ export function extractMessageContent(message: AssistantMessage): string { } // Extracts only text content (no tool_use JSON) to avoid false positives in error detection -export function extractTextOnlyContent(message: AssistantMessage): string { +function extractTextOnlyContent(message: AssistantMessage): string { const messageContent = message.message; if (Array.isArray(messageContent.content)) { @@ -64,7 +62,7 @@ export function extractTextOnlyContent(message: AssistantMessage): string { return String(messageContent.content); } -export function detectApiError(content: string): ApiErrorDetection { 
+function detectApiError(content: string): ApiErrorDetection { if (!content || typeof content !== 'string') { return { detected: false }; } @@ -75,25 +73,15 @@ export function detectApiError(content: string): ApiErrorDetection { // When Claude Code hits its spending cap, it returns a short message like // "Spending cap reached resets 8am" instead of throwing an error. // These should retry with 5-30 min backoff so workflows can recover when cap resets. - const BILLING_PATTERNS = [ - 'spending cap', - 'spending limit', - 'cap reached', - 'budget exceeded', - 'usage limit', - ]; - - const isBillingError = BILLING_PATTERNS.some((pattern) => - lowerContent.includes(pattern) - ); - - if (isBillingError) { + if (matchesBillingTextPattern(content)) { return { detected: true, shouldThrow: new PentestError( `Billing limit reached: ${content.slice(0, 100)}`, 'billing', - true // RETRYABLE - Temporal will use 5-30 min backoff + true, // RETRYABLE - Temporal will use 5-30 min backoff + {}, + ErrorCode.SPENDING_CAP_REACHED ), }; } @@ -127,7 +115,9 @@ function handleStructuredError( shouldThrow: new PentestError( `Billing error (structured): ${content.slice(0, 100)}`, 'billing', - true // Retryable with backoff + true, // Retryable with backoff + {}, + ErrorCode.INSUFFICIENT_CREDITS ), }; case 'rate_limit': @@ -136,7 +126,9 @@ function handleStructuredError( shouldThrow: new PentestError( `Rate limit hit (structured): ${content.slice(0, 100)}`, 'network', - true // Retryable with backoff + true, // Retryable with backoff + {}, + ErrorCode.API_RATE_LIMITED ), }; case 'authentication_failed': @@ -181,7 +173,7 @@ function handleStructuredError( } } -export function handleAssistantMessage( +function handleAssistantMessage( message: AssistantMessage, turnCount: number ): AssistantResult { @@ -219,7 +211,7 @@ export function handleAssistantMessage( } // Final message of a query with cost/duration info -export function handleResultMessage(message: ResultMessage): ResultData { +function 
handleResultMessage(message: ResultMessage): ResultData { const result: ResultData = { result: message.result || null, cost: message.total_cost_usd || 0, @@ -236,14 +228,14 @@ export function handleResultMessage(message: ResultMessage): ResultData { if (message.stop_reason !== undefined) { result.stop_reason = message.stop_reason; if (message.stop_reason && message.stop_reason !== 'end_turn') { - console.log(chalk.yellow(` Stop reason: ${message.stop_reason}`)); + console.log(` Stop reason: ${message.stop_reason}`); } } return result; } -export function handleToolUseMessage(message: ToolUseMessage): ToolUseData { +function handleToolUseMessage(message: ToolUseMessage): ToolUseData { return { toolName: message.name, parameters: message.input || {}, @@ -252,7 +244,7 @@ export function handleToolUseMessage(message: ToolUseMessage): ToolUseData { } // Truncates long results for display (500 char limit), preserves full content for logging -export function handleToolResultMessage(message: ToolResultMessage): ToolResultData { +function handleToolResultMessage(message: ToolResultMessage): ToolResultData { const content = message.content; const contentStr = typeof content === 'string' ? 
content : JSON.stringify(content, null, 2); @@ -269,14 +261,12 @@ export function handleToolResultMessage(message: ToolResultMessage): ToolResultD }; } -// Output helper for console logging function outputLines(lines: string[]): void { for (const line of lines) { console.log(line); } } -// Message dispatch result types export type MessageDispatchAction = | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined } | { type: 'complete'; result: string | null; cost: number } @@ -285,9 +275,9 @@ export type MessageDispatchAction = export interface MessageDispatchDeps { execContext: ExecutionContext; description: string; - colorFn: ChalkInstance; progress: ProgressManager; auditLogger: AuditLogger; + logger: ActivityLogger; } // Dispatches SDK messages to appropriate handlers and formatters @@ -296,7 +286,7 @@ export async function dispatchMessage( turnCount: number, deps: MessageDispatchDeps ): Promise { - const { execContext, description, colorFn, progress, auditLogger } = deps; + const { execContext, description, progress, auditLogger, logger } = deps; switch (message.type) { case 'assistant': { @@ -312,8 +302,7 @@ export async function dispatchMessage( assistantResult.cleanedContent, execContext, turnCount, - description, - colorFn + description )); progress.start(); } @@ -321,7 +310,7 @@ export async function dispatchMessage( await auditLogger.logLlmResponse(turnCount, assistantResult.content); if (assistantResult.apiErrorDetected) { - console.log(chalk.red(` API Error detected in assistant response`)); + logger.warn('API Error detected in assistant response'); return { type: 'continue', apiErrorDetected: true }; } @@ -333,10 +322,10 @@ export async function dispatchMessage( const initMsg = message as SystemInitMessage; const actualModel = getActualModelName(initMsg.model); if (!execContext.useCleanOutput) { - console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`)); + logger.info(`Model: 
${actualModel}, Permission: ${initMsg.permissionMode}`); if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) { const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', '); - console.log(chalk.blue(` MCP: ${mcpStatus}`)); + logger.info(`MCP: ${mcpStatus}`); } } // Return actual model for tracking in audit logs @@ -368,13 +357,11 @@ export async function dispatchMessage( case 'result': { const resultData = handleResultMessage(message as ResultMessage); outputLines(formatResultOutput(resultData, !execContext.useCleanOutput)); - costResults.agents[execContext.agentKey] = resultData.cost; - costResults.total += resultData.cost; return { type: 'complete', result: resultData.result, cost: resultData.cost }; } default: - console.log(chalk.gray(` ${message.type}: ${JSON.stringify(message, null, 2)}`)); + logger.info(`Unhandled message type: ${message.type}`); return { type: 'continue' }; } } diff --git a/src/ai/output-formatters.ts b/src/ai/output-formatters.ts index 833c71c..34735f2 100644 --- a/src/ai/output-formatters.ts +++ b/src/ai/output-formatters.ts @@ -4,13 +4,267 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. 
-// Pure functions for formatting console output - -import chalk from 'chalk'; import { extractAgentType, formatDuration } from '../utils/formatting.js'; -import { getAgentPrefix } from '../utils/output-formatter.js'; +import { AGENTS } from '../session-manager.js'; import type { ExecutionContext, ResultData } from './types.js'; +interface ToolCallInput { + url?: string; + element?: string; + key?: string; + fields?: unknown[]; + text?: string; + action?: string; + description?: string; + todos?: Array<{ + status: string; + content: string; + }>; + [key: string]: unknown; +} + +interface ToolCall { + name: string; + input?: ToolCallInput; +} + +/** + * Get agent prefix for parallel execution + */ +export function getAgentPrefix(description: string): string { + // Map agent names to their prefixes + const agentPrefixes: Record = { + 'injection-vuln': '[Injection]', + 'xss-vuln': '[XSS]', + 'auth-vuln': '[Auth]', + 'authz-vuln': '[Authz]', + 'ssrf-vuln': '[SSRF]', + 'injection-exploit': '[Injection]', + 'xss-exploit': '[XSS]', + 'auth-exploit': '[Auth]', + 'authz-exploit': '[Authz]', + 'ssrf-exploit': '[SSRF]', + }; + + // First try to match by agent name directly + for (const [agentName, prefix] of Object.entries(agentPrefixes)) { + const agent = AGENTS[agentName as keyof typeof AGENTS]; + if (agent && description.includes(agent.displayName)) { + return prefix; + } + } + + // Fallback to partial matches for backwards compatibility + if (description.includes('injection')) return '[Injection]'; + if (description.includes('xss')) return '[XSS]'; + if (description.includes('authz')) return '[Authz]'; // Check authz before auth + if (description.includes('auth')) return '[Auth]'; + if (description.includes('ssrf')) return '[SSRF]'; + + return '[Agent]'; +} + +/** + * Extract domain from URL for display + */ +function extractDomain(url: string): string { + try { + const urlObj = new URL(url); + return urlObj.hostname || url.slice(0, 30); + } catch { + return url.slice(0, 
30); + } +} + +/** + * Summarize TodoWrite updates into clean progress indicators + */ +function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null { + if (!input?.todos || !Array.isArray(input.todos)) { + return null; + } + + const todos = input.todos; + const completed = todos.filter((t) => t.status === 'completed'); + const inProgress = todos.filter((t) => t.status === 'in_progress'); + + // Show recently completed tasks + if (completed.length > 0) { + const recent = completed[completed.length - 1]!; + return `✅ ${recent.content}`; + } + + // Show current in-progress task + if (inProgress.length > 0) { + const current = inProgress[0]!; + return `🔄 ${current.content}`; + } + + return null; +} + +/** + * Format browser tool calls into clean progress indicators + */ +function formatBrowserAction(toolCall: ToolCall): string { + const toolName = toolCall.name; + const input = toolCall.input || {}; + + // Core Browser Operations + if (toolName === 'mcp__playwright__browser_navigate') { + const url = input.url || ''; + const domain = extractDomain(url); + return `🌐 Navigating to ${domain}`; + } + + if (toolName === 'mcp__playwright__browser_navigate_back') { + return `⬅️ Going back`; + } + + // Page Interaction + if (toolName === 'mcp__playwright__browser_click') { + const element = input.element || 'element'; + return `🖱️ Clicking ${element.slice(0, 25)}`; + } + + if (toolName === 'mcp__playwright__browser_hover') { + const element = input.element || 'element'; + return `👆 Hovering over ${element.slice(0, 20)}`; + } + + if (toolName === 'mcp__playwright__browser_type') { + const element = input.element || 'field'; + return `⌨️ Typing in ${element.slice(0, 20)}`; + } + + if (toolName === 'mcp__playwright__browser_press_key') { + const key = input.key || 'key'; + return `⌨️ Pressing ${key}`; + } + + // Form Handling + if (toolName === 'mcp__playwright__browser_fill_form') { + const fieldCount = input.fields?.length || 0; + return `📝 Filling 
${fieldCount} form fields`; + } + + if (toolName === 'mcp__playwright__browser_select_option') { + return `📋 Selecting dropdown option`; + } + + if (toolName === 'mcp__playwright__browser_file_upload') { + return `📁 Uploading file`; + } + + // Page Analysis + if (toolName === 'mcp__playwright__browser_snapshot') { + return `📸 Taking page snapshot`; + } + + if (toolName === 'mcp__playwright__browser_take_screenshot') { + return `📸 Taking screenshot`; + } + + if (toolName === 'mcp__playwright__browser_evaluate') { + return `🔍 Running JavaScript analysis`; + } + + // Waiting & Monitoring + if (toolName === 'mcp__playwright__browser_wait_for') { + if (input.text) { + return `⏳ Waiting for "${input.text.slice(0, 20)}"`; + } + return `⏳ Waiting for page response`; + } + + if (toolName === 'mcp__playwright__browser_console_messages') { + return `📜 Checking console logs`; + } + + if (toolName === 'mcp__playwright__browser_network_requests') { + return `🌐 Analyzing network traffic`; + } + + // Tab Management + if (toolName === 'mcp__playwright__browser_tabs') { + const action = input.action || 'managing'; + return `🗂️ ${action} browser tab`; + } + + // Dialog Handling + if (toolName === 'mcp__playwright__browser_handle_dialog') { + return `💬 Handling browser dialog`; + } + + // Fallback for any missed tools + const actionType = toolName.split('_').pop(); + return `🌐 Browser: ${actionType}`; +} + +/** + * Filter out JSON tool calls from content, with special handling for Task calls + */ +export function filterJsonToolCalls(content: string | null | undefined): string { + if (!content || typeof content !== 'string') { + return content || ''; + } + + const lines = content.split('\n'); + const processedLines: string[] = []; + + for (const line of lines) { + const trimmed = line.trim(); + + // Skip empty lines + if (trimmed === '') { + continue; + } + + // Check if this is a JSON tool call + if (trimmed.startsWith('{"type":"tool_use"')) { + try { + const toolCall = 
JSON.parse(trimmed) as ToolCall; + + // Special handling for Task tool calls + if (toolCall.name === 'Task') { + const description = toolCall.input?.description || 'analysis agent'; + processedLines.push(`🚀 Launching ${description}`); + continue; + } + + // Special handling for TodoWrite tool calls + if (toolCall.name === 'TodoWrite') { + const summary = summarizeTodoUpdate(toolCall.input); + if (summary) { + processedLines.push(summary); + } + continue; + } + + // Special handling for browser tool calls + if (toolCall.name.startsWith('mcp__playwright__browser_')) { + const browserAction = formatBrowserAction(toolCall); + if (browserAction) { + processedLines.push(browserAction); + } + continue; + } + + // Hide all other tool calls (Read, Write, Grep, etc.) + continue; + } catch { + // If JSON parsing fails, treat as regular text + processedLines.push(line); + } + } else { + // Keep non-JSON lines (assistant text) + processedLines.push(line); + } + } + + return processedLines.join('\n'); +} + export function detectExecutionContext(description: string): ExecutionContext { const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent'); @@ -33,8 +287,7 @@ export function formatAssistantOutput( cleanedContent: string, context: ExecutionContext, turnCount: number, - description: string, - colorFn: typeof chalk.cyan = chalk.cyan + description: string ): string[] { if (!cleanedContent.trim()) { return []; @@ -45,11 +298,11 @@ export function formatAssistantOutput( if (context.isParallelExecution) { // Compact output for parallel agents with prefixes const prefix = getAgentPrefix(description); - lines.push(colorFn(`${prefix} ${cleanedContent}`)); + lines.push(`${prefix} ${cleanedContent}`); } else { // Full turn output for sequential agents - lines.push(colorFn(`\n Turn ${turnCount} (${description}):`)); - lines.push(colorFn(` ${cleanedContent}`)); + lines.push(`\n Turn ${turnCount} (${description}):`); + lines.push(` 
${cleanedContent}`); } return lines; @@ -58,28 +311,24 @@ export function formatAssistantOutput( export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] { const lines: string[] = []; - lines.push(chalk.magenta(`\n COMPLETED:`)); - lines.push( - chalk.gray( - ` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}` - ) - ); + lines.push(`\n COMPLETED:`); + lines.push(` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`); if (data.subtype === 'error_max_turns') { - lines.push(chalk.red(` Stopped: Hit maximum turns limit`)); + lines.push(` Stopped: Hit maximum turns limit`); } else if (data.subtype === 'error_during_execution') { - lines.push(chalk.red(` Stopped: Execution error`)); + lines.push(` Stopped: Execution error`); } if (data.permissionDenials > 0) { - lines.push(chalk.yellow(` ${data.permissionDenials} permission denials`)); + lines.push(` ${data.permissionDenials} permission denials`); } if (showFullResult && data.result && typeof data.result === 'string') { if (data.result.length > 1000) { - lines.push(chalk.magenta(` ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`)); + lines.push(` ${data.result.slice(0, 1000)}... 
[${data.result.length} total chars]`); } else { - lines.push(chalk.magenta(` ${data.result}`)); + lines.push(` ${data.result}`); } } @@ -98,24 +347,24 @@ export function formatErrorOutput( if (context.isParallelExecution) { const prefix = getAgentPrefix(description); - lines.push(chalk.red(`${prefix} Failed (${formatDuration(duration)})`)); + lines.push(`${prefix} Failed (${formatDuration(duration)})`); } else if (context.useCleanOutput) { - lines.push(chalk.red(`${context.agentType} failed (${formatDuration(duration)})`)); + lines.push(`${context.agentType} failed (${formatDuration(duration)})`); } else { - lines.push(chalk.red(` Claude Code failed: ${description} (${formatDuration(duration)})`)); + lines.push(` Claude Code failed: ${description} (${formatDuration(duration)})`); } - lines.push(chalk.red(` Error Type: ${error.constructor.name}`)); - lines.push(chalk.red(` Message: ${error.message}`)); - lines.push(chalk.gray(` Agent: ${description}`)); - lines.push(chalk.gray(` Working Directory: ${sourceDir}`)); - lines.push(chalk.gray(` Retryable: ${isRetryable ? 'Yes' : 'No'}`)); + lines.push(` Error Type: ${error.constructor.name}`); + lines.push(` Message: ${error.message}`); + lines.push(` Agent: ${description}`); + lines.push(` Working Directory: ${sourceDir}`); + lines.push(` Retryable: ${isRetryable ? 
'Yes' : 'No'}`); if (error.code) { - lines.push(chalk.gray(` Error Code: ${error.code}`)); + lines.push(` Error Code: ${error.code}`); } if (error.status) { - lines.push(chalk.gray(` HTTP Status: ${error.status}`)); + lines.push(` HTTP Status: ${error.status}`); } return lines; @@ -129,18 +378,14 @@ export function formatCompletionMessage( ): string { if (context.isParallelExecution) { const prefix = getAgentPrefix(description); - return chalk.green(`${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`); + return `${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`; } if (context.useCleanOutput) { - return chalk.green( - `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})` - ); + return `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`; } - return chalk.green( - ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}` - ); + return ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`; } export function formatToolUseOutput( @@ -149,9 +394,9 @@ export function formatToolUseOutput( ): string[] { const lines: string[] = []; - lines.push(chalk.yellow(`\n Using Tool: ${toolName}`)); + lines.push(`\n Using Tool: ${toolName}`); if (input && Object.keys(input).length > 0) { - lines.push(chalk.gray(` Input: ${JSON.stringify(input, null, 2)}`)); + lines.push(` Input: ${JSON.stringify(input, null, 2)}`); } return lines; @@ -160,9 +405,9 @@ export function formatToolUseOutput( export function formatToolResultOutput(displayContent: string): string[] { const lines: string[] = []; - lines.push(chalk.green(` Tool Result:`)); + lines.push(` Tool Result:`); if (displayContent) { - lines.push(chalk.gray(` ${displayContent}`)); + lines.push(` ${displayContent}`); } return lines; diff --git 
a/src/ai/router-utils.ts b/src/ai/router-utils.ts index a2dbd06..df3d14b 100644 --- a/src/ai/router-utils.ts +++ b/src/ai/router-utils.ts @@ -26,9 +26,3 @@ export function getActualModelName(sdkReportedModel?: string): string | undefine return sdkReportedModel; } -/** - * Check if router mode is active. - */ -export function isRouterMode(): boolean { - return !!process.env.ANTHROPIC_BASE_URL && !!process.env.ROUTER_DEFAULT; -} diff --git a/src/ai/types.ts b/src/ai/types.ts index f742dd3..af1a6f6 100644 --- a/src/ai/types.ts +++ b/src/ai/types.ts @@ -13,22 +13,6 @@ export interface ExecutionContext { agentKey: string; } -export interface ProcessingState { - turnCount: number; - result: string | null; - apiErrorDetected: boolean; - totalCost: number; - partialCost: number; - lastHeartbeat: number; -} - -export interface ProcessingResult { - result: string | null; - turnCount: number; - apiErrorDetected: boolean; - totalCost: number; -} - export interface AssistantResult { content: string; cleanedContent: string; @@ -110,15 +94,6 @@ export interface ApiErrorDetection { shouldThrow?: Error; } -// Message types from SDK stream -export type SdkMessage = - | AssistantMessage - | ResultMessage - | ToolUseMessage - | ToolResultMessage - | SystemInitMessage - | UserMessage; - export interface SystemInitMessage { type: 'system'; subtype: 'init'; @@ -131,16 +106,3 @@ export interface UserMessage { type: 'user'; } -// Dispatch result types for message processing -export type MessageDispatchResult = - | { action: 'continue' } - | { action: 'break'; result: string | null; cost: number } - | { action: 'throw'; error: Error }; - -export interface MessageDispatchContext { - turnCount: number; - execContext: ExecutionContext; - description: string; - colorFn: (text: string) => string; - useCleanOutput: boolean; -} diff --git a/src/audit/audit-session.ts b/src/audit/audit-session.ts index 6e09133..0a63d5e 100644 --- a/src/audit/audit-session.ts +++ b/src/audit/audit-session.ts @@ 
-17,21 +17,13 @@ import { MetricsTracker } from './metrics-tracker.js'; import { initializeAuditStructure, type SessionMetadata } from './utils.js'; import { formatTimestamp } from '../utils/formatting.js'; import { SessionMutex } from '../utils/concurrency.js'; +import type { AgentEndResult } from '../types/index.js'; +import { PentestError } from '../services/error-handling.js'; +import { ErrorCode } from '../types/errors.js'; // Global mutex instance const sessionMutex = new SessionMutex(); -interface AgentEndResult { - attemptNumber: number; - duration_ms: number; - cost_usd: number; - success: boolean; - model?: string | undefined; - error?: string | undefined; - checkpoint?: string | undefined; - isFinalAttempt?: boolean | undefined; -} - /** * AuditSession - Main audit system facade */ @@ -50,10 +42,22 @@ export class AuditSession { // Validate required fields if (!this.sessionId) { - throw new Error('sessionMetadata.id is required'); + throw new PentestError( + 'sessionMetadata.id is required', + 'config', + false, + { field: 'sessionMetadata.id' }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } if (!this.sessionMetadata.webUrl) { - throw new Error('sessionMetadata.webUrl is required'); + throw new PentestError( + 'sessionMetadata.webUrl is required', + 'config', + false, + { field: 'sessionMetadata.webUrl' }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } // Components @@ -103,29 +107,26 @@ export class AuditSession { ): Promise { await this.ensureInitialized(); - // Save prompt snapshot (only on first attempt) + // 1. Save prompt snapshot (only on first attempt) if (attemptNumber === 1) { await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent); } - // Track current agent name for workflow logging + // 2. 
Create and initialize the per-agent logger this.currentAgentName = agentName; - - // Create and initialize logger for this attempt this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber); await this.currentLogger.initialize(); - // Start metrics tracking + // 3. Start metrics timer this.metricsTracker.startAgent(agentName, attemptNumber); - // Log start event + // 4. Log start event to both agent log and workflow log await this.currentLogger.logEvent('agent_start', { agentName, attemptNumber, timestamp: formatTimestamp(), }); - // Log to unified workflow log await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber }); } @@ -134,7 +135,13 @@ export class AuditSession { */ async logEvent(eventType: string, eventData: unknown): Promise { if (!this.currentLogger) { - throw new Error('No active logger. Call startAgent() first.'); + throw new PentestError( + 'No active logger. Call startAgent() first.', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } // Log to agent-specific log file (JSON format) @@ -167,7 +174,7 @@ export class AuditSession { * End agent execution (mutex-protected) */ async endAgent(agentName: string, result: AgentEndResult): Promise { - // Log end event + // 1. Finalize agent log and close the stream if (this.currentLogger) { await this.currentLogger.logEvent('agent_end', { agentName, @@ -177,15 +184,13 @@ export class AuditSession { timestamp: formatTimestamp(), }); - // Close logger await this.currentLogger.close(); this.currentLogger = null; } - // Reset current agent name + // 2. Log completion to the unified workflow log this.currentAgentName = null; - // Log to unified workflow log const agentLogDetails: AgentLogDetails = { attemptNumber: result.attemptNumber, duration_ms: result.duration_ms, @@ -195,13 +200,11 @@ export class AuditSession { }; await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails); - // Mutex-protected update to session.json + // 3. 
Acquire mutex before touching session.json const unlock = await sessionMutex.lock(this.sessionId); try { - // Reload inside mutex to prevent lost updates during parallel exploitation phase + // 4. Reload-then-write inside mutex to prevent lost updates during parallel phases await this.metricsTracker.reload(); - - // Update metrics await this.metricsTracker.endAgent(agentName, result); } finally { unlock(); @@ -278,4 +281,18 @@ export class AuditSession { unlock(); } } + + /** + * Log resume header to workflow.log + * Call this when a workflow is resuming to add a visual separator + */ + async logResumeHeader(resumeInfo: { + previousWorkflowId: string; + newWorkflowId: string; + checkpointHash: string; + completedAgents: string[]; + }): Promise { + await this.ensureInitialized(); + await this.workflowLogger.logResumeHeader(resumeInfo); + } } diff --git a/src/audit/index.ts b/src/audit/index.ts index 834a8a1..97d3b7d 100644 --- a/src/audit/index.ts +++ b/src/audit/index.ts @@ -17,7 +17,3 @@ */ export { AuditSession } from './audit-session.js'; -export { AgentLogger } from './logger.js'; -export { WorkflowLogger } from './workflow-logger.js'; -export { MetricsTracker } from './metrics-tracker.js'; -export * as AuditUtils from './utils.js'; diff --git a/src/audit/log-stream.ts b/src/audit/log-stream.ts new file mode 100644 index 0000000..b97bef5 --- /dev/null +++ b/src/audit/log-stream.ts @@ -0,0 +1,127 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * LogStream - Stream composition utility for append-only logging + * + * Encapsulates the common stream management pattern used by AgentLogger + * and WorkflowLogger: opening streams in append mode, handling backpressure, + * and proper cleanup. 
+ */ + +import fs from 'fs'; +import path from 'path'; +import { ensureDirectory } from '../utils/file-io.js'; + +/** + * LogStream - Manages a single append-only log file stream + */ +export class LogStream { + private readonly filePath: string; + private stream: fs.WriteStream | null = null; + private _isOpen: boolean = false; + + constructor(filePath: string) { + this.filePath = filePath; + } + + /** + * Open the stream for writing (creates parent directories, opens in append mode) + */ + async open(): Promise { + if (this._isOpen) { + return; + } + + // Ensure parent directory exists + await ensureDirectory(path.dirname(this.filePath)); + + // Create write stream in append mode + this.stream = fs.createWriteStream(this.filePath, { + flags: 'a', + encoding: 'utf8', + autoClose: true, + }); + + // Handle stream errors to prevent crashes (log and mark closed) + this.stream.on('error', (err) => { + console.error(`LogStream error for ${this.filePath}:`, err.message); + this._isOpen = false; + }); + + this._isOpen = true; + } + + /** + * Write text to the stream with backpressure handling + */ + async write(text: string): Promise { + return new Promise((resolve, reject) => { + if (!this._isOpen || !this.stream) { + reject(new Error('LogStream not open')); + return; + } + + const stream = this.stream; + let drainHandler: (() => void) | null = null; + + const cleanup = () => { + if (drainHandler) { + stream.removeListener('drain', drainHandler); + drainHandler = null; + } + }; + + const needsDrain = !stream.write(text, 'utf8', (error) => { + cleanup(); + if (error) { + reject(error); + } else if (!needsDrain) { + resolve(); + } + }); + + if (needsDrain) { + drainHandler = () => { + cleanup(); + resolve(); + }; + stream.once('drain', drainHandler); + } + }); + } + + /** + * Close the stream (flush and close) + */ + async close(): Promise { + if (!this._isOpen || !this.stream) { + return; + } + + return new Promise((resolve) => { + this.stream!.end(() => { + this._isOpen 
= false; + this.stream = null; + resolve(); + }); + }); + } + + /** + * Check if the stream is currently open + */ + get isOpen(): boolean { + return this._isOpen; + } + + /** + * Get the file path this stream writes to + */ + get path(): string { + return this.filePath; + } +} diff --git a/src/audit/logger.ts b/src/audit/logger.ts index c8e902d..d616bfa 100644 --- a/src/audit/logger.ts +++ b/src/audit/logger.ts @@ -8,10 +8,9 @@ * Append-Only Agent Logger * * Provides crash-safe, append-only logging for agent execution. - * Uses file streams with immediate flush to prevent data loss. + * Uses LogStream for stream management with backpressure handling. */ -import fs from 'fs'; import { generateLogPath, generatePromptPath, @@ -19,6 +18,7 @@ import { } from './utils.js'; import { atomicWrite } from '../utils/file-io.js'; import { formatTimestamp } from '../utils/formatting.js'; +import { LogStream } from './log-stream.js'; interface LogEvent { type: string; @@ -30,13 +30,11 @@ interface LogEvent { * AgentLogger - Manages append-only logging for a single agent execution */ export class AgentLogger { - private sessionMetadata: SessionMetadata; - private agentName: string; - private attemptNumber: number; - private timestamp: number; - private logPath: string; - private stream: fs.WriteStream | null = null; - private isOpen: boolean = false; + private readonly sessionMetadata: SessionMetadata; + private readonly agentName: string; + private readonly attemptNumber: number; + private readonly timestamp: number; + private readonly logStream: LogStream; constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) { this.sessionMetadata = sessionMetadata; @@ -44,26 +42,19 @@ export class AgentLogger { this.attemptNumber = attemptNumber; this.timestamp = Date.now(); - // Generate log file path - this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber); + const logPath = generateLogPath(sessionMetadata, agentName, 
this.timestamp, attemptNumber); + this.logStream = new LogStream(logPath); } /** * Initialize the log stream (creates file and opens stream) */ async initialize(): Promise { - if (this.isOpen) { + if (this.logStream.isOpen) { return; // Already initialized } - // Create write stream with append mode and auto-flush - this.stream = fs.createWriteStream(this.logPath, { - flags: 'a', // Append mode - encoding: 'utf8', - autoClose: true, - }); - - this.isOpen = true; + await this.logStream.open(); // Write header await this.writeHeader(); @@ -83,29 +74,7 @@ export class AgentLogger { `========================================\n`, ].join('\n'); - return this.writeRaw(header); - } - - /** - * Write raw text to log file with immediate flush - */ - private writeRaw(text: string): Promise { - return new Promise((resolve, reject) => { - if (!this.isOpen || !this.stream) { - reject(new Error('Logger not initialized')); - return; - } - - const needsDrain = !this.stream.write(text, 'utf8', (error) => { - if (error) reject(error); - }); - - if (needsDrain) { - this.stream.once('drain', resolve); - } else { - resolve(); - } - }); + return this.logStream.write(header); } /** @@ -120,23 +89,14 @@ export class AgentLogger { }; const eventLine = `${JSON.stringify(event)}\n`; - return this.writeRaw(eventLine); + return this.logStream.write(eventLine); } /** * Close the log stream */ async close(): Promise { - if (!this.isOpen || !this.stream) { - return; - } - - return new Promise((resolve) => { - this.stream!.end(() => { - this.isOpen = false; - resolve(); - }); - }); + return this.logStream.close(); } /** diff --git a/src/audit/metrics-tracker.ts b/src/audit/metrics-tracker.ts index 4462d90..5853860 100644 --- a/src/audit/metrics-tracker.ts +++ b/src/audit/metrics-tracker.ts @@ -18,7 +18,9 @@ import { import { atomicWrite, readJson, fileExists } from '../utils/file-io.js'; import { formatTimestamp, calculatePercentage } from '../utils/formatting.js'; import { AGENT_PHASE_MAP, type 
PhaseName } from '../session-manager.js'; -import type { AgentName } from '../types/index.js'; +import { PentestError } from '../services/error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import type { AgentName, AgentEndResult } from '../types/index.js'; interface AttemptData { attempt_number: number; @@ -30,7 +32,7 @@ interface AttemptData { error?: string | undefined; } -interface AgentMetrics { +interface AgentAuditMetrics { status: 'in-progress' | 'success' | 'failed'; attempts: AttemptData[]; final_duration_ms: number; @@ -68,21 +70,10 @@ interface SessionData { total_duration_ms: number; total_cost_usd: number; phases: Record; - agents: Record; + agents: Record; }; } -interface AgentEndResult { - attemptNumber: number; - duration_ms: number; - cost_usd: number; - success: boolean; - model?: string | undefined; - error?: string | undefined; - checkpoint?: string | undefined; - isFinalAttempt?: boolean | undefined; -} - interface ActiveTimer { startTime: number; attemptNumber: number; @@ -170,10 +161,16 @@ export class MetricsTracker { */ async endAgent(agentName: string, result: AgentEndResult): Promise { if (!this.data) { - throw new Error('MetricsTracker not initialized'); + throw new PentestError( + 'MetricsTracker not initialized', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } - // Initialize agent metrics if not exists + // 1. Initialize agent metrics if first time seeing this agent const existingAgent = this.data.metrics.agents[agentName]; const agent = existingAgent ?? { status: 'in-progress' as const, @@ -183,7 +180,7 @@ export class MetricsTracker { }; this.data.metrics.agents[agentName] = agent; - // Add attempt to array + // 2. Build attempt record with optional model/error fields const attempt: AttemptData = { attempt_number: result.attemptNumber, duration_ms: result.duration_ms, @@ -200,16 +197,18 @@ export class MetricsTracker { attempt.error = result.error; } + // 3. 
Append attempt to history agent.attempts.push(attempt); - // Update total cost (includes failed attempts) + // 4. Recalculate total cost across all attempts (includes failures) agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0); - // If successful, update final metrics and status + // 5. Update agent status based on outcome if (result.success) { agent.status = 'success'; agent.final_duration_ms = result.duration_ms; + // 6. Attach model and checkpoint metadata on success if (result.model) { agent.model = result.model; } @@ -218,19 +217,18 @@ export class MetricsTracker { agent.checkpoint = result.checkpoint; } } else { - // If this was the last attempt, mark as failed if (result.isFinalAttempt) { agent.status = 'failed'; } } - // Clear active timer + // 7. Clear active timer this.activeTimers.delete(agentName); - // Recalculate aggregations + // 8. Recalculate phase and session-level aggregations this.recalculateAggregations(); - // Save to disk + // 9. Persist to session.json await this.save(); } @@ -262,7 +260,13 @@ export class MetricsTracker { checkpointHash?: string ): Promise { if (!this.data) { - throw new Error('MetricsTracker not initialized'); + throw new PentestError( + 'MetricsTracker not initialized', + 'validation', + false, + {}, + ErrorCode.AGENT_EXECUTION_FAILED + ); } // Ensure originalWorkflowId is set (backfill if missing from old sessions) @@ -326,9 +330,9 @@ export class MetricsTracker { * Calculate phase-level metrics */ private calculatePhaseMetrics( - successfulAgents: Array<[string, AgentMetrics]> + successfulAgents: Array<[string, AgentAuditMetrics]> ): Record { - const phases: Record = { + const phases: Record = { 'pre-recon': [], 'recon': [], 'vulnerability-analysis': [], diff --git a/src/audit/utils.ts b/src/audit/utils.ts index b518c93..c4366ac 100644 --- a/src/audit/utils.ts +++ b/src/audit/utils.ts @@ -15,20 +15,17 @@ import fs from 'fs/promises'; import path from 'path'; import { fileURLToPath } from 
'url'; +import { ensureDirectory } from '../utils/file-io.js'; + +export type { SessionMetadata } from '../types/audit.js'; +import type { SessionMetadata } from '../types/audit.js'; + const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); // Get Shannon repository root -export const SHANNON_ROOT = path.resolve(__dirname, '..', '..'); -export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs'); - -export interface SessionMetadata { - id: string; - webUrl: string; - repoPath?: string; - outputPath?: string; - [key: string]: unknown; -} +const SHANNON_ROOT = path.resolve(__dirname, '..', '..'); +const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs'); /** * Extract and sanitize hostname from URL for use in identifiers @@ -93,98 +90,6 @@ export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): strin return path.join(auditPath, 'workflow.log'); } -/** - * Ensure directory exists (idempotent, race-safe) - */ -export async function ensureDirectory(dirPath: string): Promise { - try { - await fs.mkdir(dirPath, { recursive: true }); - } catch (error) { - // Ignore EEXIST errors (race condition safe) - if ((error as NodeJS.ErrnoException).code !== 'EEXIST') { - throw error; - } - } -} - -/** - * Atomic write using temp file + rename pattern - * Guarantees no partial writes or corruption on crash - */ -export async function atomicWrite(filePath: string, data: object | string): Promise { - const tempPath = `${filePath}.tmp`; - const content = typeof data === 'string' ? 
data : JSON.stringify(data, null, 2); - - try { - // Write to temp file - await fs.writeFile(tempPath, content, 'utf8'); - - // Atomic rename (POSIX guarantee: atomic on same filesystem) - await fs.rename(tempPath, filePath); - } catch (error) { - // Clean up temp file on failure - try { - await fs.unlink(tempPath); - } catch { - // Ignore cleanup errors - } - throw error; - } -} - -/** - * Format duration in milliseconds to human-readable string - */ -export function formatDuration(ms: number): string { - if (ms < 1000) { - return `${ms}ms`; - } - - const seconds = ms / 1000; - if (seconds < 60) { - return `${seconds.toFixed(1)}s`; - } - - const minutes = Math.floor(seconds / 60); - const remainingSeconds = Math.floor(seconds % 60); - return `${minutes}m ${remainingSeconds}s`; -} - -/** - * Format timestamp to ISO 8601 string - */ -export function formatTimestamp(timestamp: number = Date.now()): string { - return new Date(timestamp).toISOString(); -} - -/** - * Calculate percentage - */ -export function calculatePercentage(part: number, total: number): number { - if (total === 0) return 0; - return (part / total) * 100; -} - -/** - * Read and parse JSON file - */ -export async function readJson(filePath: string): Promise { - const content = await fs.readFile(filePath, 'utf8'); - return JSON.parse(content) as T; -} - -/** - * Check if file exists - */ -export async function fileExists(filePath: string): Promise { - try { - await fs.access(filePath); - return true; - } catch { - return false; - } -} - /** * Initialize audit directory structure for a session * Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/ diff --git a/src/audit/workflow-logger.ts b/src/audit/workflow-logger.ts index d64ff4f..f01b80b 100644 --- a/src/audit/workflow-logger.ts +++ b/src/audit/workflow-logger.ts @@ -11,10 +11,10 @@ * Optimized for `tail -f` viewing during concurrent workflow execution. 
*/ -import fs from 'fs'; -import path from 'path'; -import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js'; +import fs from 'fs/promises'; +import { generateWorkflowLogPath, type SessionMetadata } from './utils.js'; import { formatDuration, formatTimestamp } from '../utils/formatting.js'; +import { LogStream } from './log-stream.js'; export interface AgentLogDetails { attemptNumber?: number; @@ -42,38 +42,27 @@ export interface WorkflowSummary { * WorkflowLogger - Manages the unified workflow log file */ export class WorkflowLogger { - private sessionMetadata: SessionMetadata; - private logPath: string; - private stream: fs.WriteStream | null = null; - private initialized: boolean = false; + private readonly sessionMetadata: SessionMetadata; + private readonly logStream: LogStream; constructor(sessionMetadata: SessionMetadata) { this.sessionMetadata = sessionMetadata; - this.logPath = generateWorkflowLogPath(sessionMetadata); + const logPath = generateWorkflowLogPath(sessionMetadata); + this.logStream = new LogStream(logPath); } /** * Initialize the log stream (creates file and writes header) */ async initialize(): Promise { - if (this.initialized) { + if (this.logStream.isOpen) { return; } - // Ensure directory exists - await ensureDirectory(path.dirname(this.logPath)); - - // Create write stream with append mode - this.stream = fs.createWriteStream(this.logPath, { - flags: 'a', - encoding: 'utf8', - autoClose: true, - }); - - this.initialized = true; + await this.logStream.open(); // Write header only if file is new (empty) - const stats = await fs.promises.stat(this.logPath).catch(() => null); + const stats = await fs.stat(this.logStream.path).catch(() => null); if (!stats || stats.size === 0) { await this.writeHeader(); } @@ -94,29 +83,35 @@ export class WorkflowLogger { ``, ].join('\n'); - return this.writeRaw(header); + return this.logStream.write(header); } /** - * Write raw text to log file with immediate flush + * Write 
resume header to log file when workflow is resumed */ - private writeRaw(text: string): Promise { - return new Promise((resolve, reject) => { - if (!this.initialized || !this.stream) { - reject(new Error('WorkflowLogger not initialized')); - return; - } + async logResumeHeader(resumeInfo: { + previousWorkflowId: string; + newWorkflowId: string; + checkpointHash: string; + completedAgents: string[]; + }): Promise { + await this.ensureInitialized(); - const needsDrain = !this.stream.write(text, 'utf8', (error) => { - if (error) reject(error); - }); + const header = [ + ``, + `================================================================================`, + `RESUMED`, + `================================================================================`, + `Previous Workflow ID: ${resumeInfo.previousWorkflowId}`, + `New Workflow ID: ${resumeInfo.newWorkflowId}`, + `Resumed At: ${formatTimestamp()}`, + `Checkpoint: ${resumeInfo.checkpointHash}`, + `Completed: ${resumeInfo.completedAgents.length} agents (${resumeInfo.completedAgents.join(', ')})`, + `================================================================================`, + ``, + ].join('\n'); - if (needsDrain) { - this.stream.once('drain', resolve); - } else { - resolve(); - } - }); + return this.logStream.write(header); } /** @@ -138,10 +133,10 @@ export class WorkflowLogger { // Add blank line before phase start for readability if (event === 'start') { - await this.writeRaw('\n'); + await this.logStream.write('\n'); } - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -184,7 +179,7 @@ export class WorkflowLogger { } const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -194,7 +189,7 @@ export class WorkflowLogger { await this.ensureInitialized(); const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ 
-205,7 +200,7 @@ export class WorkflowLogger { const contextStr = context ? ` (${context})` : ''; const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -301,7 +296,7 @@ export class WorkflowLogger { const params = this.formatToolParams(toolName, parameters); const paramStr = params ? `: ${params}` : ''; const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -313,7 +308,7 @@ export class WorkflowLogger { // Show full content, replacing newlines with escaped version for single-line output const escaped = content.replace(/\n/g, '\\n'); const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`; - await this.writeRaw(line); + await this.logStream.write(line); } /** @@ -324,42 +319,42 @@ export class WorkflowLogger { const status = summary.status === 'completed' ? 
'COMPLETED' : 'FAILED'; - await this.writeRaw('\n'); - await this.writeRaw(`================================================================================\n`); - await this.writeRaw(`Workflow ${status}\n`); - await this.writeRaw(`────────────────────────────────────────\n`); - await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`); - await this.writeRaw(`Status: ${summary.status}\n`); - await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`); - await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`); - await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`); + await this.logStream.write('\n'); + await this.logStream.write(`================================================================================\n`); + await this.logStream.write(`Workflow ${status}\n`); + await this.logStream.write(`────────────────────────────────────────\n`); + await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`); + await this.logStream.write(`Status: ${summary.status}\n`); + await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`); + await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`); + await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`); if (summary.error) { - await this.writeRaw(`Error: ${summary.error}\n`); + await this.logStream.write(`Error: ${summary.error}\n`); } - await this.writeRaw(`\n`); - await this.writeRaw(`Agent Breakdown:\n`); + await this.logStream.write(`\n`); + await this.logStream.write(`Agent Breakdown:\n`); for (const agentName of summary.completedAgents) { const metrics = summary.agentMetrics[agentName]; if (metrics) { const duration = formatDuration(metrics.durationMs); const cost = metrics.costUsd !== null ? 
`$${metrics.costUsd.toFixed(4)}` : 'N/A'; - await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`); + await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`); } else { - await this.writeRaw(` - ${agentName}\n`); + await this.logStream.write(` - ${agentName}\n`); } } - await this.writeRaw(`================================================================================\n`); + await this.logStream.write(`================================================================================\n`); } /** * Ensure initialized (helper for lazy initialization) */ private async ensureInitialized(): Promise { - if (!this.initialized) { + if (!this.logStream.isOpen) { await this.initialize(); } } @@ -368,15 +363,6 @@ export class WorkflowLogger { * Close the log stream */ async close(): Promise { - if (!this.initialized || !this.stream) { - return; - } - - return new Promise((resolve) => { - this.stream!.end(() => { - this.initialized = false; - resolve(); - }); - }); + return this.logStream.close(); } } diff --git a/src/cli/input-validator.ts b/src/cli/input-validator.ts deleted file mode 100644 index dfcb597..0000000 --- a/src/cli/input-validator.ts +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. 
- -import { fs, path } from 'zx'; - -interface ValidationResult { - valid: boolean; - error?: string; - path?: string; -} - -// Helper function: Validate web URL -export function validateWebUrl(url: string): ValidationResult { - try { - const parsed = new URL(url); - if (!['http:', 'https:'].includes(parsed.protocol)) { - return { valid: false, error: 'Web URL must use HTTP or HTTPS protocol' }; - } - if (!parsed.hostname) { - return { valid: false, error: 'Web URL must have a valid hostname' }; - } - return { valid: true }; - } catch { - return { valid: false, error: 'Invalid web URL format' }; - } -} - -// Helper function: Validate local repository path -export async function validateRepoPath(repoPath: string): Promise { - try { - // Check if path exists - if (!(await fs.pathExists(repoPath))) { - return { valid: false, error: 'Repository path does not exist' }; - } - - // Check if it's a directory - const stats = await fs.stat(repoPath); - if (!stats.isDirectory()) { - return { valid: false, error: 'Repository path must be a directory' }; - } - - // Check if it's readable - try { - await fs.access(repoPath, fs.constants.R_OK); - } catch { - return { valid: false, error: 'Repository path is not readable' }; - } - - // Convert to absolute path - const absolutePath = path.resolve(repoPath); - return { valid: true, path: absolutePath }; - } catch (error) { - const errMsg = error instanceof Error ? error.message : String(error); - return { valid: false, error: `Invalid repository path: ${errMsg}` }; - } -} diff --git a/src/cli/ui.ts b/src/cli/ui.ts deleted file mode 100644 index b059457..0000000 --- a/src/cli/ui.ts +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. 
- -import chalk from 'chalk'; -import { displaySplashScreen } from '../splash-screen.js'; - -// Helper function: Display help information -export function showHelp(): void { - console.log(chalk.cyan.bold('AI Penetration Testing Agent')); - console.log(chalk.gray('Automated security assessment tool\n')); - - console.log(chalk.yellow.bold('USAGE:')); - console.log(' shannon [--config config.yaml] [--output /path/to/reports]\n'); - - console.log(chalk.yellow.bold('OPTIONS:')); - console.log( - ' --config YAML configuration file for authentication and testing parameters' - ); - console.log( - ' --output Custom output directory for session folder (default: ./audit-logs/)' - ); - console.log( - ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)' - ); - console.log( - ' --disable-loader Disable the animated progress loader (useful when logs interfere with spinner)' - ); - console.log(' --help Show this help message\n'); - - console.log(chalk.yellow.bold('EXAMPLES:')); - console.log(' shannon "https://example.com" "/path/to/local/repo"'); - console.log(' shannon "https://example.com" "/path/to/local/repo" --config auth.yaml'); - console.log(' shannon "https://example.com" "/path/to/local/repo" --output /path/to/reports'); - console.log(' shannon "https://example.com" "/path/to/local/repo" --pipeline-testing\n'); - - console.log(chalk.yellow.bold('REQUIREMENTS:')); - console.log(' • WEB_URL must start with http:// or https://'); - console.log(' • REPO_PATH must be an accessible local directory'); - console.log(' • Only test systems you own or have permission to test\n'); - - console.log(chalk.yellow.bold('ENVIRONMENT VARIABLES:')); - console.log(' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)'); -} - -// Export the splash screen function for use in main -export { displaySplashScreen }; diff --git a/src/config-parser.ts b/src/config-parser.ts index 610fb3a..484ad37 100644 --- a/src/config-parser.ts +++ 
b/src/config-parser.ts @@ -7,13 +7,13 @@ import { createRequire } from 'module'; import { fs } from 'zx'; import yaml from 'js-yaml'; -import { Ajv, type ValidateFunction } from 'ajv'; +import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv'; import type { FormatsPlugin } from 'ajv-formats'; -import { PentestError } from './error-handling.js'; +import { PentestError } from './services/error-handling.js'; +import { ErrorCode } from './types/errors.js'; import type { Config, Rule, - Rules, Authentication, DistributedConfig, } from './types/config.js'; @@ -22,11 +22,9 @@ import type { const require = createRequire(import.meta.url); const addFormats: FormatsPlugin = require('ajv-formats'); -// Initialize AJV with formats const ajv = new Ajv({ allErrors: true, verbose: true }); addFormats(ajv); -// Load JSON Schema let configSchema: object; let validateSchema: ValidateFunction; @@ -45,7 +43,6 @@ try { ); } -// Security patterns to block const DANGEROUS_PATTERNS: RegExp[] = [ /\.\.\//, // Path traversal /[<>]/, // HTML/XML injection @@ -54,32 +51,171 @@ const DANGEROUS_PATTERNS: RegExp[] = [ /file:/i, // File URLs ]; -// Parse and load YAML configuration file with enhanced safety -export const parseConfig = async (configPath: string): Promise => { - try { - // File existence check - if (!(await fs.pathExists(configPath))) { - throw new Error(`Configuration file not found: ${configPath}`); +/** + * Format a single AJV error into a human-readable message. + * Translates AJV error keywords into plain English descriptions. 
+ */ +function formatAjvError(error: ErrorObject): string { + const path = error.instancePath || 'root'; + const params = error.params as Record; + + switch (error.keyword) { + case 'required': { + const missingProperty = params.missingProperty as string; + return `Missing required field: "${missingProperty}" at ${path || 'root'}`; } - // File size check (prevent extremely large files) - const stats = await fs.stat(configPath); - const maxFileSize = 1024 * 1024; // 1MB - if (stats.size > maxFileSize) { - throw new Error( - `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)` + case 'type': { + const expectedType = params.type as string; + return `Invalid type at ${path}: expected ${expectedType}`; + } + + case 'enum': { + const allowedValues = params.allowedValues as unknown[]; + const formattedValues = allowedValues.map((v) => `"${v}"`).join(', '); + return `Invalid value at ${path}: must be one of [${formattedValues}]`; + } + + case 'additionalProperties': { + const additionalProperty = params.additionalProperty as string; + return `Unknown field at ${path}: "${additionalProperty}" is not allowed`; + } + + case 'minLength': { + const limit = params.limit as number; + return `Value at ${path} is too short: must have at least ${limit} character(s)`; + } + + case 'maxLength': { + const limit = params.limit as number; + return `Value at ${path} is too long: must have at most ${limit} character(s)`; + } + + case 'minimum': { + const limit = params.limit as number; + return `Value at ${path} is too small: must be >= ${limit}`; + } + + case 'maximum': { + const limit = params.limit as number; + return `Value at ${path} is too large: must be <= ${limit}`; + } + + case 'minItems': { + const limit = params.limit as number; + return `Array at ${path} has too few items: must have at least ${limit} item(s)`; + } + + case 'maxItems': { + const limit = params.limit as number; + return `Array at ${path} has too many items: must have at most 
${limit} item(s)`; + } + + case 'pattern': { + const pattern = params.pattern as string; + return `Value at ${path} does not match required pattern: ${pattern}`; + } + + case 'format': { + const format = params.format as string; + return `Value at ${path} must be a valid ${format}`; + } + + case 'const': { + const allowedValue = params.allowedValue as unknown; + return `Value at ${path} must be exactly "${allowedValue}"`; + } + + case 'oneOf': { + return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`; + } + + case 'anyOf': { + return `Value at ${path} must match at least one of the allowed schemas`; + } + + case 'not': { + return `Value at ${path} matches a schema it should not match`; + } + + case 'if': { + return `Value at ${path} does not satisfy conditional schema requirements`; + } + + case 'uniqueItems': { + const i = params.i as number; + const j = params.j as number; + return `Array at ${path} contains duplicate items at positions ${j} and ${i}`; + } + + case 'propertyNames': { + const propertyName = params.propertyName as string; + return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`; + } + + case 'dependencies': + case 'dependentRequired': { + const property = params.property as string; + const missingProperty = params.missingProperty as string; + return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`; + } + + default: { + // Fallback for any unhandled keywords - use AJV's message if available + const message = error.message || `validation failed for keyword "${error.keyword}"`; + return `${path}: ${message}`; + } + } +} + +/** + * Format all AJV errors into a list of human-readable messages. + * Returns an array of formatted error strings. 
+ */ +function formatAjvErrors(errors: ErrorObject[]): string[] { + return errors.map(formatAjvError); +} + +export const parseConfig = async (configPath: string): Promise => { + try { + // 1. Verify file exists + if (!(await fs.pathExists(configPath))) { + throw new PentestError( + `Configuration file not found: ${configPath}`, + 'config', + false, + { configPath }, + ErrorCode.CONFIG_NOT_FOUND ); } - // Read file content - const configContent = await fs.readFile(configPath, 'utf8'); - - // Basic content validation - if (!configContent.trim()) { - throw new Error('Configuration file is empty'); + // 2. Check file size + const stats = await fs.stat(configPath); + const maxFileSize = 1024 * 1024; // 1MB + if (stats.size > maxFileSize) { + throw new PentestError( + `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`, + 'config', + false, + { configPath, fileSize: stats.size, maxFileSize }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } - // Parse YAML with safety options + // 3. Read and check for empty content + const configContent = await fs.readFile(configPath, 'utf8'); + + if (!configContent.trim()) { + throw new PentestError( + 'Configuration file is empty', + 'config', + false, + { configPath }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); + } + + // 4. Parse YAML with safe schema let config: unknown; try { config = yaml.load(configContent, { @@ -89,67 +225,82 @@ export const parseConfig = async (configPath: string): Promise => { }); } catch (yamlError) { const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError); - throw new Error(`YAML parsing failed: ${errMsg}`); + throw new PentestError( + `YAML parsing failed: ${errMsg}`, + 'config', + false, + { configPath, originalError: errMsg }, + ErrorCode.CONFIG_PARSE_ERROR + ); } - // Additional safety check + // 5. 
Guard against null/undefined parse result if (config === null || config === undefined) { - throw new Error('Configuration file resulted in null/undefined after parsing'); + throw new PentestError( + 'Configuration file resulted in null/undefined after parsing', + 'config', + false, + { configPath }, + ErrorCode.CONFIG_PARSE_ERROR + ); } - // Validate the configuration structure and content + // 6. Validate schema, security rules, and return validateConfig(config as Config); return config as Config; } catch (error) { - const errMsg = error instanceof Error ? error.message : String(error); - // Enhance error message with context - if ( - errMsg.startsWith('Configuration file not found') || - errMsg.startsWith('YAML parsing failed') || - errMsg.includes('must be') || - errMsg.includes('exceeds maximum') - ) { - // These are already well-formatted errors, re-throw as-is + // PentestError instances are already well-formatted, re-throw as-is + if (error instanceof PentestError) { throw error; - } else { - // Wrap other errors with context - throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`); } + const errMsg = error instanceof Error ? 
error.message : String(error); + throw new PentestError( + `Failed to parse configuration file '${configPath}': ${errMsg}`, + 'config', + false, + { configPath, originalError: errMsg }, + ErrorCode.CONFIG_PARSE_ERROR + ); } }; -// Validate overall configuration structure using JSON Schema const validateConfig = (config: Config): void => { - // Basic structure validation if (!config || typeof config !== 'object') { - throw new Error('Configuration must be a valid object'); + throw new PentestError( + 'Configuration must be a valid object', + 'config', + false, + {}, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } if (Array.isArray(config)) { - throw new Error('Configuration must be an object, not an array'); + throw new PentestError( + 'Configuration must be an object, not an array', + 'config', + false, + {}, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } - // JSON Schema validation const isValid = validateSchema(config); if (!isValid) { const errors = validateSchema.errors || []; - const errorMessages = errors.map((err) => { - const path = err.instancePath || 'root'; - return `${path}: ${err.message}`; - }); - throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`); + const errorMessages = formatAjvErrors(errors); + throw new PentestError( + `Configuration validation failed:\n - ${errorMessages.join('\n - ')}`, + 'config', + false, + { validationErrors: errorMessages }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } - // Additional security validation performSecurityValidation(config); - // Warn if deprecated fields are used - if (config.login) { - console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.'); - } - - // Ensure at least some configuration is provided if (!config.rules && !config.authentication) { console.warn( '⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.' 
@@ -161,35 +312,58 @@ const validateConfig = (config: Config): void => { } }; -// Perform additional security validation beyond JSON Schema const performSecurityValidation = (config: Config): void => { - // Validate authentication section for security issues if (config.authentication) { const auth = config.authentication; - // Check for dangerous patterns in credentials - if (auth.credentials) { + // Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986) + if (auth.login_url) { for (const pattern of DANGEROUS_PATTERNS) { - if (pattern.test(auth.credentials.username)) { - throw new Error( - 'authentication.credentials.username contains potentially dangerous pattern' - ); - } - if (pattern.test(auth.credentials.password)) { - throw new Error( - 'authentication.credentials.password contains potentially dangerous pattern' + if (pattern.test(auth.login_url)) { + throw new PentestError( + `authentication.login_url contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'login_url', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); + } + } + } + + if (auth.credentials) { + for (const pattern of DANGEROUS_PATTERNS) { + if (pattern.test(auth.credentials.username)) { + throw new PentestError( + `authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'credentials.username', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); + } + if (pattern.test(auth.credentials.password)) { + throw new PentestError( + `authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: 'credentials.password', pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } } - // Check login flow for dangerous patterns if (auth.login_flow) { auth.login_flow.forEach((step, index) => { for (const pattern of DANGEROUS_PATTERNS) { if 
(pattern.test(step)) { - throw new Error( - `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `login_flow[${index}]`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } @@ -197,48 +371,58 @@ const performSecurityValidation = (config: Config): void => { } } - // Validate rules section for security issues if (config.rules) { validateRulesSecurity(config.rules.avoid, 'avoid'); validateRulesSecurity(config.rules.focus, 'focus'); - // Check for duplicate and conflicting rules checkForDuplicates(config.rules.avoid || [], 'avoid'); checkForDuplicates(config.rules.focus || [], 'focus'); checkForConflicts(config.rules.avoid, config.rules.focus); } }; -// Validate rules for security issues const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => { if (!rules) return; rules.forEach((rule, index) => { - // Security validation for (const pattern of DANGEROUS_PATTERNS) { if (pattern.test(rule.url_path)) { - throw new Error( - `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } if (pattern.test(rule.description)) { - throw new Error( - `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}` + throw new PentestError( + `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`, + 'config', + false, + { field: `rules.${ruleType}[${index}].description`, pattern: pattern.source }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } } - // Type-specific 
validation validateRuleTypeSpecific(rule, ruleType, index); }); }; -// Validate rule based on its specific type const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => { + const field = `rules.${ruleType}[${index}].url_path`; + switch (rule.type) { case 'path': if (!rule.url_path.startsWith('/')) { - throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`); + throw new PentestError( + `${field} for type 'path' must start with '/'`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED + ); } break; @@ -246,14 +430,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'domain': // Basic domain validation - no slashes allowed if (rule.url_path.includes('/')) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters` + throw new PentestError( + `${field} for type '${rule.type}' cannot contain '/' characters`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } // Must contain at least one dot for domains if (rule.type === 'domain' && !rule.url_path.includes('.')) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name` + throw new PentestError( + `${field} for type 'domain' must be a valid domain name`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; @@ -261,62 +453,77 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): case 'method': { const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']; if (!allowedMethods.includes(rule.url_path.toUpperCase())) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}` + throw new PentestError( + `${field} for type 'method' must be one of: 
${allowedMethods.join(', ')}`, + 'config', + false, + { field, ruleType: rule.type, allowedMethods }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; } case 'header': - // Header name validation (basic) if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)` + throw new PentestError( + `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; case 'parameter': - // Parameter name validation (basic) if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { - throw new Error( - `rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)` + throw new PentestError( + `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`, + 'config', + false, + { field, ruleType: rule.type }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } break; } }; -// Check for duplicate rules const checkForDuplicates = (rules: Rule[], ruleType: string): void => { const seen = new Set(); rules.forEach((rule, index) => { const key = `${rule.type}:${rule.url_path}`; if (seen.has(key)) { - throw new Error( - `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'` + throw new PentestError( + `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`, + 'config', + false, + { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } seen.add(key); }); }; -// Check for conflicting rules between avoid and focus const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => { const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`)); 
focusRules.forEach((rule, index) => { const key = `${rule.type}:${rule.url_path}`; if (avoidSet.has(key)) { - throw new Error( - `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid` + throw new PentestError( + `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`, + 'config', + false, + { field: `rules.focus[${index}]`, urlPath: rule.url_path }, + ErrorCode.CONFIG_VALIDATION_FAILED ); } }); }; -// Sanitize and normalize rule values const sanitizeRule = (rule: Rule): Rule => { return { description: rule.description.trim(), @@ -325,7 +532,6 @@ const sanitizeRule = (rule: Rule): Rule => { }; }; -// Distribute configuration sections to different agents with sanitization export const distributeConfig = (config: Config | null): DistributedConfig => { const avoid = config?.rules?.avoid || []; const focus = config?.rules?.focus || []; @@ -338,7 +544,6 @@ export const distributeConfig = (config: Config | null): DistributedConfig => { }; }; -// Sanitize and normalize authentication values const sanitizeAuthentication = (auth: Authentication): Authentication => { return { login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'], @@ -348,7 +553,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => { password: auth.credentials.password, ...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }), }, - login_flow: auth.login_flow.map((step) => step.trim()), + ...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }), success_condition: { type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'], value: auth.success_condition.value.trim(), diff --git a/src/constants.ts b/src/constants.ts deleted file mode 100644 index 4db8e9f..0000000 --- a/src/constants.ts +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. 
-// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { path, fs } from 'zx'; -import chalk from 'chalk'; -import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js'; -import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js'; - -// Factory function for vulnerability queue validators -function createVulnValidator(vulnType: VulnType): AgentValidator { - return async (sourceDir: string): Promise => { - try { - await validateQueueAndDeliverable(vulnType, sourceDir); - return true; - } catch (error) { - const errMsg = error instanceof Error ? error.message : String(error); - console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${errMsg}`)); - return false; - } - }; -} - -// Factory function for exploit deliverable validators -function createExploitValidator(vulnType: VulnType): AgentValidator { - return async (sourceDir: string): Promise => { - const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`); - return await fs.pathExists(evidenceFile); - }; -} - -// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts -export const MCP_AGENT_MAPPING: Record = Object.freeze({ - // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code') - // NOTE: Pre-recon is pure code analysis and doesn't use browser automation, - // but assigning MCP server anyway for consistency and future extensibility - 'pre-recon-code': 'playwright-agent1', - - // Phase 2: Reconnaissance (actual prompt name is 'recon') - recon: 'playwright-agent2', - - // Phase 3: Vulnerability Analysis (5 parallel agents) - 'vuln-injection': 'playwright-agent1', - 'vuln-xss': 'playwright-agent2', - 'vuln-auth': 'playwright-agent3', - 'vuln-ssrf': 'playwright-agent4', - 'vuln-authz': 
'playwright-agent5', - - // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts) - 'exploit-injection': 'playwright-agent1', - 'exploit-xss': 'playwright-agent2', - 'exploit-auth': 'playwright-agent3', - 'exploit-ssrf': 'playwright-agent4', - 'exploit-authz': 'playwright-agent5', - - // Phase 5: Reporting (actual prompt name is 'report-executive') - // NOTE: Report generation is typically text-based and doesn't use browser automation, - // but assigning MCP server anyway for potential screenshot inclusion or future needs - 'report-executive': 'playwright-agent3', -}); - -// Direct agent-to-validator mapping - much simpler than pattern matching -export const AGENT_VALIDATORS: Record = Object.freeze({ - // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent - 'pre-recon': async (sourceDir: string): Promise => { - const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md'); - return await fs.pathExists(codeAnalysisFile); - }, - - // Reconnaissance agent - recon: async (sourceDir: string): Promise => { - const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md'); - return await fs.pathExists(reconFile); - }, - - // Vulnerability analysis agents - 'injection-vuln': createVulnValidator('injection'), - 'xss-vuln': createVulnValidator('xss'), - 'auth-vuln': createVulnValidator('auth'), - 'ssrf-vuln': createVulnValidator('ssrf'), - 'authz-vuln': createVulnValidator('authz'), - - // Exploitation agents - 'injection-exploit': createExploitValidator('injection'), - 'xss-exploit': createExploitValidator('xss'), - 'auth-exploit': createExploitValidator('auth'), - 'ssrf-exploit': createExploitValidator('ssrf'), - 'authz-exploit': createExploitValidator('authz'), - - // Executive report agent - report: async (sourceDir: string): Promise => { - const reportFile = path.join( - sourceDir, - 'deliverables', - 'comprehensive_security_assessment_report.md' - ); - - const 
reportExists = await fs.pathExists(reportFile); - - if (!reportExists) { - console.log( - chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`) - ); - } - - return reportExists; - }, -}); diff --git a/src/phases/pre-recon.ts b/src/phases/pre-recon.ts deleted file mode 100644 index 88dc3ed..0000000 --- a/src/phases/pre-recon.ts +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { $, fs, path } from 'zx'; -import chalk from 'chalk'; -import { Timer } from '../utils/metrics.js'; -import { formatDuration } from '../utils/formatting.js'; -import { handleToolError, PentestError } from '../error-handling.js'; -import { AGENTS } from '../session-manager.js'; -import { runClaudePromptWithRetry } from '../ai/claude-executor.js'; -import { loadPrompt } from '../prompts/prompt-manager.js'; -import type { ToolAvailability } from '../tool-checker.js'; -import type { DistributedConfig } from '../types/config.js'; - -interface AgentResult { - success: boolean; - duration: number; - cost?: number | undefined; - error?: string | undefined; - retryable?: boolean | undefined; -} - -type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis'; -type ToolStatus = 'success' | 'skipped' | 'error'; - -interface TerminalScanResult { - tool: ToolName; - output: string; - status: ToolStatus; - duration: number; - success?: boolean; - error?: Error; -} - -interface PromptVariables { - webUrl: string; - repoPath: string; -} - -// Discriminated union for Wave1 tool results - clearer than loose union types -type Wave1ToolResult = - | { kind: 'scan'; result: TerminalScanResult } - | { kind: 'skipped'; message: string } - | { kind: 'agent'; result: AgentResult }; - -interface Wave1Results { - nmap: Wave1ToolResult; - subfinder: 
Wave1ToolResult; - whatweb: Wave1ToolResult; - naabu?: Wave1ToolResult; - codeAnalysis: AgentResult; -} - -interface Wave2Results { - schemathesis: TerminalScanResult; -} - -interface PreReconResult { - duration: number; - report: string; -} - -// Runs external security tools (nmap, whatweb, etc). Schemathesis requires schemas from code analysis. -async function runTerminalScan(tool: ToolName, target: string, sourceDir: string | null = null): Promise { - const timer = new Timer(`command-${tool}`); - try { - let result; - switch (tool) { - case 'nmap': { - console.log(chalk.blue(` 🔍 Running ${tool} scan...`)); - const nmapHostname = new URL(target).hostname; - result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`nmap -sV -sC ${nmapHostname}`; - const duration = timer.stop(); - console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(duration)}`)); - return { tool: 'nmap', output: result.stdout, status: 'success', duration }; - } - case 'subfinder': { - console.log(chalk.blue(` 🔍 Running ${tool} scan...`)); - const hostname = new URL(target).hostname; - result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`subfinder -d ${hostname}`; - const subfinderDuration = timer.stop(); - console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(subfinderDuration)}`)); - return { tool: 'subfinder', output: result.stdout, status: 'success', duration: subfinderDuration }; - } - case 'whatweb': { - console.log(chalk.blue(` 🔍 Running ${tool} scan...`)); - const command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`; - console.log(chalk.gray(` Command: ${command}`)); - result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`whatweb --open-timeout 30 --read-timeout 60 ${target}`; - const whatwebDuration = timer.stop(); - console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(whatwebDuration)}`)); - return { tool: 'whatweb', output: result.stdout, status: 'success', duration: whatwebDuration }; - 
} - case 'schemathesis': { - // Schemathesis depends on code analysis output - skip if no schemas found - const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas'); - if (await fs.pathExists(schemasDir)) { - const schemaFiles = await fs.readdir(schemasDir) as string[]; - const apiSchemas = schemaFiles.filter((f: string) => f.endsWith('.json') || f.endsWith('.yml') || f.endsWith('.yaml')); - if (apiSchemas.length > 0) { - console.log(chalk.blue(` 🔍 Running ${tool} scan...`)); - const allResults: string[] = []; - - // Run schemathesis on each schema file - for (const schemaFile of apiSchemas) { - const schemaPath = path.join(schemasDir, schemaFile); - try { - result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`schemathesis run ${schemaPath} -u ${target} --max-failures=5`; - allResults.push(`Schema: ${schemaFile}\n${result.stdout}`); - } catch (schemaError) { - const err = schemaError as { stdout?: string; message?: string }; - allResults.push(`Schema: ${schemaFile}\nError: ${err.stdout || err.message}`); - } - } - - const schemaDuration = timer.stop(); - console.log(chalk.green(` ✅ ${tool} completed in ${formatDuration(schemaDuration)}`)); - return { tool: 'schemathesis', output: allResults.join('\n\n'), status: 'success', duration: schemaDuration }; - } else { - console.log(chalk.gray(` ⏭️ ${tool} - no API schemas found`)); - return { tool: 'schemathesis', output: 'No API schemas found', status: 'skipped', duration: timer.stop() }; - } - } else { - console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`)); - return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() }; - } - } - default: - throw new Error(`Unknown tool: ${tool}`); - } - } catch (error) { - const duration = timer.stop(); - console.log(chalk.red(` ❌ ${tool} failed in ${formatDuration(duration)}`)); - return handleToolError(tool, error as Error & { code?: string }) as TerminalScanResult; - } -} - -// Wave 1: 
Initial footprinting + authentication -async function runPreReconWave1( - webUrl: string, - sourceDir: string, - variables: PromptVariables, - config: DistributedConfig | null, - pipelineTestingMode: boolean = false, - sessionId: string | null = null, - outputPath: string | null = null -): Promise { - console.log(chalk.blue(' → Launching Wave 1 operations in parallel...')); - - const operations: Promise[] = []; - - const skippedResult = (message: string): Wave1ToolResult => ({ kind: 'skipped', message }); - - // Skip external commands in pipeline testing mode - if (pipelineTestingMode) { - console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)')); - operations.push( - runClaudePromptWithRetry( - await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode), - sourceDir, - '*', - '', - AGENTS['pre-recon'].displayName, - 'pre-recon', // Agent name for snapshot creation - chalk.cyan, - { id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) } // Session metadata for audit logging (STANDARD: use 'id' field) - ) - ); - const [codeAnalysis] = await Promise.all(operations); - return { - nmap: skippedResult('Skipped (pipeline testing mode)'), - subfinder: skippedResult('Skipped (pipeline testing mode)'), - whatweb: skippedResult('Skipped (pipeline testing mode)'), - codeAnalysis: codeAnalysis as AgentResult - }; - } else { - operations.push( - runTerminalScan('nmap', webUrl), - runTerminalScan('subfinder', webUrl), - runTerminalScan('whatweb', webUrl), - runClaudePromptWithRetry( - await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode), - sourceDir, - '*', - '', - AGENTS['pre-recon'].displayName, - 'pre-recon', // Agent name for snapshot creation - chalk.cyan, - { id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) } // Session metadata for audit logging (STANDARD: use 'id' field) - ) - ); - } - - // Check if authentication config is provided for login instructions 
injection - console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`)); - - const [nmap, subfinder, whatweb, codeAnalysis] = await Promise.all(operations); - - return { - nmap: { kind: 'scan', result: nmap as TerminalScanResult }, - subfinder: { kind: 'scan', result: subfinder as TerminalScanResult }, - whatweb: { kind: 'scan', result: whatweb as TerminalScanResult }, - codeAnalysis: codeAnalysis as AgentResult - }; -} - -// Wave 2: Additional scanning -async function runPreReconWave2( - webUrl: string, - sourceDir: string, - toolAvailability: ToolAvailability, - pipelineTestingMode: boolean = false -): Promise { - console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...')); - - // Skip external commands in pipeline testing mode - if (pipelineTestingMode) { - console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)')); - return { - schemathesis: { tool: 'schemathesis', output: 'Skipped (pipeline testing mode)', status: 'skipped', duration: 0 } - }; - } - - const operations: Promise[] = []; - - // Parallel additional scans (only run if tools are available) - - if (toolAvailability.schemathesis) { - operations.push(runTerminalScan('schemathesis', webUrl, sourceDir)); - } - - // If no tools are available, return early - if (operations.length === 0) { - console.log(chalk.gray(' ⏭️ No Wave 2 tools available')); - return { - schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 } - }; - } - - // Run all operations in parallel - const results = await Promise.all(operations); - - // Map results back to named properties - const response: Wave2Results = { - schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 } - }; - let resultIndex = 0; - - if (toolAvailability.schemathesis) { - response.schemathesis = results[resultIndex++]!; - } else { - console.log(chalk.gray(' ⏭️ 
schemathesis - tool not available')); - } - - return response; -} - -// Extracts status and output from a Wave1 tool result -function extractResult(r: Wave1ToolResult | undefined): { status: string; output: string } { - if (!r) return { status: 'Skipped', output: 'No output' }; - switch (r.kind) { - case 'scan': - return { status: r.result.status || 'Skipped', output: r.result.output || 'No output' }; - case 'skipped': - return { status: 'Skipped', output: r.message }; - case 'agent': - return { status: r.result.success ? 'success' : 'error', output: 'See agent output' }; - } -} - -// Combines tool outputs into single deliverable. Falls back to reference if file missing. -async function stitchPreReconOutputs(wave1: Wave1Results, additionalScans: TerminalScanResult[], sourceDir: string): Promise { - // Try to read the code analysis deliverable file - let codeAnalysisContent = 'No analysis available'; - try { - const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md'); - codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8'); - } catch (error) { - const err = error as Error; - console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${err.message}`)); - codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md'; - } - - // Build additional scans section - let additionalSection = ''; - if (additionalScans.length > 0) { - additionalSection = '\n## Authenticated Scans\n'; - for (const scan of additionalScans) { - additionalSection += ` -### ${scan.tool.toUpperCase()} -Status: ${scan.status} -${scan.output} -`; - } - } - - const nmap = extractResult(wave1.nmap); - const subfinder = extractResult(wave1.subfinder); - const whatweb = extractResult(wave1.whatweb); - const naabu = extractResult(wave1.naabu); - - const report = ` -# Pre-Reconnaissance Report - -## Port Discovery (naabu) -Status: ${naabu.status} -${naabu.output} - -## Network Scanning (nmap) -Status: ${nmap.status} 
-${nmap.output} - -## Subdomain Discovery (subfinder) -Status: ${subfinder.status} -${subfinder.output} - -## Technology Detection (whatweb) -Status: ${whatweb.status} -${whatweb.output} -## Code Analysis -${codeAnalysisContent} -${additionalSection} ---- -Report generated at: ${new Date().toISOString()} - `.trim(); - - // Ensure deliverables directory exists in the cloned repo - try { - const deliverablePath = path.join(sourceDir, 'deliverables', 'pre_recon_deliverable.md'); - await fs.ensureDir(path.join(sourceDir, 'deliverables')); - - // Write to file in the cloned repository - await fs.writeFile(deliverablePath, report); - } catch (error) { - const err = error as Error; - throw new PentestError( - `Failed to write pre-recon report: ${err.message}`, - 'filesystem', - false, - { sourceDir, originalError: err.message } - ); - } - - return report; -} - -// Main pre-recon phase execution function -export async function executePreReconPhase( - webUrl: string, - sourceDir: string, - variables: PromptVariables, - config: DistributedConfig | null, - toolAvailability: ToolAvailability, - pipelineTestingMode: boolean, - sessionId: string | null = null, - outputPath: string | null = null -): Promise { - console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE')); - const timer = new Timer('phase-1-pre-recon'); - - console.log(chalk.yellow('Wave 1: Initial footprinting...')); - const wave1Results = await runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode, sessionId, outputPath); - console.log(chalk.green(' ✅ Wave 1 operations completed')); - - console.log(chalk.yellow('Wave 2: Additional scanning...')); - const wave2Results = await runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode); - console.log(chalk.green(' ✅ Wave 2 operations completed')); - - console.log(chalk.blue('📝 Stitching pre-recon outputs...')); - const additionalScans = wave2Results.schemathesis ? 
[wave2Results.schemathesis] : []; - const preReconReport = await stitchPreReconOutputs(wave1Results, additionalScans, sourceDir); - const duration = timer.stop(); - - console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`)); - console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`)); - - return { duration, report: preReconReport }; -} diff --git a/src/progress-indicator.ts b/src/progress-indicator.ts index d6700d4..4ecaa0a 100644 --- a/src/progress-indicator.ts +++ b/src/progress-indicator.ts @@ -4,8 +4,6 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. -import chalk from 'chalk'; - export class ProgressIndicator { private message: string; private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; @@ -25,9 +23,7 @@ export class ProgressIndicator { this.interval = setInterval(() => { // Clear the line and write the spinner - process.stdout.write( - `\r${chalk.cyan(this.frames[this.frameIndex])} ${chalk.dim(this.message)}` - ); + process.stdout.write(`\r${this.frames[this.frameIndex]} ${this.message}`); this.frameIndex = (this.frameIndex + 1) % this.frames.length; }, 100); } @@ -47,6 +43,6 @@ export class ProgressIndicator { finish(successMessage: string = 'Complete'): void { this.stop(); - console.log(chalk.green(`✓ ${successMessage}`)); + console.log(`✓ ${successMessage}`); } } diff --git a/src/services/agent-execution.ts b/src/services/agent-execution.ts new file mode 100644 index 0000000..401f051 --- /dev/null +++ b/src/services/agent-execution.ts @@ -0,0 +1,291 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. 
/**
 * Agent Execution Service
 *
 * Handles the full agent lifecycle:
 * - Load config via ConfigLoaderService
 * - Load prompt template using AGENTS[agentName].promptTemplate
 * - Create git checkpoint
 * - Start audit logging
 * - Invoke Claude SDK via runClaudePrompt
 * - Spending cap check using isSpendingCapBehavior
 * - Handle failure (rollback, audit)
 * - Validate output using AGENTS[agentName].deliverableFilename
 * - Commit on success, log metrics
 *
 * No Temporal dependencies - pure domain logic.
 */

import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode, type PentestErrorType } from '../types/errors.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
  runClaudePrompt,
  validateAgentOutput,
  type ClaudePromptResult,
} from '../ai/claude-executor.js';
import {
  createGitCheckpoint,
  commitGitSuccess,
  rollbackGitWorkspace,
  getGitCommitHash,
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
import type { ConfigLoaderService } from './config-loader.js';
import type { AgentMetrics } from '../types/metrics.js';

/**
 * Input for agent execution.
 */
export interface AgentExecutionInput {
  webUrl: string;
  repoPath: string;
  configPath?: string | undefined;
  pipelineTestingMode?: boolean | undefined;
  attemptNumber: number;
}

/**
 * Everything `failAgent` needs to roll back the workspace, close the audit
 * entry, and build the PentestError returned to the caller.
 */
interface FailAgentOpts {
  attemptNumber: number;
  result: ClaudePromptResult;
  rollbackReason: string;
  errorMessage: string;
  errorCode: ErrorCode;
  category: PentestErrorType;
  retryable: boolean;
  context: Record<string, unknown>;
}

/**
 * Service for executing agents with full lifecycle management.
 *
 * NOTE: AuditSession is passed per-execution, NOT stored on the service.
 * This is critical for parallel agent execution - each agent needs its own
 * AuditSession instance because AuditSession uses instance state (currentAgentName)
 * to track which agent is currently logging.
 */
export class AgentExecutionService {
  private readonly configLoader: ConfigLoaderService;

  constructor(configLoader: ConfigLoaderService) {
    this.configLoader = configLoader;
  }

  /**
   * Execute an agent with full lifecycle management.
   *
   * Failures detected by this method (config, prompt, checkpoint, spending
   * cap, execution, validation) are returned as an Err result. Exceptions
   * thrown by the audit session, the executor, or the success commit are
   * not caught here and propagate to the caller.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session for this specific agent execution
   * @param logger - Logger forwarded to prompt loading, git and executor calls
   * @returns Result containing AgentEndResult on success, PentestError on failure
   */
  async execute(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession,
    logger: ActivityLogger
  ): Promise<Result<AgentEndResult, PentestError>> {
    const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;

    // 1. Load config (if provided)
    const configResult = await this.configLoader.loadOptional(configPath);
    if (isErr(configResult)) {
      return configResult;
    }
    const distributedConfig = configResult.value;

    // 2. Load prompt
    const promptTemplate = AGENTS[agentName].promptTemplate;
    let prompt: string;
    try {
      prompt = await loadPrompt(
        promptTemplate,
        { webUrl, repoPath },
        distributedConfig,
        pipelineTestingMode,
        logger
      );
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return err(
        new PentestError(
          `Failed to load prompt for ${agentName}: ${errorMessage}`,
          'prompt',
          false,
          { agentName, promptTemplate, originalError: errorMessage },
          ErrorCode.PROMPT_LOAD_FAILED
        )
      );
    }

    // 3. Create git checkpoint before execution.
    // createGitCheckpoint reports git failures through its returned
    // `{ success, error }` value rather than by throwing, so the returned
    // status has to be inspected in addition to any thrown exception;
    // otherwise a failed checkpoint would go unnoticed.
    let checkpointError: string | null = null;
    try {
      const checkpoint = await createGitCheckpoint(repoPath, agentName, attemptNumber, logger);
      if (!checkpoint.success) {
        checkpointError = checkpoint.error?.message ?? 'unknown git error';
      }
    } catch (error) {
      checkpointError = error instanceof Error ? error.message : String(error);
    }
    if (checkpointError !== null) {
      return err(
        new PentestError(
          `Failed to create git checkpoint for ${agentName}: ${checkpointError}`,
          'filesystem',
          false,
          { agentName, repoPath, originalError: checkpointError },
          ErrorCode.GIT_CHECKPOINT_FAILED
        )
      );
    }

    // 4. Start audit logging
    await auditSession.startAgent(agentName, prompt, attemptNumber);

    // 5. Execute agent
    const result: ClaudePromptResult = await runClaudePrompt(
      prompt,
      repoPath,
      '', // context
      agentName, // description
      agentName,
      auditSession,
      logger
    );

    // 6. Spending cap check - defense-in-depth.
    // Only a "successful" run with at most two turns and zero cost is
    // inspected; isSpendingCapBehavior makes the final call on the text.
    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
      const resultText = result.result || '';
      if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
        return this.failAgent(agentName, repoPath, auditSession, logger, {
          attemptNumber,
          result,
          rollbackReason: 'spending cap detected',
          errorMessage: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
          errorCode: ErrorCode.SPENDING_CAP_REACHED,
          category: 'billing',
          retryable: true,
          context: { agentName, turns: result.turns, cost: result.cost },
        });
      }
    }

    // 7. Handle execution failure
    if (!result.success) {
      return this.failAgent(agentName, repoPath, auditSession, logger, {
        attemptNumber,
        result,
        rollbackReason: 'execution failure',
        errorMessage: result.error || 'Agent execution failed',
        errorCode: ErrorCode.AGENT_EXECUTION_FAILED,
        category: 'validation',
        // The executor's own verdict wins; default to retryable when absent.
        retryable: result.retryable ?? true,
        context: { agentName, originalError: result.error },
      });
    }

    // 8. Validate output
    const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
    if (!validationPassed) {
      return this.failAgent(agentName, repoPath, auditSession, logger, {
        attemptNumber,
        result,
        rollbackReason: 'validation failure',
        errorMessage: `Agent ${agentName} failed output validation`,
        errorCode: ErrorCode.OUTPUT_VALIDATION_FAILED,
        category: 'validation',
        retryable: true,
        context: { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
      });
    }

    // 9. Success - commit deliverables, then capture checkpoint hash
    await commitGitSuccess(repoPath, agentName, logger);
    const commitHash = await getGitCommitHash(repoPath);

    const endResult: AgentEndResult = {
      attemptNumber,
      duration_ms: result.duration,
      cost_usd: result.cost || 0,
      success: true,
      model: result.model,
      // Only attach the checkpoint when a hash was actually obtained.
      ...(commitHash && { checkpoint: commitHash }),
    };
    await auditSession.endAgent(agentName, endResult);

    return ok(endResult);
  }

  /**
   * Shared failure path: roll back the workspace, record the failed attempt
   * in the audit session, and wrap the failure in a PentestError.
   *
   * The value returned by rollbackGitWorkspace is not inspected, so the
   * rollback is effectively best-effort.
   *
   * @param agentName - Agent whose attempt failed
   * @param repoPath - Repository to roll back
   * @param auditSession - Audit session that receives the failed end result
   * @param logger - Logger forwarded to the rollback
   * @param opts - Failure details used for the audit entry and the error
   * @returns Err result carrying the constructed PentestError
   */
  private async failAgent(
    agentName: AgentName,
    repoPath: string,
    auditSession: AuditSession,
    logger: ActivityLogger,
    opts: FailAgentOpts
  ): Promise<Result<AgentEndResult, PentestError>> {
    await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);

    const endResult: AgentEndResult = {
      attemptNumber: opts.attemptNumber,
      duration_ms: opts.result.duration,
      cost_usd: opts.result.cost || 0,
      success: false,
      model: opts.result.model,
      error: opts.errorMessage,
    };
    await auditSession.endAgent(agentName, endResult);

    return err(
      new PentestError(
        opts.errorMessage,
        opts.category,
        opts.retryable,
        opts.context,
        opts.errorCode
      )
    );
  }

  /**
   * Execute an agent, throwing PentestError on failure.
   *
   * This is the preferred method for Temporal activities, which need to
   * catch errors and classify them into ApplicationFailure. Avoids requiring
   * activities to import Result utilities, keeping the boundary clean.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session for this specific agent execution
   * @param logger - Logger forwarded to `execute`
   * @returns AgentEndResult on success
   * @throws PentestError on failure
   */
  async executeOrThrow(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession,
    logger: ActivityLogger
  ): Promise<AgentEndResult> {
    const result = await this.execute(agentName, input, auditSession, logger);
    if (isErr(result)) {
      throw result.error;
    }
    return result.value;
  }

  /**
   * Convert AgentEndResult to AgentMetrics for workflow state.
   *
   * @param endResult - Source of duration and cost
   * @param result - Source of turn count and model
   * @returns Metrics with token counts left as null
   */
  static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
    return {
      durationMs: endResult.duration_ms,
      inputTokens: null, // Not currently exposed by SDK wrapper
      outputTokens: null,
      costUsd: endResult.cost_usd,
      numTurns: result.turns ?? null,
      model: result.model,
    };
  }
}
/**
 * Config Loader Service
 *
 * Result-returning wrapper around parseConfig + distributeConfig.
 * Contains no Temporal dependencies.
 */

import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';

/**
 * Loads configuration files and hands back the distributed form.
 *
 * Failures are never thrown from `load`; they come back as an Err result
 * so the caller chooses how to react.
 */
export class ConfigLoaderService {
  /**
   * Pick the ErrorCode that matches a failure message.
   * Missing-file wording is checked first, then schema validation wording;
   * anything else counts as a parse error.
   */
  private static codeFor(message: string): ErrorCode {
    if (message.includes('not found') || message.includes('ENOENT')) {
      return ErrorCode.CONFIG_NOT_FOUND;
    }
    if (message.includes('validation failed')) {
      return ErrorCode.CONFIG_VALIDATION_FAILED;
    }
    return ErrorCode.CONFIG_PARSE_ERROR;
  }

  /**
   * Parse the file at `configPath` and distribute it.
   *
   * @param configPath - Path to the YAML configuration file
   * @returns Ok with the DistributedConfig, or Err with a non-retryable
   *          'config' PentestError describing what went wrong
   */
  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
    try {
      const parsed = await parseConfig(configPath);
      return ok(distributeConfig(parsed));
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      const failure = new PentestError(
        `Failed to load config ${configPath}: ${reason}`,
        'config',
        false,
        { configPath, originalError: reason },
        ConfigLoaderService.codeFor(reason)
      );
      return err(failure);
    }
  }

  /**
   * Like `load`, but a missing (falsy) path is not an error.
   *
   * @param configPath - Optional path to the YAML configuration file
   * @returns Ok(null) when no path is given, otherwise whatever `load` returns
   */
  async loadOptional(
    configPath: string | undefined
  ): Promise<Result<DistributedConfig | null, PentestError>> {
    if (configPath) {
      return this.load(configPath);
    }
    return ok(null);
  }
}
/**
 * Dependency Injection Container
 *
 * One container per workflow; services are wired through their constructors.
 *
 * Usage:
 *   const container = getOrCreateContainer(workflowId, sessionMetadata);
 *   const auditSession = new AuditSession(sessionMetadata); // Per-agent
 *   await auditSession.initialize(workflowId);
 *   const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession, logger);
 */

import type { SessionMetadata } from '../audit/utils.js';
import { AgentExecutionService } from './agent-execution.js';
import { ConfigLoaderService } from './config-loader.js';
import { ExploitationCheckerService } from './exploitation-checker.js';

/**
 * What a Container needs in order to be built.
 *
 * AuditSession is deliberately absent: every agent execution is handed its
 * own AuditSession, so none is kept on the container.
 */
export interface ContainerDependencies {
  readonly sessionMetadata: SessionMetadata;
}

/**
 * Service holder for one workflow.
 *
 * Each service is constructed once, in the constructor, and then reused
 * for every agent execution that goes through this container.
 * No AuditSession lives here - it is supplied per agent execution.
 */
export class Container {
  readonly sessionMetadata: SessionMetadata;
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;

  constructor({ sessionMetadata }: ContainerDependencies) {
    this.sessionMetadata = sessionMetadata;

    // The config loader is built first because agent execution depends on it.
    const configLoader = new ConfigLoaderService();
    this.configLoader = configLoader;
    this.agentExecution = new AgentExecutionService(configLoader);
    this.exploitationChecker = new ExploitationCheckerService();
  }
}

/** Registry of live containers, keyed by workflowId. */
const containers = new Map<string, Container>();

/**
 * Look up the container for a workflow, building and registering one on
 * first use. When a container already exists, `sessionMetadata` is ignored
 * and the existing instance is returned unchanged.
 *
 * @param workflowId - Unique workflow identifier
 * @param sessionMetadata - Session metadata used only when creating
 * @returns Container instance for the workflow
 */
export function getOrCreateContainer(
  workflowId: string,
  sessionMetadata: SessionMetadata
): Container {
  const existing = containers.get(workflowId);
  if (existing) {
    return existing;
  }

  const created = new Container({ sessionMetadata });
  containers.set(workflowId, created);
  return created;
}

/**
 * Drop the registry entry for a workflow. A no-op if none is registered.
 * Intended to be called once the workflow has finished
 * (e.g. from logWorkflowComplete).
 *
 * @param workflowId - Unique workflow identifier
 */
export function removeContainer(workflowId: string): void {
  containers.delete(workflowId);
}

/**
 * Pure lookup: return the registered container, never creating one.
 *
 * @param workflowId - Unique workflow identifier
 * @returns Container instance, or undefined when none is registered
 */
export function getContainer(workflowId: string): Container | undefined {
  return containers.get(workflowId);
}
super(message); this.type = type; this.retryable = retryable; this.context = context; this.timestamp = new Date().toISOString(); - } -} - -// Centralized error logging function -export async function logError( - error: Error & { type?: PentestErrorType; retryable?: boolean; context?: PentestErrorContext }, - contextMsg: string, - sourceDir: string | null = null -): Promise { - const timestamp = new Date().toISOString(); - const logEntry: LogEntry = { - timestamp, - context: contextMsg, - error: { - name: error.name || error.constructor.name, - message: error.message, - type: error.type || 'unknown', - retryable: error.retryable || false, - }, - }; - // Only add stack if it exists - if (error.stack) { - logEntry.error.stack = error.stack; - } - - // Console logging with color - const prefix = error.retryable ? '⚠️' : '❌'; - const color = error.retryable ? chalk.yellow : chalk.red; - console.log(color(`${prefix} ${contextMsg}:`)); - console.log(color(` ${error.message}`)); - - if (error.context && Object.keys(error.context).length > 0) { - console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`)); - } - - // File logging (if source directory available) - if (sourceDir) { - try { - const logPath = path.join(sourceDir, 'error.log'); - await fs.appendFile(logPath, JSON.stringify(logEntry) + '\n'); - } catch (logErr) { - const errMsg = logErr instanceof Error ? 
logErr.message : String(logErr); - console.log(chalk.gray(` (Failed to write error log: ${errMsg})`)); + if (code !== undefined) { + this.code = code; } } - - return logEntry; } -// Handle tool execution errors -export function handleToolError( - toolName: string, - error: Error & { code?: string } -): ToolErrorResult { - const isRetryable = - error.code === 'ECONNRESET' || - error.code === 'ETIMEDOUT' || - error.code === 'ENOTFOUND'; - - return { - tool: toolName, - output: `Error: ${error.message}`, - status: 'error', - duration: 0, - success: false, - error: new PentestError( - `${toolName} execution failed: ${error.message}`, - 'tool', - isRetryable, - { toolName, originalError: error.message, errorCode: error.code } - ), - }; -} - -// Handle prompt loading errors export function handlePromptError( promptName: string, error: Error @@ -129,7 +57,6 @@ export function handlePromptError( }; } -// Patterns that indicate retryable errors const RETRYABLE_PATTERNS = [ // Network and connection errors 'network', @@ -173,28 +100,58 @@ const NON_RETRYABLE_PATTERNS = [ export function isRetryableError(error: Error): boolean { const message = error.message.toLowerCase(); - // Check for explicit non-retryable patterns first if (NON_RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern))) { return false; } - // Check for retryable patterns return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern)); } -// Rate limit errors get longer base delay (30s) vs standard exponential backoff (2s) -export function getRetryDelay(error: Error, attempt: number): number { - const message = error.message.toLowerCase(); +/** + * Classifies errors by ErrorCode for reliable, code-based classification. + * Used when error is a PentestError with a specific ErrorCode. 
/**
 * Map an ErrorCode to the failure type / retryability pair reported to Temporal.
 *
 * For most codes the retryability is fixed by the code itself and the flag
 * carried on the error is ignored. Only AGENT_EXECUTION_FAILED and
 * unrecognised codes honour `retryableFromError`.
 *
 * NOTE(review): executeGitCommandWithRetry raises GIT_CHECKPOINT_FAILED with
 * retryable = true, which this mapping overrides to false - confirm intended.
 *
 * @param code - Error code taken from a PentestError
 * @param retryableFromError - The error's own retryable flag
 * @returns Failure type name and whether the failure should be retried
 */
function classifyByErrorCode(
  code: ErrorCode,
  retryableFromError: boolean
): { type: string; retryable: boolean } {
  const verdict = (type: string, retryable: boolean) => ({ type, retryable });

  switch (code) {
    // Billing: retry later (cap resets or credits get added)
    case ErrorCode.SPENDING_CAP_REACHED:
    case ErrorCode.INSUFFICIENT_CREDITS:
      return verdict('BillingError', true);

    case ErrorCode.API_RATE_LIMITED:
      return verdict('RateLimitError', true);

    // Config and prompt problems need a manual fix, so never retry
    case ErrorCode.CONFIG_NOT_FOUND:
    case ErrorCode.CONFIG_VALIDATION_FAILED:
    case ErrorCode.CONFIG_PARSE_ERROR:
    case ErrorCode.PROMPT_LOAD_FAILED:
      return verdict('ConfigurationError', false);

    // Git failures are treated as workspace corruption, so never retry
    case ErrorCode.GIT_CHECKPOINT_FAILED:
    case ErrorCode.GIT_ROLLBACK_FAILED:
      return verdict('GitError', false);

    // Output problems: the agent may well succeed on another attempt
    case ErrorCode.OUTPUT_VALIDATION_FAILED:
    case ErrorCode.DELIVERABLE_NOT_FOUND:
      return verdict('OutputValidationError', true);

    // Agent execution: defer to the flag carried by the error
    case ErrorCode.AGENT_EXECUTION_FAILED:
      return verdict('AgentExecutionError', retryableFromError);

    // Unrecognised code: reported as UnknownError with the error's own flag
    // (no string matching happens on this path).
    default:
      return verdict('UnknownError', retryableFromError);
  }
}
getRetryDelay(error: Error, attempt: number): number { * Used by activities to wrap errors in ApplicationFailure: * - Retryable errors: Temporal retries with configured backoff * - Non-retryable errors: Temporal fails immediately + * + * Classification priority: + * 1. If error is PentestError with ErrorCode, classify by code (reliable) + * 2. Fall through to string matching for external errors (SDK, network, etc.) */ -export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification { +export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } { + // === CODE-BASED CLASSIFICATION (Preferred for internal errors) === + if (error instanceof PentestError && error.code !== undefined) { + return classifyByErrorCode(error.code, error.retryable); + } + + // === STRING-BASED CLASSIFICATION (Fallback for external errors) === const message = (error instanceof Error ? error.message : String(error)).toLowerCase(); // === BILLING ERRORS (Retryable with long backoff) === // Anthropic returns billing as 400 invalid_request_error // Human can add credits OR wait for spending cap to reset (5-30 min backoff) - if ( - message.includes('billing_error') || - message.includes('credit balance is too low') || - message.includes('insufficient credits') || - message.includes('usage is blocked due to insufficient credits') || - message.includes('please visit plans & billing') || - message.includes('please visit plans and billing') || - message.includes('usage limit reached') || - message.includes('quota exceeded') || - message.includes('daily rate limit') || - message.includes('limit will reset') || - // Claude Code spending cap patterns (returns short message instead of error) - message.includes('spending cap') || - message.includes('spending limit') || - message.includes('cap reached') || - message.includes('budget exceeded') || - message.includes('billing limit reached') - ) { + // Check both API patterns and text patterns for 
/**
 * Exploitation Checker Service
 *
 * Decides whether exploitation should run for a vulnerability type by
 * delegating to validateQueueSafe. No Temporal dependencies.
 */

import {
  validateQueueSafe,
  type VulnType,
  type ExploitationDecision,
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../types/activity-logger.js';

/**
 * Turns the outcome of queue validation into an exploit / skip decision.
 */
export class ExploitationCheckerService {
  /**
   * Decide whether the exploit agent for `vulnType` should run.
   *
   * - Validation succeeded: the decision is logged and returned as-is.
   * - Validation failed, retryable: a warning is logged and the error is
   *   re-thrown so the caller can retry.
   * - Validation failed, not retryable: a warning is logged and a
   *   "do not exploit" decision with zero vulnerabilities is returned.
   *
   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
   * @param repoPath - Path to the repository containing deliverables
   * @param logger - ActivityLogger for structured logging
   * @returns ExploitationDecision indicating whether to exploit
   * @throws the validation error when it is marked retryable
   */
  async checkQueue(
    vulnType: VulnType,
    repoPath: string,
    logger: ActivityLogger
  ): Promise<ExploitationDecision> {
    const outcome = await validateQueueSafe(vulnType, repoPath);

    if (!isOk(outcome)) {
      const failure = outcome.error;

      if (failure.retryable) {
        // Hand retryable failures back to the caller untouched.
        logger.warn(`${vulnType}: ${failure.message} (retryable)`);
        throw failure;
      }

      // Permanent failure: skip exploitation instead of failing the run.
      logger.warn(`${vulnType}: ${failure.message}, skipping exploitation`);
      return {
        shouldExploit: false,
        shouldRetry: false,
        vulnerabilityCount: 0,
        vulnType,
      };
    }

    const decision = outcome.value;
    const summary = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    logger.info(`${vulnType}: ${summary}`);
    return decision;
  }
}
@@ -51,17 +53,19 @@ function logChangeSummary( changes: string[], messageWithChanges: string, messageWithoutChanges: string, - color: typeof chalk.green, + logger: ActivityLogger, + level: 'info' | 'warn' = 'info', maxToShow: number = 5 ): void { if (changes.length > 0) { - console.log(color(messageWithChanges.replace('{count}', String(changes.length)))); - changes.slice(0, maxToShow).forEach((change) => console.log(chalk.gray(` ${change}`))); - if (changes.length > maxToShow) { - console.log(chalk.gray(` ... and ${changes.length - maxToShow} more files`)); - } + const msg = messageWithChanges.replace('{count}', String(changes.length)); + const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', '); + const suffix = changes.length > maxToShow + ? ` ... and ${changes.length - maxToShow} more files` + : ''; + logger[level](`${msg} ${fileList}${suffix}`); } else { - console.log(color(messageWithoutChanges)); + logger[level](messageWithoutChanges); } } @@ -136,10 +140,10 @@ export async function executeGitCommandWithRetry( if (isGitLockError(errMsg) && attempt < maxRetries) { const delay = Math.pow(2, attempt - 1) * 1000; - console.log( - chalk.yellow( - ` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...` - ) + // executeGitCommandWithRetry is also called outside activity context + // (e.g., from resume logic), so we use console.warn as a fallback here + console.warn( + `Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). 
Retrying in ${delay}ms...` ); await new Promise((resolve) => setTimeout(resolve, delay)); continue; @@ -148,7 +152,13 @@ export async function executeGitCommandWithRetry( throw error; } } - throw new Error(`Git command failed after ${maxRetries} retries`); + throw new PentestError( + `Git command failed after ${maxRetries} retries`, + 'filesystem', + true, // Retryable - transient git lock issues + { maxRetries, description }, + ErrorCode.GIT_CHECKPOINT_FAILED + ); } finally { gitSemaphore.release(); } @@ -157,15 +167,16 @@ export async function executeGitCommandWithRetry( // Two-phase reset: hard reset (tracked files) + clean (untracked files) export async function rollbackGitWorkspace( sourceDir: string, - reason: string = 'retry preparation' + reason: string = 'retry preparation', + logger: ActivityLogger ): Promise { // Skip git operations if not a git repository if (!(await isGitRepository(sourceDir))) { - console.log(chalk.gray(` ⏭️ Skipping git rollback (not a git repository)`)); + logger.info('Skipping git rollback (not a git repository)'); return { success: true }; } - console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`)); + logger.info(`Rolling back workspace for ${reason}`); try { const changes = await getChangedFiles(sourceDir, 'status check for rollback'); @@ -182,16 +193,26 @@ export async function rollbackGitWorkspace( logChangeSummary( changes, - ' ✅ Rollback completed - removed {count} contaminated changes:', - ' ✅ Rollback completed - no changes to remove', - chalk.yellow, + 'Rollback completed - removed {count} contaminated changes:', + 'Rollback completed - no changes to remove', + logger, + 'info', 3 ); return { success: true }; } catch (error) { - const result = toErrorResult(error); - console.log(chalk.red(` ❌ Rollback failed after retries: ${result.error?.message}`)); - return result; + const errMsg = error instanceof Error ? 
error.message : String(error); + logger.error(`Rollback failed after retries: ${errMsg}`); + return { + success: false, + error: new PentestError( + `Git rollback failed: ${errMsg}`, + 'filesystem', + false, // Non-retryable - rollback is best-effort cleanup + { sourceDir, reason }, + ErrorCode.GIT_ROLLBACK_FAILED + ), + }; } } @@ -199,29 +220,30 @@ export async function rollbackGitWorkspace( export async function createGitCheckpoint( sourceDir: string, description: string, - attempt: number + attempt: number, + logger: ActivityLogger ): Promise { // Skip git operations if not a git repository if (!(await isGitRepository(sourceDir))) { - console.log(chalk.gray(` ⏭️ Skipping git checkpoint (not a git repository)`)); + logger.info('Skipping git checkpoint (not a git repository)'); return { success: true }; } - console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`)); + logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`); try { - // First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution + // 1. On retries, clean workspace to prevent pollution from previous attempt if (attempt > 1) { - const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`); + const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger); if (!cleanResult.success) { - console.log( - chalk.yellow(` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`) - ); + logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`); } } + // 2. Detect existing changes const changes = await getChangedFiles(sourceDir, 'status check'); const hasChanges = changes.length > 0; + // 3. 
Stage and commit checkpoint await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes'); await executeGitCommandWithRetry( ['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'], @@ -229,30 +251,32 @@ export async function createGitCheckpoint( 'creating commit' ); + // 4. Log result if (hasChanges) { - console.log(chalk.blue(` ✅ Checkpoint created with uncommitted changes staged`)); + logger.info('Checkpoint created with uncommitted changes staged'); } else { - console.log(chalk.blue(` ✅ Empty checkpoint created (no workspace changes)`)); + logger.info('Empty checkpoint created (no workspace changes)'); } return { success: true }; } catch (error) { const result = toErrorResult(error); - console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${result.error?.message}`)); + logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`); return result; } } export async function commitGitSuccess( sourceDir: string, - description: string + description: string, + logger: ActivityLogger ): Promise { // Skip git operations if not a git repository if (!(await isGitRepository(sourceDir))) { - console.log(chalk.gray(` ⏭️ Skipping git commit (not a git repository)`)); + logger.info('Skipping git commit (not a git repository)'); return { success: true }; } - console.log(chalk.green(` 💾 Committing successful results for ${description}`)); + logger.info(`Committing successful results for ${description}`); try { const changes = await getChangedFiles(sourceDir, 'status check for success commit'); @@ -269,15 +293,14 @@ export async function commitGitSuccess( logChangeSummary( changes, - ' ✅ Success commit created with {count} file changes:', - ' ✅ Empty success commit created (agent made no file changes)', - chalk.green, - 5 + 'Success commit created with {count} file changes:', + 'Empty success commit created (agent made no file changes)', + logger ); return { success: true }; } catch 
(error) { const result = toErrorResult(error); - console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${result.error?.message}`)); + logger.warn(`Success commit failed after retries: ${result.error?.message}`); return result; } } diff --git a/src/services/index.ts b/src/services/index.ts new file mode 100644 index 0000000..29c489f --- /dev/null +++ b/src/services/index.ts @@ -0,0 +1,23 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Services Module + * + * Exports DI container and service classes for Shannon agent execution. + * Services are pure domain logic with no Temporal dependencies. + */ + +export { Container, getOrCreateContainer, removeContainer } from './container.js'; +export type { ContainerDependencies } from './container.js'; + +export { ConfigLoaderService } from './config-loader.js'; +export { ExploitationCheckerService } from './exploitation-checker.js'; +export { AgentExecutionService } from './agent-execution.js'; +export type { AgentExecutionInput } from './agent-execution.js'; + +export { assembleFinalReport, injectModelIntoReport } from './reporting.js'; +export { loadPrompt } from './prompt-manager.js'; diff --git a/src/prompts/prompt-manager.ts b/src/services/prompt-manager.ts similarity index 82% rename from src/prompts/prompt-manager.ts rename to src/services/prompt-manager.ts index 3ff47d5..b660dc8 100644 --- a/src/prompts/prompt-manager.ts +++ b/src/services/prompt-manager.ts @@ -5,10 +5,10 @@ // as published by the Free Software Foundation. 
import { fs, path } from 'zx'; -import chalk from 'chalk'; -import { PentestError, handlePromptError } from '../error-handling.js'; -import { MCP_AGENT_MAPPING } from '../constants.js'; +import { PentestError, handlePromptError } from './error-handling.js'; +import { MCP_AGENT_MAPPING } from '../session-manager.js'; import type { Authentication, DistributedConfig } from '../types/config.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; interface PromptVariables { webUrl: string; @@ -22,9 +22,9 @@ interface IncludeReplacement { } // Pure function: Build complete login instructions from config -async function buildLoginInstructions(authentication: Authentication): Promise { +async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise { try { - // Load the login instructions template + // 1. Load the login instructions template const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt'); if (!await fs.pathExists(loginInstructionsPath)) { @@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication): Promise { const regex = new RegExp(`([\\s\\S]*?)`, 'g'); const match = regex.exec(content); return match ? match[1]!.trim() : ''; }; - // Extract sections based on login type + // 2. Extract sections based on login type const loginType = authentication.login_type?.toUpperCase(); let loginInstructions = ''; - // Build instructions with only relevant sections const commonSection = getSection(fullTemplate, 'COMMON'); const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO const verificationSection = getSection(fullTemplate, 'VERIFICATION'); - // Fallback to full template if markers are missing (backward compatibility) + // 3. 
Assemble instructions from sections (fallback to full template if markers missing) if (!commonSection && !authSection && !verificationSection) { - console.log(chalk.yellow('⚠️ Section markers not found, using full login instructions template')); + logger.warn('Section markers not found, using full login instructions template'); loginInstructions = fullTemplate; } else { - // Combine relevant sections loginInstructions = [commonSection, authSection, verificationSection] - .filter(section => section) // Remove empty sections + .filter(section => section) .join('\n\n'); } - // Replace the user instructions placeholder with the login flow from config + // 4. Interpolate login flow and credential placeholders let userInstructions = (authentication.login_flow ?? []).join('\n'); - // Replace credential placeholders within the user instructions if (authentication.credentials) { if (authentication.credentials.username) { userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username); @@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication): Promise { try { if (!template || typeof template !== 'string') { @@ -174,7 +171,7 @@ async function interpolateVariables( // Extract and inject login instructions from config if (config.authentication?.login_flow) { - const loginInstructions = await buildLoginInstructions(config.authentication); + const loginInstructions = await buildLoginInstructions(config.authentication, logger); result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions); } else { result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, ''); @@ -189,7 +186,7 @@ async function interpolateVariables( // Validate that all placeholders have been replaced (excluding instructional text) const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g); if (remainingPlaceholders) { - console.log(chalk.yellow(`⚠️ Warning: Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`)); + 
logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`); } return result; @@ -212,20 +209,19 @@ export async function loadPrompt( promptName: string, variables: PromptVariables, config: DistributedConfig | null = null, - pipelineTestingMode: boolean = false + pipelineTestingMode: boolean = false, + logger: ActivityLogger ): Promise { try { - // Use pipeline testing prompts if pipeline testing mode is enabled + // 1. Resolve prompt file path const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts'; const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir); const promptPath = path.join(promptsDir, `${promptName}.txt`); - // Debug message for pipeline testing mode if (pipelineTestingMode) { - console.log(chalk.yellow(`⚡ Using pipeline testing prompt: ${promptPath}`)); + logger.info(`Using pipeline testing prompt: ${promptPath}`); } - // Check if file exists first if (!await fs.pathExists(promptPath)) { throw new PentestError( `Prompt file not found: ${promptPath}`, @@ -235,26 +231,26 @@ export async function loadPrompt( ); } - // Add MCP server assignment to variables + // 2. Assign MCP server based on agent name const enhancedVariables: PromptVariables = { ...variables }; - // Assign MCP server based on prompt name (agent name) const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING]; if (mcpServer) { enhancedVariables.MCP_SERVER = mcpServer; - console.log(chalk.gray(` 🎭 Assigned ${promptName} → ${enhancedVariables.MCP_SERVER}`)); + logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`); } else { - // Fallback for unknown agents enhancedVariables.MCP_SERVER = 'playwright-agent1'; - console.log(chalk.yellow(` 🎭 Unknown agent ${promptName}, using fallback → ${enhancedVariables.MCP_SERVER}`)); + logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`); } + // 3. 
Read template file let template = await fs.readFile(promptPath, 'utf8'); - // Pre-process the template to handle @include directives + // 4. Process @include directives template = await processIncludes(template, promptsDir); - return await interpolateVariables(template, enhancedVariables, config); + // 5. Interpolate variables and return final prompt + return await interpolateVariables(template, enhancedVariables, config, logger); } catch (error) { if (error instanceof PentestError) { throw error; diff --git a/src/queue-validation.ts b/src/services/queue-validation.ts similarity index 91% rename from src/queue-validation.ts rename to src/services/queue-validation.ts index ce21e1d..dde8666 100644 --- a/src/queue-validation.ts +++ b/src/services/queue-validation.ts @@ -6,9 +6,12 @@ import { fs, path } from 'zx'; import { PentestError } from './error-handling.js'; -import { asyncPipe } from './utils/functional.js'; +import { ErrorCode } from '../types/errors.js'; +import { type Result, ok, err } from '../types/result.js'; +import { asyncPipe } from '../utils/functional.js'; +import type { VulnType, ExploitationDecision } from '../types/agents.js'; -export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; +export type { VulnType, ExploitationDecision } from '../types/agents.js'; interface VulnTypeConfigItem { deliverable: string; @@ -60,18 +63,11 @@ interface QueueValidationResult { error: string | null; } -export interface ExploitationDecision { - shouldExploit: boolean; - shouldRetry: boolean; - vulnerabilityCount: number; - vulnType: VulnType; -} -export interface SafeValidationResult { - success: boolean; - data?: ExploitationDecision; - error?: PentestError; -} +/** + * Result type for safe validation - explicit error handling. 
+ */ +export type SafeValidationResult = Result; // Vulnerability type configuration as immutable data const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({ @@ -196,7 +192,8 @@ const validateExistenceRules = ( deliverablePath: pathsWithExistence.deliverable, queuePath: pathsWithExistence.queue, existence, - } + }, + ErrorCode.DELIVERABLE_NOT_FOUND ), }; } @@ -311,15 +308,18 @@ export async function validateQueueAndDeliverable( ); } -// Pure function to safely validate (returns result instead of throwing) -export const safeValidateQueueAndDeliverable = async ( +/** + * Safely validate queue and deliverable files. + * Returns Result for explicit error handling. + */ +export async function validateQueueSafe( vulnType: VulnType, sourceDir: string -): Promise => { +): Promise { try { const result = await validateQueueAndDeliverable(vulnType, sourceDir); - return { success: true, data: result }; + return ok(result); } catch (error) { - return { success: false, error: error as PentestError }; + return err(error as PentestError); } -}; +} diff --git a/src/phases/reporting.ts b/src/services/reporting.ts similarity index 76% rename from src/phases/reporting.ts rename to src/services/reporting.ts index ec9f86b..bc04fe1 100644 --- a/src/phases/reporting.ts +++ b/src/services/reporting.ts @@ -5,8 +5,9 @@ // as published by the Free Software Foundation. 
import { fs, path } from 'zx'; -import chalk from 'chalk'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from './error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; interface DeliverableFile { name: string; @@ -15,7 +16,7 @@ interface DeliverableFile { } // Pure function: Assemble final report from specialist deliverables -export async function assembleFinalReport(sourceDir: string): Promise { +export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise { const deliverableFiles: DeliverableFile[] = [ { name: 'Injection', path: 'injection_exploitation_evidence.md', required: false }, { name: 'XSS', path: 'xss_exploitation_evidence.md', required: false }, @@ -32,18 +33,24 @@ export async function assembleFinalReport(sourceDir: string): Promise { if (await fs.pathExists(filePath)) { const content = await fs.readFile(filePath, 'utf8'); sections.push(content); - console.log(chalk.green(`✅ Added ${file.name} findings`)); + logger.info(`Added ${file.name} findings`); } else if (file.required) { - throw new Error(`Required file ${file.path} not found`); + throw new PentestError( + `Required deliverable file not found: ${file.path}`, + 'filesystem', + false, + { deliverableFile: file.path, sourceDir }, + ErrorCode.DELIVERABLE_NOT_FOUND + ); } else { - console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`)); + logger.info(`No ${file.name} deliverable found`); } } catch (error) { if (file.required) { throw error; } const err = error as Error; - console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${err.message}`)); + logger.warn(`Could not read ${file.path}: ${err.message}`); } } @@ -55,7 +62,7 @@ export async function assembleFinalReport(sourceDir: string): Promise { // Ensure deliverables directory exists await fs.ensureDir(deliverablesDir); await fs.writeFile(finalReportPath, finalContent); - 
console.log(chalk.green(`✅ Final report assembled at ${finalReportPath}`)); + logger.info(`Final report assembled at ${finalReportPath}`); } catch (error) { const err = error as Error; throw new PentestError( @@ -76,13 +83,14 @@ export async function assembleFinalReport(sourceDir: string): Promise { */ export async function injectModelIntoReport( repoPath: string, - outputPath: string + outputPath: string, + logger: ActivityLogger ): Promise { // 1. Read session.json to get model information const sessionJsonPath = path.join(outputPath, 'session.json'); if (!(await fs.pathExists(sessionJsonPath))) { - console.log(chalk.yellow('⚠️ session.json not found, skipping model injection')); + logger.warn('session.json not found, skipping model injection'); return; } @@ -103,18 +111,18 @@ export async function injectModelIntoReport( } if (models.size === 0) { - console.log(chalk.yellow('⚠️ No model information found in session.json')); + logger.warn('No model information found in session.json'); return; } const modelStr = Array.from(models).join(', '); - console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`)); + logger.info(`Injecting model info into report: ${modelStr}`); // 3. 
Read the final report const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md'); if (!(await fs.pathExists(reportPath))) { - console.log(chalk.yellow('⚠️ Final report not found, skipping model injection')); + logger.warn('Final report not found, skipping model injection'); return; } @@ -132,7 +140,7 @@ export async function injectModelIntoReport( assessmentDatePattern, `$1\n${modelLine}` ); - console.log(chalk.green('✅ Model info injected into Executive Summary')); + logger.info('Model info injected into Executive Summary'); } else { // If no Assessment Date line found, try to add after Executive Summary header const execSummaryPattern = /^## Executive Summary$/m; @@ -142,9 +150,9 @@ export async function injectModelIntoReport( execSummaryPattern, `## Executive Summary\n- Model: ${modelStr}` ); - console.log(chalk.green('✅ Model info added to Executive Summary header')); + logger.info('Model info added to Executive Summary header'); } else { - console.log(chalk.yellow('⚠️ Could not find Executive Summary section')); + logger.warn('Could not find Executive Summary section'); return; } } diff --git a/src/session-manager.ts b/src/session-manager.ts index 335a74d..8180040 100644 --- a/src/session-manager.ts +++ b/src/session-manager.ts @@ -4,106 +4,105 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. 
-import { path } from 'zx'; -import type { AgentName } from './types/index.js'; - -// Agent definition interface -export interface AgentDefinition { - name: AgentName; - displayName: string; - prerequisites: AgentName[]; -} +import { path, fs } from 'zx'; +import { validateQueueAndDeliverable } from './services/queue-validation.js'; +import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js'; +import type { ActivityLogger } from './types/activity-logger.js'; // Agent definitions according to PRD +// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES export const AGENTS: Readonly> = Object.freeze({ 'pre-recon': { name: 'pre-recon', displayName: 'Pre-recon agent', - prerequisites: [] + prerequisites: [], + promptTemplate: 'pre-recon-code', + deliverableFilename: 'code_analysis_deliverable.md', }, 'recon': { name: 'recon', displayName: 'Recon agent', - prerequisites: ['pre-recon'] + prerequisites: ['pre-recon'], + promptTemplate: 'recon', + deliverableFilename: 'recon_deliverable.md', }, 'injection-vuln': { name: 'injection-vuln', displayName: 'Injection vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-injection', + deliverableFilename: 'injection_analysis_deliverable.md', }, 'xss-vuln': { name: 'xss-vuln', displayName: 'XSS vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-xss', + deliverableFilename: 'xss_analysis_deliverable.md', }, 'auth-vuln': { name: 'auth-vuln', displayName: 'Auth vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-auth', + deliverableFilename: 'auth_analysis_deliverable.md', }, 'ssrf-vuln': { name: 'ssrf-vuln', displayName: 'SSRF vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-ssrf', + deliverableFilename: 'ssrf_analysis_deliverable.md', }, 'authz-vuln': { name: 'authz-vuln', 
displayName: 'Authz vuln agent', - prerequisites: ['recon'] + prerequisites: ['recon'], + promptTemplate: 'vuln-authz', + deliverableFilename: 'authz_analysis_deliverable.md', }, 'injection-exploit': { name: 'injection-exploit', displayName: 'Injection exploit agent', - prerequisites: ['injection-vuln'] + prerequisites: ['injection-vuln'], + promptTemplate: 'exploit-injection', + deliverableFilename: 'injection_exploitation_evidence.md', }, 'xss-exploit': { name: 'xss-exploit', displayName: 'XSS exploit agent', - prerequisites: ['xss-vuln'] + prerequisites: ['xss-vuln'], + promptTemplate: 'exploit-xss', + deliverableFilename: 'xss_exploitation_evidence.md', }, 'auth-exploit': { name: 'auth-exploit', displayName: 'Auth exploit agent', - prerequisites: ['auth-vuln'] + prerequisites: ['auth-vuln'], + promptTemplate: 'exploit-auth', + deliverableFilename: 'auth_exploitation_evidence.md', }, 'ssrf-exploit': { name: 'ssrf-exploit', displayName: 'SSRF exploit agent', - prerequisites: ['ssrf-vuln'] + prerequisites: ['ssrf-vuln'], + promptTemplate: 'exploit-ssrf', + deliverableFilename: 'ssrf_exploitation_evidence.md', }, 'authz-exploit': { name: 'authz-exploit', displayName: 'Authz exploit agent', - prerequisites: ['authz-vuln'] + prerequisites: ['authz-vuln'], + promptTemplate: 'exploit-authz', + deliverableFilename: 'authz_exploitation_evidence.md', }, 'report': { name: 'report', displayName: 'Report agent', - prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'] - } -}); - -// Agent execution order -export const AGENT_ORDER: readonly AgentName[] = Object.freeze([ - 'pre-recon', - 'recon', - 'injection-vuln', - 'xss-vuln', - 'auth-vuln', - 'ssrf-vuln', - 'authz-vuln', - 'injection-exploit', - 'xss-exploit', - 'auth-exploit', - 'ssrf-exploit', - 'authz-exploit', - 'report' -] as const); - -// Parallel execution groups -export const getParallelGroups = (): Readonly<{ vuln: AgentName[]; exploit: AgentName[] }> => 
Object.freeze({ - vuln: ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'], - exploit: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'] + prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'], + promptTemplate: 'report-executive', + deliverableFilename: 'comprehensive_security_assessment_report.md', + }, }); // Phase names for metrics aggregation @@ -126,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly> = Object.fr 'report': 'reporting', }); +// Factory function for vulnerability queue validators +function createVulnValidator(vulnType: VulnType): AgentValidator { + return async (sourceDir: string, logger: ActivityLogger): Promise => { + try { + await validateQueueAndDeliverable(vulnType, sourceDir); + return true; + } catch (error) { + const errMsg = error instanceof Error ? error.message : String(error); + logger.warn(`Queue validation failed for ${vulnType}: ${errMsg}`); + return false; + } + }; +} +// Factory function for exploit deliverable validators +function createExploitValidator(vulnType: VulnType): AgentValidator { + return async (sourceDir: string): Promise => { + const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`); + return await fs.pathExists(evidenceFile); + }; +} + +// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts +// Keys are promptTemplate values from AGENTS registry +export const MCP_AGENT_MAPPING: Record = Object.freeze({ + // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code') + // NOTE: Pre-recon is pure code analysis and doesn't use browser automation, + // but assigning MCP server anyway for consistency and future extensibility + 'pre-recon-code': 'playwright-agent1', + + // Phase 2: Reconnaissance (actual prompt name is 'recon') + recon: 'playwright-agent2', + + // Phase 3: Vulnerability Analysis (5 parallel agents) + 
'vuln-injection': 'playwright-agent1', + 'vuln-xss': 'playwright-agent2', + 'vuln-auth': 'playwright-agent3', + 'vuln-ssrf': 'playwright-agent4', + 'vuln-authz': 'playwright-agent5', + + // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts) + 'exploit-injection': 'playwright-agent1', + 'exploit-xss': 'playwright-agent2', + 'exploit-auth': 'playwright-agent3', + 'exploit-ssrf': 'playwright-agent4', + 'exploit-authz': 'playwright-agent5', + + // Phase 5: Reporting (actual prompt name is 'report-executive') + // NOTE: Report generation is typically text-based and doesn't use browser automation, + // but assigning MCP server anyway for potential screenshot inclusion or future needs + 'report-executive': 'playwright-agent3', +}); + +// Direct agent-to-validator mapping - much simpler than pattern matching +export const AGENT_VALIDATORS: Record = Object.freeze({ + // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent + 'pre-recon': async (sourceDir: string): Promise => { + const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md'); + return await fs.pathExists(codeAnalysisFile); + }, + + // Reconnaissance agent + recon: async (sourceDir: string): Promise => { + const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md'); + return await fs.pathExists(reconFile); + }, + + // Vulnerability analysis agents + 'injection-vuln': createVulnValidator('injection'), + 'xss-vuln': createVulnValidator('xss'), + 'auth-vuln': createVulnValidator('auth'), + 'ssrf-vuln': createVulnValidator('ssrf'), + 'authz-vuln': createVulnValidator('authz'), + + // Exploitation agents + 'injection-exploit': createExploitValidator('injection'), + 'xss-exploit': createExploitValidator('xss'), + 'auth-exploit': createExploitValidator('auth'), + 'ssrf-exploit': createExploitValidator('ssrf'), + 'authz-exploit': createExploitValidator('authz'), + + // Executive report agent + report: async 
(sourceDir: string, logger: ActivityLogger): Promise => { + const reportFile = path.join( + sourceDir, + 'deliverables', + 'comprehensive_security_assessment_report.md' + ); + + const reportExists = await fs.pathExists(reportFile); + + if (!reportExists) { + logger.error('Missing required deliverable: comprehensive_security_assessment_report.md'); + } + + return reportExists; + }, +}); diff --git a/src/setup/environment.ts b/src/setup/environment.ts deleted file mode 100644 index 55f8bbe..0000000 --- a/src/setup/environment.ts +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { $, fs, path } from 'zx'; -import chalk from 'chalk'; -import { PentestError } from '../error-handling.js'; - -// Pure function: Setup local repository for testing -export async function setupLocalRepo(repoPath: string): Promise { - try { - const sourceDir = path.resolve(repoPath); - - // MCP servers are now configured via mcpServers option in claude-executor.js - // No need for pre-setup with claude CLI - - // Initialize git repository if not already initialized and create checkpoint - try { - // Check if it's already a git repository - const isGitRepo = await fs.pathExists(path.join(sourceDir, '.git')); - - if (!isGitRepo) { - await $`cd ${sourceDir} && git init`; - console.log(chalk.blue('✅ Git repository initialized')); - } - - // Configure git for pentest agent - await $`cd ${sourceDir} && git config user.name "Pentest Agent"`; - await $`cd ${sourceDir} && git config user.email "agent@localhost"`; - - // Create initial checkpoint - await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`; - console.log(chalk.green('✅ Initial checkpoint created')); - } catch (gitError) { - const errMsg = gitError 
instanceof Error ? gitError.message : String(gitError); - console.log(chalk.yellow(`⚠️ Git setup warning: ${errMsg}`)); - // Non-fatal - continue without Git setup - } - - // MCP tools (save_deliverable, generate_totp) are now available natively via shannon-helper MCP server - // No need to copy bash scripts to target repository - - return sourceDir; - } catch (error) { - if (error instanceof PentestError) { - throw error; - } - const errMsg = error instanceof Error ? error.message : String(error); - throw new PentestError(`Local repository setup failed: ${errMsg}`, 'filesystem', false, { - repoPath, - originalError: errMsg, - }); - } -} diff --git a/src/temporal/activities.ts b/src/temporal/activities.ts index 6f76244..9d8a03e 100644 --- a/src/temporal/activities.ts +++ b/src/temporal/activities.ts @@ -7,28 +7,58 @@ /** * Temporal activities for Shannon agent execution. * - * Each activity wraps a single agent execution with: + * Each activity wraps service calls with Temporal-specific concerns: * - Heartbeat loop (2s interval) to signal worker liveness - * - Git checkpoint/rollback/commit per attempt - * - Error classification for Temporal retry behavior - * - Audit session logging + * - Error classification into ApplicationFailure + * - Container lifecycle management * - * Temporal handles retries based on error classification: - * - Retryable: BillingError, TransientError (429, 5xx, network) - * - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc. + * Business logic is delegated to services in src/services/. 
*/ import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity'; -import chalk from 'chalk'; +import path from 'path'; +import fs from 'fs/promises'; + +import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js'; +import { ErrorCode } from '../types/errors.js'; +import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js'; +import { ExploitationCheckerService } from '../services/exploitation-checker.js'; +import type { VulnType, ExploitationDecision } from '../services/queue-validation.js'; +import { AuditSession } from '../audit/index.js'; +import type { WorkflowSummary } from '../audit/workflow-logger.js'; +import type { AgentName } from '../types/agents.js'; +import { ALL_AGENTS } from '../types/agents.js'; +import type { AgentMetrics, ResumeState } from './shared.js'; +import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js'; +import { readJson, fileExists } from '../utils/file-io.js'; +import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js'; +import { AGENTS } from '../session-manager.js'; +import { executeGitCommandWithRetry } from '../services/git-manager.js'; +import type { ResumeAttempt } from '../audit/metrics-tracker.js'; +import { createActivityLogger } from './activity-logger.js'; // Max lengths to prevent Temporal protobuf buffer overflow const MAX_ERROR_MESSAGE_LENGTH = 2000; const MAX_STACK_TRACE_LENGTH = 1000; // Max retries for output validation errors (agent didn't save deliverables) -// Lower than default 50 since this is unlikely to self-heal const MAX_OUTPUT_VALIDATION_RETRIES = 3; +const HEARTBEAT_INTERVAL_MS = 2000; + +/** + * Input for all agent activities. 
+ */ +export interface ActivityInput { + webUrl: string; + repoPath: string; + configPath?: string; + outputPath?: string; + pipelineTestingMode?: boolean; + workflowId: string; + sessionId: string; +} + /** * Truncate error message to prevent buffer overflow in Temporal serialization. */ @@ -48,85 +78,34 @@ function truncateStackTrace(failure: ApplicationFailure): void { } } -import { - runClaudePrompt, - validateAgentOutput, - type ClaudePromptResult, -} from '../ai/claude-executor.js'; -import { loadPrompt } from '../prompts/prompt-manager.js'; -import { parseConfig, distributeConfig } from '../config-parser.js'; -import { classifyErrorForTemporal } from '../error-handling.js'; -import { - safeValidateQueueAndDeliverable, - type VulnType, - type ExploitationDecision, -} from '../queue-validation.js'; -import { - createGitCheckpoint, - commitGitSuccess, - rollbackGitWorkspace, - getGitCommitHash, -} from '../utils/git-manager.js'; -import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js'; -import { getPromptNameForAgent } from '../types/agents.js'; -import { AuditSession } from '../audit/index.js'; -import type { WorkflowSummary } from '../audit/workflow-logger.js'; -import type { AgentName } from '../types/agents.js'; -import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js'; -import type { AgentMetrics, ResumeState } from './shared.js'; -import type { DistributedConfig } from '../types/config.js'; -import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js'; -import type { ResumeAttempt } from '../audit/metrics-tracker.js'; -import { executeGitCommandWithRetry } from '../utils/git-manager.js'; -import path from 'path'; -import fs from 'fs/promises'; - -const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing) - /** - * Input for all agent activities. - * Matches PipelineInput but with required workflowId for audit correlation. 
+ * Build SessionMetadata from ActivityInput. */ -export interface ActivityInput { - webUrl: string; - repoPath: string; - configPath?: string; - outputPath?: string; - pipelineTestingMode?: boolean; - workflowId: string; - sessionId: string; // Workspace name (for resume) or workflowId (for new runs) +function buildSessionMetadata(input: ActivityInput): SessionMetadata { + const { webUrl, repoPath, outputPath, sessionId } = input; + return { + id: sessionId, + webUrl, + repoPath, + ...(outputPath && { outputPath }), + }; } /** - * Core activity implementation. + * Core activity implementation using services. * * Executes a single agent with: * 1. Heartbeat loop for worker liveness - * 2. Config loading (if configPath provided) - * 3. Audit session initialization - * 4. Prompt loading - * 5. Git checkpoint before execution - * 6. Agent execution (single attempt) - * 7. Output validation - * 8. Git commit on success, rollback on failure - * 9. Error classification for Temporal retry + * 2. Container creation/reuse + * 3. Service-based agent execution + * 4. Error classification for Temporal retry */ async function runAgentActivity( agentName: AgentName, input: ActivityInput ): Promise { - const { - webUrl, - repoPath, - configPath, - outputPath, - pipelineTestingMode = false, - workflowId, - } = input; - + const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input; const startTime = Date.now(); - - // Get attempt number from Temporal context (tracks retries automatically) const attemptNumber = Context.current().info.attempt; // Heartbeat loop - signals worker is alive to Temporal server @@ -136,160 +115,66 @@ async function runAgentActivity( }, HEARTBEAT_INTERVAL_MS); try { - // 1. 
Load config (if provided) - let distributedConfig: DistributedConfig | null = null; - if (configPath) { - try { - const config = await parseConfig(configPath); - distributedConfig = distributeConfig(config); - } catch (err) { - throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`); - } - } + const logger = createActivityLogger(); - // 2. Build session metadata for audit - // Use sessionId (workspace name) for directory, workflowId for tracking - const sessionMetadata: SessionMetadata = { - id: input.sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; + // 1. Build session metadata and get/create container + const sessionMetadata = buildSessionMetadata(input); + const container = getOrCreateContainer(workflowId, sessionMetadata); - // 3. Initialize audit session (idempotent, safe across retries) + // 2. Create audit session for THIS agent execution + // NOTE: Each agent needs its own AuditSession because AuditSession uses + // instance state (currentAgentName) that cannot be shared across parallel agents const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(workflowId); - // 4. Load prompt - const promptName = getPromptNameForAgent(agentName); - const prompt = await loadPrompt( - promptName, - { webUrl, repoPath }, - distributedConfig, - pipelineTestingMode - ); - - // 5. Create git checkpoint before execution - await createGitCheckpoint(repoPath, agentName, attemptNumber); - await auditSession.startAgent(agentName, prompt, attemptNumber); - - // 6. Execute agent (single attempt - Temporal handles retries) - const result: ClaudePromptResult = await runClaudePrompt( - prompt, - repoPath, - '', // context - agentName, // description + // 3. 
Execute agent via service (throws PentestError on failure) + const endResult = await container.agentExecution.executeOrThrow( agentName, - chalk.cyan, - sessionMetadata, + { + webUrl, + repoPath, + configPath, + pipelineTestingMode, + attemptNumber, + }, auditSession, - attemptNumber + logger ); - // 6.5. Sanity check: Detect spending cap that slipped through all detection layers - // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost - if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) { - const resultText = result.result || ''; - const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText); - - if (looksLikeBillingError) { - await rollbackGitWorkspace(repoPath, 'spending cap detected'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: 0, - success: false, - model: result.model, - error: `Spending cap likely reached: ${resultText.slice(0, 100)}`, - }); - // Throw as billing error so Temporal retries with long backoff - throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`); - } - } - - // 7. Handle execution failure - if (!result.success) { - await rollbackGitWorkspace(repoPath, 'execution failure'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: false, - model: result.model, - error: result.error || 'Execution failed', - }); - throw new Error(result.error || 'Agent execution failed'); - } - - // 8. 
Validate output - const validationPassed = await validateAgentOutput(result, agentName, repoPath); - if (!validationPassed) { - await rollbackGitWorkspace(repoPath, 'validation failure'); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: false, - model: result.model, - error: 'Output validation failed', - }); - - // Limit output validation retries (unlikely to self-heal) - if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) { - throw ApplicationFailure.nonRetryable( - `Agent ${agentName} failed output validation after ${attemptNumber} attempts`, - 'OutputValidationError', - [{ agentName, attemptNumber, elapsed: Date.now() - startTime }] - ); - } - // Let Temporal retry (will be classified as OutputValidationError) - throw new Error(`Agent ${agentName} failed output validation`); - } - - // 9. Success - commit deliverables, then capture checkpoint hash - await commitGitSuccess(repoPath, agentName); - const commitHash = await getGitCommitHash(repoPath); - await auditSession.endAgent(agentName, { - attemptNumber, - duration_ms: result.duration, - cost_usd: result.cost || 0, - success: true, - model: result.model, - ...(commitHash && { checkpoint: commitHash }), - }); - - // 10. Return metrics + // 4. Return metrics return { durationMs: Date.now() - startTime, - inputTokens: null, // Not currently exposed by SDK wrapper + inputTokens: null, outputTokens: null, - costUsd: result.cost ?? null, - numTurns: result.turns ?? 
null, - model: result.model, + costUsd: endResult.cost_usd, + numTurns: null, + model: endResult.model, }; } catch (error) { - // Rollback git workspace before Temporal retry to ensure clean state - try { - await rollbackGitWorkspace(repoPath, 'error recovery'); - } catch (rollbackErr) { - // Log but don't fail - rollback is best-effort - console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr); - } - - // If error is already an ApplicationFailure (e.g., from our retry limit logic), - // re-throw it directly without re-classifying + // If error is already an ApplicationFailure, re-throw directly if (error instanceof ApplicationFailure) { throw error; } + // Check if output validation retry limit reached (PentestError with code) + if ( + error instanceof PentestError && + error.code === ErrorCode.OUTPUT_VALIDATION_FAILED && + attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES + ) { + throw ApplicationFailure.nonRetryable( + `Agent ${agentName} failed output validation after ${attemptNumber} attempts`, + 'OutputValidationError', + [{ agentName, attemptNumber, elapsed: Date.now() - startTime }] + ); + } + // Classify error for Temporal retry behavior const classified = classifyErrorForTemporal(error); - // Truncate message to prevent protobuf buffer overflow const rawMessage = error instanceof Error ? 
error.message : String(error); const message = truncateErrorMessage(rawMessage); if (classified.retryable) { - // Temporal will retry with configured backoff const failure = ApplicationFailure.create({ message, type: classified.type, @@ -298,7 +183,6 @@ async function runAgentActivity( truncateStackTrace(failure); throw failure; } else { - // Fail immediately - no retry const failure = ApplicationFailure.nonRetryable(message, classified.type, [ { agentName, attemptNumber, elapsed: Date.now() - startTime }, ]); @@ -310,9 +194,6 @@ async function runAgentActivity( } } -// === Individual Agent Activity Exports === -// Each function is a thin wrapper around runAgentActivity with the agent name. - export async function runPreReconAgent(input: ActivityInput): Promise { return runAgentActivity('pre-recon', input); } @@ -367,92 +248,56 @@ export async function runReportAgent(input: ActivityInput): Promise { const { repoPath } = input; - console.log(chalk.blue('📝 Assembling deliverables from specialist agents...')); + const logger = createActivityLogger(); + logger.info('Assembling deliverables from specialist agents...'); try { - await assembleFinalReport(repoPath); + await assembleFinalReport(repoPath, logger); } catch (error) { const err = error as Error; - console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`)); - // Don't throw - the report agent can still create content even if no exploitation files exist + logger.warn(`Error assembling final report: ${err.message}`); } } /** * Inject model metadata into the final report. - * This must be called AFTER runReportAgent to add the model information to the Executive Summary. */ export async function injectReportMetadataActivity(input: ActivityInput): Promise { const { repoPath, sessionId, outputPath } = input; + const logger = createActivityLogger(); const effectiveOutputPath = outputPath ? 
path.join(outputPath, sessionId) : path.join('./audit-logs', sessionId); try { - await injectModelIntoReport(repoPath, effectiveOutputPath); + await injectModelIntoReport(repoPath, effectiveOutputPath, logger); } catch (error) { const err = error as Error; - console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`)); - // Don't throw - this is a non-critical enhancement + logger.warn(`Error injecting model into report: ${err.message}`); } } /** * Check if exploitation should run for a given vulnerability type. - * Reads the vulnerability queue file and returns the decision. * - * This activity allows the workflow to skip exploit agents entirely - * when no vulnerabilities were found, saving API calls and time. - * - * Error handling: - * - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry - * - Non-retryable errors: skip exploitation gracefully + * Uses existing container if available (from prior agent runs), + * otherwise creates service directly (stateless, no dependencies). */ export async function checkExploitationQueue( input: ActivityInput, vulnType: VulnType ): Promise { - const { repoPath } = input; + const { repoPath, workflowId } = input; + const logger = createActivityLogger(); - const result = await safeValidateQueueAndDeliverable(vulnType, repoPath); + // Reuse container's service if available (from prior vuln agent runs) + const existingContainer = getContainer(workflowId); + const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService(); - if (result.success && result.data) { - const { shouldExploit, vulnerabilityCount } = result.data; - console.log( - chalk.blue( - `🔍 ${vulnType}: ${shouldExploit ? 
`${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}` - ) - ); - return result.data; - } - - // Validation failed - check if we should retry or skip - const error = result.error; - if (error?.retryable) { - // Re-throw retryable errors so Temporal can retry the vuln agent - console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`)); - throw error; - } - - // Non-retryable error - skip exploitation gracefully - console.log( - chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`) - ); - return { - shouldExploit: false, - shouldRetry: false, - vulnerabilityCount: 0, - vulnType, - }; + return checker.checkQueue(vulnType, repoPath, logger); } -// === Resume Activities === - -/** - * Session.json structure for resume state loading - */ interface SessionJson { session: { id: string; @@ -462,27 +307,27 @@ interface SessionJson { resumeAttempts?: ResumeAttempt[]; }; metrics: { - agents: Record; + agents: Record< + string, + { + status: 'in-progress' | 'success' | 'failed'; + checkpoint?: string; + } + >; }; } /** * Load resume state from an existing workspace. - * Validates workspace exists, URL matches, and determines which agents to skip. - * - * @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch */ export async function loadResumeState( workspaceName: string, expectedUrl: string, expectedRepoPath: string ): Promise { + // 1. Validate workspace exists const sessionPath = path.join('./audit-logs', workspaceName, 'session.json'); - // Validate workspace exists const exists = await fileExists(sessionPath); if (!exists) { throw ApplicationFailure.nonRetryable( @@ -491,7 +336,7 @@ export async function loadResumeState( ); } - // Load session.json + // 2. 
Parse session.json and validate URL match let session: SessionJson; try { session = await readJson(sessionPath); @@ -503,7 +348,6 @@ export async function loadResumeState( ); } - // Validate URL matches if (session.session.webUrl !== expectedUrl) { throw ApplicationFailure.nonRetryable( `URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`, @@ -511,34 +355,30 @@ export async function loadResumeState( ); } - // Find completed agents (status === 'success' AND deliverable exists) + // 3. Cross-check agent status with deliverables on disk const completedAgents: string[] = []; const agents = session.metrics.agents; for (const agentName of ALL_AGENTS) { const agentData = agents[agentName]; - - // Skip if agent never ran or didn't succeed if (!agentData || agentData.status !== 'success') { continue; } - // Validate deliverable exists - const deliverablePath = getDeliverablePath(agentName, expectedRepoPath); + const deliverableFilename = AGENTS[agentName].deliverableFilename; + const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`; const deliverableExists = await fileExists(deliverablePath); if (!deliverableExists) { - console.log( - chalk.yellow(`Agent ${agentName} shows success but deliverable missing, will re-run`) - ); + const logger = createActivityLogger(); + logger.warn(`Agent ${agentName} shows success but deliverable missing, will re-run`); continue; } - // Agent completed successfully and deliverable exists completedAgents.push(agentName); } - // Find latest checkpoint from completed agents + // 4. Collect git checkpoints and validate at least one exists const checkpoints = completedAgents .map((name) => agents[name]?.checkpoint) .filter((hash): hash is string => hash != null); @@ -550,24 +390,26 @@ export async function loadResumeState( throw ApplicationFailure.nonRetryable( `Cannot resume workspace ${workspaceName}: ` + - (successAgents.length > 0 - ? 
`${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` + - `but their deliverable files are missing from disk. ` + - `Start a fresh run instead.` - : `No agents completed successfully. Start a fresh run instead.`), + (successAgents.length > 0 + ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` + + `but their deliverable files are missing from disk. ` + + `Start a fresh run instead.` + : `No agents completed successfully. Start a fresh run instead.`), 'NoCheckpointsError' ); } - // Find most recent commit among checkpoints + // 5. Find the most recent checkpoint commit const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints); - const originalWorkflowId = session.session.originalWorkflowId || session.session.id; - console.log(chalk.cyan(`=== RESUME STATE ===`)); - console.log(`Workspace: ${workspaceName}`); - console.log(`Completed agents: ${completedAgents.length}`); - console.log(`Checkpoint: ${checkpointHash}`); + // 6. Log summary and return resume state + const logger = createActivityLogger(); + logger.info('Resume state loaded', { + workspace: workspaceName, + completedAgents: completedAgents.length, + checkpoint: checkpointHash, + }); return { workspaceName, @@ -578,20 +420,21 @@ export async function loadResumeState( }; } -/** - * Find the most recent commit among a list of commit hashes. - * Uses git rev-list to determine which commit is newest. 
- */ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise { if (commitHashes.length === 1) { const hash = commitHashes[0]; if (!hash) { - throw new Error('Empty commit hash in array'); + throw new PentestError( + 'Empty commit hash in array', + 'filesystem', + false, // Non-retryable - corrupt workspace state + { phase: 'resume' }, + ErrorCode.GIT_CHECKPOINT_FAILED + ); } return hash; } - // Use git rev-list to find the most recent commit among all hashes const result = await executeGitCommandWithRetry( ['git', 'rev-list', '--max-count=1', ...commitHashes], repoPath, @@ -603,20 +446,15 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi /** * Restore git workspace to a checkpoint and clean up partial deliverables. - * - * @param repoPath - Repository path - * @param checkpointHash - Git commit hash to reset to - * @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up) */ export async function restoreGitCheckpoint( repoPath: string, checkpointHash: string, incompleteAgents: AgentName[] ): Promise { - console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`)); + const logger = createActivityLogger(); + logger.info(`Restoring git workspace to ${checkpointHash}...`); - // Checkpoint hash points to the success commit (after commitGitSuccess), - // so git reset --hard naturally preserves all completed agent deliverables. 
await executeGitCommandWithRetry( ['git', 'reset', '--hard', checkpointHash], repoPath, @@ -628,67 +466,60 @@ export async function restoreGitCheckpoint( 'clean untracked files for resume' ); - // Clean up any partial deliverables from incomplete agents for (const agentName of incompleteAgents) { - const deliverablePath = getDeliverablePath(agentName, repoPath); + const deliverableFilename = AGENTS[agentName].deliverableFilename; + const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`; try { const exists = await fileExists(deliverablePath); if (exists) { - console.log(chalk.yellow(`Cleaning partial deliverable: ${agentName}`)); + logger.warn(`Cleaning partial deliverable: ${agentName}`); await fs.unlink(deliverablePath); } } catch (error) { - console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`)); + logger.info(`Note: Failed to delete ${deliverablePath}: ${error}`); } } - console.log(chalk.green('Workspace restored to clean state')); + logger.info('Workspace restored to clean state'); } /** - * Record a resume attempt in session.json. - * Tracks the new workflow ID, terminated workflows, and checkpoint hash. + * Record a resume attempt in session.json and write resume header to workflow.log. 
*/ export async function recordResumeAttempt( input: ActivityInput, terminatedWorkflows: string[], - checkpointHash: string + checkpointHash: string, + previousWorkflowId: string, + completedAgents: string[] ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; - + const sessionMetadata = buildSessionMetadata(input); const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(); - await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash); + // Update session.json with resume attempt + await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash); + + // Write resume header to workflow.log + await auditSession.logResumeHeader({ + previousWorkflowId, + newWorkflowId: input.workflowId, + checkpointHash, + completedAgents, + }); } /** * Log phase transition to the unified workflow log. - * Called at phase boundaries for per-workflow logging. */ export async function logPhaseTransition( input: ActivityInput, phase: string, event: 'start' | 'complete' ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; - + const sessionMetadata = buildSessionMetadata(input); const auditSession = new AuditSession(sessionMetadata); - await auditSession.initialize(workflowId); + await auditSession.initialize(input.workflowId); if (event === 'start') { await auditSession.logPhaseStart(phase); @@ -698,28 +529,23 @@ export async function logPhaseTransition( } /** - * Log workflow completion with full summary to the unified workflow log. - * Called at the end of the workflow to write a summary breakdown. + * Log workflow completion with full summary. + * Cleans up container when done. 
*/ export async function logWorkflowComplete( input: ActivityInput, summary: WorkflowSummary ): Promise { - const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; - - const sessionMetadata: SessionMetadata = { - id: sessionId, - webUrl, - repoPath, - ...(outputPath && { outputPath }), - }; + const { repoPath, workflowId } = input; + const sessionMetadata = buildSessionMetadata(input); + // 1. Initialize audit session and mark final status const auditSession = new AuditSession(sessionMetadata); await auditSession.initialize(workflowId); await auditSession.updateSessionStatus(summary.status); - // Use cumulative metrics from session.json (includes all resume attempts) - const sessionData = await auditSession.getMetrics() as { + // 2. Load cumulative metrics from session.json + const sessionData = (await auditSession.getMetrics()) as { metrics: { total_duration_ms: number; total_cost_usd: number; @@ -727,7 +553,7 @@ export async function logWorkflowComplete( }; }; - // Fill in metrics for skipped agents (completed in previous runs) + // 3. Fill in metrics for skipped agents (resumed from previous run) const agentMetrics = { ...summary.agentMetrics }; for (const agentName of summary.completedAgents) { if (!agentMetrics[agentName]) { @@ -741,18 +567,27 @@ export async function logWorkflowComplete( } } + // 4. Build cumulative summary with cross-run totals const cumulativeSummary: WorkflowSummary = { ...summary, totalDurationMs: sessionData.metrics.total_duration_ms, totalCostUsd: sessionData.metrics.total_cost_usd, agentMetrics, }; + + // 5. Write completion entry to workflow.log await auditSession.logWorkflowComplete(cumulativeSummary); - // Copy all deliverables to audit-logs once at workflow end (non-fatal) + // 6. 
Copy deliverables to audit-logs try { await copyDeliverablesToAudit(sessionMetadata, repoPath); } catch (copyErr) { - console.error('Failed to copy deliverables to audit-logs:', copyErr); + const logger = createActivityLogger(); + logger.error('Failed to copy deliverables to audit-logs', { + error: copyErr instanceof Error ? copyErr.message : String(copyErr), + }); } + + // 7. Clean up container + removeContainer(workflowId); } diff --git a/src/temporal/activity-logger.ts b/src/temporal/activity-logger.ts new file mode 100644 index 0000000..bcfe2b9 --- /dev/null +++ b/src/temporal/activity-logger.ts @@ -0,0 +1,34 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +import { Context } from '@temporalio/activity'; +import type { ActivityLogger } from '../types/activity-logger.js'; + +/** + * ActivityLogger backed by Temporal's Context.current().log. + * Must be called inside a running Temporal activity — throws otherwise. + */ +export class TemporalActivityLogger implements ActivityLogger { + info(message: string, attrs?: Record): void { + Context.current().log.info(message, attrs ?? {}); + } + + warn(message: string, attrs?: Record): void { + Context.current().log.warn(message, attrs ?? {}); + } + + error(message: string, attrs?: Record): void { + Context.current().log.error(message, attrs ?? {}); + } +} + +/** + * Create an ActivityLogger. Must be called inside a Temporal activity. + * Throws if called outside an activity context. 
+ */ +export function createActivityLogger(): ActivityLogger { + return new TemporalActivityLogger(); +} diff --git a/src/temporal/client.ts b/src/temporal/client.ts index 243197e..7efe978 100644 --- a/src/temporal/client.ts +++ b/src/temporal/client.ts @@ -26,12 +26,11 @@ * TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233) */ -import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client'; +import { Connection, Client, WorkflowNotFoundError, type WorkflowHandle } from '@temporalio/client'; import dotenv from 'dotenv'; -import chalk from 'chalk'; import { displaySplashScreen } from '../splash-screen.js'; import { sanitizeHostname } from '../audit/utils.js'; -import { readJson, fileExists } from '../audit/utils.js'; +import { readJson, fileExists } from '../utils/file-io.js'; import path from 'path'; // Import types only - these don't pull in workflow runtime code import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js'; @@ -89,18 +88,18 @@ async function terminateExistingWorkflows( const description = await handle.describe(); if (description.status.name === 'RUNNING') { - console.log(chalk.yellow(`Terminating running workflow: ${wfId}`)); + console.log(`Terminating running workflow: ${wfId}`); await handle.terminate('Superseded by resume workflow'); terminated.push(wfId); - console.log(chalk.green(`Terminated: ${wfId}`)); + console.log(`Terminated: ${wfId}`); } else { - console.log(chalk.gray(`Workflow already ${description.status.name}: ${wfId}`)); + console.log(`Workflow already ${description.status.name}: ${wfId}`); } } catch (error) { if (error instanceof WorkflowNotFoundError) { - console.log(chalk.gray(`Workflow not found (already cleaned up): ${wfId}`)); + console.log(`Workflow not found (already cleaned up): ${wfId}`); } else { - console.log(chalk.red(`Failed to terminate ${wfId}: ${error}`)); + console.log(`Failed to terminate ${wfId}: ${error}`); // Continue anyway - don't block resume on 
termination failure } } @@ -118,13 +117,13 @@ function isValidWorkspaceName(name: string): boolean { } function showUsage(): void { - console.log(chalk.cyan.bold('\nShannon Temporal Client')); - console.log(chalk.gray('Start a pentest pipeline workflow\n')); - console.log(chalk.yellow('Usage:')); + console.log('\nShannon Temporal Client'); + console.log('Start a pentest pipeline workflow\n'); + console.log('Usage:'); console.log( ' node dist/temporal/client.js [options]\n' ); - console.log(chalk.yellow('Options:')); + console.log('Options:'); console.log(' --config Configuration file path'); console.log(' --output Output directory for audit logs'); console.log(' --pipeline-testing Use minimal prompts for fast testing'); @@ -133,54 +132,65 @@ function showUsage(): void { ' --workflow-id Custom workflow ID (default: shannon-)' ); console.log(' --wait Wait for workflow completion with progress polling\n'); - console.log(chalk.yellow('Examples:')); + console.log('Examples:'); console.log(' node dist/temporal/client.js https://example.com /path/to/repo'); console.log( ' node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n' ); } -async function startPipeline(): Promise { - const args = process.argv.slice(2); +// === CLI Argument Parsing === - if (args.includes('--help') || args.includes('-h') || args.length === 0) { +interface CliArgs { + webUrl: string; + repoPath: string; + configPath?: string; + outputPath?: string; + displayOutputPath?: string; + pipelineTestingMode: boolean; + customWorkflowId?: string; + waitForCompletion: boolean; + resumeFromWorkspace?: string; +} + +function parseCliArgs(argv: string[]): CliArgs { + if (argv.includes('--help') || argv.includes('-h') || argv.length === 0) { showUsage(); process.exit(0); } - // Parse arguments let webUrl: string | undefined; let repoPath: string | undefined; let configPath: string | undefined; let outputPath: string | undefined; - let displayOutputPath: string | undefined; // Host 
path for display purposes + let displayOutputPath: string | undefined; let pipelineTestingMode = false; let customWorkflowId: string | undefined; let waitForCompletion = false; let resumeFromWorkspace: string | undefined; - for (let i = 0; i < args.length; i++) { - const arg = args[i]; + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]; if (arg === '--config') { - const nextArg = args[i + 1]; + const nextArg = argv[i + 1]; if (nextArg && !nextArg.startsWith('-')) { configPath = nextArg; i++; } } else if (arg === '--output') { - const nextArg = args[i + 1]; + const nextArg = argv[i + 1]; if (nextArg && !nextArg.startsWith('-')) { outputPath = nextArg; i++; } } else if (arg === '--display-output') { - const nextArg = args[i + 1]; + const nextArg = argv[i + 1]; if (nextArg && !nextArg.startsWith('-')) { displayOutputPath = nextArg; i++; } } else if (arg === '--workflow-id') { - const nextArg = args[i + 1]; + const nextArg = argv[i + 1]; if (nextArg && !nextArg.startsWith('-')) { customWorkflowId = nextArg; i++; @@ -188,7 +198,7 @@ async function startPipeline(): Promise { } else if (arg === '--pipeline-testing') { pipelineTestingMode = true; } else if (arg === '--workspace') { - const nextArg = args[i + 1]; + const nextArg = argv[i + 1]; if (nextArg && !nextArg.startsWith('-')) { resumeFromWorkspace = nextArg; i++; @@ -205,177 +215,233 @@ async function startPipeline(): Promise { } if (!webUrl || !repoPath) { - console.log(chalk.red('Error: webUrl and repoPath are required')); + console.log('Error: webUrl and repoPath are required'); showUsage(); process.exit(1); } - // Display splash screen + return { + webUrl, repoPath, pipelineTestingMode, waitForCompletion, + ...(configPath && { configPath }), + ...(outputPath && { outputPath }), + ...(displayOutputPath && { displayOutputPath }), + ...(customWorkflowId && { customWorkflowId }), + ...(resumeFromWorkspace && { resumeFromWorkspace }), + }; +} + +// === Workspace Resolution === + +interface 
WorkspaceResolution { + workflowId: string; + sessionId: string; + isResume: boolean; + terminatedWorkflows: string[]; +} + +async function resolveWorkspace( + client: Client, + args: CliArgs +): Promise { + if (!args.resumeFromWorkspace) { + const hostname = sanitizeHostname(args.webUrl); + const workflowId = args.customWorkflowId || `${hostname}_shannon-${Date.now()}`; + return { + workflowId, + sessionId: workflowId, + isResume: false, + terminatedWorkflows: [], + }; + } + + const workspace = args.resumeFromWorkspace; + const sessionPath = path.join('./audit-logs', workspace, 'session.json'); + const workspaceExists = await fileExists(sessionPath); + + if (workspaceExists) { + console.log('=== RESUME MODE ==='); + console.log(`Workspace: ${workspace}\n`); + + // 1. Terminate any running workflows from previous attempts + const terminatedWorkflows = await terminateExistingWorkflows(client, workspace); + if (terminatedWorkflows.length > 0) { + console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`); + } + + // 2. Validate URL matches the workspace + const session = await readJson(sessionPath); + if (session.session.webUrl !== args.webUrl) { + console.error('ERROR: URL mismatch with workspace'); + console.error(` Workspace URL: ${session.session.webUrl}`); + console.error(` Provided URL: ${args.webUrl}`); + process.exit(1); + } + + // 3. Generate a new workflow ID scoped to this resume attempt + // 4. 
Return resolution with isResume=true so downstream uses resume logic + return { + workflowId: `${workspace}_resume_${Date.now()}`, + sessionId: workspace, + isResume: true, + terminatedWorkflows, + }; + } + + if (!isValidWorkspaceName(workspace)) { + console.error(`ERROR: Invalid workspace name: "${workspace}"`); + console.error(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric'); + process.exit(1); + } + + console.log('=== NEW NAMED WORKSPACE ==='); + console.log(`Workspace: ${workspace}\n`); + + return { + workflowId: `${workspace}_shannon-${Date.now()}`, + sessionId: workspace, + isResume: false, + terminatedWorkflows: [], + }; +} + +// === Pipeline Input Construction === + +function buildPipelineInput(args: CliArgs, workspace: WorkspaceResolution): PipelineInput { + return { + webUrl: args.webUrl, + repoPath: args.repoPath, + workflowId: workspace.workflowId, + sessionId: workspace.sessionId, + ...(args.configPath && { configPath: args.configPath }), + ...(args.outputPath && { outputPath: args.outputPath }), + ...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }), + ...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }), + ...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }), + }; +} + +// === Display Helpers === + +function displayWorkflowInfo(args: CliArgs, workspace: WorkspaceResolution): void { + console.log(`✓ Workflow started: ${workspace.workflowId}`); + if (workspace.isResume) { + console.log(` (Resuming workspace: ${workspace.sessionId})`); + } + console.log(); + console.log(` Target: ${args.webUrl}`); + console.log(` Repository: ${args.repoPath}`); + console.log(` Workspace: ${workspace.sessionId}`); + if (args.configPath) { + console.log(` Config: ${args.configPath}`); + } + if (args.displayOutputPath) { + console.log(` Output: ${args.displayOutputPath}`); + } + if 
(args.pipelineTestingMode) { + console.log(` Mode: Pipeline Testing`); + } + console.log(); +} + +function displayMonitoringInfo(args: CliArgs, workspace: WorkspaceResolution): void { + const effectiveDisplayPath = args.displayOutputPath || args.outputPath || './audit-logs'; + const outputDir = `${effectiveDisplayPath}/${workspace.sessionId}`; + + console.log('Monitor progress:'); + console.log(` Web UI: http://localhost:8233/namespaces/default/workflows/${workspace.workflowId}`); + console.log(` Logs: ./shannon logs ID=${workspace.workflowId}`); + console.log(); + console.log('Output:'); + console.log(` Reports: ${outputDir}`); + console.log(); +} + +// === Workflow Result Handling === + +async function waitForWorkflowResult( + handle: WorkflowHandle<(input: PipelineInput) => Promise>, + workspace: WorkspaceResolution +): Promise { + const progressInterval = setInterval(async () => { + try { + const progress = await handle.query(PROGRESS_QUERY); + const elapsed = Math.floor(progress.elapsedMs / 1000); + console.log( + `[${elapsed}s] Phase: ${progress.currentPhase || 'unknown'} | Agent: ${progress.currentAgent || 'none'} | Completed: ${progress.completedAgents.length}/13` + ); + } catch { + // Workflow may have completed + } + }, 30000); + + try { + // 1. Block until workflow completes + const result = await handle.result(); + clearInterval(progressInterval); + + // 2. Display run metrics + console.log('\nPipeline completed successfully!'); + if (result.summary) { + console.log(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`); + console.log(`Agents completed: ${result.summary.agentCount}`); + console.log(`Total turns: ${result.summary.totalTurns}`); + console.log(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`); + + // 3. 
Show cumulative cost across all resume attempts + if (workspace.isResume) { + try { + const session = await readJson( + path.join('./audit-logs', workspace.sessionId, 'session.json') + ); + console.log(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`); + } catch { + // Non-fatal, skip cumulative cost display + } + } + } + } catch (error) { + clearInterval(progressInterval); + console.error('\nPipeline failed:', error); + process.exit(1); + } +} + +// === Main Entry Point === + +async function startPipeline(): Promise { + // 1. Parse CLI args and display splash + const args = parseCliArgs(process.argv.slice(2)); await displaySplashScreen(); + // 2. Connect to Temporal server const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233'; - console.log(chalk.gray(`Connecting to Temporal at ${address}...`)); + console.log(`Connecting to Temporal at ${address}...`); const connection = await Connection.connect({ address }); const client = new Client({ connection }); try { - let terminatedWorkflows: string[] = []; - let workflowId: string; - let sessionId: string; // Workspace name (persistent directory) - let isResume = false; + // 3. 
Resolve workspace (new or resume) and build pipeline input + const workspace = await resolveWorkspace(client, args); + const input = buildPipelineInput(args, workspace); - if (resumeFromWorkspace) { - const sessionPath = path.join('./audit-logs', resumeFromWorkspace, 'session.json'); - const workspaceExists = await fileExists(sessionPath); - - if (workspaceExists) { - // === Resume Mode: existing workspace === - isResume = true; - console.log(chalk.cyan('=== RESUME MODE ===')); - console.log(`Workspace: ${resumeFromWorkspace}\n`); - - // Terminate any running workflows for this workspace - terminatedWorkflows = await terminateExistingWorkflows(client, resumeFromWorkspace); - - if (terminatedWorkflows.length > 0) { - console.log(chalk.yellow(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`)); - } - - // Validate URL matches workspace - const session = await readJson(sessionPath); - - if (session.session.webUrl !== webUrl) { - console.error(chalk.red('ERROR: URL mismatch with workspace')); - console.error(` Workspace URL: ${session.session.webUrl}`); - console.error(` Provided URL: ${webUrl}`); - process.exit(1); - } - - // Generate resume workflow ID - workflowId = `${resumeFromWorkspace}_resume_${Date.now()}`; - sessionId = resumeFromWorkspace; - } else { - // === New Named Workspace === - if (!isValidWorkspaceName(resumeFromWorkspace)) { - console.error(chalk.red(`ERROR: Invalid workspace name: "${resumeFromWorkspace}"`)); - console.error(chalk.gray(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric')); - process.exit(1); - } - - console.log(chalk.cyan('=== NEW NAMED WORKSPACE ===')); - console.log(`Workspace: ${resumeFromWorkspace}\n`); - - workflowId = `${resumeFromWorkspace}_shannon-${Date.now()}`; - sessionId = resumeFromWorkspace; - } - } else { - // === New Auto-Named Workflow === - const hostname = sanitizeHostname(webUrl); - workflowId = customWorkflowId || `${hostname}_shannon-${Date.now()}`; - 
sessionId = workflowId; - } - - const input: PipelineInput = { - webUrl, - repoPath, - workflowId, // Add for audit correlation - sessionId, // Workspace directory name - ...(configPath && { configPath }), - ...(outputPath && { outputPath }), - ...(pipelineTestingMode && { pipelineTestingMode }), - ...(isResume && resumeFromWorkspace && { resumeFromWorkspace }), - ...(terminatedWorkflows.length > 0 && { terminatedWorkflows }), - }; - - // Determine output directory for display (use sessionId for persistent directory) - // Use displayOutputPath (host path) if provided, otherwise fall back to outputPath or default - const effectiveDisplayPath = displayOutputPath || outputPath || './audit-logs'; - const outputDir = `${effectiveDisplayPath}/${sessionId}`; - - console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`)); - if (isResume) { - console.log(chalk.gray(` (Resuming workspace: ${sessionId})`)); - } - console.log(); - console.log(chalk.white(' Target: ') + chalk.cyan(webUrl)); - console.log(chalk.white(' Repository: ') + chalk.cyan(repoPath)); - console.log(chalk.white(' Workspace: ') + chalk.cyan(sessionId)); - if (configPath) { - console.log(chalk.white(' Config: ') + chalk.cyan(configPath)); - } - if (displayOutputPath) { - console.log(chalk.white(' Output: ') + chalk.cyan(displayOutputPath)); - } - if (pipelineTestingMode) { - console.log(chalk.white(' Mode: ') + chalk.yellow('Pipeline Testing')); - } - console.log(); - - // Start workflow by name (not by importing the function) + // 4. 
Start the Temporal workflow const handle = await client.workflow.start<(input: PipelineInput) => Promise>( 'pentestPipelineWorkflow', { taskQueue: 'shannon-pipeline', - workflowId, + workflowId: workspace.workflowId, args: [input], } ); - if (!waitForCompletion) { - console.log(chalk.bold('Monitor progress:')); - console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`)); - console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`)); - console.log(); - console.log(chalk.bold('Output:')); - console.log(chalk.white(' Reports: ') + chalk.cyan(outputDir)); - console.log(); - return; - } + // 5. Display info and optionally wait for completion + displayWorkflowInfo(args, workspace); - // Poll for progress every 30 seconds - const progressInterval = setInterval(async () => { - try { - const progress = await handle.query(PROGRESS_QUERY); - const elapsed = Math.floor(progress.elapsedMs / 1000); - console.log( - chalk.gray(`[${elapsed}s]`), - chalk.cyan(`Phase: ${progress.currentPhase || 'unknown'}`), - chalk.gray(`| Agent: ${progress.currentAgent || 'none'}`), - chalk.gray(`| Completed: ${progress.completedAgents.length}/13`) - ); - } catch { - // Workflow may have completed - } - }, 30000); - - try { - const result = await handle.result(); - clearInterval(progressInterval); - - console.log(chalk.green.bold('\nPipeline completed successfully!')); - if (result.summary) { - console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`)); - console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`)); - console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`)); - console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`)); - - // Show cumulative cost from session.json (includes all resume attempts) - if (isResume) { - try { - const session = await readJson( - path.join('./audit-logs', sessionId, 'session.json') - ); - 
console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`)); - } catch { - // Non-fatal, skip cumulative cost display - } - } - } - } catch (error) { - clearInterval(progressInterval); - console.error(chalk.red.bold('\nPipeline failed:'), error); - process.exit(1); + if (args.waitForCompletion) { + await waitForWorkflowResult(handle, workspace); + } else { + displayMonitoringInfo(args, workspace); } } finally { await connection.close(); @@ -383,6 +449,6 @@ async function startPipeline(): Promise { } startPipeline().catch((err) => { - console.error(chalk.red('Client error:'), err); + console.error('Client error:', err); process.exit(1); }); diff --git a/src/temporal/shared.ts b/src/temporal/shared.ts index 5280645..2a6d357 100644 --- a/src/temporal/shared.ts +++ b/src/temporal/shared.ts @@ -1,6 +1,7 @@ import { defineQuery } from '@temporalio/workflow'; -// === Types === +export type { AgentMetrics } from '../types/metrics.js'; +import type { AgentMetrics } from '../types/metrics.js'; export interface PipelineInput { webUrl: string; @@ -8,7 +9,7 @@ export interface PipelineInput { configPath?: string; outputPath?: string; pipelineTestingMode?: boolean; - workflowId?: string; // Added by client, used for audit correlation + workflowId?: string; // Used for audit correlation sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces) resumeFromWorkspace?: string; // Workspace name to resume from terminatedWorkflows?: string[]; // Workflows terminated during resume @@ -22,15 +23,6 @@ export interface ResumeState { originalWorkflowId: string; } -export interface AgentMetrics { - durationMs: number; - inputTokens: number | null; - outputTokens: number | null; - costUsd: number | null; - numTurns: number | null; - model?: string | undefined; -} - export interface PipelineSummary { totalCostUsd: number; totalDurationMs: number; // Wall-clock time (end - start) @@ -68,6 +60,4 @@ export interface 
VulnExploitPipelineResult { error: string | null; } -// === Queries === - export const getProgress = defineQuery('getProgress'); diff --git a/src/temporal/summary-mapper.ts b/src/temporal/summary-mapper.ts new file mode 100644 index 0000000..e160026 --- /dev/null +++ b/src/temporal/summary-mapper.ts @@ -0,0 +1,45 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Maps PipelineState to WorkflowSummary for audit logging. + * Pure function with no side effects. + */ + +import type { PipelineState } from './shared.js'; +import type { WorkflowSummary } from '../audit/workflow-logger.js'; + +/** + * Maps PipelineState to WorkflowSummary. + * + * This function is deterministic (no Date.now() or I/O) so it can be + * safely imported into Temporal workflows. The caller must ensure + * state.summary is set before calling (via computeSummary). 
+ */ +export function toWorkflowSummary( + state: PipelineState, + status: 'completed' | 'failed' +): WorkflowSummary { + // state.summary must be computed before calling this mapper + const summary = state.summary; + if (!summary) { + throw new Error('toWorkflowSummary: state.summary must be set before calling'); + } + + return { + status, + totalDurationMs: summary.totalDurationMs, + totalCostUsd: summary.totalCostUsd, + completedAgents: state.completedAgents, + agentMetrics: Object.fromEntries( + Object.entries(state.agentMetrics).map(([name, m]) => [ + name, + { durationMs: m.durationMs, costUsd: m.costUsd }, + ]) + ), + ...(state.error && { error: state.error }), + }; +} diff --git a/src/temporal/worker.ts b/src/temporal/worker.ts index 81c7f7e..b0f2f9b 100644 --- a/src/temporal/worker.ts +++ b/src/temporal/worker.ts @@ -24,7 +24,6 @@ import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker import { fileURLToPath } from 'node:url'; import path from 'node:path'; import dotenv from 'dotenv'; -import chalk from 'chalk'; import * as activities from './activities.js'; dotenv.config(); @@ -33,12 +32,12 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url)); async function runWorker(): Promise { const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233'; - console.log(chalk.cyan(`Connecting to Temporal at ${address}...`)); + console.log(`Connecting to Temporal at ${address}...`); const connection = await NativeConnection.connect({ address }); // Bundle workflows for Temporal's V8 isolate - console.log(chalk.gray('Bundling workflows...')); + console.log('Bundling workflows...'); const workflowBundle = await bundleWorkflowCode({ workflowsPath: path.join(__dirname, 'workflows.js'), }); @@ -54,26 +53,26 @@ async function runWorker(): Promise { // Graceful shutdown handling const shutdown = async (): Promise => { - console.log(chalk.yellow('\nShutting down worker...')); + console.log('\nShutting down worker...'); worker.shutdown(); 
}; process.on('SIGINT', shutdown); process.on('SIGTERM', shutdown); - console.log(chalk.green('Shannon worker started')); - console.log(chalk.gray('Task queue: shannon-pipeline')); - console.log(chalk.gray('Press Ctrl+C to stop\n')); + console.log('Shannon worker started'); + console.log('Task queue: shannon-pipeline'); + console.log('Press Ctrl+C to stop\n'); try { await worker.run(); } finally { await connection.close(); - console.log(chalk.gray('Worker stopped')); + console.log('Worker stopped'); } } runWorker().catch((err) => { - console.error(chalk.red('Worker failed:'), err); + console.error('Worker failed:', err); process.exit(1); }); diff --git a/src/temporal/workflows.ts b/src/temporal/workflows.ts index 3bc2804..45344d5 100644 --- a/src/temporal/workflows.ts +++ b/src/temporal/workflows.ts @@ -24,6 +24,7 @@ */ import { + log, proxyActivities, setHandler, workflowInfo, @@ -40,9 +41,10 @@ import { type AgentMetrics, type ResumeState, } from './shared.js'; -import type { VulnType } from '../queue-validation.js'; +import type { VulnType } from '../services/queue-validation.js'; import type { AgentName } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; +import { toWorkflowSummary } from './summary-mapper.js'; // Retry configuration for production (long intervals for billing recovery) const PRODUCTION_RETRY = { @@ -103,11 +105,9 @@ export async function pentestPipelineWorkflow( ): Promise { const { workflowId } = workflowInfo(); - // Select activity proxy based on testing mode // Pipeline testing uses fast retry intervals (10s) for quick iteration const a = input.pipelineTestingMode ? 
testActs : acts; - // Workflow state (queryable) const state: PipelineState = { status: 'running', currentPhase: null, @@ -120,7 +120,6 @@ export async function pentestPipelineWorkflow( summary: null, }; - // Register query handler for real-time progress inspection setHandler(getProgress, (): PipelineProgress => ({ ...state, workflowId, @@ -145,18 +144,17 @@ export async function pentestPipelineWorkflow( }), }; - // === RESUME LOGIC === let resumeState: ResumeState | null = null; if (input.resumeFromWorkspace) { - // Load resume state from existing workspace + // 1. Load resume state (validates workspace, cross-checks deliverables) resumeState = await a.loadResumeState( input.resumeFromWorkspace, input.webUrl, input.repoPath ); - // Restore git checkpoint and clean up partial deliverables + // 2. Restore git workspace and clean up incomplete deliverables const incompleteAgents = ALL_AGENTS.filter( (agentName) => !resumeState!.completedAgents.includes(agentName) ) as AgentName[]; @@ -167,120 +165,59 @@ export async function pentestPipelineWorkflow( incompleteAgents ); - // Check if all agents are already complete + // 3. Short-circuit if all agents already completed if (resumeState.completedAgents.length === ALL_AGENTS.length) { - console.log(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`); + log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`); state.status = 'completed'; state.completedAgents = [...resumeState.completedAgents]; state.summary = computeSummary(state); return state; } - // Record resume attempt in session.json + // 4. 
Record this resume attempt in session.json and workflow.log await a.recordResumeAttempt( activityInput, input.terminatedWorkflows || [], - resumeState.checkpointHash + resumeState.checkpointHash, + resumeState.originalWorkflowId, + resumeState.completedAgents ); - console.log('Resume state loaded and workspace restored'); + log.info('Resume state loaded and workspace restored'); } - // Helper to check if an agent should be skipped const shouldSkip = (agentName: string): boolean => { return resumeState?.completedAgents.includes(agentName) ?? false; }; - try { - // === Phase 1: Pre-Reconnaissance === - if (!shouldSkip('pre-recon')) { - state.currentPhase = 'pre-recon'; - state.currentAgent = 'pre-recon'; - await a.logPhaseTransition(activityInput, 'pre-recon', 'start'); - state.agentMetrics['pre-recon'] = - await a.runPreReconAgent(activityInput); - state.completedAgents.push('pre-recon'); - await a.logPhaseTransition(activityInput, 'pre-recon', 'complete'); + // Run a sequential agent phase (pre-recon, recon) + async function runSequentialPhase( + phaseName: string, + agentName: AgentName, + runAgent: (input: ActivityInput) => Promise + ): Promise { + if (!shouldSkip(agentName)) { + state.currentPhase = phaseName; + state.currentAgent = agentName; + await a.logPhaseTransition(activityInput, phaseName, 'start'); + state.agentMetrics[agentName] = await runAgent(activityInput); + state.completedAgents.push(agentName); + await a.logPhaseTransition(activityInput, phaseName, 'complete'); } else { - console.log('Skipping pre-recon (already complete)'); - state.completedAgents.push('pre-recon'); + log.info(`Skipping ${agentName} (already complete)`); + state.completedAgents.push(agentName); } + } - // === Phase 2: Reconnaissance === - if (!shouldSkip('recon')) { - state.currentPhase = 'recon'; - state.currentAgent = 'recon'; - await a.logPhaseTransition(activityInput, 'recon', 'start'); - state.agentMetrics['recon'] = await a.runReconAgent(activityInput); - 
state.completedAgents.push('recon'); - await a.logPhaseTransition(activityInput, 'recon', 'complete'); - } else { - console.log('Skipping recon (already complete)'); - state.completedAgents.push('recon'); - } - - // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) === - // Each vuln type runs as an independent pipeline: - // vuln agent → queue check → conditional exploit agent - // This eliminates the synchronization barrier between phases - each exploit - // starts immediately when its vuln agent finishes, not waiting for all. - state.currentPhase = 'vulnerability-exploitation'; - state.currentAgent = 'pipelines'; - await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start'); - - // Helper: Run a single vuln→exploit pipeline with skip logic - async function runVulnExploitPipeline( - vulnType: VulnType, - runVulnAgent: () => Promise, - runExploitAgent: () => Promise - ): Promise { - const vulnAgentName = `${vulnType}-vuln`; - const exploitAgentName = `${vulnType}-exploit`; - - // Step 1: Run vulnerability agent (or skip if completed) - let vulnMetrics: AgentMetrics | null = null; - if (!shouldSkip(vulnAgentName)) { - vulnMetrics = await runVulnAgent(); - } else { - console.log(`Skipping ${vulnAgentName} (already complete)`); - } - - // Step 2: Check exploitation queue (only if vuln agent ran or completed previously) - const decision = await a.checkExploitationQueue(activityInput, vulnType); - - // Step 3: Conditionally run exploit agent (skip if already completed) - let exploitMetrics: AgentMetrics | null = null; - if (decision.shouldExploit) { - if (!shouldSkip(exploitAgentName)) { - exploitMetrics = await runExploitAgent(); - } else { - console.log(`Skipping ${exploitAgentName} (already complete)`); - } - } - - return { - vulnType, - vulnMetrics, - exploitMetrics, - exploitDecision: { - shouldExploit: decision.shouldExploit, - vulnerabilityCount: decision.vulnerabilityCount, - }, - error: null, - }; - } - - // Determine which 
pipelines to run (skip if both vuln and exploit completed) - const pipelinesToRun: Array> = []; - - // Only run pipeline if at least one agent (vuln or exploit) is incomplete - const pipelineConfigs: Array<{ - vulnType: VulnType; - vulnAgent: string; - exploitAgent: string; - runVuln: () => Promise; - runExploit: () => Promise; - }> = [ + // Build pipeline configs for the 5 vuln→exploit pairs + function buildPipelineConfigs(): Array<{ + vulnType: VulnType; + vulnAgent: string; + exploitAgent: string; + runVuln: () => Promise; + runExploit: () => Promise; + }> { + return [ { vulnType: 'injection', vulnAgent: 'injection-vuln', @@ -317,56 +254,34 @@ export async function pentestPipelineWorkflow( runExploit: () => a.runAuthzExploitAgent(activityInput), }, ]; + } - for (const config of pipelineConfigs) { - const vulnComplete = shouldSkip(config.vulnAgent); - const exploitComplete = shouldSkip(config.exploitAgent); - - // Only run pipeline if at least one agent needs to run - if (!vulnComplete || !exploitComplete) { - pipelinesToRun.push( - runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit) - ); - } else { - console.log( - `Skipping entire ${config.vulnType} pipeline (both agents complete)` - ); - // Still need to mark them as completed in state - state.completedAgents.push(config.vulnAgent, config.exploitAgent); - } - } - - // Run pipelines in parallel with graceful failure handling - // Promise.allSettled ensures other pipelines continue if one fails - const pipelineResults = await Promise.allSettled(pipelinesToRun); - - // Aggregate results from all pipelines + // Aggregate results from settled pipeline promises into workflow state + function aggregatePipelineResults( + results: PromiseSettledResult[] + ): void { const failedPipelines: string[] = []; - for (const result of pipelineResults) { + + for (const result of results) { if (result.status === 'fulfilled') { const { vulnType, vulnMetrics, exploitMetrics } = result.value; - // Record vuln 
agent const vulnAgentName = `${vulnType}-vuln`; if (vulnMetrics) { state.agentMetrics[vulnAgentName] = vulnMetrics; state.completedAgents.push(vulnAgentName); } else if (shouldSkip(vulnAgentName)) { - // Agent was skipped because already complete state.completedAgents.push(vulnAgentName); } - // Record exploit agent (if it ran) const exploitAgentName = `${vulnType}-exploit`; if (exploitMetrics) { state.agentMetrics[exploitAgentName] = exploitMetrics; state.completedAgents.push(exploitAgentName); } else if (shouldSkip(exploitAgentName)) { - // Agent was skipped because already complete state.completedAgents.push(exploitAgentName); } } else { - // Pipeline failed - log error but continue with others const errorMsg = result.reason instanceof Error ? result.reason.message @@ -375,15 +290,87 @@ export async function pentestPipelineWorkflow( } } - // Log any pipeline failures (workflow continues despite failures) if (failedPipelines.length > 0) { - console.log( - `⚠️ ${failedPipelines.length} pipeline(s) failed:`, - failedPipelines - ); + log.warn(`${failedPipelines.length} pipeline(s) failed`, { + failures: failedPipelines, + }); + } + } + + try { + // === Phase 1: Pre-Reconnaissance === + await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent); + + // === Phase 2: Reconnaissance === + await runSequentialPhase('recon', 'recon', a.runReconAgent); + + // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) === + // Each vuln type runs as an independent pipeline: + // vuln agent → queue check → conditional exploit agent + // Exploits start immediately when their vuln finishes, not waiting for all. 
+ state.currentPhase = 'vulnerability-exploitation'; + state.currentAgent = 'pipelines'; + await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start'); + + // Closure over shouldSkip and activityInput by design (Temporal replay safety) + async function runVulnExploitPipeline( + vulnType: VulnType, + runVulnAgent: () => Promise, + runExploitAgent: () => Promise + ): Promise { + const vulnAgentName = `${vulnType}-vuln`; + const exploitAgentName = `${vulnType}-exploit`; + + // 1. Run vulnerability analysis (or skip if resumed) + let vulnMetrics: AgentMetrics | null = null; + if (!shouldSkip(vulnAgentName)) { + vulnMetrics = await runVulnAgent(); + } else { + log.info(`Skipping ${vulnAgentName} (already complete)`); + } + + // 2. Check exploitation queue for actionable findings + const decision = await a.checkExploitationQueue(activityInput, vulnType); + + // 3. Conditionally run exploitation agent + let exploitMetrics: AgentMetrics | null = null; + if (decision.shouldExploit) { + if (!shouldSkip(exploitAgentName)) { + exploitMetrics = await runExploitAgent(); + } else { + log.info(`Skipping ${exploitAgentName} (already complete)`); + } + } + + return { + vulnType, + vulnMetrics, + exploitMetrics, + exploitDecision: { + shouldExploit: decision.shouldExploit, + vulnerabilityCount: decision.vulnerabilityCount, + }, + error: null, + }; } - // Update phase markers + const pipelineConfigs = buildPipelineConfigs(); + const pipelinesToRun: Array> = []; + + for (const config of pipelineConfigs) { + if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) { + pipelinesToRun.push( + runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit) + ); + } else { + log.info(`Skipping entire ${config.vulnType} pipeline (both agents complete)`); + state.completedAgents.push(config.vulnAgent, config.exploitAgent); + } + } + + const pipelineResults = await Promise.allSettled(pipelinesToRun); + aggregatePipelineResults(pipelineResults); + 
state.currentPhase = 'exploitation'; state.currentAgent = null; await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete'); @@ -406,29 +393,17 @@ export async function pentestPipelineWorkflow( await a.logPhaseTransition(activityInput, 'reporting', 'complete'); } else { - console.log('Skipping report (already complete)'); + log.info('Skipping report (already complete)'); state.completedAgents.push('report'); } - // === Complete === state.status = 'completed'; state.currentPhase = null; state.currentAgent = null; state.summary = computeSummary(state); // Log workflow completion summary - await a.logWorkflowComplete(activityInput, { - status: 'completed', - totalDurationMs: state.summary.totalDurationMs, - totalCostUsd: state.summary.totalCostUsd, - completedAgents: state.completedAgents, - agentMetrics: Object.fromEntries( - Object.entries(state.agentMetrics).map(([name, m]) => [ - name, - { durationMs: m.durationMs, costUsd: m.costUsd }, - ]) - ), - }); + await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed')); return state; } catch (error) { @@ -438,19 +413,7 @@ export async function pentestPipelineWorkflow( state.summary = computeSummary(state); // Log workflow failure summary - await a.logWorkflowComplete(activityInput, { - status: 'failed', - totalDurationMs: state.summary.totalDurationMs, - totalCostUsd: state.summary.totalCostUsd, - completedAgents: state.completedAgents, - agentMetrics: Object.fromEntries( - Object.entries(state.agentMetrics).map(([name, m]) => [ - name, - { durationMs: m.durationMs, costUsd: m.costUsd }, - ]) - ), - error: state.error ?? 
undefined, - }); + await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed')); throw error; } diff --git a/src/temporal/workspaces.ts b/src/temporal/workspaces.ts index 4f46cd0..62d6b29 100644 --- a/src/temporal/workspaces.ts +++ b/src/temporal/workspaces.ts @@ -20,7 +20,6 @@ import fs from 'fs/promises'; import path from 'path'; -import chalk from 'chalk'; interface SessionJson { session: { @@ -59,16 +58,7 @@ function formatDuration(ms: number): string { } function getStatusDisplay(status: string): string { - switch (status) { - case 'completed': - return chalk.green(status); - case 'in-progress': - return chalk.yellow(status); - case 'failed': - return chalk.red(status); - default: - return status; - } + return status; } function truncate(str: string, maxLen: number): string { @@ -83,8 +73,8 @@ async function listWorkspaces(): Promise { try { entries = await fs.readdir(auditDir); } catch { - console.log(chalk.yellow('No audit-logs directory found.')); - console.log(chalk.gray(`Expected: ${auditDir}`)); + console.log('No audit-logs directory found.'); + console.log(`Expected: ${auditDir}`); return; } @@ -110,15 +100,15 @@ async function listWorkspaces(): Promise { } if (workspaces.length === 0) { - console.log(chalk.yellow('\nNo workspaces found.')); - console.log(chalk.gray('Run a pipeline first: ./shannon start URL= REPO=')); + console.log('\nNo workspaces found.'); + console.log('Run a pipeline first: ./shannon start URL= REPO='); return; } // Sort by creation date (most recent first) workspaces.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); - console.log(chalk.cyan.bold('\n=== Shannon Workspaces ===\n')); + console.log('\n=== Shannon Workspaces ===\n'); // Column widths const nameWidth = 30; @@ -129,16 +119,14 @@ async function listWorkspaces(): Promise { // Header console.log( - chalk.gray( - ' ' + - 'WORKSPACE'.padEnd(nameWidth) + - 'URL'.padEnd(urlWidth) + - 'STATUS'.padEnd(statusWidth) + - 
'DURATION'.padEnd(durationWidth) + - 'COST'.padEnd(costWidth) - ) + ' ' + + 'WORKSPACE'.padEnd(nameWidth) + + 'URL'.padEnd(urlWidth) + + 'STATUS'.padEnd(statusWidth) + + 'DURATION'.padEnd(durationWidth) + + 'COST'.padEnd(costWidth) ); - console.log(chalk.gray(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth))); + console.log(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth)); let resumableCount = 0; @@ -154,15 +142,15 @@ async function listWorkspaces(): Promise { resumableCount++; } - const resumeTag = isResumable ? chalk.cyan(' (resumable)') : ''; + const resumeTag = isResumable ? ' (resumable)' : ''; console.log( ' ' + - chalk.white(truncate(ws.name, nameWidth - 2).padEnd(nameWidth)) + - chalk.gray(truncate(ws.url, urlWidth - 2).padEnd(urlWidth)) + - getStatusDisplay(ws.status).padEnd(statusWidth + 10) + // +10 for chalk escape codes - chalk.gray(duration.padEnd(durationWidth)) + - chalk.gray(cost.padEnd(costWidth)) + + truncate(ws.name, nameWidth - 2).padEnd(nameWidth) + + truncate(ws.url, urlWidth - 2).padEnd(urlWidth) + + getStatusDisplay(ws.status).padEnd(statusWidth) + + duration.padEnd(durationWidth) + + cost.padEnd(costWidth) + resumeTag ); } @@ -170,16 +158,16 @@ async function listWorkspaces(): Promise { console.log(); const summary = `${workspaces.length} workspace${workspaces.length === 1 ? '' : 's'} found`; const resumeSummary = resumableCount > 0 ? 
` (${resumableCount} resumable)` : ''; - console.log(chalk.gray(`${summary}${resumeSummary}`)); + console.log(`${summary}${resumeSummary}`); if (resumableCount > 0) { - console.log(chalk.gray('\nResume with: ./shannon start URL= REPO= WORKSPACE=')); + console.log('\nResume with: ./shannon start URL= REPO= WORKSPACE='); } console.log(); } listWorkspaces().catch((err) => { - console.error(chalk.red('Error listing workspaces:'), err); + console.error('Error listing workspaces:', err); process.exit(1); }); diff --git a/src/tool-checker.ts b/src/tool-checker.ts deleted file mode 100644 index 6340575..0000000 --- a/src/tool-checker.ts +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { $ } from 'zx'; -import chalk from 'chalk'; - -type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis'; - -export type ToolAvailability = Record; - -// Check availability of required tools -export const checkToolAvailability = async (): Promise => { - const tools: ToolName[] = ['nmap', 'subfinder', 'whatweb', 'schemathesis']; - const availability: ToolAvailability = { - nmap: false, - subfinder: false, - whatweb: false, - schemathesis: false - }; - - console.log(chalk.blue('🔧 Checking tool availability...')); - - for (const tool of tools) { - try { - await $`command -v ${tool}`; - availability[tool] = true; - console.log(chalk.green(` ✅ ${tool} - available`)); - } catch { - availability[tool] = false; - console.log(chalk.yellow(` ⚠️ ${tool} - not found`)); - } - } - - return availability; -}; - -// Handle missing tools with user-friendly messages -export const handleMissingTools = (toolAvailability: ToolAvailability): ToolName[] => { - const missing = (Object.entries(toolAvailability) as Array<[ToolName, boolean]>) - .filter(([, available]) => !available) 
- .map(([tool]) => tool); - - if (missing.length > 0) { - console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`)); - console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.')); - - // Provide installation hints - const installHints: Record = { - 'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)', - 'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest', - 'whatweb': 'gem install whatweb', - 'schemathesis': 'pip install schemathesis' - }; - - console.log(chalk.gray('\nInstallation hints:')); - missing.forEach(tool => { - console.log(chalk.gray(` ${tool}: ${installHints[tool]}`)); - }); - console.log(''); - } - - return missing; -}; diff --git a/src/types/activity-logger.ts b/src/types/activity-logger.ts new file mode 100644 index 0000000..ff2ae3f --- /dev/null +++ b/src/types/activity-logger.ts @@ -0,0 +1,15 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Logger interface for services called from Temporal activities. + * Keeps services Temporal-agnostic while providing structured logging. 
+ */ +export interface ActivityLogger { + info(message: string, attrs?: Record<string, unknown>): void; + warn(message: string, attrs?: Record<string, unknown>): void; + error(message: string, attrs?: Record<string, unknown>): void; +} diff --git a/src/types/agents.ts b/src/types/agents.ts index 041e0f3..e0265a9 100644 --- a/src/types/agents.ts +++ b/src/types/agents.ts @@ -34,21 +34,6 @@ export const ALL_AGENTS = [ */ export type AgentName = typeof ALL_AGENTS[number]; -export type PromptName = - | 'pre-recon-code' - | 'recon' - | 'vuln-injection' - | 'vuln-xss' - | 'vuln-auth' - | 'vuln-ssrf' - | 'vuln-authz' - | 'exploit-injection' - | 'exploit-xss' - | 'exploit-auth' - | 'exploit-ssrf' - | 'exploit-authz' - | 'report-executive'; - export type PlaywrightAgent = | 'playwright-agent1' | 'playwright-agent2' @@ -56,7 +41,9 @@ export type PlaywrightAgent = | 'playwright-agent4' | 'playwright-agent5'; -export type AgentValidator = (sourceDir: string) => Promise<boolean>; +import type { ActivityLogger } from './activity-logger.js'; + +export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise<boolean>; export type AgentStatus = | 'pending' @@ -69,52 +56,21 @@ export interface AgentDefinition { name: AgentName; displayName: string; prerequisites: AgentName[]; + promptTemplate: string; + deliverableFilename: string; } /** - * Maps an agent name to its corresponding prompt file name. + * Vulnerability types supported by the pipeline. 
 */ -export function getPromptNameForAgent(agentName: AgentName): PromptName { - const mappings: Record<AgentName, PromptName> = { - 'pre-recon': 'pre-recon-code', - 'recon': 'recon', - 'injection-vuln': 'vuln-injection', - 'xss-vuln': 'vuln-xss', - 'auth-vuln': 'vuln-auth', - 'ssrf-vuln': 'vuln-ssrf', - 'authz-vuln': 'vuln-authz', - 'injection-exploit': 'exploit-injection', - 'xss-exploit': 'exploit-xss', - 'auth-exploit': 'exploit-auth', - 'ssrf-exploit': 'exploit-ssrf', - 'authz-exploit': 'exploit-authz', - 'report': 'report-executive', - }; - - return mappings[agentName]; -} +export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; /** - * Maps an agent name to its deliverable file path. - * Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES + * Decision returned by queue validation for exploitation phase. */ -export function getDeliverablePath(agentName: AgentName, repoPath: string): string { - const deliverableMap: Record<AgentName, string> = { - 'pre-recon': 'code_analysis_deliverable.md', - 'recon': 'recon_deliverable.md', - 'injection-vuln': 'injection_analysis_deliverable.md', - 'xss-vuln': 'xss_analysis_deliverable.md', - 'auth-vuln': 'auth_analysis_deliverable.md', - 'ssrf-vuln': 'ssrf_analysis_deliverable.md', - 'authz-vuln': 'authz_analysis_deliverable.md', - 'injection-exploit': 'injection_exploitation_evidence.md', - 'xss-exploit': 'xss_exploitation_evidence.md', - 'auth-exploit': 'auth_exploitation_evidence.md', - 'ssrf-exploit': 'ssrf_exploitation_evidence.md', - 'authz-exploit': 'authz_exploitation_evidence.md', - 'report': 'comprehensive_security_assessment_report.md', - }; - - const filename = deliverableMap[agentName]; - return `${repoPath}/deliverables/${filename}`; +export interface ExploitationDecision { + shouldExploit: boolean; + shouldRetry: boolean; + vulnerabilityCount: number; + vulnType: VulnType; } diff --git a/src/types/audit.ts b/src/types/audit.ts new file mode 100644 index 0000000..c433bb1 --- /dev/null +++ b/src/types/audit.ts @@ -0,0 
+1,35 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Audit system type definitions + */ + +/** + * Cross-cutting session metadata used by services, temporal, and audit. + */ +export interface SessionMetadata { + id: string; + webUrl: string; + repoPath?: string; + outputPath?: string; + [key: string]: unknown; +} + +/** + * Result data passed to audit system when an agent execution ends. + * Used by both AuditSession and MetricsTracker. + */ +export interface AgentEndResult { + attemptNumber: number; + duration_ms: number; + cost_usd: number; + success: boolean; + model?: string | undefined; + error?: string | undefined; + checkpoint?: string | undefined; + isFinalAttempt?: boolean | undefined; +} diff --git a/src/types/config.ts b/src/types/config.ts index 548a979..f076003 100644 --- a/src/types/config.ts +++ b/src/types/config.ts @@ -29,10 +29,8 @@ export interface Rules { export type LoginType = 'form' | 'sso' | 'api' | 'basic'; -export type SuccessConditionType = 'url' | 'cookie' | 'element' | 'redirect'; - export interface SuccessCondition { - type: SuccessConditionType; + type: 'url' | 'cookie' | 'element' | 'redirect'; value: string; } @@ -53,7 +51,6 @@ export interface Authentication { export interface Config { rules?: Rules; authentication?: Authentication; - login?: unknown; // Deprecated } export interface DistributedConfig { diff --git a/src/types/errors.ts b/src/types/errors.ts index 42bf091..f67594f 100644 --- a/src/types/errors.ts +++ b/src/types/errors.ts @@ -8,6 +8,39 @@ * Error type definitions */ +/** + * Specific error codes for reliable classification. + * + * ErrorCode provides precision within the coarse 8-category PentestErrorType. 
+ * Used by classifyErrorForTemporal for code-based classification (preferred) + * with string matching as fallback for external errors. + */ +export enum ErrorCode { + // Config errors (PentestErrorType: 'config') + CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND', + CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED', + CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR', + + // Agent execution errors (PentestErrorType: 'validation') + AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED', + OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED', + + // Billing errors (PentestErrorType: 'billing') + API_RATE_LIMITED = 'API_RATE_LIMITED', + SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED', + INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS', + + // Git errors (PentestErrorType: 'filesystem') + GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED', + GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED', + + // Prompt errors (PentestErrorType: 'prompt') + PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED', + + // Validation errors (PentestErrorType: 'validation') + DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND', +} + export type PentestErrorType = | 'config' | 'network' diff --git a/src/types/index.ts b/src/types/index.ts index 9d7088d..8cf4cd4 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -8,6 +8,10 @@ * Type definitions barrel export */ +export * from './activity-logger.js'; export * from './errors.js'; export * from './config.js'; export * from './agents.js'; +export * from './audit.js'; +export * from './result.js'; +export * from './metrics.js'; diff --git a/src/types/metrics.ts b/src/types/metrics.ts new file mode 100644 index 0000000..18422ff --- /dev/null +++ b/src/types/metrics.ts @@ -0,0 +1,19 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. 
+ +/** + * Agent metrics types used across services and activities. + * Centralized here to avoid temporal/shared.ts import boundary violations. + */ + +export interface AgentMetrics { + durationMs: number; + inputTokens: number | null; + outputTokens: number | null; + costUsd: number | null; + numTurns: number | null; + model?: string | undefined; +} diff --git a/src/types/result.ts b/src/types/result.ts new file mode 100644 index 0000000..9f79b71 --- /dev/null +++ b/src/types/result.ts @@ -0,0 +1,62 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Minimal Result type for explicit error handling. + * + * A discriminated union that makes error handling explicit without adding + * heavy machinery. Used in key modules (config loading, agent execution, + * queue validation) where callers need to make decisions based on error type. 
+ */ + +/** + * Success variant of Result + */ +export interface Ok<T> { + readonly ok: true; + readonly value: T; +} + +/** + * Error variant of Result + */ +export interface Err<E> { + readonly ok: false; + readonly error: E; +} + +/** + * Result type - either Ok with a value or Err with an error + */ +export type Result<T, E> = Ok<T> | Err<E>; + +/** + * Create a success Result + */ +export function ok<T>(value: T): Ok<T> { + return { ok: true, value }; +} + +/** + * Create an error Result + */ +export function err<E>(error: E): Err<E> { + return { ok: false, error }; +} + +/** + * Type guard for Ok variant + */ +export function isOk<T, E>(result: Result<T, E>): result is Ok<T> { + return result.ok === true; +} + +/** + * Type guard for Err variant + */ +export function isErr<T, E>(result: Result<T, E>): result is Err<E> { + return result.ok === false; +} diff --git a/src/utils/billing-detection.ts b/src/utils/billing-detection.ts new file mode 100644 index 0000000..a5258f9 --- /dev/null +++ b/src/utils/billing-detection.ts @@ -0,0 +1,95 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Consolidated billing/spending cap detection utilities. + * + * Anthropic's spending cap behavior is inconsistent: + * - Sometimes a proper SDK error (billing_error) + * - Sometimes Claude responds with text about the cap + * - Sometimes partial billing before cutoff + * + * This module provides defense-in-depth detection with shared pattern lists + * to prevent drift between detection points. + */ + +/** + * Text patterns for SDK output sniffing (what Claude says). + * Used by message-handlers.ts and the behavioral heuristic. 
+ */ +export const BILLING_TEXT_PATTERNS = [ + 'spending cap', + 'spending limit', + 'cap reached', + 'budget exceeded', + 'usage limit', + 'resets', +] as const; + +/** + * API patterns for error message classification (what the API returns). + * Used by classifyErrorForTemporal in error-handling.ts. + */ +export const BILLING_API_PATTERNS = [ + 'billing_error', + 'credit balance is too low', + 'insufficient credits', + 'usage is blocked due to insufficient credits', + 'please visit plans & billing', + 'please visit plans and billing', + 'usage limit reached', + 'quota exceeded', + 'daily rate limit', + 'limit will reset', + 'billing limit reached', +] as const; + +/** + * Checks if text matches any billing text pattern. + * Used for sniffing SDK output content for spending cap messages. + */ +export function matchesBillingTextPattern(text: string): boolean { + const lowerText = text.toLowerCase(); + return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern)); +} + +/** + * Checks if an error message matches any billing API pattern. + * Used for classifying API error messages. + */ +export function matchesBillingApiPattern(message: string): boolean { + const lowerMessage = message.toLowerCase(); + return BILLING_API_PATTERNS.some((pattern) => lowerMessage.includes(pattern)); +} + +/** + * Behavioral heuristic for detecting spending cap. + * + * When Claude hits a spending cap, it often returns a short message + * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns. + * + * This combines three signals: + * 1. Very low turn count (<=2) + * 2. Zero cost ($0) + * 3. 
Text matches billing patterns + * + * @param turns - Number of turns the agent took + * @param cost - Total cost in USD + * @param resultText - The result text from the agent + * @returns true if this looks like a spending cap hit + */ +export function isSpendingCapBehavior( + turns: number, + cost: number, + resultText: string +): boolean { + // Only check if turns <= 2 AND cost is exactly 0 + if (turns > 2 || cost !== 0) { + return false; + } + + return matchesBillingTextPattern(resultText); +} diff --git a/src/utils/metrics.ts b/src/utils/metrics.ts index 01cf79c..55033db 100644 --- a/src/utils/metrics.ts +++ b/src/utils/metrics.ts @@ -4,11 +4,6 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. -import chalk from 'chalk'; -import { formatDuration } from './formatting.js'; - -// Timing utilities - export class Timer { name: string; startTime: number; @@ -29,82 +24,3 @@ export class Timer { return end - this.startTime; } } - -interface TimingResultsAgents { - [key: string]: number; -} - -interface TimingResults { - total: Timer | null; - agents: TimingResultsAgents; -} - -interface CostResultsAgents { - [key: string]: number; -} - -interface CostResults { - agents: CostResultsAgents; - total: number; -} - -// Global timing and cost tracker -export const timingResults: TimingResults = { - total: null, - agents: {}, -}; - -export const costResults: CostResults = { - agents: {}, - total: 0, -}; - -// Function to display comprehensive timing summary -export const displayTimingSummary = (): void => { - if (!timingResults.total) { - console.log(chalk.yellow('No timing data available')); - return; - } - - const totalDuration = timingResults.total.stop(); - - console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY')); - console.log(chalk.gray('─'.repeat(60))); - - // Total execution time - console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`)); - console.log(); - - // Agent 
breakdown - if (Object.keys(timingResults.agents).length > 0) { - console.log(chalk.magenta.bold('🤖 Agent Breakdown:')); - let agentTotal = 0; - for (const [agent, duration] of Object.entries(timingResults.agents)) { - const percentage = ((duration / totalDuration) * 100).toFixed(1); - const displayName = agent.replace(/-/g, ' '); - console.log( - chalk.magenta( - ` ${displayName.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)` - ) - ); - agentTotal += duration; - } - console.log( - chalk.gray( - ` ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${((agentTotal / totalDuration) * 100).toFixed(1)}%)` - ) - ); - } - - // Cost breakdown - if (Object.keys(costResults.agents).length > 0) { - console.log(chalk.green.bold('\n💰 Cost Breakdown:')); - for (const [agent, cost] of Object.entries(costResults.agents)) { - const displayName = agent.replace(/-/g, ' '); - console.log(chalk.green(` ${displayName.padEnd(20)} $${cost.toFixed(4).padStart(8)}`)); - } - console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`)); - } - - console.log(chalk.gray('─'.repeat(60))); -}; diff --git a/src/utils/output-formatter.ts b/src/utils/output-formatter.ts deleted file mode 100644 index 1dabd43..0000000 --- a/src/utils/output-formatter.ts +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. 
- -import { AGENTS } from '../session-manager.js'; - -interface ToolCallInput { - url?: string; - element?: string; - key?: string; - fields?: unknown[]; - text?: string; - action?: string; - description?: string; - todos?: Array<{ - status: string; - content: string; - }>; - [key: string]: unknown; -} - -interface ToolCall { - name: string; - input?: ToolCallInput; -} - -/** - * Extract domain from URL for display - */ -function extractDomain(url: string): string { - try { - const urlObj = new URL(url); - return urlObj.hostname || url.slice(0, 30); - } catch { - return url.slice(0, 30); - } -} - -/** - * Summarize TodoWrite updates into clean progress indicators - */ -function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null { - if (!input?.todos || !Array.isArray(input.todos)) { - return null; - } - - const todos = input.todos; - const completed = todos.filter((t) => t.status === 'completed'); - const inProgress = todos.filter((t) => t.status === 'in_progress'); - - // Show recently completed tasks - if (completed.length > 0) { - const recent = completed[completed.length - 1]!; - return `✅ ${recent.content}`; - } - - // Show current in-progress task - if (inProgress.length > 0) { - const current = inProgress[0]!; - return `🔄 ${current.content}`; - } - - return null; -} - -/** - * Get agent prefix for parallel execution - */ -export function getAgentPrefix(description: string): string { - // Map agent names to their prefixes - const agentPrefixes: Record<string, string> = { - 'injection-vuln': '[Injection]', - 'xss-vuln': '[XSS]', - 'auth-vuln': '[Auth]', - 'authz-vuln': '[Authz]', - 'ssrf-vuln': '[SSRF]', - 'injection-exploit': '[Injection]', - 'xss-exploit': '[XSS]', - 'auth-exploit': '[Auth]', - 'authz-exploit': '[Authz]', - 'ssrf-exploit': '[SSRF]', - }; - - // First try to match by agent name directly - for (const [agentName, prefix] of Object.entries(agentPrefixes)) { - const agent = AGENTS[agentName as keyof typeof AGENTS]; - if (agent && 
description.includes(agent.displayName)) { - return prefix; - } - } - - // Fallback to partial matches for backwards compatibility - if (description.includes('injection')) return '[Injection]'; - if (description.includes('xss')) return '[XSS]'; - if (description.includes('authz')) return '[Authz]'; // Check authz before auth - if (description.includes('auth')) return '[Auth]'; - if (description.includes('ssrf')) return '[SSRF]'; - - return '[Agent]'; -} - -/** - * Format browser tool calls into clean progress indicators - */ -function formatBrowserAction(toolCall: ToolCall): string { - const toolName = toolCall.name; - const input = toolCall.input || {}; - - // Core Browser Operations - if (toolName === 'mcp__playwright__browser_navigate') { - const url = input.url || ''; - const domain = extractDomain(url); - return `🌐 Navigating to ${domain}`; - } - - if (toolName === 'mcp__playwright__browser_navigate_back') { - return `⬅️ Going back`; - } - - // Page Interaction - if (toolName === 'mcp__playwright__browser_click') { - const element = input.element || 'element'; - return `🖱️ Clicking ${element.slice(0, 25)}`; - } - - if (toolName === 'mcp__playwright__browser_hover') { - const element = input.element || 'element'; - return `👆 Hovering over ${element.slice(0, 20)}`; - } - - if (toolName === 'mcp__playwright__browser_type') { - const element = input.element || 'field'; - return `⌨️ Typing in ${element.slice(0, 20)}`; - } - - if (toolName === 'mcp__playwright__browser_press_key') { - const key = input.key || 'key'; - return `⌨️ Pressing ${key}`; - } - - // Form Handling - if (toolName === 'mcp__playwright__browser_fill_form') { - const fieldCount = input.fields?.length || 0; - return `📝 Filling ${fieldCount} form fields`; - } - - if (toolName === 'mcp__playwright__browser_select_option') { - return `📋 Selecting dropdown option`; - } - - if (toolName === 'mcp__playwright__browser_file_upload') { - return `📁 Uploading file`; - } - - // Page Analysis - if (toolName 
=== 'mcp__playwright__browser_snapshot') { - return `📸 Taking page snapshot`; - } - - if (toolName === 'mcp__playwright__browser_take_screenshot') { - return `📸 Taking screenshot`; - } - - if (toolName === 'mcp__playwright__browser_evaluate') { - return `🔍 Running JavaScript analysis`; - } - - // Waiting & Monitoring - if (toolName === 'mcp__playwright__browser_wait_for') { - if (input.text) { - return `⏳ Waiting for "${input.text.slice(0, 20)}"`; - } - return `⏳ Waiting for page response`; - } - - if (toolName === 'mcp__playwright__browser_console_messages') { - return `📜 Checking console logs`; - } - - if (toolName === 'mcp__playwright__browser_network_requests') { - return `🌐 Analyzing network traffic`; - } - - // Tab Management - if (toolName === 'mcp__playwright__browser_tabs') { - const action = input.action || 'managing'; - return `🗂️ ${action} browser tab`; - } - - // Dialog Handling - if (toolName === 'mcp__playwright__browser_handle_dialog') { - return `💬 Handling browser dialog`; - } - - // Fallback for any missed tools - const actionType = toolName.split('_').pop(); - return `🌐 Browser: ${actionType}`; -} - -/** - * Filter out JSON tool calls from content, with special handling for Task calls - */ -export function filterJsonToolCalls(content: string | null | undefined): string { - if (!content || typeof content !== 'string') { - return content || ''; - } - - const lines = content.split('\n'); - const processedLines: string[] = []; - - for (const line of lines) { - const trimmed = line.trim(); - - // Skip empty lines - if (trimmed === '') { - continue; - } - - // Check if this is a JSON tool call - if (trimmed.startsWith('{"type":"tool_use"')) { - try { - const toolCall = JSON.parse(trimmed) as ToolCall; - - // Special handling for Task tool calls - if (toolCall.name === 'Task') { - const description = toolCall.input?.description || 'analysis agent'; - processedLines.push(`🚀 Launching ${description}`); - continue; - } - - // Special handling for 
TodoWrite tool calls - if (toolCall.name === 'TodoWrite') { - const summary = summarizeTodoUpdate(toolCall.input); - if (summary) { - processedLines.push(summary); - } - continue; - } - - // Special handling for browser tool calls - if (toolCall.name.startsWith('mcp__playwright__browser_')) { - const browserAction = formatBrowserAction(toolCall); - if (browserAction) { - processedLines.push(browserAction); - } - continue; - } - - // Hide all other tool calls (Read, Write, Grep, etc.) - continue; - } catch { - // If JSON parsing fails, treat as regular text - processedLines.push(line); - } - } else { - // Keep non-JSON lines (assistant text) - processedLines.push(line); - } - } - - return processedLines.join('\n'); -} diff --git a/tsconfig.json b/tsconfig.json index f56a026..1222629 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -33,11 +33,11 @@ "exactOptionalPropertyTypes": true, // Style Options - // "noImplicitReturns": true, - // "noImplicitOverride": true, - // "noUnusedLocals": true, - // "noUnusedParameters": true, - // "noFallthroughCasesInSwitch": true, + "noImplicitReturns": true, + "noImplicitOverride": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, // "noPropertyAccessFromIndexSignature": true, // Recommended Options