refactor: extract services layer, Result type, and ErrorCode classification

- Add DI container (src/services/) with AgentExecutionService, ConfigLoaderService, and ExploitationCheckerService — pure domain logic with no Temporal dependencies - Introduce Result<T, E> type and ErrorCode enum for code-based error classification in classifyErrorForTemporal, replacing scattered string matching - Consolidate billing/spending cap detection into utils/billing-detection.ts with shared pattern lists across message-handlers, claude-executor, and error-handling - Extract LogStream abstraction for append-only logging with backpressure, used by both AgentLogger and WorkflowLogger - Simplify activities.ts from inline lifecycle logic to thin wrappers delegating to services, with heartbeat and error classification - Expand config-parser with human-readable AJV errors, security validation, and rule type-specific checks
2026-07-05 04:38:03 +02:00 · 2026-02-16 16:12:21 -08:00
parent ae69478541
commit d3816a29fa
31 changed files with 1664 additions and 707 deletions
@@ -117,6 +117,7 @@ Defensive security tool only. Use only on systems you own or have explicit permi
 - Dense callback chains when sequential logic is clearer
 - Sacrificing readability for DRY — some repetition is fine if clearer
 - Abstractions for one-time operations
+- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it

 ## Key Files

@@ -11,12 +11,13 @@ import chalk, { type ChalkInstance } from 'chalk';
 import { query } from '@anthropic-ai/claude-agent-sdk';

 import { isRetryableError, PentestError } from '../error-handling.js';
+import { isSpendingCapBehavior } from '../utils/billing-detection.js';
 import { timingResults, Timer } from '../utils/metrics.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
 import { AuditSession } from '../audit/index.js';
 import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
-import { getPromptNameForAgent } from '../types/agents.js';
+import { AGENTS } from '../session-manager.js';
 import type { AgentName } from '../types/index.js';

 import { dispatchMessage } from './message-handlers.js';
@@ -65,8 +66,8 @@ function buildMcpServers(
  };

  if (agentName) {
-    const promptName = getPromptNameForAgent(agentName as AgentName);
-    const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
+    const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
+    const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;

    if (playwrightMcpName) {
      console.log(chalk.gray(`    Assigned ${agentName} -> ${playwrightMcpName}`));
@@ -263,22 +264,13 @@ export async function runClaudePrompt(

    // === SPENDING CAP SAFEGUARD ===
    // Defense-in-depth: Detect spending cap that slipped through detectApiError().
-    // When spending cap is hit, Claude returns a short message with $0 cost.
-    // Legitimate agent work NEVER costs $0 with only 1-2 turns.
-    if (turnCount <= 2 && totalCost === 0) {
-      const resultLower = (result || '').toLowerCase();
-      const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
-      const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
-        resultLower.includes(kw)
+    // Uses consolidated billing detection from utils/billing-detection.ts
+    if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
+      throw new PentestError(
+        `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
+        'billing',
+        true // Retryable - Temporal will use 5-30 min backoff
      );
-
-      if (looksLikeBillingError) {
-        throw new PentestError(
-          `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
-          'billing',
-          true // Retryable - Temporal will use 5-30 min backoff
-        );
-      }
    }

    const duration = timer.stop();
@@ -7,6 +7,8 @@
 // Pure functions for processing SDK message types

 import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';
+import { matchesBillingTextPattern } from '../utils/billing-detection.js';
 import { filterJsonToolCalls } from '../utils/output-formatter.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import chalk from 'chalk';
@@ -75,25 +77,15 @@ function detectApiError(content: string): ApiErrorDetection {
  // When Claude Code hits its spending cap, it returns a short message like
  // "Spending cap reached resets 8am" instead of throwing an error.
  // These should retry with 5-30 min backoff so workflows can recover when cap resets.
-  const BILLING_PATTERNS = [
-    'spending cap',
-    'spending limit',
-    'cap reached',
-    'budget exceeded',
-    'usage limit',
-  ];
-
-  const isBillingError = BILLING_PATTERNS.some((pattern) =>
-    lowerContent.includes(pattern)
-  );
-
-  if (isBillingError) {
+  if (matchesBillingTextPattern(content)) {
    return {
      detected: true,
      shouldThrow: new PentestError(
        `Billing limit reached: ${content.slice(0, 100)}`,
        'billing',
-        true // RETRYABLE - Temporal will use 5-30 min backoff
+        true, // RETRYABLE - Temporal will use 5-30 min backoff
+        {},
+        ErrorCode.SPENDING_CAP_REACHED
      ),
    };
  }
@@ -127,7 +119,9 @@ function handleStructuredError(
        shouldThrow: new PentestError(
          `Billing error (structured): ${content.slice(0, 100)}`,
          'billing',
-          true // Retryable with backoff
+          true, // Retryable with backoff
+          {},
+          ErrorCode.INSUFFICIENT_CREDITS
        ),
      };
    case 'rate_limit':
@@ -136,7 +130,9 @@ function handleStructuredError(
        shouldThrow: new PentestError(
          `Rate limit hit (structured): ${content.slice(0, 100)}`,
          'network',
-          true // Retryable with backoff
+          true, // Retryable with backoff
+          {},
+          ErrorCode.API_RATE_LIMITED
        ),
      };
    case 'authentication_failed':
@@ -18,6 +18,8 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import { SessionMutex } from '../utils/concurrency.js';
 import type { AgentEndResult } from '../types/index.js';
+import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';

 // Global mutex instance
 const sessionMutex = new SessionMutex();
@@ -40,10 +42,22 @@ export class AuditSession {

    // Validate required fields
    if (!this.sessionId) {
-      throw new Error('sessionMetadata.id is required');
+      throw new PentestError(
+        'sessionMetadata.id is required',
+        'config',
+        false,
+        { field: 'sessionMetadata.id' },
+        ErrorCode.CONFIG_VALIDATION_FAILED
+      );
    }
    if (!this.sessionMetadata.webUrl) {
-      throw new Error('sessionMetadata.webUrl is required');
+      throw new PentestError(
+        'sessionMetadata.webUrl is required',
+        'config',
+        false,
+        { field: 'sessionMetadata.webUrl' },
+        ErrorCode.CONFIG_VALIDATION_FAILED
+      );
    }

    // Components
@@ -124,7 +138,13 @@ export class AuditSession {
   */
  async logEvent(eventType: string, eventData: unknown): Promise<void> {
    if (!this.currentLogger) {
-      throw new Error('No active logger. Call startAgent() first.');
+      throw new PentestError(
+        'No active logger. Call startAgent() first.',
+        'validation',
+        false,
+        {},
+        ErrorCode.AGENT_EXECUTION_FAILED
+      );
    }

    // Log to agent-specific log file (JSON format)
@@ -0,0 +1,127 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * LogStream - Stream composition utility for append-only logging
+ *
+ * Encapsulates the common stream management pattern used by AgentLogger
+ * and WorkflowLogger: opening streams in append mode, handling backpressure,
+ * and proper cleanup.
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { ensureDirectory } from '../utils/file-io.js';
+
+/**
+ * LogStream - Manages a single append-only log file stream
+ */
+export class LogStream {
+  private readonly filePath: string;
+  private stream: fs.WriteStream | null = null;
+  private _isOpen: boolean = false;
+
+  constructor(filePath: string) {
+    this.filePath = filePath;
+  }
+
+  /**
+   * Open the stream for writing (creates parent directories, opens in append mode)
+   */
+  async open(): Promise<void> {
+    if (this._isOpen) {
+      return;
+    }
+
+    // Ensure parent directory exists
+    await ensureDirectory(path.dirname(this.filePath));
+
+    // Create write stream in append mode
+    this.stream = fs.createWriteStream(this.filePath, {
+      flags: 'a',
+      encoding: 'utf8',
+      autoClose: true,
+    });
+
+    // Handle stream errors to prevent crashes (log and mark closed)
+    this.stream.on('error', (err) => {
+      console.error(`LogStream error for ${this.filePath}:`, err.message);
+      this._isOpen = false;
+    });
+
+    this._isOpen = true;
+  }
+
+  /**
+   * Write text to the stream with backpressure handling
+   */
+  async write(text: string): Promise<void> {
+    return new Promise((resolve, reject) => {
+      if (!this._isOpen || !this.stream) {
+        reject(new Error('LogStream not open'));
+        return;
+      }
+
+      const stream = this.stream;
+      let drainHandler: (() => void) | null = null;
+
+      const cleanup = () => {
+        if (drainHandler) {
+          stream.removeListener('drain', drainHandler);
+          drainHandler = null;
+        }
+      };
+
+      const needsDrain = !stream.write(text, 'utf8', (error) => {
+        cleanup();
+        if (error) {
+          reject(error);
+        } else if (!needsDrain) {
+          resolve();
+        }
+      });
+
+      if (needsDrain) {
+        drainHandler = () => {
+          cleanup();
+          resolve();
+        };
+        stream.once('drain', drainHandler);
+      }
+    });
+  }
+
+  /**
+   * Close the stream (flush and close)
+   */
+  async close(): Promise<void> {
+    if (!this._isOpen || !this.stream) {
+      return;
+    }
+
+    return new Promise((resolve) => {
+      this.stream!.end(() => {
+        this._isOpen = false;
+        this.stream = null;
+        resolve();
+      });
+    });
+  }
+
+  /**
+   * Check if the stream is currently open
+   */
+  get isOpen(): boolean {
+    return this._isOpen;
+  }
+
+  /**
+   * Get the file path this stream writes to
+   */
+  get path(): string {
+    return this.filePath;
+  }
+}
@@ -8,10 +8,9 @@
 * Append-Only Agent Logger
 *
 * Provides crash-safe, append-only logging for agent execution.
- * Uses file streams with immediate flush to prevent data loss.
+ * Uses LogStream for stream management with backpressure handling.
 */

-import fs from 'fs';
 import {
  generateLogPath,
  generatePromptPath,
@@ -19,6 +18,7 @@ import {
 } from './utils.js';
 import { atomicWrite } from '../utils/file-io.js';
 import { formatTimestamp } from '../utils/formatting.js';
+import { LogStream } from './log-stream.js';

 interface LogEvent {
  type: string;
@@ -30,13 +30,11 @@ interface LogEvent {
 * AgentLogger - Manages append-only logging for a single agent execution
 */
 export class AgentLogger {
-  private sessionMetadata: SessionMetadata;
-  private agentName: string;
-  private attemptNumber: number;
-  private timestamp: number;
-  private logPath: string;
-  private stream: fs.WriteStream | null = null;
-  private isOpen: boolean = false;
+  private readonly sessionMetadata: SessionMetadata;
+  private readonly agentName: string;
+  private readonly attemptNumber: number;
+  private readonly timestamp: number;
+  private readonly logStream: LogStream;

  constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
    this.sessionMetadata = sessionMetadata;
@@ -44,26 +42,20 @@ export class AgentLogger {
    this.attemptNumber = attemptNumber;
    this.timestamp = Date.now();

-    // Generate log file path
-    this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
+    // Generate log file path and create stream
+    const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
+    this.logStream = new LogStream(logPath);
  }

  /**
   * Initialize the log stream (creates file and opens stream)
   */
  async initialize(): Promise<void> {
-    if (this.isOpen) {
+    if (this.logStream.isOpen) {
      return; // Already initialized
    }

-    // Create write stream with append mode and auto-flush
-    this.stream = fs.createWriteStream(this.logPath, {
-      flags: 'a', // Append mode
-      encoding: 'utf8',
-      autoClose: true,
-    });
-
-    this.isOpen = true;
+    await this.logStream.open();

    // Write header
    await this.writeHeader();
@@ -83,29 +75,7 @@ export class AgentLogger {
      `========================================\n`,
    ].join('\n');

-    return this.writeRaw(header);
-  }
-
-  /**
-   * Write raw text to log file with immediate flush
-   */
-  private writeRaw(text: string): Promise<void> {
-    return new Promise((resolve, reject) => {
-      if (!this.isOpen || !this.stream) {
-        reject(new Error('Logger not initialized'));
-        return;
-      }
-
-      const needsDrain = !this.stream.write(text, 'utf8', (error) => {
-        if (error) reject(error);
-      });
-
-      if (needsDrain) {
-        this.stream.once('drain', resolve);
-      } else {
-        resolve();
-      }
-    });
+    return this.logStream.write(header);
  }

  /**
@@ -120,23 +90,14 @@ export class AgentLogger {
    };

    const eventLine = `${JSON.stringify(event)}\n`;
-    return this.writeRaw(eventLine);
+    return this.logStream.write(eventLine);
  }

  /**
   * Close the log stream
   */
  async close(): Promise<void> {
-    if (!this.isOpen || !this.stream) {
-      return;
-    }
-
-    return new Promise((resolve) => {
-      this.stream!.end(() => {
-        this.isOpen = false;
-        resolve();
-      });
-    });
+    return this.logStream.close();
  }

  /**
@@ -18,6 +18,8 @@ import {
 import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
 import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
 import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
+import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';
 import type { AgentName, AgentEndResult } from '../types/index.js';

 interface AttemptData {
@@ -159,7 +161,13 @@ export class MetricsTracker {
   */
  async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
    if (!this.data) {
-      throw new Error('MetricsTracker not initialized');
+      throw new PentestError(
+        'MetricsTracker not initialized',
+        'validation',
+        false,
+        {},
+        ErrorCode.AGENT_EXECUTION_FAILED
+      );
    }

    // Initialize agent metrics if not exists
@@ -251,7 +259,13 @@ export class MetricsTracker {
    checkpointHash?: string
  ): Promise<void> {
    if (!this.data) {
-      throw new Error('MetricsTracker not initialized');
+      throw new PentestError(
+        'MetricsTracker not initialized',
+        'validation',
+        false,
+        {},
+        ErrorCode.AGENT_EXECUTION_FAILED
+      );
    }

    // Ensure originalWorkflowId is set (backfill if missing from old sessions)
@@ -11,10 +11,10 @@
 * Optimized for `tail -f` viewing during concurrent workflow execution.
 */

-import fs from 'fs';
-import path from 'path';
-import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
+import fs from 'fs/promises';
+import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
 import { formatDuration, formatTimestamp } from '../utils/formatting.js';
+import { LogStream } from './log-stream.js';

 export interface AgentLogDetails {
  attemptNumber?: number;
@@ -42,38 +42,28 @@ export interface WorkflowSummary {
 * WorkflowLogger - Manages the unified workflow log file
 */
 export class WorkflowLogger {
-  private sessionMetadata: SessionMetadata;
-  private logPath: string;
-  private stream: fs.WriteStream | null = null;
-  private initialized: boolean = false;
+  private readonly sessionMetadata: SessionMetadata;
+  private readonly logStream: LogStream;

  constructor(sessionMetadata: SessionMetadata) {
    this.sessionMetadata = sessionMetadata;
-    this.logPath = generateWorkflowLogPath(sessionMetadata);
+    const logPath = generateWorkflowLogPath(sessionMetadata);
+    this.logStream = new LogStream(logPath);
  }

  /**
   * Initialize the log stream (creates file and writes header)
   */
  async initialize(): Promise<void> {
-    if (this.initialized) {
+    if (this.logStream.isOpen) {
      return;
    }

-    // Ensure directory exists
-    await ensureDirectory(path.dirname(this.logPath));
-
-    // Create write stream with append mode
-    this.stream = fs.createWriteStream(this.logPath, {
-      flags: 'a',
-      encoding: 'utf8',
-      autoClose: true,
-    });
-
-    this.initialized = true;
+    // Open the stream (LogStream.open() handles directory creation)
+    await this.logStream.open();

    // Write header only if file is new (empty)
-    const stats = await fs.promises.stat(this.logPath).catch(() => null);
+    const stats = await fs.stat(this.logStream.path).catch(() => null);
    if (!stats || stats.size === 0) {
      await this.writeHeader();
    }
@@ -94,29 +84,7 @@ export class WorkflowLogger {
      ``,
    ].join('\n');

-    return this.writeRaw(header);
-  }
-
-  /**
-   * Write raw text to log file with immediate flush
-   */
-  private writeRaw(text: string): Promise<void> {
-    return new Promise((resolve, reject) => {
-      if (!this.initialized || !this.stream) {
-        reject(new Error('WorkflowLogger not initialized'));
-        return;
-      }
-
-      const needsDrain = !this.stream.write(text, 'utf8', (error) => {
-        if (error) reject(error);
-      });
-
-      if (needsDrain) {
-        this.stream.once('drain', resolve);
-      } else {
-        resolve();
-      }
-    });
+    return this.logStream.write(header);
  }

  /**
@@ -138,10 +106,10 @@ export class WorkflowLogger {

    // Add blank line before phase start for readability
    if (event === 'start') {
-      await this.writeRaw('\n');
+      await this.logStream.write('\n');
    }

-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -184,7 +152,7 @@ export class WorkflowLogger {
    }

    const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -194,7 +162,7 @@ export class WorkflowLogger {
    await this.ensureInitialized();

    const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -205,7 +173,7 @@ export class WorkflowLogger {

    const contextStr = context ? ` (${context})` : '';
    const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -301,7 +269,7 @@ export class WorkflowLogger {
    const params = this.formatToolParams(toolName, parameters);
    const paramStr = params ? `: ${params}` : '';
    const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -313,7 +281,7 @@ export class WorkflowLogger {
    // Show full content, replacing newlines with escaped version for single-line output
    const escaped = content.replace(/\n/g, '\\n');
    const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }

  /**
@@ -324,42 +292,42 @@ export class WorkflowLogger {

    const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';

-    await this.writeRaw('\n');
-    await this.writeRaw(`================================================================================\n`);
-    await this.writeRaw(`Workflow ${status}\n`);
-    await this.writeRaw(`────────────────────────────────────────\n`);
-    await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
-    await this.writeRaw(`Status:      ${summary.status}\n`);
-    await this.writeRaw(`Duration:    ${formatDuration(summary.totalDurationMs)}\n`);
-    await this.writeRaw(`Total Cost:  $${summary.totalCostUsd.toFixed(4)}\n`);
-    await this.writeRaw(`Agents:      ${summary.completedAgents.length} completed\n`);
+    await this.logStream.write('\n');
+    await this.logStream.write(`================================================================================\n`);
+    await this.logStream.write(`Workflow ${status}\n`);
+    await this.logStream.write(`────────────────────────────────────────\n`);
+    await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
+    await this.logStream.write(`Status:      ${summary.status}\n`);
+    await this.logStream.write(`Duration:    ${formatDuration(summary.totalDurationMs)}\n`);
+    await this.logStream.write(`Total Cost:  $${summary.totalCostUsd.toFixed(4)}\n`);
+    await this.logStream.write(`Agents:      ${summary.completedAgents.length} completed\n`);

    if (summary.error) {
-      await this.writeRaw(`Error:       ${summary.error}\n`);
+      await this.logStream.write(`Error:       ${summary.error}\n`);
    }

-    await this.writeRaw(`\n`);
-    await this.writeRaw(`Agent Breakdown:\n`);
+    await this.logStream.write(`\n`);
+    await this.logStream.write(`Agent Breakdown:\n`);

    for (const agentName of summary.completedAgents) {
      const metrics = summary.agentMetrics[agentName];
      if (metrics) {
        const duration = formatDuration(metrics.durationMs);
        const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
-        await this.writeRaw(`  - ${agentName} (${duration}, ${cost})\n`);
+        await this.logStream.write(`  - ${agentName} (${duration}, ${cost})\n`);
      } else {
-        await this.writeRaw(`  - ${agentName}\n`);
+        await this.logStream.write(`  - ${agentName}\n`);
      }
    }

-    await this.writeRaw(`================================================================================\n`);
+    await this.logStream.write(`================================================================================\n`);
  }

  /**
   * Ensure initialized (helper for lazy initialization)
   */
  private async ensureInitialized(): Promise<void> {
-    if (!this.initialized) {
+    if (!this.logStream.isOpen) {
      await this.initialize();
    }
  }
@@ -368,15 +336,6 @@ export class WorkflowLogger {
   * Close the log stream
   */
  async close(): Promise<void> {
-    if (!this.initialized || !this.stream) {
-      return;
-    }
-
-    return new Promise((resolve) => {
-      this.stream!.end(() => {
-        this.initialized = false;
-        resolve();
-      });
-    });
+    return this.logStream.close();
  }
 }
@@ -7,9 +7,10 @@
 import { createRequire } from 'module';
 import { fs } from 'zx';
 import yaml from 'js-yaml';
-import { Ajv, type ValidateFunction } from 'ajv';
+import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
 import type { FormatsPlugin } from 'ajv-formats';
 import { PentestError } from './error-handling.js';
+import { ErrorCode } from './types/errors.js';
 import type {
  Config,
  Rule,
@@ -53,20 +54,155 @@ const DANGEROUS_PATTERNS: RegExp[] = [
  /file:/i, // File URLs
 ];

+/**
+ * Format a single AJV error into a human-readable message.
+ * Translates AJV error keywords into plain English descriptions.
+ */
+function formatAjvError(error: ErrorObject): string {
+  const path = error.instancePath || 'root';
+  const params = error.params as Record<string, unknown>;
+
+  switch (error.keyword) {
+    case 'required': {
+      const missingProperty = params.missingProperty as string;
+      return `Missing required field: "${missingProperty}" at ${path || 'root'}`;
+    }
+
+    case 'type': {
+      const expectedType = params.type as string;
+      return `Invalid type at ${path}: expected ${expectedType}`;
+    }
+
+    case 'enum': {
+      const allowedValues = params.allowedValues as unknown[];
+      const formattedValues = allowedValues.map((v) => `"${v}"`).join(', ');
+      return `Invalid value at ${path}: must be one of [${formattedValues}]`;
+    }
+
+    case 'additionalProperties': {
+      const additionalProperty = params.additionalProperty as string;
+      return `Unknown field at ${path}: "${additionalProperty}" is not allowed`;
+    }
+
+    case 'minLength': {
+      const limit = params.limit as number;
+      return `Value at ${path} is too short: must have at least ${limit} character(s)`;
+    }
+
+    case 'maxLength': {
+      const limit = params.limit as number;
+      return `Value at ${path} is too long: must have at most ${limit} character(s)`;
+    }
+
+    case 'minimum': {
+      const limit = params.limit as number;
+      return `Value at ${path} is too small: must be >= ${limit}`;
+    }
+
+    case 'maximum': {
+      const limit = params.limit as number;
+      return `Value at ${path} is too large: must be <= ${limit}`;
+    }
+
+    case 'minItems': {
+      const limit = params.limit as number;
+      return `Array at ${path} has too few items: must have at least ${limit} item(s)`;
+    }
+
+    case 'maxItems': {
+      const limit = params.limit as number;
+      return `Array at ${path} has too many items: must have at most ${limit} item(s)`;
+    }
+
+    case 'pattern': {
+      const pattern = params.pattern as string;
+      return `Value at ${path} does not match required pattern: ${pattern}`;
+    }
+
+    case 'format': {
+      const format = params.format as string;
+      return `Value at ${path} must be a valid ${format}`;
+    }
+
+    case 'const': {
+      const allowedValue = params.allowedValue as unknown;
+      return `Value at ${path} must be exactly "${allowedValue}"`;
+    }
+
+    case 'oneOf': {
+      return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`;
+    }
+
+    case 'anyOf': {
+      return `Value at ${path} must match at least one of the allowed schemas`;
+    }
+
+    case 'not': {
+      return `Value at ${path} matches a schema it should not match`;
+    }
+
+    case 'if': {
+      return `Value at ${path} does not satisfy conditional schema requirements`;
+    }
+
+    case 'uniqueItems': {
+      const i = params.i as number;
+      const j = params.j as number;
+      return `Array at ${path} contains duplicate items at positions ${j} and ${i}`;
+    }
+
+    case 'propertyNames': {
+      const propertyName = params.propertyName as string;
+      return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`;
+    }
+
+    case 'dependencies':
+    case 'dependentRequired': {
+      const property = params.property as string;
+      const missingProperty = params.missingProperty as string;
+      return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`;
+    }
+
+    default: {
+      // Fallback for any unhandled keywords - use AJV's message if available
+      const message = error.message || `validation failed for keyword "${error.keyword}"`;
+      return `${path}: ${message}`;
+    }
+  }
+}
+
+/**
+ * Format all AJV errors into a list of human-readable messages.
+ * Returns an array of formatted error strings.
+ */
+function formatAjvErrors(errors: ErrorObject[]): string[] {
+  return errors.map(formatAjvError);
+}
+
 // Parse and load YAML configuration file with enhanced safety
 export const parseConfig = async (configPath: string): Promise<Config> => {
  try {
    // File existence check
    if (!(await fs.pathExists(configPath))) {
-      throw new Error(`Configuration file not found: ${configPath}`);
+      throw new PentestError(
+        `Configuration file not found: ${configPath}`,
+        'config',
+        false,
+        { configPath },
+        ErrorCode.CONFIG_NOT_FOUND
+      );
    }

    // File size check (prevent extremely large files)
    const stats = await fs.stat(configPath);
    const maxFileSize = 1024 * 1024; // 1MB
    if (stats.size > maxFileSize) {
-      throw new Error(
-        `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
+      throw new PentestError(
+        `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
+        'config',
+        false,
+        { configPath, fileSize: stats.size, maxFileSize },
+        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }

@@ -75,7 +211,13 @@ export const parseConfig = async (configPath: string): Promise<Config> => {

    // Basic content validation
    if (!configContent.trim()) {
-      throw new Error('Configuration file is empty');
+      throw new PentestError(
+        'Configuration file is empty',
+        'config',
+        false,
+        { configPath },
+        ErrorCode.CONFIG_VALIDATION_FAILED
+      );
    }

    // Parse YAML with safety options
@@ -88,12 +230,24 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
      });
    } catch (yamlError) {
      const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
-      throw new Error(`YAML parsing failed: ${errMsg}`);
+      throw new PentestError(
+        `YAML parsing failed: ${errMsg}`,
+        'config',
+        false,
+        { configPath, originalError: errMsg },
+        ErrorCode.CONFIG_PARSE_ERROR
+      );
    }

    // Additional safety check
    if (config === null || config === undefined) {
-      throw new Error('Configuration file resulted in null/undefined after parsing');
+      throw new PentestError(
+        'Configuration file resulted in null/undefined after parsing',
+        'config',
+        false,
+        { configPath },
+        ErrorCode.CONFIG_PARSE_ERROR
+      );
    }

    // Validate the configuration structure and content
@@ -101,20 +255,19 @@ export const parseConfig = async (configPath: string): Promise<Config> => {

    return config as Config;
  } catch (error) {
-    const errMsg = error instanceof Error ? error.message : String(error);
-    // Enhance error message with context
-    if (
-      errMsg.startsWith('Configuration file not found') ||
-      errMsg.startsWith('YAML parsing failed') ||
-      errMsg.includes('must be') ||
-      errMsg.includes('exceeds maximum')
-    ) {
-      // These are already well-formatted errors, re-throw as-is
+    // PentestError instances are already well-formatted, re-throw as-is
+    if (error instanceof PentestError) {
      throw error;
-    } else {
-      // Wrap other errors with context
-      throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
    }
+    // Wrap other errors with context
+    const errMsg = error instanceof Error ? error.message : String(error);
+    throw new PentestError(
+      `Failed to parse configuration file '${configPath}': ${errMsg}`,
+      'config',
+      false,
+      { configPath, originalError: errMsg },
+      ErrorCode.CONFIG_PARSE_ERROR
+    );
  }
 };

@@ -122,32 +275,42 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
 const validateConfig = (config: Config): void => {
  // Basic structure validation
  if (!config || typeof config !== 'object') {
-    throw new Error('Configuration must be a valid object');
+    throw new PentestError(
+      'Configuration must be a valid object',
+      'config',
+      false,
+      {},
+      ErrorCode.CONFIG_VALIDATION_FAILED
+    );
  }

  if (Array.isArray(config)) {
-    throw new Error('Configuration must be an object, not an array');
+    throw new PentestError(
+      'Configuration must be an object, not an array',
+      'config',
+      false,
+      {},
+      ErrorCode.CONFIG_VALIDATION_FAILED
+    );
  }

  // JSON Schema validation
  const isValid = validateSchema(config);
  if (!isValid) {
    const errors = validateSchema.errors || [];
-    const errorMessages = errors.map((err) => {
-      const path = err.instancePath || 'root';
-      return `${path}: ${err.message}`;
-    });
-    throw new Error(`Configuration validation failed:\n  - ${errorMessages.join('\n  - ')}`);
+    const errorMessages = formatAjvErrors(errors);
+    throw new PentestError(
+      `Configuration validation failed:\n  - ${errorMessages.join('\n  - ')}`,
+      'config',
+      false,
+      { validationErrors: errorMessages },
+      ErrorCode.CONFIG_VALIDATION_FAILED
+    );
  }

  // Additional security validation
  performSecurityValidation(config);

-  // Warn if deprecated fields are used
-  if (config.login) {
-    console.warn('⚠️  The "login" section is deprecated. Please use "authentication" instead.');
-  }
-
  // Ensure at least some configuration is provided
  if (!config.rules && !config.authentication) {
    console.warn(
@@ -166,17 +329,40 @@ const performSecurityValidation = (config: Config): void => {
  if (config.authentication) {
    const auth = config.authentication;

+    // Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
+    if (auth.login_url) {
+      for (const pattern of DANGEROUS_PATTERNS) {
+        if (pattern.test(auth.login_url)) {
+          throw new PentestError(
+            `authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
+            'config',
+            false,
+            { field: 'login_url', pattern: pattern.source },
+            ErrorCode.CONFIG_VALIDATION_FAILED
+          );
+        }
+      }
+    }
+
    // Check for dangerous patterns in credentials
    if (auth.credentials) {
      for (const pattern of DANGEROUS_PATTERNS) {
        if (pattern.test(auth.credentials.username)) {
-          throw new Error(
-            'authentication.credentials.username contains potentially dangerous pattern'
+          throw new PentestError(
+            `authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
+            'config',
+            false,
+            { field: 'credentials.username', pattern: pattern.source },
+            ErrorCode.CONFIG_VALIDATION_FAILED
          );
        }
        if (pattern.test(auth.credentials.password)) {
-          throw new Error(
-            'authentication.credentials.password contains potentially dangerous pattern'
+          throw new PentestError(
+            `authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
+            'config',
+            false,
+            { field: 'credentials.password', pattern: pattern.source },
+            ErrorCode.CONFIG_VALIDATION_FAILED
          );
        }
      }
@@ -187,8 +373,12 @@ const performSecurityValidation = (config: Config): void => {
      auth.login_flow.forEach((step, index) => {
        for (const pattern of DANGEROUS_PATTERNS) {
          if (pattern.test(step)) {
-            throw new Error(
-              `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
+            throw new PentestError(
+              `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
+              'config',
+              false,
+              { field: `login_flow[${index}]`, pattern: pattern.source },
+              ErrorCode.CONFIG_VALIDATION_FAILED
            );
          }
        }
@@ -216,13 +406,21 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
    // Security validation
    for (const pattern of DANGEROUS_PATTERNS) {
      if (pattern.test(rule.url_path)) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
+        throw new PentestError(
+          `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
+          'config',
+          false,
+          { field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      if (pattern.test(rule.description)) {
-        throw new Error(
-          `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
+        throw new PentestError(
+          `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
+          'config',
+          false,
+          { field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
    }
@@ -234,10 +432,18 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi

 // Validate rule based on its specific type
 const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
+  const field = `rules.${ruleType}[${index}].url_path`;
+
  switch (rule.type) {
    case 'path':
      if (!rule.url_path.startsWith('/')) {
-        throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
+        throw new PentestError(
+          `${field} for type 'path' must start with '/'`,
+          'config',
+          false,
+          { field, ruleType: rule.type },
+          ErrorCode.CONFIG_VALIDATION_FAILED
+        );
      }
      break;

@@ -245,14 +451,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'domain':
      // Basic domain validation - no slashes allowed
      if (rule.url_path.includes('/')) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
+        throw new PentestError(
+          `${field} for type '${rule.type}' cannot contain '/' characters`,
+          'config',
+          false,
+          { field, ruleType: rule.type },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      // Must contain at least one dot for domains
      if (rule.type === 'domain' && !rule.url_path.includes('.')) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
+        throw new PentestError(
+          `${field} for type 'domain' must be a valid domain name`,
+          'config',
+          false,
+          { field, ruleType: rule.type },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -260,8 +474,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'method': {
      const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
      if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
+        throw new PentestError(
+          `${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
+          'config',
+          false,
+          { field, ruleType: rule.type, allowedMethods },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -270,8 +488,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'header':
      // Header name validation (basic)
      if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
+        throw new PentestError(
+          `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
+          'config',
+          false,
+          { field, ruleType: rule.type },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -279,8 +501,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'parameter':
      // Parameter name validation (basic)
      if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
-        throw new Error(
-          `rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
+        throw new PentestError(
+          `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
+          'config',
+          false,
+          { field, ruleType: rule.type },
+          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -293,8 +519,12 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
  rules.forEach((rule, index) => {
    const key = `${rule.type}:${rule.url_path}`;
    if (seen.has(key)) {
-      throw new Error(
-        `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
+      throw new PentestError(
+        `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
+        'config',
+        false,
+        { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
+        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
    seen.add(key);
@@ -308,8 +538,12 @@ const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): vo
  focusRules.forEach((rule, index) => {
    const key = `${rule.type}:${rule.url_path}`;
    if (avoidSet.has(key)) {
-      throw new Error(
-        `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
+      throw new PentestError(
+        `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
+        'config',
+        false,
+        { field: `rules.focus[${index}]`, urlPath: rule.url_path },
+        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
  });
@@ -347,7 +581,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
      password: auth.credentials.password,
      ...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
    },
-    login_flow: auth.login_flow.map((step) => step.trim()),
+    ...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
    success_condition: {
      type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
      value: auth.success_condition.value.trim(),
@@ -7,7 +7,7 @@
 import { path, fs } from 'zx';
 import chalk from 'chalk';
 import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
-import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
+import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js';

 // Factory function for vulnerability queue validators
 function createVulnValidator(vulnType: VulnType): AgentValidator {
@@ -32,7 +32,8 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
 }

 // MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts
-export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
+// Keys are promptTemplate values from AGENTS registry (session-manager.ts)
+export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
  // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
  // NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
  // but assigning MCP server anyway for consistency and future extensibility
@@ -4,11 +4,16 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.

-import type {
-  PentestErrorType,
-  PentestErrorContext,
-  PromptErrorResult,
+import {
+  ErrorCode,
+  type PentestErrorType,
+  type PentestErrorContext,
+  type PromptErrorResult,
 } from './types/errors.js';
+import {
+  matchesBillingApiPattern,
+  matchesBillingTextPattern,
+} from './utils/billing-detection.js';

 // Custom error class for pentest operations
 export class PentestError extends Error {
@@ -17,18 +22,24 @@ export class PentestError extends Error {
  retryable: boolean;
  context: PentestErrorContext;
  timestamp: string;
+  /** Optional specific error code for reliable classification */
+  code?: ErrorCode;

  constructor(
    message: string,
    type: PentestErrorType,
    retryable: boolean = false,
-    context: PentestErrorContext = {}
+    context: PentestErrorContext = {},
+    code?: ErrorCode
  ) {
    super(message);
    this.type = type;
    this.retryable = retryable;
    this.context = context;
    this.timestamp = new Date().toISOString();
+    if (code !== undefined) {
+      this.code = code;
+    }
  }
 }

@@ -102,6 +113,53 @@ export function isRetryableError(error: Error): boolean {
  return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
 }

+/**
+ * Classifies errors by ErrorCode for reliable, code-based classification.
+ * Used when error is a PentestError with a specific ErrorCode.
+ */
+function classifyByErrorCode(
+  code: ErrorCode,
+  retryableFromError: boolean
+): { type: string; retryable: boolean } {
+  switch (code) {
+    // Billing errors - retryable (wait for cap reset or credits added)
+    case ErrorCode.SPENDING_CAP_REACHED:
+    case ErrorCode.INSUFFICIENT_CREDITS:
+      return { type: 'BillingError', retryable: true };
+
+    case ErrorCode.API_RATE_LIMITED:
+      return { type: 'RateLimitError', retryable: true };
+
+    // Config errors - non-retryable (need manual fix)
+    case ErrorCode.CONFIG_NOT_FOUND:
+    case ErrorCode.CONFIG_VALIDATION_FAILED:
+    case ErrorCode.CONFIG_PARSE_ERROR:
+      return { type: 'ConfigurationError', retryable: false };
+
+    // Prompt errors - non-retryable (need manual fix)
+    case ErrorCode.PROMPT_LOAD_FAILED:
+      return { type: 'ConfigurationError', retryable: false };
+
+    // Git errors - non-retryable (indicates workspace corruption)
+    case ErrorCode.GIT_CHECKPOINT_FAILED:
+    case ErrorCode.GIT_ROLLBACK_FAILED:
+      return { type: 'GitError', retryable: false };
+
+    // Validation errors - retryable (agent may succeed on retry)
+    case ErrorCode.OUTPUT_VALIDATION_FAILED:
+    case ErrorCode.DELIVERABLE_NOT_FOUND:
+      return { type: 'OutputValidationError', retryable: true };
+
+    // Agent execution - use the retryable flag from the error
+    case ErrorCode.AGENT_EXECUTION_FAILED:
+      return { type: 'AgentExecutionError', retryable: retryableFromError };
+
+    default:
+      // Unknown code - fall through to string matching
+      return { type: 'UnknownError', retryable: retryableFromError };
+  }
+}
+
 /**
 * Classifies errors for Temporal workflow retry behavior.
 * Returns error type and whether Temporal should retry.
@@ -109,31 +167,25 @@ export function isRetryableError(error: Error): boolean {
 * Used by activities to wrap errors in ApplicationFailure:
 * - Retryable errors: Temporal retries with configured backoff
 * - Non-retryable errors: Temporal fails immediately
+ *
+ * Classification priority:
+ * 1. If error is PentestError with ErrorCode, classify by code (reliable)
+ * 2. Fall through to string matching for external errors (SDK, network, etc.)
 */
 export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
+  // === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
+  if (error instanceof PentestError && error.code !== undefined) {
+    return classifyByErrorCode(error.code, error.retryable);
+  }
+
+  // === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();

  // === BILLING ERRORS (Retryable with long backoff) ===
  // Anthropic returns billing as 400 invalid_request_error
  // Human can add credits OR wait for spending cap to reset (5-30 min backoff)
-  if (
-    message.includes('billing_error') ||
-    message.includes('credit balance is too low') ||
-    message.includes('insufficient credits') ||
-    message.includes('usage is blocked due to insufficient credits') ||
-    message.includes('please visit plans & billing') ||
-    message.includes('please visit plans and billing') ||
-    message.includes('usage limit reached') ||
-    message.includes('quota exceeded') ||
-    message.includes('daily rate limit') ||
-    message.includes('limit will reset') ||
-    // Claude Code spending cap patterns (returns short message instead of error)
-    message.includes('spending cap') ||
-    message.includes('spending limit') ||
-    message.includes('cap reached') ||
-    message.includes('budget exceeded') ||
-    message.includes('billing limit reached')
-  ) {
+  // Check both API patterns and text patterns for comprehensive detection
+  if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
    return { type: 'BillingError', retryable: true };
  }

@@ -7,6 +7,7 @@
 import { fs, path } from 'zx';
 import chalk from 'chalk';
 import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';

 interface DeliverableFile {
  name: string;
@@ -34,7 +35,13 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
        sections.push(content);
        console.log(chalk.green(`✅ Added ${file.name} findings`));
      } else if (file.required) {
-        throw new Error(`Required file ${file.path} not found`);
+        throw new PentestError(
+          `Required deliverable file not found: ${file.path}`,
+          'filesystem',
+          false,
+          { deliverableFile: file.path, sourceDir },
+          ErrorCode.DELIVERABLE_NOT_FOUND
+        );
      } else {
        console.log(chalk.gray(`⏭️  No ${file.name} deliverable found`));
      }
@@ -6,6 +6,8 @@

 import { fs, path } from 'zx';
 import { PentestError } from './error-handling.js';
+import { ErrorCode } from './types/errors.js';
+import { type Result, ok, err } from './types/result.js';
 import { asyncPipe } from './utils/functional.js';

 export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
@@ -67,11 +69,10 @@ export interface ExploitationDecision {
  vulnType: VulnType;
 }

-export interface SafeValidationResult {
-  success: boolean;
-  data?: ExploitationDecision;
-  error?: PentestError;
-}
+/**
+ * Result type for safe validation - explicit error handling.
+ */
+export type SafeValidationResult = Result<ExploitationDecision, PentestError>;

 // Vulnerability type configuration as immutable data
 const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
@@ -196,7 +197,8 @@ const validateExistenceRules = (
          deliverablePath: pathsWithExistence.deliverable,
          queuePath: pathsWithExistence.queue,
          existence,
-        }
+        },
+        ErrorCode.DELIVERABLE_NOT_FOUND
      ),
    };
  }
@@ -311,15 +313,18 @@ export async function validateQueueAndDeliverable(
  );
 }

-// Pure function to safely validate (returns result instead of throwing)
-export const safeValidateQueueAndDeliverable = async (
+/**
+ * Safely validate queue and deliverable files.
+ * Returns Result<ExploitationDecision, PentestError> for explicit error handling.
+ */
+export async function validateQueueSafe(
  vulnType: VulnType,
  sourceDir: string
-): Promise<SafeValidationResult> => {
+): Promise<SafeValidationResult> {
  try {
    const result = await validateQueueAndDeliverable(vulnType, sourceDir);
-    return { success: true, data: result };
+    return ok(result);
  } catch (error) {
-    return { success: false, error: error as PentestError };
+    return err(error as PentestError);
  }
-};
+}
@@ -0,0 +1,278 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Agent Execution Service
+ *
+ * Handles the full agent lifecycle:
+ * - Load config via ConfigLoaderService
+ * - Load prompt template using AGENTS[agentName].promptTemplate
+ * - Create git checkpoint
+ * - Start audit logging
+ * - Invoke Claude SDK via runClaudePrompt
+ * - Spending cap check using isSpendingCapBehavior
+ * - Handle failure (rollback, audit)
+ * - Validate output using AGENTS[agentName].deliverableFilename
+ * - Commit on success, log metrics
+ *
+ * No Temporal dependencies - pure domain logic.
+ */
+
+import chalk from 'chalk';
+
+import { Result, ok, err, isErr } from '../types/result.js';
+import { ErrorCode } from '../types/errors.js';
+import { PentestError } from '../error-handling.js';
+import { isSpendingCapBehavior } from '../utils/billing-detection.js';
+import { AGENTS } from '../session-manager.js';
+import { loadPrompt } from '../prompts/prompt-manager.js';
+import {
+  runClaudePrompt,
+  validateAgentOutput,
+  type ClaudePromptResult,
+} from '../ai/claude-executor.js';
+import {
+  createGitCheckpoint,
+  commitGitSuccess,
+  rollbackGitWorkspace,
+  getGitCommitHash,
+} from '../utils/git-manager.js';
+import { AuditSession } from '../audit/index.js';
+import type { AgentEndResult } from '../types/audit.js';
+import type { AgentName } from '../types/agents.js';
+import type { ConfigLoaderService } from './config-loader.js';
+import type { AgentMetrics } from '../types/metrics.js';
+
+/**
+ * Input for agent execution.
+ */
+export interface AgentExecutionInput {
+  webUrl: string;
+  repoPath: string;
+  configPath?: string | undefined;
+  pipelineTestingMode?: boolean | undefined;
+  attemptNumber: number;
+}
+
+/**
+ * Service for executing agents with full lifecycle management.
+ *
+ * NOTE: AuditSession is passed per-execution, NOT stored on the service.
+ * This is critical for parallel agent execution - each agent needs its own
+ * AuditSession instance because AuditSession uses instance state (currentAgentName)
+ * to track which agent is currently logging.
+ */
+export class AgentExecutionService {
+  private readonly configLoader: ConfigLoaderService;
+
+  constructor(configLoader: ConfigLoaderService) {
+    this.configLoader = configLoader;
+  }
+
+  /**
+   * Execute an agent with full lifecycle management.
+   *
+   * @param agentName - Name of the agent to execute
+   * @param input - Execution input parameters
+   * @param auditSession - Audit session for this specific agent execution
+   * @returns Result containing AgentEndResult on success, PentestError on failure
+   */
+  async execute(
+    agentName: AgentName,
+    input: AgentExecutionInput,
+    auditSession: AuditSession
+  ): Promise<Result<AgentEndResult, PentestError>> {
+    const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
+
+    // 1. Load config (if provided)
+    const configResult = await this.configLoader.loadOptional(configPath);
+    if (isErr(configResult)) {
+      return configResult;
+    }
+    const distributedConfig = configResult.value;
+
+    // 2. Load prompt
+    const promptTemplate = AGENTS[agentName].promptTemplate;
+    let prompt: string;
+    try {
+      prompt = await loadPrompt(
+        promptTemplate,
+        { webUrl, repoPath },
+        distributedConfig,
+        pipelineTestingMode
+      );
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      return err(
+        new PentestError(
+          `Failed to load prompt for ${agentName}: ${errorMessage}`,
+          'prompt',
+          false,
+          { agentName, promptTemplate, originalError: errorMessage },
+          ErrorCode.PROMPT_LOAD_FAILED
+        )
+      );
+    }
+
+    // 3. Create git checkpoint before execution
+    try {
+      await createGitCheckpoint(repoPath, agentName, attemptNumber);
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      return err(
+        new PentestError(
+          `Failed to create git checkpoint for ${agentName}: ${errorMessage}`,
+          'filesystem',
+          false,
+          { agentName, repoPath, originalError: errorMessage },
+          ErrorCode.GIT_CHECKPOINT_FAILED
+        )
+      );
+    }
+
+    // 4. Start audit logging
+    await auditSession.startAgent(agentName, prompt, attemptNumber);
+
+    // 5. Execute agent
+    const result: ClaudePromptResult = await runClaudePrompt(
+      prompt,
+      repoPath,
+      '', // context
+      agentName, // description
+      agentName,
+      chalk.cyan,
+      auditSession
+    );
+
+    // 6. Spending cap check - defense-in-depth
+    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
+      const resultText = result.result || '';
+      if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
+        await rollbackGitWorkspace(repoPath, 'spending cap detected');
+        const endResult: AgentEndResult = {
+          attemptNumber,
+          duration_ms: result.duration,
+          cost_usd: 0,
+          success: false,
+          model: result.model,
+          error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
+        };
+        await auditSession.endAgent(agentName, endResult);
+        return err(
+          new PentestError(
+            `Spending cap likely reached: ${resultText.slice(0, 100)}`,
+            'billing',
+            true, // Retryable with long backoff
+            { agentName, turns: result.turns, cost: result.cost },
+            ErrorCode.SPENDING_CAP_REACHED
+          )
+        );
+      }
+    }
+
+    // 7. Handle execution failure
+    if (!result.success) {
+      await rollbackGitWorkspace(repoPath, 'execution failure');
+      const endResult: AgentEndResult = {
+        attemptNumber,
+        duration_ms: result.duration,
+        cost_usd: result.cost || 0,
+        success: false,
+        model: result.model,
+        error: result.error || 'Execution failed',
+      };
+      await auditSession.endAgent(agentName, endResult);
+      return err(
+        new PentestError(
+          result.error || 'Agent execution failed',
+          'validation',
+          result.retryable ?? true,
+          { agentName, originalError: result.error },
+          ErrorCode.AGENT_EXECUTION_FAILED
+        )
+      );
+    }
+
+    // 8. Validate output
+    const validationPassed = await validateAgentOutput(result, agentName, repoPath);
+    if (!validationPassed) {
+      await rollbackGitWorkspace(repoPath, 'validation failure');
+      const endResult: AgentEndResult = {
+        attemptNumber,
+        duration_ms: result.duration,
+        cost_usd: result.cost || 0,
+        success: false,
+        model: result.model,
+        error: 'Output validation failed',
+      };
+      await auditSession.endAgent(agentName, endResult);
+      return err(
+        new PentestError(
+          `Agent ${agentName} failed output validation`,
+          'validation',
+          true, // Retryable - agent may succeed on retry
+          { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
+          ErrorCode.OUTPUT_VALIDATION_FAILED
+        )
+      );
+    }
+
+    // 9. Success - commit deliverables, then capture checkpoint hash
+    await commitGitSuccess(repoPath, agentName);
+    const commitHash = await getGitCommitHash(repoPath);
+
+    const endResult: AgentEndResult = {
+      attemptNumber,
+      duration_ms: result.duration,
+      cost_usd: result.cost || 0,
+      success: true,
+      model: result.model,
+      ...(commitHash && { checkpoint: commitHash }),
+    };
+    await auditSession.endAgent(agentName, endResult);
+
+    return ok(endResult);
+  }
+
+  /**
+   * Execute an agent, throwing PentestError on failure.
+   *
+   * This is the preferred method for Temporal activities, which need to
+   * catch errors and classify them into ApplicationFailure. Avoids requiring
+   * activities to import Result utilities, keeping the boundary clean.
+   *
+   * @param agentName - Name of the agent to execute
+   * @param input - Execution input parameters
+   * @param auditSession - Audit session for this specific agent execution
+   * @returns AgentEndResult on success
+   * @throws PentestError on failure
+   */
+  async executeOrThrow(
+    agentName: AgentName,
+    input: AgentExecutionInput,
+    auditSession: AuditSession
+  ): Promise<AgentEndResult> {
+    const result = await this.execute(agentName, input, auditSession);
+    if (isErr(result)) {
+      throw result.error;
+    }
+    return result.value;
+  }
+
+  /**
+   * Convert AgentEndResult to AgentMetrics for workflow state.
+   */
+  static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
+    return {
+      durationMs: endResult.duration_ms,
+      inputTokens: null, // Not currently exposed by SDK wrapper
+      outputTokens: null,
+      costUsd: endResult.cost_usd,
+      numTurns: result.turns ?? null,
+      model: result.model,
+    };
+  }
+}
@@ -0,0 +1,75 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Config Loader Service
+ *
+ * Wraps parseConfig + distributeConfig with Result type for explicit error handling.
+ * Pure service with no Temporal dependencies.
+ */
+
+import { parseConfig, distributeConfig } from '../config-parser.js';
+import { PentestError } from '../error-handling.js';
+import { Result, ok, err } from '../types/result.js';
+import { ErrorCode } from '../types/errors.js';
+import type { DistributedConfig } from '../types/config.js';
+
+/**
+ * Service for loading and distributing configuration files.
+ *
+ * Provides a Result-based API for explicit error handling,
+ * allowing callers to decide how to handle failures.
+ */
+export class ConfigLoaderService {
+  /**
+   * Load and distribute a configuration file.
+   *
+   * @param configPath - Path to the YAML configuration file
+   * @returns Result containing DistributedConfig on success, PentestError on failure
+   */
+  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
+    try {
+      const config = await parseConfig(configPath);
+      const distributed = distributeConfig(config);
+      return ok(distributed);
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+
+      // Determine appropriate error code based on error message
+      let errorCode = ErrorCode.CONFIG_PARSE_ERROR;
+      if (errorMessage.includes('not found') || errorMessage.includes('ENOENT')) {
+        errorCode = ErrorCode.CONFIG_NOT_FOUND;
+      } else if (errorMessage.includes('validation failed')) {
+        errorCode = ErrorCode.CONFIG_VALIDATION_FAILED;
+      }
+
+      return err(
+        new PentestError(
+          `Failed to load config ${configPath}: ${errorMessage}`,
+          'config',
+          false,
+          { configPath, originalError: errorMessage },
+          errorCode
+        )
+      );
+    }
+  }
+
+  /**
+   * Load config if path is provided, otherwise return null config.
+   *
+   * @param configPath - Optional path to the YAML configuration file
+   * @returns Result containing DistributedConfig (or null) on success, PentestError on failure
+   */
+  async loadOptional(
+    configPath: string | undefined
+  ): Promise<Result<DistributedConfig | null, PentestError>> {
+    if (!configPath) {
+      return ok(null);
+    }
+    return this.load(configPath);
+  }
+}
@@ -0,0 +1,117 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Dependency Injection Container
+ *
+ * Provides a per-workflow container for service instances.
+ * Services are wired with explicit constructor injection.
+ *
+ * Usage:
+ *   const container = getOrCreateContainer(workflowId, sessionMetadata);
+ *   const auditSession = new AuditSession(sessionMetadata);  // Per-agent
+ *   await auditSession.initialize(workflowId);
+ *   const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession);
+ */
+
+import type { SessionMetadata } from '../audit/utils.js';
+import { AgentExecutionService } from './agent-execution.js';
+import { ConfigLoaderService } from './config-loader.js';
+import { ExploitationCheckerService } from './exploitation-checker.js';
+
+/**
+ * Dependencies required to create a Container.
+ *
+ * NOTE: AuditSession is NOT stored in the container.
+ * Each agent execution receives its own AuditSession instance
+ * because AuditSession uses instance state (currentAgentName) that
+ * cannot be shared across parallel agents.
+ */
+export interface ContainerDependencies {
+  readonly sessionMetadata: SessionMetadata;
+}
+
+/**
+ * DI Container for a single workflow.
+ *
+ * Holds all service instances for the workflow lifecycle.
+ * Services are instantiated once and reused across agent executions.
+ *
+ * NOTE: AuditSession is NOT stored here - it's passed per agent execution
+ * to support parallel agents each having their own logging context.
+ */
+export class Container {
+  readonly sessionMetadata: SessionMetadata;
+  readonly agentExecution: AgentExecutionService;
+  readonly configLoader: ConfigLoaderService;
+  readonly exploitationChecker: ExploitationCheckerService;
+
+  constructor(deps: ContainerDependencies) {
+    this.sessionMetadata = deps.sessionMetadata;
+
+    // Wire services with explicit constructor injection
+    this.configLoader = new ConfigLoaderService();
+    this.exploitationChecker = new ExploitationCheckerService();
+    this.agentExecution = new AgentExecutionService(this.configLoader);
+  }
+}
+
+/**
+ * Map of workflowId to Container instance.
+ * Each workflow gets its own container scoped to its lifecycle.
+ */
+const containers = new Map<string, Container>();
+
+/**
+ * Get or create a Container for a workflow.
+ *
+ * If a container already exists for the workflowId, returns it.
+ * Otherwise, creates a new container with the provided dependencies.
+ *
+ * @param workflowId - Unique workflow identifier
+ * @param sessionMetadata - Session metadata for audit paths
+ * @returns Container instance for the workflow
+ */
+export function getOrCreateContainer(
+  workflowId: string,
+  sessionMetadata: SessionMetadata
+): Container {
+  let container = containers.get(workflowId);
+
+  if (!container) {
+    container = new Container({ sessionMetadata });
+    containers.set(workflowId, container);
+  }
+
+  return container;
+}
+
+/**
+ * Remove a Container when a workflow completes.
+ *
+ * Should be called in logWorkflowComplete to clean up resources.
+ *
+ * @param workflowId - Unique workflow identifier
+ */
+export function removeContainer(workflowId: string): void {
+  containers.delete(workflowId);
+}
+
+/**
+ * Get an existing Container for a workflow, if one exists.
+ *
+ * Unlike getOrCreateContainer, this does NOT create a new container.
+ * Returns undefined if no container exists for the workflowId.
+ *
+ * Useful for lightweight activities that can benefit from an existing
+ * container but don't need to create one.
+ *
+ * @param workflowId - Unique workflow identifier
+ * @returns Container instance or undefined
+ */
+export function getContainer(workflowId: string): Container | undefined {
+  return containers.get(workflowId);
+}
@@ -0,0 +1,74 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Exploitation Checker Service
+ *
+ * Pure domain logic for determining whether exploitation should run.
+ * Reads queue file, parses JSON, returns decision.
+ *
+ * No Temporal dependencies - this is pure business logic.
+ */
+
+import chalk from 'chalk';
+import {
+  validateQueueSafe,
+  type VulnType,
+  type ExploitationDecision,
+} from '../queue-validation.js';
+import { isOk } from '../types/result.js';
+
+/**
+ * Service for checking exploitation queue decisions.
+ *
+ * Determines whether an exploit agent should run based on
+ * the vulnerability analysis deliverables and queue files.
+ */
+export class ExploitationCheckerService {
+  /**
+   * Check if exploitation should run for a given vulnerability type.
+   *
+   * Reads the vulnerability queue file and returns the decision.
+   * This is pure domain logic - reads queue file, parses JSON, returns decision.
+   *
+   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
+   * @param repoPath - Path to the repository containing deliverables
+   * @returns ExploitationDecision indicating whether to exploit
+   * @throws PentestError if validation fails and is retryable
+   */
+  async checkQueue(vulnType: VulnType, repoPath: string): Promise<ExploitationDecision> {
+    const result = await validateQueueSafe(vulnType, repoPath);
+
+    if (isOk(result)) {
+      const decision = result.value;
+      console.log(
+        chalk.blue(
+          `  ${vulnType}: ${decision.shouldExploit ? `${decision.vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
+        )
+      );
+      return decision;
+    }
+
+    // Validation failed - check if we should retry or skip
+    const error = result.error;
+    if (error.retryable) {
+      // Re-throw retryable errors so caller can handle retry
+      console.log(chalk.yellow(`  ${vulnType}: ${error.message} (retryable)`));
+      throw error;
+    }
+
+    // Non-retryable error - skip exploitation gracefully
+    console.log(
+      chalk.yellow(`  ${vulnType}: ${error.message}, skipping exploitation`)
+    );
+    return {
+      shouldExploit: false,
+      shouldRetry: false,
+      vulnerabilityCount: 0,
+      vulnType,
+    };
+  }
+}
@@ -0,0 +1,20 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Services Module
+ *
+ * Exports DI container and service classes for Shannon agent execution.
+ * Services are pure domain logic with no Temporal dependencies.
+ */
+
+export { Container, getOrCreateContainer, removeContainer } from './container.js';
+export type { ContainerDependencies } from './container.js';
+
+export { ConfigLoaderService } from './config-loader.js';
+export { ExploitationCheckerService } from './exploitation-checker.js';
+export { AgentExecutionService } from './agent-execution.js';
+export type { AgentExecutionInput } from './agent-execution.js';
@@ -7,72 +7,99 @@
 import type { AgentName, AgentDefinition } from './types/index.js';

 // Agent definitions according to PRD
+// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
 export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
  'pre-recon': {
    name: 'pre-recon',
    displayName: 'Pre-recon agent',
-    prerequisites: []
+    prerequisites: [],
+    promptTemplate: 'pre-recon-code',
+    deliverableFilename: 'code_analysis_deliverable.md',
  },
  'recon': {
    name: 'recon',
    displayName: 'Recon agent',
-    prerequisites: ['pre-recon']
+    prerequisites: ['pre-recon'],
+    promptTemplate: 'recon',
+    deliverableFilename: 'recon_deliverable.md',
  },
  'injection-vuln': {
    name: 'injection-vuln',
    displayName: 'Injection vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
+    promptTemplate: 'vuln-injection',
+    deliverableFilename: 'injection_analysis_deliverable.md',
  },
  'xss-vuln': {
    name: 'xss-vuln',
    displayName: 'XSS vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
+    promptTemplate: 'vuln-xss',
+    deliverableFilename: 'xss_analysis_deliverable.md',
  },
  'auth-vuln': {
    name: 'auth-vuln',
    displayName: 'Auth vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
+    promptTemplate: 'vuln-auth',
+    deliverableFilename: 'auth_analysis_deliverable.md',
  },
  'ssrf-vuln': {
    name: 'ssrf-vuln',
    displayName: 'SSRF vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
+    promptTemplate: 'vuln-ssrf',
+    deliverableFilename: 'ssrf_analysis_deliverable.md',
  },
  'authz-vuln': {
    name: 'authz-vuln',
    displayName: 'Authz vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
+    promptTemplate: 'vuln-authz',
+    deliverableFilename: 'authz_analysis_deliverable.md',
  },
  'injection-exploit': {
    name: 'injection-exploit',
    displayName: 'Injection exploit agent',
-    prerequisites: ['injection-vuln']
+    prerequisites: ['injection-vuln'],
+    promptTemplate: 'exploit-injection',
+    deliverableFilename: 'injection_exploitation_evidence.md',
  },
  'xss-exploit': {
    name: 'xss-exploit',
    displayName: 'XSS exploit agent',
-    prerequisites: ['xss-vuln']
+    prerequisites: ['xss-vuln'],
+    promptTemplate: 'exploit-xss',
+    deliverableFilename: 'xss_exploitation_evidence.md',
  },
  'auth-exploit': {
    name: 'auth-exploit',
    displayName: 'Auth exploit agent',
-    prerequisites: ['auth-vuln']
+    prerequisites: ['auth-vuln'],
+    promptTemplate: 'exploit-auth',
+    deliverableFilename: 'auth_exploitation_evidence.md',
  },
  'ssrf-exploit': {
    name: 'ssrf-exploit',
    displayName: 'SSRF exploit agent',
-    prerequisites: ['ssrf-vuln']
+    prerequisites: ['ssrf-vuln'],
+    promptTemplate: 'exploit-ssrf',
+    deliverableFilename: 'ssrf_exploitation_evidence.md',
  },
  'authz-exploit': {
    name: 'authz-exploit',
    displayName: 'Authz exploit agent',
-    prerequisites: ['authz-vuln']
+    prerequisites: ['authz-vuln'],
+    promptTemplate: 'exploit-authz',
+    deliverableFilename: 'authz_exploitation_evidence.md',
  },
  'report': {
    name: 'report',
    displayName: 'Report agent',
-    prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
-  }
+    prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
+    promptTemplate: 'report-executive',
+    deliverableFilename: 'comprehensive_security_assessment_report.md',
+  },
 });

 // Phase names for metrics aggregation
@@ -7,28 +7,57 @@
 /**
 * Temporal activities for Shannon agent execution.
 *
- * Each activity wraps a single agent execution with:
+ * Each activity wraps service calls with Temporal-specific concerns:
 * - Heartbeat loop (2s interval) to signal worker liveness
- * - Git checkpoint/rollback/commit per attempt
- * - Error classification for Temporal retry behavior
- * - Audit session logging
+ * - Error classification into ApplicationFailure
+ * - Container lifecycle management
 *
- * Temporal handles retries based on error classification:
- * - Retryable: BillingError, TransientError (429, 5xx, network)
- * - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
+ * Business logic is delegated to services in src/services/.
 */

 import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
 import chalk from 'chalk';
+import path from 'path';
+import fs from 'fs/promises';
+
+import { classifyErrorForTemporal, PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';
+import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
+import { ExploitationCheckerService } from '../services/exploitation-checker.js';
+import type { VulnType, ExploitationDecision } from '../queue-validation.js';
+import { AuditSession } from '../audit/index.js';
+import type { WorkflowSummary } from '../audit/workflow-logger.js';
+import type { AgentName } from '../types/agents.js';
+import { ALL_AGENTS } from '../types/agents.js';
+import type { AgentMetrics, ResumeState } from './shared.js';
+import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
+import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
+import { AGENTS } from '../session-manager.js';
+import { executeGitCommandWithRetry } from '../utils/git-manager.js';
+import type { ResumeAttempt } from '../audit/metrics-tracker.js';

 // Max lengths to prevent Temporal protobuf buffer overflow
 const MAX_ERROR_MESSAGE_LENGTH = 2000;
 const MAX_STACK_TRACE_LENGTH = 1000;

 // Max retries for output validation errors (agent didn't save deliverables)
-// Lower than default 50 since this is unlikely to self-heal
 const MAX_OUTPUT_VALIDATION_RETRIES = 3;

+const HEARTBEAT_INTERVAL_MS = 2000;
+
+/**
+ * Input for all agent activities.
+ */
+export interface ActivityInput {
+  webUrl: string;
+  repoPath: string;
+  configPath?: string;
+  outputPath?: string;
+  pipelineTestingMode?: boolean;
+  workflowId: string;
+  sessionId: string;
+}
+
 /**
 * Truncate error message to prevent buffer overflow in Temporal serialization.
 */
@@ -48,85 +77,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
  }
 }

-import {
-  runClaudePrompt,
-  validateAgentOutput,
-  type ClaudePromptResult,
-} from '../ai/claude-executor.js';
-import { loadPrompt } from '../prompts/prompt-manager.js';
-import { parseConfig, distributeConfig } from '../config-parser.js';
-import { classifyErrorForTemporal } from '../error-handling.js';
-import {
-  safeValidateQueueAndDeliverable,
-  type VulnType,
-  type ExploitationDecision,
-} from '../queue-validation.js';
-import {
-  createGitCheckpoint,
-  commitGitSuccess,
-  rollbackGitWorkspace,
-  getGitCommitHash,
-} from '../utils/git-manager.js';
-import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
-import { getPromptNameForAgent } from '../types/agents.js';
-import { AuditSession } from '../audit/index.js';
-import type { WorkflowSummary } from '../audit/workflow-logger.js';
-import type { AgentName } from '../types/agents.js';
-import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
-import type { AgentMetrics, ResumeState } from './shared.js';
-import type { DistributedConfig } from '../types/config.js';
-import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
-import type { ResumeAttempt } from '../audit/metrics-tracker.js';
-import { executeGitCommandWithRetry } from '../utils/git-manager.js';
-import path from 'path';
-import fs from 'fs/promises';
-
-const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
-
 /**
- * Input for all agent activities.
- * Matches PipelineInput but with required workflowId for audit correlation.
+ * Build SessionMetadata from ActivityInput.
 */
-export interface ActivityInput {
-  webUrl: string;
-  repoPath: string;
-  configPath?: string;
-  outputPath?: string;
-  pipelineTestingMode?: boolean;
-  workflowId: string;
-  sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
+function buildSessionMetadata(input: ActivityInput): SessionMetadata {
+  const { webUrl, repoPath, outputPath, sessionId } = input;
+  return {
+    id: sessionId,
+    webUrl,
+    repoPath,
+    ...(outputPath && { outputPath }),
+  };
 }

 /**
- * Core activity implementation.
+ * Core activity implementation using services.
 *
 * Executes a single agent with:
 * 1. Heartbeat loop for worker liveness
- * 2. Config loading (if configPath provided)
- * 3. Audit session initialization
- * 4. Prompt loading
- * 5. Git checkpoint before execution
- * 6. Agent execution (single attempt)
- * 7. Output validation
- * 8. Git commit on success, rollback on failure
- * 9. Error classification for Temporal retry
+ * 2. Container creation/reuse
+ * 3. Service-based agent execution
+ * 4. Error classification for Temporal retry
 */
 async function runAgentActivity(
  agentName: AgentName,
  input: ActivityInput
 ): Promise<AgentMetrics> {
-  const {
-    webUrl,
-    repoPath,
-    configPath,
-    outputPath,
-    pipelineTestingMode = false,
-    workflowId,
-  } = input;
-
+  const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
  const startTime = Date.now();
-
-  // Get attempt number from Temporal context (tracks retries automatically)
  const attemptNumber = Context.current().info.attempt;

  // Heartbeat loop - signals worker is alive to Temporal server
@@ -136,158 +114,63 @@ async function runAgentActivity(
  }, HEARTBEAT_INTERVAL_MS);

  try {
-    // 1. Load config (if provided)
-    let distributedConfig: DistributedConfig | null = null;
-    if (configPath) {
-      try {
-        const config = await parseConfig(configPath);
-        distributedConfig = distributeConfig(config);
-      } catch (err) {
-        throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
-      }
-    }
+    // Build session metadata and get/create container
+    const sessionMetadata = buildSessionMetadata(input);
+    const container = getOrCreateContainer(workflowId, sessionMetadata);

-    // 2. Build session metadata for audit
-    // Use sessionId (workspace name) for directory, workflowId for tracking
-    const sessionMetadata: SessionMetadata = {
-      id: input.sessionId,
-      webUrl,
-      repoPath,
-      ...(outputPath && { outputPath }),
-    };
-
-    // 3. Initialize audit session (idempotent, safe across retries)
+    // Create audit session for THIS agent execution
+    // NOTE: Each agent needs its own AuditSession because AuditSession uses
+    // instance state (currentAgentName) that cannot be shared across parallel agents
    const auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize(workflowId);

-    // 4. Load prompt
-    const promptName = getPromptNameForAgent(agentName);
-    const prompt = await loadPrompt(
-      promptName,
-      { webUrl, repoPath },
-      distributedConfig,
-      pipelineTestingMode
-    );
-
-    // 5. Create git checkpoint before execution
-    await createGitCheckpoint(repoPath, agentName, attemptNumber);
-    await auditSession.startAgent(agentName, prompt, attemptNumber);
-
-    // 6. Execute agent (single attempt - Temporal handles retries)
-    const result: ClaudePromptResult = await runClaudePrompt(
-      prompt,
-      repoPath,
-      '', // context
-      agentName, // description
+    // Execute agent via service (throws PentestError on failure)
+    const endResult = await container.agentExecution.executeOrThrow(
      agentName,
-      chalk.cyan,
+      {
+        webUrl,
+        repoPath,
+        configPath,
+        pipelineTestingMode,
+        attemptNumber,
+      },
      auditSession
    );

-    // 6.5. Sanity check: Detect spending cap that slipped through all detection layers
-    // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
-    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
-      const resultText = result.result || '';
-      const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
-
-      if (looksLikeBillingError) {
-        await rollbackGitWorkspace(repoPath, 'spending cap detected');
-        await auditSession.endAgent(agentName, {
-          attemptNumber,
-          duration_ms: result.duration,
-          cost_usd: 0,
-          success: false,
-          model: result.model,
-          error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
-        });
-        // Throw as billing error so Temporal retries with long backoff
-        throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
-      }
-    }
-
-    // 7. Handle execution failure
-    if (!result.success) {
-      await rollbackGitWorkspace(repoPath, 'execution failure');
-      await auditSession.endAgent(agentName, {
-        attemptNumber,
-        duration_ms: result.duration,
-        cost_usd: result.cost || 0,
-        success: false,
-        model: result.model,
-        error: result.error || 'Execution failed',
-      });
-      throw new Error(result.error || 'Agent execution failed');
-    }
-
-    // 8. Validate output
-    const validationPassed = await validateAgentOutput(result, agentName, repoPath);
-    if (!validationPassed) {
-      await rollbackGitWorkspace(repoPath, 'validation failure');
-      await auditSession.endAgent(agentName, {
-        attemptNumber,
-        duration_ms: result.duration,
-        cost_usd: result.cost || 0,
-        success: false,
-        model: result.model,
-        error: 'Output validation failed',
-      });
-
-      // Limit output validation retries (unlikely to self-heal)
-      if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
-        throw ApplicationFailure.nonRetryable(
-          `Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
-          'OutputValidationError',
-          [{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
-        );
-      }
-      // Let Temporal retry (will be classified as OutputValidationError)
-      throw new Error(`Agent ${agentName} failed output validation`);
-    }
-
-    // 9. Success - commit deliverables, then capture checkpoint hash
-    await commitGitSuccess(repoPath, agentName);
-    const commitHash = await getGitCommitHash(repoPath);
-    await auditSession.endAgent(agentName, {
-      attemptNumber,
-      duration_ms: result.duration,
-      cost_usd: result.cost || 0,
-      success: true,
-      model: result.model,
-      ...(commitHash && { checkpoint: commitHash }),
-    });
-
-    // 10. Return metrics
+    // Success - return metrics
    return {
      durationMs: Date.now() - startTime,
-      inputTokens: null, // Not currently exposed by SDK wrapper
+      inputTokens: null,
      outputTokens: null,
-      costUsd: result.cost ?? null,
-      numTurns: result.turns ?? null,
-      model: result.model,
+      costUsd: endResult.cost_usd,
+      numTurns: null,
+      model: endResult.model,
    };
  } catch (error) {
-    // Rollback git workspace before Temporal retry to ensure clean state
-    try {
-      await rollbackGitWorkspace(repoPath, 'error recovery');
-    } catch (rollbackErr) {
-      // Log but don't fail - rollback is best-effort
-      console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
-    }
-
-    // If error is already an ApplicationFailure (e.g., from our retry limit logic),
-    // re-throw it directly without re-classifying
+    // If error is already an ApplicationFailure, re-throw directly
    if (error instanceof ApplicationFailure) {
      throw error;
    }

+    // Check if output validation retry limit reached (PentestError with code)
+    if (
+      error instanceof PentestError &&
+      error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
+      attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
+    ) {
+      throw ApplicationFailure.nonRetryable(
+        `Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
+        'OutputValidationError',
+        [{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
+      );
+    }
+
    // Classify error for Temporal retry behavior
    const classified = classifyErrorForTemporal(error);
-    // Truncate message to prevent protobuf buffer overflow
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = truncateErrorMessage(rawMessage);

    if (classified.retryable) {
-      // Temporal will retry with configured backoff
      const failure = ApplicationFailure.create({
        message,
        type: classified.type,
@@ -296,7 +179,6 @@ async function runAgentActivity(
      truncateStackTrace(failure);
      throw failure;
    } else {
-      // Fail immediately - no retry
      const failure = ApplicationFailure.nonRetryable(message, classified.type, [
        { agentName, attemptNumber, elapsed: Date.now() - startTime },
      ]);
@@ -309,7 +191,6 @@ async function runAgentActivity(
 }

 // === Individual Agent Activity Exports ===
-// Each function is a thin wrapper around runAgentActivity with the agent name.

 export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
  return runAgentActivity('pre-recon', input);
@@ -363,25 +244,24 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
  return runAgentActivity('report', input);
 }

+// === Report Assembly Activities ===
+
 /**
 * Assemble the final report by concatenating exploitation evidence files.
- * This must be called BEFORE runReportAgent to create the file that the report agent will modify.
 */
 export async function assembleReportActivity(input: ActivityInput): Promise<void> {
  const { repoPath } = input;
-  console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
+  console.log(chalk.blue('  Assembling deliverables from specialist agents...'));
  try {
    await assembleFinalReport(repoPath);
  } catch (error) {
    const err = error as Error;
-    console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
-    // Don't throw - the report agent can still create content even if no exploitation files exist
+    console.log(chalk.yellow(`  Warning: Error assembling final report: ${err.message}`));
  }
 }

 /**
 * Inject model metadata into the final report.
- * This must be called AFTER runReportAgent to add the model information to the Executive Summary.
 */
 export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
  const { repoPath, sessionId, outputPath } = input;
@@ -392,65 +272,33 @@ export async function injectReportMetadataActivity(input: ActivityInput): Promis
    await injectModelIntoReport(repoPath, effectiveOutputPath);
  } catch (error) {
    const err = error as Error;
-    console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
-    // Don't throw - this is a non-critical enhancement
+    console.log(chalk.yellow(`  Warning: Error injecting model into report: ${err.message}`));
  }
 }

+// === Exploitation Queue Check ===
+
 /**
 * Check if exploitation should run for a given vulnerability type.
- * Reads the vulnerability queue file and returns the decision.
 *
- * This activity allows the workflow to skip exploit agents entirely
- * when no vulnerabilities were found, saving API calls and time.
- *
- * Error handling:
- * - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
- * - Non-retryable errors: skip exploitation gracefully
+ * Uses existing container if available (from prior agent runs),
+ * otherwise creates service directly (stateless, no dependencies).
 */
 export async function checkExploitationQueue(
  input: ActivityInput,
  vulnType: VulnType
 ): Promise<ExploitationDecision> {
-  const { repoPath } = input;
+  const { repoPath, workflowId } = input;

-  const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
+  // Reuse container's service if available (from prior vuln agent runs)
+  const existingContainer = getContainer(workflowId);
+  const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();

-  if (result.success && result.data) {
-    const { shouldExploit, vulnerabilityCount } = result.data;
-    console.log(
-      chalk.blue(
-        `🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
-      )
-    );
-    return result.data;
-  }
-
-  // Validation failed - check if we should retry or skip
-  const error = result.error;
-  if (error?.retryable) {
-    // Re-throw retryable errors so Temporal can retry the vuln agent
-    console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
-    throw error;
-  }
-
-  // Non-retryable error - skip exploitation gracefully
-  console.log(
-    chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
-  );
-  return {
-    shouldExploit: false,
-    shouldRetry: false,
-    vulnerabilityCount: 0,
-    vulnType,
-  };
+  return checker.checkQueue(vulnType, repoPath);
 }

 // === Resume Activities ===

-/**
- * Session.json structure for resume state loading
- */
 interface SessionJson {
  session: {
    id: string;
@@ -460,18 +308,18 @@ interface SessionJson {
    resumeAttempts?: ResumeAttempt[];
  };
  metrics: {
-    agents: Record<string, {
-      status: 'in-progress' | 'success' | 'failed';
-      checkpoint?: string;
-    }>;
+    agents: Record<
+      string,
+      {
+        status: 'in-progress' | 'success' | 'failed';
+        checkpoint?: string;
+      }
+    >;
  };
 }

 /**
 * Load resume state from an existing workspace.
- * Validates workspace exists, URL matches, and determines which agents to skip.
- *
- * @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
 */
 export async function loadResumeState(
  workspaceName: string,
@@ -480,7 +328,6 @@ export async function loadResumeState(
 ): Promise<ResumeState> {
  const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');

-  // Validate workspace exists
  const exists = await fileExists(sessionPath);
  if (!exists) {
    throw ApplicationFailure.nonRetryable(
@@ -489,7 +336,6 @@ export async function loadResumeState(
    );
  }

-  // Load session.json
  let session: SessionJson;
  try {
    session = await readJson<SessionJson>(sessionPath);
@@ -501,7 +347,6 @@ export async function loadResumeState(
    );
  }

-  // Validate URL matches
  if (session.session.webUrl !== expectedUrl) {
    throw ApplicationFailure.nonRetryable(
      `URL mismatch with workspace\n  Workspace URL: ${session.session.webUrl}\n  Provided URL:  ${expectedUrl}`,
@@ -509,20 +354,17 @@ export async function loadResumeState(
    );
  }

-  // Find completed agents (status === 'success' AND deliverable exists)
  const completedAgents: string[] = [];
  const agents = session.metrics.agents;

  for (const agentName of ALL_AGENTS) {
    const agentData = agents[agentName];
-
-    // Skip if agent never ran or didn't succeed
    if (!agentData || agentData.status !== 'success') {
      continue;
    }

-    // Validate deliverable exists
-    const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
+    const deliverableFilename = AGENTS[agentName].deliverableFilename;
+    const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
    const deliverableExists = await fileExists(deliverablePath);

    if (!deliverableExists) {
@@ -532,11 +374,9 @@ export async function loadResumeState(
      continue;
    }

-    // Agent completed successfully and deliverable exists
    completedAgents.push(agentName);
  }

-  // Find latest checkpoint from completed agents
  const checkpoints = completedAgents
    .map((name) => agents[name]?.checkpoint)
    .filter((hash): hash is string => hash != null);
@@ -548,18 +388,16 @@ export async function loadResumeState(

    throw ApplicationFailure.nonRetryable(
      `Cannot resume workspace ${workspaceName}: ` +
-      (successAgents.length > 0
-        ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
-          `but their deliverable files are missing from disk. ` +
-          `Start a fresh run instead.`
-        : `No agents completed successfully. Start a fresh run instead.`),
+        (successAgents.length > 0
+          ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
+            `but their deliverable files are missing from disk. ` +
+            `Start a fresh run instead.`
+          : `No agents completed successfully. Start a fresh run instead.`),
      'NoCheckpointsError'
    );
  }

-  // Find most recent commit among checkpoints
  const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
-
  const originalWorkflowId = session.session.originalWorkflowId || session.session.id;

  console.log(chalk.cyan(`=== RESUME STATE ===`));
@@ -576,20 +414,21 @@ export async function loadResumeState(
  };
 }

-/**
- * Find the most recent commit among a list of commit hashes.
- * Uses git rev-list to determine which commit is newest.
- */
 async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
  if (commitHashes.length === 1) {
    const hash = commitHashes[0];
    if (!hash) {
-      throw new Error('Empty commit hash in array');
+      throw new PentestError(
+        'Empty commit hash in array',
+        'filesystem',
+        false, // Non-retryable - corrupt workspace state
+        { phase: 'resume' },
+        ErrorCode.GIT_CHECKPOINT_FAILED
+      );
    }
    return hash;
  }

-  // Use git rev-list to find the most recent commit among all hashes
  const result = await executeGitCommandWithRetry(
    ['git', 'rev-list', '--max-count=1', ...commitHashes],
    repoPath,
@@ -601,10 +440,6 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi

 /**
 * Restore git workspace to a checkpoint and clean up partial deliverables.
- *
- * @param repoPath - Repository path
- * @param checkpointHash - Git commit hash to reset to
- * @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
 */
 export async function restoreGitCheckpoint(
  repoPath: string,
@@ -613,8 +448,6 @@ export async function restoreGitCheckpoint(
 ): Promise<void> {
  console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));

-  // Checkpoint hash points to the success commit (after commitGitSuccess),
-  // so git reset --hard naturally preserves all completed agent deliverables.
  await executeGitCommandWithRetry(
    ['git', 'reset', '--hard', checkpointHash],
    repoPath,
@@ -626,9 +459,9 @@ export async function restoreGitCheckpoint(
    'clean untracked files for resume'
  );

-  // Clean up any partial deliverables from incomplete agents
  for (const agentName of incompleteAgents) {
-    const deliverablePath = getDeliverablePath(agentName, repoPath);
+    const deliverableFilename = AGENTS[agentName].deliverableFilename;
+    const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
    try {
      const exists = await fileExists(deliverablePath);
      if (exists) {
@@ -645,48 +478,31 @@ export async function restoreGitCheckpoint(

 /**
 * Record a resume attempt in session.json.
- * Tracks the new workflow ID, terminated workflows, and checkpoint hash.
 */
 export async function recordResumeAttempt(
  input: ActivityInput,
  terminatedWorkflows: string[],
  checkpointHash: string
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
-
-  const sessionMetadata: SessionMetadata = {
-    id: sessionId,
-    webUrl,
-    repoPath,
-    ...(outputPath && { outputPath }),
-  };
-
+  const sessionMetadata = buildSessionMetadata(input);
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize();
-
-  await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
+  await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
 }

+// === Phase Transition Activities ===
+
 /**
 * Log phase transition to the unified workflow log.
- * Called at phase boundaries for per-workflow logging.
 */
 export async function logPhaseTransition(
  input: ActivityInput,
  phase: string,
  event: 'start' | 'complete'
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
-
-  const sessionMetadata: SessionMetadata = {
-    id: sessionId,
-    webUrl,
-    repoPath,
-    ...(outputPath && { outputPath }),
-  };
-
+  const sessionMetadata = buildSessionMetadata(input);
  const auditSession = new AuditSession(sessionMetadata);
-  await auditSession.initialize(workflowId);
+  await auditSession.initialize(input.workflowId);

  if (event === 'start') {
    await auditSession.logPhaseStart(phase);
@@ -696,28 +512,22 @@ export async function logPhaseTransition(
 }

 /**
- * Log workflow completion with full summary to the unified workflow log.
- * Called at the end of the workflow to write a summary breakdown.
+ * Log workflow completion with full summary.
+ * Cleans up container when done.
 */
 export async function logWorkflowComplete(
  input: ActivityInput,
  summary: WorkflowSummary
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
-
-  const sessionMetadata: SessionMetadata = {
-    id: sessionId,
-    webUrl,
-    repoPath,
-    ...(outputPath && { outputPath }),
-  };
+  const { repoPath, workflowId } = input;
+  const sessionMetadata = buildSessionMetadata(input);

  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize(workflowId);
  await auditSession.updateSessionStatus(summary.status);

-  // Use cumulative metrics from session.json (includes all resume attempts)
-  const sessionData = await auditSession.getMetrics() as {
+  // Use cumulative metrics from session.json
+  const sessionData = (await auditSession.getMetrics()) as {
    metrics: {
      total_duration_ms: number;
      total_cost_usd: number;
@@ -725,7 +535,7 @@ export async function logWorkflowComplete(
    };
  };

-  // Fill in metrics for skipped agents (completed in previous runs)
+  // Fill in metrics for skipped agents
  const agentMetrics = { ...summary.agentMetrics };
  for (const agentName of summary.completedAgents) {
    if (!agentMetrics[agentName]) {
@@ -747,10 +557,13 @@ export async function logWorkflowComplete(
  };
  await auditSession.logWorkflowComplete(cumulativeSummary);

-  // Copy all deliverables to audit-logs once at workflow end (non-fatal)
+  // Copy deliverables to audit-logs
  try {
    await copyDeliverablesToAudit(sessionMetadata, repoPath);
  } catch (copyErr) {
    console.error('Failed to copy deliverables to audit-logs:', copyErr);
  }
+
+  // Clean up container
+  removeContainer(workflowId);
 }
@@ -1,5 +1,9 @@
 import { defineQuery } from '@temporalio/workflow';

+// Re-export AgentMetrics from central types location
+export type { AgentMetrics } from '../types/metrics.js';
+import type { AgentMetrics } from '../types/metrics.js';
+
 // === Types ===

 export interface PipelineInput {
@@ -22,15 +26,6 @@ export interface ResumeState {
  originalWorkflowId: string;
 }

-export interface AgentMetrics {
-  durationMs: number;
-  inputTokens: number | null;
-  outputTokens: number | null;
-  costUsd: number | null;
-  numTurns: number | null;
-  model?: string | undefined;
-}
-
 export interface PipelineSummary {
  totalCostUsd: number;
  totalDurationMs: number; // Wall-clock time (end - start)
@@ -0,0 +1,45 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Maps PipelineState to WorkflowSummary for audit logging.
+ * Pure function with no side effects.
+ */
+
+import type { PipelineState } from './shared.js';
+import type { WorkflowSummary } from '../audit/workflow-logger.js';
+
+/**
+ * Maps PipelineState to WorkflowSummary.
+ *
+ * This function is deterministic (no Date.now() or I/O) so it can be
+ * safely imported into Temporal workflows. The caller must ensure
+ * state.summary is set before calling (via computeSummary).
+ */
+export function toWorkflowSummary(
+  state: PipelineState,
+  status: 'completed' | 'failed'
+): WorkflowSummary {
+  // state.summary must be computed before calling this mapper
+  const summary = state.summary;
+  if (!summary) {
+    throw new Error('toWorkflowSummary: state.summary must be set before calling');
+  }
+
+  return {
+    status,
+    totalDurationMs: summary.totalDurationMs,
+    totalCostUsd: summary.totalCostUsd,
+    completedAgents: state.completedAgents,
+    agentMetrics: Object.fromEntries(
+      Object.entries(state.agentMetrics).map(([name, m]) => [
+        name,
+        { durationMs: m.durationMs, costUsd: m.costUsd },
+      ])
+    ),
+    ...(state.error && { error: state.error }),
+  };
+}
@@ -43,6 +43,7 @@ import {
 import type { VulnType } from '../queue-validation.js';
 import type { AgentName } from '../types/agents.js';
 import { ALL_AGENTS } from '../types/agents.js';
+import { toWorkflowSummary } from './summary-mapper.js';

 // Retry configuration for production (long intervals for billing recovery)
 const PRODUCTION_RETRY = {
@@ -417,18 +418,7 @@ export async function pentestPipelineWorkflow(
    state.summary = computeSummary(state);

    // Log workflow completion summary
-    await a.logWorkflowComplete(activityInput, {
-      status: 'completed',
-      totalDurationMs: state.summary.totalDurationMs,
-      totalCostUsd: state.summary.totalCostUsd,
-      completedAgents: state.completedAgents,
-      agentMetrics: Object.fromEntries(
-        Object.entries(state.agentMetrics).map(([name, m]) => [
-          name,
-          { durationMs: m.durationMs, costUsd: m.costUsd },
-        ])
-      ),
-    });
+    await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));

    return state;
  } catch (error) {
@@ -438,19 +428,7 @@ export async function pentestPipelineWorkflow(
    state.summary = computeSummary(state);

    // Log workflow failure summary
-    await a.logWorkflowComplete(activityInput, {
-      status: 'failed',
-      totalDurationMs: state.summary.totalDurationMs,
-      totalCostUsd: state.summary.totalCostUsd,
-      completedAgents: state.completedAgents,
-      agentMetrics: Object.fromEntries(
-        Object.entries(state.agentMetrics).map(([name, m]) => [
-          name,
-          { durationMs: m.durationMs, costUsd: m.costUsd },
-        ])
-      ),
-      error: state.error ?? undefined,
-    });
+    await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));

    throw error;
  }
@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
 */
 export type AgentName = typeof ALL_AGENTS[number];

-export type PromptName =
-  | 'pre-recon-code'
-  | 'recon'
-  | 'vuln-injection'
-  | 'vuln-xss'
-  | 'vuln-auth'
-  | 'vuln-ssrf'
-  | 'vuln-authz'
-  | 'exploit-injection'
-  | 'exploit-xss'
-  | 'exploit-auth'
-  | 'exploit-ssrf'
-  | 'exploit-authz'
-  | 'report-executive';
-
 export type PlaywrightAgent =
  | 'playwright-agent1'
  | 'playwright-agent2'
@@ -69,52 +54,6 @@ export interface AgentDefinition {
  name: AgentName;
  displayName: string;
  prerequisites: AgentName[];
-}
-
-/**
- * Maps an agent name to its corresponding prompt file name.
- */
-export function getPromptNameForAgent(agentName: AgentName): PromptName {
-  const mappings: Record<AgentName, PromptName> = {
-    'pre-recon': 'pre-recon-code',
-    'recon': 'recon',
-    'injection-vuln': 'vuln-injection',
-    'xss-vuln': 'vuln-xss',
-    'auth-vuln': 'vuln-auth',
-    'ssrf-vuln': 'vuln-ssrf',
-    'authz-vuln': 'vuln-authz',
-    'injection-exploit': 'exploit-injection',
-    'xss-exploit': 'exploit-xss',
-    'auth-exploit': 'exploit-auth',
-    'ssrf-exploit': 'exploit-ssrf',
-    'authz-exploit': 'exploit-authz',
-    'report': 'report-executive',
-  };
-
-  return mappings[agentName];
-}
-
-/**
- * Maps an agent name to its deliverable file path.
- * Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
- */
-export function getDeliverablePath(agentName: AgentName, repoPath: string): string {
-  const deliverableMap: Record<AgentName, string> = {
-    'pre-recon': 'code_analysis_deliverable.md',
-    'recon': 'recon_deliverable.md',
-    'injection-vuln': 'injection_analysis_deliverable.md',
-    'xss-vuln': 'xss_analysis_deliverable.md',
-    'auth-vuln': 'auth_analysis_deliverable.md',
-    'ssrf-vuln': 'ssrf_analysis_deliverable.md',
-    'authz-vuln': 'authz_analysis_deliverable.md',
-    'injection-exploit': 'injection_exploitation_evidence.md',
-    'xss-exploit': 'xss_exploitation_evidence.md',
-    'auth-exploit': 'auth_exploitation_evidence.md',
-    'ssrf-exploit': 'ssrf_exploitation_evidence.md',
-    'authz-exploit': 'authz_exploitation_evidence.md',
-    'report': 'comprehensive_security_assessment_report.md',
-  };
-
-  const filename = deliverableMap[agentName];
-  return `${repoPath}/deliverables/${filename}`;
+  promptTemplate: string;
+  deliverableFilename: string;
 }
@@ -51,7 +51,6 @@ export interface Authentication {
 export interface Config {
  rules?: Rules;
  authentication?: Authentication;
-  login?: unknown;
 }

 export interface DistributedConfig {
@@ -8,6 +8,39 @@
 * Error type definitions
 */

+/**
+ * Specific error codes for reliable classification.
+ *
+ * ErrorCode provides precision within the coarse 8-category PentestErrorType.
+ * Used by classifyErrorForTemporal for code-based classification (preferred)
+ * with string matching as fallback for external errors.
+ */
+export enum ErrorCode {
+  // Config errors (PentestErrorType: 'config')
+  CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
+  CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
+  CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
+
+  // Agent execution errors (PentestErrorType: 'validation')
+  AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
+  OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
+
+  // Billing errors (PentestErrorType: 'billing')
+  API_RATE_LIMITED = 'API_RATE_LIMITED',
+  SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
+  INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
+
+  // Git errors (PentestErrorType: 'filesystem')
+  GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
+  GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
+
+  // Prompt errors (PentestErrorType: 'prompt')
+  PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
+
+  // Validation errors (PentestErrorType: 'validation')
+  DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
+}
+
 export type PentestErrorType =
  | 'config'
  | 'network'
@@ -12,3 +12,5 @@ export * from './errors.js';
 export * from './config.js';
 export * from './agents.js';
 export * from './audit.js';
+export * from './result.js';
+export * from './metrics.js';
@@ -0,0 +1,19 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Agent metrics types used across services and activities.
+ * Centralized here to avoid temporal/shared.ts import boundary violations.
+ */
+
+export interface AgentMetrics {
+  durationMs: number;
+  inputTokens: number | null;
+  outputTokens: number | null;
+  costUsd: number | null;
+  numTurns: number | null;
+  model?: string | undefined;
+}
@@ -0,0 +1,62 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Minimal Result type for explicit error handling.
+ *
+ * A discriminated union that makes error handling explicit without adding
+ * heavy machinery. Used in key modules (config loading, agent execution,
+ * queue validation) where callers need to make decisions based on error type.
+ */
+
+/**
+ * Success variant of Result
+ */
+export interface Ok<T> {
+  readonly ok: true;
+  readonly value: T;
+}
+
+/**
+ * Error variant of Result
+ */
+export interface Err<E> {
+  readonly ok: false;
+  readonly error: E;
+}
+
+/**
+ * Result type - either Ok with a value or Err with an error
+ */
+export type Result<T, E> = Ok<T> | Err<E>;
+
+/**
+ * Create a success Result
+ */
+export function ok<T>(value: T): Ok<T> {
+  return { ok: true, value };
+}
+
+/**
+ * Create an error Result
+ */
+export function err<E>(error: E): Err<E> {
+  return { ok: false, error };
+}
+
+/**
+ * Type guard for Ok variant
+ */
+export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
+  return result.ok === true;
+}
+
+/**
+ * Type guard for Err variant
+ */
+export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
+  return result.ok === false;
+}
@@ -0,0 +1,95 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Consolidated billing/spending cap detection utilities.
+ *
+ * Anthropic's spending cap behavior is inconsistent:
+ * - Sometimes a proper SDK error (billing_error)
+ * - Sometimes Claude responds with text about the cap
+ * - Sometimes partial billing before cutoff
+ *
+ * This module provides defense-in-depth detection with shared pattern lists
+ * to prevent drift between detection points.
+ */
+
+/**
+ * Text patterns for SDK output sniffing (what Claude says).
+ * Used by message-handlers.ts and the behavioral heuristic.
+ */
+export const BILLING_TEXT_PATTERNS = [
+  'spending cap',
+  'spending limit',
+  'cap reached',
+  'budget exceeded',
+  'usage limit',
+  'resets',
+] as const;
+
+/**
+ * API patterns for error message classification (what the API returns).
+ * Used by classifyErrorForTemporal in error-handling.ts.
+ */
+export const BILLING_API_PATTERNS = [
+  'billing_error',
+  'credit balance is too low',
+  'insufficient credits',
+  'usage is blocked due to insufficient credits',
+  'please visit plans & billing',
+  'please visit plans and billing',
+  'usage limit reached',
+  'quota exceeded',
+  'daily rate limit',
+  'limit will reset',
+  'billing limit reached',
+] as const;
+
+/**
+ * Checks if text matches any billing text pattern.
+ * Used for sniffing SDK output content for spending cap messages.
+ */
+export function matchesBillingTextPattern(text: string): boolean {
+  const lowerText = text.toLowerCase();
+  return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern));
+}
+
+/**
+ * Checks if an error message matches any billing API pattern.
+ * Used for classifying API error messages.
+ */
+export function matchesBillingApiPattern(message: string): boolean {
+  const lowerMessage = message.toLowerCase();
+  return BILLING_API_PATTERNS.some((pattern) => lowerMessage.includes(pattern));
+}
+
+/**
+ * Behavioral heuristic for detecting spending cap.
+ *
+ * When Claude hits a spending cap, it often returns a short message
+ * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
+ *
+ * This combines three signals:
+ * 1. Very low turn count (<=2)
+ * 2. Zero cost ($0)
+ * 3. Text matches billing patterns
+ *
+ * @param turns - Number of turns the agent took
+ * @param cost - Total cost in USD
+ * @param resultText - The result text from the agent
+ * @returns true if this looks like a spending cap hit
+ */
+export function isSpendingCapBehavior(
+  turns: number,
+  cost: number,
+  resultText: string
+): boolean {
+  // Only check if turns <= 2 AND cost is exactly 0
+  if (turns > 2 || cost !== 0) {
+    return false;
+  }
+
+  return matchesBillingTextPattern(resultText);
+}
@@ -6,6 +6,8 @@

 import { $ } from 'zx';
 import chalk from 'chalk';
+import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';

 /**
 * Check if a directory is a git repository.
@@ -148,7 +150,13 @@ export async function executeGitCommandWithRetry(
        throw error;
      }
    }
-    throw new Error(`Git command failed after ${maxRetries} retries`);
+    throw new PentestError(
+      `Git command failed after ${maxRetries} retries`,
+      'filesystem',
+      true, // Retryable - transient git lock issues
+      { maxRetries, description },
+      ErrorCode.GIT_CHECKPOINT_FAILED
+    );
  } finally {
    gitSemaphore.release();
  }
@@ -189,9 +197,18 @@ export async function rollbackGitWorkspace(
    );
    return { success: true };
  } catch (error) {
-    const result = toErrorResult(error);
-    console.log(chalk.red(`    ❌ Rollback failed after retries: ${result.error?.message}`));
-    return result;
+    const errMsg = error instanceof Error ? error.message : String(error);
+    console.log(chalk.red(`    ❌ Rollback failed after retries: ${errMsg}`));
+    return {
+      success: false,
+      error: new PentestError(
+        `Git rollback failed: ${errMsg}`,
+        'filesystem',
+        false, // Non-retryable - rollback is best-effort cleanup
+        { sourceDir, reason },
+        ErrorCode.GIT_ROLLBACK_FAILED
+      ),
+    };
  }
 }