refactor: extract services layer, Result type, and ErrorCode classification

- Add DI container (src/services/) with AgentExecutionService, ConfigLoaderService, and ExploitationCheckerService — pure domain logic with no Temporal dependencies
- Introduce Result<T, E> type and ErrorCode enum for code-based error classification in classifyErrorForTemporal, replacing scattered string matching
- Consolidate billing/spending cap detection into utils/billing-detection.ts with shared pattern lists across message-handlers, claude-executor, and error-handling
- Extract LogStream abstraction for append-only logging with backpressure, used by both AgentLogger and WorkflowLogger
- Simplify activities.ts from inline lifecycle logic to thin wrappers delegating to services, with heartbeat and error classification
- Expand config-parser with human-readable AJV errors, security validation, and rule type-specific checks
This commit is contained in:
ajmallesh
2026-02-16 16:12:21 -08:00
parent ae69478541
commit d3816a29fa
31 changed files with 1664 additions and 707 deletions

View File

@@ -117,6 +117,7 @@ Defensive security tool only. Use only on systems you own or have explicit permi
- Dense callback chains when sequential logic is clearer
- Sacrificing readability for DRY — some repetition is fine if clearer
- Abstractions for one-time operations
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it

## Key Files

View File

@@ -11,12 +11,13 @@ import chalk, { type ChalkInstance } from 'chalk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { isRetryableError, PentestError } from '../error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { timingResults, Timer } from '../utils/metrics.js';
import { formatTimestamp } from '../utils/formatting.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/index.js';
import { dispatchMessage } from './message-handlers.js';
@@ -65,8 +66,8 @@ function buildMcpServers(
};
if (agentName) {
const promptName = getPromptNameForAgent(agentName as AgentName);
const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
if (playwrightMcpName) {
console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`));
@@ -263,22 +264,13 @@ export async function runClaudePrompt(
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
// When spending cap is hit, Claude returns a short message with $0 cost.
// Legitimate agent work NEVER costs $0 with only 1-2 turns.
if (turnCount <= 2 && totalCost === 0) {
const resultLower = (result || '').toLowerCase();
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
resultLower.includes(kw)
// Uses consolidated billing detection from utils/billing-detection.ts
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
if (looksLikeBillingError) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
}
}
const duration = timer.stop();

View File

@@ -7,6 +7,8 @@
// Pure functions for processing SDK message types
import { PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
import { filterJsonToolCalls } from '../utils/output-formatter.js';
import { formatTimestamp } from '../utils/formatting.js';
import chalk from 'chalk';
@@ -75,25 +77,15 @@ function detectApiError(content: string): ApiErrorDetection {
// When Claude Code hits its spending cap, it returns a short message like
// "Spending cap reached resets 8am" instead of throwing an error.
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
const BILLING_PATTERNS = [
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
];
const isBillingError = BILLING_PATTERNS.some((pattern) =>
lowerContent.includes(pattern)
);
if (isBillingError) {
if (matchesBillingTextPattern(content)) {
return {
detected: true,
shouldThrow: new PentestError(
`Billing limit reached: ${content.slice(0, 100)}`,
'billing',
true // RETRYABLE - Temporal will use 5-30 min backoff
true, // RETRYABLE - Temporal will use 5-30 min backoff
{},
ErrorCode.SPENDING_CAP_REACHED
),
};
}
@@ -127,7 +119,9 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Billing error (structured): ${content.slice(0, 100)}`,
'billing',
true // Retryable with backoff
true, // Retryable with backoff
{},
ErrorCode.INSUFFICIENT_CREDITS
),
};
case 'rate_limit':
@@ -136,7 +130,9 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Rate limit hit (structured): ${content.slice(0, 100)}`,
'network',
true // Retryable with backoff
true, // Retryable with backoff
{},
ErrorCode.API_RATE_LIMITED
),
};
case 'authentication_failed':

View File

@@ -18,6 +18,8 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js';
import { SessionMutex } from '../utils/concurrency.js';
import type { AgentEndResult } from '../types/index.js';
import { PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
// Global mutex instance
const sessionMutex = new SessionMutex();
@@ -40,10 +42,22 @@ export class AuditSession {
// Validate required fields
if (!this.sessionId) {
throw new Error('sessionMetadata.id is required');
throw new PentestError(
'sessionMetadata.id is required',
'config',
false,
{ field: 'sessionMetadata.id' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (!this.sessionMetadata.webUrl) {
throw new Error('sessionMetadata.webUrl is required');
throw new PentestError(
'sessionMetadata.webUrl is required',
'config',
false,
{ field: 'sessionMetadata.webUrl' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Components
@@ -124,7 +138,13 @@ export class AuditSession {
*/
async logEvent(eventType: string, eventData: unknown): Promise<void> {
if (!this.currentLogger) {
throw new Error('No active logger. Call startAgent() first.');
throw new PentestError(
'No active logger. Call startAgent() first.',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Log to agent-specific log file (JSON format)

127
src/audit/log-stream.ts Normal file
View File

@@ -0,0 +1,127 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* LogStream - Stream composition utility for append-only logging
*
* Encapsulates the common stream management pattern used by AgentLogger
* and WorkflowLogger: opening streams in append mode, handling backpressure,
* and proper cleanup.
*/
import fs from 'fs';
import path from 'path';
import { ensureDirectory } from '../utils/file-io.js';
/**
 * LogStream - Manages a single append-only log file stream.
 *
 * Lifecycle: construct with a target path, `open()` before writing,
 * `write()` any number of times, then `close()`. The stream may be
 * re-opened after it has been closed or after a stream error.
 */
export class LogStream {
  private readonly filePath: string;
  private stream: fs.WriteStream | null = null;
  private _isOpen: boolean = false;

  /**
   * @param filePath - Path of the log file this stream appends to.
   */
  constructor(filePath: string) {
    this.filePath = filePath;
  }

  /**
   * Open the stream for writing (creates parent directories, opens in append mode).
   * Calling this on an already-open stream is a no-op.
   */
  async open(): Promise<void> {
    if (this._isOpen) {
      return;
    }

    // Ensure parent directory exists
    await ensureDirectory(path.dirname(this.filePath));

    // Another open() call may have finished while we were awaiting above;
    // re-check so we never create (and leak) a second write stream.
    if (this._isOpen) {
      return;
    }

    // Create write stream in append mode
    const stream = fs.createWriteStream(this.filePath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });

    // Handle stream errors to prevent crashes (log and mark closed).
    // Only flip the flag if the failing stream is still the active one, so a
    // late error from a previous stream cannot mark a re-opened stream closed.
    stream.on('error', (err) => {
      console.error(`LogStream error for ${this.filePath}:`, err.message);
      if (this.stream === stream) {
        this._isOpen = false;
      }
    });

    this.stream = stream;
    this._isOpen = true;
  }

  /**
   * Write text to the stream with backpressure handling.
   *
   * The returned promise settles from the write callback, i.e. once this
   * chunk has been handed off by the stream. Awaiting it therefore throttles
   * the caller without depending on a 'drain' event, which is only emitted
   * when the whole buffer empties and so cannot be relied on per-chunk when
   * several writes are in flight at once.
   *
   * @param text - Text to append.
   * @throws Rejects with `Error('LogStream not open')` if the stream is not
   *   open, or with the underlying stream error if the write fails.
   */
  async write(text: string): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      throw new Error('LogStream not open');
    }

    return new Promise<void>((resolve, reject) => {
      stream.write(text, 'utf8', (error) => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
  }

  /**
   * Close the stream (flush and close). No-op when the stream is not open.
   */
  async close(): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      return;
    }

    return new Promise<void>((resolve) => {
      stream.end(() => {
        this._isOpen = false;
        this.stream = null;
        resolve();
      });
    });
  }

  /**
   * Check if the stream is currently open
   */
  get isOpen(): boolean {
    return this._isOpen;
  }

  /**
   * Get the file path this stream writes to
   */
  get path(): string {
    return this.filePath;
  }
}

View File

@@ -8,10 +8,9 @@
* Append-Only Agent Logger
*
* Provides crash-safe, append-only logging for agent execution.
* Uses file streams with immediate flush to prevent data loss.
* Uses LogStream for stream management with backpressure handling.
*/
import fs from 'fs';
import {
generateLogPath,
generatePromptPath,
@@ -19,6 +18,7 @@ import {
} from './utils.js';
import { atomicWrite } from '../utils/file-io.js';
import { formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
interface LogEvent {
type: string;
@@ -30,13 +30,11 @@ interface LogEvent {
* AgentLogger - Manages append-only logging for a single agent execution
*/
export class AgentLogger {
private sessionMetadata: SessionMetadata;
private agentName: string;
private attemptNumber: number;
private timestamp: number;
private logPath: string;
private stream: fs.WriteStream | null = null;
private isOpen: boolean = false;
private readonly sessionMetadata: SessionMetadata;
private readonly agentName: string;
private readonly attemptNumber: number;
private readonly timestamp: number;
private readonly logStream: LogStream;
constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
this.sessionMetadata = sessionMetadata;
@@ -44,26 +42,20 @@ export class AgentLogger {
this.attemptNumber = attemptNumber;
this.timestamp = Date.now();
// Generate log file path
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
// Generate log file path and create stream
const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
this.logStream = new LogStream(logPath);
}
/**
* Initialize the log stream (creates file and opens stream)
*/
async initialize(): Promise<void> {
if (this.isOpen) {
if (this.logStream.isOpen) {
return; // Already initialized
}
// Create write stream with append mode and auto-flush
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a', // Append mode
encoding: 'utf8',
autoClose: true,
});
this.isOpen = true;
await this.logStream.open();
// Write header
await this.writeHeader();
@@ -83,29 +75,7 @@ export class AgentLogger {
`========================================\n`,
].join('\n');
return this.writeRaw(header);
}
/**
* Write raw text to log file with immediate flush
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.isOpen || !this.stream) {
reject(new Error('Logger not initialized'));
return;
}
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
return this.logStream.write(header);
}
/**
@@ -120,23 +90,14 @@ export class AgentLogger {
};
const eventLine = `${JSON.stringify(event)}\n`;
return this.writeRaw(eventLine);
return this.logStream.write(eventLine);
}
/**
* Close the log stream
*/
async close(): Promise<void> {
if (!this.isOpen || !this.stream) {
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.isOpen = false;
resolve();
});
});
return this.logStream.close();
}
/**

View File

@@ -18,6 +18,8 @@ import {
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import { PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
interface AttemptData {
@@ -159,7 +161,13 @@ export class MetricsTracker {
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
if (!this.data) {
throw new Error('MetricsTracker not initialized');
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Initialize agent metrics if not exists
@@ -251,7 +259,13 @@ export class MetricsTracker {
checkpointHash?: string
): Promise<void> {
if (!this.data) {
throw new Error('MetricsTracker not initialized');
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Ensure originalWorkflowId is set (backfill if missing from old sessions)

View File

@@ -11,10 +11,10 @@
* Optimized for `tail -f` viewing during concurrent workflow execution.
*/
import fs from 'fs';
import path from 'path';
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
import fs from 'fs/promises';
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
export interface AgentLogDetails {
attemptNumber?: number;
@@ -42,38 +42,28 @@ export interface WorkflowSummary {
* WorkflowLogger - Manages the unified workflow log file
*/
export class WorkflowLogger {
private sessionMetadata: SessionMetadata;
private logPath: string;
private stream: fs.WriteStream | null = null;
private initialized: boolean = false;
private readonly sessionMetadata: SessionMetadata;
private readonly logStream: LogStream;
constructor(sessionMetadata: SessionMetadata) {
this.sessionMetadata = sessionMetadata;
this.logPath = generateWorkflowLogPath(sessionMetadata);
const logPath = generateWorkflowLogPath(sessionMetadata);
this.logStream = new LogStream(logPath);
}
/**
* Initialize the log stream (creates file and writes header)
*/
async initialize(): Promise<void> {
if (this.initialized) {
if (this.logStream.isOpen) {
return;
}
// Ensure directory exists
await ensureDirectory(path.dirname(this.logPath));
// Create write stream with append mode
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a',
encoding: 'utf8',
autoClose: true,
});
this.initialized = true;
// Open the stream (LogStream.open() handles directory creation)
await this.logStream.open();
// Write header only if file is new (empty)
const stats = await fs.promises.stat(this.logPath).catch(() => null);
const stats = await fs.stat(this.logStream.path).catch(() => null);
if (!stats || stats.size === 0) {
await this.writeHeader();
}
@@ -94,29 +84,7 @@ export class WorkflowLogger {
``,
].join('\n');
return this.writeRaw(header);
}
/**
* Write raw text to log file with immediate flush
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.initialized || !this.stream) {
reject(new Error('WorkflowLogger not initialized'));
return;
}
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
return this.logStream.write(header);
}
/**
@@ -138,10 +106,10 @@ export class WorkflowLogger {
// Add blank line before phase start for readability
if (event === 'start') {
await this.writeRaw('\n');
await this.logStream.write('\n');
}
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -184,7 +152,7 @@ export class WorkflowLogger {
}
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -194,7 +162,7 @@ export class WorkflowLogger {
await this.ensureInitialized();
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -205,7 +173,7 @@ export class WorkflowLogger {
const contextStr = context ? ` (${context})` : '';
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -301,7 +269,7 @@ export class WorkflowLogger {
const params = this.formatToolParams(toolName, parameters);
const paramStr = params ? `: ${params}` : '';
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -313,7 +281,7 @@ export class WorkflowLogger {
// Show full content, replacing newlines with escaped version for single-line output
const escaped = content.replace(/\n/g, '\\n');
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -324,42 +292,42 @@ export class WorkflowLogger {
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
await this.writeRaw('\n');
await this.writeRaw(`================================================================================\n`);
await this.writeRaw(`Workflow ${status}\n`);
await this.writeRaw(`────────────────────────────────────────\n`);
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.writeRaw(`Status: ${summary.status}\n`);
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`);
await this.logStream.write('\n');
await this.logStream.write(`================================================================================\n`);
await this.logStream.write(`Workflow ${status}\n`);
await this.logStream.write(`────────────────────────────────────────\n`);
await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.logStream.write(`Status: ${summary.status}\n`);
await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`);
if (summary.error) {
await this.writeRaw(`Error: ${summary.error}\n`);
await this.logStream.write(`Error: ${summary.error}\n`);
}
await this.writeRaw(`\n`);
await this.writeRaw(`Agent Breakdown:\n`);
await this.logStream.write(`\n`);
await this.logStream.write(`Agent Breakdown:\n`);
for (const agentName of summary.completedAgents) {
const metrics = summary.agentMetrics[agentName];
if (metrics) {
const duration = formatDuration(metrics.durationMs);
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`);
await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`);
} else {
await this.writeRaw(` - ${agentName}\n`);
await this.logStream.write(` - ${agentName}\n`);
}
}
await this.writeRaw(`================================================================================\n`);
await this.logStream.write(`================================================================================\n`);
}
/**
* Ensure initialized (helper for lazy initialization)
*/
private async ensureInitialized(): Promise<void> {
if (!this.initialized) {
if (!this.logStream.isOpen) {
await this.initialize();
}
}
@@ -368,15 +336,6 @@ export class WorkflowLogger {
* Close the log stream
*/
async close(): Promise<void> {
if (!this.initialized || !this.stream) {
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.initialized = false;
resolve();
});
});
return this.logStream.close();
}
}

View File

@@ -7,9 +7,10 @@
import { createRequire } from 'module';
import { fs } from 'zx';
import yaml from 'js-yaml';
import { Ajv, type ValidateFunction } from 'ajv';
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
import type { FormatsPlugin } from 'ajv-formats';
import { PentestError } from './error-handling.js';
import { ErrorCode } from './types/errors.js';
import type {
Config,
Rule,
@@ -53,20 +54,155 @@ const DANGEROUS_PATTERNS: RegExp[] = [
/file:/i, // File URLs
];
/**
 * Render a schema value (enum member, const value) for display in an error message.
 * Strings keep their double quotes; numbers, booleans, null, arrays and objects
 * are shown as JSON instead of being coerced (which would yield "[object Object]").
 */
function describeSchemaValue(value: unknown): string {
  return JSON.stringify(value) ?? String(value);
}

/**
 * Format a single AJV error into a human-readable message.
 * Translates AJV error keywords into plain English descriptions.
 *
 * @param error - One entry from a validator's `errors` array.
 * @returns A one-line description that names the failing location
 *   (the error's `instancePath`, or "root" when that is empty).
 */
function formatAjvError(error: ErrorObject): string {
  const location = error.instancePath || 'root';
  const params = error.params as Record<string, unknown>;

  switch (error.keyword) {
    case 'required': {
      const missingProperty = params.missingProperty as string;
      return `Missing required field: "${missingProperty}" at ${location}`;
    }
    case 'type': {
      const expectedType = params.type as string;
      return `Invalid type at ${location}: expected ${expectedType}`;
    }
    case 'enum': {
      const allowedValues = params.allowedValues as unknown[];
      const formattedValues = allowedValues.map(describeSchemaValue).join(', ');
      return `Invalid value at ${location}: must be one of [${formattedValues}]`;
    }
    case 'additionalProperties': {
      const additionalProperty = params.additionalProperty as string;
      return `Unknown field at ${location}: "${additionalProperty}" is not allowed`;
    }
    case 'minLength': {
      const limit = params.limit as number;
      return `Value at ${location} is too short: must have at least ${limit} character(s)`;
    }
    case 'maxLength': {
      const limit = params.limit as number;
      return `Value at ${location} is too long: must have at most ${limit} character(s)`;
    }
    case 'minimum': {
      const limit = params.limit as number;
      return `Value at ${location} is too small: must be >= ${limit}`;
    }
    case 'maximum': {
      const limit = params.limit as number;
      return `Value at ${location} is too large: must be <= ${limit}`;
    }
    case 'minItems': {
      const limit = params.limit as number;
      return `Array at ${location} has too few items: must have at least ${limit} item(s)`;
    }
    case 'maxItems': {
      const limit = params.limit as number;
      return `Array at ${location} has too many items: must have at most ${limit} item(s)`;
    }
    case 'pattern': {
      const pattern = params.pattern as string;
      return `Value at ${location} does not match required pattern: ${pattern}`;
    }
    case 'format': {
      const format = params.format as string;
      return `Value at ${location} must be a valid ${format}`;
    }
    case 'const': {
      return `Value at ${location} must be exactly ${describeSchemaValue(params.allowedValue)}`;
    }
    case 'oneOf': {
      // passingSchemas holds the indices of the schemas that matched (or null
      // when none did), so report how many matched rather than the raw indices.
      const passingSchemas = params.passingSchemas;
      const matchedCount = Array.isArray(passingSchemas) ? passingSchemas.length : 0;
      return `Value at ${location} must match exactly one schema (matched ${matchedCount})`;
    }
    case 'anyOf': {
      return `Value at ${location} must match at least one of the allowed schemas`;
    }
    case 'not': {
      return `Value at ${location} matches a schema it should not match`;
    }
    case 'if': {
      return `Value at ${location} does not satisfy conditional schema requirements`;
    }
    case 'uniqueItems': {
      const i = params.i as number;
      const j = params.j as number;
      return `Array at ${location} contains duplicate items at positions ${j} and ${i}`;
    }
    case 'propertyNames': {
      const propertyName = params.propertyName as string;
      return `Invalid property name at ${location}: "${propertyName}" does not match naming requirements`;
    }
    case 'dependencies':
    case 'dependentRequired': {
      const property = params.property as string;
      const missingProperty = params.missingProperty as string;
      return `Missing dependent field at ${location}: "${missingProperty}" is required when "${property}" is present`;
    }
    default: {
      // Fallback for any unhandled keywords - use AJV's message if available
      const message = error.message || `validation failed for keyword "${error.keyword}"`;
      return `${location}: ${message}`;
    }
  }
}
/**
 * Convert a list of AJV errors into human-readable messages.
 * Each error is formatted independently and the input order is preserved.
 *
 * @param errors - Validation errors reported by AJV.
 * @returns One formatted message per input error.
 */
function formatAjvErrors(errors: ErrorObject[]): string[] {
  return errors.map((validationError) => formatAjvError(validationError));
}
// Parse and load YAML configuration file with enhanced safety
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// File existence check
if (!(await fs.pathExists(configPath))) {
throw new Error(`Configuration file not found: ${configPath}`);
throw new PentestError(
`Configuration file not found: ${configPath}`,
'config',
false,
{ configPath },
ErrorCode.CONFIG_NOT_FOUND
);
}
// File size check (prevent extremely large files)
const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
if (stats.size > maxFileSize) {
throw new Error(
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
throw new PentestError(
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
'config',
false,
{ configPath, fileSize: stats.size, maxFileSize },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
@@ -75,7 +211,13 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
// Basic content validation
if (!configContent.trim()) {
throw new Error('Configuration file is empty');
throw new PentestError(
'Configuration file is empty',
'config',
false,
{ configPath },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Parse YAML with safety options
@@ -88,12 +230,24 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
});
} catch (yamlError) {
const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
throw new Error(`YAML parsing failed: ${errMsg}`);
throw new PentestError(
`YAML parsing failed: ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
}
// Additional safety check
if (config === null || config === undefined) {
throw new Error('Configuration file resulted in null/undefined after parsing');
throw new PentestError(
'Configuration file resulted in null/undefined after parsing',
'config',
false,
{ configPath },
ErrorCode.CONFIG_PARSE_ERROR
);
}
// Validate the configuration structure and content
@@ -101,20 +255,19 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
return config as Config;
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
// Enhance error message with context
if (
errMsg.startsWith('Configuration file not found') ||
errMsg.startsWith('YAML parsing failed') ||
errMsg.includes('must be') ||
errMsg.includes('exceeds maximum')
) {
// These are already well-formatted errors, re-throw as-is
// PentestError instances are already well-formatted, re-throw as-is
if (error instanceof PentestError) {
throw error;
} else {
// Wrap other errors with context
throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
}
// Wrap other errors with context
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to parse configuration file '${configPath}': ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
}
};
@@ -122,32 +275,42 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
const validateConfig = (config: Config): void => {
// Basic structure validation
if (!config || typeof config !== 'object') {
throw new Error('Configuration must be a valid object');
throw new PentestError(
'Configuration must be a valid object',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (Array.isArray(config)) {
throw new Error('Configuration must be an object, not an array');
throw new PentestError(
'Configuration must be an object, not an array',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// JSON Schema validation
const isValid = validateSchema(config);
if (!isValid) {
const errors = validateSchema.errors || [];
const errorMessages = errors.map((err) => {
const path = err.instancePath || 'root';
return `${path}: ${err.message}`;
});
throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`);
const errorMessages = formatAjvErrors(errors);
throw new PentestError(
`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`,
'config',
false,
{ validationErrors: errorMessages },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Additional security validation
performSecurityValidation(config);
// Warn if deprecated fields are used
if (config.login) {
console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.');
}
// Ensure at least some configuration is provided
if (!config.rules && !config.authentication) {
console.warn(
@@ -166,17 +329,40 @@ const performSecurityValidation = (config: Config): void => {
if (config.authentication) {
const auth = config.authentication;
// Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
if (auth.login_url) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.login_url)) {
throw new PentestError(
`authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'login_url', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
}
// Check for dangerous patterns in credentials
if (auth.credentials) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.credentials.username)) {
throw new Error(
'authentication.credentials.username contains potentially dangerous pattern'
throw new PentestError(
`authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.username', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (pattern.test(auth.credentials.password)) {
throw new Error(
'authentication.credentials.password contains potentially dangerous pattern'
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
@@ -187,8 +373,12 @@ const performSecurityValidation = (config: Config): void => {
auth.login_flow.forEach((step, index) => {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(step)) {
throw new Error(
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `login_flow[${index}]`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
@@ -216,13 +406,21 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
// Security validation
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(rule.url_path)) {
throw new Error(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (pattern.test(rule.description)) {
throw new Error(
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
@@ -234,10 +432,18 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
// Validate rule based on its specific type
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
const field = `rules.${ruleType}[${index}].url_path`;
switch (rule.type) {
case 'path':
if (!rule.url_path.startsWith('/')) {
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
throw new PentestError(
`${field} for type 'path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -245,14 +451,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'domain':
// Basic domain validation - no slashes allowed
if (rule.url_path.includes('/')) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
throw new PentestError(
`${field} for type '${rule.type}' cannot contain '/' characters`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Must contain at least one dot for domains
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
throw new PentestError(
`${field} for type 'domain' must be a valid domain name`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -260,8 +474,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'method': {
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
throw new PentestError(
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
'config',
false,
{ field, ruleType: rule.type, allowedMethods },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -270,8 +488,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'header':
// Header name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
throw new PentestError(
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -279,8 +501,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'parameter':
// Parameter name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
throw new PentestError(
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -293,8 +519,12 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
rules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
if (seen.has(key)) {
throw new Error(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
throw new PentestError(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
seen.add(key);
@@ -308,8 +538,12 @@ const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): vo
focusRules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
if (avoidSet.has(key)) {
throw new Error(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
throw new PentestError(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
'config',
false,
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
});
@@ -347,7 +581,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
password: auth.credentials.password,
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
},
login_flow: auth.login_flow.map((step) => step.trim()),
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: {
type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
value: auth.success_condition.value.trim(),

View File

@@ -7,7 +7,7 @@
import { path, fs } from 'zx';
import chalk from 'chalk';
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js';
// Factory function for vulnerability queue validators
function createVulnValidator(vulnType: VulnType): AgentValidator {
@@ -32,7 +32,8 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
}
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts
export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
// Keys are promptTemplate values from AGENTS registry (session-manager.ts)
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
// Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
// but assigning MCP server anyway for consistency and future extensibility

View File

@@ -4,11 +4,16 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import type {
PentestErrorType,
PentestErrorContext,
PromptErrorResult,
import {
ErrorCode,
type PentestErrorType,
type PentestErrorContext,
type PromptErrorResult,
} from './types/errors.js';
import {
matchesBillingApiPattern,
matchesBillingTextPattern,
} from './utils/billing-detection.js';
// Custom error class for pentest operations
export class PentestError extends Error {
@@ -17,18 +22,24 @@ export class PentestError extends Error {
retryable: boolean;
context: PentestErrorContext;
timestamp: string;
/** Optional specific error code for reliable classification */
code?: ErrorCode;
constructor(
message: string,
type: PentestErrorType,
retryable: boolean = false,
context: PentestErrorContext = {}
context: PentestErrorContext = {},
code?: ErrorCode
) {
super(message);
this.type = type;
this.retryable = retryable;
this.context = context;
this.timestamp = new Date().toISOString();
if (code !== undefined) {
this.code = code;
}
}
}
@@ -102,6 +113,53 @@ export function isRetryableError(error: Error): boolean {
return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
}
/**
 * Maps the ErrorCode carried by a PentestError to the error type name and
 * retry decision used for Temporal classification.
 *
 * @param code - Error code attached to the PentestError being classified
 * @param retryableFromError - The error's own `retryable` flag; only consulted
 *   for AGENT_EXECUTION_FAILED and for codes that have no dedicated mapping
 * @returns The error type name and whether the failure should be retried
 */
function classifyByErrorCode(
  code: ErrorCode,
  retryableFromError: boolean
): { type: string; retryable: boolean } {
  const classified = (
    type: string,
    retryable: boolean
  ): { type: string; retryable: boolean } => ({ type, retryable });

  switch (code) {
    // Billing: retryable, the cap may reset or credits may be added.
    case ErrorCode.SPENDING_CAP_REACHED:
    case ErrorCode.INSUFFICIENT_CREDITS:
      return classified('BillingError', true);

    case ErrorCode.API_RATE_LIMITED:
      return classified('RateLimitError', true);

    // Config and prompt problems need a manual fix, so retrying is pointless.
    case ErrorCode.CONFIG_NOT_FOUND:
    case ErrorCode.CONFIG_VALIDATION_FAILED:
    case ErrorCode.CONFIG_PARSE_ERROR:
    case ErrorCode.PROMPT_LOAD_FAILED:
      return classified('ConfigurationError', false);

    // Git failures indicate workspace corruption: never retried.
    case ErrorCode.GIT_CHECKPOINT_FAILED:
    case ErrorCode.GIT_ROLLBACK_FAILED:
      return classified('GitError', false);

    // Missing or invalid output: the agent may succeed on another attempt.
    case ErrorCode.OUTPUT_VALIDATION_FAILED:
    case ErrorCode.DELIVERABLE_NOT_FOUND:
      return classified('OutputValidationError', true);

    // Agent execution: defer to the retry flag the error was created with.
    case ErrorCode.AGENT_EXECUTION_FAILED:
      return classified('AgentExecutionError', retryableFromError);

    default:
      // No dedicated mapping: report UnknownError and keep the error's own
      // retry flag. Nothing here falls back to message matching.
      return classified('UnknownError', retryableFromError);
  }
}
/**
* Classifies errors for Temporal workflow retry behavior.
* Returns error type and whether Temporal should retry.
@@ -109,31 +167,25 @@ export function isRetryableError(error: Error): boolean {
* Used by activities to wrap errors in ApplicationFailure:
* - Retryable errors: Temporal retries with configured backoff
* - Non-retryable errors: Temporal fails immediately
*
* Classification priority:
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
* 2. Fall through to string matching for external errors (SDK, network, etc.)
*/
export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
if (error instanceof PentestError && error.code !== undefined) {
return classifyByErrorCode(error.code, error.retryable);
}
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
// === BILLING ERRORS (Retryable with long backoff) ===
// Anthropic returns billing as 400 invalid_request_error
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
if (
message.includes('billing_error') ||
message.includes('credit balance is too low') ||
message.includes('insufficient credits') ||
message.includes('usage is blocked due to insufficient credits') ||
message.includes('please visit plans & billing') ||
message.includes('please visit plans and billing') ||
message.includes('usage limit reached') ||
message.includes('quota exceeded') ||
message.includes('daily rate limit') ||
message.includes('limit will reset') ||
// Claude Code spending cap patterns (returns short message instead of error)
message.includes('spending cap') ||
message.includes('spending limit') ||
message.includes('cap reached') ||
message.includes('budget exceeded') ||
message.includes('billing limit reached')
) {
// Check both API patterns and text patterns for comprehensive detection
if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
return { type: 'BillingError', retryable: true };
}

View File

@@ -7,6 +7,7 @@
import { fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
interface DeliverableFile {
name: string;
@@ -34,7 +35,13 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
sections.push(content);
console.log(chalk.green(`✅ Added ${file.name} findings`));
} else if (file.required) {
throw new Error(`Required file ${file.path} not found`);
throw new PentestError(
`Required deliverable file not found: ${file.path}`,
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND
);
} else {
console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`));
}

View File

@@ -6,6 +6,8 @@
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from './types/errors.js';
import { type Result, ok, err } from './types/result.js';
import { asyncPipe } from './utils/functional.js';
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
@@ -67,11 +69,10 @@ export interface ExploitationDecision {
vulnType: VulnType;
}
export interface SafeValidationResult {
success: boolean;
data?: ExploitationDecision;
error?: PentestError;
}
/**
* Result type for safe validation - explicit error handling.
*/
export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
// Vulnerability type configuration as immutable data
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
@@ -196,7 +197,8 @@ const validateExistenceRules = (
deliverablePath: pathsWithExistence.deliverable,
queuePath: pathsWithExistence.queue,
existence,
}
},
ErrorCode.DELIVERABLE_NOT_FOUND
),
};
}
@@ -311,15 +313,18 @@ export async function validateQueueAndDeliverable(
);
}
// Pure function to safely validate (returns result instead of throwing)
export const safeValidateQueueAndDeliverable = async (
/**
* Safely validate queue and deliverable files.
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
*/
export async function validateQueueSafe(
vulnType: VulnType,
sourceDir: string
): Promise<SafeValidationResult> => {
): Promise<SafeValidationResult> {
try {
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
return { success: true, data: result };
return ok(result);
} catch (error) {
return { success: false, error: error as PentestError };
return err(error as PentestError);
}
};
}

View File

@@ -0,0 +1,278 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Agent Execution Service
*
* Handles the full agent lifecycle:
* - Load config via ConfigLoaderService
* - Load prompt template using AGENTS[agentName].promptTemplate
* - Create git checkpoint
* - Start audit logging
* - Invoke Claude SDK via runClaudePrompt
* - Spending cap check using isSpendingCapBehavior
* - Handle failure (rollback, audit)
* - Validate output using AGENTS[agentName].deliverableFilename
* - Commit on success, log metrics
*
* No Temporal dependencies - pure domain logic.
*/
import chalk from 'chalk';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from '../error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
import type { ConfigLoaderService } from './config-loader.js';
import type { AgentMetrics } from '../types/metrics.js';
/**
 * Input for agent execution.
 *
 * Consumed by AgentExecutionService.execute / executeOrThrow.
 */
export interface AgentExecutionInput {
// Passed to loadPrompt (together with repoPath) as a prompt variable.
webUrl: string;
// Repository path handed to loadPrompt, runClaudePrompt, validateAgentOutput
// and the git checkpoint / rollback / commit helpers.
repoPath: string;
// Optional config file path; when omitted, ConfigLoaderService.loadOptional
// yields a null config instead of an error.
configPath?: string | undefined;
// Forwarded to loadPrompt; treated as false when omitted.
pipelineTestingMode?: boolean | undefined;
// Forwarded to createGitCheckpoint and auditSession.startAgent, and recorded
// in the resulting AgentEndResult.
attemptNumber: number;
}
/**
 * Runs a single agent from start to finish: config and prompt loading, git
 * checkpointing, audit logging, the Claude run itself, output validation, and
 * the final commit or rollback.
 *
 * NOTE: the AuditSession is supplied per call and never stored on the service.
 * AuditSession tracks the currently logging agent in instance state
 * (currentAgentName), so agents running in parallel each need their own.
 */
export class AgentExecutionService {
  private readonly configLoader: ConfigLoaderService;

  constructor(configLoader: ConfigLoaderService) {
    this.configLoader = configLoader;
  }

  /**
   * Execute an agent and report the outcome as a Result.
   *
   * Every failure path handled here (config, prompt, checkpoint, spending cap,
   * execution, output validation) is returned as an `err` carrying a
   * PentestError with a specific ErrorCode. Failures that occur after the
   * agent has run also roll back the git workspace and close the audit entry.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session dedicated to this agent execution
   * @returns Result containing AgentEndResult on success, PentestError on failure
   */
  async execute(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession
  ): Promise<Result<AgentEndResult, PentestError>> {
    const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;

    // Step 1: optional config. A missing configPath gives a null config.
    const loadedConfig = await this.configLoader.loadOptional(configPath);
    if (isErr(loadedConfig)) {
      return loadedConfig;
    }
    const distributedConfig = loadedConfig.value;

    // Step 2: render the prompt from the agent's registered template.
    const promptTemplate = AGENTS[agentName].promptTemplate;
    let prompt: string;
    try {
      prompt = await loadPrompt(
        promptTemplate,
        { webUrl, repoPath },
        distributedConfig,
        pipelineTestingMode
      );
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      const promptFailure = new PentestError(
        `Failed to load prompt for ${agentName}: ${reason}`,
        'prompt',
        false,
        { agentName, promptTemplate, originalError: reason },
        ErrorCode.PROMPT_LOAD_FAILED
      );
      return err(promptFailure);
    }

    // Step 3: checkpoint the workspace so a failed run can be rolled back.
    try {
      await createGitCheckpoint(repoPath, agentName, attemptNumber);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      const checkpointFailure = new PentestError(
        `Failed to create git checkpoint for ${agentName}: ${reason}`,
        'filesystem',
        false,
        { agentName, repoPath, originalError: reason },
        ErrorCode.GIT_CHECKPOINT_FAILED
      );
      return err(checkpointFailure);
    }

    // Step 4: open the audit entry for this attempt.
    await auditSession.startAgent(agentName, prompt, attemptNumber);

    // Step 5: run the agent.
    const run: ClaudePromptResult = await runClaudePrompt(
      prompt,
      repoPath,
      '', // context
      agentName, // description
      agentName,
      chalk.cyan,
      auditSession
    );

    // Shared shape of the audit record written for every failed attempt.
    const failedAttempt = (costUsd: number, error: string): AgentEndResult => ({
      attemptNumber,
      duration_ms: run.duration,
      cost_usd: costUsd,
      success: false,
      model: run.model,
      error,
    });

    // Step 6: defense-in-depth spending cap check. A "successful" run with at
    // most two turns and zero cost is inspected for spending-cap behavior.
    const turnCount = run.turns ?? 0;
    const runCost = run.cost || 0;
    if (run.success && turnCount <= 2 && runCost === 0) {
      const resultText = run.result || '';
      if (isSpendingCapBehavior(turnCount, runCost, resultText)) {
        const capMessage = `Spending cap likely reached: ${resultText.slice(0, 100)}`;
        await rollbackGitWorkspace(repoPath, 'spending cap detected');
        await auditSession.endAgent(agentName, failedAttempt(0, capMessage));
        return err(
          new PentestError(
            capMessage,
            'billing',
            true, // Retryable with long backoff
            { agentName, turns: run.turns, cost: run.cost },
            ErrorCode.SPENDING_CAP_REACHED
          )
        );
      }
    }

    // Step 7: the run itself reported failure.
    if (!run.success) {
      await rollbackGitWorkspace(repoPath, 'execution failure');
      await auditSession.endAgent(
        agentName,
        failedAttempt(run.cost || 0, run.error || 'Execution failed')
      );
      return err(
        new PentestError(
          run.error || 'Agent execution failed',
          'validation',
          run.retryable ?? true,
          { agentName, originalError: run.error },
          ErrorCode.AGENT_EXECUTION_FAILED
        )
      );
    }

    // Step 8: the run succeeded, now check the output.
    const outputIsValid = await validateAgentOutput(run, agentName, repoPath);
    if (!outputIsValid) {
      await rollbackGitWorkspace(repoPath, 'validation failure');
      await auditSession.endAgent(
        agentName,
        failedAttempt(run.cost || 0, 'Output validation failed')
      );
      return err(
        new PentestError(
          `Agent ${agentName} failed output validation`,
          'validation',
          true, // Retryable - agent may succeed on retry
          { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
          ErrorCode.OUTPUT_VALIDATION_FAILED
        )
      );
    }

    // Step 9: success. Commit first, then read the resulting commit hash.
    await commitGitSuccess(repoPath, agentName);
    const commitHash = await getGitCommitHash(repoPath);

    const endResult: AgentEndResult = {
      attemptNumber,
      duration_ms: run.duration,
      cost_usd: run.cost || 0,
      success: true,
      model: run.model,
      ...(commitHash && { checkpoint: commitHash }),
    };
    await auditSession.endAgent(agentName, endResult);

    return ok(endResult);
  }

  /**
   * Execute an agent, throwing instead of returning an error Result.
   *
   * Intended for Temporal activities: they catch the thrown PentestError and
   * classify it into an ApplicationFailure without importing Result helpers.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session dedicated to this agent execution
   * @returns AgentEndResult on success
   * @throws PentestError on failure
   */
  async executeOrThrow(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession
  ): Promise<AgentEndResult> {
    const outcome = await this.execute(agentName, input, auditSession);
    if (isErr(outcome)) {
      throw outcome.error;
    }
    return outcome.value;
  }

  /**
   * Build the AgentMetrics kept in workflow state from an end result and the
   * raw Claude run it came from.
   */
  static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
    const metrics: AgentMetrics = {
      durationMs: endResult.duration_ms,
      inputTokens: null, // Not currently exposed by SDK wrapper
      outputTokens: null,
      costUsd: endResult.cost_usd,
      numTurns: result.turns ?? null,
      model: result.model,
    };
    return metrics;
  }
}

View File

@@ -0,0 +1,75 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Config Loader Service
*
* Wraps parseConfig + distributeConfig with Result type for explicit error handling.
* Pure service with no Temporal dependencies.
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from '../error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';
/**
 * Service for loading and distributing configuration files.
 *
 * Provides a Result-based API for explicit error handling,
 * allowing callers to decide how to handle failures.
 */
export class ConfigLoaderService {
  /**
   * Load and distribute a configuration file.
   *
   * Any failure is wrapped in a non-retryable 'config' PentestError whose
   * ErrorCode is chosen as follows:
   * 1. A PentestError thrown during loading that already carries an ErrorCode
   *    keeps that code, and its context is carried over.
   * 2. Otherwise the code is inferred from the error message
   *    ('not found' / 'ENOENT' -> CONFIG_NOT_FOUND,
   *    'validation failed' -> CONFIG_VALIDATION_FAILED).
   * 3. Anything else is reported as CONFIG_PARSE_ERROR.
   *
   * @param configPath - Path to the YAML configuration file
   * @returns Result containing DistributedConfig on success, PentestError on failure
   */
  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
    try {
      const config = await parseConfig(configPath);
      const distributed = distributeConfig(config);
      return ok(distributed);
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);

      // Prefer the code the original error was created with. Message matching
      // would relabel coded errors whose text lacks the expected keywords.
      const classifiedSource =
        error instanceof PentestError && error.code !== undefined ? error : undefined;

      let errorCode: ErrorCode;
      if (classifiedSource?.code !== undefined) {
        errorCode = classifiedSource.code;
      } else if (errorMessage.includes('not found') || errorMessage.includes('ENOENT')) {
        errorCode = ErrorCode.CONFIG_NOT_FOUND;
      } else if (errorMessage.includes('validation failed')) {
        errorCode = ErrorCode.CONFIG_VALIDATION_FAILED;
      } else {
        errorCode = ErrorCode.CONFIG_PARSE_ERROR;
      }

      return err(
        new PentestError(
          `Failed to load config ${configPath}: ${errorMessage}`,
          'config',
          false,
          // Keep the original diagnostic details (if any); the keys set here win.
          { ...classifiedSource?.context, configPath, originalError: errorMessage },
          errorCode
        )
      );
    }
  }

  /**
   * Load config if path is provided, otherwise return null config.
   *
   * @param configPath - Optional path to the YAML configuration file
   * @returns Result containing DistributedConfig (or null) on success, PentestError on failure
   */
  async loadOptional(
    configPath: string | undefined
  ): Promise<Result<DistributedConfig | null, PentestError>> {
    // An undefined or empty path means "no config", which is not an error.
    if (!configPath) {
      return ok(null);
    }
    return this.load(configPath);
  }
}

117
src/services/container.ts Normal file
View File

@@ -0,0 +1,117 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Dependency Injection Container
*
* Provides a per-workflow container for service instances.
* Services are wired with explicit constructor injection.
*
* Usage:
* const container = getOrCreateContainer(workflowId, sessionMetadata);
* const auditSession = new AuditSession(sessionMetadata); // Per-agent
* await auditSession.initialize(workflowId);
* const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession);
*/
import type { SessionMetadata } from '../audit/utils.js';
import { AgentExecutionService } from './agent-execution.js';
import { ConfigLoaderService } from './config-loader.js';
import { ExploitationCheckerService } from './exploitation-checker.js';
/**
 * Dependencies required to create a Container.
 *
 * NOTE: AuditSession is NOT stored in the container.
 * Each agent execution receives its own AuditSession instance
 * because AuditSession uses instance state (currentAgentName) that
 * cannot be shared across parallel agents.
 */
export interface ContainerDependencies {
// Session metadata; the Container constructor stores it on the container as-is.
readonly sessionMetadata: SessionMetadata;
}
/**
 * Per-workflow DI container.
 *
 * Creates each service once at construction time so the same instances are
 * reused by every agent execution in the workflow.
 *
 * NOTE: no AuditSession lives here. It is passed to each agent execution so
 * that parallel agents each get their own logging context.
 */
export class Container {
  readonly sessionMetadata: SessionMetadata;
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;

  constructor({ sessionMetadata }: ContainerDependencies) {
    this.sessionMetadata = sessionMetadata;

    // Explicit constructor injection: the agent execution service shares the
    // container's config loader.
    const configLoader = new ConfigLoaderService();
    this.configLoader = configLoader;
    this.exploitationChecker = new ExploitationCheckerService();
    this.agentExecution = new AgentExecutionService(configLoader);
  }
}
/**
 * Containers keyed by workflowId.
 * An entry stays in the map until removeContainer deletes it.
 */
const containers = new Map<string, Container>();

/**
 * Return the Container registered for a workflow, creating and registering
 * one on first use.
 *
 * When a container already exists for the workflowId it is returned unchanged
 * and the `sessionMetadata` argument is ignored.
 *
 * @param workflowId - Unique workflow identifier
 * @param sessionMetadata - Session metadata used only when a new container is created
 * @returns Container instance for the workflow
 */
export function getOrCreateContainer(
  workflowId: string,
  sessionMetadata: SessionMetadata
): Container {
  const existing = containers.get(workflowId);
  if (existing) {
    return existing;
  }

  const created = new Container({ sessionMetadata });
  containers.set(workflowId, created);
  return created;
}
/**
 * Remove a workflow's Container from the module-level registry.
 *
 * Should be called in logWorkflowComplete to clean up resources.
 * Does nothing if no container is registered for the workflowId.
 *
 * @param workflowId - Unique workflow identifier
 */
export function removeContainer(workflowId: string): void {
containers.delete(workflowId);
}
/**
 * Look up the existing Container for a workflow without creating one.
 *
 * Unlike getOrCreateContainer, this never registers a new container;
 * it returns undefined when none exists for the workflowId.
 *
 * Useful for lightweight activities that can benefit from an existing
 * container but don't need to create one.
 *
 * @param workflowId - Unique workflow identifier
 * @returns Container instance or undefined
 */
export function getContainer(workflowId: string): Container | undefined {
return containers.get(workflowId);
}

View File

@@ -0,0 +1,74 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Exploitation Checker Service
*
* Pure domain logic for determining whether exploitation should run.
* Reads queue file, parses JSON, returns decision.
*
* No Temporal dependencies - this is pure business logic.
*/
import chalk from 'chalk';
import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
import { isOk } from '../types/result.js';
/**
 * Decides whether an exploit agent should run for a vulnerability type,
 * based on the outcome of validateQueueSafe for that type.
 */
export class ExploitationCheckerService {
  /**
   * Check if exploitation should run for a given vulnerability type.
   *
   * Outcomes:
   * - validation succeeded: the decision is logged and returned;
   * - validation failed with a retryable error: the error is logged and re-thrown;
   * - validation failed with a non-retryable error: a "do not exploit"
   *   decision is logged and returned instead of failing.
   *
   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
   * @param repoPath - Path to the repository containing deliverables
   * @returns ExploitationDecision indicating whether to exploit
   * @throws PentestError if validation fails and is retryable
   */
  async checkQueue(vulnType: VulnType, repoPath: string): Promise<ExploitationDecision> {
    const validation = await validateQueueSafe(vulnType, repoPath);

    if (!isOk(validation)) {
      const failure = validation.error;

      if (failure.retryable) {
        // Let the caller drive the retry.
        console.log(chalk.yellow(` ${vulnType}: ${failure.message} (retryable)`));
        throw failure;
      }

      // Non-retryable: skip exploitation gracefully.
      console.log(chalk.yellow(` ${vulnType}: ${failure.message}, skipping exploitation`));
      const skipDecision: ExploitationDecision = {
        shouldExploit: false,
        shouldRetry: false,
        vulnerabilityCount: 0,
        vulnType,
      };
      return skipDecision;
    }

    const decision = validation.value;
    const summary = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    console.log(chalk.blue(` ${vulnType}: ${summary}`));
    return decision;
  }
}

20
src/services/index.ts Normal file
View File

@@ -0,0 +1,20 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Services Module
*
* Exports DI container and service classes for Shannon agent execution.
* Services are pure domain logic with no Temporal dependencies.
*/
export { Container, getOrCreateContainer, removeContainer } from './container.js';
export type { ContainerDependencies } from './container.js';
export { ConfigLoaderService } from './config-loader.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';

View File

@@ -7,72 +7,99 @@
import type { AgentName, AgentDefinition } from './types/index.js';
// Agent definitions according to PRD
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
'pre-recon': {
name: 'pre-recon',
displayName: 'Pre-recon agent',
prerequisites: []
prerequisites: [],
promptTemplate: 'pre-recon-code',
deliverableFilename: 'code_analysis_deliverable.md',
},
'recon': {
name: 'recon',
displayName: 'Recon agent',
prerequisites: ['pre-recon']
prerequisites: ['pre-recon'],
promptTemplate: 'recon',
deliverableFilename: 'recon_deliverable.md',
},
'injection-vuln': {
name: 'injection-vuln',
displayName: 'Injection vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-injection',
deliverableFilename: 'injection_analysis_deliverable.md',
},
'xss-vuln': {
name: 'xss-vuln',
displayName: 'XSS vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-xss',
deliverableFilename: 'xss_analysis_deliverable.md',
},
'auth-vuln': {
name: 'auth-vuln',
displayName: 'Auth vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-auth',
deliverableFilename: 'auth_analysis_deliverable.md',
},
'ssrf-vuln': {
name: 'ssrf-vuln',
displayName: 'SSRF vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-ssrf',
deliverableFilename: 'ssrf_analysis_deliverable.md',
},
'authz-vuln': {
name: 'authz-vuln',
displayName: 'Authz vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-authz',
deliverableFilename: 'authz_analysis_deliverable.md',
},
'injection-exploit': {
name: 'injection-exploit',
displayName: 'Injection exploit agent',
prerequisites: ['injection-vuln']
prerequisites: ['injection-vuln'],
promptTemplate: 'exploit-injection',
deliverableFilename: 'injection_exploitation_evidence.md',
},
'xss-exploit': {
name: 'xss-exploit',
displayName: 'XSS exploit agent',
prerequisites: ['xss-vuln']
prerequisites: ['xss-vuln'],
promptTemplate: 'exploit-xss',
deliverableFilename: 'xss_exploitation_evidence.md',
},
'auth-exploit': {
name: 'auth-exploit',
displayName: 'Auth exploit agent',
prerequisites: ['auth-vuln']
prerequisites: ['auth-vuln'],
promptTemplate: 'exploit-auth',
deliverableFilename: 'auth_exploitation_evidence.md',
},
'ssrf-exploit': {
name: 'ssrf-exploit',
displayName: 'SSRF exploit agent',
prerequisites: ['ssrf-vuln']
prerequisites: ['ssrf-vuln'],
promptTemplate: 'exploit-ssrf',
deliverableFilename: 'ssrf_exploitation_evidence.md',
},
'authz-exploit': {
name: 'authz-exploit',
displayName: 'Authz exploit agent',
prerequisites: ['authz-vuln']
prerequisites: ['authz-vuln'],
promptTemplate: 'exploit-authz',
deliverableFilename: 'authz_exploitation_evidence.md',
},
'report': {
name: 'report',
displayName: 'Report agent',
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
}
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
promptTemplate: 'report-executive',
deliverableFilename: 'comprehensive_security_assessment_report.md',
},
});
// Phase names for metrics aggregation

View File

@@ -7,28 +7,57 @@
/**
* Temporal activities for Shannon agent execution.
*
* Each activity wraps a single agent execution with:
* Each activity wraps service calls with Temporal-specific concerns:
* - Heartbeat loop (2s interval) to signal worker liveness
* - Git checkpoint/rollback/commit per attempt
* - Error classification for Temporal retry behavior
* - Audit session logging
* - Error classification into ApplicationFailure
* - Container lifecycle management
*
* Temporal handles retries based on error classification:
* - Retryable: BillingError, TransientError (429, 5xx, network)
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
* Business logic is delegated to services in src/services/.
*/
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import chalk from 'chalk';
import path from 'path';
import fs from 'fs/promises';
import { classifyErrorForTemporal, PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import type { VulnType, ExploitationDecision } from '../queue-validation.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { AGENTS } from '../session-manager.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
// Max lengths to prevent Temporal protobuf buffer overflow
const MAX_ERROR_MESSAGE_LENGTH = 2000;
const MAX_STACK_TRACE_LENGTH = 1000;
// Max retries for output validation errors (agent didn't save deliverables)
// Lower than default 50 since this is unlikely to self-heal
const MAX_OUTPUT_VALIDATION_RETRIES = 3;
// Period of the heartbeat loop that signals worker liveness (2s).
// NOTE(review): presumably must stay below the activity heartbeatTimeout — confirm against the workflow's activity options.
const HEARTBEAT_INTERVAL_MS = 2000;
/**
 * Input for all agent activities.
 *
 * The same object is also accepted by the report, resume, and logging
 * activities in this module, and is the source of the session metadata.
 */
export interface ActivityInput {
/** Target URL; copied into the session metadata. */
webUrl: string;
/** Repository path; copied into the session metadata. */
repoPath: string;
/** Optional config file path; config is only loaded when this is provided. */
configPath?: string;
/** Optional output path; included in the session metadata only when set. */
outputPath?: string;
/** Pipeline testing mode flag; treated as false when omitted. */
pipelineTestingMode?: boolean;
/** Workflow ID; required for audit correlation and used as the container key. */
workflowId: string;
/** Workspace name (for resume) or workflowId (for new runs); becomes the session metadata id. */
sessionId: string;
}
/**
* Truncate error message to prevent buffer overflow in Temporal serialization.
*/
@@ -48,85 +77,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
}
}
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { parseConfig, distributeConfig } from '../config-parser.js';
import { classifyErrorForTemporal } from '../error-handling.js';
import {
safeValidateQueueAndDeliverable,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import path from 'path';
import fs from 'fs/promises';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
/**
* Input for all agent activities.
* Matches PipelineInput but with required workflowId for audit correlation.
* Build SessionMetadata from ActivityInput.
*/
export interface ActivityInput {
webUrl: string;
repoPath: string;
configPath?: string;
outputPath?: string;
pipelineTestingMode?: boolean;
workflowId: string;
sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
/**
 * Derive the audit SessionMetadata for an activity from its input.
 *
 * The session id is taken from input.sessionId. outputPath is carried over
 * only when it is truthy, so the key is absent (not undefined) otherwise.
 */
function buildSessionMetadata(input: ActivityInput): SessionMetadata {
  const required = {
    id: input.sessionId,
    webUrl: input.webUrl,
    repoPath: input.repoPath,
  };

  // Omit the optional key entirely when no output path was supplied
  return input.outputPath ? { ...required, outputPath: input.outputPath } : required;
}
/**
* Core activity implementation.
* Core activity implementation using services.
*
* Executes a single agent with:
* 1. Heartbeat loop for worker liveness
* 2. Config loading (if configPath provided)
* 3. Audit session initialization
* 4. Prompt loading
* 5. Git checkpoint before execution
* 6. Agent execution (single attempt)
* 7. Output validation
* 8. Git commit on success, rollback on failure
* 9. Error classification for Temporal retry
* 2. Container creation/reuse
* 3. Service-based agent execution
* 4. Error classification for Temporal retry
*/
async function runAgentActivity(
agentName: AgentName,
input: ActivityInput
): Promise<AgentMetrics> {
const {
webUrl,
repoPath,
configPath,
outputPath,
pipelineTestingMode = false,
workflowId,
} = input;
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
const startTime = Date.now();
// Get attempt number from Temporal context (tracks retries automatically)
const attemptNumber = Context.current().info.attempt;
// Heartbeat loop - signals worker is alive to Temporal server
@@ -136,158 +114,63 @@ async function runAgentActivity(
}, HEARTBEAT_INTERVAL_MS);
try {
// 1. Load config (if provided)
let distributedConfig: DistributedConfig | null = null;
if (configPath) {
try {
const config = await parseConfig(configPath);
distributedConfig = distributeConfig(config);
} catch (err) {
throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
}
}
// Build session metadata and get/create container
const sessionMetadata = buildSessionMetadata(input);
const container = getOrCreateContainer(workflowId, sessionMetadata);
// 2. Build session metadata for audit
// Use sessionId (workspace name) for directory, workflowId for tracking
const sessionMetadata: SessionMetadata = {
id: input.sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
// 3. Initialize audit session (idempotent, safe across retries)
// Create audit session for THIS agent execution
// NOTE: Each agent needs its own AuditSession because AuditSession uses
// instance state (currentAgentName) that cannot be shared across parallel agents
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
// 4. Load prompt
const promptName = getPromptNameForAgent(agentName);
const prompt = await loadPrompt(
promptName,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode
);
// 5. Create git checkpoint before execution
await createGitCheckpoint(repoPath, agentName, attemptNumber);
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 6. Execute agent (single attempt - Temporal handles retries)
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
// Execute agent via service (throws PentestError on failure)
const endResult = await container.agentExecution.executeOrThrow(
agentName,
chalk.cyan,
{
webUrl,
repoPath,
configPath,
pipelineTestingMode,
attemptNumber,
},
auditSession
);
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
if (looksLikeBillingError) {
await rollbackGitWorkspace(repoPath, 'spending cap detected');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: 0,
success: false,
model: result.model,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
}
}
// 7. Handle execution failure
if (!result.success) {
await rollbackGitWorkspace(repoPath, 'execution failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: result.error || 'Execution failed',
});
throw new Error(result.error || 'Agent execution failed');
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
if (!validationPassed) {
await rollbackGitWorkspace(repoPath, 'validation failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: 'Output validation failed',
});
// Limit output validation retries (unlikely to self-heal)
if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Let Temporal retry (will be classified as OutputValidationError)
throw new Error(`Agent ${agentName} failed output validation`);
}
// 9. Success - commit deliverables, then capture checkpoint hash
await commitGitSuccess(repoPath, agentName);
const commitHash = await getGitCommitHash(repoPath);
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
});
// 10. Return metrics
// Success - return metrics
return {
durationMs: Date.now() - startTime,
inputTokens: null, // Not currently exposed by SDK wrapper
inputTokens: null,
outputTokens: null,
costUsd: result.cost ?? null,
numTurns: result.turns ?? null,
model: result.model,
costUsd: endResult.cost_usd,
numTurns: null,
model: endResult.model,
};
} catch (error) {
// Rollback git workspace before Temporal retry to ensure clean state
try {
await rollbackGitWorkspace(repoPath, 'error recovery');
} catch (rollbackErr) {
// Log but don't fail - rollback is best-effort
console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
}
// If error is already an ApplicationFailure (e.g., from our retry limit logic),
// re-throw it directly without re-classifying
// If error is already an ApplicationFailure, re-throw directly
if (error instanceof ApplicationFailure) {
throw error;
}
// Check if output validation retry limit reached (PentestError with code)
if (
error instanceof PentestError &&
error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Classify error for Temporal retry behavior
const classified = classifyErrorForTemporal(error);
// Truncate message to prevent protobuf buffer overflow
const rawMessage = error instanceof Error ? error.message : String(error);
const message = truncateErrorMessage(rawMessage);
if (classified.retryable) {
// Temporal will retry with configured backoff
const failure = ApplicationFailure.create({
message,
type: classified.type,
@@ -296,7 +179,6 @@ async function runAgentActivity(
truncateStackTrace(failure);
throw failure;
} else {
// Fail immediately - no retry
const failure = ApplicationFailure.nonRetryable(message, classified.type, [
{ agentName, attemptNumber, elapsed: Date.now() - startTime },
]);
@@ -309,7 +191,6 @@ async function runAgentActivity(
}
// === Individual Agent Activity Exports ===
// Each function is a thin wrapper around runAgentActivity with the agent name.
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('pre-recon', input);
@@ -363,25 +244,24 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
return runAgentActivity('report', input);
}
// === Report Assembly Activities ===
/**
* Assemble the final report by concatenating exploitation evidence files.
* This must be called BEFORE runReportAgent to create the file that the report agent will modify.
*/
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
const { repoPath } = input;
console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
console.log(chalk.blue(' Assembling deliverables from specialist agents...'));
try {
await assembleFinalReport(repoPath);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
// Don't throw - the report agent can still create content even if no exploitation files exist
console.log(chalk.yellow(` Warning: Error assembling final report: ${err.message}`));
}
}
/**
* Inject model metadata into the final report.
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
*/
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, sessionId, outputPath } = input;
@@ -392,65 +272,33 @@ export async function injectReportMetadataActivity(input: ActivityInput): Promis
await injectModelIntoReport(repoPath, effectiveOutputPath);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
// Don't throw - this is a non-critical enhancement
console.log(chalk.yellow(` Warning: Error injecting model into report: ${err.message}`));
}
}
// === Exploitation Queue Check ===
/**
* Check if exploitation should run for a given vulnerability type.
* Reads the vulnerability queue file and returns the decision.
*
* This activity allows the workflow to skip exploit agents entirely
* when no vulnerabilities were found, saving API calls and time.
*
* Error handling:
* - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
* - Non-retryable errors: skip exploitation gracefully
* Uses existing container if available (from prior agent runs),
* otherwise creates service directly (stateless, no dependencies).
*/
export async function checkExploitationQueue(
input: ActivityInput,
vulnType: VulnType
): Promise<ExploitationDecision> {
const { repoPath } = input;
const { repoPath, workflowId } = input;
const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
// Reuse container's service if available (from prior vuln agent runs)
const existingContainer = getContainer(workflowId);
const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
if (result.success && result.data) {
const { shouldExploit, vulnerabilityCount } = result.data;
console.log(
chalk.blue(
`🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
)
);
return result.data;
}
// Validation failed - check if we should retry or skip
const error = result.error;
if (error?.retryable) {
// Re-throw retryable errors so Temporal can retry the vuln agent
console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
throw error;
}
// Non-retryable error - skip exploitation gracefully
console.log(
chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
);
return {
shouldExploit: false,
shouldRetry: false,
vulnerabilityCount: 0,
vulnType,
};
return checker.checkQueue(vulnType, repoPath);
}
// === Resume Activities ===
/**
* Session.json structure for resume state loading
*/
interface SessionJson {
session: {
id: string;
@@ -460,18 +308,18 @@ interface SessionJson {
resumeAttempts?: ResumeAttempt[];
};
metrics: {
agents: Record<string, {
status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}>;
agents: Record<
string,
{
status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}
>;
};
}
/**
* Load resume state from an existing workspace.
* Validates workspace exists, URL matches, and determines which agents to skip.
*
* @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
*/
export async function loadResumeState(
workspaceName: string,
@@ -480,7 +328,6 @@ export async function loadResumeState(
): Promise<ResumeState> {
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
// Validate workspace exists
const exists = await fileExists(sessionPath);
if (!exists) {
throw ApplicationFailure.nonRetryable(
@@ -489,7 +336,6 @@ export async function loadResumeState(
);
}
// Load session.json
let session: SessionJson;
try {
session = await readJson<SessionJson>(sessionPath);
@@ -501,7 +347,6 @@ export async function loadResumeState(
);
}
// Validate URL matches
if (session.session.webUrl !== expectedUrl) {
throw ApplicationFailure.nonRetryable(
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
@@ -509,20 +354,17 @@ export async function loadResumeState(
);
}
// Find completed agents (status === 'success' AND deliverable exists)
const completedAgents: string[] = [];
const agents = session.metrics.agents;
for (const agentName of ALL_AGENTS) {
const agentData = agents[agentName];
// Skip if agent never ran or didn't succeed
if (!agentData || agentData.status !== 'success') {
continue;
}
// Validate deliverable exists
const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
const deliverableExists = await fileExists(deliverablePath);
if (!deliverableExists) {
@@ -532,11 +374,9 @@ export async function loadResumeState(
continue;
}
// Agent completed successfully and deliverable exists
completedAgents.push(agentName);
}
// Find latest checkpoint from completed agents
const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null);
@@ -548,18 +388,16 @@ export async function loadResumeState(
throw ApplicationFailure.nonRetryable(
`Cannot resume workspace ${workspaceName}: ` +
(successAgents.length > 0
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
`but their deliverable files are missing from disk. ` +
`Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`),
(successAgents.length > 0
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
`but their deliverable files are missing from disk. ` +
`Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`),
'NoCheckpointsError'
);
}
// Find most recent commit among checkpoints
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
console.log(chalk.cyan(`=== RESUME STATE ===`));
@@ -576,20 +414,21 @@ export async function loadResumeState(
};
}
/**
* Find the most recent commit among a list of commit hashes.
* Uses git rev-list to determine which commit is newest.
*/
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) {
const hash = commitHashes[0];
if (!hash) {
throw new Error('Empty commit hash in array');
throw new PentestError(
'Empty commit hash in array',
'filesystem',
false, // Non-retryable - corrupt workspace state
{ phase: 'resume' },
ErrorCode.GIT_CHECKPOINT_FAILED
);
}
return hash;
}
// Use git rev-list to find the most recent commit among all hashes
const result = await executeGitCommandWithRetry(
['git', 'rev-list', '--max-count=1', ...commitHashes],
repoPath,
@@ -601,10 +440,6 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
/**
* Restore git workspace to a checkpoint and clean up partial deliverables.
*
* @param repoPath - Repository path
* @param checkpointHash - Git commit hash to reset to
* @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
*/
export async function restoreGitCheckpoint(
repoPath: string,
@@ -613,8 +448,6 @@ export async function restoreGitCheckpoint(
): Promise<void> {
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
// Checkpoint hash points to the success commit (after commitGitSuccess),
// so git reset --hard naturally preserves all completed agent deliverables.
await executeGitCommandWithRetry(
['git', 'reset', '--hard', checkpointHash],
repoPath,
@@ -626,9 +459,9 @@ export async function restoreGitCheckpoint(
'clean untracked files for resume'
);
// Clean up any partial deliverables from incomplete agents
for (const agentName of incompleteAgents) {
const deliverablePath = getDeliverablePath(agentName, repoPath);
const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
try {
const exists = await fileExists(deliverablePath);
if (exists) {
@@ -645,48 +478,31 @@ export async function restoreGitCheckpoint(
/**
* Record a resume attempt in session.json.
* Tracks the new workflow ID, terminated workflows, and checkpoint hash.
*/
export async function recordResumeAttempt(
input: ActivityInput,
terminatedWorkflows: string[],
checkpointHash: string
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
}
// === Phase Transition Activities ===
/**
* Log phase transition to the unified workflow log.
* Called at phase boundaries for per-workflow logging.
*/
export async function logPhaseTransition(
input: ActivityInput,
phase: string,
event: 'start' | 'complete'
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.initialize(input.workflowId);
if (event === 'start') {
await auditSession.logPhaseStart(phase);
@@ -696,28 +512,22 @@ export async function logPhaseTransition(
}
/**
* Log workflow completion with full summary to the unified workflow log.
* Called at the end of the workflow to write a summary breakdown.
* Log workflow completion with full summary.
* Cleans up container when done.
*/
export async function logWorkflowComplete(
input: ActivityInput,
summary: WorkflowSummary
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status);
// Use cumulative metrics from session.json (includes all resume attempts)
const sessionData = await auditSession.getMetrics() as {
// Use cumulative metrics from session.json
const sessionData = (await auditSession.getMetrics()) as {
metrics: {
total_duration_ms: number;
total_cost_usd: number;
@@ -725,7 +535,7 @@ export async function logWorkflowComplete(
};
};
// Fill in metrics for skipped agents (completed in previous runs)
// Fill in metrics for skipped agents
const agentMetrics = { ...summary.agentMetrics };
for (const agentName of summary.completedAgents) {
if (!agentMetrics[agentName]) {
@@ -747,10 +557,13 @@ export async function logWorkflowComplete(
};
await auditSession.logWorkflowComplete(cumulativeSummary);
// Copy all deliverables to audit-logs once at workflow end (non-fatal)
// Copy deliverables to audit-logs
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
console.error('Failed to copy deliverables to audit-logs:', copyErr);
}
// Clean up container
removeContainer(workflowId);
}

View File

@@ -1,5 +1,9 @@
import { defineQuery } from '@temporalio/workflow';
// Re-export AgentMetrics from central types location
export type { AgentMetrics } from '../types/metrics.js';
import type { AgentMetrics } from '../types/metrics.js';
// === Types ===
export interface PipelineInput {
@@ -22,15 +26,6 @@ export interface ResumeState {
originalWorkflowId: string;
}
/** Metrics reported for a single agent run. */
export interface AgentMetrics {
/** Elapsed time of the run in milliseconds. */
durationMs: number;
/** Input token count, or null when the value is not available. */
inputTokens: number | null;
/** Output token count, or null when the value is not available. */
outputTokens: number | null;
/** Cost in USD, or null when the value is not available. */
costUsd: number | null;
/** Number of turns, or null when the value is not available. */
numTurns: number | null;
/** Model name, when known. */
model?: string | undefined;
}
export interface PipelineSummary {
totalCostUsd: number;
totalDurationMs: number; // Wall-clock time (end - start)

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Maps PipelineState to WorkflowSummary for audit logging.
* Pure function with no side effects.
*/
import type { PipelineState } from './shared.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
/**
 * Project a PipelineState onto the WorkflowSummary shape used for audit logging.
 *
 * The function is deterministic: it uses neither Date.now() nor I/O, so it can
 * be imported into Temporal workflow code. The caller must have populated
 * state.summary (via computeSummary) beforehand.
 *
 * @param state - Pipeline state with its summary already computed
 * @param status - Final workflow status to record
 * @returns Summary carrying totals, completed agents, and per-agent duration/cost
 * @throws Error if state.summary is not set
 */
export function toWorkflowSummary(
  state: PipelineState,
  status: 'completed' | 'failed'
): WorkflowSummary {
  const totals = state.summary;
  if (!totals) {
    throw new Error('toWorkflowSummary: state.summary must be set before calling');
  }

  // Keep only duration and cost per agent; other metric fields are dropped
  const perAgentEntries = Object.entries(state.agentMetrics).map(([agentName, metrics]) => [
    agentName,
    { durationMs: metrics.durationMs, costUsd: metrics.costUsd },
  ]);

  // The error key is present only when the state carries a truthy error
  const errorField = state.error ? { error: state.error } : {};

  return {
    status,
    totalDurationMs: totals.totalDurationMs,
    totalCostUsd: totals.totalCostUsd,
    completedAgents: state.completedAgents,
    agentMetrics: Object.fromEntries(perAgentEntries),
    ...errorField,
  };
}

View File

@@ -43,6 +43,7 @@ import {
import type { VulnType } from '../queue-validation.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { toWorkflowSummary } from './summary-mapper.js';
// Retry configuration for production (long intervals for billing recovery)
const PRODUCTION_RETRY = {
@@ -417,18 +418,7 @@ export async function pentestPipelineWorkflow(
state.summary = computeSummary(state);
// Log workflow completion summary
await a.logWorkflowComplete(activityInput, {
status: 'completed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
});
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));
return state;
} catch (error) {
@@ -438,19 +428,7 @@ export async function pentestPipelineWorkflow(
state.summary = computeSummary(state);
// Log workflow failure summary
await a.logWorkflowComplete(activityInput, {
status: 'failed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
error: state.error ?? undefined,
});
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));
throw error;
}

View File

@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
*/
export type AgentName = typeof ALL_AGENTS[number];
/**
* Prompt file names that getPromptNameForAgent can return.
* Each member is a distinct string literal, so a misspelled prompt name is a compile-time error.
*/
export type PromptName =
| 'pre-recon-code'
| 'recon'
| 'vuln-injection'
| 'vuln-xss'
| 'vuln-auth'
| 'vuln-ssrf'
| 'vuln-authz'
| 'exploit-injection'
| 'exploit-xss'
| 'exploit-auth'
| 'exploit-ssrf'
| 'exploit-authz'
| 'report-executive';
export type PlaywrightAgent =
| 'playwright-agent1'
| 'playwright-agent2'
@@ -69,52 +54,6 @@ export interface AgentDefinition {
name: AgentName;
displayName: string;
prerequisites: AgentName[];
}
/**
 * Resolves the prompt file name that belongs to a given agent.
 *
 * The lookup table is typed as Record<AgentName, PromptName>, so the compiler
 * rejects the table if any agent is left without an entry.
 *
 * @param agentName - Agent whose prompt should be resolved
 * @returns The prompt file name registered for that agent
 */
export function getPromptNameForAgent(agentName: AgentName): PromptName {
  const promptByAgent: Record<AgentName, PromptName> = {
    // Reconnaissance agents
    'pre-recon': 'pre-recon-code',
    recon: 'recon',

    // Vulnerability analysis agents
    'injection-vuln': 'vuln-injection',
    'xss-vuln': 'vuln-xss',
    'auth-vuln': 'vuln-auth',
    'ssrf-vuln': 'vuln-ssrf',
    'authz-vuln': 'vuln-authz',

    // Exploitation agents
    'injection-exploit': 'exploit-injection',
    'xss-exploit': 'exploit-xss',
    'auth-exploit': 'exploit-auth',
    'ssrf-exploit': 'exploit-ssrf',
    'authz-exploit': 'exploit-authz',

    // Reporting agent
    report: 'report-executive',
  };

  const promptName = promptByAgent[agentName];
  return promptName;
}
/**
* Maps an agent name to its deliverable file path.
* Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
*/
export function getDeliverablePath(agentName: AgentName, repoPath: string): string {
const deliverableMap: Record<AgentName, string> = {
'pre-recon': 'code_analysis_deliverable.md',
'recon': 'recon_deliverable.md',
'injection-vuln': 'injection_analysis_deliverable.md',
'xss-vuln': 'xss_analysis_deliverable.md',
'auth-vuln': 'auth_analysis_deliverable.md',
'ssrf-vuln': 'ssrf_analysis_deliverable.md',
'authz-vuln': 'authz_analysis_deliverable.md',
'injection-exploit': 'injection_exploitation_evidence.md',
'xss-exploit': 'xss_exploitation_evidence.md',
'auth-exploit': 'auth_exploitation_evidence.md',
'ssrf-exploit': 'ssrf_exploitation_evidence.md',
'authz-exploit': 'authz_exploitation_evidence.md',
'report': 'comprehensive_security_assessment_report.md',
};
const filename = deliverableMap[agentName];
return `${repoPath}/deliverables/${filename}`;
promptTemplate: string;
deliverableFilename: string;
}

View File

@@ -51,7 +51,6 @@ export interface Authentication {
export interface Config {
rules?: Rules;
authentication?: Authentication;
login?: unknown;
}
export interface DistributedConfig {

View File

@@ -8,6 +8,39 @@
* Error type definitions
*/
/**
* Specific error codes for reliable classification.
*
* ErrorCode provides precision within the coarse 8-category PentestErrorType.
* Used by classifyErrorForTemporal for code-based classification (preferred)
* with string matching as fallback for external errors.
*
* This is a string enum: every member's runtime value is the same text as its
* member name, so the serialized form of a code is its own name.
*
* The group comments below record which PentestErrorType each group of codes
* is documented to belong to; the enum itself does not enforce that pairing.
*/
export enum ErrorCode {
// Config errors (PentestErrorType: 'config')
CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
// Agent execution errors (PentestErrorType: 'validation')
AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
// Billing errors (PentestErrorType: 'billing')
API_RATE_LIMITED = 'API_RATE_LIMITED',
SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
// Git errors (PentestErrorType: 'filesystem')
GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
// Prompt errors (PentestErrorType: 'prompt')
PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
// Validation errors (PentestErrorType: 'validation')
// Shares the 'validation' type with the agent execution group above.
DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
}
export type PentestErrorType =
| 'config'
| 'network'

View File

@@ -12,3 +12,5 @@ export * from './errors.js';
export * from './config.js';
export * from './agents.js';
export * from './audit.js';
export * from './result.js';
export * from './metrics.js';

19
src/types/metrics.ts Normal file
View File

@@ -0,0 +1,19 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Agent metrics types used across services and activities.
* Centralized here to avoid temporal/shared.ts import boundary violations.
*
* Only durationMs is guaranteed to be a number; the token, cost and turn
* fields are nullable, so consumers must handle null before doing arithmetic.
*/
export interface AgentMetrics {
/** Duration in milliseconds (per the Ms suffix). Always present. */
durationMs: number;
/** NOTE(review): null presumably means the count was not reported; confirm against producers. */
inputTokens: number | null;
/** NOTE(review): null presumably means the count was not reported; confirm against producers. */
outputTokens: number | null;
/** Cost in USD (per the Usd suffix). NOTE(review): the meaning of null is not shown here; confirm. */
costUsd: number | null;
/** Number of turns. NOTE(review): the meaning of null is not shown here; confirm. */
numTurns: number | null;
/**
* Optional model identifier; the property may be omitted or set to undefined.
* NOTE(review): the explicit `| undefined` only matters under exactOptionalPropertyTypes; confirm tsconfig.
*/
model?: string | undefined;
}

62
src/types/result.ts Normal file
View File

@@ -0,0 +1,62 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Minimal Result type for explicit error handling.
*
* A discriminated union that makes error handling explicit without adding
* heavy machinery. Used in key modules (config loading, agent execution,
* queue validation) where callers need to make decisions based on error type.
*/
/**
 * Successful outcome of an operation.
 * The literal `ok: true` tag is what distinguishes it from Err.
 */
export interface Ok<T> {
  readonly ok: true;
  readonly value: T;
}

/**
 * Failed outcome of an operation.
 * The literal `ok: false` tag is what distinguishes it from Ok.
 */
export interface Err<E> {
  readonly ok: false;
  readonly error: E;
}

/**
 * Outcome of an operation that can fail: an Ok holding a value of type T,
 * or an Err holding an error of type E. Discriminate on the `ok` field.
 */
export type Result<T, E> = Ok<T> | Err<E>;

/**
 * Wraps a value in the success variant.
 *
 * @param value - The value to carry
 * @returns A new Ok object holding `value`
 */
export function ok<T>(value: T): Ok<T> {
  const success: Ok<T> = { ok: true, value };
  return success;
}

/**
 * Wraps an error in the failure variant.
 *
 * @param error - The error to carry
 * @returns A new Err object holding `error`
 */
export function err<E>(error: E): Err<E> {
  const failure: Err<E> = { ok: false, error };
  return failure;
}

/**
 * Narrows a Result to its success variant.
 *
 * @param result - The Result to inspect
 * @returns true when the `ok` tag is strictly `true`
 */
export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
  const { ok: tag } = result;
  return tag === true;
}

/**
 * Narrows a Result to its failure variant.
 *
 * @param result - The Result to inspect
 * @returns true when the `ok` tag is strictly `false`
 */
export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
  const { ok: tag } = result;
  return tag === false;
}

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Consolidated billing/spending cap detection utilities.
*
* Anthropic's spending cap behavior is inconsistent:
* - Sometimes a proper SDK error (billing_error)
* - Sometimes Claude responds with text about the cap
* - Sometimes partial billing before cutoff
*
* This module provides defense-in-depth detection with shared pattern lists
* to prevent drift between detection points.
*/
/**
 * Phrases Claude itself tends to emit when a spending cap is hit.
 * Matched against SDK output text (message-handlers.ts) and used by the
 * behavioral heuristic in isSpendingCapBehavior.
 *
 * All entries are lowercase because matching lowercases the input first.
 *
 * NOTE(review): 'resets' is a very generic substring. isSpendingCapBehavior
 * gates it behind the turn/cost checks, but direct callers of
 * matchesBillingTextPattern get no such gate; confirm that is acceptable.
 */
export const BILLING_TEXT_PATTERNS = [
  'spending cap',
  'spending limit',
  'cap reached',
  'budget exceeded',
  'usage limit',
  'resets',
] as const;

/**
 * Substrings found in billing-related error messages returned by the API.
 * Consumed by classifyErrorForTemporal in error-handling.ts.
 *
 * All entries are lowercase because matching lowercases the input first.
 */
export const BILLING_API_PATTERNS = [
  'billing_error',
  'credit balance is too low',
  'insufficient credits',
  'usage is blocked due to insufficient credits',
  'please visit plans & billing',
  'please visit plans and billing',
  'usage limit reached',
  'quota exceeded',
  'daily rate limit',
  'limit will reset',
  'billing limit reached',
] as const;

/**
 * Case-insensitive substring search: reports whether `text` contains at least
 * one of `patterns`. Patterns are expected to be lowercase already.
 */
function containsAnyPattern(text: string, patterns: readonly string[]): boolean {
  const haystack = text.toLowerCase();
  for (const pattern of patterns) {
    if (haystack.includes(pattern)) {
      return true;
    }
  }
  return false;
}

/**
 * Tests SDK output content for spending-cap wording.
 *
 * @param text - Text to inspect; compared case-insensitively
 * @returns true when any BILLING_TEXT_PATTERNS entry occurs in `text`
 */
export function matchesBillingTextPattern(text: string): boolean {
  return containsAnyPattern(text, BILLING_TEXT_PATTERNS);
}

/**
 * Tests an API error message for billing-related wording.
 *
 * @param message - Error message to inspect; compared case-insensitively
 * @returns true when any BILLING_API_PATTERNS entry occurs in `message`
 */
export function matchesBillingApiPattern(message: string): boolean {
  return containsAnyPattern(message, BILLING_API_PATTERNS);
}

/**
 * Behavioral heuristic for a spending-cap hit.
 *
 * A capped run often comes back as a short message at $0 cost, whereas
 * legitimate agent work never costs $0 in only 1-2 turns. The heuristic
 * therefore requires all three signals together:
 * 1. Turn count not above 2
 * 2. Cost of exactly 0
 * 3. Result text matching a billing text pattern
 *
 * @param turns - Number of turns the agent took
 * @param cost - Total cost in USD
 * @param resultText - The result text from the agent
 * @returns true if this looks like a spending cap hit
 */
export function isSpendingCapBehavior(
  turns: number,
  cost: number,
  resultText: string
): boolean {
  // The text is only inspected once both cheap numeric signals agree.
  const exceedsTurnLimit = turns > 2;
  const incurredCost = cost !== 0;
  if (exceedsTurnLimit || incurredCost) {
    return false;
  }

  const mentionsBilling = matchesBillingTextPattern(resultText);
  return mentionsBilling;
}

View File

@@ -6,6 +6,8 @@
import { $ } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
import { ErrorCode } from '../types/errors.js';
/**
* Check if a directory is a git repository.
@@ -148,7 +150,13 @@ export async function executeGitCommandWithRetry(
throw error;
}
}
throw new Error(`Git command failed after ${maxRetries} retries`);
throw new PentestError(
`Git command failed after ${maxRetries} retries`,
'filesystem',
true, // Retryable - transient git lock issues
{ maxRetries, description },
ErrorCode.GIT_CHECKPOINT_FAILED
);
} finally {
gitSemaphore.release();
}
@@ -189,9 +197,18 @@ export async function rollbackGitWorkspace(
);
return { success: true };
} catch (error) {
const result = toErrorResult(error);
console.log(chalk.red(` ❌ Rollback failed after retries: ${result.error?.message}`));
return result;
const errMsg = error instanceof Error ? error.message : String(error);
console.log(chalk.red(` ❌ Rollback failed after retries: ${errMsg}`));
return {
success: false,
error: new PentestError(
`Git rollback failed: ${errMsg}`,
'filesystem',
false, // Non-retryable - rollback is best-effort cleanup
{ sourceDir, reason },
ErrorCode.GIT_ROLLBACK_FAILED
),
};
}
}