mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-04-01 18:30:35 +02:00
refactor: extract services layer, Result type, and ErrorCode classification
- Add DI container (src/services/) with AgentExecutionService, ConfigLoaderService, and ExploitationCheckerService — pure domain logic with no Temporal dependencies - Introduce Result<T, E> type and ErrorCode enum for code-based error classification in classifyErrorForTemporal, replacing scattered string matching - Consolidate billing/spending cap detection into utils/billing-detection.ts with shared pattern lists across message-handlers, claude-executor, and error-handling - Extract LogStream abstraction for append-only logging with backpressure, used by both AgentLogger and WorkflowLogger - Simplify activities.ts from inline lifecycle logic to thin wrappers delegating to services, with heartbeat and error classification - Expand config-parser with human-readable AJV errors, security validation, and rule type-specific checks
This commit is contained in:
@@ -117,6 +117,7 @@ Defensive security tool only. Use only on systems you own or have explicit permi
|
||||
- Dense callback chains when sequential logic is clearer
|
||||
- Sacrificing readability for DRY — some repetition is fine if clearer
|
||||
- Abstractions for one-time operations
|
||||
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
|
||||
|
||||
## Key Files
|
||||
|
||||
|
||||
@@ -11,12 +11,13 @@ import chalk, { type ChalkInstance } from 'chalk';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
|
||||
import { isRetryableError, PentestError } from '../error-handling.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { timingResults, Timer } from '../utils/metrics.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
|
||||
import { getPromptNameForAgent } from '../types/agents.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { AgentName } from '../types/index.js';
|
||||
|
||||
import { dispatchMessage } from './message-handlers.js';
|
||||
@@ -65,8 +66,8 @@ function buildMcpServers(
|
||||
};
|
||||
|
||||
if (agentName) {
|
||||
const promptName = getPromptNameForAgent(agentName as AgentName);
|
||||
const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
|
||||
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
|
||||
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
|
||||
|
||||
if (playwrightMcpName) {
|
||||
console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`));
|
||||
@@ -263,22 +264,13 @@ export async function runClaudePrompt(
|
||||
|
||||
// === SPENDING CAP SAFEGUARD ===
|
||||
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
// When spending cap is hit, Claude returns a short message with $0 cost.
|
||||
// Legitimate agent work NEVER costs $0 with only 1-2 turns.
|
||||
if (turnCount <= 2 && totalCost === 0) {
|
||||
const resultLower = (result || '').toLowerCase();
|
||||
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
|
||||
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
|
||||
resultLower.includes(kw)
|
||||
// Uses consolidated billing detection from utils/billing-detection.ts
|
||||
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
|
||||
throw new PentestError(
|
||||
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // Retryable - Temporal will use 5-30 min backoff
|
||||
);
|
||||
|
||||
if (looksLikeBillingError) {
|
||||
throw new PentestError(
|
||||
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // Retryable - Temporal will use 5-30 min backoff
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const duration = timer.stop();
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
// Pure functions for processing SDK message types
|
||||
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
|
||||
import { filterJsonToolCalls } from '../utils/output-formatter.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import chalk from 'chalk';
|
||||
@@ -75,25 +77,15 @@ function detectApiError(content: string): ApiErrorDetection {
|
||||
// When Claude Code hits its spending cap, it returns a short message like
|
||||
// "Spending cap reached resets 8am" instead of throwing an error.
|
||||
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
|
||||
const BILLING_PATTERNS = [
|
||||
'spending cap',
|
||||
'spending limit',
|
||||
'cap reached',
|
||||
'budget exceeded',
|
||||
'usage limit',
|
||||
];
|
||||
|
||||
const isBillingError = BILLING_PATTERNS.some((pattern) =>
|
||||
lowerContent.includes(pattern)
|
||||
);
|
||||
|
||||
if (isBillingError) {
|
||||
if (matchesBillingTextPattern(content)) {
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Billing limit reached: ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // RETRYABLE - Temporal will use 5-30 min backoff
|
||||
true, // RETRYABLE - Temporal will use 5-30 min backoff
|
||||
{},
|
||||
ErrorCode.SPENDING_CAP_REACHED
|
||||
),
|
||||
};
|
||||
}
|
||||
@@ -127,7 +119,9 @@ function handleStructuredError(
|
||||
shouldThrow: new PentestError(
|
||||
`Billing error (structured): ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // Retryable with backoff
|
||||
true, // Retryable with backoff
|
||||
{},
|
||||
ErrorCode.INSUFFICIENT_CREDITS
|
||||
),
|
||||
};
|
||||
case 'rate_limit':
|
||||
@@ -136,7 +130,9 @@ function handleStructuredError(
|
||||
shouldThrow: new PentestError(
|
||||
`Rate limit hit (structured): ${content.slice(0, 100)}`,
|
||||
'network',
|
||||
true // Retryable with backoff
|
||||
true, // Retryable with backoff
|
||||
{},
|
||||
ErrorCode.API_RATE_LIMITED
|
||||
),
|
||||
};
|
||||
case 'authentication_failed':
|
||||
|
||||
@@ -18,6 +18,8 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { SessionMutex } from '../utils/concurrency.js';
|
||||
import type { AgentEndResult } from '../types/index.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
|
||||
// Global mutex instance
|
||||
const sessionMutex = new SessionMutex();
|
||||
@@ -40,10 +42,22 @@ export class AuditSession {
|
||||
|
||||
// Validate required fields
|
||||
if (!this.sessionId) {
|
||||
throw new Error('sessionMetadata.id is required');
|
||||
throw new PentestError(
|
||||
'sessionMetadata.id is required',
|
||||
'config',
|
||||
false,
|
||||
{ field: 'sessionMetadata.id' },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
if (!this.sessionMetadata.webUrl) {
|
||||
throw new Error('sessionMetadata.webUrl is required');
|
||||
throw new PentestError(
|
||||
'sessionMetadata.webUrl is required',
|
||||
'config',
|
||||
false,
|
||||
{ field: 'sessionMetadata.webUrl' },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Components
|
||||
@@ -124,7 +138,13 @@ export class AuditSession {
|
||||
*/
|
||||
async logEvent(eventType: string, eventData: unknown): Promise<void> {
|
||||
if (!this.currentLogger) {
|
||||
throw new Error('No active logger. Call startAgent() first.');
|
||||
throw new PentestError(
|
||||
'No active logger. Call startAgent() first.',
|
||||
'validation',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.AGENT_EXECUTION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Log to agent-specific log file (JSON format)
|
||||
|
||||
127
src/audit/log-stream.ts
Normal file
127
src/audit/log-stream.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* LogStream - Stream composition utility for append-only logging
|
||||
*
|
||||
* Encapsulates the common stream management pattern used by AgentLogger
|
||||
* and WorkflowLogger: opening streams in append mode, handling backpressure,
|
||||
* and proper cleanup.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { ensureDirectory } from '../utils/file-io.js';
|
||||
|
||||
/**
|
||||
* LogStream - Manages a single append-only log file stream
|
||||
*/
|
||||
export class LogStream {
|
||||
private readonly filePath: string;
|
||||
private stream: fs.WriteStream | null = null;
|
||||
private _isOpen: boolean = false;
|
||||
|
||||
constructor(filePath: string) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the stream for writing (creates parent directories, opens in append mode)
|
||||
*/
|
||||
async open(): Promise<void> {
|
||||
if (this._isOpen) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure parent directory exists
|
||||
await ensureDirectory(path.dirname(this.filePath));
|
||||
|
||||
// Create write stream in append mode
|
||||
this.stream = fs.createWriteStream(this.filePath, {
|
||||
flags: 'a',
|
||||
encoding: 'utf8',
|
||||
autoClose: true,
|
||||
});
|
||||
|
||||
// Handle stream errors to prevent crashes (log and mark closed)
|
||||
this.stream.on('error', (err) => {
|
||||
console.error(`LogStream error for ${this.filePath}:`, err.message);
|
||||
this._isOpen = false;
|
||||
});
|
||||
|
||||
this._isOpen = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write text to the stream with backpressure handling
|
||||
*/
|
||||
async write(text: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this._isOpen || !this.stream) {
|
||||
reject(new Error('LogStream not open'));
|
||||
return;
|
||||
}
|
||||
|
||||
const stream = this.stream;
|
||||
let drainHandler: (() => void) | null = null;
|
||||
|
||||
const cleanup = () => {
|
||||
if (drainHandler) {
|
||||
stream.removeListener('drain', drainHandler);
|
||||
drainHandler = null;
|
||||
}
|
||||
};
|
||||
|
||||
const needsDrain = !stream.write(text, 'utf8', (error) => {
|
||||
cleanup();
|
||||
if (error) {
|
||||
reject(error);
|
||||
} else if (!needsDrain) {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
|
||||
if (needsDrain) {
|
||||
drainHandler = () => {
|
||||
cleanup();
|
||||
resolve();
|
||||
};
|
||||
stream.once('drain', drainHandler);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the stream (flush and close)
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
if (!this._isOpen || !this.stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.stream!.end(() => {
|
||||
this._isOpen = false;
|
||||
this.stream = null;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the stream is currently open
|
||||
*/
|
||||
get isOpen(): boolean {
|
||||
return this._isOpen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the file path this stream writes to
|
||||
*/
|
||||
get path(): string {
|
||||
return this.filePath;
|
||||
}
|
||||
}
|
||||
@@ -8,10 +8,9 @@
|
||||
* Append-Only Agent Logger
|
||||
*
|
||||
* Provides crash-safe, append-only logging for agent execution.
|
||||
* Uses file streams with immediate flush to prevent data loss.
|
||||
* Uses LogStream for stream management with backpressure handling.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import {
|
||||
generateLogPath,
|
||||
generatePromptPath,
|
||||
@@ -19,6 +18,7 @@ import {
|
||||
} from './utils.js';
|
||||
import { atomicWrite } from '../utils/file-io.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { LogStream } from './log-stream.js';
|
||||
|
||||
interface LogEvent {
|
||||
type: string;
|
||||
@@ -30,13 +30,11 @@ interface LogEvent {
|
||||
* AgentLogger - Manages append-only logging for a single agent execution
|
||||
*/
|
||||
export class AgentLogger {
|
||||
private sessionMetadata: SessionMetadata;
|
||||
private agentName: string;
|
||||
private attemptNumber: number;
|
||||
private timestamp: number;
|
||||
private logPath: string;
|
||||
private stream: fs.WriteStream | null = null;
|
||||
private isOpen: boolean = false;
|
||||
private readonly sessionMetadata: SessionMetadata;
|
||||
private readonly agentName: string;
|
||||
private readonly attemptNumber: number;
|
||||
private readonly timestamp: number;
|
||||
private readonly logStream: LogStream;
|
||||
|
||||
constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
|
||||
this.sessionMetadata = sessionMetadata;
|
||||
@@ -44,26 +42,20 @@ export class AgentLogger {
|
||||
this.attemptNumber = attemptNumber;
|
||||
this.timestamp = Date.now();
|
||||
|
||||
// Generate log file path
|
||||
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
|
||||
// Generate log file path and create stream
|
||||
const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
|
||||
this.logStream = new LogStream(logPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the log stream (creates file and opens stream)
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
if (this.isOpen) {
|
||||
if (this.logStream.isOpen) {
|
||||
return; // Already initialized
|
||||
}
|
||||
|
||||
// Create write stream with append mode and auto-flush
|
||||
this.stream = fs.createWriteStream(this.logPath, {
|
||||
flags: 'a', // Append mode
|
||||
encoding: 'utf8',
|
||||
autoClose: true,
|
||||
});
|
||||
|
||||
this.isOpen = true;
|
||||
await this.logStream.open();
|
||||
|
||||
// Write header
|
||||
await this.writeHeader();
|
||||
@@ -83,29 +75,7 @@ export class AgentLogger {
|
||||
`========================================\n`,
|
||||
].join('\n');
|
||||
|
||||
return this.writeRaw(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write raw text to log file with immediate flush
|
||||
*/
|
||||
private writeRaw(text: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this.isOpen || !this.stream) {
|
||||
reject(new Error('Logger not initialized'));
|
||||
return;
|
||||
}
|
||||
|
||||
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
|
||||
if (error) reject(error);
|
||||
});
|
||||
|
||||
if (needsDrain) {
|
||||
this.stream.once('drain', resolve);
|
||||
} else {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
return this.logStream.write(header);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -120,23 +90,14 @@ export class AgentLogger {
|
||||
};
|
||||
|
||||
const eventLine = `${JSON.stringify(event)}\n`;
|
||||
return this.writeRaw(eventLine);
|
||||
return this.logStream.write(eventLine);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the log stream
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
if (!this.isOpen || !this.stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.stream!.end(() => {
|
||||
this.isOpen = false;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
return this.logStream.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -18,6 +18,8 @@ import {
|
||||
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
|
||||
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
|
||||
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { AgentName, AgentEndResult } from '../types/index.js';
|
||||
|
||||
interface AttemptData {
|
||||
@@ -159,7 +161,13 @@ export class MetricsTracker {
|
||||
*/
|
||||
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
|
||||
if (!this.data) {
|
||||
throw new Error('MetricsTracker not initialized');
|
||||
throw new PentestError(
|
||||
'MetricsTracker not initialized',
|
||||
'validation',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.AGENT_EXECUTION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Initialize agent metrics if not exists
|
||||
@@ -251,7 +259,13 @@ export class MetricsTracker {
|
||||
checkpointHash?: string
|
||||
): Promise<void> {
|
||||
if (!this.data) {
|
||||
throw new Error('MetricsTracker not initialized');
|
||||
throw new PentestError(
|
||||
'MetricsTracker not initialized',
|
||||
'validation',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.AGENT_EXECUTION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Ensure originalWorkflowId is set (backfill if missing from old sessions)
|
||||
|
||||
@@ -11,10 +11,10 @@
|
||||
* Optimized for `tail -f` viewing during concurrent workflow execution.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
|
||||
import fs from 'fs/promises';
|
||||
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
|
||||
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
|
||||
import { LogStream } from './log-stream.js';
|
||||
|
||||
export interface AgentLogDetails {
|
||||
attemptNumber?: number;
|
||||
@@ -42,38 +42,28 @@ export interface WorkflowSummary {
|
||||
* WorkflowLogger - Manages the unified workflow log file
|
||||
*/
|
||||
export class WorkflowLogger {
|
||||
private sessionMetadata: SessionMetadata;
|
||||
private logPath: string;
|
||||
private stream: fs.WriteStream | null = null;
|
||||
private initialized: boolean = false;
|
||||
private readonly sessionMetadata: SessionMetadata;
|
||||
private readonly logStream: LogStream;
|
||||
|
||||
constructor(sessionMetadata: SessionMetadata) {
|
||||
this.sessionMetadata = sessionMetadata;
|
||||
this.logPath = generateWorkflowLogPath(sessionMetadata);
|
||||
const logPath = generateWorkflowLogPath(sessionMetadata);
|
||||
this.logStream = new LogStream(logPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the log stream (creates file and writes header)
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
if (this.initialized) {
|
||||
if (this.logStream.isOpen) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure directory exists
|
||||
await ensureDirectory(path.dirname(this.logPath));
|
||||
|
||||
// Create write stream with append mode
|
||||
this.stream = fs.createWriteStream(this.logPath, {
|
||||
flags: 'a',
|
||||
encoding: 'utf8',
|
||||
autoClose: true,
|
||||
});
|
||||
|
||||
this.initialized = true;
|
||||
// Open the stream (LogStream.open() handles directory creation)
|
||||
await this.logStream.open();
|
||||
|
||||
// Write header only if file is new (empty)
|
||||
const stats = await fs.promises.stat(this.logPath).catch(() => null);
|
||||
const stats = await fs.stat(this.logStream.path).catch(() => null);
|
||||
if (!stats || stats.size === 0) {
|
||||
await this.writeHeader();
|
||||
}
|
||||
@@ -94,29 +84,7 @@ export class WorkflowLogger {
|
||||
``,
|
||||
].join('\n');
|
||||
|
||||
return this.writeRaw(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write raw text to log file with immediate flush
|
||||
*/
|
||||
private writeRaw(text: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this.initialized || !this.stream) {
|
||||
reject(new Error('WorkflowLogger not initialized'));
|
||||
return;
|
||||
}
|
||||
|
||||
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
|
||||
if (error) reject(error);
|
||||
});
|
||||
|
||||
if (needsDrain) {
|
||||
this.stream.once('drain', resolve);
|
||||
} else {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
return this.logStream.write(header);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -138,10 +106,10 @@ export class WorkflowLogger {
|
||||
|
||||
// Add blank line before phase start for readability
|
||||
if (event === 'start') {
|
||||
await this.writeRaw('\n');
|
||||
await this.logStream.write('\n');
|
||||
}
|
||||
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -184,7 +152,7 @@ export class WorkflowLogger {
|
||||
}
|
||||
|
||||
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -194,7 +162,7 @@ export class WorkflowLogger {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -205,7 +173,7 @@ export class WorkflowLogger {
|
||||
|
||||
const contextStr = context ? ` (${context})` : '';
|
||||
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -301,7 +269,7 @@ export class WorkflowLogger {
|
||||
const params = this.formatToolParams(toolName, parameters);
|
||||
const paramStr = params ? `: ${params}` : '';
|
||||
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -313,7 +281,7 @@ export class WorkflowLogger {
|
||||
// Show full content, replacing newlines with escaped version for single-line output
|
||||
const escaped = content.replace(/\n/g, '\\n');
|
||||
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
|
||||
await this.writeRaw(line);
|
||||
await this.logStream.write(line);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -324,42 +292,42 @@ export class WorkflowLogger {
|
||||
|
||||
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
|
||||
|
||||
await this.writeRaw('\n');
|
||||
await this.writeRaw(`================================================================================\n`);
|
||||
await this.writeRaw(`Workflow ${status}\n`);
|
||||
await this.writeRaw(`────────────────────────────────────────\n`);
|
||||
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
|
||||
await this.writeRaw(`Status: ${summary.status}\n`);
|
||||
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
|
||||
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
|
||||
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`);
|
||||
await this.logStream.write('\n');
|
||||
await this.logStream.write(`================================================================================\n`);
|
||||
await this.logStream.write(`Workflow ${status}\n`);
|
||||
await this.logStream.write(`────────────────────────────────────────\n`);
|
||||
await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
|
||||
await this.logStream.write(`Status: ${summary.status}\n`);
|
||||
await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
|
||||
await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
|
||||
await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`);
|
||||
|
||||
if (summary.error) {
|
||||
await this.writeRaw(`Error: ${summary.error}\n`);
|
||||
await this.logStream.write(`Error: ${summary.error}\n`);
|
||||
}
|
||||
|
||||
await this.writeRaw(`\n`);
|
||||
await this.writeRaw(`Agent Breakdown:\n`);
|
||||
await this.logStream.write(`\n`);
|
||||
await this.logStream.write(`Agent Breakdown:\n`);
|
||||
|
||||
for (const agentName of summary.completedAgents) {
|
||||
const metrics = summary.agentMetrics[agentName];
|
||||
if (metrics) {
|
||||
const duration = formatDuration(metrics.durationMs);
|
||||
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
|
||||
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`);
|
||||
await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`);
|
||||
} else {
|
||||
await this.writeRaw(` - ${agentName}\n`);
|
||||
await this.logStream.write(` - ${agentName}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
await this.writeRaw(`================================================================================\n`);
|
||||
await this.logStream.write(`================================================================================\n`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure initialized (helper for lazy initialization)
|
||||
*/
|
||||
private async ensureInitialized(): Promise<void> {
|
||||
if (!this.initialized) {
|
||||
if (!this.logStream.isOpen) {
|
||||
await this.initialize();
|
||||
}
|
||||
}
|
||||
@@ -368,15 +336,6 @@ export class WorkflowLogger {
|
||||
* Close the log stream
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
if (!this.initialized || !this.stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.stream!.end(() => {
|
||||
this.initialized = false;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
return this.logStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,9 +7,10 @@
|
||||
import { createRequire } from 'module';
|
||||
import { fs } from 'zx';
|
||||
import yaml from 'js-yaml';
|
||||
import { Ajv, type ValidateFunction } from 'ajv';
|
||||
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
|
||||
import type { FormatsPlugin } from 'ajv-formats';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
import type {
|
||||
Config,
|
||||
Rule,
|
||||
@@ -53,20 +54,155 @@ const DANGEROUS_PATTERNS: RegExp[] = [
|
||||
/file:/i, // File URLs
|
||||
];
|
||||
|
||||
/**
|
||||
* Format a single AJV error into a human-readable message.
|
||||
* Translates AJV error keywords into plain English descriptions.
|
||||
*/
|
||||
function formatAjvError(error: ErrorObject): string {
|
||||
const path = error.instancePath || 'root';
|
||||
const params = error.params as Record<string, unknown>;
|
||||
|
||||
switch (error.keyword) {
|
||||
case 'required': {
|
||||
const missingProperty = params.missingProperty as string;
|
||||
return `Missing required field: "${missingProperty}" at ${path || 'root'}`;
|
||||
}
|
||||
|
||||
case 'type': {
|
||||
const expectedType = params.type as string;
|
||||
return `Invalid type at ${path}: expected ${expectedType}`;
|
||||
}
|
||||
|
||||
case 'enum': {
|
||||
const allowedValues = params.allowedValues as unknown[];
|
||||
const formattedValues = allowedValues.map((v) => `"${v}"`).join(', ');
|
||||
return `Invalid value at ${path}: must be one of [${formattedValues}]`;
|
||||
}
|
||||
|
||||
case 'additionalProperties': {
|
||||
const additionalProperty = params.additionalProperty as string;
|
||||
return `Unknown field at ${path}: "${additionalProperty}" is not allowed`;
|
||||
}
|
||||
|
||||
case 'minLength': {
|
||||
const limit = params.limit as number;
|
||||
return `Value at ${path} is too short: must have at least ${limit} character(s)`;
|
||||
}
|
||||
|
||||
case 'maxLength': {
|
||||
const limit = params.limit as number;
|
||||
return `Value at ${path} is too long: must have at most ${limit} character(s)`;
|
||||
}
|
||||
|
||||
case 'minimum': {
|
||||
const limit = params.limit as number;
|
||||
return `Value at ${path} is too small: must be >= ${limit}`;
|
||||
}
|
||||
|
||||
case 'maximum': {
|
||||
const limit = params.limit as number;
|
||||
return `Value at ${path} is too large: must be <= ${limit}`;
|
||||
}
|
||||
|
||||
case 'minItems': {
|
||||
const limit = params.limit as number;
|
||||
return `Array at ${path} has too few items: must have at least ${limit} item(s)`;
|
||||
}
|
||||
|
||||
case 'maxItems': {
|
||||
const limit = params.limit as number;
|
||||
return `Array at ${path} has too many items: must have at most ${limit} item(s)`;
|
||||
}
|
||||
|
||||
case 'pattern': {
|
||||
const pattern = params.pattern as string;
|
||||
return `Value at ${path} does not match required pattern: ${pattern}`;
|
||||
}
|
||||
|
||||
case 'format': {
|
||||
const format = params.format as string;
|
||||
return `Value at ${path} must be a valid ${format}`;
|
||||
}
|
||||
|
||||
case 'const': {
|
||||
const allowedValue = params.allowedValue as unknown;
|
||||
return `Value at ${path} must be exactly "${allowedValue}"`;
|
||||
}
|
||||
|
||||
case 'oneOf': {
|
||||
return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`;
|
||||
}
|
||||
|
||||
case 'anyOf': {
|
||||
return `Value at ${path} must match at least one of the allowed schemas`;
|
||||
}
|
||||
|
||||
case 'not': {
|
||||
return `Value at ${path} matches a schema it should not match`;
|
||||
}
|
||||
|
||||
case 'if': {
|
||||
return `Value at ${path} does not satisfy conditional schema requirements`;
|
||||
}
|
||||
|
||||
case 'uniqueItems': {
|
||||
const i = params.i as number;
|
||||
const j = params.j as number;
|
||||
return `Array at ${path} contains duplicate items at positions ${j} and ${i}`;
|
||||
}
|
||||
|
||||
case 'propertyNames': {
|
||||
const propertyName = params.propertyName as string;
|
||||
return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`;
|
||||
}
|
||||
|
||||
case 'dependencies':
|
||||
case 'dependentRequired': {
|
||||
const property = params.property as string;
|
||||
const missingProperty = params.missingProperty as string;
|
||||
return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`;
|
||||
}
|
||||
|
||||
default: {
|
||||
// Fallback for any unhandled keywords - use AJV's message if available
|
||||
const message = error.message || `validation failed for keyword "${error.keyword}"`;
|
||||
return `${path}: ${message}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Format all AJV errors into a list of human-readable messages.
|
||||
* Returns an array of formatted error strings.
|
||||
*/
|
||||
function formatAjvErrors(errors: ErrorObject[]): string[] {
|
||||
return errors.map(formatAjvError);
|
||||
}
|
||||
|
||||
// Parse and load YAML configuration file with enhanced safety
|
||||
export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
try {
|
||||
// File existence check
|
||||
if (!(await fs.pathExists(configPath))) {
|
||||
throw new Error(`Configuration file not found: ${configPath}`);
|
||||
throw new PentestError(
|
||||
`Configuration file not found: ${configPath}`,
|
||||
'config',
|
||||
false,
|
||||
{ configPath },
|
||||
ErrorCode.CONFIG_NOT_FOUND
|
||||
);
|
||||
}
|
||||
|
||||
// File size check (prevent extremely large files)
|
||||
const stats = await fs.stat(configPath);
|
||||
const maxFileSize = 1024 * 1024; // 1MB
|
||||
if (stats.size > maxFileSize) {
|
||||
throw new Error(
|
||||
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
|
||||
throw new PentestError(
|
||||
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
|
||||
'config',
|
||||
false,
|
||||
{ configPath, fileSize: stats.size, maxFileSize },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
@@ -75,7 +211,13 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
|
||||
// Basic content validation
|
||||
if (!configContent.trim()) {
|
||||
throw new Error('Configuration file is empty');
|
||||
throw new PentestError(
|
||||
'Configuration file is empty',
|
||||
'config',
|
||||
false,
|
||||
{ configPath },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Parse YAML with safety options
|
||||
@@ -88,12 +230,24 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
});
|
||||
} catch (yamlError) {
|
||||
const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
|
||||
throw new Error(`YAML parsing failed: ${errMsg}`);
|
||||
throw new PentestError(
|
||||
`YAML parsing failed: ${errMsg}`,
|
||||
'config',
|
||||
false,
|
||||
{ configPath, originalError: errMsg },
|
||||
ErrorCode.CONFIG_PARSE_ERROR
|
||||
);
|
||||
}
|
||||
|
||||
// Additional safety check
|
||||
if (config === null || config === undefined) {
|
||||
throw new Error('Configuration file resulted in null/undefined after parsing');
|
||||
throw new PentestError(
|
||||
'Configuration file resulted in null/undefined after parsing',
|
||||
'config',
|
||||
false,
|
||||
{ configPath },
|
||||
ErrorCode.CONFIG_PARSE_ERROR
|
||||
);
|
||||
}
|
||||
|
||||
// Validate the configuration structure and content
|
||||
@@ -101,20 +255,19 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
|
||||
return config as Config;
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
// Enhance error message with context
|
||||
if (
|
||||
errMsg.startsWith('Configuration file not found') ||
|
||||
errMsg.startsWith('YAML parsing failed') ||
|
||||
errMsg.includes('must be') ||
|
||||
errMsg.includes('exceeds maximum')
|
||||
) {
|
||||
// These are already well-formatted errors, re-throw as-is
|
||||
// PentestError instances are already well-formatted, re-throw as-is
|
||||
if (error instanceof PentestError) {
|
||||
throw error;
|
||||
} else {
|
||||
// Wrap other errors with context
|
||||
throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
|
||||
}
|
||||
// Wrap other errors with context
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
throw new PentestError(
|
||||
`Failed to parse configuration file '${configPath}': ${errMsg}`,
|
||||
'config',
|
||||
false,
|
||||
{ configPath, originalError: errMsg },
|
||||
ErrorCode.CONFIG_PARSE_ERROR
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -122,32 +275,42 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
const validateConfig = (config: Config): void => {
|
||||
// Basic structure validation
|
||||
if (!config || typeof config !== 'object') {
|
||||
throw new Error('Configuration must be a valid object');
|
||||
throw new PentestError(
|
||||
'Configuration must be a valid object',
|
||||
'config',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
if (Array.isArray(config)) {
|
||||
throw new Error('Configuration must be an object, not an array');
|
||||
throw new PentestError(
|
||||
'Configuration must be an object, not an array',
|
||||
'config',
|
||||
false,
|
||||
{},
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// JSON Schema validation
|
||||
const isValid = validateSchema(config);
|
||||
if (!isValid) {
|
||||
const errors = validateSchema.errors || [];
|
||||
const errorMessages = errors.map((err) => {
|
||||
const path = err.instancePath || 'root';
|
||||
return `${path}: ${err.message}`;
|
||||
});
|
||||
throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`);
|
||||
const errorMessages = formatAjvErrors(errors);
|
||||
throw new PentestError(
|
||||
`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`,
|
||||
'config',
|
||||
false,
|
||||
{ validationErrors: errorMessages },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
|
||||
// Additional security validation
|
||||
performSecurityValidation(config);
|
||||
|
||||
// Warn if deprecated fields are used
|
||||
if (config.login) {
|
||||
console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.');
|
||||
}
|
||||
|
||||
// Ensure at least some configuration is provided
|
||||
if (!config.rules && !config.authentication) {
|
||||
console.warn(
|
||||
@@ -166,17 +329,40 @@ const performSecurityValidation = (config: Config): void => {
|
||||
if (config.authentication) {
|
||||
const auth = config.authentication;
|
||||
|
||||
// Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
|
||||
if (auth.login_url) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(auth.login_url)) {
|
||||
throw new PentestError(
|
||||
`authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'login_url', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for dangerous patterns in credentials
|
||||
if (auth.credentials) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(auth.credentials.username)) {
|
||||
throw new Error(
|
||||
'authentication.credentials.username contains potentially dangerous pattern'
|
||||
throw new PentestError(
|
||||
`authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'credentials.username', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
if (pattern.test(auth.credentials.password)) {
|
||||
throw new Error(
|
||||
'authentication.credentials.password contains potentially dangerous pattern'
|
||||
throw new PentestError(
|
||||
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'credentials.password', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -187,8 +373,12 @@ const performSecurityValidation = (config: Config): void => {
|
||||
auth.login_flow.forEach((step, index) => {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(step)) {
|
||||
throw new Error(
|
||||
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
|
||||
throw new PentestError(
|
||||
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `login_flow[${index}]`, pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -216,13 +406,21 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
// Security validation
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(rule.url_path)) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
|
||||
throw new PentestError(
|
||||
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
if (pattern.test(rule.description)) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
|
||||
throw new PentestError(
|
||||
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -234,10 +432,18 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
|
||||
// Validate rule based on its specific type
|
||||
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
|
||||
const field = `rules.${ruleType}[${index}].url_path`;
|
||||
|
||||
switch (rule.type) {
|
||||
case 'path':
|
||||
if (!rule.url_path.startsWith('/')) {
|
||||
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
|
||||
throw new PentestError(
|
||||
`${field} for type 'path' must start with '/'`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -245,14 +451,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'domain':
|
||||
// Basic domain validation - no slashes allowed
|
||||
if (rule.url_path.includes('/')) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
|
||||
throw new PentestError(
|
||||
`${field} for type '${rule.type}' cannot contain '/' characters`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
// Must contain at least one dot for domains
|
||||
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
|
||||
throw new PentestError(
|
||||
`${field} for type 'domain' must be a valid domain name`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
break;
|
||||
@@ -260,8 +474,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'method': {
|
||||
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
|
||||
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
|
||||
throw new PentestError(
|
||||
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type, allowedMethods },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
break;
|
||||
@@ -270,8 +488,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'header':
|
||||
// Header name validation (basic)
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
|
||||
throw new PentestError(
|
||||
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
break;
|
||||
@@ -279,8 +501,12 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'parameter':
|
||||
// Parameter name validation (basic)
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new Error(
|
||||
`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
|
||||
throw new PentestError(
|
||||
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
break;
|
||||
@@ -293,8 +519,12 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||
rules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
if (seen.has(key)) {
|
||||
throw new Error(
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
|
||||
throw new PentestError(
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
seen.add(key);
|
||||
@@ -308,8 +538,12 @@ const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): vo
|
||||
focusRules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
if (avoidSet.has(key)) {
|
||||
throw new Error(
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
|
||||
throw new PentestError(
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED
|
||||
);
|
||||
}
|
||||
});
|
||||
@@ -347,7 +581,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
|
||||
password: auth.credentials.password,
|
||||
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
|
||||
},
|
||||
login_flow: auth.login_flow.map((step) => step.trim()),
|
||||
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
|
||||
success_condition: {
|
||||
type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
|
||||
value: auth.success_condition.value.trim(),
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
import { path, fs } from 'zx';
|
||||
import chalk from 'chalk';
|
||||
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
|
||||
import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
|
||||
import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js';
|
||||
|
||||
// Factory function for vulnerability queue validators
|
||||
function createVulnValidator(vulnType: VulnType): AgentValidator {
|
||||
@@ -32,7 +32,8 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
|
||||
}
|
||||
|
||||
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts
|
||||
export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
|
||||
// Keys are promptTemplate values from AGENTS registry (session-manager.ts)
|
||||
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
|
||||
// Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
|
||||
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
|
||||
// but assigning MCP server anyway for consistency and future extensibility
|
||||
|
||||
@@ -4,11 +4,16 @@
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import type {
|
||||
PentestErrorType,
|
||||
PentestErrorContext,
|
||||
PromptErrorResult,
|
||||
import {
|
||||
ErrorCode,
|
||||
type PentestErrorType,
|
||||
type PentestErrorContext,
|
||||
type PromptErrorResult,
|
||||
} from './types/errors.js';
|
||||
import {
|
||||
matchesBillingApiPattern,
|
||||
matchesBillingTextPattern,
|
||||
} from './utils/billing-detection.js';
|
||||
|
||||
// Custom error class for pentest operations
|
||||
export class PentestError extends Error {
|
||||
@@ -17,18 +22,24 @@ export class PentestError extends Error {
|
||||
retryable: boolean;
|
||||
context: PentestErrorContext;
|
||||
timestamp: string;
|
||||
/** Optional specific error code for reliable classification */
|
||||
code?: ErrorCode;
|
||||
|
||||
constructor(
|
||||
message: string,
|
||||
type: PentestErrorType,
|
||||
retryable: boolean = false,
|
||||
context: PentestErrorContext = {}
|
||||
context: PentestErrorContext = {},
|
||||
code?: ErrorCode
|
||||
) {
|
||||
super(message);
|
||||
this.type = type;
|
||||
this.retryable = retryable;
|
||||
this.context = context;
|
||||
this.timestamp = new Date().toISOString();
|
||||
if (code !== undefined) {
|
||||
this.code = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,6 +113,53 @@ export function isRetryableError(error: Error): boolean {
|
||||
return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies errors by ErrorCode for reliable, code-based classification.
|
||||
* Used when error is a PentestError with a specific ErrorCode.
|
||||
*/
|
||||
function classifyByErrorCode(
|
||||
code: ErrorCode,
|
||||
retryableFromError: boolean
|
||||
): { type: string; retryable: boolean } {
|
||||
switch (code) {
|
||||
// Billing errors - retryable (wait for cap reset or credits added)
|
||||
case ErrorCode.SPENDING_CAP_REACHED:
|
||||
case ErrorCode.INSUFFICIENT_CREDITS:
|
||||
return { type: 'BillingError', retryable: true };
|
||||
|
||||
case ErrorCode.API_RATE_LIMITED:
|
||||
return { type: 'RateLimitError', retryable: true };
|
||||
|
||||
// Config errors - non-retryable (need manual fix)
|
||||
case ErrorCode.CONFIG_NOT_FOUND:
|
||||
case ErrorCode.CONFIG_VALIDATION_FAILED:
|
||||
case ErrorCode.CONFIG_PARSE_ERROR:
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
|
||||
// Prompt errors - non-retryable (need manual fix)
|
||||
case ErrorCode.PROMPT_LOAD_FAILED:
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
|
||||
// Git errors - non-retryable (indicates workspace corruption)
|
||||
case ErrorCode.GIT_CHECKPOINT_FAILED:
|
||||
case ErrorCode.GIT_ROLLBACK_FAILED:
|
||||
return { type: 'GitError', retryable: false };
|
||||
|
||||
// Validation errors - retryable (agent may succeed on retry)
|
||||
case ErrorCode.OUTPUT_VALIDATION_FAILED:
|
||||
case ErrorCode.DELIVERABLE_NOT_FOUND:
|
||||
return { type: 'OutputValidationError', retryable: true };
|
||||
|
||||
// Agent execution - use the retryable flag from the error
|
||||
case ErrorCode.AGENT_EXECUTION_FAILED:
|
||||
return { type: 'AgentExecutionError', retryable: retryableFromError };
|
||||
|
||||
default:
|
||||
// Unknown code - fall through to string matching
|
||||
return { type: 'UnknownError', retryable: retryableFromError };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies errors for Temporal workflow retry behavior.
|
||||
* Returns error type and whether Temporal should retry.
|
||||
@@ -109,31 +167,25 @@ export function isRetryableError(error: Error): boolean {
|
||||
* Used by activities to wrap errors in ApplicationFailure:
|
||||
* - Retryable errors: Temporal retries with configured backoff
|
||||
* - Non-retryable errors: Temporal fails immediately
|
||||
*
|
||||
* Classification priority:
|
||||
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
|
||||
* 2. Fall through to string matching for external errors (SDK, network, etc.)
|
||||
*/
|
||||
export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
|
||||
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
|
||||
if (error instanceof PentestError && error.code !== undefined) {
|
||||
return classifyByErrorCode(error.code, error.retryable);
|
||||
}
|
||||
|
||||
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
|
||||
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
||||
|
||||
// === BILLING ERRORS (Retryable with long backoff) ===
|
||||
// Anthropic returns billing as 400 invalid_request_error
|
||||
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
|
||||
if (
|
||||
message.includes('billing_error') ||
|
||||
message.includes('credit balance is too low') ||
|
||||
message.includes('insufficient credits') ||
|
||||
message.includes('usage is blocked due to insufficient credits') ||
|
||||
message.includes('please visit plans & billing') ||
|
||||
message.includes('please visit plans and billing') ||
|
||||
message.includes('usage limit reached') ||
|
||||
message.includes('quota exceeded') ||
|
||||
message.includes('daily rate limit') ||
|
||||
message.includes('limit will reset') ||
|
||||
// Claude Code spending cap patterns (returns short message instead of error)
|
||||
message.includes('spending cap') ||
|
||||
message.includes('spending limit') ||
|
||||
message.includes('cap reached') ||
|
||||
message.includes('budget exceeded') ||
|
||||
message.includes('billing limit reached')
|
||||
) {
|
||||
// Check both API patterns and text patterns for comprehensive detection
|
||||
if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
|
||||
return { type: 'BillingError', retryable: true };
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import { fs, path } from 'zx';
|
||||
import chalk from 'chalk';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
|
||||
interface DeliverableFile {
|
||||
name: string;
|
||||
@@ -34,7 +35,13 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
|
||||
sections.push(content);
|
||||
console.log(chalk.green(`✅ Added ${file.name} findings`));
|
||||
} else if (file.required) {
|
||||
throw new Error(`Required file ${file.path} not found`);
|
||||
throw new PentestError(
|
||||
`Required deliverable file not found: ${file.path}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ deliverableFile: file.path, sourceDir },
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND
|
||||
);
|
||||
} else {
|
||||
console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`));
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
import { type Result, ok, err } from './types/result.js';
|
||||
import { asyncPipe } from './utils/functional.js';
|
||||
|
||||
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
|
||||
@@ -67,11 +69,10 @@ export interface ExploitationDecision {
|
||||
vulnType: VulnType;
|
||||
}
|
||||
|
||||
export interface SafeValidationResult {
|
||||
success: boolean;
|
||||
data?: ExploitationDecision;
|
||||
error?: PentestError;
|
||||
}
|
||||
/**
|
||||
* Result type for safe validation - explicit error handling.
|
||||
*/
|
||||
export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
|
||||
|
||||
// Vulnerability type configuration as immutable data
|
||||
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
|
||||
@@ -196,7 +197,8 @@ const validateExistenceRules = (
|
||||
deliverablePath: pathsWithExistence.deliverable,
|
||||
queuePath: pathsWithExistence.queue,
|
||||
existence,
|
||||
}
|
||||
},
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND
|
||||
),
|
||||
};
|
||||
}
|
||||
@@ -311,15 +313,18 @@ export async function validateQueueAndDeliverable(
|
||||
);
|
||||
}
|
||||
|
||||
// Pure function to safely validate (returns result instead of throwing)
|
||||
export const safeValidateQueueAndDeliverable = async (
|
||||
/**
|
||||
* Safely validate queue and deliverable files.
|
||||
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
|
||||
*/
|
||||
export async function validateQueueSafe(
|
||||
vulnType: VulnType,
|
||||
sourceDir: string
|
||||
): Promise<SafeValidationResult> => {
|
||||
): Promise<SafeValidationResult> {
|
||||
try {
|
||||
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
|
||||
return { success: true, data: result };
|
||||
return ok(result);
|
||||
} catch (error) {
|
||||
return { success: false, error: error as PentestError };
|
||||
return err(error as PentestError);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
278
src/services/agent-execution.ts
Normal file
278
src/services/agent-execution.ts
Normal file
@@ -0,0 +1,278 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Agent Execution Service
|
||||
*
|
||||
* Handles the full agent lifecycle:
|
||||
* - Load config via ConfigLoaderService
|
||||
* - Load prompt template using AGENTS[agentName].promptTemplate
|
||||
* - Create git checkpoint
|
||||
* - Start audit logging
|
||||
* - Invoke Claude SDK via runClaudePrompt
|
||||
* - Spending cap check using isSpendingCapBehavior
|
||||
* - Handle failure (rollback, audit)
|
||||
* - Validate output using AGENTS[agentName].deliverableFilename
|
||||
* - Commit on success, log metrics
|
||||
*
|
||||
* No Temporal dependencies - pure domain logic.
|
||||
*/
|
||||
|
||||
import chalk from 'chalk';
|
||||
|
||||
import { Result, ok, err, isErr } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||
import {
|
||||
runClaudePrompt,
|
||||
validateAgentOutput,
|
||||
type ClaudePromptResult,
|
||||
} from '../ai/claude-executor.js';
|
||||
import {
|
||||
createGitCheckpoint,
|
||||
commitGitSuccess,
|
||||
rollbackGitWorkspace,
|
||||
getGitCommitHash,
|
||||
} from '../utils/git-manager.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { AgentEndResult } from '../types/audit.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import type { ConfigLoaderService } from './config-loader.js';
|
||||
import type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
/**
|
||||
* Input for agent execution.
|
||||
*/
|
||||
export interface AgentExecutionInput {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
configPath?: string | undefined;
|
||||
pipelineTestingMode?: boolean | undefined;
|
||||
attemptNumber: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Service for executing agents with full lifecycle management.
|
||||
*
|
||||
* NOTE: AuditSession is passed per-execution, NOT stored on the service.
|
||||
* This is critical for parallel agent execution - each agent needs its own
|
||||
* AuditSession instance because AuditSession uses instance state (currentAgentName)
|
||||
* to track which agent is currently logging.
|
||||
*/
|
||||
export class AgentExecutionService {
|
||||
private readonly configLoader: ConfigLoaderService;
|
||||
|
||||
constructor(configLoader: ConfigLoaderService) {
|
||||
this.configLoader = configLoader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute an agent with full lifecycle management.
|
||||
*
|
||||
* @param agentName - Name of the agent to execute
|
||||
* @param input - Execution input parameters
|
||||
* @param auditSession - Audit session for this specific agent execution
|
||||
* @returns Result containing AgentEndResult on success, PentestError on failure
|
||||
*/
|
||||
async execute(
|
||||
agentName: AgentName,
|
||||
input: AgentExecutionInput,
|
||||
auditSession: AuditSession
|
||||
): Promise<Result<AgentEndResult, PentestError>> {
|
||||
const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
|
||||
|
||||
// 1. Load config (if provided)
|
||||
const configResult = await this.configLoader.loadOptional(configPath);
|
||||
if (isErr(configResult)) {
|
||||
return configResult;
|
||||
}
|
||||
const distributedConfig = configResult.value;
|
||||
|
||||
// 2. Load prompt
|
||||
const promptTemplate = AGENTS[agentName].promptTemplate;
|
||||
let prompt: string;
|
||||
try {
|
||||
prompt = await loadPrompt(
|
||||
promptTemplate,
|
||||
{ webUrl, repoPath },
|
||||
distributedConfig,
|
||||
pipelineTestingMode
|
||||
);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
new PentestError(
|
||||
`Failed to load prompt for ${agentName}: ${errorMessage}`,
|
||||
'prompt',
|
||||
false,
|
||||
{ agentName, promptTemplate, originalError: errorMessage },
|
||||
ErrorCode.PROMPT_LOAD_FAILED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Create git checkpoint before execution
|
||||
try {
|
||||
await createGitCheckpoint(repoPath, agentName, attemptNumber);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
return err(
|
||||
new PentestError(
|
||||
`Failed to create git checkpoint for ${agentName}: ${errorMessage}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ agentName, repoPath, originalError: errorMessage },
|
||||
ErrorCode.GIT_CHECKPOINT_FAILED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// 4. Start audit logging
|
||||
await auditSession.startAgent(agentName, prompt, attemptNumber);
|
||||
|
||||
// 5. Execute agent
|
||||
const result: ClaudePromptResult = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
'', // context
|
||||
agentName, // description
|
||||
agentName,
|
||||
chalk.cyan,
|
||||
auditSession
|
||||
);
|
||||
|
||||
// 6. Spending cap check - defense-in-depth
|
||||
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
|
||||
const resultText = result.result || '';
|
||||
if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
|
||||
await rollbackGitWorkspace(repoPath, 'spending cap detected');
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
|
||||
};
|
||||
await auditSession.endAgent(agentName, endResult);
|
||||
return err(
|
||||
new PentestError(
|
||||
`Spending cap likely reached: ${resultText.slice(0, 100)}`,
|
||||
'billing',
|
||||
true, // Retryable with long backoff
|
||||
{ agentName, turns: result.turns, cost: result.cost },
|
||||
ErrorCode.SPENDING_CAP_REACHED
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Handle execution failure
|
||||
if (!result.success) {
|
||||
await rollbackGitWorkspace(repoPath, 'execution failure');
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: result.error || 'Execution failed',
|
||||
};
|
||||
await auditSession.endAgent(agentName, endResult);
|
||||
return err(
|
||||
new PentestError(
|
||||
result.error || 'Agent execution failed',
|
||||
'validation',
|
||||
result.retryable ?? true,
|
||||
{ agentName, originalError: result.error },
|
||||
ErrorCode.AGENT_EXECUTION_FAILED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// 8. Validate output
|
||||
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
|
||||
if (!validationPassed) {
|
||||
await rollbackGitWorkspace(repoPath, 'validation failure');
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: 'Output validation failed',
|
||||
};
|
||||
await auditSession.endAgent(agentName, endResult);
|
||||
return err(
|
||||
new PentestError(
|
||||
`Agent ${agentName} failed output validation`,
|
||||
'validation',
|
||||
true, // Retryable - agent may succeed on retry
|
||||
{ agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
|
||||
ErrorCode.OUTPUT_VALIDATION_FAILED
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// 9. Success - commit deliverables, then capture checkpoint hash
|
||||
await commitGitSuccess(repoPath, agentName);
|
||||
const commitHash = await getGitCommitHash(repoPath);
|
||||
|
||||
const endResult: AgentEndResult = {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: true,
|
||||
model: result.model,
|
||||
...(commitHash && { checkpoint: commitHash }),
|
||||
};
|
||||
await auditSession.endAgent(agentName, endResult);
|
||||
|
||||
return ok(endResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute an agent, throwing PentestError on failure.
|
||||
*
|
||||
* This is the preferred method for Temporal activities, which need to
|
||||
* catch errors and classify them into ApplicationFailure. Avoids requiring
|
||||
* activities to import Result utilities, keeping the boundary clean.
|
||||
*
|
||||
* @param agentName - Name of the agent to execute
|
||||
* @param input - Execution input parameters
|
||||
* @param auditSession - Audit session for this specific agent execution
|
||||
* @returns AgentEndResult on success
|
||||
* @throws PentestError on failure
|
||||
*/
|
||||
async executeOrThrow(
|
||||
agentName: AgentName,
|
||||
input: AgentExecutionInput,
|
||||
auditSession: AuditSession
|
||||
): Promise<AgentEndResult> {
|
||||
const result = await this.execute(agentName, input, auditSession);
|
||||
if (isErr(result)) {
|
||||
throw result.error;
|
||||
}
|
||||
return result.value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert AgentEndResult to AgentMetrics for workflow state.
|
||||
*/
|
||||
static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
|
||||
return {
|
||||
durationMs: endResult.duration_ms,
|
||||
inputTokens: null, // Not currently exposed by SDK wrapper
|
||||
outputTokens: null,
|
||||
costUsd: endResult.cost_usd,
|
||||
numTurns: result.turns ?? null,
|
||||
model: result.model,
|
||||
};
|
||||
}
|
||||
}
|
||||
75
src/services/config-loader.ts
Normal file
75
src/services/config-loader.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Config Loader Service
|
||||
*
|
||||
* Wraps parseConfig + distributeConfig with Result type for explicit error handling.
|
||||
* Pure service with no Temporal dependencies.
|
||||
*/
|
||||
|
||||
import { parseConfig, distributeConfig } from '../config-parser.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { Result, ok, err } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
/**
|
||||
* Service for loading and distributing configuration files.
|
||||
*
|
||||
* Provides a Result-based API for explicit error handling,
|
||||
* allowing callers to decide how to handle failures.
|
||||
*/
|
||||
export class ConfigLoaderService {
|
||||
/**
|
||||
* Load and distribute a configuration file.
|
||||
*
|
||||
* @param configPath - Path to the YAML configuration file
|
||||
* @returns Result containing DistributedConfig on success, PentestError on failure
|
||||
*/
|
||||
async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
|
||||
try {
|
||||
const config = await parseConfig(configPath);
|
||||
const distributed = distributeConfig(config);
|
||||
return ok(distributed);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
|
||||
// Determine appropriate error code based on error message
|
||||
let errorCode = ErrorCode.CONFIG_PARSE_ERROR;
|
||||
if (errorMessage.includes('not found') || errorMessage.includes('ENOENT')) {
|
||||
errorCode = ErrorCode.CONFIG_NOT_FOUND;
|
||||
} else if (errorMessage.includes('validation failed')) {
|
||||
errorCode = ErrorCode.CONFIG_VALIDATION_FAILED;
|
||||
}
|
||||
|
||||
return err(
|
||||
new PentestError(
|
||||
`Failed to load config ${configPath}: ${errorMessage}`,
|
||||
'config',
|
||||
false,
|
||||
{ configPath, originalError: errorMessage },
|
||||
errorCode
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load config if path is provided, otherwise return null config.
|
||||
*
|
||||
* @param configPath - Optional path to the YAML configuration file
|
||||
* @returns Result containing DistributedConfig (or null) on success, PentestError on failure
|
||||
*/
|
||||
async loadOptional(
|
||||
configPath: string | undefined
|
||||
): Promise<Result<DistributedConfig | null, PentestError>> {
|
||||
if (!configPath) {
|
||||
return ok(null);
|
||||
}
|
||||
return this.load(configPath);
|
||||
}
|
||||
}
|
||||
117
src/services/container.ts
Normal file
117
src/services/container.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Dependency Injection Container
|
||||
*
|
||||
* Provides a per-workflow container for service instances.
|
||||
* Services are wired with explicit constructor injection.
|
||||
*
|
||||
* Usage:
|
||||
* const container = getOrCreateContainer(workflowId, sessionMetadata);
|
||||
* const auditSession = new AuditSession(sessionMetadata); // Per-agent
|
||||
* await auditSession.initialize(workflowId);
|
||||
* const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession);
|
||||
*/
|
||||
|
||||
import type { SessionMetadata } from '../audit/utils.js';
|
||||
import { AgentExecutionService } from './agent-execution.js';
|
||||
import { ConfigLoaderService } from './config-loader.js';
|
||||
import { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
|
||||
/**
|
||||
* Dependencies required to create a Container.
|
||||
*
|
||||
* NOTE: AuditSession is NOT stored in the container.
|
||||
* Each agent execution receives its own AuditSession instance
|
||||
* because AuditSession uses instance state (currentAgentName) that
|
||||
* cannot be shared across parallel agents.
|
||||
*/
|
||||
export interface ContainerDependencies {
|
||||
readonly sessionMetadata: SessionMetadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* DI Container for a single workflow.
|
||||
*
|
||||
* Holds all service instances for the workflow lifecycle.
|
||||
* Services are instantiated once and reused across agent executions.
|
||||
*
|
||||
* NOTE: AuditSession is NOT stored here - it's passed per agent execution
|
||||
* to support parallel agents each having their own logging context.
|
||||
*/
|
||||
export class Container {
|
||||
readonly sessionMetadata: SessionMetadata;
|
||||
readonly agentExecution: AgentExecutionService;
|
||||
readonly configLoader: ConfigLoaderService;
|
||||
readonly exploitationChecker: ExploitationCheckerService;
|
||||
|
||||
constructor(deps: ContainerDependencies) {
|
||||
this.sessionMetadata = deps.sessionMetadata;
|
||||
|
||||
// Wire services with explicit constructor injection
|
||||
this.configLoader = new ConfigLoaderService();
|
||||
this.exploitationChecker = new ExploitationCheckerService();
|
||||
this.agentExecution = new AgentExecutionService(this.configLoader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Map of workflowId to Container instance.
|
||||
* Each workflow gets its own container scoped to its lifecycle.
|
||||
*/
|
||||
const containers = new Map<string, Container>();
|
||||
|
||||
/**
|
||||
* Get or create a Container for a workflow.
|
||||
*
|
||||
* If a container already exists for the workflowId, returns it.
|
||||
* Otherwise, creates a new container with the provided dependencies.
|
||||
*
|
||||
* @param workflowId - Unique workflow identifier
|
||||
* @param sessionMetadata - Session metadata for audit paths
|
||||
* @returns Container instance for the workflow
|
||||
*/
|
||||
export function getOrCreateContainer(
|
||||
workflowId: string,
|
||||
sessionMetadata: SessionMetadata
|
||||
): Container {
|
||||
let container = containers.get(workflowId);
|
||||
|
||||
if (!container) {
|
||||
container = new Container({ sessionMetadata });
|
||||
containers.set(workflowId, container);
|
||||
}
|
||||
|
||||
return container;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a Container when a workflow completes.
|
||||
*
|
||||
* Should be called in logWorkflowComplete to clean up resources.
|
||||
*
|
||||
* @param workflowId - Unique workflow identifier
|
||||
*/
|
||||
export function removeContainer(workflowId: string): void {
|
||||
containers.delete(workflowId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an existing Container for a workflow, if one exists.
|
||||
*
|
||||
* Unlike getOrCreateContainer, this does NOT create a new container.
|
||||
* Returns undefined if no container exists for the workflowId.
|
||||
*
|
||||
* Useful for lightweight activities that can benefit from an existing
|
||||
* container but don't need to create one.
|
||||
*
|
||||
* @param workflowId - Unique workflow identifier
|
||||
* @returns Container instance or undefined
|
||||
*/
|
||||
export function getContainer(workflowId: string): Container | undefined {
|
||||
return containers.get(workflowId);
|
||||
}
|
||||
74
src/services/exploitation-checker.ts
Normal file
74
src/services/exploitation-checker.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Exploitation Checker Service
|
||||
*
|
||||
* Pure domain logic for determining whether exploitation should run.
|
||||
* Reads queue file, parses JSON, returns decision.
|
||||
*
|
||||
* No Temporal dependencies - this is pure business logic.
|
||||
*/
|
||||
|
||||
import chalk from 'chalk';
|
||||
import {
|
||||
validateQueueSafe,
|
||||
type VulnType,
|
||||
type ExploitationDecision,
|
||||
} from '../queue-validation.js';
|
||||
import { isOk } from '../types/result.js';
|
||||
|
||||
/**
|
||||
* Service for checking exploitation queue decisions.
|
||||
*
|
||||
* Determines whether an exploit agent should run based on
|
||||
* the vulnerability analysis deliverables and queue files.
|
||||
*/
|
||||
export class ExploitationCheckerService {
|
||||
/**
|
||||
* Check if exploitation should run for a given vulnerability type.
|
||||
*
|
||||
* Reads the vulnerability queue file and returns the decision.
|
||||
* This is pure domain logic - reads queue file, parses JSON, returns decision.
|
||||
*
|
||||
* @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
|
||||
* @param repoPath - Path to the repository containing deliverables
|
||||
* @returns ExploitationDecision indicating whether to exploit
|
||||
* @throws PentestError if validation fails and is retryable
|
||||
*/
|
||||
async checkQueue(vulnType: VulnType, repoPath: string): Promise<ExploitationDecision> {
|
||||
const result = await validateQueueSafe(vulnType, repoPath);
|
||||
|
||||
if (isOk(result)) {
|
||||
const decision = result.value;
|
||||
console.log(
|
||||
chalk.blue(
|
||||
` ${vulnType}: ${decision.shouldExploit ? `${decision.vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
|
||||
)
|
||||
);
|
||||
return decision;
|
||||
}
|
||||
|
||||
// Validation failed - check if we should retry or skip
|
||||
const error = result.error;
|
||||
if (error.retryable) {
|
||||
// Re-throw retryable errors so caller can handle retry
|
||||
console.log(chalk.yellow(` ${vulnType}: ${error.message} (retryable)`));
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Non-retryable error - skip exploitation gracefully
|
||||
console.log(
|
||||
chalk.yellow(` ${vulnType}: ${error.message}, skipping exploitation`)
|
||||
);
|
||||
return {
|
||||
shouldExploit: false,
|
||||
shouldRetry: false,
|
||||
vulnerabilityCount: 0,
|
||||
vulnType,
|
||||
};
|
||||
}
|
||||
}
|
||||
20
src/services/index.ts
Normal file
20
src/services/index.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Services Module
|
||||
*
|
||||
* Exports DI container and service classes for Shannon agent execution.
|
||||
* Services are pure domain logic with no Temporal dependencies.
|
||||
*/
|
||||
|
||||
export { Container, getOrCreateContainer, removeContainer } from './container.js';
|
||||
export type { ContainerDependencies } from './container.js';
|
||||
|
||||
export { ConfigLoaderService } from './config-loader.js';
|
||||
export { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
export { AgentExecutionService } from './agent-execution.js';
|
||||
export type { AgentExecutionInput } from './agent-execution.js';
|
||||
@@ -7,72 +7,99 @@
|
||||
import type { AgentName, AgentDefinition } from './types/index.js';
|
||||
|
||||
// Agent definitions according to PRD
|
||||
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
|
||||
export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
|
||||
'pre-recon': {
|
||||
name: 'pre-recon',
|
||||
displayName: 'Pre-recon agent',
|
||||
prerequisites: []
|
||||
prerequisites: [],
|
||||
promptTemplate: 'pre-recon-code',
|
||||
deliverableFilename: 'code_analysis_deliverable.md',
|
||||
},
|
||||
'recon': {
|
||||
name: 'recon',
|
||||
displayName: 'Recon agent',
|
||||
prerequisites: ['pre-recon']
|
||||
prerequisites: ['pre-recon'],
|
||||
promptTemplate: 'recon',
|
||||
deliverableFilename: 'recon_deliverable.md',
|
||||
},
|
||||
'injection-vuln': {
|
||||
name: 'injection-vuln',
|
||||
displayName: 'Injection vuln agent',
|
||||
prerequisites: ['recon']
|
||||
prerequisites: ['recon'],
|
||||
promptTemplate: 'vuln-injection',
|
||||
deliverableFilename: 'injection_analysis_deliverable.md',
|
||||
},
|
||||
'xss-vuln': {
|
||||
name: 'xss-vuln',
|
||||
displayName: 'XSS vuln agent',
|
||||
prerequisites: ['recon']
|
||||
prerequisites: ['recon'],
|
||||
promptTemplate: 'vuln-xss',
|
||||
deliverableFilename: 'xss_analysis_deliverable.md',
|
||||
},
|
||||
'auth-vuln': {
|
||||
name: 'auth-vuln',
|
||||
displayName: 'Auth vuln agent',
|
||||
prerequisites: ['recon']
|
||||
prerequisites: ['recon'],
|
||||
promptTemplate: 'vuln-auth',
|
||||
deliverableFilename: 'auth_analysis_deliverable.md',
|
||||
},
|
||||
'ssrf-vuln': {
|
||||
name: 'ssrf-vuln',
|
||||
displayName: 'SSRF vuln agent',
|
||||
prerequisites: ['recon']
|
||||
prerequisites: ['recon'],
|
||||
promptTemplate: 'vuln-ssrf',
|
||||
deliverableFilename: 'ssrf_analysis_deliverable.md',
|
||||
},
|
||||
'authz-vuln': {
|
||||
name: 'authz-vuln',
|
||||
displayName: 'Authz vuln agent',
|
||||
prerequisites: ['recon']
|
||||
prerequisites: ['recon'],
|
||||
promptTemplate: 'vuln-authz',
|
||||
deliverableFilename: 'authz_analysis_deliverable.md',
|
||||
},
|
||||
'injection-exploit': {
|
||||
name: 'injection-exploit',
|
||||
displayName: 'Injection exploit agent',
|
||||
prerequisites: ['injection-vuln']
|
||||
prerequisites: ['injection-vuln'],
|
||||
promptTemplate: 'exploit-injection',
|
||||
deliverableFilename: 'injection_exploitation_evidence.md',
|
||||
},
|
||||
'xss-exploit': {
|
||||
name: 'xss-exploit',
|
||||
displayName: 'XSS exploit agent',
|
||||
prerequisites: ['xss-vuln']
|
||||
prerequisites: ['xss-vuln'],
|
||||
promptTemplate: 'exploit-xss',
|
||||
deliverableFilename: 'xss_exploitation_evidence.md',
|
||||
},
|
||||
'auth-exploit': {
|
||||
name: 'auth-exploit',
|
||||
displayName: 'Auth exploit agent',
|
||||
prerequisites: ['auth-vuln']
|
||||
prerequisites: ['auth-vuln'],
|
||||
promptTemplate: 'exploit-auth',
|
||||
deliverableFilename: 'auth_exploitation_evidence.md',
|
||||
},
|
||||
'ssrf-exploit': {
|
||||
name: 'ssrf-exploit',
|
||||
displayName: 'SSRF exploit agent',
|
||||
prerequisites: ['ssrf-vuln']
|
||||
prerequisites: ['ssrf-vuln'],
|
||||
promptTemplate: 'exploit-ssrf',
|
||||
deliverableFilename: 'ssrf_exploitation_evidence.md',
|
||||
},
|
||||
'authz-exploit': {
|
||||
name: 'authz-exploit',
|
||||
displayName: 'Authz exploit agent',
|
||||
prerequisites: ['authz-vuln']
|
||||
prerequisites: ['authz-vuln'],
|
||||
promptTemplate: 'exploit-authz',
|
||||
deliverableFilename: 'authz_exploitation_evidence.md',
|
||||
},
|
||||
'report': {
|
||||
name: 'report',
|
||||
displayName: 'Report agent',
|
||||
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
|
||||
}
|
||||
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
|
||||
promptTemplate: 'report-executive',
|
||||
deliverableFilename: 'comprehensive_security_assessment_report.md',
|
||||
},
|
||||
});
|
||||
|
||||
// Phase names for metrics aggregation
|
||||
|
||||
@@ -7,28 +7,57 @@
|
||||
/**
|
||||
* Temporal activities for Shannon agent execution.
|
||||
*
|
||||
* Each activity wraps a single agent execution with:
|
||||
* Each activity wraps service calls with Temporal-specific concerns:
|
||||
* - Heartbeat loop (2s interval) to signal worker liveness
|
||||
* - Git checkpoint/rollback/commit per attempt
|
||||
* - Error classification for Temporal retry behavior
|
||||
* - Audit session logging
|
||||
* - Error classification into ApplicationFailure
|
||||
* - Container lifecycle management
|
||||
*
|
||||
* Temporal handles retries based on error classification:
|
||||
* - Retryable: BillingError, TransientError (429, 5xx, network)
|
||||
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
|
||||
* Business logic is delegated to services in src/services/.
|
||||
*/
|
||||
|
||||
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
|
||||
import chalk from 'chalk';
|
||||
import path from 'path';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
import { classifyErrorForTemporal, PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
|
||||
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
|
||||
import type { VulnType, ExploitationDecision } from '../queue-validation.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import type { AgentMetrics, ResumeState } from './shared.js';
|
||||
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
|
||||
// Max lengths to prevent Temporal protobuf buffer overflow
|
||||
const MAX_ERROR_MESSAGE_LENGTH = 2000;
|
||||
const MAX_STACK_TRACE_LENGTH = 1000;
|
||||
|
||||
// Max retries for output validation errors (agent didn't save deliverables)
|
||||
// Lower than default 50 since this is unlikely to self-heal
|
||||
const MAX_OUTPUT_VALIDATION_RETRIES = 3;
|
||||
|
||||
const HEARTBEAT_INTERVAL_MS = 2000;
|
||||
|
||||
/**
|
||||
* Input for all agent activities.
|
||||
*/
|
||||
export interface ActivityInput {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
configPath?: string;
|
||||
outputPath?: string;
|
||||
pipelineTestingMode?: boolean;
|
||||
workflowId: string;
|
||||
sessionId: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate error message to prevent buffer overflow in Temporal serialization.
|
||||
*/
|
||||
@@ -48,85 +77,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
|
||||
}
|
||||
}
|
||||
|
||||
import {
|
||||
runClaudePrompt,
|
||||
validateAgentOutput,
|
||||
type ClaudePromptResult,
|
||||
} from '../ai/claude-executor.js';
|
||||
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||
import { parseConfig, distributeConfig } from '../config-parser.js';
|
||||
import { classifyErrorForTemporal } from '../error-handling.js';
|
||||
import {
|
||||
safeValidateQueueAndDeliverable,
|
||||
type VulnType,
|
||||
type ExploitationDecision,
|
||||
} from '../queue-validation.js';
|
||||
import {
|
||||
createGitCheckpoint,
|
||||
commitGitSuccess,
|
||||
rollbackGitWorkspace,
|
||||
getGitCommitHash,
|
||||
} from '../utils/git-manager.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
|
||||
import { getPromptNameForAgent } from '../types/agents.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
|
||||
import type { AgentMetrics, ResumeState } from './shared.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
|
||||
import path from 'path';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
|
||||
|
||||
/**
|
||||
* Input for all agent activities.
|
||||
* Matches PipelineInput but with required workflowId for audit correlation.
|
||||
* Build SessionMetadata from ActivityInput.
|
||||
*/
|
||||
export interface ActivityInput {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
configPath?: string;
|
||||
outputPath?: string;
|
||||
pipelineTestingMode?: boolean;
|
||||
workflowId: string;
|
||||
sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
|
||||
function buildSessionMetadata(input: ActivityInput): SessionMetadata {
|
||||
const { webUrl, repoPath, outputPath, sessionId } = input;
|
||||
return {
|
||||
id: sessionId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Core activity implementation.
|
||||
* Core activity implementation using services.
|
||||
*
|
||||
* Executes a single agent with:
|
||||
* 1. Heartbeat loop for worker liveness
|
||||
* 2. Config loading (if configPath provided)
|
||||
* 3. Audit session initialization
|
||||
* 4. Prompt loading
|
||||
* 5. Git checkpoint before execution
|
||||
* 6. Agent execution (single attempt)
|
||||
* 7. Output validation
|
||||
* 8. Git commit on success, rollback on failure
|
||||
* 9. Error classification for Temporal retry
|
||||
* 2. Container creation/reuse
|
||||
* 3. Service-based agent execution
|
||||
* 4. Error classification for Temporal retry
|
||||
*/
|
||||
async function runAgentActivity(
|
||||
agentName: AgentName,
|
||||
input: ActivityInput
|
||||
): Promise<AgentMetrics> {
|
||||
const {
|
||||
webUrl,
|
||||
repoPath,
|
||||
configPath,
|
||||
outputPath,
|
||||
pipelineTestingMode = false,
|
||||
workflowId,
|
||||
} = input;
|
||||
|
||||
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
|
||||
const startTime = Date.now();
|
||||
|
||||
// Get attempt number from Temporal context (tracks retries automatically)
|
||||
const attemptNumber = Context.current().info.attempt;
|
||||
|
||||
// Heartbeat loop - signals worker is alive to Temporal server
|
||||
@@ -136,158 +114,63 @@ async function runAgentActivity(
|
||||
}, HEARTBEAT_INTERVAL_MS);
|
||||
|
||||
try {
|
||||
// 1. Load config (if provided)
|
||||
let distributedConfig: DistributedConfig | null = null;
|
||||
if (configPath) {
|
||||
try {
|
||||
const config = await parseConfig(configPath);
|
||||
distributedConfig = distributeConfig(config);
|
||||
} catch (err) {
|
||||
throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
// Build session metadata and get/create container
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
const container = getOrCreateContainer(workflowId, sessionMetadata);
|
||||
|
||||
// 2. Build session metadata for audit
|
||||
// Use sessionId (workspace name) for directory, workflowId for tracking
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: input.sessionId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
|
||||
// 3. Initialize audit session (idempotent, safe across retries)
|
||||
// Create audit session for THIS agent execution
|
||||
// NOTE: Each agent needs its own AuditSession because AuditSession uses
|
||||
// instance state (currentAgentName) that cannot be shared across parallel agents
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize(workflowId);
|
||||
|
||||
// 4. Load prompt
|
||||
const promptName = getPromptNameForAgent(agentName);
|
||||
const prompt = await loadPrompt(
|
||||
promptName,
|
||||
{ webUrl, repoPath },
|
||||
distributedConfig,
|
||||
pipelineTestingMode
|
||||
);
|
||||
|
||||
// 5. Create git checkpoint before execution
|
||||
await createGitCheckpoint(repoPath, agentName, attemptNumber);
|
||||
await auditSession.startAgent(agentName, prompt, attemptNumber);
|
||||
|
||||
// 6. Execute agent (single attempt - Temporal handles retries)
|
||||
const result: ClaudePromptResult = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
'', // context
|
||||
agentName, // description
|
||||
// Execute agent via service (throws PentestError on failure)
|
||||
const endResult = await container.agentExecution.executeOrThrow(
|
||||
agentName,
|
||||
chalk.cyan,
|
||||
{
|
||||
webUrl,
|
||||
repoPath,
|
||||
configPath,
|
||||
pipelineTestingMode,
|
||||
attemptNumber,
|
||||
},
|
||||
auditSession
|
||||
);
|
||||
|
||||
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
|
||||
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
|
||||
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
|
||||
const resultText = result.result || '';
|
||||
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
|
||||
|
||||
if (looksLikeBillingError) {
|
||||
await rollbackGitWorkspace(repoPath, 'spending cap detected');
|
||||
await auditSession.endAgent(agentName, {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
|
||||
});
|
||||
// Throw as billing error so Temporal retries with long backoff
|
||||
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Handle execution failure
|
||||
if (!result.success) {
|
||||
await rollbackGitWorkspace(repoPath, 'execution failure');
|
||||
await auditSession.endAgent(agentName, {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: result.error || 'Execution failed',
|
||||
});
|
||||
throw new Error(result.error || 'Agent execution failed');
|
||||
}
|
||||
|
||||
// 8. Validate output
|
||||
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
|
||||
if (!validationPassed) {
|
||||
await rollbackGitWorkspace(repoPath, 'validation failure');
|
||||
await auditSession.endAgent(agentName, {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: 'Output validation failed',
|
||||
});
|
||||
|
||||
// Limit output validation retries (unlikely to self-heal)
|
||||
if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
|
||||
'OutputValidationError',
|
||||
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
|
||||
);
|
||||
}
|
||||
// Let Temporal retry (will be classified as OutputValidationError)
|
||||
throw new Error(`Agent ${agentName} failed output validation`);
|
||||
}
|
||||
|
||||
// 9. Success - commit deliverables, then capture checkpoint hash
|
||||
await commitGitSuccess(repoPath, agentName);
|
||||
const commitHash = await getGitCommitHash(repoPath);
|
||||
await auditSession.endAgent(agentName, {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: true,
|
||||
model: result.model,
|
||||
...(commitHash && { checkpoint: commitHash }),
|
||||
});
|
||||
|
||||
// 10. Return metrics
|
||||
// Success - return metrics
|
||||
return {
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: null, // Not currently exposed by SDK wrapper
|
||||
inputTokens: null,
|
||||
outputTokens: null,
|
||||
costUsd: result.cost ?? null,
|
||||
numTurns: result.turns ?? null,
|
||||
model: result.model,
|
||||
costUsd: endResult.cost_usd,
|
||||
numTurns: null,
|
||||
model: endResult.model,
|
||||
};
|
||||
} catch (error) {
|
||||
// Rollback git workspace before Temporal retry to ensure clean state
|
||||
try {
|
||||
await rollbackGitWorkspace(repoPath, 'error recovery');
|
||||
} catch (rollbackErr) {
|
||||
// Log but don't fail - rollback is best-effort
|
||||
console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
|
||||
}
|
||||
|
||||
// If error is already an ApplicationFailure (e.g., from our retry limit logic),
|
||||
// re-throw it directly without re-classifying
|
||||
// If error is already an ApplicationFailure, re-throw directly
|
||||
if (error instanceof ApplicationFailure) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Check if output validation retry limit reached (PentestError with code)
|
||||
if (
|
||||
error instanceof PentestError &&
|
||||
error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
|
||||
attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
|
||||
) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
|
||||
'OutputValidationError',
|
||||
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
|
||||
);
|
||||
}
|
||||
|
||||
// Classify error for Temporal retry behavior
|
||||
const classified = classifyErrorForTemporal(error);
|
||||
// Truncate message to prevent protobuf buffer overflow
|
||||
const rawMessage = error instanceof Error ? error.message : String(error);
|
||||
const message = truncateErrorMessage(rawMessage);
|
||||
|
||||
if (classified.retryable) {
|
||||
// Temporal will retry with configured backoff
|
||||
const failure = ApplicationFailure.create({
|
||||
message,
|
||||
type: classified.type,
|
||||
@@ -296,7 +179,6 @@ async function runAgentActivity(
|
||||
truncateStackTrace(failure);
|
||||
throw failure;
|
||||
} else {
|
||||
// Fail immediately - no retry
|
||||
const failure = ApplicationFailure.nonRetryable(message, classified.type, [
|
||||
{ agentName, attemptNumber, elapsed: Date.now() - startTime },
|
||||
]);
|
||||
@@ -309,7 +191,6 @@ async function runAgentActivity(
|
||||
}
|
||||
|
||||
// === Individual Agent Activity Exports ===
|
||||
// Each function is a thin wrapper around runAgentActivity with the agent name.
|
||||
|
||||
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
|
||||
return runAgentActivity('pre-recon', input);
|
||||
@@ -363,25 +244,24 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
|
||||
return runAgentActivity('report', input);
|
||||
}
|
||||
|
||||
// === Report Assembly Activities ===
|
||||
|
||||
/**
|
||||
* Assemble the final report by concatenating exploitation evidence files.
|
||||
* This must be called BEFORE runReportAgent to create the file that the report agent will modify.
|
||||
*/
|
||||
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
|
||||
const { repoPath } = input;
|
||||
console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
|
||||
console.log(chalk.blue(' Assembling deliverables from specialist agents...'));
|
||||
try {
|
||||
await assembleFinalReport(repoPath);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
|
||||
// Don't throw - the report agent can still create content even if no exploitation files exist
|
||||
console.log(chalk.yellow(` Warning: Error assembling final report: ${err.message}`));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inject model metadata into the final report.
|
||||
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
|
||||
*/
|
||||
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
|
||||
const { repoPath, sessionId, outputPath } = input;
|
||||
@@ -392,65 +272,33 @@ export async function injectReportMetadataActivity(input: ActivityInput): Promis
|
||||
await injectModelIntoReport(repoPath, effectiveOutputPath);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
|
||||
// Don't throw - this is a non-critical enhancement
|
||||
console.log(chalk.yellow(` Warning: Error injecting model into report: ${err.message}`));
|
||||
}
|
||||
}
|
||||
|
||||
// === Exploitation Queue Check ===
|
||||
|
||||
/**
|
||||
* Check if exploitation should run for a given vulnerability type.
|
||||
* Reads the vulnerability queue file and returns the decision.
|
||||
*
|
||||
* This activity allows the workflow to skip exploit agents entirely
|
||||
* when no vulnerabilities were found, saving API calls and time.
|
||||
*
|
||||
* Error handling:
|
||||
* - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
|
||||
* - Non-retryable errors: skip exploitation gracefully
|
||||
* Uses existing container if available (from prior agent runs),
|
||||
* otherwise creates service directly (stateless, no dependencies).
|
||||
*/
|
||||
export async function checkExploitationQueue(
|
||||
input: ActivityInput,
|
||||
vulnType: VulnType
|
||||
): Promise<ExploitationDecision> {
|
||||
const { repoPath } = input;
|
||||
const { repoPath, workflowId } = input;
|
||||
|
||||
const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
|
||||
// Reuse container's service if available (from prior vuln agent runs)
|
||||
const existingContainer = getContainer(workflowId);
|
||||
const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
|
||||
|
||||
if (result.success && result.data) {
|
||||
const { shouldExploit, vulnerabilityCount } = result.data;
|
||||
console.log(
|
||||
chalk.blue(
|
||||
`🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
|
||||
)
|
||||
);
|
||||
return result.data;
|
||||
}
|
||||
|
||||
// Validation failed - check if we should retry or skip
|
||||
const error = result.error;
|
||||
if (error?.retryable) {
|
||||
// Re-throw retryable errors so Temporal can retry the vuln agent
|
||||
console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Non-retryable error - skip exploitation gracefully
|
||||
console.log(
|
||||
chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
|
||||
);
|
||||
return {
|
||||
shouldExploit: false,
|
||||
shouldRetry: false,
|
||||
vulnerabilityCount: 0,
|
||||
vulnType,
|
||||
};
|
||||
return checker.checkQueue(vulnType, repoPath);
|
||||
}
|
||||
|
||||
// === Resume Activities ===
|
||||
|
||||
/**
|
||||
* Session.json structure for resume state loading
|
||||
*/
|
||||
interface SessionJson {
|
||||
session: {
|
||||
id: string;
|
||||
@@ -460,18 +308,18 @@ interface SessionJson {
|
||||
resumeAttempts?: ResumeAttempt[];
|
||||
};
|
||||
metrics: {
|
||||
agents: Record<string, {
|
||||
status: 'in-progress' | 'success' | 'failed';
|
||||
checkpoint?: string;
|
||||
}>;
|
||||
agents: Record<
|
||||
string,
|
||||
{
|
||||
status: 'in-progress' | 'success' | 'failed';
|
||||
checkpoint?: string;
|
||||
}
|
||||
>;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Load resume state from an existing workspace.
|
||||
* Validates workspace exists, URL matches, and determines which agents to skip.
|
||||
*
|
||||
* @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
|
||||
*/
|
||||
export async function loadResumeState(
|
||||
workspaceName: string,
|
||||
@@ -480,7 +328,6 @@ export async function loadResumeState(
|
||||
): Promise<ResumeState> {
|
||||
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
|
||||
|
||||
// Validate workspace exists
|
||||
const exists = await fileExists(sessionPath);
|
||||
if (!exists) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
@@ -489,7 +336,6 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// Load session.json
|
||||
let session: SessionJson;
|
||||
try {
|
||||
session = await readJson<SessionJson>(sessionPath);
|
||||
@@ -501,7 +347,6 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// Validate URL matches
|
||||
if (session.session.webUrl !== expectedUrl) {
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
|
||||
@@ -509,20 +354,17 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// Find completed agents (status === 'success' AND deliverable exists)
|
||||
const completedAgents: string[] = [];
|
||||
const agents = session.metrics.agents;
|
||||
|
||||
for (const agentName of ALL_AGENTS) {
|
||||
const agentData = agents[agentName];
|
||||
|
||||
// Skip if agent never ran or didn't succeed
|
||||
if (!agentData || agentData.status !== 'success') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Validate deliverable exists
|
||||
const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
|
||||
const deliverableFilename = AGENTS[agentName].deliverableFilename;
|
||||
const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
|
||||
const deliverableExists = await fileExists(deliverablePath);
|
||||
|
||||
if (!deliverableExists) {
|
||||
@@ -532,11 +374,9 @@ export async function loadResumeState(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Agent completed successfully and deliverable exists
|
||||
completedAgents.push(agentName);
|
||||
}
|
||||
|
||||
// Find latest checkpoint from completed agents
|
||||
const checkpoints = completedAgents
|
||||
.map((name) => agents[name]?.checkpoint)
|
||||
.filter((hash): hash is string => hash != null);
|
||||
@@ -548,18 +388,16 @@ export async function loadResumeState(
|
||||
|
||||
throw ApplicationFailure.nonRetryable(
|
||||
`Cannot resume workspace ${workspaceName}: ` +
|
||||
(successAgents.length > 0
|
||||
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
|
||||
`but their deliverable files are missing from disk. ` +
|
||||
`Start a fresh run instead.`
|
||||
: `No agents completed successfully. Start a fresh run instead.`),
|
||||
(successAgents.length > 0
|
||||
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
|
||||
`but their deliverable files are missing from disk. ` +
|
||||
`Start a fresh run instead.`
|
||||
: `No agents completed successfully. Start a fresh run instead.`),
|
||||
'NoCheckpointsError'
|
||||
);
|
||||
}
|
||||
|
||||
// Find most recent commit among checkpoints
|
||||
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
|
||||
|
||||
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
|
||||
|
||||
console.log(chalk.cyan(`=== RESUME STATE ===`));
|
||||
@@ -576,20 +414,21 @@ export async function loadResumeState(
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the most recent commit among a list of commit hashes.
|
||||
* Uses git rev-list to determine which commit is newest.
|
||||
*/
|
||||
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
|
||||
if (commitHashes.length === 1) {
|
||||
const hash = commitHashes[0];
|
||||
if (!hash) {
|
||||
throw new Error('Empty commit hash in array');
|
||||
throw new PentestError(
|
||||
'Empty commit hash in array',
|
||||
'filesystem',
|
||||
false, // Non-retryable - corrupt workspace state
|
||||
{ phase: 'resume' },
|
||||
ErrorCode.GIT_CHECKPOINT_FAILED
|
||||
);
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
// Use git rev-list to find the most recent commit among all hashes
|
||||
const result = await executeGitCommandWithRetry(
|
||||
['git', 'rev-list', '--max-count=1', ...commitHashes],
|
||||
repoPath,
|
||||
@@ -601,10 +440,6 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
|
||||
|
||||
/**
|
||||
* Restore git workspace to a checkpoint and clean up partial deliverables.
|
||||
*
|
||||
* @param repoPath - Repository path
|
||||
* @param checkpointHash - Git commit hash to reset to
|
||||
* @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
|
||||
*/
|
||||
export async function restoreGitCheckpoint(
|
||||
repoPath: string,
|
||||
@@ -613,8 +448,6 @@ export async function restoreGitCheckpoint(
|
||||
): Promise<void> {
|
||||
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
|
||||
|
||||
// Checkpoint hash points to the success commit (after commitGitSuccess),
|
||||
// so git reset --hard naturally preserves all completed agent deliverables.
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'reset', '--hard', checkpointHash],
|
||||
repoPath,
|
||||
@@ -626,9 +459,9 @@ export async function restoreGitCheckpoint(
|
||||
'clean untracked files for resume'
|
||||
);
|
||||
|
||||
// Clean up any partial deliverables from incomplete agents
|
||||
for (const agentName of incompleteAgents) {
|
||||
const deliverablePath = getDeliverablePath(agentName, repoPath);
|
||||
const deliverableFilename = AGENTS[agentName].deliverableFilename;
|
||||
const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
|
||||
try {
|
||||
const exists = await fileExists(deliverablePath);
|
||||
if (exists) {
|
||||
@@ -645,48 +478,31 @@ export async function restoreGitCheckpoint(
|
||||
|
||||
/**
|
||||
* Record a resume attempt in session.json.
|
||||
* Tracks the new workflow ID, terminated workflows, and checkpoint hash.
|
||||
*/
|
||||
export async function recordResumeAttempt(
|
||||
input: ActivityInput,
|
||||
terminatedWorkflows: string[],
|
||||
checkpointHash: string
|
||||
): Promise<void> {
|
||||
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
|
||||
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: sessionId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize();
|
||||
|
||||
await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
|
||||
await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
|
||||
}
|
||||
|
||||
// === Phase Transition Activities ===
|
||||
|
||||
/**
|
||||
* Log phase transition to the unified workflow log.
|
||||
* Called at phase boundaries for per-workflow logging.
|
||||
*/
|
||||
export async function logPhaseTransition(
|
||||
input: ActivityInput,
|
||||
phase: string,
|
||||
event: 'start' | 'complete'
|
||||
): Promise<void> {
|
||||
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
|
||||
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: sessionId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize(workflowId);
|
||||
await auditSession.initialize(input.workflowId);
|
||||
|
||||
if (event === 'start') {
|
||||
await auditSession.logPhaseStart(phase);
|
||||
@@ -696,28 +512,22 @@ export async function logPhaseTransition(
|
||||
}
|
||||
|
||||
/**
|
||||
* Log workflow completion with full summary to the unified workflow log.
|
||||
* Called at the end of the workflow to write a summary breakdown.
|
||||
* Log workflow completion with full summary.
|
||||
* Cleans up container when done.
|
||||
*/
|
||||
export async function logWorkflowComplete(
|
||||
input: ActivityInput,
|
||||
summary: WorkflowSummary
|
||||
): Promise<void> {
|
||||
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
|
||||
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: sessionId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
const { repoPath, workflowId } = input;
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize(workflowId);
|
||||
await auditSession.updateSessionStatus(summary.status);
|
||||
|
||||
// Use cumulative metrics from session.json (includes all resume attempts)
|
||||
const sessionData = await auditSession.getMetrics() as {
|
||||
// Use cumulative metrics from session.json
|
||||
const sessionData = (await auditSession.getMetrics()) as {
|
||||
metrics: {
|
||||
total_duration_ms: number;
|
||||
total_cost_usd: number;
|
||||
@@ -725,7 +535,7 @@ export async function logWorkflowComplete(
|
||||
};
|
||||
};
|
||||
|
||||
// Fill in metrics for skipped agents (completed in previous runs)
|
||||
// Fill in metrics for skipped agents
|
||||
const agentMetrics = { ...summary.agentMetrics };
|
||||
for (const agentName of summary.completedAgents) {
|
||||
if (!agentMetrics[agentName]) {
|
||||
@@ -747,10 +557,13 @@ export async function logWorkflowComplete(
|
||||
};
|
||||
await auditSession.logWorkflowComplete(cumulativeSummary);
|
||||
|
||||
// Copy all deliverables to audit-logs once at workflow end (non-fatal)
|
||||
// Copy deliverables to audit-logs
|
||||
try {
|
||||
await copyDeliverablesToAudit(sessionMetadata, repoPath);
|
||||
} catch (copyErr) {
|
||||
console.error('Failed to copy deliverables to audit-logs:', copyErr);
|
||||
}
|
||||
|
||||
// Clean up container
|
||||
removeContainer(workflowId);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import { defineQuery } from '@temporalio/workflow';
|
||||
|
||||
// Re-export AgentMetrics from central types location
|
||||
export type { AgentMetrics } from '../types/metrics.js';
|
||||
import type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
// === Types ===
|
||||
|
||||
export interface PipelineInput {
|
||||
@@ -22,15 +26,6 @@ export interface ResumeState {
|
||||
originalWorkflowId: string;
|
||||
}
|
||||
|
||||
export interface AgentMetrics {
|
||||
durationMs: number;
|
||||
inputTokens: number | null;
|
||||
outputTokens: number | null;
|
||||
costUsd: number | null;
|
||||
numTurns: number | null;
|
||||
model?: string | undefined;
|
||||
}
|
||||
|
||||
export interface PipelineSummary {
|
||||
totalCostUsd: number;
|
||||
totalDurationMs: number; // Wall-clock time (end - start)
|
||||
|
||||
45
src/temporal/summary-mapper.ts
Normal file
45
src/temporal/summary-mapper.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Maps PipelineState to WorkflowSummary for audit logging.
|
||||
* Pure function with no side effects.
|
||||
*/
|
||||
|
||||
import type { PipelineState } from './shared.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
|
||||
/**
|
||||
* Maps PipelineState to WorkflowSummary.
|
||||
*
|
||||
* This function is deterministic (no Date.now() or I/O) so it can be
|
||||
* safely imported into Temporal workflows. The caller must ensure
|
||||
* state.summary is set before calling (via computeSummary).
|
||||
*/
|
||||
export function toWorkflowSummary(
|
||||
state: PipelineState,
|
||||
status: 'completed' | 'failed'
|
||||
): WorkflowSummary {
|
||||
// state.summary must be computed before calling this mapper
|
||||
const summary = state.summary;
|
||||
if (!summary) {
|
||||
throw new Error('toWorkflowSummary: state.summary must be set before calling');
|
||||
}
|
||||
|
||||
return {
|
||||
status,
|
||||
totalDurationMs: summary.totalDurationMs,
|
||||
totalCostUsd: summary.totalCostUsd,
|
||||
completedAgents: state.completedAgents,
|
||||
agentMetrics: Object.fromEntries(
|
||||
Object.entries(state.agentMetrics).map(([name, m]) => [
|
||||
name,
|
||||
{ durationMs: m.durationMs, costUsd: m.costUsd },
|
||||
])
|
||||
),
|
||||
...(state.error && { error: state.error }),
|
||||
};
|
||||
}
|
||||
@@ -43,6 +43,7 @@ import {
|
||||
import type { VulnType } from '../queue-validation.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import { toWorkflowSummary } from './summary-mapper.js';
|
||||
|
||||
// Retry configuration for production (long intervals for billing recovery)
|
||||
const PRODUCTION_RETRY = {
|
||||
@@ -417,18 +418,7 @@ export async function pentestPipelineWorkflow(
|
||||
state.summary = computeSummary(state);
|
||||
|
||||
// Log workflow completion summary
|
||||
await a.logWorkflowComplete(activityInput, {
|
||||
status: 'completed',
|
||||
totalDurationMs: state.summary.totalDurationMs,
|
||||
totalCostUsd: state.summary.totalCostUsd,
|
||||
completedAgents: state.completedAgents,
|
||||
agentMetrics: Object.fromEntries(
|
||||
Object.entries(state.agentMetrics).map(([name, m]) => [
|
||||
name,
|
||||
{ durationMs: m.durationMs, costUsd: m.costUsd },
|
||||
])
|
||||
),
|
||||
});
|
||||
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));
|
||||
|
||||
return state;
|
||||
} catch (error) {
|
||||
@@ -438,19 +428,7 @@ export async function pentestPipelineWorkflow(
|
||||
state.summary = computeSummary(state);
|
||||
|
||||
// Log workflow failure summary
|
||||
await a.logWorkflowComplete(activityInput, {
|
||||
status: 'failed',
|
||||
totalDurationMs: state.summary.totalDurationMs,
|
||||
totalCostUsd: state.summary.totalCostUsd,
|
||||
completedAgents: state.completedAgents,
|
||||
agentMetrics: Object.fromEntries(
|
||||
Object.entries(state.agentMetrics).map(([name, m]) => [
|
||||
name,
|
||||
{ durationMs: m.durationMs, costUsd: m.costUsd },
|
||||
])
|
||||
),
|
||||
error: state.error ?? undefined,
|
||||
});
|
||||
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));
|
||||
|
||||
throw error;
|
||||
}
|
||||
|
||||
@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
|
||||
*/
|
||||
export type AgentName = typeof ALL_AGENTS[number];
|
||||
|
||||
export type PromptName =
|
||||
| 'pre-recon-code'
|
||||
| 'recon'
|
||||
| 'vuln-injection'
|
||||
| 'vuln-xss'
|
||||
| 'vuln-auth'
|
||||
| 'vuln-ssrf'
|
||||
| 'vuln-authz'
|
||||
| 'exploit-injection'
|
||||
| 'exploit-xss'
|
||||
| 'exploit-auth'
|
||||
| 'exploit-ssrf'
|
||||
| 'exploit-authz'
|
||||
| 'report-executive';
|
||||
|
||||
export type PlaywrightAgent =
|
||||
| 'playwright-agent1'
|
||||
| 'playwright-agent2'
|
||||
@@ -69,52 +54,6 @@ export interface AgentDefinition {
|
||||
name: AgentName;
|
||||
displayName: string;
|
||||
prerequisites: AgentName[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps an agent name to its corresponding prompt file name.
|
||||
*/
|
||||
export function getPromptNameForAgent(agentName: AgentName): PromptName {
|
||||
const mappings: Record<AgentName, PromptName> = {
|
||||
'pre-recon': 'pre-recon-code',
|
||||
'recon': 'recon',
|
||||
'injection-vuln': 'vuln-injection',
|
||||
'xss-vuln': 'vuln-xss',
|
||||
'auth-vuln': 'vuln-auth',
|
||||
'ssrf-vuln': 'vuln-ssrf',
|
||||
'authz-vuln': 'vuln-authz',
|
||||
'injection-exploit': 'exploit-injection',
|
||||
'xss-exploit': 'exploit-xss',
|
||||
'auth-exploit': 'exploit-auth',
|
||||
'ssrf-exploit': 'exploit-ssrf',
|
||||
'authz-exploit': 'exploit-authz',
|
||||
'report': 'report-executive',
|
||||
};
|
||||
|
||||
return mappings[agentName];
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps an agent name to its deliverable file path.
|
||||
* Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
|
||||
*/
|
||||
export function getDeliverablePath(agentName: AgentName, repoPath: string): string {
|
||||
const deliverableMap: Record<AgentName, string> = {
|
||||
'pre-recon': 'code_analysis_deliverable.md',
|
||||
'recon': 'recon_deliverable.md',
|
||||
'injection-vuln': 'injection_analysis_deliverable.md',
|
||||
'xss-vuln': 'xss_analysis_deliverable.md',
|
||||
'auth-vuln': 'auth_analysis_deliverable.md',
|
||||
'ssrf-vuln': 'ssrf_analysis_deliverable.md',
|
||||
'authz-vuln': 'authz_analysis_deliverable.md',
|
||||
'injection-exploit': 'injection_exploitation_evidence.md',
|
||||
'xss-exploit': 'xss_exploitation_evidence.md',
|
||||
'auth-exploit': 'auth_exploitation_evidence.md',
|
||||
'ssrf-exploit': 'ssrf_exploitation_evidence.md',
|
||||
'authz-exploit': 'authz_exploitation_evidence.md',
|
||||
'report': 'comprehensive_security_assessment_report.md',
|
||||
};
|
||||
|
||||
const filename = deliverableMap[agentName];
|
||||
return `${repoPath}/deliverables/${filename}`;
|
||||
promptTemplate: string;
|
||||
deliverableFilename: string;
|
||||
}
|
||||
|
||||
@@ -51,7 +51,6 @@ export interface Authentication {
|
||||
export interface Config {
|
||||
rules?: Rules;
|
||||
authentication?: Authentication;
|
||||
login?: unknown;
|
||||
}
|
||||
|
||||
export interface DistributedConfig {
|
||||
|
||||
@@ -8,6 +8,39 @@
|
||||
* Error type definitions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Specific error codes for reliable classification.
|
||||
*
|
||||
* ErrorCode provides precision within the coarse 8-category PentestErrorType.
|
||||
* Used by classifyErrorForTemporal for code-based classification (preferred)
|
||||
* with string matching as fallback for external errors.
|
||||
*/
|
||||
export enum ErrorCode {
|
||||
// Config errors (PentestErrorType: 'config')
|
||||
CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
|
||||
CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
|
||||
CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
|
||||
|
||||
// Agent execution errors (PentestErrorType: 'validation')
|
||||
AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
|
||||
OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
|
||||
|
||||
// Billing errors (PentestErrorType: 'billing')
|
||||
API_RATE_LIMITED = 'API_RATE_LIMITED',
|
||||
SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
|
||||
INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
|
||||
|
||||
// Git errors (PentestErrorType: 'filesystem')
|
||||
GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
|
||||
GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
|
||||
|
||||
// Prompt errors (PentestErrorType: 'prompt')
|
||||
PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
|
||||
|
||||
// Validation errors (PentestErrorType: 'validation')
|
||||
DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
|
||||
}
|
||||
|
||||
export type PentestErrorType =
|
||||
| 'config'
|
||||
| 'network'
|
||||
|
||||
@@ -12,3 +12,5 @@ export * from './errors.js';
|
||||
export * from './config.js';
|
||||
export * from './agents.js';
|
||||
export * from './audit.js';
|
||||
export * from './result.js';
|
||||
export * from './metrics.js';
|
||||
|
||||
19
src/types/metrics.ts
Normal file
19
src/types/metrics.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Agent metrics types used across services and activities.
|
||||
* Centralized here to avoid temporal/shared.ts import boundary violations.
|
||||
*/
|
||||
|
||||
export interface AgentMetrics {
|
||||
durationMs: number;
|
||||
inputTokens: number | null;
|
||||
outputTokens: number | null;
|
||||
costUsd: number | null;
|
||||
numTurns: number | null;
|
||||
model?: string | undefined;
|
||||
}
|
||||
62
src/types/result.ts
Normal file
62
src/types/result.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Minimal Result type for explicit error handling.
|
||||
*
|
||||
* A discriminated union that makes error handling explicit without adding
|
||||
* heavy machinery. Used in key modules (config loading, agent execution,
|
||||
* queue validation) where callers need to make decisions based on error type.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Success variant of Result
|
||||
*/
|
||||
export interface Ok<T> {
|
||||
readonly ok: true;
|
||||
readonly value: T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error variant of Result
|
||||
*/
|
||||
export interface Err<E> {
|
||||
readonly ok: false;
|
||||
readonly error: E;
|
||||
}
|
||||
|
||||
/**
|
||||
* Result type - either Ok with a value or Err with an error
|
||||
*/
|
||||
export type Result<T, E> = Ok<T> | Err<E>;
|
||||
|
||||
/**
|
||||
* Create a success Result
|
||||
*/
|
||||
export function ok<T>(value: T): Ok<T> {
|
||||
return { ok: true, value };
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an error Result
|
||||
*/
|
||||
export function err<E>(error: E): Err<E> {
|
||||
return { ok: false, error };
|
||||
}
|
||||
|
||||
/**
|
||||
* Type guard for Ok variant
|
||||
*/
|
||||
export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
|
||||
return result.ok === true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Type guard for Err variant
|
||||
*/
|
||||
export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
|
||||
return result.ok === false;
|
||||
}
|
||||
95
src/utils/billing-detection.ts
Normal file
95
src/utils/billing-detection.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Consolidated billing/spending cap detection utilities.
|
||||
*
|
||||
* Anthropic's spending cap behavior is inconsistent:
|
||||
* - Sometimes a proper SDK error (billing_error)
|
||||
* - Sometimes Claude responds with text about the cap
|
||||
* - Sometimes partial billing before cutoff
|
||||
*
|
||||
* This module provides defense-in-depth detection with shared pattern lists
|
||||
* to prevent drift between detection points.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Text patterns for SDK output sniffing (what Claude says).
|
||||
* Used by message-handlers.ts and the behavioral heuristic.
|
||||
*/
|
||||
export const BILLING_TEXT_PATTERNS = [
|
||||
'spending cap',
|
||||
'spending limit',
|
||||
'cap reached',
|
||||
'budget exceeded',
|
||||
'usage limit',
|
||||
'resets',
|
||||
] as const;
|
||||
|
||||
/**
|
||||
* API patterns for error message classification (what the API returns).
|
||||
* Used by classifyErrorForTemporal in error-handling.ts.
|
||||
*/
|
||||
export const BILLING_API_PATTERNS = [
|
||||
'billing_error',
|
||||
'credit balance is too low',
|
||||
'insufficient credits',
|
||||
'usage is blocked due to insufficient credits',
|
||||
'please visit plans & billing',
|
||||
'please visit plans and billing',
|
||||
'usage limit reached',
|
||||
'quota exceeded',
|
||||
'daily rate limit',
|
||||
'limit will reset',
|
||||
'billing limit reached',
|
||||
] as const;
|
||||
|
||||
/**
|
||||
* Checks if text matches any billing text pattern.
|
||||
* Used for sniffing SDK output content for spending cap messages.
|
||||
*/
|
||||
export function matchesBillingTextPattern(text: string): boolean {
|
||||
const lowerText = text.toLowerCase();
|
||||
return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if an error message matches any billing API pattern.
|
||||
* Used for classifying API error messages.
|
||||
*/
|
||||
export function matchesBillingApiPattern(message: string): boolean {
|
||||
const lowerMessage = message.toLowerCase();
|
||||
return BILLING_API_PATTERNS.some((pattern) => lowerMessage.includes(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* Behavioral heuristic for detecting spending cap.
|
||||
*
|
||||
* When Claude hits a spending cap, it often returns a short message
|
||||
* with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
|
||||
*
|
||||
* This combines three signals:
|
||||
* 1. Very low turn count (<=2)
|
||||
* 2. Zero cost ($0)
|
||||
* 3. Text matches billing patterns
|
||||
*
|
||||
* @param turns - Number of turns the agent took
|
||||
* @param cost - Total cost in USD
|
||||
* @param resultText - The result text from the agent
|
||||
* @returns true if this looks like a spending cap hit
|
||||
*/
|
||||
export function isSpendingCapBehavior(
|
||||
turns: number,
|
||||
cost: number,
|
||||
resultText: string
|
||||
): boolean {
|
||||
// Only check if turns <= 2 AND cost is exactly 0
|
||||
if (turns > 2 || cost !== 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return matchesBillingTextPattern(resultText);
|
||||
}
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
import { $ } from 'zx';
|
||||
import chalk from 'chalk';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
|
||||
/**
|
||||
* Check if a directory is a git repository.
|
||||
@@ -148,7 +150,13 @@ export async function executeGitCommandWithRetry(
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
throw new Error(`Git command failed after ${maxRetries} retries`);
|
||||
throw new PentestError(
|
||||
`Git command failed after ${maxRetries} retries`,
|
||||
'filesystem',
|
||||
true, // Retryable - transient git lock issues
|
||||
{ maxRetries, description },
|
||||
ErrorCode.GIT_CHECKPOINT_FAILED
|
||||
);
|
||||
} finally {
|
||||
gitSemaphore.release();
|
||||
}
|
||||
@@ -189,9 +197,18 @@ export async function rollbackGitWorkspace(
|
||||
);
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
const result = toErrorResult(error);
|
||||
console.log(chalk.red(` ❌ Rollback failed after retries: ${result.error?.message}`));
|
||||
return result;
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
console.log(chalk.red(` ❌ Rollback failed after retries: ${errMsg}`));
|
||||
return {
|
||||
success: false,
|
||||
error: new PentestError(
|
||||
`Git rollback failed: ${errMsg}`,
|
||||
'filesystem',
|
||||
false, // Non-retryable - rollback is best-effort cleanup
|
||||
{ sourceDir, reason },
|
||||
ErrorCode.GIT_ROLLBACK_FAILED
|
||||
),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user