Merge branch 'main' into feat/telemetry

This commit is contained in:
Arjun Malleswaran
2026-01-15 10:44:28 -08:00
committed by GitHub
13 changed files with 2287 additions and 13 deletions
+3 -2
View File
@@ -1,12 +1,12 @@
services:
temporal:
image: temporalio/temporal:latest
command: ["server", "start-dev", "--db-filename", "/var/lib/temporal/temporal.db", "--ip", "0.0.0.0"]
command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
ports:
- "7233:7233" # gRPC
- "8233:8233" # Web UI (built-in)
volumes:
- temporal-data:/var/lib/temporal
- temporal-data:/home/temporal
healthcheck:
test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "localhost:7233"]
interval: 10s
@@ -27,6 +27,7 @@ services:
condition: service_healthy
volumes:
- ./prompts:/app/prompts
- ./audit-logs:/app/audit-logs
- ${TARGET_REPO:-.}:/target-repo
- ${BENCHMARKS_BASE:-.}:/benchmarks
- ${HOME}/.shannon:/tmp/.shannon
+1621 -1
View File
File diff suppressed because it is too large Load Diff
+26 -2
View File
@@ -18,7 +18,7 @@ Shannon - AI Penetration Testing Framework
Usage:
./shannon start URL=<url> REPO=<path> Start a pentest workflow
./shannon logs View real-time worker logs
./shannon logs ID=<workflow-id> Tail logs for a specific workflow
./shannon query ID=<workflow-id> Query workflow progress
./shannon stop Stop all containers
./shannon help Show this help message
@@ -34,6 +34,7 @@ Options for 'stop':
Examples:
./shannon start URL=https://example.com REPO=/path/to/repo
./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./config.yaml
./shannon logs ID=example.com_shannon-1234567890
./shannon query ID=shannon-1234567890
./shannon stop CLEAN=true
@@ -139,7 +140,30 @@ cmd_start() {
}
cmd_logs() {
docker compose -f "$COMPOSE_FILE" logs -f worker "$@"
parse_args "$@"
if [ -z "$ID" ]; then
echo "ERROR: ID is required"
echo "Usage: ./shannon logs ID=<workflow-id>"
exit 1
fi
WORKFLOW_LOG="./audit-logs/${ID}/workflow.log"
if [ -f "$WORKFLOW_LOG" ]; then
echo "Tailing workflow log: $WORKFLOW_LOG"
tail -f "$WORKFLOW_LOG"
else
echo "ERROR: Workflow log not found: $WORKFLOW_LOG"
echo ""
echo "Possible causes:"
echo " - Workflow hasn't started yet"
echo " - Workflow ID is incorrect"
echo " - Workflow is using a custom OUTPUT path"
echo ""
echo "Check: ./shannon query ID=$ID for workflow details"
exit 1
fi
}
cmd_query() {
+20
View File
@@ -248,6 +248,26 @@ export async function runClaudePrompt(
apiErrorDetected = messageLoopResult.apiErrorDetected;
totalCost = messageLoopResult.cost;
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
// When spending cap is hit, Claude returns a short message with $0 cost.
// Legitimate agent work NEVER costs $0 with only 1-2 turns.
if (turnCount <= 2 && totalCost === 0) {
const resultLower = (result || '').toLowerCase();
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
resultLower.includes(kw)
);
if (looksLikeBillingError) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
}
}
const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
+29 -1
View File
@@ -55,7 +55,35 @@ export function detectApiError(content: string): ApiErrorDetection {
const lowerContent = content.toLowerCase();
// Fatal error - should throw immediately
// === BILLING/SPENDING CAP ERRORS (Retryable with long backoff) ===
// When Claude Code hits its spending cap, it returns a short message like
// "Spending cap reached resets 8am" instead of throwing an error.
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
const BILLING_PATTERNS = [
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
];
const isBillingError = BILLING_PATTERNS.some((pattern) =>
lowerContent.includes(pattern)
);
if (isBillingError) {
return {
detected: true,
shouldThrow: new PentestError(
`Billing limit reached: ${content.slice(0, 100)}`,
'billing',
true // RETRYABLE - Temporal will use 5-30 min backoff
),
};
}
// === SESSION LIMIT (Non-retryable) ===
// Different from spending cap - usually means something is fundamentally wrong
if (lowerContent.includes('session limit reached')) {
return {
detected: true,
+73
View File
@@ -12,6 +12,7 @@
*/
import { AgentLogger } from './logger.js';
import { WorkflowLogger, type AgentLogDetails, type WorkflowSummary } from './workflow-logger.js';
import { MetricsTracker } from './metrics-tracker.js';
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js';
@@ -37,7 +38,9 @@ export class AuditSession {
private sessionMetadata: SessionMetadata;
private sessionId: string;
private metricsTracker: MetricsTracker;
private workflowLogger: WorkflowLogger;
private currentLogger: AgentLogger | null = null;
private currentAgentName: string | null = null;
private initialized: boolean = false;
constructor(sessionMetadata: SessionMetadata) {
@@ -54,6 +57,7 @@ export class AuditSession {
// Components
this.metricsTracker = new MetricsTracker(sessionMetadata);
this.workflowLogger = new WorkflowLogger(sessionMetadata);
}
/**
@@ -71,6 +75,9 @@ export class AuditSession {
// Initialize metrics tracker (loads or creates session.json)
await this.metricsTracker.initialize();
// Initialize workflow logger
await this.workflowLogger.initialize();
this.initialized = true;
}
@@ -98,6 +105,9 @@ export class AuditSession {
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
}
// Track current agent name for workflow logging
this.currentAgentName = agentName;
// Create and initialize logger for this attempt
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
await this.currentLogger.initialize();
@@ -111,6 +121,9 @@ export class AuditSession {
attemptNumber,
timestamp: formatTimestamp(),
});
// Log to unified workflow log
await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
}
/**
@@ -121,7 +134,30 @@ export class AuditSession {
throw new Error('No active logger. Call startAgent() first.');
}
// Log to agent-specific log file (JSON format)
await this.currentLogger.logEvent(eventType, eventData);
// Also log to unified workflow log (human-readable format)
const data = eventData as Record<string, unknown>;
const agentName = this.currentAgentName || 'unknown';
switch (eventType) {
case 'tool_start':
await this.workflowLogger.logToolStart(
agentName,
String(data.toolName || ''),
data.parameters
);
break;
case 'llm_response':
await this.workflowLogger.logLlmResponse(
agentName,
Number(data.turn || 0),
String(data.content || '')
);
break;
// tool_end and error events are intentionally not logged to workflow log
// to reduce noise - the agent completion message captures the outcome
}
}
/**
@@ -143,6 +179,19 @@ export class AuditSession {
this.currentLogger = null;
}
// Reset current agent name
this.currentAgentName = null;
// Log to unified workflow log
const agentLogDetails: AgentLogDetails = {
attemptNumber: result.attemptNumber,
duration_ms: result.duration_ms,
cost_usd: result.cost_usd,
success: result.success,
...(result.error !== undefined && { error: result.error }),
};
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
// Mutex-protected update to session.json
const unlock = await sessionMutex.lock(this.sessionId);
try {
@@ -178,4 +227,28 @@ export class AuditSession {
await this.ensureInitialized();
return this.metricsTracker.getMetrics();
}
/**
* Log phase start to unified workflow log
*/
async logPhaseStart(phase: string): Promise<void> {
await this.ensureInitialized();
await this.workflowLogger.logPhase(phase, 'start');
}
/**
* Log phase completion to unified workflow log
*/
async logPhaseComplete(phase: string): Promise<void> {
await this.ensureInitialized();
await this.workflowLogger.logPhase(phase, 'complete');
}
/**
* Log workflow completion to unified workflow log
*/
async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
await this.ensureInitialized();
await this.workflowLogger.logWorkflowComplete(summary);
}
}
+1
View File
@@ -18,5 +18,6 @@
export { AuditSession } from './audit-session.js';
export { AgentLogger } from './logger.js';
export { WorkflowLogger } from './workflow-logger.js';
export { MetricsTracker } from './metrics-tracker.js';
export * as AuditUtils from './utils.js';
+8
View File
@@ -85,6 +85,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin
return path.join(auditPath, 'session.json');
}
/**
* Generate path to workflow.log file
*/
export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): string {
const auditPath = generateAuditPath(sessionMetadata);
return path.join(auditPath, 'workflow.log');
}
/**
* Ensure directory exists (idempotent, race-safe)
*/
+382
View File
@@ -0,0 +1,382 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Workflow Logger
*
* Provides a unified, human-readable log file per workflow.
* Optimized for `tail -f` viewing during concurrent workflow execution.
*/
import fs from 'fs';
import path from 'path';
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
export interface AgentLogDetails {
attemptNumber?: number;
duration_ms?: number;
cost_usd?: number;
success?: boolean;
error?: string;
}
export interface AgentMetricsSummary {
durationMs: number;
costUsd: number | null;
}
export interface WorkflowSummary {
status: 'completed' | 'failed';
totalDurationMs: number;
totalCostUsd: number;
completedAgents: string[];
agentMetrics: Record<string, AgentMetricsSummary>;
error?: string;
}
/**
* WorkflowLogger - Manages the unified workflow log file
*/
export class WorkflowLogger {
private sessionMetadata: SessionMetadata;
private logPath: string;
private stream: fs.WriteStream | null = null;
private initialized: boolean = false;
constructor(sessionMetadata: SessionMetadata) {
this.sessionMetadata = sessionMetadata;
this.logPath = generateWorkflowLogPath(sessionMetadata);
}
/**
* Initialize the log stream (creates file and writes header)
*/
async initialize(): Promise<void> {
if (this.initialized) {
return;
}
// Ensure directory exists
await ensureDirectory(path.dirname(this.logPath));
// Create write stream with append mode
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a',
encoding: 'utf8',
autoClose: true,
});
this.initialized = true;
// Write header only if file is new (empty)
const stats = await fs.promises.stat(this.logPath).catch(() => null);
if (!stats || stats.size === 0) {
await this.writeHeader();
}
}
/**
* Write header to log file
*/
private async writeHeader(): Promise<void> {
const header = [
`================================================================================`,
`Shannon Pentest - Workflow Log`,
`================================================================================`,
`Workflow ID: ${this.sessionMetadata.id}`,
`Target URL: ${this.sessionMetadata.webUrl}`,
`Started: ${formatTimestamp()}`,
`================================================================================`,
``,
].join('\n');
return this.writeRaw(header);
}
/**
* Write raw text to log file with immediate flush
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.initialized || !this.stream) {
reject(new Error('WorkflowLogger not initialized'));
return;
}
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
}
/**
* Format timestamp for log line (local time, human readable)
*/
private formatLogTime(): string {
const now = new Date();
return now.toISOString().replace('T', ' ').slice(0, 19);
}
/**
* Log a phase transition event
*/
async logPhase(phase: string, event: 'start' | 'complete'): Promise<void> {
await this.ensureInitialized();
const action = event === 'start' ? 'Starting' : 'Completed';
const line = `[${this.formatLogTime()}] [PHASE] ${action}: ${phase}\n`;
// Add blank line before phase start for readability
if (event === 'start') {
await this.writeRaw('\n');
}
await this.writeRaw(line);
}
/**
* Log an agent event
*/
async logAgent(
agentName: string,
event: 'start' | 'end',
details?: AgentLogDetails
): Promise<void> {
await this.ensureInitialized();
let message: string;
if (event === 'start') {
const attempt = details?.attemptNumber ?? 1;
message = `${agentName}: Starting (attempt ${attempt})`;
} else {
const parts: string[] = [agentName + ':'];
if (details?.success === false) {
parts.push('Failed');
if (details?.error) {
parts.push(`- ${details.error}`);
}
} else {
parts.push('Completed');
}
if (details?.duration_ms !== undefined) {
parts.push(`(${formatDuration(details.duration_ms)}`);
if (details?.cost_usd !== undefined) {
parts.push(`$${details.cost_usd.toFixed(2)})`);
} else {
parts.push(')');
}
}
message = parts.join(' ');
}
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
await this.writeRaw(line);
}
/**
* Log a general event
*/
async logEvent(eventType: string, message: string): Promise<void> {
await this.ensureInitialized();
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
await this.writeRaw(line);
}
/**
* Log an error
*/
async logError(error: Error, context?: string): Promise<void> {
await this.ensureInitialized();
const contextStr = context ? ` (${context})` : '';
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
await this.writeRaw(line);
}
/**
* Truncate string to max length with ellipsis
*/
private truncate(str: string, maxLen: number): string {
if (str.length <= maxLen) return str;
return str.slice(0, maxLen - 3) + '...';
}
/**
* Format tool parameters for human-readable display
*/
private formatToolParams(toolName: string, params: unknown): string {
if (!params || typeof params !== 'object') {
return '';
}
const p = params as Record<string, unknown>;
// Tool-specific formatting for common tools
switch (toolName) {
case 'Bash':
if (p.command) {
return this.truncate(String(p.command).replace(/\n/g, ' '), 100);
}
break;
case 'Read':
if (p.file_path) {
return String(p.file_path);
}
break;
case 'Write':
if (p.file_path) {
return String(p.file_path);
}
break;
case 'Edit':
if (p.file_path) {
return String(p.file_path);
}
break;
case 'Glob':
if (p.pattern) {
return String(p.pattern);
}
break;
case 'Grep':
if (p.pattern) {
const path = p.path ? ` in ${p.path}` : '';
return `"${this.truncate(String(p.pattern), 50)}"${path}`;
}
break;
case 'WebFetch':
if (p.url) {
return String(p.url);
}
break;
case 'mcp__playwright__browser_navigate':
if (p.url) {
return String(p.url);
}
break;
case 'mcp__playwright__browser_click':
if (p.selector) {
return this.truncate(String(p.selector), 60);
}
break;
case 'mcp__playwright__browser_type':
if (p.selector) {
const text = p.text ? `: "${this.truncate(String(p.text), 30)}"` : '';
return `${this.truncate(String(p.selector), 40)}${text}`;
}
break;
}
// Default: show first string-valued param truncated
for (const [key, val] of Object.entries(p)) {
if (typeof val === 'string' && val.length > 0) {
return `${key}=${this.truncate(val, 60)}`;
}
}
return '';
}
/**
* Log tool start event
*/
async logToolStart(agentName: string, toolName: string, parameters: unknown): Promise<void> {
await this.ensureInitialized();
const params = this.formatToolParams(toolName, parameters);
const paramStr = params ? `: ${params}` : '';
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
await this.writeRaw(line);
}
/**
* Log LLM response
*/
async logLlmResponse(agentName: string, turn: number, content: string): Promise<void> {
await this.ensureInitialized();
// Show full content, replacing newlines with escaped version for single-line output
const escaped = content.replace(/\n/g, '\\n');
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
await this.writeRaw(line);
}
/**
* Log workflow completion with full summary
*/
async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
await this.ensureInitialized();
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
await this.writeRaw('\n');
await this.writeRaw(`================================================================================\n`);
await this.writeRaw(`Workflow ${status}\n`);
await this.writeRaw(`────────────────────────────────────────\n`);
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.writeRaw(`Status: ${summary.status}\n`);
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`);
if (summary.error) {
await this.writeRaw(`Error: ${summary.error}\n`);
}
await this.writeRaw(`\n`);
await this.writeRaw(`Agent Breakdown:\n`);
for (const agentName of summary.completedAgents) {
const metrics = summary.agentMetrics[agentName];
if (metrics) {
const duration = formatDuration(metrics.durationMs);
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`);
} else {
await this.writeRaw(` - ${agentName}\n`);
}
}
await this.writeRaw(`================================================================================\n`);
}
/**
* Ensure initialized (helper for lazy initialization)
*/
private async ensureInitialized(): Promise<void> {
if (!this.initialized) {
await this.initialize();
}
}
/**
* Close the log stream
*/
async close(): Promise<void> {
if (!this.initialized || !this.stream) {
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.initialized = false;
resolve();
});
});
}
}
+8 -2
View File
@@ -210,7 +210,7 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific
// === BILLING ERRORS (Retryable with long backoff) ===
// Anthropic returns billing as 400 invalid_request_error
// Human can add credits, so retry with 5-30 min backoff
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
if (
message.includes('billing_error') ||
message.includes('credit balance is too low') ||
@@ -221,7 +221,13 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific
message.includes('usage limit reached') ||
message.includes('quota exceeded') ||
message.includes('daily rate limit') ||
message.includes('limit will reset')
message.includes('limit will reset') ||
// Claude Code spending cap patterns (returns short message instead of error)
message.includes('spending cap') ||
message.includes('spending limit') ||
message.includes('cap reached') ||
message.includes('budget exceeded') ||
message.includes('billing limit reached')
) {
return { type: 'BillingError', retryable: true };
}
+73 -1
View File
@@ -71,12 +71,13 @@ import { assembleFinalReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import { telemetry, TelemetryEvent, hashTargetUrl } from '../telemetry/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import type { AgentMetrics } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import type { SessionMetadata } from '../audit/utils.js';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (30s)
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
/**
* Input for all agent activities.
@@ -200,6 +201,26 @@ async function runAgentActivity(
attemptNumber
);
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
if (looksLikeBillingError) {
await rollbackGitWorkspace(repoPath, 'spending cap detected');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: 0,
success: false,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
}
}
// 7. Handle execution failure
if (!result.success) {
await rollbackGitWorkspace(repoPath, 'execution failure');
@@ -419,6 +440,7 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
});
throw error;
}
return runAgentActivity('report', input);
}
/**
@@ -485,3 +507,53 @@ export async function checkExploitationQueue(
vulnType,
};
}
/**
* Log phase transition to the unified workflow log.
* Called at phase boundaries for per-workflow logging.
*/
export async function logPhaseTransition(
input: ActivityInput,
phase: string,
event: 'start' | 'complete'
): Promise<void> {
const { webUrl, repoPath, outputPath, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: workflowId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
if (event === 'start') {
await auditSession.logPhaseStart(phase);
} else {
await auditSession.logPhaseComplete(phase);
}
}
/**
* Log workflow completion with full summary to the unified workflow log.
* Called at the end of the workflow to write a summary breakdown.
*/
export async function logWorkflowComplete(
input: ActivityInput,
summary: WorkflowSummary
): Promise<void> {
const { webUrl, repoPath, outputPath, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: workflowId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
await auditSession.logWorkflowComplete(summary);
}
+2 -2
View File
@@ -175,11 +175,11 @@ async function startPipeline(): Promise<void> {
target_hash: hashTargetUrl(webUrl),
workflow_id: workflowId,
});
if (!waitForCompletion) {
console.log(chalk.bold('Monitor progress:'));
console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
console.log(chalk.white(' Logs: ') + chalk.gray('./shannon logs'));
console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`));
console.log(chalk.white(' Query: ') + chalk.gray(`./shannon query ID=${workflowId}`));
console.log();
return;
+41 -2
View File
@@ -70,14 +70,14 @@ const TESTING_RETRY = {
// Activity proxy with production retry configuration (default)
const acts = proxyActivities<typeof activities>({
startToCloseTimeout: '2 hours',
heartbeatTimeout: '30 seconds',
heartbeatTimeout: '10 minutes', // Long timeout for resource-constrained workers with many concurrent activities
retry: PRODUCTION_RETRY,
});
// Activity proxy with testing retry configuration (fast)
const testActs = proxyActivities<typeof activities>({
startToCloseTimeout: '10 minutes',
heartbeatTimeout: '30 seconds',
heartbeatTimeout: '5 minutes', // Shorter for testing but still tolerant of resource contention
retry: TESTING_RETRY,
});
@@ -145,15 +145,19 @@ export async function pentestPipelineWorkflow(
// === Phase 1: Pre-Reconnaissance ===
state.currentPhase = 'pre-recon';
state.currentAgent = 'pre-recon';
await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
state.agentMetrics['pre-recon'] =
await a.runPreReconAgent(activityInput);
state.completedAgents.push('pre-recon');
await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');
// === Phase 2: Reconnaissance ===
state.currentPhase = 'recon';
state.currentAgent = 'recon';
await a.logPhaseTransition(activityInput, 'recon', 'start');
state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
state.completedAgents.push('recon');
await a.logPhaseTransition(activityInput, 'recon', 'complete');
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
// Each vuln type runs as an independent pipeline:
@@ -162,6 +166,7 @@ export async function pentestPipelineWorkflow(
// starts immediately when its vuln agent finishes, not waiting for all.
state.currentPhase = 'vulnerability-exploitation';
state.currentAgent = 'pipelines';
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
// Helper: Run a single vuln→exploit pipeline
async function runVulnExploitPipeline(
@@ -261,10 +266,12 @@ export async function pentestPipelineWorkflow(
// Update phase markers
state.currentPhase = 'exploitation';
state.currentAgent = null;
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');
// === Phase 5: Reporting ===
state.currentPhase = 'reporting';
state.currentAgent = 'report';
await a.logPhaseTransition(activityInput, 'reporting', 'start');
// First, assemble the concatenated report from exploitation evidence files
await a.assembleReportActivity(activityInput);
@@ -282,18 +289,50 @@ export async function pentestPipelineWorkflow(
};
state.agentMetrics['report'] = await a.runReportAgent(reportInput);
state.completedAgents.push('report');
await a.logPhaseTransition(activityInput, 'reporting', 'complete');
// === Complete ===
state.status = 'completed';
state.currentPhase = null;
state.currentAgent = null;
state.summary = computeSummary(state);
// Log workflow completion summary
await a.logWorkflowComplete(activityInput, {
status: 'completed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
});
return state;
} catch (error) {
state.status = 'failed';
state.failedAgent = state.currentAgent;
state.error = error instanceof Error ? error.message : String(error);
state.summary = computeSummary(state);
// Log workflow failure summary
await a.logWorkflowComplete(activityInput, {
status: 'failed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
error: state.error ?? undefined,
});
throw error;
}
}