mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-05-26 02:18:15 +02:00
Merge branch 'main' into feat/telemetry
This commit is contained in:
+3
-2
@@ -1,12 +1,12 @@
|
||||
services:
|
||||
temporal:
|
||||
image: temporalio/temporal:latest
|
||||
command: ["server", "start-dev", "--db-filename", "/var/lib/temporal/temporal.db", "--ip", "0.0.0.0"]
|
||||
command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
|
||||
ports:
|
||||
- "7233:7233" # gRPC
|
||||
- "8233:8233" # Web UI (built-in)
|
||||
volumes:
|
||||
- temporal-data:/var/lib/temporal
|
||||
- temporal-data:/home/temporal
|
||||
healthcheck:
|
||||
test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "localhost:7233"]
|
||||
interval: 10s
|
||||
@@ -27,6 +27,7 @@ services:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./prompts:/app/prompts
|
||||
- ./audit-logs:/app/audit-logs
|
||||
- ${TARGET_REPO:-.}:/target-repo
|
||||
- ${BENCHMARKS_BASE:-.}:/benchmarks
|
||||
- ${HOME}/.shannon:/tmp/.shannon
|
||||
|
||||
Generated
+1621
-1
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@ Shannon - AI Penetration Testing Framework
|
||||
|
||||
Usage:
|
||||
./shannon start URL=<url> REPO=<path> Start a pentest workflow
|
||||
./shannon logs View real-time worker logs
|
||||
./shannon logs ID=<workflow-id> Tail logs for a specific workflow
|
||||
./shannon query ID=<workflow-id> Query workflow progress
|
||||
./shannon stop Stop all containers
|
||||
./shannon help Show this help message
|
||||
@@ -34,6 +34,7 @@ Options for 'stop':
|
||||
Examples:
|
||||
./shannon start URL=https://example.com REPO=/path/to/repo
|
||||
./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./config.yaml
|
||||
./shannon logs ID=example.com_shannon-1234567890
|
||||
./shannon query ID=shannon-1234567890
|
||||
./shannon stop CLEAN=true
|
||||
|
||||
@@ -139,7 +140,30 @@ cmd_start() {
|
||||
}
|
||||
|
||||
cmd_logs() {
|
||||
docker compose -f "$COMPOSE_FILE" logs -f worker "$@"
|
||||
parse_args "$@"
|
||||
|
||||
if [ -z "$ID" ]; then
|
||||
echo "ERROR: ID is required"
|
||||
echo "Usage: ./shannon logs ID=<workflow-id>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
WORKFLOW_LOG="./audit-logs/${ID}/workflow.log"
|
||||
|
||||
if [ -f "$WORKFLOW_LOG" ]; then
|
||||
echo "Tailing workflow log: $WORKFLOW_LOG"
|
||||
tail -f "$WORKFLOW_LOG"
|
||||
else
|
||||
echo "ERROR: Workflow log not found: $WORKFLOW_LOG"
|
||||
echo ""
|
||||
echo "Possible causes:"
|
||||
echo " - Workflow hasn't started yet"
|
||||
echo " - Workflow ID is incorrect"
|
||||
echo " - Workflow is using a custom OUTPUT path"
|
||||
echo ""
|
||||
echo "Check: ./shannon query ID=$ID for workflow details"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
cmd_query() {
|
||||
|
||||
@@ -248,6 +248,26 @@ export async function runClaudePrompt(
|
||||
apiErrorDetected = messageLoopResult.apiErrorDetected;
|
||||
totalCost = messageLoopResult.cost;
|
||||
|
||||
// === SPENDING CAP SAFEGUARD ===
|
||||
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
// When spending cap is hit, Claude returns a short message with $0 cost.
|
||||
// Legitimate agent work NEVER costs $0 with only 1-2 turns.
|
||||
if (turnCount <= 2 && totalCost === 0) {
|
||||
const resultLower = (result || '').toLowerCase();
|
||||
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
|
||||
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
|
||||
resultLower.includes(kw)
|
||||
);
|
||||
|
||||
if (looksLikeBillingError) {
|
||||
throw new PentestError(
|
||||
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // Retryable - Temporal will use 5-30 min backoff
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const duration = timer.stop();
|
||||
timingResults.agents[execContext.agentKey] = duration;
|
||||
|
||||
|
||||
@@ -55,7 +55,35 @@ export function detectApiError(content: string): ApiErrorDetection {
|
||||
|
||||
const lowerContent = content.toLowerCase();
|
||||
|
||||
// Fatal error - should throw immediately
|
||||
// === BILLING/SPENDING CAP ERRORS (Retryable with long backoff) ===
|
||||
// When Claude Code hits its spending cap, it returns a short message like
|
||||
// "Spending cap reached resets 8am" instead of throwing an error.
|
||||
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
|
||||
const BILLING_PATTERNS = [
|
||||
'spending cap',
|
||||
'spending limit',
|
||||
'cap reached',
|
||||
'budget exceeded',
|
||||
'usage limit',
|
||||
];
|
||||
|
||||
const isBillingError = BILLING_PATTERNS.some((pattern) =>
|
||||
lowerContent.includes(pattern)
|
||||
);
|
||||
|
||||
if (isBillingError) {
|
||||
return {
|
||||
detected: true,
|
||||
shouldThrow: new PentestError(
|
||||
`Billing limit reached: ${content.slice(0, 100)}`,
|
||||
'billing',
|
||||
true // RETRYABLE - Temporal will use 5-30 min backoff
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
// === SESSION LIMIT (Non-retryable) ===
|
||||
// Different from spending cap - usually means something is fundamentally wrong
|
||||
if (lowerContent.includes('session limit reached')) {
|
||||
return {
|
||||
detected: true,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
*/
|
||||
|
||||
import { AgentLogger } from './logger.js';
|
||||
import { WorkflowLogger, type AgentLogDetails, type WorkflowSummary } from './workflow-logger.js';
|
||||
import { MetricsTracker } from './metrics-tracker.js';
|
||||
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
@@ -37,7 +38,9 @@ export class AuditSession {
|
||||
private sessionMetadata: SessionMetadata;
|
||||
private sessionId: string;
|
||||
private metricsTracker: MetricsTracker;
|
||||
private workflowLogger: WorkflowLogger;
|
||||
private currentLogger: AgentLogger | null = null;
|
||||
private currentAgentName: string | null = null;
|
||||
private initialized: boolean = false;
|
||||
|
||||
constructor(sessionMetadata: SessionMetadata) {
|
||||
@@ -54,6 +57,7 @@ export class AuditSession {
|
||||
|
||||
// Components
|
||||
this.metricsTracker = new MetricsTracker(sessionMetadata);
|
||||
this.workflowLogger = new WorkflowLogger(sessionMetadata);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -71,6 +75,9 @@ export class AuditSession {
|
||||
// Initialize metrics tracker (loads or creates session.json)
|
||||
await this.metricsTracker.initialize();
|
||||
|
||||
// Initialize workflow logger
|
||||
await this.workflowLogger.initialize();
|
||||
|
||||
this.initialized = true;
|
||||
}
|
||||
|
||||
@@ -98,6 +105,9 @@ export class AuditSession {
|
||||
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
|
||||
}
|
||||
|
||||
// Track current agent name for workflow logging
|
||||
this.currentAgentName = agentName;
|
||||
|
||||
// Create and initialize logger for this attempt
|
||||
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
|
||||
await this.currentLogger.initialize();
|
||||
@@ -111,6 +121,9 @@ export class AuditSession {
|
||||
attemptNumber,
|
||||
timestamp: formatTimestamp(),
|
||||
});
|
||||
|
||||
// Log to unified workflow log
|
||||
await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -121,7 +134,30 @@ export class AuditSession {
|
||||
throw new Error('No active logger. Call startAgent() first.');
|
||||
}
|
||||
|
||||
// Log to agent-specific log file (JSON format)
|
||||
await this.currentLogger.logEvent(eventType, eventData);
|
||||
|
||||
// Also log to unified workflow log (human-readable format)
|
||||
const data = eventData as Record<string, unknown>;
|
||||
const agentName = this.currentAgentName || 'unknown';
|
||||
switch (eventType) {
|
||||
case 'tool_start':
|
||||
await this.workflowLogger.logToolStart(
|
||||
agentName,
|
||||
String(data.toolName || ''),
|
||||
data.parameters
|
||||
);
|
||||
break;
|
||||
case 'llm_response':
|
||||
await this.workflowLogger.logLlmResponse(
|
||||
agentName,
|
||||
Number(data.turn || 0),
|
||||
String(data.content || '')
|
||||
);
|
||||
break;
|
||||
// tool_end and error events are intentionally not logged to workflow log
|
||||
// to reduce noise - the agent completion message captures the outcome
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -143,6 +179,19 @@ export class AuditSession {
|
||||
this.currentLogger = null;
|
||||
}
|
||||
|
||||
// Reset current agent name
|
||||
this.currentAgentName = null;
|
||||
|
||||
// Log to unified workflow log
|
||||
const agentLogDetails: AgentLogDetails = {
|
||||
attemptNumber: result.attemptNumber,
|
||||
duration_ms: result.duration_ms,
|
||||
cost_usd: result.cost_usd,
|
||||
success: result.success,
|
||||
...(result.error !== undefined && { error: result.error }),
|
||||
};
|
||||
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
|
||||
|
||||
// Mutex-protected update to session.json
|
||||
const unlock = await sessionMutex.lock(this.sessionId);
|
||||
try {
|
||||
@@ -178,4 +227,28 @@ export class AuditSession {
|
||||
await this.ensureInitialized();
|
||||
return this.metricsTracker.getMetrics();
|
||||
}
|
||||
|
||||
/**
|
||||
* Log phase start to unified workflow log
|
||||
*/
|
||||
async logPhaseStart(phase: string): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
await this.workflowLogger.logPhase(phase, 'start');
|
||||
}
|
||||
|
||||
/**
|
||||
* Log phase completion to unified workflow log
|
||||
*/
|
||||
async logPhaseComplete(phase: string): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
await this.workflowLogger.logPhase(phase, 'complete');
|
||||
}
|
||||
|
||||
/**
|
||||
* Log workflow completion to unified workflow log
|
||||
*/
|
||||
async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
await this.workflowLogger.logWorkflowComplete(summary);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,5 +18,6 @@
|
||||
|
||||
export { AuditSession } from './audit-session.js';
|
||||
export { AgentLogger } from './logger.js';
|
||||
export { WorkflowLogger } from './workflow-logger.js';
|
||||
export { MetricsTracker } from './metrics-tracker.js';
|
||||
export * as AuditUtils from './utils.js';
|
||||
|
||||
@@ -85,6 +85,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin
|
||||
return path.join(auditPath, 'session.json');
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate path to workflow.log file
|
||||
*/
|
||||
export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): string {
|
||||
const auditPath = generateAuditPath(sessionMetadata);
|
||||
return path.join(auditPath, 'workflow.log');
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure directory exists (idempotent, race-safe)
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,382 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Workflow Logger
|
||||
*
|
||||
* Provides a unified, human-readable log file per workflow.
|
||||
* Optimized for `tail -f` viewing during concurrent workflow execution.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
|
||||
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
|
||||
|
||||
export interface AgentLogDetails {
|
||||
attemptNumber?: number;
|
||||
duration_ms?: number;
|
||||
cost_usd?: number;
|
||||
success?: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface AgentMetricsSummary {
|
||||
durationMs: number;
|
||||
costUsd: number | null;
|
||||
}
|
||||
|
||||
export interface WorkflowSummary {
|
||||
status: 'completed' | 'failed';
|
||||
totalDurationMs: number;
|
||||
totalCostUsd: number;
|
||||
completedAgents: string[];
|
||||
agentMetrics: Record<string, AgentMetricsSummary>;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* WorkflowLogger - Manages the unified workflow log file
|
||||
*/
|
||||
export class WorkflowLogger {
|
||||
private sessionMetadata: SessionMetadata;
|
||||
private logPath: string;
|
||||
private stream: fs.WriteStream | null = null;
|
||||
private initialized: boolean = false;
|
||||
|
||||
constructor(sessionMetadata: SessionMetadata) {
|
||||
this.sessionMetadata = sessionMetadata;
|
||||
this.logPath = generateWorkflowLogPath(sessionMetadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the log stream (creates file and writes header)
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
if (this.initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure directory exists
|
||||
await ensureDirectory(path.dirname(this.logPath));
|
||||
|
||||
// Create write stream with append mode
|
||||
this.stream = fs.createWriteStream(this.logPath, {
|
||||
flags: 'a',
|
||||
encoding: 'utf8',
|
||||
autoClose: true,
|
||||
});
|
||||
|
||||
this.initialized = true;
|
||||
|
||||
// Write header only if file is new (empty)
|
||||
const stats = await fs.promises.stat(this.logPath).catch(() => null);
|
||||
if (!stats || stats.size === 0) {
|
||||
await this.writeHeader();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write header to log file
|
||||
*/
|
||||
private async writeHeader(): Promise<void> {
|
||||
const header = [
|
||||
`================================================================================`,
|
||||
`Shannon Pentest - Workflow Log`,
|
||||
`================================================================================`,
|
||||
`Workflow ID: ${this.sessionMetadata.id}`,
|
||||
`Target URL: ${this.sessionMetadata.webUrl}`,
|
||||
`Started: ${formatTimestamp()}`,
|
||||
`================================================================================`,
|
||||
``,
|
||||
].join('\n');
|
||||
|
||||
return this.writeRaw(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write raw text to log file with immediate flush
|
||||
*/
|
||||
private writeRaw(text: string): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (!this.initialized || !this.stream) {
|
||||
reject(new Error('WorkflowLogger not initialized'));
|
||||
return;
|
||||
}
|
||||
|
||||
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
|
||||
if (error) reject(error);
|
||||
});
|
||||
|
||||
if (needsDrain) {
|
||||
this.stream.once('drain', resolve);
|
||||
} else {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Format timestamp for log line (local time, human readable)
|
||||
*/
|
||||
private formatLogTime(): string {
|
||||
const now = new Date();
|
||||
return now.toISOString().replace('T', ' ').slice(0, 19);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a phase transition event
|
||||
*/
|
||||
async logPhase(phase: string, event: 'start' | 'complete'): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const action = event === 'start' ? 'Starting' : 'Completed';
|
||||
const line = `[${this.formatLogTime()}] [PHASE] ${action}: ${phase}\n`;
|
||||
|
||||
// Add blank line before phase start for readability
|
||||
if (event === 'start') {
|
||||
await this.writeRaw('\n');
|
||||
}
|
||||
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an agent event
|
||||
*/
|
||||
async logAgent(
|
||||
agentName: string,
|
||||
event: 'start' | 'end',
|
||||
details?: AgentLogDetails
|
||||
): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
let message: string;
|
||||
|
||||
if (event === 'start') {
|
||||
const attempt = details?.attemptNumber ?? 1;
|
||||
message = `${agentName}: Starting (attempt ${attempt})`;
|
||||
} else {
|
||||
const parts: string[] = [agentName + ':'];
|
||||
|
||||
if (details?.success === false) {
|
||||
parts.push('Failed');
|
||||
if (details?.error) {
|
||||
parts.push(`- ${details.error}`);
|
||||
}
|
||||
} else {
|
||||
parts.push('Completed');
|
||||
}
|
||||
|
||||
if (details?.duration_ms !== undefined) {
|
||||
parts.push(`(${formatDuration(details.duration_ms)}`);
|
||||
if (details?.cost_usd !== undefined) {
|
||||
parts.push(`$${details.cost_usd.toFixed(2)})`);
|
||||
} else {
|
||||
parts.push(')');
|
||||
}
|
||||
}
|
||||
|
||||
message = parts.join(' ');
|
||||
}
|
||||
|
||||
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a general event
|
||||
*/
|
||||
async logEvent(eventType: string, message: string): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an error
|
||||
*/
|
||||
async logError(error: Error, context?: string): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const contextStr = context ? ` (${context})` : '';
|
||||
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate string to max length with ellipsis
|
||||
*/
|
||||
private truncate(str: string, maxLen: number): string {
|
||||
if (str.length <= maxLen) return str;
|
||||
return str.slice(0, maxLen - 3) + '...';
|
||||
}
|
||||
|
||||
/**
|
||||
* Format tool parameters for human-readable display
|
||||
*/
|
||||
private formatToolParams(toolName: string, params: unknown): string {
|
||||
if (!params || typeof params !== 'object') {
|
||||
return '';
|
||||
}
|
||||
|
||||
const p = params as Record<string, unknown>;
|
||||
|
||||
// Tool-specific formatting for common tools
|
||||
switch (toolName) {
|
||||
case 'Bash':
|
||||
if (p.command) {
|
||||
return this.truncate(String(p.command).replace(/\n/g, ' '), 100);
|
||||
}
|
||||
break;
|
||||
case 'Read':
|
||||
if (p.file_path) {
|
||||
return String(p.file_path);
|
||||
}
|
||||
break;
|
||||
case 'Write':
|
||||
if (p.file_path) {
|
||||
return String(p.file_path);
|
||||
}
|
||||
break;
|
||||
case 'Edit':
|
||||
if (p.file_path) {
|
||||
return String(p.file_path);
|
||||
}
|
||||
break;
|
||||
case 'Glob':
|
||||
if (p.pattern) {
|
||||
return String(p.pattern);
|
||||
}
|
||||
break;
|
||||
case 'Grep':
|
||||
if (p.pattern) {
|
||||
const path = p.path ? ` in ${p.path}` : '';
|
||||
return `"${this.truncate(String(p.pattern), 50)}"${path}`;
|
||||
}
|
||||
break;
|
||||
case 'WebFetch':
|
||||
if (p.url) {
|
||||
return String(p.url);
|
||||
}
|
||||
break;
|
||||
case 'mcp__playwright__browser_navigate':
|
||||
if (p.url) {
|
||||
return String(p.url);
|
||||
}
|
||||
break;
|
||||
case 'mcp__playwright__browser_click':
|
||||
if (p.selector) {
|
||||
return this.truncate(String(p.selector), 60);
|
||||
}
|
||||
break;
|
||||
case 'mcp__playwright__browser_type':
|
||||
if (p.selector) {
|
||||
const text = p.text ? `: "${this.truncate(String(p.text), 30)}"` : '';
|
||||
return `${this.truncate(String(p.selector), 40)}${text}`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Default: show first string-valued param truncated
|
||||
for (const [key, val] of Object.entries(p)) {
|
||||
if (typeof val === 'string' && val.length > 0) {
|
||||
return `${key}=${this.truncate(val, 60)}`;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Log tool start event
|
||||
*/
|
||||
async logToolStart(agentName: string, toolName: string, parameters: unknown): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const params = this.formatToolParams(toolName, parameters);
|
||||
const paramStr = params ? `: ${params}` : '';
|
||||
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log LLM response
|
||||
*/
|
||||
async logLlmResponse(agentName: string, turn: number, content: string): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
// Show full content, replacing newlines with escaped version for single-line output
|
||||
const escaped = content.replace(/\n/g, '\\n');
|
||||
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
|
||||
await this.writeRaw(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log workflow completion with full summary
|
||||
*/
|
||||
async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
|
||||
|
||||
await this.writeRaw('\n');
|
||||
await this.writeRaw(`================================================================================\n`);
|
||||
await this.writeRaw(`Workflow ${status}\n`);
|
||||
await this.writeRaw(`────────────────────────────────────────\n`);
|
||||
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
|
||||
await this.writeRaw(`Status: ${summary.status}\n`);
|
||||
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
|
||||
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
|
||||
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`);
|
||||
|
||||
if (summary.error) {
|
||||
await this.writeRaw(`Error: ${summary.error}\n`);
|
||||
}
|
||||
|
||||
await this.writeRaw(`\n`);
|
||||
await this.writeRaw(`Agent Breakdown:\n`);
|
||||
|
||||
for (const agentName of summary.completedAgents) {
|
||||
const metrics = summary.agentMetrics[agentName];
|
||||
if (metrics) {
|
||||
const duration = formatDuration(metrics.durationMs);
|
||||
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
|
||||
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`);
|
||||
} else {
|
||||
await this.writeRaw(` - ${agentName}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
await this.writeRaw(`================================================================================\n`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure initialized (helper for lazy initialization)
|
||||
*/
|
||||
private async ensureInitialized(): Promise<void> {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the log stream
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
if (!this.initialized || !this.stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.stream!.end(() => {
|
||||
this.initialized = false;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -210,7 +210,7 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific
|
||||
|
||||
// === BILLING ERRORS (Retryable with long backoff) ===
|
||||
// Anthropic returns billing as 400 invalid_request_error
|
||||
// Human can add credits, so retry with 5-30 min backoff
|
||||
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
|
||||
if (
|
||||
message.includes('billing_error') ||
|
||||
message.includes('credit balance is too low') ||
|
||||
@@ -221,7 +221,13 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific
|
||||
message.includes('usage limit reached') ||
|
||||
message.includes('quota exceeded') ||
|
||||
message.includes('daily rate limit') ||
|
||||
message.includes('limit will reset')
|
||||
message.includes('limit will reset') ||
|
||||
// Claude Code spending cap patterns (returns short message instead of error)
|
||||
message.includes('spending cap') ||
|
||||
message.includes('spending limit') ||
|
||||
message.includes('cap reached') ||
|
||||
message.includes('budget exceeded') ||
|
||||
message.includes('billing limit reached')
|
||||
) {
|
||||
return { type: 'BillingError', retryable: true };
|
||||
}
|
||||
|
||||
@@ -71,12 +71,13 @@ import { assembleFinalReport } from '../phases/reporting.js';
|
||||
import { getPromptNameForAgent } from '../types/agents.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import { telemetry, TelemetryEvent, hashTargetUrl } from '../telemetry/index.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import type { AgentMetrics } from './shared.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
import type { SessionMetadata } from '../audit/utils.js';
|
||||
|
||||
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (30s)
|
||||
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
|
||||
|
||||
/**
|
||||
* Input for all agent activities.
|
||||
@@ -200,6 +201,26 @@ async function runAgentActivity(
|
||||
attemptNumber
|
||||
);
|
||||
|
||||
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
|
||||
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
|
||||
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
|
||||
const resultText = result.result || '';
|
||||
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
|
||||
|
||||
if (looksLikeBillingError) {
|
||||
await rollbackGitWorkspace(repoPath, 'spending cap detected');
|
||||
await auditSession.endAgent(agentName, {
|
||||
attemptNumber,
|
||||
duration_ms: result.duration,
|
||||
cost_usd: 0,
|
||||
success: false,
|
||||
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
|
||||
});
|
||||
// Throw as billing error so Temporal retries with long backoff
|
||||
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Handle execution failure
|
||||
if (!result.success) {
|
||||
await rollbackGitWorkspace(repoPath, 'execution failure');
|
||||
@@ -419,6 +440,7 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
return runAgentActivity('report', input);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -485,3 +507,53 @@ export async function checkExploitationQueue(
|
||||
vulnType,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Log phase transition to the unified workflow log.
|
||||
* Called at phase boundaries for per-workflow logging.
|
||||
*/
|
||||
export async function logPhaseTransition(
|
||||
input: ActivityInput,
|
||||
phase: string,
|
||||
event: 'start' | 'complete'
|
||||
): Promise<void> {
|
||||
const { webUrl, repoPath, outputPath, workflowId } = input;
|
||||
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: workflowId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize();
|
||||
|
||||
if (event === 'start') {
|
||||
await auditSession.logPhaseStart(phase);
|
||||
} else {
|
||||
await auditSession.logPhaseComplete(phase);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Log workflow completion with full summary to the unified workflow log.
|
||||
* Called at the end of the workflow to write a summary breakdown.
|
||||
*/
|
||||
export async function logWorkflowComplete(
|
||||
input: ActivityInput,
|
||||
summary: WorkflowSummary
|
||||
): Promise<void> {
|
||||
const { webUrl, repoPath, outputPath, workflowId } = input;
|
||||
|
||||
const sessionMetadata: SessionMetadata = {
|
||||
id: workflowId,
|
||||
webUrl,
|
||||
repoPath,
|
||||
...(outputPath && { outputPath }),
|
||||
};
|
||||
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize();
|
||||
await auditSession.logWorkflowComplete(summary);
|
||||
}
|
||||
|
||||
@@ -175,11 +175,11 @@ async function startPipeline(): Promise<void> {
|
||||
target_hash: hashTargetUrl(webUrl),
|
||||
workflow_id: workflowId,
|
||||
});
|
||||
|
||||
|
||||
if (!waitForCompletion) {
|
||||
console.log(chalk.bold('Monitor progress:'));
|
||||
console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
|
||||
console.log(chalk.white(' Logs: ') + chalk.gray('./shannon logs'));
|
||||
console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`));
|
||||
console.log(chalk.white(' Query: ') + chalk.gray(`./shannon query ID=${workflowId}`));
|
||||
console.log();
|
||||
return;
|
||||
|
||||
@@ -70,14 +70,14 @@ const TESTING_RETRY = {
|
||||
// Activity proxy with production retry configuration (default)
|
||||
const acts = proxyActivities<typeof activities>({
|
||||
startToCloseTimeout: '2 hours',
|
||||
heartbeatTimeout: '30 seconds',
|
||||
heartbeatTimeout: '10 minutes', // Long timeout for resource-constrained workers with many concurrent activities
|
||||
retry: PRODUCTION_RETRY,
|
||||
});
|
||||
|
||||
// Activity proxy with testing retry configuration (fast)
|
||||
const testActs = proxyActivities<typeof activities>({
|
||||
startToCloseTimeout: '10 minutes',
|
||||
heartbeatTimeout: '30 seconds',
|
||||
heartbeatTimeout: '5 minutes', // Shorter for testing but still tolerant of resource contention
|
||||
retry: TESTING_RETRY,
|
||||
});
|
||||
|
||||
@@ -145,15 +145,19 @@ export async function pentestPipelineWorkflow(
|
||||
// === Phase 1: Pre-Reconnaissance ===
|
||||
state.currentPhase = 'pre-recon';
|
||||
state.currentAgent = 'pre-recon';
|
||||
await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
|
||||
state.agentMetrics['pre-recon'] =
|
||||
await a.runPreReconAgent(activityInput);
|
||||
state.completedAgents.push('pre-recon');
|
||||
await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');
|
||||
|
||||
// === Phase 2: Reconnaissance ===
|
||||
state.currentPhase = 'recon';
|
||||
state.currentAgent = 'recon';
|
||||
await a.logPhaseTransition(activityInput, 'recon', 'start');
|
||||
state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
|
||||
state.completedAgents.push('recon');
|
||||
await a.logPhaseTransition(activityInput, 'recon', 'complete');
|
||||
|
||||
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
|
||||
// Each vuln type runs as an independent pipeline:
|
||||
@@ -162,6 +166,7 @@ export async function pentestPipelineWorkflow(
|
||||
// starts immediately when its vuln agent finishes, not waiting for all.
|
||||
state.currentPhase = 'vulnerability-exploitation';
|
||||
state.currentAgent = 'pipelines';
|
||||
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
|
||||
|
||||
// Helper: Run a single vuln→exploit pipeline
|
||||
async function runVulnExploitPipeline(
|
||||
@@ -261,10 +266,12 @@ export async function pentestPipelineWorkflow(
|
||||
// Update phase markers
|
||||
state.currentPhase = 'exploitation';
|
||||
state.currentAgent = null;
|
||||
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');
|
||||
|
||||
// === Phase 5: Reporting ===
|
||||
state.currentPhase = 'reporting';
|
||||
state.currentAgent = 'report';
|
||||
await a.logPhaseTransition(activityInput, 'reporting', 'start');
|
||||
|
||||
// First, assemble the concatenated report from exploitation evidence files
|
||||
await a.assembleReportActivity(activityInput);
|
||||
@@ -282,18 +289,50 @@ export async function pentestPipelineWorkflow(
|
||||
};
|
||||
state.agentMetrics['report'] = await a.runReportAgent(reportInput);
|
||||
state.completedAgents.push('report');
|
||||
await a.logPhaseTransition(activityInput, 'reporting', 'complete');
|
||||
|
||||
// === Complete ===
|
||||
state.status = 'completed';
|
||||
state.currentPhase = null;
|
||||
state.currentAgent = null;
|
||||
state.summary = computeSummary(state);
|
||||
|
||||
// Log workflow completion summary
|
||||
await a.logWorkflowComplete(activityInput, {
|
||||
status: 'completed',
|
||||
totalDurationMs: state.summary.totalDurationMs,
|
||||
totalCostUsd: state.summary.totalCostUsd,
|
||||
completedAgents: state.completedAgents,
|
||||
agentMetrics: Object.fromEntries(
|
||||
Object.entries(state.agentMetrics).map(([name, m]) => [
|
||||
name,
|
||||
{ durationMs: m.durationMs, costUsd: m.costUsd },
|
||||
])
|
||||
),
|
||||
});
|
||||
|
||||
return state;
|
||||
} catch (error) {
|
||||
state.status = 'failed';
|
||||
state.failedAgent = state.currentAgent;
|
||||
state.error = error instanceof Error ? error.message : String(error);
|
||||
state.summary = computeSummary(state);
|
||||
|
||||
// Log workflow failure summary
|
||||
await a.logWorkflowComplete(activityInput, {
|
||||
status: 'failed',
|
||||
totalDurationMs: state.summary.totalDurationMs,
|
||||
totalCostUsd: state.summary.totalCostUsd,
|
||||
completedAgents: state.completedAgents,
|
||||
agentMetrics: Object.fromEntries(
|
||||
Object.entries(state.agentMetrics).map(([name, m]) => [
|
||||
name,
|
||||
{ durationMs: m.durationMs, costUsd: m.costUsd },
|
||||
])
|
||||
),
|
||||
error: state.error ?? undefined,
|
||||
});
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user