Merge branch 'main' into feat/telemetry

2026-07-12 15:56:40 +02:00 · 2026-01-15 10:44:28 -08:00
parent 7dc8cfe5c7 78a0a61208
commit f3cb5dce8f
13 changed files with 2287 additions and 13 deletions
@@ -1,12 +1,12 @@
 services:
  temporal:
    image: temporalio/temporal:latest
-    command: ["server", "start-dev", "--db-filename", "/var/lib/temporal/temporal.db", "--ip", "0.0.0.0"]
+    command: ["server", "start-dev", "--db-filename", "/home/temporal/temporal.db", "--ip", "0.0.0.0"]
    ports:
      - "7233:7233"   # gRPC
      - "8233:8233"   # Web UI (built-in)
    volumes:
-      - temporal-data:/var/lib/temporal
+      - temporal-data:/home/temporal
    healthcheck:
      test: ["CMD", "temporal", "operator", "cluster", "health", "--address", "localhost:7233"]
      interval: 10s
@@ -27,6 +27,7 @@ services:
        condition: service_healthy
    volumes:
      - ./prompts:/app/prompts
+      - ./audit-logs:/app/audit-logs
      - ${TARGET_REPO:-.}:/target-repo
      - ${BENCHMARKS_BASE:-.}:/benchmarks
      - ${HOME}/.shannon:/tmp/.shannon
@@ -18,7 +18,7 @@ Shannon - AI Penetration Testing Framework

 Usage:
  ./shannon start URL=<url> REPO=<path>   Start a pentest workflow
-  ./shannon logs                          View real-time worker logs
+  ./shannon logs ID=<workflow-id>         Tail logs for a specific workflow
  ./shannon query ID=<workflow-id>        Query workflow progress
  ./shannon stop                          Stop all containers
  ./shannon help                          Show this help message
@@ -34,6 +34,7 @@ Options for 'stop':
 Examples:
  ./shannon start URL=https://example.com REPO=/path/to/repo
  ./shannon start URL=https://example.com REPO=/path/to/repo CONFIG=./config.yaml
+  ./shannon logs ID=example.com_shannon-1234567890
  ./shannon query ID=shannon-1234567890
  ./shannon stop CLEAN=true

@@ -139,7 +140,30 @@ cmd_start() {
 }

 cmd_logs() {
-  docker compose -f "$COMPOSE_FILE" logs -f worker "$@"
+  parse_args "$@"
+
+  if [ -z "$ID" ]; then
+    echo "ERROR: ID is required"
+    echo "Usage: ./shannon logs ID=<workflow-id>"
+    exit 1
+  fi
+
+  WORKFLOW_LOG="./audit-logs/${ID}/workflow.log"
+
+  if [ -f "$WORKFLOW_LOG" ]; then
+    echo "Tailing workflow log: $WORKFLOW_LOG"
+    tail -f "$WORKFLOW_LOG"
+  else
+    echo "ERROR: Workflow log not found: $WORKFLOW_LOG"
+    echo ""
+    echo "Possible causes:"
+    echo "  - Workflow hasn't started yet"
+    echo "  - Workflow ID is incorrect"
+    echo "  - Workflow is using a custom OUTPUT path"
+    echo ""
+    echo "Check: ./shannon query ID=$ID for workflow details"
+    exit 1
+  fi
 }

 cmd_query() {
@@ -248,6 +248,26 @@ export async function runClaudePrompt(
    apiErrorDetected = messageLoopResult.apiErrorDetected;
    totalCost = messageLoopResult.cost;

+    // === SPENDING CAP SAFEGUARD ===
+    // Defense-in-depth: Detect spending cap that slipped through detectApiError().
+    // When spending cap is hit, Claude returns a short message with $0 cost.
+    // Legitimate agent work NEVER costs $0 with only 1-2 turns.
+    if (turnCount <= 2 && totalCost === 0) {
+      const resultLower = (result || '').toLowerCase();
+      const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
+      const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
+        resultLower.includes(kw)
+      );
+
+      if (looksLikeBillingError) {
+        throw new PentestError(
+          `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
+          'billing',
+          true // Retryable - Temporal will use 5-30 min backoff
+        );
+      }
+    }
+
    const duration = timer.stop();
    timingResults.agents[execContext.agentKey] = duration;

@@ -55,7 +55,35 @@ export function detectApiError(content: string): ApiErrorDetection {

  const lowerContent = content.toLowerCase();

-  // Fatal error - should throw immediately
+  // === BILLING/SPENDING CAP ERRORS (Retryable with long backoff) ===
+  // When Claude Code hits its spending cap, it returns a short message like
+  // "Spending cap reached resets 8am" instead of throwing an error.
+  // These should retry with 5-30 min backoff so workflows can recover when cap resets.
+  const BILLING_PATTERNS = [
+    'spending cap',
+    'spending limit',
+    'cap reached',
+    'budget exceeded',
+    'usage limit',
+  ];
+
+  const isBillingError = BILLING_PATTERNS.some((pattern) =>
+    lowerContent.includes(pattern)
+  );
+
+  if (isBillingError) {
+    return {
+      detected: true,
+      shouldThrow: new PentestError(
+        `Billing limit reached: ${content.slice(0, 100)}`,
+        'billing',
+        true // RETRYABLE - Temporal will use 5-30 min backoff
+      ),
+    };
+  }
+
+  // === SESSION LIMIT (Non-retryable) ===
+  // Different from spending cap - usually means something is fundamentally wrong
  if (lowerContent.includes('session limit reached')) {
    return {
      detected: true,
@@ -12,6 +12,7 @@
 */

 import { AgentLogger } from './logger.js';
+import { WorkflowLogger, type AgentLogDetails, type WorkflowSummary } from './workflow-logger.js';
 import { MetricsTracker } from './metrics-tracker.js';
 import { initializeAuditStructure, type SessionMetadata } from './utils.js';
 import { formatTimestamp } from '../utils/formatting.js';
@@ -37,7 +38,9 @@ export class AuditSession {
  private sessionMetadata: SessionMetadata;
  private sessionId: string;
  private metricsTracker: MetricsTracker;
+  private workflowLogger: WorkflowLogger;
  private currentLogger: AgentLogger | null = null;
+  private currentAgentName: string | null = null;
  private initialized: boolean = false;

  constructor(sessionMetadata: SessionMetadata) {
@@ -54,6 +57,7 @@ export class AuditSession {

    // Components
    this.metricsTracker = new MetricsTracker(sessionMetadata);
+    this.workflowLogger = new WorkflowLogger(sessionMetadata);
  }

  /**
@@ -71,6 +75,9 @@ export class AuditSession {
    // Initialize metrics tracker (loads or creates session.json)
    await this.metricsTracker.initialize();

+    // Initialize workflow logger
+    await this.workflowLogger.initialize();
+
    this.initialized = true;
  }

@@ -98,6 +105,9 @@ export class AuditSession {
      await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
    }

+    // Track current agent name for workflow logging
+    this.currentAgentName = agentName;
+
    // Create and initialize logger for this attempt
    this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
    await this.currentLogger.initialize();
@@ -111,6 +121,9 @@ export class AuditSession {
      attemptNumber,
      timestamp: formatTimestamp(),
    });
+
+    // Log to unified workflow log
+    await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
  }

  /**
@@ -121,7 +134,30 @@ export class AuditSession {
      throw new Error('No active logger. Call startAgent() first.');
    }

+    // Log to agent-specific log file (JSON format)
    await this.currentLogger.logEvent(eventType, eventData);
+
+    // Also log to unified workflow log (human-readable format)
+    const data = eventData as Record<string, unknown>;
+    const agentName = this.currentAgentName || 'unknown';
+    switch (eventType) {
+      case 'tool_start':
+        await this.workflowLogger.logToolStart(
+          agentName,
+          String(data.toolName || ''),
+          data.parameters
+        );
+        break;
+      case 'llm_response':
+        await this.workflowLogger.logLlmResponse(
+          agentName,
+          Number(data.turn || 0),
+          String(data.content || '')
+        );
+        break;
+      // tool_end and error events are intentionally not logged to workflow log
+      // to reduce noise - the agent completion message captures the outcome
+    }
  }

  /**
@@ -143,6 +179,19 @@ export class AuditSession {
      this.currentLogger = null;
    }

+    // Reset current agent name
+    this.currentAgentName = null;
+
+    // Log to unified workflow log
+    const agentLogDetails: AgentLogDetails = {
+      attemptNumber: result.attemptNumber,
+      duration_ms: result.duration_ms,
+      cost_usd: result.cost_usd,
+      success: result.success,
+      ...(result.error !== undefined && { error: result.error }),
+    };
+    await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
+
    // Mutex-protected update to session.json
    const unlock = await sessionMutex.lock(this.sessionId);
    try {
@@ -178,4 +227,28 @@ export class AuditSession {
    await this.ensureInitialized();
    return this.metricsTracker.getMetrics();
  }
+
+  /**
+   * Log phase start to unified workflow log
+   */
+  async logPhaseStart(phase: string): Promise<void> {
+    await this.ensureInitialized();
+    await this.workflowLogger.logPhase(phase, 'start');
+  }
+
+  /**
+   * Log phase completion to unified workflow log
+   */
+  async logPhaseComplete(phase: string): Promise<void> {
+    await this.ensureInitialized();
+    await this.workflowLogger.logPhase(phase, 'complete');
+  }
+
+  /**
+   * Log workflow completion to unified workflow log
+   */
+  async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
+    await this.ensureInitialized();
+    await this.workflowLogger.logWorkflowComplete(summary);
+  }
 }
@@ -18,5 +18,6 @@

 export { AuditSession } from './audit-session.js';
 export { AgentLogger } from './logger.js';
+export { WorkflowLogger } from './workflow-logger.js';
 export { MetricsTracker } from './metrics-tracker.js';
 export * as AuditUtils from './utils.js';
@@ -85,6 +85,14 @@ export function generateSessionJsonPath(sessionMetadata: SessionMetadata): strin
  return path.join(auditPath, 'session.json');
 }

+/**
+ * Generate path to workflow.log file
+ */
+export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): string {
+  const auditPath = generateAuditPath(sessionMetadata);
+  return path.join(auditPath, 'workflow.log');
+}
+
 /**
 * Ensure directory exists (idempotent, race-safe)
 */
@@ -0,0 +1,382 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Workflow Logger
+ *
+ * Provides a unified, human-readable log file per workflow.
+ * Optimized for `tail -f` viewing during concurrent workflow execution.
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
+import { formatDuration, formatTimestamp } from '../utils/formatting.js';
+
+export interface AgentLogDetails {
+  attemptNumber?: number;
+  duration_ms?: number;
+  cost_usd?: number;
+  success?: boolean;
+  error?: string;
+}
+
+export interface AgentMetricsSummary {
+  durationMs: number;
+  costUsd: number | null;
+}
+
+export interface WorkflowSummary {
+  status: 'completed' | 'failed';
+  totalDurationMs: number;
+  totalCostUsd: number;
+  completedAgents: string[];
+  agentMetrics: Record<string, AgentMetricsSummary>;
+  error?: string;
+}
+
+/**
+ * WorkflowLogger - Manages the unified workflow log file
+ */
+export class WorkflowLogger {
+  private sessionMetadata: SessionMetadata;
+  private logPath: string;
+  private stream: fs.WriteStream | null = null;
+  private initialized: boolean = false;
+
+  constructor(sessionMetadata: SessionMetadata) {
+    this.sessionMetadata = sessionMetadata;
+    this.logPath = generateWorkflowLogPath(sessionMetadata);
+  }
+
+  /**
+   * Initialize the log stream (creates file and writes header)
+   */
+  async initialize(): Promise<void> {
+    if (this.initialized) {
+      return;
+    }
+
+    // Ensure directory exists
+    await ensureDirectory(path.dirname(this.logPath));
+
+    // Create write stream with append mode
+    this.stream = fs.createWriteStream(this.logPath, {
+      flags: 'a',
+      encoding: 'utf8',
+      autoClose: true,
+    });
+
+    this.initialized = true;
+
+    // Write header only if file is new (empty)
+    const stats = await fs.promises.stat(this.logPath).catch(() => null);
+    if (!stats || stats.size === 0) {
+      await this.writeHeader();
+    }
+  }
+
+  /**
+   * Write header to log file
+   */
+  private async writeHeader(): Promise<void> {
+    const header = [
+      `================================================================================`,
+      `Shannon Pentest - Workflow Log`,
+      `================================================================================`,
+      `Workflow ID: ${this.sessionMetadata.id}`,
+      `Target URL:  ${this.sessionMetadata.webUrl}`,
+      `Started:     ${formatTimestamp()}`,
+      `================================================================================`,
+      ``,
+    ].join('\n');
+
+    return this.writeRaw(header);
+  }
+
+  /**
+   * Write raw text to log file with immediate flush
+   */
+  private writeRaw(text: string): Promise<void> {
+    return new Promise((resolve, reject) => {
+      if (!this.initialized || !this.stream) {
+        reject(new Error('WorkflowLogger not initialized'));
+        return;
+      }
+
+      const needsDrain = !this.stream.write(text, 'utf8', (error) => {
+        if (error) reject(error);
+      });
+
+      if (needsDrain) {
+        this.stream.once('drain', resolve);
+      } else {
+        resolve();
+      }
+    });
+  }
+
+  /**
+   * Format timestamp for log line (local time, human readable)
+   */
+  private formatLogTime(): string {
+    const now = new Date();
+    return now.toISOString().replace('T', ' ').slice(0, 19);
+  }
+
+  /**
+   * Log a phase transition event
+   */
+  async logPhase(phase: string, event: 'start' | 'complete'): Promise<void> {
+    await this.ensureInitialized();
+
+    const action = event === 'start' ? 'Starting' : 'Completed';
+    const line = `[${this.formatLogTime()}] [PHASE] ${action}: ${phase}\n`;
+
+    // Add blank line before phase start for readability
+    if (event === 'start') {
+      await this.writeRaw('\n');
+    }
+
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Log an agent event
+   */
+  async logAgent(
+    agentName: string,
+    event: 'start' | 'end',
+    details?: AgentLogDetails
+  ): Promise<void> {
+    await this.ensureInitialized();
+
+    let message: string;
+
+    if (event === 'start') {
+      const attempt = details?.attemptNumber ?? 1;
+      message = `${agentName}: Starting (attempt ${attempt})`;
+    } else {
+      const parts: string[] = [agentName + ':'];
+
+      if (details?.success === false) {
+        parts.push('Failed');
+        if (details?.error) {
+          parts.push(`- ${details.error}`);
+        }
+      } else {
+        parts.push('Completed');
+      }
+
+      if (details?.duration_ms !== undefined) {
+        parts.push(`(${formatDuration(details.duration_ms)}`);
+        if (details?.cost_usd !== undefined) {
+          parts.push(`$${details.cost_usd.toFixed(2)})`);
+        } else {
+          parts.push(')');
+        }
+      }
+
+      message = parts.join(' ');
+    }
+
+    const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Log a general event
+   */
+  async logEvent(eventType: string, message: string): Promise<void> {
+    await this.ensureInitialized();
+
+    const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Log an error
+   */
+  async logError(error: Error, context?: string): Promise<void> {
+    await this.ensureInitialized();
+
+    const contextStr = context ? ` (${context})` : '';
+    const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Truncate string to max length with ellipsis
+   */
+  private truncate(str: string, maxLen: number): string {
+    if (str.length <= maxLen) return str;
+    return str.slice(0, maxLen - 3) + '...';
+  }
+
+  /**
+   * Format tool parameters for human-readable display
+   */
+  private formatToolParams(toolName: string, params: unknown): string {
+    if (!params || typeof params !== 'object') {
+      return '';
+    }
+
+    const p = params as Record<string, unknown>;
+
+    // Tool-specific formatting for common tools
+    switch (toolName) {
+      case 'Bash':
+        if (p.command) {
+          return this.truncate(String(p.command).replace(/\n/g, ' '), 100);
+        }
+        break;
+      case 'Read':
+        if (p.file_path) {
+          return String(p.file_path);
+        }
+        break;
+      case 'Write':
+        if (p.file_path) {
+          return String(p.file_path);
+        }
+        break;
+      case 'Edit':
+        if (p.file_path) {
+          return String(p.file_path);
+        }
+        break;
+      case 'Glob':
+        if (p.pattern) {
+          return String(p.pattern);
+        }
+        break;
+      case 'Grep':
+        if (p.pattern) {
+          const path = p.path ? ` in ${p.path}` : '';
+          return `"${this.truncate(String(p.pattern), 50)}"${path}`;
+        }
+        break;
+      case 'WebFetch':
+        if (p.url) {
+          return String(p.url);
+        }
+        break;
+      case 'mcp__playwright__browser_navigate':
+        if (p.url) {
+          return String(p.url);
+        }
+        break;
+      case 'mcp__playwright__browser_click':
+        if (p.selector) {
+          return this.truncate(String(p.selector), 60);
+        }
+        break;
+      case 'mcp__playwright__browser_type':
+        if (p.selector) {
+          const text = p.text ? `: "${this.truncate(String(p.text), 30)}"` : '';
+          return `${this.truncate(String(p.selector), 40)}${text}`;
+        }
+        break;
+    }
+
+    // Default: show first string-valued param truncated
+    for (const [key, val] of Object.entries(p)) {
+      if (typeof val === 'string' && val.length > 0) {
+        return `${key}=${this.truncate(val, 60)}`;
+      }
+    }
+
+    return '';
+  }
+
+  /**
+   * Log tool start event
+   */
+  async logToolStart(agentName: string, toolName: string, parameters: unknown): Promise<void> {
+    await this.ensureInitialized();
+
+    const params = this.formatToolParams(toolName, parameters);
+    const paramStr = params ? `: ${params}` : '';
+    const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Log LLM response
+   */
+  async logLlmResponse(agentName: string, turn: number, content: string): Promise<void> {
+    await this.ensureInitialized();
+
+    // Show full content, replacing newlines with escaped version for single-line output
+    const escaped = content.replace(/\n/g, '\\n');
+    const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
+    await this.writeRaw(line);
+  }
+
+  /**
+   * Log workflow completion with full summary
+   */
+  async logWorkflowComplete(summary: WorkflowSummary): Promise<void> {
+    await this.ensureInitialized();
+
+    const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
+
+    await this.writeRaw('\n');
+    await this.writeRaw(`================================================================================\n`);
+    await this.writeRaw(`Workflow ${status}\n`);
+    await this.writeRaw(`────────────────────────────────────────\n`);
+    await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
+    await this.writeRaw(`Status:      ${summary.status}\n`);
+    await this.writeRaw(`Duration:    ${formatDuration(summary.totalDurationMs)}\n`);
+    await this.writeRaw(`Total Cost:  $${summary.totalCostUsd.toFixed(4)}\n`);
+    await this.writeRaw(`Agents:      ${summary.completedAgents.length} completed\n`);
+
+    if (summary.error) {
+      await this.writeRaw(`Error:       ${summary.error}\n`);
+    }
+
+    await this.writeRaw(`\n`);
+    await this.writeRaw(`Agent Breakdown:\n`);
+
+    for (const agentName of summary.completedAgents) {
+      const metrics = summary.agentMetrics[agentName];
+      if (metrics) {
+        const duration = formatDuration(metrics.durationMs);
+        const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
+        await this.writeRaw(`  - ${agentName} (${duration}, ${cost})\n`);
+      } else {
+        await this.writeRaw(`  - ${agentName}\n`);
+      }
+    }
+
+    await this.writeRaw(`================================================================================\n`);
+  }
+
+  /**
+   * Ensure initialized (helper for lazy initialization)
+   */
+  private async ensureInitialized(): Promise<void> {
+    if (!this.initialized) {
+      await this.initialize();
+    }
+  }
+
+  /**
+   * Close the log stream
+   */
+  async close(): Promise<void> {
+    if (!this.initialized || !this.stream) {
+      return;
+    }
+
+    return new Promise((resolve) => {
+      this.stream!.end(() => {
+        this.initialized = false;
+        resolve();
+      });
+    });
+  }
+}
@@ -210,7 +210,7 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific

  // === BILLING ERRORS (Retryable with long backoff) ===
  // Anthropic returns billing as 400 invalid_request_error
-  // Human can add credits, so retry with 5-30 min backoff
+  // Human can add credits OR wait for spending cap to reset (5-30 min backoff)
  if (
    message.includes('billing_error') ||
    message.includes('credit balance is too low') ||
@@ -221,7 +221,13 @@ export function classifyErrorForTemporal(error: unknown): TemporalErrorClassific
    message.includes('usage limit reached') ||
    message.includes('quota exceeded') ||
    message.includes('daily rate limit') ||
-    message.includes('limit will reset')
+    message.includes('limit will reset') ||
+    // Claude Code spending cap patterns (returns short message instead of error)
+    message.includes('spending cap') ||
+    message.includes('spending limit') ||
+    message.includes('cap reached') ||
+    message.includes('budget exceeded') ||
+    message.includes('billing limit reached')
  ) {
    return { type: 'BillingError', retryable: true };
  }
@@ -71,12 +71,13 @@ import { assembleFinalReport } from '../phases/reporting.js';
 import { getPromptNameForAgent } from '../types/agents.js';
 import { AuditSession } from '../audit/index.js';
 import { telemetry, TelemetryEvent, hashTargetUrl } from '../telemetry/index.js';
+import type { WorkflowSummary } from '../audit/workflow-logger.js';
 import type { AgentName } from '../types/agents.js';
 import type { AgentMetrics } from './shared.js';
 import type { DistributedConfig } from '../types/config.js';
 import type { SessionMetadata } from '../audit/utils.js';

-const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (30s)
+const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)

 /**
 * Input for all agent activities.
@@ -200,6 +201,26 @@ async function runAgentActivity(
      attemptNumber
    );

+    // 6.5. Sanity check: Detect spending cap that slipped through all detection layers
+    // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
+    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
+      const resultText = result.result || '';
+      const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
+
+      if (looksLikeBillingError) {
+        await rollbackGitWorkspace(repoPath, 'spending cap detected');
+        await auditSession.endAgent(agentName, {
+          attemptNumber,
+          duration_ms: result.duration,
+          cost_usd: 0,
+          success: false,
+          error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
+        });
+        // Throw as billing error so Temporal retries with long backoff
+        throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
+      }
+    }
+
    // 7. Handle execution failure
    if (!result.success) {
      await rollbackGitWorkspace(repoPath, 'execution failure');
@@ -419,6 +440,7 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
    });
    throw error;
  }
+  return runAgentActivity('report', input);
 }

 /**
@@ -485,3 +507,53 @@ export async function checkExploitationQueue(
    vulnType,
  };
 }
+
+/**
+ * Log phase transition to the unified workflow log.
+ * Called at phase boundaries for per-workflow logging.
+ */
+export async function logPhaseTransition(
+  input: ActivityInput,
+  phase: string,
+  event: 'start' | 'complete'
+): Promise<void> {
+  const { webUrl, repoPath, outputPath, workflowId } = input;
+
+  const sessionMetadata: SessionMetadata = {
+    id: workflowId,
+    webUrl,
+    repoPath,
+    ...(outputPath && { outputPath }),
+  };
+
+  const auditSession = new AuditSession(sessionMetadata);
+  await auditSession.initialize();
+
+  if (event === 'start') {
+    await auditSession.logPhaseStart(phase);
+  } else {
+    await auditSession.logPhaseComplete(phase);
+  }
+}
+
+/**
+ * Log workflow completion with full summary to the unified workflow log.
+ * Called at the end of the workflow to write a summary breakdown.
+ */
+export async function logWorkflowComplete(
+  input: ActivityInput,
+  summary: WorkflowSummary
+): Promise<void> {
+  const { webUrl, repoPath, outputPath, workflowId } = input;
+
+  const sessionMetadata: SessionMetadata = {
+    id: workflowId,
+    webUrl,
+    repoPath,
+    ...(outputPath && { outputPath }),
+  };
+
+  const auditSession = new AuditSession(sessionMetadata);
+  await auditSession.initialize();
+  await auditSession.logWorkflowComplete(summary);
+}
@@ -175,11 +175,11 @@ async function startPipeline(): Promise<void> {
      target_hash: hashTargetUrl(webUrl),
      workflow_id: workflowId,
    });
-
+    
    if (!waitForCompletion) {
      console.log(chalk.bold('Monitor progress:'));
      console.log(chalk.white('  Web UI:  ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
-      console.log(chalk.white('  Logs:    ') + chalk.gray('./shannon logs'));
+      console.log(chalk.white('  Logs:    ') + chalk.gray(`./shannon logs ID=${workflowId}`));
      console.log(chalk.white('  Query:   ') + chalk.gray(`./shannon query ID=${workflowId}`));
      console.log();
      return;
@@ -70,14 +70,14 @@ const TESTING_RETRY = {
 // Activity proxy with production retry configuration (default)
 const acts = proxyActivities<typeof activities>({
  startToCloseTimeout: '2 hours',
-  heartbeatTimeout: '30 seconds',
+  heartbeatTimeout: '10 minutes', // Long timeout for resource-constrained workers with many concurrent activities
  retry: PRODUCTION_RETRY,
 });

 // Activity proxy with testing retry configuration (fast)
 const testActs = proxyActivities<typeof activities>({
  startToCloseTimeout: '10 minutes',
-  heartbeatTimeout: '30 seconds',
+  heartbeatTimeout: '5 minutes', // Shorter for testing but still tolerant of resource contention
  retry: TESTING_RETRY,
 });

@@ -145,15 +145,19 @@ export async function pentestPipelineWorkflow(
    // === Phase 1: Pre-Reconnaissance ===
    state.currentPhase = 'pre-recon';
    state.currentAgent = 'pre-recon';
+    await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
    state.agentMetrics['pre-recon'] =
      await a.runPreReconAgent(activityInput);
    state.completedAgents.push('pre-recon');
+    await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');

    // === Phase 2: Reconnaissance ===
    state.currentPhase = 'recon';
    state.currentAgent = 'recon';
+    await a.logPhaseTransition(activityInput, 'recon', 'start');
    state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
    state.completedAgents.push('recon');
+    await a.logPhaseTransition(activityInput, 'recon', 'complete');

    // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
    // Each vuln type runs as an independent pipeline:
@@ -162,6 +166,7 @@ export async function pentestPipelineWorkflow(
    // starts immediately when its vuln agent finishes, not waiting for all.
    state.currentPhase = 'vulnerability-exploitation';
    state.currentAgent = 'pipelines';
+    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');

    // Helper: Run a single vuln→exploit pipeline
    async function runVulnExploitPipeline(
@@ -261,10 +266,12 @@ export async function pentestPipelineWorkflow(
    // Update phase markers
    state.currentPhase = 'exploitation';
    state.currentAgent = null;
+    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');

    // === Phase 5: Reporting ===
    state.currentPhase = 'reporting';
    state.currentAgent = 'report';
+    await a.logPhaseTransition(activityInput, 'reporting', 'start');

    // First, assemble the concatenated report from exploitation evidence files
    await a.assembleReportActivity(activityInput);
@@ -282,18 +289,50 @@ export async function pentestPipelineWorkflow(
    };
    state.agentMetrics['report'] = await a.runReportAgent(reportInput);
    state.completedAgents.push('report');
+    await a.logPhaseTransition(activityInput, 'reporting', 'complete');

    // === Complete ===
    state.status = 'completed';
    state.currentPhase = null;
    state.currentAgent = null;
    state.summary = computeSummary(state);
+
+    // Log workflow completion summary
+    await a.logWorkflowComplete(activityInput, {
+      status: 'completed',
+      totalDurationMs: state.summary.totalDurationMs,
+      totalCostUsd: state.summary.totalCostUsd,
+      completedAgents: state.completedAgents,
+      agentMetrics: Object.fromEntries(
+        Object.entries(state.agentMetrics).map(([name, m]) => [
+          name,
+          { durationMs: m.durationMs, costUsd: m.costUsd },
+        ])
+      ),
+    });
+
    return state;
  } catch (error) {
    state.status = 'failed';
    state.failedAgent = state.currentAgent;
    state.error = error instanceof Error ? error.message : String(error);
    state.summary = computeSummary(state);
+
+    // Log workflow failure summary
+    await a.logWorkflowComplete(activityInput, {
+      status: 'failed',
+      totalDurationMs: state.summary.totalDurationMs,
+      totalCostUsd: state.summary.totalCostUsd,
+      completedAgents: state.completedAgents,
+      agentMetrics: Object.fromEntries(
+        Object.entries(state.agentMetrics).map(([name, m]) => [
+          name,
+          { durationMs: m.durationMs, costUsd: m.costUsd },
+        ])
+      ),
+      error: state.error ?? undefined,
+    });
+
    throw error;
  }
 }