refactor: add numbered step comments to 20 complex sequential functions

- Add `// N. Description` step comments to the temporal layer (client, activities, workflows)
- Add steps to AI layer (claude-executor: runClaudePrompt, buildMcpServers)
- Add steps to services layer (prompt-manager, config-parser, git-manager)
- Add steps to audit layer (metrics-tracker, audit-session)
- Update CLAUDE.md comment guidelines with clearer numbered-step vs section-divider guidance
This commit is contained in:
ajmallesh
2026-02-16 20:45:58 -08:00
parent d696a7584b
commit a960ad1182
10 changed files with 95 additions and 44 deletions

View File

@@ -129,8 +129,13 @@ Comments must be **timeless** — no references to this conversation, refactorin
**Patterns used in this codebase:**
- `/** JSDoc */` — file headers (after license) and exported functions/interfaces
- `// 1. Step` — numbered steps for sequential operations
- `// === Section ===` — dividers in long files/functions
- `// N. Description` — numbered sequential steps inside function bodies. Use when a
function has 3+ distinct phases where at least one isn't immediately obvious from the
code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
(steps 1-9) and `injectModelIntoReport` (steps 1-5)
- `// === Section ===` — high-level dividers between groups of functions in long files,
or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
Not for sequential steps inside function bodies — use numbered steps for that
- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")

View File

@@ -60,12 +60,14 @@ function buildMcpServers(
agentName: string | null,
logger: ActivityLogger
): Record<string, McpServer> {
// 1. Create the shannon-helper server (always present)
const shannonHelperServer = createShannonHelperServer(sourceDir);
const mcpServers: Record<string, McpServer> = {
'shannon-helper': shannonHelperServer,
};
// 2. Look up the agent's Playwright MCP mapping
if (agentName) {
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
@@ -75,7 +77,7 @@ function buildMcpServers(
const userDataDir = `/tmp/${playwrightMcpName}`;
// Docker uses system Chromium; local dev uses Playwright's bundled browsers
// 3. Configure Playwright MCP args with Docker/local browser handling
const isDocker = process.env.SHANNON_DOCKER === 'true';
const mcpArgs: string[] = [
@@ -84,7 +86,6 @@ function buildMcpServers(
'--user-data-dir', userDataDir,
];
// Docker: Use system Chromium; Local: Use Playwright's bundled browsers
if (isDocker) {
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
mcpArgs.push('--browser', 'chromium');
@@ -107,6 +108,7 @@ function buildMcpServers(
}
}
// 4. Return configured servers
return mcpServers;
}
@@ -202,9 +204,11 @@ export async function runClaudePrompt(
auditSession: AuditSession | null = null,
logger: ActivityLogger
): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
// 2. Set up progress and audit infrastructure
const execContext = detectExecutionContext(description);
const progress = createProgressManager(
{ description, useCleanOutput: execContext.useCleanOutput },
@@ -214,9 +218,10 @@ export async function runClaudePrompt(
logger.info(`Running Claude Code: ${description}...`);
// 3. Configure MCP servers
const mcpServers = buildMcpServers(sourceDir, agentName, logger);
// Build env vars to pass to SDK subprocesses
// 4. Build env vars to pass to SDK subprocesses
const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
};
@@ -227,6 +232,7 @@ export async function runClaudePrompt(
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
}
// 5. Configure SDK options
const options = {
model: 'claude-sonnet-4-5-20250929',
maxTurns: 10_000,
@@ -249,6 +255,7 @@ export async function runClaudePrompt(
progress.start();
try {
// 6. Process the message stream
const messageLoopResult = await processMessageStream(
fullPrompt,
options,
@@ -263,7 +270,7 @@ export async function runClaudePrompt(
const model = messageLoopResult.model;
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
// 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
// Uses consolidated billing detection from utils/billing-detection.ts
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
throw new PentestError(
@@ -273,6 +280,7 @@ export async function runClaudePrompt(
);
}
// 8. Finalize successful result
const duration = timer.stop();
if (apiErrorDetected) {
@@ -293,6 +301,7 @@ export async function runClaudePrompt(
};
} catch (error) {
// 9. Handle errors — log, write error file, return failure
const duration = timer.stop();
const err = error as Error & { code?: string; status?: number };

View File

@@ -107,18 +107,20 @@ export class AuditSession {
): Promise<void> {
await this.ensureInitialized();
// Save prompt snapshot (only on first attempt)
// 1. Save prompt snapshot (only on first attempt)
if (attemptNumber === 1) {
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
}
// 2. Create and initialize the per-agent logger
this.currentAgentName = agentName;
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
await this.currentLogger.initialize();
// 3. Start metrics timer
this.metricsTracker.startAgent(agentName, attemptNumber);
// 4. Log start event to both agent log and workflow log
await this.currentLogger.logEvent('agent_start', {
agentName,
attemptNumber,
@@ -172,6 +174,7 @@ export class AuditSession {
* End agent execution (mutex-protected)
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
// 1. Finalize agent log and close the stream
if (this.currentLogger) {
await this.currentLogger.logEvent('agent_end', {
agentName,
@@ -185,6 +188,7 @@ export class AuditSession {
this.currentLogger = null;
}
// 2. Log completion to the unified workflow log
this.currentAgentName = null;
const agentLogDetails: AgentLogDetails = {
@@ -196,12 +200,11 @@ export class AuditSession {
};
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
// Mutex-protected update to session.json
// 3. Acquire mutex before touching session.json
const unlock = await sessionMutex.lock(this.sessionId);
try {
// Reload inside mutex to prevent lost updates during parallel exploitation phase
// 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
await this.metricsTracker.reload();
await this.metricsTracker.endAgent(agentName, result);
} finally {
unlock();

View File

@@ -170,7 +170,7 @@ export class MetricsTracker {
);
}
// Initialize agent metrics if not exists
// 1. Initialize agent metrics if first time seeing this agent
const existingAgent = this.data.metrics.agents[agentName];
const agent = existingAgent ?? {
status: 'in-progress' as const,
@@ -180,7 +180,7 @@ export class MetricsTracker {
};
this.data.metrics.agents[agentName] = agent;
// Add attempt to array
// 2. Build attempt record with optional model/error fields
const attempt: AttemptData = {
attempt_number: result.attemptNumber,
duration_ms: result.duration_ms,
@@ -197,16 +197,18 @@ export class MetricsTracker {
attempt.error = result.error;
}
// 3. Append attempt to history
agent.attempts.push(attempt);
// Update total cost (includes failed attempts)
// 4. Recalculate total cost across all attempts (includes failures)
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
// If successful, update final metrics and status
// 5. Update agent status based on outcome
if (result.success) {
agent.status = 'success';
agent.final_duration_ms = result.duration_ms;
// 6. Attach model and checkpoint metadata on success
if (result.model) {
agent.model = result.model;
}
@@ -215,18 +217,18 @@ export class MetricsTracker {
agent.checkpoint = result.checkpoint;
}
} else {
// If this was the last attempt, mark as failed
if (result.isFinalAttempt) {
agent.status = 'failed';
}
}
// Clear active timer
// 7. Clear active timer
this.activeTimers.delete(agentName);
// Recalculate aggregations
// 8. Recalculate phase and session-level aggregations
this.recalculateAggregations();
// 9. Persist to session.json
await this.save();
}

View File

@@ -178,6 +178,7 @@ function formatAjvErrors(errors: ErrorObject[]): string[] {
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// 1. Verify file exists
if (!(await fs.pathExists(configPath))) {
throw new PentestError(
`Configuration file not found: ${configPath}`,
@@ -188,6 +189,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 2. Check file size
const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
if (stats.size > maxFileSize) {
@@ -200,6 +202,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 3. Read and check for empty content
const configContent = await fs.readFile(configPath, 'utf8');
if (!configContent.trim()) {
@@ -212,6 +215,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 4. Parse YAML with safe schema
let config: unknown;
try {
config = yaml.load(configContent, {
@@ -230,6 +234,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 5. Guard against null/undefined parse result
if (config === null || config === undefined) {
throw new PentestError(
'Configuration file resulted in null/undefined after parsing',
@@ -240,6 +245,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 6. Validate schema, security rules, and return
validateConfig(config as Config);
return config as Config;

View File

@@ -231,7 +231,7 @@ export async function createGitCheckpoint(
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
try {
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
// 1. On retries, clean workspace to prevent pollution from previous attempt
if (attempt > 1) {
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
if (!cleanResult.success) {
@@ -239,9 +239,11 @@ export async function createGitCheckpoint(
}
}
// 2. Detect existing changes
const changes = await getChangedFiles(sourceDir, 'status check');
const hasChanges = changes.length > 0;
// 3. Stage and commit checkpoint
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
@@ -249,6 +251,7 @@ export async function createGitCheckpoint(
'creating commit'
);
// 4. Log result
if (hasChanges) {
logger.info('Checkpoint created with uncommitted changes staged');
} else {

View File

@@ -24,7 +24,7 @@ interface IncludeReplacement {
// Pure function: Build complete login instructions from config
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try {
// Load the login instructions template
// 1. Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) {
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
// Helper function to extract sections based on markers
const getSection = (content: string, sectionName: string): string => {
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
const match = regex.exec(content);
return match ? match[1]!.trim() : '';
};
// Extract sections based on login type
// 2. Extract sections based on login type
const loginType = authentication.login_type?.toUpperCase();
let loginInstructions = '';
// Build instructions with only relevant sections
const commonSection = getSection(fullTemplate, 'COMMON');
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
// Fallback to full template if markers are missing (backward compatibility)
// 3. Assemble instructions from sections (fallback to full template if markers missing)
if (!commonSection && !authSection && !verificationSection) {
logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate;
} else {
// Combine relevant sections
loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section) // Remove empty sections
.filter(section => section)
.join('\n\n');
}
// Replace the user instructions placeholder with the login flow from config
// 4. Interpolate login flow and credential placeholders
let userInstructions = (authentication.login_flow ?? []).join('\n');
// Replace credential placeholders within the user instructions
if (authentication.credentials) {
if (authentication.credentials.username) {
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
// Replace TOTP secret placeholder if present in template
// 5. Replace TOTP secret placeholder if present in template
if (authentication.credentials?.totp_secret) {
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
}
@@ -217,17 +213,15 @@ export async function loadPrompt(
logger: ActivityLogger
): Promise<string> {
try {
// Use pipeline testing prompts if pipeline testing mode is enabled
// 1. Resolve prompt file path
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptPath = path.join(promptsDir, `${promptName}.txt`);
// Debug message for pipeline testing mode
if (pipelineTestingMode) {
logger.info(`Using pipeline testing prompt: ${promptPath}`);
}
// Check if file exists first
if (!await fs.pathExists(promptPath)) {
throw new PentestError(
`Prompt file not found: ${promptPath}`,
@@ -237,25 +231,25 @@ export async function loadPrompt(
);
}
// Add MCP server assignment to variables
// 2. Assign MCP server based on agent name
const enhancedVariables: PromptVariables = { ...variables };
// Assign MCP server based on prompt name (agent name)
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
if (mcpServer) {
enhancedVariables.MCP_SERVER = mcpServer;
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
} else {
// Fallback for unknown agents
enhancedVariables.MCP_SERVER = 'playwright-agent1';
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
}
// 3. Read template file
let template = await fs.readFile(promptPath, 'utf8');
// Pre-process the template to handle @include directives
// 4. Process @include directives
template = await processIncludes(template, promptsDir);
// 5. Interpolate variables and return final prompt
return await interpolateVariables(template, enhancedVariables, config, logger);
} catch (error) {
if (error instanceof PentestError) {

View File

@@ -117,17 +117,17 @@ async function runAgentActivity(
try {
const logger = createActivityLogger();
// Build session metadata and get/create container
// 1. Build session metadata and get/create container
const sessionMetadata = buildSessionMetadata(input);
const container = getOrCreateContainer(workflowId, sessionMetadata);
// Create audit session for THIS agent execution
// 2. Create audit session for THIS agent execution
// NOTE: Each agent needs its own AuditSession because AuditSession uses
// instance state (currentAgentName) that cannot be shared across parallel agents
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
// Execute agent via service (throws PentestError on failure)
// 3. Execute agent via service (throws PentestError on failure)
const endResult = await container.agentExecution.executeOrThrow(
agentName,
{
@@ -141,7 +141,7 @@ async function runAgentActivity(
logger
);
// Success - return metrics
// 4. Return metrics
return {
durationMs: Date.now() - startTime,
inputTokens: null,
@@ -325,6 +325,7 @@ export async function loadResumeState(
expectedUrl: string,
expectedRepoPath: string
): Promise<ResumeState> {
// 1. Validate workspace exists
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
const exists = await fileExists(sessionPath);
@@ -335,6 +336,7 @@ export async function loadResumeState(
);
}
// 2. Parse session.json and validate URL match
let session: SessionJson;
try {
session = await readJson<SessionJson>(sessionPath);
@@ -353,6 +355,7 @@ export async function loadResumeState(
);
}
// 3. Cross-check agent status with deliverables on disk
const completedAgents: string[] = [];
const agents = session.metrics.agents;
@@ -375,6 +378,7 @@ export async function loadResumeState(
completedAgents.push(agentName);
}
// 4. Collect git checkpoints and validate at least one exists
const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null);
@@ -395,9 +399,11 @@ export async function loadResumeState(
);
}
// 5. Find the most recent checkpoint commit
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
// 6. Log summary and return resume state
const logger = createActivityLogger();
logger.info('Resume state loaded', {
workspace: workspaceName,
@@ -533,11 +539,12 @@ export async function logWorkflowComplete(
const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
// 1. Initialize audit session and mark final status
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status);
// Use cumulative metrics from session.json
// 2. Load cumulative metrics from session.json
const sessionData = (await auditSession.getMetrics()) as {
metrics: {
total_duration_ms: number;
@@ -546,7 +553,7 @@ export async function logWorkflowComplete(
};
};
// Fill in metrics for skipped agents
// 3. Fill in metrics for skipped agents (resumed from previous run)
const agentMetrics = { ...summary.agentMetrics };
for (const agentName of summary.completedAgents) {
if (!agentMetrics[agentName]) {
@@ -560,15 +567,18 @@ export async function logWorkflowComplete(
}
}
// 4. Build cumulative summary with cross-run totals
const cumulativeSummary: WorkflowSummary = {
...summary,
totalDurationMs: sessionData.metrics.total_duration_ms,
totalCostUsd: sessionData.metrics.total_cost_usd,
agentMetrics,
};
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary);
// Copy deliverables to audit-logs
// 6. Copy deliverables to audit-logs
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
@@ -578,6 +588,6 @@ export async function logWorkflowComplete(
});
}
// Clean up container
// 7. Clean up container
removeContainer(workflowId);
}

View File

@@ -262,11 +262,13 @@ async function resolveWorkspace(
console.log('=== RESUME MODE ===');
console.log(`Workspace: ${workspace}\n`);
// 1. Terminate any running workflows from previous attempts
const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
if (terminatedWorkflows.length > 0) {
console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
}
// 2. Validate URL matches the workspace
const session = await readJson<SessionJson>(sessionPath);
if (session.session.webUrl !== args.webUrl) {
console.error('ERROR: URL mismatch with workspace');
@@ -275,6 +277,8 @@ async function resolveWorkspace(
process.exit(1);
}
// 3. Generate a new workflow ID scoped to this resume attempt
// 4. Return resolution with isResume=true so downstream uses resume logic
return {
workflowId: `${workspace}_resume_${Date.now()}`,
sessionId: workspace,
@@ -371,9 +375,11 @@ async function waitForWorkflowResult(
}, 30000);
try {
// 1. Block until workflow completes
const result = await handle.result();
clearInterval(progressInterval);
// 2. Display run metrics
console.log('\nPipeline completed successfully!');
if (result.summary) {
console.log(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`);
@@ -381,6 +387,7 @@ async function waitForWorkflowResult(
console.log(`Total turns: ${result.summary.totalTurns}`);
console.log(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`);
// 3. Show cumulative cost across all resume attempts
if (workspace.isResume) {
try {
const session = await readJson<SessionJson>(
@@ -402,9 +409,11 @@ async function waitForWorkflowResult(
// === Main Entry Point ===
async function startPipeline(): Promise<void> {
// 1. Parse CLI args and display splash
const args = parseCliArgs(process.argv.slice(2));
await displaySplashScreen();
// 2. Connect to Temporal server
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(`Connecting to Temporal at ${address}...`);
@@ -412,9 +421,11 @@ async function startPipeline(): Promise<void> {
const client = new Client({ connection });
try {
// 3. Resolve workspace (new or resume) and build pipeline input
const workspace = await resolveWorkspace(client, args);
const input = buildPipelineInput(args, workspace);
// 4. Start the Temporal workflow
const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
'pentestPipelineWorkflow',
{
@@ -424,6 +435,7 @@ async function startPipeline(): Promise<void> {
}
);
// 5. Display info and optionally wait for completion
displayWorkflowInfo(args, workspace);
if (args.waitForCompletion) {

View File

@@ -147,12 +147,14 @@ export async function pentestPipelineWorkflow(
let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) {
// 1. Load resume state (validates workspace, cross-checks deliverables)
resumeState = await a.loadResumeState(
input.resumeFromWorkspace,
input.webUrl,
input.repoPath
);
// 2. Restore git workspace and clean up incomplete deliverables
const incompleteAgents = ALL_AGENTS.filter(
(agentName) => !resumeState!.completedAgents.includes(agentName)
) as AgentName[];
@@ -163,6 +165,7 @@ export async function pentestPipelineWorkflow(
incompleteAgents
);
// 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
state.status = 'completed';
@@ -171,6 +174,7 @@ export async function pentestPipelineWorkflow(
return state;
}
// 4. Record this resume attempt in session.json and workflow.log
await a.recordResumeAttempt(
activityInput,
input.terminatedWorkflows || [],
@@ -317,6 +321,7 @@ export async function pentestPipelineWorkflow(
const vulnAgentName = `${vulnType}-vuln`;
const exploitAgentName = `${vulnType}-exploit`;
// 1. Run vulnerability analysis (or skip if resumed)
let vulnMetrics: AgentMetrics | null = null;
if (!shouldSkip(vulnAgentName)) {
vulnMetrics = await runVulnAgent();
@@ -324,8 +329,10 @@ export async function pentestPipelineWorkflow(
log.info(`Skipping ${vulnAgentName} (already complete)`);
}
// 2. Check exploitation queue for actionable findings
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// 3. Conditionally run exploitation agent
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {