From 5596411bd38fff6478787addd656ea0283b3acca Mon Sep 17 00:00:00 2001
From: ezl-keygraph <ezhil@keygraph.io>
Date: Tue, 23 Jun 2026 14:25:17 +0530
Subject: [PATCH] fix: render agent deliverables before the success commit so
 resume preserves them (#377)

---
 apps/worker/src/services/agent-execution.ts |  11 +-
 apps/worker/src/session-manager.ts          |  17 ++-
 apps/worker/src/temporal/activities.ts      | 138 ++++++++------------
 3 files changed, 75 insertions(+), 91 deletions(-)
diff --git a/apps/worker/src/services/agent-execution.ts b/apps/worker/src/services/agent-execution.ts
index 1ad85b6..c70f10f 100644
--- a/apps/worker/src/services/agent-execution.ts
+++ b/apps/worker/src/services/agent-execution.ts
@@ -16,6 +16,7 @@
  * - Spending cap check using isSpendingCapBehavior
  * - Handle failure (rollback, audit)
  * - Validate output using AGENTS[agentName].deliverableFilename
+ * - Render the deliverable to disk via the writeDeliverable hook (if provided)
  * - Commit on success, log metrics
  *
  * No Temporal dependencies - pure domain logic.
@@ -55,6 +56,8 @@ export interface AgentExecutionInput {
   promptDir?: string | undefined;
   providerConfig?: import('../types/config.js').ProviderConfig | undefined;
   customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[];
+  // Renders the deliverable to disk; invoked after validation, before the success commit.
+  writeDeliverable?: (deliverablesPath: string) => Promise<void>;
 }
 
 interface FailAgentOpts {
@@ -110,6 +113,7 @@ export class AgentExecutionService {
       promptDir,
       providerConfig,
       customTools,
+      writeDeliverable,
     } = input;
 
     // 1. Load config (pre-parsed configData → raw YAML → file path)
@@ -241,7 +245,12 @@ export class AgentExecutionService {
       });
     }
 
-    // 10. Success - commit deliverables, then capture checkpoint hash
+    // 10. Render the deliverable to disk so the success commit below stages it
+    if (writeDeliverable) {
+      await writeDeliverable(deliverablesPath);
+    }
+
+    // 11. Success - commit deliverables, then capture checkpoint hash
     await commitGitSuccess(deliverablesPath, agentName, logger);
     const commitHash = await getGitCommitHash(deliverablesPath);
 
diff --git a/apps/worker/src/session-manager.ts b/apps/worker/src/session-manager.ts
index 5f31ad5..37121b5 100644
--- a/apps/worker/src/session-manager.ts
+++ b/apps/worker/src/session-manager.ts
@@ -127,12 +127,11 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
 
 // Factory function for vulnerability queue validators.
 //
-// Post-MCP-migration, the analysis_deliverable.md is rendered by the activity
-// wrapper after validateAgentOutput runs, so the previous "both files exist"
-// check would race the renderer. The validator only checks the queue.json —
-// that file is written by the submit-tool path in agent-execution.ts
-// before this validator runs. The downstream checkExploitationQueue still
-// renders the .md.
+// The analysis_deliverable.md is rendered via the writeDeliverable hook, which
+// AgentExecutionService runs after validateAgentOutput but before the success
+// commit — so a "both files exist" check here would race the renderer. The
+// validator only checks queue.json, written by the submit-tool path in
+// agent-execution.ts before this validator runs.
 function createVulnValidator(vulnType: VulnType): AgentValidator {
   return async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
     const queueFile = path.join(sourceDir, `${vulnType}_exploitation_queue.json`);
@@ -145,9 +144,9 @@ function createVulnValidator(vulnType: VulnType): AgentValidator {
   };
 }
 
-// Exploitation agents — validation lives in runExploitAgentWithCollector post-processing
-// (collector harvest + renderer write). The deliverable file is written by the renderer
-// after the agent succeeds, so a file-existence check here would race the renderer.
+// Exploitation agents — the evidence deliverable is rendered via the writeDeliverable
+// hook after the agent succeeds (before the success commit), so a file-existence check
+// here would race the renderer.
 //
 // VulnType is kept in the import surface for createVulnValidator above; this factory
 // returns a no-op validator parameterized only for symmetry with the vuln-side factory.
diff --git a/apps/worker/src/temporal/activities.ts b/apps/worker/src/temporal/activities.ts
index ae7e193..ba7a297 100644
--- a/apps/worker/src/temporal/activities.ts
+++ b/apps/worker/src/temporal/activities.ts
@@ -138,6 +138,7 @@ async function runAgentActivity(
   agentName: AgentName,
   input: ActivityInput,
   customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[],
+  writeDeliverable?: (deliverablesPath: string) => Promise<void>,
 ): Promise<AgentMetrics> {
   const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
 
@@ -193,6 +194,7 @@ async function runAgentActivity(
         ...(input.promptDir !== undefined && { promptDir: input.promptDir }),
         ...(input.configYAML !== undefined && { configYAML: input.configYAML }),
         ...(customTools && { customTools }),
+        ...(writeDeliverable && { writeDeliverable }),
       },
       auditSession,
       logger,
@@ -256,28 +258,21 @@ export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetri
   const { renderPreRecon } = await import('../services/pre-recon-renderer.js');
 
   const collector = createPreReconCollectorServer();
-  const metrics = await runAgentActivity('pre-recon', input, collector.tools);
 
-  // On resume, the agent is skipped and the collector is never populated.
-  // The cached deliverable from the prior run is the source of truth.
-  if (metrics.skipped) {
-    return metrics;
-  }
+  const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
+    const logger = createActivityLogger();
+    // Skipped tools surface as renderer placeholders, not as activity failures.
+    const callStatus = collector.getCallStatus();
+    logger.info('Pre-recon tool call status', { callStatus });
 
-  const logger = createActivityLogger();
-  const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
+    const collected = collector.getAll();
+    const markdown = renderPreRecon(collected);
+    const mdPath = path.join(deliverablesPath, 'pre_recon_deliverable.md');
+    await atomicWrite(mdPath, markdown);
+    logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`);
+  };
 
-  // Skipped tools surface as renderer placeholders, not as activity failures.
-  const callStatus = collector.getCallStatus();
-  logger.info('Pre-recon tool call status', { callStatus });
-
-  const collected = collector.getAll();
-  const markdown = renderPreRecon(collected);
-  const mdPath = path.join(dir, 'pre_recon_deliverable.md');
-  await atomicWrite(mdPath, markdown);
-  logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`);
-
-  return metrics;
+  return runAgentActivity('pre-recon', input, collector.tools, writeDeliverable);
 }
 
 export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics> {
@@ -285,28 +280,21 @@ export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics>
   const { renderRecon } = await import('../services/recon-renderer.js');
 
   const collector = createReconCollectorServer();
-  const metrics = await runAgentActivity('recon', input, collector.tools);
 
-  // On resume, the agent is skipped and the collector is never populated.
-  // The cached deliverable from the prior run is the source of truth.
-  if (metrics.skipped) {
-    return metrics;
-  }
+  const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
+    const logger = createActivityLogger();
+    // Skipped tools surface as renderer placeholders, not as activity failures.
+    const callStatus = collector.getCallStatus();
+    logger.info('Recon tool call status', { callStatus });
 
-  const logger = createActivityLogger();
-  const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
+    const collected = collector.getAll();
+    const markdown = renderRecon(collected);
+    const mdPath = path.join(deliverablesPath, 'recon_deliverable.md');
+    await atomicWrite(mdPath, markdown);
+    logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`);
+  };
 
-  // Skipped tools surface as renderer placeholders, not as activity failures.
-  const callStatus = collector.getCallStatus();
-  logger.info('Recon tool call status', { callStatus });
-
-  const collected = collector.getAll();
-  const markdown = renderRecon(collected);
-  const mdPath = path.join(dir, 'recon_deliverable.md');
-  await atomicWrite(mdPath, markdown);
-  logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`);
-
-  return metrics;
+  return runAgentActivity('recon', input, collector.tools, writeDeliverable);
 }
 
 async function runVulnAgentWithCollector(
@@ -318,28 +306,21 @@ async function runVulnAgentWithCollector(
   const { renderVulnDeliverable } = await import('../services/vuln-renderer.js');
 
   const collector = createVulnCollector(vulnClass);
-  const metrics = await runAgentActivity(agentName, input, collector.tools);
 
-  // On resume, the agent is skipped and the collector is never populated.
-  // The cached deliverable from the prior run is the source of truth.
-  if (metrics.skipped) {
-    return metrics;
-  }
+  const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
+    const logger = createActivityLogger();
+    // Skipped tools surface as renderer placeholders, not as activity failures.
+    const callStatus = collector.getCallStatus();
+    logger.info(`${vulnClass} vuln tool call status`, { callStatus });
 
-  const logger = createActivityLogger();
-  const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
+    const collected = collector.getAll();
+    const markdown = renderVulnDeliverable(vulnClass, collected);
+    const mdPath = path.join(deliverablesPath, `${vulnClass}_analysis_deliverable.md`);
+    await atomicWrite(mdPath, markdown);
+    logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`);
+  };
 
-  // Skipped tools surface as renderer placeholders, not as activity failures.
-  const callStatus = collector.getCallStatus();
-  logger.info(`${vulnClass} vuln tool call status`, { callStatus });
-
-  const collected = collector.getAll();
-  const markdown = renderVulnDeliverable(vulnClass, collected);
-  const mdPath = path.join(dir, `${vulnClass}_analysis_deliverable.md`);
-  await atomicWrite(mdPath, markdown);
-  logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`);
-
-  return metrics;
+  return runAgentActivity(agentName, input, collector.tools, writeDeliverable);
 }
 
 export async function runInjectionVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
@@ -399,34 +380,29 @@ async function runExploitAgentWithCollector(
   const { validIds, idToType } = await readExploitQueue(queuePath);
 
   const collector = createExploitCollector({ vulnClass, validIds });
-  const metrics = await runAgentActivity(agentName, input, collector.tools);
 
-  // On resume, the agent is skipped and the collector is never populated.
-  // The cached deliverable from the prior run is the source of truth.
-  if (metrics.skipped) {
-    return metrics;
-  }
+  const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
+    const logger = createActivityLogger();
+    const collected = collector.getAll();
+    const emittedIds = new Set(collected.map((e) => e.vulnerability_id));
+    const missingIds = [...validIds].filter((id) => !emittedIds.has(id));
+    const exploitedCount = collected.filter((e) => e.status === 'exploited').length;
+    const blockedCount = collected.filter((e) => e.status === 'blocked').length;
 
-  const logger = createActivityLogger();
-  const collected = collector.getAll();
-  const emittedIds = new Set(collected.map((e) => e.vulnerability_id));
-  const missingIds = [...validIds].filter((id) => !emittedIds.has(id));
-  const exploitedCount = collected.filter((e) => e.status === 'exploited').length;
-  const blockedCount = collected.filter((e) => e.status === 'blocked').length;
+    logger.info(`${vulnClass} exploit tool call metrics`, {
+      queueSize: validIds.size,
+      exploited: exploitedCount,
+      blocked: blockedCount,
+      missing: missingIds.length,
+    });
 
-  logger.info(`${vulnClass} exploit tool call metrics`, {
-    queueSize: validIds.size,
-    exploited: exploitedCount,
-    blocked: blockedCount,
-    missing: missingIds.length,
-  });
+    const markdown = renderExploitDeliverable(vulnClass, collected, idToType);
+    const mdPath = path.join(deliverablesPath, `${vulnClass}_exploitation_evidence.md`);
+    await atomicWrite(mdPath, markdown);
+    logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`);
+  };
 
-  const markdown = renderExploitDeliverable(vulnClass, collected, idToType);
-  const mdPath = path.join(dir, `${vulnClass}_exploitation_evidence.md`);
-  await atomicWrite(mdPath, markdown);
-  logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`);
-
-  return metrics;
+  return runAgentActivity(agentName, input, collector.tools, writeDeliverable);
 }
 
 export async function runInjectionExploitAgent(input: ActivityInput): Promise<AgentMetrics> {