From 5596411bd38fff6478787addd656ea0283b3acca Mon Sep 17 00:00:00 2001 From: ezl-keygraph Date: Tue, 23 Jun 2026 14:25:17 +0530 Subject: [PATCH] fix: render agent deliverables before the success commit so resume preserves them (#377) --- apps/worker/src/services/agent-execution.ts | 11 +- apps/worker/src/session-manager.ts | 17 ++- apps/worker/src/temporal/activities.ts | 138 ++++++++------------ 3 files changed, 75 insertions(+), 91 deletions(-) diff --git a/apps/worker/src/services/agent-execution.ts b/apps/worker/src/services/agent-execution.ts index 1ad85b6..c70f10f 100644 --- a/apps/worker/src/services/agent-execution.ts +++ b/apps/worker/src/services/agent-execution.ts @@ -16,6 +16,7 @@ * - Spending cap check using isSpendingCapBehavior * - Handle failure (rollback, audit) * - Validate output using AGENTS[agentName].deliverableFilename + * - Render the deliverable to disk via the writeDeliverable hook (if provided) * - Commit on success, log metrics * * No Temporal dependencies - pure domain logic. @@ -55,6 +56,8 @@ export interface AgentExecutionInput { promptDir?: string | undefined; providerConfig?: import('../types/config.js').ProviderConfig | undefined; customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[]; + // Renders the deliverable to disk; invoked after validation, before the success commit. + writeDeliverable?: (deliverablesPath: string) => Promise; } interface FailAgentOpts { @@ -110,6 +113,7 @@ export class AgentExecutionService { promptDir, providerConfig, customTools, + writeDeliverable, } = input; // 1. Load config (pre-parsed configData → raw YAML → file path) @@ -241,7 +245,12 @@ export class AgentExecutionService { }); } - // 10. Success - commit deliverables, then capture checkpoint hash + // 10. Render the deliverable to disk so the success commit below stages it + if (writeDeliverable) { + await writeDeliverable(deliverablesPath); + } + + // 11. Success - commit deliverables, then capture checkpoint hash await commitGitSuccess(deliverablesPath, agentName, logger); const commitHash = await getGitCommitHash(deliverablesPath); diff --git a/apps/worker/src/session-manager.ts b/apps/worker/src/session-manager.ts index 5f31ad5..37121b5 100644 --- a/apps/worker/src/session-manager.ts +++ b/apps/worker/src/session-manager.ts @@ -127,12 +127,11 @@ export const AGENT_PHASE_MAP: Readonly> = Object.fr // Factory function for vulnerability queue validators. // -// Post-MCP-migration, the analysis_deliverable.md is rendered by the activity -// wrapper after validateAgentOutput runs, so the previous "both files exist" -// check would race the renderer. The validator only checks the queue.json — -// that file is written by the submit-tool path in agent-execution.ts -// before this validator runs. The downstream checkExploitationQueue still -// renders the .md. +// The analysis_deliverable.md is rendered via the writeDeliverable hook, which +// AgentExecutionService runs after validateAgentOutput but before the success +// commit — so a "both files exist" check here would race the renderer. The +// validator only checks queue.json, written by the submit-tool path in +// agent-execution.ts before this validator runs. function createVulnValidator(vulnType: VulnType): AgentValidator { return async (sourceDir: string, logger: ActivityLogger): Promise => { const queueFile = path.join(sourceDir, `${vulnType}_exploitation_queue.json`); @@ -145,9 +144,9 @@ function createVulnValidator(vulnType: VulnType): AgentValidator { }; } -// Exploitation agents — validation lives in runExploitAgentWithCollector post-processing -// (collector harvest + renderer write). The deliverable file is written by the renderer -// after the agent succeeds, so a file-existence check here would race the renderer. +// Exploitation agents — the evidence deliverable is rendered via the writeDeliverable +// hook after the agent succeeds (before the success commit), so a file-existence check +// here would race the renderer. // // VulnType is kept in the import surface for createVulnValidator above; this factory // returns a no-op validator parameterized only for symmetry with the vuln-side factory. diff --git a/apps/worker/src/temporal/activities.ts b/apps/worker/src/temporal/activities.ts index ae7e193..ba7a297 100644 --- a/apps/worker/src/temporal/activities.ts +++ b/apps/worker/src/temporal/activities.ts @@ -138,6 +138,7 @@ async function runAgentActivity( agentName: AgentName, input: ActivityInput, customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[], + writeDeliverable?: (deliverablesPath: string) => Promise, ): Promise { const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input; @@ -193,6 +194,7 @@ async function runAgentActivity( ...(input.promptDir !== undefined && { promptDir: input.promptDir }), ...(input.configYAML !== undefined && { configYAML: input.configYAML }), ...(customTools && { customTools }), + ...(writeDeliverable && { writeDeliverable }), }, auditSession, logger, @@ -256,28 +258,21 @@ export async function runPreReconAgent(input: ActivityInput): Promise => { + const logger = createActivityLogger(); + // Skipped tools surface as renderer placeholders, not as activity failures. + const callStatus = collector.getCallStatus(); + logger.info('Pre-recon tool call status', { callStatus }); - const logger = createActivityLogger(); - const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir); + const collected = collector.getAll(); + const markdown = renderPreRecon(collected); + const mdPath = path.join(deliverablesPath, 'pre_recon_deliverable.md'); + await atomicWrite(mdPath, markdown); + logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`); + }; - // Skipped tools surface as renderer placeholders, not as activity failures. - const callStatus = collector.getCallStatus(); - logger.info('Pre-recon tool call status', { callStatus }); - - const collected = collector.getAll(); - const markdown = renderPreRecon(collected); - const mdPath = path.join(dir, 'pre_recon_deliverable.md'); - await atomicWrite(mdPath, markdown); - logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`); - - return metrics; + return runAgentActivity('pre-recon', input, collector.tools, writeDeliverable); } export async function runReconAgent(input: ActivityInput): Promise { @@ -285,28 +280,21 @@ export async function runReconAgent(input: ActivityInput): Promise const { renderRecon } = await import('../services/recon-renderer.js'); const collector = createReconCollectorServer(); - const metrics = await runAgentActivity('recon', input, collector.tools); - // On resume, the agent is skipped and the collector is never populated. - // The cached deliverable from the prior run is the source of truth. - if (metrics.skipped) { - return metrics; - } + const writeDeliverable = async (deliverablesPath: string): Promise => { + const logger = createActivityLogger(); + // Skipped tools surface as renderer placeholders, not as activity failures. + const callStatus = collector.getCallStatus(); + logger.info('Recon tool call status', { callStatus }); - const logger = createActivityLogger(); - const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir); + const collected = collector.getAll(); + const markdown = renderRecon(collected); + const mdPath = path.join(deliverablesPath, 'recon_deliverable.md'); + await atomicWrite(mdPath, markdown); + logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`); + }; - // Skipped tools surface as renderer placeholders, not as activity failures. - const callStatus = collector.getCallStatus(); - logger.info('Recon tool call status', { callStatus }); - - const collected = collector.getAll(); - const markdown = renderRecon(collected); - const mdPath = path.join(dir, 'recon_deliverable.md'); - await atomicWrite(mdPath, markdown); - logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`); - - return metrics; + return runAgentActivity('recon', input, collector.tools, writeDeliverable); } async function runVulnAgentWithCollector( @@ -318,28 +306,21 @@ async function runVulnAgentWithCollector( const { renderVulnDeliverable } = await import('../services/vuln-renderer.js'); const collector = createVulnCollector(vulnClass); - const metrics = await runAgentActivity(agentName, input, collector.tools); - // On resume, the agent is skipped and the collector is never populated. - // The cached deliverable from the prior run is the source of truth. - if (metrics.skipped) { - return metrics; - } + const writeDeliverable = async (deliverablesPath: string): Promise => { + const logger = createActivityLogger(); + // Skipped tools surface as renderer placeholders, not as activity failures. + const callStatus = collector.getCallStatus(); + logger.info(`${vulnClass} vuln tool call status`, { callStatus }); - const logger = createActivityLogger(); - const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir); + const collected = collector.getAll(); + const markdown = renderVulnDeliverable(vulnClass, collected); + const mdPath = path.join(deliverablesPath, `${vulnClass}_analysis_deliverable.md`); + await atomicWrite(mdPath, markdown); + logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`); + }; - // Skipped tools surface as renderer placeholders, not as activity failures. - const callStatus = collector.getCallStatus(); - logger.info(`${vulnClass} vuln tool call status`, { callStatus }); - - const collected = collector.getAll(); - const markdown = renderVulnDeliverable(vulnClass, collected); - const mdPath = path.join(dir, `${vulnClass}_analysis_deliverable.md`); - await atomicWrite(mdPath, markdown); - logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`); - - return metrics; + return runAgentActivity(agentName, input, collector.tools, writeDeliverable); } export async function runInjectionVulnAgent(input: ActivityInput): Promise { @@ -399,34 +380,29 @@ async function runExploitAgentWithCollector( const { validIds, idToType } = await readExploitQueue(queuePath); const collector = createExploitCollector({ vulnClass, validIds }); - const metrics = await runAgentActivity(agentName, input, collector.tools); - // On resume, the agent is skipped and the collector is never populated. - // The cached deliverable from the prior run is the source of truth. - if (metrics.skipped) { - return metrics; - } + const writeDeliverable = async (deliverablesPath: string): Promise => { + const logger = createActivityLogger(); + const collected = collector.getAll(); + const emittedIds = new Set(collected.map((e) => e.vulnerability_id)); + const missingIds = [...validIds].filter((id) => !emittedIds.has(id)); + const exploitedCount = collected.filter((e) => e.status === 'exploited').length; + const blockedCount = collected.filter((e) => e.status === 'blocked').length; - const logger = createActivityLogger(); - const collected = collector.getAll(); - const emittedIds = new Set(collected.map((e) => e.vulnerability_id)); - const missingIds = [...validIds].filter((id) => !emittedIds.has(id)); - const exploitedCount = collected.filter((e) => e.status === 'exploited').length; - const blockedCount = collected.filter((e) => e.status === 'blocked').length; + logger.info(`${vulnClass} exploit tool call metrics`, { + queueSize: validIds.size, + exploited: exploitedCount, + blocked: blockedCount, + missing: missingIds.length, + }); - logger.info(`${vulnClass} exploit tool call metrics`, { - queueSize: validIds.size, - exploited: exploitedCount, - blocked: blockedCount, - missing: missingIds.length, - }); + const markdown = renderExploitDeliverable(vulnClass, collected, idToType); + const mdPath = path.join(deliverablesPath, `${vulnClass}_exploitation_evidence.md`); + await atomicWrite(mdPath, markdown); + logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`); + }; - const markdown = renderExploitDeliverable(vulnClass, collected, idToType); - const mdPath = path.join(dir, `${vulnClass}_exploitation_evidence.md`); - await atomicWrite(mdPath, markdown); - logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`); - - return metrics; + return runAgentActivity(agentName, input, collector.tools, writeDeliverable); } export async function runInjectionExploitAgent(input: ActivityInput): Promise {