mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-06-30 10:35:36 +02:00
fix: render agent deliverables before the success commit so resume preserves them (#377)
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
* - Spending cap check using isSpendingCapBehavior
|
||||
* - Handle failure (rollback, audit)
|
||||
* - Validate output using AGENTS[agentName].deliverableFilename
|
||||
* - Render the deliverable to disk via the writeDeliverable hook (if provided)
|
||||
* - Commit on success, log metrics
|
||||
*
|
||||
* No Temporal dependencies - pure domain logic.
|
||||
@@ -55,6 +56,8 @@ export interface AgentExecutionInput {
|
||||
promptDir?: string | undefined;
|
||||
providerConfig?: import('../types/config.js').ProviderConfig | undefined;
|
||||
customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[];
|
||||
// Renders the deliverable to disk; invoked after validation, before the success commit.
|
||||
writeDeliverable?: (deliverablesPath: string) => Promise<void>;
|
||||
}
|
||||
|
||||
interface FailAgentOpts {
|
||||
@@ -110,6 +113,7 @@ export class AgentExecutionService {
|
||||
promptDir,
|
||||
providerConfig,
|
||||
customTools,
|
||||
writeDeliverable,
|
||||
} = input;
|
||||
|
||||
// 1. Load config (pre-parsed configData → raw YAML → file path)
|
||||
@@ -241,7 +245,12 @@ export class AgentExecutionService {
|
||||
});
|
||||
}
|
||||
|
||||
// 10. Success - commit deliverables, then capture checkpoint hash
|
||||
// 10. Render the deliverable to disk so the success commit below stages it
|
||||
if (writeDeliverable) {
|
||||
await writeDeliverable(deliverablesPath);
|
||||
}
|
||||
|
||||
// 11. Success - commit deliverables, then capture checkpoint hash
|
||||
await commitGitSuccess(deliverablesPath, agentName, logger);
|
||||
const commitHash = await getGitCommitHash(deliverablesPath);
|
||||
|
||||
|
||||
@@ -127,12 +127,11 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
|
||||
|
||||
// Factory function for vulnerability queue validators.
|
||||
//
|
||||
// Post-MCP-migration, the analysis_deliverable.md is rendered by the activity
|
||||
// wrapper after validateAgentOutput runs, so the previous "both files exist"
|
||||
// check would race the renderer. The validator only checks the queue.json —
|
||||
// that file is written by the submit-tool path in agent-execution.ts
|
||||
// before this validator runs. The downstream checkExploitationQueue still
|
||||
// renders the .md.
|
||||
// The analysis_deliverable.md is rendered via the writeDeliverable hook, which
|
||||
// AgentExecutionService runs after validateAgentOutput but before the success
|
||||
// commit — so a "both files exist" check here would race the renderer. The
|
||||
// validator only checks queue.json, written by the submit-tool path in
|
||||
// agent-execution.ts before this validator runs.
|
||||
function createVulnValidator(vulnType: VulnType): AgentValidator {
|
||||
return async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
|
||||
const queueFile = path.join(sourceDir, `${vulnType}_exploitation_queue.json`);
|
||||
@@ -145,9 +144,9 @@ function createVulnValidator(vulnType: VulnType): AgentValidator {
|
||||
};
|
||||
}
|
||||
|
||||
// Exploitation agents — validation lives in runExploitAgentWithCollector post-processing
|
||||
// (collector harvest + renderer write). The deliverable file is written by the renderer
|
||||
// after the agent succeeds, so a file-existence check here would race the renderer.
|
||||
// Exploitation agents — the evidence deliverable is rendered via the writeDeliverable
|
||||
// hook after the agent succeeds (before the success commit), so a file-existence check
|
||||
// here would race the renderer.
|
||||
//
|
||||
// VulnType is kept in the import surface for createVulnValidator above; this factory
|
||||
// returns a no-op validator parameterized only for symmetry with the vuln-side factory.
|
||||
|
||||
@@ -138,6 +138,7 @@ async function runAgentActivity(
|
||||
agentName: AgentName,
|
||||
input: ActivityInput,
|
||||
customTools?: import('@earendil-works/pi-coding-agent').ToolDefinition[],
|
||||
writeDeliverable?: (deliverablesPath: string) => Promise<void>,
|
||||
): Promise<AgentMetrics> {
|
||||
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
|
||||
|
||||
@@ -193,6 +194,7 @@ async function runAgentActivity(
|
||||
...(input.promptDir !== undefined && { promptDir: input.promptDir }),
|
||||
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
|
||||
...(customTools && { customTools }),
|
||||
...(writeDeliverable && { writeDeliverable }),
|
||||
},
|
||||
auditSession,
|
||||
logger,
|
||||
@@ -256,28 +258,21 @@ export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetri
|
||||
const { renderPreRecon } = await import('../services/pre-recon-renderer.js');
|
||||
|
||||
const collector = createPreReconCollectorServer();
|
||||
const metrics = await runAgentActivity('pre-recon', input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
if (metrics.skipped) {
|
||||
return metrics;
|
||||
}
|
||||
const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
|
||||
const logger = createActivityLogger();
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info('Pre-recon tool call status', { callStatus });
|
||||
|
||||
const logger = createActivityLogger();
|
||||
const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderPreRecon(collected);
|
||||
const mdPath = path.join(deliverablesPath, 'pre_recon_deliverable.md');
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
};
|
||||
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info('Pre-recon tool call status', { callStatus });
|
||||
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderPreRecon(collected);
|
||||
const mdPath = path.join(dir, 'pre_recon_deliverable.md');
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote pre_recon_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
|
||||
return metrics;
|
||||
return runAgentActivity('pre-recon', input, collector.tools, writeDeliverable);
|
||||
}
|
||||
|
||||
export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics> {
|
||||
@@ -285,28 +280,21 @@ export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics>
|
||||
const { renderRecon } = await import('../services/recon-renderer.js');
|
||||
|
||||
const collector = createReconCollectorServer();
|
||||
const metrics = await runAgentActivity('recon', input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
if (metrics.skipped) {
|
||||
return metrics;
|
||||
}
|
||||
const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
|
||||
const logger = createActivityLogger();
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info('Recon tool call status', { callStatus });
|
||||
|
||||
const logger = createActivityLogger();
|
||||
const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderRecon(collected);
|
||||
const mdPath = path.join(deliverablesPath, 'recon_deliverable.md');
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
};
|
||||
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info('Recon tool call status', { callStatus });
|
||||
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderRecon(collected);
|
||||
const mdPath = path.join(dir, 'recon_deliverable.md');
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote recon_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
|
||||
return metrics;
|
||||
return runAgentActivity('recon', input, collector.tools, writeDeliverable);
|
||||
}
|
||||
|
||||
async function runVulnAgentWithCollector(
|
||||
@@ -318,28 +306,21 @@ async function runVulnAgentWithCollector(
|
||||
const { renderVulnDeliverable } = await import('../services/vuln-renderer.js');
|
||||
|
||||
const collector = createVulnCollector(vulnClass);
|
||||
const metrics = await runAgentActivity(agentName, input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
if (metrics.skipped) {
|
||||
return metrics;
|
||||
}
|
||||
const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
|
||||
const logger = createActivityLogger();
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info(`${vulnClass} vuln tool call status`, { callStatus });
|
||||
|
||||
const logger = createActivityLogger();
|
||||
const dir = deliverablesDir(input.repoPath, input.deliverablesSubdir);
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderVulnDeliverable(vulnClass, collected);
|
||||
const mdPath = path.join(deliverablesPath, `${vulnClass}_analysis_deliverable.md`);
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
};
|
||||
|
||||
// Skipped tools surface as renderer placeholders, not as activity failures.
|
||||
const callStatus = collector.getCallStatus();
|
||||
logger.info(`${vulnClass} vuln tool call status`, { callStatus });
|
||||
|
||||
const collected = collector.getAll();
|
||||
const markdown = renderVulnDeliverable(vulnClass, collected);
|
||||
const mdPath = path.join(dir, `${vulnClass}_analysis_deliverable.md`);
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote ${vulnClass}_analysis_deliverable.md from structured data (${markdown.length} bytes)`);
|
||||
|
||||
return metrics;
|
||||
return runAgentActivity(agentName, input, collector.tools, writeDeliverable);
|
||||
}
|
||||
|
||||
export async function runInjectionVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
|
||||
@@ -399,34 +380,29 @@ async function runExploitAgentWithCollector(
|
||||
const { validIds, idToType } = await readExploitQueue(queuePath);
|
||||
|
||||
const collector = createExploitCollector({ vulnClass, validIds });
|
||||
const metrics = await runAgentActivity(agentName, input, collector.tools);
|
||||
|
||||
// On resume, the agent is skipped and the collector is never populated.
|
||||
// The cached deliverable from the prior run is the source of truth.
|
||||
if (metrics.skipped) {
|
||||
return metrics;
|
||||
}
|
||||
const writeDeliverable = async (deliverablesPath: string): Promise<void> => {
|
||||
const logger = createActivityLogger();
|
||||
const collected = collector.getAll();
|
||||
const emittedIds = new Set(collected.map((e) => e.vulnerability_id));
|
||||
const missingIds = [...validIds].filter((id) => !emittedIds.has(id));
|
||||
const exploitedCount = collected.filter((e) => e.status === 'exploited').length;
|
||||
const blockedCount = collected.filter((e) => e.status === 'blocked').length;
|
||||
|
||||
const logger = createActivityLogger();
|
||||
const collected = collector.getAll();
|
||||
const emittedIds = new Set(collected.map((e) => e.vulnerability_id));
|
||||
const missingIds = [...validIds].filter((id) => !emittedIds.has(id));
|
||||
const exploitedCount = collected.filter((e) => e.status === 'exploited').length;
|
||||
const blockedCount = collected.filter((e) => e.status === 'blocked').length;
|
||||
logger.info(`${vulnClass} exploit tool call metrics`, {
|
||||
queueSize: validIds.size,
|
||||
exploited: exploitedCount,
|
||||
blocked: blockedCount,
|
||||
missing: missingIds.length,
|
||||
});
|
||||
|
||||
logger.info(`${vulnClass} exploit tool call metrics`, {
|
||||
queueSize: validIds.size,
|
||||
exploited: exploitedCount,
|
||||
blocked: blockedCount,
|
||||
missing: missingIds.length,
|
||||
});
|
||||
const markdown = renderExploitDeliverable(vulnClass, collected, idToType);
|
||||
const mdPath = path.join(deliverablesPath, `${vulnClass}_exploitation_evidence.md`);
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`);
|
||||
};
|
||||
|
||||
const markdown = renderExploitDeliverable(vulnClass, collected, idToType);
|
||||
const mdPath = path.join(dir, `${vulnClass}_exploitation_evidence.md`);
|
||||
await atomicWrite(mdPath, markdown);
|
||||
logger.info(`Wrote ${vulnClass}_exploitation_evidence.md from structured data (${markdown.length} bytes)`);
|
||||
|
||||
return metrics;
|
||||
return runAgentActivity(agentName, input, collector.tools, writeDeliverable);
|
||||
}
|
||||
|
||||
export async function runInjectionExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
|
||||
|
||||
Reference in New Issue
Block a user