mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-04-01 02:10:55 +02:00
refactor: add numbered step comments to 20 complex sequential functions
- Add // N. Description steps to temporal layer (client, activities, workflows) - Add steps to AI layer (claude-executor: runClaudePrompt, buildMcpServers) - Add steps to services layer (prompt-manager, config-parser, git-manager) - Add steps to audit layer (metrics-tracker, audit-session) - Update CLAUDE.md comment guidelines with clearer numbered-step vs section-divider guidance
This commit is contained in:
@@ -129,8 +129,13 @@ Comments must be **timeless** — no references to this conversation, refactorin
|
||||
|
||||
**Patterns used in this codebase:**
|
||||
- `/** JSDoc */` — file headers (after license) and exported functions/interfaces
|
||||
- `// 1. Step` — numbered steps for sequential operations
|
||||
- `// === Section ===` — dividers in long files/functions
|
||||
- `// N. Description` — numbered sequential steps inside function bodies. Use when a
|
||||
function has 3+ distinct phases where at least one isn't immediately obvious from the
|
||||
code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
|
||||
(steps 1-9) and `injectModelIntoReport` (steps 1-5)
|
||||
- `// === Section ===` — high-level dividers between groups of functions in long files,
|
||||
or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
|
||||
Not for sequential steps inside function bodies — use numbered steps for that
|
||||
- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
|
||||
|
||||
**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")
|
||||
|
||||
@@ -60,12 +60,14 @@ function buildMcpServers(
|
||||
agentName: string | null,
|
||||
logger: ActivityLogger
|
||||
): Record<string, McpServer> {
|
||||
// 1. Create the shannon-helper server (always present)
|
||||
const shannonHelperServer = createShannonHelperServer(sourceDir);
|
||||
|
||||
const mcpServers: Record<string, McpServer> = {
|
||||
'shannon-helper': shannonHelperServer,
|
||||
};
|
||||
|
||||
// 2. Look up the agent's Playwright MCP mapping
|
||||
if (agentName) {
|
||||
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
|
||||
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
|
||||
@@ -75,7 +77,7 @@ function buildMcpServers(
|
||||
|
||||
const userDataDir = `/tmp/${playwrightMcpName}`;
|
||||
|
||||
// Docker uses system Chromium; local dev uses Playwright's bundled browsers
|
||||
// 3. Configure Playwright MCP args with Docker/local browser handling
|
||||
const isDocker = process.env.SHANNON_DOCKER === 'true';
|
||||
|
||||
const mcpArgs: string[] = [
|
||||
@@ -84,7 +86,6 @@ function buildMcpServers(
|
||||
'--user-data-dir', userDataDir,
|
||||
];
|
||||
|
||||
// Docker: Use system Chromium; Local: Use Playwright's bundled browsers
|
||||
if (isDocker) {
|
||||
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
|
||||
mcpArgs.push('--browser', 'chromium');
|
||||
@@ -107,6 +108,7 @@ function buildMcpServers(
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Return configured servers
|
||||
return mcpServers;
|
||||
}
|
||||
|
||||
@@ -202,9 +204,11 @@ export async function runClaudePrompt(
|
||||
auditSession: AuditSession | null = null,
|
||||
logger: ActivityLogger
|
||||
): Promise<ClaudePromptResult> {
|
||||
// 1. Initialize timing and prompt
|
||||
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
|
||||
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
|
||||
|
||||
// 2. Set up progress and audit infrastructure
|
||||
const execContext = detectExecutionContext(description);
|
||||
const progress = createProgressManager(
|
||||
{ description, useCleanOutput: execContext.useCleanOutput },
|
||||
@@ -214,9 +218,10 @@ export async function runClaudePrompt(
|
||||
|
||||
logger.info(`Running Claude Code: ${description}...`);
|
||||
|
||||
// 3. Configure MCP servers
|
||||
const mcpServers = buildMcpServers(sourceDir, agentName, logger);
|
||||
|
||||
// Build env vars to pass to SDK subprocesses
|
||||
// 4. Build env vars to pass to SDK subprocesses
|
||||
const sdkEnv: Record<string, string> = {
|
||||
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
|
||||
};
|
||||
@@ -227,6 +232,7 @@ export async function runClaudePrompt(
|
||||
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
|
||||
}
|
||||
|
||||
// 5. Configure SDK options
|
||||
const options = {
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
maxTurns: 10_000,
|
||||
@@ -249,6 +255,7 @@ export async function runClaudePrompt(
|
||||
progress.start();
|
||||
|
||||
try {
|
||||
// 6. Process the message stream
|
||||
const messageLoopResult = await processMessageStream(
|
||||
fullPrompt,
|
||||
options,
|
||||
@@ -263,7 +270,7 @@ export async function runClaudePrompt(
|
||||
const model = messageLoopResult.model;
|
||||
|
||||
// === SPENDING CAP SAFEGUARD ===
|
||||
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
// 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
// Uses consolidated billing detection from utils/billing-detection.ts
|
||||
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
|
||||
throw new PentestError(
|
||||
@@ -273,6 +280,7 @@ export async function runClaudePrompt(
|
||||
);
|
||||
}
|
||||
|
||||
// 8. Finalize successful result
|
||||
const duration = timer.stop();
|
||||
|
||||
if (apiErrorDetected) {
|
||||
@@ -293,6 +301,7 @@ export async function runClaudePrompt(
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
// 9. Handle errors — log, write error file, return failure
|
||||
const duration = timer.stop();
|
||||
|
||||
const err = error as Error & { code?: string; status?: number };
|
||||
|
||||
@@ -107,18 +107,20 @@ export class AuditSession {
|
||||
): Promise<void> {
|
||||
await this.ensureInitialized();
|
||||
|
||||
// Save prompt snapshot (only on first attempt)
|
||||
// 1. Save prompt snapshot (only on first attempt)
|
||||
if (attemptNumber === 1) {
|
||||
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
|
||||
}
|
||||
|
||||
// 2. Create and initialize the per-agent logger
|
||||
this.currentAgentName = agentName;
|
||||
|
||||
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
|
||||
await this.currentLogger.initialize();
|
||||
|
||||
// 3. Start metrics timer
|
||||
this.metricsTracker.startAgent(agentName, attemptNumber);
|
||||
|
||||
// 4. Log start event to both agent log and workflow log
|
||||
await this.currentLogger.logEvent('agent_start', {
|
||||
agentName,
|
||||
attemptNumber,
|
||||
@@ -172,6 +174,7 @@ export class AuditSession {
|
||||
* End agent execution (mutex-protected)
|
||||
*/
|
||||
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
|
||||
// 1. Finalize agent log and close the stream
|
||||
if (this.currentLogger) {
|
||||
await this.currentLogger.logEvent('agent_end', {
|
||||
agentName,
|
||||
@@ -185,6 +188,7 @@ export class AuditSession {
|
||||
this.currentLogger = null;
|
||||
}
|
||||
|
||||
// 2. Log completion to the unified workflow log
|
||||
this.currentAgentName = null;
|
||||
|
||||
const agentLogDetails: AgentLogDetails = {
|
||||
@@ -196,12 +200,11 @@ export class AuditSession {
|
||||
};
|
||||
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
|
||||
|
||||
// Mutex-protected update to session.json
|
||||
// 3. Acquire mutex before touching session.json
|
||||
const unlock = await sessionMutex.lock(this.sessionId);
|
||||
try {
|
||||
// Reload inside mutex to prevent lost updates during parallel exploitation phase
|
||||
// 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
|
||||
await this.metricsTracker.reload();
|
||||
|
||||
await this.metricsTracker.endAgent(agentName, result);
|
||||
} finally {
|
||||
unlock();
|
||||
|
||||
@@ -170,7 +170,7 @@ export class MetricsTracker {
|
||||
);
|
||||
}
|
||||
|
||||
// Initialize agent metrics if not exists
|
||||
// 1. Initialize agent metrics if first time seeing this agent
|
||||
const existingAgent = this.data.metrics.agents[agentName];
|
||||
const agent = existingAgent ?? {
|
||||
status: 'in-progress' as const,
|
||||
@@ -180,7 +180,7 @@ export class MetricsTracker {
|
||||
};
|
||||
this.data.metrics.agents[agentName] = agent;
|
||||
|
||||
// Add attempt to array
|
||||
// 2. Build attempt record with optional model/error fields
|
||||
const attempt: AttemptData = {
|
||||
attempt_number: result.attemptNumber,
|
||||
duration_ms: result.duration_ms,
|
||||
@@ -197,16 +197,18 @@ export class MetricsTracker {
|
||||
attempt.error = result.error;
|
||||
}
|
||||
|
||||
// 3. Append attempt to history
|
||||
agent.attempts.push(attempt);
|
||||
|
||||
// Update total cost (includes failed attempts)
|
||||
// 4. Recalculate total cost across all attempts (includes failures)
|
||||
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
|
||||
|
||||
// If successful, update final metrics and status
|
||||
// 5. Update agent status based on outcome
|
||||
if (result.success) {
|
||||
agent.status = 'success';
|
||||
agent.final_duration_ms = result.duration_ms;
|
||||
|
||||
// 6. Attach model and checkpoint metadata on success
|
||||
if (result.model) {
|
||||
agent.model = result.model;
|
||||
}
|
||||
@@ -215,18 +217,18 @@ export class MetricsTracker {
|
||||
agent.checkpoint = result.checkpoint;
|
||||
}
|
||||
} else {
|
||||
// If this was the last attempt, mark as failed
|
||||
if (result.isFinalAttempt) {
|
||||
agent.status = 'failed';
|
||||
}
|
||||
}
|
||||
|
||||
// Clear active timer
|
||||
// 7. Clear active timer
|
||||
this.activeTimers.delete(agentName);
|
||||
|
||||
// Recalculate aggregations
|
||||
// 8. Recalculate phase and session-level aggregations
|
||||
this.recalculateAggregations();
|
||||
|
||||
// 9. Persist to session.json
|
||||
await this.save();
|
||||
}
|
||||
|
||||
|
||||
@@ -178,6 +178,7 @@ function formatAjvErrors(errors: ErrorObject[]): string[] {
|
||||
|
||||
export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
try {
|
||||
// 1. Verify file exists
|
||||
if (!(await fs.pathExists(configPath))) {
|
||||
throw new PentestError(
|
||||
`Configuration file not found: ${configPath}`,
|
||||
@@ -188,6 +189,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
);
|
||||
}
|
||||
|
||||
// 2. Check file size
|
||||
const stats = await fs.stat(configPath);
|
||||
const maxFileSize = 1024 * 1024; // 1MB
|
||||
if (stats.size > maxFileSize) {
|
||||
@@ -200,6 +202,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Read and check for empty content
|
||||
const configContent = await fs.readFile(configPath, 'utf8');
|
||||
|
||||
if (!configContent.trim()) {
|
||||
@@ -212,6 +215,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
);
|
||||
}
|
||||
|
||||
// 4. Parse YAML with safe schema
|
||||
let config: unknown;
|
||||
try {
|
||||
config = yaml.load(configContent, {
|
||||
@@ -230,6 +234,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
);
|
||||
}
|
||||
|
||||
// 5. Guard against null/undefined parse result
|
||||
if (config === null || config === undefined) {
|
||||
throw new PentestError(
|
||||
'Configuration file resulted in null/undefined after parsing',
|
||||
@@ -240,6 +245,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
|
||||
);
|
||||
}
|
||||
|
||||
// 6. Validate schema, security rules, and return
|
||||
validateConfig(config as Config);
|
||||
|
||||
return config as Config;
|
||||
|
||||
@@ -231,7 +231,7 @@ export async function createGitCheckpoint(
|
||||
|
||||
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
|
||||
try {
|
||||
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
|
||||
// 1. On retries, clean workspace to prevent pollution from previous attempt
|
||||
if (attempt > 1) {
|
||||
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
|
||||
if (!cleanResult.success) {
|
||||
@@ -239,9 +239,11 @@ export async function createGitCheckpoint(
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Detect existing changes
|
||||
const changes = await getChangedFiles(sourceDir, 'status check');
|
||||
const hasChanges = changes.length > 0;
|
||||
|
||||
// 3. Stage and commit checkpoint
|
||||
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
|
||||
@@ -249,6 +251,7 @@ export async function createGitCheckpoint(
|
||||
'creating commit'
|
||||
);
|
||||
|
||||
// 4. Log result
|
||||
if (hasChanges) {
|
||||
logger.info('Checkpoint created with uncommitted changes staged');
|
||||
} else {
|
||||
|
||||
@@ -24,7 +24,7 @@ interface IncludeReplacement {
|
||||
// Pure function: Build complete login instructions from config
|
||||
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
|
||||
try {
|
||||
// Load the login instructions template
|
||||
// 1. Load the login instructions template
|
||||
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
|
||||
|
||||
if (!await fs.pathExists(loginInstructionsPath)) {
|
||||
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
|
||||
|
||||
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
|
||||
|
||||
// Helper function to extract sections based on markers
|
||||
const getSection = (content: string, sectionName: string): string => {
|
||||
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
|
||||
const match = regex.exec(content);
|
||||
return match ? match[1]!.trim() : '';
|
||||
};
|
||||
|
||||
// Extract sections based on login type
|
||||
// 2. Extract sections based on login type
|
||||
const loginType = authentication.login_type?.toUpperCase();
|
||||
let loginInstructions = '';
|
||||
|
||||
// Build instructions with only relevant sections
|
||||
const commonSection = getSection(fullTemplate, 'COMMON');
|
||||
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
|
||||
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
|
||||
|
||||
// Fallback to full template if markers are missing (backward compatibility)
|
||||
// 3. Assemble instructions from sections (fallback to full template if markers missing)
|
||||
if (!commonSection && !authSection && !verificationSection) {
|
||||
logger.warn('Section markers not found, using full login instructions template');
|
||||
loginInstructions = fullTemplate;
|
||||
} else {
|
||||
// Combine relevant sections
|
||||
loginInstructions = [commonSection, authSection, verificationSection]
|
||||
.filter(section => section) // Remove empty sections
|
||||
.filter(section => section)
|
||||
.join('\n\n');
|
||||
}
|
||||
|
||||
// Replace the user instructions placeholder with the login flow from config
|
||||
// 4. Interpolate login flow and credential placeholders
|
||||
let userInstructions = (authentication.login_flow ?? []).join('\n');
|
||||
|
||||
// Replace credential placeholders within the user instructions
|
||||
if (authentication.credentials) {
|
||||
if (authentication.credentials.username) {
|
||||
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
|
||||
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
|
||||
|
||||
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
|
||||
|
||||
// Replace TOTP secret placeholder if present in template
|
||||
// 5. Replace TOTP secret placeholder if present in template
|
||||
if (authentication.credentials?.totp_secret) {
|
||||
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
|
||||
}
|
||||
@@ -217,17 +213,15 @@ export async function loadPrompt(
|
||||
logger: ActivityLogger
|
||||
): Promise<string> {
|
||||
try {
|
||||
// Use pipeline testing prompts if pipeline testing mode is enabled
|
||||
// 1. Resolve prompt file path
|
||||
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
|
||||
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
|
||||
const promptPath = path.join(promptsDir, `${promptName}.txt`);
|
||||
|
||||
// Debug message for pipeline testing mode
|
||||
if (pipelineTestingMode) {
|
||||
logger.info(`Using pipeline testing prompt: ${promptPath}`);
|
||||
}
|
||||
|
||||
// Check if file exists first
|
||||
if (!await fs.pathExists(promptPath)) {
|
||||
throw new PentestError(
|
||||
`Prompt file not found: ${promptPath}`,
|
||||
@@ -237,25 +231,25 @@ export async function loadPrompt(
|
||||
);
|
||||
}
|
||||
|
||||
// Add MCP server assignment to variables
|
||||
// 2. Assign MCP server based on agent name
|
||||
const enhancedVariables: PromptVariables = { ...variables };
|
||||
|
||||
// Assign MCP server based on prompt name (agent name)
|
||||
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
|
||||
if (mcpServer) {
|
||||
enhancedVariables.MCP_SERVER = mcpServer;
|
||||
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
|
||||
} else {
|
||||
// Fallback for unknown agents
|
||||
enhancedVariables.MCP_SERVER = 'playwright-agent1';
|
||||
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
|
||||
}
|
||||
|
||||
// 3. Read template file
|
||||
let template = await fs.readFile(promptPath, 'utf8');
|
||||
|
||||
// Pre-process the template to handle @include directives
|
||||
// 4. Process @include directives
|
||||
template = await processIncludes(template, promptsDir);
|
||||
|
||||
// 5. Interpolate variables and return final prompt
|
||||
return await interpolateVariables(template, enhancedVariables, config, logger);
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
|
||||
@@ -117,17 +117,17 @@ async function runAgentActivity(
|
||||
try {
|
||||
const logger = createActivityLogger();
|
||||
|
||||
// Build session metadata and get/create container
|
||||
// 1. Build session metadata and get/create container
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
const container = getOrCreateContainer(workflowId, sessionMetadata);
|
||||
|
||||
// Create audit session for THIS agent execution
|
||||
// 2. Create audit session for THIS agent execution
|
||||
// NOTE: Each agent needs its own AuditSession because AuditSession uses
|
||||
// instance state (currentAgentName) that cannot be shared across parallel agents
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize(workflowId);
|
||||
|
||||
// Execute agent via service (throws PentestError on failure)
|
||||
// 3. Execute agent via service (throws PentestError on failure)
|
||||
const endResult = await container.agentExecution.executeOrThrow(
|
||||
agentName,
|
||||
{
|
||||
@@ -141,7 +141,7 @@ async function runAgentActivity(
|
||||
logger
|
||||
);
|
||||
|
||||
// Success - return metrics
|
||||
// 4. Return metrics
|
||||
return {
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: null,
|
||||
@@ -325,6 +325,7 @@ export async function loadResumeState(
|
||||
expectedUrl: string,
|
||||
expectedRepoPath: string
|
||||
): Promise<ResumeState> {
|
||||
// 1. Validate workspace exists
|
||||
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
|
||||
|
||||
const exists = await fileExists(sessionPath);
|
||||
@@ -335,6 +336,7 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// 2. Parse session.json and validate URL match
|
||||
let session: SessionJson;
|
||||
try {
|
||||
session = await readJson<SessionJson>(sessionPath);
|
||||
@@ -353,6 +355,7 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Cross-check agent status with deliverables on disk
|
||||
const completedAgents: string[] = [];
|
||||
const agents = session.metrics.agents;
|
||||
|
||||
@@ -375,6 +378,7 @@ export async function loadResumeState(
|
||||
completedAgents.push(agentName);
|
||||
}
|
||||
|
||||
// 4. Collect git checkpoints and validate at least one exists
|
||||
const checkpoints = completedAgents
|
||||
.map((name) => agents[name]?.checkpoint)
|
||||
.filter((hash): hash is string => hash != null);
|
||||
@@ -395,9 +399,11 @@ export async function loadResumeState(
|
||||
);
|
||||
}
|
||||
|
||||
// 5. Find the most recent checkpoint commit
|
||||
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
|
||||
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
|
||||
|
||||
// 6. Log summary and return resume state
|
||||
const logger = createActivityLogger();
|
||||
logger.info('Resume state loaded', {
|
||||
workspace: workspaceName,
|
||||
@@ -533,11 +539,12 @@ export async function logWorkflowComplete(
|
||||
const { repoPath, workflowId } = input;
|
||||
const sessionMetadata = buildSessionMetadata(input);
|
||||
|
||||
// 1. Initialize audit session and mark final status
|
||||
const auditSession = new AuditSession(sessionMetadata);
|
||||
await auditSession.initialize(workflowId);
|
||||
await auditSession.updateSessionStatus(summary.status);
|
||||
|
||||
// Use cumulative metrics from session.json
|
||||
// 2. Load cumulative metrics from session.json
|
||||
const sessionData = (await auditSession.getMetrics()) as {
|
||||
metrics: {
|
||||
total_duration_ms: number;
|
||||
@@ -546,7 +553,7 @@ export async function logWorkflowComplete(
|
||||
};
|
||||
};
|
||||
|
||||
// Fill in metrics for skipped agents
|
||||
// 3. Fill in metrics for skipped agents (resumed from previous run)
|
||||
const agentMetrics = { ...summary.agentMetrics };
|
||||
for (const agentName of summary.completedAgents) {
|
||||
if (!agentMetrics[agentName]) {
|
||||
@@ -560,15 +567,18 @@ export async function logWorkflowComplete(
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Build cumulative summary with cross-run totals
|
||||
const cumulativeSummary: WorkflowSummary = {
|
||||
...summary,
|
||||
totalDurationMs: sessionData.metrics.total_duration_ms,
|
||||
totalCostUsd: sessionData.metrics.total_cost_usd,
|
||||
agentMetrics,
|
||||
};
|
||||
|
||||
// 5. Write completion entry to workflow.log
|
||||
await auditSession.logWorkflowComplete(cumulativeSummary);
|
||||
|
||||
// Copy deliverables to audit-logs
|
||||
// 6. Copy deliverables to audit-logs
|
||||
try {
|
||||
await copyDeliverablesToAudit(sessionMetadata, repoPath);
|
||||
} catch (copyErr) {
|
||||
@@ -578,6 +588,6 @@ export async function logWorkflowComplete(
|
||||
});
|
||||
}
|
||||
|
||||
// Clean up container
|
||||
// 7. Clean up container
|
||||
removeContainer(workflowId);
|
||||
}
|
||||
|
||||
@@ -262,11 +262,13 @@ async function resolveWorkspace(
|
||||
console.log('=== RESUME MODE ===');
|
||||
console.log(`Workspace: ${workspace}\n`);
|
||||
|
||||
// 1. Terminate any running workflows from previous attempts
|
||||
const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
|
||||
if (terminatedWorkflows.length > 0) {
|
||||
console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
|
||||
}
|
||||
|
||||
// 2. Validate URL matches the workspace
|
||||
const session = await readJson<SessionJson>(sessionPath);
|
||||
if (session.session.webUrl !== args.webUrl) {
|
||||
console.error('ERROR: URL mismatch with workspace');
|
||||
@@ -275,6 +277,8 @@ async function resolveWorkspace(
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// 3. Generate a new workflow ID scoped to this resume attempt
|
||||
// 4. Return resolution with isResume=true so downstream uses resume logic
|
||||
return {
|
||||
workflowId: `${workspace}_resume_${Date.now()}`,
|
||||
sessionId: workspace,
|
||||
@@ -371,9 +375,11 @@ async function waitForWorkflowResult(
|
||||
}, 30000);
|
||||
|
||||
try {
|
||||
// 1. Block until workflow completes
|
||||
const result = await handle.result();
|
||||
clearInterval(progressInterval);
|
||||
|
||||
// 2. Display run metrics
|
||||
console.log('\nPipeline completed successfully!');
|
||||
if (result.summary) {
|
||||
console.log(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`);
|
||||
@@ -381,6 +387,7 @@ async function waitForWorkflowResult(
|
||||
console.log(`Total turns: ${result.summary.totalTurns}`);
|
||||
console.log(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`);
|
||||
|
||||
// 3. Show cumulative cost across all resume attempts
|
||||
if (workspace.isResume) {
|
||||
try {
|
||||
const session = await readJson<SessionJson>(
|
||||
@@ -402,9 +409,11 @@ async function waitForWorkflowResult(
|
||||
// === Main Entry Point ===
|
||||
|
||||
async function startPipeline(): Promise<void> {
|
||||
// 1. Parse CLI args and display splash
|
||||
const args = parseCliArgs(process.argv.slice(2));
|
||||
await displaySplashScreen();
|
||||
|
||||
// 2. Connect to Temporal server
|
||||
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
|
||||
console.log(`Connecting to Temporal at ${address}...`);
|
||||
|
||||
@@ -412,9 +421,11 @@ async function startPipeline(): Promise<void> {
|
||||
const client = new Client({ connection });
|
||||
|
||||
try {
|
||||
// 3. Resolve workspace (new or resume) and build pipeline input
|
||||
const workspace = await resolveWorkspace(client, args);
|
||||
const input = buildPipelineInput(args, workspace);
|
||||
|
||||
// 4. Start the Temporal workflow
|
||||
const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
|
||||
'pentestPipelineWorkflow',
|
||||
{
|
||||
@@ -424,6 +435,7 @@ async function startPipeline(): Promise<void> {
|
||||
}
|
||||
);
|
||||
|
||||
// 5. Display info and optionally wait for completion
|
||||
displayWorkflowInfo(args, workspace);
|
||||
|
||||
if (args.waitForCompletion) {
|
||||
|
||||
@@ -147,12 +147,14 @@ export async function pentestPipelineWorkflow(
|
||||
let resumeState: ResumeState | null = null;
|
||||
|
||||
if (input.resumeFromWorkspace) {
|
||||
// 1. Load resume state (validates workspace, cross-checks deliverables)
|
||||
resumeState = await a.loadResumeState(
|
||||
input.resumeFromWorkspace,
|
||||
input.webUrl,
|
||||
input.repoPath
|
||||
);
|
||||
|
||||
// 2. Restore git workspace and clean up incomplete deliverables
|
||||
const incompleteAgents = ALL_AGENTS.filter(
|
||||
(agentName) => !resumeState!.completedAgents.includes(agentName)
|
||||
) as AgentName[];
|
||||
@@ -163,6 +165,7 @@ export async function pentestPipelineWorkflow(
|
||||
incompleteAgents
|
||||
);
|
||||
|
||||
// 3. Short-circuit if all agents already completed
|
||||
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
|
||||
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
|
||||
state.status = 'completed';
|
||||
@@ -171,6 +174,7 @@ export async function pentestPipelineWorkflow(
|
||||
return state;
|
||||
}
|
||||
|
||||
// 4. Record this resume attempt in session.json and workflow.log
|
||||
await a.recordResumeAttempt(
|
||||
activityInput,
|
||||
input.terminatedWorkflows || [],
|
||||
@@ -317,6 +321,7 @@ export async function pentestPipelineWorkflow(
|
||||
const vulnAgentName = `${vulnType}-vuln`;
|
||||
const exploitAgentName = `${vulnType}-exploit`;
|
||||
|
||||
// 1. Run vulnerability analysis (or skip if resumed)
|
||||
let vulnMetrics: AgentMetrics | null = null;
|
||||
if (!shouldSkip(vulnAgentName)) {
|
||||
vulnMetrics = await runVulnAgent();
|
||||
@@ -324,8 +329,10 @@ export async function pentestPipelineWorkflow(
|
||||
log.info(`Skipping ${vulnAgentName} (already complete)`);
|
||||
}
|
||||
|
||||
// 2. Check exploitation queue for actionable findings
|
||||
const decision = await a.checkExploitationQueue(activityInput, vulnType);
|
||||
|
||||
// 3. Conditionally run exploitation agent
|
||||
let exploitMetrics: AgentMetrics | null = null;
|
||||
if (decision.shouldExploit) {
|
||||
if (!shouldSkip(exploitAgentName)) {
|
||||
|
||||
Reference in New Issue
Block a user