refactor: add numbered step comments to 20 complex sequential functions

- Add `// N. Description` step comments to the temporal layer (client, activities, workflows)
- Add steps to AI layer (claude-executor: runClaudePrompt, buildMcpServers)
- Add steps to services layer (prompt-manager, config-parser, git-manager)
- Add steps to audit layer (metrics-tracker, audit-session)
- Update CLAUDE.md comment guidelines with clearer numbered-step vs section-divider guidance
This commit is contained in:
ajmallesh
2026-02-16 20:45:58 -08:00
parent d696a7584b
commit a960ad1182
10 changed files with 95 additions and 44 deletions

View File

@@ -129,8 +129,13 @@ Comments must be **timeless** — no references to this conversation, refactorin
**Patterns used in this codebase:**
- `/** JSDoc */` — file headers (after license) and exported functions/interfaces
- `// 1. Step` — numbered steps for sequential operations
- `// === Section ===` — dividers in long files/functions
- `// N. Description` — numbered sequential steps inside function bodies. Use when a
function has 3+ distinct phases where at least one isn't immediately obvious from the
code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
(steps 1-9) and `injectModelIntoReport` (steps 1-5)
- `// === Section ===` — high-level dividers between groups of functions in long files,
or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
Not for sequential steps inside function bodies — use numbered steps for that
- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")

View File

@@ -60,12 +60,14 @@ function buildMcpServers(
agentName: string | null,
logger: ActivityLogger
): Record<string, McpServer> {
// 1. Create the shannon-helper server (always present)
const shannonHelperServer = createShannonHelperServer(sourceDir);
const mcpServers: Record<string, McpServer> = {
'shannon-helper': shannonHelperServer,
};
// 2. Look up the agent's Playwright MCP mapping
if (agentName) {
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
@@ -75,7 +77,7 @@ function buildMcpServers(
const userDataDir = `/tmp/${playwrightMcpName}`;
// Docker uses system Chromium; local dev uses Playwright's bundled browsers
// 3. Configure Playwright MCP args with Docker/local browser handling
const isDocker = process.env.SHANNON_DOCKER === 'true';
const mcpArgs: string[] = [
@@ -84,7 +86,6 @@ function buildMcpServers(
'--user-data-dir', userDataDir,
];
// Docker: Use system Chromium; Local: Use Playwright's bundled browsers
if (isDocker) {
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
mcpArgs.push('--browser', 'chromium');
@@ -107,6 +108,7 @@ function buildMcpServers(
}
}
// 4. Return configured servers
return mcpServers;
}
@@ -202,9 +204,11 @@ export async function runClaudePrompt(
auditSession: AuditSession | null = null,
logger: ActivityLogger
): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
// 2. Set up progress and audit infrastructure
const execContext = detectExecutionContext(description);
const progress = createProgressManager(
{ description, useCleanOutput: execContext.useCleanOutput },
@@ -214,9 +218,10 @@ export async function runClaudePrompt(
logger.info(`Running Claude Code: ${description}...`);
// 3. Configure MCP servers
const mcpServers = buildMcpServers(sourceDir, agentName, logger);
// Build env vars to pass to SDK subprocesses
// 4. Build env vars to pass to SDK subprocesses
const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
};
@@ -227,6 +232,7 @@ export async function runClaudePrompt(
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
}
// 5. Configure SDK options
const options = {
model: 'claude-sonnet-4-5-20250929',
maxTurns: 10_000,
@@ -249,6 +255,7 @@ export async function runClaudePrompt(
progress.start();
try {
// 6. Process the message stream
const messageLoopResult = await processMessageStream(
fullPrompt,
options,
@@ -263,7 +270,7 @@ export async function runClaudePrompt(
const model = messageLoopResult.model;
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
// 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
// Uses consolidated billing detection from utils/billing-detection.ts
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
throw new PentestError(
@@ -273,6 +280,7 @@ export async function runClaudePrompt(
);
}
// 8. Finalize successful result
const duration = timer.stop();
if (apiErrorDetected) {
@@ -293,6 +301,7 @@ export async function runClaudePrompt(
};
} catch (error) {
// 9. Handle errors — log, write error file, return failure
const duration = timer.stop();
const err = error as Error & { code?: string; status?: number };

View File

@@ -107,18 +107,20 @@ export class AuditSession {
): Promise<void> {
await this.ensureInitialized();
// Save prompt snapshot (only on first attempt)
// 1. Save prompt snapshot (only on first attempt)
if (attemptNumber === 1) {
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
}
// 2. Create and initialize the per-agent logger
this.currentAgentName = agentName;
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
await this.currentLogger.initialize();
// 3. Start metrics timer
this.metricsTracker.startAgent(agentName, attemptNumber);
// 4. Log start event to both agent log and workflow log
await this.currentLogger.logEvent('agent_start', {
agentName,
attemptNumber,
@@ -172,6 +174,7 @@ export class AuditSession {
* End agent execution (mutex-protected)
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
// 1. Finalize agent log and close the stream
if (this.currentLogger) {
await this.currentLogger.logEvent('agent_end', {
agentName,
@@ -185,6 +188,7 @@ export class AuditSession {
this.currentLogger = null;
}
// 2. Log completion to the unified workflow log
this.currentAgentName = null;
const agentLogDetails: AgentLogDetails = {
@@ -196,12 +200,11 @@ export class AuditSession {
};
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
// Mutex-protected update to session.json
// 3. Acquire mutex before touching session.json
const unlock = await sessionMutex.lock(this.sessionId);
try {
// Reload inside mutex to prevent lost updates during parallel exploitation phase
// 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
await this.metricsTracker.reload();
await this.metricsTracker.endAgent(agentName, result);
} finally {
unlock();

View File

@@ -170,7 +170,7 @@ export class MetricsTracker {
);
}
// Initialize agent metrics if not exists
// 1. Initialize agent metrics if first time seeing this agent
const existingAgent = this.data.metrics.agents[agentName];
const agent = existingAgent ?? {
status: 'in-progress' as const,
@@ -180,7 +180,7 @@ export class MetricsTracker {
};
this.data.metrics.agents[agentName] = agent;
// Add attempt to array
// 2. Build attempt record with optional model/error fields
const attempt: AttemptData = {
attempt_number: result.attemptNumber,
duration_ms: result.duration_ms,
@@ -197,16 +197,18 @@ export class MetricsTracker {
attempt.error = result.error;
}
// 3. Append attempt to history
agent.attempts.push(attempt);
// Update total cost (includes failed attempts)
// 4. Recalculate total cost across all attempts (includes failures)
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
// If successful, update final metrics and status
// 5. Update agent status based on outcome
if (result.success) {
agent.status = 'success';
agent.final_duration_ms = result.duration_ms;
// 6. Attach model and checkpoint metadata on success
if (result.model) {
agent.model = result.model;
}
@@ -215,18 +217,18 @@ export class MetricsTracker {
agent.checkpoint = result.checkpoint;
}
} else {
// If this was the last attempt, mark as failed
if (result.isFinalAttempt) {
agent.status = 'failed';
}
}
// Clear active timer
// 7. Clear active timer
this.activeTimers.delete(agentName);
// Recalculate aggregations
// 8. Recalculate phase and session-level aggregations
this.recalculateAggregations();
// 9. Persist to session.json
await this.save();
}

View File

@@ -178,6 +178,7 @@ function formatAjvErrors(errors: ErrorObject[]): string[] {
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// 1. Verify file exists
if (!(await fs.pathExists(configPath))) {
throw new PentestError(
`Configuration file not found: ${configPath}`,
@@ -188,6 +189,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 2. Check file size
const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
if (stats.size > maxFileSize) {
@@ -200,6 +202,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 3. Read and check for empty content
const configContent = await fs.readFile(configPath, 'utf8');
if (!configContent.trim()) {
@@ -212,6 +215,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 4. Parse YAML with safe schema
let config: unknown;
try {
config = yaml.load(configContent, {
@@ -230,6 +234,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 5. Guard against null/undefined parse result
if (config === null || config === undefined) {
throw new PentestError(
'Configuration file resulted in null/undefined after parsing',
@@ -240,6 +245,7 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
);
}
// 6. Validate schema, security rules, and return
validateConfig(config as Config);
return config as Config;

View File

@@ -231,7 +231,7 @@ export async function createGitCheckpoint(
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
try {
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
// 1. On retries, clean workspace to prevent pollution from previous attempt
if (attempt > 1) {
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
if (!cleanResult.success) {
@@ -239,9 +239,11 @@ export async function createGitCheckpoint(
}
}
// 2. Detect existing changes
const changes = await getChangedFiles(sourceDir, 'status check');
const hasChanges = changes.length > 0;
// 3. Stage and commit checkpoint
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
@@ -249,6 +251,7 @@ export async function createGitCheckpoint(
'creating commit'
);
// 4. Log result
if (hasChanges) {
logger.info('Checkpoint created with uncommitted changes staged');
} else {

View File

@@ -24,7 +24,7 @@ interface IncludeReplacement {
// Pure function: Build complete login instructions from config
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try {
// Load the login instructions template
// 1. Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) {
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
// Helper function to extract sections based on markers
const getSection = (content: string, sectionName: string): string => {
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
const match = regex.exec(content);
return match ? match[1]!.trim() : '';
};
// Extract sections based on login type
// 2. Extract sections based on login type
const loginType = authentication.login_type?.toUpperCase();
let loginInstructions = '';
// Build instructions with only relevant sections
const commonSection = getSection(fullTemplate, 'COMMON');
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
// Fallback to full template if markers are missing (backward compatibility)
// 3. Assemble instructions from sections (fallback to full template if markers missing)
if (!commonSection && !authSection && !verificationSection) {
logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate;
} else {
// Combine relevant sections
loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section) // Remove empty sections
.filter(section => section)
.join('\n\n');
}
// Replace the user instructions placeholder with the login flow from config
// 4. Interpolate login flow and credential placeholders
let userInstructions = (authentication.login_flow ?? []).join('\n');
// Replace credential placeholders within the user instructions
if (authentication.credentials) {
if (authentication.credentials.username) {
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication, logger: Ac
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
// Replace TOTP secret placeholder if present in template
// 5. Replace TOTP secret placeholder if present in template
if (authentication.credentials?.totp_secret) {
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
}
@@ -217,17 +213,15 @@ export async function loadPrompt(
logger: ActivityLogger
): Promise<string> {
try {
// Use pipeline testing prompts if pipeline testing mode is enabled
// 1. Resolve prompt file path
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptPath = path.join(promptsDir, `${promptName}.txt`);
// Debug message for pipeline testing mode
if (pipelineTestingMode) {
logger.info(`Using pipeline testing prompt: ${promptPath}`);
}
// Check if file exists first
if (!await fs.pathExists(promptPath)) {
throw new PentestError(
`Prompt file not found: ${promptPath}`,
@@ -237,25 +231,25 @@ export async function loadPrompt(
);
}
// Add MCP server assignment to variables
// 2. Assign MCP server based on agent name
const enhancedVariables: PromptVariables = { ...variables };
// Assign MCP server based on prompt name (agent name)
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
if (mcpServer) {
enhancedVariables.MCP_SERVER = mcpServer;
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
} else {
// Fallback for unknown agents
enhancedVariables.MCP_SERVER = 'playwright-agent1';
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
}
// 3. Read template file
let template = await fs.readFile(promptPath, 'utf8');
// Pre-process the template to handle @include directives
// 4. Process @include directives
template = await processIncludes(template, promptsDir);
// 5. Interpolate variables and return final prompt
return await interpolateVariables(template, enhancedVariables, config, logger);
} catch (error) {
if (error instanceof PentestError) {

View File

@@ -117,17 +117,17 @@ async function runAgentActivity(
try {
const logger = createActivityLogger();
// Build session metadata and get/create container
// 1. Build session metadata and get/create container
const sessionMetadata = buildSessionMetadata(input);
const container = getOrCreateContainer(workflowId, sessionMetadata);
// Create audit session for THIS agent execution
// 2. Create audit session for THIS agent execution
// NOTE: Each agent needs its own AuditSession because AuditSession uses
// instance state (currentAgentName) that cannot be shared across parallel agents
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
// Execute agent via service (throws PentestError on failure)
// 3. Execute agent via service (throws PentestError on failure)
const endResult = await container.agentExecution.executeOrThrow(
agentName,
{
@@ -141,7 +141,7 @@ async function runAgentActivity(
logger
);
// Success - return metrics
// 4. Return metrics
return {
durationMs: Date.now() - startTime,
inputTokens: null,
@@ -325,6 +325,7 @@ export async function loadResumeState(
expectedUrl: string,
expectedRepoPath: string
): Promise<ResumeState> {
// 1. Validate workspace exists
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
const exists = await fileExists(sessionPath);
@@ -335,6 +336,7 @@ export async function loadResumeState(
);
}
// 2. Parse session.json and validate URL match
let session: SessionJson;
try {
session = await readJson<SessionJson>(sessionPath);
@@ -353,6 +355,7 @@ export async function loadResumeState(
);
}
// 3. Cross-check agent status with deliverables on disk
const completedAgents: string[] = [];
const agents = session.metrics.agents;
@@ -375,6 +378,7 @@ export async function loadResumeState(
completedAgents.push(agentName);
}
// 4. Collect git checkpoints and validate at least one exists
const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null);
@@ -395,9 +399,11 @@ export async function loadResumeState(
);
}
// 5. Find the most recent checkpoint commit
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
// 6. Log summary and return resume state
const logger = createActivityLogger();
logger.info('Resume state loaded', {
workspace: workspaceName,
@@ -533,11 +539,12 @@ export async function logWorkflowComplete(
const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
// 1. Initialize audit session and mark final status
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status);
// Use cumulative metrics from session.json
// 2. Load cumulative metrics from session.json
const sessionData = (await auditSession.getMetrics()) as {
metrics: {
total_duration_ms: number;
@@ -546,7 +553,7 @@ export async function logWorkflowComplete(
};
};
// Fill in metrics for skipped agents
// 3. Fill in metrics for skipped agents (resumed from previous run)
const agentMetrics = { ...summary.agentMetrics };
for (const agentName of summary.completedAgents) {
if (!agentMetrics[agentName]) {
@@ -560,15 +567,18 @@ export async function logWorkflowComplete(
}
}
// 4. Build cumulative summary with cross-run totals
const cumulativeSummary: WorkflowSummary = {
...summary,
totalDurationMs: sessionData.metrics.total_duration_ms,
totalCostUsd: sessionData.metrics.total_cost_usd,
agentMetrics,
};
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary);
// Copy deliverables to audit-logs
// 6. Copy deliverables to audit-logs
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
@@ -578,6 +588,6 @@ export async function logWorkflowComplete(
});
}
// Clean up container
// 7. Clean up container
removeContainer(workflowId);
}

View File

@@ -262,11 +262,13 @@ async function resolveWorkspace(
console.log('=== RESUME MODE ===');
console.log(`Workspace: ${workspace}\n`);
// 1. Terminate any running workflows from previous attempts
const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
if (terminatedWorkflows.length > 0) {
console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
}
// 2. Validate URL matches the workspace
const session = await readJson<SessionJson>(sessionPath);
if (session.session.webUrl !== args.webUrl) {
console.error('ERROR: URL mismatch with workspace');
@@ -275,6 +277,8 @@ async function resolveWorkspace(
process.exit(1);
}
// 3. Generate a new workflow ID scoped to this resume attempt
// 4. Return resolution with isResume=true so downstream uses resume logic
return {
workflowId: `${workspace}_resume_${Date.now()}`,
sessionId: workspace,
@@ -371,9 +375,11 @@ async function waitForWorkflowResult(
}, 30000);
try {
// 1. Block until workflow completes
const result = await handle.result();
clearInterval(progressInterval);
// 2. Display run metrics
console.log('\nPipeline completed successfully!');
if (result.summary) {
console.log(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`);
@@ -381,6 +387,7 @@ async function waitForWorkflowResult(
console.log(`Total turns: ${result.summary.totalTurns}`);
console.log(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`);
// 3. Show cumulative cost across all resume attempts
if (workspace.isResume) {
try {
const session = await readJson<SessionJson>(
@@ -402,9 +409,11 @@ async function waitForWorkflowResult(
// === Main Entry Point ===
async function startPipeline(): Promise<void> {
// 1. Parse CLI args and display splash
const args = parseCliArgs(process.argv.slice(2));
await displaySplashScreen();
// 2. Connect to Temporal server
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(`Connecting to Temporal at ${address}...`);
@@ -412,9 +421,11 @@ async function startPipeline(): Promise<void> {
const client = new Client({ connection });
try {
// 3. Resolve workspace (new or resume) and build pipeline input
const workspace = await resolveWorkspace(client, args);
const input = buildPipelineInput(args, workspace);
// 4. Start the Temporal workflow
const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
'pentestPipelineWorkflow',
{
@@ -424,6 +435,7 @@ async function startPipeline(): Promise<void> {
}
);
// 5. Display info and optionally wait for completion
displayWorkflowInfo(args, workspace);
if (args.waitForCompletion) {

View File

@@ -147,12 +147,14 @@ export async function pentestPipelineWorkflow(
let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) {
// 1. Load resume state (validates workspace, cross-checks deliverables)
resumeState = await a.loadResumeState(
input.resumeFromWorkspace,
input.webUrl,
input.repoPath
);
// 2. Restore git workspace and clean up incomplete deliverables
const incompleteAgents = ALL_AGENTS.filter(
(agentName) => !resumeState!.completedAgents.includes(agentName)
) as AgentName[];
@@ -163,6 +165,7 @@ export async function pentestPipelineWorkflow(
incompleteAgents
);
// 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
state.status = 'completed';
@@ -171,6 +174,7 @@ export async function pentestPipelineWorkflow(
return state;
}
// 4. Record this resume attempt in session.json and workflow.log
await a.recordResumeAttempt(
activityInput,
input.terminatedWorkflows || [],
@@ -317,6 +321,7 @@ export async function pentestPipelineWorkflow(
const vulnAgentName = `${vulnType}-vuln`;
const exploitAgentName = `${vulnType}-exploit`;
// 1. Run vulnerability analysis (or skip if resumed)
let vulnMetrics: AgentMetrics | null = null;
if (!shouldSkip(vulnAgentName)) {
vulnMetrics = await runVulnAgent();
@@ -324,8 +329,10 @@ export async function pentestPipelineWorkflow(
log.info(`Skipping ${vulnAgentName} (already complete)`);
}
// 2. Check exploitation queue for actionable findings
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// 3. Conditionally run exploitation agent
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {