refactor: consolidate file layout and break circular dependencies

- Move error-handling, git-manager, prompt-manager, queue-validation, and reporting into src/services/
- Delete src/constants.ts — relocate AGENT_VALIDATORS and MCP_AGENT_MAPPING into session-manager.ts alongside agent definitions
- Delete src/utils/output-formatter.ts — absorb filterJsonToolCalls and getAgentPrefix into ai/output-formatters.ts
- Extract ActivityLogger interface into src/types/activity-logger.ts to break temporal/ → services circular dependency
- Consolidate VulnType, ExploitationDecision into types/agents.ts and SessionMetadata into types/audit.ts
- Remove dead timingResults/costResults globals from utils/metrics.ts and all consumers
This commit is contained in:
ajmallesh
2026-02-16 18:01:37 -08:00
parent 9074149778
commit b208949345
30 changed files with 480 additions and 476 deletions
+23
View File
@@ -119,6 +119,29 @@ Defensive security tool only. Use only on systems you own or have explicit permi
- Abstractions for one-time operations
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
### Comments
- Explain **WHY**, not WHAT — code shows what it does
- Comments must be **timeless** — useful to a reader with no knowledge of this conversation
- Never reference: this chat, refactoring history ("moved from X"), the AI, or deleted files
- No comment is better than a bad comment
```typescript
// Bad: references refactoring history
// Moved from utils/helpers.ts
// Bad: references conversation
// Added per user request
// Bad: states the obvious
// Loop through the array
// Good: explains WHY
// Retry with backoff — Temporal server rejects rapid reconnects
// Good: documents a gotcha
// MUST use FAILSAFE_SCHEMA — default schema allows code execution
```
## Key Files
**Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts`
+4 -6
View File
@@ -9,11 +9,11 @@
import { fs, path } from 'zx';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { isRetryableError, PentestError } from '../error-handling.js';
import { isRetryableError, PentestError } from '../services/error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { timingResults, Timer } from '../utils/metrics.js';
import { Timer } from '../utils/metrics.js';
import { formatTimestamp } from '../utils/formatting.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
import { AGENTS } from '../session-manager.js';
@@ -24,7 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
declare global {
var SHANNON_DISABLE_LOADER: boolean | undefined;
@@ -274,7 +274,6 @@ export async function runClaudePrompt(
}
const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
if (apiErrorDetected) {
logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
@@ -295,7 +294,6 @@ export async function runClaudePrompt(
} catch (error) {
const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
const err = error as Error & { code?: string; status?: number };
+3 -6
View File
@@ -6,20 +6,19 @@
// Pure functions for processing SDK message types
import { PentestError } from '../error-handling.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
import { filterJsonToolCalls } from '../utils/output-formatter.js';
import { filterJsonToolCalls } from './output-formatters.js';
import { formatTimestamp } from '../utils/formatting.js';
import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import {
formatAssistantOutput,
formatResultOutput,
formatToolUseOutput,
formatToolResultOutput,
} from './output-formatters.js';
import { costResults } from '../utils/metrics.js';
import type { AuditLogger } from './audit-logger.js';
import type { ProgressManager } from './progress-manager.js';
import type {
@@ -362,8 +361,6 @@ export async function dispatchMessage(
case 'result': {
const resultData = handleResultMessage(message as ResultMessage);
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
costResults.agents[execContext.agentKey] = resultData.cost;
costResults.total += resultData.cost;
return { type: 'complete', result: resultData.result, cost: resultData.cost };
}
+266 -1
View File
@@ -7,9 +7,274 @@
// Pure functions for formatting console output
import { extractAgentType, formatDuration } from '../utils/formatting.js';
import { getAgentPrefix } from '../utils/output-formatter.js';
import { AGENTS } from '../session-manager.js';
import type { ExecutionContext, ResultData } from './types.js';
// --- Types for tool call filtering ---
/**
 * Input payload of a parsed tool call.
 *
 * Only the fields that the formatters in this file read are named; every other
 * key is accepted through the index signature and ignored by the formatters.
 */
interface ToolCallInput {
// Navigation target; the navigate formatter reduces it to a hostname
url?: string;
// Element description interpolated (truncated) into click/hover/type messages
element?: string;
// Key name shown by the press-key formatter
key?: string;
// Form fields; only the array length is reported
fields?: unknown[];
// Text being waited for; shown truncated by the wait-for formatter
text?: string;
// Tab action verb shown by the tabs formatter
action?: string;
// Task description shown when a Task tool call is launched
description?: string;
// TodoWrite entries; status values 'completed' and 'in_progress' drive the summary
todos?: Array<{
status: string;
content: string;
}>;
[key: string]: unknown;
}
/**
 * Shape a `{"type":"tool_use"...}` line is cast to after JSON parsing.
 * The cast is unchecked, so `name` may be missing at runtime despite the type.
 */
interface ToolCall {
// Tool identifier, e.g. 'Task', 'TodoWrite' or an 'mcp__playwright__browser_*' name
name: string;
// Tool arguments; absent when the call carries none
input?: ToolCallInput;
}
// --- Agent prefix logic ---
/**
 * Pick the bracketed console tag (for example `[XSS]`) for an agent description
 * during parallel execution.
 *
 * Resolution order:
 *  1. the description contains the display name of a registered vuln/exploit agent;
 *  2. the description contains one of the lowercase vulnerability keywords;
 *  3. otherwise the generic `[Agent]` tag is returned.
 *
 * @param description - Free-text description of the running agent.
 * @returns The tag to prepend to that agent's output lines.
 */
export function getAgentPrefix(description: string): string {
  const tagsByAgent: ReadonlyArray<readonly [string, string]> = [
    ['injection-vuln', '[Injection]'],
    ['xss-vuln', '[XSS]'],
    ['auth-vuln', '[Auth]'],
    ['authz-vuln', '[Authz]'],
    ['ssrf-vuln', '[SSRF]'],
    ['injection-exploit', '[Injection]'],
    ['xss-exploit', '[XSS]'],
    ['auth-exploit', '[Auth]'],
    ['authz-exploit', '[Authz]'],
    ['ssrf-exploit', '[SSRF]'],
  ];

  const registryHit = tagsByAgent.find(([agentKey]) => {
    const definition = AGENTS[agentKey as keyof typeof AGENTS];
    return Boolean(definition && description.includes(definition.displayName));
  });
  if (registryHit) {
    return registryHit[1];
  }

  // 'authz' must be tested before 'auth': every description containing 'authz' also contains 'auth'
  const tagsByKeyword: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  const keywordHit = tagsByKeyword.find(([keyword]) => description.includes(keyword));
  return keywordHit ? keywordHit[1] : '[Agent]';
}
// --- Tool call filtering ---
/**
 * Reduce a URL to a short label for progress output.
 *
 * @param url - Raw URL string taken from a tool call.
 * @returns The parsed hostname; when the string does not parse as a URL, or the
 *   parsed hostname is empty, the first 30 characters of the input instead.
 */
function extractDomain(url: string): string {
  const clipped = (): string => url.slice(0, 30);

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    return clipped();
  }
  return hostname || clipped();
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * The most recently listed completed task wins; if nothing is completed, the
 * first in-progress task is shown instead.
 *
 * @param input - Tool call input that may carry a `todos` array.
 * @returns The progress line, or `null` when `todos` is missing, not an array,
 *   or holds neither a completed nor an in-progress entry.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const entries = input?.todos;
  if (!entries || !Array.isArray(entries)) {
    return null;
  }

  let lastCompleted: { status: string; content: string } | undefined;
  let firstInProgress: { status: string; content: string } | undefined;
  for (const entry of entries) {
    if (entry.status === 'completed') {
      lastCompleted = entry;
    } else if (entry.status === 'in_progress' && firstInProgress === undefined) {
      firstInProgress = entry;
    }
  }

  if (lastCompleted !== undefined) {
    return `${lastCompleted.content}`;
  }
  if (firstInProgress !== undefined) {
    return `🔄 ${firstInProgress.content}`;
  }
  return null;
}
/**
 * Turn a Playwright MCP browser tool call into a one-line progress message.
 *
 * @param toolCall - Parsed tool call whose name starts with `mcp__playwright__browser_`.
 * @returns A short human-readable line. Tool names without a dedicated message
 *   fall back to `Browser: <last underscore-separated segment of the name>`.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const toolName = toolCall.name;
  const args = toolCall.input || {};

  switch (toolName) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Any browser tool without a dedicated message
    default:
      return `🌐 Browser: ${toolName.split('_').pop()}`;
  }
}
/**
 * Strip raw JSON tool-call lines from agent output, replacing the interesting
 * ones with short progress messages.
 *
 * Per line:
 *  - blank lines are dropped;
 *  - lines that do not start with `{"type":"tool_use"` are kept untouched;
 *  - Task calls become a "Launching ..." line;
 *  - TodoWrite calls become a progress summary, or are dropped when there is none;
 *  - Playwright browser calls become a browser action line;
 *  - every other tool call is hidden;
 *  - a tool-call line that fails to parse or format is kept as regular text.
 *
 * @param content - Raw agent output; may be null or undefined.
 * @returns The filtered text joined by newlines, or `''` for empty input.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (!content || typeof content !== 'string') {
    return content || '';
  }

  const kept: string[] = [];

  for (const rawLine of content.split('\n')) {
    const candidate = rawLine.trim();

    if (candidate === '') {
      continue;
    }

    if (!candidate.startsWith('{"type":"tool_use"')) {
      // Assistant text passes through unchanged, including its original whitespace
      kept.push(rawLine);
      continue;
    }

    // What the tool-call line turns into; a falsy value hides the line entirely
    let replacement: string | null;
    try {
      const call = JSON.parse(candidate) as ToolCall;

      if (call.name === 'Task') {
        replacement = `🚀 Launching ${call.input?.description || 'analysis agent'}`;
      } else if (call.name === 'TodoWrite') {
        replacement = summarizeTodoUpdate(call.input);
      } else if (call.name.startsWith('mcp__playwright__browser_')) {
        replacement = formatBrowserAction(call);
      } else {
        // Read, Write, Grep and every other tool stay hidden
        replacement = null;
      }
    } catch {
      // Anything that throws while parsing or formatting is treated as regular text
      replacement = rawLine;
    }

    if (replacement) {
      kept.push(replacement);
    }
  }

  return kept.join('\n');
}
// --- Console output formatting ---
export function detectExecutionContext(description: string): ExecutionContext {
const isParallelExecution =
description.includes('vuln agent') || description.includes('exploit agent');
+1 -1
View File
@@ -18,7 +18,7 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js';
import { SessionMutex } from '../utils/concurrency.js';
import type { AgentEndResult } from '../types/index.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
// Global mutex instance
+1 -1
View File
@@ -18,7 +18,7 @@ import {
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
+4 -11
View File
@@ -15,9 +15,10 @@ import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
// Import and re-export file I/O utilities from canonical location
import { ensureDirectory, atomicWrite, readJson, fileExists } from '../utils/file-io.js';
export { ensureDirectory, atomicWrite, readJson, fileExists };
import { ensureDirectory } from '../utils/file-io.js';
export type { SessionMetadata } from '../types/audit.js';
import type { SessionMetadata } from '../types/audit.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@@ -26,14 +27,6 @@ const __dirname = path.dirname(__filename);
const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
/**
 * Metadata describing one audit session.
 * Only `id` and `webUrl` are required; the index signature lets callers attach
 * additional fields without widening this interface.
 */
export interface SessionMetadata {
id: string;
webUrl: string;
// NOTE(review): presumably the path of the repository under test — confirm against callers
repoPath?: string;
// NOTE(review): presumably where session output is written — confirm against callers
outputPath?: string;
[key: string]: unknown;
}
/**
* Extract and sanitize hostname from URL for use in identifiers
*/
+1 -1
View File
@@ -9,7 +9,7 @@ import { fs } from 'zx';
import yaml from 'js-yaml';
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
import type { FormatsPlugin } from 'ajv-formats';
import { PentestError } from './error-handling.js';
import { PentestError } from './services/error-handling.js';
import { ErrorCode } from './types/errors.js';
import type {
Config,
-109
View File
@@ -1,109 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { path, fs } from 'zx';
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js';
import type { ActivityLogger } from './temporal/activity-logger.js';
/**
 * Build the validator for a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * completes for `vulnType` under `sourceDir`. A failure is reported through
 * `logger.warn` and converted to `false`, so the validator reports the outcome
 * by value instead of throwing.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validate = async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    let passed = true;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      logger.warn(`Queue validation failed for ${vulnType}: ${reason}`);
      passed = false;
    }
    return passed;
  };
  return validate;
}
/**
 * Build the validator for an exploitation agent.
 *
 * The returned validator resolves to whether
 * `<sourceDir>/deliverables/<vulnType>_exploitation_evidence.md` exists.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
/**
 * Assigns each agent to a specific Playwright MCP instance to prevent conflicts
 * between agents that run at the same time.
 *
 * Keys are `promptTemplate` values from the AGENTS registry (session-manager.ts),
 * not agent names. The object is frozen, and the `Readonly` annotation makes the
 * compiler reject writes that would otherwise only fail at runtime.
 */
export const MCP_AGENT_MAPPING: Readonly<Record<string, PlaywrightAgent>> = Object.freeze({
  // Phase 1: Pre-reconnaissance (prompt name is 'pre-recon-code').
  // Pre-recon is pure code analysis and doesn't use browser automation;
  // an instance is assigned anyway so every prompt resolves to one.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2: Reconnaissance (prompt name is 'recon')
  recon: 'playwright-agent2',

  // Phase 3: Vulnerability analysis (5 parallel agents, one instance each)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4: Exploitation (5 parallel agents, same instance as the vuln counterpart)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5: Reporting (prompt name is 'report-executive').
  // Report generation is typically text-based; an instance is assigned anyway
  // so screenshots can be included if needed.
  'report-executive': 'playwright-agent3',
});
/**
 * Validator for every agent, keyed by agent name.
 *
 * Each validator receives the source directory (and a logger) and resolves to
 * whether that agent produced its required output. The object is frozen, and
 * the `Readonly` annotation makes the compiler reject writes that would
 * otherwise only fail at runtime.
 */
export const AGENT_VALIDATORS: Readonly<Record<AgentName, AgentValidator>> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable must exist
  'pre-recon': async (sourceDir: string): Promise<boolean> => {
    const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
    return await fs.pathExists(codeAnalysisFile);
  },

  // Reconnaissance: the recon deliverable must exist
  recon: async (sourceDir: string): Promise<boolean> => {
    const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
    return await fs.pathExists(reconFile);
  },

  // Vulnerability analysis agents: queue and deliverable are validated together
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation agents: the evidence file must exist
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: a missing report is logged as an error
  report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const reportFile = path.join(
      sourceDir,
      'deliverables',
      'comprehensive_security_assessment_report.md'
    );

    const reportExists = await fs.pathExists(reportFile);

    if (!reportExists) {
      logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    }

    return reportExists;
  },
});
+4 -4
View File
@@ -21,13 +21,13 @@
* No Temporal dependencies - pure domain logic.
*/
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
@@ -38,7 +38,7 @@ import {
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
+1 -1
View File
@@ -12,7 +12,7 @@
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';
@@ -9,11 +9,11 @@ import {
type PentestErrorType,
type PentestErrorContext,
type PromptErrorResult,
} from './types/errors.js';
} from '../types/errors.js';
import {
matchesBillingApiPattern,
matchesBillingTextPattern,
} from './utils/billing-detection.js';
} from '../utils/billing-detection.js';
// Custom error class for pentest operations
export class PentestError extends Error {
+2 -2
View File
@@ -17,9 +17,9 @@ import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* Service for checking exploitation queue decisions.
@@ -5,9 +5,9 @@
// as published by the Free Software Foundation.
import { $ } from 'zx';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* Check if a directory is a git repository.
+3
View File
@@ -18,3 +18,6 @@ export { ConfigLoaderService } from './config-loader.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export { loadPrompt } from './prompt-manager.js';
@@ -5,10 +5,10 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError, handlePromptError } from '../error-handling.js';
import { MCP_AGENT_MAPPING } from '../constants.js';
import { PentestError, handlePromptError } from './error-handling.js';
import { MCP_AGENT_MAPPING } from '../session-manager.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface PromptVariables {
webUrl: string;
@@ -6,11 +6,12 @@
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from './types/errors.js';
import { type Result, ok, err } from './types/result.js';
import { asyncPipe } from './utils/functional.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { asyncPipe } from '../utils/functional.js';
import type { VulnType, ExploitationDecision } from '../types/agents.js';
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
export type { VulnType, ExploitationDecision } from '../types/agents.js';
interface VulnTypeConfigItem {
deliverable: string;
@@ -62,12 +63,6 @@ interface QueueValidationResult {
error: string | null;
}
/**
 * Outcome of checking one vulnerability type's exploitation queue.
 */
export interface ExploitationDecision {
// NOTE(review): presumably true when the queue holds something to exploit — confirm in queue validation
shouldExploit: boolean;
// NOTE(review): presumably true when validation failed in a retryable way — confirm in queue validation
shouldRetry: boolean;
vulnerabilityCount: number;
// Vulnerability type this decision applies to
vulnType: VulnType;
}
/**
* Result type for safe validation - explicit error handling.
@@ -5,9 +5,9 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface DeliverableFile {
name: string;
+101 -1
View File
@@ -4,7 +4,10 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import type { AgentName, AgentDefinition } from './types/index.js';
import { path, fs } from 'zx';
import { validateQueueAndDeliverable } from './services/queue-validation.js';
import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
import type { ActivityLogger } from './types/activity-logger.js';
// Agent definitions according to PRD
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
@@ -122,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
'report': 'reporting',
});
/**
 * Build the validator for a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * completes for `vulnType` under `sourceDir`. A failure is reported through
 * `logger.warn` and converted to `false`, so the validator reports the outcome
 * by value instead of throwing.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validate = async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    let passed = true;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      logger.warn(`Queue validation failed for ${vulnType}: ${reason}`);
      passed = false;
    }
    return passed;
  };
  return validate;
}
/**
 * Build the validator for an exploitation agent.
 *
 * The returned validator resolves to whether
 * `<sourceDir>/deliverables/<vulnType>_exploitation_evidence.md` exists.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
/**
 * Assigns each agent to a specific Playwright MCP instance to prevent conflicts
 * between agents that run at the same time.
 *
 * Keys are `promptTemplate` values from the AGENTS registry, not agent names.
 * The object is frozen, and the `Readonly` annotation makes the compiler reject
 * writes that would otherwise only fail at runtime.
 */
export const MCP_AGENT_MAPPING: Readonly<Record<string, PlaywrightAgent>> = Object.freeze({
  // Phase 1: Pre-reconnaissance (prompt name is 'pre-recon-code').
  // Pre-recon is pure code analysis and doesn't use browser automation;
  // an instance is assigned anyway so every prompt resolves to one.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2: Reconnaissance (prompt name is 'recon')
  recon: 'playwright-agent2',

  // Phase 3: Vulnerability analysis (5 parallel agents, one instance each)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4: Exploitation (5 parallel agents, same instance as the vuln counterpart)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5: Reporting (prompt name is 'report-executive').
  // Report generation is typically text-based; an instance is assigned anyway
  // so screenshots can be included if needed.
  'report-executive': 'playwright-agent3',
});
/**
 * Validator for every agent, keyed by agent name.
 *
 * Each validator receives the source directory (and a logger) and resolves to
 * whether that agent produced its required output. The object is frozen, and
 * the `Readonly` annotation makes the compiler reject writes that would
 * otherwise only fail at runtime.
 */
export const AGENT_VALIDATORS: Readonly<Record<AgentName, AgentValidator>> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable must exist
  'pre-recon': async (sourceDir: string): Promise<boolean> => {
    const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
    return await fs.pathExists(codeAnalysisFile);
  },

  // Reconnaissance: the recon deliverable must exist
  recon: async (sourceDir: string): Promise<boolean> => {
    const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
    return await fs.pathExists(reconFile);
  },

  // Vulnerability analysis agents: queue and deliverable are validated together
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation agents: the evidence file must exist
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: a missing report is logged as an error
  report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const reportFile = path.join(
      sourceDir,
      'deliverables',
      'comprehensive_security_assessment_report.md'
    );

    const reportExists = await fs.pathExists(reportFile);

    if (!reportExists) {
      logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    }

    return reportExists;
  },
});
+6 -5
View File
@@ -19,20 +19,21 @@ import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import path from 'path';
import fs from 'fs/promises';
import { classifyErrorForTemporal, PentestError } from '../error-handling.js';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import type { VulnType, ExploitationDecision } from '../queue-validation.js';
import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { AGENTS } from '../session-manager.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { createActivityLogger } from './activity-logger.js';
+1 -10
View File
@@ -5,16 +5,7 @@
// as published by the Free Software Foundation.
import { Context } from '@temporalio/activity';
/**
 * Logger interface for services called from Temporal activities.
 * Keeps services Temporal-agnostic while providing structured logging.
 *
 * Every level takes a message plus an optional bag of structured attributes.
 */
export interface ActivityLogger {
info(message: string, attrs?: Record<string, unknown>): void;
warn(message: string, attrs?: Record<string, unknown>): void;
error(message: string, attrs?: Record<string, unknown>): void;
}
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* ActivityLogger backed by Temporal's Context.current().log.
+1 -1
View File
@@ -30,7 +30,7 @@ import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client';
import dotenv from 'dotenv';
import { displaySplashScreen } from '../splash-screen.js';
import { sanitizeHostname } from '../audit/utils.js';
import { readJson, fileExists } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import path from 'path';
// Import types only - these don't pull in workflow runtime code
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
-1
View File
@@ -1,6 +1,5 @@
import { defineQuery } from '@temporalio/workflow';
// Re-export AgentMetrics from central types location
export type { AgentMetrics } from '../types/metrics.js';
import type { AgentMetrics } from '../types/metrics.js';
+1 -1
View File
@@ -41,7 +41,7 @@ import {
type AgentMetrics,
type ResumeState,
} from './shared.js';
import type { VulnType } from '../queue-validation.js';
import type { VulnType } from '../services/queue-validation.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { toWorkflowSummary } from './summary-mapper.js';
+15
View File
@@ -0,0 +1,15 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
 * Logger interface for services called from Temporal activities.
 * Keeps services Temporal-agnostic while providing structured logging.
 *
 * Every method takes a message plus an optional map of structured
 * attributes and returns nothing.
 */
export interface ActivityLogger {
info(message: string, attrs?: Record<string, unknown>): void;
warn(message: string, attrs?: Record<string, unknown>): void;
error(message: string, attrs?: Record<string, unknown>): void;
}
+16 -1
View File
@@ -41,7 +41,7 @@ export type PlaywrightAgent =
| 'playwright-agent4'
| 'playwright-agent5';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from './activity-logger.js';
/**
 * Asynchronous validation callback: receives a `sourceDir` path string and an
 * ActivityLogger, and resolves to a boolean.
 * NOTE(review): presumably `true` means the check passed — confirm against implementations.
 */
export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise<boolean>;
@@ -59,3 +59,18 @@ export interface AgentDefinition {
promptTemplate: string;
deliverableFilename: string;
}
/**
 * Vulnerability types supported by the pipeline.
 * Closed union of string literals, so an unknown category is a compile-time error.
 */
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
/**
 * Decision returned by queue validation for exploitation phase.
 */
export interface ExploitationDecision {
// NOTE(review): presumably whether the exploitation phase should run for this type — confirm with producer.
shouldExploit: boolean;
// NOTE(review): presumably whether validation should be attempted again — confirm with producer.
shouldRetry: boolean;
// NOTE(review): presumably the number of queued findings for `vulnType` — confirm with producer.
vulnerabilityCount: number;
// Vulnerability category this decision applies to.
vulnType: VulnType;
}
+11
View File
@@ -8,6 +8,17 @@
* Audit system type definitions
*/
/**
 * Cross-cutting session metadata used by services, temporal, and audit.
 *
 * Only `id` and `webUrl` are required. The index signature admits arbitrary
 * extra keys typed as `unknown`, so any additional field must be narrowed
 * before it is used.
 */
export interface SessionMetadata {
id: string;
webUrl: string;
repoPath?: string;
outputPath?: string;
[key: string]: unknown;
}
/**
* Result data passed to audit system when an agent execution ends.
* Used by both AuditSession and MetricsTracker.
+1
View File
@@ -8,6 +8,7 @@
* Type definitions barrel export
*/
export * from './activity-logger.js';
export * from './errors.js';
export * from './config.js';
export * from './agents.js';
-30
View File
@@ -26,33 +26,3 @@ export class Timer {
return end - this.startTime;
}
}
// String-keyed map of numeric values.
// NOTE(review): presumably agent name -> elapsed time; the unit is not visible here — confirm.
interface TimingResultsAgents {
[key: string]: number;
}
// Shape of the timing tracker: one optional overall Timer plus a per-key numeric map.
interface TimingResults {
// null until an overall Timer is assigned.
total: Timer | null;
agents: TimingResultsAgents;
}
// String-keyed map of numeric values.
// NOTE(review): presumably agent name -> cost; the currency/unit is not visible here — confirm.
interface CostResultsAgents {
[key: string]: number;
}
// Shape of the cost tracker: a per-key numeric map plus a single numeric total.
interface CostResults {
agents: CostResultsAgents;
total: number;
}
// Global timing tracker.
// Exported mutable object shared by every importer of this module; starts with
// no overall timer and an empty per-key map.
export const timingResults: TimingResults = {
total: null,
agents: {},
};
// Global cost tracker.
// Exported mutable object shared by every importer of this module; starts with
// an empty per-key map and a total of 0.
export const costResults: CostResults = {
agents: {},
total: 0,
};
-264
View File
@@ -1,264 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { AGENTS } from '../session-manager.js';
/**
 * Loosely-typed payload carried by a tool call.
 * Every named field is optional because each tool populates a different
 * subset; the index signature lets unrecognised keys through as `unknown`.
 */
interface ToolCallInput {
url?: string;
element?: string;
key?: string;
fields?: unknown[];
text?: string;
action?: string;
description?: string;
// Todo entries, each with a status string and its display text.
todos?: Array<{
status: string;
content: string;
}>;
[key: string]: unknown;
}
/**
 * A single tool invocation: the tool's name plus its optional input payload.
 */
interface ToolCall {
name: string;
input?: ToolCallInput;
}
/**
 * Produce a short, display-friendly label for a URL.
 *
 * @param url - Raw URL string; may be malformed.
 * @returns The parsed hostname. When the string cannot be parsed, or the
 *   parsed hostname is empty, the first 30 characters of the input instead.
 */
function extractDomain(url: string): string {
  let hostname = '';
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Unparseable input: leave hostname empty so the truncated raw string is used.
  }
  return hostname || url.slice(0, 30);
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * @param input - Tool-call input that may carry a `todos` array.
 * @returns The content of the last todo marked `completed`; otherwise the
 *   content of the first `in_progress` todo prefixed with a spinner emoji;
 *   `null` when there is no todo list or no todo is in either state.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const items = input?.todos;
  if (!items || !Array.isArray(items)) {
    return null;
  }

  const withStatus = (wanted: string) => items.filter((todo) => todo.status === wanted);

  // Completed work takes priority over work that is still in flight.
  const latestDone = withStatus('completed').pop();
  if (latestDone) {
    return `${latestDone.content}`;
  }

  const [current] = withStatus('in_progress');
  if (current) {
    return `🔄 ${current.content}`;
  }

  return null;
}
/**
 * Pick the bracketed label shown in front of an agent's output during
 * parallel execution.
 *
 * @param description - Free-form text describing the agent.
 * @returns The label of the first known agent whose display name occurs in
 *   `description`; failing that, the label of the first matching keyword;
 *   `'[Agent]'` when nothing matches.
 */
export function getAgentPrefix(description: string): string {
  // Ordered pairs: the first agent whose display name matches wins.
  const labelsByAgent: ReadonlyArray<readonly [string, string]> = [
    ['injection-vuln', '[Injection]'],
    ['xss-vuln', '[XSS]'],
    ['auth-vuln', '[Auth]'],
    ['authz-vuln', '[Authz]'],
    ['ssrf-vuln', '[SSRF]'],
    ['injection-exploit', '[Injection]'],
    ['xss-exploit', '[XSS]'],
    ['auth-exploit', '[Auth]'],
    ['authz-exploit', '[Authz]'],
    ['ssrf-exploit', '[SSRF]'],
  ];

  const displayNameHit = labelsByAgent.find(([agentKey]) => {
    const definition = AGENTS[agentKey as keyof typeof AGENTS];
    return Boolean(definition && description.includes(definition.displayName));
  });
  if (displayNameHit) {
    return displayNameHit[1];
  }

  // Keyword fallback. 'authz' must precede 'auth' because the latter is a
  // substring of the former and would otherwise shadow it.
  const labelsByKeyword: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, label] of labelsByKeyword) {
    if (description.includes(keyword)) {
      return label;
    }
  }

  return '[Agent]';
}
/**
 * Turn a Playwright browser tool call into a one-line progress message.
 *
 * @param toolCall - Tool call whose name starts with the Playwright browser
 *   prefix; a missing `input` is treated as an empty object.
 * @returns A short emoji-prefixed description of the action. Tool names
 *   without a dedicated message fall back to `🌐 Browser: <last segment>`,
 *   where the segment is the text after the final underscore.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate': {
      const target = extractDomain(args.url || '');
      return `🌐 Navigating to ${target}`;
    }
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction — element labels are truncated to keep the line short
    case 'mcp__playwright__browser_click': {
      const label = args.element || 'element';
      return `🖱️ Clicking ${label.slice(0, 25)}`;
    }
    case 'mcp__playwright__browser_hover': {
      const label = args.element || 'element';
      return `👆 Hovering over ${label.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_type': {
      const label = args.element || 'field';
      return `⌨️ Typing in ${label.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_press_key': {
      const pressed = args.key || 'key';
      return `⌨️ Pressing ${pressed}`;
    }

    // Form handling
    case 'mcp__playwright__browser_fill_form': {
      const total = args.fields?.length || 0;
      return `📝 Filling ${total} form fields`;
    }
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs': {
      const verb = args.action || 'managing';
      return `🗂️ ${verb} browser tab`;
    }

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Any tool without a dedicated message
    default: {
      const lastSegment = name.split('_').pop();
      return `🌐 Browser: ${lastSegment}`;
    }
  }
}
/**
 * Replace raw JSON tool-call lines in streamed content with readable
 * progress lines, dropping the ones that carry no useful signal.
 *
 * Blank lines are removed. Lines that are not tool-call JSON are kept as-is.
 * Task, TodoWrite and Playwright browser calls are rewritten to a summary
 * (TodoWrite only when a summary exists); every other tool call is hidden.
 *
 * @param content - Text to filter; may be null, undefined or empty.
 * @returns The filtered text joined with newlines, or `''` for empty input.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (typeof content !== 'string' || content.length === 0) {
    return content || '';
  }

  const kept: string[] = [];

  content.split('\n').forEach((rawLine) => {
    const text = rawLine.trim();
    if (text === '') {
      return;
    }

    // Anything that is not a tool-call record is assistant text: keep it.
    if (!text.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      return;
    }

    try {
      const call = JSON.parse(text) as ToolCall;
      let rendered: string | null = null;

      if (call.name === 'Task') {
        rendered = `🚀 Launching ${call.input?.description || 'analysis agent'}`;
      } else if (call.name === 'TodoWrite') {
        rendered = summarizeTodoUpdate(call.input);
      } else if (call.name.startsWith('mcp__playwright__browser_')) {
        rendered = formatBrowserAction(call);
      }
      // Remaining tools (Read, Write, Grep, etc.) stay hidden: rendered is null.

      if (rendered) {
        kept.push(rendered);
      }
    } catch {
      // Malformed or unexpected record: show the original line rather than lose it.
      kept.push(rawLine);
    }
  });

  return kept.join('\n');
}