From b208949345697ebe40428f9ab27536c09740539d Mon Sep 17 00:00:00 2001 From: ajmallesh Date: Mon, 16 Feb 2026 18:01:37 -0800 Subject: [PATCH] refactor: consolidate file layout and break circular dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move error-handling, git-manager, prompt-manager, queue-validation, and reporting into src/services/ - Delete src/constants.ts — relocate AGENT_VALIDATORS and MCP_AGENT_MAPPING into session-manager.ts alongside agent definitions - Delete src/utils/output-formatter.ts — absorb filterJsonToolCalls and getAgentPrefix into ai/output-formatters.ts - Extract ActivityLogger interface into src/types/activity-logger.ts to break temporal/ → services circular dependency - Consolidate VulnType, ExploitationDecision into types/agents.ts and SessionMetadata into types/audit.ts - Remove dead timingResults/costResults globals from utils/metrics.ts and all consumers --- CLAUDE.md | 23 ++ src/ai/claude-executor.ts | 10 +- src/ai/message-handlers.ts | 9 +- src/ai/output-formatters.ts | 267 +++++++++++++++++++- src/audit/audit-session.ts | 2 +- src/audit/metrics-tracker.ts | 2 +- src/audit/utils.ts | 15 +- src/config-parser.ts | 2 +- src/constants.ts | 109 -------- src/services/agent-execution.ts | 8 +- src/services/config-loader.ts | 2 +- src/{ => services}/error-handling.ts | 4 +- src/services/exploitation-checker.ts | 4 +- src/{utils => services}/git-manager.ts | 4 +- src/services/index.ts | 3 + src/{prompts => services}/prompt-manager.ts | 6 +- src/{ => services}/queue-validation.ts | 15 +- src/{phases => services}/reporting.ts | 4 +- src/session-manager.ts | 102 +++++++- src/temporal/activities.ts | 11 +- src/temporal/activity-logger.ts | 11 +- src/temporal/client.ts | 2 +- src/temporal/shared.ts | 1 - src/temporal/workflows.ts | 2 +- src/types/activity-logger.ts | 15 ++ src/types/agents.ts | 17 +- src/types/audit.ts | 11 + src/types/index.ts | 1 + src/utils/metrics.ts | 30 --- src/utils/output-formatter.ts | 264 ------------------- 30 files changed, 480 insertions(+), 476 deletions(-) delete mode 100644 src/constants.ts rename src/{ => services}/error-handling.ts (99%) rename src/{utils => services}/git-manager.ts (98%) rename src/{prompts => services}/prompt-manager.ts (98%) rename src/{ => services}/queue-validation.ts (96%) rename src/{phases => services}/reporting.ts (97%) create mode 100644 src/types/activity-logger.ts delete mode 100644 src/utils/output-formatter.ts diff --git a/CLAUDE.md b/CLAUDE.md index 4bb7c11..e16afae 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -119,6 +119,29 @@ Defensive security tool only. Use only on systems you own or have explicit permi - Abstractions for one-time operations - Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it +### Comments +- Explain **WHY**, not WHAT — code shows what it does +- Comments must be **timeless** — useful to a reader with no knowledge of this conversation +- Never reference: this chat, refactoring history ("moved from X"), the AI, or deleted files +- No comment is better than a bad comment + +```typescript +// Bad: references refactoring history +// Moved from utils/helpers.ts + +// Bad: references conversation +// Added per user request + +// Bad: states the obvious +// Loop through the array + +// Good: explains WHY +// Retry with backoff — Temporal server rejects rapid reconnects + +// Good: documents a gotcha +// MUST use FAILSAFE_SCHEMA — default schema allows code execution +``` + ## Key Files **Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts` diff --git a/src/ai/claude-executor.ts b/src/ai/claude-executor.ts index 28b038f..5c30312 100644 --- a/src/ai/claude-executor.ts +++ b/src/ai/claude-executor.ts @@ -9,11 +9,11 @@ import { fs, path } from 'zx'; import { query } from '@anthropic-ai/claude-agent-sdk'; -import { isRetryableError, PentestError } from '../error-handling.js'; +import { isRetryableError, PentestError } from '../services/error-handling.js'; import { isSpendingCapBehavior } from '../utils/billing-detection.js'; -import { timingResults, Timer } from '../utils/metrics.js'; +import { Timer } from '../utils/metrics.js'; import { formatTimestamp } from '../utils/formatting.js'; -import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js'; +import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js'; import { AuditSession } from '../audit/index.js'; import { createShannonHelperServer } from '../../mcp-server/dist/index.js'; import { AGENTS } from '../session-manager.js'; @@ -24,7 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr import { createProgressManager } from './progress-manager.js'; import { createAuditLogger } from './audit-logger.js'; import { getActualModelName } from './router-utils.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; declare global { var SHANNON_DISABLE_LOADER: boolean | undefined; @@ -274,7 +274,6 @@ export async function runClaudePrompt( } const duration = timer.stop(); - timingResults.agents[execContext.agentKey] = duration; if (apiErrorDetected) { logger.warn(`API Error detected in ${description} - will validate deliverables before failing`); @@ -295,7 +294,6 @@ export async function runClaudePrompt( } catch (error) { const duration = timer.stop(); - timingResults.agents[execContext.agentKey] = duration; const err = error as Error & { code?: string; status?: number }; diff --git a/src/ai/message-handlers.ts b/src/ai/message-handlers.ts index cf5558f..1dc176f 100644 --- a/src/ai/message-handlers.ts +++ b/src/ai/message-handlers.ts @@ -6,20 +6,19 @@ // Pure functions for processing SDK message types -import { PentestError } from '../error-handling.js'; +import { PentestError } from '../services/error-handling.js'; import { ErrorCode } from '../types/errors.js'; import { matchesBillingTextPattern } from '../utils/billing-detection.js'; -import { filterJsonToolCalls } from '../utils/output-formatter.js'; +import { filterJsonToolCalls } from './output-formatters.js'; import { formatTimestamp } from '../utils/formatting.js'; import { getActualModelName } from './router-utils.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; import { formatAssistantOutput, formatResultOutput, formatToolUseOutput, formatToolResultOutput, } from './output-formatters.js'; -import { costResults } from '../utils/metrics.js'; import type { AuditLogger } from './audit-logger.js'; import type { ProgressManager } from './progress-manager.js'; import type { @@ -362,8 +361,6 @@ export async function dispatchMessage( case 'result': { const resultData = handleResultMessage(message as ResultMessage); outputLines(formatResultOutput(resultData, !execContext.useCleanOutput)); - costResults.agents[execContext.agentKey] = resultData.cost; - costResults.total += resultData.cost; return { type: 'complete', result: resultData.result, cost: resultData.cost }; } diff --git a/src/ai/output-formatters.ts b/src/ai/output-formatters.ts index 35bf91a..e1033a2 100644 --- a/src/ai/output-formatters.ts +++ b/src/ai/output-formatters.ts @@ -7,9 +7,274 @@ // Pure functions for formatting console output import { extractAgentType, formatDuration } from '../utils/formatting.js'; -import { getAgentPrefix } from '../utils/output-formatter.js'; +import { AGENTS } from '../session-manager.js'; import type { ExecutionContext, ResultData } from './types.js'; +// --- Types for tool call filtering --- + +interface ToolCallInput { + url?: string; + element?: string; + key?: string; + fields?: unknown[]; + text?: string; + action?: string; + description?: string; + todos?: Array<{ + status: string; + content: string; + }>; + [key: string]: unknown; +} + +interface ToolCall { + name: string; + input?: ToolCallInput; +} + +// --- Agent prefix logic --- + +/** + * Get agent prefix for parallel execution + */ +export function getAgentPrefix(description: string): string { + // Map agent names to their prefixes + const agentPrefixes: Record = { + 'injection-vuln': '[Injection]', + 'xss-vuln': '[XSS]', + 'auth-vuln': '[Auth]', + 'authz-vuln': '[Authz]', + 'ssrf-vuln': '[SSRF]', + 'injection-exploit': '[Injection]', + 'xss-exploit': '[XSS]', + 'auth-exploit': '[Auth]', + 'authz-exploit': '[Authz]', + 'ssrf-exploit': '[SSRF]', + }; + + // First try to match by agent name directly + for (const [agentName, prefix] of Object.entries(agentPrefixes)) { + const agent = AGENTS[agentName as keyof typeof AGENTS]; + if (agent && description.includes(agent.displayName)) { + return prefix; + } + } + + // Fallback to partial matches for backwards compatibility + if (description.includes('injection')) return '[Injection]'; + if (description.includes('xss')) return '[XSS]'; + if (description.includes('authz')) return '[Authz]'; // Check authz before auth + if (description.includes('auth')) return '[Auth]'; + if (description.includes('ssrf')) return '[SSRF]'; + + return '[Agent]'; +} + +// --- Tool call filtering --- + +/** + * Extract domain from URL for display + */ +function extractDomain(url: string): string { + try { + const urlObj = new URL(url); + return urlObj.hostname || url.slice(0, 30); + } catch { + return url.slice(0, 30); + } +} + +/** + * Summarize TodoWrite updates into clean progress indicators + */ +function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null { + if (!input?.todos || !Array.isArray(input.todos)) { + return null; + } + + const todos = input.todos; + const completed = todos.filter((t) => t.status === 'completed'); + const inProgress = todos.filter((t) => t.status === 'in_progress'); + + // Show recently completed tasks + if (completed.length > 0) { + const recent = completed[completed.length - 1]!; + return `✅ ${recent.content}`; + } + + // Show current in-progress task + if (inProgress.length > 0) { + const current = inProgress[0]!; + return `🔄 ${current.content}`; + } + + return null; +} + +/** + * Format browser tool calls into clean progress indicators + */ +function formatBrowserAction(toolCall: ToolCall): string { + const toolName = toolCall.name; + const input = toolCall.input || {}; + + // Core Browser Operations + if (toolName === 'mcp__playwright__browser_navigate') { + const url = input.url || ''; + const domain = extractDomain(url); + return `🌐 Navigating to ${domain}`; + } + + if (toolName === 'mcp__playwright__browser_navigate_back') { + return `⬅️ Going back`; + } + + // Page Interaction + if (toolName === 'mcp__playwright__browser_click') { + const element = input.element || 'element'; + return `🖱️ Clicking ${element.slice(0, 25)}`; + } + + if (toolName === 'mcp__playwright__browser_hover') { + const element = input.element || 'element'; + return `👆 Hovering over ${element.slice(0, 20)}`; + } + + if (toolName === 'mcp__playwright__browser_type') { + const element = input.element || 'field'; + return `⌨️ Typing in ${element.slice(0, 20)}`; + } + + if (toolName === 'mcp__playwright__browser_press_key') { + const key = input.key || 'key'; + return `⌨️ Pressing ${key}`; + } + + // Form Handling + if (toolName === 'mcp__playwright__browser_fill_form') { + const fieldCount = input.fields?.length || 0; + return `📝 Filling ${fieldCount} form fields`; + } + + if (toolName === 'mcp__playwright__browser_select_option') { + return `📋 Selecting dropdown option`; + } + + if (toolName === 'mcp__playwright__browser_file_upload') { + return `📁 Uploading file`; + } + + // Page Analysis + if (toolName === 'mcp__playwright__browser_snapshot') { + return `📸 Taking page snapshot`; + } + + if (toolName === 'mcp__playwright__browser_take_screenshot') { + return `📸 Taking screenshot`; + } + + if (toolName === 'mcp__playwright__browser_evaluate') { + return `🔍 Running JavaScript analysis`; + } + + // Waiting & Monitoring + if (toolName === 'mcp__playwright__browser_wait_for') { + if (input.text) { + return `⏳ Waiting for "${input.text.slice(0, 20)}"`; + } + return `⏳ Waiting for page response`; + } + + if (toolName === 'mcp__playwright__browser_console_messages') { + return `📜 Checking console logs`; + } + + if (toolName === 'mcp__playwright__browser_network_requests') { + return `🌐 Analyzing network traffic`; + } + + // Tab Management + if (toolName === 'mcp__playwright__browser_tabs') { + const action = input.action || 'managing'; + return `🗂️ ${action} browser tab`; + } + + // Dialog Handling + if (toolName === 'mcp__playwright__browser_handle_dialog') { + return `💬 Handling browser dialog`; + } + + // Fallback for any missed tools + const actionType = toolName.split('_').pop(); + return `🌐 Browser: ${actionType}`; +} + +/** + * Filter out JSON tool calls from content, with special handling for Task calls + */ +export function filterJsonToolCalls(content: string | null | undefined): string { + if (!content || typeof content !== 'string') { + return content || ''; + } + + const lines = content.split('\n'); + const processedLines: string[] = []; + + for (const line of lines) { + const trimmed = line.trim(); + + // Skip empty lines + if (trimmed === '') { + continue; + } + + // Check if this is a JSON tool call + if (trimmed.startsWith('{"type":"tool_use"')) { + try { + const toolCall = JSON.parse(trimmed) as ToolCall; + + // Special handling for Task tool calls + if (toolCall.name === 'Task') { + const description = toolCall.input?.description || 'analysis agent'; + processedLines.push(`🚀 Launching ${description}`); + continue; + } + + // Special handling for TodoWrite tool calls + if (toolCall.name === 'TodoWrite') { + const summary = summarizeTodoUpdate(toolCall.input); + if (summary) { + processedLines.push(summary); + } + continue; + } + + // Special handling for browser tool calls + if (toolCall.name.startsWith('mcp__playwright__browser_')) { + const browserAction = formatBrowserAction(toolCall); + if (browserAction) { + processedLines.push(browserAction); + } + continue; + } + + // Hide all other tool calls (Read, Write, Grep, etc.) + continue; + } catch { + // If JSON parsing fails, treat as regular text + processedLines.push(line); + } + } else { + // Keep non-JSON lines (assistant text) + processedLines.push(line); + } + } + + return processedLines.join('\n'); +} + +// --- Console output formatting --- + export function detectExecutionContext(description: string): ExecutionContext { const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent'); diff --git a/src/audit/audit-session.ts b/src/audit/audit-session.ts index 266fb67..3fe389a 100644 --- a/src/audit/audit-session.ts +++ b/src/audit/audit-session.ts @@ -18,7 +18,7 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js'; import { formatTimestamp } from '../utils/formatting.js'; import { SessionMutex } from '../utils/concurrency.js'; import type { AgentEndResult } from '../types/index.js'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from '../services/error-handling.js'; import { ErrorCode } from '../types/errors.js'; // Global mutex instance diff --git a/src/audit/metrics-tracker.ts b/src/audit/metrics-tracker.ts index 97bfe6a..4827426 100644 --- a/src/audit/metrics-tracker.ts +++ b/src/audit/metrics-tracker.ts @@ -18,7 +18,7 @@ import { import { atomicWrite, readJson, fileExists } from '../utils/file-io.js'; import { formatTimestamp, calculatePercentage } from '../utils/formatting.js'; import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from '../services/error-handling.js'; import { ErrorCode } from '../types/errors.js'; import type { AgentName, AgentEndResult } from '../types/index.js'; diff --git a/src/audit/utils.ts b/src/audit/utils.ts index 6e91964..c4366ac 100644 --- a/src/audit/utils.ts +++ b/src/audit/utils.ts @@ -15,9 +15,10 @@ import fs from 'fs/promises'; import path from 'path'; import { fileURLToPath } from 'url'; -// Import and re-export file I/O utilities from canonical location -import { ensureDirectory, atomicWrite, readJson, fileExists } from '../utils/file-io.js'; -export { ensureDirectory, atomicWrite, readJson, fileExists }; +import { ensureDirectory } from '../utils/file-io.js'; + +export type { SessionMetadata } from '../types/audit.js'; +import type { SessionMetadata } from '../types/audit.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -26,14 +27,6 @@ const __dirname = path.dirname(__filename); const SHANNON_ROOT = path.resolve(__dirname, '..', '..'); const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs'); -export interface SessionMetadata { - id: string; - webUrl: string; - repoPath?: string; - outputPath?: string; - [key: string]: unknown; -} - /** * Extract and sanitize hostname from URL for use in identifiers */ diff --git a/src/config-parser.ts b/src/config-parser.ts index f3ee9d4..57666e6 100644 --- a/src/config-parser.ts +++ b/src/config-parser.ts @@ -9,7 +9,7 @@ import { fs } from 'zx'; import yaml from 'js-yaml'; import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv'; import type { FormatsPlugin } from 'ajv-formats'; -import { PentestError } from './error-handling.js'; +import { PentestError } from './services/error-handling.js'; import { ErrorCode } from './types/errors.js'; import type { Config, diff --git a/src/constants.ts b/src/constants.ts deleted file mode 100644 index 9758684..0000000 --- a/src/constants.ts +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { path, fs } from 'zx'; -import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js'; -import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js'; -import type { ActivityLogger } from './temporal/activity-logger.js'; - -// Factory function for vulnerability queue validators -function createVulnValidator(vulnType: VulnType): AgentValidator { - return async (sourceDir: string, logger: ActivityLogger): Promise => { - try { - await validateQueueAndDeliverable(vulnType, sourceDir); - return true; - } catch (error) { - const errMsg = error instanceof Error ? error.message : String(error); - logger.warn(`Queue validation failed for ${vulnType}: ${errMsg}`); - return false; - } - }; -} - -// Factory function for exploit deliverable validators -function createExploitValidator(vulnType: VulnType): AgentValidator { - return async (sourceDir: string): Promise => { - const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`); - return await fs.pathExists(evidenceFile); - }; -} - -// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts -// Keys are promptTemplate values from AGENTS registry (session-manager.ts) -export const MCP_AGENT_MAPPING: Record = Object.freeze({ - // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code') - // NOTE: Pre-recon is pure code analysis and doesn't use browser automation, - // but assigning MCP server anyway for consistency and future extensibility - 'pre-recon-code': 'playwright-agent1', - - // Phase 2: Reconnaissance (actual prompt name is 'recon') - recon: 'playwright-agent2', - - // Phase 3: Vulnerability Analysis (5 parallel agents) - 'vuln-injection': 'playwright-agent1', - 'vuln-xss': 'playwright-agent2', - 'vuln-auth': 'playwright-agent3', - 'vuln-ssrf': 'playwright-agent4', - 'vuln-authz': 'playwright-agent5', - - // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts) - 'exploit-injection': 'playwright-agent1', - 'exploit-xss': 'playwright-agent2', - 'exploit-auth': 'playwright-agent3', - 'exploit-ssrf': 'playwright-agent4', - 'exploit-authz': 'playwright-agent5', - - // Phase 5: Reporting (actual prompt name is 'report-executive') - // NOTE: Report generation is typically text-based and doesn't use browser automation, - // but assigning MCP server anyway for potential screenshot inclusion or future needs - 'report-executive': 'playwright-agent3', -}); - -// Direct agent-to-validator mapping - much simpler than pattern matching -export const AGENT_VALIDATORS: Record = Object.freeze({ - // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent - 'pre-recon': async (sourceDir: string): Promise => { - const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md'); - return await fs.pathExists(codeAnalysisFile); - }, - - // Reconnaissance agent - recon: async (sourceDir: string): Promise => { - const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md'); - return await fs.pathExists(reconFile); - }, - - // Vulnerability analysis agents - 'injection-vuln': createVulnValidator('injection'), - 'xss-vuln': createVulnValidator('xss'), - 'auth-vuln': createVulnValidator('auth'), - 'ssrf-vuln': createVulnValidator('ssrf'), - 'authz-vuln': createVulnValidator('authz'), - - // Exploitation agents - 'injection-exploit': createExploitValidator('injection'), - 'xss-exploit': createExploitValidator('xss'), - 'auth-exploit': createExploitValidator('auth'), - 'ssrf-exploit': createExploitValidator('ssrf'), - 'authz-exploit': createExploitValidator('authz'), - - // Executive report agent - report: async (sourceDir: string, logger: ActivityLogger): Promise => { - const reportFile = path.join( - sourceDir, - 'deliverables', - 'comprehensive_security_assessment_report.md' - ); - - const reportExists = await fs.pathExists(reportFile); - - if (!reportExists) { - logger.error('Missing required deliverable: comprehensive_security_assessment_report.md'); - } - - return reportExists; - }, -}); diff --git a/src/services/agent-execution.ts b/src/services/agent-execution.ts index 771e7b7..9574739 100644 --- a/src/services/agent-execution.ts +++ b/src/services/agent-execution.ts @@ -21,13 +21,13 @@ * No Temporal dependencies - pure domain logic. */ -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; import { Result, ok, err, isErr } from '../types/result.js'; import { ErrorCode } from '../types/errors.js'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from './error-handling.js'; import { isSpendingCapBehavior } from '../utils/billing-detection.js'; import { AGENTS } from '../session-manager.js'; -import { loadPrompt } from '../prompts/prompt-manager.js'; +import { loadPrompt } from './prompt-manager.js'; import { runClaudePrompt, validateAgentOutput, @@ -38,7 +38,7 @@ import { commitGitSuccess, rollbackGitWorkspace, getGitCommitHash, -} from '../utils/git-manager.js'; +} from './git-manager.js'; import { AuditSession } from '../audit/index.js'; import type { AgentEndResult } from '../types/audit.js'; import type { AgentName } from '../types/agents.js'; diff --git a/src/services/config-loader.ts b/src/services/config-loader.ts index 629ca43..506603b 100644 --- a/src/services/config-loader.ts +++ b/src/services/config-loader.ts @@ -12,7 +12,7 @@ */ import { parseConfig, distributeConfig } from '../config-parser.js'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from './error-handling.js'; import { Result, ok, err } from '../types/result.js'; import { ErrorCode } from '../types/errors.js'; import type { DistributedConfig } from '../types/config.js'; diff --git a/src/error-handling.ts b/src/services/error-handling.ts similarity index 99% rename from src/error-handling.ts rename to src/services/error-handling.ts index 88fa501..098d32b 100644 --- a/src/error-handling.ts +++ b/src/services/error-handling.ts @@ -9,11 +9,11 @@ import { type PentestErrorType, type PentestErrorContext, type PromptErrorResult, -} from './types/errors.js'; +} from '../types/errors.js'; import { matchesBillingApiPattern, matchesBillingTextPattern, -} from './utils/billing-detection.js'; +} from '../utils/billing-detection.js'; // Custom error class for pentest operations export class PentestError extends Error { diff --git a/src/services/exploitation-checker.ts b/src/services/exploitation-checker.ts index 326188c..2ecea91 100644 --- a/src/services/exploitation-checker.ts +++ b/src/services/exploitation-checker.ts @@ -17,9 +17,9 @@ import { validateQueueSafe, type VulnType, type ExploitationDecision, -} from '../queue-validation.js'; +} from './queue-validation.js'; import { isOk } from '../types/result.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; /** * Service for checking exploitation queue decisions. diff --git a/src/utils/git-manager.ts b/src/services/git-manager.ts similarity index 98% rename from src/utils/git-manager.ts rename to src/services/git-manager.ts index 68cb87b..dfc401f 100644 --- a/src/utils/git-manager.ts +++ b/src/services/git-manager.ts @@ -5,9 +5,9 @@ // as published by the Free Software Foundation. import { $ } from 'zx'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from './error-handling.js'; import { ErrorCode } from '../types/errors.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; /** * Check if a directory is a git repository. diff --git a/src/services/index.ts b/src/services/index.ts index 159eea2..29c489f 100644 --- a/src/services/index.ts +++ b/src/services/index.ts @@ -18,3 +18,6 @@ export { ConfigLoaderService } from './config-loader.js'; export { ExploitationCheckerService } from './exploitation-checker.js'; export { AgentExecutionService } from './agent-execution.js'; export type { AgentExecutionInput } from './agent-execution.js'; + +export { assembleFinalReport, injectModelIntoReport } from './reporting.js'; +export { loadPrompt } from './prompt-manager.js'; diff --git a/src/prompts/prompt-manager.ts b/src/services/prompt-manager.ts similarity index 98% rename from src/prompts/prompt-manager.ts rename to src/services/prompt-manager.ts index 0af37c4..36e0f22 100644 --- a/src/prompts/prompt-manager.ts +++ b/src/services/prompt-manager.ts @@ -5,10 +5,10 @@ // as published by the Free Software Foundation. import { fs, path } from 'zx'; -import { PentestError, handlePromptError } from '../error-handling.js'; -import { MCP_AGENT_MAPPING } from '../constants.js'; +import { PentestError, handlePromptError } from './error-handling.js'; +import { MCP_AGENT_MAPPING } from '../session-manager.js'; import type { Authentication, DistributedConfig } from '../types/config.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; interface PromptVariables { webUrl: string; diff --git a/src/queue-validation.ts b/src/services/queue-validation.ts similarity index 96% rename from src/queue-validation.ts rename to src/services/queue-validation.ts index a55d80f..dde8666 100644 --- a/src/queue-validation.ts +++ b/src/services/queue-validation.ts @@ -6,11 +6,12 @@ import { fs, path } from 'zx'; import { PentestError } from './error-handling.js'; -import { ErrorCode } from './types/errors.js'; -import { type Result, ok, err } from './types/result.js'; -import { asyncPipe } from './utils/functional.js'; +import { ErrorCode } from '../types/errors.js'; +import { type Result, ok, err } from '../types/result.js'; +import { asyncPipe } from '../utils/functional.js'; +import type { VulnType, ExploitationDecision } from '../types/agents.js'; -export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; +export type { VulnType, ExploitationDecision } from '../types/agents.js'; interface VulnTypeConfigItem { deliverable: string; @@ -62,12 +63,6 @@ interface QueueValidationResult { error: string | null; } -export interface ExploitationDecision { - shouldExploit: boolean; - shouldRetry: boolean; - vulnerabilityCount: number; - vulnType: VulnType; -} /** * Result type for safe validation - explicit error handling. diff --git a/src/phases/reporting.ts b/src/services/reporting.ts similarity index 97% rename from src/phases/reporting.ts rename to src/services/reporting.ts index e8887ad..bc04fe1 100644 --- a/src/phases/reporting.ts +++ b/src/services/reporting.ts @@ -5,9 +5,9 @@ // as published by the Free Software Foundation. import { fs, path } from 'zx'; -import { PentestError } from '../error-handling.js'; +import { PentestError } from './error-handling.js'; import { ErrorCode } from '../types/errors.js'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from '../types/activity-logger.js'; interface DeliverableFile { name: string; diff --git a/src/session-manager.ts b/src/session-manager.ts index 6307e01..8180040 100644 --- a/src/session-manager.ts +++ b/src/session-manager.ts @@ -4,7 +4,10 @@ // it under the terms of the GNU Affero General Public License version 3 // as published by the Free Software Foundation. -import type { AgentName, AgentDefinition } from './types/index.js'; +import { path, fs } from 'zx'; +import { validateQueueAndDeliverable } from './services/queue-validation.js'; +import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js'; +import type { ActivityLogger } from './types/activity-logger.js'; // Agent definitions according to PRD // NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES @@ -122,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly> = Object.fr 'report': 'reporting', }); +// Factory function for vulnerability queue validators +function createVulnValidator(vulnType: VulnType): AgentValidator { + return async (sourceDir: string, logger: ActivityLogger): Promise => { + try { + await validateQueueAndDeliverable(vulnType, sourceDir); + return true; + } catch (error) { + const errMsg = error instanceof Error ? error.message : String(error); + logger.warn(`Queue validation failed for ${vulnType}: ${errMsg}`); + return false; + } + }; +} +// Factory function for exploit deliverable validators +function createExploitValidator(vulnType: VulnType): AgentValidator { + return async (sourceDir: string): Promise => { + const evidenceFile = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`); + return await fs.pathExists(evidenceFile); + }; +} + +// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts +// Keys are promptTemplate values from AGENTS registry +export const MCP_AGENT_MAPPING: Record = Object.freeze({ + // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code') + // NOTE: Pre-recon is pure code analysis and doesn't use browser automation, + // but assigning MCP server anyway for consistency and future extensibility + 'pre-recon-code': 'playwright-agent1', + + // Phase 2: Reconnaissance (actual prompt name is 'recon') + recon: 'playwright-agent2', + + // Phase 3: Vulnerability Analysis (5 parallel agents) + 'vuln-injection': 'playwright-agent1', + 'vuln-xss': 'playwright-agent2', + 'vuln-auth': 'playwright-agent3', + 'vuln-ssrf': 'playwright-agent4', + 'vuln-authz': 'playwright-agent5', + + // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts) + 'exploit-injection': 'playwright-agent1', + 'exploit-xss': 'playwright-agent2', + 'exploit-auth': 'playwright-agent3', + 'exploit-ssrf': 'playwright-agent4', + 'exploit-authz': 'playwright-agent5', + + // Phase 5: Reporting (actual prompt name is 'report-executive') + // NOTE: Report generation is typically text-based and doesn't use browser automation, + // but assigning MCP server anyway for potential screenshot inclusion or future needs + 'report-executive': 'playwright-agent3', +}); + +// Direct agent-to-validator mapping - much simpler than pattern matching +export const AGENT_VALIDATORS: Record = Object.freeze({ + // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent + 'pre-recon': async (sourceDir: string): Promise => { + const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md'); + return await fs.pathExists(codeAnalysisFile); + }, + + // Reconnaissance agent + recon: async (sourceDir: string): Promise => { + const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md'); + return await fs.pathExists(reconFile); + }, + + // Vulnerability analysis agents + 'injection-vuln': createVulnValidator('injection'), + 'xss-vuln': createVulnValidator('xss'), + 'auth-vuln': createVulnValidator('auth'), + 'ssrf-vuln': createVulnValidator('ssrf'), + 'authz-vuln': createVulnValidator('authz'), + + // Exploitation agents + 'injection-exploit': createExploitValidator('injection'), + 'xss-exploit': createExploitValidator('xss'), + 'auth-exploit': createExploitValidator('auth'), + 'ssrf-exploit': createExploitValidator('ssrf'), + 'authz-exploit': createExploitValidator('authz'), + + // Executive report agent + report: async (sourceDir: string, logger: ActivityLogger): Promise => { + const reportFile = path.join( + sourceDir, + 'deliverables', + 'comprehensive_security_assessment_report.md' + ); + + const reportExists = await fs.pathExists(reportFile); + + if (!reportExists) { + logger.error('Missing required deliverable: comprehensive_security_assessment_report.md'); + } + + return reportExists; + }, +}); diff --git a/src/temporal/activities.ts b/src/temporal/activities.ts index bcf8b1c..3d264a8 100644 --- a/src/temporal/activities.ts +++ b/src/temporal/activities.ts @@ -19,20 +19,21 @@ import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity'; import path from 'path'; import fs from 'fs/promises'; -import { classifyErrorForTemporal, PentestError } from '../error-handling.js'; +import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js'; import { ErrorCode } from '../types/errors.js'; import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js'; import { ExploitationCheckerService } from '../services/exploitation-checker.js'; -import type { VulnType, ExploitationDecision } from '../queue-validation.js'; +import type { VulnType, ExploitationDecision } from '../services/queue-validation.js'; import { AuditSession } from '../audit/index.js'; import type { WorkflowSummary } from '../audit/workflow-logger.js'; import type { AgentName } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; import type { AgentMetrics, ResumeState } from './shared.js'; -import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js'; -import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js'; +import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js'; +import { readJson, fileExists } from '../utils/file-io.js'; +import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js'; import { AGENTS } from '../session-manager.js'; -import { executeGitCommandWithRetry } from '../utils/git-manager.js'; +import { executeGitCommandWithRetry } from '../services/git-manager.js'; import type { ResumeAttempt } from '../audit/metrics-tracker.js'; import { createActivityLogger } from './activity-logger.js'; diff --git a/src/temporal/activity-logger.ts b/src/temporal/activity-logger.ts index 87cd1ad..bcfe2b9 100644 --- a/src/temporal/activity-logger.ts +++ b/src/temporal/activity-logger.ts @@ -5,16 +5,7 @@ // as published by the Free Software Foundation. import { Context } from '@temporalio/activity'; - -/** - * Logger interface for services called from Temporal activities. - * Keeps services Temporal-agnostic while providing structured logging. - */ -export interface ActivityLogger { - info(message: string, attrs?: Record): void; - warn(message: string, attrs?: Record): void; - error(message: string, attrs?: Record): void; -} +import type { ActivityLogger } from '../types/activity-logger.js'; /** * ActivityLogger backed by Temporal's Context.current().log. diff --git a/src/temporal/client.ts b/src/temporal/client.ts index 1e8df2a..b00228a 100644 --- a/src/temporal/client.ts +++ b/src/temporal/client.ts @@ -30,7 +30,7 @@ import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client'; import dotenv from 'dotenv'; import { displaySplashScreen } from '../splash-screen.js'; import { sanitizeHostname } from '../audit/utils.js'; -import { readJson, fileExists } from '../audit/utils.js'; +import { readJson, fileExists } from '../utils/file-io.js'; import path from 'path'; // Import types only - these don't pull in workflow runtime code import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js'; diff --git a/src/temporal/shared.ts b/src/temporal/shared.ts index b8cd582..883b2e0 100644 --- a/src/temporal/shared.ts +++ b/src/temporal/shared.ts @@ -1,6 +1,5 @@ import { defineQuery } from '@temporalio/workflow'; -// Re-export AgentMetrics from central types location export type { AgentMetrics } from '../types/metrics.js'; import type { AgentMetrics } from '../types/metrics.js'; diff --git a/src/temporal/workflows.ts b/src/temporal/workflows.ts index e5d1b6a..edc5b94 100644 --- a/src/temporal/workflows.ts +++ b/src/temporal/workflows.ts @@ -41,7 +41,7 @@ import { type AgentMetrics, type ResumeState, } from './shared.js'; -import type { VulnType } from '../queue-validation.js'; +import type { VulnType } from '../services/queue-validation.js'; import type { AgentName } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js'; import { toWorkflowSummary } from './summary-mapper.js'; diff --git a/src/types/activity-logger.ts b/src/types/activity-logger.ts new file mode 100644 index 0000000..ff2ae3f --- /dev/null +++ b/src/types/activity-logger.ts @@ -0,0 +1,15 @@ +// Copyright (C) 2025 Keygraph, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation. + +/** + * Logger interface for services called from Temporal activities. + * Keeps services Temporal-agnostic while providing structured logging. + */ +export interface ActivityLogger { + info(message: string, attrs?: Record): void; + warn(message: string, attrs?: Record): void; + error(message: string, attrs?: Record): void; +} diff --git a/src/types/agents.ts b/src/types/agents.ts index afa545d..e0265a9 100644 --- a/src/types/agents.ts +++ b/src/types/agents.ts @@ -41,7 +41,7 @@ export type PlaywrightAgent = | 'playwright-agent4' | 'playwright-agent5'; -import type { ActivityLogger } from '../temporal/activity-logger.js'; +import type { ActivityLogger } from './activity-logger.js'; export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise; @@ -59,3 +59,18 @@ export interface AgentDefinition { promptTemplate: string; deliverableFilename: string; } + +/** + * Vulnerability types supported by the pipeline. + */ +export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; + +/** + * Decision returned by queue validation for exploitation phase. + */ +export interface ExploitationDecision { + shouldExploit: boolean; + shouldRetry: boolean; + vulnerabilityCount: number; + vulnType: VulnType; +} diff --git a/src/types/audit.ts b/src/types/audit.ts index efa9326..c433bb1 100644 --- a/src/types/audit.ts +++ b/src/types/audit.ts @@ -8,6 +8,17 @@ * Audit system type definitions */ +/** + * Cross-cutting session metadata used by services, temporal, and audit. + */ +export interface SessionMetadata { + id: string; + webUrl: string; + repoPath?: string; + outputPath?: string; + [key: string]: unknown; +} + /** * Result data passed to audit system when an agent execution ends. * Used by both AuditSession and MetricsTracker. diff --git a/src/types/index.ts b/src/types/index.ts index 2061444..8cf4cd4 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -8,6 +8,7 @@ * Type definitions barrel export */ +export * from './activity-logger.js'; export * from './errors.js'; export * from './config.js'; export * from './agents.js'; diff --git a/src/utils/metrics.ts b/src/utils/metrics.ts index a02836d..d7f257a 100644 --- a/src/utils/metrics.ts +++ b/src/utils/metrics.ts @@ -26,33 +26,3 @@ export class Timer { return end - this.startTime; } } - -interface TimingResultsAgents { - [key: string]: number; -} - -interface TimingResults { - total: Timer | null; - agents: TimingResultsAgents; -} - -interface CostResultsAgents { - [key: string]: number; -} - -interface CostResults { - agents: CostResultsAgents; - total: number; -} - -// Global timing and cost tracker -export const timingResults: TimingResults = { - total: null, - agents: {}, -}; - -export const costResults: CostResults = { - agents: {}, - total: 0, -}; - diff --git a/src/utils/output-formatter.ts b/src/utils/output-formatter.ts deleted file mode 100644 index 1dabd43..0000000 --- a/src/utils/output-formatter.ts +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (C) 2025 Keygraph, Inc. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License version 3 -// as published by the Free Software Foundation. - -import { AGENTS } from '../session-manager.js'; - -interface ToolCallInput { - url?: string; - element?: string; - key?: string; - fields?: unknown[]; - text?: string; - action?: string; - description?: string; - todos?: Array<{ - status: string; - content: string; - }>; - [key: string]: unknown; -} - -interface ToolCall { - name: string; - input?: ToolCallInput; -} - -/** - * Extract domain from URL for display - */ -function extractDomain(url: string): string { - try { - const urlObj = new URL(url); - return urlObj.hostname || url.slice(0, 30); - } catch { - return url.slice(0, 30); - } -} - -/** - * Summarize TodoWrite updates into clean progress indicators - */ -function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null { - if (!input?.todos || !Array.isArray(input.todos)) { - return null; - } - - const todos = input.todos; - const completed = todos.filter((t) => t.status === 'completed'); - const inProgress = todos.filter((t) => t.status === 'in_progress'); - - // Show recently completed tasks - if (completed.length > 0) { - const recent = completed[completed.length - 1]!; - return `✅ ${recent.content}`; - } - - // Show current in-progress task - if (inProgress.length > 0) { - const current = inProgress[0]!; - return `🔄 ${current.content}`; - } - - return null; -} - -/** - * Get agent prefix for parallel execution - */ -export function getAgentPrefix(description: string): string { - // Map agent names to their prefixes - const agentPrefixes: Record = { - 'injection-vuln': '[Injection]', - 'xss-vuln': '[XSS]', - 'auth-vuln': '[Auth]', - 'authz-vuln': '[Authz]', - 'ssrf-vuln': '[SSRF]', - 'injection-exploit': '[Injection]', - 'xss-exploit': '[XSS]', - 'auth-exploit': '[Auth]', - 'authz-exploit': '[Authz]', - 'ssrf-exploit': '[SSRF]', - }; - - // First try to match by agent name directly - for (const [agentName, prefix] of Object.entries(agentPrefixes)) { - const agent = AGENTS[agentName as keyof typeof AGENTS]; - if (agent && description.includes(agent.displayName)) { - return prefix; - } - } - - // Fallback to partial matches for backwards compatibility - if (description.includes('injection')) return '[Injection]'; - if (description.includes('xss')) return '[XSS]'; - if (description.includes('authz')) return '[Authz]'; // Check authz before auth - if (description.includes('auth')) return '[Auth]'; - if (description.includes('ssrf')) return '[SSRF]'; - - return '[Agent]'; -} - -/** - * Format browser tool calls into clean progress indicators - */ -function formatBrowserAction(toolCall: ToolCall): string { - const toolName = toolCall.name; - const input = toolCall.input || {}; - - // Core Browser Operations - if (toolName === 'mcp__playwright__browser_navigate') { - const url = input.url || ''; - const domain = extractDomain(url); - return `🌐 Navigating to ${domain}`; - } - - if (toolName === 'mcp__playwright__browser_navigate_back') { - return `⬅️ Going back`; - } - - // Page Interaction - if (toolName === 'mcp__playwright__browser_click') { - const element = input.element || 'element'; - return `🖱️ Clicking ${element.slice(0, 25)}`; - } - - if (toolName === 'mcp__playwright__browser_hover') { - const element = input.element || 'element'; - return `👆 Hovering over ${element.slice(0, 20)}`; - } - - if (toolName === 'mcp__playwright__browser_type') { - const element = input.element || 'field'; - return `⌨️ Typing in ${element.slice(0, 20)}`; - } - - if (toolName === 'mcp__playwright__browser_press_key') { - const key = input.key || 'key'; - return `⌨️ Pressing ${key}`; - } - - // Form Handling - if (toolName === 'mcp__playwright__browser_fill_form') { - const fieldCount = input.fields?.length || 0; - return `📝 Filling ${fieldCount} form fields`; - } - - if (toolName === 'mcp__playwright__browser_select_option') { - return `📋 Selecting dropdown option`; - } - - if (toolName === 'mcp__playwright__browser_file_upload') { - return `📁 Uploading file`; - } - - // Page Analysis - if (toolName === 'mcp__playwright__browser_snapshot') { - return `📸 Taking page snapshot`; - } - - if (toolName === 'mcp__playwright__browser_take_screenshot') { - return `📸 Taking screenshot`; - } - - if (toolName === 'mcp__playwright__browser_evaluate') { - return `🔍 Running JavaScript analysis`; - } - - // Waiting & Monitoring - if (toolName === 'mcp__playwright__browser_wait_for') { - if (input.text) { - return `⏳ Waiting for "${input.text.slice(0, 20)}"`; - } - return `⏳ Waiting for page response`; - } - - if (toolName === 'mcp__playwright__browser_console_messages') { - return `📜 Checking console logs`; - } - - if (toolName === 'mcp__playwright__browser_network_requests') { - return `🌐 Analyzing network traffic`; - } - - // Tab Management - if (toolName === 'mcp__playwright__browser_tabs') { - const action = input.action || 'managing'; - return `🗂️ ${action} browser tab`; - } - - // Dialog Handling - if (toolName === 'mcp__playwright__browser_handle_dialog') { - return `💬 Handling browser dialog`; - } - - // Fallback for any missed tools - const actionType = toolName.split('_').pop(); - return `🌐 Browser: ${actionType}`; -} - -/** - * Filter out JSON tool calls from content, with special handling for Task calls - */ -export function filterJsonToolCalls(content: string | null | undefined): string { - if (!content || typeof content !== 'string') { - return content || ''; - } - - const lines = content.split('\n'); - const processedLines: string[] = []; - - for (const line of lines) { - const trimmed = line.trim(); - - // Skip empty lines - if (trimmed === '') { - continue; - } - - // Check if this is a JSON tool call - if (trimmed.startsWith('{"type":"tool_use"')) { - try { - const toolCall = JSON.parse(trimmed) as ToolCall; - - // Special handling for Task tool calls - if (toolCall.name === 'Task') { - const description = toolCall.input?.description || 'analysis agent'; - processedLines.push(`🚀 Launching ${description}`); - continue; - } - - // Special handling for TodoWrite tool calls - if (toolCall.name === 'TodoWrite') { - const summary = summarizeTodoUpdate(toolCall.input); - if (summary) { - processedLines.push(summary); - } - continue; - } - - // Special handling for browser tool calls - if (toolCall.name.startsWith('mcp__playwright__browser_')) { - const browserAction = formatBrowserAction(toolCall); - if (browserAction) { - processedLines.push(browserAction); - } - continue; - } - - // Hide all other tool calls (Read, Write, Grep, etc.) - continue; - } catch { - // If JSON parsing fails, treat as regular text - processedLines.push(line); - } - } else { - // Keep non-JSON lines (assistant text) - processedLines.push(line); - } - } - - return processedLines.join('\n'); -}