mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-05-22 16:49:46 +02:00
refactor: consolidate file layout and break circular dependencies
- Move error-handling, git-manager, prompt-manager, queue-validation, and reporting into src/services/
- Delete src/constants.ts — relocate AGENT_VALIDATORS and MCP_AGENT_MAPPING into session-manager.ts alongside agent definitions
- Delete src/utils/output-formatter.ts — absorb filterJsonToolCalls and getAgentPrefix into ai/output-formatters.ts
- Extract ActivityLogger interface into src/types/activity-logger.ts to break temporal/ → services circular dependency
- Consolidate VulnType, ExploitationDecision into types/agents.ts and SessionMetadata into types/audit.ts
- Remove dead timingResults/costResults globals from utils/metrics.ts and all consumers
This commit is contained in:
@@ -119,6 +119,29 @@ Defensive security tool only. Use only on systems you own or have explicit permi
|
||||
- Abstractions for one-time operations
|
||||
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
|
||||
|
||||
### Comments
|
||||
- Explain **WHY**, not WHAT — code shows what it does
|
||||
- Comments must be **timeless** — useful to a reader with no knowledge of this conversation
|
||||
- Never reference: this chat, refactoring history ("moved from X"), the AI, or deleted files
|
||||
- No comment is better than a bad comment
|
||||
|
||||
```typescript
|
||||
// Bad: references refactoring history
|
||||
// Moved from utils/helpers.ts
|
||||
|
||||
// Bad: references conversation
|
||||
// Added per user request
|
||||
|
||||
// Bad: states the obvious
|
||||
// Loop through the array
|
||||
|
||||
// Good: explains WHY
|
||||
// Retry with backoff — Temporal server rejects rapid reconnects
|
||||
|
||||
// Good: documents a gotcha
|
||||
// MUST use FAILSAFE_SCHEMA — default schema allows code execution
|
||||
```
|
||||
|
||||
## Key Files
|
||||
|
||||
**Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts`
|
||||
|
||||
@@ -9,11 +9,11 @@
|
||||
import { fs, path } from 'zx';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
|
||||
import { isRetryableError, PentestError } from '../error-handling.js';
|
||||
import { isRetryableError, PentestError } from '../services/error-handling.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { timingResults, Timer } from '../utils/metrics.js';
|
||||
import { Timer } from '../utils/metrics.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
|
||||
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
@@ -24,7 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
|
||||
import { createProgressManager } from './progress-manager.js';
|
||||
import { createAuditLogger } from './audit-logger.js';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
declare global {
|
||||
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
||||
@@ -274,7 +274,6 @@ export async function runClaudePrompt(
|
||||
}
|
||||
|
||||
const duration = timer.stop();
|
||||
timingResults.agents[execContext.agentKey] = duration;
|
||||
|
||||
if (apiErrorDetected) {
|
||||
logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
|
||||
@@ -295,7 +294,6 @@ export async function runClaudePrompt(
|
||||
|
||||
} catch (error) {
|
||||
const duration = timer.stop();
|
||||
timingResults.agents[execContext.agentKey] = duration;
|
||||
|
||||
const err = error as Error & { code?: string; status?: number };
|
||||
|
||||
|
||||
@@ -6,20 +6,19 @@
|
||||
|
||||
// Pure functions for processing SDK message types
|
||||
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from '../services/error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
|
||||
import { filterJsonToolCalls } from '../utils/output-formatter.js';
|
||||
import { filterJsonToolCalls } from './output-formatters.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import {
|
||||
formatAssistantOutput,
|
||||
formatResultOutput,
|
||||
formatToolUseOutput,
|
||||
formatToolResultOutput,
|
||||
} from './output-formatters.js';
|
||||
import { costResults } from '../utils/metrics.js';
|
||||
import type { AuditLogger } from './audit-logger.js';
|
||||
import type { ProgressManager } from './progress-manager.js';
|
||||
import type {
|
||||
@@ -362,8 +361,6 @@ export async function dispatchMessage(
|
||||
case 'result': {
|
||||
const resultData = handleResultMessage(message as ResultMessage);
|
||||
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
|
||||
costResults.agents[execContext.agentKey] = resultData.cost;
|
||||
costResults.total += resultData.cost;
|
||||
return { type: 'complete', result: resultData.result, cost: resultData.cost };
|
||||
}
|
||||
|
||||
|
||||
+266
-1
@@ -7,9 +7,274 @@
|
||||
// Pure functions for formatting console output
|
||||
|
||||
import { extractAgentType, formatDuration } from '../utils/formatting.js';
|
||||
import { getAgentPrefix } from '../utils/output-formatter.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { ExecutionContext, ResultData } from './types.js';
|
||||
|
||||
// --- Types for tool call filtering ---
|
||||
|
||||
/**
 * Arguments carried by a serialized tool call.
 *
 * Only the fields the console formatters read are named; the index signature
 * keeps every other tool-specific argument accessible as `unknown`.
 */
interface ToolCallInput {
  /** Read for `mcp__playwright__browser_navigate`. */
  url?: string;
  /** Read for the browser click, hover, and type tools. */
  element?: string;
  /** Read for `mcp__playwright__browser_press_key`. */
  key?: string;
  /** Read for `mcp__playwright__browser_fill_form`; only the count is displayed. */
  fields?: unknown[];
  /** Read for `mcp__playwright__browser_wait_for`. */
  text?: string;
  /** Read for `mcp__playwright__browser_tabs`. */
  action?: string;
  /** Read for `Task` calls. */
  description?: string;
  /** Read for `TodoWrite` calls. */
  todos?: { status: string; content: string }[];
  [key: string]: unknown;
}
|
||||
|
||||
/** Shape a `{"type":"tool_use"...}` output line is cast to after JSON parsing. */
interface ToolCall {
  /** Tool identifier, e.g. `Task`, `TodoWrite`, or an `mcp__playwright__browser_*` name. */
  name: string;
  /** Tool arguments; may be absent. */
  input?: ToolCallInput;
}
|
||||
|
||||
// --- Agent prefix logic ---
|
||||
|
||||
/**
 * Resolve the bracketed tag (for example `[XSS]`) that labels an agent's
 * output during parallel execution.
 *
 * A description containing a registered agent's display name wins. Otherwise
 * lowercase keyword matching is tried, and `[Agent]` is the last resort.
 *
 * @param description - Human-readable description of the running agent.
 * @returns The prefix tag, always wrapped in square brackets.
 */
export function getAgentPrefix(description: string): string {
  const prefixByAgent: Record<string, string> = {
    'injection-vuln': '[Injection]',
    'xss-vuln': '[XSS]',
    'auth-vuln': '[Auth]',
    'authz-vuln': '[Authz]',
    'ssrf-vuln': '[SSRF]',
    'injection-exploit': '[Injection]',
    'xss-exploit': '[XSS]',
    'auth-exploit': '[Auth]',
    'authz-exploit': '[Authz]',
    'ssrf-exploit': '[SSRF]',
  };

  const registryMatch = Object.entries(prefixByAgent).find(([agentName]) => {
    const agent = AGENTS[agentName as keyof typeof AGENTS];
    return Boolean(agent && description.includes(agent.displayName));
  });
  if (registryMatch !== undefined) {
    return registryMatch[1];
  }

  // Order matters: 'authz' contains 'auth', so the longer keyword is tested first
  const keywordPrefixes: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, prefix] of keywordPrefixes) {
    if (description.includes(keyword)) {
      return prefix;
    }
  }

  return '[Agent]';
}
|
||||
|
||||
// --- Tool call filtering ---
|
||||
|
||||
/**
 * Reduce a URL to a short label for progress output.
 *
 * @param url - Raw URL string taken from a tool call.
 * @returns The parsed hostname, or the first 30 characters of `url` when it
 *   cannot be parsed or has an empty hostname.
 */
function extractDomain(url: string): string {
  const truncated = url.slice(0, 30);

  let hostname = '';
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Unparseable input still needs a label, so the truncated raw text is used
  }

  return hostname || truncated;
}
|
||||
|
||||
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * The last completed item takes priority; if nothing is completed, the first
 * in-progress item is shown.
 *
 * @param input - Arguments of the TodoWrite call, if any.
 * @returns A `✅`/`🔄` prefixed line, or `null` when there is nothing to report.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todos = input?.todos;
  if (!todos || !Array.isArray(todos)) {
    return null;
  }

  let lastCompleted: (typeof todos)[number] | undefined;
  let firstInProgress: (typeof todos)[number] | undefined;
  for (const todo of todos) {
    if (todo.status === 'completed') {
      lastCompleted = todo;
    } else if (todo.status === 'in_progress' && firstInProgress === undefined) {
      firstInProgress = todo;
    }
  }

  if (lastCompleted !== undefined) {
    return `✅ ${lastCompleted.content}`;
  }
  if (firstInProgress !== undefined) {
    return `🔄 ${firstInProgress.content}`;
  }
  return null;
}
|
||||
|
||||
/**
 * Turn a Playwright browser tool call into a one-line progress indicator.
 *
 * @param toolCall - Parsed tool call whose name starts with `mcp__playwright__browser_`.
 * @returns A short, emoji-prefixed description of the action. Unrecognized
 *   tools are labelled with the last `_`-separated segment of their name.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args: ToolCallInput = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Tools without a dedicated message still get a generic label
    default:
      return `🌐 Browser: ${name.split('_').pop()}`;
  }
}
|
||||
|
||||
/**
 * Produce the display line for a parsed tool call.
 *
 * @returns The replacement line, or `null` when the call should be hidden.
 */
function describeToolCall(toolCall: ToolCall): string | null {
  if (toolCall.name === 'Task') {
    return `🚀 Launching ${toolCall.input?.description || 'analysis agent'}`;
  }
  if (toolCall.name === 'TodoWrite') {
    return summarizeTodoUpdate(toolCall.input);
  }
  if (toolCall.name.startsWith('mcp__playwright__browser_')) {
    return formatBrowserAction(toolCall);
  }
  // Every other tool (Read, Write, Grep, etc.) is hidden
  return null;
}

/**
 * Strip serialized tool calls out of assistant output.
 *
 * Blank lines are dropped. Lines starting with `{"type":"tool_use"` are parsed:
 * Task, TodoWrite, and Playwright browser calls become short progress lines,
 * and all other tool calls are removed. Lines that fail to parse or process
 * are kept unchanged, as is all ordinary text.
 *
 * @param content - Raw output text; `null`, `undefined`, and `''` yield `''`.
 * @returns The filtered text, joined with `\n`.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (!content || typeof content !== 'string') {
    return content || '';
  }

  const kept: string[] = [];

  for (const rawLine of content.split('\n')) {
    const trimmed = rawLine.trim();
    if (trimmed === '') {
      continue;
    }

    if (!trimmed.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      continue;
    }

    try {
      const replacement = describeToolCall(JSON.parse(trimmed) as ToolCall);
      if (replacement) {
        kept.push(replacement);
      }
    } catch {
      // A line that only looks like a tool call is ordinary text and must survive
      kept.push(rawLine);
    }
  }

  return kept.join('\n');
}
|
||||
|
||||
// --- Console output formatting ---
|
||||
|
||||
export function detectExecutionContext(description: string): ExecutionContext {
|
||||
const isParallelExecution =
|
||||
description.includes('vuln agent') || description.includes('exploit agent');
|
||||
|
||||
@@ -18,7 +18,7 @@ import { initializeAuditStructure, type SessionMetadata } from './utils.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { SessionMutex } from '../utils/concurrency.js';
|
||||
import type { AgentEndResult } from '../types/index.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from '../services/error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
|
||||
// Global mutex instance
|
||||
|
||||
@@ -18,7 +18,7 @@ import {
|
||||
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
|
||||
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
|
||||
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from '../services/error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { AgentName, AgentEndResult } from '../types/index.js';
|
||||
|
||||
|
||||
+4
-11
@@ -15,9 +15,10 @@ import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// Import and re-export file I/O utilities from canonical location
|
||||
import { ensureDirectory, atomicWrite, readJson, fileExists } from '../utils/file-io.js';
|
||||
export { ensureDirectory, atomicWrite, readJson, fileExists };
|
||||
import { ensureDirectory } from '../utils/file-io.js';
|
||||
|
||||
export type { SessionMetadata } from '../types/audit.js';
|
||||
import type { SessionMetadata } from '../types/audit.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
@@ -26,14 +27,6 @@ const __dirname = path.dirname(__filename);
|
||||
const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
|
||||
const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
|
||||
|
||||
/**
 * Descriptor of an audit session.
 *
 * `id` and `webUrl` are required; the index signature lets callers attach
 * additional keys of any type.
 */
export interface SessionMetadata {
  id: string;
  webUrl: string;
  repoPath?: string;
  outputPath?: string;
  [key: string]: unknown;
}
|
||||
|
||||
/**
|
||||
* Extract and sanitize hostname from URL for use in identifiers
|
||||
*/
|
||||
|
||||
@@ -9,7 +9,7 @@ import { fs } from 'zx';
|
||||
import yaml from 'js-yaml';
|
||||
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
|
||||
import type { FormatsPlugin } from 'ajv-formats';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { PentestError } from './services/error-handling.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
import type {
|
||||
Config,
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { path, fs } from 'zx';
|
||||
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
|
||||
import type { AgentName, PlaywrightAgent, AgentValidator } from './types/agents.js';
|
||||
import type { ActivityLogger } from './temporal/activity-logger.js';
|
||||
|
||||
/**
 * Build the validator for a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * succeeds. A failure is logged as a warning and reported as `false` instead
 * of being rethrown.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  return async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      logger.warn(`Queue validation failed for ${vulnType}: ${reason}`);
      return false;
    }
    return true;
  };
}
|
||||
|
||||
/**
 * Build the validator for an exploitation agent.
 *
 * The returned validator resolves to whether
 * `deliverables/<vulnType>_exploitation_evidence.md` exists under `sourceDir`.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
|
||||
|
||||
/**
 * Assigns each agent to a specific Playwright instance to prevent conflicts.
 *
 * Keys are promptTemplate values from the AGENTS registry (session-manager.ts).
 * Each exploit agent shares an instance with its vuln counterpart.
 */
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
  // Phase 1: pre-reconnaissance (prompt name 'pre-recon-code').
  // Pure code analysis with no browser automation; an instance is assigned
  // anyway for consistency and future extensibility.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2: reconnaissance (prompt name 'recon')
  'recon': 'playwright-agent2',

  // Phase 3: vulnerability analysis (5 parallel agents)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4: exploitation (5 parallel agents)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5: reporting (prompt name 'report-executive').
  // Typically text-based with no browser automation; an instance is assigned
  // anyway for potential screenshot inclusion or future needs.
  'report-executive': 'playwright-agent3',
});
|
||||
|
||||
/** Resolve whether `deliverables/<filename>` exists under `sourceDir`. */
async function deliverableExists(sourceDir: string, filename: string): Promise<boolean> {
  return fs.pathExists(path.join(sourceDir, 'deliverables', filename));
}

/**
 * Validator for each agent, keyed directly by agent name.
 *
 * A validator resolves to `true` when the agent's expected deliverable is in
 * place.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable created by the agent
  'pre-recon': async (sourceDir: string): Promise<boolean> =>
    deliverableExists(sourceDir, 'code_analysis_deliverable.md'),

  // Reconnaissance
  'recon': async (sourceDir: string): Promise<boolean> =>
    deliverableExists(sourceDir, 'recon_deliverable.md'),

  // Vulnerability analysis
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: the only validator that logs an error when its file is missing
  'report': async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const found = await deliverableExists(
      sourceDir,
      'comprehensive_security_assessment_report.md'
    );
    if (!found) {
      logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    }
    return found;
  },
});
|
||||
@@ -21,13 +21,13 @@
|
||||
* No Temporal dependencies - pure domain logic.
|
||||
*/
|
||||
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import { Result, ok, err, isErr } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||
import { loadPrompt } from './prompt-manager.js';
|
||||
import {
|
||||
runClaudePrompt,
|
||||
validateAgentOutput,
|
||||
@@ -38,7 +38,7 @@ import {
|
||||
commitGitSuccess,
|
||||
rollbackGitWorkspace,
|
||||
getGitCommitHash,
|
||||
} from '../utils/git-manager.js';
|
||||
} from './git-manager.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { AgentEndResult } from '../types/audit.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
*/
|
||||
|
||||
import { parseConfig, distributeConfig } from '../config-parser.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { Result, ok, err } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
@@ -9,11 +9,11 @@ import {
|
||||
type PentestErrorType,
|
||||
type PentestErrorContext,
|
||||
type PromptErrorResult,
|
||||
} from './types/errors.js';
|
||||
} from '../types/errors.js';
|
||||
import {
|
||||
matchesBillingApiPattern,
|
||||
matchesBillingTextPattern,
|
||||
} from './utils/billing-detection.js';
|
||||
} from '../utils/billing-detection.js';
|
||||
|
||||
// Custom error class for pentest operations
|
||||
export class PentestError extends Error {
|
||||
@@ -17,9 +17,9 @@ import {
|
||||
validateQueueSafe,
|
||||
type VulnType,
|
||||
type ExploitationDecision,
|
||||
} from '../queue-validation.js';
|
||||
} from './queue-validation.js';
|
||||
import { isOk } from '../types/result.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/**
|
||||
* Service for checking exploitation queue decisions.
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { $ } from 'zx';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/**
|
||||
* Check if a directory is a git repository.
|
||||
@@ -18,3 +18,6 @@ export { ConfigLoaderService } from './config-loader.js';
|
||||
export { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
export { AgentExecutionService } from './agent-execution.js';
|
||||
export type { AgentExecutionInput } from './agent-execution.js';
|
||||
|
||||
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
|
||||
export { loadPrompt } from './prompt-manager.js';
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError, handlePromptError } from '../error-handling.js';
|
||||
import { MCP_AGENT_MAPPING } from '../constants.js';
|
||||
import { PentestError, handlePromptError } from './error-handling.js';
|
||||
import { MCP_AGENT_MAPPING } from '../session-manager.js';
|
||||
import type { Authentication, DistributedConfig } from '../types/config.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
interface PromptVariables {
|
||||
webUrl: string;
|
||||
@@ -6,11 +6,12 @@
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
import { type Result, ok, err } from './types/result.js';
|
||||
import { asyncPipe } from './utils/functional.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { type Result, ok, err } from '../types/result.js';
|
||||
import { asyncPipe } from '../utils/functional.js';
|
||||
import type { VulnType, ExploitationDecision } from '../types/agents.js';
|
||||
|
||||
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
|
||||
export type { VulnType, ExploitationDecision } from '../types/agents.js';
|
||||
|
||||
interface VulnTypeConfigItem {
|
||||
deliverable: string;
|
||||
@@ -62,12 +63,6 @@ interface QueueValidationResult {
|
||||
error: string | null;
|
||||
}
|
||||
|
||||
/**
 * Outcome of checking one vulnerability type's exploitation queue.
 *
 * NOTE(review): field meanings are inferred from their names; confirm against
 * the code that produces this value.
 */
export interface ExploitationDecision {
  vulnType: VulnType;
  vulnerabilityCount: number;
  shouldExploit: boolean;
  shouldRetry: boolean;
}
|
||||
|
||||
/**
|
||||
* Result type for safe validation - explicit error handling.
|
||||
@@ -5,9 +5,9 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
interface DeliverableFile {
|
||||
name: string;
|
||||
+101
-1
@@ -4,7 +4,10 @@
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import type { AgentName, AgentDefinition } from './types/index.js';
|
||||
import { path, fs } from 'zx';
|
||||
import { validateQueueAndDeliverable } from './services/queue-validation.js';
|
||||
import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
|
||||
import type { ActivityLogger } from './types/activity-logger.js';
|
||||
|
||||
// Agent definitions according to PRD
|
||||
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
|
||||
@@ -122,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
|
||||
'report': 'reporting',
|
||||
});
|
||||
|
||||
/**
 * Build the validator for a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * succeeds. A failure is logged as a warning and reported as `false` instead
 * of being rethrown.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  return async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      logger.warn(`Queue validation failed for ${vulnType}: ${reason}`);
      return false;
    }
    return true;
  };
}
|
||||
|
||||
/**
 * Build the validator for an exploitation agent.
 *
 * The returned validator resolves to whether
 * `deliverables/<vulnType>_exploitation_evidence.md` exists under `sourceDir`.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
|
||||
|
||||
/**
 * Assigns each agent to a specific Playwright instance to prevent conflicts.
 *
 * Keys are promptTemplate values from the AGENTS registry. Each exploit agent
 * shares an instance with its vuln counterpart.
 */
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
  // Phase 1: pre-reconnaissance (prompt name 'pre-recon-code').
  // Pure code analysis with no browser automation; an instance is assigned
  // anyway for consistency and future extensibility.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2: reconnaissance (prompt name 'recon')
  'recon': 'playwright-agent2',

  // Phase 3: vulnerability analysis (5 parallel agents)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4: exploitation (5 parallel agents)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5: reporting (prompt name 'report-executive').
  // Typically text-based with no browser automation; an instance is assigned
  // anyway for potential screenshot inclusion or future needs.
  'report-executive': 'playwright-agent3',
});
|
||||
|
||||
/** Resolve whether `deliverables/<filename>` exists under `sourceDir`. */
async function deliverableExists(sourceDir: string, filename: string): Promise<boolean> {
  return fs.pathExists(path.join(sourceDir, 'deliverables', filename));
}

/**
 * Validator for each agent, keyed directly by agent name.
 *
 * A validator resolves to `true` when the agent's expected deliverable is in
 * place.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable created by the agent
  'pre-recon': async (sourceDir: string): Promise<boolean> =>
    deliverableExists(sourceDir, 'code_analysis_deliverable.md'),

  // Reconnaissance
  'recon': async (sourceDir: string): Promise<boolean> =>
    deliverableExists(sourceDir, 'recon_deliverable.md'),

  // Vulnerability analysis
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: the only validator that logs an error when its file is missing
  'report': async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const found = await deliverableExists(
      sourceDir,
      'comprehensive_security_assessment_report.md'
    );
    if (!found) {
      logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    }
    return found;
  },
});
|
||||
|
||||
@@ -19,20 +19,21 @@ import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
|
||||
import path from 'path';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
import { classifyErrorForTemporal, PentestError } from '../error-handling.js';
|
||||
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
|
||||
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
|
||||
import type { VulnType, ExploitationDecision } from '../queue-validation.js';
|
||||
import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import type { AgentMetrics, ResumeState } from './shared.js';
|
||||
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
|
||||
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
|
||||
import { readJson, fileExists } from '../utils/file-io.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
|
||||
import { executeGitCommandWithRetry } from '../services/git-manager.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
import { createActivityLogger } from './activity-logger.js';
|
||||
|
||||
|
||||
@@ -5,16 +5,7 @@
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { Context } from '@temporalio/activity';
|
||||
|
||||
/**
 * Structured logger handed to services that are called from Temporal
 * activities. Services depend only on this interface, which keeps them
 * Temporal-agnostic.
 */
export interface ActivityLogger {
  /** Log an informational message with optional structured attributes. */
  info(message: string, attrs?: Record<string, unknown>): void;

  /** Log a warning with optional structured attributes. */
  warn(message: string, attrs?: Record<string, unknown>): void;

  /** Log an error with optional structured attributes. */
  error(message: string, attrs?: Record<string, unknown>): void;
}
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/**
|
||||
* ActivityLogger backed by Temporal's Context.current().log.
|
||||
|
||||
@@ -30,7 +30,7 @@ import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client';
|
||||
import dotenv from 'dotenv';
|
||||
import { displaySplashScreen } from '../splash-screen.js';
|
||||
import { sanitizeHostname } from '../audit/utils.js';
|
||||
import { readJson, fileExists } from '../audit/utils.js';
|
||||
import { readJson, fileExists } from '../utils/file-io.js';
|
||||
import path from 'path';
|
||||
// Import types only - these don't pull in workflow runtime code
|
||||
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { defineQuery } from '@temporalio/workflow';
|
||||
|
||||
// Re-export AgentMetrics from central types location
|
||||
export type { AgentMetrics } from '../types/metrics.js';
|
||||
import type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ import {
|
||||
type AgentMetrics,
|
||||
type ResumeState,
|
||||
} from './shared.js';
|
||||
import type { VulnType } from '../queue-validation.js';
|
||||
import type { VulnType } from '../services/queue-validation.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import { toWorkflowSummary } from './summary-mapper.js';
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
 * Structured logger handed to services that are called from Temporal
 * activities. Services depend only on this interface, which keeps them
 * Temporal-agnostic.
 */
export interface ActivityLogger {
  /** Log an informational message with optional structured attributes. */
  info(message: string, attrs?: Record<string, unknown>): void;

  /** Log a warning with optional structured attributes. */
  warn(message: string, attrs?: Record<string, unknown>): void;

  /** Log an error with optional structured attributes. */
  error(message: string, attrs?: Record<string, unknown>): void;
}
|
||||
+16
-1
@@ -41,7 +41,7 @@ export type PlaywrightAgent =
|
||||
| 'playwright-agent4'
|
||||
| 'playwright-agent5';
|
||||
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from './activity-logger.js';
|
||||
|
||||
/**
 * Validator for one agent's output: receives the source directory and a
 * logger, and resolves to a boolean verdict.
 */
export type AgentValidator = (
  sourceDir: string,
  logger: ActivityLogger
) => Promise<boolean>;
|
||||
|
||||
@@ -59,3 +59,18 @@ export interface AgentDefinition {
|
||||
promptTemplate: string;
|
||||
deliverableFilename: string;
|
||||
}
|
||||
|
||||
/**
 * The vulnerability categories the pipeline supports.
 */
export type VulnType =
  | 'injection'
  | 'xss'
  | 'auth'
  | 'ssrf'
  | 'authz';
|
||||
|
||||
/**
 * Outcome of queue validation, consumed when deciding how to handle the
 * exploitation phase for one vulnerability type.
 */
export interface ExploitationDecision {
  /** Whether the exploitation phase should run. */
  shouldExploit: boolean;
  /** Whether a retry is requested. */
  shouldRetry: boolean;
  /** Number of vulnerabilities counted by queue validation. */
  vulnerabilityCount: number;
  /** Vulnerability type this decision applies to. */
  vulnType: VulnType;
}
|
||||
|
||||
@@ -8,6 +8,17 @@
|
||||
* Audit system type definitions
|
||||
*/
|
||||
|
||||
/**
 * Session metadata shared across services, temporal, and audit code.
 *
 * Only `id` and `webUrl` are required; the index signature lets callers
 * attach additional keys of any type.
 */
export interface SessionMetadata {
  id: string;
  webUrl: string;
  repoPath?: string;
  outputPath?: string;
  [key: string]: unknown;
}
|
||||
|
||||
/**
|
||||
* Result data passed to audit system when an agent execution ends.
|
||||
* Used by both AuditSession and MetricsTracker.
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
* Type definitions barrel export
|
||||
*/
|
||||
|
||||
export * from './activity-logger.js';
|
||||
export * from './errors.js';
|
||||
export * from './config.js';
|
||||
export * from './agents.js';
|
||||
|
||||
@@ -26,33 +26,3 @@ export class Timer {
|
||||
return end - this.startTime;
|
||||
}
|
||||
}
|
||||
|
||||
/** Per-agent timing values keyed by an arbitrary string. */
interface TimingResultsAgents {
  [key: string]: number;
}

/** Timing state: an optional overall timer plus the per-agent values. */
interface TimingResults {
  total: Timer | null;
  agents: TimingResultsAgents;
}

/** Per-agent cost values keyed by an arbitrary string. */
interface CostResultsAgents {
  [key: string]: number;
}

/** Cost state: the per-agent values plus a running total. */
interface CostResults {
  agents: CostResultsAgents;
  total: number;
}
|
||||
|
||||
// Module-level timing tracker; starts with no overall timer and no agent entries.
export const timingResults: TimingResults = { total: null, agents: {} };

// Module-level cost tracker; starts with no agent entries and a zero total.
export const costResults: CostResults = { agents: {}, total: 0 };
|
||||
|
||||
|
||||
@@ -1,264 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
|
||||
/**
 * Input payload of a tool call. The named fields are the ones the formatters
 * in this file read; any other key is accepted through the index signature.
 */
interface ToolCallInput {
  url?: string;
  element?: string;
  key?: string;
  fields?: unknown[];
  text?: string;
  action?: string;
  description?: string;
  todos?: Array<{ status: string; content: string }>;
  [key: string]: unknown;
}
|
||||
|
||||
/** A tool invocation: the tool's name plus its optional input payload. */
interface ToolCall {
  name: string;
  input?: ToolCallInput;
}
|
||||
|
||||
/**
 * Produce a short display label for a URL.
 *
 * @param url - Raw URL text.
 * @returns The parsed hostname, or the first 30 characters of `url` when it
 *   cannot be parsed or has an empty hostname.
 */
function extractDomain(url: string): string {
  const truncated = url.slice(0, 30);

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    return truncated;
  }

  return hostname !== '' ? hostname : truncated;
}
|
||||
|
||||
/**
 * Reduce a TodoWrite payload to a single progress line.
 *
 * The most recently listed completed todo takes priority; otherwise the first
 * in-progress todo is shown.
 *
 * @param input - Tool call input that may carry a `todos` array.
 * @returns The progress line, or `null` when there is no `todos` array or no
 *   todo is completed or in progress.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todos = input?.todos;
  if (!todos || !Array.isArray(todos)) {
    return null;
  }

  const withStatus = (status: string) => todos.filter((todo) => todo.status === status);
  const finished = withStatus('completed');
  const active = withStatus('in_progress');

  const latestFinished = finished[finished.length - 1];
  if (latestFinished !== undefined) {
    return `✅ ${latestFinished.content}`;
  }

  const firstActive = active[0];
  if (firstActive !== undefined) {
    return `🔄 ${firstActive.content}`;
  }

  return null;
}
|
||||
|
||||
/**
 * Pick the bracketed prefix used to label an agent's output during parallel
 * execution.
 *
 * @param description - Free-form text describing the agent.
 * @returns The prefix of the first agent whose display name appears in
 *   `description`; otherwise the prefix of the first matching keyword;
 *   otherwise `'[Agent]'`.
 */
export function getAgentPrefix(description: string): string {
  const prefixByAgent: Record<string, string> = {
    'injection-vuln': '[Injection]',
    'xss-vuln': '[XSS]',
    'auth-vuln': '[Auth]',
    'authz-vuln': '[Authz]',
    'ssrf-vuln': '[SSRF]',
    'injection-exploit': '[Injection]',
    'xss-exploit': '[XSS]',
    'auth-exploit': '[Auth]',
    'authz-exploit': '[Authz]',
    'ssrf-exploit': '[SSRF]',
  };

  // Preferred: the description contains a registered agent's display name.
  const byDisplayName = Object.entries(prefixByAgent).find(([agentName]) => {
    const agent = AGENTS[agentName as keyof typeof AGENTS];
    return agent ? description.includes(agent.displayName) : false;
  });
  if (byDisplayName) {
    return byDisplayName[1];
  }

  // Otherwise match on keywords. 'authz' MUST precede 'auth' — every
  // description containing 'authz' also contains 'auth'.
  const keywordPrefixes: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  const byKeyword = keywordPrefixes.find(([keyword]) => description.includes(keyword));

  return byKeyword ? byKeyword[1] : '[Agent]';
}
|
||||
|
||||
/**
 * Turn a Playwright browser tool call into a one-line progress indicator.
 *
 * @param toolCall - Tool call whose name selects the message; its input
 *   supplies optional details (URL, element, key, fields, text, action).
 * @returns The indicator text. Tool names without a dedicated message fall
 *   back to a generic line built from the text after the last underscore.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction — element descriptions are truncated to keep lines short
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Any other tool: label it with the last underscore-separated segment
    default: {
      const lastSegment = name.split('_').pop();
      return `🌐 Browser: ${lastSegment}`;
    }
  }
}
|
||||
|
||||
/**
 * Replace raw JSON tool-call lines in agent output with readable progress
 * lines.
 *
 * Per line:
 * - blank lines are dropped;
 * - lines that do not start with a `tool_use` JSON object are kept as-is;
 * - `Task` calls become a launch line, `TodoWrite` calls become a todo
 *   summary (when there is one), Playwright browser calls become an action
 *   description;
 * - every other tool call is hidden;
 * - a `tool_use` line that cannot be processed is kept verbatim.
 *
 * @param content - Raw output text; may be null or undefined.
 * @returns The filtered text, or `''` for empty, null or undefined input.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (!content || typeof content !== 'string') {
    return content || '';
  }

  return content
    .split('\n')
    .flatMap((line): string[] => {
      const trimmed = line.trim();
      if (trimmed === '') {
        return [];
      }
      if (!trimmed.startsWith('{"type":"tool_use"')) {
        // Ordinary assistant text.
        return [line];
      }

      try {
        const call = JSON.parse(trimmed) as ToolCall;
        const { name } = call;

        if (name === 'Task') {
          return [`🚀 Launching ${call.input?.description || 'analysis agent'}`];
        }

        if (name === 'TodoWrite') {
          const summary = summarizeTodoUpdate(call.input);
          return summary ? [summary] : [];
        }

        if (name.startsWith('mcp__playwright__browser_')) {
          const action = formatBrowserAction(call);
          return action ? [action] : [];
        }

        // Read, Write, Grep and every other tool stay hidden.
        return [];
      } catch {
        // Malformed payload: show the original line rather than lose it.
        return [line];
      }
    })
    .join('\n');
}
|
||||
Reference in New Issue
Block a user