Merge pull request #141 from KeygraphHQ/refactor/architecture

refactor: decompose activities into services layer with structured error handling
This commit is contained in:
Arjun Malleswaran
2026-02-17 12:22:23 -08:00
committed by GitHub
56 changed files with 2899 additions and 2913 deletions
+22 -14
View File
@@ -8,13 +8,14 @@ You are debugging an issue. Follow this structured approach to avoid spinning in
- Read the full error message and stack trace
- Identify the layer where the error originated:
- **CLI/Args** - Input validation, path resolution
- **Config Parsing** - YAML parsing, JSON Schema validation
- **Session Management** - Mutex, session.json, lock files
- **Audit System** - Logging, metrics tracking, atomic writes
- **Claude SDK** - Agent execution, MCP servers, turn handling
- **Git Operations** - Checkpoints, rollback, commit
- **Tool Execution** - nmap, subfinder, whatweb
- **Validation** - Deliverable checks, queue validation
- **Config Parsing** - YAML parsing, JSON Schema validation (`src/config-parser.ts`)
- **Session Management** - Agent definitions (`src/session-manager.ts`), mutex (`src/utils/concurrency.ts`)
- **DI Container** - Container initialization/lookup (`src/services/container.ts`)
- **Services** - AgentExecutionService, ConfigLoaderService, ExploitationCheckerService, error-handling (`src/services/`)
- **Audit System** - Logging, metrics tracking, atomic writes (`src/audit/`)
- **Claude SDK** - Agent execution, MCP servers, turn handling (`src/ai/claude-executor.ts`)
- **Git Operations** - Checkpoints, rollback, commit (`src/services/git-manager.ts`)
- **Validation** - Deliverable checks, queue validation (`src/services/queue-validation.ts`)
## Step 2: Check Relevant Logs
@@ -37,12 +38,14 @@ For Shannon, trace through these layers:
1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation
2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration
3. **Activities** → `src/temporal/activities.ts` - Agent execution with heartbeats
4. **Config** → `src/config-parser.ts` - YAML loading, schema validation
5. **Session** → `src/session-manager.ts` - Agent definitions, execution order
6. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
7. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
8. **Validation** → `src/queue-validation.ts` - Deliverable checks
3. **Activities** → `src/temporal/activities.ts` - Thin wrappers: heartbeat, error classification
4. **Container** → `src/services/container.ts` - Per-workflow DI
5. **Services** → `src/services/agent-execution.ts` - Agent lifecycle
6. **Config** → `src/config-parser.ts` via `src/services/config-loader.ts`
7. **Prompts** → `src/services/prompt-manager.ts`
8. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
9. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
10. **Validation** → `src/services/queue-validation.ts` - Deliverable checks
## Step 4: Identify Root Cause
@@ -58,7 +61,10 @@ For Shannon, trace through these layers:
| Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates |
| session.json corrupted | Partial write during crash | Delete and restart, or restore from backup |
| YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually |
| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `prompt-manager.ts` interpolation |
| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `src/services/prompt-manager.ts` interpolation |
| Service returns Err result | Check `ErrorCode` in Result | Trace through `classifyErrorForTemporal()` in `src/services/error-handling.ts` |
| Container not found | `getOrCreateContainer()` not called | Check activity setup code in `src/temporal/activities.ts` |
| ActivityLogger undefined | `createActivityLogger()` not called | Must be called at top of each activity function |
**MCP Server Issues:**
```bash
@@ -123,6 +129,8 @@ shannon <URL> <REPO> --pipeline-testing
## Quick Reference: Error Types
`ErrorCode` enum in `src/types/errors.ts` provides finer-grained classification used by `classifyErrorForTemporal()` in `src/services/error-handling.ts`.
| PentestError Type | Meaning | Retryable? |
|-------------------|---------|------------|
| `config` | Configuration file issues | No |
+11
View File
@@ -19,6 +19,8 @@ git diff HEAD
- [ ] **Retryable flag matches behavior** - If error will be retried, set `retryable: true`
- [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object
- [ ] **Never swallow errors silently** - Always log or propagate errors
- [ ] **Use ErrorCode enum** - Prefer `ErrorCode.CONFIG_INVALID` over string matching for classification
- [ ] **Result<T,E> for service returns** - Services return a `Result` instead of throwing
### Audit System & Concurrency (CRITICAL)
- [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution
@@ -41,6 +43,13 @@ git diff HEAD
- [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice
- [ ] **JSON Schema validation before use** - Config must pass AJV validation
### Services Layer & DI Container (CRITICAL)
- [ ] **Business logic in services, not activities** — Activities: heartbeat loop, error classification, container calls only. Domain logic → `src/services/`
- [ ] **Services accept ActivityLogger** — Never import `@temporalio/*` in services. Use `ActivityLogger` interface from `src/types/`
- [ ] **Result type for fallible operations** — Service methods return `Result<T, PentestError>`, unwrap with `isOk()`/`isErr()`. Activities call `executeOrThrow()` at the boundary
- [ ] **Container lifecycle** — `getOrCreateContainer()` at activity start, `removeContainer()` only in workflow cleanup
- [ ] **AuditSession not in container** — Must be passed per-agent call (parallel safety)
### Session & Agent Management (CRITICAL)
- [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items
- [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility
@@ -91,6 +100,8 @@ git diff HEAD
- [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level
- [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message
- [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts
- [ ] **Console.log in services** — Use `ActivityLogger`. Only CLI display code (`client.ts`, `worker.ts`, `output-formatters.ts`) uses console.log
- [ ] **Temporal imports in services** — Services must stay Temporal-agnostic. If you need Temporal APIs, it belongs in activities
### Code Quality
- [ ] **No dead code added** - Remove unused imports, functions, variables
+35 -13
View File
@@ -41,18 +41,20 @@ npm run build
## Architecture
### Core Modules
- `src/session-manager.ts` — Agent definitions, execution order, parallel groups
- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic and git checkpoints
- `src/session-manager.ts` — Agent definitions (`AGENTS` record). Agent types in `src/types/agents.ts`
- `src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `src/error-handling.ts` — Categorized error types (PentestError, ConfigError, NetworkError) with retry logic
- `src/tool-checker.ts` — Validates external security tool availability before execution
- `src/queue-validation.ts` — Deliverable validation and agent prerequisites
- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
- `src/utils/` — Shared utilities (file I/O, formatting, concurrency)
### Temporal Orchestration
Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases).
- `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`)
- `src/temporal/activities.ts` — Activity implementations with heartbeats
- `src/temporal/activities.ts` — Thin wrappers — heartbeat loop, error classification, container lifecycle. Business logic delegated to `src/services/`
- `src/temporal/activity-logger.ts` — `TemporalActivityLogger` implementation of `ActivityLogger` interface
- `src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary`
- `src/temporal/worker.ts` — Worker entry point
- `src/temporal/client.ts` — CLI client for starting workflows
- `src/temporal/shared.ts` — Types, interfaces, query definitions
@@ -66,30 +68,32 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
### Supporting Systems
- **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters
- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `prompt-manager.ts`
- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `src/services/prompt-manager.ts`
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`audit/workflow-logger.ts`) provides unified, human-readable per-workflow logs and is backed by LogStream (`audit/log-stream.ts`), a shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
- **Workspaces & Resume** — Named workspaces via `WORKSPACE=<name>` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts`
## Development Notes
### Adding a New Agent
1. Define agent in `src/session-manager.ts` (add to `AGENT_QUEUE` and parallel group)
1. Define agent in `src/session-manager.ts` (add to `AGENTS` record). `ALL_AGENTS`/`AgentName` types live in `src/types/agents.ts`
2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`)
3. Add activity function in `src/temporal/activities.ts`
3. Two-layer pattern: add a thin activity wrapper in `src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry
4. Register activity in `src/temporal/workflows.ts` within the appropriate phase
### Modifying Prompts
- Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}`
- Shared partials in `prompts/shared/` included via `prompt-manager.ts`
- Shared partials in `prompts/shared/` included via `src/services/prompt-manager.ts`
- Test with `PIPELINE_TESTING=true` for fast iteration
### Key Design Patterns
- **Configuration-Driven** — YAML configs with JSON Schema validation
- **Progressive Analysis** — Each phase builds on previous results
- **SDK-First** — Claude Agent SDK handles autonomous analysis
- **Modular Error Handling** — Categorized errors with automatic retry (3 attempts per agent)
- **Modular Error Handling** — `ErrorCode` enum, `Result<T,E>` for explicit error propagation, automatic retry (3 attempts per agent)
- **Services Boundary** — Activities are thin Temporal wrappers; `src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
- **DI Container** — Per-workflow in `src/services/container.ts`. `AuditSession` excluded (parallel safety)
### Security
Defensive security tool only. Use only on systems you own or have explicit permission to test.
@@ -111,18 +115,36 @@ Defensive security tool only. Use only on systems you own or have explicit permi
- Use `function` keyword for top-level functions (not arrow functions)
- Explicit return type annotations on exported/top-level functions
- Prefer `readonly` for data that shouldn't be mutated
- `exactOptionalPropertyTypes` is enabled — use spread for optional props, not direct `undefined` assignment
### Avoid
- Combining multiple concerns into a single function to "save lines"
- Dense callback chains when sequential logic is clearer
- Sacrificing readability for DRY — some repetition is fine if clearer
- Abstractions for one-time operations
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
### Comments
Comments must be **timeless** — no references to this conversation, refactoring history, or the AI.
**Patterns used in this codebase:**
- `/** JSDoc */` — file headers (after license) and exported functions/interfaces
- `// N. Description` — numbered sequential steps inside function bodies. Use when a
function has 3+ distinct phases where at least one isn't immediately obvious from the
code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
(steps 1-9) and `injectModelIntoReport` (steps 1-5)
- `// === Section ===` — high-level dividers between groups of functions in long files,
or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
Not for sequential steps inside function bodies — use numbered steps for that
- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")
## Key Files
**Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts`
**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/audit/`
**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/services/`, `src/audit/`
**Config:** `shannon` (CLI), `docker-compose.yml`, `configs/`, `prompts/`
-1
View File
@@ -21,7 +21,6 @@
"figlet": "^1.9.3",
"gradient-string": "^3.0.0",
"js-yaml": "^4.1.0",
"zod": "^4.3.6",
"zx": "^8.0.0"
},
"devDependencies": {
-1
View File
@@ -23,7 +23,6 @@
"figlet": "^1.9.3",
"gradient-string": "^3.0.0",
"js-yaml": "^4.1.0",
"zod": "^4.3.6",
"zx": "^8.0.0"
},
"devDependencies": {
+53 -208
View File
@@ -7,18 +7,16 @@
// Production Claude agent execution with retry, git checkpoints, and audit logging
import { fs, path } from 'zx';
import chalk, { type ChalkInstance } from 'chalk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
import { timingResults, Timer } from '../utils/metrics.js';
import { isRetryableError, PentestError } from '../services/error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { Timer } from '../utils/metrics.js';
import { formatTimestamp } from '../utils/formatting.js';
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace, getGitCommitHash } from '../utils/git-manager.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
import type { SessionMetadata } from '../audit/utils.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/index.js';
import { dispatchMessage } from './message-handlers.js';
@@ -26,6 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
declare global {
var SHANNON_DISABLE_LOADER: boolean | undefined;
@@ -58,24 +57,27 @@ type McpServer = ReturnType<typeof createShannonHelperServer> | StdioMcpServer;
// Configures MCP servers for agent execution, with Docker-specific Chromium handling
function buildMcpServers(
sourceDir: string,
agentName: string | null
agentName: string | null,
logger: ActivityLogger
): Record<string, McpServer> {
// 1. Create the shannon-helper server (always present)
const shannonHelperServer = createShannonHelperServer(sourceDir);
const mcpServers: Record<string, McpServer> = {
'shannon-helper': shannonHelperServer,
};
// 2. Look up the agent's Playwright MCP mapping
if (agentName) {
const promptName = getPromptNameForAgent(agentName as AgentName);
const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
if (playwrightMcpName) {
console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`));
logger.info(`Assigned ${agentName} -> ${playwrightMcpName}`);
const userDataDir = `/tmp/${playwrightMcpName}`;
// Docker uses system Chromium; local dev uses Playwright's bundled browsers
// 3. Configure Playwright MCP args with Docker/local browser handling
const isDocker = process.env.SHANNON_DOCKER === 'true';
const mcpArgs: string[] = [
@@ -84,7 +86,6 @@ function buildMcpServers(
'--user-data-dir', userDataDir,
];
// Docker: Use system Chromium; Local: Use Playwright's bundled browsers
if (isDocker) {
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
mcpArgs.push('--browser', 'chromium');
@@ -107,6 +108,7 @@ function buildMcpServers(
}
}
// 4. Return configured servers
return mcpServers;
}
@@ -142,23 +144,23 @@ async function writeErrorLog(
};
const logPath = path.join(sourceDir, 'error.log');
await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
} catch (logError) {
const logErrMsg = logError instanceof Error ? logError.message : String(logError);
console.log(chalk.gray(` (Failed to write error log: ${logErrMsg})`));
} catch {
// Best-effort error log writing - don't propagate failures
}
}
export async function validateAgentOutput(
result: ClaudePromptResult,
agentName: string | null,
sourceDir: string
sourceDir: string,
logger: ActivityLogger
): Promise<boolean> {
console.log(chalk.blue(` Validating ${agentName} agent output`));
logger.info(`Validating ${agentName} agent output`);
try {
// Check if agent completed successfully
if (!result.success || !result.result) {
console.log(chalk.red(` Validation failed: Agent execution was unsuccessful`));
logger.error('Validation failed: Agent execution was unsuccessful');
return false;
}
@@ -166,28 +168,27 @@ export async function validateAgentOutput(
const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined;
if (!validator) {
console.log(chalk.yellow(` No validator found for agent "${agentName}" - assuming success`));
console.log(chalk.green(` Validation passed: Unknown agent with successful result`));
logger.warn(`No validator found for agent "${agentName}" - assuming success`);
logger.info('Validation passed: Unknown agent with successful result');
return true;
}
console.log(chalk.blue(` Using validator for agent: ${agentName}`));
console.log(chalk.blue(` Source directory: ${sourceDir}`));
logger.info(`Using validator for agent: ${agentName}`, { sourceDir });
// Apply validation function
const validationResult = await validator(sourceDir);
const validationResult = await validator(sourceDir, logger);
if (validationResult) {
console.log(chalk.green(` Validation passed: Required files/structure present`));
logger.info('Validation passed: Required files/structure present');
} else {
console.log(chalk.red(` Validation failed: Missing required deliverable files`));
logger.error('Validation failed: Missing required deliverable files');
}
return validationResult;
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
console.log(chalk.red(` Validation failed with error: ${errMsg}`));
logger.error(`Validation failed with error: ${errMsg}`);
return false;
}
}
@@ -200,14 +201,14 @@ export async function runClaudePrompt(
context: string = '',
description: string = 'Claude analysis',
agentName: string | null = null,
colorFn: ChalkInstance = chalk.cyan,
sessionMetadata: SessionMetadata | null = null,
auditSession: AuditSession | null = null,
attemptNumber: number = 1
logger: ActivityLogger
): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
// 2. Set up progress and audit infrastructure
const execContext = detectExecutionContext(description);
const progress = createProgressManager(
{ description, useCleanOutput: execContext.useCleanOutput },
@@ -215,11 +216,12 @@ export async function runClaudePrompt(
);
const auditLogger = createAuditLogger(auditSession);
console.log(chalk.blue(` Running Claude Code: ${description}...`));
logger.info(`Running Claude Code: ${description}...`);
const mcpServers = buildMcpServers(sourceDir, agentName);
// 3. Configure MCP servers
const mcpServers = buildMcpServers(sourceDir, agentName, logger);
// Build env vars to pass to SDK subprocesses
// 4. Build env vars to pass to SDK subprocesses
const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
};
@@ -230,6 +232,7 @@ export async function runClaudePrompt(
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
}
// 5. Configure SDK options
const options = {
model: 'claude-sonnet-4-5-20250929',
maxTurns: 10_000,
@@ -241,7 +244,7 @@ export async function runClaudePrompt(
};
if (!execContext.useCleanOutput) {
console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
logger.info(`SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`);
}
let turnCount = 0;
@@ -252,10 +255,11 @@ export async function runClaudePrompt(
progress.start();
try {
// 6. Process the message stream
const messageLoopResult = await processMessageStream(
fullPrompt,
options,
{ execContext, description, colorFn, progress, auditLogger },
{ execContext, description, progress, auditLogger, logger },
timer
);
@@ -266,30 +270,21 @@ export async function runClaudePrompt(
const model = messageLoopResult.model;
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
// When spending cap is hit, Claude returns a short message with $0 cost.
// Legitimate agent work NEVER costs $0 with only 1-2 turns.
if (turnCount <= 2 && totalCost === 0) {
const resultLower = (result || '').toLowerCase();
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
resultLower.includes(kw)
// 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
// Uses consolidated billing detection from utils/billing-detection.ts
if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
if (looksLikeBillingError) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
}
}
// 8. Finalize successful result
const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
if (apiErrorDetected) {
console.log(chalk.yellow(` API Error detected in ${description} - will validate deliverables before failing`));
logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
}
progress.finish(formatCompletionMessage(execContext, description, turnCount, duration));
@@ -306,8 +301,8 @@ export async function runClaudePrompt(
};
} catch (error) {
// 9. Handle errors — log, write error file, return failure
const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
const err = error as Error & { code?: string; status?: number };
@@ -340,9 +335,9 @@ interface MessageLoopResult {
interface MessageLoopDeps {
execContext: ReturnType<typeof detectExecutionContext>;
description: string;
colorFn: ChalkInstance;
progress: ReturnType<typeof createProgressManager>;
auditLogger: ReturnType<typeof createAuditLogger>;
logger: ActivityLogger;
}
async function processMessageStream(
@@ -351,7 +346,7 @@ async function processMessageStream(
deps: MessageLoopDeps,
timer: Timer
): Promise<MessageLoopResult> {
const { execContext, description, colorFn, progress, auditLogger } = deps;
const { execContext, description, progress, auditLogger, logger } = deps;
const HEARTBEAT_INTERVAL = 30000;
let turnCount = 0;
@@ -365,7 +360,7 @@ async function processMessageStream(
// Heartbeat logging when loader is disabled
const now = Date.now();
if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) {
console.log(chalk.blue(` [${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`));
logger.info(`[${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`);
lastHeartbeat = now;
}
@@ -377,7 +372,7 @@ async function processMessageStream(
const dispatchResult = await dispatchMessage(
message as { type: string; subtype?: string },
turnCount,
{ execContext, description, colorFn, progress, auditLogger }
{ execContext, description, progress, auditLogger, logger }
);
if (dispatchResult.type === 'throw') {
@@ -403,153 +398,3 @@ async function processMessageStream(
return { turnCount, result, apiErrorDetected, cost, model };
}
/**
 * Main entry point for agent execution. Handles retries, git checkpoints, and validation.
 *
 * Calls `runClaudePrompt` up to `maxRetries` (3) times. Every attempt begins with a git
 * checkpoint. A successful result is checked with `validateAgentOutput`; when validation
 * passes the work is committed with `commitGitSuccess` and the result is returned.
 * Validation failures and retryable errors roll the workspace back before the next attempt.
 *
 * @param prompt - Prompt text passed to `runClaudePrompt`.
 * @param sourceDir - Directory passed to the git checkpoint/rollback/commit helpers and to output validation.
 * @param _allowedTools - Not referenced anywhere in the function body.
 * @param context - Initial context; prepended to the prompt for the audit record and passed to `runClaudePrompt`.
 * @param description - Label used in console messages, git helper calls, and error text.
 * @param agentName - Agent identifier; audit calls are made only when this and an audit session are both present.
 * @param colorFn - Forwarded to `runClaudePrompt`.
 * @param sessionMetadata - When supplied together with `agentName`, an `AuditSession` is created and initialized.
 * @returns The result of the first attempt that succeeds and passes validation.
 * @throws The caught error when `isRetryableError` rejects it, a `PentestError` when validation
 *   still fails on the last attempt, or `lastError` once the loop finishes without returning.
 */
export async function runClaudePromptWithRetry(
prompt: string,
sourceDir: string,
_allowedTools: string = 'Read',
context: string = '',
description: string = 'Claude analysis',
agentName: string | null = null,
colorFn: ChalkInstance = chalk.cyan,
sessionMetadata: SessionMetadata | null = null
): Promise<ClaudePromptResult> {
const maxRetries = 3;
let lastError: Error | undefined;
// Starts as the caller's context; may be extended with partial results after a failed attempt
let retryContext = context;
console.log(chalk.cyan(`Starting ${description} with ${maxRetries} max attempts`));
// Audit session is optional: created only when both session metadata and an agent name exist
let auditSession: AuditSession | null = null;
if (sessionMetadata && agentName) {
auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
}
for (let attempt = 1; attempt <= maxRetries; attempt++) {
// Checkpoint first so a failed attempt can be rolled back
await createGitCheckpoint(sourceDir, description, attempt);
if (auditSession && agentName) {
// Record the same prompt text that runClaudePrompt will build from context + prompt
const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
await auditSession.startAgent(agentName, fullPrompt, attempt);
}
try {
const result = await runClaudePrompt(
prompt, sourceDir, retryContext,
description, agentName, colorFn, sessionMetadata, auditSession, attempt
);
// NOTE(review): there is no else-branch for `result.success === false`. In that case the
// attempt ends without rollback, audit end, or setting `lastError` — confirm whether
// runClaudePrompt can return an unsuccessful result instead of throwing.
if (result.success) {
const validationPassed = await validateAgentOutput(result, agentName, sourceDir);
if (validationPassed) {
if (result.apiErrorDetected) {
console.log(chalk.yellow(`Validation: Ready for exploitation despite API error warnings`));
}
if (auditSession && agentName) {
const commitHash = await getGitCommitHash(sourceDir);
// `checkpoint` is attached only when a commit hash was obtained
const endResult: {
attemptNumber: number;
duration_ms: number;
cost_usd: number;
success: true;
checkpoint?: string;
} = {
attemptNumber: attempt,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
};
if (commitHash) {
endResult.checkpoint = commitHash;
}
await auditSession.endAgent(agentName, endResult);
}
await commitGitSuccess(sourceDir, description);
console.log(chalk.green.bold(`${description} completed successfully on attempt ${attempt}/${maxRetries}`));
return result;
// Validation failure is retryable - agent might succeed on retry with cleaner workspace
} else {
console.log(chalk.yellow(`${description} completed but output validation failed`));
if (auditSession && agentName) {
await auditSession.endAgent(agentName, {
attemptNumber: attempt,
duration_ms: result.duration,
cost_usd: result.partialCost || result.cost || 0,
success: false,
error: 'Output validation failed',
isFinalAttempt: attempt === maxRetries
});
}
if (result.apiErrorDetected) {
console.log(chalk.yellow(`API Error detected with validation failure - treating as retryable`));
lastError = new Error('API Error: terminated with validation failure');
} else {
lastError = new Error('Output validation failed');
}
if (attempt < maxRetries) {
await rollbackGitWorkspace(sourceDir, 'validation failure');
continue;
} else {
// NOTE(review): this throw is inside the `try`, so the error passes through the `catch`
// block below (including a second `endAgent` call for this attempt) before leaving
// the function — confirm that is intended.
throw new PentestError(
`Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
'validation',
false,
{ description, sourceDir, attemptsExhausted: maxRetries }
);
}
}
}
} catch (error) {
// The error is read for optional duration/cost/partialResults fields; absent fields fall back to 0 or are skipped
const err = error as Error & { duration?: number; cost?: number; partialResults?: unknown };
lastError = err;
if (auditSession && agentName) {
await auditSession.endAgent(agentName, {
attemptNumber: attempt,
duration_ms: err.duration || 0,
cost_usd: err.cost || 0,
success: false,
error: err.message,
isFinalAttempt: attempt === maxRetries
});
}
// Non-retryable errors: roll back and rethrow immediately, regardless of remaining attempts
if (!isRetryableError(err)) {
console.log(chalk.red(`${description} failed with non-retryable error: ${err.message}`));
await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
throw err;
}
if (attempt < maxRetries) {
await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');
// Delay comes from getRetryDelay(err, attempt) and is treated as milliseconds below
const delay = getRetryDelay(err, attempt);
const delaySeconds = (delay / 1000).toFixed(1);
console.log(chalk.yellow(`${description} failed (attempt ${attempt}/${maxRetries})`));
console.log(chalk.gray(` Error: ${err.message}`));
console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));
// Feed any partial results into the next attempt's context (built from the original context)
if (err.partialResults) {
retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(err.partialResults)}`;
}
await new Promise(resolve => setTimeout(resolve, delay));
} else {
await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
console.log(chalk.red(` Final error: ${err.message}`));
}
}
}
// Reached only when no attempt returned a validated result
throw lastError;
}
+30 -43
View File
@@ -4,20 +4,19 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
// Pure functions for processing SDK message types
import { PentestError } from '../error-handling.js';
import { filterJsonToolCalls } from '../utils/output-formatter.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { matchesBillingTextPattern } from '../utils/billing-detection.js';
import { filterJsonToolCalls } from './output-formatters.js';
import { formatTimestamp } from '../utils/formatting.js';
import chalk from 'chalk';
import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import {
formatAssistantOutput,
formatResultOutput,
formatToolUseOutput,
formatToolResultOutput,
} from './output-formatters.js';
import { costResults } from '../utils/metrics.js';
import type { AuditLogger } from './audit-logger.js';
import type { ProgressManager } from './progress-manager.js';
import type {
@@ -35,10 +34,9 @@ import type {
SystemInitMessage,
ExecutionContext,
} from './types.js';
import type { ChalkInstance } from 'chalk';
// Handles both array and string content formats from SDK
export function extractMessageContent(message: AssistantMessage): string {
function extractMessageContent(message: AssistantMessage): string {
const messageContent = message.message;
if (Array.isArray(messageContent.content)) {
@@ -51,7 +49,7 @@ export function extractMessageContent(message: AssistantMessage): string {
}
// Extracts only text content (no tool_use JSON) to avoid false positives in error detection
export function extractTextOnlyContent(message: AssistantMessage): string {
function extractTextOnlyContent(message: AssistantMessage): string {
const messageContent = message.message;
if (Array.isArray(messageContent.content)) {
@@ -64,7 +62,7 @@ export function extractTextOnlyContent(message: AssistantMessage): string {
return String(messageContent.content);
}
export function detectApiError(content: string): ApiErrorDetection {
function detectApiError(content: string): ApiErrorDetection {
if (!content || typeof content !== 'string') {
return { detected: false };
}
@@ -75,25 +73,15 @@ export function detectApiError(content: string): ApiErrorDetection {
// When Claude Code hits its spending cap, it returns a short message like
// "Spending cap reached resets 8am" instead of throwing an error.
// These should retry with 5-30 min backoff so workflows can recover when cap resets.
const BILLING_PATTERNS = [
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
];
const isBillingError = BILLING_PATTERNS.some((pattern) =>
lowerContent.includes(pattern)
);
if (isBillingError) {
if (matchesBillingTextPattern(content)) {
return {
detected: true,
shouldThrow: new PentestError(
`Billing limit reached: ${content.slice(0, 100)}`,
'billing',
true // RETRYABLE - Temporal will use 5-30 min backoff
true, // RETRYABLE - Temporal will use 5-30 min backoff
{},
ErrorCode.SPENDING_CAP_REACHED
),
};
}
@@ -127,7 +115,9 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Billing error (structured): ${content.slice(0, 100)}`,
'billing',
true // Retryable with backoff
true, // Retryable with backoff
{},
ErrorCode.INSUFFICIENT_CREDITS
),
};
case 'rate_limit':
@@ -136,7 +126,9 @@ function handleStructuredError(
shouldThrow: new PentestError(
`Rate limit hit (structured): ${content.slice(0, 100)}`,
'network',
true // Retryable with backoff
true, // Retryable with backoff
{},
ErrorCode.API_RATE_LIMITED
),
};
case 'authentication_failed':
@@ -181,7 +173,7 @@ function handleStructuredError(
}
}
export function handleAssistantMessage(
function handleAssistantMessage(
message: AssistantMessage,
turnCount: number
): AssistantResult {
@@ -219,7 +211,7 @@ export function handleAssistantMessage(
}
// Final message of a query with cost/duration info
export function handleResultMessage(message: ResultMessage): ResultData {
function handleResultMessage(message: ResultMessage): ResultData {
const result: ResultData = {
result: message.result || null,
cost: message.total_cost_usd || 0,
@@ -236,14 +228,14 @@ export function handleResultMessage(message: ResultMessage): ResultData {
if (message.stop_reason !== undefined) {
result.stop_reason = message.stop_reason;
if (message.stop_reason && message.stop_reason !== 'end_turn') {
console.log(chalk.yellow(` Stop reason: ${message.stop_reason}`));
console.log(` Stop reason: ${message.stop_reason}`);
}
}
return result;
}
export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
return {
toolName: message.name,
parameters: message.input || {},
@@ -252,7 +244,7 @@ export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
}
// Truncates long results for display (500 char limit), preserves full content for logging
export function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
const content = message.content;
const contentStr =
typeof content === 'string' ? content : JSON.stringify(content, null, 2);
@@ -269,14 +261,12 @@ export function handleToolResultMessage(message: ToolResultMessage): ToolResultD
};
}
/**
 * Print each pre-formatted line to stdout via `console.log`, in order.
 *
 * @param lines - Lines to emit; an empty array prints nothing.
 */
function outputLines(lines: string[]): void {
  lines.forEach((entry) => {
    console.log(entry);
  });
}
// Message dispatch result types
export type MessageDispatchAction =
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
| { type: 'complete'; result: string | null; cost: number }
@@ -285,9 +275,9 @@ export type MessageDispatchAction =
export interface MessageDispatchDeps {
execContext: ExecutionContext;
description: string;
colorFn: ChalkInstance;
progress: ProgressManager;
auditLogger: AuditLogger;
logger: ActivityLogger;
}
// Dispatches SDK messages to appropriate handlers and formatters
@@ -296,7 +286,7 @@ export async function dispatchMessage(
turnCount: number,
deps: MessageDispatchDeps
): Promise<MessageDispatchAction> {
const { execContext, description, colorFn, progress, auditLogger } = deps;
const { execContext, description, progress, auditLogger, logger } = deps;
switch (message.type) {
case 'assistant': {
@@ -312,8 +302,7 @@ export async function dispatchMessage(
assistantResult.cleanedContent,
execContext,
turnCount,
description,
colorFn
description
));
progress.start();
}
@@ -321,7 +310,7 @@ export async function dispatchMessage(
await auditLogger.logLlmResponse(turnCount, assistantResult.content);
if (assistantResult.apiErrorDetected) {
console.log(chalk.red(` API Error detected in assistant response`));
logger.warn('API Error detected in assistant response');
return { type: 'continue', apiErrorDetected: true };
}
@@ -333,10 +322,10 @@ export async function dispatchMessage(
const initMsg = message as SystemInitMessage;
const actualModel = getActualModelName(initMsg.model);
if (!execContext.useCleanOutput) {
console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
logger.info(`Model: ${actualModel}, Permission: ${initMsg.permissionMode}`);
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
console.log(chalk.blue(` MCP: ${mcpStatus}`));
logger.info(`MCP: ${mcpStatus}`);
}
}
// Return actual model for tracking in audit logs
@@ -368,13 +357,11 @@ export async function dispatchMessage(
case 'result': {
const resultData = handleResultMessage(message as ResultMessage);
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
costResults.agents[execContext.agentKey] = resultData.cost;
costResults.total += resultData.cost;
return { type: 'complete', result: resultData.result, cost: resultData.cost };
}
default:
console.log(chalk.gray(` ${message.type}: ${JSON.stringify(message, null, 2)}`));
logger.info(`Unhandled message type: ${message.type}`);
return { type: 'continue' };
}
}
+286 -41
View File
@@ -4,13 +4,267 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
// Pure functions for formatting console output
import chalk from 'chalk';
import { extractAgentType, formatDuration } from '../utils/formatting.js';
import { getAgentPrefix } from '../utils/output-formatter.js';
import { AGENTS } from '../session-manager.js';
import type { ExecutionContext, ResultData } from './types.js';
/**
 * Loosely-typed `input` payload of a tool call parsed from assistant output.
 *
 * Every field is optional; the index signature admits any other key. The
 * named fields are the ones the formatters in this file actually read.
 */
interface ToolCallInput {
// Read for `browser_navigate`; only its domain is displayed.
url?: string;
// Element description shown (truncated) for click / hover / type actions.
element?: string;
// Key name shown for `browser_press_key`.
key?: string;
// Only the array length is used (`browser_fill_form` field count).
fields?: unknown[];
// Text shown (truncated) for `browser_wait_for`.
text?: string;
// Action label shown for `browser_tabs`.
action?: string;
// Label shown when a `Task` tool call launches an agent.
description?: string;
// TodoWrite payload; entries with status 'completed' / 'in_progress' are summarized.
todos?: Array<{
status: string;
content: string;
}>;
[key: string]: unknown;
}
/**
 * Shape assumed for a `{"type":"tool_use", ...}` JSON line after parsing.
 * Parsed values are cast to this type without runtime validation.
 */
interface ToolCall {
// Tool identifier, e.g. 'Task', 'TodoWrite' or 'mcp__playwright__browser_*'.
name: string;
// Tool arguments; may be absent.
input?: ToolCallInput;
}
/**
 * Resolve the bracketed prefix (e.g. `[XSS]`) used to tag output lines of
 * agents running in parallel.
 *
 * Resolution happens in two passes:
 *  1. match the description against the `displayName` of each known
 *     vuln/exploit agent in `AGENTS`;
 *  2. fall back to case-sensitive keyword matching on the description.
 *
 * @param description - Human-readable agent description.
 * @returns The matching prefix, or `[Agent]` when nothing matches.
 */
export function getAgentPrefix(description: string): string {
  const prefixByAgent: Record<string, string> = {
    'injection-vuln': '[Injection]',
    'xss-vuln': '[XSS]',
    'auth-vuln': '[Auth]',
    'authz-vuln': '[Authz]',
    'ssrf-vuln': '[SSRF]',
    'injection-exploit': '[Injection]',
    'xss-exploit': '[XSS]',
    'auth-exploit': '[Auth]',
    'authz-exploit': '[Authz]',
    'ssrf-exploit': '[SSRF]',
  };

  // Pass 1: the first agent whose display name occurs in the description wins.
  const byDisplayName = Object.entries(prefixByAgent).find(([agentKey]) => {
    const definition = AGENTS[agentKey as keyof typeof AGENTS];
    return definition ? description.includes(definition.displayName) : false;
  });
  if (byDisplayName) {
    return byDisplayName[1];
  }

  // Pass 2: keyword fallback kept for backwards compatibility.
  // Order matters: 'authz' must be tested before its substring 'auth'.
  const keywordFallbacks: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, prefix] of keywordFallbacks) {
    if (description.includes(keyword)) {
      return prefix;
    }
  }

  return '[Agent]';
}
/**
 * Produce a short display label for a URL.
 *
 * @param url - Candidate URL string.
 * @returns The parsed hostname; when the string is not a valid URL, or the
 *   hostname is empty, the first 30 characters of the raw input instead.
 */
function extractDomain(url: string): string {
  const truncated = (): string => url.slice(0, 30);

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not parseable as a URL: show a bounded slice of the raw input.
    return truncated();
  }

  return hostname || truncated();
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * Preference order: the last todo whose status is `completed`, otherwise the
 * first todo whose status is `in_progress` (prefixed with a spinner emoji).
 *
 * @param input - Tool call input; may be undefined or lack a `todos` array.
 * @returns The progress line, or `null` when there is nothing to report.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todoList = input?.todos;
  if (!todoList || !Array.isArray(todoList)) {
    return null;
  }

  let lastCompleted: { status: string; content: string } | undefined;
  let firstInProgress: { status: string; content: string } | undefined;

  // Single pass: remember the latest completed item and the earliest active one.
  todoList.forEach((todo) => {
    if (todo.status === 'completed') {
      lastCompleted = todo;
    } else if (todo.status === 'in_progress' && !firstInProgress) {
      firstInProgress = todo;
    }
  });

  if (lastCompleted) {
    return `${lastCompleted.content}`;
  }
  if (firstInProgress) {
    return `🔄 ${firstInProgress.content}`;
  }
  return null;
}
/**
 * Translate a Playwright MCP browser tool call into a short, human-readable
 * progress line.
 *
 * @param toolCall - Parsed tool call; `input` may be absent.
 * @returns A one-line description. Tool names without a dedicated message
 *   fall back to `Browser: <last underscore-separated segment of the name>`.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate': {
      const target = extractDomain(args.url || '');
      return `🌐 Navigating to ${target}`;
    }
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction (element labels are truncated to keep lines short)
    case 'mcp__playwright__browser_click': {
      const label = args.element || 'element';
      return `🖱️ Clicking ${label.slice(0, 25)}`;
    }
    case 'mcp__playwright__browser_hover': {
      const label = args.element || 'element';
      return `👆 Hovering over ${label.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_type': {
      const label = args.element || 'field';
      return `⌨️ Typing in ${label.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_press_key': {
      const pressed = args.key || 'key';
      return `⌨️ Pressing ${pressed}`;
    }

    // Form handling
    case 'mcp__playwright__browser_fill_form': {
      const total = args.fields?.length || 0;
      return `📝 Filling ${total} form fields`;
    }
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting & monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs': {
      const verb = args.action || 'managing';
      return `🗂️ ${verb} browser tab`;
    }

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Any tool without a dedicated message
    default: {
      const suffix = name.split('_').pop();
      return `🌐 Browser: ${suffix}`;
    }
  }
}
/**
 * Strip raw `tool_use` JSON lines out of assistant content, replacing the
 * interesting ones with short progress summaries.
 *
 * Line handling:
 *  - blank lines are dropped;
 *  - lines that do not start with `{"type":"tool_use"` are kept verbatim;
 *  - `Task` calls become a "Launching ..." line;
 *  - `TodoWrite` calls become a todo summary (or nothing);
 *  - Playwright browser calls become a browser action line;
 *  - every other tool call is hidden;
 *  - a tool_use line that fails to parse or process is kept verbatim.
 *
 * @param content - Raw assistant content; may be null/undefined/empty.
 * @returns The filtered content joined with `\n`, or `''` for empty input.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (!content || typeof content !== 'string') {
    return content || '';
  }

  const kept: string[] = [];

  for (const rawLine of content.split('\n')) {
    const stripped = rawLine.trim();

    if (stripped === '') {
      continue;
    }

    // Ordinary assistant text passes through untouched.
    if (!stripped.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      continue;
    }

    try {
      const call = JSON.parse(stripped) as ToolCall;
      let summary: string | null = null;

      if (call.name === 'Task') {
        summary = `🚀 Launching ${call.input?.description || 'analysis agent'}`;
      } else if (call.name === 'TodoWrite') {
        summary = summarizeTodoUpdate(call.input);
      } else if (call.name.startsWith('mcp__playwright__browser_')) {
        summary = formatBrowserAction(call);
      }
      // Any other tool (Read, Write, Grep, ...) leaves summary null and is hidden.

      if (summary) {
        kept.push(summary);
      }
    } catch {
      // Malformed or unexpected tool_use JSON: treat the line as regular text.
      kept.push(rawLine);
    }
  }

  return kept.join('\n');
}
export function detectExecutionContext(description: string): ExecutionContext {
const isParallelExecution =
description.includes('vuln agent') || description.includes('exploit agent');
@@ -33,8 +287,7 @@ export function formatAssistantOutput(
cleanedContent: string,
context: ExecutionContext,
turnCount: number,
description: string,
colorFn: typeof chalk.cyan = chalk.cyan
description: string
): string[] {
if (!cleanedContent.trim()) {
return [];
@@ -45,11 +298,11 @@ export function formatAssistantOutput(
if (context.isParallelExecution) {
// Compact output for parallel agents with prefixes
const prefix = getAgentPrefix(description);
lines.push(colorFn(`${prefix} ${cleanedContent}`));
lines.push(`${prefix} ${cleanedContent}`);
} else {
// Full turn output for sequential agents
lines.push(colorFn(`\n Turn ${turnCount} (${description}):`));
lines.push(colorFn(` ${cleanedContent}`));
lines.push(`\n Turn ${turnCount} (${description}):`);
lines.push(` ${cleanedContent}`);
}
return lines;
@@ -58,28 +311,24 @@ export function formatAssistantOutput(
export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] {
const lines: string[] = [];
lines.push(chalk.magenta(`\n COMPLETED:`));
lines.push(
chalk.gray(
` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`
)
);
lines.push(`\n COMPLETED:`);
lines.push(` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`);
if (data.subtype === 'error_max_turns') {
lines.push(chalk.red(` Stopped: Hit maximum turns limit`));
lines.push(` Stopped: Hit maximum turns limit`);
} else if (data.subtype === 'error_during_execution') {
lines.push(chalk.red(` Stopped: Execution error`));
lines.push(` Stopped: Execution error`);
}
if (data.permissionDenials > 0) {
lines.push(chalk.yellow(` ${data.permissionDenials} permission denials`));
lines.push(` ${data.permissionDenials} permission denials`);
}
if (showFullResult && data.result && typeof data.result === 'string') {
if (data.result.length > 1000) {
lines.push(chalk.magenta(` ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`));
lines.push(` ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`);
} else {
lines.push(chalk.magenta(` ${data.result}`));
lines.push(` ${data.result}`);
}
}
@@ -98,24 +347,24 @@ export function formatErrorOutput(
if (context.isParallelExecution) {
const prefix = getAgentPrefix(description);
lines.push(chalk.red(`${prefix} Failed (${formatDuration(duration)})`));
lines.push(`${prefix} Failed (${formatDuration(duration)})`);
} else if (context.useCleanOutput) {
lines.push(chalk.red(`${context.agentType} failed (${formatDuration(duration)})`));
lines.push(`${context.agentType} failed (${formatDuration(duration)})`);
} else {
lines.push(chalk.red(` Claude Code failed: ${description} (${formatDuration(duration)})`));
lines.push(` Claude Code failed: ${description} (${formatDuration(duration)})`);
}
lines.push(chalk.red(` Error Type: ${error.constructor.name}`));
lines.push(chalk.red(` Message: ${error.message}`));
lines.push(chalk.gray(` Agent: ${description}`));
lines.push(chalk.gray(` Working Directory: ${sourceDir}`));
lines.push(chalk.gray(` Retryable: ${isRetryable ? 'Yes' : 'No'}`));
lines.push(` Error Type: ${error.constructor.name}`);
lines.push(` Message: ${error.message}`);
lines.push(` Agent: ${description}`);
lines.push(` Working Directory: ${sourceDir}`);
lines.push(` Retryable: ${isRetryable ? 'Yes' : 'No'}`);
if (error.code) {
lines.push(chalk.gray(` Error Code: ${error.code}`));
lines.push(` Error Code: ${error.code}`);
}
if (error.status) {
lines.push(chalk.gray(` HTTP Status: ${error.status}`));
lines.push(` HTTP Status: ${error.status}`);
}
return lines;
@@ -129,18 +378,14 @@ export function formatCompletionMessage(
): string {
if (context.isParallelExecution) {
const prefix = getAgentPrefix(description);
return chalk.green(`${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`);
return `${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`;
}
if (context.useCleanOutput) {
return chalk.green(
`${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`
);
return `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`;
}
return chalk.green(
` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`
);
return ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
}
export function formatToolUseOutput(
@@ -149,9 +394,9 @@ export function formatToolUseOutput(
): string[] {
const lines: string[] = [];
lines.push(chalk.yellow(`\n Using Tool: ${toolName}`));
lines.push(`\n Using Tool: ${toolName}`);
if (input && Object.keys(input).length > 0) {
lines.push(chalk.gray(` Input: ${JSON.stringify(input, null, 2)}`));
lines.push(` Input: ${JSON.stringify(input, null, 2)}`);
}
return lines;
@@ -160,9 +405,9 @@ export function formatToolUseOutput(
export function formatToolResultOutput(displayContent: string): string[] {
const lines: string[] = [];
lines.push(chalk.green(` Tool Result:`));
lines.push(` Tool Result:`);
if (displayContent) {
lines.push(chalk.gray(` ${displayContent}`));
lines.push(` ${displayContent}`);
}
return lines;
-6
View File
@@ -26,9 +26,3 @@ export function getActualModelName(sdkReportedModel?: string): string | undefine
return sdkReportedModel;
}
/**
 * Report whether router mode is active.
 *
 * @returns `true` only when both `ANTHROPIC_BASE_URL` and `ROUTER_DEFAULT`
 *   are set to non-empty values in the process environment.
 */
export function isRouterMode(): boolean {
  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerDefault } = process.env;
  return Boolean(baseUrl) && Boolean(routerDefault);
}
-38
View File
@@ -13,22 +13,6 @@ export interface ExecutionContext {
agentKey: string;
}
export interface ProcessingState {
turnCount: number;
result: string | null;
apiErrorDetected: boolean;
totalCost: number;
partialCost: number;
lastHeartbeat: number;
}
export interface ProcessingResult {
result: string | null;
turnCount: number;
apiErrorDetected: boolean;
totalCost: number;
}
export interface AssistantResult {
content: string;
cleanedContent: string;
@@ -110,15 +94,6 @@ export interface ApiErrorDetection {
shouldThrow?: Error;
}
// Message types from SDK stream
export type SdkMessage =
| AssistantMessage
| ResultMessage
| ToolUseMessage
| ToolResultMessage
| SystemInitMessage
| UserMessage;
export interface SystemInitMessage {
type: 'system';
subtype: 'init';
@@ -131,16 +106,3 @@ export interface UserMessage {
type: 'user';
}
// Dispatch result types for message processing
export type MessageDispatchResult =
| { action: 'continue' }
| { action: 'break'; result: string | null; cost: number }
| { action: 'throw'; error: Error };
export interface MessageDispatchContext {
turnCount: number;
execContext: ExecutionContext;
description: string;
colorFn: (text: string) => string;
useCleanOutput: boolean;
}
+46 -29
View File
@@ -17,21 +17,13 @@ import { MetricsTracker } from './metrics-tracker.js';
import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js';
import { SessionMutex } from '../utils/concurrency.js';
import type { AgentEndResult } from '../types/index.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
// Global mutex instance
const sessionMutex = new SessionMutex();
/**
 * Outcome of a single agent attempt, as passed to `AuditSession.endAgent()`.
 */
interface AgentEndResult {
// Attempt index for this run of the agent (callers pass the retry-loop counter).
attemptNumber: number;
// Attempt duration in milliseconds.
duration_ms: number;
// Cost attributed to this attempt, in USD.
cost_usd: number;
// Whether the attempt succeeded.
success: boolean;
model?: string | undefined;
// Error message recorded for a failed attempt.
error?: string | undefined;
// NOTE(review): presumably a git checkpoint reference; not used in the visible code, confirm.
checkpoint?: string | undefined;
// True when no further retries will follow this attempt.
isFinalAttempt?: boolean | undefined;
}
/**
* AuditSession - Main audit system facade
*/
@@ -50,10 +42,22 @@ export class AuditSession {
// Validate required fields
if (!this.sessionId) {
throw new Error('sessionMetadata.id is required');
throw new PentestError(
'sessionMetadata.id is required',
'config',
false,
{ field: 'sessionMetadata.id' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (!this.sessionMetadata.webUrl) {
throw new Error('sessionMetadata.webUrl is required');
throw new PentestError(
'sessionMetadata.webUrl is required',
'config',
false,
{ field: 'sessionMetadata.webUrl' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Components
@@ -103,29 +107,26 @@ export class AuditSession {
): Promise<void> {
await this.ensureInitialized();
// Save prompt snapshot (only on first attempt)
// 1. Save prompt snapshot (only on first attempt)
if (attemptNumber === 1) {
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
}
// Track current agent name for workflow logging
// 2. Create and initialize the per-agent logger
this.currentAgentName = agentName;
// Create and initialize logger for this attempt
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
await this.currentLogger.initialize();
// Start metrics tracking
// 3. Start metrics timer
this.metricsTracker.startAgent(agentName, attemptNumber);
// Log start event
// 4. Log start event to both agent log and workflow log
await this.currentLogger.logEvent('agent_start', {
agentName,
attemptNumber,
timestamp: formatTimestamp(),
});
// Log to unified workflow log
await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
}
@@ -134,7 +135,13 @@ export class AuditSession {
*/
async logEvent(eventType: string, eventData: unknown): Promise<void> {
if (!this.currentLogger) {
throw new Error('No active logger. Call startAgent() first.');
throw new PentestError(
'No active logger. Call startAgent() first.',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Log to agent-specific log file (JSON format)
@@ -167,7 +174,7 @@ export class AuditSession {
* End agent execution (mutex-protected)
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
// Log end event
// 1. Finalize agent log and close the stream
if (this.currentLogger) {
await this.currentLogger.logEvent('agent_end', {
agentName,
@@ -177,15 +184,13 @@ export class AuditSession {
timestamp: formatTimestamp(),
});
// Close logger
await this.currentLogger.close();
this.currentLogger = null;
}
// Reset current agent name
// 2. Log completion to the unified workflow log
this.currentAgentName = null;
// Log to unified workflow log
const agentLogDetails: AgentLogDetails = {
attemptNumber: result.attemptNumber,
duration_ms: result.duration_ms,
@@ -195,13 +200,11 @@ export class AuditSession {
};
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
// Mutex-protected update to session.json
// 3. Acquire mutex before touching session.json
const unlock = await sessionMutex.lock(this.sessionId);
try {
// Reload inside mutex to prevent lost updates during parallel exploitation phase
// 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
await this.metricsTracker.reload();
// Update metrics
await this.metricsTracker.endAgent(agentName, result);
} finally {
unlock();
@@ -278,4 +281,18 @@ export class AuditSession {
unlock();
}
}
/**
* Log resume header to workflow.log
* Call this when a workflow is resuming to add a visual separator
*/
async logResumeHeader(resumeInfo: {
previousWorkflowId: string;
newWorkflowId: string;
checkpointHash: string;
completedAgents: string[];
}): Promise<void> {
await this.ensureInitialized();
await this.workflowLogger.logResumeHeader(resumeInfo);
}
}
-4
View File
@@ -17,7 +17,3 @@
*/
export { AuditSession } from './audit-session.js';
export { AgentLogger } from './logger.js';
export { WorkflowLogger } from './workflow-logger.js';
export { MetricsTracker } from './metrics-tracker.js';
export * as AuditUtils from './utils.js';
+127
View File
@@ -0,0 +1,127 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* LogStream - Stream composition utility for append-only logging
*
* Encapsulates the common stream management pattern used by AgentLogger
* and WorkflowLogger: opening streams in append mode, handling backpressure,
* and proper cleanup.
*/
import fs from 'fs';
import path from 'path';
import { ensureDirectory } from '../utils/file-io.js';
/**
 * LogStream - Manages a single append-only log file stream.
 *
 * Lifecycle: `open()` creates the parent directory and an append-mode write
 * stream, `write()` appends text and resolves once it is safe to write again,
 * `close()` flushes and ends the stream. Stream errors are logged to stderr
 * and mark the stream as closed instead of crashing the process.
 */
export class LogStream {
  private readonly filePath: string;
  private stream: fs.WriteStream | null = null;
  private _isOpen: boolean = false;

  /**
   * @param filePath - Path of the log file to append to.
   */
  constructor(filePath: string) {
    this.filePath = filePath;
  }

  /**
   * Open the stream for writing (creates parent directories, opens in append
   * mode). Calling it on an already-open stream is a no-op.
   */
  async open(): Promise<void> {
    if (this._isOpen) {
      return;
    }

    // Ensure parent directory exists
    await ensureDirectory(path.dirname(this.filePath));

    // Create write stream in append mode
    this.stream = fs.createWriteStream(this.filePath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });

    // Handle stream errors to prevent crashes (log and mark closed)
    this.stream.on('error', (err) => {
      console.error(`LogStream error for ${this.filePath}:`, err.message);
      this._isOpen = false;
    });

    this._isOpen = true;
  }

  /**
   * Write text to the stream with backpressure handling.
   *
   * The returned promise settles exactly once:
   *  - it rejects if the write callback reports an error;
   *  - otherwise it resolves on whichever comes first: the write callback
   *    (this chunk was flushed) or, when the stream signalled backpressure,
   *    the 'drain' event.
   *
   * Resolving on the write callback matters under concurrent writers: the
   * callback for this chunk can fire while other chunks are still buffered,
   * i.e. before 'drain'. Waiting only for 'drain' after the callback has
   * already detached the listener would leave the promise pending forever.
   *
   * @param text - Text to append.
   * @throws Error (as a rejection) with message 'LogStream not open' when the
   *   stream has not been opened or was closed.
   */
  async write(text: string): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      throw new Error('LogStream not open');
    }

    return new Promise<void>((resolve, reject) => {
      let settled = false;

      const settle = (error?: Error | null): void => {
        if (settled) {
          return;
        }
        settled = true;
        // Never leave a stale 'drain' listener behind on the shared stream.
        stream.removeListener('drain', onDrain);
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      };
      const onDrain = (): void => settle();

      const accepted = stream.write(text, 'utf8', (error) => settle(error));

      // write() returned false: buffer is at/over the high-water mark, so also
      // listen for 'drain' in case it arrives before this chunk's callback.
      if (!accepted && !settled) {
        stream.once('drain', onDrain);
      }
    });
  }

  /**
   * Close the stream (flush and close). No-op when the stream is not open.
   */
  async close(): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      return;
    }

    return new Promise<void>((resolve) => {
      stream.end(() => {
        this._isOpen = false;
        this.stream = null;
        resolve();
      });
    });
  }

  /**
   * Check if the stream is currently open
   */
  get isOpen(): boolean {
    return this._isOpen;
  }

  /**
   * Get the file path this stream writes to
   */
  get path(): string {
    return this.filePath;
  }
}
+14 -54
View File
@@ -8,10 +8,9 @@
* Append-Only Agent Logger
*
* Provides crash-safe, append-only logging for agent execution.
* Uses file streams with immediate flush to prevent data loss.
* Uses LogStream for stream management with backpressure handling.
*/
import fs from 'fs';
import {
generateLogPath,
generatePromptPath,
@@ -19,6 +18,7 @@ import {
} from './utils.js';
import { atomicWrite } from '../utils/file-io.js';
import { formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
interface LogEvent {
type: string;
@@ -30,13 +30,11 @@ interface LogEvent {
* AgentLogger - Manages append-only logging for a single agent execution
*/
export class AgentLogger {
private sessionMetadata: SessionMetadata;
private agentName: string;
private attemptNumber: number;
private timestamp: number;
private logPath: string;
private stream: fs.WriteStream | null = null;
private isOpen: boolean = false;
private readonly sessionMetadata: SessionMetadata;
private readonly agentName: string;
private readonly attemptNumber: number;
private readonly timestamp: number;
private readonly logStream: LogStream;
constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
this.sessionMetadata = sessionMetadata;
@@ -44,26 +42,19 @@ export class AgentLogger {
this.attemptNumber = attemptNumber;
this.timestamp = Date.now();
// Generate log file path
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
this.logStream = new LogStream(logPath);
}
/**
* Initialize the log stream (creates file and opens stream)
*/
async initialize(): Promise<void> {
if (this.isOpen) {
if (this.logStream.isOpen) {
return; // Already initialized
}
// Create write stream with append mode and auto-flush
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a', // Append mode
encoding: 'utf8',
autoClose: true,
});
this.isOpen = true;
await this.logStream.open();
// Write header
await this.writeHeader();
@@ -83,29 +74,7 @@ export class AgentLogger {
`========================================\n`,
].join('\n');
return this.writeRaw(header);
}
/**
* Write raw text to log file with immediate flush
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.isOpen || !this.stream) {
reject(new Error('Logger not initialized'));
return;
}
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
return this.logStream.write(header);
}
/**
@@ -120,23 +89,14 @@ export class AgentLogger {
};
const eventLine = `${JSON.stringify(event)}\n`;
return this.writeRaw(eventLine);
return this.logStream.write(eventLine);
}
/**
* Close the log stream
*/
async close(): Promise<void> {
if (!this.isOpen || !this.stream) {
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.isOpen = false;
resolve();
});
});
return this.logStream.close();
}
/**
+30 -26
View File
@@ -18,7 +18,9 @@ import {
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import type { AgentName } from '../types/index.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
interface AttemptData {
attempt_number: number;
@@ -30,7 +32,7 @@ interface AttemptData {
error?: string | undefined;
}
interface AgentMetrics {
interface AgentAuditMetrics {
status: 'in-progress' | 'success' | 'failed';
attempts: AttemptData[];
final_duration_ms: number;
@@ -68,21 +70,10 @@ interface SessionData {
total_duration_ms: number;
total_cost_usd: number;
phases: Record<string, PhaseMetrics>;
agents: Record<string, AgentMetrics>;
agents: Record<string, AgentAuditMetrics>;
};
}
/**
 * Outcome of a single agent attempt, as consumed by `MetricsTracker.endAgent`.
 * One of these is recorded per attempt; a retried agent produces several.
 */
interface AgentEndResult {
// Stored as `attempt_number` on the attempt record.
attemptNumber: number;
// Duration of this attempt; becomes the agent's `final_duration_ms` when the attempt succeeds.
duration_ms: number;
// Cost of this attempt; summed over every attempt (failed ones included) into the agent total.
cost_usd: number;
// true marks the agent as 'success'; false only marks it 'failed' when `isFinalAttempt` is set.
success: boolean;
// Copied onto the agent metrics on success when provided.
model?: string | undefined;
// Copied onto the attempt record when provided.
error?: string | undefined;
// Stored on the agent metrics for a successful attempt.
checkpoint?: string | undefined;
// When set on an unsuccessful attempt, the agent status becomes 'failed'.
isFinalAttempt?: boolean | undefined;
}
interface ActiveTimer {
startTime: number;
attemptNumber: number;
@@ -170,10 +161,16 @@ export class MetricsTracker {
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
if (!this.data) {
throw new Error('MetricsTracker not initialized');
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Initialize agent metrics if not exists
// 1. Initialize agent metrics if first time seeing this agent
const existingAgent = this.data.metrics.agents[agentName];
const agent = existingAgent ?? {
status: 'in-progress' as const,
@@ -183,7 +180,7 @@ export class MetricsTracker {
};
this.data.metrics.agents[agentName] = agent;
// Add attempt to array
// 2. Build attempt record with optional model/error fields
const attempt: AttemptData = {
attempt_number: result.attemptNumber,
duration_ms: result.duration_ms,
@@ -200,16 +197,18 @@ export class MetricsTracker {
attempt.error = result.error;
}
// 3. Append attempt to history
agent.attempts.push(attempt);
// Update total cost (includes failed attempts)
// 4. Recalculate total cost across all attempts (includes failures)
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
// If successful, update final metrics and status
// 5. Update agent status based on outcome
if (result.success) {
agent.status = 'success';
agent.final_duration_ms = result.duration_ms;
// 6. Attach model and checkpoint metadata on success
if (result.model) {
agent.model = result.model;
}
@@ -218,19 +217,18 @@ export class MetricsTracker {
agent.checkpoint = result.checkpoint;
}
} else {
// If this was the last attempt, mark as failed
if (result.isFinalAttempt) {
agent.status = 'failed';
}
}
// Clear active timer
// 7. Clear active timer
this.activeTimers.delete(agentName);
// Recalculate aggregations
// 8. Recalculate phase and session-level aggregations
this.recalculateAggregations();
// Save to disk
// 9. Persist to session.json
await this.save();
}
@@ -262,7 +260,13 @@ export class MetricsTracker {
checkpointHash?: string
): Promise<void> {
if (!this.data) {
throw new Error('MetricsTracker not initialized');
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Ensure originalWorkflowId is set (backfill if missing from old sessions)
@@ -326,9 +330,9 @@ export class MetricsTracker {
* Calculate phase-level metrics
*/
private calculatePhaseMetrics(
successfulAgents: Array<[string, AgentMetrics]>
successfulAgents: Array<[string, AgentAuditMetrics]>
): Record<string, PhaseMetrics> {
const phases: Record<PhaseName, AgentMetrics[]> = {
const phases: Record<PhaseName, AgentAuditMetrics[]> = {
'pre-recon': [],
'recon': [],
'vulnerability-analysis': [],
+7 -102
View File
@@ -15,20 +15,17 @@ import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { ensureDirectory } from '../utils/file-io.js';
export type { SessionMetadata } from '../types/audit.js';
import type { SessionMetadata } from '../types/audit.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Get Shannon repository root
export const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
export interface SessionMetadata {
id: string;
webUrl: string;
repoPath?: string;
outputPath?: string;
[key: string]: unknown;
}
const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
/**
* Extract and sanitize hostname from URL for use in identifiers
@@ -93,98 +90,6 @@ export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): strin
return path.join(auditPath, 'workflow.log');
}
/**
 * Create `dirPath`, including any missing parent directories.
 *
 * Safe to call repeatedly and from concurrent callers: an `EEXIST` failure
 * is treated as success, while every other error is propagated unchanged.
 *
 * @param dirPath - Directory that must exist once the promise resolves.
 */
export async function ensureDirectory(dirPath: string): Promise<void> {
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (mkdirFailure) {
    const alreadyThere = (mkdirFailure as NodeJS.ErrnoException).code === 'EEXIST';
    if (alreadyThere) {
      // Another caller created it first; nothing left to do.
      return;
    }
    throw mkdirFailure;
  }
}
/**
 * Write `data` to `filePath` via a sibling `<filePath>.tmp` file followed by
 * a rename, so readers never observe a half-written file.
 *
 * @param filePath - Final destination of the content.
 * @param data - A string written verbatim, or an object serialized as
 *   2-space-indented JSON.
 * @throws Re-throws the write/rename error after attempting to remove the
 *   temp file.
 */
export async function atomicWrite(filePath: string, data: object | string): Promise<void> {
  const stagingPath = `${filePath}.tmp`;

  let payload: string;
  if (typeof data === 'string') {
    payload = data;
  } else {
    payload = JSON.stringify(data, null, 2);
  }

  try {
    // Stage the full content first, then swap it into place in one step
    // (rename is atomic on POSIX when both paths share a filesystem).
    await fs.writeFile(stagingPath, payload, 'utf8');
    await fs.rename(stagingPath, filePath);
  } catch (writeFailure) {
    try {
      // Best-effort removal of the leftover temp file.
      await fs.unlink(stagingPath);
    } catch {
      // The temp file may never have been created; the original failure matters more.
    }
    throw writeFailure;
  }
}
/**
 * Render a millisecond duration as a short human-readable string.
 *
 * - under one second: `"<ms>ms"`
 * - under one minute: seconds with one decimal, e.g. `"1.5s"`
 * - otherwise: whole minutes and whole seconds, e.g. `"2m 5s"`
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
export function formatDuration(ms: number): string {
  if (ms < 1000) return `${ms}ms`;

  const totalSeconds = ms / 1000;
  if (totalSeconds < 60) return `${totalSeconds.toFixed(1)}s`;

  const wholeMinutes = Math.floor(totalSeconds / 60);
  const leftoverSeconds = Math.floor(totalSeconds % 60);
  return `${wholeMinutes}m ${leftoverSeconds}s`;
}
/**
 * Convert an epoch-millisecond timestamp to an ISO 8601 string.
 *
 * @param timestamp - Milliseconds since the Unix epoch; defaults to the current time.
 * @returns The UTC ISO 8601 representation, e.g. `1970-01-01T00:00:00.000Z`.
 */
export function formatTimestamp(timestamp: number = Date.now()): string {
  const instant = new Date(timestamp);
  return instant.toISOString();
}
/**
 * Express `part` as a percentage of `total`.
 *
 * @param part - Numerator.
 * @param total - Denominator; a value of 0 yields 0 instead of dividing.
 * @returns `(part / total) * 100`, or 0 when `total` is 0.
 */
export function calculatePercentage(part: number, total: number): number {
  return total === 0 ? 0 : (part / total) * 100;
}
/**
 * Load a UTF-8 file and parse its content as JSON.
 *
 * The result is cast to `T` without validation; callers are responsible for
 * checking the shape if the file is not trusted.
 *
 * @param filePath - File to read.
 * @returns The parsed value, typed as `T`.
 * @throws If the file cannot be read or its content is not valid JSON.
 */
export async function readJson<T = unknown>(filePath: string): Promise<T> {
  const rawText = await fs.readFile(filePath, 'utf8');
  const parsed: unknown = JSON.parse(rawText);
  return parsed as T;
}
/**
 * Report whether `filePath` can be accessed.
 *
 * @param filePath - Path to probe.
 * @returns `true` when `fs.access` succeeds, `false` on any failure.
 */
export async function fileExists(filePath: string): Promise<boolean> {
  let reachable = true;
  try {
    await fs.access(filePath);
  } catch {
    // Any access failure is reported as "does not exist".
    reachable = false;
  }
  return reachable;
}
/**
* Initialize audit directory structure for a session
* Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/
+57 -71
View File
@@ -11,10 +11,10 @@
* Optimized for `tail -f` viewing during concurrent workflow execution.
*/
import fs from 'fs';
import path from 'path';
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
import fs from 'fs/promises';
import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
import { formatDuration, formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
export interface AgentLogDetails {
attemptNumber?: number;
@@ -42,38 +42,27 @@ export interface WorkflowSummary {
* WorkflowLogger - Manages the unified workflow log file
*/
export class WorkflowLogger {
private sessionMetadata: SessionMetadata;
private logPath: string;
private stream: fs.WriteStream | null = null;
private initialized: boolean = false;
private readonly sessionMetadata: SessionMetadata;
private readonly logStream: LogStream;
constructor(sessionMetadata: SessionMetadata) {
this.sessionMetadata = sessionMetadata;
this.logPath = generateWorkflowLogPath(sessionMetadata);
const logPath = generateWorkflowLogPath(sessionMetadata);
this.logStream = new LogStream(logPath);
}
/**
* Initialize the log stream (creates file and writes header)
*/
async initialize(): Promise<void> {
if (this.initialized) {
if (this.logStream.isOpen) {
return;
}
// Ensure directory exists
await ensureDirectory(path.dirname(this.logPath));
// Create write stream with append mode
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a',
encoding: 'utf8',
autoClose: true,
});
this.initialized = true;
await this.logStream.open();
// Write header only if file is new (empty)
const stats = await fs.promises.stat(this.logPath).catch(() => null);
const stats = await fs.stat(this.logStream.path).catch(() => null);
if (!stats || stats.size === 0) {
await this.writeHeader();
}
@@ -94,29 +83,35 @@ export class WorkflowLogger {
``,
].join('\n');
return this.writeRaw(header);
return this.logStream.write(header);
}
/**
* Write raw text to log file with immediate flush
* Write resume header to log file when workflow is resumed
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.initialized || !this.stream) {
reject(new Error('WorkflowLogger not initialized'));
return;
}
async logResumeHeader(resumeInfo: {
previousWorkflowId: string;
newWorkflowId: string;
checkpointHash: string;
completedAgents: string[];
}): Promise<void> {
await this.ensureInitialized();
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
const header = [
``,
`================================================================================`,
`RESUMED`,
`================================================================================`,
`Previous Workflow ID: ${resumeInfo.previousWorkflowId}`,
`New Workflow ID: ${resumeInfo.newWorkflowId}`,
`Resumed At: ${formatTimestamp()}`,
`Checkpoint: ${resumeInfo.checkpointHash}`,
`Completed: ${resumeInfo.completedAgents.length} agents (${resumeInfo.completedAgents.join(', ')})`,
`================================================================================`,
``,
].join('\n');
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
return this.logStream.write(header);
}
/**
@@ -138,10 +133,10 @@ export class WorkflowLogger {
// Add blank line before phase start for readability
if (event === 'start') {
await this.writeRaw('\n');
await this.logStream.write('\n');
}
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -184,7 +179,7 @@ export class WorkflowLogger {
}
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -194,7 +189,7 @@ export class WorkflowLogger {
await this.ensureInitialized();
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -205,7 +200,7 @@ export class WorkflowLogger {
const contextStr = context ? ` (${context})` : '';
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -301,7 +296,7 @@ export class WorkflowLogger {
const params = this.formatToolParams(toolName, parameters);
const paramStr = params ? `: ${params}` : '';
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -313,7 +308,7 @@ export class WorkflowLogger {
// Show full content, replacing newlines with escaped version for single-line output
const escaped = content.replace(/\n/g, '\\n');
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
await this.writeRaw(line);
await this.logStream.write(line);
}
/**
@@ -324,42 +319,42 @@ export class WorkflowLogger {
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
await this.writeRaw('\n');
await this.writeRaw(`================================================================================\n`);
await this.writeRaw(`Workflow ${status}\n`);
await this.writeRaw(`────────────────────────────────────────\n`);
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.writeRaw(`Status: ${summary.status}\n`);
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`);
await this.logStream.write('\n');
await this.logStream.write(`================================================================================\n`);
await this.logStream.write(`Workflow ${status}\n`);
await this.logStream.write(`────────────────────────────────────────\n`);
await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.logStream.write(`Status: ${summary.status}\n`);
await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`);
if (summary.error) {
await this.writeRaw(`Error: ${summary.error}\n`);
await this.logStream.write(`Error: ${summary.error}\n`);
}
await this.writeRaw(`\n`);
await this.writeRaw(`Agent Breakdown:\n`);
await this.logStream.write(`\n`);
await this.logStream.write(`Agent Breakdown:\n`);
for (const agentName of summary.completedAgents) {
const metrics = summary.agentMetrics[agentName];
if (metrics) {
const duration = formatDuration(metrics.durationMs);
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`);
await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`);
} else {
await this.writeRaw(` - ${agentName}\n`);
await this.logStream.write(` - ${agentName}\n`);
}
}
await this.writeRaw(`================================================================================\n`);
await this.logStream.write(`================================================================================\n`);
}
/**
* Ensure initialized (helper for lazy initialization)
*/
private async ensureInitialized(): Promise<void> {
if (!this.initialized) {
if (!this.logStream.isOpen) {
await this.initialize();
}
}
@@ -368,15 +363,6 @@ export class WorkflowLogger {
* Close the log stream
*/
async close(): Promise<void> {
if (!this.initialized || !this.stream) {
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.initialized = false;
resolve();
});
});
return this.logStream.close();
}
}
-59
View File
@@ -1,59 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
/** Result of an input validation check. */
interface ValidationResult {
  valid: boolean;
  error?: string;
  path?: string;
}

/**
 * Check that `url` is a well-formed HTTP or HTTPS URL with a hostname.
 *
 * @param url - Candidate web URL.
 * @returns `{ valid: true }` on success, otherwise `{ valid: false, error }`
 *   describing which check failed.
 */
export function validateWebUrl(url: string): ValidationResult {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    return { valid: false, error: 'Invalid web URL format' };
  }

  const usesHttp = target.protocol === 'http:' || target.protocol === 'https:';
  if (!usesHttp) {
    return { valid: false, error: 'Web URL must use HTTP or HTTPS protocol' };
  }

  if (!target.hostname) {
    return { valid: false, error: 'Web URL must have a valid hostname' };
  }

  return { valid: true };
}
/**
 * Check that `repoPath` points at an existing, readable directory.
 *
 * @param repoPath - Local repository path (absolute or relative).
 * @returns `{ valid: true, path }` with the absolute path on success,
 *   otherwise `{ valid: false, error }` naming the failed check. Never rejects:
 *   unexpected filesystem errors are reported through `error` as well.
 */
export async function validateRepoPath(repoPath: string): Promise<ValidationResult> {
  const reject = (error: string): ValidationResult => ({ valid: false, error });

  try {
    const present = await fs.pathExists(repoPath);
    if (!present) {
      return reject('Repository path does not exist');
    }

    const info = await fs.stat(repoPath);
    if (!info.isDirectory()) {
      return reject('Repository path must be a directory');
    }

    // Readability is probed separately so it gets its own error message.
    try {
      await fs.access(repoPath, fs.constants.R_OK);
    } catch {
      return reject('Repository path is not readable');
    }

    return { valid: true, path: path.resolve(repoPath) };
  } catch (caught) {
    const detail = caught instanceof Error ? caught.message : String(caught);
    return reject(`Invalid repository path: ${detail}`);
  }
}
-49
View File
@@ -1,49 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import chalk from 'chalk';
import { displaySplashScreen } from '../splash-screen.js';
/**
 * Print the CLI help text (usage, options, examples, requirements and
 * environment variables) to stdout.
 */
export function showHelp(): void {
  console.log(chalk.cyan.bold('AI Penetration Testing Agent'));
  console.log(chalk.gray('Automated security assessment tool\n'));

  // Each section is a highlighted heading followed by plain body lines.
  // A trailing '\n' on the last body line produces the blank separator row.
  const sections: ReadonlyArray<{ heading: string; body: readonly string[] }> = [
    {
      heading: 'USAGE:',
      body: [
        ' shannon <WEB_URL> <REPO_PATH> [--config config.yaml] [--output /path/to/reports]\n',
      ],
    },
    {
      heading: 'OPTIONS:',
      body: [
        ' --config <file> YAML configuration file for authentication and testing parameters',
        ' --output <path> Custom output directory for session folder (default: ./audit-logs/)',
        ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)',
        ' --disable-loader Disable the animated progress loader (useful when logs interfere with spinner)',
        ' --help Show this help message\n',
      ],
    },
    {
      heading: 'EXAMPLES:',
      body: [
        ' shannon "https://example.com" "/path/to/local/repo"',
        ' shannon "https://example.com" "/path/to/local/repo" --config auth.yaml',
        ' shannon "https://example.com" "/path/to/local/repo" --output /path/to/reports',
        ' shannon "https://example.com" "/path/to/local/repo" --pipeline-testing\n',
      ],
    },
    {
      heading: 'REQUIREMENTS:',
      body: [
        ' • WEB_URL must start with http:// or https://',
        ' • REPO_PATH must be an accessible local directory',
        ' • Only test systems you own or have permission to test\n',
      ],
    },
    {
      heading: 'ENVIRONMENT VARIABLES:',
      body: [' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)'],
    },
  ];

  for (const { heading, body } of sections) {
    console.log(chalk.yellow.bold(heading));
    for (const line of body) {
      console.log(line);
    }
  }
}
// Export the splash screen function for use in main
export { displaySplashScreen };
+311 -106
View File
@@ -7,13 +7,13 @@
import { createRequire } from 'module';
import { fs } from 'zx';
import yaml from 'js-yaml';
import { Ajv, type ValidateFunction } from 'ajv';
import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
import type { FormatsPlugin } from 'ajv-formats';
import { PentestError } from './error-handling.js';
import { PentestError } from './services/error-handling.js';
import { ErrorCode } from './types/errors.js';
import type {
Config,
Rule,
Rules,
Authentication,
DistributedConfig,
} from './types/config.js';
@@ -22,11 +22,9 @@ import type {
const require = createRequire(import.meta.url);
const addFormats: FormatsPlugin = require('ajv-formats');
// Initialize AJV with formats
const ajv = new Ajv({ allErrors: true, verbose: true });
addFormats(ajv);
// Load JSON Schema
let configSchema: object;
let validateSchema: ValidateFunction;
@@ -45,7 +43,6 @@ try {
);
}
// Security patterns to block
const DANGEROUS_PATTERNS: RegExp[] = [
/\.\.\//, // Path traversal
/[<>]/, // HTML/XML injection
@@ -54,32 +51,171 @@ const DANGEROUS_PATTERNS: RegExp[] = [
/file:/i, // File URLs
];
// Parse and load YAML configuration file with enhanced safety
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// File existence check
if (!(await fs.pathExists(configPath))) {
throw new Error(`Configuration file not found: ${configPath}`);
/**
* Format a single AJV error into a human-readable message.
* Translates AJV error keywords into plain English descriptions.
*/
function formatAjvError(error: ErrorObject): string {
const path = error.instancePath || 'root';
const params = error.params as Record<string, unknown>;
switch (error.keyword) {
case 'required': {
const missingProperty = params.missingProperty as string;
return `Missing required field: "${missingProperty}" at ${path || 'root'}`;
}
// File size check (prevent extremely large files)
const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
if (stats.size > maxFileSize) {
throw new Error(
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
case 'type': {
const expectedType = params.type as string;
return `Invalid type at ${path}: expected ${expectedType}`;
}
case 'enum': {
const allowedValues = params.allowedValues as unknown[];
const formattedValues = allowedValues.map((v) => `"${v}"`).join(', ');
return `Invalid value at ${path}: must be one of [${formattedValues}]`;
}
case 'additionalProperties': {
const additionalProperty = params.additionalProperty as string;
return `Unknown field at ${path}: "${additionalProperty}" is not allowed`;
}
case 'minLength': {
const limit = params.limit as number;
return `Value at ${path} is too short: must have at least ${limit} character(s)`;
}
case 'maxLength': {
const limit = params.limit as number;
return `Value at ${path} is too long: must have at most ${limit} character(s)`;
}
case 'minimum': {
const limit = params.limit as number;
return `Value at ${path} is too small: must be >= ${limit}`;
}
case 'maximum': {
const limit = params.limit as number;
return `Value at ${path} is too large: must be <= ${limit}`;
}
case 'minItems': {
const limit = params.limit as number;
return `Array at ${path} has too few items: must have at least ${limit} item(s)`;
}
case 'maxItems': {
const limit = params.limit as number;
return `Array at ${path} has too many items: must have at most ${limit} item(s)`;
}
case 'pattern': {
const pattern = params.pattern as string;
return `Value at ${path} does not match required pattern: ${pattern}`;
}
case 'format': {
const format = params.format as string;
return `Value at ${path} must be a valid ${format}`;
}
case 'const': {
const allowedValue = params.allowedValue as unknown;
return `Value at ${path} must be exactly "${allowedValue}"`;
}
case 'oneOf': {
return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`;
}
case 'anyOf': {
return `Value at ${path} must match at least one of the allowed schemas`;
}
case 'not': {
return `Value at ${path} matches a schema it should not match`;
}
case 'if': {
return `Value at ${path} does not satisfy conditional schema requirements`;
}
case 'uniqueItems': {
const i = params.i as number;
const j = params.j as number;
return `Array at ${path} contains duplicate items at positions ${j} and ${i}`;
}
case 'propertyNames': {
const propertyName = params.propertyName as string;
return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`;
}
case 'dependencies':
case 'dependentRequired': {
const property = params.property as string;
const missingProperty = params.missingProperty as string;
return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`;
}
default: {
// Fallback for any unhandled keywords - use AJV's message if available
const message = error.message || `validation failed for keyword "${error.keyword}"`;
return `${path}: ${message}`;
}
}
}
/**
 * Translate a list of AJV validation errors into human-readable messages.
 *
 * @param errors - Errors reported by an AJV validate function.
 * @returns One formatted message per error, in the same order.
 */
function formatAjvErrors(errors: ErrorObject[]): string[] {
  return errors.map((ajvError) => formatAjvError(ajvError));
}
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// 1. Verify file exists
if (!(await fs.pathExists(configPath))) {
throw new PentestError(
`Configuration file not found: ${configPath}`,
'config',
false,
{ configPath },
ErrorCode.CONFIG_NOT_FOUND
);
}
// Read file content
const configContent = await fs.readFile(configPath, 'utf8');
// Basic content validation
if (!configContent.trim()) {
throw new Error('Configuration file is empty');
// 2. Check file size
const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
if (stats.size > maxFileSize) {
throw new PentestError(
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
'config',
false,
{ configPath, fileSize: stats.size, maxFileSize },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Parse YAML with safety options
// 3. Read and check for empty content
const configContent = await fs.readFile(configPath, 'utf8');
if (!configContent.trim()) {
throw new PentestError(
'Configuration file is empty',
'config',
false,
{ configPath },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// 4. Parse YAML with safe schema
let config: unknown;
try {
config = yaml.load(configContent, {
@@ -89,67 +225,82 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
});
} catch (yamlError) {
const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
throw new Error(`YAML parsing failed: ${errMsg}`);
throw new PentestError(
`YAML parsing failed: ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
}
// Additional safety check
// 5. Guard against null/undefined parse result
if (config === null || config === undefined) {
throw new Error('Configuration file resulted in null/undefined after parsing');
throw new PentestError(
'Configuration file resulted in null/undefined after parsing',
'config',
false,
{ configPath },
ErrorCode.CONFIG_PARSE_ERROR
);
}
// Validate the configuration structure and content
// 6. Validate schema, security rules, and return
validateConfig(config as Config);
return config as Config;
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
// Enhance error message with context
if (
errMsg.startsWith('Configuration file not found') ||
errMsg.startsWith('YAML parsing failed') ||
errMsg.includes('must be') ||
errMsg.includes('exceeds maximum')
) {
// These are already well-formatted errors, re-throw as-is
// PentestError instances are already well-formatted, re-throw as-is
if (error instanceof PentestError) {
throw error;
} else {
// Wrap other errors with context
throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
}
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to parse configuration file '${configPath}': ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
}
};
// Validate overall configuration structure using JSON Schema
const validateConfig = (config: Config): void => {
// Basic structure validation
if (!config || typeof config !== 'object') {
throw new Error('Configuration must be a valid object');
throw new PentestError(
'Configuration must be a valid object',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (Array.isArray(config)) {
throw new Error('Configuration must be an object, not an array');
throw new PentestError(
'Configuration must be an object, not an array',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// JSON Schema validation
const isValid = validateSchema(config);
if (!isValid) {
const errors = validateSchema.errors || [];
const errorMessages = errors.map((err) => {
const path = err.instancePath || 'root';
return `${path}: ${err.message}`;
});
throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`);
const errorMessages = formatAjvErrors(errors);
throw new PentestError(
`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`,
'config',
false,
{ validationErrors: errorMessages },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Additional security validation
performSecurityValidation(config);
// Warn if deprecated fields are used
if (config.login) {
console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.');
}
// Ensure at least some configuration is provided
if (!config.rules && !config.authentication) {
console.warn(
'⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.'
@@ -161,35 +312,58 @@ const validateConfig = (config: Config): void => {
}
};
// Perform additional security validation beyond JSON Schema
const performSecurityValidation = (config: Config): void => {
// Validate authentication section for security issues
if (config.authentication) {
const auth = config.authentication;
// Check for dangerous patterns in credentials
if (auth.credentials) {
// Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
if (auth.login_url) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.credentials.username)) {
throw new Error(
'authentication.credentials.username contains potentially dangerous pattern'
);
}
if (pattern.test(auth.credentials.password)) {
throw new Error(
'authentication.credentials.password contains potentially dangerous pattern'
if (pattern.test(auth.login_url)) {
throw new PentestError(
`authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'login_url', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
}
if (auth.credentials) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.credentials.username)) {
throw new PentestError(
`authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.username', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (pattern.test(auth.credentials.password)) {
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
}
// Check login flow for dangerous patterns
if (auth.login_flow) {
auth.login_flow.forEach((step, index) => {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(step)) {
throw new Error(
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `login_flow[${index}]`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
@@ -197,48 +371,58 @@ const performSecurityValidation = (config: Config): void => {
}
}
// Validate rules section for security issues
if (config.rules) {
validateRulesSecurity(config.rules.avoid, 'avoid');
validateRulesSecurity(config.rules.focus, 'focus');
// Check for duplicate and conflicting rules
checkForDuplicates(config.rules.avoid || [], 'avoid');
checkForDuplicates(config.rules.focus || [], 'focus');
checkForConflicts(config.rules.avoid, config.rules.focus);
}
};
// Validate rules for security issues
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
if (!rules) return;
rules.forEach((rule, index) => {
// Security validation
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(rule.url_path)) {
throw new Error(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (pattern.test(rule.description)) {
throw new Error(
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
throw new PentestError(
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
}
// Type-specific validation
validateRuleTypeSpecific(rule, ruleType, index);
});
};
// Validate rule based on its specific type
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
const field = `rules.${ruleType}[${index}].url_path`;
switch (rule.type) {
case 'path':
if (!rule.url_path.startsWith('/')) {
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
throw new PentestError(
`${field} for type 'path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -246,14 +430,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'domain':
// Basic domain validation - no slashes allowed
if (rule.url_path.includes('/')) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
throw new PentestError(
`${field} for type '${rule.type}' cannot contain '/' characters`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// Must contain at least one dot for domains
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
throw new PentestError(
`${field} for type 'domain' must be a valid domain name`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
@@ -261,62 +453,77 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'method': {
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
throw new PentestError(
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
'config',
false,
{ field, ruleType: rule.type, allowedMethods },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
}
case 'header':
// Header name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
throw new PentestError(
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
case 'parameter':
// Parameter name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error(
`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
throw new PentestError(
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
break;
}
};
// Check for duplicate rules
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
const seen = new Set<string>();
rules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
if (seen.has(key)) {
throw new Error(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
throw new PentestError(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
seen.add(key);
});
};
// Check for conflicting rules between avoid and focus
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
focusRules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`;
if (avoidSet.has(key)) {
throw new Error(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
throw new PentestError(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
'config',
false,
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
});
};
// Sanitize and normalize rule values
const sanitizeRule = (rule: Rule): Rule => {
return {
description: rule.description.trim(),
@@ -325,7 +532,6 @@ const sanitizeRule = (rule: Rule): Rule => {
};
};
// Distribute configuration sections to different agents with sanitization
export const distributeConfig = (config: Config | null): DistributedConfig => {
const avoid = config?.rules?.avoid || [];
const focus = config?.rules?.focus || [];
@@ -338,7 +544,6 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
};
};
// Sanitize and normalize authentication values
const sanitizeAuthentication = (auth: Authentication): Authentication => {
return {
login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'],
@@ -348,7 +553,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
password: auth.credentials.password,
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
},
login_flow: auth.login_flow.map((step) => step.trim()),
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: {
type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
value: auth.success_condition.value.trim(),
-110
View File
@@ -1,110 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { path, fs } from 'zx';
import chalk from 'chalk';
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
/**
 * Builds the validator used by a vulnerability-analysis agent.
 *
 * The returned validator awaits `validateQueueAndDeliverable` for the given
 * vulnerability type. Any error it raises is logged as a warning and reported
 * as `false` instead of being rethrown.
 *
 * @param vulnType - Vulnerability category whose queue/deliverable is checked
 * @returns Validator resolving to `true` when validation completes without throwing
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validate = async (sourceDir: string): Promise<boolean> => {
    let passed = true;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${reason}`));
      passed = false;
    }
    return passed;
  };
  return validate;
}
/**
 * Builds the validator used by an exploitation agent.
 *
 * The validator passes when `deliverables/<vulnType>_exploitation_evidence.md`
 * exists under the given source directory.
 *
 * @param vulnType - Vulnerability category that names the evidence file
 * @returns Validator resolving to whether the evidence file exists
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
// MCP agent mapping: pins every prompt name to one Playwright MCP instance so that
// agents do not conflict over the same browser instance. The object is frozen, and the
// Record<PromptName, ...> annotation requires an entry for every prompt name.
export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
// Phase 1: Pre-reconnaissance (the prompt name is 'pre-recon-code', not the agent name).
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation;
// an MCP server is assigned anyway for consistency and future extensibility.
'pre-recon-code': 'playwright-agent1',
// Phase 2: Reconnaissance (prompt name is 'recon')
recon: 'playwright-agent2',
// Phase 3: Vulnerability Analysis (5 parallel agents, one Playwright instance each)
'vuln-injection': 'playwright-agent1',
'vuln-xss': 'playwright-agent2',
'vuln-auth': 'playwright-agent3',
'vuln-ssrf': 'playwright-agent4',
'vuln-authz': 'playwright-agent5',
// Phase 4: Exploitation (5 parallel agents; each reuses the instance of its vuln-* counterpart)
'exploit-injection': 'playwright-agent1',
'exploit-xss': 'playwright-agent2',
'exploit-auth': 'playwright-agent3',
'exploit-ssrf': 'playwright-agent4',
'exploit-authz': 'playwright-agent5',
// Phase 5: Reporting (the prompt name is 'report-executive').
// NOTE: Report generation is typically text-based and doesn't use browser automation;
// an MCP server is assigned anyway for potential screenshot inclusion or future needs.
'report-executive': 'playwright-agent3',
});
// Builds a validator that passes when `deliverables/<filename>` exists under the source directory.
function deliverableExists(filename: string): AgentValidator {
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', filename));
}

/**
 * Validator lookup keyed directly by agent name (no pattern matching).
 *
 * Each validator receives the source directory and resolves to whether the
 * agent's expected output is present. The table is frozen.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable written by the agent must exist
  'pre-recon': deliverableExists('code_analysis_deliverable.md'),

  // Reconnaissance: the recon deliverable must exist
  recon: deliverableExists('recon_deliverable.md'),

  // Vulnerability analysis: queue + deliverable validation per vulnerability type
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation: the per-type evidence file must exist
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: same existence check, but a missing file is also reported on the console
  report: async (sourceDir: string): Promise<boolean> => {
    const hasReport = deliverableExists('comprehensive_security_assessment_report.md');
    const found = await hasReport(sourceDir);
    if (found) {
      return found;
    }
    console.log(
      chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`)
    );
    return found;
  },
});
-381
View File
@@ -1,381 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { Timer } from '../utils/metrics.js';
import { formatDuration } from '../utils/formatting.js';
import { handleToolError, PentestError } from '../error-handling.js';
import { AGENTS } from '../session-manager.js';
import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import type { ToolAvailability } from '../tool-checker.js';
import type { DistributedConfig } from '../types/config.js';
// Outcome of an agent run. In this file it is the type the code-analysis
// result of Wave 1 is cast to.
interface AgentResult {
success: boolean;
duration: number;
cost?: number | undefined;
error?: string | undefined;
retryable?: boolean | undefined;
}
// External tools that runTerminalScan knows how to launch.
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
// Outcome label attached to every terminal scan.
type ToolStatus = 'success' | 'skipped' | 'error';
// Result of one external tool invocation as returned by runTerminalScan.
interface TerminalScanResult {
tool: ToolName;
// Captured stdout, or a short explanation when the tool was skipped.
output: string;
status: ToolStatus;
duration: number;
// NOTE(review): `success` and `error` are never set by the scan paths in this file;
// presumably they are filled in by handleToolError - confirm against error-handling.
success?: boolean;
error?: Error;
}
// Values handed to loadPrompt for the pre-recon prompt.
interface PromptVariables {
webUrl: string;
repoPath: string;
}
// Discriminated union for Wave1 tool results - clearer than loose union types.
// `kind` tells extractResult which payload is present.
type Wave1ToolResult =
| { kind: 'scan'; result: TerminalScanResult }
| { kind: 'skipped'; message: string }
| { kind: 'agent'; result: AgentResult };
// Everything Wave 1 produces: one entry per footprinting tool plus the code analysis run.
interface Wave1Results {
nmap: Wave1ToolResult;
subfinder: Wave1ToolResult;
whatweb: Wave1ToolResult;
// Optional: runPreReconWave1 never sets this, so the report section for naabu
// falls back to the "Skipped" placeholder produced by extractResult.
naabu?: Wave1ToolResult;
codeAnalysis: AgentResult;
}
// Everything Wave 2 produces (currently only the schemathesis scan).
interface Wave2Results {
schemathesis: TerminalScanResult;
}
// Return value of executePreReconPhase: phase duration and the stitched report text.
interface PreReconResult {
duration: number;
report: string;
}
/**
 * Runs one external security tool (nmap, subfinder, whatweb or schemathesis)
 * and returns what it wrote to stdout.
 *
 * nmap and subfinder are given only the hostname of `target`; whatweb receives
 * the full target. Schemathesis depends on schema files found under
 * `<sourceDir>/outputs/schemas` and is reported as skipped when that directory
 * or any `.json`/`.yml`/`.yaml` file in it is missing. A failing schema run is
 * recorded in the output and does not abort the remaining schemas.
 *
 * Any other error is logged and delegated to `handleToolError`, whose return
 * value becomes the scan result.
 *
 * @param tool - Tool to run
 * @param target - Target URL
 * @param sourceDir - Repository directory holding the schemas (schemathesis only); defaults to '.'
 * @returns Scan result with captured output, status and duration
 */
async function runTerminalScan(tool: ToolName, target: string, sourceDir: string | null = null): Promise<TerminalScanResult> {
  const timer = new Timer(`command-${tool}`);

  // Quiet shell: stdin ignored, stdout captured, stderr discarded. Built on demand per command.
  const quietShell = () => $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] });

  const announceStart = (): void => {
    console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
  };

  // Stops the timer, logs completion, and packages the captured output.
  const completed = (name: ToolName, output: string): TerminalScanResult => {
    const duration = timer.stop();
    console.log(chalk.green(`${tool} completed in ${formatDuration(duration)}`));
    return { tool: name, output, status: 'success', duration };
  };

  try {
    if (tool === 'nmap') {
      announceStart();
      const nmapHost = new URL(target).hostname;
      const scan = await quietShell()`nmap -sV -sC ${nmapHost}`;
      return completed('nmap', scan.stdout);
    }

    if (tool === 'subfinder') {
      announceStart();
      const domain = new URL(target).hostname;
      const scan = await quietShell()`subfinder -d ${domain}`;
      return completed('subfinder', scan.stdout);
    }

    if (tool === 'whatweb') {
      announceStart();
      const command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      console.log(chalk.gray(` Command: ${command}`));
      const scan = await quietShell()`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      return completed('whatweb', scan.stdout);
    }

    if (tool === 'schemathesis') {
      // Schemathesis depends on code analysis output - skip if no schemas found
      const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
      if (!(await fs.pathExists(schemasDir))) {
        console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`));
        return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() };
      }

      const entries = await fs.readdir(schemasDir) as string[];
      const apiSchemas = entries.filter(
        (name: string) => name.endsWith('.json') || name.endsWith('.yml') || name.endsWith('.yaml')
      );
      if (apiSchemas.length === 0) {
        console.log(chalk.gray(` ⏭️ ${tool} - no API schemas found`));
        return { tool: 'schemathesis', output: 'No API schemas found', status: 'skipped', duration: timer.stop() };
      }

      announceStart();
      const sections: string[] = [];
      // One run per schema file; a failing run is recorded and the loop continues
      for (const schemaFile of apiSchemas) {
        const schemaPath = path.join(schemasDir, schemaFile);
        try {
          const run = await quietShell()`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
          sections.push(`Schema: ${schemaFile}\n${run.stdout}`);
        } catch (schemaError) {
          const failure = schemaError as { stdout?: string; message?: string };
          sections.push(`Schema: ${schemaFile}\nError: ${failure.stdout || failure.message}`);
        }
      }
      return completed('schemathesis', sections.join('\n\n'));
    }

    throw new Error(`Unknown tool: ${tool}`);
  } catch (error) {
    const duration = timer.stop();
    console.log(chalk.red(`${tool} failed in ${formatDuration(duration)}`));
    return handleToolError(tool, error as Error & { code?: string }) as TerminalScanResult;
  }
}
/**
 * Wave 1: initial footprinting plus the code-analysis agent.
 *
 * In pipeline testing mode the external tools are skipped and only the
 * code-analysis agent runs. Otherwise nmap, subfinder and whatweb are started
 * first, the pre-recon prompt is loaded, the agent is launched, and all four
 * operations are awaited together.
 *
 * NOTE(review): `sessionId` defaults to null but is passed on with a non-null
 * assertion; callers are expected to supply it - confirm.
 *
 * @param webUrl - Target URL handed to the scanners and the session metadata
 * @param sourceDir - Repository directory the agent works in
 * @param variables - Variables substituted into the pre-recon prompt
 * @param config - Distributed config; only its presence is logged here
 * @param pipelineTestingMode - When true, external tools are skipped
 * @param sessionId - Session id recorded in the audit metadata
 * @param outputPath - Optional output path added to the session metadata when set
 * @returns Per-tool results and the code-analysis result
 */
async function runPreReconWave1(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  pipelineTestingMode: boolean = false,
  sessionId: string | null = null,
  outputPath: string | null = null
): Promise<Wave1Results> {
  console.log(chalk.blue(' → Launching Wave 1 operations in parallel...'));

  // Session metadata for audit logging (STANDARD: use 'id' field)
  const sessionMetadata = { id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) };

  // Starts the code-analysis agent for an already-loaded prompt
  const launchCodeAnalysis = (
    prompt: Awaited<ReturnType<typeof loadPrompt>>
  ): Promise<TerminalScanResult | AgentResult> =>
    runClaudePromptWithRetry(
      prompt,
      sourceDir,
      '*',
      '',
      AGENTS['pre-recon'].displayName,
      'pre-recon', // Agent name for snapshot creation
      chalk.cyan,
      sessionMetadata
    );

  // Pipeline testing mode: no external commands, agent only
  if (pipelineTestingMode) {
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    const testPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
    const analysisOutcome: TerminalScanResult | AgentResult = await launchCodeAnalysis(testPrompt);
    const skippedInTesting: Wave1ToolResult = { kind: 'skipped', message: 'Skipped (pipeline testing mode)' };
    return {
      nmap: { ...skippedInTesting },
      subfinder: { ...skippedInTesting },
      whatweb: { ...skippedInTesting },
      codeAnalysis: analysisOutcome as AgentResult
    };
  }

  // Scanners start before the prompt is loaded so they run while it loads
  const nmapScan = runTerminalScan('nmap', webUrl);
  const subfinderScan = runTerminalScan('subfinder', webUrl);
  const whatwebScan = runTerminalScan('whatweb', webUrl);
  const analysisPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
  const analysisRun = launchCodeAnalysis(analysisPrompt);

  // Check if authentication config is provided for login instructions injection
  console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));

  const [nmap, subfinder, whatweb, codeAnalysis] = await Promise.all([
    nmapScan,
    subfinderScan,
    whatwebScan,
    analysisRun,
  ]);

  return {
    nmap: { kind: 'scan', result: nmap },
    subfinder: { kind: 'scan', result: subfinder },
    whatweb: { kind: 'scan', result: whatweb },
    codeAnalysis: codeAnalysis as AgentResult
  };
}
/**
 * Wave 2: additional scanning (currently schemathesis only).
 *
 * Returns a skipped placeholder when running in pipeline testing mode or when
 * schemathesis is not available; otherwise runs the schemathesis scan against
 * the target using the schemas found under `sourceDir`.
 *
 * @param webUrl - Target URL passed to schemathesis
 * @param sourceDir - Repository directory searched for API schemas
 * @param toolAvailability - Which external tools are installed
 * @param pipelineTestingMode - When true, external tools are skipped
 * @returns The schemathesis result, or a 'skipped' placeholder
 */
async function runPreReconWave2(
  webUrl: string,
  sourceDir: string,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean = false
): Promise<Wave2Results> {
  console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...'));

  // Skip external commands in pipeline testing mode
  if (pipelineTestingMode) {
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Skipped (pipeline testing mode)', status: 'skipped', duration: 0 }
    };
  }

  // Schemathesis is the only Wave 2 tool, so its absence means there is nothing to run.
  // (The previous version re-checked availability after this early return, in a branch
  // that could never execute.)
  if (!toolAvailability.schemathesis) {
    console.log(chalk.gray(' ⏭️ No Wave 2 tools available'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 }
    };
  }

  const schemathesis = await runTerminalScan('schemathesis', webUrl, sourceDir);
  return { schemathesis };
}
/**
 * Reduces a Wave 1 tool result to the status/output pair shown in the report.
 *
 * - missing result: 'Skipped' / 'No output'
 * - skipped: 'Skipped' with the skip message as output
 * - agent: 'success' or 'error' depending on the agent outcome; output points at the agent's own output
 * - scan: the scan's status and output, with 'Skipped' / 'No output' substituted for empty values
 */
function extractResult(r: Wave1ToolResult | undefined): { status: string; output: string } {
  if (!r) {
    return { status: 'Skipped', output: 'No output' };
  }

  if (r.kind === 'skipped') {
    return { status: 'Skipped', output: r.message };
  }

  if (r.kind === 'agent') {
    const agentStatus = r.result.success ? 'success' : 'error';
    return { status: agentStatus, output: 'See agent output' };
  }

  const { status, output } = r.result;
  return { status: status || 'Skipped', output: output || 'No output' };
}
/**
 * Combines the Wave 1 tool outputs, the code analysis deliverable and any
 * additional scans into a single markdown report, writes it to
 * `<sourceDir>/deliverables/pre_recon_deliverable.md`, and returns the text.
 *
 * If the code analysis deliverable cannot be read, the report carries a
 * pointer to its expected location instead of its content.
 *
 * @param wave1 - Results of the Wave 1 tools
 * @param additionalScans - Extra scan results, rendered under "Authenticated Scans"
 * @param sourceDir - Repository directory that holds the `deliverables` folder
 * @returns The full report text
 * @throws PentestError (category 'filesystem', not retryable) when the report cannot be written
 */
async function stitchPreReconOutputs(wave1: Wave1Results, additionalScans: TerminalScanResult[], sourceDir: string): Promise<string> {
// Try to read the code analysis deliverable file
let codeAnalysisContent = 'No analysis available';
try {
const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
} catch (error) {
// Best effort: a missing or unreadable deliverable is only a warning; the report
// then references the file location instead of embedding its content
const err = error as Error;
console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${err.message}`));
codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
}
// Build additional scans section (left empty when there are no additional scans)
let additionalSection = '';
if (additionalScans.length > 0) {
additionalSection = '\n## Authenticated Scans\n';
for (const scan of additionalScans) {
additionalSection += `
### ${scan.tool.toUpperCase()}
Status: ${scan.status}
${scan.output}
`;
}
}
// Reduce each Wave 1 result to a status/output pair; a missing entry (e.g. naabu,
// which is optional on Wave1Results) is reported by extractResult as 'Skipped'
const nmap = extractResult(wave1.nmap);
const subfinder = extractResult(wave1.subfinder);
const whatweb = extractResult(wave1.whatweb);
const naabu = extractResult(wave1.naabu);
const report = `
# Pre-Reconnaissance Report
## Port Discovery (naabu)
Status: ${naabu.status}
${naabu.output}
## Network Scanning (nmap)
Status: ${nmap.status}
${nmap.output}
## Subdomain Discovery (subfinder)
Status: ${subfinder.status}
${subfinder.output}
## Technology Detection (whatweb)
Status: ${whatweb.status}
${whatweb.output}
## Code Analysis
${codeAnalysisContent}
${additionalSection}
---
Report generated at: ${new Date().toISOString()}
`.trim();
// Ensure deliverables directory exists in the cloned repo
try {
const deliverablePath = path.join(sourceDir, 'deliverables', 'pre_recon_deliverable.md');
await fs.ensureDir(path.join(sourceDir, 'deliverables'));
// Write to file in the cloned repository
await fs.writeFile(deliverablePath, report);
} catch (error) {
// Unlike the read above, a failed write is fatal for this phase
const err = error as Error;
throw new PentestError(
`Failed to write pre-recon report: ${err.message}`,
'filesystem',
false,
{ sourceDir, originalError: err.message }
);
}
return report;
}
/**
 * Entry point for phase 1 (pre-reconnaissance).
 *
 * Runs Wave 1 (footprinting + code analysis), then Wave 2 (additional
 * scanning), stitches both into the pre-recon deliverable, and reports how
 * long the whole phase took.
 *
 * @param webUrl - Target URL
 * @param sourceDir - Repository directory; the deliverable is written beneath it
 * @param variables - Variables for the pre-recon prompt
 * @param config - Distributed config, forwarded to Wave 1
 * @param toolAvailability - Which external tools are installed, forwarded to Wave 2
 * @param pipelineTestingMode - When true, external tools are skipped in both waves
 * @param sessionId - Session id forwarded to Wave 1
 * @param outputPath - Optional output path forwarded to Wave 1
 * @returns Phase duration and the stitched report text
 */
export async function executePreReconPhase(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean,
  sessionId: string | null = null,
  outputPath: string | null = null
): Promise<PreReconResult> {
  console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
  const phaseTimer = new Timer('phase-1-pre-recon');

  console.log(chalk.yellow('Wave 1: Initial footprinting...'));
  const wave1 = await runPreReconWave1(
    webUrl,
    sourceDir,
    variables,
    config,
    pipelineTestingMode,
    sessionId,
    outputPath
  );
  console.log(chalk.green(' ✅ Wave 1 operations completed'));

  console.log(chalk.yellow('Wave 2: Additional scanning...'));
  const { schemathesis } = await runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode);
  console.log(chalk.green(' ✅ Wave 2 operations completed'));

  console.log(chalk.blue('📝 Stitching pre-recon outputs...'));
  const extraScans = schemathesis ? [schemathesis] : [];
  const report = await stitchPreReconOutputs(wave1, extraScans, sourceDir);

  const duration = phaseTimer.stop();
  console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`));
  console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));
  return { duration, report };
}
+2 -6
View File
@@ -4,8 +4,6 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import chalk from 'chalk';
export class ProgressIndicator {
private message: string;
private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
@@ -25,9 +23,7 @@ export class ProgressIndicator {
this.interval = setInterval(() => {
// Clear the line and write the spinner
process.stdout.write(
`\r${chalk.cyan(this.frames[this.frameIndex])} ${chalk.dim(this.message)}`
);
process.stdout.write(`\r${this.frames[this.frameIndex]} ${this.message}`);
this.frameIndex = (this.frameIndex + 1) % this.frames.length;
}, 100);
}
@@ -47,6 +43,6 @@ export class ProgressIndicator {
finish(successMessage: string = 'Complete'): void {
this.stop();
console.log(chalk.green(`${successMessage}`));
console.log(`${successMessage}`);
}
}
+291
View File
@@ -0,0 +1,291 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Agent Execution Service
*
* Handles the full agent lifecycle:
* - Load config via ConfigLoaderService
* - Load prompt template using AGENTS[agentName].promptTemplate
* - Create git checkpoint
* - Start audit logging
* - Invoke Claude SDK via runClaudePrompt
* - Spending cap check using isSpendingCapBehavior
* - Handle failure (rollback, audit)
* - Validate output using AGENTS[agentName].deliverableFilename
* - Commit on success, log metrics
*
* No Temporal dependencies - pure domain logic.
*/
import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode, type PentestErrorType } from '../types/errors.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
import type { ConfigLoaderService } from './config-loader.js';
import type { AgentMetrics } from '../types/metrics.js';
/**
 * Input for a single agent execution attempt.
 */
export interface AgentExecutionInput {
// Target URL; substituted into the agent prompt.
webUrl: string;
// Repository the agent works in; also the workspace for git checkpoint/commit/rollback.
repoPath: string;
// Optional config file path, handed to ConfigLoaderService.loadOptional.
configPath?: string | undefined;
// Forwarded to prompt loading; treated as false when omitted.
pipelineTestingMode?: boolean | undefined;
// Attempt counter; used for the git checkpoint and recorded in the audit log and end result.
attemptNumber: number;
}
// Everything failAgent needs to roll back, close the audit entry, and build the error.
interface FailAgentOpts {
// Attempt counter copied into the audit end result.
attemptNumber: number;
// Raw Claude run result; supplies duration, cost and model for the audit end result.
result: ClaudePromptResult;
// Reason passed to the git rollback.
rollbackReason: string;
// Message used for both the audit end result and the PentestError.
errorMessage: string;
// The remaining fields are passed straight to the PentestError constructor.
errorCode: ErrorCode;
category: PentestErrorType;
retryable: boolean;
context: Record<string, unknown>;
}
/**
* Service for executing agents with full lifecycle management.
*
* NOTE: AuditSession is passed per-execution, NOT stored on the service.
* This is critical for parallel agent execution - each agent needs its own
* AuditSession instance because AuditSession uses instance state (currentAgentName)
* to track which agent is currently logging.
*/
export class AgentExecutionService {
private readonly configLoader: ConfigLoaderService;
constructor(configLoader: ConfigLoaderService) {
this.configLoader = configLoader;
}
/**
* Execute an agent with full lifecycle management.
*
* @param agentName - Name of the agent to execute
* @param input - Execution input parameters
* @param auditSession - Audit session for this specific agent execution
* @returns Result containing AgentEndResult on success, PentestError on failure
*/
async execute(
agentName: AgentName,
input: AgentExecutionInput,
auditSession: AuditSession,
logger: ActivityLogger
): Promise<Result<AgentEndResult, PentestError>> {
const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
// 1. Load config (if provided)
const configResult = await this.configLoader.loadOptional(configPath);
if (isErr(configResult)) {
return configResult;
}
const distributedConfig = configResult.value;
// 2. Load prompt
const promptTemplate = AGENTS[agentName].promptTemplate;
let prompt: string;
try {
prompt = await loadPrompt(
promptTemplate,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode,
logger
);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return err(
new PentestError(
`Failed to load prompt for ${agentName}: ${errorMessage}`,
'prompt',
false,
{ agentName, promptTemplate, originalError: errorMessage },
ErrorCode.PROMPT_LOAD_FAILED
)
);
}
// 3. Create git checkpoint before execution
try {
await createGitCheckpoint(repoPath, agentName, attemptNumber, logger);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return err(
new PentestError(
`Failed to create git checkpoint for ${agentName}: ${errorMessage}`,
'filesystem',
false,
{ agentName, repoPath, originalError: errorMessage },
ErrorCode.GIT_CHECKPOINT_FAILED
)
);
}
// 4. Start audit logging
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 5. Execute agent
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
agentName,
auditSession,
logger
);
// 6. Spending cap check - defense-in-depth
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'spending cap detected',
errorMessage: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
errorCode: ErrorCode.SPENDING_CAP_REACHED,
category: 'billing',
retryable: true,
context: { agentName, turns: result.turns, cost: result.cost },
});
}
}
// 7. Handle execution failure
if (!result.success) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'execution failure',
errorMessage: result.error || 'Agent execution failed',
errorCode: ErrorCode.AGENT_EXECUTION_FAILED,
category: 'validation',
retryable: result.retryable ?? true,
context: { agentName, originalError: result.error },
});
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
if (!validationPassed) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'validation failure',
errorMessage: `Agent ${agentName} failed output validation`,
errorCode: ErrorCode.OUTPUT_VALIDATION_FAILED,
category: 'validation',
retryable: true,
context: { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
});
}
// 9. Success - commit deliverables, then capture checkpoint hash
await commitGitSuccess(repoPath, agentName, logger);
const commitHash = await getGitCommitHash(repoPath);
const endResult: AgentEndResult = {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
};
await auditSession.endAgent(agentName, endResult);
return ok(endResult);
}
private async failAgent(
agentName: AgentName,
repoPath: string,
auditSession: AuditSession,
logger: ActivityLogger,
opts: FailAgentOpts
): Promise<Result<AgentEndResult, PentestError>> {
await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);
const endResult: AgentEndResult = {
attemptNumber: opts.attemptNumber,
duration_ms: opts.result.duration,
cost_usd: opts.result.cost || 0,
success: false,
model: opts.result.model,
error: opts.errorMessage,
};
await auditSession.endAgent(agentName, endResult);
return err(
new PentestError(
opts.errorMessage,
opts.category,
opts.retryable,
opts.context,
opts.errorCode
)
);
}
/**
 * Throwing variant of `execute`.
 *
 * Runs the agent through `execute` and unwraps the Result: the success
 * value is returned as-is, while the error side is thrown. Intended for
 * Temporal activities, which catch errors and classify them into
 * ApplicationFailure without having to import the Result helpers.
 *
 * @param agentName - Name of the agent to execute
 * @param input - Execution input parameters
 * @param auditSession - Audit session for this specific agent execution
 * @param logger - Activity logger forwarded to `execute`
 * @returns AgentEndResult on success
 * @throws PentestError on failure
 */
async executeOrThrow(
  agentName: AgentName,
  input: AgentExecutionInput,
  auditSession: AuditSession,
  logger: ActivityLogger
): Promise<AgentEndResult> {
  const outcome = await this.execute(agentName, input, auditSession, logger);
  if (!isErr(outcome)) {
    return outcome.value;
  }
  throw outcome.error;
}
/**
 * Build the AgentMetrics shape used in workflow state from an agent's
 * end result and the raw prompt result.
 *
 * Token counts are always null because the SDK wrapper does not currently
 * expose them; `numTurns` is null when the prompt result has no turn count.
 *
 * @param endResult - Finalized agent result (duration and cost are read from it)
 * @param result - Raw prompt result (turn count and model are read from it)
 * @returns Metrics object for workflow state
 */
static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
  const { duration_ms: durationMs, cost_usd: costUsd } = endResult;
  const numTurns = result.turns ?? null;

  return {
    durationMs,
    inputTokens: null, // Not currently exposed by SDK wrapper
    outputTokens: null,
    costUsd,
    numTurns,
    model: result.model,
  };
}
}
+75
View File
@@ -0,0 +1,75 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Config Loader Service
*
* Wraps parseConfig + distributeConfig with Result type for explicit error handling.
* Pure service with no Temporal dependencies.
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';
/**
 * Loads a configuration file and distributes it into per-consumer sections.
 *
 * Failures are never thrown: every method reports them through the Result
 * type so the caller chooses how to react.
 */
export class ConfigLoaderService {
  /**
   * Parse the config at `configPath` and distribute it.
   *
   * Any exception from parsing or distribution is converted into a
   * non-retryable 'config' PentestError. The error code is picked from the
   * exception message: "not found"/"ENOENT" -> CONFIG_NOT_FOUND,
   * "validation failed" -> CONFIG_VALIDATION_FAILED, anything else ->
   * CONFIG_PARSE_ERROR.
   *
   * @param configPath - Path to the YAML configuration file
   * @returns Ok with the DistributedConfig, or Err with a PentestError
   */
  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
    try {
      const parsed = await parseConfig(configPath);
      return ok(distributeConfig(parsed));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);

      // Message-based classification; the "not found" check takes precedence
      const isMissing = reason.includes('not found') || reason.includes('ENOENT');
      const errorCode = isMissing
        ? ErrorCode.CONFIG_NOT_FOUND
        : reason.includes('validation failed')
          ? ErrorCode.CONFIG_VALIDATION_FAILED
          : ErrorCode.CONFIG_PARSE_ERROR;

      const failure = new PentestError(
        `Failed to load config ${configPath}: ${reason}`,
        'config',
        false,
        { configPath, originalError: reason },
        errorCode
      );
      return err(failure);
    }
  }

  /**
   * Like `load`, but tolerates a missing path.
   *
   * A falsy `configPath` (undefined or empty string) yields Ok(null)
   * without touching the filesystem; otherwise this delegates to `load`.
   *
   * @param configPath - Optional path to the YAML configuration file
   * @returns Ok with the DistributedConfig (or null), or Err with a PentestError
   */
  async loadOptional(
    configPath: string | undefined
  ): Promise<Result<DistributedConfig | null, PentestError>> {
    if (configPath) {
      return this.load(configPath);
    }
    return ok(null);
  }
}
+117
View File
@@ -0,0 +1,117 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Dependency Injection Container
*
* Provides a per-workflow container for service instances.
* Services are wired with explicit constructor injection.
*
* Usage:
* const container = getOrCreateContainer(workflowId, sessionMetadata);
* const auditSession = new AuditSession(sessionMetadata); // Per-agent
* await auditSession.initialize(workflowId);
* const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession, logger);
*/
import type { SessionMetadata } from '../audit/utils.js';
import { AgentExecutionService } from './agent-execution.js';
import { ConfigLoaderService } from './config-loader.js';
import { ExploitationCheckerService } from './exploitation-checker.js';
/**
 * Dependencies required to create a Container.
 *
 * NOTE: AuditSession is NOT stored in the container.
 * Each agent execution receives its own AuditSession instance
 * because AuditSession uses instance state (currentAgentName) that
 * cannot be shared across parallel agents.
 */
export interface ContainerDependencies {
// Session metadata; the Container constructor copies it onto its own
// `sessionMetadata` property.
readonly sessionMetadata: SessionMetadata;
}
/**
 * Per-workflow service container.
 *
 * Creates each service exactly once in the constructor and exposes them as
 * readonly properties, so every agent execution within the workflow reuses
 * the same instances.
 *
 * NOTE: AuditSession is deliberately absent - it is supplied per agent
 * execution so parallel agents each keep their own logging context.
 */
export class Container {
  readonly sessionMetadata: SessionMetadata;
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;

  constructor(deps: ContainerDependencies) {
    const { sessionMetadata } = deps;
    this.sessionMetadata = sessionMetadata;

    // Build leaf services first, then the services that depend on them
    const configLoader = new ConfigLoaderService();
    const exploitationChecker = new ExploitationCheckerService();
    const agentExecution = new AgentExecutionService(configLoader);

    this.configLoader = configLoader;
    this.exploitationChecker = exploitationChecker;
    this.agentExecution = agentExecution;
  }
}
/**
 * Map of workflowId to Container instance.
 * Each workflow gets its own container scoped to its lifecycle.
 *
 * Module-level state: entries are added by getOrCreateContainer and are
 * only removed by an explicit removeContainer call.
 */
const containers = new Map<string, Container>();
/**
 * Look up the Container registered for a workflow, creating and
 * registering one on first use.
 *
 * When a container already exists for `workflowId` it is returned
 * unchanged and the `sessionMetadata` argument is ignored.
 *
 * @param workflowId - Unique workflow identifier
 * @param sessionMetadata - Session metadata used only when a new container is built
 * @returns Container instance for the workflow
 */
export function getOrCreateContainer(
  workflowId: string,
  sessionMetadata: SessionMetadata
): Container {
  const existing = containers.get(workflowId);
  if (existing) {
    return existing;
  }

  const created = new Container({ sessionMetadata });
  containers.set(workflowId, created);
  return created;
}
/**
 * Remove a Container when a workflow completes.
 *
 * Should be called in logWorkflowComplete to clean up resources.
 * Does nothing when no container is registered for the id. After removal,
 * a later getOrCreateContainer call with the same id builds a new container.
 *
 * @param workflowId - Unique workflow identifier
 */
export function removeContainer(workflowId: string): void {
containers.delete(workflowId);
}
/**
 * Get an existing Container for a workflow, if one exists.
 *
 * Unlike getOrCreateContainer, this does NOT create a new container.
 * Returns undefined if no container exists for the workflowId.
 * This is a plain lookup in the module-level registry and does not modify it.
 *
 * Useful for lightweight activities that can benefit from an existing
 * container but don't need to create one.
 *
 * @param workflowId - Unique workflow identifier
 * @returns Container instance or undefined
 */
export function getContainer(workflowId: string): Container | undefined {
return containers.get(workflowId);
}
@@ -4,116 +4,44 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import chalk from 'chalk';
import { fs, path } from 'zx';
import type {
PentestErrorType,
PentestErrorContext,
LogEntry,
ToolErrorResult,
PromptErrorResult,
} from './types/errors.js';
import {
ErrorCode,
type PentestErrorType,
type PentestErrorContext,
type PromptErrorResult,
} from '../types/errors.js';
import {
matchesBillingApiPattern,
matchesBillingTextPattern,
} from '../utils/billing-detection.js';
// Temporal error classification for ApplicationFailure wrapping
export interface TemporalErrorClassification {
type: string;
retryable: boolean;
}
// Custom error class for pentest operations
export class PentestError extends Error {
name = 'PentestError' as const;
override name = 'PentestError' as const;
type: PentestErrorType;
retryable: boolean;
context: PentestErrorContext;
timestamp: string;
/** Optional specific error code for reliable classification */
code?: ErrorCode;
constructor(
message: string,
type: PentestErrorType,
retryable: boolean = false,
context: PentestErrorContext = {}
context: PentestErrorContext = {},
code?: ErrorCode
) {
super(message);
this.type = type;
this.retryable = retryable;
this.context = context;
this.timestamp = new Date().toISOString();
}
}
// Centralized error logging function
export async function logError(
error: Error & { type?: PentestErrorType; retryable?: boolean; context?: PentestErrorContext },
contextMsg: string,
sourceDir: string | null = null
): Promise<LogEntry> {
const timestamp = new Date().toISOString();
const logEntry: LogEntry = {
timestamp,
context: contextMsg,
error: {
name: error.name || error.constructor.name,
message: error.message,
type: error.type || 'unknown',
retryable: error.retryable || false,
},
};
// Only add stack if it exists
if (error.stack) {
logEntry.error.stack = error.stack;
}
// Console logging with color
const prefix = error.retryable ? '⚠️' : '❌';
const color = error.retryable ? chalk.yellow : chalk.red;
console.log(color(`${prefix} ${contextMsg}:`));
console.log(color(` ${error.message}`));
if (error.context && Object.keys(error.context).length > 0) {
console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`));
}
// File logging (if source directory available)
if (sourceDir) {
try {
const logPath = path.join(sourceDir, 'error.log');
await fs.appendFile(logPath, JSON.stringify(logEntry) + '\n');
} catch (logErr) {
const errMsg = logErr instanceof Error ? logErr.message : String(logErr);
console.log(chalk.gray(` (Failed to write error log: ${errMsg})`));
if (code !== undefined) {
this.code = code;
}
}
return logEntry;
}
// Handle tool execution errors
export function handleToolError(
toolName: string,
error: Error & { code?: string }
): ToolErrorResult {
const isRetryable =
error.code === 'ECONNRESET' ||
error.code === 'ETIMEDOUT' ||
error.code === 'ENOTFOUND';
return {
tool: toolName,
output: `Error: ${error.message}`,
status: 'error',
duration: 0,
success: false,
error: new PentestError(
`${toolName} execution failed: ${error.message}`,
'tool',
isRetryable,
{ toolName, originalError: error.message, errorCode: error.code }
),
};
}
// Handle prompt loading errors
export function handlePromptError(
promptName: string,
error: Error
@@ -129,7 +57,6 @@ export function handlePromptError(
};
}
// Patterns that indicate retryable errors
const RETRYABLE_PATTERNS = [
// Network and connection errors
'network',
@@ -173,28 +100,58 @@ const NON_RETRYABLE_PATTERNS = [
/**
 * Decide from an error's message whether the operation is worth retrying.
 *
 * The message is lower-cased and matched by substring. A hit on any
 * non-retryable pattern wins outright; otherwise the error is retryable
 * only if it hits at least one retryable pattern.
 *
 * @param error - Error whose message is inspected
 * @returns true when the message matches a retryable pattern and no non-retryable one
 */
export function isRetryableError(error: Error): boolean {
  const haystack = error.message.toLowerCase();
  const mentions = (pattern: string): boolean => haystack.includes(pattern);

  // Explicit non-retryable patterns take precedence over retryable ones
  const blocked = NON_RETRYABLE_PATTERNS.some((pattern) => mentions(pattern));
  return !blocked && RETRYABLE_PATTERNS.some((pattern) => mentions(pattern));
}
// Rate limit errors get longer base delay (30s) vs standard exponential backoff (2s)
export function getRetryDelay(error: Error, attempt: number): number {
const message = error.message.toLowerCase();
/**
* Classifies errors by ErrorCode for reliable, code-based classification.
* Used when error is a PentestError with a specific ErrorCode.
*/
function classifyByErrorCode(
code: ErrorCode,
retryableFromError: boolean
): { type: string; retryable: boolean } {
switch (code) {
// Billing errors - retryable (wait for cap reset or credits added)
case ErrorCode.SPENDING_CAP_REACHED:
case ErrorCode.INSUFFICIENT_CREDITS:
return { type: 'BillingError', retryable: true };
// Rate limiting gets longer delays
if (message.includes('rate limit') || message.includes('429')) {
return Math.min(30000 + attempt * 10000, 120000); // 30s, 40s, 50s, max 2min
case ErrorCode.API_RATE_LIMITED:
return { type: 'RateLimitError', retryable: true };
// Config errors - non-retryable (need manual fix)
case ErrorCode.CONFIG_NOT_FOUND:
case ErrorCode.CONFIG_VALIDATION_FAILED:
case ErrorCode.CONFIG_PARSE_ERROR:
return { type: 'ConfigurationError', retryable: false };
// Prompt errors - non-retryable (need manual fix)
case ErrorCode.PROMPT_LOAD_FAILED:
return { type: 'ConfigurationError', retryable: false };
// Git errors - non-retryable (indicates workspace corruption)
case ErrorCode.GIT_CHECKPOINT_FAILED:
case ErrorCode.GIT_ROLLBACK_FAILED:
return { type: 'GitError', retryable: false };
// Validation errors - retryable (agent may succeed on retry)
case ErrorCode.OUTPUT_VALIDATION_FAILED:
case ErrorCode.DELIVERABLE_NOT_FOUND:
return { type: 'OutputValidationError', retryable: true };
// Agent execution - use the retryable flag from the error
case ErrorCode.AGENT_EXECUTION_FAILED:
return { type: 'AgentExecutionError', retryable: retryableFromError };
default:
// Unknown code - no string-matching fallback is attempted; trust the error's own retryable flag
return { type: 'UnknownError', retryable: retryableFromError };
}
// Exponential backoff with jitter for other retryable errors
const baseDelay = Math.pow(2, attempt) * 1000; // 2s, 4s, 8s
const jitter = Math.random() * 1000; // 0-1s random
return Math.min(baseDelay + jitter, 30000); // Max 30s
}
/**
@@ -204,31 +161,25 @@ export function getRetryDelay(error: Error, attempt: number): number {
* Used by activities to wrap errors in ApplicationFailure:
* - Retryable errors: Temporal retries with configured backoff
* - Non-retryable errors: Temporal fails immediately
*
* Classification priority:
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
* 2. Fall through to string matching for external errors (SDK, network, etc.)
*/
export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification {
export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
if (error instanceof PentestError && error.code !== undefined) {
return classifyByErrorCode(error.code, error.retryable);
}
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
// === BILLING ERRORS (Retryable with long backoff) ===
// Anthropic returns billing as 400 invalid_request_error
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
if (
message.includes('billing_error') ||
message.includes('credit balance is too low') ||
message.includes('insufficient credits') ||
message.includes('usage is blocked due to insufficient credits') ||
message.includes('please visit plans & billing') ||
message.includes('please visit plans and billing') ||
message.includes('usage limit reached') ||
message.includes('quota exceeded') ||
message.includes('daily rate limit') ||
message.includes('limit will reset') ||
// Claude Code spending cap patterns (returns short message instead of error)
message.includes('spending cap') ||
message.includes('spending limit') ||
message.includes('cap reached') ||
message.includes('budget exceeded') ||
message.includes('billing limit reached')
) {
// Check both API patterns and text patterns for comprehensive detection
if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
return { type: 'BillingError', retryable: true };
}
+71
View File
@@ -0,0 +1,71 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Exploitation Checker Service
*
* Pure domain logic for determining whether exploitation should run.
* Reads queue file, parses JSON, returns decision.
*
* No Temporal dependencies - this is pure business logic.
*/
import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
 * Decides whether the exploit agent for a vulnerability type should run.
 *
 * The decision comes from validating that type's queue in the repository.
 */
export class ExploitationCheckerService {
  /**
   * Validate the queue for `vulnType` and turn the outcome into a decision.
   *
   * - Validation succeeds: the decision is logged and returned unchanged.
   * - Validation fails with a retryable error: the error is logged and re-thrown
   *   so the caller can retry.
   * - Validation fails with a non-retryable error: a warning is logged and a
   *   "do not exploit" decision is returned instead of failing.
   *
   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
   * @param repoPath - Path to the repository containing deliverables
   * @param logger - ActivityLogger for structured logging
   * @returns ExploitationDecision indicating whether to exploit
   * @throws PentestError if validation fails and is retryable
   */
  async checkQueue(vulnType: VulnType, repoPath: string, logger: ActivityLogger): Promise<ExploitationDecision> {
    const validation = await validateQueueSafe(vulnType, repoPath);

    if (!isOk(validation)) {
      const failure = validation.error;

      // Retryable failures propagate so the caller can schedule a retry
      if (failure.retryable) {
        logger.warn(`${vulnType}: ${failure.message} (retryable)`);
        throw failure;
      }

      // Non-retryable failures degrade to "skip exploitation"
      logger.warn(`${vulnType}: ${failure.message}, skipping exploitation`);
      const skipped: ExploitationDecision = {
        shouldExploit: false,
        shouldRetry: false,
        vulnerabilityCount: 0,
        vulnType,
      };
      return skipped;
    }

    const decision = validation.value;
    const summary = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    logger.info(`${vulnType}: ${summary}`);
    return decision;
  }
}
@@ -5,7 +5,9 @@
// as published by the Free Software Foundation.
import { $ } from 'zx';
import chalk from 'chalk';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* Check if a directory is a git repository.
@@ -51,17 +53,19 @@ function logChangeSummary(
changes: string[],
messageWithChanges: string,
messageWithoutChanges: string,
color: typeof chalk.green,
logger: ActivityLogger,
level: 'info' | 'warn' = 'info',
maxToShow: number = 5
): void {
if (changes.length > 0) {
console.log(color(messageWithChanges.replace('{count}', String(changes.length))));
changes.slice(0, maxToShow).forEach((change) => console.log(chalk.gray(` ${change}`)));
if (changes.length > maxToShow) {
console.log(chalk.gray(` ... and ${changes.length - maxToShow} more files`));
}
const msg = messageWithChanges.replace('{count}', String(changes.length));
const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', ');
const suffix = changes.length > maxToShow
? ` ... and ${changes.length - maxToShow} more files`
: '';
logger[level](`${msg} ${fileList}${suffix}`);
} else {
console.log(color(messageWithoutChanges));
logger[level](messageWithoutChanges);
}
}
@@ -136,10 +140,10 @@ export async function executeGitCommandWithRetry(
if (isGitLockError(errMsg) && attempt < maxRetries) {
const delay = Math.pow(2, attempt - 1) * 1000;
console.log(
chalk.yellow(
` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
)
// executeGitCommandWithRetry is also called outside activity context
// (e.g., from resume logic), so we use console.warn as a fallback here
console.warn(
`Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
);
await new Promise((resolve) => setTimeout(resolve, delay));
continue;
@@ -148,7 +152,13 @@ export async function executeGitCommandWithRetry(
throw error;
}
}
throw new Error(`Git command failed after ${maxRetries} retries`);
throw new PentestError(
`Git command failed after ${maxRetries} retries`,
'filesystem',
true, // Retryable - transient git lock issues
{ maxRetries, description },
ErrorCode.GIT_CHECKPOINT_FAILED
);
} finally {
gitSemaphore.release();
}
@@ -157,15 +167,16 @@ export async function executeGitCommandWithRetry(
// Two-phase reset: hard reset (tracked files) + clean (untracked files)
export async function rollbackGitWorkspace(
sourceDir: string,
reason: string = 'retry preparation'
reason: string = 'retry preparation',
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git rollback (not a git repository)`));
logger.info('Skipping git rollback (not a git repository)');
return { success: true };
}
console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));
logger.info(`Rolling back workspace for ${reason}`);
try {
const changes = await getChangedFiles(sourceDir, 'status check for rollback');
@@ -182,16 +193,26 @@ export async function rollbackGitWorkspace(
logChangeSummary(
changes,
'Rollback completed - removed {count} contaminated changes:',
'Rollback completed - no changes to remove',
chalk.yellow,
'Rollback completed - removed {count} contaminated changes:',
'Rollback completed - no changes to remove',
logger,
'info',
3
);
return { success: true };
} catch (error) {
const result = toErrorResult(error);
console.log(chalk.red(`Rollback failed after retries: ${result.error?.message}`));
return result;
const errMsg = error instanceof Error ? error.message : String(error);
logger.error(`Rollback failed after retries: ${errMsg}`);
return {
success: false,
error: new PentestError(
`Git rollback failed: ${errMsg}`,
'filesystem',
false, // Non-retryable - rollback is best-effort cleanup
{ sourceDir, reason },
ErrorCode.GIT_ROLLBACK_FAILED
),
};
}
}
@@ -199,29 +220,30 @@ export async function rollbackGitWorkspace(
export async function createGitCheckpoint(
sourceDir: string,
description: string,
attempt: number
attempt: number,
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git checkpoint (not a git repository)`));
logger.info('Skipping git checkpoint (not a git repository)');
return { success: true };
}
console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`));
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
try {
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
// 1. On retries, clean workspace to prevent pollution from previous attempt
if (attempt > 1) {
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`);
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
if (!cleanResult.success) {
console.log(
chalk.yellow(` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`)
);
logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`);
}
}
// 2. Detect existing changes
const changes = await getChangedFiles(sourceDir, 'status check');
const hasChanges = changes.length > 0;
// 3. Stage and commit checkpoint
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
@@ -229,30 +251,32 @@ export async function createGitCheckpoint(
'creating commit'
);
// 4. Log result
if (hasChanges) {
console.log(chalk.blue(`Checkpoint created with uncommitted changes staged`));
logger.info('Checkpoint created with uncommitted changes staged');
} else {
console.log(chalk.blue(`Empty checkpoint created (no workspace changes)`));
logger.info('Empty checkpoint created (no workspace changes)');
}
return { success: true };
} catch (error) {
const result = toErrorResult(error);
console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${result.error?.message}`));
logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`);
return result;
}
}
export async function commitGitSuccess(
sourceDir: string,
description: string
description: string,
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git commit (not a git repository)`));
logger.info('Skipping git commit (not a git repository)');
return { success: true };
}
console.log(chalk.green(` 💾 Committing successful results for ${description}`));
logger.info(`Committing successful results for ${description}`);
try {
const changes = await getChangedFiles(sourceDir, 'status check for success commit');
@@ -269,15 +293,14 @@ export async function commitGitSuccess(
logChangeSummary(
changes,
'Success commit created with {count} file changes:',
'Empty success commit created (agent made no file changes)',
chalk.green,
5
'Success commit created with {count} file changes:',
'Empty success commit created (agent made no file changes)',
logger
);
return { success: true };
} catch (error) {
const result = toErrorResult(error);
console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${result.error?.message}`));
logger.warn(`Success commit failed after retries: ${result.error?.message}`);
return result;
}
}
+23
View File
@@ -0,0 +1,23 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Services Module
*
* Exports DI container and service classes for Shannon agent execution.
* Services are pure domain logic with no Temporal dependencies.
*/
export { Container, getOrCreateContainer, removeContainer } from './container.js';
export type { ContainerDependencies } from './container.js';
export { ConfigLoaderService } from './config-loader.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export { loadPrompt } from './prompt-manager.js';
@@ -5,10 +5,10 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError, handlePromptError } from '../error-handling.js';
import { MCP_AGENT_MAPPING } from '../constants.js';
import { PentestError, handlePromptError } from './error-handling.js';
import { MCP_AGENT_MAPPING } from '../session-manager.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface PromptVariables {
webUrl: string;
@@ -22,9 +22,9 @@ interface IncludeReplacement {
}
// Pure function: Build complete login instructions from config
async function buildLoginInstructions(authentication: Authentication): Promise<string> {
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try {
// Load the login instructions template
// 1. Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) {
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
// Helper function to extract sections based on markers
const getSection = (content: string, sectionName: string): string => {
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
const match = regex.exec(content);
return match ? match[1]!.trim() : '';
};
// Extract sections based on login type
// 2. Extract sections based on login type
const loginType = authentication.login_type?.toUpperCase();
let loginInstructions = '';
// Build instructions with only relevant sections
const commonSection = getSection(fullTemplate, 'COMMON');
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
// Fallback to full template if markers are missing (backward compatibility)
// 3. Assemble instructions from sections (fallback to full template if markers missing)
if (!commonSection && !authSection && !verificationSection) {
console.log(chalk.yellow('⚠️ Section markers not found, using full login instructions template'));
logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate;
} else {
// Combine relevant sections
loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section) // Remove empty sections
.filter(section => section)
.join('\n\n');
}
// Replace the user instructions placeholder with the login flow from config
// 4. Interpolate login flow and credential placeholders
let userInstructions = (authentication.login_flow ?? []).join('\n');
// Replace credential placeholders within the user instructions
if (authentication.credentials) {
if (authentication.credentials.username) {
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
// Replace TOTP secret placeholder if present in template
// 5. Replace TOTP secret placeholder if present in template
if (authentication.credentials?.totp_secret) {
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
}
@@ -128,7 +124,8 @@ async function processIncludes(content: string, baseDir: string): Promise<string
async function interpolateVariables(
template: string,
variables: PromptVariables,
config: DistributedConfig | null = null
config: DistributedConfig | null = null,
logger: ActivityLogger
): Promise<string> {
try {
if (!template || typeof template !== 'string') {
@@ -174,7 +171,7 @@ async function interpolateVariables(
// Extract and inject login instructions from config
if (config.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication);
const loginInstructions = await buildLoginInstructions(config.authentication, logger);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else {
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
@@ -189,7 +186,7 @@ async function interpolateVariables(
// Validate that all placeholders have been replaced (excluding instructional text)
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
if (remainingPlaceholders) {
console.log(chalk.yellow(`⚠️ Warning: Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`));
logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`);
}
return result;
@@ -212,20 +209,19 @@ export async function loadPrompt(
promptName: string,
variables: PromptVariables,
config: DistributedConfig | null = null,
pipelineTestingMode: boolean = false
pipelineTestingMode: boolean = false,
logger: ActivityLogger
): Promise<string> {
try {
// Use pipeline testing prompts if pipeline testing mode is enabled
// 1. Resolve prompt file path
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptPath = path.join(promptsDir, `${promptName}.txt`);
// Debug message for pipeline testing mode
if (pipelineTestingMode) {
console.log(chalk.yellow(`Using pipeline testing prompt: ${promptPath}`));
logger.info(`Using pipeline testing prompt: ${promptPath}`);
}
// Check if file exists first
if (!await fs.pathExists(promptPath)) {
throw new PentestError(
`Prompt file not found: ${promptPath}`,
@@ -235,26 +231,26 @@ export async function loadPrompt(
);
}
// Add MCP server assignment to variables
// 2. Assign MCP server based on agent name
const enhancedVariables: PromptVariables = { ...variables };
// Assign MCP server based on prompt name (agent name)
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
if (mcpServer) {
enhancedVariables.MCP_SERVER = mcpServer;
console.log(chalk.gray(` 🎭 Assigned ${promptName} ${enhancedVariables.MCP_SERVER}`));
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
} else {
// Fallback for unknown agents
enhancedVariables.MCP_SERVER = 'playwright-agent1';
console.log(chalk.yellow(` 🎭 Unknown agent ${promptName}, using fallback ${enhancedVariables.MCP_SERVER}`));
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
}
// 3. Read template file
let template = await fs.readFile(promptPath, 'utf8');
// Pre-process the template to handle @include directives
// 4. Process @include directives
template = await processIncludes(template, promptsDir);
return await interpolateVariables(template, enhancedVariables, config);
// 5. Interpolate variables and return final prompt
return await interpolateVariables(template, enhancedVariables, config, logger);
} catch (error) {
if (error instanceof PentestError) {
throw error;
@@ -6,9 +6,12 @@
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { asyncPipe } from './utils/functional.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { asyncPipe } from '../utils/functional.js';
import type { VulnType, ExploitationDecision } from '../types/agents.js';
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
export type { VulnType, ExploitationDecision } from '../types/agents.js';
interface VulnTypeConfigItem {
deliverable: string;
@@ -60,18 +63,11 @@ interface QueueValidationResult {
error: string | null;
}
export interface ExploitationDecision {
shouldExploit: boolean;
shouldRetry: boolean;
vulnerabilityCount: number;
vulnType: VulnType;
}
export interface SafeValidationResult {
success: boolean;
data?: ExploitationDecision;
error?: PentestError;
}
/**
* Result type for safe validation - explicit error handling.
*/
export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
// Vulnerability type configuration as immutable data
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
@@ -196,7 +192,8 @@ const validateExistenceRules = (
deliverablePath: pathsWithExistence.deliverable,
queuePath: pathsWithExistence.queue,
existence,
}
},
ErrorCode.DELIVERABLE_NOT_FOUND
),
};
}
@@ -311,15 +308,18 @@ export async function validateQueueAndDeliverable(
);
}
// Pure function to safely validate (returns result instead of throwing)
export const safeValidateQueueAndDeliverable = async (
/**
* Safely validate queue and deliverable files.
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
*/
export async function validateQueueSafe(
vulnType: VulnType,
sourceDir: string
): Promise<SafeValidationResult> => {
): Promise<SafeValidationResult> {
try {
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
return { success: true, data: result };
return ok(result);
} catch (error) {
return { success: false, error: error as PentestError };
return err(error as PentestError);
}
};
}
@@ -5,8 +5,9 @@
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface DeliverableFile {
name: string;
@@ -15,7 +16,7 @@ interface DeliverableFile {
}
// Pure function: Assemble final report from specialist deliverables
export async function assembleFinalReport(sourceDir: string): Promise<string> {
export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
const deliverableFiles: DeliverableFile[] = [
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
@@ -32,18 +33,24 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
console.log(chalk.green(`Added ${file.name} findings`));
logger.info(`Added ${file.name} findings`);
} else if (file.required) {
throw new Error(`Required file ${file.path} not found`);
throw new PentestError(
`Required deliverable file not found: ${file.path}`,
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND
);
} else {
console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`));
logger.info(`No ${file.name} deliverable found`);
}
} catch (error) {
if (file.required) {
throw error;
}
const err = error as Error;
console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${err.message}`));
logger.warn(`Could not read ${file.path}: ${err.message}`);
}
}
@@ -55,7 +62,7 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
// Ensure deliverables directory exists
await fs.ensureDir(deliverablesDir);
await fs.writeFile(finalReportPath, finalContent);
console.log(chalk.green(`Final report assembled at ${finalReportPath}`));
logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) {
const err = error as Error;
throw new PentestError(
@@ -76,13 +83,14 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
*/
export async function injectModelIntoReport(
repoPath: string,
outputPath: string
outputPath: string,
logger: ActivityLogger
): Promise<void> {
// 1. Read session.json to get model information
const sessionJsonPath = path.join(outputPath, 'session.json');
if (!(await fs.pathExists(sessionJsonPath))) {
console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
logger.warn('session.json not found, skipping model injection');
return;
}
@@ -103,18 +111,18 @@ export async function injectModelIntoReport(
}
if (models.size === 0) {
console.log(chalk.yellow('⚠️ No model information found in session.json'));
logger.warn('No model information found in session.json');
return;
}
const modelStr = Array.from(models).join(', ');
console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));
logger.info(`Injecting model info into report: ${modelStr}`);
// 3. Read the final report
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
if (!(await fs.pathExists(reportPath))) {
console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
logger.warn('Final report not found, skipping model injection');
return;
}
@@ -132,7 +140,7 @@ export async function injectModelIntoReport(
assessmentDatePattern,
`$1\n${modelLine}`
);
console.log(chalk.green('Model info injected into Executive Summary'));
logger.info('Model info injected into Executive Summary');
} else {
// If no Assessment Date line found, try to add after Executive Summary header
const execSummaryPattern = /^## Executive Summary$/m;
@@ -142,9 +150,9 @@ export async function injectModelIntoReport(
execSummaryPattern,
`## Executive Summary\n- Model: ${modelStr}`
);
console.log(chalk.green('Model info added to Executive Summary header'));
logger.info('Model info added to Executive Summary header');
} else {
console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
logger.warn('Could not find Executive Summary section');
return;
}
}
+142 -46
View File
@@ -4,106 +4,105 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { path } from 'zx';
import type { AgentName } from './types/index.js';
// Agent definition interface
export interface AgentDefinition {
name: AgentName;
displayName: string;
prerequisites: AgentName[];
}
import { path, fs } from 'zx';
import { validateQueueAndDeliverable } from './services/queue-validation.js';
import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
import type { ActivityLogger } from './types/activity-logger.js';
// Agent definitions according to PRD
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
'pre-recon': {
name: 'pre-recon',
displayName: 'Pre-recon agent',
prerequisites: []
prerequisites: [],
promptTemplate: 'pre-recon-code',
deliverableFilename: 'code_analysis_deliverable.md',
},
'recon': {
name: 'recon',
displayName: 'Recon agent',
prerequisites: ['pre-recon']
prerequisites: ['pre-recon'],
promptTemplate: 'recon',
deliverableFilename: 'recon_deliverable.md',
},
'injection-vuln': {
name: 'injection-vuln',
displayName: 'Injection vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-injection',
deliverableFilename: 'injection_analysis_deliverable.md',
},
'xss-vuln': {
name: 'xss-vuln',
displayName: 'XSS vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-xss',
deliverableFilename: 'xss_analysis_deliverable.md',
},
'auth-vuln': {
name: 'auth-vuln',
displayName: 'Auth vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-auth',
deliverableFilename: 'auth_analysis_deliverable.md',
},
'ssrf-vuln': {
name: 'ssrf-vuln',
displayName: 'SSRF vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-ssrf',
deliverableFilename: 'ssrf_analysis_deliverable.md',
},
'authz-vuln': {
name: 'authz-vuln',
displayName: 'Authz vuln agent',
prerequisites: ['recon']
prerequisites: ['recon'],
promptTemplate: 'vuln-authz',
deliverableFilename: 'authz_analysis_deliverable.md',
},
'injection-exploit': {
name: 'injection-exploit',
displayName: 'Injection exploit agent',
prerequisites: ['injection-vuln']
prerequisites: ['injection-vuln'],
promptTemplate: 'exploit-injection',
deliverableFilename: 'injection_exploitation_evidence.md',
},
'xss-exploit': {
name: 'xss-exploit',
displayName: 'XSS exploit agent',
prerequisites: ['xss-vuln']
prerequisites: ['xss-vuln'],
promptTemplate: 'exploit-xss',
deliverableFilename: 'xss_exploitation_evidence.md',
},
'auth-exploit': {
name: 'auth-exploit',
displayName: 'Auth exploit agent',
prerequisites: ['auth-vuln']
prerequisites: ['auth-vuln'],
promptTemplate: 'exploit-auth',
deliverableFilename: 'auth_exploitation_evidence.md',
},
'ssrf-exploit': {
name: 'ssrf-exploit',
displayName: 'SSRF exploit agent',
prerequisites: ['ssrf-vuln']
prerequisites: ['ssrf-vuln'],
promptTemplate: 'exploit-ssrf',
deliverableFilename: 'ssrf_exploitation_evidence.md',
},
'authz-exploit': {
name: 'authz-exploit',
displayName: 'Authz exploit agent',
prerequisites: ['authz-vuln']
prerequisites: ['authz-vuln'],
promptTemplate: 'exploit-authz',
deliverableFilename: 'authz_exploitation_evidence.md',
},
'report': {
name: 'report',
displayName: 'Report agent',
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
}
});
// Agent execution order
export const AGENT_ORDER: readonly AgentName[] = Object.freeze([
'pre-recon',
'recon',
'injection-vuln',
'xss-vuln',
'auth-vuln',
'ssrf-vuln',
'authz-vuln',
'injection-exploit',
'xss-exploit',
'auth-exploit',
'ssrf-exploit',
'authz-exploit',
'report'
] as const);
// Parallel execution groups
export const getParallelGroups = (): Readonly<{ vuln: AgentName[]; exploit: AgentName[] }> => Object.freeze({
vuln: ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'],
exploit: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
promptTemplate: 'report-executive',
deliverableFilename: 'comprehensive_security_assessment_report.md',
},
});
// Phase names for metrics aggregation
@@ -126,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
'report': 'reporting',
});
/**
 * Builds the output validator for a vulnerability-analysis agent.
 *
 * The returned validator runs `validateQueueAndDeliverable` for `vulnType` against
 * `sourceDir`. It resolves to `true` when that call completes, and to `false`
 * (after logging a warning with the failure message) when it throws.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validateVulnOutput = async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    let failureReason: string;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
      return true;
    } catch (caught) {
      // Non-Error throwables are stringified so the warning always has a message
      failureReason = caught instanceof Error ? caught.message : String(caught);
    }
    logger.warn(`Queue validation failed for ${vulnType}: ${failureReason}`);
    return false;
  };
  return validateVulnOutput;
}
/**
 * Builds the output validator for an exploitation agent.
 *
 * The returned validator resolves to whether
 * `<sourceDir>/deliverables/<vulnType>_exploitation_evidence.md` exists.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
}
/**
 * Maps each prompt template name (the `promptTemplate` values in the AGENTS registry)
 * to the Playwright MCP instance it uses, to prevent conflicts between agents.
 */
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
  // Phase 1 - pre-reconnaissance (prompt name: 'pre-recon-code').
  // Pure code analysis with no browser automation; an instance is still assigned
  // for consistency and future extensibility.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2 - reconnaissance (prompt name: 'recon').
  'recon': 'playwright-agent2',

  // Phase 3 - vulnerability analysis: five parallel agents, one instance each.
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4 - exploitation: five parallel agents, each using the same instance
  // as its vulnerability-analysis counterpart.
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5 - reporting (prompt name: 'report-executive').
  // Typically text-only; an instance is assigned anyway for potential screenshot
  // inclusion or future needs.
  'report-executive': 'playwright-agent3',
});
/**
 * Direct agent-name -> output-validator lookup (no pattern matching on names).
 * Each validator resolves to `true` when the agent's expected output is present.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-recon: the code analysis deliverable written by the agent must exist
  'pre-recon': async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md')),

  // Recon: the recon deliverable must exist
  recon: async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'recon_deliverable.md')),

  // Vulnerability analysis: queue + deliverable validation per vulnerability type
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation: the evidence file for the vulnerability type must exist
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: the final report must exist; its absence is logged as an error
  report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const finalReportPath = path.join(
      sourceDir,
      'deliverables',
      'comprehensive_security_assessment_report.md'
    );
    if (await fs.pathExists(finalReportPath)) {
      return true;
    }
    logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    return false;
  },
});
-56
View File
@@ -1,56 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
/**
 * Best-effort git bootstrap for the target directory: runs `git init` when no
 * `.git` entry exists, sets the agent's git identity, and records an initial
 * (possibly empty) checkpoint commit. Any failure is logged as a warning and
 * swallowed, so the caller continues without git setup.
 */
async function bootstrapGitCheckpoint(sourceDir: string): Promise<void> {
  try {
    const hasGitDir = await fs.pathExists(path.join(sourceDir, '.git'));
    if (!hasGitDir) {
      await $`cd ${sourceDir} && git init`;
      console.log(chalk.blue('✅ Git repository initialized'));
    }

    // Identity used for the agent's checkpoint commits
    await $`cd ${sourceDir} && git config user.name "Pentest Agent"`;
    await $`cd ${sourceDir} && git config user.email "agent@localhost"`;

    // Initial checkpoint; --allow-empty keeps this working when nothing is staged
    await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
    console.log(chalk.green('✅ Initial checkpoint created'));
  } catch (gitError) {
    // Non-fatal: report the problem and carry on
    const warning = gitError instanceof Error ? gitError.message : String(gitError);
    console.log(chalk.yellow(`⚠️ Git setup warning: ${warning}`));
  }
}

/**
 * Prepares a local repository for testing.
 *
 * Resolves `repoPath` to an absolute path and attempts a best-effort git
 * bootstrap there. MCP servers and helper tools are configured elsewhere
 * (per the original notes: via the `mcpServers` option in claude-executor and
 * the shannon-helper MCP server), so nothing is copied into the repository.
 *
 * @param repoPath - Path to the repository (relative or absolute).
 * @returns The resolved absolute repository path.
 * @throws PentestError - A PentestError is re-thrown unchanged; any other error
 *   is wrapped in a non-retryable 'filesystem' PentestError.
 */
export async function setupLocalRepo(repoPath: string): Promise<string> {
  try {
    const resolvedDir = path.resolve(repoPath);
    await bootstrapGitCheckpoint(resolvedDir);
    return resolvedDir;
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    throw new PentestError(`Local repository setup failed: ${reason}`, 'filesystem', false, {
      repoPath,
      originalError: reason,
    });
  }
}
+182 -347
View File
@@ -7,28 +7,58 @@
/**
* Temporal activities for Shannon agent execution.
*
* Each activity wraps a single agent execution with:
* Each activity wraps service calls with Temporal-specific concerns:
* - Heartbeat loop (2s interval) to signal worker liveness
* - Git checkpoint/rollback/commit per attempt
* - Error classification for Temporal retry behavior
* - Audit session logging
* - Error classification into ApplicationFailure
* - Container lifecycle management
*
* Temporal handles retries based on error classification:
* - Retryable: BillingError, TransientError (429, 5xx, network)
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
* Business logic is delegated to services in src/services/.
*/
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import chalk from 'chalk';
import path from 'path';
import fs from 'fs/promises';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { AGENTS } from '../session-manager.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { createActivityLogger } from './activity-logger.js';
// Max lengths to prevent Temporal protobuf buffer overflow
const MAX_ERROR_MESSAGE_LENGTH = 2000;
const MAX_STACK_TRACE_LENGTH = 1000;
// Max retries for output validation errors (agent didn't save deliverables)
// Lower than default 50 since this is unlikely to self-heal
const MAX_OUTPUT_VALIDATION_RETRIES = 3;
const HEARTBEAT_INTERVAL_MS = 2000;
/**
 * Input for all agent activities.
 *
 * Every activity in this module receives this same shape; each one reads only
 * the fields it needs.
 */
export interface ActivityInput {
  /** Target web URL; forwarded to agent execution and stored in the session metadata. */
  webUrl: string;
  /** Path to the repository under test; deliverables are read from and written beneath it. */
  repoPath: string;
  /** Optional config file path, forwarded to agent execution. */
  configPath?: string;
  /** Optional audit output root; report metadata injection falls back to './audit-logs' when omitted. */
  outputPath?: string;
  /** Selects pipeline-testing behavior; runAgentActivity treats a missing value as false. */
  pipelineTestingMode?: boolean;
  /** Temporal workflow id; keys the per-workflow service container and the audit session initialization. */
  workflowId: string;
  /** Becomes the session metadata id; workspace name (for resume) or workflowId (for new runs). */
  sessionId: string;
}
/**
* Truncate error message to prevent buffer overflow in Temporal serialization.
*/
@@ -48,85 +78,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
}
}
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { parseConfig, distributeConfig } from '../config-parser.js';
import { classifyErrorForTemporal } from '../error-handling.js';
import {
safeValidateQueueAndDeliverable,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import path from 'path';
import fs from 'fs/promises';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
/**
* Input for all agent activities.
* Matches PipelineInput but with required workflowId for audit correlation.
* Build SessionMetadata from ActivityInput.
*/
export interface ActivityInput {
webUrl: string;
repoPath: string;
configPath?: string;
outputPath?: string;
pipelineTestingMode?: boolean;
workflowId: string;
sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
function buildSessionMetadata(input: ActivityInput): SessionMetadata {
  // The session id is the workspace/session name, not the workflow id
  const base = {
    id: input.sessionId,
    webUrl: input.webUrl,
    repoPath: input.repoPath,
  };
  // outputPath is included only when it is set (truthy); otherwise the key is omitted entirely
  return input.outputPath ? { ...base, outputPath: input.outputPath } : base;
}
/**
* Core activity implementation.
* Core activity implementation using services.
*
* Executes a single agent with:
* 1. Heartbeat loop for worker liveness
* 2. Config loading (if configPath provided)
* 3. Audit session initialization
* 4. Prompt loading
* 5. Git checkpoint before execution
* 6. Agent execution (single attempt)
* 7. Output validation
* 8. Git commit on success, rollback on failure
* 9. Error classification for Temporal retry
* 2. Container creation/reuse
* 3. Service-based agent execution
* 4. Error classification for Temporal retry
*/
async function runAgentActivity(
agentName: AgentName,
input: ActivityInput
): Promise<AgentMetrics> {
const {
webUrl,
repoPath,
configPath,
outputPath,
pipelineTestingMode = false,
workflowId,
} = input;
const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
const startTime = Date.now();
// Get attempt number from Temporal context (tracks retries automatically)
const attemptNumber = Context.current().info.attempt;
// Heartbeat loop - signals worker is alive to Temporal server
@@ -136,160 +115,66 @@ async function runAgentActivity(
}, HEARTBEAT_INTERVAL_MS);
try {
// 1. Load config (if provided)
let distributedConfig: DistributedConfig | null = null;
if (configPath) {
try {
const config = await parseConfig(configPath);
distributedConfig = distributeConfig(config);
} catch (err) {
throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
}
}
const logger = createActivityLogger();
// 2. Build session metadata for audit
// Use sessionId (workspace name) for directory, workflowId for tracking
const sessionMetadata: SessionMetadata = {
id: input.sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
// 1. Build session metadata and get/create container
const sessionMetadata = buildSessionMetadata(input);
const container = getOrCreateContainer(workflowId, sessionMetadata);
// 3. Initialize audit session (idempotent, safe across retries)
// 2. Create audit session for THIS agent execution
// NOTE: Each agent needs its own AuditSession because AuditSession uses
// instance state (currentAgentName) that cannot be shared across parallel agents
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
// 4. Load prompt
const promptName = getPromptNameForAgent(agentName);
const prompt = await loadPrompt(
promptName,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode
);
// 5. Create git checkpoint before execution
await createGitCheckpoint(repoPath, agentName, attemptNumber);
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 6. Execute agent (single attempt - Temporal handles retries)
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
// 3. Execute agent via service (throws PentestError on failure)
const endResult = await container.agentExecution.executeOrThrow(
agentName,
chalk.cyan,
sessionMetadata,
{
webUrl,
repoPath,
configPath,
pipelineTestingMode,
attemptNumber,
},
auditSession,
attemptNumber
logger
);
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
if (looksLikeBillingError) {
await rollbackGitWorkspace(repoPath, 'spending cap detected');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: 0,
success: false,
model: result.model,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
}
}
// 7. Handle execution failure
if (!result.success) {
await rollbackGitWorkspace(repoPath, 'execution failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: result.error || 'Execution failed',
});
throw new Error(result.error || 'Agent execution failed');
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
if (!validationPassed) {
await rollbackGitWorkspace(repoPath, 'validation failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: 'Output validation failed',
});
// Limit output validation retries (unlikely to self-heal)
if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Let Temporal retry (will be classified as OutputValidationError)
throw new Error(`Agent ${agentName} failed output validation`);
}
// 9. Success - commit deliverables, then capture checkpoint hash
await commitGitSuccess(repoPath, agentName);
const commitHash = await getGitCommitHash(repoPath);
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
});
// 10. Return metrics
// 4. Return metrics
return {
durationMs: Date.now() - startTime,
inputTokens: null, // Not currently exposed by SDK wrapper
inputTokens: null,
outputTokens: null,
costUsd: result.cost ?? null,
numTurns: result.turns ?? null,
model: result.model,
costUsd: endResult.cost_usd,
numTurns: null,
model: endResult.model,
};
} catch (error) {
// Rollback git workspace before Temporal retry to ensure clean state
try {
await rollbackGitWorkspace(repoPath, 'error recovery');
} catch (rollbackErr) {
// Log but don't fail - rollback is best-effort
console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
}
// If error is already an ApplicationFailure (e.g., from our retry limit logic),
// re-throw it directly without re-classifying
// If error is already an ApplicationFailure, re-throw directly
if (error instanceof ApplicationFailure) {
throw error;
}
// Check if output validation retry limit reached (PentestError with code)
if (
error instanceof PentestError &&
error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Classify error for Temporal retry behavior
const classified = classifyErrorForTemporal(error);
// Truncate message to prevent protobuf buffer overflow
const rawMessage = error instanceof Error ? error.message : String(error);
const message = truncateErrorMessage(rawMessage);
if (classified.retryable) {
// Temporal will retry with configured backoff
const failure = ApplicationFailure.create({
message,
type: classified.type,
@@ -298,7 +183,6 @@ async function runAgentActivity(
truncateStackTrace(failure);
throw failure;
} else {
// Fail immediately - no retry
const failure = ApplicationFailure.nonRetryable(message, classified.type, [
{ agentName, attemptNumber, elapsed: Date.now() - startTime },
]);
@@ -310,9 +194,6 @@ async function runAgentActivity(
}
}
// === Individual Agent Activity Exports ===
// Each function is a thin wrapper around runAgentActivity with the agent name.
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('pre-recon', input);
}
@@ -367,92 +248,56 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
/**
* Assemble the final report by concatenating exploitation evidence files.
* This must be called BEFORE runReportAgent to create the file that the report agent will modify.
*/
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
const { repoPath } = input;
console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
const logger = createActivityLogger();
logger.info('Assembling deliverables from specialist agents...');
try {
await assembleFinalReport(repoPath);
await assembleFinalReport(repoPath, logger);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
// Don't throw - the report agent can still create content even if no exploitation files exist
logger.warn(`Error assembling final report: ${err.message}`);
}
}
/**
* Inject model metadata into the final report.
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
*/
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, sessionId, outputPath } = input;
const logger = createActivityLogger();
const effectiveOutputPath = outputPath
? path.join(outputPath, sessionId)
: path.join('./audit-logs', sessionId);
try {
await injectModelIntoReport(repoPath, effectiveOutputPath);
await injectModelIntoReport(repoPath, effectiveOutputPath, logger);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
// Don't throw - this is a non-critical enhancement
logger.warn(`Error injecting model into report: ${err.message}`);
}
}
/**
* Check if exploitation should run for a given vulnerability type.
* Reads the vulnerability queue file and returns the decision.
*
* This activity allows the workflow to skip exploit agents entirely
* when no vulnerabilities were found, saving API calls and time.
*
* Error handling:
* - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
* - Non-retryable errors: skip exploitation gracefully
* Uses existing container if available (from prior agent runs),
* otherwise creates service directly (stateless, no dependencies).
*/
export async function checkExploitationQueue(
input: ActivityInput,
vulnType: VulnType
): Promise<ExploitationDecision> {
const { repoPath } = input;
const { repoPath, workflowId } = input;
const logger = createActivityLogger();
const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
// Reuse container's service if available (from prior vuln agent runs)
const existingContainer = getContainer(workflowId);
const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
if (result.success && result.data) {
const { shouldExploit, vulnerabilityCount } = result.data;
console.log(
chalk.blue(
`🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
)
);
return result.data;
}
// Validation failed - check if we should retry or skip
const error = result.error;
if (error?.retryable) {
// Re-throw retryable errors so Temporal can retry the vuln agent
console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
throw error;
}
// Non-retryable error - skip exploitation gracefully
console.log(
chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
);
return {
shouldExploit: false,
shouldRetry: false,
vulnerabilityCount: 0,
vulnType,
};
return checker.checkQueue(vulnType, repoPath, logger);
}
// === Resume Activities ===
/**
* Session.json structure for resume state loading
*/
interface SessionJson {
session: {
id: string;
@@ -462,27 +307,27 @@ interface SessionJson {
resumeAttempts?: ResumeAttempt[];
};
metrics: {
agents: Record<string, {
status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}>;
agents: Record<
string,
{
status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}
>;
};
}
/**
* Load resume state from an existing workspace.
* Validates workspace exists, URL matches, and determines which agents to skip.
*
* @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
*/
export async function loadResumeState(
workspaceName: string,
expectedUrl: string,
expectedRepoPath: string
): Promise<ResumeState> {
// 1. Validate workspace exists
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
// Validate workspace exists
const exists = await fileExists(sessionPath);
if (!exists) {
throw ApplicationFailure.nonRetryable(
@@ -491,7 +336,7 @@ export async function loadResumeState(
);
}
// Load session.json
// 2. Parse session.json and validate URL match
let session: SessionJson;
try {
session = await readJson<SessionJson>(sessionPath);
@@ -503,7 +348,6 @@ export async function loadResumeState(
);
}
// Validate URL matches
if (session.session.webUrl !== expectedUrl) {
throw ApplicationFailure.nonRetryable(
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
@@ -511,34 +355,30 @@ export async function loadResumeState(
);
}
// Find completed agents (status === 'success' AND deliverable exists)
// 3. Cross-check agent status with deliverables on disk
const completedAgents: string[] = [];
const agents = session.metrics.agents;
for (const agentName of ALL_AGENTS) {
const agentData = agents[agentName];
// Skip if agent never ran or didn't succeed
if (!agentData || agentData.status !== 'success') {
continue;
}
// Validate deliverable exists
const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
const deliverableExists = await fileExists(deliverablePath);
if (!deliverableExists) {
console.log(
chalk.yellow(`Agent ${agentName} shows success but deliverable missing, will re-run`)
);
const logger = createActivityLogger();
logger.warn(`Agent ${agentName} shows success but deliverable missing, will re-run`);
continue;
}
// Agent completed successfully and deliverable exists
completedAgents.push(agentName);
}
// Find latest checkpoint from completed agents
// 4. Collect git checkpoints and validate at least one exists
const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null);
@@ -550,24 +390,26 @@ export async function loadResumeState(
throw ApplicationFailure.nonRetryable(
`Cannot resume workspace ${workspaceName}: ` +
(successAgents.length > 0
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
`but their deliverable files are missing from disk. ` +
`Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`),
(successAgents.length > 0
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
`but their deliverable files are missing from disk. ` +
`Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`),
'NoCheckpointsError'
);
}
// Find most recent commit among checkpoints
// 5. Find the most recent checkpoint commit
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
console.log(chalk.cyan(`=== RESUME STATE ===`));
console.log(`Workspace: ${workspaceName}`);
console.log(`Completed agents: ${completedAgents.length}`);
console.log(`Checkpoint: ${checkpointHash}`);
// 6. Log summary and return resume state
const logger = createActivityLogger();
logger.info('Resume state loaded', {
workspace: workspaceName,
completedAgents: completedAgents.length,
checkpoint: checkpointHash,
});
return {
workspaceName,
@@ -578,20 +420,21 @@ export async function loadResumeState(
};
}
/**
* Find the most recent commit among a list of commit hashes.
* Uses git rev-list to determine which commit is newest.
*/
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) {
const hash = commitHashes[0];
if (!hash) {
throw new Error('Empty commit hash in array');
throw new PentestError(
'Empty commit hash in array',
'filesystem',
false, // Non-retryable - corrupt workspace state
{ phase: 'resume' },
ErrorCode.GIT_CHECKPOINT_FAILED
);
}
return hash;
}
// Use git rev-list to find the most recent commit among all hashes
const result = await executeGitCommandWithRetry(
['git', 'rev-list', '--max-count=1', ...commitHashes],
repoPath,
@@ -603,20 +446,15 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
/**
* Restore git workspace to a checkpoint and clean up partial deliverables.
*
* @param repoPath - Repository path
* @param checkpointHash - Git commit hash to reset to
* @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
*/
export async function restoreGitCheckpoint(
repoPath: string,
checkpointHash: string,
incompleteAgents: AgentName[]
): Promise<void> {
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
const logger = createActivityLogger();
logger.info(`Restoring git workspace to ${checkpointHash}...`);
// Checkpoint hash points to the success commit (after commitGitSuccess),
// so git reset --hard naturally preserves all completed agent deliverables.
await executeGitCommandWithRetry(
['git', 'reset', '--hard', checkpointHash],
repoPath,
@@ -628,67 +466,60 @@ export async function restoreGitCheckpoint(
'clean untracked files for resume'
);
// Clean up any partial deliverables from incomplete agents
for (const agentName of incompleteAgents) {
const deliverablePath = getDeliverablePath(agentName, repoPath);
const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
try {
const exists = await fileExists(deliverablePath);
if (exists) {
console.log(chalk.yellow(`Cleaning partial deliverable: ${agentName}`));
logger.warn(`Cleaning partial deliverable: ${agentName}`);
await fs.unlink(deliverablePath);
}
} catch (error) {
console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`));
logger.info(`Note: Failed to delete ${deliverablePath}: ${error}`);
}
}
console.log(chalk.green('Workspace restored to clean state'));
logger.info('Workspace restored to clean state');
}
/**
* Record a resume attempt in session.json.
* Tracks the new workflow ID, terminated workflows, and checkpoint hash.
* Record a resume attempt in session.json and write resume header to workflow.log.
*/
export async function recordResumeAttempt(
input: ActivityInput,
terminatedWorkflows: string[],
checkpointHash: string
checkpointHash: string,
previousWorkflowId: string,
completedAgents: string[]
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
// Update session.json with resume attempt
await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
// Write resume header to workflow.log
await auditSession.logResumeHeader({
previousWorkflowId,
newWorkflowId: input.workflowId,
checkpointHash,
completedAgents,
});
}
/**
* Log phase transition to the unified workflow log.
* Called at phase boundaries for per-workflow logging.
*/
export async function logPhaseTransition(
input: ActivityInput,
phase: string,
event: 'start' | 'complete'
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const sessionMetadata = buildSessionMetadata(input);
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.initialize(input.workflowId);
if (event === 'start') {
await auditSession.logPhaseStart(phase);
@@ -698,28 +529,23 @@ export async function logPhaseTransition(
}
/**
* Log workflow completion with full summary to the unified workflow log.
* Called at the end of the workflow to write a summary breakdown.
* Log workflow completion with full summary.
* Cleans up container when done.
*/
export async function logWorkflowComplete(
input: ActivityInput,
summary: WorkflowSummary
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
// 1. Initialize audit session and mark final status
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status);
// Use cumulative metrics from session.json (includes all resume attempts)
const sessionData = await auditSession.getMetrics() as {
// 2. Load cumulative metrics from session.json
const sessionData = (await auditSession.getMetrics()) as {
metrics: {
total_duration_ms: number;
total_cost_usd: number;
@@ -727,7 +553,7 @@ export async function logWorkflowComplete(
};
};
// Fill in metrics for skipped agents (completed in previous runs)
// 3. Fill in metrics for skipped agents (resumed from previous run)
const agentMetrics = { ...summary.agentMetrics };
for (const agentName of summary.completedAgents) {
if (!agentMetrics[agentName]) {
@@ -741,18 +567,27 @@ export async function logWorkflowComplete(
}
}
// 4. Build cumulative summary with cross-run totals
const cumulativeSummary: WorkflowSummary = {
...summary,
totalDurationMs: sessionData.metrics.total_duration_ms,
totalCostUsd: sessionData.metrics.total_cost_usd,
agentMetrics,
};
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary);
// Copy all deliverables to audit-logs once at workflow end (non-fatal)
// 6. Copy deliverables to audit-logs
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
console.error('Failed to copy deliverables to audit-logs:', copyErr);
const logger = createActivityLogger();
logger.error('Failed to copy deliverables to audit-logs', {
error: copyErr instanceof Error ? copyErr.message : String(copyErr),
});
}
// 7. Clean up container
removeContainer(workflowId);
}
+34
View File
@@ -0,0 +1,34 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { Context } from '@temporalio/activity';
import type { ActivityLogger } from '../types/activity-logger.js';
/** Log levels exposed by ActivityLogger and forwarded to the activity's logger. */
type ActivityLogLevel = 'info' | 'warn' | 'error';

/**
 * Forward one log entry to the logger of the currently running Temporal activity.
 * The context is looked up on every call, so this throws when no activity is running.
 */
function forwardToActivityLog(
  level: ActivityLogLevel,
  message: string,
  attrs?: Record<string, unknown>
): void {
  Context.current().log[level](message, attrs ?? {});
}

/**
 * ActivityLogger implementation that writes through Temporal's Context.current().log.
 * Each method must run inside a Temporal activity — Context.current() throws otherwise.
 * Missing attributes are sent as an empty object.
 */
export class TemporalActivityLogger implements ActivityLogger {
  info(message: string, attrs?: Record<string, unknown>): void {
    forwardToActivityLog('info', message, attrs);
  }

  warn(message: string, attrs?: Record<string, unknown>): void {
    forwardToActivityLog('warn', message, attrs);
  }

  error(message: string, attrs?: Record<string, unknown>): void {
    forwardToActivityLog('error', message, attrs);
  }
}
/**
 * Create an ActivityLogger backed by the running Temporal activity's logger.
 *
 * Construction does not touch the activity context: TemporalActivityLogger calls
 * Context.current() inside each log method. Using the returned logger outside a
 * Temporal activity therefore throws on the first info/warn/error call, not here.
 */
export function createActivityLogger(): ActivityLogger {
return new TemporalActivityLogger();
}
+241 -175
View File
@@ -26,12 +26,11 @@
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
*/
import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client';
import { Connection, Client, WorkflowNotFoundError, type WorkflowHandle } from '@temporalio/client';
import dotenv from 'dotenv';
import chalk from 'chalk';
import { displaySplashScreen } from '../splash-screen.js';
import { sanitizeHostname } from '../audit/utils.js';
import { readJson, fileExists } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import path from 'path';
// Import types only - these don't pull in workflow runtime code
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
@@ -89,18 +88,18 @@ async function terminateExistingWorkflows(
const description = await handle.describe();
if (description.status.name === 'RUNNING') {
console.log(chalk.yellow(`Terminating running workflow: ${wfId}`));
console.log(`Terminating running workflow: ${wfId}`);
await handle.terminate('Superseded by resume workflow');
terminated.push(wfId);
console.log(chalk.green(`Terminated: ${wfId}`));
console.log(`Terminated: ${wfId}`);
} else {
console.log(chalk.gray(`Workflow already ${description.status.name}: ${wfId}`));
console.log(`Workflow already ${description.status.name}: ${wfId}`);
}
} catch (error) {
if (error instanceof WorkflowNotFoundError) {
console.log(chalk.gray(`Workflow not found (already cleaned up): ${wfId}`));
console.log(`Workflow not found (already cleaned up): ${wfId}`);
} else {
console.log(chalk.red(`Failed to terminate ${wfId}: ${error}`));
console.log(`Failed to terminate ${wfId}: ${error}`);
// Continue anyway - don't block resume on termination failure
}
}
@@ -118,13 +117,13 @@ function isValidWorkspaceName(name: string): boolean {
}
function showUsage(): void {
console.log(chalk.cyan.bold('\nShannon Temporal Client'));
console.log(chalk.gray('Start a pentest pipeline workflow\n'));
console.log(chalk.yellow('Usage:'));
console.log('\nShannon Temporal Client');
console.log('Start a pentest pipeline workflow\n');
console.log('Usage:');
console.log(
' node dist/temporal/client.js <webUrl> <repoPath> [options]\n'
);
console.log(chalk.yellow('Options:'));
console.log('Options:');
console.log(' --config <path> Configuration file path');
console.log(' --output <path> Output directory for audit logs');
console.log(' --pipeline-testing Use minimal prompts for fast testing');
@@ -133,54 +132,65 @@ function showUsage(): void {
' --workflow-id <id> Custom workflow ID (default: shannon-<timestamp>)'
);
console.log(' --wait Wait for workflow completion with progress polling\n');
console.log(chalk.yellow('Examples:'));
console.log('Examples:');
console.log(' node dist/temporal/client.js https://example.com /path/to/repo');
console.log(
' node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n'
);
}
async function startPipeline(): Promise<void> {
const args = process.argv.slice(2);
// === CLI Argument Parsing ===
if (args.includes('--help') || args.includes('-h') || args.length === 0) {
/** Parsed command-line arguments for the Temporal client. */
interface CliArgs {
/** Target web URL (`<webUrl>` in the usage text); compared with the workspace URL when resuming. */
webUrl: string;
/** Repository path (`<repoPath>` in the usage text). */
repoPath: string;
/** Value of `--config`: configuration file path. */
configPath?: string;
/** Value of `--output`: output directory for audit logs. */
outputPath?: string;
/** Value of `--display-output`: path shown to the user instead of outputPath (host path, display only). */
displayOutputPath?: string;
/** Set by `--pipeline-testing`. */
pipelineTestingMode: boolean;
/** Value of `--workflow-id`; only consulted for runs started without `--workspace`. */
customWorkflowId?: string;
/** When true the client blocks on the workflow result and polls progress (`--wait` in the usage text). */
waitForCompletion: boolean;
/** Value of `--workspace`: workspace to resume, or the name of a new workspace if none exists yet. */
resumeFromWorkspace?: string;
}
function parseCliArgs(argv: string[]): CliArgs {
if (argv.includes('--help') || argv.includes('-h') || argv.length === 0) {
showUsage();
process.exit(0);
}
// Parse arguments
let webUrl: string | undefined;
let repoPath: string | undefined;
let configPath: string | undefined;
let outputPath: string | undefined;
let displayOutputPath: string | undefined; // Host path for display purposes
let displayOutputPath: string | undefined;
let pipelineTestingMode = false;
let customWorkflowId: string | undefined;
let waitForCompletion = false;
let resumeFromWorkspace: string | undefined;
for (let i = 0; i < args.length; i++) {
const arg = args[i];
for (let i = 0; i < argv.length; i++) {
const arg = argv[i];
if (arg === '--config') {
const nextArg = args[i + 1];
const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
configPath = nextArg;
i++;
}
} else if (arg === '--output') {
const nextArg = args[i + 1];
const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
outputPath = nextArg;
i++;
}
} else if (arg === '--display-output') {
const nextArg = args[i + 1];
const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
displayOutputPath = nextArg;
i++;
}
} else if (arg === '--workflow-id') {
const nextArg = args[i + 1];
const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
customWorkflowId = nextArg;
i++;
@@ -188,7 +198,7 @@ async function startPipeline(): Promise<void> {
} else if (arg === '--pipeline-testing') {
pipelineTestingMode = true;
} else if (arg === '--workspace') {
const nextArg = args[i + 1];
const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
resumeFromWorkspace = nextArg;
i++;
@@ -205,177 +215,233 @@ async function startPipeline(): Promise<void> {
}
if (!webUrl || !repoPath) {
console.log(chalk.red('Error: webUrl and repoPath are required'));
console.log('Error: webUrl and repoPath are required');
showUsage();
process.exit(1);
}
// Display splash screen
return {
webUrl, repoPath, pipelineTestingMode, waitForCompletion,
...(configPath && { configPath }),
...(outputPath && { outputPath }),
...(displayOutputPath && { displayOutputPath }),
...(customWorkflowId && { customWorkflowId }),
...(resumeFromWorkspace && { resumeFromWorkspace }),
};
}
// === Workspace Resolution ===
/** Outcome of resolving which workspace and workflow ID a client invocation uses. */
interface WorkspaceResolution {
/** ID the Temporal workflow is started with. */
workflowId: string;
/** Workspace directory name; equals workflowId for runs started without `--workspace`. */
sessionId: string;
/** True only when an existing workspace (session.json present) is being resumed. */
isResume: boolean;
/** IDs of workflows terminated while preparing a resume; empty for new runs. */
terminatedWorkflows: string[];
}
/**
 * Decide which workspace and workflow ID this invocation should use.
 *
 * Outcomes, driven by `args.resumeFromWorkspace`:
 * - not set: fresh run. The workflow ID is `args.customWorkflowId` or
 *   `<sanitized-hostname>_shannon-<timestamp>` and doubles as the session ID.
 * - set and `./audit-logs/<workspace>/session.json` exists: resume mode.
 * - set but no session.json: new named workspace (the name is validated first).
 *
 * Exits the process with code 1 on a URL mismatch or an invalid workspace name.
 *
 * @param client - Temporal client used to terminate workflows left over from earlier attempts
 * @param args - Parsed CLI arguments
 * @returns Workflow ID, session ID, resume flag and the list of terminated workflows
 */
async function resolveWorkspace(
  client: Client,
  args: CliArgs
): Promise<WorkspaceResolution> {
  if (!args.resumeFromWorkspace) {
    const hostname = sanitizeHostname(args.webUrl);
    const workflowId = args.customWorkflowId || `${hostname}_shannon-${Date.now()}`;
    return {
      workflowId,
      sessionId: workflowId,
      isResume: false,
      terminatedWorkflows: [],
    };
  }

  const workspace = args.resumeFromWorkspace;
  const sessionPath = path.join('./audit-logs', workspace, 'session.json');
  const workspaceExists = await fileExists(sessionPath);

  if (workspaceExists) {
    console.log('=== RESUME MODE ===');
    console.log(`Workspace: ${workspace}\n`);

    // 1. Validate the URL BEFORE touching any running workflow. Terminating first
    //    would kill in-flight work even when the invocation is then rejected.
    const session = await readJson<SessionJson>(sessionPath);
    if (session.session.webUrl !== args.webUrl) {
      console.error('ERROR: URL mismatch with workspace');
      console.error(` Workspace URL: ${session.session.webUrl}`);
      console.error(` Provided URL: ${args.webUrl}`);
      process.exit(1);
    }

    // 2. Terminate any running workflows from previous attempts
    const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
    if (terminatedWorkflows.length > 0) {
      console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
    }

    // 3. New workflow ID scoped to this resume attempt; isResume=true makes
    //    downstream code take the resume path
    return {
      workflowId: `${workspace}_resume_${Date.now()}`,
      sessionId: workspace,
      isResume: true,
      terminatedWorkflows,
    };
  }

  // No session.json: treat the name as a brand-new workspace, so it must be a valid name
  if (!isValidWorkspaceName(workspace)) {
    console.error(`ERROR: Invalid workspace name: "${workspace}"`);
    console.error(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric');
    process.exit(1);
  }

  console.log('=== NEW NAMED WORKSPACE ===');
  console.log(`Workspace: ${workspace}\n`);
  return {
    workflowId: `${workspace}_shannon-${Date.now()}`,
    sessionId: workspace,
    isResume: false,
    terminatedWorkflows: [],
  };
}
// === Pipeline Input Construction ===
/**
 * Assemble the workflow input from parsed CLI arguments and the resolved workspace.
 *
 * Required fields are always present; optional fields are only added when they
 * carry a truthy value (or, for terminatedWorkflows, a non-empty list), so the
 * resulting object never contains explicit `undefined` entries.
 * resumeFromWorkspace is only forwarded when the workspace is actually being resumed.
 */
function buildPipelineInput(args: CliArgs, workspace: WorkspaceResolution): PipelineInput {
  const input: PipelineInput = {
    webUrl: args.webUrl,
    repoPath: args.repoPath,
    workflowId: workspace.workflowId,
    sessionId: workspace.sessionId,
  };

  if (args.configPath) {
    input.configPath = args.configPath;
  }
  if (args.outputPath) {
    input.outputPath = args.outputPath;
  }
  if (args.pipelineTestingMode) {
    input.pipelineTestingMode = args.pipelineTestingMode;
  }
  if (workspace.isResume && args.resumeFromWorkspace) {
    input.resumeFromWorkspace = args.resumeFromWorkspace;
  }
  if (workspace.terminatedWorkflows.length > 0) {
    input.terminatedWorkflows = workspace.terminatedWorkflows;
  }

  return input;
}
// === Display Helpers ===
/**
 * Print the "workflow started" banner followed by the run's key settings.
 * Config, output and mode rows are only printed when the corresponding
 * argument is set; a resume note is added for resumed workspaces.
 */
function displayWorkflowInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId, isResume } = workspace;

  console.log(`✓ Workflow started: ${workflowId}`);
  if (isResume) {
    console.log(` (Resuming workspace: ${sessionId})`);
  }
  console.log();

  // Always-present rows first, optional rows appended in display order
  const rows: string[] = [
    ` Target: ${args.webUrl}`,
    ` Repository: ${args.repoPath}`,
    ` Workspace: ${sessionId}`,
  ];
  if (args.configPath) {
    rows.push(` Config: ${args.configPath}`);
  }
  if (args.displayOutputPath) {
    rows.push(` Output: ${args.displayOutputPath}`);
  }
  if (args.pipelineTestingMode) {
    rows.push(` Mode: Pipeline Testing`);
  }

  for (const row of rows) {
    console.log(row);
  }
  console.log();
}
/**
 * Print where to follow the workflow (Web UI link, logs command) and where the
 * reports will be written. The reports path prefers displayOutputPath, then
 * outputPath, then './audit-logs', with the session ID appended.
 */
function displayMonitoringInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId } = workspace;
  const baseDir = args.displayOutputPath || args.outputPath || './audit-logs';
  const reportsDir = `${baseDir}/${sessionId}`;

  // Each section is a heading, its rows, then a blank line
  const printSection = (heading: string, rows: string[]): void => {
    console.log(heading);
    for (const row of rows) {
      console.log(row);
    }
    console.log();
  };

  printSection('Monitor progress:', [
    ` Web UI: http://localhost:8233/namespaces/default/workflows/${workflowId}`,
    ` Logs: ./shannon logs ID=${workflowId}`,
  ]);
  printSection('Output:', [` Reports: ${reportsDir}`]);
}
// === Workflow Result Handling ===
/**
 * Block until the workflow finishes, printing a progress line every 30 seconds.
 *
 * On success, prints the run metrics from the result summary and, for resumed
 * workspaces, the cumulative cost read from the workspace's session.json.
 * On any failure inside the wait/report sequence, logs the error and exits
 * the process with code 1.
 *
 * @param handle - Handle of the started pipeline workflow
 * @param workspace - Resolved workspace; isResume and sessionId drive the cumulative-cost lookup
 */
async function waitForWorkflowResult(
  handle: WorkflowHandle<(input: PipelineInput) => Promise<PipelineState>>,
  workspace: WorkspaceResolution
): Promise<void> {
  // Progress reporter: query failures are ignored because the workflow may have completed
  const reportProgress = async (): Promise<void> => {
    try {
      const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);
      const elapsed = Math.floor(progress.elapsedMs / 1000);
      const phase = progress.currentPhase || 'unknown';
      const agent = progress.currentAgent || 'none';
      console.log(
        `[${elapsed}s] Phase: ${phase} | Agent: ${agent} | Completed: ${progress.completedAgents.length}/13`
      );
    } catch {
      // Workflow may have completed
    }
  };
  const progressTimer = setInterval(reportProgress, 30000);

  try {
    // 1. Block until workflow completes, then stop polling
    const result = await handle.result();
    clearInterval(progressTimer);

    // 2. Display run metrics
    console.log('\nPipeline completed successfully!');
    const summary = result.summary;
    if (!summary) {
      return;
    }
    console.log(`Duration: ${Math.floor(summary.totalDurationMs / 1000)}s`);
    console.log(`Agents completed: ${summary.agentCount}`);
    console.log(`Total turns: ${summary.totalTurns}`);
    console.log(`Run cost: $${summary.totalCostUsd.toFixed(4)}`);

    // 3. Show cumulative cost across all resume attempts
    if (!workspace.isResume) {
      return;
    }
    try {
      const session = await readJson<SessionJson>(
        path.join('./audit-logs', workspace.sessionId, 'session.json')
      );
      console.log(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`);
    } catch {
      // Non-fatal, skip cumulative cost display
    }
  } catch (error) {
    clearInterval(progressTimer);
    console.error('\nPipeline failed:', error);
    process.exit(1);
  }
}
// === Main Entry Point ===
async function startPipeline(): Promise<void> {
// 1. Parse CLI args and display splash
const args = parseCliArgs(process.argv.slice(2));
await displaySplashScreen();
// 2. Connect to Temporal server
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(chalk.gray(`Connecting to Temporal at ${address}...`));
console.log(`Connecting to Temporal at ${address}...`);
const connection = await Connection.connect({ address });
const client = new Client({ connection });
try {
let terminatedWorkflows: string[] = [];
let workflowId: string;
let sessionId: string; // Workspace name (persistent directory)
let isResume = false;
// 3. Resolve workspace (new or resume) and build pipeline input
const workspace = await resolveWorkspace(client, args);
const input = buildPipelineInput(args, workspace);
if (resumeFromWorkspace) {
const sessionPath = path.join('./audit-logs', resumeFromWorkspace, 'session.json');
const workspaceExists = await fileExists(sessionPath);
if (workspaceExists) {
// === Resume Mode: existing workspace ===
isResume = true;
console.log(chalk.cyan('=== RESUME MODE ==='));
console.log(`Workspace: ${resumeFromWorkspace}\n`);
// Terminate any running workflows for this workspace
terminatedWorkflows = await terminateExistingWorkflows(client, resumeFromWorkspace);
if (terminatedWorkflows.length > 0) {
console.log(chalk.yellow(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`));
}
// Validate URL matches workspace
const session = await readJson<SessionJson>(sessionPath);
if (session.session.webUrl !== webUrl) {
console.error(chalk.red('ERROR: URL mismatch with workspace'));
console.error(` Workspace URL: ${session.session.webUrl}`);
console.error(` Provided URL: ${webUrl}`);
process.exit(1);
}
// Generate resume workflow ID
workflowId = `${resumeFromWorkspace}_resume_${Date.now()}`;
sessionId = resumeFromWorkspace;
} else {
// === New Named Workspace ===
if (!isValidWorkspaceName(resumeFromWorkspace)) {
console.error(chalk.red(`ERROR: Invalid workspace name: "${resumeFromWorkspace}"`));
console.error(chalk.gray(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric'));
process.exit(1);
}
console.log(chalk.cyan('=== NEW NAMED WORKSPACE ==='));
console.log(`Workspace: ${resumeFromWorkspace}\n`);
workflowId = `${resumeFromWorkspace}_shannon-${Date.now()}`;
sessionId = resumeFromWorkspace;
}
} else {
// === New Auto-Named Workflow ===
const hostname = sanitizeHostname(webUrl);
workflowId = customWorkflowId || `${hostname}_shannon-${Date.now()}`;
sessionId = workflowId;
}
const input: PipelineInput = {
webUrl,
repoPath,
workflowId, // Add for audit correlation
sessionId, // Workspace directory name
...(configPath && { configPath }),
...(outputPath && { outputPath }),
...(pipelineTestingMode && { pipelineTestingMode }),
...(isResume && resumeFromWorkspace && { resumeFromWorkspace }),
...(terminatedWorkflows.length > 0 && { terminatedWorkflows }),
};
// Determine output directory for display (use sessionId for persistent directory)
// Use displayOutputPath (host path) if provided, otherwise fall back to outputPath or default
const effectiveDisplayPath = displayOutputPath || outputPath || './audit-logs';
const outputDir = `${effectiveDisplayPath}/${sessionId}`;
console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`));
if (isResume) {
console.log(chalk.gray(` (Resuming workspace: ${sessionId})`));
}
console.log();
console.log(chalk.white(' Target: ') + chalk.cyan(webUrl));
console.log(chalk.white(' Repository: ') + chalk.cyan(repoPath));
console.log(chalk.white(' Workspace: ') + chalk.cyan(sessionId));
if (configPath) {
console.log(chalk.white(' Config: ') + chalk.cyan(configPath));
}
if (displayOutputPath) {
console.log(chalk.white(' Output: ') + chalk.cyan(displayOutputPath));
}
if (pipelineTestingMode) {
console.log(chalk.white(' Mode: ') + chalk.yellow('Pipeline Testing'));
}
console.log();
// Start workflow by name (not by importing the function)
// 4. Start the Temporal workflow
const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
'pentestPipelineWorkflow',
{
taskQueue: 'shannon-pipeline',
workflowId,
workflowId: workspace.workflowId,
args: [input],
}
);
if (!waitForCompletion) {
console.log(chalk.bold('Monitor progress:'));
console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`));
console.log();
console.log(chalk.bold('Output:'));
console.log(chalk.white(' Reports: ') + chalk.cyan(outputDir));
console.log();
return;
}
// 5. Display info and optionally wait for completion
displayWorkflowInfo(args, workspace);
// Poll for progress every 30 seconds
const progressInterval = setInterval(async () => {
try {
const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);
const elapsed = Math.floor(progress.elapsedMs / 1000);
console.log(
chalk.gray(`[${elapsed}s]`),
chalk.cyan(`Phase: ${progress.currentPhase || 'unknown'}`),
chalk.gray(`| Agent: ${progress.currentAgent || 'none'}`),
chalk.gray(`| Completed: ${progress.completedAgents.length}/13`)
);
} catch {
// Workflow may have completed
}
}, 30000);
try {
const result = await handle.result();
clearInterval(progressInterval);
console.log(chalk.green.bold('\nPipeline completed successfully!'));
if (result.summary) {
console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`));
// Show cumulative cost from session.json (includes all resume attempts)
if (isResume) {
try {
const session = await readJson<SessionJson>(
path.join('./audit-logs', sessionId, 'session.json')
);
console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`));
} catch {
// Non-fatal, skip cumulative cost display
}
}
}
} catch (error) {
clearInterval(progressInterval);
console.error(chalk.red.bold('\nPipeline failed:'), error);
process.exit(1);
if (args.waitForCompletion) {
await waitForWorkflowResult(handle, workspace);
} else {
displayMonitoringInfo(args, workspace);
}
} finally {
await connection.close();
@@ -383,6 +449,6 @@ async function startPipeline(): Promise<void> {
}
startPipeline().catch((err) => {
console.error(chalk.red('Client error:'), err);
console.error('Client error:', err);
process.exit(1);
});
+3 -13
View File
@@ -1,6 +1,7 @@
import { defineQuery } from '@temporalio/workflow';
// === Types ===
export type { AgentMetrics } from '../types/metrics.js';
import type { AgentMetrics } from '../types/metrics.js';
export interface PipelineInput {
webUrl: string;
@@ -8,7 +9,7 @@ export interface PipelineInput {
configPath?: string;
outputPath?: string;
pipelineTestingMode?: boolean;
workflowId?: string; // Added by client, used for audit correlation
workflowId?: string; // Used for audit correlation
sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces)
resumeFromWorkspace?: string; // Workspace name to resume from
terminatedWorkflows?: string[]; // Workflows terminated during resume
@@ -22,15 +23,6 @@ export interface ResumeState {
originalWorkflowId: string;
}
export interface AgentMetrics {
durationMs: number;
inputTokens: number | null;
outputTokens: number | null;
costUsd: number | null;
numTurns: number | null;
model?: string | undefined;
}
export interface PipelineSummary {
totalCostUsd: number;
totalDurationMs: number; // Wall-clock time (end - start)
@@ -68,6 +60,4 @@ export interface VulnExploitPipelineResult {
error: string | null;
}
// === Queries ===
export const getProgress = defineQuery<PipelineProgress>('getProgress');
+45
View File
@@ -0,0 +1,45 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Maps PipelineState to WorkflowSummary for audit logging.
* Pure function with no side effects.
*/
import type { PipelineState } from './shared.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
/**
 * Projects a PipelineState onto the WorkflowSummary shape used for audit logging.
 *
 * The function only reads its arguments (no clock access, no I/O), which is
 * what allows it to be imported from Temporal workflow code. Callers are
 * expected to have populated `state.summary` (via computeSummary) first.
 *
 * @param state - Pipeline state whose `summary` has already been computed.
 * @param status - Terminal status to record in the summary.
 * @returns Totals, completed agents, per-agent duration/cost, and an `error`
 *   property only when `state.error` is truthy.
 * @throws Error when `state.summary` is missing.
 */
export function toWorkflowSummary(
  state: PipelineState,
  status: 'completed' | 'failed'
): WorkflowSummary {
  // Guard first: the totals below cannot be produced without a computed summary
  if (!state.summary) {
    throw new Error('toWorkflowSummary: state.summary must be set before calling');
  }
  const { totalDurationMs, totalCostUsd } = state.summary;

  // Keep only the two per-agent fields the audit log records
  const perAgentEntries = Object.entries(state.agentMetrics).map(
    ([agent, metrics]) =>
      [agent, { durationMs: metrics.durationMs, costUsd: metrics.costUsd }] as const
  );

  return {
    status,
    totalDurationMs,
    totalCostUsd,
    completedAgents: state.completedAgents,
    agentMetrics: Object.fromEntries(perAgentEntries),
    // Omit the key entirely (rather than emitting null/undefined) when there is no error
    ...(state.error ? { error: state.error } : {}),
  };
}
+8 -9
View File
@@ -24,7 +24,6 @@ import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker
import { fileURLToPath } from 'node:url';
import path from 'node:path';
import dotenv from 'dotenv';
import chalk from 'chalk';
import * as activities from './activities.js';
dotenv.config();
@@ -33,12 +32,12 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
async function runWorker(): Promise<void> {
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(chalk.cyan(`Connecting to Temporal at ${address}...`));
console.log(`Connecting to Temporal at ${address}...`);
const connection = await NativeConnection.connect({ address });
// Bundle workflows for Temporal's V8 isolate
console.log(chalk.gray('Bundling workflows...'));
console.log('Bundling workflows...');
const workflowBundle = await bundleWorkflowCode({
workflowsPath: path.join(__dirname, 'workflows.js'),
});
@@ -54,26 +53,26 @@ async function runWorker(): Promise<void> {
// Graceful shutdown handling
const shutdown = async (): Promise<void> => {
console.log(chalk.yellow('\nShutting down worker...'));
console.log('\nShutting down worker...');
worker.shutdown();
};
process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);
console.log(chalk.green('Shannon worker started'));
console.log(chalk.gray('Task queue: shannon-pipeline'));
console.log(chalk.gray('Press Ctrl+C to stop\n'));
console.log('Shannon worker started');
console.log('Task queue: shannon-pipeline');
console.log('Press Ctrl+C to stop\n');
try {
await worker.run();
} finally {
await connection.close();
console.log(chalk.gray('Worker stopped'));
console.log('Worker stopped');
}
}
runWorker().catch((err) => {
console.error(chalk.red('Worker failed:'), err);
console.error('Worker failed:', err);
process.exit(1);
});
+125 -162
View File
@@ -24,6 +24,7 @@
*/
import {
log,
proxyActivities,
setHandler,
workflowInfo,
@@ -40,9 +41,10 @@ import {
type AgentMetrics,
type ResumeState,
} from './shared.js';
import type { VulnType } from '../queue-validation.js';
import type { VulnType } from '../services/queue-validation.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { toWorkflowSummary } from './summary-mapper.js';
// Retry configuration for production (long intervals for billing recovery)
const PRODUCTION_RETRY = {
@@ -103,11 +105,9 @@ export async function pentestPipelineWorkflow(
): Promise<PipelineState> {
const { workflowId } = workflowInfo();
// Select activity proxy based on testing mode
// Pipeline testing uses fast retry intervals (10s) for quick iteration
const a = input.pipelineTestingMode ? testActs : acts;
// Workflow state (queryable)
const state: PipelineState = {
status: 'running',
currentPhase: null,
@@ -120,7 +120,6 @@ export async function pentestPipelineWorkflow(
summary: null,
};
// Register query handler for real-time progress inspection
setHandler(getProgress, (): PipelineProgress => ({
...state,
workflowId,
@@ -145,18 +144,17 @@ export async function pentestPipelineWorkflow(
}),
};
// === RESUME LOGIC ===
let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) {
// Load resume state from existing workspace
// 1. Load resume state (validates workspace, cross-checks deliverables)
resumeState = await a.loadResumeState(
input.resumeFromWorkspace,
input.webUrl,
input.repoPath
);
// Restore git checkpoint and clean up partial deliverables
// 2. Restore git workspace and clean up incomplete deliverables
const incompleteAgents = ALL_AGENTS.filter(
(agentName) => !resumeState!.completedAgents.includes(agentName)
) as AgentName[];
@@ -167,120 +165,59 @@ export async function pentestPipelineWorkflow(
incompleteAgents
);
// Check if all agents are already complete
// 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
console.log(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
state.status = 'completed';
state.completedAgents = [...resumeState.completedAgents];
state.summary = computeSummary(state);
return state;
}
// Record resume attempt in session.json
// 4. Record this resume attempt in session.json and workflow.log
await a.recordResumeAttempt(
activityInput,
input.terminatedWorkflows || [],
resumeState.checkpointHash
resumeState.checkpointHash,
resumeState.originalWorkflowId,
resumeState.completedAgents
);
console.log('Resume state loaded and workspace restored');
log.info('Resume state loaded and workspace restored');
}
// Helper to check if an agent should be skipped
const shouldSkip = (agentName: string): boolean => {
return resumeState?.completedAgents.includes(agentName) ?? false;
};
try {
// === Phase 1: Pre-Reconnaissance ===
if (!shouldSkip('pre-recon')) {
state.currentPhase = 'pre-recon';
state.currentAgent = 'pre-recon';
await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
state.agentMetrics['pre-recon'] =
await a.runPreReconAgent(activityInput);
state.completedAgents.push('pre-recon');
await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');
// Run a sequential agent phase (pre-recon, recon)
async function runSequentialPhase(
phaseName: string,
agentName: AgentName,
runAgent: (input: ActivityInput) => Promise<AgentMetrics>
): Promise<void> {
if (!shouldSkip(agentName)) {
state.currentPhase = phaseName;
state.currentAgent = agentName;
await a.logPhaseTransition(activityInput, phaseName, 'start');
state.agentMetrics[agentName] = await runAgent(activityInput);
state.completedAgents.push(agentName);
await a.logPhaseTransition(activityInput, phaseName, 'complete');
} else {
console.log('Skipping pre-recon (already complete)');
state.completedAgents.push('pre-recon');
log.info(`Skipping ${agentName} (already complete)`);
state.completedAgents.push(agentName);
}
}
// === Phase 2: Reconnaissance ===
if (!shouldSkip('recon')) {
state.currentPhase = 'recon';
state.currentAgent = 'recon';
await a.logPhaseTransition(activityInput, 'recon', 'start');
state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
state.completedAgents.push('recon');
await a.logPhaseTransition(activityInput, 'recon', 'complete');
} else {
console.log('Skipping recon (already complete)');
state.completedAgents.push('recon');
}
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
// Each vuln type runs as an independent pipeline:
// vuln agent → queue check → conditional exploit agent
// This eliminates the synchronization barrier between phases - each exploit
// starts immediately when its vuln agent finishes, not waiting for all.
state.currentPhase = 'vulnerability-exploitation';
state.currentAgent = 'pipelines';
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
// Helper: Run a single vuln→exploit pipeline with skip logic
async function runVulnExploitPipeline(
vulnType: VulnType,
runVulnAgent: () => Promise<AgentMetrics>,
runExploitAgent: () => Promise<AgentMetrics>
): Promise<VulnExploitPipelineResult> {
const vulnAgentName = `${vulnType}-vuln`;
const exploitAgentName = `${vulnType}-exploit`;
// Step 1: Run vulnerability agent (or skip if completed)
let vulnMetrics: AgentMetrics | null = null;
if (!shouldSkip(vulnAgentName)) {
vulnMetrics = await runVulnAgent();
} else {
console.log(`Skipping ${vulnAgentName} (already complete)`);
}
// Step 2: Check exploitation queue (only if vuln agent ran or completed previously)
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// Step 3: Conditionally run exploit agent (skip if already completed)
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {
exploitMetrics = await runExploitAgent();
} else {
console.log(`Skipping ${exploitAgentName} (already complete)`);
}
}
return {
vulnType,
vulnMetrics,
exploitMetrics,
exploitDecision: {
shouldExploit: decision.shouldExploit,
vulnerabilityCount: decision.vulnerabilityCount,
},
error: null,
};
}
// Determine which pipelines to run (skip if both vuln and exploit completed)
const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
// Only run pipeline if at least one agent (vuln or exploit) is incomplete
const pipelineConfigs: Array<{
vulnType: VulnType;
vulnAgent: string;
exploitAgent: string;
runVuln: () => Promise<AgentMetrics>;
runExploit: () => Promise<AgentMetrics>;
}> = [
// Build pipeline configs for the 5 vuln→exploit pairs
function buildPipelineConfigs(): Array<{
vulnType: VulnType;
vulnAgent: string;
exploitAgent: string;
runVuln: () => Promise<AgentMetrics>;
runExploit: () => Promise<AgentMetrics>;
}> {
return [
{
vulnType: 'injection',
vulnAgent: 'injection-vuln',
@@ -317,56 +254,34 @@ export async function pentestPipelineWorkflow(
runExploit: () => a.runAuthzExploitAgent(activityInput),
},
];
}
for (const config of pipelineConfigs) {
const vulnComplete = shouldSkip(config.vulnAgent);
const exploitComplete = shouldSkip(config.exploitAgent);
// Only run pipeline if at least one agent needs to run
if (!vulnComplete || !exploitComplete) {
pipelinesToRun.push(
runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
);
} else {
console.log(
`Skipping entire ${config.vulnType} pipeline (both agents complete)`
);
// Still need to mark them as completed in state
state.completedAgents.push(config.vulnAgent, config.exploitAgent);
}
}
// Run pipelines in parallel with graceful failure handling
// Promise.allSettled ensures other pipelines continue if one fails
const pipelineResults = await Promise.allSettled(pipelinesToRun);
// Aggregate results from all pipelines
// Aggregate results from settled pipeline promises into workflow state
function aggregatePipelineResults(
results: PromiseSettledResult<VulnExploitPipelineResult>[]
): void {
const failedPipelines: string[] = [];
for (const result of pipelineResults) {
for (const result of results) {
if (result.status === 'fulfilled') {
const { vulnType, vulnMetrics, exploitMetrics } = result.value;
// Record vuln agent
const vulnAgentName = `${vulnType}-vuln`;
if (vulnMetrics) {
state.agentMetrics[vulnAgentName] = vulnMetrics;
state.completedAgents.push(vulnAgentName);
} else if (shouldSkip(vulnAgentName)) {
// Agent was skipped because already complete
state.completedAgents.push(vulnAgentName);
}
// Record exploit agent (if it ran)
const exploitAgentName = `${vulnType}-exploit`;
if (exploitMetrics) {
state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName);
} else if (shouldSkip(exploitAgentName)) {
// Agent was skipped because already complete
state.completedAgents.push(exploitAgentName);
}
} else {
// Pipeline failed - log error but continue with others
const errorMsg =
result.reason instanceof Error
? result.reason.message
@@ -375,15 +290,87 @@ export async function pentestPipelineWorkflow(
}
}
// Log any pipeline failures (workflow continues despite failures)
if (failedPipelines.length > 0) {
console.log(
`⚠️ ${failedPipelines.length} pipeline(s) failed:`,
failedPipelines
);
log.warn(`${failedPipelines.length} pipeline(s) failed`, {
failures: failedPipelines,
});
}
}
try {
// === Phase 1: Pre-Reconnaissance ===
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
// === Phase 2: Reconnaissance ===
await runSequentialPhase('recon', 'recon', a.runReconAgent);
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
// Each vuln type runs as an independent pipeline:
// vuln agent → queue check → conditional exploit agent
// Exploits start immediately when their vuln finishes, not waiting for all.
state.currentPhase = 'vulnerability-exploitation';
state.currentAgent = 'pipelines';
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
// Runs one vuln→exploit pipeline for a single vulnerability type.
// Deliberately a closure over shouldSkip and activityInput (Temporal replay safety).
async function runVulnExploitPipeline(
  vulnType: VulnType,
  runVulnAgent: () => Promise<AgentMetrics>,
  runExploitAgent: () => Promise<AgentMetrics>
): Promise<VulnExploitPipelineResult> {
  const vulnAgentName = `${vulnType}-vuln`;
  const exploitAgentName = `${vulnType}-exploit`;

  // 1. Vulnerability analysis: metrics stay null when the agent is skipped on resume
  let vulnMetrics: AgentMetrics | null;
  if (shouldSkip(vulnAgentName)) {
    log.info(`Skipping ${vulnAgentName} (already complete)`);
    vulnMetrics = null;
  } else {
    vulnMetrics = await runVulnAgent();
  }

  // 2. Ask the queue check whether there is anything to exploit
  const decision = await a.checkExploitationQueue(activityInput, vulnType);

  // 3. Exploitation runs only when the queue says so and the agent is not already done
  let exploitMetrics: AgentMetrics | null = null;
  if (decision.shouldExploit) {
    const exploitAlreadyDone = shouldSkip(exploitAgentName);
    if (exploitAlreadyDone) {
      log.info(`Skipping ${exploitAgentName} (already complete)`);
    } else {
      exploitMetrics = await runExploitAgent();
    }
  }

  const exploitDecision = {
    shouldExploit: decision.shouldExploit,
    vulnerabilityCount: decision.vulnerabilityCount,
  };
  return { vulnType, vulnMetrics, exploitMetrics, exploitDecision, error: null };
}
// Update phase markers
const pipelineConfigs = buildPipelineConfigs();
const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
for (const config of pipelineConfigs) {
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
pipelinesToRun.push(
runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
);
} else {
log.info(`Skipping entire ${config.vulnType} pipeline (both agents complete)`);
state.completedAgents.push(config.vulnAgent, config.exploitAgent);
}
}
const pipelineResults = await Promise.allSettled(pipelinesToRun);
aggregatePipelineResults(pipelineResults);
state.currentPhase = 'exploitation';
state.currentAgent = null;
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');
@@ -406,29 +393,17 @@ export async function pentestPipelineWorkflow(
await a.logPhaseTransition(activityInput, 'reporting', 'complete');
} else {
console.log('Skipping report (already complete)');
log.info('Skipping report (already complete)');
state.completedAgents.push('report');
}
// === Complete ===
state.status = 'completed';
state.currentPhase = null;
state.currentAgent = null;
state.summary = computeSummary(state);
// Log workflow completion summary
await a.logWorkflowComplete(activityInput, {
status: 'completed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
});
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));
return state;
} catch (error) {
@@ -438,19 +413,7 @@ export async function pentestPipelineWorkflow(
state.summary = computeSummary(state);
// Log workflow failure summary
await a.logWorkflowComplete(activityInput, {
status: 'failed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
error: state.error ?? undefined,
});
await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));
throw error;
}
+22 -34
View File
@@ -20,7 +20,6 @@
import fs from 'fs/promises';
import path from 'path';
import chalk from 'chalk';
interface SessionJson {
session: {
@@ -59,16 +58,7 @@ function formatDuration(ms: number): string {
}
function getStatusDisplay(status: string): string {
switch (status) {
case 'completed':
return chalk.green(status);
case 'in-progress':
return chalk.yellow(status);
case 'failed':
return chalk.red(status);
default:
return status;
}
return status;
}
function truncate(str: string, maxLen: number): string {
@@ -83,8 +73,8 @@ async function listWorkspaces(): Promise<void> {
try {
entries = await fs.readdir(auditDir);
} catch {
console.log(chalk.yellow('No audit-logs directory found.'));
console.log(chalk.gray(`Expected: ${auditDir}`));
console.log('No audit-logs directory found.');
console.log(`Expected: ${auditDir}`);
return;
}
@@ -110,15 +100,15 @@ async function listWorkspaces(): Promise<void> {
}
if (workspaces.length === 0) {
console.log(chalk.yellow('\nNo workspaces found.'));
console.log(chalk.gray('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>'));
console.log('\nNo workspaces found.');
console.log('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>');
return;
}
// Sort by creation date (most recent first)
workspaces.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
console.log(chalk.cyan.bold('\n=== Shannon Workspaces ===\n'));
console.log('\n=== Shannon Workspaces ===\n');
// Column widths
const nameWidth = 30;
@@ -129,16 +119,14 @@ async function listWorkspaces(): Promise<void> {
// Header
console.log(
chalk.gray(
' ' +
'WORKSPACE'.padEnd(nameWidth) +
'URL'.padEnd(urlWidth) +
'STATUS'.padEnd(statusWidth) +
'DURATION'.padEnd(durationWidth) +
'COST'.padEnd(costWidth)
)
' ' +
'WORKSPACE'.padEnd(nameWidth) +
'URL'.padEnd(urlWidth) +
'STATUS'.padEnd(statusWidth) +
'DURATION'.padEnd(durationWidth) +
'COST'.padEnd(costWidth)
);
console.log(chalk.gray(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth)));
console.log(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth));
let resumableCount = 0;
@@ -154,15 +142,15 @@ async function listWorkspaces(): Promise<void> {
resumableCount++;
}
const resumeTag = isResumable ? chalk.cyan(' (resumable)') : '';
const resumeTag = isResumable ? ' (resumable)' : '';
console.log(
' ' +
chalk.white(truncate(ws.name, nameWidth - 2).padEnd(nameWidth)) +
chalk.gray(truncate(ws.url, urlWidth - 2).padEnd(urlWidth)) +
getStatusDisplay(ws.status).padEnd(statusWidth + 10) + // +10 for chalk escape codes
chalk.gray(duration.padEnd(durationWidth)) +
chalk.gray(cost.padEnd(costWidth)) +
truncate(ws.name, nameWidth - 2).padEnd(nameWidth) +
truncate(ws.url, urlWidth - 2).padEnd(urlWidth) +
getStatusDisplay(ws.status).padEnd(statusWidth) +
duration.padEnd(durationWidth) +
cost.padEnd(costWidth) +
resumeTag
);
}
@@ -170,16 +158,16 @@ async function listWorkspaces(): Promise<void> {
console.log();
const summary = `${workspaces.length} workspace${workspaces.length === 1 ? '' : 's'} found`;
const resumeSummary = resumableCount > 0 ? ` (${resumableCount} resumable)` : '';
console.log(chalk.gray(`${summary}${resumeSummary}`));
console.log(`${summary}${resumeSummary}`);
if (resumableCount > 0) {
console.log(chalk.gray('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>'));
console.log('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>');
}
console.log();
}
listWorkspaces().catch((err) => {
console.error(chalk.red('Error listing workspaces:'), err);
console.error('Error listing workspaces:', err);
process.exit(1);
});
-66
View File
@@ -1,66 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $ } from 'zx';
import chalk from 'chalk';
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
export type ToolAvailability = Record<ToolName, boolean>;
// Probe the shell for each required external tool and report which are installed.
// Resolves to a record mapping every tool name to true (found) or false (not found).
export const checkToolAvailability = async (): Promise<ToolAvailability> => {
  // Start pessimistic; the key order here is also the probing/printing order
  const availability: ToolAvailability = {
    nmap: false,
    subfinder: false,
    whatweb: false,
    schemathesis: false
  };

  console.log(chalk.blue('🔧 Checking tool availability...'));

  for (const tool of Object.keys(availability) as ToolName[]) {
    // `command -v` fails for an unknown command, which zx surfaces as a rejection
    let found = true;
    try {
      await $`command -v ${tool}`;
    } catch {
      found = false;
    }

    availability[tool] = found;
    if (found) {
      console.log(chalk.green(`${tool} - available`));
    } else {
      console.log(chalk.yellow(` ⚠️ ${tool} - not found`));
    }
  }

  return availability;
};
// Report unavailable tools to the user, with an install hint for each one.
// Returns the names of the missing tools (empty when everything is available);
// nothing is printed in the all-available case.
export const handleMissingTools = (toolAvailability: ToolAvailability): ToolName[] => {
  const missing: ToolName[] = [];
  for (const [tool, available] of Object.entries(toolAvailability) as Array<[ToolName, boolean]>) {
    if (!available) {
      missing.push(tool);
    }
  }

  // Nothing to report
  if (missing.length === 0) {
    return missing;
  }

  console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
  console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));

  // One suggested install command per tool
  const installHints: Record<ToolName, string> = {
    'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
    'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
    'whatweb': 'gem install whatweb',
    'schemathesis': 'pip install schemathesis'
  };

  console.log(chalk.gray('\nInstallation hints:'));
  for (const tool of missing) {
    console.log(chalk.gray(` ${tool}: ${installHints[tool]}`));
  }
  console.log('');

  return missing;
};
+15
View File
@@ -0,0 +1,15 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
 * Logger interface for services called from Temporal activities.
 * Keeps services Temporal-agnostic while providing structured logging.
 *
 * All three methods share one shape: a human-readable message plus an
 * optional bag of structured attributes, and none of them returns a value.
 */
export interface ActivityLogger {
/** Log at informational level; `attrs` carries optional structured context. */
info(message: string, attrs?: Record<string, unknown>): void;
/** Log at warning level; `attrs` carries optional structured context. */
warn(message: string, attrs?: Record<string, unknown>): void;
/** Log at error level; `attrs` carries optional structured context. */
error(message: string, attrs?: Record<string, unknown>): void;
}
+13 -57
View File
@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
*/
export type AgentName = typeof ALL_AGENTS[number];
export type PromptName =
| 'pre-recon-code'
| 'recon'
| 'vuln-injection'
| 'vuln-xss'
| 'vuln-auth'
| 'vuln-ssrf'
| 'vuln-authz'
| 'exploit-injection'
| 'exploit-xss'
| 'exploit-auth'
| 'exploit-ssrf'
| 'exploit-authz'
| 'report-executive';
export type PlaywrightAgent =
| 'playwright-agent1'
| 'playwright-agent2'
@@ -56,7 +41,9 @@ export type PlaywrightAgent =
| 'playwright-agent4'
| 'playwright-agent5';
export type AgentValidator = (sourceDir: string) => Promise<boolean>;
import type { ActivityLogger } from './activity-logger.js';
export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise<boolean>;
export type AgentStatus =
| 'pending'
@@ -69,52 +56,21 @@ export interface AgentDefinition {
name: AgentName;
displayName: string;
prerequisites: AgentName[];
promptTemplate: string;
deliverableFilename: string;
}
/**
* Maps an agent name to its corresponding prompt file name.
* Vulnerability types supported by the pipeline.
*/
export function getPromptNameForAgent(agentName: AgentName): PromptName {
const mappings: Record<AgentName, PromptName> = {
'pre-recon': 'pre-recon-code',
'recon': 'recon',
'injection-vuln': 'vuln-injection',
'xss-vuln': 'vuln-xss',
'auth-vuln': 'vuln-auth',
'ssrf-vuln': 'vuln-ssrf',
'authz-vuln': 'vuln-authz',
'injection-exploit': 'exploit-injection',
'xss-exploit': 'exploit-xss',
'auth-exploit': 'exploit-auth',
'ssrf-exploit': 'exploit-ssrf',
'authz-exploit': 'exploit-authz',
'report': 'report-executive',
};
return mappings[agentName];
}
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
/**
* Maps an agent name to its deliverable file path.
* Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
* Decision returned by queue validation for exploitation phase.
*/
export function getDeliverablePath(agentName: AgentName, repoPath: string): string {
const deliverableMap: Record<AgentName, string> = {
'pre-recon': 'code_analysis_deliverable.md',
'recon': 'recon_deliverable.md',
'injection-vuln': 'injection_analysis_deliverable.md',
'xss-vuln': 'xss_analysis_deliverable.md',
'auth-vuln': 'auth_analysis_deliverable.md',
'ssrf-vuln': 'ssrf_analysis_deliverable.md',
'authz-vuln': 'authz_analysis_deliverable.md',
'injection-exploit': 'injection_exploitation_evidence.md',
'xss-exploit': 'xss_exploitation_evidence.md',
'auth-exploit': 'auth_exploitation_evidence.md',
'ssrf-exploit': 'ssrf_exploitation_evidence.md',
'authz-exploit': 'authz_exploitation_evidence.md',
'report': 'comprehensive_security_assessment_report.md',
};
const filename = deliverableMap[agentName];
return `${repoPath}/deliverables/${filename}`;
export interface ExploitationDecision {
shouldExploit: boolean;
shouldRetry: boolean;
vulnerabilityCount: number;
vulnType: VulnType;
}
+35
View File
@@ -0,0 +1,35 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Audit system type definitions
*/
/**
 * Cross-cutting session metadata used by services, temporal, and audit.
 *
 * Only `id` and `webUrl` are required. The index signature additionally
 * allows arbitrary extra keys, whose values are typed `unknown` and must be
 * narrowed by the reader before use.
 */
export interface SessionMetadata {
/** Session identifier. */
id: string;
/** Target web URL for the session. */
webUrl: string;
/** Repository path, when one is associated with the session. */
repoPath?: string;
/** Output path, when one was specified. */
outputPath?: string;
/** Open-ended extension point for additional, untyped metadata. */
[key: string]: unknown;
}
/**
 * Result data passed to audit system when an agent execution ends.
 * Used by both AuditSession and MetricsTracker.
 *
 * The first four fields are always present; the remaining fields are optional
 * and may also be set explicitly to `undefined`.
 */
export interface AgentEndResult {
/** Which attempt this result belongs to (NOTE(review): presumably 1-based — confirm with callers). */
attemptNumber: number;
/** Duration of the attempt; milliseconds per the field name. */
duration_ms: number;
/** Cost of the attempt; US dollars per the field name. */
cost_usd: number;
/** Whether the attempt succeeded. */
success: boolean;
/** Model identifier, when known. */
model?: string | undefined;
/** Error description, when one is available. */
error?: string | undefined;
/** Checkpoint reference (NOTE(review): presumably a git commit hash — confirm). */
checkpoint?: string | undefined;
/** Marks the last attempt for this agent, when the caller knows it. */
isFinalAttempt?: boolean | undefined;
}
+1 -4
View File
@@ -29,10 +29,8 @@ export interface Rules {
export type LoginType = 'form' | 'sso' | 'api' | 'basic';
export type SuccessConditionType = 'url' | 'cookie' | 'element' | 'redirect';
export interface SuccessCondition {
type: SuccessConditionType;
type: 'url' | 'cookie' | 'element' | 'redirect';
value: string;
}
@@ -53,7 +51,6 @@ export interface Authentication {
export interface Config {
rules?: Rules;
authentication?: Authentication;
login?: unknown; // Deprecated
}
export interface DistributedConfig {
+33
View File
@@ -8,6 +8,39 @@
* Error type definitions
*/
/**
 * Specific error codes for reliable classification.
 *
 * ErrorCode provides precision within the coarse 8-category PentestErrorType.
 * Used by classifyErrorForTemporal for code-based classification (preferred)
 * with string matching as fallback for external errors.
 *
 * This is a string enum: every member's runtime value is the same string as
 * its name. The group comments below record which PentestErrorType category
 * each set of codes falls under.
 */
export enum ErrorCode {
// Config errors (PentestErrorType: 'config')
CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
// Agent execution errors (PentestErrorType: 'validation')
AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
// Billing errors (PentestErrorType: 'billing')
API_RATE_LIMITED = 'API_RATE_LIMITED',
SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
// Git errors (PentestErrorType: 'filesystem')
GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
// Prompt errors (PentestErrorType: 'prompt')
PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
// Validation errors (PentestErrorType: 'validation')
DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
}
export type PentestErrorType =
| 'config'
| 'network'
+4
View File
@@ -8,6 +8,10 @@
* Type definitions barrel export
*/
export * from './activity-logger.js';
export * from './errors.js';
export * from './config.js';
export * from './agents.js';
export * from './audit.js';
export * from './result.js';
export * from './metrics.js';
+19
View File
@@ -0,0 +1,19 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Agent metrics types used across services and activities.
* Centralized here to avoid temporal/shared.ts import boundary violations.
*/
/**
 * Metrics recorded for a single agent run.
 *
 * `durationMs` is always a number; the token, cost and turn fields are
 * nullable (NOTE(review): presumably null when the executor did not report
 * them — confirm against the code that produces these values).
 */
export interface AgentMetrics {
/** Duration of the run; milliseconds per the field name. */
durationMs: number;
/** Input token count, or null. */
inputTokens: number | null;
/** Output token count, or null. */
outputTokens: number | null;
/** Cost of the run (US dollars per the field name), or null. */
costUsd: number | null;
/** Number of turns, or null. */
numTurns: number | null;
/** Model identifier; optional and may be explicitly undefined. */
model?: string | undefined;
}
+62
View File
@@ -0,0 +1,62 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Minimal Result type for explicit error handling.
*
* A discriminated union that makes error handling explicit without adding
* heavy machinery. Used in key modules (config loading, agent execution,
* queue validation) where callers need to make decisions based on error type.
*/
/**
* Success variant of Result
*/
export interface Ok<T> {
readonly ok: true;
readonly value: T;
}
/**
* Error variant of Result
*/
export interface Err<E> {
readonly ok: false;
readonly error: E;
}
/**
* Result type - either Ok with a value or Err with an error
*/
export type Result<T, E> = Ok<T> | Err<E>;
/**
* Create a success Result
*/
export function ok<T>(value: T): Ok<T> {
return { ok: true, value };
}
/**
 * Wrap an error in the failure variant of Result.
 *
 * @param error - payload stored in the `error` field
 * @returns an object with `ok: false` and the given error
 */
export function err<E>(error: E): Err<E> {
  const failure: Err<E> = { ok: false, error: error };
  return failure;
}
/**
 * Type guard that narrows a Result to its Ok variant.
 *
 * @param result - the Result to inspect
 * @returns true exactly when `result.ok` is `true`
 */
export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
  const { ok: succeeded } = result;
  return succeeded === true;
}
/**
 * Type guard that narrows a Result to its Err variant.
 *
 * @param result - the Result to inspect
 * @returns true exactly when `result.ok` is `false`
 */
export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
  const { ok: succeeded } = result;
  return succeeded === false;
}
+95
View File
@@ -0,0 +1,95 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Consolidated billing/spending cap detection utilities.
*
* Anthropic's spending cap behavior is inconsistent:
* - Sometimes a proper SDK error (billing_error)
* - Sometimes Claude responds with text about the cap
* - Sometimes partial billing before cutoff
*
* This module provides defense-in-depth detection with shared pattern lists
* to prevent drift between detection points.
*/
/**
 * Text patterns for SDK output sniffing (what Claude says).
 * Used by message-handlers.ts and the behavioral heuristic.
 *
 * matchesBillingTextPattern lowercases the input and tests each entry with a
 * plain substring check, so every entry here must itself be lowercase.
 */
export const BILLING_TEXT_PATTERNS = [
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
// NOTE(review): very generic substring — any text containing "resets" matches; confirm this breadth is intended.
'resets',
] as const;
/**
 * API patterns for error message classification (what the API returns).
 * Used by classifyErrorForTemporal in error-handling.ts.
 *
 * matchesBillingApiPattern lowercases the message and tests each entry with a
 * plain substring check, so every entry here must itself be lowercase.
 */
export const BILLING_API_PATTERNS = [
'billing_error',
'credit balance is too low',
'insufficient credits',
// Redundant under substring matching: any message containing this also contains 'insufficient credits'.
'usage is blocked due to insufficient credits',
// Both the "&" and the "and" spellings are listed.
'please visit plans & billing',
'please visit plans and billing',
'usage limit reached',
'quota exceeded',
'daily rate limit',
'limit will reset',
'billing limit reached',
] as const;
/**
 * Reports whether the given text contains any entry of BILLING_TEXT_PATTERNS.
 * The comparison is case-insensitive: the text is lowercased before the
 * substring checks. Used for sniffing SDK output for spending cap messages.
 *
 * @param text - content to scan
 * @returns true if at least one pattern occurs in the text
 */
export function matchesBillingTextPattern(text: string): boolean {
  const haystack = text.toLowerCase();
  for (const needle of BILLING_TEXT_PATTERNS) {
    if (haystack.includes(needle)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether an error message contains any entry of BILLING_API_PATTERNS.
 * The comparison is case-insensitive: the message is lowercased before the
 * substring checks. Used for classifying API error messages.
 *
 * @param message - error message to classify
 * @returns true if at least one pattern occurs in the message
 */
export function matchesBillingApiPattern(message: string): boolean {
  const normalized = message.toLowerCase();
  const hitIndex = BILLING_API_PATTERNS.findIndex(
    (candidate) => normalized.indexOf(candidate) !== -1
  );
  return hitIndex !== -1;
}
/**
 * Behavioral heuristic for detecting a spending cap hit.
 *
 * When Claude hits a spending cap, it often returns a short message
 * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
 *
 * All three signals must hold:
 * 1. Turn count is not above 2
 * 2. Cost is exactly 0
 * 3. The result text matches a billing text pattern
 *
 * The text is only inspected once the first two signals hold.
 *
 * @param turns - Number of turns the agent took
 * @param cost - Total cost in USD
 * @param resultText - The result text from the agent
 * @returns true if this looks like a spending cap hit
 */
export function isSpendingCapBehavior(
  turns: number,
  cost: number,
  resultText: string
): boolean {
  const withinTurnBudget = !(turns > 2);
  const costIsZero = cost === 0;
  return withinTurnBudget && costIsZero && matchesBillingTextPattern(resultText);
}
-84
View File
@@ -4,11 +4,6 @@
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import chalk from 'chalk';
import { formatDuration } from './formatting.js';
// Timing utilities
export class Timer {
name: string;
startTime: number;
@@ -29,82 +24,3 @@ export class Timer {
return end - this.startTime;
}
}
// String-keyed map of numbers. displayTimingSummary treats each key as an
// agent name and each value as a duration that it passes to formatDuration.
interface TimingResultsAgents {
[key: string]: number;
}
// Shape of the timing tracker: an optional overall Timer plus per-agent durations.
interface TimingResults {
// null until a total timer has been assigned; displayTimingSummary prints
// "No timing data available" while it is null.
total: Timer | null;
agents: TimingResultsAgents;
}
// String-keyed map of numbers. displayTimingSummary treats each key as an
// agent name and prints each value as a dollar amount with four decimals.
interface CostResultsAgents {
[key: string]: number;
}
// Shape of the cost tracker: per-agent costs plus a total.
interface CostResults {
agents: CostResultsAgents;
// Printed as "Total Cost" by displayTimingSummary.
// NOTE(review): presumably maintained by callers as the sum of `agents` — nothing here enforces that.
total: number;
}
// Global timing and cost tracker
// Exported mutable module-level state: starts with no total timer and no
// per-agent entries. displayTimingSummary reads it.
export const timingResults: TimingResults = {
total: null,
agents: {},
};
// Exported mutable module-level cost tracker: starts with no per-agent
// entries and a total of 0. displayTimingSummary reads it.
export const costResults: CostResults = {
agents: {},
total: 0,
};
/**
 * Print a timing and cost summary to the console.
 *
 * Reads the module-level `timingResults` and `costResults` trackers. When no
 * total timer has been set, prints a notice and returns. Otherwise the value
 * returned by the total timer's `stop()` is used as the overall duration,
 * followed by a per-agent duration breakdown and a per-agent cost breakdown.
 * Each breakdown is printed only when it has at least one entry.
 */
export const displayTimingSummary = (): void => {
  if (!timingResults.total) {
    console.log(chalk.yellow('No timing data available'));
    return;
  }

  const totalDuration = timingResults.total.stop();

  // Share of the total as a one-decimal percentage string. Dividing by a zero
  // total would print "NaN" or "Infinity", so a zero total reports 0.0.
  const shareOfTotal = (duration: number): string =>
    (totalDuration !== 0 ? (duration / totalDuration) * 100 : 0).toFixed(1);

  console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY'));
  console.log(chalk.gray('─'.repeat(60)));

  // Total execution time
  console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
  console.log();

  // Agent breakdown
  if (Object.keys(timingResults.agents).length > 0) {
    console.log(chalk.magenta.bold('🤖 Agent Breakdown:'));
    let agentTotal = 0;
    for (const [agent, duration] of Object.entries(timingResults.agents)) {
      const percentage = shareOfTotal(duration);
      // Agent keys are hyphenated; show them with spaces.
      const displayName = agent.replace(/-/g, ' ');
      console.log(
        chalk.magenta(
          ` ${displayName.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`
        )
      );
      agentTotal += duration;
    }
    console.log(
      chalk.gray(
        ` ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${shareOfTotal(agentTotal)}%)`
      )
    );
  }

  // Cost breakdown
  if (Object.keys(costResults.agents).length > 0) {
    console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
    for (const [agent, cost] of Object.entries(costResults.agents)) {
      const displayName = agent.replace(/-/g, ' ');
      console.log(chalk.green(` ${displayName.padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
    }
    console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
  }

  console.log(chalk.gray('─'.repeat(60)));
};
-264
View File
@@ -1,264 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { AGENTS } from '../session-manager.js';
// Input payload of a tool call. Every named field is optional; the index
// signature admits any other key as `unknown`.
interface ToolCallInput {
// Read by formatBrowserAction for the navigate tool.
url?: string;
// Read by formatBrowserAction for click / hover / type.
element?: string;
// Read by formatBrowserAction for press_key.
key?: string;
// Only the length is used (fill_form).
fields?: unknown[];
// Read by formatBrowserAction for wait_for.
text?: string;
// Read by formatBrowserAction for tabs.
action?: string;
// Read by filterJsonToolCalls for the Task tool.
description?: string;
// Read by summarizeTodoUpdate, which checks for the 'completed' and 'in_progress' statuses.
todos?: Array<{
status: string;
content: string;
}>;
[key: string]: unknown;
}
// A tool invocation as parsed by filterJsonToolCalls from a JSON line that
// starts with {"type":"tool_use".
interface ToolCall {
// Tool identifier, e.g. 'Task', 'TodoWrite' or an 'mcp__playwright__browser_*' name.
name: string;
// May be absent; consumers fall back to defaults when it is.
input?: ToolCallInput;
}
/**
 * Reduce a URL to a short label for display.
 *
 * @param url - the URL text to shorten
 * @returns the parsed hostname; when parsing fails or the hostname is empty,
 *          the first 30 characters of the input instead
 */
function extractDomain(url: string): string {
  const truncated = url.slice(0, 30);
  let host: string;
  try {
    host = new URL(url).hostname;
  } catch {
    return truncated;
  }
  return host || truncated;
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * The last todo with status 'completed' takes precedence and is returned as
 * its content. Failing that, the first todo with status 'in_progress' is
 * returned with a 🔄 prefix.
 *
 * @param input - tool call input that may carry a `todos` array
 * @returns the progress line, or null when there is no todos array or no
 *          completed / in-progress entry
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todos = input?.todos;
  if (!todos || !Array.isArray(todos)) {
    return null;
  }

  const withStatus = (status: string) => todos.filter((todo) => todo.status === status);

  // Most recently listed completed task wins
  const latestDone = withStatus('completed').pop();
  if (latestDone !== undefined) {
    return `${latestDone.content}`;
  }

  // Otherwise fall back to the first task currently in progress
  const [active] = withStatus('in_progress');
  return active !== undefined ? `🔄 ${active.content}` : null;
}
/**
 * Pick the bracketed prefix used to label an agent's output during parallel
 * execution.
 *
 * Lookup happens in two passes:
 * 1. For each known agent key (in the order listed), look the agent up in
 *    AGENTS and check whether the description contains its displayName.
 * 2. Otherwise fall back to case-sensitive keyword matching on the description.
 *
 * @param description - free-form description of the running agent
 * @returns the matching prefix, or '[Agent]' when nothing matches
 */
export function getAgentPrefix(description: string): string {
  // Agent key -> prefix, in lookup order
  const prefixByAgent: ReadonlyArray<readonly [string, string]> = [
    ['injection-vuln', '[Injection]'],
    ['xss-vuln', '[XSS]'],
    ['auth-vuln', '[Auth]'],
    ['authz-vuln', '[Authz]'],
    ['ssrf-vuln', '[SSRF]'],
    ['injection-exploit', '[Injection]'],
    ['xss-exploit', '[XSS]'],
    ['auth-exploit', '[Auth]'],
    ['authz-exploit', '[Authz]'],
    ['ssrf-exploit', '[SSRF]'],
  ];

  const directHit = prefixByAgent.find(([agentName]) => {
    const agent = AGENTS[agentName as keyof typeof AGENTS];
    return Boolean(agent && description.includes(agent.displayName));
  });
  if (directHit !== undefined) {
    return directHit[1];
  }

  // Keyword fallback for backwards compatibility; 'authz' must precede 'auth'
  // because every string containing 'authz' also contains 'auth'.
  const keywordFallbacks: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, label] of keywordFallbacks) {
    if (description.includes(keyword)) {
      return label;
    }
  }

  return '[Agent]';
}
/**
 * Turn a Playwright browser tool call into a short progress line.
 *
 * Known tool names map to fixed messages, some of which interpolate a
 * truncated field from the call's input. Unknown names fall back to a generic
 * message built from the text after the last underscore in the tool name.
 *
 * @param toolCall - the parsed tool call; a missing `input` is treated as empty
 * @returns the progress line for display
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Any tool not listed above: label it with the last underscore-separated segment
    default:
      return `🌐 Browser: ${name.split('_').pop()}`;
  }
}
/**
 * Strip raw JSON tool-call lines from agent output, replacing a few of them
 * with readable progress lines.
 *
 * Line handling:
 * - blank lines are dropped
 * - lines that do not start with `{"type":"tool_use"` are kept verbatim
 * - Task calls become a "Launching ..." line
 * - TodoWrite calls become a todo summary (or nothing if there is none)
 * - Playwright browser calls become a browser action line
 * - all other tool calls (Read, Write, Grep, etc.) are hidden
 * - a tool-call line that fails to parse or process is kept verbatim
 *
 * @param content - raw output text; falsy or non-string input is returned
 *                  as-is, with '' substituted for falsy values
 * @returns the filtered lines joined with newlines
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (typeof content !== 'string' || content === '') {
    return content || '';
  }

  // Maps a parsed tool call to its display line, or null when it should be hidden.
  const renderToolCall = (call: ToolCall): string | null => {
    if (call.name === 'Task') {
      return `🚀 Launching ${call.input?.description || 'analysis agent'}`;
    }
    if (call.name === 'TodoWrite') {
      return summarizeTodoUpdate(call.input);
    }
    if (call.name.startsWith('mcp__playwright__browser_')) {
      return formatBrowserAction(call);
    }
    return null;
  };

  const kept: string[] = [];
  for (const rawLine of content.split('\n')) {
    const candidate = rawLine.trim();
    if (candidate === '') {
      continue;
    }

    // Assistant text passes through untouched
    if (!candidate.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      continue;
    }

    let rendered: string | null;
    try {
      rendered = renderToolCall(JSON.parse(candidate) as ToolCall);
    } catch {
      // Unparseable or malformed tool call: treat it as regular text
      kept.push(rawLine);
      continue;
    }
    if (rendered) {
      kept.push(rendered);
    }
  }

  return kept.join('\n');
}
+5 -5
View File
@@ -33,11 +33,11 @@
"exactOptionalPropertyTypes": true,
// Style Options
// "noImplicitReturns": true,
// "noImplicitOverride": true,
// "noUnusedLocals": true,
// "noUnusedParameters": true,
// "noFallthroughCasesInSwitch": true,
"noImplicitReturns": true,
"noImplicitOverride": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
// "noPropertyAccessFromIndexSignature": true,
// Recommended Options