Merge pull request #56 from KeygraphHQ/feat/model-router

feat: add multi-model router support for OpenAI and OpenRouter
Authored by Arjun Malleswaran, committed by GitHub on 2026-01-21 17:42:52 -08:00
17 changed files with 411 additions and 37 deletions

View File

@@ -1,8 +1,33 @@
# Shannon Environment Configuration
# Copy this file to .env and fill in your credentials
# Anthropic API Key (required - choose one)
# Recommended output token configuration for larger tool outputs
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
# =============================================================================
# OPTION 1: Direct Anthropic (default, no router)
# =============================================================================
ANTHROPIC_API_KEY=your-api-key-here
# OR use OAuth token instead
# CLAUDE_CODE_OAUTH_TOKEN=your-oauth-token-here
# =============================================================================
# OPTION 2: Router Mode (use alternative providers)
# =============================================================================
# Enable router mode by running: ./shannon start ... ROUTER=true
# Then configure ONE of the providers below:
# --- OpenAI ---
# OPENAI_API_KEY=sk-your-openai-key
# ROUTER_DEFAULT=openai,gpt-5.2
# --- OpenRouter (access Gemini 3 models via a single API) ---
# OPENROUTER_API_KEY=sk-or-your-openrouter-key
# ROUTER_DEFAULT=openrouter,google/gemini-3-flash-preview
# =============================================================================
# Available Models
# =============================================================================
# OpenAI: gpt-5.2, gpt-5-mini
# OpenRouter: google/gemini-3-flash-preview

View File

@@ -50,6 +50,7 @@ CONFIG=<file> YAML configuration file for authentication and testing pa
OUTPUT=<path> Custom output directory for session folder (default: ./audit-logs/)
PIPELINE_TESTING=true Use minimal prompts and fast retry intervals (10s instead of 5min)
REBUILD=true Force Docker rebuild with --no-cache (use when code changes aren't picked up)
ROUTER=true Route requests through claude-code-router for multi-model support
```
### Generate TOTP for Authentication
@@ -261,11 +262,41 @@ The tool should only be used on systems you own or have explicit permission to t
- `shannon` - CLI script for running pentests
- `docker-compose.yml` - Temporal server + worker containers
- `configs/` - YAML configs with `config-schema.json` for validation
- `configs/router-config.json` - Router service configuration for multi-model support
- `prompts/` - AI prompt templates (`vuln-*.txt`, `exploit-*.txt`, etc.)
**Output:**
- `audit-logs/{hostname}_{sessionId}/` - Session metrics, agent logs, deliverables
### Router Mode (Multi-Model Support)
Shannon supports routing Claude Agent SDK requests through alternative LLM providers via [claude-code-router](https://github.com/musistudio/claude-code-router).
**Enable router mode:**
```bash
./shannon start URL=<url> REPO=<path> ROUTER=true
```
**Supported Providers:**
| Provider | Models | Use Case |
|----------|--------|----------|
| OpenAI | `gpt-5.2`, `gpt-5-mini` | Good tool use, balanced cost/performance |
| OpenRouter | `google/gemini-3-flash-preview` | Access to Gemini 3 models via a single API |
**Configuration (in .env):**
```bash
# OpenAI
OPENAI_API_KEY=sk-your-key
ROUTER_DEFAULT=openai,gpt-5.2
# OpenRouter
OPENROUTER_API_KEY=sk-or-your-key
ROUTER_DEFAULT=openrouter,google/gemini-3-flash-preview
```
**Note:** Shannon is optimized for Anthropic's Claude models. Alternative providers are useful for cost savings during development but may produce varying results.
## Troubleshooting
### Common Issues

View File

@@ -84,6 +84,7 @@ Shannon is available in two editions:
- [Stopping Shannon](#stopping-shannon)
- [Usage Examples](#usage-examples)
- [Configuration (Optional)](#configuration-optional)
- [Router Mode (Alternative Providers)](#router-mode-alternative-providers)
- [Output and Results](#output-and-results)
- [Sample Reports & Benchmarks](#-sample-reports--benchmarks)
- [Architecture](#-architecture)
@@ -100,7 +101,9 @@ Shannon is available in two editions:
### Prerequisites
- **Docker** - Container runtime ([Install Docker](https://docs.docker.com/get-docker/))
- **Anthropic API key or Claude Code OAuth token** - Get from [Anthropic Console](https://console.anthropic.com)
- **AI Provider Credentials** (choose one):
- **Anthropic API key or Claude Code OAuth token** (recommended) - Get from [Anthropic Console](https://console.anthropic.com)
- **Alternative providers via Router Mode** - OpenAI or Google Gemini via OpenRouter (see [Router Mode](#router-mode-alternative-providers))
### Quick Start
@@ -252,6 +255,42 @@ rules:
If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
### Router Mode (Alternative Providers)
Shannon can route requests through alternative AI providers using [claude-code-router](https://github.com/musistudio/claude-code-router). This is useful for:
- **Model experimentation** - Test with GPT-5.2 or Gemini 3 models
- **API availability** - Use OpenRouter if the Anthropic API is unavailable in your region
#### Quick Setup
1. Add your provider API key to `.env`:
```bash
# Choose one provider:
OPENAI_API_KEY=sk-...
# OR
OPENROUTER_API_KEY=sk-or-...
# Set default model:
ROUTER_DEFAULT=openai,gpt-5.2 # provider,model format
```
2. Run with `ROUTER=true`:
```bash
./shannon start URL=https://example.com REPO=/path/to/repo ROUTER=true
```
#### Supported Providers
| Provider | Models |
|----------|--------|
| **Anthropic** | Claude Sonnet 4 |
| **OpenAI** | gpt-5.2, gpt-5-mini |
| **OpenRouter** | google/gemini-3-flash-preview |
#### Disclaimer
> **Output quality depends on model choice.** Shannon is optimized for and tested with Anthropic's Claude models. Alternative providers may produce varying results depending on the model's reasoning capabilities.
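Before kicking off a full run in router mode, it can help to confirm the router container is actually reachable. A minimal sketch, assuming the default `docker-compose.yml` and that the `/health` endpoint used by the router service's compose healthcheck answers a plain GET:

```bash
# Confirm the router profile is up and responding
docker compose --profile router ps router
curl -fsS http://localhost:3456/health && echo "router is healthy"
```

If the check fails, `docker compose --profile router logs router` usually shows whether the `ccr start` step or a missing provider key is the problem.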
### Output and Results
All results are saved to `./audit-logs/{hostname}_{sessionId}/` by default. Use `OUTPUT=<path>` to specify a custom directory.
@@ -430,7 +469,7 @@ Shannon is designed for legitimate security auditing purposes only.
#### **5. Cost & Performance**
- **Time**: As of the current version, a full test run typically takes **1 to 1.5 hours** to complete.
- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Please note that costs are subject to change based on model pricing and the complexity of the target application.
- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Costs vary based on model pricing and application complexity.
#### **6. Windows Antivirus False Positives**

View File

@@ -0,0 +1,33 @@
{
"HOST": "0.0.0.0",
"APIKEY": "shannon-router-key",
"LOG": true,
"LOG_LEVEL": "info",
"NON_INTERACTIVE_MODE": true,
"API_TIMEOUT_MS": 600000,
"Providers": [
{
"name": "openai",
"api_base_url": "https://api.openai.com/v1/chat/completions",
"api_key": "$OPENAI_API_KEY",
"models": ["gpt-5.2", "gpt-5-mini"],
"transformer": {
"use": [["maxcompletiontokens", { "max_completion_tokens": 16384 }]]
}
},
{
"name": "openrouter",
"api_base_url": "https://openrouter.ai/api/v1/chat/completions",
"api_key": "$OPENROUTER_API_KEY",
"models": [
"google/gemini-3-flash-preview"
],
"transformer": {
"use": ["openrouter"]
}
}
],
"Router": {
"default": "$ROUTER_DEFAULT"
}
}
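The `$OPENAI_API_KEY`, `$OPENROUTER_API_KEY`, and `$ROUTER_DEFAULT` placeholders above are not read by the router itself; the compose `command` for the router service pipes this file through `envsubst` at startup. A rough way to preview that substitution locally, assuming GNU gettext's `envsubst` is installed and using placeholder values:

```bash
# Render router-config.json the same way the router container does at startup
export OPENAI_API_KEY=sk-placeholder
export ROUTER_DEFAULT=openai,gpt-5.2
# Unset variables (e.g. OPENROUTER_API_KEY here) are replaced with empty strings
envsubst < configs/router-config.json
```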

View File

@@ -20,6 +20,9 @@ services:
environment:
- TEMPORAL_ADDRESS=temporal:7233
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-} # Optional: route through claude-code-router
- ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN:-} # Auth token for router
- ROUTER_DEFAULT=${ROUTER_DEFAULT:-} # Provider,model pair when using router (e.g., "openrouter,google/gemini-3-flash-preview")
- CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
- CLAUDE_CODE_MAX_OUTPUT_TOKENS=${CLAUDE_CODE_MAX_OUTPUT_TOKENS:-64000}
depends_on:
@@ -36,5 +39,33 @@ services:
security_opt:
- seccomp:unconfined
# Optional: claude-code-router for multi-model support
# Start with: ./shannon start ... ROUTER=true
router:
image: node:20-slim
profiles: ["router"] # Only starts when explicitly requested
command: >
sh -c "apt-get update && apt-get install -y gettext-base &&
npm install -g @musistudio/claude-code-router &&
mkdir -p /root/.claude-code-router &&
envsubst < /config/router-config.json > /root/.claude-code-router/config.json &&
ccr start"
ports:
- "3456:3456"
volumes:
- ./configs/router-config.json:/config/router-config.json:ro
environment:
- HOST=0.0.0.0
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
- ROUTER_DEFAULT=${ROUTER_DEFAULT:-openai,gpt-5.2}
healthcheck:
test: ["CMD", "node", "-e", "require('http').get('http://localhost:3456/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
volumes:
temporal-data:
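Because the router sits behind a compose profile, it never starts with a plain `docker compose up`; the `shannon` script brings it up when `ROUTER=true` is passed. To start or inspect it by hand (a sketch assuming the default `docker-compose.yml` in the repo root):

```bash
# Start only the router service and follow its startup (npm install + `ccr start`)
docker compose --profile router up -d router
docker compose --profile router logs -f router
```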

shannon
View File

@@ -35,6 +35,7 @@ Options for 'start':
CONFIG=<path> Configuration file (YAML)
OUTPUT=<path> Output directory for reports (default: ./audit-logs/)
PIPELINE_TESTING=true Use minimal prompts for fast testing
ROUTER=true Route requests through claude-code-router (multi-model support)
Options for 'stop':
CLEAN=true Remove all data including volumes
@@ -63,6 +64,7 @@ parse_args() {
CLEAN=*) CLEAN="${arg#CLEAN=}" ;;
PIPELINE_TESTING=*) PIPELINE_TESTING="${arg#PIPELINE_TESTING=}" ;;
REBUILD=*) REBUILD="${arg#REBUILD=}" ;;
ROUTER=*) ROUTER="${arg#ROUTER=}" ;;
esac
done
}
@@ -121,10 +123,16 @@ cmd_start() {
exit 1
fi
# Check for API key
# Check for API key (router mode can use alternative provider API keys)
if [ -z "$ANTHROPIC_API_KEY" ] && [ -z "$CLAUDE_CODE_OAUTH_TOKEN" ]; then
echo "ERROR: Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env"
exit 1
if [ "$ROUTER" = "true" ] && { [ -n "$OPENAI_API_KEY" ] || [ -n "$OPENROUTER_API_KEY" ]; }; then
# Router mode with alternative provider - set a placeholder for SDK init
export ANTHROPIC_API_KEY="router-mode"
else
echo "ERROR: Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env"
echo " (or use ROUTER=true with OPENAI_API_KEY or OPENROUTER_API_KEY)"
exit 1
fi
fi
# Determine container path for REPO
@@ -150,6 +158,33 @@ cmd_start() {
export OUTPUT_DIR="$OUTPUT"
fi
# Handle ROUTER flag - start claude-code-router for multi-model support
if [ "$ROUTER" = "true" ]; then
# Check if router is already running
if docker compose -f "$COMPOSE_FILE" --profile router ps router 2>/dev/null | grep -q "running"; then
echo "Router already running, skipping startup..."
else
echo "Starting claude-code-router..."
# Check for provider API keys
if [ -z "$OPENAI_API_KEY" ] && [ -z "$OPENROUTER_API_KEY" ]; then
echo "WARNING: No provider API key set (OPENAI_API_KEY or OPENROUTER_API_KEY). Router may not work."
fi
# Start router with profile
docker compose -f "$COMPOSE_FILE" --profile router up -d router
# Give router a few seconds to start (health check disabled for now - TODO: debug later)
echo "Waiting for router to start..."
sleep 5
fi
# Set ANTHROPIC_BASE_URL to route through router
export ANTHROPIC_BASE_URL="http://router:3456"
# Set auth token to match router's APIKEY
export ANTHROPIC_AUTH_TOKEN="shannon-router-key"
fi
# Ensure audit-logs directory exists with write permissions for container user (UID 1001)
mkdir -p ./audit-logs
chmod 777 ./audit-logs
@@ -231,9 +266,9 @@ cmd_stop() {
parse_args "$@"
if [ "$CLEAN" = "true" ]; then
docker compose -f "$COMPOSE_FILE" down -v
docker compose -f "$COMPOSE_FILE" --profile router down -v
else
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" --profile router down
fi
}
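Putting the flag together with the provider variables, a router-mode run against a staging target might look like the sketch below (URL, repo path, and key are placeholders):

```bash
# .env: OPENROUTER_API_KEY=sk-or-...  ROUTER_DEFAULT=openrouter,google/gemini-3-flash-preview
./shannon start URL=https://staging.example.com REPO=/path/to/repo ROUTER=true

# Tear everything down afterwards, including the router profile
./shannon stop CLEAN=true
```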

View File

@@ -25,23 +25,25 @@ import { dispatchMessage } from './message-handlers.js';
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js';
declare global {
var SHANNON_DISABLE_LOADER: boolean | undefined;
}
export interface ClaudePromptResult {
result?: string | null;
result?: string | null | undefined;
success: boolean;
duration: number;
turns?: number;
turns?: number | undefined;
cost: number;
partialCost?: number;
apiErrorDetected?: boolean;
error?: string;
errorType?: string;
prompt?: string;
retryable?: boolean;
model?: string | undefined;
partialCost?: number | undefined;
apiErrorDetected?: boolean | undefined;
error?: string | undefined;
errorType?: string | undefined;
prompt?: string | undefined;
retryable?: boolean | undefined;
}
interface StdioMcpServer {
@@ -247,6 +249,7 @@ export async function runClaudePrompt(
result = messageLoopResult.result;
apiErrorDetected = messageLoopResult.apiErrorDetected;
totalCost = messageLoopResult.cost;
const model = messageLoopResult.model;
// === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
@@ -283,6 +286,7 @@ export async function runClaudePrompt(
duration,
turns: turnCount,
cost: totalCost,
model,
partialCost: totalCost,
apiErrorDetected
};
@@ -316,6 +320,7 @@ interface MessageLoopResult {
result: string | null;
apiErrorDetected: boolean;
cost: number;
model?: string | undefined;
}
interface MessageLoopDeps {
@@ -339,6 +344,7 @@ async function processMessageStream(
let result: string | null = null;
let apiErrorDetected = false;
let cost = 0;
let model: string | undefined;
let lastHeartbeat = Date.now();
for await (const message of query({ prompt: fullPrompt, options })) {
@@ -370,12 +376,18 @@ async function processMessageStream(
break;
}
if (dispatchResult.type === 'continue' && dispatchResult.apiErrorDetected) {
apiErrorDetected = true;
if (dispatchResult.type === 'continue') {
if (dispatchResult.apiErrorDetected) {
apiErrorDetected = true;
}
// Capture model from SystemInitMessage, but override with router model if applicable
if (dispatchResult.model) {
model = getActualModelName(dispatchResult.model);
}
}
}
return { turnCount, result, apiErrorDetected, cost };
return { turnCount, result, apiErrorDetected, cost, model };
}
// Main entry point for agent execution. Handles retries, git checkpoints, and validation.

View File

@@ -10,6 +10,7 @@ import { PentestError } from '../error-handling.js';
import { filterJsonToolCalls } from '../utils/output-formatter.js';
import { formatTimestamp } from '../utils/formatting.js';
import chalk from 'chalk';
import { getActualModelName } from './router-utils.js';
import {
formatAssistantOutput,
formatResultOutput,
@@ -178,7 +179,7 @@ function outputLines(lines: string[]): void {
// Message dispatch result types
export type MessageDispatchAction =
| { type: 'continue'; apiErrorDetected?: boolean }
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
| { type: 'complete'; result: string | null; cost: number }
| { type: 'throw'; error: Error };
@@ -229,13 +230,18 @@ export async function dispatchMessage(
}
case 'system': {
if (message.subtype === 'init' && !execContext.useCleanOutput) {
if (message.subtype === 'init') {
const initMsg = message as SystemInitMessage;
console.log(chalk.blue(` Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`));
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
console.log(chalk.blue(` MCP: ${mcpStatus}`));
const actualModel = getActualModelName(initMsg.model);
if (!execContext.useCleanOutput) {
console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
console.log(chalk.blue(` MCP: ${mcpStatus}`));
}
}
// Return actual model for tracking in audit logs
return { type: 'continue', model: actualModel };
}
return { type: 'continue' };
}

src/ai/router-utils.ts (new file)
View File

@@ -0,0 +1,34 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Get the actual model name being used.
* When using claude-code-router, the SDK reports its configured model (claude-sonnet)
* but the actual model is determined by the ROUTER_DEFAULT env var.
*/
export function getActualModelName(sdkReportedModel?: string): string | undefined {
const routerBaseUrl = process.env.ANTHROPIC_BASE_URL;
const routerDefault = process.env.ROUTER_DEFAULT;
// If router mode is active and ROUTER_DEFAULT is set, use that
if (routerBaseUrl && routerDefault) {
// ROUTER_DEFAULT format: "provider,model" (e.g., "openrouter,google/gemini-3-flash-preview")
const parts = routerDefault.split(',');
if (parts.length >= 2) {
return parts.slice(1).join(','); // Handle model names with commas
}
}
// Fall back to SDK-reported model
return sdkReportedModel;
}
/**
* Check if router mode is active.
*/
export function isRouterMode(): boolean {
return !!process.env.ANTHROPIC_BASE_URL && !!process.env.ROUTER_DEFAULT;
}
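For a quick feel of the precedence this helper implements, the same rule expressed in shell (purely illustrative, not part of the codebase, and assuming `ROUTER_DEFAULT` is already in `provider,model` form): when router mode is active, everything after the first comma in `ROUTER_DEFAULT` wins; otherwise the SDK-reported model is kept.

```bash
# Illustrative shell equivalent of getActualModelName()
sdk_reported_model="claude-sonnet-4"
if [ -n "$ANTHROPIC_BASE_URL" ] && [ -n "$ROUTER_DEFAULT" ]; then
  echo "${ROUTER_DEFAULT#*,}"   # e.g. "google/gemini-3-flash-preview"
else
  echo "$sdk_reported_model"
fi
```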

View File

@@ -26,9 +26,10 @@ interface AgentEndResult {
duration_ms: number;
cost_usd: number;
success: boolean;
error?: string;
checkpoint?: string;
isFinalAttempt?: boolean;
model?: string | undefined;
error?: string | undefined;
checkpoint?: string | undefined;
isFinalAttempt?: boolean | undefined;
}
/**

View File

@@ -26,7 +26,8 @@ interface AttemptData {
cost_usd: number;
success: boolean;
timestamp: string;
error?: string;
model?: string | undefined;
error?: string | undefined;
}
interface AgentMetrics {
@@ -34,7 +35,8 @@ interface AgentMetrics {
attempts: AttemptData[];
final_duration_ms: number;
total_cost_usd: number;
checkpoint?: string;
model?: string | undefined;
checkpoint?: string | undefined;
}
interface PhaseMetrics {
@@ -66,9 +68,10 @@ interface AgentEndResult {
duration_ms: number;
cost_usd: number;
success: boolean;
error?: string;
checkpoint?: string;
isFinalAttempt?: boolean;
model?: string | undefined;
error?: string | undefined;
checkpoint?: string | undefined;
isFinalAttempt?: boolean | undefined;
}
interface ActiveTimer {
@@ -169,6 +172,10 @@ export class MetricsTracker {
timestamp: formatTimestamp(),
};
if (result.model) {
attempt.model = result.model;
}
if (result.error) {
attempt.error = result.error;
}
@@ -183,6 +190,10 @@ export class MetricsTracker {
agent.status = 'success';
agent.final_duration_ms = result.duration_ms;
if (result.model) {
agent.model = result.model;
}
if (result.checkpoint) {
agent.checkpoint = result.checkpoint;
}

View File

@@ -18,9 +18,9 @@ import type { DistributedConfig } from '../types/config.js';
interface AgentResult {
success: boolean;
duration: number;
cost?: number;
error?: string;
retryable?: boolean;
cost?: number | undefined;
error?: string | undefined;
retryable?: boolean | undefined;
}
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';

View File

@@ -68,3 +68,87 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
return finalContent;
}
/**
* Inject model information into the final security report.
* Reads session.json to get the model(s) used, then injects a "Model:" line
* into the Executive Summary section of the report.
*/
export async function injectModelIntoReport(
repoPath: string,
outputPath: string
): Promise<void> {
// 1. Read session.json to get model information
const sessionJsonPath = path.join(outputPath, 'session.json');
if (!(await fs.pathExists(sessionJsonPath))) {
console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
return;
}
interface SessionData {
metrics: {
agents: Record<string, { model?: string }>;
};
}
const sessionData: SessionData = await fs.readJson(sessionJsonPath);
// 2. Extract unique models from all agents
const models = new Set<string>();
for (const agent of Object.values(sessionData.metrics.agents)) {
if (agent.model) {
models.add(agent.model);
}
}
if (models.size === 0) {
console.log(chalk.yellow('⚠️ No model information found in session.json'));
return;
}
const modelStr = Array.from(models).join(', ');
console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));
// 3. Read the final report
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
if (!(await fs.pathExists(reportPath))) {
console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
return;
}
let reportContent = await fs.readFile(reportPath, 'utf8');
// 4. Find and inject model line after "Assessment Date" in Executive Summary
// Pattern: "- Assessment Date: <date>" followed by a newline
const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
const match = reportContent.match(assessmentDatePattern);
if (match) {
// Inject model line after Assessment Date
const modelLine = `- Model: ${modelStr}`;
reportContent = reportContent.replace(
assessmentDatePattern,
`$1\n${modelLine}`
);
console.log(chalk.green('✅ Model info injected into Executive Summary'));
} else {
// If no Assessment Date line found, try to add after Executive Summary header
const execSummaryPattern = /^## Executive Summary$/m;
if (reportContent.match(execSummaryPattern)) {
// Add model as first item in Executive Summary
reportContent = reportContent.replace(
execSummaryPattern,
`## Executive Summary\n- Model: ${modelStr}`
);
console.log(chalk.green('✅ Model info added to Executive Summary header'));
} else {
console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
return;
}
}
// 5. Write modified report back
await fs.writeFile(reportPath, reportContent);
}
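On disk, the injection amounts to one extra bullet under the report's Executive Summary. A way to eyeball the result after a run, executed from the target repo's root (date and model values below are illustrative):

```bash
# Show the Assessment Date line plus the injected Model line
grep -A1 '^- Assessment Date:' deliverables/comprehensive_security_assessment_report.md
# - Assessment Date: 2026-01-21
# - Model: google/gemini-3-flash-preview
```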

View File

@@ -67,7 +67,7 @@ import {
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { assembleFinalReport } from '../phases/reporting.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
@@ -192,6 +192,7 @@ async function runAgentActivity(
duration_ms: result.duration,
cost_usd: 0,
success: false,
model: result.model,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
@@ -207,6 +208,7 @@ async function runAgentActivity(
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: result.error || 'Execution failed',
});
throw new Error(result.error || 'Agent execution failed');
@@ -221,6 +223,7 @@ async function runAgentActivity(
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: 'Output validation failed',
});
@@ -243,6 +246,7 @@ async function runAgentActivity(
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
});
await commitGitSuccess(repoPath, agentName);
@@ -254,6 +258,7 @@ async function runAgentActivity(
outputTokens: null,
costUsd: result.cost ?? null,
numTurns: result.turns ?? null,
model: result.model,
};
} catch (error) {
// Rollback git workspace before Temporal retry to ensure clean state
@@ -369,6 +374,25 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
}
}
/**
* Inject model metadata into the final report.
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
*/
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, outputPath } = input;
if (!outputPath) {
console.log(chalk.yellow('⚠️ No output path provided, skipping model injection'));
return;
}
try {
await injectModelIntoReport(repoPath, outputPath);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
// Don't throw - this is a non-critical enhancement
}
}
/**
* Check if exploitation should run for a given vulnerability type.
* Reads the vulnerability queue file and returns the decision.

View File

@@ -35,6 +35,7 @@ interface AgentMetrics {
outputTokens: number | null;
costUsd: number | null;
numTurns: number | null;
model?: string | undefined;
}
interface PipelineProgress {
@@ -123,8 +124,10 @@ async function queryWorkflow(): Promise<void> {
const metrics = progress.agentMetrics[agent];
const duration = metrics ? formatDuration(metrics.durationMs) : 'unknown';
const cost = metrics?.costUsd ? `$${metrics.costUsd.toFixed(4)}` : '';
const model = metrics?.model ? ` [${metrics.model}]` : '';
console.log(
chalk.green(` - ${agent}`) +
chalk.blue(model) +
chalk.gray(` (${duration}${cost ? ', ' + cost : ''})`)
);
}

View File

@@ -17,6 +17,7 @@ export interface AgentMetrics {
outputTokens: number | null;
costUsd: number | null;
numTurns: number | null;
model?: string | undefined;
}
export interface PipelineSummary {

View File

@@ -276,6 +276,10 @@ export async function pentestPipelineWorkflow(
// Then run the report agent to add executive summary and clean up
state.agentMetrics['report'] = await a.runReportAgent(activityInput);
state.completedAgents.push('report');
// Inject model metadata into the final report
await a.injectReportMetadataActivity(activityInput);
await a.logPhaseTransition(activityInput, 'reporting', 'complete');
// === Complete ===