Mirror of https://github.com/KeygraphHQ/shannon.git, synced 2026-02-12 17:22:50 +00:00

feat: add model tracking and reporting across pipeline

- Track actual model name from router through audit logs, session.json, and query output
- Add router-utils.ts to resolve model names from ROUTER_DEFAULT env var
- Inject model info into final report's Executive Summary section
- Update documentation with supported providers, pricing, and config examples
- Update router-config.json with latest model versions (GPT-5.2, Gemini 2.5, etc.)
.env.example — 14 changed lines

@@ -20,7 +20,7 @@ ANTHROPIC_API_KEY=your-api-key-here
 
 # --- OpenAI ---
 # OPENAI_API_KEY=sk-your-openai-key
-# ROUTER_DEFAULT=openai,gpt-4o
+# ROUTER_DEFAULT=openai,gpt-5.2
 
 # --- Google Gemini ---
 # GEMINI_API_KEY=your-gemini-key
@@ -42,9 +42,9 @@ ANTHROPIC_API_KEY=your-api-key-here
 # =============================================================================
 # Available Models
 # =============================================================================
-# OpenAI: gpt-4o, gpt-4o-mini
-# Gemini: gemini-2.5-pro, gemini-2.5-flash
-# DeepSeek: (Together.ai) deepseek-ai/DeepSeek-V3, deepseek-ai/DeepSeek-R1
-# (Official) deepseek-chat, deepseek-reasoner
-# OpenRouter: anthropic/claude-sonnet-4, google/gemini-2.5-pro-preview,
-# openai/gpt-4o, meta-llama/llama-3.3-70b-instruct (100+ more)
+# OpenAI: gpt-5.2, gpt-5-mini
+# Gemini: gemini-2.5-pro
+# DeepSeek: (Together.ai) deepseek-ai/DeepSeek-V3
+# (Official) deepseek-chat
+# OpenRouter: anthropic/claude-sonnet-4, google/gemini-3-pro-preview,
+# openai/gpt-5.2 (100+ more)
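The `ROUTER_DEFAULT` value follows a `provider,model` convention. A minimal sketch of how such a value splits into its parts — the same parsing the new `src/ai/router-utils.ts` (further down in this commit) performs; the example value is illustrative:

```typescript
// Splitting a ROUTER_DEFAULT value of the form "provider,model".
// The example value is illustrative; any provider listed above works the same way.
const routerDefault = 'openai,gpt-5.2';
const [provider, ...modelParts] = routerDefault.split(',');
const model = modelParts.join(','); // model ids may themselves contain commas
console.log(provider, model); // -> "openai" "gpt-5.2"
```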
CLAUDE.md — 52 changed lines

@@ -50,7 +50,7 @@ CONFIG=<file> YAML configuration file for authentication and testing pa
 OUTPUT=<path> Custom output directory for session folder (default: ./audit-logs/)
 PIPELINE_TESTING=true Use minimal prompts and fast retry intervals (10s instead of 5min)
 REBUILD=true Force Docker rebuild with --no-cache (use when code changes aren't picked up)
-ROUTER=true Route requests through claude-code-router for multi-model support (see limitations below)
+ROUTER=true Route requests through claude-code-router for multi-model support
 ```
 
 ### Generate TOTP for Authentication
@@ -262,11 +262,51 @@ The tool should only be used on systems you own or have explicit permission to t
 - `shannon` - CLI script for running pentests
 - `docker-compose.yml` - Temporal server + worker containers
 - `configs/` - YAML configs with `config-schema.json` for validation
+- `configs/router-config.json` - Router service configuration for multi-model support
 - `prompts/` - AI prompt templates (`vuln-*.txt`, `exploit-*.txt`, etc.)
 
 **Output:**
 - `audit-logs/{hostname}_{sessionId}/` - Session metrics, agent logs, deliverables
 
+### Router Mode (Multi-Model Support)
+
+Shannon supports routing Claude Agent SDK requests through alternative LLM providers via [claude-code-router](https://github.com/musistudio/claude-code-router).
+
+**Enable router mode:**
+```bash
+./shannon start URL=<url> REPO=<path> ROUTER=true
+```
+
+**Supported Providers:**
+
+| Provider | Models | Use Case |
+|----------|--------|----------|
+| OpenAI | `gpt-5.2`, `gpt-5-mini` | Good tool use, balanced cost/performance |
+| Gemini | `gemini-2.5-pro` | Long context (1M+ tokens), strong reasoning |
+| DeepSeek | `deepseek-ai/DeepSeek-V3`, `deepseek-chat` | Cheapest option for dev/testing |
+| OpenRouter | `anthropic/claude-sonnet-4`, `google/gemini-3-pro-preview`, `openai/gpt-5.2` | Multi-provider access via single API |
+
+**Configuration (in .env):**
+```bash
+# OpenAI
+OPENAI_API_KEY=sk-your-key
+ROUTER_DEFAULT=openai,gpt-5.2
+
+# Gemini
+GEMINI_API_KEY=your-gemini-key
+ROUTER_DEFAULT=gemini,gemini-2.5-pro
+
+# DeepSeek (via Together.ai)
+DEEPSEEK_API_KEY=your-together-key
+ROUTER_DEFAULT=deepseek,deepseek-ai/DeepSeek-V3
+
+# OpenRouter
+OPENROUTER_API_KEY=sk-or-your-key
+ROUTER_DEFAULT=openrouter,anthropic/claude-sonnet-4
+```
+
+**Note:** Shannon is optimized for Anthropic's Claude models. Alternative providers are useful for cost savings during development but may produce varying results.
+
 ## Troubleshooting
 
 ### Common Issues
@@ -285,16 +325,6 @@ Missing tools can be skipped using `PIPELINE_TESTING=true` mode during developme
 - `subfinder` - Subdomain discovery
 - `whatweb` - Web technology detection
 
-### Router Mode Limitations
-When using `ROUTER=true` to route requests through claude-code-router (e.g., to use OpenAI models):
-
-**Cost tracking shows $0.00**: The Claude Agent SDK expects `total_cost_usd` in the result message, which is Anthropic-specific. OpenAI's API returns token counts in `usage` but not a cost field, and the router doesn't translate this. This is a known limitation of the router, not a Shannon bug.
-
-**Workarounds:**
-- Accept $0 costs when using router mode (recommended for dev/testing)
-- Use Anthropic directly for production runs where cost tracking matters
-- Use external tools like `ccusage` for post-hoc token analysis
-
 ### Diagnostic & Utility Scripts
 ```bash
 # View Temporal workflow history
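The router-mode docs above pair each provider with an API-key variable and a `ROUTER_DEFAULT` entry. A hypothetical pre-flight check (not part of Shannon) that verifies the chosen provider actually has its key set could look like this; `assertRouterEnv` and the key map are illustrative names only:

```typescript
// Hypothetical helper: confirm the provider named in ROUTER_DEFAULT has a matching API key set.
const KEY_FOR_PROVIDER: Record<string, string> = {
  openai: 'OPENAI_API_KEY',
  gemini: 'GEMINI_API_KEY',
  deepseek: 'DEEPSEEK_API_KEY',
  openrouter: 'OPENROUTER_API_KEY',
};

export function assertRouterEnv(env: NodeJS.ProcessEnv = process.env): void {
  const routerDefault = env.ROUTER_DEFAULT;
  if (!routerDefault) return; // router mode not configured, nothing to check
  const provider = routerDefault.split(',')[0] ?? '';
  const keyName = KEY_FOR_PROVIDER[provider];
  if (keyName && !env[keyName]) {
    throw new Error(`ROUTER_DEFAULT names "${provider}" but ${keyName} is not set`);
  }
}
```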
README.md — 50 changed lines

@@ -84,6 +84,7 @@ Shannon is available in two editions:
 - [Stopping Shannon](#stopping-shannon)
 - [Usage Examples](#usage-examples)
 - [Configuration (Optional)](#configuration-optional)
+  - [Router Mode (Alternative Providers)](#router-mode-alternative-providers)
 - [Output and Results](#output-and-results)
 - [Sample Reports & Benchmarks](#-sample-reports--benchmarks)
 - [Architecture](#-architecture)
@@ -100,7 +101,9 @@ Shannon is available in two editions:
 ### Prerequisites
 
 - **Docker** - Container runtime ([Install Docker](https://docs.docker.com/get-docker/))
-- **Anthropic API key or Claude Code OAuth token** - Get from [Anthropic Console](https://console.anthropic.com)
+- **AI Provider Credentials** (choose one):
+  - **Anthropic API key or Claude Code OAuth token** (recommended) - Get from [Anthropic Console](https://console.anthropic.com)
+  - **Alternative providers via Router Mode** - OpenAI, Google Gemini, DeepSeek, or OpenRouter (see [Router Mode](#router-mode-alternative-providers))
 
 ### Quick Start
 
@@ -252,6 +255,49 @@ rules:
 
 If your application uses two-factor authentication, simply add the TOTP secret to your config file. The AI will automatically generate the required codes during testing.
 
+### Router Mode (Alternative Providers)
+
+Shannon can route requests through alternative AI providers instead of Anthropic. This is useful for:
+- **Cost optimization** - DeepSeek is significantly cheaper (~$0.14/M input tokens vs $3/M for Claude)
+- **Model experimentation** - Test with GPT-5.2, Gemini 3, or open-source models
+- **API availability** - Use OpenRouter if Anthropic API is unavailable in your region
+
+#### Quick Setup
+
+1. Add your provider API key to `.env`:
+```bash
+# Choose one provider:
+OPENAI_API_KEY=sk-...
+# OR
+GEMINI_API_KEY=...
+# OR
+DEEPSEEK_API_KEY=...
+# OR
+OPENROUTER_API_KEY=sk-or-...
+
+# Set default model:
+ROUTER_DEFAULT=openai,gpt-5.2 # provider,model format
+```
+
+2. Run with `ROUTER=true`:
+```bash
+./shannon start URL=https://example.com REPO=/path/to/repo ROUTER=true
+```
+
+#### Supported Providers
+
+| Provider | Models | Approx. Cost | Notes |
+|----------|--------|--------------|-------|
+| **Anthropic** (default) | Claude Sonnet 4 | $3/$15 per M tokens | Best quality, recommended |
+| **OpenAI** | gpt-5.2, gpt-5-mini | $2.50/$10 per M tokens | Good alternative |
+| **Google Gemini** | gemini-2.5-pro | ~$1.25/$5 per M tokens | Long context (1M+), strong reasoning |
+| **DeepSeek** | DeepSeek-V3 | ~$0.14/$0.28 per M tokens | Cheapest option |
+| **OpenRouter** | 100+ models | Varies | Single API for many models |
+
+#### Disclaimer
+
+> **Output quality depends on model choice.** Shannon is optimized for and tested with Anthropic's Claude models. Alternative providers may produce varying results depending on the model's reasoning capabilities.
+
 ### Output and Results
 
 All results are saved to `./audit-logs/{hostname}_{sessionId}/` by default. Use `--output <path>` to specify a custom directory.
@@ -430,7 +476,7 @@ Shannon is designed for legitimate security auditing purposes only.
 #### **5. Cost & Performance**
 
 - **Time**: As of the current version, a full test run typically takes **1 to 1.5 hours** to complete.
-- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Please note that costs are subject to change based on model pricing and the complexity of the target application.
+- **Cost**: Running the full test using Anthropic's Claude 4.5 Sonnet model may incur costs of approximately **$50 USD**. Using Router Mode with DeepSeek can reduce this to **~$5 USD** (see [Router Mode](#router-mode-alternative-providers)). Costs vary based on model pricing and application complexity.
 
 #### **6. Windows Antivirus False Positives**
 
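The per-million-token rates in the provider table translate to run cost roughly as input tokens × input rate plus output tokens × output rate. A back-of-the-envelope sketch; the token counts below are hypothetical placeholders, not measured Shannon usage, and actual runs will differ with retries and target complexity:

```typescript
// Rough cost estimate from the rate table above. Token counts are hypothetical.
function estimateCostUsd(inputMTokens: number, outputMTokens: number, inRatePerM: number, outRatePerM: number): number {
  return inputMTokens * inRatePerM + outputMTokens * outRatePerM;
}

// e.g. a hypothetical run consuming 12M input / 1M output tokens:
console.log(estimateCostUsd(12, 1, 3.0, 15.0));  // Claude Sonnet 4: ~$51
console.log(estimateCostUsd(12, 1, 0.14, 0.28)); // DeepSeek-V3:     ~$1.96
```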
configs/router-config.json

@@ -10,16 +10,16 @@
 "name": "openai",
 "api_base_url": "https://api.openai.com/v1/chat/completions",
 "api_key": "$OPENAI_API_KEY",
-"models": ["gpt-4o", "gpt-4o-mini"],
+"models": ["gpt-5.2", "gpt-5-mini"],
 "transformer": {
-"use": [["maxtoken", { "max_tokens": 16384 }]]
+"use": [["maxcompletiontokens", { "max_completion_tokens": 16384 }]]
 }
 },
 {
 "name": "gemini",
 "api_base_url": "https://generativelanguage.googleapis.com/v1beta/models/",
 "api_key": "$GEMINI_API_KEY",
-"models": ["gemini-2.5-pro", "gemini-2.5-flash"],
+"models": ["gemini-2.5-pro"],
 "transformer": {
 "use": ["gemini"]
 }
@@ -28,7 +28,7 @@
 "name": "deepseek",
 "api_base_url": "$DEEPSEEK_API_BASE",
 "api_key": "$DEEPSEEK_API_KEY",
-"models": ["deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-R1", "deepseek-chat", "deepseek-reasoner"],
+"models": ["deepseek-ai/DeepSeek-V3", "deepseek-chat"],
 "transformer": {
 "use": ["deepseek", "enhancetool"]
 }
@@ -39,9 +39,8 @@
 "api_key": "$OPENROUTER_API_KEY",
 "models": [
 "anthropic/claude-sonnet-4",
-"google/gemini-2.5-pro-preview",
-"openai/gpt-4o",
-"meta-llama/llama-3.3-70b-instruct"
+"google/gemini-3-pro-preview",
+"openai/gpt-5.2"
 ],
 "transformer": {
 "use": ["openrouter"]
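A sketch of how a caller might cross-check `ROUTER_DEFAULT` against this provider list; the `Providers` key casing and the `Provider` shape are inferred from the config shown above, and the function name is illustrative:

```typescript
import fs from 'node:fs';

// Illustrative only: verify that ROUTER_DEFAULT names a provider/model present in router-config.json.
interface Provider {
  name: string;
  models: string[];
}

function routerDefaultIsConfigured(configPath: string, routerDefault: string): boolean {
  const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
  const providers: Provider[] = config.Providers ?? config.providers ?? []; // key casing assumed
  const [providerName, ...rest] = routerDefault.split(',');
  const model = rest.join(',');
  const provider = providers.find((p) => p.name === providerName);
  return Boolean(provider && provider.models.includes(model));
}

// With the config above: routerDefaultIsConfigured('configs/router-config.json', 'gemini,gemini-2.5-pro') === true
```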
@@ -22,6 +22,7 @@ services:
 - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
 - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-} # Optional: route through claude-code-router
 - ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN:-} # Auth token for router
+- ROUTER_DEFAULT=${ROUTER_DEFAULT:-} # Model name when using router (e.g., "gemini,gemini-2.5-pro")
 - CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
 - CLAUDE_CODE_MAX_OUTPUT_TOKENS=${CLAUDE_CODE_MAX_OUTPUT_TOKENS:-64000}
 depends_on:
|||||||
@@ -25,23 +25,25 @@ import { dispatchMessage } from './message-handlers.js';
|
|||||||
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
|
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
|
||||||
import { createProgressManager } from './progress-manager.js';
|
import { createProgressManager } from './progress-manager.js';
|
||||||
import { createAuditLogger } from './audit-logger.js';
|
import { createAuditLogger } from './audit-logger.js';
|
||||||
|
import { getActualModelName } from './router-utils.js';
|
||||||
|
|
||||||
declare global {
|
declare global {
|
||||||
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ClaudePromptResult {
|
export interface ClaudePromptResult {
|
||||||
result?: string | null;
|
result?: string | null | undefined;
|
||||||
success: boolean;
|
success: boolean;
|
||||||
duration: number;
|
duration: number;
|
||||||
turns?: number;
|
turns?: number | undefined;
|
||||||
cost: number;
|
cost: number;
|
||||||
partialCost?: number;
|
model?: string | undefined;
|
||||||
apiErrorDetected?: boolean;
|
partialCost?: number | undefined;
|
||||||
error?: string;
|
apiErrorDetected?: boolean | undefined;
|
||||||
errorType?: string;
|
error?: string | undefined;
|
||||||
prompt?: string;
|
errorType?: string | undefined;
|
||||||
retryable?: boolean;
|
prompt?: string | undefined;
|
||||||
|
retryable?: boolean | undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface StdioMcpServer {
|
interface StdioMcpServer {
|
||||||
@@ -247,6 +249,7 @@ export async function runClaudePrompt(
|
|||||||
result = messageLoopResult.result;
|
result = messageLoopResult.result;
|
||||||
apiErrorDetected = messageLoopResult.apiErrorDetected;
|
apiErrorDetected = messageLoopResult.apiErrorDetected;
|
||||||
totalCost = messageLoopResult.cost;
|
totalCost = messageLoopResult.cost;
|
||||||
|
const model = messageLoopResult.model;
|
||||||
|
|
||||||
// === SPENDING CAP SAFEGUARD ===
|
// === SPENDING CAP SAFEGUARD ===
|
||||||
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||||
@@ -283,6 +286,7 @@ export async function runClaudePrompt(
|
|||||||
duration,
|
duration,
|
||||||
turns: turnCount,
|
turns: turnCount,
|
||||||
cost: totalCost,
|
cost: totalCost,
|
||||||
|
model,
|
||||||
partialCost: totalCost,
|
partialCost: totalCost,
|
||||||
apiErrorDetected
|
apiErrorDetected
|
||||||
};
|
};
|
||||||
@@ -316,6 +320,7 @@ interface MessageLoopResult {
|
|||||||
result: string | null;
|
result: string | null;
|
||||||
apiErrorDetected: boolean;
|
apiErrorDetected: boolean;
|
||||||
cost: number;
|
cost: number;
|
||||||
|
model?: string | undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface MessageLoopDeps {
|
interface MessageLoopDeps {
|
||||||
@@ -339,6 +344,7 @@ async function processMessageStream(
|
|||||||
let result: string | null = null;
|
let result: string | null = null;
|
||||||
let apiErrorDetected = false;
|
let apiErrorDetected = false;
|
||||||
let cost = 0;
|
let cost = 0;
|
||||||
|
let model: string | undefined;
|
||||||
let lastHeartbeat = Date.now();
|
let lastHeartbeat = Date.now();
|
||||||
|
|
||||||
for await (const message of query({ prompt: fullPrompt, options })) {
|
for await (const message of query({ prompt: fullPrompt, options })) {
|
||||||
@@ -370,12 +376,18 @@ async function processMessageStream(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dispatchResult.type === 'continue' && dispatchResult.apiErrorDetected) {
|
if (dispatchResult.type === 'continue') {
|
||||||
apiErrorDetected = true;
|
if (dispatchResult.apiErrorDetected) {
|
||||||
|
apiErrorDetected = true;
|
||||||
|
}
|
||||||
|
// Capture model from SystemInitMessage, but override with router model if applicable
|
||||||
|
if (dispatchResult.model) {
|
||||||
|
model = getActualModelName(dispatchResult.model);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { turnCount, result, apiErrorDetected, cost };
|
return { turnCount, result, apiErrorDetected, cost, model };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main entry point for agent execution. Handles retries, git checkpoints, and validation.
|
// Main entry point for agent execution. Handles retries, git checkpoints, and validation.
|
||||||
|
|||||||
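For context, a self-contained sketch of how a caller might surface the new `model` field; the interface is restated in abridged form from the diff above, the helper is hypothetical, and the values are made up:

```typescript
// Abridged restatement of ClaudePromptResult from the diff above, for a standalone example.
interface ClaudePromptResult {
  success: boolean;
  duration: number;
  cost: number;
  model?: string | undefined;
}

// Hypothetical formatting helper showing where the model name would surface.
function summarizeRun(r: ClaudePromptResult): string {
  const model = r.model ?? 'unknown model';
  return `${r.success ? 'ok' : 'failed'} on ${model} in ${r.duration}ms ($${r.cost.toFixed(4)})`;
}

console.log(summarizeRun({ success: true, duration: 83_000, cost: 0, model: 'gpt-5.2' }));
// -> "ok on gpt-5.2 in 83000ms ($0.0000)"
```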
@@ -10,6 +10,7 @@ import { PentestError } from '../error-handling.js';
 import { filterJsonToolCalls } from '../utils/output-formatter.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import chalk from 'chalk';
+import { getActualModelName } from './router-utils.js';
 import {
   formatAssistantOutput,
   formatResultOutput,
@@ -178,7 +179,7 @@ function outputLines(lines: string[]): void {
 
 // Message dispatch result types
 export type MessageDispatchAction =
-  | { type: 'continue'; apiErrorDetected?: boolean }
+  | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
  | { type: 'complete'; result: string | null; cost: number }
  | { type: 'throw'; error: Error };
 
@@ -229,13 +230,18 @@ export async function dispatchMessage(
 }
 
 case 'system': {
-  if (message.subtype === 'init' && !execContext.useCleanOutput) {
+  if (message.subtype === 'init') {
     const initMsg = message as SystemInitMessage;
-    console.log(chalk.blue(` Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`));
-    if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
-      const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
-      console.log(chalk.blue(` MCP: ${mcpStatus}`));
-    }
+    const actualModel = getActualModelName(initMsg.model);
+    if (!execContext.useCleanOutput) {
+      console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
+      if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
+        const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
+        console.log(chalk.blue(` MCP: ${mcpStatus}`));
+      }
+    }
+    // Return actual model for tracking in audit logs
+    return { type: 'continue', model: actualModel };
   }
   return { type: 'continue' };
 }
src/ai/router-utils.ts — new file, 34 lines

@@ -0,0 +1,34 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Get the actual model name being used.
+ * When using claude-code-router, the SDK reports its configured model (claude-sonnet)
+ * but the actual model is determined by ROUTER_DEFAULT env var.
+ */
+export function getActualModelName(sdkReportedModel?: string): string | undefined {
+  const routerBaseUrl = process.env.ANTHROPIC_BASE_URL;
+  const routerDefault = process.env.ROUTER_DEFAULT;
+
+  // If router mode is active and ROUTER_DEFAULT is set, use that
+  if (routerBaseUrl && routerDefault) {
+    // ROUTER_DEFAULT format: "provider,model" (e.g., "gemini,gemini-2.5-pro")
+    const parts = routerDefault.split(',');
+    if (parts.length >= 2) {
+      return parts.slice(1).join(','); // Handle model names with commas
+    }
+  }
+
+  // Fall back to SDK-reported model
+  return sdkReportedModel;
+}
+
+/**
+ * Check if router mode is active.
+ */
+export function isRouterMode(): boolean {
+  return !!process.env.ANTHROPIC_BASE_URL && !!process.env.ROUTER_DEFAULT;
+}
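Usage of the two helpers above, with illustrative env values (the base URL is only an example of a router endpoint; any non-empty value triggers router-mode detection):

```typescript
import { getActualModelName, isRouterMode } from './router-utils.js';

// Illustrative env values for demonstration.
process.env.ANTHROPIC_BASE_URL = 'http://localhost:3456';
process.env.ROUTER_DEFAULT = 'gemini,gemini-2.5-pro';

console.log(isRouterMode());                        // true
console.log(getActualModelName('claude-sonnet-4')); // "gemini-2.5-pro" — ROUTER_DEFAULT wins

delete process.env.ANTHROPIC_BASE_URL;
console.log(isRouterMode());                        // false
console.log(getActualModelName('claude-sonnet-4')); // "claude-sonnet-4" — falls back to the SDK-reported model
```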
@@ -26,9 +26,10 @@ interface AgentEndResult {
   duration_ms: number;
   cost_usd: number;
   success: boolean;
-  error?: string;
-  checkpoint?: string;
-  isFinalAttempt?: boolean;
+  model?: string | undefined;
+  error?: string | undefined;
+  checkpoint?: string | undefined;
+  isFinalAttempt?: boolean | undefined;
 }
 
 /**
@@ -26,7 +26,8 @@ interface AttemptData {
   cost_usd: number;
   success: boolean;
   timestamp: string;
-  error?: string;
+  model?: string | undefined;
+  error?: string | undefined;
 }
 
 interface AgentMetrics {
@@ -34,7 +35,8 @@ interface AgentMetrics {
   attempts: AttemptData[];
   final_duration_ms: number;
   total_cost_usd: number;
-  checkpoint?: string;
+  model?: string | undefined;
+  checkpoint?: string | undefined;
 }
 
 interface PhaseMetrics {
@@ -66,9 +68,10 @@ interface AgentEndResult {
   duration_ms: number;
   cost_usd: number;
   success: boolean;
-  error?: string;
-  checkpoint?: string;
-  isFinalAttempt?: boolean;
+  model?: string | undefined;
+  error?: string | undefined;
+  checkpoint?: string | undefined;
+  isFinalAttempt?: boolean | undefined;
 }
 
 interface ActiveTimer {
@@ -169,6 +172,10 @@ export class MetricsTracker {
 timestamp: formatTimestamp(),
 };
 
+if (result.model) {
+  attempt.model = result.model;
+}
+
 if (result.error) {
   attempt.error = result.error;
 }
@@ -183,6 +190,10 @@ export class MetricsTracker {
 agent.status = 'success';
 agent.final_duration_ms = result.duration_ms;
 
+if (result.model) {
+  agent.model = result.model;
+}
+
 if (result.checkpoint) {
   agent.checkpoint = result.checkpoint;
 }
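Based on the `AttemptData` and `AgentMetrics` shapes above, an agent entry recorded in session.json might look roughly like the following; all values are made up for illustration and fields not shown in this diff are omitted:

```typescript
// Illustrative shape only — invented values, abridged to the fields visible in the diff above.
const exampleAgentEntry = {
  attempts: [
    {
      duration_ms: 412_000,
      cost_usd: 0,              // router mode typically reports $0.00 (see the removed limitation note)
      success: true,
      timestamp: '2025-01-01T00:00:00Z',
      model: 'gpt-5.2',         // new field added by this commit
    },
  ],
  final_duration_ms: 412_000,
  total_cost_usd: 0,
  model: 'gpt-5.2',             // new field added by this commit
  checkpoint: 'abc1234',
};

console.log(JSON.stringify(exampleAgentEntry, null, 2));
```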
@@ -18,9 +18,9 @@ import type { DistributedConfig } from '../types/config.js';
 interface AgentResult {
   success: boolean;
   duration: number;
-  cost?: number;
-  error?: string;
-  retryable?: boolean;
+  cost?: number | undefined;
+  error?: string | undefined;
+  retryable?: boolean | undefined;
 }
 
 type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
@@ -68,3 +68,87 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
 
   return finalContent;
 }
+
+/**
+ * Inject model information into the final security report.
+ * Reads session.json to get the model(s) used, then injects a "Model:" line
+ * into the Executive Summary section of the report.
+ */
+export async function injectModelIntoReport(
+  repoPath: string,
+  outputPath: string
+): Promise<void> {
+  // 1. Read session.json to get model information
+  const sessionJsonPath = path.join(outputPath, 'session.json');
+
+  if (!(await fs.pathExists(sessionJsonPath))) {
+    console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
+    return;
+  }
+
+  interface SessionData {
+    metrics: {
+      agents: Record<string, { model?: string }>;
+    };
+  }
+
+  const sessionData: SessionData = await fs.readJson(sessionJsonPath);
+
+  // 2. Extract unique models from all agents
+  const models = new Set<string>();
+  for (const agent of Object.values(sessionData.metrics.agents)) {
+    if (agent.model) {
+      models.add(agent.model);
+    }
+  }
+
+  if (models.size === 0) {
+    console.log(chalk.yellow('⚠️ No model information found in session.json'));
+    return;
+  }
+
+  const modelStr = Array.from(models).join(', ');
+  console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));
+
+  // 3. Read the final report
+  const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
+
+  if (!(await fs.pathExists(reportPath))) {
+    console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
+    return;
+  }
+
+  let reportContent = await fs.readFile(reportPath, 'utf8');
+
+  // 4. Find and inject model line after "Assessment Date" in Executive Summary
+  // Pattern: "- Assessment Date: <date>" followed by a newline
+  const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
+  const match = reportContent.match(assessmentDatePattern);
+
+  if (match) {
+    // Inject model line after Assessment Date
+    const modelLine = `- Model: ${modelStr}`;
+    reportContent = reportContent.replace(
+      assessmentDatePattern,
+      `$1\n${modelLine}`
+    );
+    console.log(chalk.green('✅ Model info injected into Executive Summary'));
+  } else {
+    // If no Assessment Date line found, try to add after Executive Summary header
+    const execSummaryPattern = /^## Executive Summary$/m;
+    if (reportContent.match(execSummaryPattern)) {
+      // Add model as first item in Executive Summary
+      reportContent = reportContent.replace(
+        execSummaryPattern,
+        `## Executive Summary\n- Model: ${modelStr}`
+      );
+      console.log(chalk.green('✅ Model info added to Executive Summary header'));
+    } else {
+      console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
+      return;
+    }
+  }
+
+  // 5. Write modified report back
+  await fs.writeFile(reportPath, reportContent);
+}
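A quick demonstration of the injection regex above, applied to a made-up Executive Summary fragment (not a real Shannon report):

```typescript
// The same pattern and replacement used by injectModelIntoReport, applied to sample text.
const sample = [
  '## Executive Summary',
  '- Assessment Date: 2025-01-01',
  '- Target: https://example.com',
].join('\n');

const modelStr = 'gemini-2.5-pro';
const injected = sample.replace(/^(- Assessment Date: .+)$/m, `$1\n- Model: ${modelStr}`);

console.log(injected);
// ## Executive Summary
// - Assessment Date: 2025-01-01
// - Model: gemini-2.5-pro
// - Target: https://example.com
```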
@@ -67,7 +67,7 @@ import {
 rollbackGitWorkspace,
 getGitCommitHash,
 } from '../utils/git-manager.js';
-import { assembleFinalReport } from '../phases/reporting.js';
+import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
 import { getPromptNameForAgent } from '../types/agents.js';
 import { AuditSession } from '../audit/index.js';
 import type { WorkflowSummary } from '../audit/workflow-logger.js';
@@ -192,6 +192,7 @@ async function runAgentActivity(
 duration_ms: result.duration,
 cost_usd: 0,
 success: false,
+model: result.model,
 error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
 });
 // Throw as billing error so Temporal retries with long backoff
@@ -207,6 +208,7 @@ async function runAgentActivity(
 duration_ms: result.duration,
 cost_usd: result.cost || 0,
 success: false,
+model: result.model,
 error: result.error || 'Execution failed',
 });
 throw new Error(result.error || 'Agent execution failed');
@@ -221,6 +223,7 @@ async function runAgentActivity(
 duration_ms: result.duration,
 cost_usd: result.cost || 0,
 success: false,
+model: result.model,
 error: 'Output validation failed',
 });
 
@@ -243,6 +246,7 @@ async function runAgentActivity(
 duration_ms: result.duration,
 cost_usd: result.cost || 0,
 success: true,
+model: result.model,
 ...(commitHash && { checkpoint: commitHash }),
 });
 await commitGitSuccess(repoPath, agentName);
@@ -254,6 +258,7 @@ async function runAgentActivity(
 outputTokens: null,
 costUsd: result.cost ?? null,
 numTurns: result.turns ?? null,
+model: result.model,
 };
 } catch (error) {
 // Rollback git workspace before Temporal retry to ensure clean state
@@ -369,6 +374,25 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
 }
 }
 
+/**
+ * Inject model metadata into the final report.
+ * This must be called AFTER runReportAgent to add the model information to the Executive Summary.
+ */
+export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
+  const { repoPath, outputPath } = input;
+  if (!outputPath) {
+    console.log(chalk.yellow('⚠️ No output path provided, skipping model injection'));
+    return;
+  }
+  try {
+    await injectModelIntoReport(repoPath, outputPath);
+  } catch (error) {
+    const err = error as Error;
+    console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
+    // Don't throw - this is a non-critical enhancement
+  }
+}
+
 /**
  * Check if exploitation should run for a given vulnerability type.
  * Reads the vulnerability queue file and returns the decision.
@@ -35,6 +35,7 @@ interface AgentMetrics {
   outputTokens: number | null;
   costUsd: number | null;
   numTurns: number | null;
+  model?: string | undefined;
 }
 
 interface PipelineProgress {
@@ -123,8 +124,10 @@ async function queryWorkflow(): Promise<void> {
 const metrics = progress.agentMetrics[agent];
 const duration = metrics ? formatDuration(metrics.durationMs) : 'unknown';
 const cost = metrics?.costUsd ? `$${metrics.costUsd.toFixed(4)}` : '';
+const model = metrics?.model ? ` [${metrics.model}]` : '';
 console.log(
   chalk.green(` - ${agent}`) +
+  chalk.blue(model) +
   chalk.gray(` (${duration}${cost ? ', ' + cost : ''})`)
 );
 }
@@ -17,6 +17,7 @@ export interface AgentMetrics {
   outputTokens: number | null;
   costUsd: number | null;
   numTurns: number | null;
+  model?: string | undefined;
 }
 
 export interface PipelineSummary {
@@ -276,6 +276,10 @@ export async function pentestPipelineWorkflow(
 // Then run the report agent to add executive summary and clean up
 state.agentMetrics['report'] = await a.runReportAgent(activityInput);
 state.completedAgents.push('report');
 
+// Inject model metadata into the final report
+await a.injectReportMetadataActivity(activityInput);
+
 await a.logPhaseTransition(activityInput, 'reporting', 'complete');
 
 // === Complete ===