mirror of
https://github.com/KeygraphHQ/shannon.git
synced 2026-05-31 04:39:29 +02:00
feat: add model tracking and reporting across pipeline
- Track actual model name from router through audit logs, session.json, and query output
- Add router-utils.ts to resolve model names from ROUTER_DEFAULT env var
- Inject model info into final report's Executive Summary section
- Update documentation with supported providers, pricing, and config examples
- Update router-config.json with latest model versions (GPT-5.2, Gemini 2.5, etc.)
This commit is contained in:
+23
-11
@@ -25,23 +25,25 @@ import { dispatchMessage } from './message-handlers.js';
|
||||
import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
|
||||
import { createProgressManager } from './progress-manager.js';
|
||||
import { createAuditLogger } from './audit-logger.js';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
|
||||
declare global {
|
||||
var SHANNON_DISABLE_LOADER: boolean | undefined;
|
||||
}
|
||||
|
||||
export interface ClaudePromptResult {
|
||||
result?: string | null;
|
||||
result?: string | null | undefined;
|
||||
success: boolean;
|
||||
duration: number;
|
||||
turns?: number;
|
||||
turns?: number | undefined;
|
||||
cost: number;
|
||||
partialCost?: number;
|
||||
apiErrorDetected?: boolean;
|
||||
error?: string;
|
||||
errorType?: string;
|
||||
prompt?: string;
|
||||
retryable?: boolean;
|
||||
model?: string | undefined;
|
||||
partialCost?: number | undefined;
|
||||
apiErrorDetected?: boolean | undefined;
|
||||
error?: string | undefined;
|
||||
errorType?: string | undefined;
|
||||
prompt?: string | undefined;
|
||||
retryable?: boolean | undefined;
|
||||
}
|
||||
|
||||
interface StdioMcpServer {
|
||||
@@ -247,6 +249,7 @@ export async function runClaudePrompt(
|
||||
result = messageLoopResult.result;
|
||||
apiErrorDetected = messageLoopResult.apiErrorDetected;
|
||||
totalCost = messageLoopResult.cost;
|
||||
const model = messageLoopResult.model;
|
||||
|
||||
// === SPENDING CAP SAFEGUARD ===
|
||||
// Defense-in-depth: Detect spending cap that slipped through detectApiError().
|
||||
@@ -283,6 +286,7 @@ export async function runClaudePrompt(
|
||||
duration,
|
||||
turns: turnCount,
|
||||
cost: totalCost,
|
||||
model,
|
||||
partialCost: totalCost,
|
||||
apiErrorDetected
|
||||
};
|
||||
@@ -316,6 +320,7 @@ interface MessageLoopResult {
|
||||
result: string | null;
|
||||
apiErrorDetected: boolean;
|
||||
cost: number;
|
||||
model?: string | undefined;
|
||||
}
|
||||
|
||||
interface MessageLoopDeps {
|
||||
@@ -339,6 +344,7 @@ async function processMessageStream(
|
||||
let result: string | null = null;
|
||||
let apiErrorDetected = false;
|
||||
let cost = 0;
|
||||
let model: string | undefined;
|
||||
let lastHeartbeat = Date.now();
|
||||
|
||||
for await (const message of query({ prompt: fullPrompt, options })) {
|
||||
@@ -370,12 +376,18 @@ async function processMessageStream(
|
||||
break;
|
||||
}
|
||||
|
||||
if (dispatchResult.type === 'continue' && dispatchResult.apiErrorDetected) {
|
||||
apiErrorDetected = true;
|
||||
if (dispatchResult.type === 'continue') {
|
||||
if (dispatchResult.apiErrorDetected) {
|
||||
apiErrorDetected = true;
|
||||
}
|
||||
// Capture model from SystemInitMessage, but override with router model if applicable
|
||||
if (dispatchResult.model) {
|
||||
model = getActualModelName(dispatchResult.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { turnCount, result, apiErrorDetected, cost };
|
||||
return { turnCount, result, apiErrorDetected, cost, model };
|
||||
}
|
||||
|
||||
// Main entry point for agent execution. Handles retries, git checkpoints, and validation.
|
||||
|
||||
@@ -10,6 +10,7 @@ import { PentestError } from '../error-handling.js';
|
||||
import { filterJsonToolCalls } from '../utils/output-formatter.js';
|
||||
import { formatTimestamp } from '../utils/formatting.js';
|
||||
import chalk from 'chalk';
|
||||
import { getActualModelName } from './router-utils.js';
|
||||
import {
|
||||
formatAssistantOutput,
|
||||
formatResultOutput,
|
||||
@@ -178,7 +179,7 @@ function outputLines(lines: string[]): void {
|
||||
|
||||
// Message dispatch result types
|
||||
export type MessageDispatchAction =
|
||||
| { type: 'continue'; apiErrorDetected?: boolean }
|
||||
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
|
||||
| { type: 'complete'; result: string | null; cost: number }
|
||||
| { type: 'throw'; error: Error };
|
||||
|
||||
@@ -229,13 +230,18 @@ export async function dispatchMessage(
|
||||
}
|
||||
|
||||
case 'system': {
|
||||
if (message.subtype === 'init' && !execContext.useCleanOutput) {
|
||||
if (message.subtype === 'init') {
|
||||
const initMsg = message as SystemInitMessage;
|
||||
console.log(chalk.blue(` Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`));
|
||||
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
|
||||
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
|
||||
console.log(chalk.blue(` MCP: ${mcpStatus}`));
|
||||
const actualModel = getActualModelName(initMsg.model);
|
||||
if (!execContext.useCleanOutput) {
|
||||
console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
|
||||
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
|
||||
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
|
||||
console.log(chalk.blue(` MCP: ${mcpStatus}`));
|
||||
}
|
||||
}
|
||||
// Return actual model for tracking in audit logs
|
||||
return { type: 'continue', model: actualModel };
|
||||
}
|
||||
return { type: 'continue' };
|
||||
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
 * Get the actual model name being used.
 *
 * When using claude-code-router, the SDK reports its configured model
 * (claude-sonnet) but the actual model is determined by the ROUTER_DEFAULT
 * env var, whose format is "provider,model" (e.g. "gemini,gemini-2.5-pro").
 *
 * Router mode is considered active only when both ANTHROPIC_BASE_URL and
 * ROUTER_DEFAULT are set to non-empty values.
 *
 * @param sdkReportedModel - Model name reported by the SDK, if any.
 * @returns The model part of ROUTER_DEFAULT when router mode is active and that
 *   part is non-empty after trimming; otherwise `sdkReportedModel` unchanged.
 */
export function getActualModelName(sdkReportedModel?: string): string | undefined {
  const routerBaseUrl = process.env.ANTHROPIC_BASE_URL;
  const routerDefault = process.env.ROUTER_DEFAULT;

  if (routerBaseUrl && routerDefault) {
    // Split on the FIRST comma only, so model names that themselves contain
    // commas are preserved intact.
    const separatorIndex = routerDefault.indexOf(',');
    if (separatorIndex !== -1) {
      // Trim so "provider, model" does not yield a name with a leading space.
      const routedModel = routerDefault.slice(separatorIndex + 1).trim();
      // An empty model part (e.g. "gemini,") carries no information; fall
      // through to the SDK-reported model instead of returning "".
      if (routedModel) {
        return routedModel;
      }
    }
  }

  // Fall back to SDK-reported model
  return sdkReportedModel;
}
|
||||
|
||||
/**
 * Report whether router mode is active, i.e. whether both the
 * ANTHROPIC_BASE_URL and ROUTER_DEFAULT env vars hold non-empty values.
 */
export function isRouterMode(): boolean {
  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerDefault } = process.env;
  return Boolean(baseUrl && routerDefault);
}
|
||||
@@ -26,9 +26,10 @@ interface AgentEndResult {
|
||||
duration_ms: number;
|
||||
cost_usd: number;
|
||||
success: boolean;
|
||||
error?: string;
|
||||
checkpoint?: string;
|
||||
isFinalAttempt?: boolean;
|
||||
model?: string | undefined;
|
||||
error?: string | undefined;
|
||||
checkpoint?: string | undefined;
|
||||
isFinalAttempt?: boolean | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -26,7 +26,8 @@ interface AttemptData {
|
||||
cost_usd: number;
|
||||
success: boolean;
|
||||
timestamp: string;
|
||||
error?: string;
|
||||
model?: string | undefined;
|
||||
error?: string | undefined;
|
||||
}
|
||||
|
||||
interface AgentMetrics {
|
||||
@@ -34,7 +35,8 @@ interface AgentMetrics {
|
||||
attempts: AttemptData[];
|
||||
final_duration_ms: number;
|
||||
total_cost_usd: number;
|
||||
checkpoint?: string;
|
||||
model?: string | undefined;
|
||||
checkpoint?: string | undefined;
|
||||
}
|
||||
|
||||
interface PhaseMetrics {
|
||||
@@ -66,9 +68,10 @@ interface AgentEndResult {
|
||||
duration_ms: number;
|
||||
cost_usd: number;
|
||||
success: boolean;
|
||||
error?: string;
|
||||
checkpoint?: string;
|
||||
isFinalAttempt?: boolean;
|
||||
model?: string | undefined;
|
||||
error?: string | undefined;
|
||||
checkpoint?: string | undefined;
|
||||
isFinalAttempt?: boolean | undefined;
|
||||
}
|
||||
|
||||
interface ActiveTimer {
|
||||
@@ -169,6 +172,10 @@ export class MetricsTracker {
|
||||
timestamp: formatTimestamp(),
|
||||
};
|
||||
|
||||
if (result.model) {
|
||||
attempt.model = result.model;
|
||||
}
|
||||
|
||||
if (result.error) {
|
||||
attempt.error = result.error;
|
||||
}
|
||||
@@ -183,6 +190,10 @@ export class MetricsTracker {
|
||||
agent.status = 'success';
|
||||
agent.final_duration_ms = result.duration_ms;
|
||||
|
||||
if (result.model) {
|
||||
agent.model = result.model;
|
||||
}
|
||||
|
||||
if (result.checkpoint) {
|
||||
agent.checkpoint = result.checkpoint;
|
||||
}
|
||||
|
||||
@@ -18,9 +18,9 @@ import type { DistributedConfig } from '../types/config.js';
|
||||
interface AgentResult {
|
||||
success: boolean;
|
||||
duration: number;
|
||||
cost?: number;
|
||||
error?: string;
|
||||
retryable?: boolean;
|
||||
cost?: number | undefined;
|
||||
error?: string | undefined;
|
||||
retryable?: boolean | undefined;
|
||||
}
|
||||
|
||||
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
|
||||
|
||||
@@ -68,3 +68,87 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
|
||||
|
||||
return finalContent;
|
||||
}
|
||||
|
||||
/**
 * Inject model information into the final security report.
 *
 * Reads `session.json` from `outputPath`, collects the distinct `model` values
 * recorded under `metrics.agents`, and adds a "- Model: ..." line to
 * `deliverables/comprehensive_security_assessment_report.md` under `repoPath`.
 * The line is placed directly after the first "- Assessment Date: ..." line,
 * or directly after the "## Executive Summary" heading when no such line exists.
 *
 * Each skip condition (missing session.json, no recorded models, missing
 * report, model line already present, no Executive Summary heading) logs a
 * message and returns without writing the report.
 *
 * @param repoPath - Directory that contains the `deliverables` folder.
 * @param outputPath - Directory that contains `session.json`.
 */
export async function injectModelIntoReport(
  repoPath: string,
  outputPath: string
): Promise<void> {
  // 1. Read session.json to get model information
  const sessionJsonPath = path.join(outputPath, 'session.json');

  if (!(await fs.pathExists(sessionJsonPath))) {
    console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
    return;
  }

  // Both levels are optional: the file is parsed JSON, so its shape is not
  // guaranteed and a missing key must not throw.
  interface SessionData {
    metrics?: {
      agents?: Record<string, { model?: string }>;
    };
  }

  const sessionData: SessionData = await fs.readJson(sessionJsonPath);

  // 2. Extract unique models from all agents
  const models = new Set<string>();
  for (const agent of Object.values(sessionData.metrics?.agents ?? {})) {
    if (agent?.model) {
      models.add(agent.model);
    }
  }

  if (models.size === 0) {
    console.log(chalk.yellow('⚠️ No model information found in session.json'));
    return;
  }

  const modelStr = Array.from(models).join(', ');
  console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));

  // 3. Read the final report
  const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');

  if (!(await fs.pathExists(reportPath))) {
    console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
    return;
  }

  let reportContent = await fs.readFile(reportPath, 'utf8');
  const modelLine = `- Model: ${modelStr}`;

  // Idempotence guard: if this exact line is already in the report (e.g. the
  // function ran before), do not insert a duplicate.
  if (reportContent.split(/\r?\n/).includes(modelLine)) {
    console.log(chalk.blue('Model info already present in report, skipping injection'));
    return;
  }

  // 4. Find and inject model line after "Assessment Date" in Executive Summary
  // Pattern: "- Assessment Date: <date>" on a line of its own
  const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
  const execSummaryPattern = /^## Executive Summary$/m;

  // Replacer FUNCTIONS are used instead of replacement strings so that any
  // "$" sequences inside the model name are inserted literally rather than
  // being interpreted as replacement patterns ($&, $1, $$, ...).
  if (assessmentDatePattern.test(reportContent)) {
    reportContent = reportContent.replace(
      assessmentDatePattern,
      (dateLine: string) => `${dateLine}\n${modelLine}`
    );
    console.log(chalk.green('✅ Model info injected into Executive Summary'));
  } else if (execSummaryPattern.test(reportContent)) {
    // No Assessment Date line: add the model as the first item under the heading
    reportContent = reportContent.replace(
      execSummaryPattern,
      (heading: string) => `${heading}\n${modelLine}`
    );
    console.log(chalk.green('✅ Model info added to Executive Summary header'));
  } else {
    console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
    return;
  }

  // 5. Write modified report back
  await fs.writeFile(reportPath, reportContent);
}
|
||||
|
||||
@@ -67,7 +67,7 @@ import {
|
||||
rollbackGitWorkspace,
|
||||
getGitCommitHash,
|
||||
} from '../utils/git-manager.js';
|
||||
import { assembleFinalReport } from '../phases/reporting.js';
|
||||
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
|
||||
import { getPromptNameForAgent } from '../types/agents.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
@@ -192,6 +192,7 @@ async function runAgentActivity(
|
||||
duration_ms: result.duration,
|
||||
cost_usd: 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
|
||||
});
|
||||
// Throw as billing error so Temporal retries with long backoff
|
||||
@@ -207,6 +208,7 @@ async function runAgentActivity(
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: result.error || 'Execution failed',
|
||||
});
|
||||
throw new Error(result.error || 'Agent execution failed');
|
||||
@@ -221,6 +223,7 @@ async function runAgentActivity(
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: false,
|
||||
model: result.model,
|
||||
error: 'Output validation failed',
|
||||
});
|
||||
|
||||
@@ -243,6 +246,7 @@ async function runAgentActivity(
|
||||
duration_ms: result.duration,
|
||||
cost_usd: result.cost || 0,
|
||||
success: true,
|
||||
model: result.model,
|
||||
...(commitHash && { checkpoint: commitHash }),
|
||||
});
|
||||
await commitGitSuccess(repoPath, agentName);
|
||||
@@ -254,6 +258,7 @@ async function runAgentActivity(
|
||||
outputTokens: null,
|
||||
costUsd: result.cost ?? null,
|
||||
numTurns: result.turns ?? null,
|
||||
model: result.model,
|
||||
};
|
||||
} catch (error) {
|
||||
// Rollback git workspace before Temporal retry to ensure clean state
|
||||
@@ -369,6 +374,25 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Inject model metadata into the final report.
 * This must be called AFTER runReportAgent to add the model information to the Executive Summary.
 *
 * Best-effort: when `outputPath` is missing, or when `injectModelIntoReport`
 * throws, a warning is logged and the function returns normally.
 *
 * @param input - Activity input; only `repoPath` and `outputPath` are read here.
 */
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
  const { repoPath, outputPath } = input;
  if (!outputPath) {
    console.log(chalk.yellow('⚠️ No output path provided, skipping model injection'));
    return;
  }
  try {
    await injectModelIntoReport(repoPath, outputPath);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so non-Error throwables are still reported meaningfully.
    const reason = error instanceof Error ? error.message : String(error);
    console.log(chalk.yellow(`⚠️ Error injecting model into report: ${reason}`));
    // Don't throw - this is a non-critical enhancement
  }
}
|
||||
|
||||
/**
|
||||
* Check if exploitation should run for a given vulnerability type.
|
||||
* Reads the vulnerability queue file and returns the decision.
|
||||
|
||||
@@ -35,6 +35,7 @@ interface AgentMetrics {
|
||||
outputTokens: number | null;
|
||||
costUsd: number | null;
|
||||
numTurns: number | null;
|
||||
model?: string | undefined;
|
||||
}
|
||||
|
||||
interface PipelineProgress {
|
||||
@@ -123,8 +124,10 @@ async function queryWorkflow(): Promise<void> {
|
||||
const metrics = progress.agentMetrics[agent];
|
||||
const duration = metrics ? formatDuration(metrics.durationMs) : 'unknown';
|
||||
const cost = metrics?.costUsd ? `$${metrics.costUsd.toFixed(4)}` : '';
|
||||
const model = metrics?.model ? ` [${metrics.model}]` : '';
|
||||
console.log(
|
||||
chalk.green(` - ${agent}`) +
|
||||
chalk.blue(model) +
|
||||
chalk.gray(` (${duration}${cost ? ', ' + cost : ''})`)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ export interface AgentMetrics {
|
||||
outputTokens: number | null;
|
||||
costUsd: number | null;
|
||||
numTurns: number | null;
|
||||
model?: string | undefined;
|
||||
}
|
||||
|
||||
export interface PipelineSummary {
|
||||
|
||||
@@ -276,6 +276,10 @@ export async function pentestPipelineWorkflow(
|
||||
// Then run the report agent to add executive summary and clean up
|
||||
state.agentMetrics['report'] = await a.runReportAgent(activityInput);
|
||||
state.completedAgents.push('report');
|
||||
|
||||
// Inject model metadata into the final report
|
||||
await a.injectReportMetadataActivity(activityInput);
|
||||
|
||||
await a.logPhaseTransition(activityInput, 'reporting', 'complete');
|
||||
|
||||
// === Complete ===
|
||||
|
||||
Reference in New Issue
Block a user