feat: add model tracking and reporting across pipeline

- Track actual model name from router through audit logs, session.json, and query output
- Add router-utils.ts to resolve model names from ROUTER_DEFAULT env var
- Inject model info into final report's Executive Summary section
- Update documentation with supported providers, pricing, and config examples
- Update router-config.json with latest model versions (GPT-5.2, Gemini 2.5, etc.)
2026-05-31 04:39:29 +02:00 · 2026-01-15 18:30:19 -08:00
parent d01980ce4b
commit cd04c7a6d2
16 changed files with 312 additions and 56 deletions
@@ -25,23 +25,25 @@ import { dispatchMessage } from './message-handlers.js';
 import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } from './output-formatters.js';
 import { createProgressManager } from './progress-manager.js';
 import { createAuditLogger } from './audit-logger.js';
+import { getActualModelName } from './router-utils.js';

 declare global {
  var SHANNON_DISABLE_LOADER: boolean | undefined;
 }

 export interface ClaudePromptResult {
-  result?: string | null;
+  result?: string | null | undefined;
  success: boolean;
  duration: number;
-  turns?: number;
+  turns?: number | undefined;
  cost: number;
-  partialCost?: number;
-  apiErrorDetected?: boolean;
-  error?: string;
-  errorType?: string;
-  prompt?: string;
-  retryable?: boolean;
+  model?: string | undefined;
+  partialCost?: number | undefined;
+  apiErrorDetected?: boolean | undefined;
+  error?: string | undefined;
+  errorType?: string | undefined;
+  prompt?: string | undefined;
+  retryable?: boolean | undefined;
 }

 interface StdioMcpServer {
@@ -247,6 +249,7 @@ export async function runClaudePrompt(
    result = messageLoopResult.result;
    apiErrorDetected = messageLoopResult.apiErrorDetected;
    totalCost = messageLoopResult.cost;
+    const model = messageLoopResult.model;

    // === SPENDING CAP SAFEGUARD ===
    // Defense-in-depth: Detect spending cap that slipped through detectApiError().
@@ -283,6 +286,7 @@ export async function runClaudePrompt(
      duration,
      turns: turnCount,
      cost: totalCost,
+      model,
      partialCost: totalCost,
      apiErrorDetected
    };
@@ -316,6 +320,7 @@ interface MessageLoopResult {
  result: string | null;
  apiErrorDetected: boolean;
  cost: number;
+  model?: string | undefined;
 }

 interface MessageLoopDeps {
@@ -339,6 +344,7 @@ async function processMessageStream(
  let result: string | null = null;
  let apiErrorDetected = false;
  let cost = 0;
+  let model: string | undefined;
  let lastHeartbeat = Date.now();

  for await (const message of query({ prompt: fullPrompt, options })) {
@@ -370,12 +376,18 @@ async function processMessageStream(
      break;
    }

-    if (dispatchResult.type === 'continue' && dispatchResult.apiErrorDetected) {
-      apiErrorDetected = true;
+    if (dispatchResult.type === 'continue') {
+      if (dispatchResult.apiErrorDetected) {
+        apiErrorDetected = true;
+      }
+      // Capture model from SystemInitMessage, but override with router model if applicable
+      if (dispatchResult.model) {
+        model = getActualModelName(dispatchResult.model);
+      }
    }
  }

-  return { turnCount, result, apiErrorDetected, cost };
+  return { turnCount, result, apiErrorDetected, cost, model };
 }

 // Main entry point for agent execution. Handles retries, git checkpoints, and validation.
@@ -10,6 +10,7 @@ import { PentestError } from '../error-handling.js';
 import { filterJsonToolCalls } from '../utils/output-formatter.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import chalk from 'chalk';
+import { getActualModelName } from './router-utils.js';
 import {
  formatAssistantOutput,
  formatResultOutput,
@@ -178,7 +179,7 @@ function outputLines(lines: string[]): void {

 // Message dispatch result types
 export type MessageDispatchAction =
-  | { type: 'continue'; apiErrorDetected?: boolean }
+  | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
  | { type: 'complete'; result: string | null; cost: number }
  | { type: 'throw'; error: Error };

@@ -229,13 +230,18 @@ export async function dispatchMessage(
    }

    case 'system': {
-      if (message.subtype === 'init' && !execContext.useCleanOutput) {
+      if (message.subtype === 'init') {
        const initMsg = message as SystemInitMessage;
-        console.log(chalk.blue(`    Model: ${initMsg.model}, Permission: ${initMsg.permissionMode}`));
-        if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
-          const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
-          console.log(chalk.blue(`    MCP: ${mcpStatus}`));
+        const actualModel = getActualModelName(initMsg.model);
+        if (!execContext.useCleanOutput) {
+          console.log(chalk.blue(`    Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
+          if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
+            const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
+            console.log(chalk.blue(`    MCP: ${mcpStatus}`));
+          }
        }
+        // Return actual model for tracking in audit logs
+        return { type: 'continue', model: actualModel };
      }
      return { type: 'continue' };
    }
@@ -0,0 +1,34 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Resolve the name of the model that is actually serving requests.
+ *
+ * When using claude-code-router, the SDK reports its configured model (claude-sonnet)
+ * but the actual model is determined by the ROUTER_DEFAULT env var. The router value is
+ * used only when both ANTHROPIC_BASE_URL and ROUTER_DEFAULT are set.
+ *
+ * @param sdkReportedModel - Model name reported by the SDK, if any.
+ * @returns The trimmed model portion of ROUTER_DEFAULT when router mode is active and
+ *   that portion is non-empty; otherwise `sdkReportedModel` unchanged.
+ */
+export function getActualModelName(sdkReportedModel?: string): string | undefined {
+  const routerBaseUrl = process.env.ANTHROPIC_BASE_URL;
+  const routerDefault = process.env.ROUTER_DEFAULT;
+
+  if (routerBaseUrl && routerDefault) {
+    // ROUTER_DEFAULT format: "provider,model" (e.g., "gemini,gemini-2.5-pro").
+    // Split on the first comma only so a model name containing commas stays intact.
+    const separatorIndex = routerDefault.indexOf(',');
+    if (separatorIndex !== -1) {
+      const routerModel = routerDefault.slice(separatorIndex + 1).trim();
+      // A value such as "gemini," carries no usable model; fall through to the SDK
+      // name instead of reporting an empty string.
+      if (routerModel) {
+        return routerModel;
+      }
+    }
+  }
+
+  // Fall back to SDK-reported model
+  return sdkReportedModel;
+}
+
+/**
+ * Report whether router mode is active, i.e. both ANTHROPIC_BASE_URL and
+ * ROUTER_DEFAULT are set to non-empty values.
+ */
+export function isRouterMode(): boolean {
+  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerDefault } = process.env;
+  return Boolean(baseUrl) && Boolean(routerDefault);
+}
@@ -26,9 +26,10 @@ interface AgentEndResult {
  duration_ms: number;
  cost_usd: number;
  success: boolean;
-  error?: string;
-  checkpoint?: string;
-  isFinalAttempt?: boolean;
+  model?: string | undefined;
+  error?: string | undefined;
+  checkpoint?: string | undefined;
+  isFinalAttempt?: boolean | undefined;
 }

 /**
@@ -26,7 +26,8 @@ interface AttemptData {
  cost_usd: number;
  success: boolean;
  timestamp: string;
-  error?: string;
+  model?: string | undefined;
+  error?: string | undefined;
 }

 interface AgentMetrics {
@@ -34,7 +35,8 @@ interface AgentMetrics {
  attempts: AttemptData[];
  final_duration_ms: number;
  total_cost_usd: number;
-  checkpoint?: string;
+  model?: string | undefined;
+  checkpoint?: string | undefined;
 }

 interface PhaseMetrics {
@@ -66,9 +68,10 @@ interface AgentEndResult {
  duration_ms: number;
  cost_usd: number;
  success: boolean;
-  error?: string;
-  checkpoint?: string;
-  isFinalAttempt?: boolean;
+  model?: string | undefined;
+  error?: string | undefined;
+  checkpoint?: string | undefined;
+  isFinalAttempt?: boolean | undefined;
 }

 interface ActiveTimer {
@@ -169,6 +172,10 @@ export class MetricsTracker {
      timestamp: formatTimestamp(),
    };

+    if (result.model) {
+      attempt.model = result.model;
+    }
+
    if (result.error) {
      attempt.error = result.error;
    }
@@ -183,6 +190,10 @@ export class MetricsTracker {
      agent.status = 'success';
      agent.final_duration_ms = result.duration_ms;

+      if (result.model) {
+        agent.model = result.model;
+      }
+
      if (result.checkpoint) {
        agent.checkpoint = result.checkpoint;
      }
@@ -18,9 +18,9 @@ import type { DistributedConfig } from '../types/config.js';
 interface AgentResult {
  success: boolean;
  duration: number;
-  cost?: number;
-  error?: string;
-  retryable?: boolean;
+  cost?: number | undefined;
+  error?: string | undefined;
+  retryable?: boolean | undefined;
 }

 type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
@@ -68,3 +68,87 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {

  return finalContent;
 }
+
+/**
+ * Inject model information into the final security report.
+ *
+ * Reads session.json from `outputPath` to collect the model recorded for each agent,
+ * then adds a "- Model: ..." line to
+ * `<repoPath>/deliverables/comprehensive_security_assessment_report.md`: directly after
+ * the first "- Assessment Date: ..." line or, failing that, directly after the
+ * "## Executive Summary" header. Every skip condition is logged and returns without
+ * modifying the report.
+ *
+ * @param repoPath - Repository root containing the `deliverables` directory.
+ * @param outputPath - Directory containing session.json.
+ */
+export async function injectModelIntoReport(
+  repoPath: string,
+  outputPath: string
+): Promise<void> {
+  // 1. Read session.json to get model information
+  const sessionJsonPath = path.join(outputPath, 'session.json');
+
+  if (!(await fs.pathExists(sessionJsonPath))) {
+    console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
+    return;
+  }
+
+  // Only the fields read below are described. Everything is optional because the
+  // file is parsed JSON whose shape is not validated here.
+  interface SessionData {
+    metrics?: {
+      agents?: Record<string, { model?: string } | null>;
+    };
+  }
+
+  const sessionData: SessionData | null = await fs.readJson(sessionJsonPath);
+
+  // 2. Extract unique models from all agents (a Set keeps first-seen order)
+  const models = new Set<string>();
+  for (const agent of Object.values(sessionData?.metrics?.agents ?? {})) {
+    if (agent?.model) {
+      models.add(agent.model);
+    }
+  }
+
+  if (models.size === 0) {
+    console.log(chalk.yellow('⚠️ No model information found in session.json'));
+    return;
+  }
+
+  const modelStr = Array.from(models).join(', ');
+  const modelLine = `- Model: ${modelStr}`;
+  console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));
+
+  // 3. Read the final report
+  const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
+
+  if (!(await fs.pathExists(reportPath))) {
+    console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
+    return;
+  }
+
+  const reportContent = await fs.readFile(reportPath, 'utf8');
+
+  // Running this step again (for example when the calling activity is retried)
+  // must not stack duplicate lines, so stop if the exact line is already there.
+  if (reportContent.split(/\r?\n/).includes(modelLine)) {
+    console.log(chalk.blue('ℹ️ Model info already present in report, skipping injection'));
+    return;
+  }
+
+  // 4. Find and inject model line after "Assessment Date" in Executive Summary
+  // Pattern: "- Assessment Date: <date>" on a line of its own
+  const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
+  const execSummaryPattern = /^## Executive Summary$/m;
+  let updatedContent: string;
+
+  // Function replacers are used so that "$" sequences inside a model name are
+  // inserted literally rather than interpreted as replacement patterns.
+  if (assessmentDatePattern.test(reportContent)) {
+    updatedContent = reportContent.replace(
+      assessmentDatePattern,
+      (dateLine: string) => `${dateLine}\n${modelLine}`
+    );
+    console.log(chalk.green('✅ Model info injected into Executive Summary'));
+  } else if (execSummaryPattern.test(reportContent)) {
+    // No Assessment Date line: add the model as the first item under the header
+    updatedContent = reportContent.replace(
+      execSummaryPattern,
+      (header: string) => `${header}\n${modelLine}`
+    );
+    console.log(chalk.green('✅ Model info added to Executive Summary header'));
+  } else {
+    console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
+    return;
+  }
+
+  // 5. Write modified report back
+  await fs.writeFile(reportPath, updatedContent);
+}
@@ -67,7 +67,7 @@ import {
  rollbackGitWorkspace,
  getGitCommitHash,
 } from '../utils/git-manager.js';
-import { assembleFinalReport } from '../phases/reporting.js';
+import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
 import { getPromptNameForAgent } from '../types/agents.js';
 import { AuditSession } from '../audit/index.js';
 import type { WorkflowSummary } from '../audit/workflow-logger.js';
@@ -192,6 +192,7 @@ async function runAgentActivity(
          duration_ms: result.duration,
          cost_usd: 0,
          success: false,
+          model: result.model,
          error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
        });
        // Throw as billing error so Temporal retries with long backoff
@@ -207,6 +208,7 @@ async function runAgentActivity(
        duration_ms: result.duration,
        cost_usd: result.cost || 0,
        success: false,
+        model: result.model,
        error: result.error || 'Execution failed',
      });
      throw new Error(result.error || 'Agent execution failed');
@@ -221,6 +223,7 @@ async function runAgentActivity(
        duration_ms: result.duration,
        cost_usd: result.cost || 0,
        success: false,
+        model: result.model,
        error: 'Output validation failed',
      });

@@ -243,6 +246,7 @@ async function runAgentActivity(
      duration_ms: result.duration,
      cost_usd: result.cost || 0,
      success: true,
+      model: result.model,
      ...(commitHash && { checkpoint: commitHash }),
    });
    await commitGitSuccess(repoPath, agentName);
@@ -254,6 +258,7 @@ async function runAgentActivity(
      outputTokens: null,
      costUsd: result.cost ?? null,
      numTurns: result.turns ?? null,
+      model: result.model,
    };
  } catch (error) {
    // Rollback git workspace before Temporal retry to ensure clean state
@@ -369,6 +374,25 @@ export async function assembleReportActivity(input: ActivityInput): Promise<void
  }
 }

+/**
+ * Inject model metadata into the final report.
+ * This must be called AFTER runReportAgent to add the model information to the Executive Summary.
+ *
+ * Best-effort: a missing output path or any failure during injection is logged as a
+ * warning and never rethrown.
+ */
+export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
+  const { repoPath, outputPath } = input;
+  if (!outputPath) {
+    console.log(chalk.yellow('⚠️ No output path provided, skipping model injection'));
+    return;
+  }
+  try {
+    await injectModelIntoReport(repoPath, outputPath);
+  } catch (error: unknown) {
+    // Narrow instead of asserting: a thrown non-Error value has no `.message`,
+    // which would otherwise be logged as "undefined".
+    const reason = error instanceof Error ? error.message : String(error);
+    console.log(chalk.yellow(`⚠️ Error injecting model into report: ${reason}`));
+    // Don't throw - this is a non-critical enhancement
+  }
+}
+
 /**
 * Check if exploitation should run for a given vulnerability type.
 * Reads the vulnerability queue file and returns the decision.
@@ -35,6 +35,7 @@ interface AgentMetrics {
  outputTokens: number | null;
  costUsd: number | null;
  numTurns: number | null;
+  model?: string | undefined;
 }

 interface PipelineProgress {
@@ -123,8 +124,10 @@ async function queryWorkflow(): Promise<void> {
        const metrics = progress.agentMetrics[agent];
        const duration = metrics ? formatDuration(metrics.durationMs) : 'unknown';
        const cost = metrics?.costUsd ? `$${metrics.costUsd.toFixed(4)}` : '';
+        const model = metrics?.model ? ` [${metrics.model}]` : '';
        console.log(
          chalk.green(`  - ${agent}`) +
+            chalk.blue(model) +
            chalk.gray(` (${duration}${cost ? ', ' + cost : ''})`)
        );
      }
@@ -17,6 +17,7 @@ export interface AgentMetrics {
  outputTokens: number | null;
  costUsd: number | null;
  numTurns: number | null;
+  model?: string | undefined;
 }

 export interface PipelineSummary {
@@ -276,6 +276,10 @@ export async function pentestPipelineWorkflow(
    // Then run the report agent to add executive summary and clean up
    state.agentMetrics['report'] = await a.runReportAgent(activityInput);
    state.completedAgents.push('report');
+
+    // Inject model metadata into the final report
+    await a.injectReportMetadataActivity(activityInput);
+
    await a.logPhaseTransition(activityInput, 'reporting', 'complete');

    // === Complete ===