/** * Per-model cost tracking for eval runs. * * Computes cost breakdowns from CostEntry arrays and formats * them as terminal tables. Supports aggregation across multiple runs. */ import type { CostEntry, StandardEvalResult } from './eval-format'; // --- Interfaces --- export interface CostSummary { model: string; calls: number; input_tokens: number; output_tokens: number; estimated_cost_usd: number; } export interface CostDashboard { entries: CostSummary[]; total: number; at_fast_tier: number; at_full_tier: number; } // --- Pricing --- /** * Per-million-token pricing for Claude models. * Last verified: 2025-05-01 */ export const MODEL_PRICING: Record = { 'claude-opus-4-6': { input: 15.00, output: 75.00 }, 'claude-sonnet-4-6': { input: 3.00, output: 15.00 }, 'claude-haiku-4-5': { input: 0.80, output: 4.00 }, // Legacy model IDs 'claude-3-5-sonnet-20241022': { input: 3.00, output: 15.00 }, 'claude-3-5-haiku-20241022': { input: 0.80, output: 4.00 }, 'claude-3-opus-20240229': { input: 15.00, output: 75.00 }, }; /** Fallback pricing for unknown models (use sonnet pricing as a safe middle ground). */ const FALLBACK_PRICING = { input: 3.00, output: 15.00 }; // --- Computation --- function getPricing(model: string): { input: number; output: number } { return MODEL_PRICING[model] || FALLBACK_PRICING; } /** * Compute per-model cost summaries from an array of CostEntry records. */ export function computeCosts(costs: CostEntry[]): CostDashboard { const byModel = new Map(); // Track exact cost_usd sums per model (from API-provided costs) const exactCosts = new Map(); for (const entry of costs) { const existing = byModel.get(entry.model); if (existing) { existing.calls += entry.calls; existing.input_tokens += entry.input_tokens; existing.output_tokens += entry.output_tokens; } else { byModel.set(entry.model, { model: entry.model, calls: entry.calls, input_tokens: entry.input_tokens, output_tokens: entry.output_tokens, estimated_cost_usd: 0, }); } if (entry.cost_usd !== undefined) { exactCosts.set(entry.model, (exactCosts.get(entry.model) || 0) + entry.cost_usd); } } // Calculate costs — prefer exact cost_usd (accounts for cache discounts) let total = 0; let atFast = 0; let atFull = 0; const fastPricing = MODEL_PRICING['claude-haiku-4-5'] || FALLBACK_PRICING; const fullPricing = MODEL_PRICING['claude-opus-4-6'] || FALLBACK_PRICING; for (const summary of byModel.values()) { const exact = exactCosts.get(summary.model); if (exact !== undefined) { summary.estimated_cost_usd = exact; } else { const pricing = getPricing(summary.model); summary.estimated_cost_usd = (summary.input_tokens / 1_000_000) * pricing.input + (summary.output_tokens / 1_000_000) * pricing.output; } total += summary.estimated_cost_usd; // What-if at fast/full tiers (always from token counts) atFast += (summary.input_tokens / 1_000_000) * fastPricing.input + (summary.output_tokens / 1_000_000) * fastPricing.output; atFull += (summary.input_tokens / 1_000_000) * fullPricing.input + (summary.output_tokens / 1_000_000) * fullPricing.output; } const entries = [...byModel.values()].sort((a, b) => b.estimated_cost_usd - a.estimated_cost_usd); return { entries, total: Math.round(total * 1_000_000) / 1_000_000, at_fast_tier: Math.round(atFast * 1_000_000) / 1_000_000, at_full_tier: Math.round(atFull * 1_000_000) / 1_000_000, }; } /** * Format a CostDashboard as a terminal table. */ export function formatCostDashboard(dashboard: CostDashboard): string { const lines: string[] = []; lines.push(''); lines.push('Cost Breakdown'); lines.push('═'.repeat(75)); lines.push( ' ' + 'Model'.padEnd(32) + 'Calls'.padEnd(8) + 'In Tokens'.padEnd(12) + 'Out Tokens'.padEnd(12) + 'Cost' ); lines.push('─'.repeat(75)); for (const entry of dashboard.entries) { const model = entry.model.length > 30 ? entry.model.slice(0, 27) + '...' : entry.model.padEnd(32); lines.push( ` ${model}` + `${entry.calls}`.padEnd(8) + `${entry.input_tokens.toLocaleString()}`.padEnd(12) + `${entry.output_tokens.toLocaleString()}`.padEnd(12) + `$${entry.estimated_cost_usd.toFixed(4)}` ); } lines.push('─'.repeat(75)); lines.push(` Total: $${dashboard.total.toFixed(4)}`); lines.push(` At fast tier (Haiku): $${dashboard.at_fast_tier.toFixed(4)}`); lines.push(` At full tier (Opus): $${dashboard.at_full_tier.toFixed(4)}`); lines.push(''); return lines.join('\n'); } /** * Aggregate costs across multiple StandardEvalResult runs. * Merges all costs[] arrays and computes a single dashboard. */ export function aggregateCosts(results: StandardEvalResult[]): CostDashboard { const allCosts: CostEntry[] = []; for (const r of results) { if (r.costs) { allCosts.push(...r.costs); } } return computeCosts(allCosts); }