/**
 * Token usage reported for a single model call, used for cost logging.
 *
 * Every field is optional; missing counts are treated as 0 by `calculateCost`.
 */
export type Usage = {
  /** Number of prompt (input) tokens. */
  prompt_tokens?: number;
  /** Number of completion (output) tokens. */
  completion_tokens?: number;
  /** Total number of tokens. Not read by `calculateCost`. */
  total_tokens?: number;
  /** Breakdown of the prompt tokens. */
  prompt_tokens_details?: {
    /** Prompt tokens that `calculateCost` bills at the cached rate. */
    cached_tokens?: number;
  };
};

/**
 * Cost configuration for a model. All values are USD per million tokens.
 */
export type CostConfig = {
  /** Cost per million prompt tokens (USD). */
  input: number;
  /** Cost per million cached prompt tokens (USD). */
  cached: number;
  /** Cost per million completion tokens (USD). */
  output: number;
};

/**
 * Model pricing table: maps a model name to its cost configuration.
 */
export type ModelCostTable = Record<string, CostConfig>;

/**
 * Helicone API response types
 */
interface HeliconeModelCost {
  provider: string;
  model: string;
  operator: "equals" | "startsWith" | "includes";
  input_cost_per_1m: number;
  output_cost_per_1m: number;
  prompt_cache_write_per_1m?: number;
  prompt_cache_read_per_1m?: number;
  show_in_playground?: boolean;
}

interface HeliconeApiResponse {
  metadata: {
    total_models: number;
  };
  data: HeliconeModelCost[];
}

/**
 * Internal storage for cost data with matching operators
 */
interface CostEntry {
  operator: "equals" | "startsWith" | "includes";
  config: CostConfig;
}

// Storage for Helicone costs (loaded at runtime), keyed by lower-cased model
// name/pattern. Replaced wholesale by loadHeliconeCosts() on a successful load.
let heliconeCosts: Map<string, CostEntry> = new Map();
let heliconeCostsLoaded = false;

/**
 * ============================================================================
 * CUSTOM MODEL COSTS
 * ============================================================================
 *
 * Add your custom model costs here. These will take precedence over costs
 * fetched from the Helicone API. This is useful for:
 *
 * - Custom/fine-tuned models (e.g., "zlm-4.6")
 * - Self-hosted models with custom pricing
 * - Overriding Helicone costs for specific models
 * - Models not yet in the Helicone database
 *
 * Format:
 *   "model-name": { input: <number>, cached: <number>, output: <number> }
 *
 * All costs are in USD per million tokens.
 *
 * @example
 * ```ts
 * export const CUSTOM_MODEL_COSTS: ModelCostTable = {
 *   "zlm-4.6": { input: 2.5, cached: 1.25, output: 10 },
 *   "zlm-4.5-air": { input: 0.15, cached: 0.075, output: 0.6 },
 * };
 * ```
 */
export const CUSTOM_MODEL_COSTS: ModelCostTable = {
  // Add your custom model costs here
};

/**
 * Fetches and loads cost data from the Helicone API.
 * This should be called once at application startup.
 *
 * The new table is built completely before it replaces the current one, so a
 * failed request or malformed payload leaves previously loaded costs intact.
 * Failures are logged as a warning and never thrown to the caller.
 *
 * Entries are keyed by lower-cased model name; a later entry with the same
 * name replaces an earlier one. Entries without a non-empty string `model`
 * are skipped (an empty pattern would match every model).
 *
 * @returns Promise that resolves when loading has finished (successfully or not)
 */
export async function loadHeliconeCosts(): Promise<void> {
  try {
    const response = await fetch("https://www.helicone.ai/api/llm-costs");

    if (!response.ok) {
      throw new Error(`Helicone API returned ${response.status}: ${response.statusText}`);
    }

    const data: HeliconeApiResponse = await response.json();

    // The payload is external input: validate its shape before touching state.
    if (data == null || !Array.isArray(data.data)) {
      throw new Error("Helicone API returned an unexpected payload");
    }

    const loaded = new Map<string, CostEntry>();

    for (const model of data.data) {
      if (model == null || typeof model.model !== "string" || model.model === "") {
        continue;
      }

      const config: CostConfig = {
        input: model.input_cost_per_1m ?? 0,
        output: model.output_cost_per_1m ?? 0,
        // Without a dedicated cache-read price, cached tokens cost the same as input.
        cached: model.prompt_cache_read_per_1m ?? model.input_cost_per_1m ?? 0,
      };

      loaded.set(model.model.toLowerCase(), {
        operator: model.operator,
        config,
      });
    }

    // Swap in the fully built table only after everything above succeeded.
    heliconeCosts = loaded;
    heliconeCostsLoaded = true;

    const reportedTotal = data.metadata?.total_models ?? loaded.size;
    console.log(`\x1b[96m 🌎 Loaded ${reportedTotal} model costs from Helicone API\x1b[0m`);
  } catch (error) {
    console.warn(`\x1b[33m ⚠️ Failed to load Helicone costs: ${error instanceof Error ? error.message : error}\x1b[0m`);
  }
}

/**
 * Looks up `key` among the table's own entries in CUSTOM_MODEL_COSTS.
 * Inherited members such as "constructor" or "toString" are never returned.
 */
function findCustomCost(key: string): CostConfig | undefined {
  return Object.prototype.hasOwnProperty.call(CUSTOM_MODEL_COSTS, key)
    ? CUSTOM_MODEL_COSTS[key]
    : undefined;
}

/**
 * Returns the config of the longest Helicone pattern with the given operator
 * that matches `model`. Ties keep the entry that was inserted first.
 */
function findLongestPatternMatch(
  model: string,
  operator: "startsWith" | "includes"
): CostConfig | undefined {
  let best: CostConfig | undefined;
  let bestLength = -1;

  for (const [pattern, entry] of heliconeCosts) {
    if (entry.operator !== operator || pattern.length <= bestLength) {
      continue;
    }

    const matches =
      operator === "startsWith" ? model.startsWith(pattern) : model.includes(pattern);

    if (matches) {
      best = entry.config;
      bestLength = pattern.length;
    }
  }

  return best;
}

/**
 * Gets the cost configuration for a model.
 *
 * Priority order:
 * 1. Custom model costs (CUSTOM_MODEL_COSTS), by lower-cased name, then by the
 *    name exactly as given
 * 2. Helicone API costs (with operator matching): an "equals" entry for the
 *    lower-cased name, then the longest matching "startsWith" pattern, then
 *    the longest matching "includes" pattern
 * 3. Fallback cost (all zeros)
 *
 * The longest pattern wins so that a generic pattern (e.g. "gpt-4") cannot
 * shadow a more specific one (e.g. "gpt-4o-mini"), whatever order the entries
 * were loaded in.
 *
 * @param model The model name to look up
 * @returns The cost configuration for the model
 */
export function getCostConfig(model: string): CostConfig {
  const normalizedModel = model.toLowerCase();

  /**
   * Check custom costs first (highest priority)
   */
  const customCost = findCustomCost(normalizedModel) ?? findCustomCost(model);
  if (customCost) {
    return customCost;
  }

  /**
   * Check Helicone costs with operator matching
   */
  const exactMatch = heliconeCosts.get(normalizedModel);
  if (exactMatch?.operator === "equals") {
    return exactMatch.config;
  }

  const patternMatch =
    findLongestPatternMatch(normalizedModel, "startsWith") ??
    findLongestPatternMatch(normalizedModel, "includes");
  if (patternMatch) {
    return patternMatch;
  }

  // An entry stored under this exact key whose operator was none of the
  // handled values still beats the zero fallback.
  if (exactMatch) {
    return exactMatch.config;
  }

  /**
   * Return fallback since no matching cost was found
   */
  return { input: 0, cached: 0, output: 0 };
}

/**
 * Computes the total cost (in USD) for a given model and usage.
 *
 * When the usage reports cached tokens and the model has a positive cached
 * rate, cached tokens are billed at the cached rate and only the remaining
 * prompt tokens at the input rate. The remaining count is clamped at zero so
 * a cached count larger than `prompt_tokens` cannot produce a negative cost.
 *
 * @param model The model to compute the cost for.
 * @param usage The usage object.
 * @returns The total cost in USD rounded to 6 decimal places, or null when
 *   there is no usage data or the computed total is not greater than zero
 *   (for example when no pricing is known for the model).
 */
export function calculateCost(
  model: string,
  usage?: Usage
): number | null {
  if (!usage) return null;

  // `??` and `?.` also cover explicit nulls, which JSON payloads may contain
  // even though the Usage type only declares the fields as optional.
  const promptTokens = usage.prompt_tokens ?? 0;
  const completionTokens = usage.completion_tokens ?? 0;
  const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;

  const cost = getCostConfig(model);

  const billCachedSeparately = cachedTokens > 0 && cost.cached > 0;
  const uncachedPromptTokens = billCachedSeparately
    ? Math.max(promptTokens - cachedTokens, 0)
    : promptTokens;

  const cachedCost = billCachedSeparately
    ? (cachedTokens / 1_000_000) * cost.cached
    : 0;
  const inputCost = (uncachedPromptTokens / 1_000_000) * cost.input;
  const outputCost = (completionTokens / 1_000_000) * cost.output;

  const total = inputCost + cachedCost + outputCost;

  return total > 0 ? Number.parseFloat(total.toFixed(6)) : null;
}