From 5294c65777cdaec1293101319f69fea09a7dd14e Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Thu, 23 Apr 2026 11:34:51 -0700
Subject: [PATCH] fix(eval): handle SDK max-turns throw gracefully

Some @anthropic-ai/claude-agent-sdk versions throw from the query
generator when maxTurns is reached, instead of emitting a result
message with subtype='error_max_turns'. The runner treated that as a
non-retryable error and killed the whole periodic run on the first
fixture that exceeded its turn cap.

Added isMaxTurnsError() detector and a catch branch that synthesizes
an AgentSdkResult from events captured before the throw, with
exitReason='error_max_turns' and costUsd=0 (unknown from the thrown
path). The metric function still runs against whatever assistant
turns were collected, so the trial produces a usable number.

Hoisted events/assistantTurns/toolCalls/assistantTextParts and the
timing counters out of the inner try so the catch branch can read
them. No behavior change on the success path or on rate-limit retry
paths.
---
 test/helpers/agent-sdk-runner.ts | 66 ++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/test/helpers/agent-sdk-runner.ts b/test/helpers/agent-sdk-runner.ts
index 1504384e..a4df71d9 100644
--- a/test/helpers/agent-sdk-runner.ts
+++ b/test/helpers/agent-sdk-runner.ts
@@ -209,6 +209,19 @@ export function isRateLimitEvent(msg: SDKMessage): boolean {
   return info?.status === 'rejected';
 }
 
+/**
+ * True if `err` is the SDK's "max turns reached" throw. Some SDK versions
+ * raise this as an exception from the generator instead of emitting a
+ * result message with subtype='error_max_turns'. We treat it as terminal-
+ * but-recoverable: record what we collected and continue, rather than
+ * failing the whole run.
+ */
+export function isMaxTurnsError(err: unknown): boolean {
+  if (!err || typeof err !== 'object') return false;
+  const msg = (err as { message?: string }).message ?? '';
+  return /reached maximum number of turns|max.?turns/i.test(msg);
+}
+
 // ---------------------------------------------------------------------------
 // Version resolution (cached)
 // ---------------------------------------------------------------------------
@@ -259,6 +272,20 @@ export async function runAgentSdkTest(
   while (attempt <= maxRetries) {
     await sem.acquire();
     const startMs = Date.now();
+
+    // Hoisted so the max-turns catch branch can synthesize a result from
+    // whatever we captured before the SDK threw.
+    const events: SDKMessage[] = [];
+    const assistantTurns: SDKAssistantMessage[] = [];
+    const toolCalls: Array<{ tool: string; input: unknown; output: string }> = [];
+    const assistantTextParts: string[] = [];
+    let firstResponseMs = 0;
+    let lastEventMs = startMs;
+    let maxInterTurnMs = 0;
+    let systemInitVersion = 'unknown';
+    let rateLimited: unknown = null;
+    let terminalResult: SDKResultMessage | null = null;
+
     try {
       const sdkOpts: Options = {
         model,
@@ -280,17 +307,6 @@ export async function runAgentSdkTest(
         sdkOpts.systemPrompt = opts.systemPrompt;
       }
 
-      const events: SDKMessage[] = [];
-      const assistantTurns: SDKAssistantMessage[] = [];
-      const toolCalls: Array<{ tool: string; input: unknown; output: string }> = [];
-      const assistantTextParts: string[] = [];
-      let firstResponseMs = 0;
-      let lastEventMs = startMs;
-      let maxInterTurnMs = 0;
-      let systemInitVersion = 'unknown';
-      let rateLimited: unknown = null;
-      let terminalResult: SDKResultMessage | null = null;
-
       const q = queryImpl({
         prompt: opts.userPrompt,
         options: sdkOpts,
@@ -382,6 +398,34 @@ export async function runAgentSdkTest(
       };
     } catch (err) {
       lastErr = err;
+
+      // "Max turns reached" is the SDK's way of saying "this session ran
+      // out of turns." It's thrown from the generator instead of emitted
+      // as a result message. Treat as a successful-but-capped trial: the
+      // assistant turns we collected are real and carry a metric. Record
+      // them with exitReason='error_max_turns' rather than failing the
+      // whole run.
+      if (isMaxTurnsError(err)) {
+        const durationMs = Date.now() - startMs;
+        return {
+          events,
+          assistantTurns,
+          toolCalls,
+          output: assistantTextParts.join('\n'),
+          exitReason: 'error_max_turns',
+          turnsUsed: assistantTurns.length,
+          durationMs,
+          firstResponseMs,
+          maxInterTurnMs,
+          costUsd: 0, // unknown from thrown-error path
+          model,
+          sdkVersion: resolveSdkVersion(),
+          sdkClaudeCodeVersion: systemInitVersion,
+          resolvedBinaryPath: opts.pathToClaudeCodeExecutable ?? 'sdk-default',
+          browseErrors: [],
+        };
+      }
+
       const isRetryable = isRateLimitThrown(err);
       if (!isRetryable || attempt >= maxRetries) {
         if (isRetryable) {