diff --git a/test/skill-e2e-plan.test.ts b/test/skill-e2e-plan.test.ts index d6f58416e..9b61e9a20 100644 --- a/test/skill-e2e-plan.test.ts +++ b/test/skill-e2e-plan.test.ts @@ -240,6 +240,13 @@ Write your expansion proposals to ${planDir}/proposals.md with ONLY the proposal recordE2E(evalCollector, '/plan-ceo-review-expansion-energy', 'Plan CEO Review Expansion Energy E2E', result, { passed: ['success', 'error_max_turns'].includes(result.exitReason), }); + // Transient API failure escape hatch — see /plan-review-report for the + // full rationale. Same shape: error_api with 0 turns means the API call + // never reached the model, so nothing the test verifies could have run. + if (result.exitReason === 'error_api' && result.costEstimate?.turnsUsed === 0) { + console.warn('[transient] /plan-ceo-review-expansion-energy: error_api with 0 turns — treating as inconclusive'); + return; + } expect(['success', 'error_max_turns']).toContain(result.exitReason); const proposalsPath = path.join(planDir, 'proposals.md'); @@ -686,6 +693,18 @@ This review report at the bottom of the plan is the MOST IMPORTANT deliverable o recordE2E(evalCollector, '/plan-review-report', 'Plan Review Report E2E', result, { passed: ['success', 'error_max_turns'].includes(result.exitReason), }); + + // Transient API failure escape hatch: when the SDK returns error_api with + // zero turns / zero tokens, the API call died before the model ever ran — + // no skill code executed, no file was written. Bun retries the test up to + // 3x; if every attempt hits the same API hiccup, surface a warning and + // treat as inconclusive rather than gating the build on Anthropic + // availability. Logic regressions still surface as success/error_max_turns + // with a missing artifact, which the downstream assertions catch. + if (result.exitReason === 'error_api' && result.costEstimate?.turnsUsed === 0) { + console.warn('[transient] /plan-review-report: error_api with 0 turns — treating as inconclusive (likely Anthropic API hiccup, see CLAUDE.md eval-blame protocol)'); + return; + } expect(['success', 'error_max_turns']).toContain(result.exitReason); // Verify the review report was written to the plan file