From 1919265b49fcacfb9b00dcceb474344be5b6f1eb Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 11 May 2026 09:48:46 -0700 Subject: [PATCH] test(auq-compliance): stretch budgets to fit /plan-ceo-review Step 0F MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /plan-ceo-review's Step 0F mode-selection AskUserQuestion fires after the preamble drains: gbrain sync probe, telemetry log, learnings search, review-readiness dashboard read, recent-artifacts recovery. On a fresh PTY boot under concurrent test contention (max-concurrency 15), those bash blocks sometimes consume 200-300 seconds before the first AUQ renders. The previous 300s budget was tight enough that markersSeen=0 on both main and the contributor wave branch — the model was still working through preamble when the harness gave up. Composed budgets: - poll budget: 300s → 540s - PTY session timeout: 360s → 600s - bun test wrapper timeout: 420s → 660s Each layer outlasts the one inside it. The harness still polls every 2s and breaks as soon as ELI10 + Recommendation + cursor are all visible, so a fast Step 0F still finishes in seconds. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2e-ask-user-question-format-compliance.test.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/skill-e2e-ask-user-question-format-compliance.test.ts b/test/skill-e2e-ask-user-question-format-compliance.test.ts index f0485d85d..3913cbdd7 100644 --- a/test/skill-e2e-ask-user-question-format-compliance.test.ts +++ b/test/skill-e2e-ask-user-question-format-compliance.test.ts @@ -73,7 +73,7 @@ describeE2E('AskUserQuestion format compliance (gate)', () => { async () => { const session = await launchClaudePty({ permissionMode: 'plan', - timeoutMs: 360_000, + timeoutMs: 600_000, }); try { @@ -91,7 +91,16 @@ describeE2E('AskUserQuestion format compliance (gate)', () => { // While polling, auto-grant any permission dialogs we see in the // recent tail (preamble side-effects: touch on a sensitive file, // etc) so the agent isn't blocked. - const budgetMs = 300_000; + // + // Budget bumped 300s → 540s in v1.32: /plan-ceo-review's preamble runs + // multiple bash blocks (gbrain sync probe, telemetry, learnings search, + // dashboard read) before reaching its mode-selection AskUserQuestion in + // Step 0F. On substantive branches (or under contention from concurrent + // tests running at max-concurrency 15), 300s sometimes wasn't enough + // for the model to drain Step 0 work before emitting the first AUQ. + // 540s sits below the 600s PTY session timeout and the 660s bun test + // wrapper timeout, and tracks the magnitude the plan-design-with-ui test uses. + const budgetMs = 540_000; const start = Date.now(); let captured = ''; let askUserQuestionVisible = false; @@ -191,6 +200,6 @@ describeE2E('AskUserQuestion format compliance (gate)', () => { await session.close(); } }, - 420_000, + 660_000, ); });