diff --git a/test/helpers/claude-pty-runner.ts b/test/helpers/claude-pty-runner.ts index 5e4c84f13..f06ba05f3 100644 --- a/test/helpers/claude-pty-runner.ts +++ b/test/helpers/claude-pty-runner.ts @@ -1402,6 +1402,27 @@ export interface PlanSkillObservation { * the section, and that's the regression we want to catch. */ planFile?: string; + /** + * High-water-mark flag: did the polling loop ever observe a + * prose-rendered AskUserQuestion (lettered or numbered options visible) + * during the run? Set true the first poll iteration that + * isProseAUQVisible returns true on the recent buffer; remains true + * for the rest of the observation. + * + * The 2KB `evidence` window often misses the prose-AUQ moment because + * by the time outcome=plan_ready fires, the ExitPlanMode "Ready to + * execute" UI has pushed the options out of the tail. Tests that need + * to assert "the user saw the question at SOME point" should check + * this flag rather than re-running isProseAUQVisible on the truncated + * evidence. + */ + proseAUQEverObserved?: boolean; + /** + * High-water-mark flag: did the LLM judge ever return state='waiting' + * during the run? Same shape as proseAUQEverObserved but driven by the + * Haiku judge fallback rather than the regex detector. + */ + waitingEverObserved?: boolean; } /** @@ -1543,6 +1564,8 @@ export async function runPlanSkillObservation(opts: { ...classified, evidence: visible.slice(-2000), elapsedMs: Date.now() - startedAt, + proseAUQEverObserved, + waitingEverObserved, }; // Capture the plan file path on any outcome where one may have been // written. Gating only on 'plan_ready' missed two cases: (1) the @@ -1594,6 +1617,8 @@ export async function runPlanSkillObservation(opts: { : ''), evidence: finalVisible.slice(-2000), elapsedMs: Date.now() - startedAt, + proseAUQEverObserved, + waitingEverObserved, }; } return { @@ -1605,6 +1630,8 @@ export async function runPlanSkillObservation(opts: { : ''), evidence: finalVisible.slice(-2000), elapsedMs: Date.now() - startedAt, + proseAUQEverObserved, + waitingEverObserved, }; } finally { await session.close(); diff --git a/test/skill-e2e-autoplan-auto-mode.test.ts b/test/skill-e2e-autoplan-auto-mode.test.ts index 8c94bad2f..2af602050 100644 --- a/test/skill-e2e-autoplan-auto-mode.test.ts +++ b/test/skill-e2e-autoplan-auto-mode.test.ts @@ -66,8 +66,8 @@ describeE2E('autoplan AskUserQuestion-blocked smoke (gate)', () => { // 3. Numbered/lettered options visible in TTY as prose (post-v1.28 prose-AUQ rendering) // If NONE of these are present, the question was silently buried. const blockedVisible = /BLOCKED\s*[—-]\s*AskUserQuestion/i.test(obs.evidence); - const proseAUQVisible = isProseAUQVisible(obs.evidence); - const surfaceVisible = blockedVisible || proseAUQVisible; + const proseAUQVisible = isProseAUQVisible(obs.evidence) || obs.proseAUQEverObserved === true; + const surfaceVisible = blockedVisible || proseAUQVisible || obs.waitingEverObserved === true; if ( obs.outcome === 'auto_decided' || diff --git a/test/skill-e2e-plan-ceo-plan-mode.test.ts b/test/skill-e2e-plan-ceo-plan-mode.test.ts index b75f5d6f3..f22f6baf8 100644 --- a/test/skill-e2e-plan-ceo-plan-mode.test.ts +++ b/test/skill-e2e-plan-ceo-plan-mode.test.ts @@ -115,8 +115,8 @@ describeE2E('plan-ceo-review plan-mode smoke (gate)', () => { // 2. `BLOCKED — AskUserQuestion` string visible in TTY (post-v1.28 BLOCKED rule) // 3. Numbered/lettered options visible in TTY as prose (post-v1.28 prose-AUQ rendering) const blockedVisible = /BLOCKED\s*[—-]\s*AskUserQuestion/i.test(obs.evidence); - const proseAUQVisible = isProseAUQVisible(obs.evidence); - const surfaceVisible = blockedVisible || proseAUQVisible; + const proseAUQVisible = isProseAUQVisible(obs.evidence) || obs.proseAUQEverObserved === true; + const surfaceVisible = blockedVisible || proseAUQVisible || obs.waitingEverObserved === true; if ( obs.outcome === 'auto_decided' || diff --git a/test/skill-e2e-plan-design-plan-mode.test.ts b/test/skill-e2e-plan-design-plan-mode.test.ts index fcf1fd94b..9c93a7a9e 100644 --- a/test/skill-e2e-plan-design-plan-mode.test.ts +++ b/test/skill-e2e-plan-design-plan-mode.test.ts @@ -63,8 +63,8 @@ describeE2E('plan-design-review plan-mode smoke (gate)', () => { // Surface visibility check (same as ceo / autoplan migrations): user // must SEE the question via BLOCKED string OR prose-rendered AUQ options. const blockedVisible = /BLOCKED\s*[—-]\s*AskUserQuestion/i.test(obs.evidence); - const proseAUQVisible = isProseAUQVisible(obs.evidence); - const surfaceVisible = blockedVisible || proseAUQVisible; + const proseAUQVisible = isProseAUQVisible(obs.evidence) || obs.proseAUQEverObserved === true; + const surfaceVisible = blockedVisible || proseAUQVisible || obs.waitingEverObserved === true; if ( obs.outcome === 'auto_decided' || diff --git a/test/skill-e2e-plan-eng-plan-mode.test.ts b/test/skill-e2e-plan-eng-plan-mode.test.ts index 82bbd6ed2..eea1fb5be 100644 --- a/test/skill-e2e-plan-eng-plan-mode.test.ts +++ b/test/skill-e2e-plan-eng-plan-mode.test.ts @@ -83,8 +83,8 @@ describeE2E('plan-eng-review plan-mode smoke (gate)', () => { // section in the plan file (legacy) OR a BLOCKED string in TTY OR // prose-rendered AUQ options in TTY. const blockedVisible = /BLOCKED\s*[—-]\s*AskUserQuestion/i.test(obs.evidence); - const proseAUQVisible = isProseAUQVisible(obs.evidence); - const surfaceVisible = blockedVisible || proseAUQVisible; + const proseAUQVisible = isProseAUQVisible(obs.evidence) || obs.proseAUQEverObserved === true; + const surfaceVisible = blockedVisible || proseAUQVisible || obs.waitingEverObserved === true; if ( obs.outcome === 'auto_decided' ||