From 05f480e1a9643800b01416054e085fb278110dc3 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 8 May 2026 22:57:39 -0700 Subject: [PATCH] =?UTF-8?q?test(harness):=20isProseAUQVisible=20=E2=80=94?= =?UTF-8?q?=20gate=20numbered=20path=20on=20tail,=20not=20full=20buffer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The numbered-options branch of isProseAUQVisible deferred to isNumberedOptionListVisible whenever a `❯ 1.` cursor was visible in the full buffer. But the boot trust dialog (`❯ 1. Yes, trust`) lives in scrollback for the entire run, so this gate suppressed prose-numbered detection for any session that had the trust prompt at startup — i.e., every E2E run after the first user-trust acceptance. Fix: check only the last 4KB tail. Native-UI deferral applies when the cursor list is CURRENTLY rendered, not historically present in scrollback. Adds a regression test that puts the trust dialog in early scrollback + 5KB filler + a current prose-AUQ render, asserts true. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/helpers/claude-pty-runner.ts | 11 +++++++---- test/helpers/claude-pty-runner.unit.test.ts | 14 +++++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/test/helpers/claude-pty-runner.ts b/test/helpers/claude-pty-runner.ts index faa0b6d38..435ac23ce 100644 --- a/test/helpers/claude-pty-runner.ts +++ b/test/helpers/claude-pty-runner.ts @@ -519,10 +519,13 @@ export function isProseAUQVisible(visible: string): boolean { } if (letteredHits.size >= 2) return true; - // Pattern 2: 3+ distinct numbered options at line starts, AND no `❯1.` - // cursor anywhere in the FULL visible buffer (which would mean - // isNumberedOptionListVisible already covers the case via the native UI). - if (/❯\s*1\./.test(visible)) return false; + // Pattern 2: 3+ distinct numbered options at line starts, AND no + // `❯1.` cursor IN THE RECENT TAIL (not the full buffer — a + // trust-dialog `❯ 1. Yes` at boot is in scrollback forever and + // would otherwise suppress this path for the rest of the run). + // The native-UI deferral only applies when the cursor list is + // currently rendered, not historically. + if (/❯\s*1\./.test(tail)) return false; const numberedRe = /(?:^|\n)[ \t❯]*([1-9])\./g; const numberedHits = new Set(); let nm: RegExpExecArray | null; diff --git a/test/helpers/claude-pty-runner.unit.test.ts b/test/helpers/claude-pty-runner.unit.test.ts index 1e79ca137..12c51875e 100644 --- a/test/helpers/claude-pty-runner.unit.test.ts +++ b/test/helpers/claude-pty-runner.unit.test.ts @@ -226,7 +226,7 @@ What's the task? A few options: expect(isProseAUQVisible(sample)).toBe(true); }); - test('returns false when ❯ 1. cursor is present (native UI handled by isNumberedOptionListVisible)', () => { + test('returns false when ❯ 1. cursor is present in the recent tail (native UI handled by isNumberedOptionListVisible)', () => { const sample = ` ❯ 1. First option 2. Second option @@ -235,6 +235,18 @@ What's the task? A few options: expect(isProseAUQVisible(sample)).toBe(false); }); + test('does NOT suppress numbered-prose detection when ❯ 1. is only in early scrollback (trust dialog)', () => { + // Boot trust dialog rendered ❯ 1. Yes at startup, then a long body of + // model output, then prose-rendered numbered options now. The historic + // ❯ 1. is in the full buffer but NOT in the recent tail. Should detect + // the prose AUQ. + const trustHeader = '❯ 1. Yes, trust\n 2. No\n'; + const filler = 'x'.repeat(5000); // pushes trust dialog out of last 4KB tail + const proseAUQ = `\n 1. Review the docs\n 2. Investigate the code\n 3. Defer to next session\n❯ \n`; + const sample = trustHeader + filler + proseAUQ; + expect(isProseAUQVisible(sample)).toBe(true); + }); + test('returns false on single lettered option', () => { const sample = ` A) Only one option mentioned in passing.