From 6c13b5e657204d30d787f44d9adc28291795a787 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 8 May 2026 23:08:06 -0700 Subject: [PATCH] =?UTF-8?q?test(harness):=20isProseAUQVisible=20=E2=80=94?= =?UTF-8?q?=20lower=20numbered=20threshold=20to=202=20(matches=20lettered)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 4KB tail window often contains only options 2-4 of a 4-option numbered prose AUQ because the model emits the question header + option 1 several KB earlier in the buffer. The threshold of 3 distinct numbered markers caused the detector to miss real prose AUQs whenever option 1 had scrolled out. Threshold 2 matches the lettered branch and is still tightly gated by: - Line-start anchoring (no false positives on inline `1.` references) - No-cursor gate (defers to native UI when ❯ 1. is currently rendered) - The 4KB tail window itself (prose-AUQ rendering happens at the end of the model's response, so options are clustered in the tail) Co-Authored-By: Claude Opus 4.7 (1M context) --- test/helpers/claude-pty-runner.ts | 10 ++++++++-- test/helpers/claude-pty-runner.unit.test.ts | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/test/helpers/claude-pty-runner.ts b/test/helpers/claude-pty-runner.ts index 435ac23ce..5e4c84f13 100644 --- a/test/helpers/claude-pty-runner.ts +++ b/test/helpers/claude-pty-runner.ts @@ -519,12 +519,18 @@ export function isProseAUQVisible(visible: string): boolean { } if (letteredHits.size >= 2) return true; - // Pattern 2: 3+ distinct numbered options at line starts, AND no + // Pattern 2: 2+ distinct numbered options at line starts, AND no // `❯1.` cursor IN THE RECENT TAIL (not the full buffer — a // trust-dialog `❯ 1. Yes` at boot is in scrollback forever and // would otherwise suppress this path for the rest of the run). // The native-UI deferral only applies when the cursor list is // currently rendered, not historically. 
+ // + // Threshold 2 (matching the lettered branch): the tail is a 4KB window, + // and by the time the polling loop sees it, the model may have emitted + // option 1 several KB earlier and only 2/3/4 remain in tail. Inline + // references ("see 1. and 2.") are excluded by the line-start anchor, so a + // false positive needs a real 2+ item list in the tail with no ❯ cursor. if (/❯\s*1\./.test(tail)) return false; const numberedRe = /(?:^|\n)[ \t❯]*([1-9])\./g; const numberedHits = new Set(); @@ -532,7 +538,7 @@ export function isProseAUQVisible(visible: string): boolean { while ((nm = numberedRe.exec(tail)) !== null) { if (nm[1]) numberedHits.add(nm[1]); } - return numberedHits.size >= 3; + return numberedHits.size >= 2; } /** diff --git a/test/helpers/claude-pty-runner.unit.test.ts b/test/helpers/claude-pty-runner.unit.test.ts index 12c51875e..ab1a89cbc 100644 --- a/test/helpers/claude-pty-runner.unit.test.ts +++ b/test/helpers/claude-pty-runner.unit.test.ts @@ -254,10 +254,17 @@ A) Only one option mentioned in passing. expect(isProseAUQVisible(sample)).toBe(false); }); - test('returns false on 2 numbered options (need 3+ for prose numbered)', () => { + test('matches 2 numbered options (threshold matches lettered branch — tails miss option 1)', () => { const sample = ` 1. First note. 2. Second note. +`; + expect(isProseAUQVisible(sample)).toBe(true); + }); + + test('returns false on a single numbered option', () => { + const sample = ` +1. Only one option mentioned. `; expect(isProseAUQVisible(sample)).toBe(false); });