From c77f064122e6edb20edcd3a2f937dbfd257a1f43 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Mon, 30 Mar 2026 00:41:31 -0700
Subject: [PATCH] test: agent conciseness, focus stealing, opus model, switchTab opts

Tests for the three UX fixes:
- System prompt contains STOP/CONCISE/Do NOT keep exploring
- sidebar agent uses opus (not sonnet) for prompt injection resistance
- switchTab has bringToFront option, defaults to true (opt-out)
- handleCommand tab pinning uses bringToFront: false (no focus steal)
- Focus-steal test fails when no switchTab call is matched (no vacuous pass)
- Updated stale tests: switchTab signature, allowedTools excludes Write,
  narration -> conciseness, tab pinning restore calls

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 browse/test/sidebar-agent.test.ts |  8 ++---
 browse/test/sidebar-ux.test.ts    | 61 ++++++++++++++++++++++++++-----
 test/skill-validation.test.ts     |  7 ++--
 3 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/browse/test/sidebar-agent.test.ts b/browse/test/sidebar-agent.test.ts
index ee77a33b..872bbd34 100644
--- a/browse/test/sidebar-agent.test.ts
+++ b/browse/test/sidebar-agent.test.ts
@@ -513,17 +513,17 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
     expect(handleFn).toContain('tabId');
     // Should save and restore the active tab
     expect(handleFn).toContain('savedTabId');
-    expect(handleFn).toContain('browserManager.switchTab(tabId)');
+    expect(handleFn).toContain('switchTab(tabId');
   });

   test('handleCommand restores active tab after command (success path)', () => {
-    // On success, should restore savedTabId
+    // On success, should restore savedTabId without stealing focus
     const handleFn = serverSrc.slice(
       serverSrc.indexOf('async function handleCommand('),
       serverSrc.length,
     );
     // Count restore calls — should appear in both success and error paths
-    const restoreCount = (handleFn.match(/browserManager\.switchTab\(savedTabId\)/g) || []).length;
+    const restoreCount = (handleFn.match(/switchTab\(savedTabId/g) || []).length;
     expect(restoreCount).toBeGreaterThanOrEqual(2); // success + error paths
   });

@@ -532,7 +532,7 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
     const catchBlock = serverSrc.slice(
       serverSrc.indexOf('} catch (err: any) {', serverSrc.indexOf('async function handleCommand(')),
     );
-    expect(catchBlock).toContain('switchTab(savedTabId)');
+    expect(catchBlock).toContain('switchTab(savedTabId');
   });

   test('tab pinning only activates when tabId is provided', () => {
diff --git a/browse/test/sidebar-ux.test.ts b/browse/test/sidebar-ux.test.ts
index a96f57a2..15bfbce5 100644
--- a/browse/test/sidebar-ux.test.ts
+++ b/browse/test/sidebar-ux.test.ts
@@ -41,13 +41,13 @@ describe('sidebar system prompt (server.ts)', () => {
     expect(promptSection).toContain('url`');
   });

-  test('system prompt includes narration instructions', () => {
+  test('system prompt includes conciseness and stop instructions', () => {
     const promptSection = serverSrc.slice(
       serverSrc.indexOf('const systemPrompt = ['),
       serverSrc.indexOf("].join('\\n');", serverSrc.indexOf('const systemPrompt = [')) + 15,
     );
-    expect(promptSection).toContain('Narrate');
-    expect(promptSection).toContain('plain English');
+    expect(promptSection).toContain('CONCISE');
+    expect(promptSection).toContain('STOP');
   });

   test('--resume is never used in spawnClaude args', () => {
@@ -385,12 +385,11 @@ describe('browser tab bar (sidepanel.html)', () => {
 describe('sidebar→browser tab switch', () => {
   const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');

-  test('switchTab calls bringToFront so browser visually switches', () => {
-    const switchFn = bmSrc.slice(
-      bmSrc.indexOf('switchTab(id: number)'),
-      bmSrc.indexOf('switchTab(id: number)') + 400,
-    );
-    expect(switchFn).toContain('bringToFront');
+  test('switchTab supports bringToFront option', () => {
+    expect(bmSrc).toContain('switchTab(id: number, opts?');
+    expect(bmSrc).toContain('bringToFront');
+    // Default behavior still brings to front (opt-out, not opt-in)
+    expect(bmSrc).toContain('bringToFront !== false');
   });
 });

@@ -974,6 +973,50 @@ describe('chat message dedup (prevents repeat rendering)', () => {
   });
 });

+// ─── Agent conciseness and focus stealing ───────────────────────
+
+describe('sidebar agent conciseness + no focus stealing', () => {
+  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');
+  const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');
+
+  test('system prompt tells agent to STOP when task is done', () => {
+    const promptSection = serverSrc.slice(
+      serverSrc.indexOf('const systemPrompt = ['),
+      serverSrc.indexOf("].join('\\n');", serverSrc.indexOf('const systemPrompt = [')),
+    );
+    expect(promptSection).toContain('STOP');
+    expect(promptSection).toContain('CONCISE');
+    expect(promptSection).toContain('Do NOT keep exploring');
+  });
+
+  test('sidebar agent uses opus (not sonnet) for prompt injection resistance', () => {
+    const spawnFn = serverSrc.slice(
+      serverSrc.indexOf('function spawnClaude('),
+      serverSrc.indexOf('\nfunction ', serverSrc.indexOf('function spawnClaude(') + 1),
+    );
+    expect(spawnFn).toContain("'opus'");
+  });
+
+  test('switchTab has bringToFront option', () => {
+    expect(bmSrc).toContain('bringToFront?: boolean');
+    expect(bmSrc).toContain('bringToFront !== false');
+  });
+
+  test('handleCommand tab pinning does NOT steal focus', () => {
+    // All switchTab calls in handleCommand should use bringToFront: false
+    const handleFn = serverSrc.slice(
+      serverSrc.indexOf('async function handleCommand('),
+      serverSrc.indexOf('\n// ', serverSrc.indexOf('async function handleCommand(') + 200),
+    );
+    const switchCalls = handleFn.match(/switchTab\([^)]+\)/g) || [];
+    // Guard against a vacuous pass: the loop below asserts nothing when no call matches
+    expect(switchCalls.length).toBeGreaterThan(0);
+    for (const call of switchCalls) {
+      expect(call).toContain('bringToFront: false');
+    }
+  });
+});
+
 // ─── LLM-based cleanup architecture ─────────────────────────────

 describe('LLM-based cleanup (smart agent cleanup)', () => {
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 50beaa90..e2dcb67e 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -1559,12 +1559,13 @@ describe('sidebar agent (#584)', () => {
   });

-  // #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
-  test('server.ts allowedTools includes Write', () => {
+  // #584 — Server Write: server.ts allowedTools excludes Write (agent is read-only + Bash)
+  test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
     const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
     // Find the sidebar allowedTools in the headed-mode path
     const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
     expect(match).not.toBeNull();
-    expect(match![1]).toContain('Write');
+    expect(match![1]).toContain('Bash');
+    expect(match![1]).not.toContain('Write');
   });

   // #584 — Sidebar stderr: stderr handler is not empty