test: per-tab isolation, BROWSE_TAB pinning, tab tracking, sidebar UX

sidebar-agent.test.ts (new tests):
- BROWSE_TAB env var passed to claude process
- CLI reads BROWSE_TAB and sends tabId in body
- handleCommand accepts tabId, saves/restores activeTabId
- Tab pinning only activates when tabId provided
- Per-tab agent state, queue, concurrency
- processingTabs set for parallel agents

sidebar-ux.test.ts (new tests):
- context.on('page') tracks user-created tabs
- page.on('close') removes tabs from pages map
- Tab isolation uses BROWSE_TAB not system prompt hack
- Per-tab chat context in sidepanel
- Tab bar rendering, stop button, banner text

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-29 22:19:33 -07:00
parent a36a3ac4d7
commit 812882d1e6
2 changed files with 1029 additions and 0 deletions
+353
View File
@@ -67,6 +67,74 @@ function writeToInbox(
return finalFile;
}
/**
 * Shorten paths for display — same logic as sidebar-agent.ts shorten().
 *
 * Rewrites `/Users/<name>` to `~`, removes `/conductor/workspaces/<a>/<b>`
 * segments and the `.claude/skills/gstack/` prefix, and abbreviates the
 * browse binary path (`browse/dist/browse`) to the literal text `$B`.
 */
function shorten(str: string): string {
  return str
    .replace(/\/Users\/[^/]+/g, '~')
    .replace(/\/conductor\/workspaces\/[^/]+\/[^/]+/g, '')
    .replace(/\.claude\/skills\/gstack\//g, '')
    .replace(/browse\/dist\/browse/g, '$B');
}

/**
 * describeToolCall — replicated from sidebar-agent.ts for unit testing.
 *
 * Turns a tool invocation into a short plain-English narration.
 *
 * @param tool  Tool name (`Bash`, `Read`, `Edit`, `Write`, `Grep`, `Glob`, or anything else).
 * @param input Tool input object; falsy input yields an empty string.
 * @returns A human-readable description. Unrecognised tools fall back to the
 *          shortened JSON of `input` (first 80 characters), or `''` if that
 *          cannot be produced.
 *
 * NOTE(review): the argument extraction below no longer assumes the browse
 * binary is the first token of the command. The copy in sidebar-agent.ts
 * should receive the same change so this replica stays in sync — confirm.
 */
function describeToolCall(tool: string, input: any): string {
  if (!input) return '';

  // Only string commands are narrated as Bash; anything else falls through to
  // the generic JSON fallback instead of throwing on `.match`.
  if (tool === 'Bash' && typeof input.command === 'string' && input.command) {
    const cmd: string = input.command;
    const browseMatch = cmd.match(/\$B\s+(\w+)|browse[^\s]*\s+(\w+)/);
    if (browseMatch) {
      const browseCmd = browseMatch[1] || browseMatch[2];

      // Arguments are whatever follows the matched subcommand. Taking them by
      // token position (split + slice(2)) broke for prefixed commands such as
      // `cd dir && $B goto url` or commands with leading whitespace.
      const afterMatch = cmd.slice((browseMatch.index ?? 0) + browseMatch[0].length);
      const args = afterMatch
        .replace(/^\S*/, '') // drop the rest of the subcommand token (e.g. "-foo" in "cmd-foo")
        .trim()
        .split(/\s+/)
        .join(' '); // collapse whitespace runs, as the token-based version did

      switch (browseCmd) {
        case 'goto':
          return `Opening ${args.replace(/['"]/g, '')}`;
        case 'snapshot':
          if (args.includes('-i')) return 'Scanning for interactive elements';
          if (args.includes('-D')) return 'Checking what changed';
          return 'Taking a snapshot of the page';
        case 'screenshot':
          return `Saving screenshot${args ? ` to ${shorten(args)}` : ''}`;
        case 'click':
          return `Clicking ${args}`;
        case 'fill': {
          // First token is the target selector; everything after it is the typed text.
          const parts = args.split(/\s+/);
          return `Typing "${parts.slice(1).join(' ')}" into ${parts[0]}`;
        }
        case 'text':
          return 'Reading page text';
        case 'html':
          return args ? `Reading HTML of ${args}` : 'Reading full page HTML';
        case 'links':
          return 'Finding all links on the page';
        case 'forms':
          return 'Looking for forms';
        case 'console':
          return 'Checking browser console for errors';
        case 'network':
          return 'Checking network requests';
        case 'url':
          return 'Checking current URL';
        case 'back':
          return 'Going back';
        case 'forward':
          return 'Going forward';
        case 'reload':
          return 'Reloading the page';
        case 'scroll':
          return args ? `Scrolling to ${args}` : 'Scrolling down';
        case 'wait':
          return `Waiting for ${args}`;
        case 'inspect':
          return args ? `Inspecting CSS of ${args}` : 'Getting CSS for last picked element';
        case 'style':
          return `Changing CSS: ${args}`;
        case 'cleanup':
          return 'Removing page clutter (ads, popups, banners)';
        case 'prettyscreenshot':
          return 'Taking a clean screenshot';
        case 'css':
          return `Checking CSS property: ${args}`;
        case 'is':
          return `Checking if element is ${args}`;
        case 'diff':
          return `Comparing ${args}`;
        case 'responsive':
          return 'Taking screenshots at mobile, tablet, and desktop sizes';
        case 'status':
          return 'Checking browser status';
        case 'tabs':
          return 'Listing open tabs';
        case 'focus':
          return 'Bringing browser to front';
        case 'select':
          return `Selecting option in ${args}`;
        case 'hover':
          return `Hovering over ${args}`;
        case 'viewport':
          return `Setting viewport to ${args}`;
        case 'upload':
          return `Uploading file to ${args.split(/\s+/)[0]}`;
        default:
          return `Running browse ${browseCmd} ${args}`.trim();
      }
    }

    // Git commands are shown in full; everything else is capped at 100 chars.
    if (cmd.includes('git ')) return `Running: ${shorten(cmd)}`;
    const short = shorten(cmd);
    return short.length > 100 ? short.slice(0, 100) + '…' : short;
  }

  if (tool === 'Read' && input.file_path) return `Reading ${shorten(input.file_path)}`;
  if (tool === 'Edit' && input.file_path) return `Editing ${shorten(input.file_path)}`;
  if (tool === 'Write' && input.file_path) return `Writing ${shorten(input.file_path)}`;
  if (tool === 'Grep' && input.pattern) return `Searching for "${input.pattern}"`;
  if (tool === 'Glob' && input.pattern) return `Finding files matching ${input.pattern}`;

  // Generic fallback; JSON.stringify may throw (cycles) or yield undefined.
  try {
    return shorten(JSON.stringify(input)).slice(0, 80);
  } catch {
    return '';
  }
}
// ─── Test setup ──────────────────────────────────────────────────
let tmpDir: string;
@@ -197,3 +265,288 @@ describe('writeToInbox', () => {
expect(files.length).toBe(2);
});
});
// ─── describeToolCall (verbose narration) ────────────────────────
describe('describeToolCall', () => {
  /** Narrate a Bash tool call that runs the given shell command. */
  const narrateBash = (command: string): string => describeToolCall('Bash', { command });

  // Browse navigation commands
  test('goto → plain English with URL', () => {
    expect(narrateBash('$B goto https://example.com')).toBe('Opening https://example.com');
  });

  test('goto strips quotes from URL', () => {
    expect(narrateBash('$B goto "https://example.com"')).toBe('Opening https://example.com');
  });

  test('url → checking current URL', () => {
    expect(narrateBash('$B url')).toBe('Checking current URL');
  });

  test('back/forward/reload → plain English', () => {
    const historyCases: Array<[string, string]> = [
      ['$B back', 'Going back'],
      ['$B forward', 'Going forward'],
      ['$B reload', 'Reloading the page'],
    ];
    for (const [command, narration] of historyCases) {
      expect(narrateBash(command)).toBe(narration);
    }
  });

  // Snapshot variants
  test('snapshot -i → scanning for interactive elements', () => {
    expect(narrateBash('$B snapshot -i')).toBe('Scanning for interactive elements');
  });

  test('snapshot -D → checking what changed', () => {
    expect(narrateBash('$B snapshot -D')).toBe('Checking what changed');
  });

  test('snapshot (plain) → taking a snapshot', () => {
    expect(narrateBash('$B snapshot')).toBe('Taking a snapshot of the page');
  });

  // Interaction commands
  test('click → clicking element', () => {
    expect(narrateBash('$B click @e3')).toBe('Clicking @e3');
  });

  test('fill → typing into element', () => {
    // The typed text keeps its own quotes, so the narration shows them doubled.
    expect(narrateBash('$B fill @e4 "hello world"')).toBe('Typing ""hello world"" into @e4');
  });

  test('scroll with selector → scrolling to element', () => {
    expect(narrateBash('$B scroll .footer')).toBe('Scrolling to .footer');
  });

  test('scroll without args → scrolling down', () => {
    expect(narrateBash('$B scroll')).toBe('Scrolling down');
  });

  // Reading commands
  test('text → reading page text', () => {
    expect(narrateBash('$B text')).toBe('Reading page text');
  });

  test('html with selector → reading HTML of element', () => {
    expect(narrateBash('$B html .header')).toBe('Reading HTML of .header');
  });

  test('html without selector → reading full page HTML', () => {
    expect(narrateBash('$B html')).toBe('Reading full page HTML');
  });

  test('links → finding all links', () => {
    expect(narrateBash('$B links')).toBe('Finding all links on the page');
  });

  test('console → checking console', () => {
    expect(narrateBash('$B console')).toBe('Checking browser console for errors');
  });

  // Inspector commands
  test('inspect with selector → inspecting CSS', () => {
    expect(narrateBash('$B inspect .header')).toBe('Inspecting CSS of .header');
  });

  test('inspect without args → getting last picked element', () => {
    expect(narrateBash('$B inspect')).toBe('Getting CSS for last picked element');
  });

  test('style → changing CSS', () => {
    expect(narrateBash('$B style .header color red')).toBe('Changing CSS: .header color red');
  });

  test('cleanup → removing page clutter', () => {
    expect(narrateBash('$B cleanup --all')).toBe('Removing page clutter (ads, popups, banners)');
  });

  // Visual commands
  test('screenshot → saving screenshot', () => {
    expect(narrateBash('$B screenshot /tmp/shot.png')).toBe('Saving screenshot to /tmp/shot.png');
  });

  test('screenshot without path', () => {
    expect(narrateBash('$B screenshot')).toBe('Saving screenshot');
  });

  test('responsive → multi-size screenshots', () => {
    expect(narrateBash('$B responsive')).toBe('Taking screenshots at mobile, tablet, and desktop sizes');
  });

  // Non-browse tools
  test('Read tool → reading file', () => {
    const narration = describeToolCall('Read', { file_path: '/Users/foo/project/src/app.ts' });
    expect(narration).toBe('Reading ~/project/src/app.ts');
  });

  test('Grep tool → searching for pattern', () => {
    const narration = describeToolCall('Grep', { pattern: 'handleClick' });
    expect(narration).toBe('Searching for "handleClick"');
  });

  test('Glob tool → finding files', () => {
    const narration = describeToolCall('Glob', { pattern: '**/*.tsx' });
    expect(narration).toBe('Finding files matching **/*.tsx');
  });

  test('Edit tool → editing file', () => {
    const narration = describeToolCall('Edit', { file_path: '/Users/foo/src/main.ts' });
    expect(narration).toBe('Editing ~/src/main.ts');
  });

  // Edge cases
  test('null input → empty string', () => {
    expect(describeToolCall('Bash', null)).toBe('');
  });

  test('unknown browse command → generic description', () => {
    expect(narrateBash('$B newtab https://foo.com')).toContain('newtab');
  });

  test('non-browse bash → shortened command', () => {
    expect(narrateBash('echo hello')).toBe('echo hello');
  });

  test('full browse binary path recognized', () => {
    const fullPathCommand =
      '/Users/garrytan/.claude/skills/gstack/browse/dist/browse goto https://example.com';
    expect(narrateBash(fullPathCommand)).toBe('Opening https://example.com');
  });

  test('tab command → switching tab', () => {
    expect(narrateBash('$B tab 2')).toContain('tab');
  });
});
// ─── Per-tab agent concurrency (source code validation) ──────────
describe('per-tab agent concurrency', () => {
  const srcDir = path.join(__dirname, '..', 'src');
  const serverSrc = fs.readFileSync(path.join(srcDir, 'server.ts'), 'utf-8');
  const agentSrc = fs.readFileSync(path.join(srcDir, 'sidebar-agent.ts'), 'utf-8');

  /** Source text of spawnClaude, up to the next top-level `function` declaration. */
  const spawnClaudeSource = (): string => {
    const begin = serverSrc.indexOf('function spawnClaude(');
    const finish = serverSrc.indexOf('\nfunction ', begin + 1);
    return serverSrc.slice(begin, finish);
  };

  /** Assert that every needle occurs somewhere in the haystack. */
  const expectAll = (haystack: string, needles: string[]): void => {
    for (const needle of needles) {
      expect(haystack).toContain(needle);
    }
  };

  test('server has per-tab agent state map', () => {
    expectAll(serverSrc, ['tabAgents', 'TabAgentState', 'getTabAgent']);
  });

  test('server returns per-tab agent status in /sidebar-chat', () => {
    expectAll(serverSrc, ['getTabAgentStatus', 'tabAgentStatus']);
  });

  test('spawnClaude accepts forTabId parameter', () => {
    expectAll(spawnClaudeSource(), ['forTabId', 'tabState.status']);
  });

  test('sidebar-command endpoint uses per-tab agent state', () => {
    expectAll(serverSrc, ['msgTabId', 'tabState.status', 'tabState.queue']);
  });

  test('agent event handler resets per-tab state', () => {
    expectAll(serverSrc, ['eventTabId', "tabState.status = 'idle'"]);
  });

  test('agent event handler processes per-tab queue', () => {
    // Once an agent finishes, the next message must come from that same tab's queue.
    expectAll(serverSrc, ['tabState.queue.length > 0', 'tabState.queue.shift']);
  });

  test('sidebar-agent uses per-tab processing set', () => {
    expect(agentSrc).toContain('processingTabs');
    expect(agentSrc).not.toContain('isProcessing');
  });

  test('sidebar-agent sends tabId with all events', () => {
    expectAll(agentSrc, [
      // sendEvent takes an optional tabId
      'async function sendEvent(event: Record<string, any>, tabId?: number)',
      // askClaude pulls tabId out of the queue entry
      'const { prompt, args, stateFile, cwd, tabId }',
    ]);
  });

  test('sidebar-agent allows concurrent agents across tabs', () => {
    expectAll(agentSrc, [
      // poll() gates on the individual tab rather than a global flag
      'processingTabs.has(tid)',
      // askClaude is launched without awaiting so the loop keeps polling
      'askClaude(entry).catch',
    ]);
  });

  test('queue entries include tabId', () => {
    expect(spawnClaudeSource()).toContain('tabId: agentTabId');
  });

  test('health check monitors all per-tab agents', () => {
    expect(serverSrc).toContain('for (const [tid, state] of tabAgents)');
  });
});
describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
  const readSrc = (file: string): string =>
    fs.readFileSync(path.join(__dirname, '..', 'src', file), 'utf-8');
  const serverSrc = readSrc('server.ts');
  const agentSrc = readSrc('sidebar-agent.ts');
  const cliSrc = readSrc('cli.ts');

  const HANDLE_COMMAND = 'async function handleCommand(';

  test('sidebar-agent passes BROWSE_TAB env var to claude process', () => {
    // The spawned process env is expected to carry BROWSE_TAB as the stringified tab ID
    expect(agentSrc).toContain('BROWSE_TAB');
    expect(agentSrc).toContain('String(tid)');
  });

  test('CLI reads BROWSE_TAB and sends tabId in command body', () => {
    expect(cliSrc).toContain('process.env.BROWSE_TAB');
    expect(cliSrc).toContain('tabId: parseInt(browseTab');
  });

  test('handleCommand accepts tabId from request body', () => {
    // handleCommand ends at the next top-level async function if there is one,
    // otherwise at the next line comment at least 200 chars in.
    const fnStart = serverSrc.indexOf(HANDLE_COMMAND);
    const nextAsyncFn = serverSrc.indexOf('\nasync function ', fnStart + 1);
    const fnEnd = nextAsyncFn > 0 ? nextAsyncFn : serverSrc.indexOf('\n// ', fnStart + 200);
    const handleFn = serverSrc.slice(fnStart, fnEnd);

    // tabId is taken from the body
    expect(handleFn).toContain('tabId');
    // the previously active tab is remembered and the pinned tab selected
    expect(handleFn).toContain('savedTabId');
    expect(handleFn).toContain('browserManager.switchTab(tabId)');
  });

  test('handleCommand restores active tab after command (success path)', () => {
    // Everything from handleCommand to the end of the file
    const fromHandleCommand = serverSrc.slice(serverSrc.indexOf(HANDLE_COMMAND), serverSrc.length);
    // One restore for the success path plus one for the error path
    const restoreCalls = fromHandleCommand.match(/browserManager\.switchTab\(savedTabId\)/g) || [];
    expect(restoreCalls.length).toBeGreaterThanOrEqual(2); // success + error paths
  });

  test('handleCommand restores active tab on error path', () => {
    // From the first catch block at or after handleCommand to the end of the file
    const fnStart = serverSrc.indexOf(HANDLE_COMMAND);
    const catchStart = serverSrc.indexOf('} catch (err: any) {', fnStart);
    const catchBlock = serverSrc.slice(catchStart);
    expect(catchBlock).toContain('switchTab(savedTabId)');
  });

  test('tab pinning only activates when tabId is provided', () => {
    // Preamble of handleCommand, up to its first `try {`
    const fnStart = serverSrc.indexOf(HANDLE_COMMAND);
    const preamble = serverSrc.slice(fnStart, serverSrc.indexOf('try {', fnStart + 1));
    // Switching must be guarded against a missing tabId
    expect(preamble).toContain('tabId !== undefined');
    expect(preamble).toContain('tabId !== null');
  });

  test('CLI only sends tabId when BROWSE_TAB is set', () => {
    // tabId is spread into the body conditionally
    expect(cliSrc).toContain('browseTab ? { tabId:');
  });
});
+676
View File
@@ -0,0 +1,676 @@
/**
* Tests for sidebar UX changes:
* - System prompt does not bake in page URL (navigation fix)
* - --resume is never used (stale context fix)
* - /sidebar-chat response includes agentStatus
* - Sidebar HTML has updated banner, placeholder, stop button
* - Narration instructions present in system prompt
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
// Parent of this test file's directory; the suites below build source and
// extension paths relative to it.
const ROOT = path.resolve(__dirname, '..');
// ─── System prompt tests (server.ts spawnClaude) ─────────────────
describe('sidebar system prompt (server.ts)', () => {
  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');

  const PROMPT_OPEN = 'const systemPrompt = [';
  // Matches the literal source text `].join('\n');` (backslash + n, not a newline).
  const PROMPT_CLOSE = "].join('\\n');";

  /**
   * Extract the `systemPrompt` array literal from server.ts.
   *
   * Asserts that both delimiters were found. Without this, a renamed or
   * reformatted prompt makes indexOf return -1, the slice comes back empty,
   * and every `not.toContain` check below passes without testing anything.
   */
  function systemPromptSection(): string {
    const open = serverSrc.indexOf(PROMPT_OPEN);
    expect(open).toBeGreaterThanOrEqual(0);
    const close = serverSrc.indexOf(PROMPT_CLOSE, open);
    expect(close).toBeGreaterThan(open);
    return serverSrc.slice(open, close + PROMPT_CLOSE.length);
  }

  test('system prompt does not bake in page URL', () => {
    // The old prompt had: `The user is currently viewing: ${pageUrl}`
    // The new prompt should NOT contain this pattern
    const promptSection = systemPromptSection();
    expect(promptSection).not.toContain('currently viewing');
    expect(promptSection).not.toContain('${pageUrl}');
  });

  test('system prompt tells agent to check URL before acting', () => {
    const promptSection = systemPromptSection();
    expect(promptSection).toContain('NEVER');
    expect(promptSection).toContain('navigate back');
    expect(promptSection).toContain('NEVER assume');
    expect(promptSection).toContain('url`');
  });

  test('system prompt includes narration instructions', () => {
    const promptSection = systemPromptSection();
    expect(promptSection).toContain('Narrate');
    expect(promptSection).toContain('plain English');
  });

  test('--resume is never used in spawnClaude args', () => {
    // Extract the spawnClaude function. The start must exist, otherwise the
    // negative assertions below would run against an empty string.
    const fnStart = serverSrc.indexOf('function spawnClaude(');
    expect(fnStart).toBeGreaterThanOrEqual(0);
    const fnEnd = serverSrc.indexOf('\nfunction ', fnStart + 1);
    // If no later top-level function exists, scan to the end of the file.
    const fnBody = serverSrc.slice(fnStart, fnEnd > fnStart ? fnEnd : undefined);
    // Should not push --resume to args
    expect(fnBody).not.toContain("'--resume'");
    expect(fnBody).not.toContain('"--resume"');
  });

  test('system prompt includes inspect and style commands', () => {
    const promptSection = systemPromptSection();
    expect(promptSection).toContain('inspect');
    expect(promptSection).toContain('style');
    expect(promptSection).toContain('cleanup');
  });
});
// ─── /sidebar-chat response includes agentStatus ─────────────────
describe('/sidebar-chat agentStatus', () => {
  const serverPath = path.join(ROOT, 'src', 'server.ts');
  const serverSrc = fs.readFileSync(serverPath, 'utf-8');

  test('sidebar-chat response includes agentStatus field', () => {
    // Locate the GET /sidebar-chat route, then the payload that carries
    // entries + total (the auth error response comes earlier and is skipped).
    const routeIdx = serverSrc.indexOf("url.pathname === '/sidebar-chat'");
    const payloadIdx = serverSrc.indexOf('{ entries, total', routeIdx);
    expect(payloadIdx).toBeGreaterThan(routeIdx);

    // agentStatus must appear within the first 100 chars of that payload.
    const payloadWindow = serverSrc.slice(payloadIdx, payloadIdx + 100);
    expect(payloadWindow).toContain('agentStatus');
  });
});
// ─── Sidebar HTML tests ──────────────────────────────────────────
describe('sidebar HTML (sidepanel.html)', () => {
  const htmlPath = path.join(ROOT, '..', 'extension', 'sidepanel.html');
  const html = fs.readFileSync(htmlPath, 'utf-8');

  /** Assert the new wording is present and the wording it replaced is gone. */
  const expectReplaced = (present: string, absent: string): void => {
    expect(html).toContain(present);
    expect(html).not.toContain(absent);
  };

  test('banner says "Browser co-pilot" not "Standalone mode"', () => {
    expectReplaced('Browser co-pilot', 'Standalone mode');
  });

  test('input placeholder says "Ask about this page"', () => {
    expectReplaced('Ask about this page', 'Message Claude Code');
  });

  test('stop button exists with id stop-agent-btn', () => {
    for (const attr of ['id="stop-agent-btn"', 'class="stop-btn"']) {
      expect(html).toContain(attr);
    }
  });

  test('stop button is hidden by default', () => {
    // Remainder of the tag after the id attribute must carry an inline display: none
    const tagTail = html.match(/id="stop-agent-btn"[^>]*/);
    expect(tagTail).not.toBeNull();
    expect(tagTail![0]).toContain('display: none');
  });
});
// ─── Sidebar JS tests ───────────────────────────────────────────
describe('sidebar JS (sidepanel.js)', () => {
  const js = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.js'), 'utf-8');

  const AGENT_START = "entry.type === 'agent_start'";
  const AGENT_DONE = "entry.type === 'agent_done'";
  const AGENT_ERROR = "entry.type === 'agent_error'";

  /** Source text from the first occurrence of one marker up to the first occurrence of another. */
  const between = (fromMarker: string, toMarker: string): string =>
    js.slice(js.indexOf(fromMarker), js.indexOf(toMarker));

  test('stopAgent function exists', () => {
    expect(js).toContain('async function stopAgent()');
  });

  test('stopAgent calls /sidebar-agent/stop endpoint', () => {
    expect(js).toContain('/sidebar-agent/stop');
  });

  test('stop button click handler is wired up', () => {
    expect(js).toContain("getElementById('stop-agent-btn')");
    expect(js).toContain('stopAgent');
  });

  test('updateStopButton function exists', () => {
    expect(js).toContain('function updateStopButton(');
  });

  test('agent_start shows stop button', () => {
    // The agent_start branch runs up to where the agent_done branch begins
    const startBranch = between(AGENT_START, AGENT_DONE);
    expect(startBranch).toContain('updateStopButton(true)');
  });

  test('agent_done hides stop button', () => {
    const doneBranch = between(AGENT_DONE, AGENT_ERROR);
    expect(doneBranch).toContain('updateStopButton(false)');
  });

  test('agent_error hides stop button', () => {
    // Look at the 500 chars following the agent_error check
    const errorAt = js.indexOf(AGENT_ERROR);
    const errorBranch = js.slice(errorAt, errorAt + 500);
    expect(errorBranch).toContain('updateStopButton(false)');
  });

  test('orphaned thinking cleanup checks agentStatus from server', () => {
    // Thinking dots are cleared when a poll reports the agent is not processing
    expect(js).toContain("data.agentStatus !== 'processing'");
  });

  test('orphaned thinking cleanup adds (session ended) notice', () => {
    expect(js).toContain('(session ended)');
  });

  test('sendMessage renders user bubble + thinking dots optimistically', () => {
    // The user bubble and agent-thinking element are created before the server replies
    const sendAt = js.indexOf('async function sendMessage()');
    const sendFn = js.slice(sendAt, sendAt + 2000);
    for (const needle of ['chat-bubble user', 'agent-thinking', 'lastOptimisticMsg']) {
      expect(sendFn).toContain(needle);
    }
  });

  test('fast polling during agent execution (300ms), slow when idle (1000ms)', () => {
    const pollingNeedles = [
      'FAST_POLL_MS',
      'SLOW_POLL_MS',
      'startFastPoll',
      'stopFastPoll',
      '300', // fast interval
      '1000', // slow interval
    ];
    for (const needle of pollingNeedles) {
      expect(js).toContain(needle);
    }
  });

  test('agent_done calls stopFastPoll', () => {
    const doneBranch = between(AGENT_DONE, AGENT_ERROR);
    expect(doneBranch).toContain('stopFastPoll');
  });

  test('duplicate user bubble prevention via lastOptimisticMsg', () => {
    expect(js).toContain('lastOptimisticMsg');
    // A polled message equal to the optimistic one is not rendered again
    expect(js).toContain('lastOptimisticMsg === entry.message');
  });
});
// ─── Sidebar agent queue poll (sidebar-agent.ts) ─────────────────
describe('sidebar agent queue poll (sidebar-agent.ts)', () => {
  const agentPath = path.join(ROOT, 'src', 'sidebar-agent.ts');
  const agentSrc = fs.readFileSync(agentPath, 'utf-8');

  test('queue poll interval is 200ms or less for fast TTFO', () => {
    // Read the numeric literal assigned to POLL_MS straight from the source
    const pollDecl = /const POLL_MS\s*=\s*(\d+)/.exec(agentSrc);
    expect(pollDecl).not.toBeNull();

    const intervalMs = parseInt(pollDecl![1], 10);
    expect(intervalMs).toBeLessThanOrEqual(200);
  });
});
// ─── System prompt size (TTFO optimization) ──────────────────────
describe('system prompt size', () => {
  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');

  /**
   * Text of the `systemPrompt` array literal, from its declaration up to (not
   * including) the closing `].join('\n');`.
   *
   * Asserts that both delimiters were found. If either is missing the slice
   * is empty, which would make "under 20 lines" and the `not.toContain`
   * checks pass without examining any prompt text.
   */
  function promptBlock(): string {
    const start = serverSrc.indexOf('const systemPrompt = [');
    expect(start).toBeGreaterThanOrEqual(0);
    const end = serverSrc.indexOf("].join('\\n');", start);
    expect(end).toBeGreaterThan(start);
    return serverSrc.slice(start, end);
  }

  test('system prompt is compact (under 20 lines)', () => {
    const lines = promptBlock().split('\n').length;
    // Compact prompt = fewer input tokens = faster first response
    // Slightly higher limit because of per-tab instruction line
    expect(lines).toBeLessThan(20);
  });

  test('system prompt does not contain verbose narration examples', () => {
    // We trimmed examples to reduce token count. The agent gets the
    // instruction to narrate, not 6 examples of how.
    const block = promptBlock();
    expect(block).not.toContain('Examples of good narration');
    expect(block).not.toContain('I can see a login form');
  });
});
// ─── TTFO latency chain invariants ──────────────────────────────
describe('TTFO latency chain', () => {
  const js = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.js'), 'utf-8');
  const agentSrc = fs.readFileSync(path.join(ROOT, 'src', 'sidebar-agent.ts'), 'utf-8');

  /** `length` characters of sidepanel.js beginning at the first occurrence of `marker`. */
  const windowFrom = (marker: string, length: number): string => {
    const at = js.indexOf(marker);
    return js.slice(at, at + length);
  };

  /** Integer literal assigned to the named constant in sidepanel.js, or null if absent. */
  const constantMatch = (name: string): RegExpMatchArray | null =>
    js.match(new RegExp(name + '\\s*=\\s*(\\d+)'));

  test('optimistic render happens BEFORE chrome.runtime.sendMessage', () => {
    // Inside sendMessage(), the thinking dots must appear in the source
    // ahead of the async request to the server.
    const sendFn = windowFrom('async function sendMessage()', 3000);
    const thinkingAt = sendFn.indexOf('agent-thinking');
    const requestAt = sendFn.indexOf('chrome.runtime.sendMessage');
    expect(thinkingAt).toBeGreaterThan(0);
    expect(requestAt).toBeGreaterThan(0);
    expect(thinkingAt).toBeLessThan(requestAt);
  });

  test('sendMessage calls startFastPoll before server request', () => {
    const sendFn = windowFrom('async function sendMessage()', 3000);
    const fastPollAt = sendFn.indexOf('startFastPoll');
    const requestAt = sendFn.indexOf('chrome.runtime.sendMessage');
    expect(fastPollAt).toBeGreaterThan(0);
    expect(fastPollAt).toBeLessThan(requestAt);
  });

  test('agent_start from server does not duplicate thinking dots', () => {
    // Dots may already be on screen from the optimistic render, so the
    // agent_start branch should look the element up before adding another.
    const startBranch = js.slice(
      js.indexOf("entry.type === 'agent_start'"),
      js.indexOf("entry.type === 'agent_done'"),
    );
    expect(startBranch).toContain('agent-thinking');
    expect(startBranch).toContain("getElementById('agent-thinking')");
  });

  test('FAST_POLL_MS is strictly less than SLOW_POLL_MS', () => {
    const fast = constantMatch('FAST_POLL_MS');
    const slow = constantMatch('SLOW_POLL_MS');
    expect(fast).not.toBeNull();
    expect(slow).not.toBeNull();
    const fastMs = parseInt(fast![1], 10);
    const slowMs = parseInt(slow![1], 10);
    expect(fastMs).toBeLessThan(slowMs);
  });

  test('stopAgent also calls stopFastPoll', () => {
    const stopFn = windowFrom('async function stopAgent()', 800);
    expect(stopFn).toContain('stopFastPoll');
  });
});
// ─── Browser tab bar ────────────────────────────────────────────
describe('browser tab bar (server.ts)', () => {
  const serverPath = path.join(ROOT, 'src', 'server.ts');
  const serverSrc = fs.readFileSync(serverPath, 'utf-8');

  const TABS_ROUTE = "/sidebar-tabs'";

  test('/sidebar-tabs endpoint exists', () => {
    for (const needle of [TABS_ROUTE, 'getTabListWithTitles']) {
      expect(serverSrc).toContain(needle);
    }
  });

  test('/sidebar-tabs/switch endpoint exists', () => {
    for (const needle of ["/sidebar-tabs/switch'", 'switchTab']) {
      expect(serverSrc).toContain(needle);
    }
  });

  test('/sidebar-tabs requires auth', () => {
    // The auth check is expected within 300 chars of the route match
    const routeAt = serverSrc.indexOf(TABS_ROUTE);
    const routeWindow = serverSrc.slice(routeAt, routeAt + 300);
    expect(routeWindow).toContain('validateAuth');
  });
});
describe('browser tab bar (sidepanel.js)', () => {
  const jsPath = path.join(ROOT, '..', 'extension', 'sidepanel.js');
  const js = fs.readFileSync(jsPath, 'utf-8');

  /** Assert each needle, in order, is present in sidepanel.js. */
  const expectInJs = (...needles: string[]): void => {
    for (const needle of needles) {
      expect(js).toContain(needle);
    }
  };

  test('pollTabs function exists and calls /sidebar-tabs', () => {
    expectInJs('async function pollTabs()', '/sidebar-tabs');
  });

  test('renderTabBar function exists', () => {
    expectInJs('function renderTabBar(tabs)');
  });

  test('tab bar hidden when only 1 tab', () => {
    // First 600 chars of renderTabBar
    const renderAt = js.indexOf('function renderTabBar(');
    const renderFn = js.slice(renderAt, renderAt + 600);
    expect(renderFn).toContain('tabs.length <= 1');
    expect(renderFn).toContain("display = 'none'");
  });

  test('switchBrowserTab calls /sidebar-tabs/switch', () => {
    expectInJs('async function switchBrowserTab(', '/sidebar-tabs/switch');
  });

  test('tab polling interval is set on connection', () => {
    expectInJs('tabPollInterval', 'setInterval(pollTabs');
  });

  test('tab polling cleaned up on disconnect', () => {
    expectInJs('clearInterval(tabPollInterval)');
  });

  test('only re-renders when tabs change (diff check)', () => {
    expectInJs('lastTabJson', 'json === lastTabJson');
  });
});
describe('browser tab bar (sidepanel.html)', () => {
  const htmlPath = path.join(ROOT, '..', 'extension', 'sidepanel.html');
  const html = fs.readFileSync(htmlPath, 'utf-8');

  test('browser-tabs container exists', () => {
    expect(html).toContain('id="browser-tabs"');
  });

  test('browser-tabs hidden by default', () => {
    // Remainder of the tag after the id attribute must include display:none
    const tagTail = /id="browser-tabs"[^>]*/.exec(html);
    expect(tagTail).not.toBeNull();
    expect(tagTail![0]).toContain('display:none');
  });
});
// ─── Bidirectional tab sync ──────────────────────────────────────
describe('sidebar→browser tab switch', () => {
  const bmPath = path.join(ROOT, 'src', 'browser-manager.ts');
  const bmSrc = fs.readFileSync(bmPath, 'utf-8');

  test('switchTab calls bringToFront so browser visually switches', () => {
    // Inspect the 400 chars that follow the switchTab signature
    const signatureAt = bmSrc.indexOf('switchTab(id: number)');
    const switchFn = bmSrc.slice(signatureAt, signatureAt + 400);
    expect(switchFn).toContain('bringToFront');
  });
});
describe('browser→sidebar tab sync', () => {
const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');
const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');
const js = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.js'), 'utf-8');
test('syncActiveTabByUrl method exists on BrowserManager', () => {
expect(bmSrc).toContain('syncActiveTabByUrl(activeUrl: string)');
});
test('syncActiveTabByUrl updates activeTabId when URL matches a different tab', () => {
const fn = bmSrc.slice(
bmSrc.indexOf('syncActiveTabByUrl('),
bmSrc.indexOf('syncActiveTabByUrl(') + 1200,
);
expect(fn).toContain('this.activeTabId = id');
// Exact match
expect(fn).toContain('pageUrl === activeUrl');
// Fuzzy match (origin+pathname)
expect(fn).toContain('activeOriginPath');
expect(fn).toContain('fuzzyId');
});
test('context.on("page") tracks user-created tabs', () => {
expect(bmSrc).toContain("context.on('page'");
expect(bmSrc).toContain('this.pages.set(id, page)');
// Should log when new tab detected
expect(bmSrc).toContain('New tab detected');
});
test('page close handler removes tab from pages map', () => {
expect(bmSrc).toContain("page.on('close'");
expect(bmSrc).toContain('this.pages.delete(id)');
expect(bmSrc).toContain('Tab closed');
});
test('syncActiveTabByUrl skips when only 1 tab (no ambiguity)', () => {
const fn = bmSrc.slice(
bmSrc.indexOf('syncActiveTabByUrl('),
bmSrc.indexOf('syncActiveTabByUrl(') + 600,
);
expect(fn).toContain('this.pages.size <= 1');
});
test('/sidebar-tabs reads activeUrl param and calls syncActiveTabByUrl', () => {
const handler = serverSrc.slice(
serverSrc.indexOf("/sidebar-tabs'"),
serverSrc.indexOf("/sidebar-tabs'") + 500,
);
expect(handler).toContain("get('activeUrl')");
expect(handler).toContain('syncActiveTabByUrl');
});
test('/sidebar-command syncs activeTabUrl BEFORE reading tabId', () => {
// The server must call syncActiveTabByUrl before getActiveTabId
// so the agent targets the correct tab
const cmdIdx = serverSrc.indexOf("url.pathname === '/sidebar-command'");
const handler = serverSrc.slice(cmdIdx, cmdIdx + 1200);
const syncIdx = handler.indexOf('syncActiveTabByUrl');
const getIdIdx = handler.indexOf('getActiveTabId');
expect(syncIdx).toBeGreaterThan(0);
expect(getIdIdx).toBeGreaterThan(syncIdx); // sync happens BEFORE reading ID
});
test('background.js listens for chrome.tabs.onActivated', () => {
  // Load the extension's background script and look for the listener
  // registration plus the message name it is expected to mention.
  const bgPath = path.join(ROOT, '..', 'extension', 'background.js');
  const bgSrc = fs.readFileSync(bgPath, 'utf-8');
  for (const fragment of ['chrome.tabs.onActivated.addListener', 'browserTabActivated']) {
    expect(bgSrc).toContain(fragment);
  }
});
test('sidepanel handles browserTabActivated message instantly', () => {
  const expectedFragments = [
    "msg.type === 'browserTabActivated'",
    'switchChatTab', // expected to be used for the instant context swap
  ];
  expectedFragments.forEach((fragment) => {
    expect(js).toContain(fragment);
  });
});
test('pollTabs sends Chrome active tab URL to server', () => {
  // Look only at the first 800 characters of pollTabs.
  const declaration = 'async function pollTabs()';
  const startAt = js.indexOf(declaration);
  const pollSrc = js.slice(startAt, startAt + 800);
  expect(pollSrc).toContain('chrome.tabs.query');
  expect(pollSrc).toContain('activeUrl=');
});
});
describe('browser tab bar (sidepanel.css)', () => {
  const css = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.css'), 'utf-8');

  /** Stylesheet text from the first occurrence of `opener` through the next `}`. */
  const ruleText = (opener: string): string => {
    const at = css.indexOf(opener);
    return css.slice(at, css.indexOf('}', at) + 1);
  };

  test('browser-tabs styles exist', () => {
    for (const selector of ['.browser-tabs', '.browser-tab', '.browser-tab.active']) {
      expect(css).toContain(selector);
    }
  });

  test('tab bar is horizontally scrollable', () => {
    expect(ruleText('.browser-tabs {')).toContain('overflow-x: auto');
  });

  test('active tab is visually distinct', () => {
    const activeRule = ruleText('.browser-tab.active {');
    expect(activeRule).toContain('--bg-surface');
    expect(activeRule).toContain('--text-body');
  });
});
// ─── Event relay (processAgentEvent) ────────────────────────────
describe('processAgentEvent handles sidebar-agent event types', () => {
  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');

  // Extract the processAgentEvent source: from its declaration up to the next
  // line that begins with `function `. When no later declaration is found,
  // fall back to a fixed 2000-character window.
  const fnStart = serverSrc.indexOf('function processAgentEvent(');
  const fnEnd = serverSrc.indexOf('\nfunction ', fnStart + 1);
  const fnBody = serverSrc.slice(fnStart, fnEnd > fnStart ? fnEnd : fnStart + 2000);

  test('handles tool_use events directly (not raw Claude stream format)', () => {
    // Must handle { type: 'tool_use', tool, input } from sidebar-agent
    expect(fnBody).toContain("event.type === 'tool_use'");
    expect(fnBody).toContain('event.tool');
    expect(fnBody).toContain('event.input');
  });

  test('handles text_delta events directly', () => {
    expect(fnBody).toContain("event.type === 'text_delta'");
    expect(fnBody).toContain('event.text');
  });

  test('handles text events directly', () => {
    expect(fnBody).toContain("event.type === 'text'");
  });

  test('handles result events', () => {
    expect(fnBody).toContain("event.type === 'result'");
  });

  test('handles agent_error events', () => {
    expect(fnBody).toContain("event.type === 'agent_error'");
    expect(fnBody).toContain('event.error');
  });

  test('does NOT re-parse raw Claude stream events (no content_block_start)', () => {
    // Guard: if the declaration is not found, fnBody is not the function's
    // source and the negative assertions below would pass without checking
    // anything.
    expect(fnStart).toBeGreaterThan(-1);
    // sidebar-agent.ts already transforms these. Server should not duplicate.
    expect(fnBody).not.toContain('content_block_start');
    expect(fnBody).not.toContain('content_block_delta');
    expect(fnBody).not.toContain("event.type === 'assistant'");
  });

  test('all event types call addChatEntry with role: agent', () => {
    // Every addChatEntry in processAgentEvent should have role: 'agent'.
    // The pattern stops at the closing brace of the object literal so that a
    // call with a trailing argument after the literal is still matched.
    const addCalls = fnBody.match(/addChatEntry\(\{[^}]+\}/g) ?? [];
    // Without at least one match the loop below would never run and the test
    // would pass without verifying anything.
    expect(addCalls.length).toBeGreaterThan(0);
    for (const call of addCalls) {
      expect(call).toContain("role: 'agent'");
    }
  });
});
// ─── Per-tab chat context ────────────────────────────────────────
describe('per-tab chat context (server.ts)', () => {
  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');

  test('/sidebar-chat accepts tabId query param', () => {
    // Inspect the 600 characters following the first /sidebar-chat route literal.
    const routeAt = serverSrc.indexOf("/sidebar-chat'");
    const handler = serverSrc.slice(routeAt, routeAt + 600);
    expect(handler).toContain('tabId');
  });

  test('addChatEntry takes a tabId parameter', () => {
    // addChatEntry should route entries to the correct tab's buffer
    expect(serverSrc).toContain('tabId');
    // The declaration must exist. Skipping the check when it is missing would
    // let this test pass without ever looking at addChatEntry.
    const fnIdx = serverSrc.indexOf('function addChatEntry(');
    expect(fnIdx).toBeGreaterThan(-1);
    // Look for tabId within the first 300 characters of the declaration.
    const fnBody = serverSrc.slice(fnIdx, fnIdx + 300);
    expect(fnBody).toContain('tabId');
  });

  test('spawnClaude passes active tab ID to queue entry', () => {
    // Slice from the spawnClaude declaration up to the next line that begins
    // with `function `.
    const spawnAt = serverSrc.indexOf('function spawnClaude(');
    const spawnFn = serverSrc.slice(spawnAt, serverSrc.indexOf('\nfunction ', spawnAt + 1));
    expect(spawnFn).toContain('tabId');
  });

  test('tab isolation uses BROWSE_TAB env var instead of system prompt hack', () => {
    const agentSrc = fs.readFileSync(path.join(ROOT, 'src', 'sidebar-agent.ts'), 'utf-8');
    // Agent passes BROWSE_TAB env var to claude (not a system prompt instruction)
    expect(agentSrc).toContain('BROWSE_TAB');
    // Server handleCommand reads tabId from body and pins to that tab
    expect(serverSrc).toContain('savedTabId');
    expect(serverSrc).toContain('switchTab(tabId)');
  });
});
describe('per-tab chat context (sidepanel.js)', () => {
  const js = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.js'), 'utf-8');

  /** `length` characters of sidepanel.js, starting at the first occurrence of `marker`. */
  function sourceWindow(marker: string, length: number): string {
    const at = js.indexOf(marker);
    return js.slice(at, at + length);
  }

  test('tracks activeTabId for chat context', () => {
    expect(js).toContain('activeTabId');
  });

  test('pollChat sends tabId to server', () => {
    expect(sourceWindow('async function pollChat()', 600)).toContain('tabId');
  });

  test('switching tabs swaps displayed chat', () => {
    // The sidepanel is expected to reference switchChatTab for the swap.
    expect(js).toContain('switchChatTab');
  });

  test('switchChatTab saves current tab DOM and restores new tab', () => {
    const switchSrc = sourceWindow('function switchChatTab(', 800);
    for (const fragment of ['chatDomByTab', 'innerHTML']) {
      expect(switchSrc).toContain(fragment);
    }
  });

  test('sendMessage includes tabId in message', () => {
    const sendSrc = sourceWindow('async function sendMessage()', 2000);
    for (const fragment of ['tabId', 'sidebarActiveTabId']) {
      expect(sendSrc).toContain(fragment);
    }
  });
});
// ─── Sidebar CSS tests ──────────────────────────────────────────
describe('sidebar CSS (sidepanel.css)', () => {
  const css = fs.readFileSync(path.join(ROOT, '..', 'extension', 'sidepanel.css'), 'utf-8');

  /** Stylesheet text from the first occurrence of `opener` through the next `}`. */
  const ruleText = (opener: string): string => {
    const at = css.indexOf(opener);
    return css.slice(at, css.indexOf('}', at) + 1);
  };

  test('stop button style exists', () => {
    expect(css).toContain('.stop-btn');
  });

  test('stop button uses error color', () => {
    expect(ruleText('.stop-btn {')).toContain('--error');
  });

  test('experimental-banner no longer uses amber warning colors', () => {
    // Guard: this test has only negative assertions, so it would pass on
    // the wrong text if the rule were missing. Require the rule to be
    // present first.
    expect(css).toContain('.experimental-banner {');
    const bannerSection = ruleText('.experimental-banner {');
    // Should not be amber/warning anymore
    expect(bannerSection).not.toContain('245, 158, 11, 0.15');
    expect(bannerSection).not.toContain('#F59E0B');
  });

  test('tool description uses system font not mono', () => {
    const toolSection = ruleText('.agent-tool {');
    expect(toolSection).toContain('font-system');
    expect(toolSection).not.toContain('font-mono');
  });
});