diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index b475daad..a627dd41 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -152,6 +152,7 @@ export const E2E_TOUCHFILES: Record = { // Sidebar agent 'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'], 'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'], + 'sidebar-css-interaction': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/write-commands.ts', 'browse/src/read-commands.ts', 'browse/src/cdp-inspector.ts', 'extension/**'], // Autoplan 'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'], @@ -282,6 +283,7 @@ export const E2E_TIERS: Record = { // Sidebar agent 'sidebar-navigate': 'periodic', 'sidebar-url-accuracy': 'periodic', + 'sidebar-css-interaction': 'periodic', // Autoplan — periodic (not yet implemented) 'autoplan-core': 'periodic', diff --git a/test/skill-e2e-sidebar.test.ts b/test/skill-e2e-sidebar.test.ts index fe9ae0b0..6d80d7de 100644 --- a/test/skill-e2e-sidebar.test.ts +++ b/test/skill-e2e-sidebar.test.ts @@ -149,6 +149,155 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => { }, 30_000); }); +// --- Sidebar CSS Interaction E2E (real Claude + real browser) --- +// Goes to HN, reads comments, identifies the most insightful one, highlights it. +// Exercises: navigation, snapshot, text reading, LLM judgment, CSS style injection. 
describeIfSelected('Sidebar CSS interaction E2E', ['sidebar-css-interaction'], () => {
  let serverProc: Subprocess | null = null;
  let agentProc: Subprocess | null = null;
  let serverPort = 0;
  let authToken = '';
  let tmpDir = '';
  let stateFile = '';
  let queueFile = '';

  /** Resolve after `ms` milliseconds. */
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>(resolve => setTimeout(resolve, ms));

  /**
   * Send a JSON request to the browse server started in `beforeAll`.
   *
   * Adds `Content-Type: application/json` and, unless the caller already
   * supplied an `Authorization` header, a bearer token read from the state file.
   *
   * @param pathname - Path (plus optional query string) appended to the server origin.
   * @param opts - Extra `fetch` options; `opts.headers` entries override the defaults.
   * @returns The raw `fetch` response.
   */
  async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...((opts.headers as Record<string, string> | undefined) ?? {}),
    };
    if (!headers['Authorization'] && authToken) {
      headers['Authorization'] = `Bearer ${authToken}`;
    }
    return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
  }

  beforeAll(async () => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-css-'));
    stateFile = path.join(tmpDir, 'browse.json');
    queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
    fs.mkdirSync(path.dirname(queueFile), { recursive: true });

    // Start server WITH a real browser (no HEADLESS_SKIP) for CSS interaction
    const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
    serverProc = spawn(['bun', 'run', serverScript], {
      env: {
        ...process.env,
        BROWSE_STATE_FILE: stateFile,
        BROWSE_PORT: '0',
        SIDEBAR_QUEUE_PATH: queueFile,
        BROWSE_IDLE_TIMEOUT: '300',
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Wait (up to 30s) for the server to publish its port and token in the state file.
    const deadline = Date.now() + 30000;
    while (Date.now() < deadline) {
      if (fs.existsSync(stateFile)) {
        try {
          const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
          if (state.port && state.token) {
            serverPort = state.port;
            authToken = state.token;
            break;
          }
        } catch {
          // Deliberately ignored: the file may be unreadable or hold
          // incomplete JSON at this instant; the next iteration retries.
        }
      }
      await sleep(200);
    }
    if (!serverPort) throw new Error('Server did not start in time');

    // Start sidebar-agent with the real browse binary
    const agentScript = path.resolve(ROOT, 'browse', 'src', 'sidebar-agent.ts');
    const browseBin = path.resolve(ROOT, 'browse', 'dist', 'browse');
    agentProc = spawn(['bun', 'run', agentScript], {
      env: {
        ...process.env,
        BROWSE_SERVER_PORT: String(serverPort),
        BROWSE_STATE_FILE: stateFile,
        SIDEBAR_QUEUE_PATH: queueFile,
        SIDEBAR_AGENT_TIMEOUT: '120000',
        // Falls back to `echo` when the compiled binary is absent.
        // NOTE(review): with `echo` the agent presumably cannot drive the
        // browser, so this suite likely needs browse/dist/browse built — confirm.
        BROWSE_BIN: fs.existsSync(browseBin) ? browseBin : 'echo',
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Fixed grace period after spawning the agent before any test runs.
    await sleep(2000);
  }, 35000);

  afterAll(() => {
    // Best-effort teardown: a failure to kill or delete must not mask test results.
    if (agentProc) { try { agentProc.kill(); } catch {} }
    if (serverProc) { try { serverProc.kill(); } catch {} }
    finalizeEvalCollector(evalCollector);
    // tmpDir is still '' if beforeAll failed before mkdtempSync; skip removal then.
    if (tmpDir) {
      try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
    }
  });

  testIfSelected('sidebar-css-interaction', async () => {
    await api('/sidebar-session/new', { method: 'POST' });
    fs.writeFileSync(queueFile, '');
    const startTime = Date.now();

    // Ask the agent to go to HN, find the most insightful comment, and highlight it
    const resp = await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({
        message: 'Go to https://news.ycombinator.com. Find the top story. Click into its comments. Read the comments and find the most insightful one. Highlight that comment with a 4px solid orange outline.',
        activeTabUrl: 'about:blank',
      }),
    });
    expect(resp.status).toBe(200);

    // Poll for agent_done (2 min timeout — this is a multi-step task)
    const deadline = Date.now() + 120000;
    let entries: any[] = [];
    while (Date.now() < deadline) {
      const chatResp = await api('/sidebar-chat?after=0');
      const data = await chatResp.json();
      // Keep the last good snapshot if a response carries no entries array.
      if (Array.isArray(data?.entries)) entries = data.entries;
      if (entries.some((e: any) => e.type === 'agent_done')) break;
      await sleep(3000);
    }

    const duration = Date.now() - startTime;
    const doneEntry = entries.find((e: any) => e.type === 'agent_done');

    // Agent should have run browse commands (look for tool_use entries)
    const toolUses = entries.filter((e: any) => e.type === 'tool_use');

    // Should have navigated to HN (look for tool output mentioning ycombinator)
    const toolOutputs = entries
      .filter((e: any) => e.type === 'tool_result')
      .map((e: any) => e.text || '')
      .join(' ');
    const navigatedToHN = toolOutputs.includes('ycombinator') || toolOutputs.includes('Hacker News');

    // Should have applied a style (look for orange/outline in tool commands)
    // NOTE(review): this scans the text of every entry; if the chat log also
    // contains the user's prompt (which mentions "orange outline"), the check
    // is trivially true — confirm the entry schema and narrow if so.
    const allText = entries.map((e: any) => e.text || '').join(' ');
    const appliedStyle = allText.includes('outline') || allText.includes('orange') || allText.includes('style');

    // Record the eval BEFORE asserting, so failed and timed-out runs are
    // reported too (an expect() failure throws and would skip this call).
    evalCollector?.addTest({
      name: 'sidebar-css-interaction', suite: 'Sidebar CSS interaction E2E', tier: 'e2e',
      passed: !!doneEntry && navigatedToHN && appliedStyle,
      duration_ms: duration,
      cost_usd: 0,
      exit_reason: doneEntry ? 'success' : 'timeout',
    });

    // Agent should have completed
    expect(doneEntry).toBeDefined();
    expect(toolUses.length).toBeGreaterThanOrEqual(2); // At minimum: goto + one more
    expect(navigatedToHN).toBe(true);
    // The styling step is the point of this test, so it must be asserted, not just recorded.
    expect(appliedStyle).toBe(true);
  }, 150_000);
});

// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---

describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {