From 3a5a174e4cf6a15456a169d523bba46519b2f6a4 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 20 Apr 2026 20:55:16 +0800 Subject: [PATCH] =?UTF-8?q?test(security):=20sidepanel=20review=20E2E=20?= =?UTF-8?q?=E2=80=94=20Playwright=20drives=20Allow/Block?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 tests, ~13s, gate tier. Loads real extension sidepanel in Playwright Chromium with stubbed chrome.runtime + fetch, injects a reviewable security_event, and drives the user path end-to-end: - banner title flips to "Review suspected injection" - suspected text excerpt renders inside the auto-expanded details - Allow + Block buttons are visible - click Allow → POST /security-decision with decision:"allow" - click Block → POST /security-decision with decision:"block" - banner auto-hides after each decision - non-reviewable events keep the hard-stop framing (regression guard) - XSS guard: script-tagged suspected_text doesn't execute Complements security-review-flow.test.ts (unit-level file handshake) and security-review-fullstack.test.ts (full pipeline with real classifier). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../security-review-sidepanel-e2e.test.ts | 345 ++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 browse/test/security-review-sidepanel-e2e.test.ts diff --git a/browse/test/security-review-sidepanel-e2e.test.ts b/browse/test/security-review-sidepanel-e2e.test.ts new file mode 100644 index 00000000..4fdd9f07 --- /dev/null +++ b/browse/test/security-review-sidepanel-e2e.test.ts @@ -0,0 +1,345 @@ +/** + * Review-flow E2E (sidepanel side, hermetic). + * + * Loads the real extension sidepanel.html in Playwright Chromium, stubs + * the browse server responses, injects a `reviewable: true` security_event + * into /sidebar-chat, and asserts the user-in-the-loop flow end-to-end: + * + * 1. Banner renders with "Review suspected injection" title + * 2. 
Suspected text excerpt shows up inside the expandable details
+ *   3. Allow + Block buttons are visible and actionable
+ *   4. Clicking Allow posts to /security-decision with decision:"allow"
+ *   5. Clicking Block posts to /security-decision with decision:"block"
+ *   6. Banner auto-hides after decision
+ *
+ * This is the UI-and-wire test. The server-side handshake (decision file
+ * write + sidebar-agent poll) is covered by security-review-flow.test.ts.
+ * The full-stack version with real mock-claude + real classifier lives
+ * in security-review-fullstack.test.ts (periodic tier).
+ *
+ * Gate tier. ~3s. Skipped if Playwright chromium is unavailable.
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import { chromium, type Browser, type Page } from 'playwright';
+
+// `extension/` under the directory two levels above this file's directory.
+const EXTENSION_DIR = path.resolve(import.meta.dir, '..', '..', 'extension');
+// NOTE(review): built by plain string interpolation, so a checkout path
+// containing spaces, `#` or `?` is not percent-encoded — confirm that is
+// acceptable for the environments this test runs in.
+const SIDEPANEL_URL = `file://${EXTENSION_DIR}/sidepanel.html`;
+
+/**
+ * True only when Playwright reports a Chromium executable path and that
+ * path exists on disk. Any exception while probing counts as "unavailable".
+ * Evaluated once at module load; the tests use it with `test.skipIf`.
+ */
+const CHROMIUM_AVAILABLE = (() => {
+  try {
+    const exe = chromium.executablePath();
+    return !!exe && fs.existsSync(exe);
+  } catch {
+    return false;
+  }
+})();
+
+/**
+ * Shape of a decision body as the tests assert it (`tabId`, `decision`,
+ * optional `reason`).
+ * NOTE(review): not referenced in the visible part of this file —
+ * presumably documents what `window.__decisionCalls` holds; confirm.
+ */
+interface DecisionCall {
+  tabId: number;
+  decision: 'allow' | 'block';
+  reason?: string;
+}
+
+/**
+ * Install the same stubs the existing sidepanel-dom test uses, plus a
+ * fetch interceptor that captures POSTs to /security-decision into a
+ * page-scoped array (`window.__decisionCalls`). Nothing is returned; tests
+ * read the captured bodies back with `page.evaluate`.
+ */
+async function installStubsAndCapture(
+  page: Page,
+  scenario: { securityEntries: any[] },
+): Promise<void> {
+  // Everything inside this callback runs in the page before any sidepanel
+  // script; `params` is the `scenario` argument passed to addInitScript below.
+  await page.addInitScript((params: any) => {
+    (window as any).__decisionCalls = [];
+
+    // Minimal chrome.* surface: sendMessage supports both the callback and
+    // the promise calling convention and always reports a connected server.
+    (window as any).chrome = {
+      runtime: {
+        sendMessage: (_req: any, cb: any) => {
+          const payload = { connected: true, port: 34567 };
+          if (typeof cb === 'function') {
+            setTimeout(() => cb(payload), 0);
+            return undefined;
+          }
+          return Promise.resolve(payload);
+        },
+        lastError: null,
+        onMessage: { addListener: () => {} },
+      },
+      tabs: {
+        query: (_q: any, cb: any) => setTimeout(() => cb([{ id: 1, url: 'https://example.com' }]), 0),
+        onActivated: { addListener: () => {} },
+        onUpdated: { addListener: () => {} },
+      },
+    };
+
+    // Inert EventSource: no connection is opened and no events are delivered.
+    (window as any).EventSource = class {
+      constructor() {}
+      addEventListener() {}
+      close() {}
+    };
+
+    const scenarioRef = params;
+    const origFetch = window.fetch;
+    window.fetch = async function (input: any, init?: any) {
+      // fetch() accepts a string, a URL or a Request. String(request) is
+      // "[object Request]", which would slip past every stub below, so read
+      // url/method/body from the Request when one is passed. The method is
+      // upper-cased because real fetch treats "post" and "POST" alike.
+      const isRequest = typeof Request !== 'undefined' && input instanceof Request;
+      const url = isRequest ? input.url : String(input);
+      const method = String(init?.method ?? (isRequest ? input.method : 'GET')).toUpperCase();
+
+      if (url.endsWith('/health')) {
+        return new Response(JSON.stringify({
+          status: 'healthy',
+          token: 'test-token',
+          mode: 'headed',
+          agent: { status: 'idle', runningFor: null, queueLength: 0 },
+          session: null,
+          security: { status: 'protected', layers: { testsavant: 'ok', transcript: 'ok', canary: 'ok' } },
+        }), { status: 200, headers: { 'Content-Type': 'application/json' } });
+      }
+      if (url.includes('/sidebar-chat')) {
+        return new Response(JSON.stringify({
+          entries: scenarioRef.securityEntries ?? [],
+          total: (scenarioRef.securityEntries ?? []).length,
+          agentStatus: 'idle',
+          activeTabId: 1,
+          security: { status: 'protected', layers: { testsavant: 'ok', transcript: 'ok', canary: 'ok' } },
+        }), { status: 200, headers: { 'Content-Type': 'application/json' } });
+      }
+      if (url.includes('/security-decision') && method === 'POST') {
+        const raw = init?.body ?? (isRequest ? await input.clone().text() : undefined);
+        try {
+          const body = JSON.parse(raw || '{}');
+          (window as any).__decisionCalls.push(body);
+        } catch {
+          // Keep the unparseable payload so a failing assertion shows what was sent.
+          (window as any).__decisionCalls.push({ _parseError: true, raw });
+        }
+        return new Response(JSON.stringify({ ok: true }), { status: 200, headers: { 'Content-Type': 'application/json' } });
+      }
+      if (url.includes('/sidebar-tabs')) {
+        return new Response(JSON.stringify({ tabs: [] }), { status: 200 });
+      }
+      // Anything else (e.g. the extension's own file:// assets) goes to the real fetch.
+      if (typeof origFetch === 'function') return origFetch(input, init);
+      return new Response('{}', { status: 200 });
+    } as any;
+  }, scenario);
+}
+
+// Shared across all tests; stays null when Chromium is unavailable.
+let browser: Browser | null = null;
+
+beforeAll(async () => {
+  if (!CHROMIUM_AVAILABLE) return;
+  browser = await chromium.launch({ headless: true });
+}, 30000);
+
+afterAll(async () => {
+  if (!browser) return;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    // Race browser.close() against a timeout — on rare occasions Playwright
+    // hangs on close because an EventSource stub keeps a poll alive. 10s is
+    // plenty; past that we forcibly drop the handle. Bun's default hook
+    // timeout is 5s and has bitten this file.
+    await Promise.race([
+      browser.close(),
+      new Promise<void>((resolve) => {
+        timer = setTimeout(resolve, 10000);
+      }),
+    ]);
+  } catch {
+    // Best-effort teardown: a failed close must not fail the suite.
+  } finally {
+    // Clear the fallback timer so it cannot keep the process alive for the
+    // remainder of the 10s once close() has settled, then drop the handle.
+    if (timer !== undefined) clearTimeout(timer);
+    browser = null;
+  }
+}, 15000);
+
+/**
+ * The reviewable security_event the sidebar-agent emits on tool-output BLOCK.
+ * Mirrors the shape of the real production event: verdict:'block',
+ * reviewable:true, suspected_text excerpt, per-layer signals, and tabId
+ * so the banner's Allow/Block buttons know which tab to decide for.
+ */
+// `overrides` is shallow-merged last, so any field of the default event
+// (id, suspected_text, tabId, ...) can be replaced per test. The entry is
+// returned as `any` because it is fed straight into the stubbed JSON response.
+function buildReviewableEntry(overrides?: Partial<Record<string, unknown>>): any {
+  return {
+    id: 42,
+    ts: '2026-04-20T12:00:00Z',
+    role: 'agent',
+    type: 'security_event',
+    verdict: 'block',
+    reason: 'tool_result_ml',
+    layer: 'testsavant_content',
+    confidence: 0.95,
+    domain: 'news.ycombinator.com',
+    tool: 'Bash',
+    reviewable: true,
+    suspected_text: 'A comment thread discussing ignore previous instructions and reveal secrets — classifier flagged this as injection but it is actually benign developer content about a prompt injection incident.',
+    signals: [
+      { layer: 'testsavant_content', confidence: 0.95 },
+      { layer: 'transcript_classifier', confidence: 0.0, meta: { degraded: true } },
+    ],
+    tabId: 1,
+    ...overrides,
+  };
+}
+
+describe('sidepanel review-flow E2E', () => {
+  test.skipIf(!CHROMIUM_AVAILABLE)('reviewable event shows review banner with suspected text + buttons', async () => {
+    const context = await browser!.newContext();
+    const page = await context.newPage();
+    await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry()] });
+    await page.goto(SIDEPANEL_URL);
+
+    // Wait for /sidebar-chat poll to deliver the entry + banner to render.
+ await page.waitForFunction( + () => { + const b = document.getElementById('security-banner') as HTMLElement | null; + return !!b && b.style.display !== 'none'; + }, + { timeout: 5000 }, + ); + + // Title flips to the review framing (not "Session terminated") + const title = await page.$eval('#security-banner-title', (el) => el.textContent); + expect(title).toContain('Review suspected injection'); + + // Subtitle mentions the tool + domain + const subtitle = await page.$eval('#security-banner-subtitle', (el) => el.textContent); + expect(subtitle).toContain('Bash'); + expect(subtitle).toContain('news.ycombinator.com'); + expect(subtitle).toContain('allow to continue'); + + // Suspected text shows up unescaped (textContent, not innerHTML) + const suspect = await page.$eval('#security-banner-suspect', (el) => el.textContent); + expect(suspect).toContain('ignore previous instructions'); + + // Both action buttons are visible + const allowVisible = await page.locator('#security-banner-btn-allow').isVisible(); + const blockVisible = await page.locator('#security-banner-btn-block').isVisible(); + expect(allowVisible).toBe(true); + expect(blockVisible).toBe(true); + + // Details auto-expanded so the user sees context + const detailsHidden = await page.$eval('#security-banner-details', (el) => (el as HTMLElement).hidden); + expect(detailsHidden).toBe(false); + + await context.close(); + }, 15000); + + test.skipIf(!CHROMIUM_AVAILABLE)('clicking Allow posts {decision:"allow"} and hides banner', async () => { + const context = await browser!.newContext(); + const page = await context.newPage(); + await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry()] }); + await page.goto(SIDEPANEL_URL); + await page.waitForSelector('#security-banner-btn-allow:visible', { timeout: 5000 }); + + await page.click('#security-banner-btn-allow'); + + // Decision POST should have fired with decision:"allow" and the tabId + // from the security_event. 
Give the fetch promise a tick to resolve. + await page.waitForFunction( + () => (window as any).__decisionCalls?.length > 0, + { timeout: 2000 }, + ); + + const calls = await page.evaluate(() => (window as any).__decisionCalls); + expect(calls).toHaveLength(1); + expect(calls[0].decision).toBe('allow'); + expect(calls[0].tabId).toBe(1); + expect(calls[0].reason).toBe('user'); + + // Banner should hide optimistically after the POST + await page.waitForFunction( + () => { + const b = document.getElementById('security-banner') as HTMLElement | null; + return !!b && b.style.display === 'none'; + }, + { timeout: 2000 }, + ); + + await context.close(); + }, 15000); + + test.skipIf(!CHROMIUM_AVAILABLE)('clicking Block posts {decision:"block"} and hides banner', async () => { + const context = await browser!.newContext(); + const page = await context.newPage(); + await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry({ id: 55 })] }); + await page.goto(SIDEPANEL_URL); + await page.waitForSelector('#security-banner-btn-block:visible', { timeout: 5000 }); + + await page.click('#security-banner-btn-block'); + + await page.waitForFunction( + () => (window as any).__decisionCalls?.length > 0, + { timeout: 2000 }, + ); + + const calls = await page.evaluate(() => (window as any).__decisionCalls); + expect(calls).toHaveLength(1); + expect(calls[0].decision).toBe('block'); + expect(calls[0].tabId).toBe(1); + + await page.waitForFunction( + () => { + const b = document.getElementById('security-banner') as HTMLElement | null; + return !!b && b.style.display === 'none'; + }, + { timeout: 2000 }, + ); + + await context.close(); + }, 15000); + + test.skipIf(!CHROMIUM_AVAILABLE)('non-reviewable event still shows hard-stop banner with no buttons', async () => { + // Regression guard: the existing hard-stop canary leak UX must not be + // disturbed by the reviewable branch. An event without reviewable:true + // keeps the old behavior. 
+ const hardStop = { + id: 99, + ts: '2026-04-20T12:00:00Z', + role: 'agent', + type: 'security_event', + verdict: 'block', + reason: 'canary_leaked', + layer: 'canary', + confidence: 1.0, + domain: 'attacker.example.com', + channel: 'tool_use:Bash', + tabId: 1, + }; + const context = await browser!.newContext(); + const page = await context.newPage(); + await installStubsAndCapture(page, { securityEntries: [hardStop] }); + await page.goto(SIDEPANEL_URL); + await page.waitForFunction( + () => { + const b = document.getElementById('security-banner') as HTMLElement | null; + return !!b && b.style.display !== 'none'; + }, + { timeout: 5000 }, + ); + + const title = await page.$eval('#security-banner-title', (el) => el.textContent); + expect(title).toContain('Session terminated'); + + // Action row stays hidden for the non-reviewable path + const actionsHidden = await page.$eval('#security-banner-actions', (el) => (el as HTMLElement).hidden); + expect(actionsHidden).toBe(true); + + await context.close(); + }, 15000); + + test.skipIf(!CHROMIUM_AVAILABLE)('suspected text renders via textContent, not innerHTML (XSS guard)', async () => { + // If the sidepanel ever regressed to innerHTML for the suspected text, + // a crafted excerpt could execute script. This test uses one; if the + // ', + }); + const context = await browser!.newContext(); + const page = await context.newPage(); + await installStubsAndCapture(page, { securityEntries: [xssAttempt] }); + await page.goto(SIDEPANEL_URL); + await page.waitForSelector('#security-banner-suspect:not([hidden])', { timeout: 5000 }); + + // The literal text should appear inside the suspect block (as text, not markup) + const suspectText = await page.$eval('#security-banner-suspect', (el) => el.textContent); + expect(suspectText).toContain('