chore: drop sidebar-agent test refs after chat rip

Six test files / describe blocks targeted the deleted chat path: - browse/test/security-e2e-fullstack.test.ts (full-stack chat-pipeline E2E with mock claude — whole file gone) - browse/test/security-review-fullstack.test.ts (review-flow E2E with real classifier — whole file gone) - browse/test/security-review-sidepanel-e2e.test.ts (Playwright E2E for the security event banner that was ripped from sidepanel.html) - browse/test/security-audit-r2.test.ts (7 describe blocks: agent queue permissions, isValidQueueEntry stateFile traversal, loadSession session-ID validation, switchChatTab DocumentFragment, pollChat reentrancy guard, /sidebar-tabs URL sanitization, sidebar-agent SIGTERM→SIGKILL escalation; plus the AGENT_SRC top-level read converted to a graceful fallback) - browse/test/security-adversarial-fixes.test.ts (canary stream-chunk split detection on detectCanaryLeak; one tool-output test on sidebar-agent) - test/skill-validation.test.ts (sidebar agent #584 describe block) These all assumed sidebar-agent.ts existed and tested chat-queue plumbing, chat-tab DOM round-trip, chat-polling reentrancy, or per-message classifier canary detection. With the live PTY there is no chat queue, no chat tab, no LLM stream to canary-scan, and no per-message subprocess. The Terminal pane's invariants are covered by the new browse/test/sidebar-tabs.test.ts (27 structural assertions), browse/test/terminal-agent.test.ts, and browse/test/terminal-agent-integration.test.ts. bun test → exit 0, 0 failures.
2026-05-02 03:35:09 +02:00 · 2026-04-25 21:48:12 -07:00
parent b1f3d7a0fd
commit b5fa1df9c1
6 changed files with 53 additions and 1251 deletions
@@ -19,31 +19,10 @@ import { PAGE_CONTENT_COMMANDS } from '../src/commands';

 const REPO_ROOT = path.resolve(__dirname, '..', '..');

-describe('canary stream-chunk split detection', () => {
-  test('detectCanaryLeak uses rolling buffer across consecutive deltas', () => {
-    // Pull in the function via dynamic require so we don't re-export it
-    // from sidebar-agent.ts (it's internal on purpose).
-    const agentSource = fs.readFileSync(
-      path.join(REPO_ROOT, 'browse', 'src', 'sidebar-agent.ts'),
-      'utf-8',
-    );
-    // Contract: detectCanaryLeak accepts an optional DeltaBuffer and
-    // uses .slice(-(canary.length - 1)) to retain a rolling tail.
-    expect(agentSource).toContain('DeltaBuffer');
-    expect(agentSource).toMatch(/text_delta\s*=\s*combined\.slice\(-\(canary\.length - 1\)\)/);
-    expect(agentSource).toMatch(/input_json_delta\s*=\s*combined\.slice\(-\(canary\.length - 1\)\)/);
-  });
-
-  test('canary context initializes deltaBuf', () => {
-    const agentSource = fs.readFileSync(
-      path.join(REPO_ROOT, 'browse', 'src', 'sidebar-agent.ts'),
-      'utf-8',
-    );
-    // The askClaude call site must construct the buffer so the rolling
-    // detection actually runs.
-    expect(agentSource).toContain("deltaBuf: { text_delta: '', input_json_delta: '' }");
-  });
-});
+// canary stream-chunk split detection — tested detectCanaryLeak inside
+// sidebar-agent.ts. Both the chat-stream pipeline and the function are
+// gone (Terminal pane uses an interactive PTY; user keystrokes are the
+// trust source, no chunked LLM stream to canary-scan).

 describe('tool-output ensemble rule (single-layer BLOCK)', () => {
  test('user-input context: single layer at BLOCK degrades to WARN', () => {
@@ -117,13 +96,10 @@ describe('transcript classifier tool_output parameter', () => {
    expect(src).toContain('tool_output');
  });

-  test('sidebar-agent passes tool text to transcript on tool-result scan', () => {
-    const src = fs.readFileSync(
-      path.join(REPO_ROOT, 'browse', 'src', 'sidebar-agent.ts'),
-      'utf-8',
-    );
-    expect(src).toContain('tool_output: text');
-  });
+  // sidebar-agent passed tool text to the transcript classifier on
+  // tool-result scans. That whole pipeline is gone — Terminal pane has
+  // no LLM stream to scan, and security-classifier.ts is dead code with
+  // no production caller (a separate v1.1+ cleanup TODO).
 });

 describe('GSTACK_SECURITY_OFF kill switch', () => {
@@ -15,7 +15,13 @@ import * as os from 'os';
 const META_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/meta-commands.ts'), 'utf-8');
 const WRITE_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/write-commands.ts'), 'utf-8');
 const SERVER_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/server.ts'), 'utf-8');
-const AGENT_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/sidebar-agent.ts'), 'utf-8');
+// sidebar-agent.ts was ripped (chat queue replaced by interactive PTY).
+// AGENT_SRC falls back to '' so a missing file can't crash module load.
+// The describe blocks that asserted on it were dropped with the chat rip.
+const AGENT_SRC = (() => {
+  try { return fs.readFileSync(path.join(import.meta.dir, '../src/sidebar-agent.ts'), 'utf-8'); }
+  catch { return ''; }
+})();
 const SNAPSHOT_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/snapshot.ts'), 'utf-8');
 const PATH_SECURITY_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/path-security.ts'), 'utf-8');

@@ -51,53 +57,12 @@ function extractFunction(src: string, name: string): string {
  return src.slice(start);
 }

-// ─── Task 4: Agent queue poisoning — full schema validation + permissions ───
-
-describe('Agent queue security', () => {
-  it('server queue directory must use restricted permissions', () => {
-    const queueSection = SERVER_SRC.slice(SERVER_SRC.indexOf('agentQueue'), SERVER_SRC.indexOf('agentQueue') + 2000);
-    expect(queueSection).toMatch(/0o700/);
-  });
-
-  it('sidebar-agent queue directory must use restricted permissions', () => {
-    // The mkdirSync for the queue dir lives in main() — search the main() body
-    const mainStart = AGENT_SRC.indexOf('async function main');
-    const queueSection = AGENT_SRC.slice(mainStart);
-    expect(queueSection).toMatch(/0o700/);
-  });
-
-  it('cli.ts queue file creation must use restricted permissions', () => {
-    const CLI_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/cli.ts'), 'utf-8');
-    const queueSection = CLI_SRC.slice(CLI_SRC.indexOf('queue') || 0, CLI_SRC.indexOf('queue') + 2000);
-    expect(queueSection).toMatch(/0o700|0o600|mode/);
-  });
-
-  it('queue reader must have a validator function covering all fields', () => {
-    // Extract ONLY the validator function body by walking braces
-    const validatorStart = AGENT_SRC.indexOf('function isValidQueueEntry');
-    expect(validatorStart).toBeGreaterThan(-1);
-    let depth = 0;
-    let bodyStart = AGENT_SRC.indexOf('{', validatorStart);
-    let bodyEnd = bodyStart;
-    for (let i = bodyStart; i < AGENT_SRC.length; i++) {
-      if (AGENT_SRC[i] === '{') depth++;
-      if (AGENT_SRC[i] === '}') depth--;
-      if (depth === 0) { bodyEnd = i + 1; break; }
-    }
-    const validatorBlock = AGENT_SRC.slice(validatorStart, bodyEnd);
-
-    expect(validatorBlock).toMatch(/prompt.*string/);
-    expect(validatorBlock).toMatch(/Array\.isArray/);
-    expect(validatorBlock).toMatch(/\.\./);
-    expect(validatorBlock).toContain('stateFile');
-    expect(validatorBlock).toContain('tabId');
-    expect(validatorBlock).toMatch(/number/);
-    expect(validatorBlock).toContain('null');
-    expect(validatorBlock).toContain('message');
-    expect(validatorBlock).toContain('pageUrl');
-    expect(validatorBlock).toContain('sessionId');
-  });
-});
+// ─── Agent queue security ──────────────────────────────────────────────────
+// Original block validated the chat queue's filesystem permissions (server.ts,
+// sidebar-agent.ts, cli.ts) and sidebar-agent's schema validator. Both are
+// gone (chat queue ripped in favor of the interactive Terminal PTY). The
+// remaining 0o700 / 0o600 invariants on extension queue paths are now covered
+// by terminal-agent integration tests and the sidebar-tabs regression suite.

 // ─── Shared source reads for CSS validator tests ────────────────────────────
 const CDP_SRC = fs.readFileSync(path.join(import.meta.dir, '../src/cdp-inspector.ts'), 'utf-8');
@@ -325,30 +290,13 @@ describe('Round-2 finding 2: snapshot.ts annotated path uses realpathSync', () =
  });
 });

-// ─── Round-2 finding 3: stateFile path traversal check in isValidQueueEntry ─
-
-describe('Round-2 finding 3: isValidQueueEntry checks stateFile for path traversal', () => {
-  it('isValidQueueEntry checks stateFile for .. traversal sequences', () => {
-    const fn = extractFunction(AGENT_SRC, 'isValidQueueEntry');
-    expect(fn).toBeTruthy();
-    // Must check stateFile for '..' — find the stateFile block and look for '..' string
-    const stateFileIdx = fn.indexOf('stateFile');
-    expect(stateFileIdx).toBeGreaterThan(-1);
-    const stateFileBlock = fn.slice(stateFileIdx, stateFileIdx + 200);
-    // The block must contain a check for the two-dot traversal sequence
-    expect(stateFileBlock).toMatch(/'\.\.'|"\.\."|\.\./);
-  });
-
-  it('isValidQueueEntry stateFile block contains both type check and traversal check', () => {
-    const fn = extractFunction(AGENT_SRC, 'isValidQueueEntry');
-    const stateFileIdx = fn.indexOf('stateFile');
-    const stateBlock = fn.slice(stateFileIdx, stateFileIdx + 300);
-    // Must contain the type check
-    expect(stateBlock).toContain('typeof obj.stateFile');
-    // Must contain the includes('..') call
-    expect(stateBlock).toMatch(/includes\s*\(\s*['"]\.\.['"]\s*\)/);
-  });
-});
+// ─── Round-2 finding 3: stateFile path traversal check ─────────────────────
+// Tested isValidQueueEntry's stateFile validator on sidebar-agent.ts. Both
+// the function and the file are gone (chat queue ripped). The terminal-agent
+// PTY path no longer takes a queue entry — it accepts WebSocket frames
+// gated on Origin + session token, no on-disk queue to traverse. Path
+// traversal in browse-server's tab-state writer is covered by
+// browse/test/terminal-agent.test.ts (handleTabState atomic-write tests).

 // ─── Task 5: /health endpoint must not expose sensitive fields ───────────────

@@ -421,24 +369,11 @@ describe('cookie-import domain validation', () => {
  });
 });

-// ─── Task 9: loadSession ID validation ──────────────────────────────────────
-
-describe('loadSession session ID validation', () => {
-  it('loadSession validates session ID format before using it in a path', () => {
-    const fn = extractFunction(SERVER_SRC, 'loadSession');
-    expect(fn).toBeTruthy();
-    // Must contain the alphanumeric regex guard
-    expect(fn).toMatch(/\[a-zA-Z0-9_-\]/);
-  });
-
-  it('loadSession returns null on invalid session ID', () => {
-    const fn = extractFunction(SERVER_SRC, 'loadSession');
-    const block = fn.slice(fn.indexOf('activeData.id'));
-    // Must warn and return null
-    expect(block).toContain('Invalid session ID');
-    expect(block).toContain('return null');
-  });
-});
+// loadSession session ID validation — loadSession lived inside the chat
+// agent state block in server.ts (chat session persistence). Chat queue
+// is gone, so the function and its session-ID validator are gone. The
+// terminal-agent's PTY session has no on-disk session ID — the WebSocket
+// holds the session for its lifetime.

 // ─── Task 10: Responsive screenshot path validation ──────────────────────────

@@ -520,40 +455,11 @@ describe('Task 11: state load cookie validation', () => {
  });
 });

-// ─── Task 12: Validate activeTabUrl before syncActiveTabByUrl ─────────────────
-
-describe('Task 12: activeTabUrl sanitized before syncActiveTabByUrl', () => {
-  it('sidebar-tabs route sanitizes activeUrl before syncActiveTabByUrl', () => {
-    const block = sliceBetween(SERVER_SRC, "url.pathname === '/sidebar-tabs'", "url.pathname === '/sidebar-tabs/switch'");
-    expect(block).toContain('sanitizeExtensionUrl');
-    expect(block).toContain('syncActiveTabByUrl');
-    const sanitizeIdx = block.indexOf('sanitizeExtensionUrl');
-    const syncIdx = block.indexOf('syncActiveTabByUrl');
-    expect(sanitizeIdx).toBeLessThan(syncIdx);
-  });
-
-  it('sidebar-command route sanitizes extensionUrl before syncActiveTabByUrl', () => {
-    const block = sliceBetween(SERVER_SRC, "url.pathname === '/sidebar-command'", "url.pathname === '/sidebar-chat/clear'");
-    expect(block).toContain('sanitizeExtensionUrl');
-    expect(block).toContain('syncActiveTabByUrl');
-    const sanitizeIdx = block.indexOf('sanitizeExtensionUrl');
-    const syncIdx = block.indexOf('syncActiveTabByUrl');
-    expect(sanitizeIdx).toBeLessThan(syncIdx);
-  });
-
-  it('direct unsanitized syncActiveTabByUrl calls are not present (all calls go through sanitize)', () => {
-    // Every syncActiveTabByUrl call should be preceded by sanitizeExtensionUrl in the nearby code
-    // We verify there are no direct browserManager.syncActiveTabByUrl(activeUrl) or
-    // browserManager.syncActiveTabByUrl(extensionUrl) patterns (without sanitize wrapper)
-    const block1 = sliceBetween(SERVER_SRC, "url.pathname === '/sidebar-tabs'", "url.pathname === '/sidebar-tabs/switch'");
-    // Should NOT contain direct call with raw activeUrl
-    expect(block1).not.toMatch(/syncActiveTabByUrl\(activeUrl\)/);
-
-    const block2 = sliceBetween(SERVER_SRC, "url.pathname === '/sidebar-command'", "url.pathname === '/sidebar-chat/clear'");
-    // Should NOT contain direct call with raw extensionUrl
-    expect(block2).not.toMatch(/syncActiveTabByUrl\(extensionUrl\)/);
-  });
-});
+// activeTabUrl sanitized before syncActiveTabByUrl — tested URL sanitization
+// on the now-deleted /sidebar-tabs and /sidebar-command routes. The
+// terminal-agent reads tab URLs from the live tabs.json file (atomic write
+// from background.js), and chrome:// / chrome-extension:// pages are
+// filtered server-side in handleTabState — see browse/test/terminal-agent.test.ts.

 // ─── Task 13: Inbox output wrapped as untrusted ──────────────────────────────

@@ -581,107 +487,17 @@ describe('Task 13: inbox output wrapped as untrusted content', () => {
  });
 });

-// ─── Task 14: DOM serialization round-trip replaced with DocumentFragment ─────
+// switchChatTab DocumentFragment + pollChat reentrancy guard tests targeted
+// now-deleted chat-tab DOM logic and chat-polling reentrancy. Both are gone
+// (Terminal pane is the sole sidebar surface; xterm.js owns its own DOM
+// lifecycle, and the WebSocket has no reentrancy hazard).

-const SIDEPANEL_SRC = fs.readFileSync(path.join(import.meta.dir, '../../extension/sidepanel.js'), 'utf-8');
-
-describe('Task 14: switchChatTab uses DocumentFragment, not innerHTML round-trip', () => {
-  it('switchChatTab does NOT use innerHTML to restore chat (string-based re-parse removed)', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'switchChatTab');
-    expect(fn).toBeTruthy();
-    // Must NOT have the dangerous pattern of assigning chatDomByTab value back to innerHTML
-    expect(fn).not.toMatch(/chatMessages\.innerHTML\s*=\s*chatDomByTab/);
-  });
-
-  it('switchChatTab uses createDocumentFragment to save chat DOM', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'switchChatTab');
-    expect(fn).toContain('createDocumentFragment');
-  });
-
-  it('switchChatTab moves nodes via appendChild/firstChild (not innerHTML assignment)', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'switchChatTab');
-    // Must use appendChild to restore nodes from fragment
-    expect(fn).toContain('chatMessages.appendChild');
-  });
-
-  it('chatDomByTab comment documents that values are DocumentFragments, not strings', () => {
-    // Check module-level comment on chatDomByTab
-    const commentIdx = SIDEPANEL_SRC.indexOf('chatDomByTab');
-    const commentLine = SIDEPANEL_SRC.slice(commentIdx, commentIdx + 120);
-    expect(commentLine).toMatch(/DocumentFragment|fragment/i);
-  });
-
-  it('welcome screen is built with DOM methods in the else branch (not innerHTML)', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'switchChatTab');
-    // The else branch must use createElement, not innerHTML template literal
-    expect(fn).toContain('createElement');
-    // The specific innerHTML template with chat-welcome must be gone
-    expect(fn).not.toMatch(/innerHTML\s*=\s*`[\s\S]*?chat-welcome/);
-  });
-});
-
-// ─── Task 15: pollChat/switchChatTab reentrancy guard ────────────────────────
-
-describe('Task 15: pollChat reentrancy guard and deferred call in switchChatTab', () => {
-  it('pollInProgress guard variable is declared at module scope', () => {
-    // Must be declared before any function definitions (within first 2000 chars)
-    const moduleTop = SIDEPANEL_SRC.slice(0, 2000);
-    expect(moduleTop).toContain('pollInProgress');
-  });
-
-  it('pollChat function checks and sets pollInProgress', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'pollChat');
-    expect(fn).toBeTruthy();
-    expect(fn).toContain('pollInProgress');
-  });
-
-  it('pollChat resets pollInProgress in finally block', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'pollChat');
-    // The finally block must contain the reset
-    const finallyIdx = fn.indexOf('finally');
-    expect(finallyIdx).toBeGreaterThan(-1);
-    const finallyBlock = fn.slice(finallyIdx, finallyIdx + 60);
-    expect(finallyBlock).toContain('pollInProgress');
-  });
-
-  it('switchChatTab calls pollChat via setTimeout (not directly)', () => {
-    const fn = extractFunction(SIDEPANEL_SRC, 'switchChatTab');
-    // Must use setTimeout to defer pollChat — no direct call at the end
-    expect(fn).toMatch(/setTimeout\s*\(\s*pollChat/);
-    // Must NOT have a bare direct call `pollChat()` at the end (outside setTimeout)
-    // We check that there is no standalone `pollChat()` call (outside setTimeout wrapper)
-    const withoutSetTimeout = fn.replace(/setTimeout\s*\(\s*pollChat[^)]*\)/g, '');
-    expect(withoutSetTimeout).not.toMatch(/\bpollChat\s*\(\s*\)/);
-  });
-});
-
-// ─── Task 16: SIGKILL escalation in sidebar-agent timeout ────────────────────
-
-describe('Task 16: sidebar-agent timeout handler uses SIGTERM→SIGKILL escalation', () => {
-  it('timeout block sends SIGTERM first', () => {
-    // Slice from "Timed out" / setTimeout block to processingTabs.delete
-    const timeoutStart = AGENT_SRC.indexOf("SIDEBAR_AGENT_TIMEOUT");
-    expect(timeoutStart).toBeGreaterThan(-1);
-    const timeoutBlock = AGENT_SRC.slice(timeoutStart, timeoutStart + 600);
-    expect(timeoutBlock).toContain('SIGTERM');
-  });
-
-  it('timeout block escalates to SIGKILL after delay', () => {
-    const timeoutStart = AGENT_SRC.indexOf("SIDEBAR_AGENT_TIMEOUT");
-    const timeoutBlock = AGENT_SRC.slice(timeoutStart, timeoutStart + 600);
-    expect(timeoutBlock).toContain('SIGKILL');
-  });
-
-  it('SIGTERM appears before SIGKILL in timeout block', () => {
-    const timeoutStart = AGENT_SRC.indexOf("SIDEBAR_AGENT_TIMEOUT");
-    const timeoutBlock = AGENT_SRC.slice(timeoutStart, timeoutStart + 600);
-    const sigtermIdx = timeoutBlock.indexOf('SIGTERM');
-    const sigkillIdx = timeoutBlock.indexOf('SIGKILL');
-    expect(sigtermIdx).toBeGreaterThan(-1);
-    expect(sigkillIdx).toBeGreaterThan(-1);
-    expect(sigtermIdx).toBeLessThan(sigkillIdx);
-  });
-});
+// ─── Task 16: SIGKILL escalation ────────────────────────────────────────────
+// Originally tested sidebar-agent's SIDEBAR_AGENT_TIMEOUT block (SIGTERM →
+// SIGKILL). The chat queue and its watchdog are gone. terminal-agent.ts
+// disposes claude with an analogous SIGINT-then-SIGKILL-after-3s pattern,
+// covered by browse/test/terminal-agent.test.ts ("cleanup escalates SIGINT
+// to SIGKILL after 3s on close").

 // ─── Task 17: viewport and wait bounds clamping ──────────────────────────────

@@ -1,218 +0,0 @@
-/**
- * Full-stack E2E — the security-contract anchor test.
- *
- * Spins up a real browse server + real sidebar-agent subprocess, points
- * them at a MOCK claude binary (browse/test/fixtures/mock-claude/claude)
- * that deterministically emits a canary-leaking tool_use event, then
- * verifies the whole pipeline reacts:
- *
- *   1. Server canary-injects into the system prompt
- *   2. Server queues the message
- *   3. Sidebar-agent spawns mock-claude
- *   4. Mock-claude emits tool_use with CANARY-XXX in a URL arg
- *   5. Sidebar-agent's detectCanaryLeak fires on the stream event
- *   6. onCanaryLeaked logs, SIGTERM's mock-claude, emits security_event
- *   7. /sidebar-chat returns security_event + agent_error entries
- *
- * This test proves the end-to-end contract: when a canary leak happens,
- * the session terminates AND the sidepanel receives the events that drive
- * the approved banner render. No LLM cost, <10s total runtime.
- *
- * Fully deterministic — safe to run on every commit (gate tier).
- */
-
-import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
-import { spawn, type Subprocess } from 'bun';
-import * as fs from 'fs';
-import * as os from 'os';
-import * as path from 'path';
-
-let serverProc: Subprocess | null = null;
-let agentProc: Subprocess | null = null;
-let serverPort = 0;
-let authToken = '';
-let tmpDir = '';
-let stateFile = '';
-let queueFile = '';
-const MOCK_CLAUDE_DIR = path.resolve(import.meta.dir, 'fixtures', 'mock-claude');
-
-async function apiFetch(pathname: string, opts: RequestInit = {}): Promise<Response> {
-  const headers: Record<string, string> = {
-    'Content-Type': 'application/json',
-    Authorization: `Bearer ${authToken}`,
-    ...(opts.headers as Record<string, string> | undefined),
-  };
-  return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
-}
-
-beforeAll(async () => {
-  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'security-e2e-fullstack-'));
-  stateFile = path.join(tmpDir, 'browse.json');
-  queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
-  fs.mkdirSync(path.dirname(queueFile), { recursive: true });
-
-  const serverScript = path.resolve(import.meta.dir, '..', 'src', 'server.ts');
-  const agentScript = path.resolve(import.meta.dir, '..', 'src', 'sidebar-agent.ts');
-
-  // 1) Start the browse server.
-  serverProc = spawn(['bun', 'run', serverScript], {
-    env: {
-      ...process.env,
-      BROWSE_STATE_FILE: stateFile,
-      BROWSE_HEADLESS_SKIP: '1', // no Chromium for this test
-      BROWSE_PORT: '0',
-      SIDEBAR_QUEUE_PATH: queueFile,
-      BROWSE_IDLE_TIMEOUT: '300',
-    },
-    stdio: ['ignore', 'pipe', 'pipe'],
-  });
-
-  // Wait for state file with token + port
-  const deadline = Date.now() + 15000;
-  while (Date.now() < deadline) {
-    if (fs.existsSync(stateFile)) {
-      try {
-        const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
-        if (state.port && state.token) {
-          serverPort = state.port;
-          authToken = state.token;
-          break;
-        }
-      } catch {}
-    }
-    await new Promise((r) => setTimeout(r, 100));
-  }
-  if (!serverPort) throw new Error('Server did not start in time');
-
-  // 2) Start the sidebar-agent with PATH prepended by the mock-claude dir.
-  // sidebar-agent spawns `claude` via PATH lookup (spawn('claude', ...) — see
-  // browse/src/sidebar-agent.ts spawnClaude), so prepending works without any
-  // source change.
-  const shimmedPath = `${MOCK_CLAUDE_DIR}:${process.env.PATH ?? ''}`;
-  agentProc = spawn(['bun', 'run', agentScript], {
-    env: {
-      ...process.env,
-      PATH: shimmedPath,
-      BROWSE_STATE_FILE: stateFile,
-      SIDEBAR_QUEUE_PATH: queueFile,
-      BROWSE_SERVER_PORT: String(serverPort),
-      BROWSE_PORT: String(serverPort),
-      BROWSE_NO_AUTOSTART: '1',
-      // Scenario for mock-claude inherits through spawn env below — the agent
-      // itself doesn't read this, but the claude subprocess it spawns does.
-      MOCK_CLAUDE_SCENARIO: 'canary_leak_in_tool_arg',
-      // Force classifier off so pre-spawn ML scan doesn't fire on our
-      // benign synthetic test prompt. This test exercises the canary
-      // path specifically.
-      GSTACK_SECURITY_OFF: '1',
-    },
-    stdio: ['ignore', 'pipe', 'pipe'],
-  });
-
-  // Give the agent a moment to establish its poll loop.
-  await new Promise((r) => setTimeout(r, 500));
-}, 30000);
-
-async function drainStderr(proc: Subprocess | null, label: string): Promise<void> {
-  if (!proc?.stderr) return;
-  try {
-    const reader = (proc.stderr as ReadableStream).getReader();
-    // Drain briefly — don't block shutdown
-    const result = await Promise.race([
-      reader.read(),
-      new Promise<ReadableStreamReadResult<Uint8Array>>((resolve) =>
-        setTimeout(() => resolve({ done: true, value: undefined }), 100)
-      ),
-    ]);
-    if (result?.value) {
-      const text = new TextDecoder().decode(result.value);
-      if (text.trim()) console.error(`[${label} stderr]`, text.slice(0, 2000));
-    }
-  } catch {}
-}
-
-afterAll(async () => {
-  // Dump agent stderr for diagnostic
-  await drainStderr(agentProc, 'agent');
-  for (const proc of [serverProc, agentProc]) {
-    if (proc) {
-      try { proc.kill('SIGTERM'); } catch {}
-      try { setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} }, 1500); } catch {}
-    }
-  }
-  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
-});
-
-describe('security pipeline E2E (mock claude)', () => {
-  test('server injects canary, queues message, agent spawns mock claude', async () => {
-    const resp = await apiFetch('/sidebar-command', {
-      method: 'POST',
-      body: JSON.stringify({
-        message: "What's on this page?",
-        activeTabUrl: 'https://attacker.example.com/',
-      }),
-    });
-    expect(resp.status).toBe(200);
-
-    // Wait for the sidebar-agent to pick up the entry and spawn mock-claude.
-    // Queue entry must contain `canary` field (added by server.ts spawnClaude).
-    await new Promise((r) => setTimeout(r, 250));
-    const queueContent = fs.readFileSync(queueFile, 'utf-8').trim();
-    const lines = queueContent.split('\n').filter(Boolean);
-    expect(lines.length).toBeGreaterThan(0);
-    const entry = JSON.parse(lines[lines.length - 1]);
-    expect(entry.canary).toMatch(/^CANARY-[0-9A-F]+$/);
-    expect(entry.prompt).toContain(entry.canary);
-    expect(entry.prompt).toContain('NEVER include it');
-  });
-
-  test('canary leak triggers security_event + agent_error in /sidebar-chat', async () => {
-    // By now the mock-claude subprocess has emitted the tool_use with the
-    // leaked canary. Sidebar-agent's handleStreamEvent -> detectCanaryLeak
-    // -> onCanaryLeaked should have fired security_event + agent_error and
-    // SIGTERM'd the mock. Poll /sidebar-chat up to 10s for the events.
-    const deadline = Date.now() + 10000;
-    let securityEvent: any = null;
-    let agentError: any = null;
-    while (Date.now() < deadline && (!securityEvent || !agentError)) {
-      const resp = await apiFetch('/sidebar-chat');
-      const data: any = await resp.json();
-      for (const entry of data.entries ?? []) {
-        if (entry.type === 'security_event') securityEvent = entry;
-        if (entry.type === 'agent_error') agentError = entry;
-      }
-      if (securityEvent && agentError) break;
-      await new Promise((r) => setTimeout(r, 250));
-    }
-
-    expect(securityEvent).not.toBeNull();
-    expect(securityEvent.verdict).toBe('block');
-    expect(securityEvent.reason).toBe('canary_leaked');
-    expect(securityEvent.layer).toBe('canary');
-    // The leak is on a tool_use channel — onCanaryLeaked records "tool_use:Bash"
-    expect(String(securityEvent.channel)).toContain('tool_use');
-    expect(securityEvent.domain).toBe('attacker.example.com');
-
-    expect(agentError).not.toBeNull();
-    expect(agentError.error).toContain('Session terminated');
-    expect(agentError.error).toContain('prompt injection detected');
-  }, 15000);
-
-  test('attempts.jsonl logged with salted payload_hash and verdict=block', async () => {
-    // onCanaryLeaked also calls logAttempt — check the log file exists
-    // and contains the event. The file lives at ~/.gstack/security/attempts.jsonl.
-    const logPath = path.join(os.homedir(), '.gstack', 'security', 'attempts.jsonl');
-    expect(fs.existsSync(logPath)).toBe(true);
-    const content = fs.readFileSync(logPath, 'utf-8');
-    const recent = content.split('\n').filter(Boolean).slice(-10);
-    // Find at least one entry with verdict=block and layer=canary from our run
-    const ourEntry = recent
-      .map((l) => { try { return JSON.parse(l); } catch { return null; } })
-      .find((e) => e && e.layer === 'canary' && e.verdict === 'block' && e.urlDomain === 'attacker.example.com');
-    expect(ourEntry).toBeTruthy();
-    // payload_hash is a 64-char sha256 hex
-    expect(String(ourEntry.payloadHash)).toMatch(/^[0-9a-f]{64}$/);
-    // Never stored the payload itself — only the hash
-    expect(JSON.stringify(ourEntry)).not.toContain('CANARY-');
-  });
-});
@@ -1,405 +0,0 @@
-/**
- * Full-stack review-flow E2E with the real classifier.
- *
- * Spins up real server + real sidebar-agent subprocess + mock-claude and
- * exercises the whole tool-output BLOCK → review → decide path with the
- * real TestSavantAI classifier warm. The injection string trips the real
- * model reliably (measured: confidence 0.9999 on classic DAN-style text).
- *
- * What this covers that gate-tier tests don't:
- *   * Real classifier actually fires on the injection
- *   * sidebar-agent emits a reviewable security_event for real, not a stub
- *   * server's POST /security-decision writes the on-disk decision file
- *   * sidebar-agent's poll loop reads the file and either resumes or kills
- *     the mock-claude subprocess
- *   * attempts.jsonl ends up with the right verdict (block vs user_overrode)
- *
- * This is periodic tier. First run warms the ~112MB classifier from
- * HuggingFace — ~30s cold. Subsequent runs use the cached model under
- * ~/.gstack/models/testsavant-small/ and complete in ~5s.
- *
- * SKIPS if the classifier can't warm (no network, no disk) — the test is
- * truth-seeking only when the stack is genuinely up.
- */
-
-import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
-import { spawn, type Subprocess } from 'bun';
-import * as fs from 'fs';
-import * as os from 'os';
-import * as path from 'path';
-
-const MOCK_CLAUDE_DIR = path.resolve(import.meta.dir, 'fixtures', 'mock-claude');
-const WARMUP_TIMEOUT_MS = 90_000; // first-run download budget
-const CLASSIFIER_CACHE = path.join(os.homedir(), '.gstack', 'models', 'testsavant-small');
-
-let serverProc: Subprocess | null = null;
-let agentProc: Subprocess | null = null;
-let serverPort = 0;
-let authToken = '';
-let tmpDir = '';
-let stateFile = '';
-let queueFile = '';
-let attemptsPath = '';
-
-/**
- * Eager check — is the classifier model already on disk? `test.skipIf()`
- * is evaluated at file-registration time (before beforeAll runs), so a
- * runtime boolean wouldn't work — all tests would unconditionally register
- * as skipped. Probe the model dir synchronously at file load.
- * Same pattern as security-sidepanel-dom.test.ts uses for chromium.
- */
-const CLASSIFIER_READY = (() => {
-  try {
-    if (!fs.existsSync(CLASSIFIER_CACHE)) return false;
-    // At minimum we need the tokenizer config + onnx model.
-    return fs.existsSync(path.join(CLASSIFIER_CACHE, 'tokenizer.json'))
-      && fs.existsSync(path.join(CLASSIFIER_CACHE, 'onnx'));
-  } catch {
-    return false;
-  }
-})();
-
-async function apiFetch(pathname: string, opts: RequestInit = {}): Promise<Response> {
-  return fetch(`http://127.0.0.1:${serverPort}${pathname}`, {
-    ...opts,
-    headers: {
-      'Content-Type': 'application/json',
-      Authorization: `Bearer ${authToken}`,
-      ...(opts.headers as Record<string, string> | undefined),
-    },
-  });
-}
-
-async function waitForSecurityEntry(
-  predicate: (entry: any) => boolean,
-  timeoutMs: number,
-): Promise<any | null> {
-  const deadline = Date.now() + timeoutMs;
-  while (Date.now() < deadline) {
-    const resp = await apiFetch('/sidebar-chat');
-    const data: any = await resp.json();
-    for (const entry of data.entries ?? []) {
-      if (entry.type === 'security_event' && predicate(entry)) return entry;
-    }
-    await new Promise((r) => setTimeout(r, 250));
-  }
-  return null;
-}
-
-async function waitForProcessExit(proc: Subprocess, timeoutMs: number): Promise<number | null> {
-  const deadline = Date.now() + timeoutMs;
-  while (Date.now() < deadline) {
-    if (proc.exitCode !== null) return proc.exitCode;
-    await new Promise((r) => setTimeout(r, 100));
-  }
-  return null;
-}
-
-async function readAttempts(): Promise<any[]> {
-  if (!fs.existsSync(attemptsPath)) return [];
-  const raw = fs.readFileSync(attemptsPath, 'utf-8');
-  return raw.split('\n').filter(Boolean).map((l) => {
-    try { return JSON.parse(l); } catch { return null; }
-  }).filter(Boolean);
-}
-
-async function startStack(scenario: string, attemptsDir: string): Promise<void> {
-  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'security-review-fullstack-'));
-  stateFile = path.join(tmpDir, 'browse.json');
-  queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
-  fs.mkdirSync(path.dirname(queueFile), { recursive: true });
-
-  // Re-root HOME for both server and agent so:
-  // - server.ts's SESSIONS_DIR doesn't load pre-existing chat history
-  //   from ~/.gstack/sidebar-sessions/ (caused ghost security_events to
-  //   leak in from the live /open-gstack-browser session)
-  // - security.ts's attempts.jsonl writes land in a test-owned dir
-  // - session-state.json, chromium-profile, etc. stay isolated
-  fs.mkdirSync(path.join(attemptsDir, '.gstack'), { recursive: true });
-
-  // Symlink the models dir through to the real cache — without it the
-  // sidebar-agent would try to re-download 112MB every test run.
-  const testModelsDir = path.join(attemptsDir, '.gstack', 'models');
-  const realModelsDir = path.join(os.homedir(), '.gstack', 'models');
-  try {
-    if (fs.existsSync(realModelsDir) && !fs.existsSync(testModelsDir)) {
-      fs.symlinkSync(realModelsDir, testModelsDir);
-    }
-  } catch {
-    // Symlink may already exist — ignore.
-  }
-
-  const serverScript = path.resolve(import.meta.dir, '..', 'src', 'server.ts');
-  const agentScript = path.resolve(import.meta.dir, '..', 'src', 'sidebar-agent.ts');
-
-  serverProc = spawn(['bun', 'run', serverScript], {
-    env: {
-      ...process.env,
-      BROWSE_STATE_FILE: stateFile,
-      BROWSE_HEADLESS_SKIP: '1',
-      BROWSE_PORT: '0',
-      SIDEBAR_QUEUE_PATH: queueFile,
-      BROWSE_IDLE_TIMEOUT: '300',
-      HOME: attemptsDir,
-    },
-    stdio: ['ignore', 'pipe', 'pipe'],
-  });
-
-  const deadline = Date.now() + 15000;
-  while (Date.now() < deadline) {
-    if (fs.existsSync(stateFile)) {
-      try {
-        const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
-        if (state.port && state.token) {
-          serverPort = state.port;
-          authToken = state.token;
-          break;
-        }
-      } catch {}
-    }
-    await new Promise((r) => setTimeout(r, 100));
-  }
-  if (!serverPort) throw new Error('Server did not start in time');
-
-  const shimmedPath = `${MOCK_CLAUDE_DIR}:${process.env.PATH ?? ''}`;
-  agentProc = spawn(['bun', 'run', agentScript], {
-    env: {
-      ...process.env,
-      PATH: shimmedPath,
-      BROWSE_STATE_FILE: stateFile,
-      SIDEBAR_QUEUE_PATH: queueFile,
-      BROWSE_SERVER_PORT: String(serverPort),
-      BROWSE_PORT: String(serverPort),
-      BROWSE_NO_AUTOSTART: '1',
-      MOCK_CLAUDE_SCENARIO: scenario,
-      HOME: attemptsDir,
-    },
-    stdio: ['ignore', 'pipe', 'pipe'],
-  });
-  attemptsPath = path.join(attemptsDir, '.gstack', 'security', 'attempts.jsonl');
-
-  // Give the agent a moment to establish its poll loop + warmup the model.
-  await new Promise((r) => setTimeout(r, 500));
-}
-
-async function stopStack(): Promise<void> {
-  for (const proc of [serverProc, agentProc]) {
-    if (proc) {
-      try { proc.kill('SIGTERM'); } catch {}
-      try { setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} }, 1500); } catch {}
-    }
-  }
-  serverProc = null;
-  agentProc = null;
-  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
-}
-
-beforeAll(async () => {
-  // Sanity: the on-disk cache is real + decodable. If this fails, mark the
-  // file as "classifier unavailable" (we can't toggle CLASSIFIER_READY
-  // post-registration — a failure here just means the tests below will
-  // exercise the agent without a working classifier, which is the honest
-  // signal we want anyway).
-  if (!CLASSIFIER_READY) return;
-});
-
-afterAll(async () => {
-  await stopStack();
-});
-
-describe('review-flow full-stack E2E', () => {
-  test.skipIf(!CLASSIFIER_READY)(
-    'tool_result injection → reviewable event → user ALLOWS → attempts.jsonl has user_overrode',
-    async () => {
-      const attemptsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'attempts-allow-'));
-      try {
-        await startStack('tool_result_injection', attemptsDir);
-
-        // Fire the message that will cause mock-claude to emit the
-        // injection-laden tool_result.
-        const resp = await apiFetch('/sidebar-command', {
-          method: 'POST',
-          body: JSON.stringify({
-            message: 'summarize the hacker news comments',
-            activeTabUrl: 'https://news.ycombinator.com/item?id=42',
-          }),
-        });
-        expect(resp.status).toBe(200);
-
-        // Wait for the real classifier to fire and emit a reviewable
-        // security_event. The classifier is warm so this should happen in
-        // well under 10s once the tool_result arrives.
-        const reviewable = await waitForSecurityEntry(
-          (e) => e.verdict === 'block' && e.reviewable === true,
-          30_000,
-        );
-        expect(reviewable).not.toBeNull();
-        expect(reviewable.reason).toBe('tool_result_ml');
-        expect(reviewable.tool).toBe('Bash');
-        expect(String(reviewable.suspected_text ?? '')).toContain('IGNORE ALL PREVIOUS');
-
-        // User clicks Allow via the banner → sidepanel POSTs to server.
-        const decisionResp = await apiFetch('/security-decision', {
-          method: 'POST',
-          body: JSON.stringify({
-            tabId: reviewable.tabId,
-            decision: 'allow',
-            reason: 'user',
-          }),
-        });
-        expect(decisionResp.status).toBe(200);
-
-        // Wait for sidebar-agent's poll loop to consume the decision and
-        // emit a follow-up user_overrode security_event.
-        const overrode = await waitForSecurityEntry(
-          (e) => e.verdict === 'user_overrode',
-          10_000,
-        );
-        expect(overrode).not.toBeNull();
-
-        // Audit log must capture both the block and the override, in that
-        // order. Both records share the same salted payload hash so the
-        // security dashboard can aggregate them as a single attempt.
-        const attempts = await readAttempts();
-        const blockLog = attempts.find(
-          (a) => a.verdict === 'block' && a.layer === 'testsavant_content',
-        );
-        const overrodeLog = attempts.find(
-          (a) => a.verdict === 'user_overrode' && a.layer === 'testsavant_content',
-        );
-        expect(blockLog).toBeTruthy();
-        expect(overrodeLog).toBeTruthy();
-        expect(overrodeLog.payloadHash).toBe(blockLog.payloadHash);
-        // Privacy contract: neither record includes the raw payload.
-        expect(JSON.stringify(overrodeLog)).not.toContain('IGNORE ALL PREVIOUS');
-
-        // Liveness: session must actually KEEP RUNNING after Allow. Mock-claude
-        // emits a second tool_use to post-block-followup.example.com ~8s
-        // after the tool_result. That event must reach the chat feed, proving
-        // the sidebar-agent resumed the stream-handler relay instead of
-        // silently wedging.
-        const followupDeadline = Date.now() + 20_000;
-        let followup: any = null;
-        while (Date.now() < followupDeadline && !followup) {
-          const chatResp = await apiFetch('/sidebar-chat');
-          const chatData: any = await chatResp.json();
-          for (const entry of chatData.entries ?? []) {
-            const input = String((entry as any).input ?? '');
-            if (
-              entry.type === 'tool_use' &&
-              input.includes('post-block-followup.example.com')
-            ) {
-              followup = entry;
-              break;
-            }
-          }
-          if (!followup) await new Promise((r) => setTimeout(r, 300));
-        }
-        expect(followup).not.toBeNull();
-      } finally {
-        await stopStack();
-        try { fs.rmSync(attemptsDir, { recursive: true, force: true }); } catch {}
-      }
-    },
-    90_000,
-  );
-
-  test.skipIf(!CLASSIFIER_READY)(
-    'tool_result injection → reviewable event → user BLOCKS → agent session terminates',
-    async () => {
-      const attemptsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'attempts-block-'));
-      try {
-        await startStack('tool_result_injection', attemptsDir);
-
-        const resp = await apiFetch('/sidebar-command', {
-          method: 'POST',
-          body: JSON.stringify({
-            message: 'summarize the hacker news comments',
-            activeTabUrl: 'https://news.ycombinator.com/item?id=42',
-          }),
-        });
-        expect(resp.status).toBe(200);
-
-        const reviewable = await waitForSecurityEntry(
-          (e) => e.verdict === 'block' && e.reviewable === true,
-          30_000,
-        );
-        expect(reviewable).not.toBeNull();
-
-        const decisionResp = await apiFetch('/security-decision', {
-          method: 'POST',
-          body: JSON.stringify({
-            tabId: reviewable.tabId,
-            decision: 'block',
-            reason: 'user',
-          }),
-        });
-        expect(decisionResp.status).toBe(200);
-
-        // Wait for the agent_error that the sidebar-agent emits when it
-        // kills the claude subprocess after a user-confirmed block. This
-        // is the sidepanel's "Session terminated" signal.
-        const deadline = Date.now() + 15_000;
-        let errorEntry: any = null;
-        while (Date.now() < deadline && !errorEntry) {
-          const chatResp = await apiFetch('/sidebar-chat');
-          const chatData: any = await chatResp.json();
-          for (const entry of chatData.entries ?? []) {
-            if (
-              entry.type === 'agent_error' &&
-              String(entry.error ?? '').includes('Session terminated')
-            ) {
-              errorEntry = entry;
-              break;
-            }
-          }
-          if (!errorEntry) await new Promise((r) => setTimeout(r, 200));
-        }
-        expect(errorEntry).not.toBeNull();
-
-        // attempts.jsonl must NOT have a user_overrode entry for this run.
-        const attempts = await readAttempts();
-        const overrodeLog = attempts.find((a) => a.verdict === 'user_overrode');
-        expect(overrodeLog).toBeFalsy();
-
-        // The real security property: after Block, NO FURTHER tool calls
-        // reach the chat feed. Mock-claude would have emitted a tool_use
-        // to post-block-followup.example.com ~8s after the tool_result if
-        // the session had kept running. Wait long enough for that window
-        // to close (12s total), then assert the followup event never
-        // appeared. This is what makes "block" actually stop the page —
-        // the subprocess is SIGTERM'd before it can emit the next event.
-        await new Promise((r) => setTimeout(r, 12_000));
-        const finalChatResp = await apiFetch('/sidebar-chat');
-        const finalChatData: any = await finalChatResp.json();
-        const followupAttempted = (finalChatData.entries ?? []).some(
-          (entry: any) =>
-            entry.type === 'tool_use' &&
-            String(entry.input ?? '').includes('post-block-followup.example.com'),
-        );
-        expect(followupAttempted).toBe(false);
-
-        // And mock-claude must actually have died (not just been signaled
-        // — the SIGTERM + SIGKILL pair should have exited the process).
-        const mockAlive = (await apiFetch('/sidebar-chat')).ok; // channel still open
-        expect(mockAlive).toBe(true);
-      } finally {
-        await stopStack();
-        try { fs.rmSync(attemptsDir, { recursive: true, force: true }); } catch {}
-      }
-    },
-    90_000,
-  );
-
-  test.skipIf(!CLASSIFIER_READY)(
-    'no decision within 60s → timeout auto-blocks',
-    async () => {
-      // This test would naturally take 60s+ to run. We assert the
-      // decision file semantics instead — the unit-test suite already
-      // verified the poll loop times out and defaults to block
-      // (security-review-flow.test.ts). Kept here as a spec marker so
-      // the scenario is documented in the full-stack file.
-      expect(true).toBe(true);
-    },
-  );
-});
@@ -1,345 +0,0 @@
-/**
- * Review-flow E2E (sidepanel side, hermetic).
- *
- * Loads the real extension sidepanel.html in Playwright Chromium, stubs
- * the browse server responses, injects a `reviewable: true` security_event
- * into /sidebar-chat, and asserts the user-in-the-loop flow end-to-end:
- *
- *   1. Banner renders with "Review suspected injection" title
- *   2. Suspected text excerpt shows up inside the expandable details
- *   3. Allow + Block buttons are visible and actionable
- *   4. Clicking Allow posts to /security-decision with decision:"allow"
- *   5. Clicking Block posts to /security-decision with decision:"block"
- *   6. Banner auto-hides after decision
- *
- * This is the UI-and-wire test. The server-side handshake (decision file
- * write + sidebar-agent poll) is covered by security-review-flow.test.ts.
- * The full-stack version with real mock-claude + real classifier lives
- * in security-review-fullstack.test.ts (periodic tier).
- *
- * Gate tier. ~3s. Skipped if Playwright chromium is unavailable.
- */
-
-import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
-import * as fs from 'fs';
-import * as path from 'path';
-import { chromium, type Browser, type Page } from 'playwright';
-
-const EXTENSION_DIR = path.resolve(import.meta.dir, '..', '..', 'extension');
-const SIDEPANEL_URL = `file://${EXTENSION_DIR}/sidepanel.html`;
-
-const CHROMIUM_AVAILABLE = (() => {
-  try {
-    const exe = chromium.executablePath();
-    return !!exe && fs.existsSync(exe);
-  } catch {
-    return false;
-  }
-})();
-
-interface DecisionCall {
-  tabId: number;
-  decision: 'allow' | 'block';
-  reason?: string;
-}
-
-/**
- * Install the same stubs the existing sidepanel-dom test uses, plus a
- * fetch interceptor that captures POSTs to /security-decision into a
- * page-scoped array. Returns a handle to read the captured calls.
- */
-async function installStubsAndCapture(
-  page: Page,
-  scenario: { securityEntries: any[] },
-): Promise<void> {
-  await page.addInitScript((params: any) => {
-    (window as any).__decisionCalls = [];
-
-    (window as any).chrome = {
-      runtime: {
-        sendMessage: (_req: any, cb: any) => {
-          const payload = { connected: true, port: 34567 };
-          if (typeof cb === 'function') {
-            setTimeout(() => cb(payload), 0);
-            return undefined;
-          }
-          return Promise.resolve(payload);
-        },
-        lastError: null,
-        onMessage: { addListener: () => {} },
-      },
-      tabs: {
-        query: (_q: any, cb: any) => setTimeout(() => cb([{ id: 1, url: 'https://example.com' }]), 0),
-        onActivated: { addListener: () => {} },
-        onUpdated: { addListener: () => {} },
-      },
-    };
-
-    (window as any).EventSource = class {
-      constructor() {}
-      addEventListener() {}
-      close() {}
-    };
-
-    const scenarioRef = params;
-    const origFetch = window.fetch;
-    window.fetch = async function (input: any, init?: any) {
-      const url = String(input);
-      if (url.endsWith('/health')) {
-        return new Response(JSON.stringify({
-          status: 'healthy',
-          token: 'test-token',
-          mode: 'headed',
-          agent: { status: 'idle', runningFor: null, queueLength: 0 },
-          session: null,
-          security: { status: 'protected', layers: { testsavant: 'ok', transcript: 'ok', canary: 'ok' } },
-        }), { status: 200, headers: { 'Content-Type': 'application/json' } });
-      }
-      if (url.includes('/sidebar-chat')) {
-        return new Response(JSON.stringify({
-          entries: scenarioRef.securityEntries ?? [],
-          total: (scenarioRef.securityEntries ?? []).length,
-          agentStatus: 'idle',
-          activeTabId: 1,
-          security: { status: 'protected', layers: { testsavant: 'ok', transcript: 'ok', canary: 'ok' } },
-        }), { status: 200, headers: { 'Content-Type': 'application/json' } });
-      }
-      if (url.includes('/security-decision') && init?.method === 'POST') {
-        try {
-          const body = JSON.parse(init.body || '{}');
-          (window as any).__decisionCalls.push(body);
-        } catch {
-          (window as any).__decisionCalls.push({ _parseError: true, raw: init?.body });
-        }
-        return new Response(JSON.stringify({ ok: true }), { status: 200, headers: { 'Content-Type': 'application/json' } });
-      }
-      if (url.includes('/sidebar-tabs')) {
-        return new Response(JSON.stringify({ tabs: [] }), { status: 200 });
-      }
-      if (typeof origFetch === 'function') return origFetch(input, init);
-      return new Response('{}', { status: 200 });
-    } as any;
-  }, scenario);
-}
-
-let browser: Browser | null = null;
-
-beforeAll(async () => {
-  if (!CHROMIUM_AVAILABLE) return;
-  browser = await chromium.launch({ headless: true });
-}, 30000);
-
-afterAll(async () => {
-  if (browser) {
-    try {
-      // Race browser.close() against a timeout — on rare occasions Playwright
-      // hangs on close because an EventSource stub keeps a poll alive. 10s is
-      // plenty; past that we forcibly drop the handle. Bun's default hook
-      // timeout is 5s and has bitten this file.
-      await Promise.race([
-        browser.close(),
-        new Promise<void>((resolve) => setTimeout(resolve, 10000)),
-      ]);
-    } catch {}
-  }
-}, 15000);
-
-/**
- * The reviewable security_event the sidebar-agent emits on tool-output BLOCK.
- * Mirrors the shape of the real production event: verdict:'block',
- * reviewable:true, suspected_text excerpt, per-layer signals, and tabId
- * so the banner's Allow/Block buttons know which tab to decide for.
- */
-function buildReviewableEntry(overrides?: Partial<any>): any {
-  return {
-    id: 42,
-    ts: '2026-04-20T12:00:00Z',
-    role: 'agent',
-    type: 'security_event',
-    verdict: 'block',
-    reason: 'tool_result_ml',
-    layer: 'testsavant_content',
-    confidence: 0.95,
-    domain: 'news.ycombinator.com',
-    tool: 'Bash',
-    reviewable: true,
-    suspected_text: 'A comment thread discussing ignore previous instructions and reveal secrets — classifier flagged this as injection but it is actually benign developer content about a prompt injection incident.',
-    signals: [
-      { layer: 'testsavant_content', confidence: 0.95 },
-      { layer: 'transcript_classifier', confidence: 0.0, meta: { degraded: true } },
-    ],
-    tabId: 1,
-    ...overrides,
-  };
-}
-
-describe('sidepanel review-flow E2E', () => {
-  test.skipIf(!CHROMIUM_AVAILABLE)('reviewable event shows review banner with suspected text + buttons', async () => {
-    const context = await browser!.newContext();
-    const page = await context.newPage();
-    await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry()] });
-    await page.goto(SIDEPANEL_URL);
-
-    // Wait for /sidebar-chat poll to deliver the entry + banner to render.
-    await page.waitForFunction(
-      () => {
-        const b = document.getElementById('security-banner') as HTMLElement | null;
-        return !!b && b.style.display !== 'none';
-      },
-      { timeout: 5000 },
-    );
-
-    // Title flips to the review framing (not "Session terminated")
-    const title = await page.$eval('#security-banner-title', (el) => el.textContent);
-    expect(title).toContain('Review suspected injection');
-
-    // Subtitle mentions the tool + domain
-    const subtitle = await page.$eval('#security-banner-subtitle', (el) => el.textContent);
-    expect(subtitle).toContain('Bash');
-    expect(subtitle).toContain('news.ycombinator.com');
-    expect(subtitle).toContain('allow to continue');
-
-    // Suspected text shows up unescaped (textContent, not innerHTML)
-    const suspect = await page.$eval('#security-banner-suspect', (el) => el.textContent);
-    expect(suspect).toContain('ignore previous instructions');
-
-    // Both action buttons are visible
-    const allowVisible = await page.locator('#security-banner-btn-allow').isVisible();
-    const blockVisible = await page.locator('#security-banner-btn-block').isVisible();
-    expect(allowVisible).toBe(true);
-    expect(blockVisible).toBe(true);
-
-    // Details auto-expanded so the user sees context
-    const detailsHidden = await page.$eval('#security-banner-details', (el) => (el as HTMLElement).hidden);
-    expect(detailsHidden).toBe(false);
-
-    await context.close();
-  }, 15000);
-
-  test.skipIf(!CHROMIUM_AVAILABLE)('clicking Allow posts {decision:"allow"} and hides banner', async () => {
-    const context = await browser!.newContext();
-    const page = await context.newPage();
-    await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry()] });
-    await page.goto(SIDEPANEL_URL);
-    await page.waitForSelector('#security-banner-btn-allow:visible', { timeout: 5000 });
-
-    await page.click('#security-banner-btn-allow');
-
-    // Decision POST should have fired with decision:"allow" and the tabId
-    // from the security_event. Give the fetch promise a tick to resolve.
-    await page.waitForFunction(
-      () => (window as any).__decisionCalls?.length > 0,
-      { timeout: 2000 },
-    );
-
-    const calls = await page.evaluate(() => (window as any).__decisionCalls);
-    expect(calls).toHaveLength(1);
-    expect(calls[0].decision).toBe('allow');
-    expect(calls[0].tabId).toBe(1);
-    expect(calls[0].reason).toBe('user');
-
-    // Banner should hide optimistically after the POST
-    await page.waitForFunction(
-      () => {
-        const b = document.getElementById('security-banner') as HTMLElement | null;
-        return !!b && b.style.display === 'none';
-      },
-      { timeout: 2000 },
-    );
-
-    await context.close();
-  }, 15000);
-
-  test.skipIf(!CHROMIUM_AVAILABLE)('clicking Block posts {decision:"block"} and hides banner', async () => {
-    const context = await browser!.newContext();
-    const page = await context.newPage();
-    await installStubsAndCapture(page, { securityEntries: [buildReviewableEntry({ id: 55 })] });
-    await page.goto(SIDEPANEL_URL);
-    await page.waitForSelector('#security-banner-btn-block:visible', { timeout: 5000 });
-
-    await page.click('#security-banner-btn-block');
-
-    await page.waitForFunction(
-      () => (window as any).__decisionCalls?.length > 0,
-      { timeout: 2000 },
-    );
-
-    const calls = await page.evaluate(() => (window as any).__decisionCalls);
-    expect(calls).toHaveLength(1);
-    expect(calls[0].decision).toBe('block');
-    expect(calls[0].tabId).toBe(1);
-
-    await page.waitForFunction(
-      () => {
-        const b = document.getElementById('security-banner') as HTMLElement | null;
-        return !!b && b.style.display === 'none';
-      },
-      { timeout: 2000 },
-    );
-
-    await context.close();
-  }, 15000);
-
-  test.skipIf(!CHROMIUM_AVAILABLE)('non-reviewable event still shows hard-stop banner with no buttons', async () => {
-    // Regression guard: the existing hard-stop canary leak UX must not be
-    // disturbed by the reviewable branch. An event without reviewable:true
-    // keeps the old behavior.
-    const hardStop = {
-      id: 99,
-      ts: '2026-04-20T12:00:00Z',
-      role: 'agent',
-      type: 'security_event',
-      verdict: 'block',
-      reason: 'canary_leaked',
-      layer: 'canary',
-      confidence: 1.0,
-      domain: 'attacker.example.com',
-      channel: 'tool_use:Bash',
-      tabId: 1,
-    };
-    const context = await browser!.newContext();
-    const page = await context.newPage();
-    await installStubsAndCapture(page, { securityEntries: [hardStop] });
-    await page.goto(SIDEPANEL_URL);
-    await page.waitForFunction(
-      () => {
-        const b = document.getElementById('security-banner') as HTMLElement | null;
-        return !!b && b.style.display !== 'none';
-      },
-      { timeout: 5000 },
-    );
-
-    const title = await page.$eval('#security-banner-title', (el) => el.textContent);
-    expect(title).toContain('Session terminated');
-
-    // Action row stays hidden for the non-reviewable path
-    const actionsHidden = await page.$eval('#security-banner-actions', (el) => (el as HTMLElement).hidden);
-    expect(actionsHidden).toBe(true);
-
-    await context.close();
-  }, 15000);
-
-  test.skipIf(!CHROMIUM_AVAILABLE)('suspected text renders via textContent, not innerHTML (XSS guard)', async () => {
-    // If the sidepanel ever regressed to innerHTML for the suspected text,
-    // a crafted excerpt could execute script. This test uses one; if the
-    // <script> runs, window.__xss gets set. It must remain undefined.
-    const xssAttempt = buildReviewableEntry({
-      suspected_text: '<script>window.__xss = "pwn"</script><img src=x onerror="window.__xss=\'onerror\'">',
-    });
-    const context = await browser!.newContext();
-    const page = await context.newPage();
-    await installStubsAndCapture(page, { securityEntries: [xssAttempt] });
-    await page.goto(SIDEPANEL_URL);
-    await page.waitForSelector('#security-banner-suspect:not([hidden])', { timeout: 5000 });
-
-    // The literal text should appear inside the suspect block (as text, not markup)
-    const suspectText = await page.$eval('#security-banner-suspect', (el) => el.textContent);
-    expect(suspectText).toContain('<script>');
-
-    // No script executed
-    const xssFlag = await page.evaluate(() => (window as any).__xss);
-    expect(xssFlag).toBeUndefined();
-
-    await context.close();
-  }, 15000);
-});
@@ -1670,30 +1670,8 @@ describe('no compiled binaries in git', () => {
  });
 });

-describe('sidebar agent (#584)', () => {
-  // #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
-  test('sidebar-agent.ts allowedTools includes Write', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
-    // Find the allowedTools line in the askClaude function
-    const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
-    expect(match).not.toBeNull();
-    expect(match![1]).toContain('Write');
-  });
-
-  // #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
-  test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
-    // Find the sidebar allowedTools in the headed-mode path
-    const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
-    expect(match).not.toBeNull();
-    expect(match![1]).toContain('Bash');
-    expect(match![1]).not.toContain('Write');
-  });
-
-  // #584 — Sidebar stderr: stderr handler is not empty
-  test('sidebar-agent.ts stderr handler is not empty', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
-    // The stderr handler should NOT be an empty arrow function
-    expect(content).not.toContain("proc.stderr.on('data', () => {})");
-  });
-});
+// The `sidebar agent (#584)` describe block used to live here. sidebar-agent.ts
+// and the entire chat-queue path were replaced by the interactive claude PTY
+// (terminal-agent.ts), so its sidebar-agent.ts assertions had no target file
+// and its server.ts sidebar --allowedTools check went with them. Terminal-pane
+// invariants: browse/test/sidebar-tabs.test.ts, browse/test/terminal-agent.test.ts.