From f4e31ef5d85205d44f542bab4ba45204a6300a25 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 20 Apr 2026 20:25:37 +0800 Subject: [PATCH] test(security): review-flow regression tests 16 tests for the file-based handshake: round-trip, clear, permissions, atomic write tmp-file cleanup, excerpt sanitization (truncation, ctrl chars, whitespace collapse), and a simulated poll-loop confirming allow/block/timeout behavior the sidebar-agent relies on. Pins the contract so future refactors can't silently break the allow-path recovery and ship people back into the hard-kill FP pit. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/test/security-review-flow.test.ts | 194 +++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 browse/test/security-review-flow.test.ts diff --git a/browse/test/security-review-flow.test.ts b/browse/test/security-review-flow.test.ts new file mode 100644 index 00000000..a8755499 --- /dev/null +++ b/browse/test/security-review-flow.test.ts @@ -0,0 +1,194 @@ +/** + * Review-on-BLOCK regression tests. + * + * Covers the user-in-the-loop path added to resolve false positives on + * benign developer content (e.g., HN comments discussing a prompt injection + * incident getting flagged as prompt injection). Instead of hard-stopping + * the session on a tool-output BLOCK, the agent emits a reviewable + * security_event and polls for the user's decision via a per-tab file. + * + * These tests pin the file-based handshake and the excerpt sanitization. + */ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { + writeDecision, + readDecision, + clearDecision, + decisionFileForTab, + excerptForReview, + type Verdict, +} from '../src/security'; + +const ORIG_HOME = process.env.HOME; +let tmpHome = ''; + +beforeEach(() => { + tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'sec-review-')); + process.env.HOME = tmpHome; +}); + +afterEach(() => { + process.env.HOME = ORIG_HOME; + try { fs.rmSync(tmpHome, { recursive: true, force: true }); } catch {} +}); + +describe('security decision file handshake', () => { + test('writeDecision + readDecision round-trips', () => { + // SECURITY_DIR is computed at module load time from the original HOME. + // The function writes relative to its own SECURITY_DIR constant, so we + // verify the API shape rather than the exact path. The file lives where + // decisionFileForTab says it does. + const file = decisionFileForTab(42); + expect(file.endsWith('/tab-42.json')).toBe(true); + + // Ensure the directory exists (writeDecision creates it). + writeDecision({ tabId: 42, decision: 'allow', ts: new Date().toISOString(), reason: 'user' }); + const rec = readDecision(42); + expect(rec).not.toBeNull(); + expect(rec?.tabId).toBe(42); + expect(rec?.decision).toBe('allow'); + expect(rec?.reason).toBe('user'); + }); + + test('clearDecision removes the file', () => { + writeDecision({ tabId: 7, decision: 'block', ts: new Date().toISOString() }); + expect(readDecision(7)).not.toBeNull(); + clearDecision(7); + expect(readDecision(7)).toBeNull(); + }); + + test('readDecision returns null for a tab with no decision', () => { + expect(readDecision(99999)).toBeNull(); + }); + + test('writeDecision + readDecision handles both values', () => { + writeDecision({ tabId: 1, decision: 'allow', ts: '2026-04-20T12:00:00Z' }); + writeDecision({ tabId: 2, decision: 'block', ts: '2026-04-20T12:00:01Z' }); + expect(readDecision(1)?.decision).toBe('allow'); + expect(readDecision(2)?.decision).toBe('block'); + }); + + test('atomic write: temp file is cleaned up after rename', () => { + writeDecision({ tabId: 10, decision: 'allow', ts: new Date().toISOString() }); + const file = decisionFileForTab(10); + const dir = path.dirname(file); + const leftover = fs.readdirSync(dir).filter((f) => f.startsWith('tab-10.json.tmp')); + expect(leftover.length).toBe(0); + }); + + test('file perms are 0600 on the decision file', () => { + writeDecision({ tabId: 3, decision: 'allow', ts: new Date().toISOString() }); + const stat = fs.statSync(decisionFileForTab(3)); + // mode & 0o777 = lower 9 bits of permission + const perms = stat.mode & 0o777; + // On some filesystems the sticky/group bits may vary; we assert the + // owner-only pattern. + expect(perms & 0o077).toBe(0); // no group/other read or write + }); +}); + +describe('excerptForReview sanitization', () => { + test('passes short clean text through', () => { + expect(excerptForReview('hello world')).toBe('hello world'); + }); + + test('truncates at the default max with ellipsis', () => { + const long = 'a'.repeat(800); + const out = excerptForReview(long); + expect(out.length).toBe(501); // 500 chars + ellipsis + expect(out.endsWith('…')).toBe(true); + }); + + test('strips control chars that would break the UI', () => { + const input = 'before\x00\x01\x02\x1Fafter'; + expect(excerptForReview(input)).toBe('beforeafter'); + }); + + test('collapses whitespace for compact display', () => { + expect(excerptForReview('foo \n\n\t bar')).toBe('foo bar'); + }); + + test('returns empty string for empty input', () => { + expect(excerptForReview('')).toBe(''); + expect(excerptForReview(null as any)).toBe(''); + }); + + test('custom max parameter', () => { + expect(excerptForReview('abcdefghij', 5)).toBe('abcde…'); + }); +}); + +describe('Verdict type includes user_overrode', () => { + test('user_overrode is a valid Verdict value', () => { + // TypeScript compile-time check that the type accepts the value. + // If 'user_overrode' were removed from the Verdict union, this file + // would fail to type-check. + const v: Verdict = 'user_overrode'; + expect(v).toBe('user_overrode'); + }); +}); + +describe('review-flow smoke — simulated sidebar-agent poll loop', () => { + test('agent-side poll sees user allow decision', async () => { + const tabId = 123; + clearDecision(tabId); + + // Simulate the sidepanel POST happening after a short delay. + setTimeout(() => { + writeDecision({ tabId, decision: 'allow', ts: new Date().toISOString(), reason: 'user' }); + }, 50); + + // Simulate the sidebar-agent poll loop. + const deadline = Date.now() + 2000; + let decision: 'allow' | 'block' | null = null; + while (Date.now() < deadline) { + const rec = readDecision(tabId); + if (rec?.decision) { + decision = rec.decision; + break; + } + await new Promise((r) => setTimeout(r, 20)); + } + expect(decision).toBe('allow'); + }); + + test('agent-side poll sees user block decision', async () => { + const tabId = 456; + clearDecision(tabId); + setTimeout(() => { + writeDecision({ tabId, decision: 'block', ts: new Date().toISOString() }); + }, 50); + + const deadline = Date.now() + 2000; + let decision: 'allow' | 'block' | null = null; + while (Date.now() < deadline) { + const rec = readDecision(tabId); + if (rec?.decision) { + decision = rec.decision; + break; + } + await new Promise((r) => setTimeout(r, 20)); + } + expect(decision).toBe('block'); + }); + + test('poll times out when no decision arrives', async () => { + const tabId = 789; + clearDecision(tabId); + + const deadline = Date.now() + 200; + let decision: 'allow' | 'block' | null = null; + while (Date.now() < deadline) { + const rec = readDecision(tabId); + if (rec?.decision) { + decision = rec.decision; + break; + } + await new Promise((r) => setTimeout(r, 20)); + } + expect(decision).toBeNull(); + }); +});