/**
 * Source-level contract tests for security code paths that are not exported
 * and therefore not reachable from unit tests. Follows the same convention
 * as sidebar-security.test.ts — asserts specific invariants by grep'ing the
 * source tree.
 *
 * These tests fail fast if a future refactor silently drops:
 *   * A canary-leak check on one of the known outbound channels
 *   * The SCANNED_TOOLS set for post-tool-result ML scans
 *   * The security_event relay in server.ts processAgentEvent
 *   * The canary field on the queue entry (server → sidebar-agent)
 *
 * Every assertion below is a substring or regex match against raw source
 * text, so purely cosmetic changes in the scanned files (quote style,
 * spacing, renamed locals) can fail a test without any behavior change.
 */

import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';

// Raw text of the two modules under contract, read once at module load.
const AGENT_SRC = fs.readFileSync(
  path.join(import.meta.dir, '../src/sidebar-agent.ts'),
  'utf-8',
);
const SERVER_SRC = fs.readFileSync(
  path.join(import.meta.dir, '../src/server.ts'),
  'utf-8',
);

describe('detectCanaryLeak — channel coverage (source)', () => {
  test('covers assistant_text channel', () => {
    expect(AGENT_SRC).toContain("'assistant_text'");
  });

  test('covers tool_use arguments via checkCanaryInStructure', () => {
    expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(block\.input, canary\)/);
    expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(event\.content_block\.input, canary\)/);
  });

  test('covers text_delta streaming channel', () => {
    expect(AGENT_SRC).toContain("'text_delta'");
    expect(AGENT_SRC).toContain("event.delta?.type === 'text_delta'");
  });

  test('covers input_json_delta (streaming tool args)', () => {
    expect(AGENT_SRC).toContain("'tool_input_delta'");
    expect(AGENT_SRC).toContain("event.delta?.type === 'input_json_delta'");
  });

  test('covers result channel (final claude event)', () => {
    expect(AGENT_SRC).toContain("event.type === 'result'");
    expect(AGENT_SRC).toContain('event.result.includes(canary)');
  });
});

describe('SCANNED_TOOLS — ML scan coverage for tool outputs', () => {
  test('Read, Grep, Glob, Bash, WebFetch all included', () => {
    // Capture group 1 is the text between the brackets of the Set literal.
    const match = AGENT_SRC.match(/const SCANNED_TOOLS = new Set\(\[([^\]]+)\]\);/);
    expect(match).toBeTruthy();
    // Guarded access instead of a non-null assertion: a missing declaration
    // is already reported by the expectation above.
    const list = match?.[1] ?? '';
    for (const tool of ['Read', 'Grep', 'Glob', 'Bash', 'WebFetch']) {
      expect(list).toContain(`'${tool}'`);
    }
  });

  test('tool-result scanner only fires when text.length >= 32', () => {
    // Tiny tool outputs (e.g. empty directory listings) should not trigger
    // the expensive ML path.
    expect(AGENT_SRC).toMatch(/text\.length >= 32/);
  });
});

describe('processAgentEvent — security_event relay (server.ts)', () => {
  test('relays verdict, reason, layer, confidence, domain, channel, tool, signals', () => {
    // Text between the first `event.type === 'security_event'` check and the
    // next occurrence of that check (or end of file). Empty when the check
    // is absent, which makes the first expectation below fail.
    const branch = SERVER_SRC.split("event.type === 'security_event'")[1] ?? '';
    expect(branch).toContain('addChatEntry');

    // One entry per field named in the test title. `tool` was listed in the
    // title but had no assertion, so dropping it from the relay went
    // undetected.
    // NOTE(review): assumes server.ts spells it `tool: event.tool` like the
    // other fields — confirm against the source.
    const relayedFields = [
      'verdict',
      'reason',
      'layer',
      'confidence',
      'domain',
      'channel',
      'tool',
      'signals',
    ];
    for (const field of relayedFields) {
      expect(branch).toContain(`${field}: event.${field}`);
    }
  });
});

describe('spawnClaude — canary lifecycle (server.ts)', () => {
  test('generates a fresh canary per message', () => {
    expect(SERVER_SRC).toMatch(/const canary = generateCanary\(\);/);
  });

  test('injects canary into the system prompt before embedding user message', () => {
    expect(SERVER_SRC).toMatch(/injectCanary\(systemPrompt, canary\)/);
    // Presence check only: this proves the canary-augmented prompt variable
    // exists somewhere in server.ts. It does NOT verify that it is built
    // before the user message is embedded.
    // TODO: anchor an ordering assertion on the user-message embedding site.
    expect(SERVER_SRC).toContain('systemPromptWithCanary');
  });

  test('canary is written into the queue entry for sidebar-agent pickup', () => {
    // Queue entry JSON includes `canary` field so sidebar-agent can scan
    // outbound channels for it.
    // The `s` flag lets `.` span newlines, so this matches any `canary,`
    // followed anywhere later in the file by `sidebar-agent`.
    expect(SERVER_SRC).toMatch(/canary,.*sidebar-agent/s);
  });
});

describe('askClaude — pre-spawn + tool-result defense wiring', () => {
  test('preSpawnSecurityCheck runs BEFORE claude subprocess spawn', () => {
    // The pre-spawn check must be `await`ed and short-circuit spawning when
    // it returns true.
    expect(AGENT_SRC).toMatch(/await preSpawnSecurityCheck\(queueEntry\)/);
  });

  test('canaryCtx onLeak kills proc with SIGTERM then SIGKILL after 2s', () => {
    expect(AGENT_SRC).toContain("proc.kill('SIGTERM')");
    expect(AGENT_SRC).toContain("proc.kill('SIGKILL')");
    // 2000ms fallback appears near both onLeak and tool-result-block handlers
    expect(AGENT_SRC).toContain('}, 2000);');
  });

  test('tool-result scan short-circuits when both content layers below WARN', () => {
    expect(AGENT_SRC).toMatch(/maxContent < THRESHOLDS\.WARN/);
  });

  test('onCanaryLeaked fires both security_event and agent_error for legacy clients', () => {
    // Slice from the `onCanaryLeaked` declaration up to the next
    // `async function ` in the file; empty when the declaration is missing.
    const fn =
      AGENT_SRC.split('async function onCanaryLeaked')[1]?.split('async function ')[0] ?? '';
    expect(fn).toContain("type: 'security_event'");
    expect(fn).toContain("type: 'agent_error'");
    expect(fn).toContain('Session terminated');
  });
});