test(security): source-level contracts for the security wiring

15 tests covering the non-ML wiring that unit + e2e tests didn't exercise directly: channel-coverage set for detectCanaryLeak, SCANNED_TOOLS membership, processAgentEvent security_event relay, spawnClaude canary lifecycle, and askClaude pre-spawn/tool-result hooks. Generated by /ship coverage audit — 87% weighted coverage. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-17 23:30:09 +02:00 · 2026-04-20 07:09:52 +08:00
parent ac41d9fffd
commit 9bbfa26597
1 changed files with 129 additions and 0 deletions
@@ -0,0 +1,129 @@
+/**
+ * Source-level contract tests for security code paths that are not exported
+ * and therefore not reachable from unit tests. Follows the same convention
+ * as sidebar-security.test.ts — asserts specific invariants by grep'ing the
+ * source tree.
+ *
+ * These tests fail fast if a future refactor silently drops:
+ *   * A canary-leak check on one of the known outbound channels
+ *   * The SCANNED_TOOLS set for post-tool-result ML scans
+ *   * The security_event relay in server.ts processAgentEvent
+ *   * The canary field on the queue entry (server → sidebar-agent)
+ */
+
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const AGENT_SRC = fs.readFileSync(
+  path.join(import.meta.dir, '../src/sidebar-agent.ts'),
+  'utf-8',
+);
+const SERVER_SRC = fs.readFileSync(
+  path.join(import.meta.dir, '../src/server.ts'),
+  'utf-8',
+);
+
+describe('detectCanaryLeak — channel coverage (source)', () => {
+  test('covers assistant_text channel', () => {
+    expect(AGENT_SRC).toContain("'assistant_text'");
+  });
+
+  test('covers tool_use arguments via checkCanaryInStructure', () => {
+    expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(block\.input, canary\)/);
+    expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(event\.content_block\.input, canary\)/);
+  });
+
+  test('covers text_delta streaming channel', () => {
+    expect(AGENT_SRC).toContain("'text_delta'");
+    expect(AGENT_SRC).toContain("event.delta?.type === 'text_delta'");
+  });
+
+  test('covers input_json_delta (streaming tool args)', () => {
+    expect(AGENT_SRC).toContain("'tool_input_delta'");
+    expect(AGENT_SRC).toContain("event.delta?.type === 'input_json_delta'");
+  });
+
+  test('covers result channel (final claude event)', () => {
+    expect(AGENT_SRC).toContain("event.type === 'result'");
+    expect(AGENT_SRC).toContain('event.result.includes(canary)');
+  });
+});
+
+describe('SCANNED_TOOLS — ML scan coverage for tool outputs', () => {
+  test('Read, Grep, Glob, Bash, WebFetch all included', () => {
+    const match = AGENT_SRC.match(/const SCANNED_TOOLS = new Set\(\[([^\]]+)\]\);/);
+    expect(match).toBeTruthy();
+    const list = match![1];
+    expect(list).toContain("'Read'");
+    expect(list).toContain("'Grep'");
+    expect(list).toContain("'Glob'");
+    expect(list).toContain("'Bash'");
+    expect(list).toContain("'WebFetch'");
+  });
+
+  test('tool-result scanner only fires when text.length >= 32', () => {
+    // Tiny tool outputs (e.g. empty directory listings) should not trigger
+    // the expensive ML path.
+    expect(AGENT_SRC).toMatch(/text\.length >= 32/);
+  });
+});
+
+describe('processAgentEvent — security_event relay (server.ts)', () => {
+  test('relays verdict, reason, layer, confidence, domain, channel, tool, signals', () => {
+    // Block: addChatEntry call inside the security_event branch
+    const branch = SERVER_SRC.split("event.type === 'security_event'")[1] ?? '';
+    expect(branch).toContain('addChatEntry');
+    expect(branch).toContain('verdict: event.verdict');
+    expect(branch).toContain('reason: event.reason');
+    expect(branch).toContain('layer: event.layer');
+    expect(branch).toContain('confidence: event.confidence');
+    expect(branch).toContain('domain: event.domain');
+    expect(branch).toContain('channel: event.channel');
+    expect(branch).toContain('signals: event.signals');
+  });
+});
+
+describe('spawnClaude — canary lifecycle (server.ts)', () => {
+  test('generates a fresh canary per message', () => {
+    expect(SERVER_SRC).toMatch(/const canary = generateCanary\(\);/);
+  });
+
+  test('injects canary into the system prompt before embedding user message', () => {
+    expect(SERVER_SRC).toMatch(/injectCanary\(systemPrompt, canary\)/);
+    // Order matters: canary-augmented system prompt comes before <user-message>
+    expect(SERVER_SRC).toMatch(/systemPromptWithCanary.*<user-message>/s);
+  });
+
+  test('canary is written into the queue entry for sidebar-agent pickup', () => {
+    // Queue entry JSON includes `canary` field so sidebar-agent can scan
+    // outbound channels for it.
+    expect(SERVER_SRC).toMatch(/canary,.*sidebar-agent/s);
+  });
+});
+
+describe('askClaude — pre-spawn + tool-result defense wiring', () => {
+  test('preSpawnSecurityCheck runs BEFORE claude subprocess spawn', () => {
+    // The pre-spawn check must be `await`ed and short-circuit spawning when
+    // it returns true.
+    expect(AGENT_SRC).toMatch(/await preSpawnSecurityCheck\(queueEntry\)/);
+  });
+
+  test('canaryCtx onLeak kills proc with SIGTERM then SIGKILL after 2s', () => {
+    expect(AGENT_SRC).toContain("proc.kill('SIGTERM')");
+    expect(AGENT_SRC).toContain("proc.kill('SIGKILL')");
+    // 2000ms fallback appears near both onLeak and tool-result-block handlers
+    expect(AGENT_SRC).toContain('}, 2000);');
+  });
+
+  test('tool-result scan short-circuits when both content layers below WARN', () => {
+    expect(AGENT_SRC).toMatch(/maxContent < THRESHOLDS\.WARN/);
+  });
+
+  test('onCanaryLeaked fires both security_event and agent_error for legacy clients', () => {
+    const fn = AGENT_SRC.split('async function onCanaryLeaked')[1]?.split('async function ')[0] ?? '';
+    expect(fn).toContain("type: 'security_event'");
+    expect(fn).toContain("type: 'agent_error'");
+    expect(fn).toContain('Session terminated');
+  });
+});