mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
test(security): source-level contracts for the security wiring
15 tests covering the non-ML wiring that unit + e2e tests didn't exercise directly: channel-coverage set for detectCanaryLeak, SCANNED_TOOLS membership, processAgentEvent security_event relay, spawnClaude canary lifecycle, and askClaude pre-spawn/tool-result hooks. Generated by /ship coverage audit — 87% weighted coverage. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
/**
|
||||
* Source-level contract tests for security code paths that are not exported
|
||||
* and therefore not reachable from unit tests. Follows the same convention
|
||||
* as sidebar-security.test.ts — asserts specific invariants by grep'ing the
|
||||
* source tree.
|
||||
*
|
||||
* These tests fail fast if a future refactor silently drops:
|
||||
* * A canary-leak check on one of the known outbound channels
|
||||
* * The SCANNED_TOOLS set for post-tool-result ML scans
|
||||
* * The security_event relay in server.ts processAgentEvent
|
||||
* * The canary field on the queue entry (server → sidebar-agent)
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const AGENT_SRC = fs.readFileSync(
|
||||
path.join(import.meta.dir, '../src/sidebar-agent.ts'),
|
||||
'utf-8',
|
||||
);
|
||||
const SERVER_SRC = fs.readFileSync(
|
||||
path.join(import.meta.dir, '../src/server.ts'),
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
describe('detectCanaryLeak — channel coverage (source)', () => {
|
||||
test('covers assistant_text channel', () => {
|
||||
expect(AGENT_SRC).toContain("'assistant_text'");
|
||||
});
|
||||
|
||||
test('covers tool_use arguments via checkCanaryInStructure', () => {
|
||||
expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(block\.input, canary\)/);
|
||||
expect(AGENT_SRC).toMatch(/checkCanaryInStructure\(event\.content_block\.input, canary\)/);
|
||||
});
|
||||
|
||||
test('covers text_delta streaming channel', () => {
|
||||
expect(AGENT_SRC).toContain("'text_delta'");
|
||||
expect(AGENT_SRC).toContain("event.delta?.type === 'text_delta'");
|
||||
});
|
||||
|
||||
test('covers input_json_delta (streaming tool args)', () => {
|
||||
expect(AGENT_SRC).toContain("'tool_input_delta'");
|
||||
expect(AGENT_SRC).toContain("event.delta?.type === 'input_json_delta'");
|
||||
});
|
||||
|
||||
test('covers result channel (final claude event)', () => {
|
||||
expect(AGENT_SRC).toContain("event.type === 'result'");
|
||||
expect(AGENT_SRC).toContain('event.result.includes(canary)');
|
||||
});
|
||||
});
|
||||
|
||||
describe('SCANNED_TOOLS — ML scan coverage for tool outputs', () => {
|
||||
test('Read, Grep, Glob, Bash, WebFetch all included', () => {
|
||||
const match = AGENT_SRC.match(/const SCANNED_TOOLS = new Set\(\[([^\]]+)\]\);/);
|
||||
expect(match).toBeTruthy();
|
||||
const list = match![1];
|
||||
expect(list).toContain("'Read'");
|
||||
expect(list).toContain("'Grep'");
|
||||
expect(list).toContain("'Glob'");
|
||||
expect(list).toContain("'Bash'");
|
||||
expect(list).toContain("'WebFetch'");
|
||||
});
|
||||
|
||||
test('tool-result scanner only fires when text.length >= 32', () => {
|
||||
// Tiny tool outputs (e.g. empty directory listings) should not trigger
|
||||
// the expensive ML path.
|
||||
expect(AGENT_SRC).toMatch(/text\.length >= 32/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('processAgentEvent — security_event relay (server.ts)', () => {
|
||||
test('relays verdict, reason, layer, confidence, domain, channel, tool, signals', () => {
|
||||
// Block: addChatEntry call inside the security_event branch
|
||||
const branch = SERVER_SRC.split("event.type === 'security_event'")[1] ?? '';
|
||||
expect(branch).toContain('addChatEntry');
|
||||
expect(branch).toContain('verdict: event.verdict');
|
||||
expect(branch).toContain('reason: event.reason');
|
||||
expect(branch).toContain('layer: event.layer');
|
||||
expect(branch).toContain('confidence: event.confidence');
|
||||
expect(branch).toContain('domain: event.domain');
|
||||
expect(branch).toContain('channel: event.channel');
|
||||
expect(branch).toContain('signals: event.signals');
|
||||
});
|
||||
});
|
||||
|
||||
describe('spawnClaude — canary lifecycle (server.ts)', () => {
|
||||
test('generates a fresh canary per message', () => {
|
||||
expect(SERVER_SRC).toMatch(/const canary = generateCanary\(\);/);
|
||||
});
|
||||
|
||||
test('injects canary into the system prompt before embedding user message', () => {
|
||||
expect(SERVER_SRC).toMatch(/injectCanary\(systemPrompt, canary\)/);
|
||||
// Order matters: canary-augmented system prompt comes before <user-message>
|
||||
expect(SERVER_SRC).toMatch(/systemPromptWithCanary.*<user-message>/s);
|
||||
});
|
||||
|
||||
test('canary is written into the queue entry for sidebar-agent pickup', () => {
|
||||
// Queue entry JSON includes `canary` field so sidebar-agent can scan
|
||||
// outbound channels for it.
|
||||
expect(SERVER_SRC).toMatch(/canary,.*sidebar-agent/s);
|
||||
});
|
||||
});
|
||||
|
||||
describe('askClaude — pre-spawn + tool-result defense wiring', () => {
|
||||
test('preSpawnSecurityCheck runs BEFORE claude subprocess spawn', () => {
|
||||
// The pre-spawn check must be `await`ed and short-circuit spawning when
|
||||
// it returns true.
|
||||
expect(AGENT_SRC).toMatch(/await preSpawnSecurityCheck\(queueEntry\)/);
|
||||
});
|
||||
|
||||
test('canaryCtx onLeak kills proc with SIGTERM then SIGKILL after 2s', () => {
|
||||
expect(AGENT_SRC).toContain("proc.kill('SIGTERM')");
|
||||
expect(AGENT_SRC).toContain("proc.kill('SIGKILL')");
|
||||
// 2000ms fallback appears near both onLeak and tool-result-block handlers
|
||||
expect(AGENT_SRC).toContain('}, 2000);');
|
||||
});
|
||||
|
||||
test('tool-result scan short-circuits when both content layers below WARN', () => {
|
||||
expect(AGENT_SRC).toMatch(/maxContent < THRESHOLDS\.WARN/);
|
||||
});
|
||||
|
||||
test('onCanaryLeaked fires both security_event and agent_error for legacy clients', () => {
|
||||
const fn = AGENT_SRC.split('async function onCanaryLeaked')[1]?.split('async function ')[0] ?? '';
|
||||
expect(fn).toContain("type: 'security_event'");
|
||||
expect(fn).toContain("type: 'agent_error'");
|
||||
expect(fn).toContain('Session terminated');
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user