// Source path: test/helpers/secret-sink-harness.ts
// Introduced by commit e2adee554aa05756e0f253c848599b093444a250
// (Garry Tan, Fri, 24 Apr 2026 00:09:04 -0700):
//   feat(test): add secret-sink-harness for negative-space leak testing (D21 #5)

/**
 * Secret-sink test harness (D21 #5, D1-eng contract).
 *
 * Runs a bin with a seeded secret, captures every channel the bin could
 * leak through, and asserts that the seed never appears. Used by Slice 6
 * tests and available for future skills that handle secrets.
 *
 * Channels covered:
 *   - stdout (Bun.spawn pipe)
 *   - stderr (Bun.spawn pipe)
 *   - files written under a per-run $HOME (walked post-mortem)
 *   - telemetry JSONL under $HOME/.gstack/analytics/ (same walk, but called
 *     out separately for clearer test failures)
 *
 * Match rules (any hit = leak):
 *   - exact substring
 *   - URL-coded forms, reported as 'url-decoded': the percent-decoded seed
 *     (for seeds that are themselves encoded) and the percent-encoded seed
 *     (for bins that embed the seed in a URL / connection string)
 *   - first-12-char prefix (catches "we logged just a portion")
 *   - base64 of the seed, both standalone and embedded at any byte offset
 *     inside a larger encoded payload (catches auth-header leakage such as
 *     base64("user:<seed>")), in the standard and URL-safe alphabets
 *
 * Intentionally NOT covered in v1:
 *   - subprocess environment dump (portable /proc reading is non-trivial;
 *     bins rarely leak env without also writing to stdout/stderr)
 *   - the user's real shell history (bins don't modify it; the user's
 *     shell does)
 *   - files larger than 1 MiB (skipped by the post-mortem walk)
 * Those are documented as follow-ups in the D21 eng review commentary.
 *
 * Positive-control discipline: every test suite using this harness should
 * include one test that deliberately leaks a seed and asserts the harness
 * catches it. A harness that silently under-reports is worse than no
 * harness.
 */

import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';

export interface SecretSinkOptions {
  /** Executable to run. */
  bin: string;
  /** Arguments passed to `bin`. */
  args: string[];
  /** Seeds whose presence in any captured channel = failure. */
  seeds: string[];
  /** Extra environment variables; these override the harness defaults. */
  env?: Record<string, string>;
  /** Text written to the subprocess's stdin. Empty/omitted = stdin ignored. */
  stdin?: string;
  /** Override the tmp $HOME. Default: fresh mkdtemp under os.tmpdir(). */
  tmpHome?: string;
  /** Cap on subprocess runtime, ms. Default 10_000. */
  timeoutMs?: number;
}

export interface Leak {
  channel: 'stdout' | 'stderr' | 'file' | 'telemetry';
  matchType: 'exact' | 'url-decoded' | 'prefix-12' | 'base64';
  /** For channel=file|telemetry: the path relative to tmpHome. */
  where?: string;
  /** Short excerpt around the match (for debugging). */
  excerpt: string;
}

export interface SinkResult {
  stdout: string;
  stderr: string;
  status: number;
  /** All files written under tmpHome during the run, keyed by relative path. */
  filesWritten: Record<string, string>;
  /** Subset of filesWritten matching .gstack/analytics/*.jsonl. */
  telemetry: Record<string, string>;
  /** Leaks discovered. Empty = clean. */
  leaks: Leak[];
  /** Where HOME was pointed during the run (for post-mortem inspection). */
  tmpHome: string;
}

/**
 * Runs `opts.bin` with HOME redirected to a scratch directory, captures
 * stdout, stderr and every file left under that directory, and scans all of
 * it for each seed.
 *
 * @param opts - What to run and which seeds to look for.
 * @returns Captured output, the files found under the scratch HOME, and the
 *   list of leaks (empty when clean).
 */
export async function runWithSecretSink(opts: SecretSinkOptions): Promise<SinkResult> {
  const tmpHome = opts.tmpHome ?? fs.mkdtempSync(path.join(os.tmpdir(), 'sink-'));
  // Make sure .gstack exists so bins that append to analytics have somewhere to write.
  fs.mkdirSync(path.join(tmpHome, '.gstack', 'analytics'), { recursive: true });

  const env = {
    // Minimal PATH that still finds jq/git/curl/sed so our bins work.
    PATH: '/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin:/usr/local/bin',
    HOME: tmpHome,
    GSTACK_HOME: path.join(tmpHome, '.gstack'),
    ...(opts.env || {}),
  };

  const proc = Bun.spawn([opts.bin, ...opts.args], {
    env,
    stdout: 'pipe',
    stderr: 'pipe',
    stdin: opts.stdin ? 'pipe' : 'ignore',
  });
  if (opts.stdin) {
    proc.stdin!.write(opts.stdin);
    proc.stdin!.end();
  }

  const timeoutMs = opts.timeoutMs ?? 10_000;
  const timeoutHandle = setTimeout(() => {
    try {
      proc.kill();
    } catch {
      /* already done */
    }
  }, timeoutMs);

  let captured: [string, string, number];
  try {
    captured = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);
  } finally {
    // Always disarm the kill timer, even when reading the pipes fails.
    clearTimeout(timeoutHandle);
  }
  const [stdout, stderr, status] = captured;

  // Walk tmpHome and read all files (very large files are skipped by walk).
  const filesWritten: Record<string, string> = {};
  const telemetry: Record<string, string> = {};
  walk(tmpHome, tmpHome, filesWritten);
  for (const [rel, content] of Object.entries(filesWritten)) {
    if (rel.startsWith('.gstack/analytics/') && rel.endsWith('.jsonl')) {
      telemetry[rel] = content;
    }
  }

  const leaks = scanForLeaks(opts.seeds, stdout, stderr, filesWritten);

  return { stdout, stderr, status, filesWritten, telemetry, leaks, tmpHome };
}

/**
 * Scans every channel for every seed with every match rule.
 *
 * At most one leak is reported per (seed, match type, location); when several
 * needles of the same match type hit the same location, the first one wins.
 * Empty seeds are ignored.
 */
function scanForLeaks(
  seeds: string[],
  stdout: string,
  stderr: string,
  filesWritten: Record<string, string>,
): Leak[] {
  const leaks: Leak[] = [];
  for (const seed of seeds) {
    if (!seed) continue;

    const reported = new Set<string>();
    const report = (leak: Leak): void => {
      const key = `${leak.channel}\u0000${leak.where ?? ''}\u0000${leak.matchType}`;
      if (reported.has(key)) return;
      reported.add(key);
      leaks.push(leak);
    };

    for (const { rule, matchType } of buildMatchRules(seed)) {
      const stdoutHit = findHit(stdout, rule);
      if (stdoutHit !== null) {
        report({ channel: 'stdout', matchType, excerpt: excerptAt(stdout, stdoutHit) });
      }
      const stderrHit = findHit(stderr, rule);
      if (stderrHit !== null) {
        report({ channel: 'stderr', matchType, excerpt: excerptAt(stderr, stderrHit) });
      }
      for (const [rel, content] of Object.entries(filesWritten)) {
        const hit = findHit(content, rule);
        if (hit !== null) {
          const channel = rel.startsWith('.gstack/analytics/') ? 'telemetry' : 'file';
          report({ channel, matchType, where: rel, excerpt: excerptAt(content, hit) });
        }
      }
    }
  }
  return leaks;
}

/**
 * Recursively collects the UTF-8 text of every regular file under `dir` into
 * `out`, keyed by path relative to `root`.
 *
 * Symlinks are not followed, entries that cannot be stat'ed or read are
 * skipped, and files larger than 1 MiB are skipped.
 */
function walk(root: string, dir: string, out: Record<string, string>): void {
  for (const entry of fs.readdirSync(dir)) {
    const full = path.join(dir, entry);
    let stat: fs.Stats;
    try {
      stat = fs.lstatSync(full);
    } catch {
      continue;
    }
    if (stat.isSymbolicLink()) continue;
    if (stat.isDirectory()) {
      walk(root, full, out);
      continue;
    }
    if (!stat.isFile()) continue;
    if (stat.size > 1024 * 1024) continue; // skip huge files (known blind spot, see header)
    const rel = path.relative(root, full);
    try {
      out[rel] = fs.readFileSync(full, 'utf-8');
    } catch {
      // unreadable — skip
    }
  }
}

/**
 * Builds the list of needles to search for, in match-type order:
 * exact, url-decoded, prefix-12, base64. Duplicate needles within a match
 * type are emitted once.
 */
function buildMatchRules(seed: string): Array<{ rule: string; matchType: Leak['matchType'] }> {
  const rules: Array<{ rule: string; matchType: Leak['matchType'] }> = [];
  const seen = new Set<string>();
  const add = (rule: string, matchType: Leak['matchType']): void => {
    if (!rule) return;
    const key = `${matchType}\u0000${rule}`;
    if (seen.has(key)) return;
    seen.add(key);
    rules.push({ rule, matchType });
  };

  add(seed, 'exact');

  // Decoded form — the seed itself is percent-encoded and the bin printed
  // the decoded value.
  try {
    const decoded = decodeURIComponent(seed);
    if (decoded !== seed) add(decoded, 'url-decoded');
  } catch {
    // malformed %-encoding in the seed itself; ignore
  }

  // Encoded forms — the bin percent-encoded the seed (e.g., a password with
  // a '@' embedded in a connection string). URL-decoding the output would
  // reveal the seed, so these are reported under the same match type.
  for (const encoded of percentEncodedForms(seed)) {
    if (encoded !== seed) add(encoded, 'url-decoded');
  }

  // First-12-char prefix — catches partial leaks like "we logged the
  // first 12+ chars for debugging." Only applied to seeds >= 16 chars,
  // since shorter seeds would false-positive against normal words.
  if (seed.length >= 16) {
    add(seed.slice(0, 12), 'prefix-12');
  }

  // Base64 encoding — catches leaks through auth headers or config files
  // that encode the seed. Only for seeds >= 12 chars to reduce false
  // positives from short strings that happen to be valid base64.
  if (seed.length >= 12) {
    for (const needle of base64Needles(seed)) add(needle, 'base64');
  }

  return rules;
}

/**
 * Returns the percent-encoded spellings of `seed` that common encoders emit:
 * encodeURIComponent output, the RFC 3986 strict variant (also escaping
 * !'()*), '+' for space (form encoding), each with upper- and lower-case hex.
 * Returns an empty list when the seed cannot be encoded.
 */
function percentEncodedForms(seed: string): string[] {
  let standard: string;
  try {
    standard = encodeURIComponent(seed);
  } catch {
    return []; // encodeURIComponent throws URIError on lone surrogates
  }
  const strict = standard.replace(
    /[!'()*]/g,
    (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`,
  );
  const spaced = [standard, strict].flatMap((form) => [form, form.replace(/%20/g, '+')]);
  return spaced.flatMap((form) => [
    form,
    form.replace(/%[0-9A-F]{2}/g, (escape) => escape.toLowerCase()),
  ]);
}

/**
 * Returns base64 needles for `seed`: the standalone padded encoding first,
 * then, for each of the three possible byte alignments, the run of base64
 * characters that is fully determined by the seed no matter what bytes
 * surround it — in both the standard and the URL-safe alphabet.
 */
function base64Needles(seed: string): string[] {
  const seedBytes = Buffer.from(seed);
  const needles: string[] = [seedBytes.toString('base64')];
  for (let lead = 0; lead < 3; lead++) {
    const framed = Buffer.concat([Buffer.alloc(lead), seedBytes]);
    const encoded = framed.toString('base64').replace(/=+$/, '');
    // Leading characters that mix in bits of the unknown preceding bytes.
    const dropFront = lead === 0 ? 0 : lead + 1;
    // A trailing partial group ends in one character whose low bits depend
    // on whatever byte follows the seed.
    const dropBack = framed.length % 3 === 0 ? 0 : 1;
    const core = encoded.slice(dropFront, encoded.length - dropBack);
    needles.push(core, core.replace(/\+/g, '-').replace(/\//g, '_'));
  }
  return needles;
}

/** Index of the first occurrence of `needle` in `haystack`, or null when absent or empty. */
function findHit(haystack: string, needle: string): number | null {
  if (!needle) return null;
  const idx = haystack.indexOf(needle);
  return idx === -1 ? null : idx;
}

/** Up to 20 chars before and 40 chars from `idx`, with newlines shown as a literal "\n". */
function excerptAt(s: string, idx: number): string {
  const start = Math.max(0, idx - 20);
  const end = Math.min(s.length, idx + 40);
  return s.slice(start, end).replace(/\n/g, '\\n');
}