/** * Integration tests — the defense-in-depth contract. * * Pins the invariant that content-security.ts (L1-L3) and security.ts (L4-L6) * layers coexist and fire INDEPENDENTLY. If someone refactors thinking "the * ML classifier covers this, we can delete the regex layer," these tests * fail and stop the regression. * * This is the lighter version of CEO plan §E5. The full version requires * a live Playwright Page for hidden-element stripping and ARIA regex (those * operate on DOM). Here we test the pure-function cross-module surface: * * content-security.ts datamark + envelope wrap + URL blocklist * * security.ts canary + combineVerdict * * Both modules on the same input produce orthogonal signals */ import { describe, test, expect } from 'bun:test'; import { datamarkContent, wrapUntrustedPageContent, urlBlocklistFilter, runContentFilters, resetSessionMarker, } from '../src/content-security'; import { generateCanary, checkCanaryInStructure, combineVerdict, type LayerSignal, } from '../src/security'; describe('defense-in-depth — layer coexistence', () => { test('canary survives when content is wrapped by content-security envelope', () => { const c = generateCanary(); // Attacker got Claude to echo the canary into tool output text. // content-security wraps that text in an envelope — canary still detectable. const leakedText = `Here's my session token: ${c}`; const wrapped = wrapUntrustedPageContent(leakedText, 'text'); expect(wrapped).toContain(c); expect(checkCanaryInStructure(wrapped, c)).toBe(true); }); test('datamarking does not corrupt canary detection', () => { resetSessionMarker(); const c = generateCanary(); // datamarkContent inserts zero-width watermarks after every 3rd period. // It must not break canary detection on text that contains the canary. const leakedText = `Intro sentence. Middle sentence. Third sentence. Here is the token ${c}. More. More.`; const marked = datamarkContent(leakedText); expect(checkCanaryInStructure(marked, c)).toBe(true); }); test('URL blocklist + canary are orthogonal — both can fire', () => { const c = generateCanary(); // Attack: URL points to a blocklisted exfil domain AND carries the canary. // content-security's urlBlocklistFilter catches the domain. // security.ts's canary check catches the token. // Neither depends on the other. const attackContent = `See https://requestbin.com/?leak=${c} for details`; const blockResult = urlBlocklistFilter(attackContent, 'https://requestbin.com/abc', 'text'); expect(blockResult.safe).toBe(false); expect(blockResult.warnings.length).toBeGreaterThan(0); const canaryHit = checkCanaryInStructure({ content: attackContent }, c); expect(canaryHit).toBe(true); }); test('benign content survives all layers — zero false positives', () => { resetSessionMarker(); const c = generateCanary(); const benign = 'The Pacific Ocean is the largest ocean on Earth. It contains many islands. Marine biodiversity is rich.'; // Datamark doesn't add the canary const marked = datamarkContent(benign); expect(checkCanaryInStructure(marked, c)).toBe(false); // Envelope wrap doesn't add the canary const wrapped = wrapUntrustedPageContent(benign, 'text'); expect(checkCanaryInStructure(wrapped, c)).toBe(false); // URL blocklist returns safe on a benign URL const blockResult = urlBlocklistFilter(benign, 'https://wikipedia.org', 'text'); expect(blockResult.safe).toBe(true); }); test('removing one signal does not zero-out the verdict (defense-in-depth)', () => { // Attack scenario: page has hidden injection + exfil URL + canary leak // across three different layers. Remove any ONE signal, other two still // produce a BLOCK-worthy verdict. const baseSignals: LayerSignal[] = [ // content at 0.95 clears the SOLO_CONTENT_BLOCK threshold (0.92) so // that the "content alone" case below still hits single_layer_high. { layer: 'testsavant_content', confidence: 0.95 }, { layer: 'transcript_classifier', confidence: 0.75, meta: { verdict: 'block' } }, { layer: 'canary', confidence: 1.0 }, ]; // All 3 signals → BLOCK (canary alone does it, ensemble also fires) expect(combineVerdict(baseSignals).verdict).toBe('block'); // Remove canary → BLOCK via ensemble_agreement expect(combineVerdict(baseSignals.slice(0, 2)).verdict).toBe('block'); // Remove transcript → BLOCK via canary still expect( combineVerdict([baseSignals[0], baseSignals[2]]).verdict, ).toBe('block'); // Remove content → BLOCK via canary still expect( combineVerdict([baseSignals[1], baseSignals[2]]).verdict, ).toBe('block'); // Remove canary AND transcript → only content WARN (single_layer_high // — but content is 0.88 which is just above BLOCK threshold 0.85) const contentOnly = combineVerdict([baseSignals[0]]); expect(contentOnly.verdict).toBe('warn'); expect(contentOnly.reason).toBe('single_layer_high'); }); test('content-security filter runs through the registered pipeline', () => { // Verify runContentFilters picks up the built-in url blocklist filter. // If a future refactor accidentally unregisters it, this test fails. const result = runContentFilters( 'page content', 'https://requestbin.com/webhook', 'text', ); // urlBlocklistFilter is auto-registered on module load (content-security.ts:347) expect(result.safe).toBe(false); expect(result.warnings.some(w => w.includes('requestbin.com'))).toBe(true); }); test('canary in envelope-escaped content still detectable', () => { // The envelope uses "═══ BEGIN UNTRUSTED WEB CONTENT ═══" markers and // escapes occurrences in content via zero-width space. This must NOT // break canary detection — the canary isn't special to the escape logic. const c = generateCanary(); const contentWithEnvelopeChars = `═══ BEGIN UNTRUSTED WEB CONTENT ═══ real payload: ${c}`; const wrapped = wrapUntrustedPageContent(contentWithEnvelopeChars, 'text'); // The inner "BEGIN" gets escaped to "BEGIN UNTRUSTED WEB C{zwsp}ONTENT" // but the canary remains intact expect(checkCanaryInStructure(wrapped, c)).toBe(true); }); }); describe('defense-in-depth — regression guards', () => { test('combineVerdict cannot be bypassed via signal starvation', () => { // Attacker might try to suppress classifier calls to avoid signals. // Empty signals still yields safe verdict — fail-open is intentional. // This is not a regression; it's the documented contract. // Test asserts that a ZERO-confidence-everywhere state IS explicitly safe. const allZeros: LayerSignal[] = [ { layer: 'testsavant_content', confidence: 0 }, { layer: 'transcript_classifier', confidence: 0 }, { layer: 'canary', confidence: 0 }, { layer: 'aria_regex', confidence: 0 }, ]; expect(combineVerdict(allZeros).verdict).toBe('safe'); }); test('negative confidences cannot trigger block', () => { // Defensive: if some future refactor returns negative scores (bug), // combineVerdict must not misinterpret them. Math-wise, negative values // never exceed WARN/BLOCK thresholds, so this falls through to safe. const weird: LayerSignal[] = [ { layer: 'testsavant_content', confidence: -0.5 }, { layer: 'transcript_classifier', confidence: -1.0 }, ]; expect(combineVerdict(weird).verdict).toBe('safe'); }); test('huge confidences (> 1.0) still behave predictably', () => { // If a classifier ever returns > 1.0 (bug), we want the verdict to // still be BLOCK, not crash or produce nonsense. Canary uses >= 1.0 // which matches; ML layers also register. const overflow: LayerSignal[] = [ { layer: 'testsavant_content', confidence: 5.5 }, // above BLOCK, block-vote { layer: 'transcript_classifier', confidence: 3.2, meta: { verdict: 'block' } }, // label-first block-vote ]; expect(combineVerdict(overflow).verdict).toBe('block'); }); });