From 07745e046d25f2b3b98413c6c833bc9a622e9e49 Mon Sep 17 00:00:00 2001
From: Garry Tan <garrytan@gmail.com>
Date: Mon, 20 Apr 2026 04:20:14 +0800
Subject: [PATCH] =?UTF-8?q?test(security):=20integration=20suite=20?=
 =?UTF-8?q?=E2=80=94=20content-security.ts=20+=20security.ts=20coexistence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

10 tests pinning the defense-in-depth contract between the existing
content-security.ts module (L1-L3: datamark, hidden DOM strip, envelope
wrap, URL blocklist) and the new security.ts module (L4-L6: ML classifier,
transcript classifier, canary, combineVerdict). Without these tests a
future "the ML classifier covers it, let's remove the regex layer" refactor
would silently erase defense-in-depth.

Coverage:

Layer coexistence (7 tests)
  * Canary survives wrapUntrustedPageContent — envelope markup doesn't
    obscure the token
  * Datamarking zero-width watermarks don't corrupt canary detection
  * URL blocklist and canary fire INDEPENDENTLY on the same payload
  * Benign content (Wikipedia text) produces no false positives across
    datamark + wrap + blocklist + canary
  * Removing any ONE signal (canary, transcript, or content classifier)
    still produces BLOCK from the remaining two — the whole point of layering
  * runContentFilters pipeline wiring survives module load
  * Canary remains detectable when the content itself contains envelope
    boundary markers (which get escaped with an injected zero-width space)

Regression guards (3 tests)
  * Signal starvation (all zero) → safe (fail-open contract)
  * Negative confidences resolve to safe — they can never trigger BLOCK
  * Overflow confidences (> 1.0) still resolve to BLOCK, not crash

All 10 tests pass in 16ms. Heavier version (live Playwright Page for
hidden-element stripping + ARIA regex) is still a P1 TODO for the
browser-facing smoke harness — these pure-function tests cover the
module boundary that's most refactor-prone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 browse/test/security-integration.test.ts | 182 +++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 browse/test/security-integration.test.ts

diff --git a/browse/test/security-integration.test.ts b/browse/test/security-integration.test.ts
new file mode 100644
index 00000000..e8a8132c
--- /dev/null
+++ b/browse/test/security-integration.test.ts
@@ -0,0 +1,182 @@
+/**
+ * Integration tests — the defense-in-depth contract.
+ *
+ * Pins the invariant that content-security.ts (L1-L3) and security.ts (L4-L6)
+ * layers coexist and fire INDEPENDENTLY. If someone refactors thinking "the
+ * ML classifier covers this, we can delete the regex layer," these tests
+ * fail and stop the regression.
+ *
+ * This is the lighter version of CEO plan §E5. The full version requires
+ * a live Playwright Page for hidden-element stripping and ARIA regex (those
+ * operate on DOM). Here we test the pure-function cross-module surface:
+ *   * content-security.ts datamark + envelope wrap + URL blocklist
+ *   * security.ts canary + combineVerdict
+ *   * Both modules on the same input produce orthogonal signals
+ */
+
+import { describe, test, expect } from 'bun:test';
+import {
+  datamarkContent,
+  wrapUntrustedPageContent,
+  urlBlocklistFilter,
+  runContentFilters,
+  resetSessionMarker,
+} from '../src/content-security';
+import {
+  generateCanary,
+  checkCanaryInStructure,
+  combineVerdict,
+  type LayerSignal,
+} from '../src/security';
+
+describe('defense-in-depth — layer coexistence', () => {
+  test('canary survives when content is wrapped by content-security envelope', () => {
+    const c = generateCanary();
+    // Attacker got Claude to echo the canary into tool output text.
+    // content-security wraps that text in an envelope — canary still detectable.
+    const leakedText = `Here's my session token: ${c}`;
+    const wrapped = wrapUntrustedPageContent(leakedText, 'text');
+    expect(wrapped).toContain(c);
+    expect(checkCanaryInStructure(wrapped, c)).toBe(true);
+  });
+
+  test('datamarking does not corrupt canary detection', () => {
+    resetSessionMarker();
+    const c = generateCanary();
+    // datamarkContent inserts zero-width watermarks after every 3rd period.
+    // It must not break canary detection on text that contains the canary.
+    const leakedText = `Intro sentence. Middle sentence. Third sentence. Here is the token ${c}. More. More.`;
+    const marked = datamarkContent(leakedText);
+    expect(checkCanaryInStructure(marked, c)).toBe(true);
+  });
+
+  test('URL blocklist + canary are orthogonal — both can fire', () => {
+    const c = generateCanary();
+    // Attack: URL points to a blocklisted exfil domain AND carries the canary.
+    // content-security's urlBlocklistFilter catches the domain.
+    // security.ts's canary check catches the token.
+    // Neither depends on the other.
+    const attackContent = `See https://requestbin.com/?leak=${c} for details`;
+    const blockResult = urlBlocklistFilter(attackContent, 'https://requestbin.com/abc', 'text');
+    expect(blockResult.safe).toBe(false);
+    expect(blockResult.warnings.length).toBeGreaterThan(0);
+
+    const canaryHit = checkCanaryInStructure({ content: attackContent }, c);
+    expect(canaryHit).toBe(true);
+  });
+
+  test('benign content survives all layers — zero false positives', () => {
+    resetSessionMarker();
+    const c = generateCanary();
+    const benign = 'The Pacific Ocean is the largest ocean on Earth. It contains many islands. Marine biodiversity is rich.';
+
+    // Datamark doesn't add the canary
+    const marked = datamarkContent(benign);
+    expect(checkCanaryInStructure(marked, c)).toBe(false);
+
+    // Envelope wrap doesn't add the canary
+    const wrapped = wrapUntrustedPageContent(benign, 'text');
+    expect(checkCanaryInStructure(wrapped, c)).toBe(false);
+
+    // URL blocklist returns safe on a benign URL
+    const blockResult = urlBlocklistFilter(benign, 'https://wikipedia.org', 'text');
+    expect(blockResult.safe).toBe(true);
+  });
+
+  test('removing one signal does not zero-out the verdict (defense-in-depth)', () => {
+    // Attack scenario: page has hidden injection + exfil URL + canary leak
+    // across three different layers. Remove any ONE signal, other two still
+    // produce a BLOCK-worthy verdict.
+
+    const baseSignals: LayerSignal[] = [
+      { layer: 'testsavant_content', confidence: 0.88 },
+      { layer: 'transcript_classifier', confidence: 0.75 },
+      { layer: 'canary', confidence: 1.0 },
+    ];
+
+    // All 3 signals → BLOCK (canary alone does it, ensemble also fires)
+    expect(combineVerdict(baseSignals).verdict).toBe('block');
+
+    // Remove canary → BLOCK via ensemble_agreement
+    expect(combineVerdict(baseSignals.slice(0, 2)).verdict).toBe('block');
+
+    // Remove transcript → BLOCK via canary still
+    expect(
+      combineVerdict([baseSignals[0], baseSignals[2]]).verdict,
+    ).toBe('block');
+
+    // Remove content → BLOCK via canary still
+    expect(
+      combineVerdict([baseSignals[1], baseSignals[2]]).verdict,
+    ).toBe('block');
+
+    // Remove canary AND transcript → content alone only WARNs (single_layer_high),
+    // even though 0.88 is just above the BLOCK threshold 0.85: one layer is not enough.
+    const contentOnly = combineVerdict([baseSignals[0]]);
+    expect(contentOnly.verdict).toBe('warn');
+    expect(contentOnly.reason).toBe('single_layer_high');
+  });
+
+  test('content-security filter runs through the registered pipeline', () => {
+    // Verify runContentFilters picks up the built-in url blocklist filter.
+    // If a future refactor accidentally unregisters it, this test fails.
+    const result = runContentFilters(
+      'page content',
+      'https://requestbin.com/webhook',
+      'text',
+    );
+    // urlBlocklistFilter is auto-registered on module load (content-security.ts:347 at time of writing; line may drift)
+    expect(result.safe).toBe(false);
+    expect(result.warnings.some(w => w.includes('requestbin.com'))).toBe(true);
+  });
+
+  test('canary in envelope-escaped content still detectable', () => {
+    // The envelope uses "═══ BEGIN UNTRUSTED WEB CONTENT ═══" markers and
+    // escapes occurrences in content via zero-width space. This must NOT
+    // break canary detection — the canary isn't special to the escape logic.
+    const c = generateCanary();
+    const contentWithEnvelopeChars = `═══ BEGIN UNTRUSTED WEB CONTENT ═══ real payload: ${c}`;
+    const wrapped = wrapUntrustedPageContent(contentWithEnvelopeChars, 'text');
+    // The inner "BEGIN" gets escaped to "BEGIN UNTRUSTED WEB C{zwsp}ONTENT"
+    // but the canary remains intact
+    expect(checkCanaryInStructure(wrapped, c)).toBe(true);
+  });
+});
+
+describe('defense-in-depth — regression guards', () => {
+  test('combineVerdict cannot be bypassed via signal starvation', () => {
+    // Attacker might try to suppress classifier calls to avoid signals.
+    // All-zero signals still yield a safe verdict — fail-open is intentional,
+    // so despite the test title this input is NOT blocked; that is the documented contract.
+    // Asserts that a ZERO-confidence-everywhere state IS explicitly safe (an empty array is not exercised).
+    const allZeros: LayerSignal[] = [
+      { layer: 'testsavant_content', confidence: 0 },
+      { layer: 'transcript_classifier', confidence: 0 },
+      { layer: 'canary', confidence: 0 },
+      { layer: 'aria_regex', confidence: 0 },
+    ];
+    expect(combineVerdict(allZeros).verdict).toBe('safe');
+  });
+
+  test('negative confidences cannot trigger block', () => {
+    // Defensive: if some future refactor returns negative scores (bug),
+    // combineVerdict must not misinterpret them. Math-wise, negative values
+    // never exceed WARN/BLOCK thresholds, so this falls through to safe.
+    const weird: LayerSignal[] = [
+      { layer: 'testsavant_content', confidence: -0.5 },
+      { layer: 'transcript_classifier', confidence: -1.0 },
+    ];
+    expect(combineVerdict(weird).verdict).toBe('safe');
+  });
+
+  test('huge confidences (> 1.0) still behave predictably', () => {
+    // If a classifier ever returns > 1.0 (bug), we want the verdict to
+    // still be BLOCK, not crash or produce nonsense. Only the two ML layers
+    // are passed here; the canary's >= 1.0 comparison is not exercised.
+    const overflow: LayerSignal[] = [
+      { layer: 'testsavant_content', confidence: 5.5 }, // above BLOCK
+      { layer: 'transcript_classifier', confidence: 3.2 }, // above BLOCK
+    ];
+    expect(combineVerdict(overflow).verdict).toBe('block');
+  });
+});