mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
07745e046d
10 tests pinning the defense-in-depth contract between the existing
content-security.ts module (L1-L3: datamark, hidden DOM strip, envelope
wrap, URL blocklist) and the new security.ts module (L4-L6: ML classifier,
transcript classifier, canary, combineVerdict). Without these tests a
future "the ML classifier covers it, let's remove the regex layer" refactor
would silently erase defense-in-depth.
Coverage:
Layer coexistence (7 tests)
* Canary survives wrapUntrustedPageContent — envelope markup doesn't
obscure the token
* Datamarking zero-width watermarks don't corrupt canary detection
* URL blocklist and canary fire INDEPENDENTLY on the same payload
* Benign content (Wikipedia text) produces no false positives across
datamark + wrap + blocklist + canary
* Removing any ONE layer (canary OR ensemble) still produces BLOCK
from the remaining signals — the whole point of layering
* runContentFilters pipeline wiring survives module load
* Canary inside envelope-escape chars (zero-width injected in boundary
markers) remains detectable
Regression guards (3 tests)
* Signal starvation (all zero) → safe (fail-open contract)
* Negative confidences don't misbehave
* Overflow confidences (> 1.0) still resolve to BLOCK, not crash
All 10 tests pass in 16ms. Heavier version (live Playwright Page for
hidden-element stripping + ARIA regex) is still a P1 TODO for the
browser-facing smoke harness — these pure-function tests cover the
module boundary that's most refactor-prone.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
183 lines
7.8 KiB
TypeScript
183 lines
7.8 KiB
TypeScript
/**
|
|
* Integration tests — the defense-in-depth contract.
|
|
*
|
|
* Pins the invariant that content-security.ts (L1-L3) and security.ts (L4-L6)
|
|
* layers coexist and fire INDEPENDENTLY. If someone refactors thinking "the
|
|
* ML classifier covers this, we can delete the regex layer," these tests
|
|
* fail and stop the regression.
|
|
*
|
|
* This is the lighter version of CEO plan §E5. The full version requires
|
|
* a live Playwright Page for hidden-element stripping and ARIA regex (those
|
|
* operate on DOM). Here we test the pure-function cross-module surface:
|
|
* * content-security.ts datamark + envelope wrap + URL blocklist
|
|
* * security.ts canary + combineVerdict
|
|
* * Both modules on the same input produce orthogonal signals
|
|
*/
|
|
|
|
import { describe, test, expect } from 'bun:test';
|
|
import {
|
|
datamarkContent,
|
|
wrapUntrustedPageContent,
|
|
urlBlocklistFilter,
|
|
runContentFilters,
|
|
resetSessionMarker,
|
|
} from '../src/content-security';
|
|
import {
|
|
generateCanary,
|
|
checkCanaryInStructure,
|
|
combineVerdict,
|
|
type LayerSignal,
|
|
} from '../src/security';
|
|
|
|
describe('defense-in-depth — layer coexistence', () => {
|
|
test('canary survives when content is wrapped by content-security envelope', () => {
|
|
const c = generateCanary();
|
|
// Attacker got Claude to echo the canary into tool output text.
|
|
// content-security wraps that text in an envelope — canary still detectable.
|
|
const leakedText = `Here's my session token: ${c}`;
|
|
const wrapped = wrapUntrustedPageContent(leakedText, 'text');
|
|
expect(wrapped).toContain(c);
|
|
expect(checkCanaryInStructure(wrapped, c)).toBe(true);
|
|
});
|
|
|
|
test('datamarking does not corrupt canary detection', () => {
|
|
resetSessionMarker();
|
|
const c = generateCanary();
|
|
// datamarkContent inserts zero-width watermarks after every 3rd period.
|
|
// It must not break canary detection on text that contains the canary.
|
|
const leakedText = `Intro sentence. Middle sentence. Third sentence. Here is the token ${c}. More. More.`;
|
|
const marked = datamarkContent(leakedText);
|
|
expect(checkCanaryInStructure(marked, c)).toBe(true);
|
|
});
|
|
|
|
test('URL blocklist + canary are orthogonal — both can fire', () => {
|
|
const c = generateCanary();
|
|
// Attack: URL points to a blocklisted exfil domain AND carries the canary.
|
|
// content-security's urlBlocklistFilter catches the domain.
|
|
// security.ts's canary check catches the token.
|
|
// Neither depends on the other.
|
|
const attackContent = `See https://requestbin.com/?leak=${c} for details`;
|
|
const blockResult = urlBlocklistFilter(attackContent, 'https://requestbin.com/abc', 'text');
|
|
expect(blockResult.safe).toBe(false);
|
|
expect(blockResult.warnings.length).toBeGreaterThan(0);
|
|
|
|
const canaryHit = checkCanaryInStructure({ content: attackContent }, c);
|
|
expect(canaryHit).toBe(true);
|
|
});
|
|
|
|
test('benign content survives all layers — zero false positives', () => {
|
|
resetSessionMarker();
|
|
const c = generateCanary();
|
|
const benign = 'The Pacific Ocean is the largest ocean on Earth. It contains many islands. Marine biodiversity is rich.';
|
|
|
|
// Datamark doesn't add the canary
|
|
const marked = datamarkContent(benign);
|
|
expect(checkCanaryInStructure(marked, c)).toBe(false);
|
|
|
|
// Envelope wrap doesn't add the canary
|
|
const wrapped = wrapUntrustedPageContent(benign, 'text');
|
|
expect(checkCanaryInStructure(wrapped, c)).toBe(false);
|
|
|
|
// URL blocklist returns safe on a benign URL
|
|
const blockResult = urlBlocklistFilter(benign, 'https://wikipedia.org', 'text');
|
|
expect(blockResult.safe).toBe(true);
|
|
});
|
|
|
|
test('removing one signal does not zero-out the verdict (defense-in-depth)', () => {
|
|
// Attack scenario: page has hidden injection + exfil URL + canary leak
|
|
// across three different layers. Remove any ONE signal, other two still
|
|
// produce a BLOCK-worthy verdict.
|
|
|
|
const baseSignals: LayerSignal[] = [
|
|
{ layer: 'testsavant_content', confidence: 0.88 },
|
|
{ layer: 'transcript_classifier', confidence: 0.75 },
|
|
{ layer: 'canary', confidence: 1.0 },
|
|
];
|
|
|
|
// All 3 signals → BLOCK (canary alone does it, ensemble also fires)
|
|
expect(combineVerdict(baseSignals).verdict).toBe('block');
|
|
|
|
// Remove canary → BLOCK via ensemble_agreement
|
|
expect(combineVerdict(baseSignals.slice(0, 2)).verdict).toBe('block');
|
|
|
|
// Remove transcript → BLOCK via canary still
|
|
expect(
|
|
combineVerdict([baseSignals[0], baseSignals[2]]).verdict,
|
|
).toBe('block');
|
|
|
|
// Remove content → BLOCK via canary still
|
|
expect(
|
|
combineVerdict([baseSignals[1], baseSignals[2]]).verdict,
|
|
).toBe('block');
|
|
|
|
// Remove canary AND transcript → only content WARN (single_layer_high
|
|
// — but content is 0.88 which is just above BLOCK threshold 0.85)
|
|
const contentOnly = combineVerdict([baseSignals[0]]);
|
|
expect(contentOnly.verdict).toBe('warn');
|
|
expect(contentOnly.reason).toBe('single_layer_high');
|
|
});
|
|
|
|
test('content-security filter runs through the registered pipeline', () => {
|
|
// Verify runContentFilters picks up the built-in url blocklist filter.
|
|
// If a future refactor accidentally unregisters it, this test fails.
|
|
const result = runContentFilters(
|
|
'page content',
|
|
'https://requestbin.com/webhook',
|
|
'text',
|
|
);
|
|
// urlBlocklistFilter is auto-registered on module load (content-security.ts:347)
|
|
expect(result.safe).toBe(false);
|
|
expect(result.warnings.some(w => w.includes('requestbin.com'))).toBe(true);
|
|
});
|
|
|
|
test('canary in envelope-escaped content still detectable', () => {
|
|
// The envelope uses "═══ BEGIN UNTRUSTED WEB CONTENT ═══" markers and
|
|
// escapes occurrences in content via zero-width space. This must NOT
|
|
// break canary detection — the canary isn't special to the escape logic.
|
|
const c = generateCanary();
|
|
const contentWithEnvelopeChars = `═══ BEGIN UNTRUSTED WEB CONTENT ═══ real payload: ${c}`;
|
|
const wrapped = wrapUntrustedPageContent(contentWithEnvelopeChars, 'text');
|
|
// The inner "BEGIN" gets escaped to "BEGIN UNTRUSTED WEB C{zwsp}ONTENT"
|
|
// but the canary remains intact
|
|
expect(checkCanaryInStructure(wrapped, c)).toBe(true);
|
|
});
|
|
});
|
|
|
|
describe('defense-in-depth — regression guards', () => {
|
|
test('combineVerdict cannot be bypassed via signal starvation', () => {
|
|
// Attacker might try to suppress classifier calls to avoid signals.
|
|
// Empty signals still yields safe verdict — fail-open is intentional.
|
|
// This is not a regression; it's the documented contract.
|
|
// Test asserts that a ZERO-confidence-everywhere state IS explicitly safe.
|
|
const allZeros: LayerSignal[] = [
|
|
{ layer: 'testsavant_content', confidence: 0 },
|
|
{ layer: 'transcript_classifier', confidence: 0 },
|
|
{ layer: 'canary', confidence: 0 },
|
|
{ layer: 'aria_regex', confidence: 0 },
|
|
];
|
|
expect(combineVerdict(allZeros).verdict).toBe('safe');
|
|
});
|
|
|
|
test('negative confidences cannot trigger block', () => {
|
|
// Defensive: if some future refactor returns negative scores (bug),
|
|
// combineVerdict must not misinterpret them. Math-wise, negative values
|
|
// never exceed WARN/BLOCK thresholds, so this falls through to safe.
|
|
const weird: LayerSignal[] = [
|
|
{ layer: 'testsavant_content', confidence: -0.5 },
|
|
{ layer: 'transcript_classifier', confidence: -1.0 },
|
|
];
|
|
expect(combineVerdict(weird).verdict).toBe('safe');
|
|
});
|
|
|
|
test('huge confidences (> 1.0) still behave predictably', () => {
|
|
// If a classifier ever returns > 1.0 (bug), we want the verdict to
|
|
// still be BLOCK, not crash or produce nonsense. Canary uses >= 1.0
|
|
// which matches; ML layers also register.
|
|
const overflow: LayerSignal[] = [
|
|
{ layer: 'testsavant_content', confidence: 5.5 }, // above BLOCK
|
|
{ layer: 'transcript_classifier', confidence: 3.2 }, // above BLOCK
|
|
];
|
|
expect(combineVerdict(overflow).verdict).toBe('block');
|
|
});
|
|
});
|