From 5184ea677bc5bf589ce5e22d58ae132452ecaa46 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 5 Apr 2026 11:05:38 -0700 Subject: [PATCH] feat: add content-security.ts with datamarking, envelope, and filter hooks Four-layer prompt injection defense for pair-agent browser sharing: - Datamarking: session-scoped watermark for text exfiltration detection - Content envelope: trust boundary wrapping with ZWSP marker escaping - Content filter hooks: extensible filter pipeline with warn/block modes - Built-in URL blocklist: requestbin, pipedream, webhook.site, etc. BROWSE_CONTENT_FILTER env var controls mode: off|warn|block (default: warn) Co-Authored-By: Claude Opus 4.6 (1M context) --- browse/src/content-security.ts | 208 +++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 browse/src/content-security.ts diff --git a/browse/src/content-security.ts b/browse/src/content-security.ts new file mode 100644 index 00000000..4e8bc9aa --- /dev/null +++ b/browse/src/content-security.ts @@ -0,0 +1,208 @@ +/** + * Content security layer for pair-agent browser sharing. + * + * Four defense layers: + * 1. Datamarking — watermark text output to detect exfiltration + * 2. Hidden element stripping — remove invisible/deceptive elements from output + * 3. Content filter hooks — extensible URL/content filter pipeline + * 4. Instruction block hardening — SECURITY section in agent instructions + * + * This module handles layers 1-3. Layer 4 is in cli.ts. + */ + +import { randomBytes } from 'crypto'; + +// ─── Datamarking (Layer 1) ────────────────────────────────────── + +/** Session-scoped random marker for text watermarking */ +let sessionMarker: string | null = null; + +function ensureMarker(): string { + if (!sessionMarker) { + sessionMarker = randomBytes(3).toString('base64').slice(0, 4); + } + return sessionMarker; +} + +/** Exported for tests only */ +export function getSessionMarker(): string { + return ensureMarker(); +} + +/** Reset marker (for testing) */ +export function resetSessionMarker(): void { + sessionMarker = null; +} + +/** + * Insert invisible watermark into text content. + * Places the marker as zero-width characters between words. + * Only applied to `text` command output (not html, forms, or structured data). + */ +export function datamarkContent(content: string): string { + const marker = ensureMarker(); + // Insert marker as a Unicode tag sequence between sentences (after periods followed by space) + // This is subtle enough to not corrupt output but detectable if exfiltrated + const zwsp = '\u200B'; // zero-width space + const taggedMarker = marker.split('').map(c => zwsp + c).join(''); + // Insert after every 3rd sentence-ending period + let count = 0; + return content.replace(/(\. )/g, (match) => { + count++; + if (count % 3 === 0) { + return match + taggedMarker; + } + return match; + }); +} + +// ─── Content Envelope (wrapping) ──────────────────────────────── + +const ENVELOPE_BEGIN = '═══ BEGIN UNTRUSTED WEB CONTENT ═══'; +const ENVELOPE_END = '═══ END UNTRUSTED WEB CONTENT ═══'; + +/** + * Wrap page content in a trust boundary envelope for scoped tokens. + * Escapes envelope markers in content to prevent boundary escape attacks. + */ +export function wrapUntrustedPageContent( + content: string, + command: string, + filterWarnings?: string[], +): string { + // Escape envelope markers in content (zero-width space injection) + const zwsp = '\u200B'; + const safeContent = content + .replace(/═══ BEGIN UNTRUSTED WEB CONTENT ═══/g, `═══ BEGIN UNTRUSTED WEB C${zwsp}ONTENT ═══`) + .replace(/═══ END UNTRUSTED WEB CONTENT ═══/g, `═══ END UNTRUSTED WEB C${zwsp}ONTENT ═══`); + + const parts: string[] = []; + + if (filterWarnings && filterWarnings.length > 0) { + parts.push(`⚠ CONTENT WARNINGS: ${filterWarnings.join('; ')}`); + } + + parts.push(ENVELOPE_BEGIN); + parts.push(safeContent); + parts.push(ENVELOPE_END); + + return parts.join('\n'); +} + +// ─── Content Filter Hooks (Layer 3) ───────────────────────────── + +export interface ContentFilterResult { + safe: boolean; + warnings: string[]; + blocked?: boolean; + message?: string; +} + +export type ContentFilter = ( + content: string, + url: string, + command: string, +) => ContentFilterResult; + +const registeredFilters: ContentFilter[] = []; + +export function registerContentFilter(filter: ContentFilter): void { + registeredFilters.push(filter); +} + +export function clearContentFilters(): void { + registeredFilters.length = 0; +} + +/** Get current filter mode from env */ +export function getFilterMode(): 'off' | 'warn' | 'block' { + const mode = process.env.BROWSE_CONTENT_FILTER?.toLowerCase(); + if (mode === 'off' || mode === 'block') return mode; + return 'warn'; // default +} + +/** + * Run all registered content filters against content. + * Returns aggregated result with all warnings. + */ +export function runContentFilters( + content: string, + url: string, + command: string, +): ContentFilterResult { + const mode = getFilterMode(); + if (mode === 'off') { + return { safe: true, warnings: [] }; + } + + const allWarnings: string[] = []; + let blocked = false; + + for (const filter of registeredFilters) { + const result = filter(content, url, command); + if (!result.safe) { + allWarnings.push(...result.warnings); + if (mode === 'block') { + blocked = true; + } + } + } + + if (blocked && allWarnings.length > 0) { + return { + safe: false, + warnings: allWarnings, + blocked: true, + message: `Content blocked: ${allWarnings.join('; ')}`, + }; + } + + return { + safe: allWarnings.length === 0, + warnings: allWarnings, + }; +} + +// ─── Built-in URL Blocklist Filter ────────────────────────────── + +const BLOCKLIST_DOMAINS = [ + 'requestbin.com', + 'pipedream.com', + 'webhook.site', + 'hookbin.com', + 'requestcatcher.com', + 'burpcollaborator.net', + 'interact.sh', + 'canarytokens.com', + 'ngrok.io', + 'ngrok-free.app', +]; + +/** Check if URL matches any blocklisted exfiltration domain */ +export function urlBlocklistFilter(content: string, url: string, _command: string): ContentFilterResult { + const warnings: string[] = []; + + // Check page URL + for (const domain of BLOCKLIST_DOMAINS) { + if (url.includes(domain)) { + warnings.push(`Page URL matches blocklisted domain: ${domain}`); + } + } + + // Check for blocklisted URLs in content (links, form actions) + const urlPattern = /https?:\/\/[^\s"'<>]+/g; + const contentUrls = content.match(urlPattern) || []; + for (const contentUrl of contentUrls) { + for (const domain of BLOCKLIST_DOMAINS) { + if (contentUrl.includes(domain)) { + warnings.push(`Content contains blocklisted URL: ${contentUrl.slice(0, 100)}`); + break; + } + } + } + + return { safe: warnings.length === 0, warnings }; +} + +// Register the built-in filter on module load +registerContentFilter(urlBlocklistFilter);