feat: add content-security.ts with datamarking, envelope, and filter hooks

Four-layer prompt injection defense for pair-agent browser sharing:

- Datamarking: session-scoped watermark for text exfiltration detection
- Content envelope: trust boundary wrapping with ZWSP marker escaping
- Content filter hooks: extensible filter pipeline with warn/block modes
- Built-in URL blocklist: requestbin, pipedream, webhook.site, etc.

The BROWSE_CONTENT_FILTER env var controls the mode: off|warn|block (default: warn).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-08-02 04:18:37 +02:00 · 2026-04-05 11:05:38 -07:00
parent 905f1ddd38
commit 5184ea677b
1 changed files with 208 additions and 0 deletions
@@ -0,0 +1,208 @@
+/**
+ * Content security layer for pair-agent browser sharing.
+ *
+ * Four defense layers:
+ *   1. Datamarking — watermark text output to detect exfiltration
+ *   2. Hidden element stripping — remove invisible/deceptive elements from output
+ *   3. Content filter hooks — extensible URL/content filter pipeline
+ *   4. Instruction block hardening — SECURITY section in agent instructions
+ *
+ * This module implements layers 1 and 3, plus the untrusted-content envelope.
+ * It contains no hidden element stripping (layer 2). Layer 4 is in cli.ts.
+ */
+
+import { randomBytes } from 'crypto';
+
+// ─── Datamarking (Layer 1) ──────────────────────────────────────
+
+/**
+ * Watermark token shared by every datamarked output of this module instance.
+ * Remains null until a caller first asks for it.
+ */
+let sessionMarker: string | null = null;
+
+/**
+ * Return the current marker, creating it lazily on first use.
+ * Three random bytes encode to exactly four base64 characters.
+ */
+function ensureMarker(): string {
+  if (sessionMarker) {
+    return sessionMarker;
+  }
+  const encoded = randomBytes(3).toString('base64');
+  sessionMarker = encoded.slice(0, 4);
+  return sessionMarker;
+}
+
+/**
+ * Accessor for the session marker (exported for tests only).
+ * Generates the marker on demand when none exists yet.
+ */
+export function getSessionMarker(): string {
+  const marker = ensureMarker();
+  return marker;
+}
+
+/**
+ * Forget the current marker so the next request generates a fresh one.
+ * Intended for tests.
+ */
+export function resetSessionMarker(): void {
+  sessionMarker = null;
+}
+
+/**
+ * Watermark text with the session marker.
+ *
+ * Every marker character is prefixed with a zero-width space (U+200B), and the
+ * resulting sequence is appended after every third occurrence of ". "
+ * (a period followed by a space). Content with fewer than three such
+ * occurrences is returned unchanged. Only the separators are zero-width; the
+ * marker characters themselves are emitted as-is.
+ *
+ * Intended for `text` command output only (not html, forms, or structured
+ * data); this function does not check the command itself.
+ *
+ * @param content - text to watermark
+ * @returns the content with the marker sequence inserted
+ */
+export function datamarkContent(content: string): string {
+  const marker = ensureMarker();
+  const ZERO_WIDTH_SPACE = '\u200B';
+
+  // Build the inserted sequence once: ZWSP + char for each marker character.
+  let watermark = '';
+  for (let i = 0; i < marker.length; i += 1) {
+    watermark += ZERO_WIDTH_SPACE + marker.charAt(i);
+  }
+
+  // Count ". " separators and tag every third one.
+  let separatorsSeen = 0;
+  return content.replace(/(\. )/g, (separator) => {
+    separatorsSeen += 1;
+    return separatorsSeen % 3 === 0 ? separator + watermark : separator;
+  });
+}
+
+// ─── Content Envelope (wrapping) ────────────────────────────────
+
+const ENVELOPE_BEGIN = '═══ BEGIN UNTRUSTED WEB CONTENT ═══';
+const ENVELOPE_END = '═══ END UNTRUSTED WEB CONTENT ═══';
+
+/**
+ * Matches an envelope marker embedded in page content.
+ *
+ * The closing delimiter is asserted with a lookahead instead of being
+ * consumed, so markers that share a delimiter
+ * (`═══ END … ═══ END … ═══`) are all matched. Consuming the delimiter would
+ * leave the second marker intact and let content forge an envelope boundary.
+ */
+const ENVELOPE_MARKER_PATTERN = /═══ (BEGIN|END) UNTRUSTED WEB CONTENT(?= ═══)/g;
+
+/**
+ * Wrap page content in a trust boundary envelope for scoped tokens.
+ *
+ * Any envelope marker found inside the content is defused by inserting a
+ * zero-width space into the word "CONTENT", so the content cannot close or
+ * reopen the envelope.
+ *
+ * @param content - untrusted page text to wrap
+ * @param command - currently unused; kept so the signature stays stable
+ * @param filterWarnings - optional warnings rendered on a line above the
+ *   envelope. NOTE(review): this line sits outside the trust boundary, so
+ *   warning text should not carry attacker-controlled content — confirm callers.
+ * @returns the optional warnings line, BEGIN marker, escaped content and END
+ *   marker joined with newlines
+ */
+export function wrapUntrustedPageContent(
+  content: string,
+  command: string,
+  filterWarnings?: string[],
+): string {
+  // Escape envelope markers in content (zero-width space injection)
+  const zwsp = '\u200B';
+  const safeContent = content.replace(
+    ENVELOPE_MARKER_PATTERN,
+    (_marker: string, kind: string) => `═══ ${kind} UNTRUSTED WEB C${zwsp}ONTENT`,
+  );
+
+  const parts: string[] = [];
+
+  if (filterWarnings && filterWarnings.length > 0) {
+    parts.push(`⚠ CONTENT WARNINGS: ${filterWarnings.join('; ')}`);
+  }
+
+  parts.push(ENVELOPE_BEGIN);
+  parts.push(safeContent);
+  parts.push(ENVELOPE_END);
+
+  return parts.join('\n');
+}
+
+// ─── Content Filter Hooks (Layer 3) ─────────────────────────────
+
+/** Result returned by a single content filter and by the aggregated filter run. */
+export interface ContentFilterResult {
+  /** False when the content or URL was flagged. */
+  safe: boolean;
+  /** Human-readable findings; runContentFilters collects these from filters that report `safe: false`. */
+  warnings: string[];
+  /** Set to true by runContentFilters when the mode is 'block' and warnings were collected. */
+  blocked?: boolean;
+  /** Summary text that accompanies a blocked result. */
+  message?: string;
+}
+
+/**
+ * Signature of a pluggable content filter.
+ *
+ * @param content - text to inspect
+ * @param url - URL associated with the content (the built-in filter treats it as the page URL)
+ * @param command - forwarded unchanged by runContentFilters;
+ *   NOTE(review): presumably the browse command name — confirm against callers
+ * @returns the filter's verdict and any warnings
+ */
+export type ContentFilter = (
+  content: string,
+  url: string,
+  command: string,
+) => ContentFilterResult;
+
+/** Filters consulted by runContentFilters, in registration order. */
+const registeredFilters: ContentFilter[] = [];
+
+/**
+ * Append a filter to the pipeline.
+ * No de-duplication is performed; registering the same filter twice runs it twice.
+ */
+export function registerContentFilter(filter: ContentFilter): void {
+  registeredFilters.push(filter);
+}
+
+/**
+ * Remove every registered filter, emptying the shared array in place.
+ */
+export function clearContentFilters(): void {
+  registeredFilters.splice(0, registeredFilters.length);
+}
+
+/**
+ * Resolve the content filter mode from the BROWSE_CONTENT_FILTER env var.
+ *
+ * Matching is case-insensitive and ignores surrounding whitespace, so a value
+ * such as "block " (e.g. from an env file with trailing spaces) is honoured
+ * instead of silently falling back to the default.
+ *
+ * @returns 'off' or 'block' when explicitly requested; 'warn' when the
+ *   variable is unset, empty, or holds any other value
+ */
+export function getFilterMode(): 'off' | 'warn' | 'block' {
+  const mode = process.env.BROWSE_CONTENT_FILTER?.trim().toLowerCase();
+  if (mode === 'off' || mode === 'block') return mode;
+  return 'warn'; // default
+}
+
+/**
+ * Run all registered content filters against content.
+ *
+ * Filters run in registration order and every filter is always executed.
+ * Warnings are collected only from filters that report `safe: false`. A filter
+ * that reports `safe: false` without any warning text contributes a generic
+ * warning, so an unsafe verdict can never be reported as safe or skip blocking.
+ *
+ * @param content - text to inspect
+ * @param url - URL passed through to each filter
+ * @param command - command name passed through to each filter
+ * @returns `{ safe: true, warnings: [] }` when the mode is 'off' or nothing
+ *   was flagged; otherwise `safe: false` with the collected warnings, plus
+ *   `blocked: true` and a summary `message` when the mode is 'block'
+ */
+export function runContentFilters(
+  content: string,
+  url: string,
+  command: string,
+): ContentFilterResult {
+  const mode = getFilterMode();
+  if (mode === 'off') {
+    return { safe: true, warnings: [] };
+  }
+
+  const allWarnings: string[] = [];
+
+  for (const filter of registeredFilters) {
+    const result = filter(content, url, command);
+    if (result.safe) continue;
+
+    if (result.warnings.length === 0) {
+      // Keep the unsafe verdict visible even when the filter gave no details.
+      allWarnings.push('Content filter flagged content as unsafe (no details provided)');
+      continue;
+    }
+    // Plain loop rather than push(...spread): avoids the argument-count limit
+    // on pages that produce a very large number of warnings.
+    for (const warning of result.warnings) {
+      allWarnings.push(warning);
+    }
+  }
+
+  if (allWarnings.length === 0) {
+    return { safe: true, warnings: allWarnings };
+  }
+
+  if (mode === 'block') {
+    return {
+      safe: false,
+      warnings: allWarnings,
+      blocked: true,
+      message: `Content blocked: ${allWarnings.join('; ')}`,
+    };
+  }
+
+  return {
+    safe: false,
+    warnings: allWarnings,
+  };
+}
+
+// ─── Built-in URL Blocklist Filter ──────────────────────────────
+
+/** Lower-case domains matched (as substrings) against page and content URLs. */
+const BLOCKLIST_DOMAINS = [
+  'requestbin.com',
+  'pipedream.com',
+  'webhook.site',
+  'hookbin.com',
+  'requestcatcher.com',
+  'burpcollaborator.net',
+  'interact.sh',
+  'canarytokens.com',
+  'ngrok.io',
+  'ngrok-free.app',
+];
+
+/**
+ * Check whether the page URL, or any http(s) URL found in the content,
+ * mentions a blocklisted exfiltration domain.
+ *
+ * Matching is a case-insensitive substring test: host names and URL schemes
+ * are case-insensitive, so `https://WebHook.Site/x` must be flagged as well.
+ * Because it is a substring test, a domain appearing in a path or query string
+ * is flagged too.
+ *
+ * @param content - text scanned for http(s) URLs
+ * @param url - page URL; yields one warning per blocklisted domain it contains
+ * @param _command - unused
+ * @returns `safe: false` with one warning per finding, otherwise `safe: true`.
+ *   Content URLs are reported in their original case, truncated to 100 characters.
+ */
+export function urlBlocklistFilter(content: string, url: string, _command: string): ContentFilterResult {
+  const warnings: string[] = [];
+
+  // Check page URL
+  const normalizedPageUrl = url.toLowerCase();
+  for (const domain of BLOCKLIST_DOMAINS) {
+    if (normalizedPageUrl.includes(domain)) {
+      warnings.push(`Page URL matches blocklisted domain: ${domain}`);
+    }
+  }
+
+  // Check for blocklisted URLs in content (links, form actions)
+  const urlPattern = /https?:\/\/[^\s"'<>]+/gi;
+  const contentUrls = content.match(urlPattern) ?? [];
+  for (const contentUrl of contentUrls) {
+    const normalizedContentUrl = contentUrl.toLowerCase();
+    // One warning per URL, even if it mentions several blocklisted domains.
+    if (BLOCKLIST_DOMAINS.some((domain) => normalizedContentUrl.includes(domain))) {
+      warnings.push(`Content contains blocklisted URL: ${contentUrl.slice(0, 100)}`);
+    }
+  }
+
+  return { safe: warnings.length === 0, warnings };
+}
+
+// Register the built-in URL blocklist filter as a side effect of loading this module.
+// Note: clearContentFilters() removes it along with any other registered filter.
+registerContentFilter(urlBlocklistFilter);