feat(browse): $B cdp escape hatch — deny-default allowlist + two-tier mutex

Codex T2: flip CDP posture to deny-default. Allowed methods are enumerated in
cdp-allowlist.ts with (scope: tab|browser, output: trusted|untrusted,
justification) per entry.

Initial allowlist (~25 methods) covers:
- Accessibility tree extraction (read-only)
- DOM/CSS inspection (read-only)
- Performance metrics
- Tracing
- Emulation viewport/UA override
- Page screenshot/PDF capture (output is binary, no marker injection vector)
- Network.enable/disable (no bodies/cookies — those are exfil surfaces)
- Runtime.getProperties (NO evaluate/callFunctionOn — those would be RCE)

Page.navigate is INTENTIONALLY NOT allowed; agents use $B goto which goes
through the URL blocklist.

Codex T7: two-tier mutex. Tab-scoped methods take a per-tab lock; browser-scoped
methods take a global lock that blocks all tab locks. A 5s acquire timeout yields
CDPMutexAcquireTimeout (no silent hangs). All lock acquires use try/finally so
errors don't leak the lock.

Path A from spike: uses Playwright's newCDPSession() per page. No second
WebSocket, no need for --remote-debugging-port. CDPSession is cached per page
in a WeakMap and cleared on page close.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-18 15:50:11 +02:00 · 2026-04-25 13:06:55 -07:00
parent 41f008debd
commit 78c1f5b33c
4 changed files with 458 additions and 0 deletions
@@ -741,6 +741,80 @@ export class BrowserManager {
    return session;
  }

+  /**
+   * Look up the Page that backs a tab id.
+   *
+   * Used by the CDP bridge (cdp-bridge.ts) to mint per-tab CDPSessions.
+   *
+   * @param tabId Numeric tab identifier.
+   * @returns The Page registered for `tabId`, or null when no such tab exists.
+   */
+  getPageForTab(tabId: number): Page | null {
+    const page = this.pages.get(tabId);
+    return page == null ? null : page;
+  }
+
+  // ─── Two-tier mutex (Codex T7) ─────────────────────────────
+  // Per-tab and global locks for the CDP bridge. Tab-scoped methods take the
+  // per-tab mutex; browser-scoped methods take the global lock, which blocks all
+  // tab mutexes. Acquires have a hard timeout so a stuck holder surfaces as an
+  // error instead of a silent deadlock.
+  // Every caller MUST use try { ... } finally { release() }.
+
+  // Tail of each tab's lock queue, keyed by tab id. An acquire chains its own
+  // slot onto the current tail and stores the result back here.
+  private tabLocks: Map<number, Promise<void>> = new Map();
+  // Tail of the global lock queue; starts out resolved, i.e. the lock is free.
+  private globalCdpLockTail: Promise<void> = Promise.resolve();
+
+  /**
+   * Acquire the per-tab CDP lock with a timeout.
+   *
+   * Locks chain: each acquire waits on the prior tail's resolution. The
+   * browser-scoped global lock takes precedence: the acquire also waits on the
+   * global tail captured at call time, so no tab lock is granted while a
+   * previously queued global lock is still held.
+   *
+   * @param tabId Tab whose lock queue to join.
+   * @param timeoutMs Maximum time to wait for the lock before giving up.
+   * @returns A release function. Callers MUST invoke it in a `finally`; calling
+   *   it more than once is harmless.
+   * @throws Error whose message starts with `CDPMutexAcquireTimeout` when the
+   *   lock is not granted within `timeoutMs`.
+   */
+  async acquireTabLock(tabId: number, timeoutMs: number): Promise<() => void> {
+    const existing = this.tabLocks.get(tabId) ?? Promise.resolve();
+    // Wait for any held global lock first (cross-tier serialization).
+    const tail = Promise.all([existing, this.globalCdpLockTail]).then(() => undefined);
+    let release!: () => void;
+    const next = new Promise<void>((resolve) => { release = resolve; });
+    const slot = tail.then(() => next);
+    this.tabLocks.set(tabId, slot);
+    // Once this slot has drained and nobody queued behind it, drop the map entry
+    // so closed tabs do not leave entries behind forever. A missing entry is
+    // equivalent to an already-resolved tail.
+    void slot.then(() => {
+      if (this.tabLocks.get(tabId) === slot) this.tabLocks.delete(tabId);
+    });
+
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const timeoutPromise = new Promise<never>((_, reject) => {
+      timer = setTimeout(() => reject(new Error(
+        `CDPMutexAcquireTimeout: tab ${tabId} lock not acquired within ${timeoutMs}ms.\n` +
+        'Cause: a prior CDP or browser-scoped operation has held the lock too long.\n' +
+        'Action: retry; if this repeats, the prior operation may be hung — file a bug.'
+      )), timeoutMs);
+    });
+    try {
+      await Promise.race([tail, timeoutPromise]);
+    } catch (e) {
+      // Acquisition failed; release the slot we reserved so we don't deadlock the queue.
+      release();
+      throw e;
+    } finally {
+      // Always cancel the timer; otherwise every acquire leaves a pending
+      // timeout alive for the full timeoutMs.
+      clearTimeout(timer);
+    }
+    return release;
+  }
+
+  /**
+   * Acquire the global CDP lock with a timeout.
+   *
+   * Waits for the previous global holder and for every tab lock tail that
+   * exists at call time. While this lock is queued or held, new tab-lock
+   * acquisitions wait behind it.
+   *
+   * @param timeoutMs Maximum time to wait for the lock before giving up.
+   * @returns A release function. Callers MUST invoke it in a `finally`; calling
+   *   it more than once is harmless.
+   * @throws Error whose message starts with `CDPMutexAcquireTimeout` when the
+   *   lock is not granted within `timeoutMs`.
+   */
+  async acquireGlobalCdpLock(timeoutMs: number): Promise<() => void> {
+    const allTabTails = Array.from(this.tabLocks.values());
+    const priorGlobal = this.globalCdpLockTail;
+    const allPrior = Promise.all([priorGlobal, ...allTabTails]).then(() => undefined);
+    let release!: () => void;
+    const next = new Promise<void>((resolve) => { release = resolve; });
+    this.globalCdpLockTail = allPrior.then(() => next);
+
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const timeoutPromise = new Promise<never>((_, reject) => {
+      timer = setTimeout(() => reject(new Error(
+        `CDPMutexAcquireTimeout: global CDP lock not acquired within ${timeoutMs}ms.\n` +
+        'Cause: in-flight tab operations have not completed.\n' +
+        'Action: retry; if this repeats, file a bug — a tab op may be hung.'
+      )), timeoutMs);
+    });
+    try {
+      await Promise.race([allPrior, timeoutPromise]);
+    } catch (e) {
+      // Acquisition failed; free the reserved slot so later waiters are not blocked on us.
+      release();
+      throw e;
+    } finally {
+      // Always cancel the timer; otherwise every acquire leaves a pending
+      // timeout alive for the full timeoutMs.
+      clearTimeout(timer);
+    }
+    return release;
+  }
+
  // ─── Page Access (delegates to active session) ─────────────
  getPage(): Page {
    return this.getActiveSession().page;
@@ -0,0 +1,214 @@
+/**
+ * CDP method allow-list (T2: deny-default).
+ *
+ * Codex outside-voice T2: allow-default with a deny-list is backwards because
+ * Target.*, Browser.*, Runtime.evaluate, Page.addScriptToEvaluateOnNewDocument,
+ * Fetch.*, IO.read, etc. are all dangerous and easy to forget. Default-deny
+ * inverts the failure mode: missing a method means it's blocked (annoying),
+ * not exposed (silent compromise).
+ *
+ * Each entry has:
+ *   - domain.method     unique CDP identifier
+ *   - scope             "tab" | "browser" — controls T7 mutex tier
+ *   - output            "trusted" | "untrusted" — wraps result if "untrusted"
+ *   - justification     why this method is safe to allow
+ *
+ * Add entries via PR. CI lint (cdp-allowlist.test.ts) ensures every entry has all 4 fields.
+ */
+
+/** Mutex tier a method runs under: the per-tab lock or the global browser lock. */
+export type CdpScope = 'tab' | 'browser';
+/** Whether a method's result is returned as-is or wrapped as untrusted content. */
+export type CdpOutput = 'trusted' | 'untrusted';
+
+/** One audited CDP method on the allow-list. */
+export interface CdpAllowEntry {
+  /** CDP domain, e.g. `Accessibility`. */
+  domain: string;
+  /** Method name within the domain, e.g. `getFullAXTree`. */
+  method: string;
+  /** Lock tier taken while the method runs. */
+  scope: CdpScope;
+  /** Trust tag for the method's result. */
+  output: CdpOutput;
+  /** Why this method is safe to allow. */
+  justification: string;
+}
+
+// Raw entry table. Kept module-private; the exported view below freezes both
+// the array and every entry in it.
+const CDP_ALLOWLIST_ENTRIES: CdpAllowEntry[] = [
+  // ─── Accessibility (read-only) ─────────────────────────────
+  {
+    domain: 'Accessibility',
+    method: 'getFullAXTree',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Read-only AX tree extraction. Output is third-party page content; wrap in UNTRUSTED.',
+  },
+  {
+    domain: 'Accessibility',
+    method: 'getPartialAXTree',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Read-only AX tree subtree by node. Output is third-party page content.',
+  },
+  {
+    domain: 'Accessibility',
+    method: 'getRootAXNode',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Read-only root AX node accessor.',
+  },
+  // ─── DOM (read-only inspection) ────────────────────────────
+  {
+    domain: 'DOM',
+    method: 'describeNode',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Inspect a DOM node by backend ID; pure read.',
+  },
+  {
+    domain: 'DOM',
+    method: 'getBoxModel',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Pure geometric data (box dimensions). No page content leaks; safe trusted.',
+  },
+  {
+    domain: 'DOM',
+    method: 'getNodeForLocation',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Pure coordinate→nodeId mapping; no content leak.',
+  },
+  // ─── CSS (read-only) ───────────────────────────────────────
+  {
+    domain: 'CSS',
+    method: 'getMatchedStylesForNode',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Read computed cascade for a node; output may contain attacker-controlled selectors.',
+  },
+  {
+    domain: 'CSS',
+    method: 'getComputedStyleForNode',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Computed style values are bounded (CSS keywords/numbers); safe trusted.',
+  },
+  {
+    domain: 'CSS',
+    method: 'getInlineStylesForNode',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Inline style content may contain attacker-controlled custom-property values.',
+  },
+  // ─── Performance metrics ───────────────────────────────────
+  {
+    domain: 'Performance',
+    method: 'getMetrics',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Pure numeric metrics (timing, layout count); safe.',
+  },
+  {
+    domain: 'Performance',
+    method: 'enable',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Domain enable; no content; required prerequisite for getMetrics.',
+  },
+  {
+    domain: 'Performance',
+    method: 'disable',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Domain disable; no content.',
+  },
+  // ─── Tracing (event capture) ───────────────────────────────
+  // NOTE: Tracing.start can capture cross-tab data depending on categories.
+  // We mark it browser-scoped to acquire the global lock when in use.
+  {
+    domain: 'Tracing',
+    method: 'start',
+    scope: 'browser',
+    output: 'trusted',
+    justification: 'Trace category capture. Browser-scoped to serialize against other CDP ops.',
+  },
+  {
+    domain: 'Tracing',
+    method: 'end',
+    scope: 'browser',
+    output: 'untrusted',
+    justification: 'Trace dump may contain URLs and page data; wrap.',
+  },
+  // ─── Emulation (viewport/device) ───────────────────────────
+  {
+    domain: 'Emulation',
+    method: 'setDeviceMetricsOverride',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Viewport/scale override on the active tab.',
+  },
+  {
+    domain: 'Emulation',
+    method: 'clearDeviceMetricsOverride',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Clear viewport override.',
+  },
+  {
+    domain: 'Emulation',
+    method: 'setUserAgentOverride',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'UA override on the active tab. NOTE: changes affect future requests; fine for tests.',
+  },
+  // ─── Page capture (output, not navigation) ─────────────────
+  {
+    domain: 'Page',
+    method: 'captureScreenshot',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Screenshot bytes; output is bounded image data (no marker injection vector).',
+  },
+  {
+    domain: 'Page',
+    method: 'printToPDF',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'PDF bytes; bounded binary output.',
+  },
+  // NOTE: Page.navigate is INTENTIONALLY NOT on the allowlist (Codex T2 cat 4).
+  // Use $B goto for navigation; that path goes through the URL blocklist.
+  // ─── Network metadata (NOT bodies/cookies — those exfil data) ──
+  {
+    domain: 'Network',
+    method: 'enable',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Domain enable; required prerequisite. Does not return data.',
+  },
+  {
+    domain: 'Network',
+    method: 'disable',
+    scope: 'tab',
+    output: 'trusted',
+    justification: 'Domain disable.',
+  },
+  // NOTE: Network.getResponseBody, Network.getCookies, Network.replayXHR,
+  // Network.loadNetworkResource are INTENTIONALLY NOT allowed (Codex T2 cat 7).
+  // ─── Runtime (limited, NO evaluate/callFunctionOn) ──────────
+  // Runtime.evaluate/callFunctionOn/compileScript/runScript = RCE if exposed (Codex T2 cat 6).
+  // Only a tiny safe subset:
+  {
+    domain: 'Runtime',
+    method: 'getProperties',
+    scope: 'tab',
+    output: 'untrusted',
+    justification: 'Inspect properties of an existing remote object. Read-only; output may contain page data.',
+  },
+];
+
+/**
+ * The audited CDP allow-list. Anything not listed here is denied.
+ *
+ * Both the array and each entry are frozen: Object.freeze on the array alone is
+ * shallow and would still let an importer rewrite an entry's `scope` or
+ * `output` at runtime, silently changing the lock tier or the trust tag.
+ */
+export const CDP_ALLOWLIST: ReadonlyArray<CdpAllowEntry> = Object.freeze(
+  CDP_ALLOWLIST_ENTRIES.map((entry) => Object.freeze(entry)),
+);
+
+/** Allow-list entries keyed by qualified `Domain.method` name; built once at module load. */
+const CDP_ALLOWLIST_INDEX: Map<string, CdpAllowEntry> = new Map();
+for (const entry of CDP_ALLOWLIST) {
+  CDP_ALLOWLIST_INDEX.set(`${entry.domain}.${entry.method}`, entry);
+}
+
+/**
+ * Find the allow-list entry for a qualified CDP method name.
+ *
+ * @param qualifiedName Name in `Domain.method` form.
+ * @returns The matching entry, or null when the method is not allow-listed.
+ */
+export function lookupCdpMethod(qualifiedName: string): CdpAllowEntry | null {
+  const hit = CDP_ALLOWLIST_INDEX.get(qualifiedName);
+  return hit === undefined ? null : hit;
+}
+
+/**
+ * Report whether a qualified CDP method name is on the allow-list.
+ *
+ * @param qualifiedName Name in `Domain.method` form.
+ */
+export function isCdpMethodAllowed(qualifiedName: string): boolean {
+  return lookupCdpMethod(qualifiedName) !== null;
+}
@@ -0,0 +1,106 @@
+/**
+ * CDP escape hatch — `$B cdp <Domain.method> [json-params]`.
+ *
+ * Path A from the spike: uses Playwright's newCDPSession() per page so we
+ * piggyback Playwright's own CDP socket (no second WebSocket, no need for
+ * --remote-debugging-port).
+ *
+ * Security posture (Codex T2):
+ *   - DENY-DEFAULT. Methods must be explicitly listed in cdp-allowlist.ts.
+ *   - Each entry is tagged scope (tab|browser) and output (trusted|untrusted).
+ *
+ * Concurrency posture (Codex T7):
+ *   - Two-tier lock from browser-manager.ts.
+ *   - tab-scoped methods take the per-tab mutex.
+ *   - browser-scoped methods take the global lock that blocks all tab mutexes.
+ *   - Hard 5s timeout on acquire → CDPMutexAcquireTimeout (no silent hangs).
+ *   - Every lock-holder uses try { ... } finally { release() } so errors don't leak locks.
+ */
+
+import type { Page } from 'playwright';
+import type { BrowserManager } from './browser-manager';
+import { lookupCdpMethod, type CdpAllowEntry } from './cdp-allowlist';
+
+/** Budget in milliseconds for a single CDP method call to return. */
+const CDP_TIMEOUT_MS = 5000;
+/** Budget in milliseconds for acquiring the tab or global CDP lock. */
+const CDP_ACQUIRE_TIMEOUT_MS = 5000;
+
+// CDPSession cache keyed by Page. A session is created on the first call for a
+// page and the entry is removed when that page emits 'close'.
+const sessionCache: WeakMap<Page, any> = new WeakMap();
+
+/**
+ * Return the cached CDPSession for a page, creating and caching one on first use.
+ *
+ * @param page Page to attach the session to.
+ * @returns The CDP session for `page`.
+ */
+async function getCdpSession(page: Page): Promise<any> {
+  const cached = sessionCache.get(page);
+  if (cached) return cached;
+
+  const context = page.context();
+  const fresh = await context.newCDPSession(page);
+  sessionCache.set(page, fresh);
+  // Drop the cache entry when the page closes so no stale handle is kept.
+  page.once('close', () => sessionCache.delete(page));
+  return fresh;
+}
+
+/** Arguments for a single CDP dispatch through the bridge. */
+export interface CdpDispatchInput {
+  /** CDP domain, e.g. `DOM`. */
+  domain: string;
+  /** Method name within the domain, e.g. `describeNode`. */
+  method: string;
+  /** Parameter object forwarded to the CDP session's send call. */
+  params: Record<string, unknown>;
+  /** Tab whose page the call is sent to; also selects the per-tab lock. */
+  tabId: number;
+  /** Browser manager that supplies the locks and the tab → Page lookup. */
+  bm: BrowserManager;
+}
+
+/** Outcome of a successful CDP dispatch. */
+export interface CdpDispatchResult {
+  /** Value the CDP call resolved with, unmodified. */
+  raw: unknown;
+  /** Allow-list entry that authorized the call (carries the scope/output tags). */
+  entry: CdpAllowEntry;
+}
+
+/**
+ * Look up + acquire mutex + send + release.
+ *
+ * The allow-list is checked first; browser-scoped methods then take the global
+ * CDP lock and tab-scoped methods take the lock of `input.tabId`. The lock is
+ * always released and the call-timeout timer always cancelled, whether the
+ * call succeeds, fails, or times out.
+ *
+ * @param input Method, params, target tab and browser manager.
+ * @returns The raw CDP result together with the allow-list entry that matched.
+ * @throws Error with one of these message prefixes:
+ *  - DENIED (method not on allowlist)
+ *  - CDPMutexAcquireTimeout (lock contention exceeded budget)
+ *  - CDPBridgeTimeout (CDP method itself didn't return in budget)
+ *  - CDPSessionInvalidated (a CDP session could not be obtained for the page)
+ *  - "Cannot dispatch" (the tab no longer exists)
+ */
+export async function dispatchCdpCall(input: CdpDispatchInput): Promise<CdpDispatchResult> {
+  const qualified = `${input.domain}.${input.method}`;
+  const entry = lookupCdpMethod(qualified);
+  if (!entry) {
+    throw new Error(
+      `DENIED: ${qualified} is not on the CDP allowlist.\n` +
+        `Cause: deny-default posture; method has not been audited and added to cdp-allowlist.ts.\n` +
+        `Action: if this method is genuinely needed, open a PR adding it to CDP_ALLOWLIST with a one-line justification + scope (tab|browser) + output (trusted|untrusted).`
+    );
+  }
+  // Acquire the right tier of lock.
+  const release =
+    entry.scope === 'browser'
+      ? await input.bm.acquireGlobalCdpLock(CDP_ACQUIRE_TIMEOUT_MS)
+      : await input.bm.acquireTabLock(input.tabId, CDP_ACQUIRE_TIMEOUT_MS);
+
+  let callTimer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    const page = input.bm.getPageForTab(input.tabId);
+    if (!page) {
+      throw new Error(
+        `Cannot dispatch: tab ${input.tabId} not found.\n` +
+          'Cause: tab was closed between command queue and dispatch.\n' +
+          'Action: $B tabs to list current tabs.'
+      );
+    }
+    let session;
+    try {
+      session = await getCdpSession(page);
+    } catch (e: unknown) {
+      // Anything can be thrown; only read .message from real Error objects.
+      const detail = e instanceof Error ? e.message : String(e);
+      throw new Error(
+        `CDPSessionInvalidated: ${detail}\n` +
+          'Cause: Playwright context was recreated (e.g., viewport scale change) and the prior CDP session is stale.\n' +
+          'Action: retry the command; the bridge will create a fresh session.'
+      );
+    }
+    // Race the call against a hard timeout.
+    const callPromise = session.send(qualified, input.params);
+    const timeoutPromise = new Promise<never>((_, reject) => {
+      callTimer = setTimeout(
+        () => reject(new Error(`CDPBridgeTimeout: ${qualified} did not return within ${CDP_TIMEOUT_MS}ms`)),
+        CDP_TIMEOUT_MS,
+      );
+    });
+    const raw = await Promise.race([callPromise, timeoutPromise]);
+    return { raw, entry };
+  } finally {
+    // Cancel the timeout so a finished call does not leave a live timer behind.
+    clearTimeout(callTimer);
+    release();
+  }
+}
@@ -0,0 +1,64 @@
+/**
+ * $B cdp <Domain.method> [json-params] — CLI surface for the CDP escape hatch.
+ *
+ * Output for trusted methods is a plain JSON pretty-print.
+ * Output for untrusted methods is wrapped with the centralized UNTRUSTED EXTERNAL
+ * CONTENT envelope so the sidebar-agent classifier sees it (matches the pattern
+ * used by other untrusted-content commands in commands.ts).
+ */
+
+import type { BrowserManager } from './browser-manager';
+import { dispatchCdpCall } from './cdp-bridge';
+import { wrapUntrustedContent } from './commands';
+
+/**
+ * Split a `Domain.method` string at its first dot.
+ *
+ * @param name Qualified CDP method name as typed on the command line.
+ * @returns The domain (text before the first dot) and method (everything after it).
+ * @throws Error with a usage message when there is no dot, the dot is the first
+ *   character, or the dot is the last character.
+ */
+function parseQualified(name: string): { domain: string; method: string } {
+  const dot = name.indexOf('.');
+  const hasDomain = dot > 0;
+  const hasMethod = dot !== name.length - 1;
+  if (hasDomain && hasMethod) {
+    return { domain: name.slice(0, dot), method: name.slice(dot + 1) };
+  }
+  throw new Error(
+    `Usage: $B cdp <Domain.method> [json-params]\n` +
+      `Cause: '${name}' is not in Domain.method format.\n` +
+      'Action: e.g. $B cdp Accessibility.getFullAXTree {}'
+  );
+}
+
+/**
+ * Run `$B cdp <Domain.method> [json-params]` against the active tab.
+ *
+ * @param args Command arguments: the qualified method name, then optional JSON params.
+ *   No arguments, `help`, or `--help` returns the usage text.
+ * @param bm Browser manager used to resolve the active tab and dispatch the call.
+ * @returns Pretty-printed JSON of the CDP result; wrapped via wrapUntrustedContent
+ *   when the allow-list entry is tagged `untrusted`.
+ * @throws Error when the method name is malformed, the params are not valid
+ *   JSON or not a JSON object, or the dispatch itself fails.
+ */
+export async function handleCdpCommand(args: string[], bm: BrowserManager): Promise<string> {
+  const [qualified, rawParams] = args;
+  if (qualified === undefined || qualified === 'help' || qualified === '--help') {
+    return [
+      '$B cdp — raw CDP method dispatch (deny-default escape hatch)',
+      '',
+      'Usage: $B cdp <Domain.method> [json-params]',
+      '',
+      'Allowed methods are listed in browse/src/cdp-allowlist.ts. To add one,',
+      'open a PR with a one-line justification and the (scope, output) tags.',
+      'Examples:',
+      '  $B cdp Accessibility.getFullAXTree {}',
+      '  $B cdp Performance.getMetrics {}',
+      '  $B cdp DOM.describeNode \'{"backendNodeId":42,"depth":3}\'',
+    ].join('\n');
+  }
+  const { domain, method } = parseQualified(qualified);
+  // Optional second arg is JSON params; default to {}.
+  let params: Record<string, unknown> = {};
+  if (rawParams) {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(rawParams);
+    } catch (e: unknown) {
+      const detail = e instanceof Error ? e.message : String(e);
+      throw new Error(
+        `Cannot parse params as JSON: ${detail}\n` +
+          `Cause: argument '${rawParams}' is not valid JSON.\n` +
+          'Action: pass a JSON object literal, e.g. \'{"backendNodeId":42}\'.'
+      );
+    }
+    // A literal `null` keeps its previous meaning of "no params". Anything else
+    // must be a plain JSON object: arrays and primitives are valid JSON but are
+    // not valid CDP params, so reject them here with a clear message.
+    if (parsed !== null) {
+      if (typeof parsed !== 'object' || Array.isArray(parsed)) {
+        const kind = Array.isArray(parsed) ? 'array' : typeof parsed;
+        throw new Error(
+          `Cannot use params: expected a JSON object, got ${kind}.\n` +
+            `Cause: argument '${rawParams}' is valid JSON but CDP params must be an object.\n` +
+            'Action: pass a JSON object literal, e.g. \'{"backendNodeId":42}\'.'
+        );
+      }
+      params = parsed as Record<string, unknown>;
+    }
+  }
+  // Dispatch via the bridge (allowlist + mutex + timeout + finally-release).
+  const tabId = bm.getActiveTabId();
+  const { raw, entry } = await dispatchCdpCall({ domain, method, params, tabId, bm });
+  // JSON.stringify(undefined) returns undefined, not a string; render a missing
+  // result as an empty object so the declared string return type holds.
+  const json = JSON.stringify(raw === undefined ? {} : raw, null, 2);
+  if (entry.output === 'untrusted') {
+    return wrapUntrustedContent(json, `cdp:${qualified}`);
+  }
+  return json;
+}