feat(browse): $B cdp escape hatch — deny-default allowlist + two-tier mutex

Codex T2: flip CDP posture to deny-default. Allowed methods enumerated in
cdp-allowlist.ts with (scope: tab|browser, output: trusted|untrusted,
justification) per entry.

Initial allowlist (~25 methods) covers:
- Accessibility tree extraction (read-only)
- DOM/CSS inspection (read-only)
- Performance metrics
- Tracing
- Emulation viewport/UA override
- Page screenshot/PDF capture (output is binary, no marker injection vector)
- Network.enable/disable (no bodies/cookies — those are exfil surfaces)
- Runtime.getProperties (NO evaluate/callFunctionOn — those would be RCE)

Page.navigate is INTENTIONALLY NOT allowed; agents use $B goto which
goes through the URL blocklist.

Codex T7: two-tier mutex. tab-scoped methods take per-tab lock; browser-
scoped take global lock that blocks all tab locks. 5s acquire timeout
yields CDPMutexAcquireTimeout (no silent hangs). All lock acquires use
try/finally so errors don't leak the lock.

Path A from spike: uses Playwright's newCDPSession() per page. No second
WebSocket, no need for --remote-debugging-port. CDPSession is cached
per page in a WeakMap and cleared on page close.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-25 13:06:55 -07:00
parent 41f008debd
commit 78c1f5b33c
4 changed files with 458 additions and 0 deletions
+74
View File
@@ -741,6 +741,80 @@ export class BrowserManager {
return session;
}
/** Get the underlying Page for a tab id. Returns null if the tab doesn't exist.
* Used by the CDP bridge (cdp-bridge.ts) to mint per-tab CDPSessions. */
getPageForTab(tabId: number): Page | null {
return this.pages.get(tabId) ?? null;
}
// ─── Two-tier mutex (Codex T7) ─────────────────────────────
// Per-tab and global locks for the CDP bridge. tab-scoped methods take the
// per-tab mutex; browser-scoped methods take the global lock that blocks all
// tab mutexes. Hard timeout on acquire so silent deadlock can't happen.
// Every caller MUST use try { ... } finally { release() }.
private tabLocks: Map<number, Promise<void>> = new Map();
private globalCdpLockTail: Promise<void> = Promise.resolve();
/**
* Acquire the per-tab CDP lock with a timeout. Returns a release fn.
* Locks chain: each acquire waits on the prior tail's resolution.
* Browser-scoped global lock takes precedence: while the global lock is
* held, no tab lock can be acquired (and vice versa).
*/
async acquireTabLock(tabId: number, timeoutMs: number): Promise<() => void> {
const existing = this.tabLocks.get(tabId) ?? Promise.resolve();
// Wait for any held global lock first (cross-tier serialization).
const tail = Promise.all([existing, this.globalCdpLockTail]).then(() => undefined);
let release!: () => void;
const next = new Promise<void>((resolve) => { release = resolve; });
this.tabLocks.set(tabId, tail.then(() => next));
const timeoutPromise = new Promise<never>((_, reject) =>
setTimeout(() => reject(new Error(
`CDPMutexAcquireTimeout: tab ${tabId} lock not acquired within ${timeoutMs}ms.\n` +
'Cause: a prior CDP or browser-scoped operation has held the lock too long.\n' +
'Action: retry; if this repeats, the prior operation may be hung — file a bug.'
)), timeoutMs),
);
try {
await Promise.race([tail, timeoutPromise]);
} catch (e) {
// Acquisition failed; release the slot we reserved so we don't deadlock the queue.
release();
throw e;
}
return release;
}
/**
* Acquire the global CDP lock. Blocks until all tab locks are released, and
* blocks new tab-lock acquisitions until released.
*/
async acquireGlobalCdpLock(timeoutMs: number): Promise<() => void> {
const allTabTails = Array.from(this.tabLocks.values());
const priorGlobal = this.globalCdpLockTail;
const allPrior = Promise.all([priorGlobal, ...allTabTails]).then(() => undefined);
let release!: () => void;
const next = new Promise<void>((resolve) => { release = resolve; });
this.globalCdpLockTail = allPrior.then(() => next);
const timeoutPromise = new Promise<never>((_, reject) =>
setTimeout(() => reject(new Error(
`CDPMutexAcquireTimeout: global CDP lock not acquired within ${timeoutMs}ms.\n` +
'Cause: in-flight tab operations have not completed.\n' +
'Action: retry; if this repeats, file a bug — a tab op may be hung.'
)), timeoutMs),
);
try {
await Promise.race([allPrior, timeoutPromise]);
} catch (e) {
release();
throw e;
}
return release;
}
// ─── Page Access (delegates to active session) ─────────────
getPage(): Page {
return this.getActiveSession().page;
+214
View File
@@ -0,0 +1,214 @@
/**
* CDP method allow-list (T2: deny-default).
*
* Codex outside-voice T2: allow-default with a deny-list is backwards because
* Target.*, Browser.*, Runtime.evaluate, Page.addScriptToEvaluateOnNewDocument,
* Fetch.*, IO.read, etc. are all dangerous and easy to forget. Default-deny
* inverts the failure mode: missing a method means it's blocked (annoying),
* not exposed (silent compromise).
*
* Each entry has:
* - domain.method unique CDP identifier
* - scope "tab" | "browser" — controls T7 mutex tier
* - output "trusted" | "untrusted" — wraps result if "untrusted"
* - justification why this method is safe to allow
*
* Add entries via PR. CI lint (cdp-allowlist.test.ts) ensures every entry has all 4 fields.
*/
export type CdpScope = 'tab' | 'browser';
export type CdpOutput = 'trusted' | 'untrusted';
export interface CdpAllowEntry {
domain: string;
method: string;
scope: CdpScope;
output: CdpOutput;
justification: string;
}
export const CDP_ALLOWLIST: ReadonlyArray<CdpAllowEntry> = Object.freeze([
// ─── Accessibility (read-only) ─────────────────────────────
{
domain: 'Accessibility',
method: 'getFullAXTree',
scope: 'tab',
output: 'untrusted',
justification: 'Read-only AX tree extraction. Output is third-party page content; wrap in UNTRUSTED.',
},
{
domain: 'Accessibility',
method: 'getPartialAXTree',
scope: 'tab',
output: 'untrusted',
justification: 'Read-only AX tree subtree by node. Output is third-party page content.',
},
{
domain: 'Accessibility',
method: 'getRootAXNode',
scope: 'tab',
output: 'untrusted',
justification: 'Read-only root AX node accessor.',
},
// ─── DOM (read-only inspection) ────────────────────────────
{
domain: 'DOM',
method: 'describeNode',
scope: 'tab',
output: 'untrusted',
justification: 'Inspect a DOM node by backend ID; pure read.',
},
{
domain: 'DOM',
method: 'getBoxModel',
scope: 'tab',
output: 'trusted',
justification: 'Pure geometric data (box dimensions). No page content leaks; safe trusted.',
},
{
domain: 'DOM',
method: 'getNodeForLocation',
scope: 'tab',
output: 'trusted',
justification: 'Pure coordinate→nodeId mapping; no content leak.',
},
// ─── CSS (read-only) ───────────────────────────────────────
{
domain: 'CSS',
method: 'getMatchedStylesForNode',
scope: 'tab',
output: 'untrusted',
justification: 'Read computed cascade for a node; output may contain attacker-controlled selectors.',
},
{
domain: 'CSS',
method: 'getComputedStyleForNode',
scope: 'tab',
output: 'trusted',
justification: 'Computed style values are bounded (CSS keywords/numbers); safe trusted.',
},
{
domain: 'CSS',
method: 'getInlineStylesForNode',
scope: 'tab',
output: 'untrusted',
justification: 'Inline style content may contain attacker-controlled custom-property values.',
},
// ─── Performance metrics ───────────────────────────────────
{
domain: 'Performance',
method: 'getMetrics',
scope: 'tab',
output: 'trusted',
justification: 'Pure numeric metrics (timing, layout count); safe.',
},
{
domain: 'Performance',
method: 'enable',
scope: 'tab',
output: 'trusted',
justification: 'Domain enable; no content; required prerequisite for getMetrics.',
},
{
domain: 'Performance',
method: 'disable',
scope: 'tab',
output: 'trusted',
justification: 'Domain disable; no content.',
},
// ─── Tracing (event capture) ───────────────────────────────
// NOTE: Tracing.start can capture cross-tab data depending on categories.
// We mark it browser-scoped to acquire the global lock when in use.
{
domain: 'Tracing',
method: 'start',
scope: 'browser',
output: 'trusted',
justification: 'Trace category capture. Browser-scoped to serialize against other CDP ops.',
},
{
domain: 'Tracing',
method: 'end',
scope: 'browser',
output: 'untrusted',
justification: 'Trace dump may contain URLs and page data; wrap.',
},
// ─── Emulation (viewport/device) ───────────────────────────
{
domain: 'Emulation',
method: 'setDeviceMetricsOverride',
scope: 'tab',
output: 'trusted',
justification: 'Viewport/scale override on the active tab.',
},
{
domain: 'Emulation',
method: 'clearDeviceMetricsOverride',
scope: 'tab',
output: 'trusted',
justification: 'Clear viewport override.',
},
{
domain: 'Emulation',
method: 'setUserAgentOverride',
scope: 'tab',
output: 'trusted',
justification: 'UA override on the active tab. NOTE: changes affect future requests; fine for tests.',
},
// ─── Page capture (output, not navigation) ─────────────────
{
domain: 'Page',
method: 'captureScreenshot',
scope: 'tab',
output: 'untrusted',
justification: 'Screenshot bytes; output is bounded image data (no marker injection vector).',
},
{
domain: 'Page',
method: 'printToPDF',
scope: 'tab',
output: 'untrusted',
justification: 'PDF bytes; bounded binary output.',
},
// NOTE: Page.navigate is INTENTIONALLY NOT on the allowlist (Codex T2 cat 4).
// Use $B goto for navigation; that path goes through the URL blocklist.
// ─── Network metadata (NOT bodies/cookies — those exfil data) ──
{
domain: 'Network',
method: 'enable',
scope: 'tab',
output: 'trusted',
justification: 'Domain enable; required prerequisite. Does not return data.',
},
{
domain: 'Network',
method: 'disable',
scope: 'tab',
output: 'trusted',
justification: 'Domain disable.',
},
// NOTE: Network.getResponseBody, Network.getCookies, Network.replayXHR,
// Network.loadNetworkResource are INTENTIONALLY NOT allowed (Codex T2 cat 7).
// ─── Runtime (limited, NO evaluate/callFunctionOn) ──────────
// Runtime.evaluate/callFunctionOn/compileScript/runScript = RCE if exposed (Codex T2 cat 6).
// Only a tiny safe subset:
{
domain: 'Runtime',
method: 'getProperties',
scope: 'tab',
output: 'untrusted',
justification: 'Inspect properties of an existing remote object. Read-only; output may contain page data.',
},
]);
const CDP_ALLOWLIST_INDEX: Map<string, CdpAllowEntry> = new Map(
CDP_ALLOWLIST.map((e) => [`${e.domain}.${e.method}`, e]),
);
export function lookupCdpMethod(qualifiedName: string): CdpAllowEntry | null {
return CDP_ALLOWLIST_INDEX.get(qualifiedName) ?? null;
}
export function isCdpMethodAllowed(qualifiedName: string): boolean {
return CDP_ALLOWLIST_INDEX.has(qualifiedName);
}
+106
View File
@@ -0,0 +1,106 @@
/**
* CDP escape hatch — `$B cdp <Domain.method> [json-params]`.
*
* Path A from the spike: uses Playwright's newCDPSession() per page so we
* piggyback Playwright's own CDP socket (no second WebSocket, no need for
* --remote-debugging-port).
*
* Security posture (Codex T2):
* - DENY-DEFAULT. Methods must be explicitly listed in cdp-allowlist.ts.
* - Each entry is tagged scope (tab|browser) and output (trusted|untrusted).
*
* Concurrency posture (Codex T7):
* - Two-tier lock from browser-manager.ts.
* - tab-scoped methods take the per-tab mutex.
* - browser-scoped methods take the global lock that blocks all tab mutexes.
* - Hard 5s timeout on acquire → CDPMutexAcquireTimeout (no silent hangs).
* - Every lock-holder uses try { ... } finally { release() } so errors don't leak locks.
*/
import type { Page } from 'playwright';
import type { BrowserManager } from './browser-manager';
import { lookupCdpMethod, type CdpAllowEntry } from './cdp-allowlist';
const CDP_TIMEOUT_MS = 5000;
const CDP_ACQUIRE_TIMEOUT_MS = 5000;
// Per-page CDPSession cache. Created lazily on first allow-listed call,
// cleaned up when the page closes.
const sessionCache: WeakMap<Page, any> = new WeakMap();
async function getCdpSession(page: Page): Promise<any> {
let s = sessionCache.get(page);
if (s) return s;
s = await page.context().newCDPSession(page);
sessionCache.set(page, s);
// Clear cache on detach so we don't hold a stale handle.
page.once('close', () => sessionCache.delete(page));
return s;
}
export interface CdpDispatchInput {
domain: string;
method: string;
params: Record<string, unknown>;
tabId: number;
bm: BrowserManager;
}
export interface CdpDispatchResult {
raw: unknown;
entry: CdpAllowEntry;
}
/**
* Look up + acquire mutex + send + release. Throws structured errors on:
* - DENIED (method not on allowlist)
* - CDPMutexAcquireTimeout (lock contention exceeded budget)
* - CDPBridgeTimeout (CDP method itself didn't return in budget)
* - CDPSessionInvalidated (Playwright recreated context, session stale)
*/
export async function dispatchCdpCall(input: CdpDispatchInput): Promise<CdpDispatchResult> {
const qualified = `${input.domain}.${input.method}`;
const entry = lookupCdpMethod(qualified);
if (!entry) {
throw new Error(
`DENIED: ${qualified} is not on the CDP allowlist.\n` +
`Cause: deny-default posture; method has not been audited and added to cdp-allowlist.ts.\n` +
`Action: if this method is genuinely needed, open a PR adding it to CDP_ALLOWLIST with a one-line justification + scope (tab|browser) + output (trusted|untrusted).`
);
}
// Acquire the right tier of lock.
const release =
entry.scope === 'browser'
? await input.bm.acquireGlobalCdpLock(CDP_ACQUIRE_TIMEOUT_MS)
: await input.bm.acquireTabLock(input.tabId, CDP_ACQUIRE_TIMEOUT_MS);
try {
const page = input.bm.getPageForTab(input.tabId);
if (!page) {
throw new Error(
`Cannot dispatch: tab ${input.tabId} not found.\n` +
'Cause: tab was closed between command queue and dispatch.\n' +
'Action: $B tabs to list current tabs.'
);
}
let session;
try {
session = await getCdpSession(page);
} catch (e: any) {
throw new Error(
`CDPSessionInvalidated: ${e.message}\n` +
'Cause: Playwright context was recreated (e.g., viewport scale change) and the prior CDP session is stale.\n' +
'Action: retry the command; the bridge will create a fresh session.'
);
}
// Race the call against a hard timeout.
const callPromise = session.send(qualified, input.params);
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error(`CDPBridgeTimeout: ${qualified} did not return within ${CDP_TIMEOUT_MS}ms`)), CDP_TIMEOUT_MS),
);
const raw = await Promise.race([callPromise, timeoutPromise]);
return { raw, entry };
} finally {
release();
}
}
+64
View File
@@ -0,0 +1,64 @@
/**
* $B cdp <Domain.method> [json-params] — CLI surface for the CDP escape hatch.
*
* Output for trusted methods is a plain JSON pretty-print.
* Output for untrusted methods is wrapped with the centralized UNTRUSTED EXTERNAL
* CONTENT envelope so the sidebar-agent classifier sees it (matches the pattern
* used by other untrusted-content commands in commands.ts).
*/
import type { BrowserManager } from './browser-manager';
import { dispatchCdpCall } from './cdp-bridge';
import { wrapUntrustedContent } from './commands';
function parseQualified(name: string): { domain: string; method: string } {
const idx = name.indexOf('.');
if (idx <= 0 || idx === name.length - 1) {
throw new Error(
`Usage: $B cdp <Domain.method> [json-params]\n` +
`Cause: '${name}' is not in Domain.method format.\n` +
'Action: e.g. $B cdp Accessibility.getFullAXTree {}'
);
}
return { domain: name.slice(0, idx), method: name.slice(idx + 1) };
}
export async function handleCdpCommand(args: string[], bm: BrowserManager): Promise<string> {
if (args.length === 0 || args[0] === 'help' || args[0] === '--help') {
return [
'$B cdp — raw CDP method dispatch (deny-default escape hatch)',
'',
'Usage: $B cdp <Domain.method> [json-params]',
'',
'Allowed methods are listed in browse/src/cdp-allowlist.ts. To add one,',
'open a PR with a one-line justification and the (scope, output) tags.',
'Examples:',
' $B cdp Accessibility.getFullAXTree {}',
' $B cdp Performance.getMetrics {}',
' $B cdp DOM.describeNode \'{"backendNodeId":42,"depth":3}\'',
].join('\n');
}
const qualified = args[0]!;
const { domain, method } = parseQualified(qualified);
// Optional second arg is JSON params; default to {}.
let params: Record<string, unknown> = {};
if (args[1]) {
try {
params = JSON.parse(args[1]) ?? {};
} catch (e: any) {
throw new Error(
`Cannot parse params as JSON: ${e.message}\n` +
`Cause: argument '${args[1]}' is not valid JSON.\n` +
'Action: pass a JSON object literal, e.g. \'{"backendNodeId":42}\'.'
);
}
}
// Dispatch via the bridge (allowlist + mutex + timeout + finally-release).
const tabId = bm.getActiveTabId();
const { raw, entry } = await dispatchCdpCall({ domain, method, params, tabId, bm });
const json = JSON.stringify(raw, null, 2);
if (entry.output === 'untrusted') {
return wrapUntrustedContent(json, `cdp:${qualified}`);
}
return json;
}