Merge origin/main into garrytan/browserharness

Resolves three conflicts:

- VERSION: kept 1.19.0.0 (this branch's consolidated Phase 1 + 2a ship);
  main brought 1.17.0.0 (setup-gbrain wireup) on top of 1.16.0.0 (tunnel
  allowlist), this branch leaps past with a gap (CLAUDE.md allows version
  gaps).
- package.json: synced to 1.19.0.0 to match VERSION (HEAD had 1.16.0.0
  stale from the prior release commit; this fixes the drift).
- CHANGELOG.md: kept the v1.19.0.0 entry on top, then main's v1.17.0.0
  + v1.16.0.0 entries below.

browse/src/server.ts auto-merged: main's TUNNEL_COMMANDS expanded from
17→26 (export const) with the canDispatchOverTunnel pure gate is intact
alongside this branch's LOCAL_LISTEN_PORT addition for $B skill run.

Verification: 858/0 pass across browser-skill-write + browser-skill-commands
+ browser-skills-storage + skill-token + dual-listener + tunnel-gate-unit
+ skill-validation + gen-skill-docs (covers both this branch's Phase 2a
and main's tunnel allowlist work).

Regenerated all SKILL.md files (--host all) to absorb main's setup-gbrain
template changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-28 01:47:26 -07:00
21 changed files with 1646 additions and 128 deletions
+43 -3
View File
@@ -116,13 +116,31 @@ const TUNNEL_PATHS = new Set<string>([
* extension-inspector state. This allowlist maps to the eng-review decision
* logged in the CEO plan for sec-wave v1.6.0.0.
*/
const TUNNEL_COMMANDS = new Set<string>([
export const TUNNEL_COMMANDS = new Set<string>([
// Original 17
'goto', 'click', 'text', 'screenshot',
'html', 'links', 'forms', 'accessibility',
'attrs', 'media', 'data',
'scroll', 'press', 'type', 'select', 'wait', 'eval',
// Tab + navigation primitives operator docs and CLI hints already promised
'newtab', 'tabs', 'back', 'forward', 'reload',
// Read/inspect/write operators paired agents need to be useful
'snapshot', 'fill', 'url', 'closetab',
]);
/**
* Pure gate: returns true iff the command is reachable over the tunnel surface.
* Extracted from the inline /command handler so the gate logic is unit-testable
* without standing up an HTTP listener. Behavior is identical to the inline
* check; the function canonicalizes the command (so aliases hit the same set)
* and returns false for null/undefined input.
*/
export function canDispatchOverTunnel(command: string | undefined | null): boolean {
if (typeof command !== 'string' || command.length === 0) return false;
const cmd = canonicalizeCommand(command);
return TUNNEL_COMMANDS.has(cmd);
}
/**
* Read ngrok authtoken from env var, ~/.gstack/ngrok.env, or ngrok's native
* config files. Returns null if nothing found. Shared between the
@@ -1782,8 +1800,7 @@ async function start() {
// Paired remote agents drive the browser but cannot configure the
// daemon, launch new browsers, import cookies, or rotate tokens.
if (surface === 'tunnel') {
const cmd = canonicalizeCommand(body?.command);
if (!cmd || !TUNNEL_COMMANDS.has(cmd)) {
if (!canDispatchOverTunnel(body?.command)) {
logTunnelDenial(req, url, `disallowed_command:${body?.command}`);
return new Response(JSON.stringify({
error: `Command '${body?.command}' is not allowed over the tunnel surface`,
@@ -2070,6 +2087,29 @@ async function start() {
tunnelListener = null;
}
}
} else if (process.env.BROWSE_TUNNEL_LOCAL_ONLY === '1') {
// Test-only: bind the dual-listener tunnel surface on 127.0.0.1 with NO
// ngrok forwarding. Lets paid evals exercise the surface==='tunnel' gate
// without an ngrok authtoken or live network. Production tunneling still
// requires BROWSE_TUNNEL=1 + a valid authtoken above.
try {
const boundTunnel = Bun.serve({
port: 0,
hostname: '127.0.0.1',
fetch: makeFetchHandler('tunnel'),
});
tunnelServer = boundTunnel;
tunnelActive = true;
const tunnelPort = boundTunnel.port;
console.log(`[browse] Tunnel listener bound (local-only test mode) on 127.0.0.1:${tunnelPort}`);
const stateContent = JSON.parse(fs.readFileSync(config.stateFile, 'utf-8'));
stateContent.tunnelLocalPort = tunnelPort;
const tmpState = config.stateFile + '.tmp';
fs.writeFileSync(tmpState, JSON.stringify(stateContent, null, 2), { mode: 0o600 });
fs.renameSync(tmpState, config.stateFile);
} catch (err: any) {
console.error(`[browse] BROWSE_TUNNEL_LOCAL_ONLY=1 listener bind failed: ${err.message}`);
}
}
}
+40 -11
View File
@@ -70,17 +70,37 @@ describe('Tunnel path allowlist', () => {
});
describe('Tunnel command allowlist', () => {
test('TUNNEL_COMMANDS is a closed set of browser-driving commands only', () => {
// The full closed set of commands reachable over the tunnel surface. Adding
// or removing a command here means changing the literal in server.ts AND
// updating this list — that double-edit is the point. A single-source
// "include the items in the source" assertion would silently widen the
// surface during a refactor that adds a command to server.ts without test
// review. The exact-set match catches it.
const EXPECTED_TUNNEL_COMMANDS = new Set([
// Original 17
'goto', 'click', 'text', 'screenshot',
'html', 'links', 'forms', 'accessibility',
'attrs', 'media', 'data',
'scroll', 'press', 'type', 'select', 'wait', 'eval',
// Tab + navigation primitives operator docs and CLI hints already promised
'newtab', 'tabs', 'back', 'forward', 'reload',
// Read/inspect/write operators paired agents need to be useful
'snapshot', 'fill', 'url', 'closetab',
]);
test('TUNNEL_COMMANDS literal matches the closed allowlist exactly (catches additions/removals without test update)', () => {
const cmds = extractSetContents(SERVER_SRC, 'TUNNEL_COMMANDS');
// Must include the core browser-driving commands
const required = [
'goto', 'click', 'text', 'screenshot', 'html', 'links',
'forms', 'accessibility', 'attrs', 'media', 'data',
'scroll', 'press', 'type', 'select', 'wait', 'eval',
];
for (const c of required) {
// Both directions: anything in the source must be expected, and anything
// expected must be in the source. The intersection-only style of the old
// must-include / must-exclude tests let new commands sneak into the source
// without a corresponding test update.
for (const c of cmds) {
expect(EXPECTED_TUNNEL_COMMANDS.has(c)).toBe(true);
}
for (const c of EXPECTED_TUNNEL_COMMANDS) {
expect(cmds.has(c)).toBe(true);
}
expect(cmds.size).toBe(EXPECTED_TUNNEL_COMMANDS.size);
});
test('TUNNEL_COMMANDS does NOT include daemon-configuration or bootstrap commands', () => {
@@ -89,12 +109,21 @@ describe('Tunnel command allowlist', () => {
'launch', 'launch-browser', 'connect', 'disconnect',
'restart', 'stop', 'tunnel-start', 'tunnel-stop',
'token-mint', 'token-revoke', 'cookie-picker', 'cookie-import',
'inspector-pick',
'inspector-pick', 'pair', 'unpair', 'cookies', 'setup',
];
for (const c of forbidden) {
expect(cmds.has(c)).toBe(false);
}
});
test('newtab ownership exemption preserved (catches refactors that re-introduce the catch-22)', () => {
// The /command handler must skip the per-tab ownership check when the
// command is `newtab`, otherwise paired agents have no way to create their
// own tab — every other write command requires an owned tab, and you can't
// own a tab you haven't created. The string `command !== 'newtab'` is the
// contract that breaks the catch-22.
expect(SERVER_SRC).toMatch(/command\s*!==\s*['"]newtab['"]/);
});
});
describe('Request handler factory', () => {
@@ -176,14 +205,14 @@ describe('GET /connect alive probe', () => {
});
describe('/command tunnel command allowlist', () => {
test('/command handler checks TUNNEL_COMMANDS when surface is tunnel', () => {
test('/command handler delegates to canDispatchOverTunnel when surface is tunnel', () => {
const commandBlock = sliceBetween(
SERVER_SRC,
"url.pathname === '/command' && req.method === 'POST'",
'return handleCommand(body, tokenInfo)'
);
expect(commandBlock).toContain("surface === 'tunnel'");
expect(commandBlock).toContain('TUNNEL_COMMANDS.has');
expect(commandBlock).toContain('canDispatchOverTunnel(body?.command)');
expect(commandBlock).toContain('disallowed_command');
expect(commandBlock).toContain('is not allowed over the tunnel surface');
expect(commandBlock).toContain('status: 403');
+215
View File
@@ -0,0 +1,215 @@
/**
* Tunnel-surface behavioral eval for the pair-agent flow.
*
* Spawns the daemon under `BROWSE_HEADLESS_SKIP=1 BROWSE_TUNNEL_LOCAL_ONLY=1`
* so BOTH listeners come up: the local listener on `port` and the tunnel
* listener on `tunnelLocalPort`. No ngrok, no live network — the surface tag
* (`local` vs `tunnel`) is set by which listener received the request, which
* is testable as long as both bind locally.
*
* This file is the only place that exercises the tunnel-surface gate
* end-to-end. The source-level guards in `dual-listener.test.ts` catch
* literal/exemption regressions, the unit test in `tunnel-gate-unit.test.ts`
* catches gate-logic regressions, and this file catches routing-or-listener
* regressions (e.g. someone accidentally swaps `'local'` and `'tunnel'` at
* the makeFetchHandler call site).
*
* The browser dispatch path under BROWSE_HEADLESS_SKIP=1 surfaces an error
* because there is no Playwright context, so the assertion target is
* specifically that the GATE was passed (i.e. the response is NOT a 403 with
* `disallowed_command:<x>`), not that the dispatch succeeded.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '../..');
const SERVER_ENTRY = path.join(ROOT, 'browse/src/server.ts');
interface DaemonHandle {
proc: ReturnType<typeof Bun.spawn>;
localPort: number;
tunnelPort: number;
rootToken: string;
scopedToken: string;
stateFile: string;
tempDir: string;
localUrl: string;
tunnelUrl: string;
attemptsLogPath: string;
}
async function waitForReady(baseUrl: string, timeoutMs = 20_000): Promise<void> {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
try {
const resp = await fetch(`${baseUrl}/health`, {
signal: AbortSignal.timeout(1000),
});
if (resp.ok) return;
} catch {
// not ready yet
}
await new Promise(r => setTimeout(r, 200));
}
throw new Error(`Daemon did not become ready within ${timeoutMs}ms at ${baseUrl}`);
}
async function waitForTunnelPort(stateFile: string, timeoutMs = 20_000): Promise<number> {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
try {
const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
if (typeof state.tunnelLocalPort === 'number') return state.tunnelLocalPort;
} catch {
// state file not written yet
}
await new Promise(r => setTimeout(r, 200));
}
throw new Error(`Tunnel local port did not appear in ${stateFile} within ${timeoutMs}ms`);
}
async function spawnDaemonWithTunnel(): Promise<DaemonHandle> {
// Isolate this test's analytics + denial log directory so we can assert on a
// fresh attempts.jsonl without colliding with the user's real ~/.gstack.
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pair-agent-tunnel-eval-'));
const stateFile = path.join(tempDir, 'browse.json');
const fakeHome = path.join(tempDir, 'home');
fs.mkdirSync(fakeHome, { recursive: true });
const localPort = 30000 + Math.floor(Math.random() * 30000);
const attemptsLogPath = path.join(fakeHome, '.gstack', 'security', 'attempts.jsonl');
const proc = Bun.spawn(['bun', 'run', SERVER_ENTRY], {
cwd: ROOT,
env: {
...process.env,
HOME: fakeHome,
BROWSE_HEADLESS_SKIP: '1',
BROWSE_TUNNEL_LOCAL_ONLY: '1',
BROWSE_PORT: String(localPort),
BROWSE_STATE_FILE: stateFile,
BROWSE_PARENT_PID: '0',
BROWSE_IDLE_TIMEOUT: '600000',
},
stdio: ['ignore', 'pipe', 'pipe'],
});
const localUrl = `http://127.0.0.1:${localPort}`;
await waitForReady(localUrl);
const tunnelPort = await waitForTunnelPort(stateFile);
const tunnelUrl = `http://127.0.0.1:${tunnelPort}`;
// Read the root token, then exchange it for a scoped token via /pair → /connect.
const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
const rootToken = state.token;
const pairResp = await fetch(`${localUrl}/pair`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${rootToken}` },
body: JSON.stringify({ clientId: 'tunnel-eval' }),
});
if (!pairResp.ok) throw new Error(`/pair failed: ${pairResp.status}`);
const { setup_key } = await pairResp.json() as any;
const connectResp = await fetch(`${localUrl}/connect`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ setup_key }),
});
if (!connectResp.ok) throw new Error(`/connect failed: ${connectResp.status}`);
const { token: scopedToken } = await connectResp.json() as any;
return { proc, localPort, tunnelPort, rootToken, scopedToken, stateFile, tempDir, localUrl, tunnelUrl, attemptsLogPath };
}
function killDaemon(handle: DaemonHandle): void {
try { handle.proc.kill('SIGKILL'); } catch {}
try { fs.rmSync(handle.tempDir, { recursive: true, force: true }); } catch {}
}
async function postCommand(baseUrl: string, token: string, body: any): Promise<{ status: number; bodyText: string }> {
const resp = await fetch(`${baseUrl}/command`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
body: JSON.stringify(body),
});
return { status: resp.status, bodyText: await resp.text() };
}
describe('pair-agent over tunnel surface — gate fires on the right surface only', () => {
let daemon: DaemonHandle;
beforeAll(async () => {
daemon = await spawnDaemonWithTunnel();
}, 30_000);
afterAll(() => {
if (daemon) killDaemon(daemon);
});
test('newtab on tunnel surface passes the allowlist gate (not 403 disallowed_command)', async () => {
const { status, bodyText } = await postCommand(daemon.tunnelUrl, daemon.scopedToken, { command: 'newtab' });
// Browser dispatch under BROWSE_HEADLESS_SKIP=1 will fail differently
// (no Playwright context), but the gate must NOT 403 with
// disallowed_command.
if (status === 403) {
expect(bodyText).not.toContain('disallowed_command:newtab');
expect(bodyText).not.toContain('is not allowed over the tunnel surface');
}
});
test('pair on tunnel surface 403s with disallowed_command and writes a denial-log entry', async () => {
// Snapshot attempts.jsonl size before the call so we can detect the new entry.
let beforeBytes = 0;
try { beforeBytes = fs.statSync(daemon.attemptsLogPath).size; } catch {}
const { status, bodyText } = await postCommand(daemon.tunnelUrl, daemon.scopedToken, { command: 'pair' });
expect(status).toBe(403);
expect(bodyText).toContain('is not allowed over the tunnel surface');
// Wait briefly for the denial-log writer (it's synchronous fs.appendFile in
// tunnel-denial-log.ts but the OS may need a tick to flush).
await new Promise(r => setTimeout(r, 250));
expect(fs.existsSync(daemon.attemptsLogPath)).toBe(true);
const after = fs.readFileSync(daemon.attemptsLogPath, 'utf-8');
const newSection = after.slice(beforeBytes);
expect(newSection).toContain('disallowed_command:pair');
});
test('pair on local surface does NOT trigger the tunnel allowlist gate', async () => {
// The same scoped token over the LOCAL listener must not see the
// disallowed_command path — the tunnel gate is surface-scoped.
const { status, bodyText } = await postCommand(daemon.localUrl, daemon.scopedToken, { command: 'pair' });
// Whatever happens (404 unknown command, 403 from a token-scope check, or
// 200 if the local handler accepts it) the response must NOT come from the
// tunnel allowlist gate.
expect(bodyText).not.toContain('disallowed_command:pair');
expect(bodyText).not.toContain('is not allowed over the tunnel surface');
expect([200, 400, 403, 404, 500]).toContain(status);
});
test('catch-22 regression: newtab + goto on the just-created tab passes ownership check', async () => {
// Without the `command !== 'newtab'` exemption at server.ts:613, scoped
// agents can't open a tab (newtab fails ownership) and can't goto an
// existing tab (also fails ownership). This proves the exemption holds:
// newtab succeeds the gate AND the ownership check, then the agent can
// hand off the tabId to a follow-up command without hitting the
// "Tab not owned by your agent" error.
const newtabResp = await postCommand(daemon.tunnelUrl, daemon.scopedToken, { command: 'newtab' });
if (newtabResp.status === 403) {
expect(newtabResp.bodyText).not.toContain('disallowed_command');
expect(newtabResp.bodyText).not.toContain('Tab not owned by your agent');
}
// Even if the headless-skip dispatch fails before returning a tabId, a
// follow-up `goto` over the tunnel surface must not 403 with
// `disallowed_command:goto`. We are NOT asserting that the goto
// succeeds — only that the allowlist + ownership exemption don't reject
// it as a class.
const gotoResp = await postCommand(daemon.tunnelUrl, daemon.scopedToken, { command: 'goto', args: ['http://127.0.0.1:1/'] });
expect(gotoResp.bodyText).not.toContain('disallowed_command:goto');
expect(gotoResp.bodyText).not.toContain('is not allowed over the tunnel surface');
});
});
+97
View File
@@ -0,0 +1,97 @@
/**
* Unit-test the pure tunnel-gate function extracted from the /command handler.
*
* The gate decides whether a paired remote agent's request to `/command` over
* the tunnel surface is allowed (returns true) or 403'd (returns false). Pure,
* synchronous, no HTTP — testable without standing up a Bun.serve listener.
*
* The behavioral coverage of the gate firing on the right surface (and only
* the right surface) lives in `pair-agent-tunnel-eval.test.ts` (paid eval,
* gate-tier).
*/
import { describe, test, expect } from 'bun:test';
import { canDispatchOverTunnel, TUNNEL_COMMANDS } from '../src/server';
describe('canDispatchOverTunnel — closed allowlist', () => {
test('every command in TUNNEL_COMMANDS dispatches over tunnel', () => {
for (const cmd of TUNNEL_COMMANDS) {
expect(canDispatchOverTunnel(cmd)).toBe(true);
}
});
test('TUNNEL_COMMANDS contains the 26-command closed set', () => {
// Mirror the source-level guard in dual-listener.test.ts. If this ever
// disagrees with the literal in server.ts, one of them is wrong.
const expected = new Set([
'goto', 'click', 'text', 'screenshot',
'html', 'links', 'forms', 'accessibility',
'attrs', 'media', 'data',
'scroll', 'press', 'type', 'select', 'wait', 'eval',
'newtab', 'tabs', 'back', 'forward', 'reload',
'snapshot', 'fill', 'url', 'closetab',
]);
expect(TUNNEL_COMMANDS.size).toBe(expected.size);
for (const c of expected) expect(TUNNEL_COMMANDS.has(c)).toBe(true);
for (const c of TUNNEL_COMMANDS) expect(expected.has(c)).toBe(true);
});
});
describe('canDispatchOverTunnel — daemon-config + bootstrap commands rejected', () => {
const blocked = [
'pair', 'unpair', 'cookies', 'setup',
'launch', 'launch-browser', 'connect', 'disconnect',
'restart', 'stop', 'tunnel-start', 'tunnel-stop',
'token-mint', 'token-revoke', 'cookie-picker', 'cookie-import',
'inspector-pick', 'extension-inspect',
'invalid-command-xyz', 'totally-made-up',
];
for (const cmd of blocked) {
test(`rejects '${cmd}'`, () => {
expect(canDispatchOverTunnel(cmd)).toBe(false);
});
}
});
describe('canDispatchOverTunnel — null/undefined/empty input', () => {
test('returns false for empty string', () => {
expect(canDispatchOverTunnel('')).toBe(false);
});
test('returns false for undefined', () => {
expect(canDispatchOverTunnel(undefined)).toBe(false);
});
test('returns false for null', () => {
expect(canDispatchOverTunnel(null)).toBe(false);
});
test('returns false for non-string input (defensive)', () => {
// The body parser may hand the gate a number or object if a malicious
// client sends `{"command": 42}`. The pure gate must treat anything
// non-string as not-allowed rather than throw.
expect(canDispatchOverTunnel(42 as unknown as string)).toBe(false);
expect(canDispatchOverTunnel({} as unknown as string)).toBe(false);
});
});
describe('canDispatchOverTunnel — alias canonicalization', () => {
// canonicalizeCommand resolves aliases (e.g. 'set-content' → 'load-html').
// Any aliased form of an allowlisted canonical command should also pass the
// gate; aliases that resolve to a non-allowlisted canonical command should
// not. We don't hardcode alias names here — we read from the source registry
// by importing what we need from commands.ts.
test('aliases that resolve to allowlisted commands pass the gate', () => {
// 'set-content' canonicalizes to 'load-html'. 'load-html' is NOT in
// TUNNEL_COMMANDS, so 'set-content' must also be rejected. This guards
// against a future alias that accidentally maps a tunnel-allowed name to
// a non-tunnel-allowed canonical (e.g. 'goto' → 'navigate' would break).
expect(canDispatchOverTunnel('set-content')).toBe(false);
});
test('canonical commands pass directly without alias lookup', () => {
expect(canDispatchOverTunnel('goto')).toBe(true);
expect(canDispatchOverTunnel('newtab')).toBe(true);
expect(canDispatchOverTunnel('closetab')).toBe(true);
});
});