From c0dff846474de46a926dac51f4278d5421023c33 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 26 Apr 2026 04:52:16 -0700 Subject: [PATCH] feat(browse-client): SDK for browser-skill scripts Thin wrapper over POST /command with bearer auth. Resolves daemon port + token from GSTACK_PORT + GSTACK_SKILL_TOKEN env vars first (set by $B skill run when spawning), falls back to .gstack/browse.json for standalone debug runs. Convenience methods cover the read+write surface skills typically need: goto, click, fill, text, html, snapshot, links, forms, accessibility, attrs, media, data, scroll, press, type, select, wait, hover, screenshot. Low-level command(cmd, args) escape hatch for anything else. This is the canonical SDK source. Each browser-skill ships a sibling copy at /_lib/browse-client.ts so each skill is fully portable and version-pinned. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/browse-client.ts | 257 +++++++++++++++++++++++++++ browse/test/browse-client.test.ts | 281 ++++++++++++++++++++++++++++++ 2 files changed, 538 insertions(+) create mode 100644 browse/src/browse-client.ts create mode 100644 browse/test/browse-client.test.ts diff --git a/browse/src/browse-client.ts b/browse/src/browse-client.ts new file mode 100644 index 00000000..a33681f7 --- /dev/null +++ b/browse/src/browse-client.ts @@ -0,0 +1,257 @@ +/** + * browse-client — canonical SDK that browser-skill scripts import to drive the + * gstack daemon over loopback HTTP. + * + * Distribution model: + * This file is the canonical source. Each browser-skill ships a sibling + * copy at `/_lib/browse-client.ts` (Phase 2's generator copies it + * alongside every generated skill; Phase 1's bundled `hackernews-frontpage` + * reference skill ships a hand-copied version). The skill imports the + * sibling via relative path: `import { browse } from './_lib/browse-client'`. 
+ * + * Why per-skill copies and not a single global SDK: each skill is fully + * portable (copy the directory anywhere, it runs), version drift is + * impossible (the SDK is frozen at the version the skill was authored + * against), no npm publish workflow, no fixed-path tilde imports. + * + * Auth resolution: + * 1. GSTACK_PORT + GSTACK_SKILL_TOKEN env vars (set by `$B skill run` when + * spawning the script). The token is a per-spawn scoped capability bound + * to read+write commands; it expires when the spawn ends. + * 2. State file fallback: read `BROWSE_STATE_FILE` env or `/.gstack/browse.json` + * and use the `port` + `token` (the daemon root token). This path exists + * for developers running a skill directly via `bun run script.ts` outside + * the harness — your own authority, not an agent's. + * + * Trust: + * The SDK exposes only the daemon's existing HTTP surface (POST /command). + * No new capabilities. The token's scopes (read+write for spawned skills, + * full root for standalone debug) determine what actually executes. + * + * Zero side effects on import. Safe to import from tests or plain scripts. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as cp from 'child_process'; + +/** Options accepted by resolveBrowseAuth() and the BrowseClient constructor; every field is optional. */ +export interface BrowseClientOptions { + /** Override port. Default: GSTACK_PORT env or state file. */ + port?: number; + /** Override token. Default: GSTACK_SKILL_TOKEN env, then state file root token. */ + token?: string; + /** Tab id to target (every command can scope to a tab). Default: BROWSE_TAB env or undefined (active tab). */ + tabId?: number; + /** Per-request timeout in milliseconds. Default: 30_000. */ + timeoutMs?: number; + /** Override state-file path. Default: BROWSE_STATE_FILE env, else `.gstack/browse.json` under the git toplevel (or under the current working directory when git reports none). */ + stateFile?: string; +} + +/** Outcome of auth resolution: the loopback port to connect to and the bearer token to send. */ +interface ResolvedAuth { + port: number; + token: string; + /** Which lookup path produced the values; fully explicit options are also reported as 'env'. */ + source: 'env' | 'state-file'; +} + +/** Resolve the daemon port + token. Throws a clear error if neither path works. 
/**
 * Resolve the daemon port + token.
 *
 * Lookup order:
 *   1. Explicit `opts.port` + `opts.token` — returned as-is (reported as source 'env').
 *   2. Environment — GSTACK_PORT / GSTACK_SKILL_TOKEN. Each half can be overridden
 *      independently through `opts`, so `{ port }` combined with GSTACK_SKILL_TOKEN
 *      resolves here instead of falling through to the state file's token.
 *   3. State file — `opts.stateFile`, then BROWSE_STATE_FILE, then the default
 *      location. The file must hold a numeric `port` and a string `token`;
 *      explicit `opts.port` / `opts.token` still win over the file's values.
 *
 * @param opts Optional overrides; see BrowseClientOptions.
 * @returns The port, the token, and which lookup path supplied them.
 * @throws Error when no path yields both a port and a token.
 */
export function resolveBrowseAuth(opts: BrowseClientOptions = {}): ResolvedAuth {
  if (opts.port !== undefined && opts.token !== undefined) {
    return { port: opts.port, token: opts.token, source: 'env' };
  }

  // 1. Env vars (set by $B skill run when spawning). An empty token counts as unset.
  const port = opts.port ?? parseEnvPort(process.env.GSTACK_PORT);
  const token = opts.token ?? (process.env.GSTACK_SKILL_TOKEN || undefined);
  if (port !== undefined && token !== undefined) {
    return { port, token, source: 'env' };
  }

  // 2. State file fallback (developer running `bun run script.ts` directly).
  //    `||` on the env var so an empty BROWSE_STATE_FILE does not disable the default path.
  const stateFile = opts.stateFile ?? (process.env.BROWSE_STATE_FILE || defaultStateFile());
  if (stateFile) {
    const state = readStateFile(stateFile);
    if (state) {
      return {
        port: opts.port ?? state.port,
        token: opts.token ?? state.token,
        source: 'state-file',
      };
    }
  }

  throw new Error(
    'browse-client: cannot find daemon port + token. Either spawn via `$B skill run` ' +
    '(sets GSTACK_PORT + GSTACK_SKILL_TOKEN) or run from a project with a live daemon ' +
    '(.gstack/browse.json must exist).'
  );
}

/** Parse a port from an env var; only an all-digit value in 1..65535 is accepted. */
function parseEnvPort(raw: string | undefined): number | undefined {
  const trimmed = raw?.trim();
  if (!trimmed || !/^\d+$/.test(trimmed)) return undefined;
  const port = Number(trimmed);
  return port >= 1 && port <= 65535 ? port : undefined;
}

/** Read `{ port, token }` from a state file; returns undefined when it is missing, unreadable, or malformed. */
function readStateFile(file: string): { port: number; token: string } | undefined {
  try {
    const data: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'));
    if (typeof data !== 'object' || data === null) return undefined;
    const { port, token } = data as { port?: unknown; token?: unknown };
    if (typeof port === 'number' && typeof token === 'string') {
      return { port, token };
    }
  } catch {
    // Best effort: the caller falls through to its "cannot find daemon" error.
  }
  return undefined;
}

/**
 * Default state-file location: `.gstack/browse.json` under the git toplevel,
 * or under the current working directory when `git rev-parse` does not succeed
 * within 2 seconds.
 */
function defaultStateFile(): string | null {
  try {
    const proc = cp.spawnSync('git', ['rev-parse', '--show-toplevel'], { encoding: 'utf-8', timeout: 2000 });
    const root = proc.status === 0 ? proc.stdout.trim() : null;
    const base = root || process.cwd();
    return path.join(base, '.gstack', 'browse.json');
  } catch {
    return path.join(process.cwd(), '.gstack', 'browse.json');
  }
}

/**
 * Error type thrown by BrowseClient.
 *
 * `status` and `body` are set only when the daemon answered with a non-2xx
 * response; they are undefined for transport failures and timeouts.
 */
export class BrowseClientError extends Error {
  constructor(
    message: string,
    public readonly status?: number,
    public readonly body?: string,
  ) {
    super(message);
    this.name = 'BrowseClientError';
  }
}

/**
 * Thin client over the daemon's POST /command endpoint.
 */
/**
 * Thin client over the daemon's POST /command endpoint.
 *
 * Convenience methods cover the common cases (goto, click, text, snapshot,
 * etc.). For anything not exposed as a method, use `command(cmd, args)`.
 *
 * Every method resolves to the raw response text and rejects with
 * BrowseClientError on transport failure, timeout, or a non-2xx status.
 */
export class BrowseClient {
  readonly port: number;
  readonly token: string;
  readonly tabId?: number;
  readonly timeoutMs: number;

  /**
   * @param opts Optional overrides; port + token are resolved via resolveBrowseAuth().
   * @throws Error when resolveBrowseAuth() cannot find a port + token.
   */
  constructor(opts: BrowseClientOptions = {}) {
    const auth = resolveBrowseAuth(opts);
    this.port = auth.port;
    this.token = auth.token;
    const tabId = opts.tabId ?? (process.env.BROWSE_TAB ? parseInt(process.env.BROWSE_TAB, 10) : undefined);
    // A non-numeric BROWSE_TAB parses to NaN; treat it as "no tab" rather than storing NaN.
    this.tabId = tabId !== undefined && !Number.isNaN(tabId) ? tabId : undefined;
    this.timeoutMs = opts.timeoutMs ?? 30_000;
  }

  // ─── Low-level dispatch ─────────────────────────────────────────

  /**
   * Send an arbitrary command; returns raw response text.
   *
   * @param cmd Command name, sent as `command` in the JSON body.
   * @param args Positional string arguments, sent as `args`.
   * @throws BrowseClientError on timeout, transport failure, or non-2xx status
   *   (in the last case `status` and `body` are populated).
   */
  async command(cmd: string, args: string[] = []): Promise<string> {
    const body = JSON.stringify({
      command: cmd,
      args,
      ...(this.tabId !== undefined ? { tabId: this.tabId } : {}),
    });

    let resp: Response;
    let text: string;
    try {
      resp = await fetch(`http://127.0.0.1:${this.port}/command`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.token}`,
        },
        body,
        signal: AbortSignal.timeout(this.timeoutMs),
      });
      // Read the body inside the try: the abort signal can also fire while the
      // body is streaming, and that failure must be wrapped like any other.
      text = await resp.text();
    } catch (err: unknown) {
      throw this.wrapTransportError(cmd, err);
    }

    if (!resp.ok) {
      let message = `browse-client: command "${cmd}" failed with status ${resp.status}`;
      try {
        const parsed: unknown = JSON.parse(text);
        const detail = typeof parsed === 'object' && parsed !== null
          ? (parsed as { error?: unknown }).error
          : undefined;
        if (detail) message += `: ${typeof detail === 'string' ? detail : JSON.stringify(detail)}`;
      } catch {
        // Body is not JSON: include a bounded excerpt of the raw text instead.
        if (text) message += `: ${text.slice(0, 200)}`;
      }
      throw new BrowseClientError(message, resp.status, text);
    }
    return text;
  }

  /** Map a fetch/body-read failure to a BrowseClientError with an actionable message. */
  private wrapTransportError(cmd: string, err: unknown): BrowseClientError {
    const info = (typeof err === 'object' && err !== null ? err : {}) as {
      name?: unknown;
      code?: unknown;
      message?: unknown;
      cause?: { code?: unknown } | null;
    };
    if (info.name === 'TimeoutError' || info.name === 'AbortError') {
      return new BrowseClientError(`browse-client: command "${cmd}" timed out after ${this.timeoutMs}ms`);
    }
    // Node's fetch reports the system error on `cause`; Bun uses the code 'ConnectionRefused'.
    const code = info.code ?? info.cause?.code;
    if (code === 'ECONNREFUSED' || code === 'ConnectionRefused') {
      return new BrowseClientError(`browse-client: daemon not running on port ${this.port}`);
    }
    return new BrowseClientError(`browse-client: ${info.message ?? err}`);
  }

  // ─── Navigation ─────────────────────────────────────────────────

  async goto(url: string): Promise<string> { return this.command('goto', [url]); }
  async wait(arg: string): Promise<string> { return this.command('wait', [arg]); }

  // ─── Reading ────────────────────────────────────────────────────

  /** With no selector (or an empty one) the command is sent with empty args. */
  async text(selector?: string): Promise<string> {
    return this.command('text', selector ? [selector] : []);
  }
  /** With no selector (or an empty one) the command is sent with empty args. */
  async html(selector?: string): Promise<string> {
    return this.command('html', selector ? [selector] : []);
  }
  async links(): Promise<string> { return this.command('links'); }
  async forms(): Promise<string> { return this.command('forms'); }
  async accessibility(): Promise<string> { return this.command('accessibility'); }
  async attrs(selector: string): Promise<string> { return this.command('attrs', [selector]); }
  async media(...flags: string[]): Promise<string> { return this.command('media', flags); }
  async data(...flags: string[]): Promise<string> { return this.command('data', flags); }

  // ─── Interaction ────────────────────────────────────────────────

  async click(selector: string): Promise<string> { return this.command('click', [selector]); }
  async fill(selector: string, value: string): Promise<string> { return this.command('fill', [selector, value]); }
  async select(selector: string, value: string): Promise<string> { return this.command('select', [selector, value]); }
  async hover(selector: string): Promise<string> { return this.command('hover', [selector]); }
  async type(text: string): Promise<string> { return this.command('type', [text]); }
  async press(key: string): Promise<string> { return this.command('press', [key]); }
  /** With no selector (or an empty one) the command is sent with empty args. */
  async scroll(selector?: string): Promise<string> {
    return this.command('scroll', selector ? [selector] : []);
  }

  // ─── Snapshot + screenshot ──────────────────────────────────────

  /** Snapshot returns the ARIA tree. Pass flags like '-i' (interactive only), '-c' (compact). */
  async snapshot(...flags: string[]): Promise<string> { return this.command('snapshot', flags); }
  async screenshot(...args: string[]): Promise<string> { return this.command('screenshot', args); }
}

/**
 * Default singleton. Lazily resolves auth on first method call so a script can
 * import `browse` and immediately call `await browse.goto(...)` without
 * threading through a constructor.
 */
class LazyBrowseClient {
  private inner: BrowseClient | null = null;
  /** Create the BrowseClient on first use and reuse it afterwards. */
  private get(): BrowseClient {
    if (!this.inner) this.inner = new BrowseClient();
    return this.inner;
  }
  // Mirror the BrowseClient surface; each method delegates to the lazily created, cached instance.
  command(cmd: string, args: string[] = []) { return this.get().command(cmd, args); }
  goto(url: string) { return this.get().goto(url); }
  wait(arg: string) { return this.get().wait(arg); }
  text(selector?: string) { return this.get().text(selector); }
  html(selector?: string) { return this.get().html(selector); }
  links() { return this.get().links(); }
  forms() { return this.get().forms(); }
  accessibility() { return this.get().accessibility(); }
  attrs(selector: string) { return this.get().attrs(selector); }
  media(...flags: string[]) { return this.get().media(...flags); }
  data(...flags: string[]) { return this.get().data(...flags); }
  click(selector: string) { return this.get().click(selector); }
  fill(selector: string, value: string) { return this.get().fill(selector, value); }
  select(selector: string, value: string) { return this.get().select(selector, value); }
  hover(selector: string) { return this.get().hover(selector); }
  type(text: string) { return this.get().type(text); }
  press(key: string) { return this.get().press(key); }
scroll(selector?: string) { return this.get().scroll(selector); }
  snapshot(...flags: string[]) { return this.get().snapshot(...flags); }
  screenshot(...args: string[]) { return this.get().screenshot(...args); }
}

export const browse = new LazyBrowseClient();

// ─── Patch boundary (second file of the commit) ─────────────────────────────
// diff --git a/browse/test/browse-client.test.ts b/browse/test/browse-client.test.ts
// new file mode 100644
// index 00000000..1def4a88
// --- /dev/null
// +++ b/browse/test/browse-client.test.ts
// @@ -0,0 +1,281 @@

/**
 * browse-client tests — verify the SDK against a mock HTTP server.
 *
 * We don't need a real daemon. We stand up a Bun.serve that mimics POST
 * /command, capture the requests, and assert wire format + auth + error
 * handling.
 */

import { describe, it, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { BrowseClient, BrowseClientError, resolveBrowseAuth } from '../src/browse-client';

/** One request as seen by the mock server. */
interface CapturedRequest {
  method: string;
  url: string;
  authorization: string | null;
  contentType: string | null;
  /** Parsed JSON body, or the raw text when the body is not valid JSON. */
  body: any;
}

/** Handle returned by startMockServer(). */
interface MockServer {
  port: number;
  requests: CapturedRequest[];
  setResponse(status: number, body: string): void;
  stop(): Promise<void>;
}

/**
 * Start a Bun.serve instance on a random port that records every request and
 * answers with the response most recently configured through setResponse()
 * (200 'OK' by default).
 */
async function startMockServer(): Promise<MockServer> {
  const requests: CapturedRequest[] = [];
  let response: { status: number; body: string } = { status: 200, body: 'OK' };

  const server = Bun.serve({
    port: 0, // random port
    async fetch(req) {
      const body = await req.text();
      let parsed: any = body;
      try { parsed = JSON.parse(body); } catch { /* leave as text */ }
      requests.push({
        method: req.method,
        url: new URL(req.url).pathname,
        authorization: req.headers.get('Authorization'),
        contentType: req.headers.get('Content-Type'),
        body: parsed,
      });
      return new Response(response.body, { status: response.status });
    },
  });

  return {
    port: server.port,
    requests,
    setResponse(status: number, body: string) { response = { status, body }; },
    async stop() { server.stop(true); },
  };
}

describe('browse-client', () => {
  let server: MockServer;
  const origEnv: Record<string, string | undefined> = {};

  beforeEach(async () => {
    server = await startMockServer();
    // Snapshot env we mutate so tests are hermetic.
    for (const k of ['GSTACK_PORT', 'GSTACK_SKILL_TOKEN', 'BROWSE_STATE_FILE', 'BROWSE_TAB']) {
      origEnv[k] = process.env[k];
      delete process.env[k];
    }
  });

  afterEach(async () => {
    await server.stop();
    for (const [k, v] of Object.entries(origEnv)) {
      if (v === undefined) delete process.env[k];
      else process.env[k] = v;
    }
  });

  describe('resolveBrowseAuth', () => {
    it('uses GSTACK_PORT + GSTACK_SKILL_TOKEN env when present', () => {
      process.env.GSTACK_PORT = String(server.port);
      process.env.GSTACK_SKILL_TOKEN = 'scoped-token';
      const auth = resolveBrowseAuth();
      expect(auth.port).toBe(server.port);
      expect(auth.token).toBe('scoped-token');
      expect(auth.source).toBe('env');
    });

    it('falls back to state file when env vars missing', () => {
      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'browse-client-test-'));
      const stateFile = path.join(tmpDir, 'browse.json');
      fs.writeFileSync(stateFile, JSON.stringify({ pid: 1, port: server.port, token: 'root-token' }));
      try {
        const auth = resolveBrowseAuth({ stateFile });
        expect(auth.port).toBe(server.port);
        expect(auth.token).toBe('root-token');
        expect(auth.source).toBe('state-file');
      } finally {
        fs.rmSync(tmpDir, { recursive: true, force: true });
      }
    });

    it('throws a clear error when neither env nor state file resolves', () => {
      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'browse-client-test-'));
      try {
        expect(() => resolveBrowseAuth({ stateFile: path.join(tmpDir, 'nonexistent.json') }))
          .toThrow('browse-client: cannot find daemon port + token');
      } finally {
        fs.rmSync(tmpDir, { recursive: true, force: true });
      }
    });

    it('explicit opts.port + opts.token bypass env and state file', () => {
      const auth = resolveBrowseAuth({ port: 9999, token: 'explicit' });
      expect(auth.port).toBe(9999);
      expect(auth.token).toBe('explicit');
    });
  });

  describe('command()', () => {
    it('emits POST /command with bearer auth and JSON body', async () => {
      const client = new BrowseClient({ port: server.port, token: 'tok-abc' });
      server.setResponse(200, 'navigated');

      const result = await client.command('goto', ['https://example.com']);
      expect(result).toBe('navigated');

      expect(server.requests).toHaveLength(1);
      const req = server.requests[0];
      expect(req.method).toBe('POST');
      expect(req.url).toBe('/command');
      expect(req.authorization).toBe('Bearer tok-abc');
      expect(req.contentType).toBe('application/json');
      expect(req.body).toEqual({ command: 'goto', args: ['https://example.com'] });
    });

    it('omits tabId when not set', async () => {
      const client = new BrowseClient({ port: server.port, token: 't' });
      await client.command('text', []);
      expect(server.requests[0].body).toEqual({ command: 'text', args: [] });
    });

    it('includes tabId when constructor receives one', async () => {
      const client = new BrowseClient({ port: server.port, token: 't', tabId: 5 });
      await client.command('text', []);
      expect(server.requests[0].body).toEqual({ command: 'text', args: [], tabId: 5 });
    });

    it('reads tabId from BROWSE_TAB env when not passed explicitly', async () => {
      process.env.BROWSE_TAB = '7';
      const client = new BrowseClient({ port: server.port, token: 't' });
      await client.command('text', []);
      expect(server.requests[0].body).toEqual({ command: 'text', args: [], tabId: 7 });
    });

    it('throws BrowseClientError with status on non-2xx', async () => {
      const client = new BrowseClient({ port: server.port, token: 't' });
      server.setResponse(403, JSON.stringify({ error: 'Insufficient scope' }));

      let caught: BrowseClientError | null = null;
      try {
        await client.command('eval', ['file.js']);
      } catch (e) {
        caught = e as BrowseClientError;
      }
      expect(caught).not.toBeNull();
      expect(caught!.name).toBe('BrowseClientError');
      expect(caught!.status).toBe(403);
      expect(caught!.message).toContain('Insufficient scope');
    });

    it('wraps connection-refused errors as BrowseClientError', async () => {
      // Pick an unused port to force ECONNREFUSED
      const client = new BrowseClient({ port: 1, token: 't', timeoutMs: 1000 });
      let caught: BrowseClientError | null = null;
      try {
        await client.command('goto', ['x']);
      } catch (e) {
        caught = e as BrowseClientError;
      }
      expect(caught).not.toBeNull();
      expect(caught!.name).toBe('BrowseClientError');
    });
  });

  describe('convenience methods', () => {
    let client: BrowseClient;

    beforeEach(() => {
      client = new BrowseClient({ port: server.port, token: 't' });
      server.setResponse(200, 'OK');
    });

    it('goto sends url as single arg', async () => {
      await client.goto('https://example.com');
      expect(server.requests[0].body).toEqual({ command: 'goto', args: ['https://example.com'] });
    });

    it('text with no selector sends empty args', async () => {
      await client.text();
      expect(server.requests[0].body).toEqual({ command: 'text', args: [] });
    });

    it('text with selector sends [selector]', async () => {
      await client.text('.my-class');
      expect(server.requests[0].body).toEqual({ command: 'text', args: ['.my-class'] });
    });

    it('html with selector sends [selector]', async () => {
      await client.html('article');
      expect(server.requests[0].body).toEqual({ command: 'html', args: ['article'] });
    });

    it('click sends selector', async () => {
      await client.click('button.submit');
      expect(server.requests[0].body).toEqual({ command: 'click', args: ['button.submit'] });
    });

    it('fill sends [selector, value]', async () => {
      await client.fill('#email', 'user@example.com');
      expect(server.requests[0].body).toEqual({ command: 'fill', args: ['#email', 'user@example.com'] });
    });

    it('select sends [selector, value]', async () => {
      await client.select('#country', 'US');
      expect(server.requests[0].body).toEqual({ command: 'select', args: ['#country', 'US'] });
    });

    it('hover sends selector', async () => {
      await client.hover('.menu');
      expect(server.requests[0].body).toEqual({ command: 'hover', args: ['.menu'] });
    });

    it('press sends key', async () => {
      await client.press('Enter');
      expect(server.requests[0].body).toEqual({ command: 'press', args: ['Enter'] });
    });

    it('type sends text', async () => {
      await client.type('hello world');
      expect(server.requests[0].body).toEqual({ command: 'type', args: ['hello world'] });
    });

    it('wait sends arg', async () => {
      await client.wait('--networkidle');
      expect(server.requests[0].body).toEqual({ command: 'wait', args: ['--networkidle'] });
    });

    it('scroll with no selector sends empty args', async () => {
      await client.scroll();
      expect(server.requests[0].body).toEqual({ command: 'scroll', args: [] });
    });

    it('snapshot with flags forwards them', async () => {
      await client.snapshot('-i', '-c');
      expect(server.requests[0].body).toEqual({ command: 'snapshot', args: ['-i', '-c'] });
    });

    it('attrs sends selector', async () => {
      await client.attrs('@e1');
      expect(server.requests[0].body).toEqual({ command: 'attrs', args: ['@e1'] });
    });

    it('links/forms/accessibility take no args', async () => {
      await client.links();
      await client.forms();
      await client.accessibility();
      expect(server.requests).toHaveLength(3);
      expect(server.requests.map(r => r.body.command)).toEqual(['links', 'forms', 'accessibility']);
      for (const r of server.requests) expect(r.body.args).toEqual([]);
    });

    it('media and data forward flag args', async () => {
      await client.media('--images');
      await client.data('--jsonld');
      expect(server.requests[0].body).toEqual({ command: 'media', args: ['--images'] });
      expect(server.requests[1].body).toEqual({ command: 'data', args: ['--jsonld'] });
    });
  });
});