diff --git a/browse/src/browser-skill-commands.ts b/browse/src/browser-skill-commands.ts new file mode 100644 index 00000000..3c0805f5 --- /dev/null +++ b/browse/src/browser-skill-commands.ts @@ -0,0 +1,413 @@ +/** + * $B skill subcommands — CLI surface for browser-skills. + * + * Subcommands: + * list — list all skills, with resolved tier + * show — print skill SKILL.md + * run [--arg ...] [--timeout=Ns] — spawn the skill script, return JSON + * test — run script.test.ts via bun test + * rm [--global] — tombstone a user-tier skill + * + * Load-bearing: spawnSkill mints a per-spawn scoped token (read+write scope) + * and passes it via GSTACK_SKILL_TOKEN. The skill never sees the daemon root + * token. Untrusted skills get a scrubbed env (no $HOME, $PATH minimal, no + * secrets like $GITHUB_TOKEN/$OPENAI_API_KEY/etc.) and a locked cwd. Trusted + * skills (frontmatter `trusted: true`) inherit the full process env. + * + * Output protocol: stdout = JSON, stderr = streaming logs, exit code 0/non-0. + * stdout cap = 1MB (truncate + nonzero exit if exceeded). Default timeout 60s. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import { + listBrowserSkills, + readBrowserSkill, + tombstoneBrowserSkill, + defaultTierPaths, + type BrowserSkill, + type TierPaths, +} from './browser-skills'; +import { mintSkillToken, revokeSkillToken, generateSpawnId } from './skill-token'; + +const DEFAULT_TIMEOUT_SECONDS = 60; +const MAX_STDOUT_BYTES = 1024 * 1024; // 1 MB + +// ─── Public command dispatcher ────────────────────────────────── + +export interface SkillCommandContext { + /** Daemon port the skill should connect back to. */ + port: number; + /** Optional override of tier paths (tests pass synthetic dirs). */ + tiers?: TierPaths; +} + +/** + * Dispatch a `$B skill ` invocation. Returns the response string + * for the daemon to relay back to the CLI. Throws on invalid usage. 
/**
 * Dispatch a `$B skill <subcommand>` invocation.
 *
 * @param args - Tokens following `skill`. `args[0]` selects the subcommand;
 *   everything after it is handed to that subcommand's handler untouched.
 * @param ctx - Daemon port plus an optional tier-path override.
 * @returns The response string for the daemon to relay back to the CLI.
 * @throws Error when the subcommand is not recognised. Errors thrown by the
 *   individual handlers propagate unchanged.
 */
export async function handleSkillCommand(args: string[], ctx: SkillCommandContext): Promise<string> {
  const [sub, ...rest] = args;

  switch (sub) {
    // No subcommand at all is treated as a request for help.
    case undefined:
    case 'help':
    case '--help':
      return formatUsage();
    case 'list':
      return handleList(ctx);
    case 'show':
      return handleShow(rest, ctx);
    case 'run':
      return handleRun(rest, ctx);
    case 'test':
      return handleTest(rest, ctx);
    case 'rm':
      return handleRm(rest, ctx);
    default:
      throw new Error(`Unknown skill subcommand: "${sub}". Try: list, show, run, test, rm.`);
  }
}

/**
 * Build the multi-line usage text for `$B skill`.
 *
 * Each row is a synopsis plus a one-line summary; the synopsis column is padded
 * to the widest entry so the summaries line up regardless of later edits.
 */
function formatUsage(): string {
  const rows: Array<[synopsis: string, summary: string]> = [
    ['list', 'List all skills with resolved tier'],
    ['show <name>', 'Print SKILL.md'],
    ['run <name> [--arg k=v]... [--timeout=Ns]', 'Run the skill script'],
    ['test <name>', 'Run script.test.ts'],
    ['rm <name> [--global]', 'Tombstone a user-tier skill'],
  ];
  const synopsisWidth = Math.max(...rows.map(([synopsis]) => synopsis.length));

  return [
    'Usage: $B skill <subcommand>',
    '',
    ...rows.map(([synopsis, summary]) => `  ${synopsis.padEnd(synopsisWidth)}  ${summary}`),
  ].join('\n');
}

// ─── list ───────────────────────────────────────────────────────

/**
 * `$B skill list` — render every skill returned by `listBrowserSkills` as a
 * fixed-width table (name, tier, host, description).
 *
 * The header row goes through the same formatter as the data rows so the column
 * titles always sit directly above their values. Descriptions are clipped to 40
 * characters; a missing description renders as an empty cell.
 *
 * @returns The table followed by a trailing newline, or a short hint when no
 *   skills exist.
 */
function handleList(ctx: SkillCommandContext): string {
  const tiers = ctx.tiers ?? defaultTierPaths();
  const skills = listBrowserSkills(tiers);
  if (skills.length === 0) {
    return 'No browser-skills found.\n\nTry: $B skill show <name> (none right now)\n';
  }

  const NAME_WIDTH = 30;
  const TIER_WIDTH = 8;
  const HOST_WIDTH = 28;
  const DESC_MAX_CHARS = 40;

  // padEnd never truncates, so an over-long name shifts its own row only.
  const formatRow = (name: string, tier: string, host: string, desc: string): string =>
    [name.padEnd(NAME_WIDTH), tier.padEnd(TIER_WIDTH), host.padEnd(HOST_WIDTH), desc].join(' ');

  const lines: string[] = [formatRow('NAME', 'TIER', 'HOST', 'DESC')];
  for (const s of skills) {
    const desc = (s.frontmatter.description ?? '').slice(0, DESC_MAX_CHARS);
    lines.push(formatRow(s.name, s.tier, s.frontmatter.host, desc));
  }
  return lines.join('\n') + '\n';
}
/** Result of splitting `$B skill run` arguments into runner flags and skill args. */
interface ParsedRunArgs {
  /** Arguments forwarded verbatim to the skill script, in their original order. */
  passthrough: string[];
  /** Effective timeout in whole seconds. */
  timeoutSeconds: number;
}

/**
 * Separate the runner's own `--timeout=` flag from the arguments meant for the
 * skill script.
 *
 * Accepted timeout forms are a positive whole number of seconds, optionally
 * suffixed with `s` (`--timeout=30`, `--timeout=30s`). Anything else — words,
 * negatives, zero, fractions, or other units such as `2m` — is ignored and the
 * current value is kept. A loose `parseInt` would read `2m` as two seconds and
 * `1e3` as one second, which is never what the caller meant. When the flag is
 * repeated, the last valid occurrence wins.
 *
 * @param args - Everything after the skill name.
 * @returns The passthrough arguments and the timeout to apply. Every
 *   `--timeout=` token is consumed, valid or not, and never forwarded.
 */
export function parseSkillRunArgs(args: string[]): ParsedRunArgs {
  const timeoutFlag = '--timeout=';
  const timeoutValue = /^(\d+)s?$/;

  const passthrough: string[] = [];
  let timeoutSeconds = DEFAULT_TIMEOUT_SECONDS;

  for (const arg of args) {
    if (!arg.startsWith(timeoutFlag)) {
      passthrough.push(arg);
      continue;
    }
    const match = timeoutValue.exec(arg.slice(timeoutFlag.length));
    if (!match) continue;
    const seconds = Number(match[1]);
    // isSafeInteger also rejects absurdly long digit strings that overflow.
    if (Number.isSafeInteger(seconds) && seconds > 0) timeoutSeconds = seconds;
  }

  return { passthrough, timeoutSeconds };
}
// ─── test ───────────────────────────────────────────────────────

/**
 * `$B skill test <name>` — run the skill's `script.test.ts` with `bun test`.
 *
 * @param args - `args[0]` is the skill name.
 * @param ctx - Supplies the tier paths (defaults to `defaultTierPaths()`).
 * @returns The test runner's stderr if non-empty, else its stdout, else a short
 *   confirmation line.
 * @throws Error when the name is missing, the skill or its test file cannot be
 *   found, or the test process exits non-zero.
 */
async function handleTest(args: string[], ctx: SkillCommandContext): Promise<string> {
  const name = args[0];
  if (!name) throw new Error('Usage: $B skill test <name>');

  const tiers = ctx.tiers ?? defaultTierPaths();
  const skill = readBrowserSkill(name, tiers);
  if (!skill) throw new Error(`Skill "${name}" not found.`);

  const testFile = path.join(skill.dir, 'script.test.ts');
  if (!fs.existsSync(testFile)) {
    throw new Error(`Skill "${name}" has no script.test.ts at ${testFile}`);
  }

  // Same rule buildSpawnEnv applies to every spawn: skill-authored code never
  // receives the daemon root token, even when the rest of the env is inherited.
  const env = { ...process.env };
  delete env.GSTACK_TOKEN;

  const proc = Bun.spawn(['bun', 'test', testFile], {
    cwd: skill.dir,
    stdout: 'pipe',
    stderr: 'pipe',
    env,
  });

  // Drain both pipes while waiting for exit, as spawnSkill does. Waiting for
  // exit first can stall a child that blocks writing to a full pipe.
  const [exitCode, stdout, stderr] = await Promise.all([
    proc.exited,
    proc.stdout ? new Response(proc.stdout).text() : '',
    proc.stderr ? new Response(proc.stderr).text() : '',
  ]);

  if (exitCode !== 0) {
    // Prefer stderr, but don't throw away the only diagnostics if it is empty.
    throw new Error(`Skill "${name}" tests failed (exit ${exitCode}).\n${stderr || stdout}`);
  }
  return stderr || stdout || `tests passed for "${name}"`;
}
// ─── spawnSkill (load-bearing) ──────────────────────────────────

/** Inputs for a single skill spawn. */
export interface SpawnSkillOptions {
  /** The resolved skill; `name` and `dir` are read here. */
  skill: BrowserSkill;
  /** Arguments appended after `--` on the script's command line. */
  skillArgs: string[];
  /** `true` inherits the parent env; `false` gets the scrubbed env. */
  trusted: boolean;
  /** Wall-clock budget before the child is killed. */
  timeoutSeconds: number;
  /** Daemon port exported to the child as GSTACK_PORT. */
  port: number;
}

/** Outcome of a skill spawn. */
export interface SpawnSkillResult {
  /** Captured stdout, at most MAX_STDOUT_BYTES bytes. */
  stdout: string;
  /** Captured stderr, capped at the same byte limit. */
  stderr: string;
  /** Child exit code, or 124 when the timeout fired. */
  exitCode: number;
  /** Whether the timeout killed the child. */
  timedOut: boolean;
  /** Whether stdout exceeded the cap and was cut. */
  truncated: boolean;
}

/**
 * Spawn a skill script as a child process.
 *
 *   1. Mint a per-spawn scoped token.
 *   2. Build the env via buildSpawnEnv (trusted → inherited, untrusted →
 *      scrubbed); GSTACK_PORT and GSTACK_SKILL_TOKEN are always set.
 *   3. Spawn `bun run script.ts -- <args>` with cwd = skill.dir.
 *   4. Capture stdout and stderr (each capped) and enforce the timeout. A child
 *      that blows through the stdout cap is killed instead of being left to run
 *      until the timeout with nobody reading its output.
 *   5. Always clear the timer and revoke the token, whatever happened.
 *
 * @throws Error when the skill directory has no script.ts.
 */
export async function spawnSkill(opts: SpawnSkillOptions): Promise<SpawnSkillResult> {
  // Timer delays above 2^31-1 ms overflow and fire almost immediately, which
  // would turn a huge --timeout into an instant timeout.
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;

  const spawnId = generateSpawnId();
  const tokenInfo = mintSkillToken({
    skillName: opts.skill.name,
    spawnId,
    spawnTimeoutSeconds: opts.timeoutSeconds,
  });

  let killer: ReturnType<typeof setTimeout> | undefined;

  try {
    const env = buildSpawnEnv({
      trusted: opts.trusted,
      port: opts.port,
      skillToken: tokenInfo.token,
    });
    const scriptPath = path.join(opts.skill.dir, 'script.ts');
    if (!fs.existsSync(scriptPath)) {
      throw new Error(`Skill "${opts.skill.name}" missing script.ts at ${scriptPath}`);
    }

    const proc = Bun.spawn(['bun', 'run', scriptPath, '--', ...opts.skillArgs], {
      cwd: opts.skill.dir,
      env,
      stdout: 'pipe',
      stderr: 'pipe',
    });
    const killQuietly = (): void => {
      try { proc.kill(); } catch { /* already gone */ }
    };

    let timedOut = false;
    killer = setTimeout(() => {
      timedOut = true;
      killQuietly();
    }, Math.min(opts.timeoutSeconds * 1000, MAX_TIMER_DELAY_MS));

    // Start draining both pipes before waiting for exit so a chatty child can
    // never block on a full pipe.
    const stdoutPromise = readCapped(proc.stdout, MAX_STDOUT_BYTES).then((res) => {
      if (res.truncated) killQuietly();
      return res;
    });
    const stderrPromise = readCapped(proc.stderr, MAX_STDOUT_BYTES);

    const exitCode = await proc.exited;
    // Disarm as soon as the child is gone so a late timer cannot mislabel a
    // finished run as timed out while the pipes are still being flushed.
    clearTimeout(killer);
    killer = undefined;

    const stdoutResult = await stdoutPromise;
    const stderrResult = await stderrPromise;

    return {
      stdout: stdoutResult.text,
      stderr: stderrResult.text,
      exitCode: timedOut ? 124 : exitCode,
      timedOut,
      truncated: stdoutResult.truncated,
    };
  } finally {
    if (killer !== undefined) clearTimeout(killer);
    revokeSkillToken(opts.skill.name, spawnId);
  }
}

/** Text read from a stream plus whether the byte cap cut it short. */
interface CappedRead { text: string; truncated: boolean; }

/**
 * Read a byte stream to completion, keeping at most `capBytes` bytes.
 *
 * When the cap is exceeded the chunk that crosses it is cut to fit, the stream
 * is cancelled, and `truncated` is set. A stream that ends at exactly
 * `capBytes` is not truncated. Bytes are decoded as UTF-8 once at the end; a
 * cut in the middle of a multi-byte character decodes to U+FFFD.
 *
 * @param stream - The stream to drain; `undefined` yields an empty result.
 * @param capBytes - Maximum number of bytes to retain.
 */
async function readCapped(stream: ReadableStream<Uint8Array> | undefined, capBytes: number): Promise<CappedRead> {
  if (!stream) return { text: '', truncated: false };

  const reader = stream.getReader();
  const chunks: Uint8Array[] = [];
  let kept = 0;
  let truncated = false;

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!value) continue;

      const room = capBytes - kept;
      if (value.length > room) {
        truncated = true;
        // Keep only what still fits, then drop the rest of the stream.
        if (room > 0) chunks.push(value.subarray(0, room));
        try { await reader.cancel(); } catch { /* best effort */ }
        break;
      }
      chunks.push(value);
      kept += value.length;
    }
  } finally {
    try { reader.releaseLock(); } catch { /* lock already released */ }
  }

  // Buffer.concat takes Uint8Array chunks directly; no per-chunk copy needed.
  return { text: Buffer.concat(chunks).toString('utf-8'), truncated };
}
/**
 * Allowlist for untrusted spawns. Any variable not named here is dropped.
 * Covers locale, terminal type, and time zone only; PATH, GSTACK_PORT, and
 * GSTACK_SKILL_TOKEN are set separately by buildSpawnEnv.
 */
const UNTRUSTED_ALLOWLIST = new Set([
  'LANG', 'LC_ALL', 'LC_CTYPE',
  'TERM',
  'TZ',
]);

/** Inputs for buildSpawnEnv. */
interface BuildEnvOptions {
  /** `true` passes the parent env through; `false` starts from the allowlist. */
  trusted: boolean;
  /** Daemon port, exported as GSTACK_PORT. */
  port: number;
  /** Per-spawn scoped token, exported as GSTACK_SKILL_TOKEN. */
  skillToken: string;
}

/**
 * Build the environment handed to a spawned skill script.
 *
 * - Trusted: every defined variable of `process.env` except `GSTACK_TOKEN`
 *   (the daemon root token is never propagated), plus a default PATH if the
 *   parent had none.
 * - Untrusted: only UNTRUSTED_ALLOWLIST variables, a minimal PATH from
 *   resolveMinimalPath, and then a sweep that removes anything whose name
 *   matches SECRET_KEY_PATTERNS.
 *
 * GSTACK_PORT and GSTACK_SKILL_TOKEN are written last in both modes, so values
 * inherited from the parent environment can never override them.
 */
export function buildSpawnEnv(opts: BuildEnvOptions): Record<string, string> {
  const out: Record<string, string> = {};

  if (opts.trusted) {
    for (const [key, value] of Object.entries(process.env)) {
      // Defense in depth: skip the root token even on the trusted path.
      if (value === undefined || key === 'GSTACK_TOKEN') continue;
      out[key] = value;
    }
    if (!out.PATH) out.PATH = '/usr/local/bin:/usr/bin:/bin';
  } else {
    for (const key of UNTRUSTED_ALLOWLIST) {
      const value = process.env[key];
      if (value !== undefined) out[key] = value;
    }
    // Minimal PATH so `bun` is findable without leaking the caller's PATH.
    out.PATH = resolveMinimalPath();

    // Belt and braces: the allowlist holds no secret-looking names today, but
    // this sweep keeps it that way if someone extends it carelessly.
    for (const key of Object.keys(out)) {
      if (SECRET_KEY_PATTERNS.some((pattern) => pattern.test(key))) delete out[key];
    }
  }

  // Inject the daemon connection (always last so callers can't override).
  out.GSTACK_PORT = String(opts.port);
  out.GSTACK_SKILL_TOKEN = opts.skillToken;

  return out;
}

/**
 * Compute the PATH given to untrusted skills.
 *
 * The result is the standard system directories, preceded by the directory of
 * the running executable only when that executable is actually Bun. The check
 * is on the file name (`bun`, `bun.exe`, `bun-<suffix>`), not a substring of
 * the whole path, so an unrelated binary under e.g. `/opt/bundle/bin` no
 * longer gets its directory onto the PATH. The Bun directory is listed once,
 * first, and entries are joined with the platform's PATH delimiter.
 */
function resolveMinimalPath(): string {
  const systemDirs = ['/usr/local/bin', '/usr/bin', '/bin'];
  const execPath = process.execPath;

  const runningBun = Boolean(execPath) && /^bun(?:-[\w.]+)?(?:\.exe)?$/i.test(path.basename(execPath));
  if (!runningBun) return systemDirs.join(path.delimiter);

  const bunDir = path.dirname(execPath);
  return [bunDir, ...systemDirs.filter((dir) => dir !== bunDir)].join(path.delimiter);
}
Output through UNTRUSTED envelope when method is data-exfil.', usage: 'cdp [json-params]' }, }; diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 93d6a127..ac2f2bd4 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -7,6 +7,7 @@ import { handleSnapshot } from './snapshot'; import { getCleanText } from './read-commands'; import { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS, PAGE_CONTENT_COMMANDS, wrapUntrustedContent, canonicalizeCommand } from './commands'; import { handleDomainSkillCommand } from './domain-skill-commands'; +import { handleSkillCommand } from './browser-skill-commands'; import { validateNavigationUrl } from './url-validation'; import { checkScope, type TokenInfo } from './token-registry'; import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security'; @@ -235,6 +236,8 @@ export interface MetaCommandOpts { chainDepth?: number; /** Callback to route subcommands through the full security pipeline (handleCommandInternal) */ executeCommand?: (body: { command: string; args?: string[]; tabId?: number }, tokenInfo?: TokenInfo | null) => Promise<{ status: number; result: string; json?: boolean }>; + /** The port the daemon is listening on (needed by `$B skill run` to point spawned scripts at the daemon). */ + daemonPort?: number; } export async function handleMetaCommand( @@ -1024,6 +1027,14 @@ export async function handleMetaCommand( return await handleDomainSkillCommand(args, bm); } + case 'skill': { + const port = opts?.daemonPort; + if (port === undefined) { + throw new Error('skill command requires daemonPort in MetaCommandOpts (server bug)'); + } + return await handleSkillCommand(args, { port }); + } + case 'cdp': { // Lazy import — cdp-bridge introduces module deps we don't want loaded // for projects that never use the CDP escape hatch. 
diff --git a/browse/src/server.ts b/browse/src/server.ts index 1865d021..15892053 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -71,6 +71,14 @@ const AUTH_TOKEN = crypto.randomUUID(); initRegistry(AUTH_TOKEN); const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10); const IDLE_TIMEOUT_MS = parseInt(process.env.BROWSE_IDLE_TIMEOUT || '1800000', 10); // 30 min + +/** + * Port the local listener bound to. Set once the daemon picks a port. + * Used by `$B skill run` to point spawned skill scripts at the daemon over + * loopback. Module-level so handleCommandInternal can read it without threading + * the port through every dispatch. + */ +let LOCAL_LISTEN_PORT: number = 0; // Sidebar chat is always enabled in headed mode (ungated in v0.12.0) // ─── Tunnel State ─────────────────────────────────────────────── @@ -1286,6 +1294,7 @@ async function handleCommandInternal( const chainDepth = (opts?.chainDepth ?? 0); result = await handleMetaCommand(command, args, browserManager, shutdown, tokenInfo, { chainDepth, + daemonPort: LOCAL_LISTEN_PORT, executeCommand: (body, ti) => handleCommandInternal(body, ti, { skipRateCheck: true, // chain counts as 1 request skipActivity: true, // chain emits 1 event for all subcommands @@ -1571,6 +1580,7 @@ async function start() { safeUnlink(DIALOG_LOG_PATH); const port = await findPort(); + LOCAL_LISTEN_PORT = port; // Launch browser (headless or headed with extension) // BROWSE_HEADLESS_SKIP=1 skips browser launch entirely (for HTTP-only testing) diff --git a/browse/test/browser-skill-commands.test.ts b/browse/test/browser-skill-commands.test.ts new file mode 100644 index 00000000..5bea02a9 --- /dev/null +++ b/browse/test/browser-skill-commands.test.ts @@ -0,0 +1,359 @@ +/** + * browser-skill-commands tests — covers the dispatch surface, env scrubbing, + * spawn lifecycle, timeout, stdout cap. 
/**
 * Test fixture: materialise a skill directory under `tierRoot`.
 *
 * Writes `SKILL.md` with the given frontmatter between `---` fences followed by
 * a one-line body, and writes `script.ts` only when `scriptBody` is non-empty.
 *
 * @returns The absolute path of the created skill directory.
 */
function makeSkillDir(tierRoot: string, name: string, frontmatter: string, scriptBody: string = '') {
  const skillDir = path.join(tierRoot, name);
  fs.mkdirSync(skillDir, { recursive: true });

  const manifest = ['---', frontmatter, '---', 'body', ''].join('\n');
  fs.writeFileSync(path.join(skillDir, 'SKILL.md'), manifest);

  if (scriptBody.length > 0) {
    fs.writeFileSync(path.join(skillDir, 'script.ts'), scriptBody);
  }
  return skillDir;
}
parseSkillRunArgs(['--arg', 'foo=bar']); + expect(r.timeoutSeconds).toBe(60); + expect(r.passthrough).toEqual(['--arg', 'foo=bar']); + }); + + it('passes through unknown flags', () => { + const r = parseSkillRunArgs(['--keywords=ai', '--limit=10']); + expect(r.passthrough).toEqual(['--keywords=ai', '--limit=10']); + }); + + it('ignores invalid --timeout values', () => { + const r = parseSkillRunArgs(['--timeout=abc', '--timeout=-5']); + expect(r.timeoutSeconds).toBe(60); + }); +}); + +describe('handleSkillCommand: list', () => { + it('shows empty message when no skills', async () => { + const result = await handleSkillCommand(['list'], { port: 9999, tiers }); + expect(result).toContain('No browser-skills found'); + }); + + it('lists skills with their resolved tier', async () => { + makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: foo desc'); + makeSkillDir(tiers.global, 'bar', 'name: bar\nhost: b.com\ndescription: bar desc'); + const result = await handleSkillCommand(['list'], { port: 9999, tiers }); + expect(result).toContain('foo'); + expect(result).toContain('bundled'); + expect(result).toContain('a.com'); + expect(result).toContain('bar'); + expect(result).toContain('global'); + }); + + it('prints project tier when same name in multiple tiers', async () => { + makeSkillDir(tiers.bundled, 'shared', 'name: shared\nhost: bundled.com'); + makeSkillDir(tiers.project!, 'shared', 'name: shared\nhost: project.com'); + const result = await handleSkillCommand(['list'], { port: 9999, tiers }); + expect(result).toContain('project'); + expect(result).toContain('project.com'); + expect(result).not.toContain('bundled.com'); + }); +}); + +describe('handleSkillCommand: show', () => { + it('prints SKILL.md', async () => { + makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: hi'); + const result = await handleSkillCommand(['show', 'foo'], { port: 9999, tiers }); + expect(result).toContain('name: foo'); + 
expect(result).toContain('host: a.com'); + expect(result).toContain('body'); + }); + + it('throws when skill missing', async () => { + await expect(handleSkillCommand(['show', 'nope'], { port: 9999, tiers })).rejects.toThrow(/not found/); + }); + + it('throws when name omitted', async () => { + await expect(handleSkillCommand(['show'], { port: 9999, tiers })).rejects.toThrow(/Usage/); + }); +}); + +describe('handleSkillCommand: rm', () => { + it('tombstones global skill by default', async () => { + makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com'); + // No project tier skill, so default tier resolution should target global anyway. + // But the function defaults to 'project' unless --global. With no project + // skill, it would error — pass --global explicitly. + const result = await handleSkillCommand(['rm', 'gone', '--global'], { port: 9999, tiers }); + expect(result).toContain('Tombstoned'); + expect(fs.existsSync(path.join(tiers.global, 'gone'))).toBe(false); + }); + + it('tombstones project skill', async () => { + makeSkillDir(tiers.project!, 'gone', 'name: gone\nhost: x.com'); + const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers }); + expect(result).toContain('Tombstoned'); + expect(fs.existsSync(path.join(tiers.project!, 'gone'))).toBe(false); + }); + + it('falls back to global when no project tier path', async () => { + const tiersNoProject = { ...tiers, project: null }; + makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com'); + const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers: tiersNoProject }); + expect(result).toContain('global'); + }); +}); + +describe('handleSkillCommand: help / unknown', () => { + it('prints usage with no subcommand', async () => { + const r = await handleSkillCommand([], { port: 9999, tiers }); + expect(r).toContain('Usage'); + }); + + it('throws on unknown subcommand', async () => { + await expect(handleSkillCommand(['frobnicate'], { port: 9999, tiers })) + 
describe('buildSpawnEnv', () => {
  // Snapshot of the environment as it was before each test planted fixtures.
  let origEnv: Record<string, string | undefined>;

  beforeEach(() => {
    origEnv = { ...process.env };
    // Plant some secrets for scrub-tests
    Object.assign(process.env, {
      GITHUB_TOKEN: 'gh-secret',
      OPENAI_API_KEY: 'oai-secret',
      MY_PASSWORD: 'sup3r',
      NPM_TOKEN: 'npmtok',
      AWS_SECRET_ACCESS_KEY: 'aws-secret',
      GSTACK_TOKEN: 'root-token',
      HOME: '/Users/test',
      PATH: '/test/bin:/usr/bin',
      LANG: 'en_US.UTF-8',
    });
  });

  afterEach(() => {
    // Restore in place rather than `process.env = origEnv`: reassigning swaps
    // the runtime's env object for a plain copy for the rest of the test
    // process, which later suites would then inherit.
    for (const key of Object.keys(process.env)) {
      if (!(key in origEnv)) delete process.env[key];
    }
    for (const [key, value] of Object.entries(origEnv)) {
      if (value !== undefined) process.env[key] = value;
    }
  });

  it('untrusted: drops $HOME and secrets', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.HOME).toBeUndefined();
    expect(env.GITHUB_TOKEN).toBeUndefined();
    expect(env.OPENAI_API_KEY).toBeUndefined();
    expect(env.MY_PASSWORD).toBeUndefined();
    expect(env.NPM_TOKEN).toBeUndefined();
    expect(env.AWS_SECRET_ACCESS_KEY).toBeUndefined();
    expect(env.GSTACK_TOKEN).toBeUndefined();
  });

  it('untrusted: keeps locale + TERM', () => {
    process.env.TERM = 'xterm-256color';
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.LANG).toBe('en_US.UTF-8');
    expect(env.TERM).toBe('xterm-256color');
  });

  it('untrusted: PATH is minimal (no /test/bin override)', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.PATH).not.toContain('/test/bin');
    expect(env.PATH).toMatch(/\/(usr\/local\/)?bin/);
  });

  it('untrusted: injects GSTACK_PORT + GSTACK_SKILL_TOKEN', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok-xyz' });
    expect(env.GSTACK_PORT).toBe('1234');
    expect(env.GSTACK_SKILL_TOKEN).toBe('tok-xyz');
  });

  it('trusted: keeps $HOME', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.HOME).toBe('/Users/test');
  });

  it('trusted: still strips GSTACK_TOKEN (defense in depth)', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.GSTACK_TOKEN).toBeUndefined();
  });

  it('trusted: keeps developer secrets (intentional)', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.GITHUB_TOKEN).toBe('gh-secret');
  });

  it('GSTACK_PORT/GSTACK_SKILL_TOKEN can never be overridden by parent env', () => {
    process.env.GSTACK_PORT = '99999'; // attacker-set
    process.env.GSTACK_SKILL_TOKEN = 'attacker-tok';
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'real-tok' });
    expect(env.GSTACK_PORT).toBe('1234');
    expect(env.GSTACK_SKILL_TOKEN).toBe('real-tok');
  });
});
+ expect(listTokens().filter(t => t.clientId.startsWith('skill:echo-skill:'))).toEqual([]); + }); + + it('untrusted spawn: GSTACK_SKILL_TOKEN visible, root env scrubbed', async () => { + const dir = makeSkillDir(tiers.bundled, 'env-probe', + 'name: env-probe\nhost: x.com', // trusted defaults to false + `console.log(JSON.stringify({ + port: process.env.GSTACK_PORT, + token: process.env.GSTACK_SKILL_TOKEN, + home: process.env.HOME ?? null, + gh: process.env.GITHUB_TOKEN ?? null, + gstack: process.env.GSTACK_TOKEN ?? null, + }));`, + ); + const origEnv = { ...process.env }; + process.env.GITHUB_TOKEN = 'gh-secret'; + process.env.GSTACK_TOKEN = 'root'; + try { + const skill = readBrowserSkill('env-probe', tiers)!; + const result = await spawnSkill({ + skill, skillArgs: [], trusted: false, timeoutSeconds: 30, port: 4242, + }); + expect(result.exitCode).toBe(0); + const parsed = JSON.parse(result.stdout); + expect(parsed.port).toBe('4242'); + expect(parsed.token).toMatch(/^gsk_sess_/); + expect(parsed.home).toBeNull(); + expect(parsed.gh).toBeNull(); + expect(parsed.gstack).toBeNull(); + } finally { + process.env = origEnv; + } + }); + + it('trusted spawn: HOME passes through', async () => { + const dir = makeSkillDir(tiers.bundled, 'env-trusted', + 'name: env-trusted\nhost: x.com\ntrusted: true', + `console.log(JSON.stringify({ home: process.env.HOME ?? 
null }));`, + ); + const origEnv = { ...process.env }; + process.env.HOME = '/Users/test-user'; + try { + const skill = readBrowserSkill('env-trusted', tiers)!; + const result = await spawnSkill({ + skill, skillArgs: [], trusted: true, timeoutSeconds: 30, port: 9999, + }); + const parsed = JSON.parse(result.stdout); + expect(parsed.home).toBe('/Users/test-user'); + } finally { + process.env = origEnv; + } + }); + + it('timeout fires, exit code 124, token revoked', async () => { + const dir = makeSkillDir(tiers.bundled, 'sleeper', + 'name: sleeper\nhost: x.com\ntrusted: true', + // Sleep longer than the test timeout; the spawn should kill us. + `await new Promise(r => setTimeout(r, 30000)); console.log("done");`, + ); + const skill = readBrowserSkill('sleeper', tiers)!; + const result = await spawnSkill({ + skill, skillArgs: [], trusted: true, timeoutSeconds: 1, port: 9999, + }); + expect(result.timedOut).toBe(true); + expect(result.exitCode).toBe(124); + expect(listTokens().filter(t => t.clientId.startsWith('skill:sleeper:'))).toEqual([]); + }, 10_000); + + it('script crash propagates nonzero exit', async () => { + const dir = makeSkillDir(tiers.bundled, 'crasher', + 'name: crasher\nhost: x.com\ntrusted: true', + `process.exit(7);`, + ); + const skill = readBrowserSkill('crasher', tiers)!; + const result = await spawnSkill({ + skill, skillArgs: [], trusted: true, timeoutSeconds: 5, port: 9999, + }); + expect(result.exitCode).toBe(7); + expect(result.timedOut).toBe(false); + }); + + it('stdout > 1MB truncates and reports truncated', async () => { + const dir = makeSkillDir(tiers.bundled, 'flood', + 'name: flood\nhost: x.com\ntrusted: true', + // Emit ~2MB of "x" so the cap fires deterministically. 
+ `const chunk = 'x'.repeat(64 * 1024); + for (let i = 0; i < 40; i++) process.stdout.write(chunk);`, + ); + const skill = readBrowserSkill('flood', tiers)!; + const result = await spawnSkill({ + skill, skillArgs: [], trusted: true, timeoutSeconds: 10, port: 9999, + }); + expect(result.truncated).toBe(true); + expect(result.stdout.length).toBeLessThanOrEqual(1024 * 1024); + }, 10_000); +});