feat(browser-skills): $B skill list/show/run/test/rm subcommands

handleSkillCommand dispatches to per-subcommand handlers; spawnSkill is the load-bearing function that:

1. Mints a per-spawn scoped token (read+write only) bound to the skill name + spawn-id.
2. Builds the spawn env:
   - trusted: passes process.env minus GSTACK_TOKEN (defense in depth).
   - untrusted: minimal allowlist (LANG, LC_ALL, LC_CTYPE, TERM, TZ) + locked PATH; explicitly drops anything matching TOKEN/KEY/SECRET/etc. Also drops AWS_/AZURE_/GCP_/GOOGLE_APPLICATION_/ANTHROPIC_/OPENAI_/GITHUB_/GH_/SSH_/GPG_/NPM_TOKEN/PYPI_ patterns.
3. Always injects GSTACK_PORT + GSTACK_SKILL_TOKEN last (cannot be overridden by parent env).
4. Spawns `bun run script.ts -- <args>` with cwd=skillDir, captures stdout (1MB cap), stderr, and timeout-kills past the deadline.
5. Revokes the token in finally{}, always.

list output prints the resolved tier inline so "why did it run that one?" never becomes a debugging mystery (Codex finding #4 mitigation).

server.ts threads the listen port to meta-commands via MetaCommandOpts.daemonPort.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 11:45:20 +02:00 · 2026-04-26 05:03:17 -07:00
parent faf663b22c
commit e6a27993fe
5 changed files with 796 additions and 0 deletions
@@ -0,0 +1,413 @@
+/**
+ * $B skill subcommands — CLI surface for browser-skills.
+ *
+ * Subcommands:
+ *   list                                       — list all skills, with resolved tier
+ *   show <name>                                — print skill SKILL.md
+ *   run <name> [--arg ...] [--timeout=Ns]      — spawn the skill script, return JSON
+ *   test <name>                                — run script.test.ts via bun test
+ *   rm <name> [--global]                       — tombstone a user-tier skill
+ *
+ * Load-bearing: spawnSkill mints a per-spawn scoped token (read+write scope)
+ * and passes it via GSTACK_SKILL_TOKEN. The skill never sees the daemon root
+ * token. Untrusted skills get a scrubbed env (no $HOME, $PATH minimal, no
+ * secrets like $GITHUB_TOKEN/$OPENAI_API_KEY/etc.) and a locked cwd. Trusted
+ * skills (frontmatter `trusted: true`) inherit the process env, minus the
+ * daemon root token ($GSTACK_TOKEN), which is never propagated.
+ *
+ * Output protocol: stdout = JSON, stderr = streaming logs, exit code 0/non-0.
+ * stdout cap = 1MB (truncate + nonzero exit if exceeded). Default timeout 60s.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import {
+  listBrowserSkills,
+  readBrowserSkill,
+  tombstoneBrowserSkill,
+  defaultTierPaths,
+  type BrowserSkill,
+  type TierPaths,
+} from './browser-skills';
+import { mintSkillToken, revokeSkillToken, generateSpawnId } from './skill-token';
+
+// Timeout used by parseSkillRunArgs when no valid --timeout=N flag is present.
+const DEFAULT_TIMEOUT_SECONDS = 60;
+// Byte cap applied by spawnSkill to both the stdout and stderr capture.
+const MAX_STDOUT_BYTES = 1024 * 1024; // 1 MB
+
+// ─── Public command dispatcher ──────────────────────────────────
+
+/** Per-invocation context passed to handleSkillCommand and on to each subcommand handler. */
+export interface SkillCommandContext {
+  /** Daemon port the skill should connect back to. */
+  port: number;
+  /**
+   * Optional override of tier paths (tests pass synthetic dirs).
+   * Handlers fall back to defaultTierPaths() when this is omitted.
+   */
+  tiers?: TierPaths;
+}
+
+/**
+ * Entry point for `$B skill <subcommand>`: routes to the matching handler and
+ * resolves with the text the daemon relays back to the CLI.
+ *
+ * No subcommand, `help`, and `--help` all return the usage text.
+ *
+ * @param args - The subcommand followed by that subcommand's own arguments.
+ * @param ctx - Daemon port plus an optional tier-path override.
+ * @returns The response string produced by the selected handler.
+ * @throws Error for an unknown subcommand; errors thrown by handlers surface
+ *   as a rejected promise.
+ */
+export async function handleSkillCommand(args: string[], ctx: SkillCommandContext): Promise<string> {
+  const [sub, ...rest] = args;
+
+  if (sub === undefined || sub === 'help' || sub === '--help') return formatUsage();
+  if (sub === 'list') return handleList(ctx);
+  if (sub === 'show') return handleShow(rest, ctx);
+  if (sub === 'run') return handleRun(rest, ctx);
+  if (sub === 'test') return handleTest(rest, ctx);
+  if (sub === 'rm') return handleRm(rest, ctx);
+
+  throw new Error(`Unknown skill subcommand: "${sub}". Try: list, show, run, test, rm.`);
+}
+
+/** Builds the multi-line usage text shown for `$B skill`, `help` and `--help`. */
+function formatUsage(): string {
+  const subcommandHelp = [
+    '  list                                  List all skills with resolved tier',
+    '  show <name>                           Print SKILL.md',
+    '  run <name> [--arg k=v]... [--timeout=Ns]   Run the skill script',
+    '  test <name>                           Run script.test.ts',
+    '  rm <name> [--global]                  Tombstone a user-tier skill',
+  ];
+  // Header line, one blank line, then one row per subcommand (no trailing newline).
+  return 'Usage: $B skill <subcommand>' + '\n\n' + subcommandHelp.join('\n');
+}
+
+// ─── list ───────────────────────────────────────────────────────
+
+/**
+ * Renders every skill returned by listBrowserSkills as a fixed-width table
+ * (name, tier, host, description cut to 40 characters).
+ *
+ * @param ctx - Supplies the tier paths; defaultTierPaths() is used when unset.
+ * @returns The table followed by a newline, or a hint message when there are
+ *   no skills.
+ */
+function handleList(ctx: SkillCommandContext): string {
+  const skills = listBrowserSkills(ctx.tiers ?? defaultTierPaths());
+  if (skills.length === 0) {
+    return 'No browser-skills found.\n\nTry: $B skill show <name>  (none right now)\n';
+  }
+
+  const header = 'NAME                          TIER     HOST                        DESC';
+  const rows = skills.map((skill) => {
+    const shortDesc = (skill.frontmatter.description ?? '').slice(0, 40);
+    // Columns are padded to 30 / 8 / 28 and separated by a single space.
+    return `${skill.name.padEnd(30)} ${skill.tier.padEnd(8)} ${skill.frontmatter.host.padEnd(28)} ${shortDesc}`;
+  });
+  return [header, ...rows].join('\n') + '\n';
+}
+
+// ─── show ───────────────────────────────────────────────────────
+
+/**
+ * Returns the contents of the named skill's SKILL.md.
+ *
+ * @param args - `[name]`; the skill to look up.
+ * @param ctx - Supplies the tier paths; defaultTierPaths() is used when unset.
+ * @throws Error when the name is missing or readBrowserSkill finds no skill.
+ */
+function handleShow(args: string[], ctx: SkillCommandContext): string {
+  const [name] = args;
+  if (!name) throw new Error('Usage: $B skill show <name>');
+
+  const skill = readBrowserSkill(name, ctx.tiers ?? defaultTierPaths());
+  if (skill) {
+    return readFile(path.join(skill.dir, 'SKILL.md'));
+  }
+  throw new Error(`Skill "${name}" not found in any tier.`);
+}
+
+/** Synchronously reads the file at `p` and returns its contents decoded as UTF-8. */
+function readFile(p: string): string {
+  const contents = fs.readFileSync(p, { encoding: 'utf-8' });
+  return contents;
+}
+
+// ─── run ────────────────────────────────────────────────────────
+
+/** Result of splitting `skill run` arguments into runner flags and script arguments. */
+interface ParsedRunArgs {
+  /** Arguments forwarded to the skill script, in their original order. */
+  passthrough: string[];
+  /** Timeout in seconds taken from the last valid `--timeout=` flag, else the default. */
+  timeoutSeconds: number;
+}
+
+/**
+ * Separates `--timeout=N` flags from the arguments meant for the skill script.
+ *
+ * Every `--timeout=` argument is consumed. Its value is parsed with parseInt
+ * (so a trailing unit such as `30s` is tolerated), and values that are not a
+ * positive number are ignored, leaving the previous timeout in place.
+ *
+ * @param args - Arguments that followed the skill name.
+ * @returns The passthrough arguments and the effective timeout in seconds.
+ */
+export function parseSkillRunArgs(args: string[]): ParsedRunArgs {
+  const timeoutFlag = '--timeout=';
+  const parsed: ParsedRunArgs = { passthrough: [], timeoutSeconds: DEFAULT_TIMEOUT_SECONDS };
+
+  for (const arg of args) {
+    if (!arg.startsWith(timeoutFlag)) {
+      parsed.passthrough.push(arg);
+      continue;
+    }
+    const seconds = Number.parseInt(arg.slice(timeoutFlag.length), 10);
+    if (Number.isNaN(seconds) || seconds <= 0) continue;
+    parsed.timeoutSeconds = seconds;
+  }
+  return parsed;
+}
+
+/**
+ * Runs the named skill through spawnSkill and returns its stdout.
+ *
+ * The `trusted` flag comes from the skill's frontmatter; the timeout and the
+ * script arguments come from parseSkillRunArgs.
+ *
+ * @param args - `[name, ...runArgs]`.
+ * @param ctx - Supplies the daemon port and the tier paths.
+ * @returns The captured stdout of a successful run.
+ * @throws Error when the name is missing, the skill is not found, or the run
+ *   exits non-zero, times out, or has its stdout truncated. In the failed-run
+ *   case the error carries an `exitCode` property and up to 4096 characters
+ *   of stderr.
+ */
+async function handleRun(args: string[], ctx: SkillCommandContext): Promise<string> {
+  const [name, ...runArgs] = args;
+  if (!name) throw new Error('Usage: $B skill run <name> [--arg k=v]... [--timeout=Ns]');
+
+  const skill = readBrowserSkill(name, ctx.tiers ?? defaultTierPaths());
+  if (!skill) throw new Error(`Skill "${name}" not found.`);
+
+  const { passthrough, timeoutSeconds } = parseSkillRunArgs(runArgs);
+  const result = await spawnSkill({
+    skill,
+    skillArgs: passthrough,
+    trusted: skill.frontmatter.trusted,
+    timeoutSeconds,
+    port: ctx.port,
+  });
+
+  const succeeded = result.exitCode === 0 && !result.timedOut && !result.truncated;
+  if (succeeded) return result.stdout;
+
+  // Truncation is reported in preference to a timeout, and a timeout in
+  // preference to the raw exit code.
+  let summary: string;
+  if (result.truncated) {
+    summary = `truncated stdout at ${MAX_STDOUT_BYTES} bytes`;
+  } else if (result.timedOut) {
+    summary = `timed out after ${timeoutSeconds}s`;
+  } else {
+    summary = `exit ${result.exitCode}`;
+  }
+
+  const message = `Skill "${name}" failed: ${summary}\n--- stderr ---\n${result.stderr.slice(0, 4096)}`;
+  throw Object.assign(new Error(message), { exitCode: result.exitCode || 1 });
+}
+
+// ─── test ───────────────────────────────────────────────────────
+
+/**
+ * Runs the named skill's `script.test.ts` with `bun test` and returns the
+ * test runner's output.
+ *
+ * Both output pipes are drained while waiting for the process to exit. If the
+ * exit were awaited first, a run that writes more than the OS pipe buffer
+ * holds would block on write and never exit.
+ *
+ * The child inherits the parent environment except GSTACK_TOKEN: the daemon
+ * root token is never handed to skill code, matching buildSpawnEnv.
+ *
+ * @param args - `[name]`; the skill whose tests should run.
+ * @param ctx - Supplies the tier paths; defaultTierPaths() is used when unset.
+ * @returns stderr if non-empty, else stdout, else a short "tests passed" note.
+ * @throws Error when the name is missing, the skill or its test file does not
+ *   exist, or `bun test` exits non-zero (the message carries the output).
+ */
+async function handleTest(args: string[], ctx: SkillCommandContext): Promise<string> {
+  const name = args[0];
+  if (!name) throw new Error('Usage: $B skill test <name>');
+  const tiers = ctx.tiers ?? defaultTierPaths();
+  const skill = readBrowserSkill(name, tiers);
+  if (!skill) throw new Error(`Skill "${name}" not found.`);
+
+  const testFile = path.join(skill.dir, 'script.test.ts');
+  if (!fs.existsSync(testFile)) {
+    throw new Error(`Skill "${name}" has no script.test.ts at ${testFile}`);
+  }
+
+  // Copy the env so the parent's process.env is not mutated.
+  const env = { ...process.env };
+  delete env.GSTACK_TOKEN; // never propagate root token
+
+  const proc = Bun.spawn(['bun', 'test', testFile], {
+    cwd: skill.dir,
+    stdout: 'pipe',
+    stderr: 'pipe',
+    env,
+  });
+
+  // Read both streams concurrently with the exit wait to avoid a pipe deadlock.
+  const [exitCode, stdout, stderr] = await Promise.all([
+    proc.exited,
+    proc.stdout ? new Response(proc.stdout).text() : '',
+    proc.stderr ? new Response(proc.stderr).text() : '',
+  ]);
+
+  if (exitCode !== 0) {
+    // Fall back to stdout so a failure is never reported with an empty body.
+    throw new Error(`Skill "${name}" tests failed (exit ${exitCode}).\n${stderr || stdout}`);
+  }
+  return stderr || stdout || `tests passed for "${name}"`;
+}
+
+// ─── rm ─────────────────────────────────────────────────────────
+
+/**
+ * Tombstones a skill in the project or global tier.
+ *
+ * The skill name is the first argument that is not a `--flag`, so both
+ * `rm <name> --global` and `rm --global <name>` work. The project tier is the
+ * default; the global tier is used when `--global` is passed or when no
+ * project tier path is configured.
+ *
+ * @param args - Skill name plus an optional `--global` flag, in any order.
+ * @param ctx - Supplies the tier paths; defaultTierPaths() is used when unset.
+ * @returns A one-line confirmation naming the tier and the value returned by
+ *   tombstoneBrowserSkill.
+ * @throws Error when no skill name is given; errors from tombstoneBrowserSkill
+ *   propagate.
+ */
+function handleRm(args: string[], ctx: SkillCommandContext): string {
+  // Skip flags so a leading --global is not mistaken for the skill name.
+  const name = args.find((arg) => !arg.startsWith('--'));
+  if (!name) throw new Error('Usage: $B skill rm <name> [--global]');
+  const isGlobal = args.includes('--global');
+
+  const tiers = ctx.tiers ?? defaultTierPaths();
+  // For UX: if no project tier exists at all, default to global.
+  const effectiveTier: 'project' | 'global' = isGlobal || !tiers.project ? 'global' : 'project';
+
+  const dst = tombstoneBrowserSkill(name, effectiveTier, tiers);
+  return `Tombstoned "${name}" (${effectiveTier} tier) → ${dst}\n`;
+}
+
+// ─── spawnSkill (load-bearing) ──────────────────────────────────
+
+/** Inputs to spawnSkill. */
+export interface SpawnSkillOptions {
+  /** Skill to run; `skill.dir` is the child's cwd and must contain script.ts. */
+  skill: BrowserSkill;
+  /** Arguments appended after `--` on the `bun run` command line. */
+  skillArgs: string[];
+  /** Selects the env built by buildSpawnEnv: inherited (true) or scrubbed allowlist (false). */
+  trusted: boolean;
+  /** Seconds before the child is killed; also passed to mintSkillToken as spawnTimeoutSeconds. */
+  timeoutSeconds: number;
+  /** Daemon port, exported to the child as GSTACK_PORT. */
+  port: number;
+}
+
+/** Outcome of one spawnSkill run. */
+export interface SpawnSkillResult {
+  /** Captured stdout decoded as UTF-8, cut at MAX_STDOUT_BYTES. */
+  stdout: string;
+  /** Captured stderr decoded as UTF-8, cut at the same byte cap. */
+  stderr: string;
+  /** The child's exit code, or 124 when the timeout fired. */
+  exitCode: number;
+  /** True when the timeout timer fired and the child was killed. */
+  timedOut: boolean;
+  /** True when stdout exceeded the cap (stderr truncation is not reported here). */
+  truncated: boolean;
+}
+
+/** Delay between the first kill signal at the deadline and the follow-up SIGKILL. */
+const SPAWN_KILL_GRACE_MS = 2000;
+
+/**
+ * Largest delay a 32-bit timer can hold. Node-compatible timers treat anything
+ * larger as ~1ms, which would kill the child immediately instead of late.
+ */
+const MAX_TIMER_DELAY_MS = 2147483647;
+
+/**
+ * Spawn a skill script as a child process.
+ *
+ * 1. Verify script.ts exists (before any token is minted).
+ * 2. Mint a scoped token for this skill name + spawn id.
+ * 3. Build the env: trusted=true → process.env minus GSTACK_TOKEN;
+ *    trusted=false → scrubbed allowlist. GSTACK_PORT and GSTACK_SKILL_TOKEN
+ *    are always set.
+ * 4. Spawn `bun run script.ts -- <args>` with cwd=skill.dir.
+ * 5. Capture stdout and stderr (each capped at MAX_STDOUT_BYTES). If stdout
+ *    overflows the cap the child is signalled to stop, since its output is
+ *    already unusable.
+ * 6. Enforce the timeout: signal the child at the deadline, then SIGKILL it
+ *    after a short grace period if it has still not exited.
+ * 7. Clear the timers and revoke the token in `finally`. Always.
+ *
+ * @param opts - Skill, arguments, trust level, timeout and daemon port.
+ * @returns Captured output plus exit code (124 on timeout) and the
+ *   timedOut / truncated flags.
+ * @throws Error when the skill directory has no script.ts.
+ */
+export async function spawnSkill(opts: SpawnSkillOptions): Promise<SpawnSkillResult> {
+  // Fail fast: no token needs to exist for a skill that cannot run.
+  const scriptPath = path.join(opts.skill.dir, 'script.ts');
+  if (!fs.existsSync(scriptPath)) {
+    throw new Error(`Skill "${opts.skill.name}" missing script.ts at ${scriptPath}`);
+  }
+
+  const spawnId = generateSpawnId();
+  const tokenInfo = mintSkillToken({
+    skillName: opts.skill.name,
+    spawnId,
+    spawnTimeoutSeconds: opts.timeoutSeconds,
+  });
+
+  let deadlineTimer: ReturnType<typeof setTimeout> | undefined;
+  let escalationTimer: ReturnType<typeof setTimeout> | undefined;
+  const clearTimers = (): void => {
+    clearTimeout(deadlineTimer);
+    clearTimeout(escalationTimer);
+  };
+
+  try {
+    const env = buildSpawnEnv({
+      trusted: opts.trusted,
+      port: opts.port,
+      skillToken: tokenInfo.token,
+    });
+
+    const proc = Bun.spawn(['bun', 'run', scriptPath, '--', ...opts.skillArgs], {
+      cwd: opts.skill.dir,
+      env,
+      stdout: 'pipe',
+      stderr: 'pipe',
+    });
+
+    // Best-effort: the child may already have exited, in which case kill can throw.
+    const signalChild = (signal?: 'SIGKILL'): void => {
+      try { proc.kill(signal); } catch {}
+    };
+
+    let timedOut = false;
+    const deadlineMs = Math.min(opts.timeoutSeconds * 1000, MAX_TIMER_DELAY_MS);
+    deadlineTimer = setTimeout(() => {
+      timedOut = true;
+      signalChild();
+      // A script can trap or ignore the default signal; SIGKILL cannot be caught.
+      escalationTimer = setTimeout(() => signalChild('SIGKILL'), SPAWN_KILL_GRACE_MS);
+    }, deadlineMs);
+
+    // Start draining both pipes before waiting on exit so the child never
+    // blocks on a full pipe.
+    const stdoutPromise = (async () => {
+      const captured = await readCapped(proc.stdout, MAX_STDOUT_BYTES);
+      // Past the cap the run has already failed; stop the child rather than
+      // letting it run on until the deadline.
+      if (captured.truncated) signalChild();
+      return captured;
+    })();
+    const stderrPromise = readCapped(proc.stderr, MAX_STDOUT_BYTES);
+
+    const exitCode = await proc.exited;
+    // Clear right after exit so a late timer cannot flag a finished run as timed out.
+    clearTimers();
+
+    const stdoutResult = await stdoutPromise;
+    const stderrResult = await stderrPromise;
+
+    return {
+      stdout: stdoutResult.text,
+      stderr: stderrResult.text,
+      exitCode: timedOut ? 124 : exitCode,
+      timedOut,
+      truncated: stdoutResult.truncated,
+    };
+  } finally {
+    clearTimers();
+    revokeSkillToken(opts.skill.name, spawnId);
+  }
+}
+
+/** Text captured from a stream plus whether the byte cap cut it short. */
+interface CappedRead { text: string; truncated: boolean; }
+
+/**
+ * Reads a byte stream to the end, keeping at most `capBytes` bytes.
+ *
+ * When a chunk would push the total past the cap, only the part that still
+ * fits is kept, the stream is cancelled, and `truncated` is set. A stream
+ * that ends at exactly `capBytes` is not reported as truncated.
+ *
+ * @param stream - Stream to drain; `undefined` yields an empty result.
+ * @param capBytes - Maximum number of bytes to retain.
+ * @returns The retained bytes decoded as UTF-8 and the truncation flag.
+ */
+async function readCapped(stream: ReadableStream<Uint8Array> | undefined, capBytes: number): Promise<CappedRead> {
+  if (!stream) return { text: '', truncated: false };
+
+  const reader = stream.getReader();
+  const kept: Uint8Array[] = [];
+  let keptBytes = 0;
+  let truncated = false;
+
+  try {
+    for (;;) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      if (!value) continue;
+
+      const remaining = capBytes - keptBytes;
+      if (value.length <= remaining) {
+        kept.push(value);
+        keptBytes += value.length;
+        continue;
+      }
+
+      // This chunk overflows the cap: keep the prefix that fits, then stop reading.
+      truncated = true;
+      if (remaining > 0) kept.push(value.subarray(0, remaining));
+      try { await reader.cancel(); } catch {}
+      break;
+    }
+  } finally {
+    try { reader.releaseLock(); } catch {}
+  }
+
+  const text = Buffer.concat(kept).toString('utf-8');
+  return { text, truncated };
+}
+
+// ─── env construction (security-critical) ───────────────────────
+
+/**
+ * Env keys ALWAYS scrubbed for untrusted skills. These represent secrets,
+ * authority, or developer-environment context that an agent-authored script
+ * should not see.
+ *
+ * The first row matches case-insensitively anywhere in the key name; the
+ * remaining patterns are case-sensitive prefixes. buildSpawnEnv applies the
+ * list only when `trusted` is false.
+ */
+const SECRET_KEY_PATTERNS = [
+  /TOKEN/i, /KEY/i, /SECRET/i, /PASSWORD/i, /CREDENTIAL/i,
+  /^AWS_/, /^AZURE_/, /^GCP_/, /^GOOGLE_APPLICATION_/,
+  /^ANTHROPIC_/, /^OPENAI_/, /^GITHUB_/, /^GH_/,
+  /^SSH_/, /^GPG_/,
+  /^NPM_TOKEN/, /^PYPI_/,
+];
+
+/**
+ * Allowlist for untrusted spawns. Anything not in this list is dropped.
+ * Includes: locale, terminal type, timezone. PATH is deliberately absent:
+ * buildSpawnEnv sets it from resolveMinimalPath() instead of copying the
+ * parent's value. Skills get GSTACK_PORT + GSTACK_SKILL_TOKEN injected
+ * separately.
+ */
+const UNTRUSTED_ALLOWLIST = new Set([
+  'LANG', 'LC_ALL', 'LC_CTYPE',
+  'TERM',
+  'TZ',
+]);
+
+/** Inputs to buildSpawnEnv. */
+interface BuildEnvOptions {
+  /** true → inherit the parent env; false → allowlist-only env. */
+  trusted: boolean;
+  /** Daemon port, written to GSTACK_PORT. */
+  port: number;
+  /** Per-spawn scoped token, written to GSTACK_SKILL_TOKEN. */
+  skillToken: string;
+}
+
+/**
+ * Builds the environment for a spawned skill script.
+ *
+ * - Trusted: every defined variable of process.env except GSTACK_TOKEN, with
+ *   a default PATH filled in when the parent has none.
+ * - Untrusted: only the UNTRUSTED_ALLOWLIST variables that are set, a PATH
+ *   from resolveMinimalPath(), and then a pass that removes any key matching
+ *   SECRET_KEY_PATTERNS.
+ *
+ * GSTACK_PORT and GSTACK_SKILL_TOKEN are written last in both modes, so a
+ * value inherited from the parent env can never win.
+ *
+ * @param opts - Trust level, daemon port and scoped token.
+ * @returns A fresh env object; process.env is not modified.
+ */
+export function buildSpawnEnv(opts: BuildEnvOptions): Record<string, string> {
+  const env: Record<string, string> = {};
+
+  if (!opts.trusted) {
+    // Untrusted: start from nothing and copy only allowlisted variables.
+    UNTRUSTED_ALLOWLIST.forEach((key) => {
+      const value = process.env[key];
+      if (value !== undefined) env[key] = value;
+    });
+    // Locked PATH: the bun directory (when resolvable) plus standard system dirs.
+    env.PATH = resolveMinimalPath();
+
+    // Belt and braces: drop anything that still looks like a secret.
+    const looksSecret = (key: string): boolean => SECRET_KEY_PATTERNS.some((pattern) => pattern.test(key));
+    Object.keys(env)
+      .filter(looksSecret)
+      .forEach((key) => { delete env[key]; });
+  } else {
+    // Trusted: secrets may be intentional (e.g. an internal-tool skill), but
+    // the daemon root token is never propagated.
+    for (const key of Object.keys(process.env)) {
+      if (key === 'GSTACK_TOKEN') continue;
+      const value = process.env[key];
+      if (value === undefined) continue;
+      env[key] = value;
+    }
+    if (!env.PATH) {
+      env.PATH = '/usr/local/bin:/usr/bin:/bin';
+    }
+  }
+
+  // Daemon connection details go in last so nothing above can override them.
+  env.GSTACK_PORT = String(opts.port);
+  env.GSTACK_SKILL_TOKEN = opts.skillToken;
+
+  return env;
+}
+
+/**
+ * Returns the PATH given to untrusted skills: the directory holding the
+ * running bun executable (when the current process really is bun), followed
+ * by the standard system directories.
+ *
+ * The check looks at the executable's file name, not at a substring of the
+ * whole path. A substring test for "/bun" would also match unrelated
+ * locations such as /home/bunny/... or /opt/bundle/... and place that
+ * directory first on an untrusted script's PATH.
+ *
+ * @returns A colon-separated PATH string.
+ */
+function resolveMinimalPath(): string {
+  const fallback = '/usr/local/bin:/usr/bin:/bin';
+  const execPath = process.execPath;
+  if (!execPath) return fallback;
+
+  // Accept `bun`, suffixed builds such as `bun-debug`, and `bun.exe`.
+  const isBunBinary = /^bun(?:-[\w.-]+)?(?:\.exe)?$/i.test(path.basename(execPath));
+  if (!isBunBinary) return fallback;
+
+  return `${path.dirname(execPath)}:${fallback}`;
+}
@@ -43,6 +43,7 @@ export const META_COMMANDS = new Set([
  'frame',
  'ux-audit',
  'domain-skill',
+  'skill',
  'cdp',
 ]);

@@ -177,6 +178,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
  'ux-audit': { category: 'Inspection', description: 'Extract page structure for UX behavioral analysis — site ID, nav, headings, text blocks, interactive elements. Returns JSON for agent interpretation.', usage: 'ux-audit' },
  // Domain skills (per-site notes the agent writes for itself)
  'domain-skill': { category: 'Meta', description: 'Per-site notes (host derived from active tab). Quarantined → active after N=3 uses without classifier flag → global by explicit promote.', usage: 'domain-skill save|list|show|edit|promote-to-global|rollback|rm <host?>' },
+  // Browser-skills (hand-written or generated Playwright scripts the runtime spawns)
+  'skill':        { category: 'Meta', description: 'Run a browser-skill: deterministic Playwright script that drives the daemon over loopback HTTP. 3-tier lookup (project > global > bundled). Spawned scripts get a per-spawn scoped token (read+write only) — never the daemon root token.', usage: 'skill list|show|run|test|rm <name?> [--arg k=v]... [--timeout=Ns]' },
  // CDP escape hatch (deny-default; see browse/src/cdp-allowlist.ts)
  'cdp':          { category: 'Inspection', description: 'Raw CDP method dispatch (deny-default; allowlist in cdp-allowlist.ts). Output through UNTRUSTED envelope when method is data-exfil.', usage: 'cdp <Domain.method> [json-params]' },
 };
@@ -7,6 +7,7 @@ import { handleSnapshot } from './snapshot';
 import { getCleanText } from './read-commands';
 import { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS, PAGE_CONTENT_COMMANDS, wrapUntrustedContent, canonicalizeCommand } from './commands';
 import { handleDomainSkillCommand } from './domain-skill-commands';
+import { handleSkillCommand } from './browser-skill-commands';
 import { validateNavigationUrl } from './url-validation';
 import { checkScope, type TokenInfo } from './token-registry';
 import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
@@ -235,6 +236,8 @@ export interface MetaCommandOpts {
  chainDepth?: number;
  /** Callback to route subcommands through the full security pipeline (handleCommandInternal) */
  executeCommand?: (body: { command: string; args?: string[]; tabId?: number }, tokenInfo?: TokenInfo | null) => Promise<{ status: number; result: string; json?: boolean }>;
+  /** The port the daemon is listening on (needed by `$B skill run` to point spawned scripts at the daemon). */
+  daemonPort?: number;
 }

 export async function handleMetaCommand(
@@ -1024,6 +1027,14 @@ export async function handleMetaCommand(
      return await handleDomainSkillCommand(args, bm);
    }

+    case 'skill': {
+      const port = opts?.daemonPort;
+      if (port === undefined) {
+        throw new Error('skill command requires daemonPort in MetaCommandOpts (server bug)');
+      }
+      return await handleSkillCommand(args, { port });
+    }
+
    case 'cdp': {
      // Lazy import — cdp-bridge introduces module deps we don't want loaded
      // for projects that never use the CDP escape hatch.
@@ -71,6 +71,14 @@ const AUTH_TOKEN = crypto.randomUUID();
 initRegistry(AUTH_TOKEN);
 const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10);
 const IDLE_TIMEOUT_MS = parseInt(process.env.BROWSE_IDLE_TIMEOUT || '1800000', 10); // 30 min
+
+/**
+ * Port the local listener bound to. Set once the daemon picks a port.
+ * Used by `$B skill run` to point spawned skill scripts at the daemon over
+ * loopback. Module-level so handleCommandInternal can read it without threading
+ * the port through every dispatch.
+ */
+let LOCAL_LISTEN_PORT: number = 0;
 // Sidebar chat is always enabled in headed mode (ungated in v0.12.0)

 // ─── Tunnel State ───────────────────────────────────────────────
@@ -1286,6 +1294,7 @@ async function handleCommandInternal(
      const chainDepth = (opts?.chainDepth ?? 0);
      result = await handleMetaCommand(command, args, browserManager, shutdown, tokenInfo, {
        chainDepth,
+        daemonPort: LOCAL_LISTEN_PORT,
        executeCommand: (body, ti) => handleCommandInternal(body, ti, {
          skipRateCheck: true,    // chain counts as 1 request
          skipActivity: true,     // chain emits 1 event for all subcommands
@@ -1571,6 +1580,7 @@ async function start() {
  safeUnlink(DIALOG_LOG_PATH);

  const port = await findPort();
+  LOCAL_LISTEN_PORT = port;

  // Launch browser (headless or headed with extension)
  // BROWSE_HEADLESS_SKIP=1 skips browser launch entirely (for HTTP-only testing)
@@ -0,0 +1,359 @@
+/**
+ * browser-skill-commands tests — covers the dispatch surface, env scrubbing,
+ * spawn lifecycle, timeout, stdout cap.
+ *
+ * The `run` and `test` subcommands spawn `bun` subprocesses, so these tests
+ * write tiny inline scripts to the synthetic skill dir and assert behavior
+ * end-to-end.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+  rotateRoot, initRegistry, validateToken, listTokens,
+} from '../src/token-registry';
+import {
+  handleSkillCommand,
+  spawnSkill,
+  buildSpawnEnv,
+  parseSkillRunArgs,
+} from '../src/browser-skill-commands';
+import { readBrowserSkill, type TierPaths } from '../src/browser-skills';
+
+// Per-test scratch root and the three synthetic tier directories inside it.
+let tmpRoot: string;
+let tiers: TierPaths;
+
+// Every test starts with a fresh token registry and empty project/global/bundled
+// tier directories under a new temp dir.
+beforeEach(() => {
+  rotateRoot();
+  initRegistry('root-token-for-tests');
+  tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'browser-skill-cmd-test-'));
+  tiers = {
+    project: path.join(tmpRoot, 'project', '.gstack', 'browser-skills'),
+    global: path.join(tmpRoot, 'home', '.gstack', 'browser-skills'),
+    bundled: path.join(tmpRoot, 'gstack-install', 'browser-skills'),
+  };
+  fs.mkdirSync(tiers.project!, { recursive: true });
+  fs.mkdirSync(tiers.global, { recursive: true });
+  fs.mkdirSync(tiers.bundled, { recursive: true });
+});
+
+// Remove the scratch root, including any skill dirs a test created.
+afterEach(() => {
+  fs.rmSync(tmpRoot, { recursive: true, force: true });
+});
+
+/**
+ * Creates `<tierRoot>/<name>` containing a SKILL.md built from the given
+ * frontmatter and, when `scriptBody` is non-empty, a script.ts with that body.
+ *
+ * @returns The path of the created skill directory.
+ */
+function makeSkillDir(tierRoot: string, name: string, frontmatter: string, scriptBody: string = '') {
+  const skillDir = path.join(tierRoot, name);
+  fs.mkdirSync(skillDir, { recursive: true });
+
+  // Frontmatter fenced by `---`, then a one-line body and a trailing newline.
+  const manifest = ['---', frontmatter, '---', 'body', ''].join('\n');
+  fs.writeFileSync(path.join(skillDir, 'SKILL.md'), manifest);
+
+  if (scriptBody !== '') {
+    fs.writeFileSync(path.join(skillDir, 'script.ts'), scriptBody);
+  }
+  return skillDir;
+}
+
+// parseSkillRunArgs: --timeout=N is consumed by the runner; everything else is
+// forwarded to the skill script untouched.
+describe('parseSkillRunArgs', () => {
+  it('extracts --timeout=N', () => {
+    const r = parseSkillRunArgs(['--timeout=10', '--arg', 'foo=bar']);
+    expect(r.timeoutSeconds).toBe(10);
+    expect(r.passthrough).toEqual(['--arg', 'foo=bar']);
+  });
+
+  it('defaults to 60s when no timeout', () => {
+    const r = parseSkillRunArgs(['--arg', 'foo=bar']);
+    expect(r.timeoutSeconds).toBe(60);
+    expect(r.passthrough).toEqual(['--arg', 'foo=bar']);
+  });
+
+  it('passes through unknown flags', () => {
+    const r = parseSkillRunArgs(['--keywords=ai', '--limit=10']);
+    expect(r.passthrough).toEqual(['--keywords=ai', '--limit=10']);
+  });
+
+  // Non-numeric and non-positive values leave the default in place.
+  it('ignores invalid --timeout values', () => {
+    const r = parseSkillRunArgs(['--timeout=abc', '--timeout=-5']);
+    expect(r.timeoutSeconds).toBe(60);
+  });
+});
+
+// `skill list`: table output, including which tier each skill resolved from.
+describe('handleSkillCommand: list', () => {
+  it('shows empty message when no skills', async () => {
+    const result = await handleSkillCommand(['list'], { port: 9999, tiers });
+    expect(result).toContain('No browser-skills found');
+  });
+
+  it('lists skills with their resolved tier', async () => {
+    makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: foo desc');
+    makeSkillDir(tiers.global, 'bar', 'name: bar\nhost: b.com\ndescription: bar desc');
+    const result = await handleSkillCommand(['list'], { port: 9999, tiers });
+    expect(result).toContain('foo');
+    expect(result).toContain('bundled');
+    expect(result).toContain('a.com');
+    expect(result).toContain('bar');
+    expect(result).toContain('global');
+  });
+
+  // The same name exists in two tiers; only the project-tier entry should be listed.
+  it('prints project tier when same name in multiple tiers', async () => {
+    makeSkillDir(tiers.bundled, 'shared', 'name: shared\nhost: bundled.com');
+    makeSkillDir(tiers.project!, 'shared', 'name: shared\nhost: project.com');
+    const result = await handleSkillCommand(['list'], { port: 9999, tiers });
+    expect(result).toContain('project');
+    expect(result).toContain('project.com');
+    expect(result).not.toContain('bundled.com');
+  });
+});
+
+// `skill show`: returns the raw SKILL.md (frontmatter and body) or throws.
+describe('handleSkillCommand: show', () => {
+  it('prints SKILL.md', async () => {
+    makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: hi');
+    const result = await handleSkillCommand(['show', 'foo'], { port: 9999, tiers });
+    expect(result).toContain('name: foo');
+    expect(result).toContain('host: a.com');
+    expect(result).toContain('body');
+  });
+
+  it('throws when skill missing', async () => {
+    await expect(handleSkillCommand(['show', 'nope'], { port: 9999, tiers })).rejects.toThrow(/not found/);
+  });
+
+  it('throws when name omitted', async () => {
+    await expect(handleSkillCommand(['show'], { port: 9999, tiers })).rejects.toThrow(/Usage/);
+  });
+});
+
+// `skill rm`: tombstoning in the project tier (default) and the global tier.
+describe('handleSkillCommand: rm', () => {
+  it('tombstones global skill by default', async () => {
+    makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com');
+    // handleRm targets the project tier unless --global is passed; the automatic
+    // fallback to global only applies when tiers.project is unset. A project
+    // tier path exists in this fixture, so --global is passed explicitly.
+    const result = await handleSkillCommand(['rm', 'gone', '--global'], { port: 9999, tiers });
+    expect(result).toContain('Tombstoned');
+    expect(fs.existsSync(path.join(tiers.global, 'gone'))).toBe(false);
+  });
+
+  it('tombstones project skill', async () => {
+    makeSkillDir(tiers.project!, 'gone', 'name: gone\nhost: x.com');
+    const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers });
+    expect(result).toContain('Tombstoned');
+    expect(fs.existsSync(path.join(tiers.project!, 'gone'))).toBe(false);
+  });
+
+  // With project set to null, a plain `rm` is redirected to the global tier.
+  it('falls back to global when no project tier path', async () => {
+    const tiersNoProject = { ...tiers, project: null };
+    makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com');
+    const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers: tiersNoProject });
+    expect(result).toContain('global');
+  });
+});
+
+// Dispatcher edge cases: no subcommand prints usage, unknown subcommands reject.
+describe('handleSkillCommand: help / unknown', () => {
+  it('prints usage with no subcommand', async () => {
+    const r = await handleSkillCommand([], { port: 9999, tiers });
+    expect(r).toContain('Usage');
+  });
+
+  it('throws on unknown subcommand', async () => {
+    await expect(handleSkillCommand(['frobnicate'], { port: 9999, tiers }))
+      .rejects.toThrow(/Unknown skill subcommand/);
+  });
+});
+
+// buildSpawnEnv: env scrubbing for untrusted skills, pass-through for trusted
+// ones, and the always-injected GSTACK_PORT / GSTACK_SKILL_TOKEN pair.
+describe('buildSpawnEnv', () => {
+  // Snapshot of the parent env taken before each test plants its fake secrets.
+  let origEnv: Record<string, string | undefined>;
+  beforeEach(() => {
+    origEnv = { ...process.env };
+    // Plant some secrets for scrub-tests
+    process.env.GITHUB_TOKEN = 'gh-secret';
+    process.env.OPENAI_API_KEY = 'oai-secret';
+    process.env.MY_PASSWORD = 'sup3r';
+    process.env.NPM_TOKEN = 'npmtok';
+    process.env.AWS_SECRET_ACCESS_KEY = 'aws-secret';
+    process.env.GSTACK_TOKEN = 'root-token';
+    process.env.HOME = '/Users/test';
+    process.env.PATH = '/test/bin:/usr/bin';
+    process.env.LANG = 'en_US.UTF-8';
+  });
+  afterEach(() => {
+    // NOTE(review): this swaps the process.env object for the snapshot copy
+    // rather than restoring keys in place — confirm the runtime tolerates that.
+    process.env = origEnv;
+  });
+
+  it('untrusted: drops $HOME and secrets', () => {
+    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
+    expect(env.HOME).toBeUndefined();
+    expect(env.GITHUB_TOKEN).toBeUndefined();
+    expect(env.OPENAI_API_KEY).toBeUndefined();
+    expect(env.MY_PASSWORD).toBeUndefined();
+    expect(env.NPM_TOKEN).toBeUndefined();
+    expect(env.AWS_SECRET_ACCESS_KEY).toBeUndefined();
+    expect(env.GSTACK_TOKEN).toBeUndefined();
+  });
+
+  it('untrusted: keeps locale + TERM', () => {
+    process.env.TERM = 'xterm-256color';
+    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
+    expect(env.LANG).toBe('en_US.UTF-8');
+    expect(env.TERM).toBe('xterm-256color');
+  });
+
+  // The parent's PATH must not leak through; only the locked PATH is allowed.
+  it('untrusted: PATH is minimal (no /test/bin override)', () => {
+    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
+    expect(env.PATH).not.toContain('/test/bin');
+    expect(env.PATH).toMatch(/\/(usr\/local\/)?bin/);
+  });
+
+  it('untrusted: injects GSTACK_PORT + GSTACK_SKILL_TOKEN', () => {
+    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok-xyz' });
+    expect(env.GSTACK_PORT).toBe('1234');
+    expect(env.GSTACK_SKILL_TOKEN).toBe('tok-xyz');
+  });
+
+  it('trusted: keeps $HOME', () => {
+    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
+    expect(env.HOME).toBe('/Users/test');
+  });
+
+  it('trusted: still strips GSTACK_TOKEN (defense in depth)', () => {
+    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
+    expect(env.GSTACK_TOKEN).toBeUndefined();
+  });
+
+  it('trusted: keeps developer secrets (intentional)', () => {
+    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
+    expect(env.GITHUB_TOKEN).toBe('gh-secret');
+  });
+
+  // Trusted mode copies the parent env, so this checks the injected values win.
+  it('GSTACK_PORT/GSTACK_SKILL_TOKEN can never be overridden by parent env', () => {
+    process.env.GSTACK_PORT = '99999'; // attacker-set
+    process.env.GSTACK_SKILL_TOKEN = 'attacker-tok';
+    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'real-tok' });
+    expect(env.GSTACK_PORT).toBe('1234');
+    expect(env.GSTACK_SKILL_TOKEN).toBe('real-tok');
+  });
+});
+
+// ─── Spawn integration ──────────────────────────────────────────
+//
+// Tests below shell out to `bun run` against a synthesized script.ts, so they
+// take 1-3s each. Skip the suite if BUN_TEST_NO_SPAWN is set.
+const SKIP_SPAWN = process.env.BUN_TEST_NO_SPAWN === '1';
+
+// End-to-end checks of spawnSkill: output capture, env isolation, timeout,
+// exit-code propagation, stdout cap, and token revocation after each run.
+describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
+  it('happy path: returns stdout, exit 0, token revoked', async () => {
+    // NOTE(review): `dir` is assigned in each test of this suite but never read.
+    const dir = makeSkillDir(tiers.bundled, 'echo-skill',
+      'name: echo-skill\nhost: x.com\ntrusted: true',
+      `console.log(JSON.stringify({ ok: true, args: process.argv.slice(2) }));`,
+    );
+    const skill = readBrowserSkill('echo-skill', tiers)!;
+    const result = await spawnSkill({
+      skill,
+      skillArgs: ['hello'],
+      trusted: true,
+      timeoutSeconds: 30,
+      port: 9999,
+    });
+    expect(result.exitCode).toBe(0);
+    expect(result.timedOut).toBe(false);
+    expect(result.truncated).toBe(false);
+    const parsed = JSON.parse(result.stdout);
+    expect(parsed.ok).toBe(true);
+    // Only --timeout filtering happens; -- is preserved by Bun.
+    expect(parsed.args).toContain('hello');
+    // Token revoked: nothing left in the registry for this client.
+    expect(listTokens().filter(t => t.clientId.startsWith('skill:echo-skill:'))).toEqual([]);
+  });
+
+  // The probe script echoes selected env vars so the test can see what the child received.
+  it('untrusted spawn: GSTACK_SKILL_TOKEN visible, root env scrubbed', async () => {
+    const dir = makeSkillDir(tiers.bundled, 'env-probe',
+      'name: env-probe\nhost: x.com',  // trusted defaults to false
+      `console.log(JSON.stringify({
+        port: process.env.GSTACK_PORT,
+        token: process.env.GSTACK_SKILL_TOKEN,
+        home: process.env.HOME ?? null,
+        gh: process.env.GITHUB_TOKEN ?? null,
+        gstack: process.env.GSTACK_TOKEN ?? null,
+      }));`,
+    );
+    const origEnv = { ...process.env };
+    process.env.GITHUB_TOKEN = 'gh-secret';
+    process.env.GSTACK_TOKEN = 'root';
+    try {
+      const skill = readBrowserSkill('env-probe', tiers)!;
+      const result = await spawnSkill({
+        skill, skillArgs: [], trusted: false, timeoutSeconds: 30, port: 4242,
+      });
+      expect(result.exitCode).toBe(0);
+      const parsed = JSON.parse(result.stdout);
+      expect(parsed.port).toBe('4242');
+      expect(parsed.token).toMatch(/^gsk_sess_/);
+      expect(parsed.home).toBeNull();
+      expect(parsed.gh).toBeNull();
+      expect(parsed.gstack).toBeNull();
+    } finally {
+      process.env = origEnv;
+    }
+  });
+
+  it('trusted spawn: HOME passes through', async () => {
+    const dir = makeSkillDir(tiers.bundled, 'env-trusted',
+      'name: env-trusted\nhost: x.com\ntrusted: true',
+      `console.log(JSON.stringify({ home: process.env.HOME ?? null }));`,
+    );
+    const origEnv = { ...process.env };
+    process.env.HOME = '/Users/test-user';
+    try {
+      const skill = readBrowserSkill('env-trusted', tiers)!;
+      const result = await spawnSkill({
+        skill, skillArgs: [], trusted: true, timeoutSeconds: 30, port: 9999,
+      });
+      const parsed = JSON.parse(result.stdout);
+      expect(parsed.home).toBe('/Users/test-user');
+    } finally {
+      process.env = origEnv;
+    }
+  });
+
+  // 1s spawn timeout against a 30s sleep; the 10s limit below is the bun:test timeout.
+  it('timeout fires, exit code 124, token revoked', async () => {
+    const dir = makeSkillDir(tiers.bundled, 'sleeper',
+      'name: sleeper\nhost: x.com\ntrusted: true',
+      // Sleep longer than the test timeout; the spawn should kill us.
+      `await new Promise(r => setTimeout(r, 30000)); console.log("done");`,
+    );
+    const skill = readBrowserSkill('sleeper', tiers)!;
+    const result = await spawnSkill({
+      skill, skillArgs: [], trusted: true, timeoutSeconds: 1, port: 9999,
+    });
+    expect(result.timedOut).toBe(true);
+    expect(result.exitCode).toBe(124);
+    expect(listTokens().filter(t => t.clientId.startsWith('skill:sleeper:'))).toEqual([]);
+  }, 10_000);
+
+  it('script crash propagates nonzero exit', async () => {
+    const dir = makeSkillDir(tiers.bundled, 'crasher',
+      'name: crasher\nhost: x.com\ntrusted: true',
+      `process.exit(7);`,
+    );
+    const skill = readBrowserSkill('crasher', tiers)!;
+    const result = await spawnSkill({
+      skill, skillArgs: [], trusted: true, timeoutSeconds: 5, port: 9999,
+    });
+    expect(result.exitCode).toBe(7);
+    expect(result.timedOut).toBe(false);
+  });
+
+  it('stdout > 1MB truncates and reports truncated', async () => {
+    const dir = makeSkillDir(tiers.bundled, 'flood',
+      'name: flood\nhost: x.com\ntrusted: true',
+      // Emit ~2MB of "x" so the cap fires deterministically.
+      `const chunk = 'x'.repeat(64 * 1024);
+       for (let i = 0; i < 40; i++) process.stdout.write(chunk);`,
+    );
+    const skill = readBrowserSkill('flood', tiers)!;
+    const result = await spawnSkill({
+      skill, skillArgs: [], trusted: true, timeoutSeconds: 10, port: 9999,
+    });
+    expect(result.truncated).toBe(true);
+    expect(result.stdout.length).toBeLessThanOrEqual(1024 * 1024);
+  }, 10_000);
+});