mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
feat(browser-skills): \$B skill list/show/run/test/rm subcommands
handleSkillCommand dispatches to per-subcommand handlers; spawnSkill is
the load-bearing function that:
1. Mints a per-spawn scoped token (read+write only) bound to the
skill name + spawn-id.
2. Builds the spawn env:
- trusted: passes process.env minus GSTACK_TOKEN (defense in depth).
- untrusted: minimal allowlist (LANG, LC_ALL, LC_CTYPE, TERM, TZ) + locked
PATH; explicitly drops anything matching TOKEN/KEY/SECRET/etc.
Also drops AWS_/AZURE_/GCP_/GOOGLE_APPLICATION_/ANTHROPIC_/OPENAI_/
GITHUB_/GH_/SSH_/GPG_/NPM_TOKEN/PYPI_ patterns.
3. Always injects GSTACK_PORT + GSTACK_SKILL_TOKEN last (cannot be
overridden by parent env).
4. Spawns bun run script.ts -- <args> with cwd=skillDir, captures
stdout (1MB cap), stderr, and timeout-kills past the deadline.
5. Revokes the token in finally{}, always.
list output prints the resolved tier inline so "why did it run that
one?" never becomes a debugging mystery (Codex finding #4 mitigation).
server.ts threads the listen port to meta-commands via MetaCommandOpts.daemonPort.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,413 @@
|
||||
/**
|
||||
* $B skill subcommands — CLI surface for browser-skills.
|
||||
*
|
||||
* Subcommands:
|
||||
* list — list all skills, with resolved tier
|
||||
* show <name> — print skill SKILL.md
|
||||
* run <name> [--arg ...] [--timeout=Ns] — spawn the skill script, return JSON
|
||||
* test <name> — run script.test.ts via bun test
|
||||
* rm <name> [--global] — tombstone a user-tier skill
|
||||
*
|
||||
* Load-bearing: spawnSkill mints a per-spawn scoped token (read+write scope)
|
||||
* and passes it via GSTACK_SKILL_TOKEN. The skill never sees the daemon root
|
||||
* token. Untrusted skills get a scrubbed env (no $HOME, $PATH minimal, no
|
||||
* secrets like $GITHUB_TOKEN/$OPENAI_API_KEY/etc.) and a locked cwd. Trusted
|
||||
* skills (frontmatter `trusted: true`) inherit the process env, minus GSTACK_TOKEN.
|
||||
*
|
||||
* Output protocol: stdout = JSON, stderr = streaming logs, exit code 0/non-0.
|
||||
* stdout cap = 1MB (truncate + nonzero exit if exceeded). Default timeout 60s.
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
listBrowserSkills,
|
||||
readBrowserSkill,
|
||||
tombstoneBrowserSkill,
|
||||
defaultTierPaths,
|
||||
type BrowserSkill,
|
||||
type TierPaths,
|
||||
} from './browser-skills';
|
||||
import { mintSkillToken, revokeSkillToken, generateSpawnId } from './skill-token';
|
||||
|
||||
/** Timeout used by `skill run` when no valid `--timeout=` flag is supplied. */
const DEFAULT_TIMEOUT_SECONDS = 60;
/** Byte cap handed to `readCapped` for each captured child stream (1 MB). */
const MAX_STDOUT_BYTES = 2 ** 20;
|
||||
|
||||
// ─── Public command dispatcher ──────────────────────────────────
|
||||
|
||||
export interface SkillCommandContext {
  /** Port of the running daemon; spawned skills connect back to it. */
  port: number;
  /** Tier path override (tests pass synthetic dirs); defaults to `defaultTierPaths()`. */
  tiers?: TierPaths;
}

/**
 * Entry point for `$B skill <subcommand>`.
 *
 * Routes the first argument to the matching handler and forwards the
 * remaining arguments to it. A missing subcommand, `help`, or `--help`
 * yields the usage text.
 *
 * @param args Raw arguments following `skill` (subcommand first).
 * @param ctx  Daemon port and optional tier override.
 * @returns The text the daemon relays back to the CLI.
 * @throws Error for an unrecognized subcommand, plus whatever the selected
 *         handler throws.
 */
export async function handleSkillCommand(args: string[], ctx: SkillCommandContext): Promise<string> {
  const [sub, ...rest] = args;

  if (sub === undefined || sub === 'help' || sub === '--help') {
    return formatUsage();
  }
  if (sub === 'list') return handleList(ctx);
  if (sub === 'show') return handleShow(rest, ctx);
  if (sub === 'run') return handleRun(rest, ctx);
  if (sub === 'test') return handleTest(rest, ctx);
  if (sub === 'rm') return handleRm(rest, ctx);

  throw new Error(`Unknown skill subcommand: "${sub}". Try: list, show, run, test, rm.`);
}
|
||||
|
||||
/** Build the multi-line help text for `$B skill` (no trailing newline). */
function formatUsage(): string {
  const heading = 'Usage: $B skill <subcommand>';
  const subcommands =
    ' list List all skills with resolved tier' + '\n' +
    ' show <name> Print SKILL.md' + '\n' +
    ' run <name> [--arg k=v]... [--timeout=Ns] Run the skill script' + '\n' +
    ' test <name> Run script.test.ts' + '\n' +
    ' rm <name> [--global] Tombstone a user-tier skill';
  // A blank line separates the heading from the subcommand table.
  return `${heading}\n\n${subcommands}`;
}
|
||||
|
||||
// ─── list ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `$B skill list` — render one row per skill returned by `listBrowserSkills`,
 * including the tier each skill resolved from.
 *
 * Columns are padded to fixed widths; the description is cut to 40 characters.
 * Returns a short hint instead of a table when there are no skills.
 */
function handleList(ctx: SkillCommandContext): string {
  const skills = listBrowserSkills(ctx.tiers ?? defaultTierPaths());
  if (skills.length === 0) {
    return 'No browser-skills found.\n\nTry: $B skill show <name> (none right now)\n';
  }

  const header = 'NAME TIER HOST DESC';
  const rows = skills.map((skill) => {
    const shortDescription = (skill.frontmatter.description ?? '').slice(0, 40);
    const columns = [
      skill.name.padEnd(30),
      skill.tier.padEnd(8),
      skill.frontmatter.host.padEnd(28),
      shortDescription,
    ];
    return columns.join(' ');
  });

  return [header, ...rows].join('\n') + '\n';
}
|
||||
|
||||
// ─── show ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `$B skill show <name>` — return the contents of the resolved skill's SKILL.md.
 *
 * @throws Error with the usage line when no name is given, or a not-found
 *         error when `readBrowserSkill` returns nothing for the name.
 */
function handleShow(args: string[], ctx: SkillCommandContext): string {
  const [name] = args;
  if (!name) throw new Error('Usage: $B skill show <name>');

  const skill = readBrowserSkill(name, ctx.tiers ?? defaultTierPaths());
  if (!skill) throw new Error(`Skill "${name}" not found in any tier.`);

  const manifestPath = path.join(skill.dir, 'SKILL.md');
  return readFile(manifestPath);
}
|
||||
|
||||
/** Synchronously read the file at `p` and return it decoded as UTF-8 text. */
function readFile(p: string): string {
  const contents = fs.readFileSync(p, { encoding: 'utf-8' });
  return contents;
}
|
||||
|
||||
// ─── run ────────────────────────────────────────────────────────
|
||||
|
||||
interface ParsedRunArgs {
  /** Arguments forwarded untouched to the skill script. */
  passthrough: string[];
  /** Effective timeout in seconds. */
  timeoutSeconds: number;
}

/**
 * Split `skill run` arguments into the runner's own `--timeout=` flag and
 * everything else, which is passed through to the script in original order.
 *
 * A `--timeout=` value is accepted only when `parseInt` yields a positive
 * number; otherwise the flag is dropped and the previous timeout (initially
 * `DEFAULT_TIMEOUT_SECONDS`) is kept. When several valid flags are present
 * the last one wins.
 */
export function parseSkillRunArgs(args: string[]): ParsedRunArgs {
  const timeoutFlag = '--timeout=';
  const parsed: ParsedRunArgs = {
    passthrough: [],
    timeoutSeconds: DEFAULT_TIMEOUT_SECONDS,
  };

  for (const arg of args) {
    if (!arg.startsWith(timeoutFlag)) {
      parsed.passthrough.push(arg);
      continue;
    }
    const seconds = Number.parseInt(arg.slice(timeoutFlag.length), 10);
    if (Number.isNaN(seconds) || seconds <= 0) continue; // invalid value: flag is consumed, timeout unchanged
    parsed.timeoutSeconds = seconds;
  }

  return parsed;
}
|
||||
|
||||
/**
 * `$B skill run <name> [...]` — resolve the skill, spawn its script through
 * `spawnSkill`, and return the captured stdout.
 *
 * The run counts as failed when the exit code is non-zero, the timeout fired,
 * or stdout was truncated. In that case an Error is thrown whose message
 * carries the reason plus the first 4096 characters of stderr, and whose
 * `exitCode` property is the child's exit code (or 1 when that is 0).
 */
async function handleRun(args: string[], ctx: SkillCommandContext): Promise<string> {
  const [name, ...flags] = args;
  if (!name) throw new Error('Usage: $B skill run <name> [--arg k=v]... [--timeout=Ns]');

  const skill = readBrowserSkill(name, ctx.tiers ?? defaultTierPaths());
  if (!skill) throw new Error(`Skill "${name}" not found.`);

  const parsed = parseSkillRunArgs(flags);
  const outcome = await spawnSkill({
    skill,
    skillArgs: parsed.passthrough,
    trusted: skill.frontmatter.trusted,
    timeoutSeconds: parsed.timeoutSeconds,
    port: ctx.port,
  });

  const succeeded = outcome.exitCode === 0 && !outcome.timedOut && !outcome.truncated;
  if (succeeded) return outcome.stdout;

  // Truncation is reported in preference to timeout, and timeout in preference to the raw exit code.
  let reason: string;
  if (outcome.truncated) {
    reason = `truncated stdout at ${MAX_STDOUT_BYTES} bytes`;
  } else if (outcome.timedOut) {
    reason = `timed out after ${parsed.timeoutSeconds}s`;
  } else {
    reason = `exit ${outcome.exitCode}`;
  }

  const failure = new Error(`Skill "${name}" failed: ${reason}\n--- stderr ---\n${outcome.stderr.slice(0, 4096)}`);
  Object.assign(failure, { exitCode: outcome.exitCode || 1 });
  throw failure;
}
|
||||
|
||||
// ─── test ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `$B skill test <name>` — run the skill's `script.test.ts` with `bun test`.
 *
 * @returns The test runner's stderr if non-empty, else its stdout, else a
 *          short "tests passed" line.
 * @throws Error when the name is missing, the skill or its test file cannot
 *         be found, or `bun test` exits non-zero (message includes stderr).
 */
async function handleTest(args: string[], ctx: SkillCommandContext): Promise<string> {
  const name = args[0];
  if (!name) throw new Error('Usage: $B skill test <name>');
  const tiers = ctx.tiers ?? defaultTierPaths();
  const skill = readBrowserSkill(name, tiers);
  if (!skill) throw new Error(`Skill "${name}" not found.`);

  const testFile = path.join(skill.dir, 'script.test.ts');
  if (!fs.existsSync(testFile)) {
    throw new Error(`Skill "${name}" has no script.test.ts at ${testFile}`);
  }

  // NOTE(review): unlike `skill run`, the test process receives the full parent
  // env even for skills that are not marked trusted — confirm this is intended.
  const proc = Bun.spawn(['bun', 'test', testFile], {
    cwd: skill.dir,
    stdout: 'pipe',
    stderr: 'pipe',
    env: process.env,
  });

  // Drain both pipes while waiting for exit. Waiting for exit first can stall
  // a chatty test run once the pipe buffer fills and nothing is reading it.
  const [exitCode, stdout, stderr] = await Promise.all([
    proc.exited,
    proc.stdout ? new Response(proc.stdout).text() : '',
    proc.stderr ? new Response(proc.stderr).text() : '',
  ]);

  if (exitCode !== 0) {
    throw new Error(`Skill "${name}" tests failed (exit ${exitCode}).\n${stderr}`);
  }
  return stderr || stdout || `tests passed for "${name}"`;
}
|
||||
|
||||
// ─── rm ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * `$B skill rm <name> [--global]` — tombstone a skill via `tombstoneBrowserSkill`.
 *
 * The skill name is the first argument that is not a `--flag`, so both
 * `rm foo --global` and `rm --global foo` address the skill `foo`.
 * The project tier is targeted unless `--global` is given or no project tier
 * path is configured, in which case the global tier is used.
 *
 * @returns A confirmation line naming the tier and the tombstone destination.
 * @throws Error with the usage line when no skill name is present.
 */
function handleRm(args: string[], ctx: SkillCommandContext): string {
  const name = args.find((arg) => !arg.startsWith('--'));
  if (!name) throw new Error('Usage: $B skill rm <name> [--global]');

  const wantsGlobal = args.includes('--global');
  const tiers = ctx.tiers ?? defaultTierPaths();
  // For UX: if no project tier exists at all, default to global.
  const effectiveTier: 'project' | 'global' = wantsGlobal || !tiers.project ? 'global' : 'project';

  const dst = tombstoneBrowserSkill(name, effectiveTier, tiers);
  return `Tombstoned "${name}" (${effectiveTier} tier) → ${dst}\n`;
}
|
||||
|
||||
// ─── spawnSkill (load-bearing) ──────────────────────────────────
|
||||
|
||||
export interface SpawnSkillOptions {
  /** Resolved skill; its `dir` is used as cwd and must contain `script.ts`. */
  skill: BrowserSkill;
  /** Arguments appended after `--` on the `bun run` command line. */
  skillArgs: string[];
  /** When true the child gets the pass-through env; otherwise the scrubbed one. */
  trusted: boolean;
  /** Deadline after which the child is killed. */
  timeoutSeconds: number;
  /** Daemon port exported to the child as GSTACK_PORT. */
  port: number;
}

export interface SpawnSkillResult {
  /** Captured stdout, cut at MAX_STDOUT_BYTES. */
  stdout: string;
  /** Captured stderr, cut at MAX_STDOUT_BYTES. */
  stderr: string;
  /** Child exit code, or 124 when the timeout fired. */
  exitCode: number;
  /** True when the deadline elapsed and the child was killed. */
  timedOut: boolean;
  /** True when stdout exceeded the cap (stderr truncation is not reported). */
  truncated: boolean;
}

/**
 * Spawn a skill script as a child process.
 *
 * 1. Mint a per-spawn token with `mintSkillToken` for this skill + spawn id.
 * 2. Build the env with `buildSpawnEnv` (GSTACK_PORT and GSTACK_SKILL_TOKEN
 *    are always set there).
 * 3. Spawn `bun run script.ts -- <args>` with cwd = skill.dir.
 * 4. Capture stdout and stderr (each capped) and enforce the timeout:
 *    first a default `kill()`, then SIGKILL after a short grace period so a
 *    script that ignores the first signal cannot hang the caller.
 * 5. Revoke the token in `finally`, whatever happened.
 *
 * @throws Error when the skill directory has no `script.ts`.
 */
export async function spawnSkill(opts: SpawnSkillOptions): Promise<SpawnSkillResult> {
  // setTimeout delays above 2^31-1 ms overflow and fire almost immediately.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  // How long the child gets to exit after the first kill before SIGKILL.
  const KILL_GRACE_MS = 2_000;

  const spawnId = generateSpawnId();
  const tokenInfo = mintSkillToken({
    skillName: opts.skill.name,
    spawnId,
    spawnTimeoutSeconds: opts.timeoutSeconds,
  });

  let killer: ReturnType<typeof setTimeout> | undefined;
  let hardKiller: ReturnType<typeof setTimeout> | undefined;

  try {
    const env = buildSpawnEnv({
      trusted: opts.trusted,
      port: opts.port,
      skillToken: tokenInfo.token,
    });
    const scriptPath = path.join(opts.skill.dir, 'script.ts');
    if (!fs.existsSync(scriptPath)) {
      throw new Error(`Skill "${opts.skill.name}" missing script.ts at ${scriptPath}`);
    }

    const proc = Bun.spawn(['bun', 'run', scriptPath, '--', ...opts.skillArgs], {
      cwd: opts.skill.dir,
      env,
      stdout: 'pipe',
      stderr: 'pipe',
    });

    let timedOut = false;
    const timeoutMs = Math.min(opts.timeoutSeconds * 1000, MAX_TIMER_DELAY_MS);
    killer = setTimeout(() => {
      timedOut = true;
      try { proc.kill(); } catch {}
      // Escalate if the child is still alive after the grace period.
      hardKiller = setTimeout(() => {
        try { proc.kill('SIGKILL'); } catch {}
      }, KILL_GRACE_MS);
    }, timeoutMs);

    // Start draining both pipes before waiting for exit so the child never
    // blocks on a full pipe.
    const stdoutPromise = readCapped(proc.stdout, MAX_STDOUT_BYTES);
    const stderrPromise = readCapped(proc.stderr, MAX_STDOUT_BYTES);

    const exitCode = await proc.exited;
    clearTimeout(killer);

    const stdoutResult = await stdoutPromise;
    const stderrResult = await stderrPromise;

    return {
      stdout: stdoutResult.text,
      stderr: stderrResult.text,
      exitCode: timedOut ? 124 : exitCode,
      timedOut,
      truncated: stdoutResult.truncated,
    };
  } finally {
    // Timers are also cleared here so an error path cannot leave one pending.
    if (killer !== undefined) clearTimeout(killer);
    if (hardKiller !== undefined) clearTimeout(hardKiller);
    revokeSkillToken(opts.skill.name, spawnId);
  }
}
|
||||
|
||||
/** Result of a capped stream read: decoded text and whether the cap was hit. */
interface CappedRead { text: string; truncated: boolean; }

/**
 * Read `stream` to completion, keeping at most `capBytes` bytes.
 *
 * Once the running byte count exceeds the cap, only the part of the current
 * chunk that still fits is kept, the reader is cancelled, and `truncated` is
 * set. A missing stream yields empty text. The kept bytes are decoded as
 * UTF-8 in one pass at the end.
 */
async function readCapped(stream: ReadableStream<Uint8Array> | undefined, capBytes: number): Promise<CappedRead> {
  if (!stream) return { text: '', truncated: false };

  const reader = stream.getReader();
  const kept: Uint8Array[] = [];
  let seen = 0;
  let truncated = false;

  try {
    for (;;) {
      const step = await reader.read();
      if (step.done) break;
      const bytes = step.value;
      if (!bytes) continue;

      seen += bytes.length;
      if (seen <= capBytes) {
        kept.push(bytes);
        continue;
      }

      // Over the cap: keep the prefix that still fits, then stop reading.
      truncated = true;
      const room = capBytes - (seen - bytes.length);
      if (room > 0) kept.push(bytes.subarray(0, room));
      try { await reader.cancel(); } catch {}
      break;
    }
  } finally {
    try { reader.releaseLock(); } catch {}
  }

  return { text: Buffer.concat(kept).toString('utf-8'), truncated };
}
|
||||
|
||||
// ─── env construction (security-critical) ───────────────────────
|
||||
|
||||
/**
 * Key patterns removed from the env of untrusted skills as a final pass.
 * They cover secrets, authority, and developer-environment context that an
 * agent-authored script should not see.
 */
const SECRET_KEY_PATTERNS = [
  /TOKEN/i, /KEY/i, /SECRET/i, /PASSWORD/i, /CREDENTIAL/i,
  /^AWS_/, /^AZURE_/, /^GCP_/, /^GOOGLE_APPLICATION_/,
  /^ANTHROPIC_/, /^OPENAI_/, /^GITHUB_/, /^GH_/,
  /^SSH_/, /^GPG_/,
  /^NPM_TOKEN/, /^PYPI_/,
];

/**
 * Variables copied from the parent env for untrusted spawns (locale, terminal
 * type, time zone). Nothing else is inherited; PATH, GSTACK_PORT and
 * GSTACK_SKILL_TOKEN are set separately by `buildSpawnEnv`.
 */
const UNTRUSTED_ALLOWLIST = new Set([
  'LANG', 'LC_ALL', 'LC_CTYPE',
  'TERM',
  'TZ',
]);

interface BuildEnvOptions {
  /** true → pass-through env; false → allowlisted, scrubbed env. */
  trusted: boolean;
  /** Daemon port, exported as GSTACK_PORT. */
  port: number;
  /** Per-spawn token, exported as GSTACK_SKILL_TOKEN. */
  skillToken: string;
}

/**
 * Build the environment for a spawned skill script.
 *
 * - trusted: every defined variable of `process.env` except GSTACK_TOKEN,
 *   with a default PATH if the parent has none.
 * - untrusted: only the `UNTRUSTED_ALLOWLIST` variables plus a minimal PATH,
 *   followed by a pass that deletes any key matching `SECRET_KEY_PATTERNS`.
 *
 * GSTACK_PORT and GSTACK_SKILL_TOKEN are written last in both modes, so
 * values inherited from the parent env can never override them.
 */
export function buildSpawnEnv(opts: BuildEnvOptions): Record<string, string> {
  const out: Record<string, string> = {};

  if (opts.trusted) {
    // Trusted: pass through process.env, but always strip the daemon root token
    // if the parent had one in env (defense in depth).
    for (const [k, v] of Object.entries(process.env)) {
      if (v === undefined) continue;
      if (k === 'GSTACK_TOKEN') continue; // never propagate root token
      out[k] = v;
    }
    // Set a minimal PATH if missing.
    if (!out.PATH) out.PATH = '/usr/local/bin:/usr/bin:/bin';
  } else {
    // Untrusted: minimal allowlist.
    for (const k of UNTRUSTED_ALLOWLIST) {
      const v = process.env[k];
      if (v !== undefined) out[k] = v;
    }
    // Minimal PATH so `bun` is findable: the running bun's directory when the
    // daemon itself runs under bun, otherwise the standard system dirs only.
    out.PATH = resolveMinimalPath();
  }

  // Drop anything that pattern-matches a secret. (Trusted path can have secrets
  // intentionally — e.g. an internal-tool skill — but GSTACK_TOKEN is still
  // stripped above.)
  if (!opts.trusted) {
    for (const k of Object.keys(out)) {
      if (SECRET_KEY_PATTERNS.some(p => p.test(k))) delete out[k];
    }
  }

  // Inject the daemon connection (always last so callers can't override).
  out.GSTACK_PORT = String(opts.port);
  out.GSTACK_SKILL_TOKEN = opts.skillToken;

  return out;
}

/**
 * PATH for untrusted spawns: the directory of the running executable when
 * that executable is bun, followed by the standard system directories.
 *
 * The check is on the executable's file name, not on the whole path, so an
 * unrelated binary under e.g. `/home/bunny/` or `/opt/bundle/` does not get
 * its directory added to the locked PATH.
 */
function resolveMinimalPath(): string {
  const fallback = '/usr/local/bin:/usr/bin:/bin';
  const execPath = process.execPath;
  if (!execPath) return fallback;

  // Accept `bun`, suffixed builds such as `bun-debug`, and `bun.exe`.
  const isBunBinary = /^bun(?:-[a-z]+)?(?:\.exe)?$/i.test(path.basename(execPath));
  if (!isBunBinary) return fallback;

  return `${path.dirname(execPath)}:${fallback}`;
}
|
||||
@@ -43,6 +43,7 @@ export const META_COMMANDS = new Set([
|
||||
'frame',
|
||||
'ux-audit',
|
||||
'domain-skill',
|
||||
'skill',
|
||||
'cdp',
|
||||
]);
|
||||
|
||||
@@ -177,6 +178,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
|
||||
'ux-audit': { category: 'Inspection', description: 'Extract page structure for UX behavioral analysis — site ID, nav, headings, text blocks, interactive elements. Returns JSON for agent interpretation.', usage: 'ux-audit' },
|
||||
// Domain skills (per-site notes the agent writes for itself)
|
||||
'domain-skill': { category: 'Meta', description: 'Per-site notes (host derived from active tab). Quarantined → active after N=3 uses without classifier flag → global by explicit promote.', usage: 'domain-skill save|list|show|edit|promote-to-global|rollback|rm <host?>' },
|
||||
// Browser-skills (hand-written or generated Playwright scripts the runtime spawns)
|
||||
'skill': { category: 'Meta', description: 'Run a browser-skill: deterministic Playwright script that drives the daemon over loopback HTTP. 3-tier lookup (project > global > bundled). Spawned scripts get a per-spawn scoped token (read+write only) — never the daemon root token.', usage: 'skill list|show|run|test|rm <name?> [--arg k=v]... [--timeout=Ns]' },
|
||||
// CDP escape hatch (deny-default; see browse/src/cdp-allowlist.ts)
|
||||
'cdp': { category: 'Inspection', description: 'Raw CDP method dispatch (deny-default; allowlist in cdp-allowlist.ts). Output through UNTRUSTED envelope when method is data-exfil.', usage: 'cdp <Domain.method> [json-params]' },
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@ import { handleSnapshot } from './snapshot';
|
||||
import { getCleanText } from './read-commands';
|
||||
import { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS, PAGE_CONTENT_COMMANDS, wrapUntrustedContent, canonicalizeCommand } from './commands';
|
||||
import { handleDomainSkillCommand } from './domain-skill-commands';
|
||||
import { handleSkillCommand } from './browser-skill-commands';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { checkScope, type TokenInfo } from './token-registry';
|
||||
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
|
||||
@@ -235,6 +236,8 @@ export interface MetaCommandOpts {
|
||||
chainDepth?: number;
|
||||
/** Callback to route subcommands through the full security pipeline (handleCommandInternal) */
|
||||
executeCommand?: (body: { command: string; args?: string[]; tabId?: number }, tokenInfo?: TokenInfo | null) => Promise<{ status: number; result: string; json?: boolean }>;
|
||||
/** The port the daemon is listening on (needed by `$B skill run` to point spawned scripts at the daemon). */
|
||||
daemonPort?: number;
|
||||
}
|
||||
|
||||
export async function handleMetaCommand(
|
||||
@@ -1024,6 +1027,14 @@ export async function handleMetaCommand(
|
||||
return await handleDomainSkillCommand(args, bm);
|
||||
}
|
||||
|
||||
case 'skill': {
|
||||
const port = opts?.daemonPort;
|
||||
if (port === undefined) {
|
||||
throw new Error('skill command requires daemonPort in MetaCommandOpts (server bug)');
|
||||
}
|
||||
return await handleSkillCommand(args, { port });
|
||||
}
|
||||
|
||||
case 'cdp': {
|
||||
// Lazy import — cdp-bridge introduces module deps we don't want loaded
|
||||
// for projects that never use the CDP escape hatch.
|
||||
|
||||
@@ -71,6 +71,14 @@ const AUTH_TOKEN = crypto.randomUUID();
|
||||
initRegistry(AUTH_TOKEN);
|
||||
const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10);
|
||||
const IDLE_TIMEOUT_MS = parseInt(process.env.BROWSE_IDLE_TIMEOUT || '1800000', 10); // 30 min
|
||||
|
||||
/**
|
||||
* Port the local listener bound to. Set once the daemon picks a port.
|
||||
* Used by `$B skill run` to point spawned skill scripts at the daemon over
|
||||
* loopback. Module-level so handleCommandInternal can read it without threading
|
||||
* the port through every dispatch.
|
||||
*/
|
||||
let LOCAL_LISTEN_PORT: number = 0;
|
||||
// Sidebar chat is always enabled in headed mode (ungated in v0.12.0)
|
||||
|
||||
// ─── Tunnel State ───────────────────────────────────────────────
|
||||
@@ -1286,6 +1294,7 @@ async function handleCommandInternal(
|
||||
const chainDepth = (opts?.chainDepth ?? 0);
|
||||
result = await handleMetaCommand(command, args, browserManager, shutdown, tokenInfo, {
|
||||
chainDepth,
|
||||
daemonPort: LOCAL_LISTEN_PORT,
|
||||
executeCommand: (body, ti) => handleCommandInternal(body, ti, {
|
||||
skipRateCheck: true, // chain counts as 1 request
|
||||
skipActivity: true, // chain emits 1 event for all subcommands
|
||||
@@ -1571,6 +1580,7 @@ async function start() {
|
||||
safeUnlink(DIALOG_LOG_PATH);
|
||||
|
||||
const port = await findPort();
|
||||
LOCAL_LISTEN_PORT = port;
|
||||
|
||||
// Launch browser (headless or headed with extension)
|
||||
// BROWSE_HEADLESS_SKIP=1 skips browser launch entirely (for HTTP-only testing)
|
||||
|
||||
@@ -0,0 +1,359 @@
|
||||
/**
|
||||
* browser-skill-commands tests — covers the dispatch surface, env scrubbing,
|
||||
* spawn lifecycle, timeout, stdout cap.
|
||||
*
|
||||
* The `run` and `test` subcommands spawn `bun` subprocesses, so these tests
|
||||
* write tiny inline scripts to the synthetic skill dir and assert behavior
|
||||
* end-to-end.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
rotateRoot, initRegistry, validateToken, listTokens,
|
||||
} from '../src/token-registry';
|
||||
import {
|
||||
handleSkillCommand,
|
||||
spawnSkill,
|
||||
buildSpawnEnv,
|
||||
parseSkillRunArgs,
|
||||
} from '../src/browser-skill-commands';
|
||||
import { readBrowserSkill, type TierPaths } from '../src/browser-skills';
|
||||
|
||||
// Per-test scratch area: a fresh temp root with one directory per tier.
let tmpRoot: string;
let tiers: TierPaths;

beforeEach(() => {
  // Reset the token registry so every test starts from a known root token.
  rotateRoot();
  initRegistry('root-token-for-tests');

  tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'browser-skill-cmd-test-'));
  const skillsDirUnder = (...segments: string[]): string =>
    path.join(tmpRoot, ...segments, 'browser-skills');
  tiers = {
    project: skillsDirUnder('project', '.gstack'),
    global: skillsDirUnder('home', '.gstack'),
    bundled: skillsDirUnder('gstack-install'),
  };

  for (const tierDir of [tiers.project!, tiers.global, tiers.bundled]) {
    fs.mkdirSync(tierDir, { recursive: true });
  }
});

afterEach(() => {
  // Remove everything the test created under the temp root.
  fs.rmSync(tmpRoot, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Create `<tierRoot>/<name>` containing a SKILL.md built from `frontmatter`
 * (wrapped in `---` fences, followed by a one-line body) and, when
 * `scriptBody` is non-empty, a `script.ts` holding it.
 *
 * @returns The path of the created skill directory.
 */
function makeSkillDir(tierRoot: string, name: string, frontmatter: string, scriptBody: string = '') {
  const skillDir = path.join(tierRoot, name);
  fs.mkdirSync(skillDir, { recursive: true });

  const manifest = ['---', frontmatter, '---', 'body', ''].join('\n');
  fs.writeFileSync(path.join(skillDir, 'SKILL.md'), manifest);

  if (scriptBody.length > 0) {
    fs.writeFileSync(path.join(skillDir, 'script.ts'), scriptBody);
  }
  return skillDir;
}
|
||||
|
||||
// Unit tests for the `--timeout=` extraction done by parseSkillRunArgs.
describe('parseSkillRunArgs', () => {
  it('extracts --timeout=N', () => {
    const { timeoutSeconds, passthrough } = parseSkillRunArgs(['--timeout=10', '--arg', 'foo=bar']);
    expect(timeoutSeconds).toBe(10);
    expect(passthrough).toEqual(['--arg', 'foo=bar']);
  });

  it('defaults to 60s when no timeout', () => {
    const { timeoutSeconds, passthrough } = parseSkillRunArgs(['--arg', 'foo=bar']);
    expect(timeoutSeconds).toBe(60);
    expect(passthrough).toEqual(['--arg', 'foo=bar']);
  });

  it('passes through unknown flags', () => {
    const parsed = parseSkillRunArgs(['--keywords=ai', '--limit=10']);
    expect(parsed.passthrough).toEqual(['--keywords=ai', '--limit=10']);
  });

  it('ignores invalid --timeout values', () => {
    // Neither a non-numeric nor a negative value may replace the default.
    const parsed = parseSkillRunArgs(['--timeout=abc', '--timeout=-5']);
    expect(parsed.timeoutSeconds).toBe(60);
  });
});
|
||||
|
||||
// `skill list`: empty state, per-skill tier column, and tier precedence.
describe('handleSkillCommand: list', () => {
  // Built lazily because `tiers` is reassigned before every test.
  const runList = () => handleSkillCommand(['list'], { port: 9999, tiers });

  it('shows empty message when no skills', async () => {
    const output = await runList();
    expect(output).toContain('No browser-skills found');
  });

  it('lists skills with their resolved tier', async () => {
    makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: foo desc');
    makeSkillDir(tiers.global, 'bar', 'name: bar\nhost: b.com\ndescription: bar desc');
    const output = await runList();
    for (const expected of ['foo', 'bundled', 'a.com', 'bar', 'global']) {
      expect(output).toContain(expected);
    }
  });

  it('prints project tier when same name in multiple tiers', async () => {
    makeSkillDir(tiers.bundled, 'shared', 'name: shared\nhost: bundled.com');
    makeSkillDir(tiers.project!, 'shared', 'name: shared\nhost: project.com');
    const output = await runList();
    expect(output).toContain('project');
    expect(output).toContain('project.com');
    // The bundled copy is shadowed and must not be listed.
    expect(output).not.toContain('bundled.com');
  });
});
|
||||
|
||||
// `skill show`: prints SKILL.md and rejects missing names or unknown skills.
describe('handleSkillCommand: show', () => {
  // Built lazily because `tiers` is reassigned before every test.
  const runShow = (...names: string[]) => handleSkillCommand(['show', ...names], { port: 9999, tiers });

  it('prints SKILL.md', async () => {
    makeSkillDir(tiers.bundled, 'foo', 'name: foo\nhost: a.com\ndescription: hi');
    const output = await runShow('foo');
    expect(output).toContain('name: foo');
    expect(output).toContain('host: a.com');
    expect(output).toContain('body');
  });

  it('throws when skill missing', async () => {
    await expect(runShow('nope')).rejects.toThrow(/not found/);
  });

  it('throws when name omitted', async () => {
    await expect(runShow()).rejects.toThrow(/Usage/);
  });
});
|
||||
|
||||
// `skill rm`: tier selection for tombstoning.
describe('handleSkillCommand: rm', () => {
  it('tombstones global skill with --global', async () => {
    makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com');
    // A project tier path is configured in this fixture, so without --global
    // the command would target the project tier, where no such skill exists.
    // Pass --global explicitly to address the global copy.
    const result = await handleSkillCommand(['rm', 'gone', '--global'], { port: 9999, tiers });
    expect(result).toContain('Tombstoned');
    expect(fs.existsSync(path.join(tiers.global, 'gone'))).toBe(false);
  });

  it('tombstones project skill', async () => {
    makeSkillDir(tiers.project!, 'gone', 'name: gone\nhost: x.com');
    const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers });
    expect(result).toContain('Tombstoned');
    expect(fs.existsSync(path.join(tiers.project!, 'gone'))).toBe(false);
  });

  it('falls back to global when no project tier path', async () => {
    const tiersNoProject = { ...tiers, project: null };
    makeSkillDir(tiers.global, 'gone', 'name: gone\nhost: x.com');
    const result = await handleSkillCommand(['rm', 'gone'], { port: 9999, tiers: tiersNoProject });
    expect(result).toContain('global');
  });
});
|
||||
|
||||
// Dispatcher edge cases: no subcommand prints usage, unknown subcommand rejects.
describe('handleSkillCommand: help / unknown', () => {
  it('prints usage with no subcommand', async () => {
    const output = await handleSkillCommand([], { port: 9999, tiers });
    expect(output).toContain('Usage');
  });

  it('throws on unknown subcommand', async () => {
    const attempt = handleSkillCommand(['frobnicate'], { port: 9999, tiers });
    await expect(attempt).rejects.toThrow(/Unknown skill subcommand/);
  });
});
|
||||
|
||||
// Env construction: what untrusted and trusted spawns can and cannot see.
describe('buildSpawnEnv', () => {
  let origEnv: Record<string, string | undefined>;
  beforeEach(() => {
    origEnv = { ...process.env };
    // Plant some secrets for scrub-tests
    process.env.GITHUB_TOKEN = 'gh-secret';
    process.env.OPENAI_API_KEY = 'oai-secret';
    process.env.MY_PASSWORD = 'sup3r';
    process.env.NPM_TOKEN = 'npmtok';
    process.env.AWS_SECRET_ACCESS_KEY = 'aws-secret';
    process.env.GSTACK_TOKEN = 'root-token';
    process.env.HOME = '/Users/test';
    process.env.PATH = '/test/bin:/usr/bin';
    process.env.LANG = 'en_US.UTF-8';
  });
  afterEach(() => {
    // Restore in place rather than reassigning process.env, so the runtime's
    // own env object stays the one every other module sees.
    for (const key of Object.keys(process.env)) {
      if (!(key in origEnv)) delete process.env[key];
    }
    for (const [key, value] of Object.entries(origEnv)) {
      if (value !== undefined) process.env[key] = value;
    }
  });

  it('untrusted: drops $HOME and secrets', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.HOME).toBeUndefined();
    expect(env.GITHUB_TOKEN).toBeUndefined();
    expect(env.OPENAI_API_KEY).toBeUndefined();
    expect(env.MY_PASSWORD).toBeUndefined();
    expect(env.NPM_TOKEN).toBeUndefined();
    expect(env.AWS_SECRET_ACCESS_KEY).toBeUndefined();
    expect(env.GSTACK_TOKEN).toBeUndefined();
  });

  it('untrusted: keeps locale + TERM', () => {
    process.env.TERM = 'xterm-256color';
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.LANG).toBe('en_US.UTF-8');
    expect(env.TERM).toBe('xterm-256color');
  });

  it('untrusted: PATH is minimal (no /test/bin override)', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok' });
    expect(env.PATH).not.toContain('/test/bin');
    expect(env.PATH).toMatch(/\/(usr\/local\/)?bin/);
  });

  it('untrusted: injects GSTACK_PORT + GSTACK_SKILL_TOKEN', () => {
    const env = buildSpawnEnv({ trusted: false, port: 1234, skillToken: 'tok-xyz' });
    expect(env.GSTACK_PORT).toBe('1234');
    expect(env.GSTACK_SKILL_TOKEN).toBe('tok-xyz');
  });

  it('trusted: keeps $HOME', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.HOME).toBe('/Users/test');
  });

  it('trusted: still strips GSTACK_TOKEN (defense in depth)', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.GSTACK_TOKEN).toBeUndefined();
  });

  it('trusted: keeps developer secrets (intentional)', () => {
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'tok' });
    expect(env.GITHUB_TOKEN).toBe('gh-secret');
  });

  it('GSTACK_PORT/GSTACK_SKILL_TOKEN can never be overridden by parent env', () => {
    process.env.GSTACK_PORT = '99999'; // attacker-set
    process.env.GSTACK_SKILL_TOKEN = 'attacker-tok';
    const env = buildSpawnEnv({ trusted: true, port: 1234, skillToken: 'real-tok' });
    expect(env.GSTACK_PORT).toBe('1234');
    expect(env.GSTACK_SKILL_TOKEN).toBe('real-tok');
  });
});
|
||||
|
||||
// ─── Spawn integration ──────────────────────────────────────────
//
// The suite below shells out to `bun run` against a synthesized script.ts,
// which costs roughly 1-3s per test. Set BUN_TEST_NO_SPAWN=1 to skip it.
const SKIP_SPAWN = (process.env.BUN_TEST_NO_SPAWN ?? '') === '1';
|
||||
|
||||
// spawnSkill lifecycle suite. Each test creates a skill via
// makeSkillDir(tiers.bundled, …), loads it with readBrowserSkill, runs it
// through spawnSkill and checks the reported result. The whole suite is
// skipped when SKIP_SPAWN is true.
describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
  /**
   * Applies `overrides` to process.env while `body` runs, then puts every
   * touched key back (deleting keys that were previously unset).
   *
   * Keys are restored one at a time rather than reassigning `process.env`
   * to a snapshot copy, so the runtime's own env object stays in place.
   */
  const withEnv = async (
    overrides: Record<string, string>,
    body: () => Promise<void>,
  ): Promise<void> => {
    const previous = new Map<string, string | undefined>();
    for (const [key, value] of Object.entries(overrides)) {
      previous.set(key, process.env[key]);
      process.env[key] = value;
    }
    try {
      await body();
    } finally {
      for (const [key, value] of previous) {
        // Assigning undefined would store the string "undefined"; delete instead.
        if (value === undefined) delete process.env[key];
        else process.env[key] = value;
      }
    }
  };

  it('happy path: returns stdout, exit 0, token revoked', async () => {
    makeSkillDir(tiers.bundled, 'echo-skill',
      'name: echo-skill\nhost: x.com\ntrusted: true',
      `console.log(JSON.stringify({ ok: true, args: process.argv.slice(2) }));`,
    );
    const skill = readBrowserSkill('echo-skill', tiers)!;
    const result = await spawnSkill({
      skill,
      skillArgs: ['hello'],
      trusted: true,
      timeoutSeconds: 30,
      port: 9999,
    });
    expect(result.exitCode).toBe(0);
    expect(result.timedOut).toBe(false);
    expect(result.truncated).toBe(false);
    const parsed = JSON.parse(result.stdout);
    expect(parsed.ok).toBe(true);
    // Only --timeout filtering happens; -- is preserved by Bun.
    expect(parsed.args).toContain('hello');
    // Token revoked: nothing left in the registry for this client.
    expect(listTokens().filter(t => t.clientId.startsWith('skill:echo-skill:'))).toEqual([]);
  });

  it('untrusted spawn: GSTACK_SKILL_TOKEN visible, root env scrubbed', async () => {
    makeSkillDir(tiers.bundled, 'env-probe',
      'name: env-probe\nhost: x.com', // trusted defaults to false
      // The child reports which env vars it can see; absent ones become null.
      `console.log(JSON.stringify({
        port: process.env.GSTACK_PORT,
        token: process.env.GSTACK_SKILL_TOKEN,
        home: process.env.HOME ?? null,
        gh: process.env.GITHUB_TOKEN ?? null,
        gstack: process.env.GSTACK_TOKEN ?? null,
      }));`,
    );
    // Plant secrets in the parent env; the untrusted child must not see them.
    await withEnv({ GITHUB_TOKEN: 'gh-secret', GSTACK_TOKEN: 'root' }, async () => {
      const skill = readBrowserSkill('env-probe', tiers)!;
      const result = await spawnSkill({
        skill, skillArgs: [], trusted: false, timeoutSeconds: 30, port: 4242,
      });
      expect(result.exitCode).toBe(0);
      const parsed = JSON.parse(result.stdout);
      expect(parsed.port).toBe('4242');
      expect(parsed.token).toMatch(/^gsk_sess_/);
      expect(parsed.home).toBeNull();
      expect(parsed.gh).toBeNull();
      expect(parsed.gstack).toBeNull();
    });
  });

  it('trusted spawn: HOME passes through', async () => {
    makeSkillDir(tiers.bundled, 'env-trusted',
      'name: env-trusted\nhost: x.com\ntrusted: true',
      `console.log(JSON.stringify({ home: process.env.HOME ?? null }));`,
    );
    await withEnv({ HOME: '/Users/test-user' }, async () => {
      const skill = readBrowserSkill('env-trusted', tiers)!;
      const result = await spawnSkill({
        skill, skillArgs: [], trusted: true, timeoutSeconds: 30, port: 9999,
      });
      const parsed = JSON.parse(result.stdout);
      expect(parsed.home).toBe('/Users/test-user');
    });
  });

  it('timeout fires, exit code 124, token revoked', async () => {
    makeSkillDir(tiers.bundled, 'sleeper',
      'name: sleeper\nhost: x.com\ntrusted: true',
      // Sleep far longer than the 1s spawn timeout below; the spawn should kill us.
      `await new Promise(r => setTimeout(r, 30000)); console.log("done");`,
    );
    const skill = readBrowserSkill('sleeper', tiers)!;
    const result = await spawnSkill({
      skill, skillArgs: [], trusted: true, timeoutSeconds: 1, port: 9999,
    });
    expect(result.timedOut).toBe(true);
    expect(result.exitCode).toBe(124);
    expect(listTokens().filter(t => t.clientId.startsWith('skill:sleeper:'))).toEqual([]);
  }, 10_000);

  it('script crash propagates nonzero exit', async () => {
    makeSkillDir(tiers.bundled, 'crasher',
      'name: crasher\nhost: x.com\ntrusted: true',
      `process.exit(7);`,
    );
    const skill = readBrowserSkill('crasher', tiers)!;
    const result = await spawnSkill({
      skill, skillArgs: [], trusted: true, timeoutSeconds: 5, port: 9999,
    });
    expect(result.exitCode).toBe(7);
    expect(result.timedOut).toBe(false);
  });

  it('stdout > 1MB truncates and reports truncated', async () => {
    makeSkillDir(tiers.bundled, 'flood',
      'name: flood\nhost: x.com\ntrusted: true',
      // Emit 40 x 64KiB = 2.5MiB of "x" so the cap fires deterministically.
      `const chunk = 'x'.repeat(64 * 1024);
      for (let i = 0; i < 40; i++) process.stdout.write(chunk);`,
    );
    const skill = readBrowserSkill('flood', tiers)!;
    const result = await spawnSkill({
      skill, skillArgs: [], trusted: true, timeoutSeconds: 10, port: 9999,
    });
    expect(result.truncated).toBe(true);
    expect(result.stdout.length).toBeLessThanOrEqual(1024 * 1024);
  }, 10_000);
});
|
||||
Reference in New Issue
Block a user