From 6a6b2b076641dfdbec23d1e763ba1d62532ef035 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 20 Mar 2026 08:30:09 -0700 Subject: [PATCH 1/2] feat: Gemini CLI E2E tests (v0.9.2.0) (#252) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add Gemini CLI session runner + JSONL parser Subprocess wrapper for `gemini -p --output-format stream-json --yolo` that spawns the Gemini CLI and parses NDJSON events (init, message, tool_use, tool_result, result) into a structured GeminiResult. Includes 10 unit tests for parseGeminiJSONL covering happy path, malformed input, empty input, missing fields, and multi-tool scenarios. Co-Authored-By: Claude Opus 4.6 (1M context) * feat: add Gemini CLI E2E tests Two E2E tests (gemini-discover-skill, gemini-review-findings) that verify gstack skills work when invoked by the Gemini CLI. Follows the same pattern as codex-e2e.test.ts — gated by EVALS=1 + binary availability, diff-based selection via touchfiles, eval persistence. 
- Add test/gemini-e2e.test.ts - Add Gemini entries to E2E_TOUCHFILES and GLOBAL_TOUCHFILES - Add test:gemini and test:gemini:all scripts to package.json - Add gemini-e2e.test.ts to test:evals, test:e2e, and ignore list Co-Authored-By: Claude Opus 4.6 (1M context) * chore: bump version and changelog (v0.9.2.0) Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 8 + VERSION | 2 +- package.json | 12 +- test/gemini-e2e.test.ts | 173 ++++++++++++++++++ test/helpers/gemini-session-runner.test.ts | 104 +++++++++++ test/helpers/gemini-session-runner.ts | 201 +++++++++++++++++++++ test/helpers/touchfiles.ts | 5 + 7 files changed, 499 insertions(+), 6 deletions(-) create mode 100644 test/gemini-e2e.test.ts create mode 100644 test/helpers/gemini-session-runner.test.ts create mode 100644 test/helpers/gemini-session-runner.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index e0259c60..9e47e135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [0.9.2.0] - 2026-03-20 — Gemini CLI E2E Tests + +### Added + +- **Gemini CLI is now tested end-to-end.** Two E2E tests verify that gstack skills work when invoked by Google's Gemini CLI (`gemini -p`). The `gemini-discover-skill` test confirms skill discovery from `.agents/skills/`, and `gemini-review-findings` runs a full code review via gstack-review. Both parse Gemini's stream-json NDJSON output and track token usage. +- **Gemini JSONL parser with 10 unit tests.** `parseGeminiJSONL` handles all Gemini event types (init, message, tool_use, tool_result, result) with defensive parsing for malformed input. The parser is a pure function, independently testable without spawning the CLI. +- **`bun run test:gemini`** and **`bun run test:gemini:all`** scripts for running Gemini E2E tests independently. Gemini tests are also included in `test:evals` and `test:e2e` aggregate scripts. 
+ ## [0.9.1.0] - 2026-03-20 — Adversarial Spec Review + Skill Chaining ### Added diff --git a/VERSION b/VERSION index cf94a424..594150e3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.9.1.0 +0.9.2.0 diff --git a/package.json b/package.json index 2bf4a238..ba18c08a 100644 --- a/package.json +++ b/package.json @@ -12,13 +12,15 @@ "gen:skill-docs": "bun run scripts/gen-skill-docs.ts", "dev": "bun run browse/src/cli.ts", "server": "bun run browse/src/server.ts", - "test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts", - "test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts", - "test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts", - "test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts", - "test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts", + "test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts --ignore test/gemini-e2e.test.ts", + "test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts", + "test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts", + "test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts", + "test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts 
test/codex-e2e.test.ts test/gemini-e2e.test.ts", "test:codex": "EVALS=1 bun test test/codex-e2e.test.ts", "test:codex:all": "EVALS=1 EVALS_ALL=1 bun test test/codex-e2e.test.ts", + "test:gemini": "EVALS=1 bun test test/gemini-e2e.test.ts", + "test:gemini:all": "EVALS=1 EVALS_ALL=1 bun test test/gemini-e2e.test.ts", "skill:check": "bun run scripts/skill-check.ts", "dev:skill": "bun run scripts/dev-skill.ts", "start": "bun run browse/src/server.ts", diff --git a/test/gemini-e2e.test.ts b/test/gemini-e2e.test.ts new file mode 100644 index 00000000..bd69919f --- /dev/null +++ b/test/gemini-e2e.test.ts @@ -0,0 +1,173 @@ +/** + * Gemini CLI E2E tests — verify skills work when invoked by Gemini CLI. + * + * Spawns `gemini -p` with stream-json output in the repo root (where + * .agents/skills/ already exists), parses JSONL events, and validates + * structured results. Follows the same pattern as codex-e2e.test.ts. + * + * Prerequisites: + * - `gemini` binary installed (npm install -g @google/gemini-cli) + * - Gemini authenticated via ~/.gemini/ config or GEMINI_API_KEY env var + * - EVALS=1 env var set (same gate as Claude E2E tests) + * + * Skips gracefully when prerequisites are not met. + */ + +import { describe, test, expect, afterAll } from 'bun:test'; +import { runGeminiSkill } from './helpers/gemini-session-runner'; +import type { GeminiResult } from './helpers/gemini-session-runner'; +import { EvalCollector } from './helpers/eval-store'; +import { selectTests, detectBaseBranch, getChangedFiles, GLOBAL_TOUCHFILES } from './helpers/touchfiles'; +import * as path from 'path'; + +const ROOT = path.resolve(import.meta.dir, '..'); + +// --- Prerequisites check --- + +const GEMINI_AVAILABLE = (() => { + try { + const result = Bun.spawnSync(['which', 'gemini']); + return result.exitCode === 0; + } catch { return false; } +})(); + +const evalsEnabled = !!process.env.EVALS; + +// Skip all tests if gemini is not available or EVALS is not set. 
+const SKIP = !GEMINI_AVAILABLE || !evalsEnabled; + +const describeGemini = SKIP ? describe.skip : describe; + +// Log why we're skipping (helpful for debugging CI) +if (!evalsEnabled) { + // Silent — same as Claude E2E tests, EVALS=1 required +} else if (!GEMINI_AVAILABLE) { + process.stderr.write('\nGemini E2E: SKIPPED — gemini binary not found (install: npm i -g @google/gemini-cli)\n'); +} + +// --- Diff-based test selection --- + +// Gemini E2E touchfiles — keyed by test name, same pattern as Codex E2E +const GEMINI_E2E_TOUCHFILES: Record = { + 'gemini-discover-skill': ['.agents/skills/**', 'test/helpers/gemini-session-runner.ts'], + 'gemini-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'test/helpers/gemini-session-runner.ts'], +}; + +let selectedTests: string[] | null = null; // null = run all + +if (evalsEnabled && !process.env.EVALS_ALL) { + const baseBranch = process.env.EVALS_BASE + || detectBaseBranch(ROOT) + || 'main'; + const changedFiles = getChangedFiles(baseBranch, ROOT); + + if (changedFiles.length > 0) { + const selection = selectTests(changedFiles, GEMINI_E2E_TOUCHFILES, GLOBAL_TOUCHFILES); + selectedTests = selection.selected; + process.stderr.write(`\nGemini E2E selection (${selection.reason}): ${selection.selected.length}/${Object.keys(GEMINI_E2E_TOUCHFILES).length} tests\n`); + if (selection.skipped.length > 0) { + process.stderr.write(` Skipped: ${selection.skipped.join(', ')}\n`); + } + process.stderr.write('\n'); + } + // If changedFiles is empty (e.g., on main branch), selectedTests stays null -> run all +} + +/** Skip an individual test if not selected by diff-based selection. */ +function testIfSelected(testName: string, fn: () => Promise, timeout: number) { + const shouldRun = selectedTests === null || selectedTests.includes(testName); + (shouldRun ? test : test.skip)(testName, fn, timeout); +} + +// --- Eval result collector --- + +const evalCollector = evalsEnabled && !SKIP ? 
new EvalCollector('e2e-gemini') : null; + +/** DRY helper to record a Gemini E2E test result into the eval collector. */ +function recordGeminiE2E(name: string, result: GeminiResult, passed: boolean) { + evalCollector?.addTest({ + name, + suite: 'gemini-e2e', + tier: 'e2e', + passed, + duration_ms: result.durationMs, + cost_usd: 0, // Gemini doesn't report cost in USD; tokens are tracked + output: result.output?.slice(0, 2000), + turns_used: result.toolCalls.length, // approximate: tool calls as turns + exit_reason: result.exitCode === 0 ? 'success' : `exit_code_${result.exitCode}`, + }); +} + +/** Print cost summary after a Gemini E2E test. */ +function logGeminiCost(label: string, result: GeminiResult) { + const durationSec = Math.round(result.durationMs / 1000); + console.log(`${label}: ${result.tokens} tokens, ${result.toolCalls.length} tool calls, ${durationSec}s`); +} + +// Finalize eval results on exit +afterAll(async () => { + if (evalCollector) { + await evalCollector.finalize(); + } +}); + +// --- Tests --- + +describeGemini('Gemini E2E', () => { + + testIfSelected('gemini-discover-skill', async () => { + // Run Gemini in the repo root where .agents/skills/ exists + const result = await runGeminiSkill({ + prompt: 'List any skills or instructions you have available. 
Just list the names.', + timeoutMs: 60_000, + cwd: ROOT, + }); + + logGeminiCost('gemini-discover-skill', result); + + // Gemini should have produced some output + const passed = result.exitCode === 0 && result.output.length > 0; + recordGeminiE2E('gemini-discover-skill', result, passed); + + expect(result.exitCode).toBe(0); + expect(result.output.length).toBeGreaterThan(0); + // The output should reference skills in some form + const outputLower = result.output.toLowerCase(); + expect( + outputLower.includes('review') || outputLower.includes('gstack') || outputLower.includes('skill'), + ).toBe(true); + }, 120_000); + + testIfSelected('gemini-review-findings', async () => { + // Run gstack-review skill via Gemini on this repo + const result = await runGeminiSkill({ + prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.', + timeoutMs: 540_000, + cwd: ROOT, + }); + + logGeminiCost('gemini-review-findings', result); + + // Should produce structured review-like output + const output = result.output; + const passed = result.exitCode === 0 && output.length > 50; + recordGeminiE2E('gemini-review-findings', result, passed); + + expect(result.exitCode).toBe(0); + expect(output.length).toBeGreaterThan(50); + + // Review output should contain some review-like content + const outputLower = output.toLowerCase(); + const hasReviewContent = + outputLower.includes('finding') || + outputLower.includes('issue') || + outputLower.includes('review') || + outputLower.includes('change') || + outputLower.includes('diff') || + outputLower.includes('clean') || + outputLower.includes('no issues') || + outputLower.includes('p1') || + outputLower.includes('p2'); + expect(hasReviewContent).toBe(true); + }, 600_000); +}); diff --git a/test/helpers/gemini-session-runner.test.ts b/test/helpers/gemini-session-runner.test.ts new file mode 100644 index 00000000..1bb9a393 --- /dev/null +++ b/test/helpers/gemini-session-runner.test.ts @@ -0,0 
+1,104 @@ +import { describe, test, expect } from 'bun:test'; +import { parseGeminiJSONL } from './gemini-session-runner'; + +// Fixture: actual Gemini CLI stream-json output with tool use +const FIXTURE_LINES = [ + '{"type":"init","timestamp":"2026-03-20T15:14:46.455Z","session_id":"test-session-123","model":"auto-gemini-3"}', + '{"type":"message","timestamp":"2026-03-20T15:14:46.456Z","role":"user","content":"list the files"}', + '{"type":"message","timestamp":"2026-03-20T15:14:49.650Z","role":"assistant","content":"I will list the files.","delta":true}', + '{"type":"tool_use","timestamp":"2026-03-20T15:14:49.690Z","tool_name":"run_shell_command","tool_id":"cmd_1","parameters":{"command":"ls"}}', + '{"type":"tool_result","timestamp":"2026-03-20T15:14:49.931Z","tool_id":"cmd_1","status":"success","output":"file1.ts\\nfile2.ts"}', + '{"type":"message","timestamp":"2026-03-20T15:14:51.945Z","role":"assistant","content":"Here are the files.","delta":true}', + '{"type":"result","timestamp":"2026-03-20T15:14:52.030Z","status":"success","stats":{"total_tokens":27147,"input_tokens":26928,"output_tokens":87,"cached":0,"duration_ms":5575,"tool_calls":1}}', +]; + +describe('parseGeminiJSONL', () => { + test('extracts session ID from init event', () => { + const parsed = parseGeminiJSONL(FIXTURE_LINES); + expect(parsed.sessionId).toBe('test-session-123'); + }); + + test('concatenates assistant message deltas into output', () => { + const parsed = parseGeminiJSONL(FIXTURE_LINES); + expect(parsed.output).toBe('I will list the files.Here are the files.'); + }); + + test('ignores user messages', () => { + const lines = [ + '{"type":"message","role":"user","content":"this should be ignored"}', + '{"type":"message","role":"assistant","content":"this should be kept","delta":true}', + ]; + const parsed = parseGeminiJSONL(lines); + expect(parsed.output).toBe('this should be kept'); + }); + + test('extracts tool names from tool_use events', () => { + const parsed = 
parseGeminiJSONL(FIXTURE_LINES); + expect(parsed.toolCalls).toHaveLength(1); + expect(parsed.toolCalls[0]).toBe('run_shell_command'); + }); + + test('extracts total tokens from result stats', () => { + const parsed = parseGeminiJSONL(FIXTURE_LINES); + expect(parsed.tokens).toBe(27147); + }); + + test('skips malformed lines without throwing', () => { + const lines = [ + '{"type":"init","session_id":"ok"}', + 'this is not json', + '{"type":"message","role":"assistant","content":"hello","delta":true}', + '{incomplete json', + '{"type":"result","status":"success","stats":{"total_tokens":100}}', + ]; + const parsed = parseGeminiJSONL(lines); + expect(parsed.sessionId).toBe('ok'); + expect(parsed.output).toBe('hello'); + expect(parsed.tokens).toBe(100); + }); + + test('skips empty and whitespace-only lines', () => { + const lines = [ + '', + ' ', + '{"type":"init","session_id":"s1"}', + '\t', + '{"type":"result","status":"success","stats":{"total_tokens":50}}', + ]; + const parsed = parseGeminiJSONL(lines); + expect(parsed.sessionId).toBe('s1'); + expect(parsed.tokens).toBe(50); + }); + + test('handles empty input', () => { + const parsed = parseGeminiJSONL([]); + expect(parsed.output).toBe(''); + expect(parsed.toolCalls).toHaveLength(0); + expect(parsed.tokens).toBe(0); + expect(parsed.sessionId).toBeNull(); + }); + + test('handles missing fields gracefully', () => { + const lines = [ + '{"type":"init"}', // no session_id + '{"type":"message","role":"assistant"}', // no content + '{"type":"tool_use"}', // no tool_name + '{"type":"result","status":"success"}', // no stats + ]; + const parsed = parseGeminiJSONL(lines); + expect(parsed.sessionId).toBeNull(); + expect(parsed.output).toBe(''); + expect(parsed.toolCalls).toHaveLength(0); + expect(parsed.tokens).toBe(0); + }); + + test('handles multiple tool_use events', () => { + const lines = [ + '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_1","parameters":{"command":"ls"}}', + 
'{"type":"tool_use","tool_name":"read_file","tool_id":"cmd_2","parameters":{"path":"foo.ts"}}', + '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_3","parameters":{"command":"cat bar.ts"}}', + ]; + const parsed = parseGeminiJSONL(lines); + expect(parsed.toolCalls).toEqual(['run_shell_command', 'read_file', 'run_shell_command']); + }); +}); diff --git a/test/helpers/gemini-session-runner.ts b/test/helpers/gemini-session-runner.ts new file mode 100644 index 00000000..06393c38 --- /dev/null +++ b/test/helpers/gemini-session-runner.ts @@ -0,0 +1,201 @@ +/** + * Gemini CLI subprocess runner for skill E2E testing. + * + * Spawns `gemini -p` as an independent process, parses its stream-json + * output, and returns structured results. Follows the same pattern as + * codex-session-runner.ts but adapted for the Gemini CLI. + * + * Key differences from Codex session-runner: + * - Uses `gemini -p` instead of `codex exec` + * - Output is NDJSON with event types: init, message, tool_use, tool_result, result + * - Uses `--output-format stream-json --yolo` instead of `--json -s read-only` + * - No temp HOME needed — Gemini discovers skills from `.agents/skills/` in cwd + * - Message events are streamed with `delta: true` — must concatenate + */ + +import * as path from 'path'; + +// --- Interfaces --- + +export interface GeminiResult { + output: string; // Full assistant message text (concatenated deltas) + toolCalls: string[]; // Tool names from tool_use events + tokens: number; // Total tokens used + exitCode: number; // Process exit code + durationMs: number; // Wall clock time + sessionId: string | null; // Session ID from init event + rawLines: string[]; // Raw JSONL lines for debugging +} + +// --- JSONL parser --- + +export interface ParsedGeminiJSONL { + output: string; + toolCalls: string[]; + tokens: number; + sessionId: string | null; +} + +/** + * Parse an array of JSONL lines from `gemini -p --output-format stream-json`. 
+ * Pure function — no I/O, no side effects. + * + * Handles these Gemini event types: + * - init → extract session_id + * - message (role=assistant, delta=true) → concatenate content into output + * - tool_use → extract tool_name + * - tool_result → logged but not extracted + * - result → extract token usage from stats + */ +export function parseGeminiJSONL(lines: string[]): ParsedGeminiJSONL { + const outputParts: string[] = []; + const toolCalls: string[] = []; + let tokens = 0; + let sessionId: string | null = null; + + for (const line of lines) { + if (!line.trim()) continue; + try { + const obj = JSON.parse(line); + const t = obj.type || ''; + + if (t === 'init') { + const sid = obj.session_id || ''; + if (sid) sessionId = sid; + } else if (t === 'message') { + if (obj.role === 'assistant' && obj.content) { + outputParts.push(obj.content); + } + } else if (t === 'tool_use') { + const name = obj.tool_name || ''; + if (name) toolCalls.push(name); + } else if (t === 'result') { + const stats = obj.stats || {}; + tokens = (stats.total_tokens || 0); + } + } catch { /* skip malformed lines */ } + } + + return { + output: outputParts.join(''), + toolCalls, + tokens, + sessionId, + }; +} + +// --- Main runner --- + +/** + * Run a prompt via `gemini -p` and return structured results. + * + * Spawns gemini with stream-json output, parses JSONL events, + * and returns a GeminiResult. Skips gracefully if gemini binary is not found. 
+ */ +export async function runGeminiSkill(opts: { + prompt: string; // What to ask Gemini + timeoutMs?: number; // Default 300000 (5 min) + cwd?: string; // Working directory (where .agents/skills/ lives) +}): Promise { + const { + prompt, + timeoutMs = 300_000, + cwd, + } = opts; + + const startTime = Date.now(); + + // Check if gemini binary exists + const whichResult = Bun.spawnSync(['which', 'gemini']); + if (whichResult.exitCode !== 0) { + return { + output: 'SKIP: gemini binary not found', + toolCalls: [], + tokens: 0, + exitCode: -1, + durationMs: Date.now() - startTime, + sessionId: null, + rawLines: [], + }; + } + + // Build gemini command + const args = ['-p', prompt, '--output-format', 'stream-json', '--yolo']; + + // Spawn gemini — uses real HOME for auth, cwd for skill discovery + const proc = Bun.spawn(['gemini', ...args], { + cwd: cwd || process.cwd(), + stdout: 'pipe', + stderr: 'pipe', + }); + + // Race against timeout + let timedOut = false; + const timeoutId = setTimeout(() => { + timedOut = true; + proc.kill(); + }, timeoutMs); + + // Stream and collect JSONL from stdout + const collectedLines: string[] = []; + const stderrPromise = new Response(proc.stderr).text(); + + const reader = proc.stdout.getReader(); + const decoder = new TextDecoder(); + let buf = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buf += decoder.decode(value, { stream: true }); + const lines = buf.split('\n'); + buf = lines.pop() || ''; + for (const line of lines) { + if (!line.trim()) continue; + collectedLines.push(line); + + // Real-time progress to stderr + try { + const event = JSON.parse(line); + if (event.type === 'tool_use' && event.tool_name) { + const elapsed = Math.round((Date.now() - startTime) / 1000); + process.stderr.write(` [gemini ${elapsed}s] tool: ${event.tool_name}\n`); + } else if (event.type === 'message' && event.role === 'assistant' && event.content) { + const elapsed = Math.round((Date.now() - 
startTime) / 1000); + process.stderr.write(` [gemini ${elapsed}s] message: ${event.content.slice(0, 100)}\n`); + } + } catch { /* skip — parseGeminiJSONL will handle it later */ } + } + } + } catch { /* stream read error — fall through to exit code handling */ } + + // Flush remaining buffer + if (buf.trim()) { + collectedLines.push(buf); + } + + const stderr = await stderrPromise; + const exitCode = await proc.exited; + clearTimeout(timeoutId); + + const durationMs = Date.now() - startTime; + + // Parse all collected JSONL lines + const parsed = parseGeminiJSONL(collectedLines); + + // Log stderr if non-empty (may contain auth errors, etc.) + if (stderr.trim()) { + process.stderr.write(` [gemini stderr] ${stderr.trim().slice(0, 200)}\n`); + } + + return { + output: parsed.output, + toolCalls: parsed.toolCalls, + tokens: parsed.tokens, + exitCode: timedOut ? 124 : exitCode, + durationMs, + sessionId: parsed.sessionId, + rawLines: collectedLines, + }; +} diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index c516a3b5..1246a413 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -84,6 +84,10 @@ export const E2E_TOUCHFILES: Record = { 'codex-discover-skill': ['codex/**', '.agents/skills/**', 'test/helpers/codex-session-runner.ts'], 'codex-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'codex/**', 'test/helpers/codex-session-runner.ts'], + // Gemini E2E (tests skills via Gemini CLI) + 'gemini-discover-skill': ['.agents/skills/**', 'test/helpers/gemini-session-runner.ts'], + 'gemini-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'test/helpers/gemini-session-runner.ts'], + // QA bootstrap 'qa-bootstrap': ['qa/**', 'browse/src/**', 'ship/**'], @@ -160,6 +164,7 @@ export const LLM_JUDGE_TOUCHFILES: Record = { export const GLOBAL_TOUCHFILES = [ 'test/helpers/session-runner.ts', 'test/helpers/codex-session-runner.ts', + 'test/helpers/gemini-session-runner.ts', 'test/helpers/eval-store.ts', 
'test/helpers/llm-judge.ts', 'scripts/gen-skill-docs.ts', From d7c732b282845214a5a0ab436f059c8767039795 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 20 Mar 2026 12:22:11 -0700 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20Windows=20support=20=E2=80=94=20Node?= =?UTF-8?q?.js=20server=20fallback=20for=20Playwright=20(#255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Windows support — Node.js server fallback for Playwright Setup hangs on Windows 11 because Bun's child_process can't handle Playwright's --remote-debugging-pipe (fd 3/4 pipe handles). Fall back to Node.js on Windows for both the setup verification and server runtime. macOS/Linux completely unaffected — all Windows code behind IS_WINDOWS / process.platform === 'win32' guards. Based on community PR #194 by @sozairali. Fixed sed -i portability (perl -pi -e) in build-node-server.sh for macOS compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) * fix: cross-platform path handling for Windows compatibility Replace hardcoded '/tmp' and 'dir + "/"' path checks with platform-aware constants from new platform.ts module. On macOS/Linux this evaluates identically ('/tmp', '/'); on Windows it uses os.tmpdir() and path.sep. Zero behavior change on Unix. 
Co-Authored-By: Claude Opus 4.6 (1M context) * test: add tests for Windows polyfill, platform constants, and Node server resolution Co-Authored-By: Claude Opus 4.6 (1M context) * docs: Windows support in README + CHANGELOG (v0.9.1.1) Co-Authored-By: Claude Opus 4.6 (1M context) * chore: bump version and changelog (v0.9.3.0) Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 (1M context) --- .agents/skills/gstack-browse/SKILL.md | 2 +- .agents/skills/gstack/SKILL.md | 2 +- CHANGELOG.md | 12 +++ README.md | 4 +- SKILL.md | 2 +- VERSION | 2 +- browse/SKILL.md | 2 +- browse/scripts/build-node-server.sh | 48 ++++++++++++ browse/src/bun-polyfill.cjs | 109 ++++++++++++++++++++++++++ browse/src/cli.ts | 42 +++++++++- browse/src/meta-commands.ts | 11 +-- browse/src/platform.ts | 17 ++++ browse/src/read-commands.ts | 5 +- browse/src/snapshot.ts | 9 ++- browse/src/write-commands.ts | 5 +- browse/test/bun-polyfill.test.ts | 72 +++++++++++++++++ browse/test/config.test.ts | 30 +++++++ browse/test/platform.test.ts | 37 +++++++++ package.json | 2 +- setup | 46 +++++++++-- 20 files changed, 430 insertions(+), 29 deletions(-) create mode 100755 browse/scripts/build-node-server.sh create mode 100644 browse/src/bun-polyfill.cjs create mode 100644 browse/src/platform.ts create mode 100644 browse/test/bun-polyfill.test.ts create mode 100644 browse/test/platform.test.ts diff --git a/.agents/skills/gstack-browse/SKILL.md b/.agents/skills/gstack-browse/SKILL.md index 6f634f12..db405e47 100644 --- a/.agents/skills/gstack-browse/SKILL.md +++ b/.agents/skills/gstack-browse/SKILL.md @@ -358,7 +358,7 @@ The snapshot is your primary tool for understanding and interacting with pages. 
-s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) +-o --output Output path for annotated screenshot (default: /browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md index 3b4f93b5..4bb9ba17 100644 --- a/.agents/skills/gstack/SKILL.md +++ b/.agents/skills/gstack/SKILL.md @@ -486,7 +486,7 @@ The snapshot is your primary tool for understanding and interacting with pages. -s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) +-o --output Output path for annotated screenshot (default: /browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e47e135..b4e8261c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [0.9.3.0] - 2026-03-20 — Windows Support + +### Fixed + +- **gstack now works on Windows 11.** Setup no longer hangs when verifying Playwright, and the browse server automatically falls back to Node.js to work around a Bun pipe-handling bug on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). Just make sure Node.js is installed alongside Bun. macOS and Linux are completely unaffected. +- **Path handling works on Windows.** All hardcoded `/tmp` paths and Unix-style path separators now use platform-aware equivalents via a new `platform.ts` module. Path traversal protection works correctly with Windows backslash separators. 
+ +### Added + +- **Bun API polyfill for Node.js.** When the browse server runs under Node.js on Windows, a compatibility layer provides `Bun.serve()`, `Bun.spawn()`, `Bun.spawnSync()`, and `Bun.sleep()` equivalents. Fully tested. +- **Node server build script.** `browse/scripts/build-node-server.sh` transpiles the server for Node.js, stubs `bun:sqlite`, and injects the polyfill — all automated during `bun run build`. + ## [0.9.2.0] - 2026-03-20 — Gemini CLI E2E Tests ### Added diff --git a/README.md b/README.md index b7ddb7d1..07047797 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Expect first useful run in under 5 minutes on any repo with tests already set up ## Install — takes 30 seconds -**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+ +**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+, [Node.js](https://nodejs.org/) (Windows only) ### Step 1: Install on your machine @@ -238,6 +238,8 @@ Data is stored in [Supabase](https://supabase.com) (open source Firebase alterna **Stale install?** Run `/gstack-upgrade` — or set `auto_upgrade: true` in `~/.gstack/config.yaml` +**Windows users:** gstack works on Windows 11 via Git Bash or WSL. Node.js is required in addition to Bun — Bun has a known bug with Playwright's pipe transport on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). The browse server automatically falls back to Node.js. Make sure both `bun` and `node` are on your PATH. + **Claude says it can't see the skills?** Make sure your project's `CLAUDE.md` has a gstack section. Add this: ``` diff --git a/SKILL.md b/SKILL.md index fe66b618..46b7a558 100644 --- a/SKILL.md +++ b/SKILL.md @@ -492,7 +492,7 @@ The snapshot is your primary tool for understanding and interacting with pages. 
-s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) +-o --output Output path for annotated screenshot (default: /browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/VERSION b/VERSION index 594150e3..947d2886 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.9.2.0 +0.9.3.0 diff --git a/browse/SKILL.md b/browse/SKILL.md index 8782ccbf..2acf60b0 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -364,7 +364,7 @@ The snapshot is your primary tool for understanding and interacting with pages. -s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) +-o --output Output path for annotated screenshot (default: /browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/browse/scripts/build-node-server.sh b/browse/scripts/build-node-server.sh new file mode 100755 index 00000000..539e391c --- /dev/null +++ b/browse/scripts/build-node-server.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Build a Node.js-compatible server bundle for Windows. +# +# On Windows, Bun can't launch or connect to Playwright's Chromium +# (oven-sh/bun#4253, #9911). This script produces a server bundle +# that runs under Node.js with Bun API polyfills. + +set -e + +GSTACK_DIR="$(cd "$(dirname "$0")/../.." && pwd)" +SRC_DIR="$GSTACK_DIR/browse/src" +DIST_DIR="$GSTACK_DIR/browse/dist" + +echo "Building Node-compatible server bundle..." 
+
+# Step 1: Transpile server.ts to a single .mjs bundle (externalize runtime deps)
+bun build "$SRC_DIR/server.ts" \
+  --target=node \
+  --outfile "$DIST_DIR/server-node.mjs" \
+  --external playwright \
+  --external playwright-core \
+  --external diff \
+  --external "bun:sqlite"
+
+# Step 2: Post-process
+# Replace import.meta.dir with a resolvable reference (\b keeps import.meta.dirname intact)
+perl -pi -e 's/import\.meta\.dir\b/__browseNodeSrcDir/g' "$DIST_DIR/server-node.mjs"
+# Stub out bun:sqlite (macOS-only cookie import, not needed on Windows)
+perl -pi -e 's|import { Database } from "bun:sqlite";|const Database = null; // bun:sqlite stubbed on Node|g' "$DIST_DIR/server-node.mjs"
+
+# Step 3: Create the final file with polyfill header injected after the first line
+{
+  head -1 "$DIST_DIR/server-node.mjs"
+  echo '// ── Windows Node.js compatibility (auto-generated) ──'
+  echo 'import { fileURLToPath as _ftp } from "node:url";'
+  echo 'import { dirname as _dn } from "node:path";'
+  echo 'const __browseNodeSrcDir = _dn(_dn(_ftp(import.meta.url))) + "/src";'
+  echo 'import { createRequire as _cr } from "node:module"; _cr(import.meta.url)("./bun-polyfill.cjs");'
+  echo '// ── end compatibility ──'
+  tail -n +2 "$DIST_DIR/server-node.mjs"
+} > "$DIST_DIR/server-node.tmp.mjs"
+
+mv "$DIST_DIR/server-node.tmp.mjs" "$DIST_DIR/server-node.mjs"
+
+# Step 4: Copy polyfill to dist/
+cp "$SRC_DIR/bun-polyfill.cjs" "$DIST_DIR/bun-polyfill.cjs"
+
+echo "Node server bundle ready: $DIST_DIR/server-node.mjs"
diff --git a/browse/src/bun-polyfill.cjs b/browse/src/bun-polyfill.cjs
new file mode 100644
index 00000000..e0ada11b
--- /dev/null
+++ b/browse/src/bun-polyfill.cjs
@@ -0,0 +1,141 @@
+/**
+ * Bun API polyfill for Node.js — Windows compatibility layer.
+ *
+ * On Windows, Bun can't launch or connect to Playwright's Chromium
+ * (oven-sh/bun#4253, #9911). The browse server falls back to running
+ * under Node.js with this polyfill providing Bun API equivalents.
+ *
+ * Loaded by the compatibility header that build-node-server.sh injects
+ * into server-node.mjs (createRequire + require of this file), so
+ * globalThis.Bun exists before the server code runs.
+ *
+ * Only serve, spawnSync, spawn and sleep are provided.
+ */
+
+'use strict';
+
+const http = require('http');
+const { spawnSync, spawn } = require('child_process');
+
+/** Buffer a Node request body; rejects if the stream errors mid-read. */
+function readBody(nodeReq) {
+  return new Promise((resolve, reject) => {
+    const chunks = [];
+    nodeReq.on('data', (chunk) => chunks.push(chunk));
+    nodeReq.on('end', () => resolve(Buffer.concat(chunks)));
+    nodeReq.on('error', reject);
+  });
+}
+
+globalThis.Bun = {
+  /**
+   * Adapt a fetch-style handler ({ port, hostname, fetch }) to http.createServer.
+   * Returns { stop(), port, hostname }.
+   */
+  serve(options) {
+    const { port, hostname = '127.0.0.1', fetch } = options;
+
+    // Port actually bound by the OS — differs from `port` when 0 was requested.
+    // Falls back to the requested port until the socket is listening.
+    const boundPort = () => {
+      const addr = server.address();
+      return addr && typeof addr === 'object' ? addr.port : port;
+    };
+
+    const server = http.createServer(async (nodeReq, nodeRes) => {
+      try {
+        const url = `http://${hostname}:${boundPort()}${nodeReq.url}`;
+        const headers = new Headers();
+        for (const [key, val] of Object.entries(nodeReq.headers)) {
+          if (!val) continue;
+          // Node reports repeated headers as arrays; keep every value.
+          for (const v of Array.isArray(val) ? val : [val]) headers.append(key, v);
+        }
+
+        // GET/HEAD requests must not carry a body in the fetch Request API.
+        const hasBody = nodeReq.method !== 'GET' && nodeReq.method !== 'HEAD';
+        const body = hasBody ? await readBody(nodeReq) : null;
+
+        const webReq = new Request(url, {
+          method: nodeReq.method,
+          headers,
+          body,
+        });
+
+        const webRes = await fetch(webReq);
+
+        nodeRes.statusCode = webRes.status;
+        webRes.headers.forEach((val, key) => {
+          nodeRes.setHeader(key, val);
+        });
+
+        const resBody = await webRes.arrayBuffer();
+        nodeRes.end(Buffer.from(resBody));
+      } catch (err) {
+        // Report handler/stream failures as a JSON 500 rather than leaving
+        // the socket hanging or throwing inside the request callback.
+        const message = err instanceof Error ? err.message : String(err);
+        nodeRes.statusCode = 500;
+        nodeRes.setHeader('Content-Type', 'application/json');
+        nodeRes.end(JSON.stringify({ error: message }));
+      }
+    });
+
+    server.listen(port, hostname);
+
+    return {
+      stop() { server.close(); },
+      get port() { return boundPort(); },
+      hostname,
+    };
+  },
+
+  /**
+   * Run `cmd` ([command, ...args]) synchronously.
+   * stdout/stderr are captured only when requested as 'pipe'.
+   */
+  spawnSync(cmd, options = {}) {
+    const [command, ...args] = cmd;
+    const result = spawnSync(command, args, {
+      stdio: [
+        options.stdin || 'pipe',
+        options.stdout === 'pipe' ? 'pipe' : 'ignore',
+        options.stderr === 'pipe' ? 'pipe' : 'ignore',
+      ],
+      timeout: options.timeout,
+      env: options.env,
+      cwd: options.cwd,
+    });
+
+    return {
+      exitCode: result.status,
+      stdout: result.stdout || Buffer.from(''),
+      stderr: result.stderr || Buffer.from(''),
+    };
+  },
+
+  /** Start `cmd` ([command, ...args]) asynchronously; stdio defaults to all pipes. */
+  spawn(cmd, options = {}) {
+    const [command, ...args] = cmd;
+    const stdio = options.stdio || ['pipe', 'pipe', 'pipe'];
+    const proc = spawn(command, args, {
+      stdio,
+      env: options.env,
+      cwd: options.cwd,
+    });
+
+    return {
+      pid: proc.pid,
+      stdout: proc.stdout,
+      stderr: proc.stderr,
+      stdin: proc.stdin,
+      unref() { proc.unref(); },
+      kill(signal) { proc.kill(signal); },
+    };
+  },
+
+  /** Resolve after `ms` milliseconds. */
+  sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  },
+};
diff --git a/browse/src/cli.ts b/browse/src/cli.ts
index 7d6eacdf..830b2e7c 100644
--- a/browse/src/cli.ts
+++ b/browse/src/cli.ts
@@ -14,7 +14,8 @@ import * as path from 'path';
 import { resolveConfig, ensureStateDir, readVersionHash } from './config';
 
 const config = resolveConfig();
-const MAX_START_WAIT = 8000; // 8 seconds to start
+const IS_WINDOWS = process.platform === 'win32';
+const MAX_START_WAIT = IS_WINDOWS ? 15000 : 8000; // Node+Chromium takes longer on Windows
 
 export function resolveServerScript(
   env: Record<string, string | undefined> = process.env,
@@ -26,7 +27,9 @@ export function resolveServerScript(
   }
 
   // Dev mode: cli.ts runs directly from browse/src
-  if (metaDir.startsWith('/') && !metaDir.includes('$bunfs')) {
+  // On macOS/Linux, import.meta.dir starts with /
+  // On Windows, it starts with a drive letter (e.g., C:\...)
+  if (!metaDir.includes('$bunfs')) {
     const direct = path.resolve(metaDir, 'server.ts');
     if (fs.existsSync(direct)) {
       return direct;
@@ -48,6 +51,31 @@ export function resolveServerScript(
 
 const SERVER_SCRIPT = resolveServerScript();
 
+/**
+ * On Windows, resolve the Node.js-compatible server bundle.
+ * Falls back to null if not found (server will use Bun instead).
+ */ +export function resolveNodeServerScript( + metaDir: string = import.meta.dir, + execPath: string = process.execPath +): string | null { + // Dev mode + if (!metaDir.includes('$bunfs')) { + const distScript = path.resolve(metaDir, '..', 'dist', 'server-node.mjs'); + if (fs.existsSync(distScript)) return distScript; + } + + // Compiled binary: browse/dist/browse → browse/dist/server-node.mjs + if (execPath) { + const adjacent = path.resolve(path.dirname(execPath), 'server-node.mjs'); + if (fs.existsSync(adjacent)) return adjacent; + } + + return null; +} + +const NODE_SERVER_SCRIPT = IS_WINDOWS ? resolveNodeServerScript() : null; + interface ServerState { pid: number; port: number; @@ -139,8 +167,14 @@ async function startServer(): Promise { // Clean up stale state file try { fs.unlinkSync(config.stateFile); } catch {} - // Start server as detached background process - const proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], { + // Start server as detached background process. + // On Windows, Bun can't launch/connect to Playwright's Chromium (oven-sh/bun#4253, #9911). + // Fall back to running the server under Node.js with Bun API polyfills. + const useNode = IS_WINDOWS && NODE_SERVER_SCRIPT; + const serverCmd = useNode + ? 
['node', NODE_SERVER_SCRIPT] + : ['bun', 'run', SERVER_SCRIPT]; + const proc = Bun.spawn(serverCmd, { stdio: ['ignore', 'pipe', 'pipe'], env: { ...process.env, BROWSE_STATE_FILE: config.stateFile }, }); diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 049ed69a..f1ebdea8 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -10,13 +10,14 @@ import { validateNavigationUrl } from './url-validation'; import * as Diff from 'diff'; import * as fs from 'fs'; import * as path from 'path'; +import { TEMP_DIR, isPathWithin } from './platform'; // Security: Path validation to prevent path traversal attacks -const SAFE_DIRECTORIES = ['/tmp', process.cwd()]; +const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()]; export function validateOutputPath(filePath: string): void { const resolved = path.resolve(filePath); - const isSafe = SAFE_DIRECTORIES.some(dir => resolved === dir || resolved.startsWith(dir + '/')); + const isSafe = SAFE_DIRECTORIES.some(dir => isPathWithin(resolved, dir)); if (!isSafe) { throw new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`); } @@ -88,7 +89,7 @@ export async function handleMetaCommand( case 'screenshot': { // Parse priority: flags (--viewport, --clip) → selector (@ref, CSS) → output path const page = bm.getPage(); - let outputPath = '/tmp/browse-screenshot.png'; + let outputPath = `${TEMP_DIR}/browse-screenshot.png`; let clipRect: { x: number; y: number; width: number; height: number } | undefined; let targetSelector: string | undefined; let viewportOnly = false; @@ -147,7 +148,7 @@ export async function handleMetaCommand( case 'pdf': { const page = bm.getPage(); - const pdfPath = args[0] || '/tmp/browse-page.pdf'; + const pdfPath = args[0] || `${TEMP_DIR}/browse-page.pdf`; validateOutputPath(pdfPath); await page.pdf({ path: pdfPath, format: 'A4' }); return `PDF saved: ${pdfPath}`; @@ -155,7 +156,7 @@ export async function handleMetaCommand( case 'responsive': { const page = 
bm.getPage();
-      const prefix = args[0] || '/tmp/browse-responsive';
+      const prefix = args[0] || `${TEMP_DIR}/browse-responsive`;
       validateOutputPath(prefix);
       const viewports = [
         { name: 'mobile', width: 375, height: 812 },
diff --git a/browse/src/platform.ts b/browse/src/platform.ts
new file mode 100644
index 00000000..c022b1d6
--- /dev/null
+++ b/browse/src/platform.ts
@@ -0,0 +1,33 @@
+/**
+ * Cross-platform constants for gstack browse.
+ *
+ * On macOS/Linux: TEMP_DIR = '/tmp', path.sep = '/' — identical to hardcoded values.
+ * On Windows: TEMP_DIR = os.tmpdir(), path.sep = '\\' — correct Windows behavior.
+ */
+
+import * as os from 'os';
+import * as path from 'path';
+
+export const IS_WINDOWS = process.platform === 'win32';
+export const TEMP_DIR = IS_WINDOWS ? os.tmpdir() : '/tmp';
+
+/**
+ * Check whether resolvedPath is dir itself or is nested below it.
+ *
+ * Uses path.relative instead of a string-prefix test so the check follows
+ * Node's path rules for the current platform: a dir that already ends with
+ * a separator (a root such as `/` or `C:\`) still matches, and `..`
+ * segments in either argument are normalized before comparing.
+ *
+ * @param resolvedPath - Absolute path to test (callers pass path.resolve output).
+ * @param dir - Directory that must contain resolvedPath.
+ * @returns true when resolvedPath equals dir or lies anywhere inside it.
+ */
+export function isPathWithin(resolvedPath: string, dir: string): boolean {
+  const rel = path.relative(dir, resolvedPath);
+  if (rel === '') return true; // same location
+  // Leaving dir shows up as a leading '..' segment, or as an absolute result
+  // (different drive on Windows). A name such as '..foo' is still inside.
+  const escapes = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
+  return !escapes;
+}
diff --git a/browse/src/read-commands.ts b/browse/src/read-commands.ts
index e9823325..fad4e78c 100644
--- a/browse/src/read-commands.ts
+++ b/browse/src/read-commands.ts
@@ -10,6 +10,7 @@ import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
 import type { Page } from 'playwright';
 import * as fs from 'fs';
 import * as path from 'path';
+import { TEMP_DIR, isPathWithin } from './platform';
 
 /** Detect await keyword, ignoring comments. Accepted risk: await in string literals triggers wrapping (harmless).
*/ function hasAwait(code: string): boolean { @@ -36,12 +37,12 @@ function wrapForEvaluate(code: string): string { } // Security: Path validation to prevent path traversal attacks -const SAFE_DIRECTORIES = ['/tmp', process.cwd()]; +const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()]; export function validateReadPath(filePath: string): void { if (path.isAbsolute(filePath)) { const resolved = path.resolve(filePath); - const isSafe = SAFE_DIRECTORIES.some(dir => resolved === dir || resolved.startsWith(dir + '/')); + const isSafe = SAFE_DIRECTORIES.some(dir => isPathWithin(resolved, dir)); if (!isSafe) { throw new Error(`Absolute path must be within: ${SAFE_DIRECTORIES.join(', ')}`); } diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index db1dfc7c..24380bad 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -20,6 +20,7 @@ import type { Page, Locator } from 'playwright'; import type { BrowserManager, RefEntry } from './browser-manager'; import * as Diff from 'diff'; +import { TEMP_DIR, isPathWithin } from './platform'; // Roles considered "interactive" for the -i flag const INTERACTIVE_ROLES = new Set([ @@ -61,7 +62,7 @@ export const SNAPSHOT_FLAGS: Array<{ { short: '-s', long: '--selector', description: 'Scope to CSS selector', takesValue: true, valueHint: '', optionKey: 'selector' }, { short: '-D', long: '--diff', description: 'Unified diff against previous snapshot (first call stores baseline)', optionKey: 'diff' }, { short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' }, - { short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /tmp/browse-annotated.png)', takesValue: true, valueHint: '', optionKey: 'outputPath' }, + { short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /browse-annotated.png)', takesValue: true, valueHint: '', optionKey: 'outputPath' }, { short: '-C', long: 
'--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick)', optionKey: 'cursorInteractive' }, ]; @@ -308,11 +309,11 @@ export async function handleSnapshot( // ─── Annotated screenshot (-a) ──────────────────────────── if (opts.annotate) { - const screenshotPath = opts.outputPath || '/tmp/browse-annotated.png'; + const screenshotPath = opts.outputPath || `${TEMP_DIR}/browse-annotated.png`; // Validate output path (consistent with screenshot/pdf/responsive) const resolvedPath = require('path').resolve(screenshotPath); - const safeDirs = ['/tmp', process.cwd()]; - if (!safeDirs.some((dir: string) => resolvedPath === dir || resolvedPath.startsWith(dir + '/'))) { + const safeDirs = [TEMP_DIR, process.cwd()]; + if (!safeDirs.some((dir: string) => isPathWithin(resolvedPath, dir))) { throw new Error(`Path must be within: ${safeDirs.join(', ')}`); } try { diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index 26a46a4b..1bf37eb5 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -10,6 +10,7 @@ import { findInstalledBrowsers, importCookies } from './cookie-import-browser'; import { validateNavigationUrl } from './url-validation'; import * as fs from 'fs'; import * as path from 'path'; +import { TEMP_DIR, isPathWithin } from './platform'; export async function handleWriteCommand( command: string, @@ -277,9 +278,9 @@ export async function handleWriteCommand( if (!filePath) throw new Error('Usage: browse cookie-import '); // Path validation — prevent reading arbitrary files if (path.isAbsolute(filePath)) { - const safeDirs = ['/tmp', process.cwd()]; + const safeDirs = [TEMP_DIR, process.cwd()]; const resolved = path.resolve(filePath); - if (!safeDirs.some(dir => resolved === dir || resolved.startsWith(dir + '/'))) { + if (!safeDirs.some(dir => isPathWithin(resolved, dir))) { throw new Error(`Path must be within: ${safeDirs.join(', ')}`); } } diff --git 
a/browse/test/bun-polyfill.test.ts b/browse/test/bun-polyfill.test.ts
new file mode 100644
index 00000000..7ca25dfa
--- /dev/null
+++ b/browse/test/bun-polyfill.test.ts
@@ -0,0 +1,77 @@
+import { describe, test, expect } from 'bun:test';
+import * as path from 'path';
+
+// The polyfill targets Node, not Bun, so every test runs it in a Node.js
+// subprocess; this also keeps the real globalThis.Bun of the test runner intact.
+const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
+
+// JSON.stringify produces a valid JS string literal even when the path contains
+// backslashes (Windows) or quotes; raw '${polyfillPath}' interpolation does not.
+const requirePolyfill = `require(${JSON.stringify(polyfillPath)});`;
+
+/** Run `script` under Node.js with the polyfill loaded first. */
+function runInNode(script: string) {
+  return Bun.spawnSync(['node', '-e', `${requirePolyfill}\n${script}`], {
+    stdout: 'pipe',
+    stderr: 'pipe',
+  });
+}
+
+/** Captured stdout as trimmed lines (tolerates CRLF line endings). */
+function stdoutLines(result: { stdout: { toString(): string } }): string[] {
+  return result.stdout.toString().trim().split(/\r?\n/);
+}
+
+describe('bun-polyfill', () => {
+  test('Bun.sleep resolves after delay', () => {
+    const result = runInNode(`
+      (async () => {
+        const start = Date.now();
+        await Bun.sleep(50);
+        const elapsed = Date.now() - start;
+        console.log(elapsed >= 40 ? 'OK' : 'TOO_FAST');
+      })();
+    `);
+    expect(result.stdout.toString().trim()).toBe('OK');
+    expect(result.exitCode).toBe(0);
+  });
+
+  test('Bun.spawnSync runs a command and returns stdout', () => {
+    // Spawn node itself: on Windows `echo` is a shell builtin, not an executable.
+    const result = runInNode(`
+      const r = Bun.spawnSync([process.execPath, '-e', 'console.log("hello")'], { stdout: 'pipe' });
+      console.log(r.stdout.toString().trim());
+      console.log('exit:' + r.exitCode);
+    `);
+    expect(stdoutLines(result)).toEqual(['hello', 'exit:0']);
+  });
+
+  test('Bun.spawn launches a process with pid', () => {
+    const result = runInNode(`
+      const p = Bun.spawn([process.execPath, '-e', '0'], { stdio: ['pipe', 'pipe', 'pipe'] });
+      console.log(typeof p.pid === 'number' ? 'HAS_PID' : 'NO_PID');
+      console.log(typeof p.kill === 'function' ? 'HAS_KILL' : 'NO_KILL');
+      console.log(typeof p.unref === 'function' ? 'HAS_UNREF' : 'NO_UNREF');
+    `);
+    expect(stdoutLines(result)).toEqual(['HAS_PID', 'HAS_KILL', 'HAS_UNREF']);
+  });
+
+  test('Bun.serve returns a server handle', () => {
+    // Only the shape of the returned object is checked; no request is sent.
+    const result = runInNode(`
+      const server = Bun.serve({
+        port: 0,
+        hostname: '127.0.0.1',
+        fetch(req) {
+          return new Response(JSON.stringify({ ok: true }), {
+            headers: { 'Content-Type': 'application/json' },
+          });
+        },
+      });
+      console.log(typeof server.stop === 'function' ? 'HAS_STOP' : 'NO_STOP');
+      console.log(typeof server.port === 'number' ? 'HAS_PORT' : 'NO_PORT');
+      server.stop();
+    `);
+    expect(stdoutLines(result)).toEqual(['HAS_STOP', 'HAS_PORT']);
+  });
+});
diff --git a/browse/test/config.test.ts b/browse/test/config.test.ts
index 12892ce4..0cbe47fa 100644
--- a/browse/test/config.test.ts
+++ b/browse/test/config.test.ts
@@ -197,6 +197,36 @@ describe('resolveServerScript', () => {
   });
 });
 
+describe('resolveNodeServerScript', () => {
+  const { resolveNodeServerScript } = require('../src/cli');
+
+  test('finds server-node.mjs in dist from dev mode', () => {
+    const srcDir = path.resolve(__dirname, '../src');
+    const distFile = path.resolve(srcDir, '..', 'dist', 'server-node.mjs');
+    const fs = require('fs');
+    // Only test if the file exists (it may not be built yet)
+    if (fs.existsSync(distFile)) {
+      const result = resolveNodeServerScript(srcDir, '');
+      expect(result).toBe(distFile);
+    }
+  });
+
+  test('returns null when server-node.mjs does not exist', () => {
+    const result = resolveNodeServerScript('/nonexistent/$bunfs', '/nonexistent/browse');
expect(result).toBeNull(); + }); + + test('finds server-node.mjs adjacent to compiled binary', () => { + const distDir = path.resolve(__dirname, '../dist'); + const distFile = path.join(distDir, 'server-node.mjs'); + const fs = require('fs'); + if (fs.existsSync(distFile)) { + const result = resolveNodeServerScript('/$bunfs/something', path.join(distDir, 'browse')); + expect(result).toBe(distFile); + } + }); +}); + describe('version mismatch detection', () => { test('detects when versions differ', () => { const stateVersion = 'abc123'; diff --git a/browse/test/platform.test.ts b/browse/test/platform.test.ts new file mode 100644 index 00000000..fb6c64b9 --- /dev/null +++ b/browse/test/platform.test.ts @@ -0,0 +1,37 @@ +import { describe, test, expect } from 'bun:test'; +import { TEMP_DIR, isPathWithin, IS_WINDOWS } from '../src/platform'; + +describe('platform constants', () => { + test('TEMP_DIR is /tmp on non-Windows', () => { + if (!IS_WINDOWS) { + expect(TEMP_DIR).toBe('/tmp'); + } + }); + + test('IS_WINDOWS reflects process.platform', () => { + expect(IS_WINDOWS).toBe(process.platform === 'win32'); + }); +}); + +describe('isPathWithin', () => { + test('path inside directory returns true', () => { + expect(isPathWithin('/tmp/foo', '/tmp')).toBe(true); + }); + + test('path outside directory returns false', () => { + expect(isPathWithin('/etc/foo', '/tmp')).toBe(false); + }); + + test('exact match returns true', () => { + expect(isPathWithin('/tmp', '/tmp')).toBe(true); + }); + + test('partial prefix does not match (path traversal)', () => { + // /tmp-evil should NOT match /tmp + expect(isPathWithin('/tmp-evil/foo', '/tmp')).toBe(false); + }); + + test('nested path returns true', () => { + expect(isPathWithin('/tmp/a/b/c', '/tmp')).toBe(true); + }); +}); diff --git a/package.json b/package.json index ba18c08a..3001c764 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "browse": "./browse/dist/browse" }, "scripts": { - "build": "bun run gen:skill-docs && 
bun run gen:skill-docs --host codex && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build || true", + "build": "bun run gen:skill-docs && bun run gen:skill-docs --host codex && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && bash browse/scripts/build-node-server.sh && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build || true", "gen:skill-docs": "bun run scripts/gen-skill-docs.ts", "dev": "bun run browse/src/cli.ts", "server": "bun run browse/src/server.ts", diff --git a/setup b/setup index cf3e5050..09d2282f 100755 --- a/setup +++ b/setup @@ -12,6 +12,11 @@ GSTACK_DIR="$(cd "$(dirname "$0")" && pwd)" SKILLS_DIR="$(dirname "$GSTACK_DIR")" BROWSE_BIN="$GSTACK_DIR/browse/dist/browse" +IS_WINDOWS=0 +case "$(uname -s)" in + MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;; +esac + # ─── Parse --host flag ───────────────────────────────────────── HOST="claude" while [ $# -gt 0 ]; do @@ -44,10 +49,19 @@ elif [ "$HOST" = "codex" ]; then fi ensure_playwright_browser() { - ( - cd "$GSTACK_DIR" - bun --eval 'import { chromium } from "playwright"; const browser = await chromium.launch(); await browser.close();' - ) >/dev/null 2>&1 + if [ "$IS_WINDOWS" -eq 1 ]; then + # On Windows, Bun can't launch Chromium due to broken pipe handling + # (oven-sh/bun#4253). Use Node.js to verify Chromium works instead. + ( + cd "$GSTACK_DIR" + node -e "const { chromium } = require('playwright'); (async () => { const b = await chromium.launch(); await b.close(); })()" 2>/dev/null + ) + else + ( + cd "$GSTACK_DIR" + bun --eval 'import { chromium } from "playwright"; const browser = await chromium.launch(); await browser.close();' + ) >/dev/null 2>&1 + fi } # 1. 
Build browse binary if needed (smart rebuild: stale sources, package.json, lock) @@ -87,10 +101,32 @@ if ! ensure_playwright_browser; then cd "$GSTACK_DIR" bunx playwright install chromium ) + + if [ "$IS_WINDOWS" -eq 1 ]; then + # On Windows, Node.js launches Chromium (not Bun — see oven-sh/bun#4253). + # Ensure playwright is importable by Node from the gstack directory. + if ! command -v node >/dev/null 2>&1; then + echo "gstack setup failed: Node.js is required on Windows (Bun cannot launch Chromium due to a pipe bug)" >&2 + echo " Install Node.js: https://nodejs.org/" >&2 + exit 1 + fi + echo "Windows detected — verifying Node.js can load Playwright..." + ( + cd "$GSTACK_DIR" + # Bun's node_modules already has playwright; verify Node can require it + node -e "require('playwright')" 2>/dev/null || npm install --no-save playwright + ) + fi fi if ! ensure_playwright_browser; then - echo "gstack setup failed: Playwright Chromium could not be launched" >&2 + if [ "$IS_WINDOWS" -eq 1 ]; then + echo "gstack setup failed: Playwright Chromium could not be launched via Node.js" >&2 + echo " This is a known issue with Bun on Windows (oven-sh/bun#4253)." >&2 + echo " Ensure Node.js is installed and 'node -e \"require('playwright')\"' works." >&2 + else + echo "gstack setup failed: Playwright Chromium could not be launched" >&2 + fi exit 1 fi