From e9fbb664f8f244aab7069c053580e0daec504574 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 11 Mar 2026 19:27:43 -0700 Subject: [PATCH] =?UTF-8?q?refactor:=20reorganize=20codebase=20=E2=80=94?= =?UTF-8?q?=20move=20browse=20CLI=20to=20browse/=20directory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructure project layout: src/ → browse/src/, test/ → browse/test/. Add snapshot testing. Update docs, package.json, and skills integration. Add setup script and TODO tracking. Co-Authored-By: Claude Haiku 4.5 --- .gitignore | 2 +- CLAUDE.md | 32 ++- README.md | 164 ++++++++------ SKILL.md | 78 +++++-- TODO.md | 78 +++++++ {src => browse/src}/browser-manager.ts | 45 +++- {src => browse/src}/buffers.ts | 0 {src => browse/src}/cli.ts | 12 +- {src => browse/src}/meta-commands.ts | 8 +- {src => browse/src}/read-commands.ts | 29 ++- {src => browse/src}/server.ts | 4 +- browse/src/snapshot.ts | 212 ++++++++++++++++++ {src => browse/src}/write-commands.ts | 42 +++- {test => browse/test}/commands.test.ts | 0 {test => browse/test}/fixtures/basic.html | 0 {test => browse/test}/fixtures/forms.html | 0 .../test}/fixtures/responsive.html | 0 browse/test/fixtures/snapshot.html | 55 +++++ {test => browse/test}/fixtures/spa.html | 0 browse/test/snapshot.test.ts | 201 +++++++++++++++++ {test => browse/test}/test-server.ts | 0 package.json | 16 +- .../SKILL.md | 0 .../SKILL.md | 0 {.claude/skills/retro => retro}/SKILL.md | 0 {.claude/skills/review => review}/SKILL.md | 0 .../skills/review => review}/checklist.md | 0 setup | 40 ++++ {.claude/skills/ship => ship}/SKILL.md | 0 29 files changed, 888 insertions(+), 130 deletions(-) create mode 100644 TODO.md rename {src => browse/src}/browser-manager.ts (82%) rename {src => browse/src}/buffers.ts (100%) rename {src => browse/src}/cli.ts (94%) rename {src => browse/src}/meta-commands.ts (96%) rename {src => browse/src}/read-commands.ts (87%) rename {src => browse/src}/server.ts (99%) create mode 100644 browse/src/snapshot.ts rename {src => browse/src}/write-commands.ts (75%) rename {test => browse/test}/commands.test.ts (100%) rename {test => browse/test}/fixtures/basic.html (100%) rename {test => browse/test}/fixtures/forms.html (100%) rename {test => browse/test}/fixtures/responsive.html (100%) create mode 100644 browse/test/fixtures/snapshot.html rename {test => browse/test}/fixtures/spa.html (100%) create mode 100644 browse/test/snapshot.test.ts rename {test => browse/test}/test-server.ts (100%) rename {.claude/skills/plan-exit-review => plan-exit-review}/SKILL.md (100%) rename {.claude/skills/plan-mega-review => plan-mega-review}/SKILL.md (100%) rename {.claude/skills/retro => retro}/SKILL.md (100%) rename {.claude/skills/review => review}/SKILL.md (100%) rename {.claude/skills/review => review}/checklist.md (100%) create mode 100755 setup rename {.claude/skills/ship => ship}/SKILL.md (100%) diff --git a/.gitignore b/.gitignore index 9ce5c763..2d3e8509 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ node_modules/ -dist/ +browse/dist/ /tmp/ *.log bun.lock diff --git a/CLAUDE.md b/CLAUDE.md index a876ad99..d38e3d0f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,20 +1,38 @@ -# gstack-browse development +# gstack development ## Commands ```bash bun install # install dependencies -bun test # run integration tests (40 tests with fixture server) +bun test # run integration tests (browse + snapshot) bun run dev # run CLI in dev mode, e.g. bun run dev goto https://example.com -bun run build # compile binary to dist/browse +bun run build # compile binary to browse/dist/browse +``` + +## Project structure + +``` +gstack/ +├── browse/ # Headless browser CLI (Playwright) +│ ├── src/ # CLI + server + commands +│ ├── test/ # Integration tests + fixtures +│ └── dist/ # Compiled binary +├── ship/ # Ship workflow skill +├── review/ # PR review skill +├── plan-exit-review/# Plan review skill +├── plan-mega-review/# Mega plan review skill +├── retro/ # Retrospective skill +├── setup # One-time setup: build binary + symlink skills +├── SKILL.md # Browse skill (Claude discovers this) +└── package.json # Build scripts for browse ``` ## Deploying to the active skill -The active skill lives at `~/.claude/skills/gstack-browse/`. After making changes: +The active skill lives at `~/.claude/skills/gstack/`. After making changes: 1. Push your branch -2. Pull in the skill directory: `cd ~/.claude/skills/gstack-browse && git pull` -3. Rebuild: `cd ~/.claude/skills/gstack-browse && bun run build` +2. Pull in the skill directory: `cd ~/.claude/skills/gstack && git pull` +3. Rebuild: `cd ~/.claude/skills/gstack && bun run build` -Or copy the binary directly: `cp dist/browse ~/.claude/skills/gstack-browse/dist/browse` +Or copy the binary directly: `cp browse/dist/browse ~/.claude/skills/gstack/browse/dist/browse` diff --git a/README.md b/README.md index 80ea0feb..7ca48374 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,21 @@ -# gstack-browse +# gstack -**The browser tool that Claude Code deserves.** Persistent headless Chromium daemon with ~100ms commands. No MCP. No Chrome extension. No bullshit. +**Garry's Stack — the AI engineering toolkit that Claude Code deserves.** Browser automation, workflow skills, and more. One repo, one install. No MCP. No Chrome extension. No bullshit. Created by [Garry Tan](https://x.com/garrytan), President & CEO of [Y Combinator](https://www.ycombinator.com/). +## What's in the box + +### Browser (`browse`) +Persistent headless Chromium daemon with ~100ms commands. Navigate, click, fill forms, take screenshots, run JavaScript, inspect CSS/DOM, capture console/network logs. The killer feature: **ref-based element selection** via accessibility tree snapshots. + +### Skills +- **ship** — merge, test, review, bump version, changelog, commit, push, PR +- **review** — pre-landing PR review with structural analysis +- **plan-exit-review** — thorough plan review before implementation +- **plan-mega-review** — the most rigorous plan review possible (3 modes) +- **retro** — weekly engineering retrospective with trend tracking + ## The Problem Claude Code needs to browse the web. Check a deployment. Verify a UI change. Read documentation. Fill out a form. Take a screenshot. Simple stuff. @@ -16,24 +28,19 @@ Chrome MCP (the "Claude in Chrome" integration) is the default browser tool in C If you've used it, you know. It's the tool you learn to avoid. -### Why Playwright MCP sucks +### Why MCP itself is the problem for local tools -Playwright MCP wraps Playwright in an MCP server. Sounds reasonable until you actually use it. It inherits all of MCP's problems: connection management, JSON-RPC overhead, schema bloat in context. The server process is fragile. Reconnection logic is flaky. And because it's MCP, every single browser action costs you context window tokens for protocol framing that adds zero value. You're burning your most precious resource (context) on transport protocol garbage instead of actual work. - -### Why MCP itself is the problem - -MCP (Model Context Protocol) is a well-intentioned standard that adds a layer of complexity between the AI and the tool. For browser automation, that layer is pure overhead: +MCP is a well-intentioned standard that adds a layer of complexity between the AI and the tool. For browser automation, that layer is pure overhead: - **Context bloat**: Every MCP tool call includes full JSON schemas, capability declarations, and protocol framing. A simple "get the page text" costs 10x more context tokens than it should. -- **Connection fragility**: MCP uses persistent connections (WebSocket/stdio). Connections drop. Reconnection is unreliable. Your multi-step browser flow dies at step 4 of 7. -- **Cold start tax**: MCP servers need to handshake, declare capabilities, and negotiate. This happens every session. -- **Unnecessary abstraction**: The AI agent is *already running in a shell*. It can already call CLI tools via Bash. MCP adds a client-server protocol on top of... calling a local process. It's solving a problem that doesn't exist for local tools. +- **Connection fragility**: MCP uses persistent connections (WebSocket/stdio). Connections drop. Reconnection is unreliable. +- **Unnecessary abstraction**: The AI agent is *already running in a shell*. It can already call CLI tools via Bash. MCP adds a client-server protocol on top of... calling a local process. **The insight**: Claude Code already has `Bash`. A CLI tool that prints to stdout is the simplest, fastest, most reliable interface possible. No protocol overhead. No connection management. No schema bloat. Just input and output. ## The Solution -gstack-browse is a CLI that talks to a persistent local Chromium daemon via HTTP. That's it. +A CLI that talks to a persistent local Chromium daemon via HTTP. That's it. ``` Claude Code ──Bash──> browse CLI ──HTTP──> Bun server ──Playwright──> Chromium @@ -49,18 +56,16 @@ Claude Code ──Bash──> browse CLI ──HTTP──> Bun server ──Play **No MCP**: Zero protocol overhead. Zero context bloat. The CLI prints plain text to stdout. Claude reads it. Done. -**No Chrome extension**: No permissions dialogs. No "extension not responding." No WebSocket reconnection prayer circles. The browser runs headless in a local process that you control. +**No Chrome extension**: No permissions dialogs. No "extension not responding." No WebSocket reconnection prayer circles. -**Crash recovery**: If Chromium crashes, the server exits. Next CLI call auto-starts a fresh one. No stale state. No zombie processes. No "have you tried restarting the extension?" - -**Works with any agent**: Built for Claude Code, but any coding agent with shell access (Codex, Cursor, etc.) can call the CLI. It's just a binary that prints to stdout. +**Crash recovery**: If Chromium crashes, the server exits. Next CLI call auto-starts a fresh one. No stale state. ## What it can do 40+ commands covering everything Claude Code needs: ```bash -B=~/.claude/skills/gstack-browse/dist/browse +B=~/.claude/skills/gstack/browse/dist/browse # Navigate and read pages $B goto https://yourapp.com @@ -70,7 +75,18 @@ $B links # all links as "text -> href" $B forms # all forms + fields as structured JSON $B accessibility # full ARIA tree -# Interact with pages +# Snapshot: accessibility tree with refs for interaction +$B snapshot -i # interactive elements only +# Output: +# @e1 [link] "Home" +# @e2 [textbox] "Email" +# @e3 [button] "Submit" + +# Interact by ref (after snapshot) +$B fill @e2 "test@test.com" +$B click @e3 + +# Or interact by CSS selector $B click "button.submit" $B fill "#email" "test@test.com" $B select "#country" "US" @@ -96,12 +112,13 @@ $B pdf /tmp/page.pdf # save as PDF # Compare pages $B diff https://prod.app https://staging.app # text diff between two URLs -# Multi-step flows (single call, no shell quoting hell) +# Multi-step flows (single call) echo '[ ["goto", "https://app.com/login"], - ["fill", "#email", "user@test.com"], - ["fill", "#password", "secret"], - ["click", "button[type=submit]"], + ["snapshot", "-i"], + ["fill", "@e2", "user@test.com"], + ["fill", "@e3", "secret"], + ["click", "@e4"], ["wait", ".dashboard"], ["screenshot", "/tmp/logged-in.png"] ]' | $B chain @@ -116,59 +133,44 @@ $B status # health, uptime, tab count $B stop # shut down (or just let it idle-timeout) ``` -The browser **persists between calls**. Navigate once, then query as many times as you want. Cookies, tabs, localStorage all carry over. This is the killer feature MCP-based tools can't match — they either start fresh every call (slow) or maintain fragile persistent connections (unreliable). +The browser **persists between calls**. Navigate once, then query as many times as you want. Cookies, tabs, localStorage all carry over. ## Install -Just clone it. Claude handles the rest on first use (installs dependencies, compiles the binary, ~10 seconds). - -### Option A: Project-level (recommended for teams) - -Run this from the root of your git project: +### 1. Add gstack to your project ```bash -git submodule add https://github.com/garrytan/gstack-browse.git .claude/skills/gstack-browse +# Project-level (teams — committed to repo): +git submodule add https://github.com/garrytan/gstack.git .claude/skills/gstack + +# Or user-level (personal — available everywhere): +git clone https://github.com/garrytan/gstack.git ~/.claude/skills/gstack ``` -Commit the submodule. Everyone who clones your repo gets the browser. +### 2. Add to your CLAUDE.md (required) -**Worktrees**: New worktrees need `git submodule update --init .claude/skills/gstack-browse` to populate the directory. Claude's auto-setup handles this — it detects an empty submodule and runs the init for you. +Paste this into your project's `CLAUDE.md`: -### Option B: User-level (personal) +````markdown +## Browser +Use gstack for all web browsing. Never use `mcp__claude-in-chrome__*` tools. +```` -```bash -git clone https://github.com/garrytan/gstack-browse.git ~/.claude/skills/gstack-browse -``` +### 3. Done -Available across all your projects. No per-repo setup. +First time Claude needs the browser, it will ask to run a 10-second one-time setup. Say yes. All skills are available from then on. **Prerequisite**: [Bun](https://bun.sh/) v1.0+ (Claude will tell you if it's missing). -## Teach Claude to use it - -Add this to your project's `CLAUDE.md`: - -````markdown -## Browser Automation - -Use gstack-browse for all web browsing. NEVER use `mcp__claude-in-chrome__*` tools. +### Update ```bash -B=~/.claude/skills/gstack-browse/dist/browse - -$B goto # navigate -$B text # read page -$B js "document.title" # run JS -$B css "body" "font-family" # check CSS -$B click "button.submit" # interact -$B fill "#email" "test@test.com" # fill forms -$B screenshot /tmp/page.png # visual check -$B console # debug console -$B network # debug network +cd .claude/skills/gstack && git pull && ./setup ``` -Navigate once, then query many times — the browser persists between calls. -```` +### Worktrees + +New worktrees need `git submodule update --init .claude/skills/gstack` to populate the directory. Claude's auto-setup handles this — it detects an empty submodule and runs the init for you. ## Command Reference @@ -176,6 +178,7 @@ Navigate once, then query many times — the browser persists between calls. |----------|----------| | Navigate | `goto `, `back`, `forward`, `reload`, `url` | | Read | `text`, `html [sel]`, `links`, `forms`, `accessibility` | +| Snapshot | `snapshot [-i] [-c] [-d N] [-s sel]` | | Interact | `click `, `fill `, `select `, `hover `, `type `, `press `, `scroll [sel]`, `wait `, `viewport ` | | Inspect | `js `, `eval `, `css `, `attrs `, `console`, `network`, `cookies`, `storage`, `perf` | | Visual | `screenshot [path]`, `pdf [path]`, `responsive [prefix]` | @@ -184,11 +187,28 @@ Navigate once, then query many times — the browser persists between calls. | Multi-step | `chain` (reads JSON array from stdin) | | Server | `status`, `stop`, `restart` | +All commands that take `` accept either CSS selectors or `@ref` after `snapshot`. + ## Architecture -- **Compiled CLI binary** (Bun `--compile`, ~58MB) — ~1ms startup, reads state file for server port + auth token -- **Persistent Bun HTTP server** — launches headless Chromium via Playwright on localhost -- **Bearer token auth** — random UUID per server session, stored in state file (chmod 600) +``` +gstack/ +├── browse/ # Browser CLI (Playwright) +│ ├── src/ # CLI + server + commands + snapshot +│ ├── test/ # Integration tests + fixtures +│ └── dist/ # Compiled binary (~58MB) +├── ship/ # Ship workflow skill +├── review/ # PR review skill +├── plan-exit-review/# Plan review skill +├── plan-mega-review/# Mega plan review skill +├── retro/ # Retrospective skill +├── setup # One-time setup: build + symlink skills +└── SKILL.md # Browse skill (Claude discovers this) +``` + +- **Compiled CLI binary** (Bun `--compile`) — ~1ms startup +- **Persistent Bun HTTP server** — launches headless Chromium via Playwright, localhost:9400-9410 +- **Bearer token auth** — random UUID per session, stored in state file (chmod 600) - **Console/network buffers** — all entries in memory, flushed to `/tmp/browse-*.log` every 1s - **Auto-idle shutdown** — 30 minutes (configurable via `BROWSE_IDLE_TIMEOUT`) - **Crash handling** — Chromium crash kills server, CLI auto-restarts on next command @@ -209,25 +229,25 @@ browse_port = CONDUCTOR_PORT - 45600 Each instance has its own Chromium process, tabs, cookies, console/network logs. No cross-workspace interference. You can also set `BROWSE_PORT` directly if you're not using Conductor. +## Performance comparison + +| Tool | First call | Subsequent calls | Context overhead per call | +|------|-----------|-----------------|-------------------------- +| Chrome MCP | ~5s | ~2-5s | ~2000 tokens (schema + protocol) | +| Playwright MCP | ~3s | ~1-3s | ~1500 tokens (schema + protocol) | +| **gstack** | **~3s** | **~100-200ms** | **0 tokens** (plain text stdout) | + +The context overhead difference compounds fast. In a 20-command browser session, MCP tools burn 30,000-40,000 tokens on protocol framing alone. gstack burns zero. + ## Development ```bash bun install # install dependencies -bun test # run 40 integration tests (~2s) +bun test # run 55 integration tests (~3s) bun run dev # run CLI from source (no compile) -bun run build # compile to dist/browse +bun run build # compile to browse/dist/browse ``` -## Performance comparison - -| Tool | First call | Subsequent calls | Context overhead per call | -|------|-----------|-----------------|--------------------------| -| Chrome MCP | ~5s | ~2-5s | ~2000 tokens (schema + protocol) | -| Playwright MCP | ~3s | ~1-3s | ~1500 tokens (schema + protocol) | -| **gstack-browse** | **~3s** | **~100-200ms** | **0 tokens** (plain text stdout) | - -The context overhead difference compounds fast. In a 20-command browser session, MCP tools burn 30,000-40,000 tokens on protocol framing alone. gstack-browse burns zero. - ## License MIT diff --git a/SKILL.md b/SKILL.md index 199bec56..dcdaaaf9 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,6 +1,6 @@ --- -name: gstack-browse -version: 0.1.0 +name: gstack +version: 0.2.0 description: | Fast web browsing for Claude Code via persistent headless Chromium daemon. Navigate to any URL, read page content, click elements, fill forms, run JavaScript, take screenshots, @@ -13,7 +13,7 @@ allowed-tools: --- -# gstack-browse: Persistent Browser for Claude Code +# gstack: Persistent Browser for Claude Code Persistent headless Chromium daemon. First call auto-starts the server (~3s). Every subsequent call: ~100-200ms. Auto-shuts down after 30 min idle. @@ -24,9 +24,9 @@ Before using any browse command, find the skill and check if the binary exists: ```bash # Check project-level first, then user-level -if test -x .claude/skills/gstack-browse/dist/browse; then +if test -x .claude/skills/gstack/browse/dist/browse; then echo "READY_PROJECT" -elif test -x ~/.claude/skills/gstack-browse/dist/browse; then +elif test -x ~/.claude/skills/gstack/browse/dist/browse; then echo "READY_USER" else echo "NEEDS_SETUP" @@ -36,16 +36,16 @@ fi Set `B` to whichever path is READY and use it for all commands. Prefer project-level if both exist. If `NEEDS_SETUP`: -1. Tell the user: "gstack-browse needs a one-time setup (~10 seconds). OK to proceed?" Then STOP and wait for their response. -2. If they approve, determine the skill directory (project-level `.claude/skills/gstack-browse` or user-level `~/.claude/skills/gstack-browse`) and run: +1. Tell the user: "gstack needs a one-time setup (~10 seconds). OK to proceed?" Then STOP and wait for their response. +2. If they approve, determine the skill directory (project-level `.claude/skills/gstack` or user-level `~/.claude/skills/gstack`) and run: ```bash SKILL_DIR= # If submodule exists but isn't initialized (empty dir, no package.json): test -f "$SKILL_DIR/package.json" || git submodule update --init "$SKILL_DIR" -# Install and build -cd "$SKILL_DIR" && bun install && bun run build +# Build binary + register skills +cd "$SKILL_DIR" && ./setup ``` 3. If `bun` is not installed, tell the user to install it: `curl -fsSL https://bun.sh/install | bash` @@ -53,7 +53,7 @@ Once setup is done, it never needs to run again (the compiled binary persists ac ## IMPORTANT -- Use the compiled binary via Bash: `.claude/skills/gstack-browse/dist/browse` (project) or `~/.claude/skills/gstack-browse/dist/browse` (user). +- Use the compiled binary via Bash: `.claude/skills/gstack/browse/dist/browse` (project) or `~/.claude/skills/gstack/browse/dist/browse` (user). - NEVER use `mcp__claude-in-chrome__*` tools. They are slow and unreliable. - The browser persists between calls — cookies, tabs, and state carry over. - The server auto-starts on first command. No setup needed. @@ -61,7 +61,7 @@ Once setup is done, it never needs to run again (the compiled binary persists ac ## Quick Reference ```bash -B=~/.claude/skills/gstack-browse/dist/browse +B=~/.claude/skills/gstack/browse/dist/browse # Navigate to a page $B goto https://example.com @@ -72,16 +72,25 @@ $B text # Take a screenshot (then Read the image) $B screenshot /tmp/page.png +# Snapshot: accessibility tree with refs +$B snapshot -i + +# Click by ref (after snapshot) +$B click @e3 + +# Fill by ref +$B fill @e4 "test@test.com" + # Run JavaScript $B js "document.title" # Get all links $B links -# Click something +# Click by CSS selector $B click "button.submit" -# Fill a form +# Fill a form by CSS selector $B fill "#email" "test@test.com" $B fill "#password" "abc123" $B click "button[type=submit]" @@ -129,9 +138,30 @@ browse forms All forms + fields as JSON browse accessibility Accessibility tree snapshot (ARIA) ``` +### Snapshot (ref-based element selection) +``` +browse snapshot Full accessibility tree with @refs +browse snapshot -i Interactive elements only (buttons, links, inputs) +browse snapshot -c Compact (no empty structural elements) +browse snapshot -d Limit depth to N levels +browse snapshot -s Scope to CSS selector +``` + +After snapshot, use @refs as selectors in any command: +``` +browse click @e3 Click the element assigned ref @e3 +browse fill @e4 "value" Fill the input assigned ref @e4 +browse hover @e1 Hover the element assigned ref @e1 +browse html @e2 Get innerHTML of ref @e2 +browse css @e5 "color" Get computed CSS of ref @e5 +browse attrs @e6 Get attributes of ref @e6 +``` + +Refs are invalidated on navigation — run `snapshot` again after `goto`. + ### Interaction ``` -browse click Click element +browse click Click element (CSS selector or @ref) browse fill Fill input field browse select Select dropdown value browse hover Hover over element @@ -172,7 +202,7 @@ browse diff Text diff between two pages ### Multi-step (chain) ``` -echo '[["goto","https://example.com"],["fill","#email","test@test.com"],["click","#submit"],["screenshot","/tmp/result.png"]]' | browse chain +echo '[["goto","https://example.com"],["snapshot","-i"],["click","@e1"],["screenshot","/tmp/result.png"]]' | browse chain ``` ### Tabs @@ -193,27 +223,27 @@ browse restart Kill + restart server ## Speed Rules 1. **Navigate once, query many times.** `goto` loads the page; then `text`, `js`, `css`, `screenshot` all run against the loaded page instantly. -2. **Use `js` for precision.** `js "document.querySelector('.price').textContent"` is faster than parsing full page text. -3. **Use `links` to survey.** Faster than `text` when you just need navigation structure. -4. **Use `chain` for multi-step flows.** Avoids CLI overhead per step. -5. **Use `responsive` for layout checks.** One command = 3 viewport screenshots. +2. **Use `snapshot -i` for interaction.** Get refs for all interactive elements, then click/fill by ref. No need to guess CSS selectors. +3. **Use `js` for precision.** `js "document.querySelector('.price').textContent"` is faster than parsing full page text. +4. **Use `links` to survey.** Faster than `text` when you just need navigation structure. +5. **Use `chain` for multi-step flows.** Avoids CLI overhead per step. +6. **Use `responsive` for layout checks.** One command = 3 viewport screenshots. ## When to Use What | Task | Commands | |------|----------| | Read a page | `goto ` then `text` | +| Interact with elements | `snapshot -i` then `click @e3` | | Check if element exists | `js "!!document.querySelector('.thing')"` | | Extract specific data | `js "document.querySelector('.price').textContent"` | | Visual check | `screenshot /tmp/x.png` then Read the image | -| Fill and submit form | `fill "#email" "val"` → `click "#submit"` → `screenshot` | -| Check CSS | `css "selector" "property"` | -| Inspect DOM | `html "selector"` or `attrs "selector"` | +| Fill and submit form | `snapshot -i` → `fill @e4 "val"` → `click @e5` → `screenshot` | +| Check CSS | `css "selector" "property"` or `css @e3 "property"` | +| Inspect DOM | `html "selector"` or `attrs @e3` | | Debug console errors | `console` | | Check network requests | `network` | | Check local dev | `goto http://127.0.0.1:3000` | -| Check staging | `goto https://staging.garryslist.org` | -| Check production | `goto https://garryslist.org` | | Compare two pages | `diff ` | | Mobile layout check | `responsive /tmp/prefix` | | Multi-step flow | `echo '[...]' \| browse chain` | diff --git a/TODO.md b/TODO.md new file mode 100644 index 00000000..f08cff23 --- /dev/null +++ b/TODO.md @@ -0,0 +1,78 @@ +# TODO — gstack roadmap + +## Phase 1: Foundations (v0.2.0) + - [x] Rename to gstack + - [x] Restructure to monorepo layout + - [x] Setup script for skill symlinks + - [x] Snapshot command with ref-based element selection + - [x] Snapshot tests + +## Phase 2: Enhanced Browser + - [ ] Annotated screenshots (--annotate flag, numbered labels on elements mapped to refs) + - [ ] Snapshot diffing (compare before/after accessibility trees, verify actions worked) + - [ ] Dialog handling (dialog accept/dismiss — prevents browser lockup) + - [ ] File upload (upload ) + - [ ] Cursor-interactive elements (-C flag, detect divs with cursor:pointer/onclick/tabindex) + - [ ] Element state checks (is visible/enabled/checked ) + +## Phase 3: QA Testing Agent (dogfood skill) + - [ ] SKILL.md — 6-phase workflow: Initialize → Authenticate → Orient → Explore → Document → Wrap up + - [ ] Issue taxonomy reference (7 categories: visual, functional, UX, content, performance, console, accessibility) + - [ ] Severity classification (critical/high/medium/low) + - [ ] Exploration checklist per page + - [ ] Report template (structured markdown with per-issue evidence) + - [ ] Repro-first philosophy: every issue gets evidence before moving on + - [ ] Two evidence tiers: interactive bugs (video + step-by-step screenshots), static bugs (single annotated screenshot) + - [ ] Video recording (record start/stop for WebM capture via Playwright) + - [ ] Key guidance: 5-10 well-documented issues per session, depth over breadth, write incrementally + +## Phase 4: Skill + Browser Integration + - [ ] ship + browse: post-deploy verification + - Browse staging/preview URL after push + - Screenshot key pages + - Check console for JS errors + - Compare staging vs prod via snapshot diff + - Include verification screenshots in PR body + - STOP if critical errors found + - [ ] review + browse: visual diff review + - Browse PR's preview deploy + - Annotated screenshots of changed pages + - Compare against production visually + - Check responsive layouts (mobile/tablet/desktop) + - Verify accessibility tree hasn't regressed + - [ ] deploy-verify skill: lightweight post-deploy smoke test + - Hit key URLs, verify 200s + - Screenshot critical pages + - Console error check + - Compare against baseline snapshots + - Pass/fail with evidence + +## Phase 5: State & Sessions + - [ ] Sessions (isolated browser instances with separate cookies/storage/history) + - [ ] State persistence (save/load cookies + localStorage to JSON files) + - [ ] Auth vault (encrypted credential storage, referenced by name, LLM never sees passwords) + - [ ] retro + browse: deployment health tracking + - Screenshot production state + - Check perf metrics (page load times) + - Count console errors across key pages + - Track trends over retro window + +## Phase 6: Advanced Browser + - [ ] Iframe support (frame , frame main) + - [ ] Semantic locators (find role/label/text/placeholder/testid with actions) + - [ ] Device emulation presets (set device "iPhone 16 Pro") + - [ ] Network mocking/routing (intercept, block, mock requests) + - [ ] Download handling (click-to-download with path control) + - [ ] Content safety (--max-output truncation, --allowed-domains) + - [ ] Streaming (WebSocket live preview for pair browsing) + - [ ] CDP mode (connect to already-running Chrome/Electron apps) + +## Ideas & Notes + - Browser is the nervous system — every skill should be able to see, interact with, and verify the web + - Skills are the product; the browser enables them + - One repo, one install, entire AI engineering workflow + - Bun compiled binary matches Rust CLI performance for this use case (bottleneck is Chromium, not CLI parsing) + - Accessibility tree snapshots use ~200-400 tokens vs ~3000-5000 for full DOM — critical for AI context efficiency + - Locator map approach for refs: store Map on BrowserManager, no DOM mutation, no CSP issues + - Snapshot scoping (-i, -c, -d, -s flags) is critical for performance on large pages + - All new commands follow existing pattern: add to command set, add switch case, return string diff --git a/src/browser-manager.ts b/browse/src/browser-manager.ts similarity index 82% rename from src/browser-manager.ts rename to browse/src/browser-manager.ts index 6d9c75f0..033ed874 100644 --- a/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -7,7 +7,7 @@ * We do NOT try to self-heal — don't hide failure. */ -import { chromium, type Browser, type BrowserContext, type Page } from 'playwright'; +import { chromium, type Browser, type BrowserContext, type Page, type Locator } from 'playwright'; import { addConsoleEntry, addNetworkEntry, networkBuffer, type LogEntry, type NetworkEntry } from './buffers'; export class BrowserManager { @@ -19,6 +19,9 @@ export class BrowserManager { private extraHeaders: Record = {}; private customUserAgent: string | null = null; + // ─── Ref Map (snapshot → @e1, @e2, ...) ──────────────────── + private refMap: Map = new Map(); + async launch() { this.browser = await chromium.launch({ headless: true }); @@ -139,6 +142,37 @@ export class BrowserManager { } } + // ─── Ref Map ────────────────────────────────────────────── + setRefMap(refs: Map) { + this.refMap = refs; + } + + clearRefs() { + this.refMap.clear(); + } + + /** + * Resolve a selector that may be a @ref (e.g., "@e3") or a CSS selector. + * Returns { locator } for refs or { selector } for CSS selectors. + */ + resolveRef(selector: string): { locator: Locator } | { selector: string } { + if (selector.startsWith('@e')) { + const ref = selector.slice(1); // "e3" + const locator = this.refMap.get(ref); + if (!locator) { + throw new Error( + `Ref ${selector} not found. Page may have changed — run 'snapshot' to get fresh refs.` + ); + } + return { locator }; + } + return { selector }; + } + + getRefCount(): number { + return this.refMap.size; + } + // ─── Viewport ────────────────────────────────────────────── async setViewport(width: number, height: number) { await this.getPage().setViewportSize({ width, height }); @@ -159,8 +193,15 @@ export class BrowserManager { this.customUserAgent = ua; } - // ─── Console/Network Wiring ──────────────────────────────── + // ─── Console/Network/Ref Wiring ──────────────────────────── private wirePageEvents(page: Page) { + // Clear ref map on navigation — refs point to stale elements after page change + page.on('framenavigated', (frame) => { + if (frame === page.mainFrame()) { + this.clearRefs(); + } + }); + page.on('console', (msg) => { addConsoleEntry({ timestamp: Date.now(), diff --git a/src/buffers.ts b/browse/src/buffers.ts similarity index 100% rename from src/buffers.ts rename to browse/src/buffers.ts diff --git a/src/cli.ts b/browse/src/cli.ts similarity index 94% rename from src/cli.ts rename to browse/src/cli.ts index 5befe779..b24e4513 100644 --- a/src/cli.ts +++ b/browse/src/cli.ts @@ -1,5 +1,5 @@ /** - * gstack-browse CLI — thin wrapper that talks to the persistent server + * gstack CLI — thin wrapper that talks to the persistent server * * Flow: * 1. Read /tmp/browse-server.json for port + token @@ -22,7 +22,7 @@ const STATE_FILE = process.env.BROWSE_STATE_FILE || `/tmp/browse-server${INSTANC const SERVER_SCRIPT = process.env.BROWSE_SERVER_SCRIPT || (import.meta.dir.startsWith('/') && !import.meta.dir.includes('$bunfs') ? path.resolve(import.meta.dir, 'server.ts') - : path.resolve(process.env.HOME || '/tmp', '.claude/skills/gstack-browse/src/server.ts')); + : path.resolve(process.env.HOME || '/tmp', '.claude/skills/gstack/browse/src/server.ts')); const MAX_START_WAIT = 8000; // 8 seconds to start interface ServerState { @@ -176,7 +176,7 @@ async function main() { const args = process.argv.slice(2); if (args.length === 0 || args[0] === '--help' || args[0] === '-h') { - console.log(`gstack-browse — Fast headless browser for AI coding agents + console.log(`gstack browse — Fast headless browser for AI coding agents Usage: browse [args...] @@ -189,11 +189,15 @@ Inspection: js | eval | css | attrs console [--clear] | network [--clear] cookies | storage [set ] | perf Visual: screenshot [path] | pdf [path] | responsive [prefix] +Snapshot: snapshot [-i] [-c] [-d N] [-s sel] Compare: diff Multi-step: chain (reads JSON from stdin) Tabs: tabs | tab | newtab [url] | closetab [id] Server: status | cookie = | header : - useragent | stop | restart`); + useragent | stop | restart + +Refs: After 'snapshot', use @e1, @e2... as selectors: + click @e3 | fill @e4 "value" | hover @e1`); process.exit(0); } diff --git a/src/meta-commands.ts b/browse/src/meta-commands.ts similarity index 96% rename from src/meta-commands.ts rename to browse/src/meta-commands.ts index 8cb56e91..094e0ea1 100644 --- a/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -1,8 +1,9 @@ /** - * Meta commands — tabs, server control, screenshots, chain, diff + * Meta commands — tabs, server control, screenshots, chain, diff, snapshot */ import type { BrowserManager } from './browser-manager'; +import { handleSnapshot } from './snapshot'; import * as Diff from 'diff'; import * as fs from 'fs'; @@ -192,6 +193,11 @@ export async function handleMetaCommand( return output.join('\n'); } + // ─── Snapshot ───────────────────────────────────── + case 'snapshot': { + return await handleSnapshot(args, bm); + } + default: throw new Error(`Unknown meta command: ${command}`); } diff --git a/src/read-commands.ts b/browse/src/read-commands.ts similarity index 87% rename from src/read-commands.ts rename to browse/src/read-commands.ts index 8932ee05..a473477d 100644 --- a/src/read-commands.ts +++ b/browse/src/read-commands.ts @@ -34,7 +34,11 @@ export async function handleReadCommand( case 'html': { const selector = args[0]; if (selector) { - return await page.innerHTML(selector); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + return await resolved.locator.innerHTML({ timeout: 5000 }); + } + return await page.innerHTML(resolved.selector); } return await page.content(); } @@ -103,13 +107,21 @@ export async function handleReadCommand( case 'css': { const [selector, property] = args; if (!selector || !property) throw new Error('Usage: browse css '); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + const value = await resolved.locator.evaluate( + (el, prop) => getComputedStyle(el).getPropertyValue(prop), + property + ); + return value; + } const value = await page.evaluate( ([sel, prop]) => { const el = document.querySelector(sel); if (!el) return `Element not found: ${sel}`; return getComputedStyle(el).getPropertyValue(prop); }, - [selector, property] + [resolved.selector, property] ); return value; } @@ -117,6 +129,17 @@ export async function handleReadCommand( case 'attrs': { const selector = args[0]; if (!selector) throw new Error('Usage: browse attrs '); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + const attrs = await resolved.locator.evaluate((el) => { + const result: Record = {}; + for (const attr of el.attributes) { + result[attr.name] = attr.value; + } + return result; + }); + return JSON.stringify(attrs, null, 2); + } const attrs = await page.evaluate((sel) => { const el = document.querySelector(sel); if (!el) return `Element not found: ${sel}`; @@ -125,7 +148,7 @@ export async function handleReadCommand( result[attr.name] = attr.value; } return result; - }, selector); + }, resolved.selector); return typeof attrs === 'string' ? attrs : JSON.stringify(attrs, null, 2); } diff --git a/src/server.ts b/browse/src/server.ts similarity index 99% rename from src/server.ts rename to browse/src/server.ts index 694443fd..af183759 100644 --- a/src/server.ts +++ b/browse/src/server.ts @@ -1,5 +1,5 @@ /** - * gstack-browse server — persistent Chromium daemon + * gstack browse server — persistent Chromium daemon * * Architecture: * Bun.serve HTTP on localhost → routes commands to Playwright @@ -100,7 +100,7 @@ const META_COMMANDS = new Set([ 'status', 'stop', 'restart', 'screenshot', 'pdf', 'responsive', 'chain', 'diff', - 'url', + 'url', 'snapshot', ]); // Find port: deterministic from CONDUCTOR_PORT, or scan range diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts new file mode 100644 index 00000000..d8d0da0f --- /dev/null +++ b/browse/src/snapshot.ts @@ -0,0 +1,212 @@ +/** + * Snapshot command — accessibility tree with ref-based element selection + * + * Architecture (Locator map — no DOM mutation): + * 1. page.locator(scope).ariaSnapshot() → YAML-like accessibility tree + * 2. Parse tree, assign refs @e1, @e2, ... + * 3. Build Playwright Locator for each ref (getByRole + nth) + * 4. Store Map on BrowserManager + * 5. Return compact text output with refs prepended + * + * Later: "click @e3" → look up Locator → locator.click() + */ + +import type { Page, Locator } from 'playwright'; +import type { BrowserManager } from './browser-manager'; + +// Roles considered "interactive" for the -i flag +const INTERACTIVE_ROLES = new Set([ + 'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', + 'listbox', 'menuitem', 'menuitemcheckbox', 'menuitemradio', + 'option', 'searchbox', 'slider', 'spinbutton', 'switch', 'tab', + 'treeitem', +]); + +interface SnapshotOptions { + interactive?: boolean; // -i: only interactive elements + compact?: boolean; // -c: remove empty structural elements + depth?: number; // -d N: limit tree depth + selector?: string; // -s SEL: scope to CSS selector +} + +interface ParsedNode { + indent: number; + role: string; + name: string | null; + props: string; // e.g., "[level=1]" + children: string; // inline text content after ":" + rawLine: string; +} + +/** + * Parse CLI args into SnapshotOptions + */ +export function parseSnapshotArgs(args: string[]): SnapshotOptions { + const opts: SnapshotOptions = {}; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case '-i': + case '--interactive': + opts.interactive = true; + break; + case '-c': + case '--compact': + opts.compact = true; + break; + case '-d': + case '--depth': + opts.depth = parseInt(args[++i], 10); + if (isNaN(opts.depth!)) throw new Error('Usage: snapshot -d '); + break; + case '-s': + case '--selector': + opts.selector = args[++i]; + if (!opts.selector) throw new Error('Usage: snapshot -s '); + break; + default: + throw new Error(`Unknown snapshot flag: ${args[i]}`); + } + } + return opts; +} + +/** + * Parse one line of ariaSnapshot output. + * + * Format examples: + * - heading "Test" [level=1] + * - link "Link A": + * - /url: /a + * - textbox "Name" + * - paragraph: Some text + * - combobox "Role": + */ +function parseLine(line: string): ParsedNode | null { + // Match: (indent)(- )(role)( "name")?( [props])?(: inline)? + const match = line.match(/^(\s*)-\s+(\w+)(?:\s+"([^"]*)")?(?:\s+(\[.*?\]))?\s*(?::\s*(.*))?$/); + if (!match) { + // Skip metadata lines like "- /url: /a" + return null; + } + return { + indent: match[1].length, + role: match[2], + name: match[3] ?? null, + props: match[4] || '', + children: match[5]?.trim() || '', + rawLine: line, + }; +} + +/** + * Take an accessibility snapshot and build the ref map. + */ +export async function handleSnapshot( + args: string[], + bm: BrowserManager +): Promise { + const opts = parseSnapshotArgs(args); + const page = bm.getPage(); + + // Get accessibility tree via ariaSnapshot + let rootLocator: Locator; + if (opts.selector) { + rootLocator = page.locator(opts.selector); + const count = await rootLocator.count(); + if (count === 0) throw new Error(`Selector not found: ${opts.selector}`); + } else { + rootLocator = page.locator('body'); + } + + const ariaText = await rootLocator.ariaSnapshot(); + if (!ariaText || ariaText.trim().length === 0) { + bm.setRefMap(new Map()); + return '(no accessible elements found)'; + } + + // Parse the ariaSnapshot output + const lines = ariaText.split('\n'); + const refMap = new Map(); + const output: string[] = []; + let refCounter = 1; + + // Track role+name occurrences for nth() disambiguation + const roleNameCounts = new Map(); + const roleNameSeen = new Map(); + + // First pass: count role+name pairs for disambiguation + for (const line of lines) { + const node = parseLine(line); + if (!node) continue; + const key = `${node.role}:${node.name || ''}`; + roleNameCounts.set(key, (roleNameCounts.get(key) || 0) + 1); + } + + // Second pass: assign refs and build locators + for (const line of lines) { + const node = parseLine(line); + if (!node) continue; + + const depth = Math.floor(node.indent / 2); + const isInteractive = INTERACTIVE_ROLES.has(node.role); + + // Depth filter + if (opts.depth !== undefined && depth > opts.depth) continue; + + // Interactive filter: skip non-interactive but still count for locator indices + if (opts.interactive && !isInteractive) { + // Still track for nth() counts + const key = `${node.role}:${node.name || ''}`; + roleNameSeen.set(key, (roleNameSeen.get(key) || 0) + 1); + continue; + } + + // Compact filter: skip elements with no name and no inline content that aren't interactive + if (opts.compact && !isInteractive && !node.name && !node.children) continue; + + // Assign ref + const ref = `e${refCounter++}`; + const indent = ' '.repeat(depth); + + // Build Playwright locator + const key = `${node.role}:${node.name || ''}`; + const seenIndex = roleNameSeen.get(key) || 0; + roleNameSeen.set(key, seenIndex + 1); + const totalCount = roleNameCounts.get(key) || 1; + + let locator: Locator; + if (opts.selector) { + locator = page.locator(opts.selector).getByRole(node.role as any, { + name: node.name || undefined, + }); + } else { + locator = page.getByRole(node.role as any, { + name: node.name || undefined, + }); + } + + // Disambiguate with nth() if multiple elements share role+name + if (totalCount > 1) { + locator = locator.nth(seenIndex); + } + + refMap.set(ref, locator); + + // Format output line + let outputLine = `${indent}@${ref} [${node.role}]`; + if (node.name) outputLine += ` "${node.name}"`; + if (node.props) outputLine += ` ${node.props}`; + if (node.children) outputLine += `: ${node.children}`; + + output.push(outputLine); + } + + // Store ref map on BrowserManager + bm.setRefMap(refMap); + + if (output.length === 0) { + return '(no interactive elements found)'; + } + + return output.join('\n'); +} diff --git a/src/write-commands.ts b/browse/src/write-commands.ts similarity index 75% rename from src/write-commands.ts rename to browse/src/write-commands.ts index ad77ba02..e1c91942 100644 --- a/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -41,7 +41,12 @@ export async function handleWriteCommand( case 'click': { const selector = args[0]; if (!selector) throw new Error('Usage: browse click '); - await page.click(selector, { timeout: 5000 }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.click({ timeout: 5000 }); + } else { + await page.click(resolved.selector, { timeout: 5000 }); + } // Wait briefly for any navigation/DOM update await page.waitForLoadState('domcontentloaded').catch(() => {}); return `Clicked ${selector} → now at ${page.url()}`; @@ -51,7 +56,12 @@ export async function handleWriteCommand( const [selector, ...valueParts] = args; const value = valueParts.join(' '); if (!selector || !value) throw new Error('Usage: browse fill '); - await page.fill(selector, value, { timeout: 5000 }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.fill(value, { timeout: 5000 }); + } else { + await page.fill(resolved.selector, value, { timeout: 5000 }); + } return `Filled ${selector}`; } @@ -59,14 +69,24 @@ export async function handleWriteCommand( const [selector, ...valueParts] = args; const value = valueParts.join(' '); if (!selector || !value) throw new Error('Usage: browse select '); - await page.selectOption(selector, value, { timeout: 5000 }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.selectOption(value, { timeout: 5000 }); + } else { + await page.selectOption(resolved.selector, value, { timeout: 5000 }); + } return `Selected "${value}" in ${selector}`; } case 'hover': { const selector = args[0]; if (!selector) throw new Error('Usage: browse hover '); - await page.hover(selector, { timeout: 5000 }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.hover({ timeout: 5000 }); + } else { + await page.hover(resolved.selector, { timeout: 5000 }); + } return `Hovered ${selector}`; } @@ -87,7 +107,12 @@ export async function handleWriteCommand( case 'scroll': { const selector = args[0]; if (selector) { - await page.locator(selector).scrollIntoViewIfNeeded({ timeout: 5000 }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.scrollIntoViewIfNeeded({ timeout: 5000 }); + } else { + await page.locator(resolved.selector).scrollIntoViewIfNeeded({ timeout: 5000 }); + } return `Scrolled ${selector} into view`; } await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); @@ -98,7 +123,12 @@ export async function handleWriteCommand( const selector = args[0]; if (!selector) throw new Error('Usage: browse wait '); const timeout = args[1] ? parseInt(args[1], 10) : 15000; - await page.waitForSelector(selector, { timeout }); + const resolved = bm.resolveRef(selector); + if ('locator' in resolved) { + await resolved.locator.waitFor({ state: 'visible', timeout }); + } else { + await page.waitForSelector(resolved.selector, { timeout }); + } return `Element ${selector} appeared`; } diff --git a/test/commands.test.ts b/browse/test/commands.test.ts similarity index 100% rename from test/commands.test.ts rename to browse/test/commands.test.ts diff --git a/test/fixtures/basic.html b/browse/test/fixtures/basic.html similarity index 100% rename from test/fixtures/basic.html rename to browse/test/fixtures/basic.html diff --git a/test/fixtures/forms.html b/browse/test/fixtures/forms.html similarity index 100% rename from test/fixtures/forms.html rename to browse/test/fixtures/forms.html diff --git a/test/fixtures/responsive.html b/browse/test/fixtures/responsive.html similarity index 100% rename from test/fixtures/responsive.html rename to browse/test/fixtures/responsive.html diff --git a/browse/test/fixtures/snapshot.html b/browse/test/fixtures/snapshot.html new file mode 100644 index 00000000..3753202d --- /dev/null +++ b/browse/test/fixtures/snapshot.html @@ -0,0 +1,55 @@ + + + + + Snapshot Test Page + + + +

Snapshot Test

+

Subheading

+ + + +
+

Form Section

+
+ + + + + + + +
+
+ +
+
+ +
+
+ +

Some paragraph text that is not interactive.

+ + + + diff --git a/test/fixtures/spa.html b/browse/test/fixtures/spa.html similarity index 100% rename from test/fixtures/spa.html rename to browse/test/fixtures/spa.html diff --git a/browse/test/snapshot.test.ts b/browse/test/snapshot.test.ts new file mode 100644 index 00000000..846c82bb --- /dev/null +++ b/browse/test/snapshot.test.ts @@ -0,0 +1,201 @@ +/** + * Snapshot command tests + * + * Tests: accessibility tree snapshots, ref-based element selection, + * ref invalidation on navigation, and ref resolution in commands. + */ + +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import { startTestServer } from './test-server'; +import { BrowserManager } from '../src/browser-manager'; +import { handleReadCommand } from '../src/read-commands'; +import { handleWriteCommand } from '../src/write-commands'; +import { handleMetaCommand } from '../src/meta-commands'; + +let testServer: ReturnType; +let bm: BrowserManager; +let baseUrl: string; +const shutdown = async () => {}; + +beforeAll(async () => { + testServer = startTestServer(0); + baseUrl = testServer.url; + + bm = new BrowserManager(); + await bm.launch(); +}); + +afterAll(() => { + try { testServer.server.stop(); } catch {} + setTimeout(() => process.exit(0), 500); +}); + +// ─── Snapshot Output ──────────────────────────────────────────── + +describe('Snapshot', () => { + test('snapshot returns accessibility tree with refs', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const result = await handleMetaCommand('snapshot', [], bm, shutdown); + expect(result).toContain('@e'); + expect(result).toContain('[heading]'); + expect(result).toContain('"Snapshot Test"'); + expect(result).toContain('[button]'); + expect(result).toContain('[link]'); + }); + + test('snapshot -i returns only interactive elements', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + expect(result).toContain('[button]'); + expect(result).toContain('[link]'); + expect(result).toContain('[textbox]'); + // Should NOT contain non-interactive roles like heading or paragraph + expect(result).not.toContain('[heading]'); + }); + + test('snapshot -c returns compact output', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const full = await handleMetaCommand('snapshot', [], bm, shutdown); + const compact = await handleMetaCommand('snapshot', ['-c'], bm, shutdown); + // Compact should have fewer lines (empty structural elements removed) + const fullLines = full.split('\n').length; + const compactLines = compact.split('\n').length; + expect(compactLines).toBeLessThanOrEqual(fullLines); + }); + + test('snapshot -d 2 limits depth', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const shallow = await handleMetaCommand('snapshot', ['-d', '2'], bm, shutdown); + const deep = await handleMetaCommand('snapshot', [], bm, shutdown); + // Shallow should have fewer or equal lines + expect(shallow.split('\n').length).toBeLessThanOrEqual(deep.split('\n').length); + }); + + test('snapshot -s "#main" scopes to selector', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const scoped = await handleMetaCommand('snapshot', ['-s', '#main'], bm, shutdown); + // Should contain elements inside #main + expect(scoped).toContain('[button]'); + expect(scoped).toContain('"Submit"'); + // Should NOT contain elements outside #main (like nav links) + expect(scoped).not.toContain('"Internal Link"'); + }); + + test('snapshot on page with no interactive elements', async () => { + // Navigate to about:blank which has minimal content + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // basic.html has links, so this should find those + expect(result).toContain('[link]'); + }); + + test('second snapshot generates fresh refs', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap1 = await handleMetaCommand('snapshot', [], bm, shutdown); + const snap2 = await handleMetaCommand('snapshot', [], bm, shutdown); + // Both should have @e1 (refs restart from 1) + expect(snap1).toContain('@e1'); + expect(snap2).toContain('@e1'); + }); +}); + +// ─── Ref-Based Interaction ────────────────────────────────────── + +describe('Ref resolution', () => { + test('click @ref works after snapshot', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Find a button ref + const buttonLine = snap.split('\n').find(l => l.includes('[button]') && l.includes('"Submit"')); + expect(buttonLine).toBeDefined(); + const refMatch = buttonLine!.match(/@(e\d+)/); + expect(refMatch).toBeDefined(); + const ref = `@${refMatch![1]}`; + const result = await handleWriteCommand('click', [ref], bm); + expect(result).toContain('Clicked'); + }); + + test('fill @ref works after snapshot', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Find a textbox ref (Username) + const textboxLine = snap.split('\n').find(l => l.includes('[textbox]') && l.includes('"Username"')); + expect(textboxLine).toBeDefined(); + const refMatch = textboxLine!.match(/@(e\d+)/); + expect(refMatch).toBeDefined(); + const ref = `@${refMatch![1]}`; + const result = await handleWriteCommand('fill', [ref, 'testuser'], bm); + expect(result).toContain('Filled'); + }); + + test('hover @ref works after snapshot', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + const linkLine = snap.split('\n').find(l => l.includes('[link]')); + expect(linkLine).toBeDefined(); + const refMatch = linkLine!.match(/@(e\d+)/); + const ref = `@${refMatch![1]}`; + const result = await handleWriteCommand('hover', [ref], bm); + expect(result).toContain('Hovered'); + }); + + test('html @ref returns innerHTML', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', [], bm, shutdown); + // Find a heading ref + const headingLine = snap.split('\n').find(l => l.includes('[heading]') && l.includes('"Snapshot Test"')); + expect(headingLine).toBeDefined(); + const refMatch = headingLine!.match(/@(e\d+)/); + const ref = `@${refMatch![1]}`; + const result = await handleReadCommand('html', [ref], bm); + expect(result).toContain('Snapshot Test'); + }); + + test('css @ref returns computed CSS', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', [], bm, shutdown); + const headingLine = snap.split('\n').find(l => l.includes('[heading]') && l.includes('"Snapshot Test"')); + const refMatch = headingLine!.match(/@(e\d+)/); + const ref = `@${refMatch![1]}`; + const result = await handleReadCommand('css', [ref, 'font-family'], bm); + expect(result).toBeTruthy(); + }); + + test('attrs @ref returns element attributes', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + const textboxLine = snap.split('\n').find(l => l.includes('[textbox]') && l.includes('"Username"')); + const refMatch = textboxLine!.match(/@(e\d+)/); + const ref = `@${refMatch![1]}`; + const result = await handleReadCommand('attrs', [ref], bm); + expect(result).toContain('id'); + }); +}); + +// ─── Ref Invalidation ─────────────────────────────────────────── + +describe('Ref invalidation', () => { + test('stale ref after goto returns clear error', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Navigate away — should invalidate refs + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + // Try to use old ref + try { + await handleWriteCommand('click', ['@e1'], bm); + expect(true).toBe(false); // Should not reach here + } catch (err: any) { + expect(err.message).toContain('not found'); + expect(err.message).toContain('snapshot'); + } + }); + + test('refs cleared on page navigation', async () => { + await handleWriteCommand('goto', [baseUrl + '/snapshot.html'], bm); + await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + expect(bm.getRefCount()).toBeGreaterThan(0); + // Navigate + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + expect(bm.getRefCount()).toBe(0); + }); +}); diff --git a/test/test-server.ts b/browse/test/test-server.ts similarity index 100% rename from test/test-server.ts rename to browse/test/test-server.ts diff --git a/package.json b/package.json index 9343986e..f8454a16 100644 --- a/package.json +++ b/package.json @@ -1,18 +1,18 @@ { - "name": "gstack-browse", - "version": "0.1.0", - "description": "Fast headless browser for Claude Code. Persistent Chromium daemon, ~100ms commands, zero MCP overhead.", + "name": "gstack", + "version": "0.2.0", + "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", "bin": { - "browse": "./dist/browse" + "browse": "./browse/dist/browse" }, "scripts": { - "build": "bun build --compile src/cli.ts --outfile dist/browse", - "dev": "bun run src/cli.ts", - "server": "bun run src/server.ts", + "build": "bun build --compile browse/src/cli.ts --outfile browse/dist/browse", + "dev": "bun run browse/src/cli.ts", + "server": "bun run browse/src/server.ts", "test": "bun test", - "start": "bun run src/server.ts" + "start": "bun run browse/src/server.ts" }, "dependencies": { "playwright": "^1.58.2", diff --git a/.claude/skills/plan-exit-review/SKILL.md b/plan-exit-review/SKILL.md similarity index 100% rename from .claude/skills/plan-exit-review/SKILL.md rename to plan-exit-review/SKILL.md diff --git a/.claude/skills/plan-mega-review/SKILL.md b/plan-mega-review/SKILL.md similarity index 100% rename from .claude/skills/plan-mega-review/SKILL.md rename to plan-mega-review/SKILL.md diff --git a/.claude/skills/retro/SKILL.md b/retro/SKILL.md similarity index 100% rename from .claude/skills/retro/SKILL.md rename to retro/SKILL.md diff --git a/.claude/skills/review/SKILL.md b/review/SKILL.md similarity index 100% rename from .claude/skills/review/SKILL.md rename to review/SKILL.md diff --git a/.claude/skills/review/checklist.md b/review/checklist.md similarity index 100% rename from .claude/skills/review/checklist.md rename to review/checklist.md diff --git a/setup b/setup new file mode 100755 index 00000000..3b1f34e6 --- /dev/null +++ b/setup @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# gstack setup — build browser binary + register all skills with Claude Code +set -e + +GSTACK_DIR="$(cd "$(dirname "$0")" && pwd)" +SKILLS_DIR="$(dirname "$GSTACK_DIR")" + +# 1. Build browse binary if needed +if [ ! -x "$GSTACK_DIR/browse/dist/browse" ]; then + echo "Building browse binary..." + cd "$GSTACK_DIR" && bun install && bun run build +fi + +# 2. Only create skill symlinks if we're inside a .claude/skills directory +SKILLS_BASENAME="$(basename "$SKILLS_DIR")" +if [ "$SKILLS_BASENAME" = "skills" ]; then + linked=() + for skill_dir in "$GSTACK_DIR"/*/; do + if [ -f "$skill_dir/SKILL.md" ]; then + skill_name="$(basename "$skill_dir")" + # Skip browse (handled by root SKILL.md) and node_modules + [ "$skill_name" = "browse" ] || [ "$skill_name" = "node_modules" ] && continue + target="$SKILLS_DIR/$skill_name" + if [ ! -e "$target" ]; then + ln -s "$skill_dir" "$target" + linked+=("$skill_name") + fi + fi + done + + echo "gstack ready." + echo " browse: $GSTACK_DIR/browse/dist/browse" + if [ ${#linked[@]} -gt 0 ]; then + echo " linked skills: ${linked[*]}" + fi +else + echo "gstack ready." + echo " browse: $GSTACK_DIR/browse/dist/browse" + echo " (skipped skill symlinks — not inside .claude/skills/)" +fi diff --git a/.claude/skills/ship/SKILL.md b/ship/SKILL.md similarity index 100% rename from .claude/skills/ship/SKILL.md rename to ship/SKILL.md