diff --git a/.agents/skills/gstack-connect-chrome/SKILL.md b/.agents/skills/gstack-connect-chrome/SKILL.md new file mode 100644 index 00000000..b1dfc989 --- /dev/null +++ b/.agents/skills/gstack-connect-chrome/SKILL.md @@ -0,0 +1,411 @@ +--- +name: connect-chrome +description: | + Launch real Chrome controlled by gstack with the Side Panel extension auto-loaded. + One command: connects Claude to a visible Chrome window where you can watch every + action in real time. The extension shows a live activity feed in the Side Panel. + Use when asked to "connect chrome", "open chrome", "real browser", "launch chrome", + "side panel", or "control my browser". +--- + + + +## Preamble (run first) + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +GSTACK_ROOT="$HOME/.codex/skills/gstack" +[ -n "$_ROOT" ] && [ -d "$_ROOT/.agents/skills/gstack" ] && GSTACK_ROOT="$_ROOT/.agents/skills/gstack" +GSTACK_BIN="$GSTACK_ROOT/bin" +GSTACK_BROWSE="$GSTACK_ROOT/browse/dist" +_UPD=$($GSTACK_BIN/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +[ -n "$_UPD" ] && echo "$_UPD" || true +mkdir -p ~/.gstack/sessions +touch ~/.gstack/sessions/"$PPID" +_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ') +find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true +_CONTRIB=$($GSTACK_BIN/gstack-config get gstack_contributor 2>/dev/null || true) +_PROACTIVE=$($GSTACK_BIN/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") +_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") +echo "BRANCH: $_BRANCH" +echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" +source <($GSTACK_BIN/gstack-repo-mode 2>/dev/null) || true +REPO_MODE=${REPO_MODE:-unknown} +echo "REPO_MODE: $REPO_MODE" +_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no") 
+echo "LAKE_INTRO: $_LAKE_SEEN" +_TEL=$($GSTACK_BIN/gstack-config get telemetry 2>/dev/null || true) +_TEL_PROMPTED=$([ -f ~/.gstack/.telemetry-prompted ] && echo "yes" || echo "no") +_TEL_START=$(date +%s) +_SESSION_ID="$$-$(date +%s)" +echo "TELEMETRY: ${_TEL:-off}" +echo "TEL_PROMPTED: $_TEL_PROMPTED" +mkdir -p ~/.gstack/analytics +echo '{"skill":"connect-chrome","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true +# zsh-compatible: use find instead of glob to avoid NOMATCH error +for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && $GSTACK_BIN/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done +``` + +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. + +If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$GSTACK_ROOT/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. + +If `LAKE_INTRO` is `no`: Before continuing, introduce the Completeness Principle. +Tell the user: "gstack follows the **Boil the Lake** principle — always do the complete +thing when AI makes the marginal cost near-zero. 
Read more: https://garryslist.org/posts/boil-the-ocean" +Then offer to open the essay in their default browser: + +```bash +open https://garryslist.org/posts/boil-the-ocean +touch ~/.gstack/.completeness-intro-seen +``` + +Only run `open` if the user says yes. Always run `touch` to mark as seen. This only happens once. + +If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, +ask the user about telemetry. Use AskUserQuestion: + +> Help gstack get better! Community mode shares usage data (which skills you use, how long +> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. +> No code, file paths, or repo names are ever sent. +> Change anytime with `gstack-config set telemetry off`. + +Options: +- A) Help gstack get better! (recommended) +- B) No thanks + +If A: run `$GSTACK_BIN/gstack-config set telemetry community` + +If B: ask a follow-up AskUserQuestion: + +> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, +> no way to connect sessions. Just a counter that helps us know if anyone's out there. + +Options: +- A) Sure, anonymous is fine +- B) No thanks, fully off + +If B→A: run `$GSTACK_BIN/gstack-config set telemetry anonymous` +If B→B: run `$GSTACK_BIN/gstack-config set telemetry off` + +Always run: +```bash +touch ~/.gstack/.telemetry-prompted +``` + +This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. + +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. 
+ +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `$GSTACK_BIN/gstack-config set proactive true` +If B: run `$GSTACK_BIN/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + +## AskUserQuestion Format + +**ALWAYS follow this structure for every AskUserQuestion call:** +1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) +2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. +3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. +4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` + +Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. + +Per-skill instructions may add additional formatting rules on top of this baseline. + +## Completeness Principle — Boil the Lake + +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. 
A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. + +**Effort reference** — always show both scales: + +| Task type | Human team | CC+gstack | Compression | +|-----------|-----------|-----------|-------------| +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | + +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). + +## Repo Ownership — See Something, Say Something + +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). + +Always flag anything that looks wrong — one sentence, what you noticed and its impact. + +## Search Before Building + +Before building anything unfamiliar, **search first.** See `$GSTACK_ROOT/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. + +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: +```bash +jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +``` + +## Contributor Mode + +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. + +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. 
+ +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: +``` +# {Title} +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro +1. {step} +## What would make this a 10 +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} +``` +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. + +## Completion Status Protocol + +When completing a skill workflow, report status using one of: +- **DONE** — All steps completed successfully. Evidence provided for each claim. +- **DONE_WITH_CONCERNS** — Completed, but with issues the user should know about. List each concern. +- **BLOCKED** — Cannot proceed. State what is blocking and what was tried. +- **NEEDS_CONTEXT** — Missing information required to continue. State exactly what you need. + +### Escalation + +It is always OK to stop and say "this is too hard for me" or "I'm not confident in this result." + +Bad work is worse than no work. You will not be penalized for escalating. +- If you have attempted a task 3 times without success, STOP and escalate. +- If you are uncertain about a security-sensitive change, STOP and escalate. +- If the scope of work exceeds what you can verify, STOP and escalate. + +Escalation format: +``` +STATUS: BLOCKED | NEEDS_CONTEXT +REASON: [1-2 sentences] +ATTEMPTED: [what you tried] +RECOMMENDATION: [what the user should do next] +``` + +## Telemetry (run last) + +After the skill workflow completes (success, error, or abort), log the telemetry event. +Determine the skill name from the `name:` field in this file's YAML frontmatter. +Determine the outcome from the workflow result (success if completed normally, error +if it failed, abort if the user interrupted). + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to +`~/.gstack/analytics/` (user config directory, not project files). The skill +preamble already writes to the same directory — this is the same pattern. 
+Skipping this command loses session duration and outcome data. + +Run this bash: + +```bash +_TEL_END=$(date +%s) +_TEL_DUR=$(( _TEL_END - _TEL_START )) +rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true +$GSTACK_ROOT/bin/gstack-telemetry-log \ + --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & +``` + +Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with +success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. +If you cannot determine the outcome, use "unknown". This runs in the background and +never blocks the user. + +## Plan Status Footer + +When you are in plan mode and about to call ExitPlanMode: + +1. Check if the plan file already has a `## GSTACK REVIEW REPORT` section. +2. If it DOES — skip (a review skill already wrote a richer report). +3. If it does NOT — run this command: + +\`\`\`bash +$GSTACK_ROOT/bin/gstack-review-read +\`\`\` + +Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: + +- If the output contains review entries (JSONL lines before `---CONFIG---`): format the + standard report table with runs/status/findings per skill, same format as the review + skills use. +- If the output is `NO_REVIEWS` or empty: write this placeholder table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | 0 | — | — | +| Codex Review | \`/codex review\` | Independent 2nd opinion | 0 | — | — | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | 0 | — | — | +| Design Review | \`/plan-design-review\` | UI/UX gaps | 0 | — | — | + +**VERDICT:** NO REVIEWS YET — run \`/autoplan\` for full review pipeline, or individual reviews above. 
+\`\`\` + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +# /connect-chrome — Launch Real Chrome with Side Panel + +Connect Claude to a visible Chrome window with the gstack extension auto-loaded. +You see every click, every navigation, every action in real time. + +## SETUP (run this check BEFORE any browse command) + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +B="" +[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse" +[ -z "$B" ] && B=$GSTACK_BROWSE/browse +if [ -x "$B" ]; then + echo "READY: $B" +else + echo "NEEDS_SETUP" +fi +``` + +If `NEEDS_SETUP`: +1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait. +2. Run: `cd "$GSTACK_ROOT" && ./setup` +3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash` + +## Step 1: Connect + +```bash +$B connect +``` + +This launches your system Chrome via Playwright with: +- A visible window (headed mode, not headless) +- The gstack Chrome extension pre-loaded +- A green shimmer line + "gstack" pill so you know which window is controlled + +If Chrome is already running, the server restarts in headed mode with a fresh +Chrome instance. Your regular Chrome stays untouched. + +After connecting, print the output to the user. + +## Step 2: Verify + +```bash +$B status +``` + +Confirm the output shows `Mode: cdp`. Print the port number — the user may need +it for the Side Panel. + +## Step 3: Guide the user to the Side Panel + +Use AskUserQuestion: + +> Chrome is launched with gstack control. You should see a green shimmer line at the +> top of the Chrome window and a small "gstack" pill in the bottom-right corner. +> +> The Side Panel extension is pre-loaded. To open it: +> 1. 
Look for the **puzzle piece icon** (Extensions) in Chrome's toolbar +> 2. Click it → find **gstack browse** → click the **pin icon** to pin it +> 3. Click the **gstack icon** in the toolbar +> 4. Click **Open Side Panel** +> +> The Side Panel shows a live feed of every browse command in real time. +> +> **Port:** The browse server is on port {PORT} — the extension auto-detects it +> if you're using the Playwright-controlled Chrome. If the badge stays gray, click +> the gstack icon and enter port {PORT} manually. + +Options: +- A) I can see the Side Panel — let's go! +- B) I can see Chrome but can't find the extension +- C) Something went wrong + +If B: Tell the user: +> The extension should be auto-loaded, but Chrome sometimes doesn't show it +> immediately. Try: +> 1. Type `chrome://extensions` in the address bar +> 2. Look for "gstack browse" — it should be listed and enabled +> 3. If not listed, click "Load unpacked" → navigate to the extension folder +> (press Cmd+Shift+G in the file picker, paste this path): +> `{EXTENSION_PATH}` +> +> Then pin it from the puzzle piece icon and open the Side Panel. + +If C: Run `$B status` and show the output. Check if the server is healthy. + +## Step 4: Demo + +After the user confirms the Side Panel is working, run a quick demo so they +can see the activity feed in action: + +```bash +$B goto https://news.ycombinator.com +``` + +Wait 2 seconds, then: + +```bash +$B snapshot -i +``` + +Tell the user: "Check the Side Panel — you should see the `goto` and `snapshot` +commands appear in the activity feed. Every command Claude runs will show up here +in real time." + +## Step 5: Sidebar chat + +After the activity feed demo, tell the user about the sidebar chat: + +> The Side Panel also has a **chat tab**. Try typing a message like "take a +> snapshot and describe this page." A child Claude instance will execute your +> request in the browser — you'll see the commands appear in the activity feed. 
+> +> The sidebar agent can navigate pages, click buttons, fill forms, and read +> content. Each task gets up to 5 minutes. It runs in an isolated session, so +> it won't interfere with this Claude Code window. + +## Step 6: What's next + +Tell the user: + +> You're all set! Chrome is under Claude's control with the Side Panel showing +> live activity and a chat sidebar for direct commands. Here's what you can do: +> +> - **Chat in the sidebar** — type natural language instructions and Claude +> executes them in the browser +> - **Run any browse command** — `$B goto`, `$B click`, `$B snapshot` — and +> watch it happen in Chrome + the Side Panel +> - **Use /qa or /design-review** — they'll run in the visible Chrome window +> instead of headless. No cookie import needed. +> - **`$B focus`** — bring Chrome to the foreground anytime +> - **`$B disconnect`** — return to headless mode when done + +Then proceed with whatever the user asked to do. If they didn't specify a task, +ask what they'd like to test or browse. diff --git a/BROWSER.md b/BROWSER.md index 086d2278..8f626948 100644 --- a/BROWSER.md +++ b/BROWSER.md @@ -18,6 +18,7 @@ This document covers the command reference and internals of gstack's headless br | Cookies | `cookie-import`, `cookie-import-browser` | Import cookies from file or real browser | | Multi-step | `chain` (JSON from stdin) | Batch commands in one call | | Handoff | `handoff [reason]`, `resume` | Switch to visible Chrome for user takeover | +| Real browser | `connect`, `disconnect`, `focus` | Control real Chrome, visible window | All selector arguments accept CSS selectors, `@e` refs after `snapshot`, or `@c` refs after `snapshot -C`. 50+ commands total plus cookie import. 
@@ -70,6 +71,7 @@ browse/ │ ├── cookie-import-browser.ts # Decrypt + import cookies from real Chromium browsers │ ├── cookie-picker-routes.ts # HTTP routes for interactive cookie picker UI │ ├── cookie-picker-ui.ts # Self-contained HTML/CSS/JS for cookie picker +│ ├── activity.ts # Activity streaming (SSE) for Chrome extension │ └── buffers.ts # CircularBuffer + console/network/dialog capture ├── test/ # Integration tests + HTML fixtures └── dist/ @@ -124,6 +126,125 @@ The server hooks into Playwright's `page.on('console')`, `page.on('response')`, The `console`, `network`, and `dialog` commands read from the in-memory buffers, not disk. +### Real browser mode (`connect`) + +Instead of headless Chromium, `connect` launches your real Chrome as a headed window controlled by Playwright. You see everything Claude does in real time. + +```bash +$B connect # launch real Chrome, headed +$B goto https://app.com # navigates in the visible window +$B snapshot -i # refs from the real page +$B click @e3 # clicks in the real window +$B focus # bring Chrome window to foreground (macOS) +$B status # shows Mode: cdp +$B disconnect # back to headless mode +``` + +The window has a subtle green shimmer line at the top edge and a floating "gstack" pill in the bottom-right corner so you always know which Chrome window is being controlled. + +**How it works:** Playwright's `channel: 'chrome'` launches your system Chrome binary via a native pipe protocol — not CDP WebSocket. All existing browse commands work unchanged because they go through Playwright's abstraction layer. 
+ +**When to use it:** +- QA testing where you want to watch Claude click through your app +- Design review where you need to see exactly what Claude sees +- Debugging where headless behavior differs from real Chrome +- Demos where you're sharing your screen + +**Commands:** + +| Command | What it does | +|---------|-------------| +| `connect` | Launch real Chrome, restart server in headed mode | +| `disconnect` | Close real Chrome, restart in headless mode | +| `focus` | Bring Chrome to foreground (macOS). `focus @e3` also scrolls element into view | +| `status` | Shows `Mode: cdp` when connected, `Mode: launched` when headless | + +**CDP-aware skills:** When in real-browser mode, `/qa` and `/design-review` automatically skip cookie import prompts and headless workarounds. + +### Chrome extension (Side Panel) + +A Chrome extension that shows a live activity feed of browse commands in a Side Panel, plus @ref overlays on the page. + +#### Automatic install (recommended) + +When you run `$B connect`, the extension **auto-loads** into the Playwright-controlled Chrome window. No manual steps needed — the Side Panel is immediately available. + +```bash +$B connect # launches Chrome with extension pre-loaded +# Click the gstack icon in toolbar → Open Side Panel +``` + +The port is auto-configured. You're done. + +#### Manual install (for your regular Chrome) + +If you want the extension in your everyday Chrome (not the Playwright-controlled one), run: + +```bash +bin/gstack-extension # opens chrome://extensions, copies path to clipboard +``` + +Or do it manually: + +1. **Go to `chrome://extensions`** in Chrome's address bar +2. **Toggle "Developer mode" ON** (top-right corner) +3. **Click "Load unpacked"** — a file picker opens +4. 
**Navigate to the extension folder:** Press **Cmd+Shift+G** in the file picker to open "Go to folder", then paste one of these paths: + - Global install: `~/.claude/skills/gstack/extension` + - Dev/source: `<gstack-repo>/extension` + + Press Enter, then click **Select**. + + (Tip: macOS hides folders starting with `.` — press **Cmd+Shift+.** in the file picker to reveal them if you prefer to navigate manually.) + +5. **Pin it:** Click the puzzle piece icon (Extensions) in the toolbar → pin "gstack browse" +6. **Set the port:** Click the gstack icon → enter the port from `$B status` or `.gstack/browse.json` +7. **Open Side Panel:** Click the gstack icon → "Open Side Panel" + +#### What you get + +| Feature | What it does | +|---------|-------------| +| **Toolbar badge** | Green dot when the browse server is reachable, gray when not | +| **Side Panel** | Live scrolling feed of every browse command — shows command name, args, duration, status (success/error) | +| **Refs tab** | After `$B snapshot`, shows the current @ref list (role + name) | +| **@ref overlays** | Floating panel on the page showing current refs | +| **Connection pill** | Small "gstack" pill in the bottom-right corner of every page when connected | + +#### Troubleshooting + +- **Badge stays gray:** Check that the port is correct. The browse server may have restarted on a different port — re-run `$B status` and update the port in the popup. +- **Side Panel is empty:** The feed only shows activity after the extension connects. Run a browse command (`$B snapshot`) to see it appear. +- **Extension disappeared after Chrome update:** Sideloaded extensions persist across updates. If it's gone, reload it from Step 3. + +### Sidebar agent + +The Chrome side panel includes a chat interface. Type a message and a child Claude instance executes it in the browser. The sidebar agent has access to `Bash`, `Read`, `Glob`, and `Grep` tools (same as Claude Code, minus `Edit` and `Write` ... read-only by design). 
+ +**How it works:** + +1. You type a message in the side panel chat +2. The extension POSTs to the local browse server (`/sidebar-command`) +3. The server queues the message and the sidebar-agent process spawns `claude -p` with your message + the current page context +4. Claude executes browse commands via Bash (`$B snapshot`, `$B click @e3`, etc.) +5. Progress streams back to the side panel in real time + +**What you can do:** +- "Take a snapshot and describe what you see" +- "Click the Login button, fill in test@example.com / password123, and submit" +- "Go through every row in this table and extract the names and emails" +- "Navigate to Settings > Account and screenshot it" + +**Timeout:** Each task gets up to 5 minutes. Multi-page workflows (navigating a directory, filling forms across pages) work within this window. If a task times out, the side panel shows an error and you can retry or break it into smaller steps. + +**Session isolation:** Each sidebar session runs in its own git worktree. The sidebar agent won't interfere with your main Claude Code session. + +**Authentication:** The sidebar agent uses the same browser session as headed mode. Two options: +1. Log in manually in the headed browser ... your session persists for the sidebar agent +2. Import cookies from your real Chrome via `/setup-browser-cookies` + +**Random delays:** If you need the agent to pause between actions (e.g., to avoid rate limits), use `sleep` in bash or `$B wait <milliseconds>`. + ### User handoff When the headless browser can't proceed (CAPTCHA, MFA, complex auth), `handoff` opens a visible Chrome window at the exact same page with all cookies, localStorage, and tabs preserved. The user solves the problem manually, then `resume` returns control to the agent with a fresh snapshot. @@ -171,6 +292,8 @@ No port collisions. No shared state. Each project is fully isolated. 
| `BROWSE_IDLE_TIMEOUT` | 1800000 (30 min) | Idle shutdown timeout in ms | | `BROWSE_STATE_FILE` | `.gstack/browse.json` | Path to state file (CLI passes to server) | | `BROWSE_SERVER_SCRIPT` | auto-detected | Path to server.ts | +| `BROWSE_CDP_URL` | (none) | Set to `channel:chrome` for real browser mode | +| `BROWSE_CDP_PORT` | 0 | CDP port (used internally) | ### Performance @@ -250,6 +373,7 @@ Tests spin up a local HTTP server (`browse/test/test-server.ts`) serving HTML fi | `browse/src/cookie-import-browser.ts` | Decrypt Chromium cookies from macOS and Linux browser profiles using platform-specific safe-storage key lookup. Auto-detects installed browsers. | | `browse/src/cookie-picker-routes.ts` | HTTP routes for `/cookie-picker/*` — browser list, domain search, import, remove. | | `browse/src/cookie-picker-ui.ts` | Self-contained HTML generator for the interactive cookie picker (dark theme, no frameworks). | +| `browse/src/activity.ts` | Activity streaming — `ActivityEntry` type, `CircularBuffer`, privacy filtering, SSE subscriber management. | | `browse/src/buffers.ts` | `CircularBuffer` (O(1) ring buffer) + console/network/dialog capture with async disk flush. | ### Deploying to the active skill diff --git a/CHANGELOG.md b/CHANGELOG.md index 68199eb1..2f989493 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,51 @@ # Changelog +## [0.12.1.0] - 2026-03-26 — Smarter Browsing: Network Idle, State Persistence, Iframes + +Every click, fill, and select now waits for the page to settle before returning. No more stale snapshots because an XHR was still in-flight. Chain accepts pipe-delimited format for faster multi-step flows. You can save and restore browser sessions (cookies + open tabs). And iframe content is now reachable. + +### Added + +- **Network idle detection.** `click`, `fill`, and `select` auto-wait up to 2s for network requests to settle before returning. Catches XHR/fetch triggered by interactions. 
Uses Playwright's built-in `waitForLoadState('networkidle')`, not a custom tracker. + +- **`$B state save/load`.** Save your browser session (cookies + open tabs) to a named file, load it back later. Files stored at `.gstack/browse-states/{name}.json` with 0o600 permissions. V1 saves cookies + URLs only (not localStorage, which breaks on load-before-navigate). Load replaces the current session, not merge. + +- **`$B frame` command.** Switch command context into an iframe: `$B frame iframe`, `$B frame --name checkout`, `$B frame --url stripe`, or `$B frame @e5`. All subsequent commands (click, fill, snapshot, etc.) operate inside the iframe. `$B frame main` returns to the main page. Snapshot shows `[Context: iframe src="..."]` header. Detached frames auto-recover. + +- **Chain pipe format.** Chain now accepts `$B chain 'goto url | click @e5 | snapshot -ic'` as a fallback when JSON parsing fails. Pipe-delimited with quote-aware tokenization. + +### Changed + +- **Chain post-loop idle wait.** After executing all commands in a chain, if the last was a write command, chain waits for network idle before returning. + +### Fixed + +- **Iframe ref scoping.** Snapshot ref locators, cursor-interactive scan, and cursor locators now use the frame-aware target instead of always scoping to the main page. +- **Detached frame recovery.** `getActiveFrameOrPage()` checks `isDetached()` and auto-recovers. +- **State load resets frame context.** Loading a saved state clears the active frame reference. +- **elementHandle leak in frame command.** Now properly disposed after getting contentFrame. +- **Upload command frame-aware.** `upload` uses the frame-aware target for file input locators. + +## [0.12.0.0] - 2026-03-26 — Headed Mode + Sidebar Agent + +You can now watch Claude work in a real Chrome window and direct it from a sidebar chat. + +### Added + +- **Headed mode with sidebar agent.** `$B connect` launches a visible Chrome window with the gstack extension. 
The Side Panel shows a live activity feed of every command AND a chat interface where you type natural language instructions. A child Claude instance executes your requests in the browser ... navigate pages, click buttons, fill forms, extract data. Each task gets up to 5 minutes. + +- **Personal automation.** The sidebar agent handles repetitive browser tasks beyond dev workflows. Browse your kid's school parent portal and add parent contact info to Google Contacts. Fill out vendor onboarding forms. Extract data from dashboards. Log in once in the headed browser or import cookies from your real Chrome with `/setup-browser-cookies`. + +- **Chrome extension.** Toolbar badge (green=connected, gray=not), Side Panel with activity feed + chat + refs tab, @ref overlays on the page, and a connection pill showing which window gstack controls. Auto-loads when you run `$B connect`. + +- **`/connect-chrome` skill.** Guided setup: launches Chrome, verifies the extension, demos the activity feed, and introduces the sidebar chat. + +### Changed + +- **Sidebar agent ungated.** Previously required `--chat` flag. Now always available in headed mode. The sidebar agent has the same security model as Claude Code itself (Bash, Read, Glob, Grep on localhost). + +- **Agent timeout raised to 5 minutes.** Multi-page tasks (navigating directories, filling forms across pages) need more than the previous 2-minute limit. 
+ ## [0.11.21.0] - 2026-03-26 ### Fixed diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 00000000..d1f3ce3d --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,86 @@ +# Design System — gstack + +## Product Context +- **What this is:** Community website for gstack — a CLI tool that turns Claude Code into a virtual engineering team +- **Who it's for:** Developers discovering gstack, existing community members +- **Space/industry:** Developer tools (peers: Linear, Raycast, Warp, Zed) +- **Project type:** Community dashboard + marketing site + +## Aesthetic Direction +- **Direction:** Industrial/Utilitarian — function-first, data-dense, monospace as personality font +- **Decoration level:** Intentional — subtle noise/grain texture on surfaces for materiality +- **Mood:** Serious tool built by someone who cares about craft. Warm, not cold. The CLI heritage IS the brand. +- **Reference sites:** formulae.brew.sh (competitor, but ours is live and interactive), Linear (dark + restrained), Warp (warm accents) + +## Typography +- **Display/Hero:** Satoshi (Black 900 / Bold 700) — geometric with warmth, distinctive letterforms (the lowercase 'a' and 'g'). Not Inter, not Geist. Loaded from Fontshare CDN. +- **Body:** DM Sans (Regular 400 / Medium 500 / Semibold 600) — clean, readable, slightly friendlier than geometric display. Loaded from Google Fonts. +- **UI/Labels:** DM Sans (same as body) +- **Data/Tables:** JetBrains Mono (Regular 400 / Medium 500) — the personality font. Supports tabular-nums. Monospace should be prominent, not hidden in code blocks. Loaded from Google Fonts. +- **Code:** JetBrains Mono +- **Loading:** Google Fonts for DM Sans + JetBrains Mono, Fontshare for Satoshi. Use `display=swap`. 
+- **Scale:** + - Hero: 72px / clamp(40px, 6vw, 72px) + - H1: 48px + - H2: 32px + - H3: 24px + - H4: 18px + - Body: 16px + - Small: 14px + - Caption: 13px + - Micro: 12px + - Nano: 11px (JetBrains Mono labels) + +## Color +- **Approach:** Restrained — amber accent is rare and meaningful. Dashboard data gets the color; chrome stays neutral. +- **Primary (dark mode):** amber-500 #F59E0B — warm, energetic, reads as "terminal cursor" +- **Primary (light mode):** amber-600 #D97706 — darker for contrast against white backgrounds +- **Primary text accent (dark mode):** amber-400 #FBBF24 +- **Primary text accent (light mode):** amber-700 #B45309 +- **Neutrals:** Cool zinc grays + - zinc-50: #FAFAFA (lightest) + - zinc-400: #A1A1AA + - zinc-600: #52525B + - zinc-800: #27272A + - Surface (dark): #141414 + - Base (dark): #0C0C0C + - Surface (light): #FFFFFF + - Base (light): #FAFAF9 +- **Semantic:** success #22C55E, warning #F59E0B, error #EF4444, info #3B82F6 +- **Dark mode:** Default. Near-black base (#0C0C0C), surface cards at #141414, borders at #262626. +- **Light mode:** Warm stone base (#FAFAF9), white surface cards, stone borders (#E7E5E4). Amber accent shifts to amber-600 for contrast. + +## Spacing +- **Base unit:** 4px +- **Density:** Comfortable — not cramped (not Bloomberg Terminal), not spacious (not a marketing site) +- **Scale:** 2xs(2px) xs(4px) sm(8px) md(16px) lg(24px) xl(32px) 2xl(48px) 3xl(64px) + +## Layout +- **Approach:** Grid-disciplined for dashboard, editorial hero for landing page +- **Grid:** 12 columns at lg+, 1 column at mobile +- **Max content width:** 1200px (6xl) +- **Border radius:** sm:4px, md:8px, lg:12px, full:9999px + - Cards/panels: lg (12px) + - Buttons/inputs: md (8px) + - Badges/pills: full (9999px) + - Skill bars: sm (4px) + +## Motion +- **Approach:** Minimal-functional — only transitions that aid comprehension. The dashboard's live feed IS the motion. 
+- **Easing:** enter(ease-out / cubic-bezier(0.16,1,0.3,1)) exit(ease-in) move(ease-in-out) +- **Duration:** micro(50-100ms) short(150ms) medium(250ms) long(400ms) +- **Animated elements:** live feed dot pulse (2s infinite), skill bar fill (600ms ease-out), hover states (150ms) + +## Grain Texture +Apply a subtle noise overlay to the entire page for materiality: +- Dark mode: opacity 0.03 +- Light mode: opacity 0.02 +- Use SVG feTurbulence filter as a CSS background-image on body::after +- pointer-events: none, position: fixed, z-index: 9999 + +## Decisions Log +| Date | Decision | Rationale | +|------|----------|-----------| +| 2026-03-21 | Initial design system | Created by /design-consultation. Industrial aesthetic, warm amber accent, Satoshi + DM Sans + JetBrains Mono. | +| 2026-03-21 | Light mode amber-600 | amber-500 too bright/washed against white; amber-700 too brown/umber. amber-600 is the sweet spot. | +| 2026-03-21 | Grain texture | Adds materiality to flat dark surfaces. Prevents the "generic SaaS template" sameness. | diff --git a/README.md b/README.md index fd81d78c..aad62290 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan- | `/benchmark` | **Performance Engineer** | Baseline page load times, Core Web Vitals, and resource sizes. Compare before/after on every PR. | | `/document-release` | **Technical Writer** | Update all project docs to match what you just shipped. Catches stale READMEs automatically. | | `/retro` | **Eng Manager** | Team-aware weekly retro. Per-person breakdowns, shipping streaks, test health trends, growth opportunities. `/retro global` runs across all your projects and AI tools (Claude Code, Codex, Gemini). | -| `/browse` | **QA Engineer** | Real Chromium browser, real clicks, real screenshots. ~100ms per command. | +| `/browse` | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. 
`$B connect` launches your real Chrome as a headed window — watch every action live. | | `/setup-browser-cookies` | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. | | `/autoplan` | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. | @@ -179,7 +179,37 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan- gstack works well with one sprint. It gets interesting with ten running at once. -[Conductor](https://conductor.build) runs multiple Claude Code sessions in parallel — each in its own isolated workspace. One session on `/office-hours`, another on `/review`, a third implementing a feature, a fourth running `/qa`. All at the same time. The sprint structure is what makes parallelism work — without a process, ten agents is ten sources of chaos. With a process, each agent knows exactly what to do and when to stop. +**Design is at the heart.** `/design-consultation` doesn't just pick fonts. It researches what's out there in your space, proposes safe choices AND creative risks, generates realistic mockups of your actual product, and writes `DESIGN.md` — and then `/design-review` and `/plan-eng-review` read what you chose. Design decisions flow through the whole system. + +**`/qa` was a massive unlock.** It let me go from 6 to 12 parallel workers. Claude Code saying *"I SEE THE ISSUE"* and then actually fixing it, generating a regression test, and verifying the fix — that changed how I work. The agent has eyes now. + +**Smart review routing.** Just like at a well-run startup: CEO doesn't have to look at infra bug fixes, design review isn't needed for backend changes. gstack tracks what reviews are run, figures out what's appropriate, and just does the smart thing. 
The Review Readiness Dashboard tells you where you stand before you ship. + +**Test everything.** `/ship` bootstraps test frameworks from scratch if your project doesn't have one. Every `/ship` run produces a coverage audit. Every `/qa` bug fix generates a regression test. 100% test coverage is the goal — tests make vibe coding safe instead of yolo coding. + +**`/document-release` is the engineer you never had.** It reads every doc file in your project, cross-references the diff, and updates everything that drifted. README, ARCHITECTURE, CONTRIBUTING, CLAUDE.md, TODOS — all kept current automatically. And now `/ship` auto-invokes it — docs stay current without an extra command. + +**Real browser mode.** `$B connect` launches your actual Chrome as a headed window controlled by Playwright. You watch Claude click, fill, and navigate in real time — same window, same screen. A subtle amber shimmer at the top edge tells you which Chrome window gstack controls. All existing browse commands work unchanged. `$B disconnect` returns to headless. A Chrome extension Side Panel shows a live activity feed of every command and a chat sidebar where you can direct Claude. This is co-presence — Claude isn't remote-controlling a hidden browser, it's sitting next to you in the same cockpit. + +**Sidebar agent — your AI browser assistant.** Type natural language instructions in the Chrome side panel and a child Claude instance executes them. "Navigate to the settings page and screenshot it." "Fill out this form with test data." "Go through every item in this list and extract the prices." Each task gets up to 5 minutes. The sidebar agent runs in an isolated session, so it won't interfere with your main Claude Code window. It's like having a second pair of hands in the browser. + +**Personal automation.** The sidebar agent isn't just for dev workflows. Example: "Browse my kid's school parent portal and add all the other parents' names, phone numbers, and photos to my Google Contacts."
Two ways to get authenticated: (1) log in once in the headed browser — your session persists, or (2) run `/setup-browser-cookies` to import cookies from your real Chrome. Once authenticated, Claude navigates the directory, extracts the data, and creates the contacts. + +**Browser handoff when the AI gets stuck.** Hit a CAPTCHA, auth wall, or MFA prompt? `$B handoff` opens a visible Chrome at the exact same page with all your cookies and tabs intact. Solve the problem, tell Claude you're done, `$B resume` picks up right where it left off. The agent even suggests it automatically after 3 consecutive failures. + +**Multi-AI second opinion.** `/codex` gets an independent review from OpenAI's Codex CLI — a completely different AI looking at the same diff. Three modes: code review with a pass/fail gate, adversarial challenge that actively tries to break your code, and open consultation with session continuity. When both `/review` (Claude) and `/codex` (OpenAI) have reviewed the same branch, you get a cross-model analysis showing which findings overlap and which are unique to each. + +**Safety guardrails on demand.** Say "be careful" and `/careful` warns before any destructive command — rm -rf, DROP TABLE, force-push, git reset --hard. `/freeze` locks edits to one directory while debugging so Claude can't accidentally "fix" unrelated code. `/guard` activates both. `/investigate` auto-freezes to the module being investigated. + +**Proactive skill suggestions.** gstack notices what stage you're in — brainstorming, reviewing, debugging, testing — and suggests the right skill. Don't like it? Say "stop suggesting" and it remembers across sessions. + +## 10-15 parallel sprints + +gstack is powerful with one sprint. It is transformative with ten running at once. + +[Conductor](https://conductor.build) runs multiple Claude Code sessions in parallel — each in its own isolated workspace. 
One session running `/office-hours` on a new idea, another doing `/review` on a PR, a third implementing a feature, a fourth running `/qa` on staging, and six more on other branches. All at the same time. I regularly run 10-15 parallel sprints — that's the practical max right now. + +The sprint structure is what makes parallelism work. Without a process, ten agents is ten sources of chaos. With a process — think, plan, build, review, test, ship — each agent knows exactly what to do and when to stop. You manage them the way a CEO manages a team: check in on the decisions that matter, let the rest run. --- diff --git a/SKILL.md b/SKILL.md index 5f8d0f33..b3f1ce3d 100644 --- a/SKILL.md +++ b/SKILL.md @@ -591,6 +591,9 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | Command | Description | |---------|-------------| | `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] | +| `frame <sel|@ref|--name n|--url pattern|main>` | Switch to iframe context (or main to return) | +| `inbox [--clear]` | List messages from sidebar scout inbox | +| `watch [stop]` | Passive observation — periodic snapshots while user browses | ### Tabs | Command | Description | @@ -603,9 +606,13 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
### Server | Command | Description | |---------|-------------| +| `connect` | Launch headed Chromium with Chrome extension | +| `disconnect` | Disconnect headed browser, return to headless mode | +| `focus [@ref]` | Bring headed browser window to foreground (macOS) | | `handoff [message]` | Open visible Chrome at current page for user takeover | | `restart` | Restart server | | `resume` | Re-snapshot after user takeover, return control to AI | +| `state save|load <name>` | Save/load browser state (cookies + URLs) | | `status` | Health check | | `stop` | Shutdown server | diff --git a/TODOS.md b/TODOS.md index 3ee995b6..8458a98a 100644 --- a/TODOS.md +++ b/TODOS.md @@ -14,6 +14,26 @@ **Priority:** P2 **Depends on:** Blog post about Search Before Building +## Chrome DevTools MCP Integration + +### Real Chrome session access + +**What:** Integrate Chrome DevTools MCP to connect to the user's real Chrome session with real cookies, real state, no Playwright middleman. + +**Why:** Right now, headed mode launches a fresh Chromium profile. Users must log in manually or import cookies. Chrome DevTools MCP connects to the user's actual Chrome ... instant access to every authenticated site. This is the future of browser automation for AI agents. + +**Context:** Google shipped Chrome DevTools MCP in Chrome 146+ (June 2025). It provides screenshots, console messages, performance traces, Lighthouse audits, and full page interaction through the user's real browser. gstack should use it for real-session access while keeping Playwright for headless CI/testing workflows. + +Potential new skills: +- `/debug-browser`: JS error tracing with source-mapped stack traces +- `/perf-debug`: performance traces, Core Web Vitals, network waterfall + +May replace `/setup-browser-cookies` for most use cases since the user's real cookies are already there.
+ +**Effort:** L (human: ~2 weeks / CC: ~2 hours) +**Priority:** P0 +**Depends on:** Chrome 146+, DevTools MCP server installed + ## Browse ### Bundle server.ts into compiled binary @@ -60,17 +80,14 @@ **Effort:** S **Priority:** P3 -### State persistence +### State persistence — SHIPPED -**What:** Save/load cookies + localStorage to JSON files for reproducible test sessions. +~~**What:** Save/load cookies + localStorage to JSON files for reproducible test sessions.~~ -**Why:** Enables "resume where I left off" for QA sessions and repeatable auth states. +`$B state save/load` ships in v0.12.1.0. V1 saves cookies + URLs only (not localStorage, which breaks on load-before-navigate). Files at `.gstack/browse-states/{name}.json` with 0o600 permissions. Load replaces session (closes all pages first). Name sanitized to `[a-zA-Z0-9_-]`. -**Context:** The `saveState()`/`restoreState()` helpers from the handoff feature (browser-manager.ts) already capture cookies + localStorage + sessionStorage + URLs. Adding file I/O on top is ~20 lines. - -**Effort:** S -**Priority:** P3 -**Depends on:** Sessions +**Remaining:** V2 localStorage support (needs pre-navigation injection strategy). +**Completed:** v0.12.1.0 (2026-03-26) ### Auth vault @@ -82,14 +99,13 @@ **Priority:** P3 **Depends on:** Sessions, state persistence -### Iframe support +### Iframe support — SHIPPED -**What:** `frame ` and `frame main` commands for cross-frame interaction. +~~**What:** `frame ` and `frame main` commands for cross-frame interaction.~~ -**Why:** Many web apps use iframes (embeds, payment forms, ads). Currently invisible to browse. +`$B frame` ships in v0.12.1.0. Supports CSS selector, @ref, `--name`, and `--url` pattern matching. Execution target abstraction (`getActiveFrameOrPage()`) across all read/write/snapshot commands. Frame context cleared on navigation, tab switch, resume. Detached frame auto-recovery. 
Page-only operations (goto, screenshot, viewport) throw a clear error when in frame context. -**Effort:** M -**Priority:** P4 +**Completed:** v0.12.1.0 (2026-03-26) ### Semantic locators @@ -145,14 +161,39 @@ **Effort:** L **Priority:** P4 -### CDP mode +### Headed mode with Chrome extension — SHIPPED -**What:** Connect to already-running Chrome/Electron apps via Chrome DevTools Protocol. +`$B connect` launches Playwright's bundled Chromium in headed mode with the gstack Chrome extension auto-loaded. `$B handoff` now produces the same result (extension + side panel). Sidebar chat is always available in headed mode (the earlier `--chat` gate was removed). -**Why:** Test production apps, Electron apps, and existing browser sessions without launching new instances. +### `$B watch` — SHIPPED -**Effort:** M +Claude observes user browsing in passive read-only mode with periodic snapshots. `$B watch stop` exits with summary. Mutation commands blocked during watch. + +### Sidebar scout / file drop relay — SHIPPED + +Sidebar agent writes structured messages to `.context/sidebar-inbox/`. Workspace agent reads via `$B inbox`. Message format: `{type, timestamp, page, userMessage, sidebarSessionId}`. + +### Multi-agent tab isolation + +**What:** Two Claude sessions connect to the same browser, each operating on different tabs. No cross-contamination. + +**Why:** Enables parallel /qa + /design-review on different tabs in the same browser. + +**Context:** Requires tab ownership model for concurrent headed connections. Playwright may not cleanly support two persistent contexts. Needs investigation. + +**Effort:** L (human: ~2 weeks / CC: ~2 hours) +**Priority:** P3 +**Depends on:** Headed mode (shipped) + +### Chrome Web Store publishing + +**What:** Publish the gstack browse Chrome extension to Chrome Web Store for easier install. + +**Why:** Currently sideloaded via chrome://extensions. Web Store makes install one-click.
+ +**Effort:** S **Priority:** P4 +**Depends on:** Chrome extension proving value via sideloading ### Linux cookie decryption — PARTIALLY SHIPPED diff --git a/VERSION b/VERSION index 5e1d8ddf..ba9b59b5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.11.21.0 +0.12.1.0 diff --git a/bin/chrome-cdp b/bin/chrome-cdp new file mode 100755 index 00000000..9c1ad717 --- /dev/null +++ b/bin/chrome-cdp @@ -0,0 +1,68 @@ +#!/bin/bash +# Launch Chrome with CDP (remote debugging) enabled. +# Usage: chrome-cdp [port] +# +# Chrome refuses --remote-debugging-port on its default data directory. +# We create a separate data dir with a symlink to the user's real profile, +# so Chrome thinks it's non-default but uses the same cookies/extensions. + +PORT="${1:-9222}" +CHROME="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" +REAL_PROFILE="$HOME/Library/Application Support/Google/Chrome" +CDP_DATA_DIR="$HOME/.gstack/cdp-profile/chrome" + +if ! [ -f "$CHROME" ]; then + echo "Chrome not found at $CHROME" >&2 + exit 1 +fi + +# Check if Chrome is running +if pgrep -f "Google Chrome" >/dev/null 2>&1; then + echo "Chrome is still running. Quitting..." + osascript -e 'tell application "Google Chrome" to quit' 2>/dev/null + + # Wait for it to fully exit + for i in $(seq 1 20); do + pgrep -f "Google Chrome" >/dev/null 2>&1 || break + sleep 0.5 + done + + if pgrep -f "Google Chrome" >/dev/null 2>&1; then + echo "Chrome won't quit. Force-killing..." >&2 + pkill -f "Google Chrome" + sleep 1 + fi +fi + +# Set up CDP data dir with symlinked profile +# Chrome requires a "non-default" data dir for --remote-debugging-port. +# We symlink the real Default profile so cookies/extensions carry over. +mkdir -p "$CDP_DATA_DIR" +if [ -d "$REAL_PROFILE/Default" ] && ! [ -e "$CDP_DATA_DIR/Default" ]; then + ln -s "$REAL_PROFILE/Default" "$CDP_DATA_DIR/Default" + echo "Linked real Chrome profile into CDP data dir" +fi +# Also link Local State (contains crypto keys for cookie decryption, etc.) 
+if [ -f "$REAL_PROFILE/Local State" ] && ! [ -e "$CDP_DATA_DIR/Local State" ]; then + ln -s "$REAL_PROFILE/Local State" "$CDP_DATA_DIR/Local State" +fi + +echo "Launching Chrome with CDP on port $PORT..." +"$CHROME" \ + --remote-debugging-port="$PORT" \ + --user-data-dir="$CDP_DATA_DIR" \ + --restore-last-session & +disown + +# Wait for CDP to be available +for i in $(seq 1 30); do + if curl -s "http://127.0.0.1:$PORT/json/version" >/dev/null 2>&1; then + echo "CDP ready on port $PORT" + echo "Run: \$B connect chrome" + exit 0 + fi + sleep 1 +done + +echo "CDP not available after 30s." >&2 +exit 1 diff --git a/bin/gstack-extension b/bin/gstack-extension new file mode 100755 index 00000000..8d0a62af --- /dev/null +++ b/bin/gstack-extension @@ -0,0 +1,65 @@ +#!/bin/bash +# gstack-extension — helper to install the Chrome extension +# +# When using $B connect, the extension auto-loads. This script is for +# installing it in your regular Chrome (not the Playwright-controlled one). + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Find the extension directory +EXT_DIR="" +if [ -f "$REPO_ROOT/extension/manifest.json" ]; then + EXT_DIR="$REPO_ROOT/extension" +elif [ -f "$HOME/.claude/skills/gstack/extension/manifest.json" ]; then + EXT_DIR="$HOME/.claude/skills/gstack/extension" +fi + +if [ -z "$EXT_DIR" ]; then + echo "Error: extension/ directory not found." 
+ echo "Expected at: $REPO_ROOT/extension/ or ~/.claude/skills/gstack/extension/" + exit 1 +fi + +# Copy path to clipboard +echo -n "$EXT_DIR" | pbcopy 2>/dev/null + +# Get browse server port +PORT="" +STATE_FILE="$REPO_ROOT/.gstack/browse.json" +if [ -f "$STATE_FILE" ]; then + PORT=$(grep -o '"port":[0-9]*' "$STATE_FILE" | grep -o '[0-9]*') +fi + +echo "gstack Chrome Extension Setup" +echo "==============================" +echo "" +echo "Extension path (copied to clipboard):" +echo " $EXT_DIR" +echo "" + +if [ -n "$PORT" ]; then + echo "Browse server port: $PORT" + echo "" +fi + +echo "Quick install (if using \$B connect):" +echo " The extension auto-loads when you run \$B connect." +echo " No manual installation needed!" +echo "" +echo "Manual install (for your regular Chrome):" +echo "" +echo " 1. Opening chrome://extensions now..." + +# Open chrome://extensions +osascript -e 'tell application "Google Chrome" to open location "chrome://extensions"' 2>/dev/null || \ + open "chrome://extensions" 2>/dev/null || \ + echo " Could not open Chrome. Navigate to chrome://extensions manually." + +echo " 2. Toggle 'Developer mode' ON (top-right)" +echo " 3. Click 'Load unpacked'" +echo " 4. In the file picker: Cmd+Shift+G → paste (path is in your clipboard) → Enter → Select" +echo " 5. Click the gstack puzzle icon in toolbar → enter port: ${PORT:-}" +echo " 6. Click 'Open Side Panel'" diff --git a/browse/SKILL.md b/browse/SKILL.md index c52dcaa5..399aec3a 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -474,6 +474,9 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | Command | Description | |---------|-------------| | `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] 
| +| `frame <sel|@ref|--name n|--url pattern|main>` | Switch to iframe context (or main to return) | +| `inbox [--clear]` | List messages from sidebar scout inbox | +| `watch [stop]` | Passive observation — periodic snapshots while user browses | ### Tabs | Command | Description | @@ -486,8 +489,12 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. ### Server | Command | Description | |---------|-------------| +| `connect` | Launch headed Chromium with Chrome extension | +| `disconnect` | Disconnect headed browser, return to headless mode | +| `focus [@ref]` | Bring headed browser window to foreground (macOS) | | `handoff [message]` | Open visible Chrome at current page for user takeover | | `restart` | Restart server | | `resume` | Re-snapshot after user takeover, return control to AI | +| `state save|load <name>` | Save/load browser state (cookies + URLs) | | `status` | Health check | | `stop` | Shutdown server | diff --git a/browse/src/activity.ts b/browse/src/activity.ts new file mode 100644 index 00000000..e76467d4 --- /dev/null +++ b/browse/src/activity.ts @@ -0,0 +1,208 @@ +/** + * Activity streaming — real-time feed of browse commands for the Chrome extension Side Panel + * + * Architecture: + * handleCommand() ──► emitActivity(command_start) + * ──► emitActivity(command_end) + * wirePageEvents() ──► emitActivity(navigation) + * + * GET /activity/stream?after=ID ──► SSE via ReadableStream + * GET /activity/history?limit=N ──► REST fallback + * + * Privacy: filterArgs() redacts passwords, auth tokens, and sensitive query params. + * Backpressure: subscribers notified via queueMicrotask (never blocks command path). + * Gap detection: client sends ?after=ID, server detects if ring buffer overflowed.
+ */ + +import { CircularBuffer } from './buffers'; + +// ─── Types ────────────────────────────────────────────────────── + +export interface ActivityEntry { + id: number; + timestamp: number; + type: 'command_start' | 'command_end' | 'navigation' | 'error'; + command?: string; + args?: string[]; + url?: string; + duration?: number; + status?: 'ok' | 'error'; + error?: string; + result?: string; + tabs?: number; + mode?: string; +} + +// ─── Buffer & Subscribers ─────────────────────────────────────── + +const BUFFER_CAPACITY = 1000; +const activityBuffer = new CircularBuffer(BUFFER_CAPACITY); +let nextId = 1; + +type ActivitySubscriber = (entry: ActivityEntry) => void; +const subscribers = new Set(); + +// ─── Privacy Filtering ───────────────────────────────────────── + +const SENSITIVE_COMMANDS = new Set(['fill', 'type', 'cookie', 'header']); +const SENSITIVE_PARAM_PATTERN = /\b(password|token|secret|key|auth|bearer|api[_-]?key)\b/i; + +/** + * Redact sensitive data from command args before streaming. 
+ */ +export function filterArgs(command: string, args: string[]): string[] { + if (!args || args.length === 0) return args; + + // fill: redact the value (last arg) for password-type fields + if (command === 'fill' && args.length >= 2) { + const selector = args[0]; + // If the selector suggests a password field, redact the value + if (/password|passwd|secret|token/i.test(selector)) { + return [selector, '[REDACTED]']; + } + return args; + } + + // header: redact Authorization and other sensitive headers + if (command === 'header' && args.length >= 1) { + const headerLine = args[0]; + if (/^(authorization|x-api-key|cookie|set-cookie)/i.test(headerLine)) { + const colonIdx = headerLine.indexOf(':'); + if (colonIdx > 0) { + return [headerLine.substring(0, colonIdx + 1) + '[REDACTED]']; + } + } + return args; + } + + // cookie: redact cookie values + if (command === 'cookie' && args.length >= 1) { + const cookieStr = args[0]; + const eqIdx = cookieStr.indexOf('='); + if (eqIdx > 0) { + return [cookieStr.substring(0, eqIdx + 1) + '[REDACTED]']; + } + return args; + } + + // type: always redact (could be a password field) + if (command === 'type') { + return ['[REDACTED]']; + } + + // URL args: redact sensitive query params + return args.map(arg => { + if (arg.startsWith('http://') || arg.startsWith('https://')) { + try { + const url = new URL(arg); + let redacted = false; + for (const key of url.searchParams.keys()) { + if (SENSITIVE_PARAM_PATTERN.test(key)) { + url.searchParams.set(key, '[REDACTED]'); + redacted = true; + } + } + return redacted ? url.toString() : arg; + } catch { + return arg; + } + } + return arg; + }); +} + +/** + * Truncate result text for streaming (max 200 chars). 
+ */ +function truncateResult(result: string | undefined): string | undefined { + if (!result) return undefined; + if (result.length <= 200) return result; + return result.substring(0, 200) + '...'; +} + +// ─── Public API ───────────────────────────────────────────────── + +/** + * Emit an activity event. Backpressure-safe: subscribers notified asynchronously. + */ +export function emitActivity(entry: Omit): ActivityEntry { + const full: ActivityEntry = { + ...entry, + id: nextId++, + timestamp: Date.now(), + args: entry.args ? filterArgs(entry.command || '', entry.args) : undefined, + result: truncateResult(entry.result), + }; + activityBuffer.push(full); + + // Notify subscribers asynchronously — never block the command path + for (const notify of subscribers) { + queueMicrotask(() => { + try { notify(full); } catch { /* subscriber error — don't crash */ } + }); + } + + return full; +} + +/** + * Subscribe to live activity events. Returns unsubscribe function. + */ +export function subscribe(fn: ActivitySubscriber): () => void { + subscribers.add(fn); + return () => subscribers.delete(fn); +} + +/** + * Get recent activity entries after the given cursor ID. + * Returns entries and gap info if the buffer has overflowed. + */ +export function getActivityAfter(afterId: number): { + entries: ActivityEntry[]; + gap: boolean; + gapFrom?: number; + availableFrom?: number; + totalAdded: number; +} { + const total = activityBuffer.totalAdded; + const allEntries = activityBuffer.toArray(); + + if (afterId === 0) { + return { entries: allEntries, gap: false, totalAdded: total }; + } + + // Check for gap: if afterId is too old and has been evicted + const oldestId = allEntries.length > 0 ? 
allEntries[0].id : nextId; + if (afterId < oldestId) { + return { + entries: allEntries, + gap: true, + gapFrom: afterId + 1, + availableFrom: oldestId, + totalAdded: total, + }; + } + + // Filter to entries after the cursor + const filtered = allEntries.filter(e => e.id > afterId); + return { entries: filtered, gap: false, totalAdded: total }; +} + +/** + * Get the N most recent activity entries. + */ +export function getActivityHistory(limit: number = 50): { + entries: ActivityEntry[]; + totalAdded: number; +} { + const allEntries = activityBuffer.toArray(); + const sliced = limit < allEntries.length ? allEntries.slice(-limit) : allEntries; + return { entries: sliced, totalAdded: activityBuffer.totalAdded }; +} + +/** + * Get subscriber count (for debugging/health). + */ +export function getSubscriberCount(): number { + return subscribers.size; +} diff --git a/browse/src/browser-manager.ts b/browse/src/browser-manager.ts index 335ff19e..1ef58e36 100644 --- a/browse/src/browser-manager.ts +++ b/browse/src/browser-manager.ts @@ -61,6 +61,88 @@ export class BrowserManager { private isHeaded: boolean = false; private consecutiveFailures: number = 0; + // ─── Watch Mode ───────────────────────────────────────── + private watching = false; + public watchInterval: ReturnType | null = null; + private watchSnapshots: string[] = []; + private watchStartTime: number = 0; + + // ─── Headed State ──────────────────────────────────────── + private connectionMode: 'launched' | 'headed' = 'launched'; + private intentionalDisconnect = false; + + getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; } + + // ─── Watch Mode Methods ───────────────────────────────── + isWatching(): boolean { return this.watching; } + + startWatch(): void { + this.watching = true; + this.watchSnapshots = []; + this.watchStartTime = Date.now(); + } + + stopWatch(): { snapshots: string[]; duration: number } { + this.watching = false; + if (this.watchInterval) { + 
clearInterval(this.watchInterval); + this.watchInterval = null; + } + const snapshots = this.watchSnapshots; + const duration = Date.now() - this.watchStartTime; + this.watchSnapshots = []; + this.watchStartTime = 0; + return { snapshots, duration }; + } + + addWatchSnapshot(snapshot: string): void { + this.watchSnapshots.push(snapshot); + } + + /** + * Find the gstack Chrome extension directory. + * Checks: repo root /extension, global install, dev install. + */ + private findExtensionPath(): string | null { + const fs = require('fs'); + const path = require('path'); + const candidates = [ + // Relative to this source file (dev mode: browse/src/ -> ../../extension) + path.resolve(__dirname, '..', '..', 'extension'), + // Global gstack install + path.join(process.env.HOME || '', '.claude', 'skills', 'gstack', 'extension'), + // Git repo root (detected via BROWSE_STATE_FILE location) + (() => { + const stateFile = process.env.BROWSE_STATE_FILE || ''; + if (stateFile) { + const repoRoot = path.resolve(path.dirname(stateFile), '..'); + return path.join(repoRoot, '.claude', 'skills', 'gstack', 'extension'); + } + return ''; + })(), + ].filter(Boolean); + + for (const candidate of candidates) { + try { + if (fs.existsSync(path.join(candidate, 'manifest.json'))) { + return candidate; + } + } catch {} + } + return null; + } + + /** + * Get the ref map for external consumers (e.g., /refs endpoint). + */ + getRefMap(): Array<{ ref: string; role: string; name: string }> { + const refs: Array<{ ref: string; role: string; name: string }> = []; + for (const [ref, entry] of this.refMap) { + refs.push({ ref, role: entry.role, name: entry.name }); + } + return refs; + } + async launch() { // ─── Extension Support ──────────────────────────────────── // BROWSE_EXTENSIONS_DIR points to an unpacked Chrome extension directory. 
@@ -119,15 +201,140 @@ export class BrowserManager { await this.newTab(); } - async close() { + // ─── Headed Mode ───────────────────────────────────────────── + /** + * Launch Playwright's bundled Chromium in headed mode with the gstack + * Chrome extension auto-loaded. Uses launchPersistentContext() which + * is required for extension loading (launch() + newContext() can't + * load extensions). + * + * The browser launches headed with a visible window — the user sees + * every action Claude takes in real time. + */ + async launchHeaded(): Promise { + // Clear old state before repopulating + this.pages.clear(); + this.refMap.clear(); + this.nextTabId = 1; + + // Find the gstack extension directory for auto-loading + const extensionPath = this.findExtensionPath(); + const launchArgs = ['--hide-crash-restore-bubble']; + if (extensionPath) { + launchArgs.push(`--disable-extensions-except=${extensionPath}`); + launchArgs.push(`--load-extension=${extensionPath}`); + } + + // Launch headed Chromium via Playwright's persistent context. + // Extensions REQUIRE launchPersistentContext (not launch + newContext). + // Real Chrome (executablePath/channel) silently blocks --load-extension, + // so we use Playwright's bundled Chromium which reliably loads extensions. 
+ const fs = require('fs'); + const path = require('path'); + const userDataDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + fs.mkdirSync(userDataDir, { recursive: true }); + + this.context = await chromium.launchPersistentContext(userDataDir, { + headless: false, + args: launchArgs, + viewport: null, // Use browser's default viewport (real window size) + // Playwright adds flags that block extension loading + ignoreDefaultArgs: [ + '--disable-extensions', + '--disable-component-extensions-with-background-pages', + ], + }); + this.browser = this.context.browser(); + this.connectionMode = 'headed'; + this.intentionalDisconnect = false; + + // Inject visual indicator — subtle top-edge amber gradient + // Extension's content script handles the floating pill + const indicatorScript = () => { + const injectIndicator = () => { + if (document.getElementById('gstack-ctrl')) return; + + const topLine = document.createElement('div'); + topLine.id = 'gstack-ctrl'; + topLine.style.cssText = ` + position: fixed; top: 0; left: 0; right: 0; height: 2px; + background: linear-gradient(90deg, #F59E0B, #FBBF24, #F59E0B); + background-size: 200% 100%; + animation: gstack-shimmer 3s linear infinite; + pointer-events: none; z-index: 2147483647; + opacity: 0.8; + `; + + const style = document.createElement('style'); + style.textContent = ` + @keyframes gstack-shimmer { + 0% { background-position: 200% 0; } + 100% { background-position: -200% 0; } + } + @media (prefers-reduced-motion: reduce) { + #gstack-ctrl { animation: none !important; } + } + `; + + document.documentElement.appendChild(style); + document.documentElement.appendChild(topLine); + }; + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', injectIndicator); + } else { + injectIndicator(); + } + }; + await this.context.addInitScript(indicatorScript); + + // Persistent context opens a default page — adopt it instead of creating a new one + const existingPages = 
this.context.pages(); + if (existingPages.length > 0) { + const page = existingPages[0]; + const id = this.nextTabId++; + this.pages.set(id, page); + this.activeTabId = id; + this.wirePageEvents(page); + // Inject indicator on restored page (addInitScript only fires on new navigations) + try { await page.evaluate(indicatorScript); } catch {} + } else { + await this.newTab(); + } + + // Browser disconnect handler — exit code 2 distinguishes from crashes (1) if (this.browser) { - // Remove disconnect handler to avoid exit during intentional close - this.browser.removeAllListeners('disconnected'); - // Timeout: headed browser.close() can hang on macOS - await Promise.race([ - this.browser.close(), - new Promise(resolve => setTimeout(resolve, 5000)), - ]).catch(() => {}); + this.browser.on('disconnected', () => { + if (this.intentionalDisconnect) return; + console.error('[browse] Real browser disconnected (user closed or crashed).'); + console.error('[browse] Run `$B connect` to reconnect.'); + process.exit(2); + }); + } + + // Headed mode defaults + this.dialogAutoAccept = false; // Don't dismiss user's real dialogs + this.isHeaded = true; + this.consecutiveFailures = 0; + } + + async close() { + if (this.browser || (this.connectionMode === 'headed' && this.context)) { + if (this.connectionMode === 'headed') { + // Headed/persistent context mode: close the context (which closes the browser) + this.intentionalDisconnect = true; + if (this.browser) this.browser.removeAllListeners('disconnected'); + await Promise.race([ + this.context ? 
this.context.close() : Promise.resolve(), + new Promise(resolve => setTimeout(resolve, 5000)), + ]).catch(() => {}); + } else { + // Launched mode: close the browser we spawned + this.browser.removeAllListeners('disconnected'); + await Promise.race([ + this.browser.close(), + new Promise(resolve => setTimeout(resolve, 5000)), + ]).catch(() => {}); + } this.browser = null; } } @@ -195,6 +402,7 @@ export class BrowserManager { switchTab(id: number): void { if (!this.pages.has(id)) throw new Error(`Tab ${id} not found`); this.activeTabId = id; + this.activeFrame = null; // Frame context is per-tab } getTabCount(): number { @@ -324,6 +532,42 @@ export class BrowserManager { return this.customUserAgent; } + // ─── Lifecycle helpers ─────────────────────────────── + /** + * Close all open pages and clear the pages map. + * Used by state load to replace the current session. + */ + async closeAllPages(): Promise { + for (const page of this.pages.values()) { + await page.close().catch(() => {}); + } + this.pages.clear(); + this.clearRefs(); + } + + // ─── Frame context ───────────────────────────────── + private activeFrame: import('playwright').Frame | null = null; + + setFrame(frame: import('playwright').Frame | null): void { + this.activeFrame = frame; + } + + getFrame(): import('playwright').Frame | null { + return this.activeFrame; + } + + /** + * Returns the active frame if set, otherwise the current page. + * Use this for operations that work on both Page and Frame (locator, evaluate, etc.). + */ + getActiveFrameOrPage(): import('playwright').Page | import('playwright').Frame { + // Auto-recover from detached frames (iframe removed/navigated) + if (this.activeFrame?.isDetached()) { + this.activeFrame = null; + } + return this.activeFrame ?? this.getPage(); + } + // ─── State Save/Restore (shared by recreateContext + handoff) ─ /** * Capture browser state: cookies, localStorage, sessionStorage, URLs, active tab. 
@@ -416,6 +660,9 @@ export class BrowserManager { * Falls back to a clean slate on any failure. */ async recreateContext(): Promise { + if (this.connectionMode === 'headed') { + throw new Error('Cannot recreate context in headed mode. Use disconnect first.'); + } if (!this.browser || !this.context) { throw new Error('Browser not launched'); } @@ -482,7 +729,7 @@ export class BrowserManager { * If step 2 fails → return error, headless browser untouched */ async handoff(message: string): Promise { - if (this.isHeaded) { + if (this.connectionMode === 'headed' || this.isHeaded) { return `HANDOFF: Already in headed mode at ${this.getCurrentUrl()}`; } if (!this.browser || !this.context) { @@ -493,53 +740,68 @@ export class BrowserManager { const state = await this.saveState(); const currentUrl = this.getCurrentUrl(); - // 2. Launch new headed browser (try-catch — if this fails, headless stays running) - let newBrowser: Browser; + // 2. Launch new headed browser with extension (same as launchHeaded) + // Uses launchPersistentContext so the extension auto-loads. 
+ let newContext: BrowserContext; try { - newBrowser = await chromium.launch({ + const fs = require('fs'); + const path = require('path'); + const extensionPath = this.findExtensionPath(); + const launchArgs = ['--hide-crash-restore-bubble']; + if (extensionPath) { + launchArgs.push(`--disable-extensions-except=${extensionPath}`); + launchArgs.push(`--load-extension=${extensionPath}`); + console.log(`[browse] Handoff: loading extension from ${extensionPath}`); + } else { + console.log('[browse] Handoff: extension not found — headed mode without side panel'); + } + + const userDataDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + fs.mkdirSync(userDataDir, { recursive: true }); + + newContext = await chromium.launchPersistentContext(userDataDir, { headless: false, + args: launchArgs, + viewport: null, + ignoreDefaultArgs: [ + '--disable-extensions', + '--disable-component-extensions-with-background-pages', + ], timeout: 15000, - chromiumSandbox: process.platform !== 'win32', }); } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); return `ERROR: Cannot open headed browser — ${msg}. Headless browser still running.`; } - // 3. Create context and restore state into new headed browser + // 3. 
Restore state into new headed browser try { - const contextOptions: BrowserContextOptions = { - viewport: { width: 1280, height: 720 }, - }; - if (this.customUserAgent) { - contextOptions.userAgent = this.customUserAgent; - } - const newContext = await newBrowser.newContext(contextOptions); + // Swap to new browser/context before restoreState (it uses this.context) + const oldBrowser = this.browser; + + this.context = newContext; + this.browser = newContext.browser(); + this.pages.clear(); + this.connectionMode = 'headed'; if (Object.keys(this.extraHeaders).length > 0) { await newContext.setExtraHTTPHeaders(this.extraHeaders); } - // Swap to new browser/context before restoreState (it uses this.context) - const oldBrowser = this.browser; - const oldContext = this.context; - - this.browser = newBrowser; - this.context = newContext; - this.pages.clear(); - // Register crash handler on new browser - this.browser.on('disconnected', () => { - console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); - console.error('[browse] Console/network logs flushed to .gstack/browse-*.log'); - process.exit(1); - }); + if (this.browser) { + this.browser.on('disconnected', () => { + if (this.intentionalDisconnect) return; + console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.'); + process.exit(1); + }); + } await this.restoreState(state); this.isHeaded = true; + this.dialogAutoAccept = false; // User controls dialogs in headed mode - // 4. Close old headless browser (fire-and-forget — close() can hang - // when another Playwright instance is active, so we don't await it) + // 4. Close old headless browser (fire-and-forget) oldBrowser.removeAllListeners('disconnected'); oldBrowser.close().catch(() => {}); @@ -549,8 +811,8 @@ export class BrowserManager { `STATUS: Waiting for user. 
Run 'resume' when done.`, ].join('\n'); } catch (err: unknown) { - // Restore failed — close the new browser, keep old one - await newBrowser.close().catch(() => {}); + // Restore failed — close the new context, keep old state + await newContext.close().catch(() => {}); const msg = err instanceof Error ? err.message : String(err); return `ERROR: Handoff failed during state restore — ${msg}. Headless browser still running.`; } @@ -564,6 +826,7 @@ export class BrowserManager { resume(): void { this.clearRefs(); this.resetFailures(); + this.activeFrame = null; } getIsHeaded(): boolean { @@ -593,6 +856,7 @@ export class BrowserManager { page.on('framenavigated', (frame) => { if (frame === page.mainFrame()) { this.clearRefs(); + this.activeFrame = null; // Navigation invalidates frame context } }); diff --git a/browse/src/cli.ts b/browse/src/cli.ts index 25894a5d..28e4a79e 100644 --- a/browse/src/cli.ts +++ b/browse/src/cli.ts @@ -90,6 +90,7 @@ interface ServerState { startedAt: string; serverPath: string; binaryVersion?: string; + mode?: 'launched' | 'headed'; } // ─── State File ──────────────────────────────────────────────── @@ -217,7 +218,7 @@ function cleanupLegacyState(): void { } // ─── Server Lifecycle ────────────────────────────────────────── -async function startServer(): Promise { +async function startServer(extraEnv?: Record): Promise { ensureStateDir(config); // Clean up stale state file and error log @@ -241,7 +242,7 @@ async function startServer(): Promise { // macOS/Linux: Bun.spawn + unref works correctly proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], { stdio: ['ignore', 'pipe', 'pipe'], - env: { ...process.env, BROWSE_STATE_FILE: config.stateFile }, + env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, ...extraEnv }, }); proc.unref(); } @@ -328,6 +329,15 @@ async function ensureServer(): Promise { return state; } + // Guard: never silently replace a headed server with a headless one. 
+ // Headed mode means a user-visible Chrome window is (or was) controlled. + // Silently replacing it would be confusing — tell the user to reconnect. + if (state && state.mode === 'headed' && isProcessAlive(state.pid)) { + console.error(`[browse] Headed server running (PID ${state.pid}) but not responding.`); + console.error(`[browse] Run '$B connect' to restart.`); + process.exit(1); + } + // Ensure state directory exists before lock acquisition (lock file lives there) ensureStateDir(config); @@ -471,6 +481,144 @@ Refs: After 'snapshot', use @e1, @e2... as selectors: const command = args[0]; const commandArgs = args.slice(1); + // ─── Headed Connect (pre-server command) ──────────────────── + // connect must be handled BEFORE ensureServer() because it needs + // to restart the server in headed mode with the Chrome extension. + if (command === 'connect') { + // Check if already in headed mode and healthy + const existingState = readState(); + if (existingState && existingState.mode === 'headed' && isProcessAlive(existingState.pid)) { + try { + const resp = await fetch(`http://127.0.0.1:${existingState.port}/health`, { + signal: AbortSignal.timeout(2000), + }); + if (resp.ok) { + console.log('Already connected in headed mode.'); + process.exit(0); + } + } catch { + // Headed server alive but not responding — kill and restart + } + } + + // Kill ANY existing server (SIGTERM → wait 2s → SIGKILL) + if (existingState && isProcessAlive(existingState.pid)) { + try { process.kill(existingState.pid, 'SIGTERM'); } catch {} + await new Promise(resolve => setTimeout(resolve, 2000)); + if (isProcessAlive(existingState.pid)) { + try { process.kill(existingState.pid, 'SIGKILL'); } catch {} + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + // Clean up Chromium profile locks (can persist after crashes) + const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + for (const lockFile of ['SingletonLock', 'SingletonSocket', 
'SingletonCookie']) { + try { fs.unlinkSync(path.join(profileDir, lockFile)); } catch {} + } + + // Delete stale state file + try { fs.unlinkSync(config.stateFile); } catch {} + + console.log('Launching headed Chromium with extension + sidebar agent...'); + try { + // Start server in headed mode with extension auto-loaded + // Use a well-known port so the Chrome extension auto-connects + const serverEnv: Record = { + BROWSE_HEADED: '1', + BROWSE_PORT: '34567', + BROWSE_SIDEBAR_CHAT: '1', + }; + const newState = await startServer(serverEnv); + + // Print connected status + const resp = await fetch(`http://127.0.0.1:${newState.port}/command`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${newState.token}`, + }, + body: JSON.stringify({ command: 'status', args: [] }), + signal: AbortSignal.timeout(5000), + }); + const status = await resp.text(); + console.log(`Connected to real Chrome\n${status}`); + + // Auto-start sidebar agent + const agentScript = path.resolve(__dirname, 'sidebar-agent.ts'); + try { + // Clear old agent queue + const agentQueue = path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl'); + try { fs.writeFileSync(agentQueue, ''); } catch {} + + const agentProc = Bun.spawn(['bun', 'run', agentScript], { + cwd: config.projectDir, + env: { + ...process.env, + BROWSE_BIN: path.resolve(__dirname, '..', 'dist', 'browse'), + BROWSE_STATE_FILE: config.stateFile, + BROWSE_SERVER_PORT: String(newState.port), + }, + stdio: ['ignore', 'ignore', 'ignore'], + }); + agentProc.unref(); + console.log(`[browse] Sidebar agent started (PID: ${agentProc.pid})`); + } catch (err: any) { + console.error(`[browse] Sidebar agent failed to start: ${err.message}`); + console.error(`[browse] Run manually: bun run ${agentScript}`); + } + } catch (err: any) { + console.error(`[browse] Connect failed: ${err.message}`); + process.exit(1); + } + process.exit(0); + } + + // ─── Headed Disconnect (pre-server 
command) ───────────────── + // disconnect must be handled BEFORE ensureServer() because the headed + // guard blocks all commands when the server is unresponsive. + if (command === 'disconnect') { + const existingState = readState(); + if (!existingState || existingState.mode !== 'headed') { + console.log('Not in headed mode — nothing to disconnect.'); + process.exit(0); + } + // Try graceful shutdown via server + try { + const resp = await fetch(`http://127.0.0.1:${existingState.port}/command`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${existingState.token}`, + }, + body: JSON.stringify({ command: 'disconnect', args: [] }), + signal: AbortSignal.timeout(3000), + }); + if (resp.ok) { + console.log('Disconnected from real browser.'); + process.exit(0); + } + } catch { + // Server not responding — force cleanup + } + // Force kill + cleanup + if (isProcessAlive(existingState.pid)) { + try { process.kill(existingState.pid, 'SIGTERM'); } catch {} + await new Promise(resolve => setTimeout(resolve, 2000)); + if (isProcessAlive(existingState.pid)) { + try { process.kill(existingState.pid, 'SIGKILL'); } catch {} + } + } + // Clean profile locks and state file + const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) { + try { fs.unlinkSync(path.join(profileDir, lockFile)); } catch {} + } + try { fs.unlinkSync(config.stateFile); } catch {} + console.log('Disconnected (server was unresponsive — force cleaned).'); + process.exit(0); + } + // Special case: chain reads from stdin if (command === 'chain' && commandArgs.length === 0) { const stdin = await Bun.stdin.text(); diff --git a/browse/src/commands.ts b/browse/src/commands.ts index 81c8f61a..15244538 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -31,6 +31,11 @@ export const META_COMMANDS = new Set([ 'chain', 'diff', 'url', 
'snapshot', 'handoff', 'resume', + 'connect', 'disconnect', 'focus', + 'inbox', + 'watch', + 'state', + 'frame', ]); export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]); @@ -98,6 +103,18 @@ export const COMMAND_DESCRIPTIONS: Record' }, + // Frame + 'frame': { category: 'Meta', description: 'Switch to iframe context (or main to return)', usage: 'frame ' }, }; // Load-time validation: descriptions must cover exactly the command sets diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 16ed7f84..4388491a 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -11,6 +11,8 @@ import * as Diff from 'diff'; import * as fs from 'fs'; import * as path from 'path'; import { TEMP_DIR, isPathWithin } from './platform'; +import { resolveConfig } from './config'; +import type { Frame } from 'playwright'; // Security: Path validation to prevent path traversal attacks const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()]; @@ -23,6 +25,25 @@ export function validateOutputPath(filePath: string): void { } } +/** Tokenize a pipe segment respecting double-quoted strings. 
*/ +function tokenizePipeSegment(segment: string): string[] { + const tokens: string[] = []; + let current = ''; + let inQuote = false; + for (let i = 0; i < segment.length; i++) { + const ch = segment[i]; + if (ch === '"') { + inQuote = !inQuote; + } else if (ch === ' ' && !inQuote) { + if (current) { tokens.push(current); current = ''; } + } else { + current += ch; + } + } + if (current) tokens.push(current); + return tokens; +} + export async function handleMetaCommand( command: string, args: string[], @@ -61,8 +82,10 @@ export async function handleMetaCommand( case 'status': { const page = bm.getPage(); const tabs = bm.getTabCount(); + const mode = bm.getConnectionMode(); return [ `Status: healthy`, + `Mode: ${mode}`, `URL: ${page.url()}`, `Tabs: ${tabs}`, `PID: ${process.pid}`, @@ -185,35 +208,54 @@ export async function handleMetaCommand( case 'chain': { // Read JSON array from args[0] (if provided) or expect it was passed as body const jsonStr = args[0]; - if (!jsonStr) throw new Error('Usage: echo \'[["goto","url"],["text"]]\' | browse chain'); + if (!jsonStr) throw new Error( + 'Usage: echo \'[["goto","url"],["text"]]\' | browse chain\n' + + ' or: browse chain \'goto url | click @e5 | snapshot -ic\'' + ); let commands: string[][]; try { commands = JSON.parse(jsonStr); + if (!Array.isArray(commands)) throw new Error('not array'); } catch { - throw new Error('Invalid JSON. 
Expected: [["command", "arg1", "arg2"], ...]'); + // Fallback: pipe-delimited format "goto url | click @e5 | snapshot -ic" + commands = jsonStr.split(' | ') + .filter(seg => seg.trim().length > 0) + .map(seg => tokenizePipeSegment(seg.trim())); } - if (!Array.isArray(commands)) throw new Error('Expected JSON array of commands'); - const results: string[] = []; const { handleReadCommand } = await import('./read-commands'); const { handleWriteCommand } = await import('./write-commands'); + let lastWasWrite = false; for (const cmd of commands) { const [name, ...cmdArgs] = cmd; try { let result: string; - if (WRITE_COMMANDS.has(name)) result = await handleWriteCommand(name, cmdArgs, bm); - else if (READ_COMMANDS.has(name)) result = await handleReadCommand(name, cmdArgs, bm); - else if (META_COMMANDS.has(name)) result = await handleMetaCommand(name, cmdArgs, bm, shutdown); - else throw new Error(`Unknown command: ${name}`); + if (WRITE_COMMANDS.has(name)) { + result = await handleWriteCommand(name, cmdArgs, bm); + lastWasWrite = true; + } else if (READ_COMMANDS.has(name)) { + result = await handleReadCommand(name, cmdArgs, bm); + lastWasWrite = false; + } else if (META_COMMANDS.has(name)) { + result = await handleMetaCommand(name, cmdArgs, bm, shutdown); + lastWasWrite = false; + } else { + throw new Error(`Unknown command: ${name}`); + } results.push(`[${name}] ${result}`); } catch (err: any) { results.push(`[${name}] ERROR: ${err.message}`); } } + // Wait for network to settle after write commands before returning + if (lastWasWrite) { + await bm.getPage().waitForLoadState('networkidle', { timeout: 2000 }).catch(() => {}); + } + return results.join('\n\n'); } @@ -263,6 +305,232 @@ export async function handleMetaCommand( return `RESUMED\n${snapshot}`; } + // ─── Headed Mode ────────────────────────────────────── + case 'connect': { + // connect is handled as a pre-server command in cli.ts + // If we get here, server is already running — tell the user + if 
(bm.getConnectionMode() === 'headed') { + return 'Already in headed mode with extension.'; + } + return 'The connect command must be run from the CLI (not sent to a running server). Run: $B connect'; + } + + case 'disconnect': { + if (bm.getConnectionMode() !== 'headed') { + return 'Not in headed mode — nothing to disconnect.'; + } + // Signal that we want a restart in headless mode + console.log('[browse] Disconnecting headed browser. Restarting in headless mode.'); + await shutdown(); + return 'Disconnected. Server will restart in headless mode on next command.'; + } + + case 'focus': { + if (bm.getConnectionMode() !== 'headed') { + return 'focus requires headed mode. Run `$B connect` first.'; + } + try { + const { execSync } = await import('child_process'); + // Try common Chromium-based browser app names to bring to foreground + const appNames = ['Comet', 'Google Chrome', 'Arc', 'Brave Browser', 'Microsoft Edge']; + let activated = false; + for (const appName of appNames) { + try { + execSync(`osascript -e 'tell application "${appName}" to activate'`, { stdio: 'pipe', timeout: 3000 }); + activated = true; + break; + } catch { + // Try next browser + } + } + + if (!activated) { + return 'Could not bring browser to foreground. macOS only.'; + } + + // If a ref was passed, scroll it into view + if (args.length > 0 && args[0].startsWith('@')) { + try { + const resolved = await bm.resolveRef(args[0]); + if ('locator' in resolved) { + await resolved.locator.scrollIntoViewIfNeeded({ timeout: 5000 }); + return `Browser activated. Scrolled ${args[0]} into view.`; + } + } catch { + // Ref not found — still activated the browser + } + } + + return 'Browser window activated.'; + } catch (err: any) { + return `focus failed: ${err.message}. 
macOS only.`; + } + } + + // ─── Watch ────────────────────────────────────────── + case 'watch': { + if (args[0] === 'stop') { + if (!bm.isWatching()) return 'Not currently watching.'; + const result = bm.stopWatch(); + const durationSec = Math.round(result.duration / 1000); + return [ + `WATCH STOPPED (${durationSec}s, ${result.snapshots.length} snapshots)`, + '', + 'Last snapshot:', + result.snapshots.length > 0 ? result.snapshots[result.snapshots.length - 1] : '(none)', + ].join('\n'); + } + + if (bm.isWatching()) return 'Already watching. Run `$B watch stop` to stop.'; + if (bm.getConnectionMode() !== 'headed') { + return 'watch requires headed mode. Run `$B connect` first.'; + } + + bm.startWatch(); + return 'WATCHING — observing user browsing. Periodic snapshots every 5s.\nRun `$B watch stop` to stop and get summary.'; + } + + // ─── Inbox ────────────────────────────────────────── + case 'inbox': { + const { execSync } = await import('child_process'); + let gitRoot: string; + try { + gitRoot = execSync('git rev-parse --show-toplevel', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }).trim(); + } catch { + return 'Not in a git repository — cannot locate inbox.'; + } + + const inboxDir = path.join(gitRoot, '.context', 'sidebar-inbox'); + if (!fs.existsSync(inboxDir)) return 'Inbox empty.'; + + const files = fs.readdirSync(inboxDir) + .filter(f => f.endsWith('.json') && !f.startsWith('.')) + .sort() + .reverse(); // newest first + + if (files.length === 0) return 'Inbox empty.'; + + const messages: { timestamp: string; url: string; userMessage: string }[] = []; + for (const file of files) { + try { + const data = JSON.parse(fs.readFileSync(path.join(inboxDir, file), 'utf-8')); + messages.push({ + timestamp: data.timestamp || '', + url: data.page?.url || 'unknown', + userMessage: data.userMessage || '', + }); + } catch { + // Skip malformed files + } + } + + if (messages.length === 0) return 'Inbox empty.'; + + const lines: string[] = []; + 
lines.push(`SIDEBAR INBOX (${messages.length} message${messages.length === 1 ? '' : 's'})`); + lines.push('────────────────────────────────'); + + for (const msg of messages) { + const ts = msg.timestamp ? `[${msg.timestamp}]` : '[unknown]'; + lines.push(`${ts} ${msg.url}`); + lines.push(` "${msg.userMessage}"`); + lines.push(''); + } + + lines.push('────────────────────────────────'); + + // Handle --clear flag + if (args.includes('--clear')) { + for (const file of files) { + try { fs.unlinkSync(path.join(inboxDir, file)); } catch {} + } + lines.push(`Cleared ${files.length} message${files.length === 1 ? '' : 's'}.`); + } + + return lines.join('\n'); + } + + // ─── State ──────────────────────────────────────── + case 'state': { + const [action, name] = args; + if (!action || !name) throw new Error('Usage: state save|load '); + + // Sanitize name: alphanumeric + hyphens + underscores only + if (!/^[a-zA-Z0-9_-]+$/.test(name)) { + throw new Error('State name must be alphanumeric (a-z, 0-9, _, -)'); + } + + const config = resolveConfig(); + const stateDir = path.join(config.stateDir, 'browse-states'); + fs.mkdirSync(stateDir, { recursive: true }); + const statePath = path.join(stateDir, `${name}.json`); + + if (action === 'save') { + const state = await bm.saveState(); + // V1: cookies + URLs only (not localStorage — breaks on load-before-navigate) + const saveData = { + version: 1, + cookies: state.cookies, + pages: state.pages.map(p => ({ url: p.url, isActive: p.isActive })), + }; + fs.writeFileSync(statePath, JSON.stringify(saveData, null, 2), { mode: 0o600 }); + return `State saved: ${statePath} (${state.cookies.length} cookies, ${state.pages.length} pages — treat as sensitive)`; + } + + if (action === 'load') { + if (!fs.existsSync(statePath)) throw new Error(`State not found: ${statePath}`); + const data = JSON.parse(fs.readFileSync(statePath, 'utf-8')); + if (!Array.isArray(data.cookies) || !Array.isArray(data.pages)) { + throw new Error('Invalid state file: 
expected cookies and pages arrays'); + } + // Close existing pages, then restore (replace, not merge) + bm.setFrame(null); + await bm.closeAllPages(); + await bm.restoreState({ + cookies: data.cookies, + pages: data.pages.map((p: any) => ({ ...p, storage: null })), + }); + return `State loaded: ${data.cookies.length} cookies, ${data.pages.length} pages`; + } + + throw new Error('Usage: state save|load '); + } + + // ─── Frame ─────────────────────────────────────── + case 'frame': { + const target = args[0]; + if (!target) throw new Error('Usage: frame '); + + if (target === 'main') { + bm.setFrame(null); + bm.clearRefs(); + return 'Switched to main frame'; + } + + const page = bm.getPage(); + let frame: Frame | null = null; + + if (target === '--name') { + if (!args[1]) throw new Error('Usage: frame --name '); + frame = page.frame({ name: args[1] }); + } else if (target === '--url') { + if (!args[1]) throw new Error('Usage: frame --url '); + frame = page.frame({ url: new RegExp(args[1]) }); + } else { + // CSS selector or @ref for the iframe element + const resolved = await bm.resolveRef(target); + const locator = 'locator' in resolved ? resolved.locator : page.locator(resolved.selector); + const elementHandle = await locator.elementHandle({ timeout: 5000 }); + frame = await elementHandle?.contentFrame() ?? 
null; + await elementHandle?.dispose(); + } + + if (!frame) throw new Error(`Frame not found: ${target}`); + bm.setFrame(frame); + bm.clearRefs(); + return `Switched to frame: ${frame.url()}`; + } + default: throw new Error(`Unknown meta command: ${command}`); } diff --git a/browse/src/read-commands.ts b/browse/src/read-commands.ts index 5d93156c..802c3813 100644 --- a/browse/src/read-commands.ts +++ b/browse/src/read-commands.ts @@ -7,7 +7,7 @@ import type { BrowserManager } from './browser-manager'; import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers'; -import type { Page } from 'playwright'; +import type { Page, Frame } from 'playwright'; import * as fs from 'fs'; import * as path from 'path'; import { TEMP_DIR, isPathWithin } from './platform'; @@ -57,7 +57,7 @@ export function validateReadPath(filePath: string): void { * Extract clean text from a page (strips script/style/noscript/svg). * Exported for DRY reuse in meta-commands (diff). */ -export async function getCleanText(page: Page): Promise { +export async function getCleanText(page: Page | Frame): Promise { return await page.evaluate(() => { const body = document.body; if (!body) return ''; @@ -77,10 +77,12 @@ export async function handleReadCommand( bm: BrowserManager ): Promise { const page = bm.getPage(); + // Frame-aware target for content extraction + const target = bm.getActiveFrameOrPage(); switch (command) { case 'text': { - return await getCleanText(page); + return await getCleanText(target); } case 'html': { @@ -90,13 +92,19 @@ export async function handleReadCommand( if ('locator' in resolved) { return await resolved.locator.innerHTML({ timeout: 5000 }); } - return await page.innerHTML(resolved.selector); + return await target.locator(resolved.selector).innerHTML({ timeout: 5000 }); } - return await page.content(); + // page.content() is page-only; use evaluate for frame compat + const doctype = await target.evaluate(() => { + const dt = document.doctype; + return dt ? 
`` : ''; + }); + const html = await target.evaluate(() => document.documentElement.outerHTML); + return doctype ? `${doctype}\n${html}` : html; } case 'links': { - const links = await page.evaluate(() => + const links = await target.evaluate(() => [...document.querySelectorAll('a[href]')].map(a => ({ text: a.textContent?.trim().slice(0, 120) || '', href: (a as HTMLAnchorElement).href, @@ -106,7 +114,7 @@ export async function handleReadCommand( } case 'forms': { - const forms = await page.evaluate(() => { + const forms = await target.evaluate(() => { return [...document.querySelectorAll('form')].map((form, i) => { const fields = [...form.querySelectorAll('input, select, textarea')].map(el => { const input = el as HTMLInputElement; @@ -136,7 +144,7 @@ export async function handleReadCommand( } case 'accessibility': { - const snapshot = await page.locator("body").ariaSnapshot(); + const snapshot = await target.locator("body").ariaSnapshot(); return snapshot; } @@ -144,7 +152,7 @@ export async function handleReadCommand( const expr = args[0]; if (!expr) throw new Error('Usage: browse js '); const wrapped = wrapForEvaluate(expr); - const result = await page.evaluate(wrapped); + const result = await target.evaluate(wrapped); return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? ''); } @@ -155,7 +163,7 @@ export async function handleReadCommand( if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`); const code = fs.readFileSync(filePath, 'utf-8'); const wrapped = wrapForEvaluate(code); - const result = await page.evaluate(wrapped); + const result = await target.evaluate(wrapped); return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? 
''); } @@ -170,7 +178,7 @@ export async function handleReadCommand( ); return value; } - const value = await page.evaluate( + const value = await target.evaluate( ([sel, prop]) => { const el = document.querySelector(sel); if (!el) return `Element not found: ${sel}`; @@ -195,7 +203,7 @@ export async function handleReadCommand( }); return JSON.stringify(attrs, null, 2); } - const attrs = await page.evaluate((sel) => { + const attrs = await target.evaluate((sel: string) => { const el = document.querySelector(sel); if (!el) return `Element not found: ${sel}`; const result: Record = {}; @@ -253,7 +261,7 @@ export async function handleReadCommand( if ('locator' in resolved) { locator = resolved.locator; } else { - locator = page.locator(resolved.selector); + locator = target.locator(resolved.selector); } switch (property) { @@ -283,10 +291,10 @@ export async function handleReadCommand( if (args[0] === 'set' && args[1]) { const key = args[1]; const value = args[2] || ''; - await page.evaluate(([k, v]) => localStorage.setItem(k, v), [key, value]); + await target.evaluate(([k, v]: string[]) => localStorage.setItem(k, v), [key, value]); return `Set localStorage["${key}"]`; } - const storage = await page.evaluate(() => ({ + const storage = await target.evaluate(() => ({ localStorage: { ...localStorage }, sessionStorage: { ...sessionStorage }, })); diff --git a/browse/src/server.ts b/browse/src/server.ts index fe2c27cb..fe288e9e 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -19,8 +19,11 @@ import { handleWriteCommand } from './write-commands'; import { handleMetaCommand } from './meta-commands'; import { handleCookiePickerRoute } from './cookie-picker-routes'; import { COMMAND_DESCRIPTIONS } from './commands'; -import { SNAPSHOT_FLAGS } from './snapshot'; +import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot'; import { resolveConfig, ensureStateDir, readVersionHash } from './config'; +import { emitActivity, subscribe, getActivityAfter, 
getActivityHistory, getSubscriberCount } from './activity'; +// Bun.spawn used instead of child_process.spawn (compiled bun binaries +// fail posix_spawn on all executables including /bin/bash) import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; @@ -33,6 +36,7 @@ ensureStateDir(config); const AUTH_TOKEN = crypto.randomUUID(); const BROWSE_PORT = parseInt(process.env.BROWSE_PORT || '0', 10); const IDLE_TIMEOUT_MS = parseInt(process.env.BROWSE_IDLE_TIMEOUT || '1800000', 10); // 30 min +// Sidebar chat is always enabled in headed mode (ungated in v0.12.0) function validateAuth(req: Request): boolean { const header = req.headers.get('authorization'); @@ -87,6 +91,377 @@ export { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetwork const CONSOLE_LOG_PATH = config.consoleLog; const NETWORK_LOG_PATH = config.networkLog; const DIALOG_LOG_PATH = config.dialogLog; + +// ─── Sidebar Agent (integrated — no separate process) ───────────── + +interface ChatEntry { + id: number; + ts: string; + role: 'user' | 'assistant' | 'agent'; + message?: string; + type?: string; + tool?: string; + input?: string; + text?: string; + error?: string; +} + +interface SidebarSession { + id: string; + name: string; + claudeSessionId: string | null; + worktreePath: string | null; + createdAt: string; + lastActiveAt: string; +} + +const SESSIONS_DIR = path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-sessions'); +const AGENT_TIMEOUT_MS = 300_000; // 5 minutes — multi-page tasks need time +const MAX_QUEUE = 5; + +let sidebarSession: SidebarSession | null = null; +let agentProcess: ChildProcess | null = null; +let agentStatus: 'idle' | 'processing' | 'hung' = 'idle'; +let agentStartTime: number | null = null; +let messageQueue: Array<{message: string, ts: string}> = []; +let currentMessage: string | null = null; +let chatBuffer: ChatEntry[] = []; +let chatNextId = 0; + +// Find the browse binary for the claude subprocess system prompt 
+function findBrowseBin(): string { + const candidates = [ + path.resolve(__dirname, '..', 'dist', 'browse'), + path.resolve(__dirname, '..', '..', '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse'), + path.join(process.env.HOME || '', '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse'), + ]; + for (const c of candidates) { + try { if (fs.existsSync(c)) return c; } catch {} + } + return 'browse'; // fallback to PATH +} + +const BROWSE_BIN = findBrowseBin(); + +function findClaudeBin(): string | null { + const home = process.env.HOME || ''; + const candidates = [ + // Conductor app bundled binary (not a symlink — works reliably) + path.join(home, 'Library', 'Application Support', 'com.conductor.app', 'bin', 'claude'), + // Direct versioned binary (not a symlink) + ...(() => { + try { + const versionsDir = path.join(home, '.local', 'share', 'claude', 'versions'); + const entries = fs.readdirSync(versionsDir).filter(e => /^\d/.test(e)).sort().reverse(); + return entries.map(e => path.join(versionsDir, e)); + } catch { return []; } + })(), + // Standard install (symlink — resolve it) + path.join(home, '.local', 'bin', 'claude'), + '/usr/local/bin/claude', + '/opt/homebrew/bin/claude', + ]; + // Also check if 'claude' is in current PATH + try { + const proc = Bun.spawnSync(['which', 'claude'], { stdout: 'pipe', stderr: 'pipe', timeout: 2000 }); + if (proc.exitCode === 0) { + const p = proc.stdout.toString().trim(); + if (p) candidates.unshift(p); + } + } catch {} + for (const c of candidates) { + try { + if (!fs.existsSync(c)) continue; + // Resolve symlinks — posix_spawn can fail on symlinks in compiled bun binaries + return fs.realpathSync(c); + } catch {} + } + return null; +} + +function shortenPath(str: string): string { + return str + .replace(new RegExp(BROWSE_BIN.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'), '$B') + .replace(/\/Users\/[^/]+/g, '~') + .replace(/\/conductor\/workspaces\/[^/]+\/[^/]+/g, '') + .replace(/\.claude\/skills\/gstack\//g, 
'') + .replace(/browse\/dist\/browse/g, '$B'); +} + +function summarizeToolInput(tool: string, input: any): string { + if (!input) return ''; + if (tool === 'Bash' && input.command) { + let cmd = shortenPath(input.command); + return cmd.length > 80 ? cmd.slice(0, 80) + '…' : cmd; + } + if (tool === 'Read' && input.file_path) return shortenPath(input.file_path); + if (tool === 'Edit' && input.file_path) return shortenPath(input.file_path); + if (tool === 'Write' && input.file_path) return shortenPath(input.file_path); + if (tool === 'Grep' && input.pattern) return `/${input.pattern}/`; + if (tool === 'Glob' && input.pattern) return input.pattern; + try { return shortenPath(JSON.stringify(input)).slice(0, 60); } catch { return ''; } +} + +function addChatEntry(entry: Omit): ChatEntry { + const full: ChatEntry = { ...entry, id: chatNextId++ }; + chatBuffer.push(full); + // Persist to disk (best-effort) + if (sidebarSession) { + const chatFile = path.join(SESSIONS_DIR, sidebarSession.id, 'chat.jsonl'); + try { fs.appendFileSync(chatFile, JSON.stringify(full) + '\n'); } catch {} + } + return full; +} + +function loadSession(): SidebarSession | null { + try { + const activeFile = path.join(SESSIONS_DIR, 'active.json'); + const activeData = JSON.parse(fs.readFileSync(activeFile, 'utf-8')); + const sessionFile = path.join(SESSIONS_DIR, activeData.id, 'session.json'); + const session = JSON.parse(fs.readFileSync(sessionFile, 'utf-8')) as SidebarSession; + // Load chat history + const chatFile = path.join(SESSIONS_DIR, session.id, 'chat.jsonl'); + try { + const lines = fs.readFileSync(chatFile, 'utf-8').split('\n').filter(Boolean); + chatBuffer = lines.map(line => { try { return JSON.parse(line); } catch { return null; } }).filter(Boolean); + chatNextId = chatBuffer.length > 0 ? Math.max(...chatBuffer.map(e => e.id)) + 1 : 0; + } catch {} + return session; + } catch { + return null; + } +} + +/** + * Create a git worktree for session isolation. 
+ * Falls back to null (use main cwd) if: + * - not in a git repo + * - git worktree add fails (submodules, LFS, permissions) + * - worktree dir already exists (collision from prior crash) + */ +function createWorktree(sessionId: string): string | null { + try { + // Check if we're in a git repo + const gitCheck = Bun.spawnSync(['git', 'rev-parse', '--show-toplevel'], { + stdout: 'pipe', stderr: 'pipe', timeout: 3000, + }); + if (gitCheck.exitCode !== 0) return null; + const repoRoot = gitCheck.stdout.toString().trim(); + + const worktreeDir = path.join(process.env.HOME || '/tmp', '.gstack', 'worktrees', sessionId.slice(0, 8)); + + // Clean up if dir exists from prior crash + if (fs.existsSync(worktreeDir)) { + Bun.spawnSync(['git', 'worktree', 'remove', '--force', worktreeDir], { + cwd: repoRoot, stdout: 'pipe', stderr: 'pipe', timeout: 5000, + }); + try { fs.rmSync(worktreeDir, { recursive: true, force: true }); } catch {} + } + + // Get current branch/commit + const headCheck = Bun.spawnSync(['git', 'rev-parse', 'HEAD'], { + cwd: repoRoot, stdout: 'pipe', stderr: 'pipe', timeout: 3000, + }); + if (headCheck.exitCode !== 0) return null; + const head = headCheck.stdout.toString().trim(); + + // Create worktree (detached HEAD — no branch conflicts) + const result = Bun.spawnSync(['git', 'worktree', 'add', '--detach', worktreeDir, head], { + cwd: repoRoot, stdout: 'pipe', stderr: 'pipe', timeout: 10000, + }); + + if (result.exitCode !== 0) { + console.log(`[browse] Worktree creation failed: ${result.stderr.toString().trim()}`); + return null; + } + + console.log(`[browse] Created worktree: ${worktreeDir}`); + return worktreeDir; + } catch (err: any) { + console.log(`[browse] Worktree creation error: ${err.message}`); + return null; + } +} + +function removeWorktree(worktreePath: string | null): void { + if (!worktreePath) return; + try { + const gitCheck = Bun.spawnSync(['git', 'rev-parse', '--show-toplevel'], { + stdout: 'pipe', stderr: 'pipe', timeout: 3000, + }); 
+ if (gitCheck.exitCode === 0) { + Bun.spawnSync(['git', 'worktree', 'remove', '--force', worktreePath], { + cwd: gitCheck.stdout.toString().trim(), stdout: 'pipe', stderr: 'pipe', timeout: 5000, + }); + } + // Cleanup dir if git worktree remove didn't + try { fs.rmSync(worktreePath, { recursive: true, force: true }); } catch {} + } catch {} +} + +function createSession(): SidebarSession { + const id = crypto.randomUUID(); + const worktreePath = createWorktree(id); + const session: SidebarSession = { + id, + name: 'Chrome sidebar', + claudeSessionId: null, + worktreePath, + createdAt: new Date().toISOString(), + lastActiveAt: new Date().toISOString(), + }; + const sessionDir = path.join(SESSIONS_DIR, id); + fs.mkdirSync(sessionDir, { recursive: true }); + fs.writeFileSync(path.join(sessionDir, 'session.json'), JSON.stringify(session, null, 2)); + fs.writeFileSync(path.join(sessionDir, 'chat.jsonl'), ''); + fs.writeFileSync(path.join(SESSIONS_DIR, 'active.json'), JSON.stringify({ id })); + chatBuffer = []; + chatNextId = 0; + return session; +} + +function saveSession(): void { + if (!sidebarSession) return; + sidebarSession.lastActiveAt = new Date().toISOString(); + const sessionFile = path.join(SESSIONS_DIR, sidebarSession.id, 'session.json'); + try { fs.writeFileSync(sessionFile, JSON.stringify(sidebarSession, null, 2)); } catch {} +} + +function listSessions(): Array { + try { + const dirs = fs.readdirSync(SESSIONS_DIR).filter(d => d !== 'active.json'); + return dirs.map(d => { + try { + const session = JSON.parse(fs.readFileSync(path.join(SESSIONS_DIR, d, 'session.json'), 'utf-8')); + let chatLines = 0; + try { chatLines = fs.readFileSync(path.join(SESSIONS_DIR, d, 'chat.jsonl'), 'utf-8').split('\n').filter(Boolean).length; } catch {} + return { ...session, chatLines }; + } catch { return null; } + }).filter(Boolean); + } catch { return []; } +} + +function processAgentEvent(event: any): void { + if (event.type === 'system' && event.session_id && sidebarSession 
&& !sidebarSession.claudeSessionId) { + // Capture session_id from first claude init event for --resume + sidebarSession.claudeSessionId = event.session_id; + saveSession(); + } + + if (event.type === 'assistant' && event.message?.content) { + for (const block of event.message.content) { + if (block.type === 'tool_use') { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'tool_use', tool: block.name, input: summarizeToolInput(block.name, block.input) }); + } else if (block.type === 'text' && block.text) { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'text', text: block.text }); + } + } + } + + if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'tool_use', tool: event.content_block.name, input: summarizeToolInput(event.content_block.name, event.content_block.input) }); + } + + if (event.type === 'content_block_delta' && event.delta?.type === 'text_delta' && event.delta.text) { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'text_delta', text: event.delta.text }); + } + + if (event.type === 'result') { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'result', text: event.text || event.result || '' }); + } +} + +function spawnClaude(userMessage: string): void { + agentStatus = 'processing'; + agentStartTime = Date.now(); + currentMessage = userMessage; + + const pageUrl = browserManager.getCurrentUrl() || 'about:blank'; + const B = BROWSE_BIN; + const systemPrompt = [ + 'You are a browser assistant running in a Chrome sidebar.', + `Current page: ${pageUrl}`, + `Browse binary: ${B}`, + '', + 'Commands (run via bash):', + ` ${B} goto ${B} click <@ref> ${B} fill <@ref> `, + ` ${B} snapshot -i ${B} text ${B} screenshot`, + ` ${B} back ${B} forward ${B} reload`, + '', + 'Rules: run snapshot -i before clicking. 
Keep responses SHORT.', + ].join('\n'); + + const prompt = `${systemPrompt}\n\nUser: ${userMessage}`; + const args = ['-p', prompt, '--output-format', 'stream-json', '--verbose', + '--allowedTools', 'Bash,Read,Glob,Grep']; + if (sidebarSession?.claudeSessionId) { + args.push('--resume', sidebarSession.claudeSessionId); + } + + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_start' }); + + // Compiled bun binaries CANNOT spawn external processes (posix_spawn + // fails with ENOENT on everything, including /bin/bash). Instead, + // write the command to a queue file that the sidebar-agent process + // (running as non-compiled bun) picks up and spawns claude. + const gstackDir = path.join(process.env.HOME || '/tmp', '.gstack'); + const agentQueue = path.join(gstackDir, 'sidebar-agent-queue.jsonl'); + const entry = JSON.stringify({ + ts: new Date().toISOString(), + message: userMessage, + prompt, + args, + stateFile: config.stateFile, + cwd: (sidebarSession as any)?.worktreePath || process.cwd(), + sessionId: sidebarSession?.claudeSessionId || null, + }); + try { + fs.mkdirSync(gstackDir, { recursive: true }); + fs.appendFileSync(agentQueue, entry + '\n'); + } catch (err: any) { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_error', error: `Failed to queue: ${err.message}` }); + agentStatus = 'idle'; + agentStartTime = null; + currentMessage = null; + return; + } + // The sidebar-agent.ts process polls this file and spawns claude. + // It POST events back via /sidebar-event which processAgentEvent handles. + // Agent status transitions happen when we receive agent_done/agent_error events. 
+} + +function killAgent(): void { + if (agentProcess) { + try { agentProcess.kill('SIGTERM'); } catch {} + setTimeout(() => { try { agentProcess?.kill('SIGKILL'); } catch {} }, 3000); + } + agentProcess = null; + agentStartTime = null; + currentMessage = null; + agentStatus = 'idle'; +} + +// Agent health check — detect hung processes +let agentHealthInterval: ReturnType | null = null; +function startAgentHealthCheck(): void { + agentHealthInterval = setInterval(() => { + if (agentStatus === 'processing' && agentStartTime && Date.now() - agentStartTime > AGENT_TIMEOUT_MS) { + agentStatus = 'hung'; + console.log(`[browse] Sidebar agent hung (>${AGENT_TIMEOUT_MS / 1000}s)`); + } + }, 10000); +} + +// Initialize session on startup +function initSidebarSession(): void { + fs.mkdirSync(SESSIONS_DIR, { recursive: true }); + sidebarSession = loadSession(); + if (!sidebarSession) { + sidebarSession = createSession(); + } + console.log(`[browse] Sidebar session: ${sidebarSession.id} (${chatBuffer.length} chat entries loaded)`); + startAgentHealthCheck(); +} let lastConsoleFlushed = 0; let lastNetworkFlushed = 0; let lastDialogFlushed = 0; @@ -224,6 +599,27 @@ async function handleCommand(body: any): Promise { }); } + // Block mutation commands while watching (read-only observation mode) + if (browserManager.isWatching() && WRITE_COMMANDS.has(command)) { + return new Response(JSON.stringify({ + error: 'Cannot run mutation commands while watching. 
Run `$B watch stop` first.', + }), { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }); + } + + // Activity: emit command_start + const startTime = Date.now(); + emitActivity({ + type: 'command_start', + command, + args, + url: browserManager.getCurrentUrl(), + tabs: browserManager.getTabCount(), + mode: browserManager.getConnectionMode(), + }); + try { let result: string; @@ -233,6 +629,22 @@ async function handleCommand(body: any): Promise { result = await handleWriteCommand(command, args, browserManager); } else if (META_COMMANDS.has(command)) { result = await handleMetaCommand(command, args, browserManager, shutdown); + // Start periodic snapshot interval when watch mode begins + if (command === 'watch' && args[0] !== 'stop' && browserManager.isWatching()) { + const watchInterval = setInterval(async () => { + if (!browserManager.isWatching()) { + clearInterval(watchInterval); + return; + } + try { + const snapshot = await handleSnapshot(['-i'], browserManager); + browserManager.addWatchSnapshot(snapshot); + } catch { + // Page may be navigating — skip this snapshot + } + }, 5000); + browserManager.watchInterval = watchInterval; + } } else if (command === 'help') { const helpText = generateHelpText(); return new Response(helpText, { @@ -249,12 +661,38 @@ async function handleCommand(body: any): Promise { }); } + // Activity: emit command_end (success) + emitActivity({ + type: 'command_end', + command, + args, + url: browserManager.getCurrentUrl(), + duration: Date.now() - startTime, + status: 'ok', + result: result, + tabs: browserManager.getTabCount(), + mode: browserManager.getConnectionMode(), + }); + browserManager.resetFailures(); return new Response(result, { status: 200, headers: { 'Content-Type': 'text/plain' }, }); } catch (err: any) { + // Activity: emit command_end (error) + emitActivity({ + type: 'command_end', + command, + args, + url: browserManager.getCurrentUrl(), + duration: Date.now() - startTime, + status: 'error', + 
error: err.message, + tabs: browserManager.getTabCount(), + mode: browserManager.getConnectionMode(), + }); + browserManager.incrementFailures(); let errorMsg = wrapError(err); const hint = browserManager.getFailureHint(); @@ -271,12 +709,25 @@ async function shutdown() { isShuttingDown = true; console.log('[browse] Shutting down...'); + // Stop watch mode if active + if (browserManager.isWatching()) browserManager.stopWatch(); + killAgent(); + messageQueue = []; + saveSession(); // Persist chat history before exit + if (sidebarSession?.worktreePath) removeWorktree(sidebarSession.worktreePath); + if (agentHealthInterval) clearInterval(agentHealthInterval); clearInterval(flushInterval); clearInterval(idleCheckInterval); await flushBuffers(); // Final flush (async now) await browserManager.close(); + // Clean up Chromium profile locks (prevent SingletonLock on next launch) + const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) { + try { fs.unlinkSync(path.join(profileDir, lockFile)); } catch {} + } + // Clean up state file try { fs.unlinkSync(config.stateFile); } catch {} @@ -294,6 +745,32 @@ if (process.platform === 'win32') { }); } +// Emergency cleanup for crashes (OOM, uncaught exceptions, browser disconnect) +function emergencyCleanup() { + if (isShuttingDown) return; + isShuttingDown = true; + // Kill agent subprocess if running + try { killAgent(); } catch {} + // Save session state so chat history persists across crashes + try { saveSession(); } catch {} + // Clean Chromium profile locks + const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile'); + for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) { + try { fs.unlinkSync(path.join(profileDir, lockFile)); } catch {} + } + try { fs.unlinkSync(config.stateFile); } catch {} +} +process.on('uncaughtException', (err) => { + 
console.error('[browse] FATAL uncaught exception:', err.message); + emergencyCleanup(); + process.exit(1); +}); +process.on('unhandledRejection', (err: any) => { + console.error('[browse] FATAL unhandled rejection:', err?.message || err); + emergencyCleanup(); + process.exit(1); +}); + // ─── Start ───────────────────────────────────────────────────── async function start() { // Clear old log files @@ -303,16 +780,20 @@ async function start() { const port = await findPort(); - // Launch browser - await browserManager.launch(); + // Launch browser (headless or headed with extension) + const headed = process.env.BROWSE_HEADED === '1'; + if (headed) { + await browserManager.launchHeaded(); + console.log(`[browse] Launched headed Chromium with extension`); + } else { + await browserManager.launch(); + } const startTime = Date.now(); const server = Bun.serve({ port, hostname: '127.0.0.1', fetch: async (req) => { - resetIdleTimer(); - const url = new URL(req.url); // Cookie picker routes — no auth required (localhost-only) @@ -320,21 +801,285 @@ async function start() { return handleCookiePickerRoute(url, req, browserManager); } - // Health check — no auth required (now async) + // Health check — no auth required, does NOT reset idle timer if (url.pathname === '/health') { const healthy = await browserManager.isHealthy(); return new Response(JSON.stringify({ status: healthy ? 'healthy' : 'unhealthy', + mode: browserManager.getConnectionMode(), uptime: Math.floor((Date.now() - startTime) / 1000), tabs: browserManager.getTabCount(), currentUrl: browserManager.getCurrentUrl(), + token: AUTH_TOKEN, // Extension uses this for Bearer auth + chatEnabled: true, + agent: { + status: agentStatus, + runningFor: agentStartTime ? Date.now() - agentStartTime : null, + currentMessage, + queueLength: messageQueue.length, + }, + session: sidebarSession ? 
{ id: sidebarSession.id, name: sidebarSession.name } : null, }), { status: 200, headers: { 'Content-Type': 'application/json' }, }); } - // All other endpoints require auth + // Refs endpoint — no auth required (localhost-only), does NOT reset idle timer + if (url.pathname === '/refs') { + const refs = browserManager.getRefMap(); + return new Response(JSON.stringify({ + refs, + url: browserManager.getCurrentUrl(), + mode: browserManager.getConnectionMode(), + }), { + status: 200, + headers: { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*', + }, + }); + } + + // Activity stream — SSE, no auth (localhost-only), does NOT reset idle timer + if (url.pathname === '/activity/stream') { + const afterId = parseInt(url.searchParams.get('after') || '0', 10); + const encoder = new TextEncoder(); + + const stream = new ReadableStream({ + start(controller) { + // 1. Gap detection + replay + const { entries, gap, gapFrom, availableFrom } = getActivityAfter(afterId); + if (gap) { + controller.enqueue(encoder.encode(`event: gap\ndata: ${JSON.stringify({ gapFrom, availableFrom })}\n\n`)); + } + for (const entry of entries) { + controller.enqueue(encoder.encode(`event: activity\ndata: ${JSON.stringify(entry)}\n\n`)); + } + + // 2. Subscribe for live events + const unsubscribe = subscribe((entry) => { + try { + controller.enqueue(encoder.encode(`event: activity\ndata: ${JSON.stringify(entry)}\n\n`)); + } catch { + unsubscribe(); + } + }); + + // 3. Heartbeat every 15s + const heartbeat = setInterval(() => { + try { + controller.enqueue(encoder.encode(`: heartbeat\n\n`)); + } catch { + clearInterval(heartbeat); + unsubscribe(); + } + }, 15000); + + // 4. 
Cleanup on disconnect + req.signal.addEventListener('abort', () => { + clearInterval(heartbeat); + unsubscribe(); + try { controller.close(); } catch {} + }); + }, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }, + }); + } + + // Activity history — REST, no auth (localhost-only), does NOT reset idle timer + if (url.pathname === '/activity/history') { + const limit = parseInt(url.searchParams.get('limit') || '50', 10); + const { entries, totalAdded } = getActivityHistory(limit); + return new Response(JSON.stringify({ entries, totalAdded, subscribers: getSubscriberCount() }), { + status: 200, + headers: { + 'Content-Type': 'application/json', + 'Access-Control-Allow-Origin': '*', + }, + }); + } + + // ─── Sidebar endpoints (auth required — token from /health) ──── + + // Sidebar routes are always available in headed mode (ungated in v0.12.0) + + // Sidebar chat history — read from in-memory buffer + if (url.pathname === '/sidebar-chat') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + const afterId = parseInt(url.searchParams.get('after') || '0', 10); + const entries = chatBuffer.filter(e => e.id >= afterId); + return new Response(JSON.stringify({ entries, total: chatNextId }), { + status: 200, + headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' }, + }); + } + + // Sidebar → server: user message → queue or process immediately + if (url.pathname === '/sidebar-command' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + const body = await req.json(); + const msg = body.message?.trim(); + if (!msg) { + return new Response(JSON.stringify({ 
error: 'Empty message' }), { status: 400, headers: { 'Content-Type': 'application/json' } }); + } + const ts = new Date().toISOString(); + addChatEntry({ ts, role: 'user', message: msg }); + if (sidebarSession) { sidebarSession.lastActiveAt = ts; saveSession(); } + + if (agentStatus === 'idle') { + spawnClaude(msg); + return new Response(JSON.stringify({ ok: true, processing: true }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } else if (messageQueue.length < MAX_QUEUE) { + messageQueue.push({ message: msg, ts }); + return new Response(JSON.stringify({ ok: true, queued: true, position: messageQueue.length }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } else { + return new Response(JSON.stringify({ error: 'Queue full (max 5)' }), { + status: 429, headers: { 'Content-Type': 'application/json' }, + }); + } + } + + // Clear sidebar chat + if (url.pathname === '/sidebar-chat/clear' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + chatBuffer = []; + chatNextId = 0; + if (sidebarSession) { + try { fs.writeFileSync(path.join(SESSIONS_DIR, sidebarSession.id, 'chat.jsonl'), ''); } catch {} + } + return new Response(JSON.stringify({ ok: true }), { status: 200, headers: { 'Content-Type': 'application/json' } }); + } + + // Kill hung agent + if (url.pathname === '/sidebar-agent/kill' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + killAgent(); + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_error', error: 'Killed by user' }); + // Process next in queue + if (messageQueue.length > 0) { + const next = messageQueue.shift()!; + spawnClaude(next.message); + } + return new Response(JSON.stringify({ ok: true 
}), { status: 200, headers: { 'Content-Type': 'application/json' } }); + } + + // Stop agent (user-initiated) — queued messages remain for dismissal + if (url.pathname === '/sidebar-agent/stop' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + killAgent(); + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_error', error: 'Stopped by user' }); + return new Response(JSON.stringify({ ok: true, queuedMessages: messageQueue.length }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } + + // Dismiss a queued message by index + if (url.pathname === '/sidebar-queue/dismiss' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + const body = await req.json(); + const idx = body.index; + if (typeof idx === 'number' && idx >= 0 && idx < messageQueue.length) { + messageQueue.splice(idx, 1); + } + return new Response(JSON.stringify({ ok: true, queueLength: messageQueue.length }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } + + // Session info + if (url.pathname === '/sidebar-session') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + return new Response(JSON.stringify({ + session: sidebarSession, + agent: { status: agentStatus, runningFor: agentStartTime ? 
Date.now() - agentStartTime : null, currentMessage, queueLength: messageQueue.length, queue: messageQueue }, + }), { status: 200, headers: { 'Content-Type': 'application/json' } }); + } + + // Create new session + if (url.pathname === '/sidebar-session/new' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + killAgent(); + messageQueue = []; + // Clean up old session's worktree before creating new one + if (sidebarSession?.worktreePath) removeWorktree(sidebarSession.worktreePath); + sidebarSession = createSession(); + return new Response(JSON.stringify({ ok: true, session: sidebarSession }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } + + // List all sessions + if (url.pathname === '/sidebar-session/list') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + return new Response(JSON.stringify({ sessions: listSessions(), activeId: sidebarSession?.id }), { + status: 200, headers: { 'Content-Type': 'application/json' }, + }); + } + + // Agent event relay — sidebar-agent.ts POSTs events here + if (url.pathname === '/sidebar-agent/event' && req.method === 'POST') { + if (!validateAuth(req)) { + return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, headers: { 'Content-Type': 'application/json' } }); + } + const body = await req.json(); + processAgentEvent(body); + // Handle agent lifecycle events + if (body.type === 'agent_done' || body.type === 'agent_error') { + agentProcess = null; + agentStartTime = null; + currentMessage = null; + if (body.type === 'agent_done') { + addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_done' }); + } + // Process next queued message + if (messageQueue.length > 0) { + const next = messageQueue.shift()!; + 
spawnClaude(next.message); + } else { + agentStatus = 'idle'; + } + } + // Capture claude session ID for --resume + if (body.claudeSessionId && sidebarSession && !sidebarSession.claudeSessionId) { + sidebarSession.claudeSessionId = body.claudeSessionId; + saveSession(); + } + return new Response(JSON.stringify({ ok: true }), { status: 200, headers: { 'Content-Type': 'application/json' } }); + } + + // ─── Auth-required endpoints ────────────────────────────────── + if (!validateAuth(req)) { return new Response(JSON.stringify({ error: 'Unauthorized' }), { status: 401, @@ -343,6 +1088,7 @@ async function start() { } if (url.pathname === '/command' && req.method === 'POST') { + resetIdleTimer(); // Only commands reset idle timer const body = await req.json(); return handleCommand(body); } @@ -352,13 +1098,14 @@ async function start() { }); // Write state file (atomic: write .tmp then rename) - const state = { + const state: Record = { pid: process.pid, port, token: AUTH_TOKEN, startedAt: new Date().toISOString(), serverPath: path.resolve(import.meta.dir, 'server.ts'), binaryVersion: readVersionHash() || undefined, + mode: browserManager.getConnectionMode(), }; const tmpFile = config.stateFile + '.tmp'; fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2), { mode: 0o600 }); @@ -368,6 +1115,9 @@ async function start() { console.log(`[browse] Server running on http://127.0.0.1:${port} (PID: ${process.pid})`); console.log(`[browse] State file: ${config.stateFile}`); console.log(`[browse] Idle timeout: ${IDLE_TIMEOUT_MS / 1000}s`); + + // Initialize sidebar session (load existing or create new) + initSidebarSession(); } start().catch((err) => { diff --git a/browse/src/sidebar-agent.ts b/browse/src/sidebar-agent.ts new file mode 100644 index 00000000..6f28f5f4 --- /dev/null +++ b/browse/src/sidebar-agent.ts @@ -0,0 +1,278 @@ +/** + * Sidebar Agent — polls agent-queue from server, spawns claude -p for each + * message, streams live events back to the server via 
/sidebar-agent/event.
+ *
+ * This runs as a NON-COMPILED bun process because compiled bun binaries
+ * cannot posix_spawn external executables. The server writes to the queue
+ * file, this process reads it and spawns claude.
+ *
+ * Usage: BROWSE_BIN=/path/to/browse bun run browse/src/sidebar-agent.ts
+ */
+
+import { execSync, spawn } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const QUEUE = path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
+const SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '34567', 10);
+const SERVER_URL = `http://127.0.0.1:${SERVER_PORT}`;
+const POLL_MS = 500; // Fast polling — server already did the user-facing response
+const B = process.env.BROWSE_BIN || path.resolve(__dirname, '../../.claude/skills/gstack/browse/dist/browse');
+
+/** Upper bound for a single event POST so a stuck request cannot stall the relay. */
+const EVENT_TIMEOUT_MS = 5000;
+/** Hard limit for one claude run (5 min — multi-page tasks need time). */
+const AGENT_TIMEOUT_MS = 300000;
+/** Matches the literal browse binary path; B never changes, so compile it once. */
+const BROWSE_BIN_PATTERN = new RegExp(B.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
+
+/** Number of queue lines already consumed (1-based index of the last one). */
+let lastLine = 0;
+/** Bearer token obtained from the server's /health endpoint. */
+let authToken: string | null = null;
+/** True while a claude subprocess is running; poll() skips work meanwhile. */
+let isProcessing = false;
+
+// ─── File drop relay ──────────────────────────────────────────
+
+/**
+ * Resolve the git work-tree root for the current working directory.
+ *
+ * @returns The trimmed output of `git rev-parse --show-toplevel`, or null
+ *          when the command fails for any reason.
+ */
+function getGitRoot(): string | null {
+  try {
+    return execSync('git rev-parse --show-toplevel', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Drop an observation file into `<git root>/.context/sidebar-inbox`.
+ *
+ * The JSON is written to a dot-prefixed temp file and renamed into place so
+ * a reader never sees a partially written message. Logs and returns without
+ * writing when no git root can be resolved.
+ *
+ * @param message   Text stored as `userMessage`.
+ * @param pageUrl   Stored as `page.url`; 'unknown' when empty.
+ * @param sessionId Stored as `sidebarSessionId`; 'unknown' when empty.
+ * @throws Propagates fs errors from mkdir/write/rename.
+ */
+function writeToInbox(message: string, pageUrl?: string, sessionId?: string): void {
+  const gitRoot = getGitRoot();
+  if (!gitRoot) {
+    console.error('[sidebar-agent] Cannot write to inbox — not in a git repo');
+    return;
+  }
+
+  const inboxDir = path.join(gitRoot, '.context', 'sidebar-inbox');
+  fs.mkdirSync(inboxDir, { recursive: true });
+
+  const now = new Date();
+  // ':' is replaced so the ISO timestamp is usable as a filename.
+  const timestamp = now.toISOString().replace(/:/g, '-');
+  const filename = `${timestamp}-observation.json`;
+  const tmpFile = path.join(inboxDir, `.${filename}.tmp`);
+  const finalFile = path.join(inboxDir, filename);
+
+  const inboxMessage = {
+    type: 'observation',
+    timestamp: now.toISOString(),
+    page: { url: pageUrl || 'unknown', title: '' },
+    userMessage: message,
+    sidebarSessionId: sessionId || 'unknown',
+  };
+
+  fs.writeFileSync(tmpFile, JSON.stringify(inboxMessage, null, 2));
+  fs.renameSync(tmpFile, finalFile);
+  console.log(`[sidebar-agent] Wrote inbox message: ${filename}`);
+}
+
+// ─── Auth ────────────────────────────────────────────────────────
+
+/**
+ * Fetch the auth token from the server's /health endpoint and cache it.
+ *
+ * @returns The token, or null when the request fails, times out (3s), is
+ *          not OK, or carries no token. On request failure the previously
+ *          cached token is left untouched.
+ */
+async function refreshToken(): Promise<string | null> {
+  try {
+    const resp = await fetch(`${SERVER_URL}/health`, { signal: AbortSignal.timeout(3000) });
+    if (!resp.ok) return null;
+    const data = await resp.json() as { token?: string };
+    authToken = data.token || null;
+    return authToken;
+  } catch {
+    return null;
+  }
+}
+
+// ─── Event relay to server ──────────────────────────────────────
+
+/** POST one event to /sidebar-agent/event using the given bearer token. */
+function postEvent(event: Record<string, unknown>, token: string): Promise<Response> {
+  return fetch(`${SERVER_URL}/sidebar-agent/event`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${token}`,
+    },
+    body: JSON.stringify(event),
+    signal: AbortSignal.timeout(EVENT_TIMEOUT_MS),
+  });
+}
+
+/**
+ * Relay an event to the server. Best-effort: never throws.
+ *
+ * Obtains a token first if none is cached. A 401 response triggers one
+ * token refresh and one retry (the cached token is presumably stale, e.g.
+ * after a server restart — the server's token lifecycle is not visible
+ * here). Any other failure is logged and dropped.
+ */
+async function sendEvent(event: Record<string, unknown>): Promise<void> {
+  if (!authToken) await refreshToken();
+  const token = authToken;
+  if (!token) return;
+
+  try {
+    let resp = await postEvent(event, token);
+    if (resp.status === 401) {
+      authToken = null;
+      const fresh = await refreshToken();
+      if (!fresh) {
+        console.error('[sidebar-agent] Failed to send event: unauthorized and token refresh failed');
+        return;
+      }
+      resp = await postEvent(event, fresh);
+    }
+    if (!resp.ok) {
+      console.error(`[sidebar-agent] Failed to send event: server responded ${resp.status}`);
+    }
+  } catch (err) {
+    console.error('[sidebar-agent] Failed to send event:', err);
+  }
+}
+
+// ─── Claude subprocess ──────────────────────────────────────────
+
+/**
+ * Abbreviate paths for display: the browse binary becomes `$B`, a
+ * `/Users/<name>` prefix becomes `~`, and conductor-workspace and
+ * `.claude/skills/gstack/` segments are removed.
+ */
+function shorten(str: string): string {
+  return str
+    .replace(BROWSE_BIN_PATTERN, '$B')
+    .replace(/\/Users\/[^/]+/g, '~')
+    .replace(/\/conductor\/workspaces\/[^/]+\/[^/]+/g, '')
+    .replace(/\.claude\/skills\/gstack\//g, '')
+    .replace(/browse\/dist\/browse/g, '$B');
+}
+
+/**
+ * Build a short one-line description of a tool invocation for the activity feed.
+ *
+ * @param tool  Tool name as reported in the stream (Bash, Read, Edit, ...).
+ * @param input The tool's input object; may be missing.
+ * @returns The shortened command (truncated to 80 chars + '…') for Bash, the
+ *          shortened file path for Read/Edit/Write, the pattern for Grep/Glob,
+ *          otherwise the first 60 chars of the shortened JSON; '' when there
+ *          is no input or it cannot be serialized.
+ */
+function summarizeToolInput(tool: string, input: any): string {
+  if (!input) return '';
+  if (tool === 'Bash' && input.command) {
+    const cmd = shorten(input.command);
+    return cmd.length > 80 ? cmd.slice(0, 80) + '…' : cmd;
+  }
+  if (tool === 'Read' && input.file_path) return shorten(input.file_path);
+  if (tool === 'Edit' && input.file_path) return shorten(input.file_path);
+  if (tool === 'Write' && input.file_path) return shorten(input.file_path);
+  if (tool === 'Grep' && input.pattern) return `/${input.pattern}/`;
+  if (tool === 'Glob' && input.pattern) return input.pattern;
+  try { return shorten(JSON.stringify(input)).slice(0, 60); } catch { return ''; }
+}
+
+/**
+ * Translate one parsed stream-json event from claude into sidebar events.
+ *
+ * Relays: the session id of `system` events, tool_use/text blocks of
+ * `assistant` messages, `content_block_start` tool uses, `text_delta`
+ * deltas and the final `result`. Other events are ignored.
+ */
+async function handleStreamEvent(event: any): Promise<void> {
+  if (event.type === 'system' && event.session_id) {
+    // Relay claude session ID for --resume support
+    await sendEvent({ type: 'system', claudeSessionId: event.session_id });
+  }
+
+  if (event.type === 'assistant' && event.message?.content) {
+    for (const block of event.message.content) {
+      if (block.type === 'tool_use') {
+        await sendEvent({ type: 'tool_use', tool: block.name, input: summarizeToolInput(block.name, block.input) });
+      } else if (block.type === 'text' && block.text) {
+        await sendEvent({ type: 'text', text: block.text });
+      }
+    }
+  }
+
+  if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') {
+    await sendEvent({ type: 'tool_use', tool: event.content_block.name, input: summarizeToolInput(event.content_block.name, event.content_block.input) });
+  }
+
+  if (event.type === 'content_block_delta' && event.delta?.type === 'text_delta' && event.delta.text) {
+    await sendEvent({ type: 'text_delta', text: event.delta.text });
+  }
+
+  if (event.type === 'result') {
+    await sendEvent({ type: 'result', text: event.result || '' });
+  }
+}
+
+/**
+ * Spawn `claude -p` for one queue entry and relay its output until it ends.
+ *
+ * The returned promise resolves exactly once — on process close, spawn
+ * error or timeout, whichever comes first — after every relayed stream
+ * event and the terminal agent_done/agent_error event have been sent.
+ * It rejects only if spawn() throws synchronously.
+ */
+function runClaude(queueEntry: any): Promise<void> {
+  // poll() accepts entries carrying either field, so fall back to `message`.
+  const prompt = queueEntry.prompt ?? queueEntry.message;
+  const { stateFile, cwd } = queueEntry;
+
+  return new Promise<void>((resolve) => {
+    // Build args fresh — don't trust --resume from queue (session may be stale)
+    const claudeArgs = ['-p', prompt, '--output-format', 'stream-json', '--verbose',
+      '--allowedTools', 'Bash,Read,Glob,Grep'];
+
+    // Validate cwd exists — queue may reference a stale worktree
+    let effectiveCwd = cwd || process.cwd();
+    try { fs.accessSync(effectiveCwd); } catch { effectiveCwd = process.cwd(); }
+
+    const proc = spawn('claude', claudeArgs, {
+      stdio: ['pipe', 'pipe', 'pipe'],
+      cwd: effectiveCwd,
+      env: { ...process.env, BROWSE_STATE_FILE: stateFile || '' },
+    });
+
+    proc.stdin.end();
+
+    let settled = false;
+    // Events are chained so they reach the server in the order claude emitted them.
+    let relay: Promise<void> = Promise.resolve();
+
+    const enqueue = (line: string): void => {
+      if (settled || !line.trim()) return;
+      let parsed: unknown;
+      try { parsed = JSON.parse(line); } catch { return; } // skip non-JSON output
+      relay = relay
+        .then(() => handleStreamEvent(parsed))
+        .catch((err) => console.error('[sidebar-agent] Failed to relay stream event:', err));
+    };
+
+    // First caller wins; later close/error/timeout notifications are ignored.
+    const finish = (finalEvent: Record<string, unknown>): void => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timer);
+      relay.then(() => sendEvent(finalEvent)).then(resolve);
+    };
+
+    const timer = setTimeout(() => {
+      try { proc.kill(); } catch {}
+      finish({ type: 'agent_error', error: 'Timed out after 300s' });
+    }, AGENT_TIMEOUT_MS);
+
+    // Decode in the stream so multi-byte characters split across chunks survive.
+    proc.stdout.setEncoding('utf8');
+    let buffer = '';
+
+    proc.stdout.on('data', (chunk: string) => {
+      buffer += chunk;
+      const lines = buffer.split('\n');
+      buffer = lines.pop() || ''; // keep the trailing partial line for the next chunk
+      for (const line of lines) enqueue(line);
+    });
+
+    proc.stderr.on('data', () => {}); // Claude logs to stderr, ignore
+
+    proc.on('close', (code) => {
+      enqueue(buffer); // flush a final line that had no trailing newline
+      buffer = '';
+      if (typeof code === 'number' && code !== 0) {
+        finish({ type: 'agent_error', error: `claude exited with code ${code}` });
+      } else {
+        finish({ type: 'agent_done' });
+      }
+    });
+
+    proc.on('error', (err) => {
+      finish({ type: 'agent_error', error: err.message });
+    });
+  });
+}
+
+/**
+ * Process one queue entry: announce agent_start, run claude, and always
+ * clear the processing flag afterwards — even when spawning throws — so
+ * the poll loop cannot get stuck.
+ *
+ * @param queueEntry Parsed queue line; reads `prompt` (or `message`),
+ *                   `stateFile` and `cwd`.
+ */
+async function askClaude(queueEntry: any): Promise<void> {
+  isProcessing = true;
+  try {
+    await sendEvent({ type: 'agent_start' });
+    await runClaude(queueEntry);
+  } finally {
+    isProcessing = false;
+  }
+}
+
+// ─── Poll loop ───────────────────────────────────────────────────
+
+/** Read the non-empty lines of the queue file; [] when it cannot be read. */
+function readQueueLines(): string[] {
+  try {
+    return fs.readFileSync(QUEUE, 'utf-8').split('\n').filter(Boolean);
+  } catch { return []; }
+}
+
+/** Number of non-empty lines currently in the queue file (0 if unreadable). */
+function countLines(): number {
+  return readQueueLines().length;
+}
+
+/** Return the n-th (1-based) non-empty queue line, or null if absent. */
+function readLine(n: number): string | null {
+  return readQueueLines()[n - 1] || null;
+}
+
+/**
+ * Consume queue lines appended since the last poll, one claude run at a time.
+ * Unparseable lines and entries without `message`/`prompt` are skipped.
+ */
+async function poll(): Promise<void> {
+  if (isProcessing) return; // One at a time — server handles queuing
+
+  const current = countLines();
+  if (current <= lastLine) return;
+
+  while (lastLine < current && !isProcessing) {
+    lastLine++;
+    const line = readLine(lastLine);
+    if (!line) continue;
+
+    let entry: any;
+    try { entry = JSON.parse(line); } catch { continue; }
+    if (!entry || (!entry.message && !entry.prompt)) continue;
+
+    const text: string = entry.message || entry.prompt;
+    console.log(`[sidebar-agent] Processing: "${text}"`);
+    // Write to inbox so workspace agent can pick it up. An inbox failure must
+    // not drop the entry: the line is already consumed, so still run claude.
+    try {
+      writeToInbox(text, entry.pageUrl, entry.sessionId);
+    } catch (err) {
+      console.error('[sidebar-agent] Failed to write inbox message:', err);
+    }
+    try {
+      await askClaude(entry);
+    } catch (err) {
+      console.error(`[sidebar-agent] Error:`, err);
+      await sendEvent({ type: 'agent_error', error: String(err) });
+    }
+  }
+}
+
+// ─── Main ────────────────────────────────────────────────────────
+
+/**
+ * Ensure the queue file exists, skip everything already in it, fetch the
+ * auth token and start polling every POLL_MS milliseconds.
+ */
+async function main(): Promise<void> {
+  const dir = path.dirname(QUEUE);
+  fs.mkdirSync(dir, { recursive: true });
+  if (!fs.existsSync(QUEUE)) fs.writeFileSync(QUEUE, '');
+
+  // Only entries appended after startup are processed.
+  lastLine = countLines();
+  await refreshToken();
+
+  console.log(`[sidebar-agent] Started. Watching ${QUEUE} from line ${lastLine}`);
+  console.log(`[sidebar-agent] Server: ${SERVER_URL}`);
+  console.log(`[sidebar-agent] Browse binary: ${B}`);
+
+  setInterval(() => {
+    poll().catch((err) => console.error('[sidebar-agent] Poll failed:', err));
+  }, POLL_MS);
+}
+
+main().catch(console.error);
diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index 24380bad..840cd686 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -17,7 +17,7 @@ * Later: "click @e3" → look up Locator → locator.click() */ -import type { Page, Locator } from 'playwright'; +import type { Page, Frame, Locator } from 'playwright'; import type { BrowserManager, RefEntry } from './browser-manager'; import * as Diff from 'diff'; import { TEMP_DIR, isPathWithin } from './platform'; @@ -136,15 +136,18 @@ export async function handleSnapshot( ): Promise { const opts = parseSnapshotArgs(args); const page = bm.getPage(); + // Frame-aware target for accessibility tree + const target = bm.getActiveFrameOrPage(); + const inFrame = bm.getFrame() !== null; // Get accessibility tree via ariaSnapshot let
rootLocator: Locator; if (opts.selector) { - rootLocator = page.locator(opts.selector); + rootLocator = target.locator(opts.selector); const count = await rootLocator.count(); if (count === 0) throw new Error(`Selector not found: ${opts.selector}`); } else { - rootLocator = page.locator('body'); + rootLocator = target.locator('body'); } const ariaText = await rootLocator.ariaSnapshot(); @@ -205,11 +208,11 @@ export async function handleSnapshot( let locator: Locator; if (opts.selector) { - locator = page.locator(opts.selector).getByRole(node.role as any, { + locator = target.locator(opts.selector).getByRole(node.role as any, { name: node.name || undefined, }); } else { - locator = page.getByRole(node.role as any, { + locator = target.getByRole(node.role as any, { name: node.name || undefined, }); } @@ -233,7 +236,7 @@ export async function handleSnapshot( // ─── Cursor-interactive scan (-C) ───────────────────────── if (opts.cursorInteractive) { try { - const cursorElements = await page.evaluate(() => { + const cursorElements = await target.evaluate(() => { const STANDARD_INTERACTIVE = new Set([ 'A', 'BUTTON', 'INPUT', 'SELECT', 'TEXTAREA', 'SUMMARY', 'DETAILS', ]); @@ -287,7 +290,7 @@ export async function handleSnapshot( let cRefCounter = 1; for (const elem of cursorElements) { const ref = `c${cRefCounter++}`; - const locator = page.locator(elem.selector); + const locator = target.locator(elem.selector); refMap.set(ref, { locator, role: 'cursor-interactive', name: elem.text }); output.push(`@${ref} [${elem.reason}] "${elem.text}"`); } @@ -394,5 +397,11 @@ export async function handleSnapshot( // Store for future diffs bm.setLastSnapshot(snapshotText); + // Add frame context header when operating inside an iframe + if (inFrame) { + const frameUrl = bm.getFrame()?.url() ?? 
'unknown'; + output.unshift(`[Context: iframe src="${frameUrl}"]`); + } + return output.join('\n'); } diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index 3e80c7fd..02413daf 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -18,9 +18,13 @@ export async function handleWriteCommand( bm: BrowserManager ): Promise { const page = bm.getPage(); + // Frame-aware target for locator-based operations (click, fill, etc.) + const target = bm.getActiveFrameOrPage(); + const inFrame = bm.getFrame() !== null; switch (command) { case 'goto': { + if (inFrame) throw new Error('Cannot use goto inside a frame. Run \'frame main\' first.'); const url = args[0]; if (!url) throw new Error('Usage: browse goto '); await validateNavigationUrl(url); @@ -30,16 +34,19 @@ export async function handleWriteCommand( } case 'back': { + if (inFrame) throw new Error('Cannot use back inside a frame. Run \'frame main\' first.'); await page.goBack({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Back → ${page.url()}`; } case 'forward': { + if (inFrame) throw new Error('Cannot use forward inside a frame. Run \'frame main\' first.'); await page.goForward({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Forward → ${page.url()}`; } case 'reload': { + if (inFrame) throw new Error('Cannot use reload inside a frame. Run \'frame main\' first.'); await page.reload({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Reloaded ${page.url()}`; } @@ -73,15 +80,14 @@ export async function handleWriteCommand( if ('locator' in resolved) { await resolved.locator.click({ timeout: 5000 }); } else { - await page.click(resolved.selector, { timeout: 5000 }); + await target.locator(resolved.selector).click({ timeout: 5000 }); } } catch (err: any) { // Enhanced error guidance: clicking