mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-08 06:26:45 +02:00
merge: resolve CHANGELOG conflict with main, bump to v0.12.7.0
Main claimed v0.12.6.0 for sidebar fixes. Bumped our codex-cwd-bug entry to v0.12.7.0. Fixed ELOOP in regression test by using Bun.Glob with followSymlinks:false instead of fs.readdirSync recursive (the .claude/skills/gstack symlink loops back to ROOT). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -342,21 +342,49 @@ If `NEEDS_SETUP`:
|
||||
2. Run: `cd <SKILL_DIR> && ./setup`
|
||||
3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
|
||||
|
||||
## Step 0: Pre-flight cleanup
|
||||
|
||||
Before connecting, kill any stale browse servers and clean up lock files that
|
||||
may have persisted from a crash. This prevents "already connected" false
|
||||
positives and Chromium profile lock conflicts.
|
||||
|
||||
```bash
|
||||
# Kill any existing browse server
|
||||
if [ -f "$(git rev-parse --show-toplevel 2>/dev/null)/.gstack/browse.json" ]; then
|
||||
_OLD_PID=$(cat "$(git rev-parse --show-toplevel)/.gstack/browse.json" 2>/dev/null | grep -o '"pid":[0-9]*' | grep -o '[0-9]*')
|
||||
[ -n "$_OLD_PID" ] && kill "$_OLD_PID" 2>/dev/null || true
|
||||
sleep 1
|
||||
[ -n "$_OLD_PID" ] && kill -9 "$_OLD_PID" 2>/dev/null || true
|
||||
rm -f "$(git rev-parse --show-toplevel)/.gstack/browse.json"
|
||||
fi
|
||||
# Clean Chromium profile locks (can persist after crashes)
|
||||
_PROFILE_DIR="$HOME/.gstack/chromium-profile"
|
||||
for _LF in SingletonLock SingletonSocket SingletonCookie; do
|
||||
rm -f "$_PROFILE_DIR/$_LF" 2>/dev/null || true
|
||||
done
|
||||
echo "Pre-flight cleanup done"
|
||||
```
|
||||
|
||||
## Step 1: Connect
|
||||
|
||||
```bash
|
||||
$B connect
|
||||
```
|
||||
|
||||
This launches your system Chrome via Playwright with:
|
||||
- A visible window (headed mode, not headless)
|
||||
- The gstack Chrome extension pre-loaded
|
||||
- A green shimmer line + "gstack" pill so you know which window is controlled
|
||||
This launches Playwright's bundled Chromium in headed mode with:
|
||||
- A visible window you can watch (not your regular Chrome — it stays untouched)
|
||||
- The gstack Chrome extension auto-loaded via `launchPersistentContext`
|
||||
- A golden shimmer line at the top of every page so you know which window is controlled
|
||||
- A sidebar agent process for chat commands
|
||||
|
||||
If Chrome is already running, the server restarts in headed mode with a fresh
|
||||
Chrome instance. Your regular Chrome stays untouched.
|
||||
The `connect` command auto-discovers the extension from the gstack install
|
||||
directory. It always uses port **34567** so the extension can auto-connect.
|
||||
|
||||
After connecting, print the output to the user.
|
||||
After connecting, print the full output to the user. Confirm you see
|
||||
`Mode: headed` in the output.
|
||||
|
||||
If the output shows an error or the mode is not `headed`, run `$B status` and
|
||||
share the output with the user before proceeding.
|
||||
|
||||
## Step 2: Verify
|
||||
|
||||
@@ -364,27 +392,41 @@ After connecting, print the output to the user.
|
||||
$B status
|
||||
```
|
||||
|
||||
Confirm the output shows `Mode: cdp`. Print the port number — the user may need
|
||||
it for the Side Panel.
|
||||
Confirm the output shows `Mode: headed`. Read the port from the state file:
|
||||
|
||||
```bash
|
||||
cat "$(git rev-parse --show-toplevel 2>/dev/null)/.gstack/browse.json" 2>/dev/null | grep -o '"port":[0-9]*' | grep -o '[0-9]*'
|
||||
```
|
||||
|
||||
The port should be **34567**. If it's different, note it — the user may need it
|
||||
for the Side Panel.
|
||||
|
||||
Also find the extension path so you can help the user if they need to load it manually:
|
||||
|
||||
```bash
|
||||
_EXT_PATH=""
|
||||
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
|
||||
[ -n "$_ROOT" ] && [ -f "$_ROOT/.agents/skills/gstack/extension/manifest.json" ] && _EXT_PATH="$_ROOT/.agents/skills/gstack/extension"
|
||||
[ -z "$_EXT_PATH" ] && [ -f "$HOME/.agents/skills/gstack/extension/manifest.json" ] && _EXT_PATH="$HOME/.agents/skills/gstack/extension"
|
||||
echo "EXTENSION_PATH: ${_EXT_PATH:-NOT FOUND}"
|
||||
```
|
||||
|
||||
## Step 3: Guide the user to the Side Panel
|
||||
|
||||
Use AskUserQuestion:
|
||||
|
||||
> Chrome is launched with gstack control. You should see a green shimmer line at the
|
||||
> top of the Chrome window and a small "gstack" pill in the bottom-right corner.
|
||||
> Chrome is launched with gstack control. You should see Playwright's Chromium
|
||||
> (not your regular Chrome) with a golden shimmer line at the top of the page.
|
||||
>
|
||||
> The Side Panel extension is pre-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in Chrome's toolbar
|
||||
> 2. Click it → find **gstack browse** → click the **pin icon** to pin it
|
||||
> 3. Click the **gstack icon** in the toolbar
|
||||
> 4. Click **Open Side Panel**
|
||||
> The Side Panel extension should be auto-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in the toolbar — it may
|
||||
> already show the gstack icon if the extension loaded successfully
|
||||
> 2. Click the **puzzle piece** → find **gstack browse** → click the **pin icon**
|
||||
> 3. Click the pinned **gstack icon** in the toolbar
|
||||
> 4. The Side Panel should open on the right showing a live activity feed
|
||||
>
|
||||
> The Side Panel shows a live feed of every browse command in real time.
|
||||
>
|
||||
> **Port:** The browse server is on port {PORT} — the extension auto-detects it
|
||||
> if you're using the Playwright-controlled Chrome. If the badge stays gray, click
|
||||
> the gstack icon and enter port {PORT} manually.
|
||||
> **Port:** 34567 (auto-detected — the extension connects automatically in the
|
||||
> Playwright-controlled Chrome).
|
||||
|
||||
Options:
|
||||
- A) I can see the Side Panel — let's go!
|
||||
@@ -392,22 +434,34 @@ Options:
|
||||
- C) Something went wrong
|
||||
|
||||
If B: Tell the user:
|
||||
> The extension should be auto-loaded, but Chrome sometimes doesn't show it
|
||||
> immediately. Try:
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for "gstack browse" — it should be listed and enabled
|
||||
> 3. If not listed, click "Load unpacked" → navigate to the extension folder
|
||||
> (press Cmd+Shift+G in the file picker, paste this path):
|
||||
> `{EXTENSION_PATH}`
|
||||
>
|
||||
> Then pin it from the puzzle piece icon and open the Side Panel.
|
||||
|
||||
If C: Run `$B status` and show the output. Check if the server is healthy.
|
||||
> The extension is loaded into Playwright's Chromium at launch time, but
|
||||
> sometimes it doesn't appear immediately. Try these steps:
|
||||
>
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for **"gstack browse"** — it should be listed and enabled
|
||||
> 3. If it's there but not pinned, go back to any page, click the puzzle piece
|
||||
> icon, and pin it
|
||||
> 4. If it's NOT listed at all, click **"Load unpacked"** and navigate to:
|
||||
> - Press **Cmd+Shift+G** in the file picker dialog
|
||||
> - Paste this path: `{EXTENSION_PATH}` (use the path from Step 2)
|
||||
> - Click **Select**
|
||||
>
|
||||
> After loading, pin it and click the icon to open the Side Panel.
|
||||
>
|
||||
> If the Side Panel badge stays gray (disconnected), click the gstack icon
|
||||
> and enter port **34567** manually.
|
||||
|
||||
If C:
|
||||
|
||||
1. Run `$B status` and show the output
|
||||
2. If the server is not healthy, re-run Step 0 cleanup + Step 1 connect
|
||||
3. If the server IS healthy but the browser isn't visible, try `$B focus`
|
||||
4. If that fails, ask the user what they see (error message, blank screen, etc.)
|
||||
|
||||
## Step 4: Demo
|
||||
|
||||
After the user confirms the Side Panel is working, run a quick demo so they
|
||||
can see the activity feed in action:
|
||||
After the user confirms the Side Panel is working, run a quick demo:
|
||||
|
||||
```bash
|
||||
$B goto https://news.ycombinator.com
|
||||
@@ -420,7 +474,7 @@ $B snapshot -i
|
||||
```
|
||||
|
||||
Tell the user: "Check the Side Panel — you should see the `goto` and `snapshot`
|
||||
commands appear in the activity feed. Every command Claude runs will show up here
|
||||
commands appear in the activity feed. Every command Claude runs shows up here
|
||||
in real time."
|
||||
|
||||
## Step 5: Sidebar chat
|
||||
@@ -428,8 +482,9 @@ in real time."
|
||||
After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
> The Side Panel also has a **chat tab**. Try typing a message like "take a
|
||||
> snapshot and describe this page." A child Claude instance will execute your
|
||||
> request in the browser — you'll see the commands appear in the activity feed.
|
||||
> snapshot and describe this page." A sidebar agent (a child Claude instance)
|
||||
> executes your request in the browser — you'll see the commands appear in
|
||||
> the activity feed as they happen.
|
||||
>
|
||||
> The sidebar agent can navigate pages, click buttons, fill forms, and read
|
||||
> content. Each task gets up to 5 minutes. It runs in an isolated session, so
|
||||
@@ -439,17 +494,28 @@ After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
Tell the user:
|
||||
|
||||
> You're all set! Chrome is under Claude's control with the Side Panel showing
|
||||
> live activity and a chat sidebar for direct commands. Here's what you can do:
|
||||
> You're all set! Here's what you can do with the connected Chrome:
|
||||
>
|
||||
> - **Chat in the sidebar** — type natural language instructions and Claude
|
||||
> executes them in the browser
|
||||
> - **Run any browse command** — `$B goto`, `$B click`, `$B snapshot` — and
|
||||
> watch it happen in Chrome + the Side Panel
|
||||
> - **Use /qa or /design-review** — they'll run in the visible Chrome window
|
||||
> instead of headless. No cookie import needed.
|
||||
> - **`$B focus`** — bring Chrome to the foreground anytime
|
||||
> - **`$B disconnect`** — return to headless mode when done
|
||||
> **Watch Claude work in real time:**
|
||||
> - Run any gstack skill (`/qa`, `/design-review`, `/benchmark`) and watch
|
||||
> every action happen in the visible Chrome window + Side Panel feed
|
||||
> - No cookie import needed — the Playwright browser shares its own session
|
||||
>
|
||||
> **Control the browser directly:**
|
||||
> - **Sidebar chat** — type natural language in the Side Panel and the sidebar
|
||||
> agent executes it (e.g., "fill in the login form and submit")
|
||||
> - **Browse commands** — `$B goto <url>`, `$B click <sel>`, `$B fill <sel> <val>`,
|
||||
> `$B snapshot -i` — all visible in Chrome + Side Panel
|
||||
>
|
||||
> **Window management:**
|
||||
> - `$B focus` — bring Chrome to the foreground anytime
|
||||
> - `$B disconnect` — close headed Chrome and return to headless mode
|
||||
>
|
||||
> **What skills look like in headed mode:**
|
||||
> - `/qa` runs its full test suite in the visible browser — you see every page
|
||||
> load, every click, every assertion
|
||||
> - `/design-review` takes screenshots in the real browser — same pixels you see
|
||||
> - `/benchmark` measures performance in the headed browser
|
||||
|
||||
Then proceed with whatever the user asked to do. If they didn't specify a task,
|
||||
ask what they'd like to test or browse.
|
||||
|
||||
+19
-2
@@ -1,12 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
## [0.12.6.0] - 2026-03-27 — Codex No Longer Reviews the Wrong Project
|
||||
## [0.12.7.0] - 2026-03-27 — Codex No Longer Reviews the Wrong Project
|
||||
|
||||
When you run gstack in Conductor with multiple workspaces open, Codex could silently review the wrong project. The `codex exec -C` flag resolved the repo root inline via `$(git rev-parse --show-toplevel)`, which evaluates in whatever cwd the background shell inherits. In multi-workspace environments, that cwd might be a different project entirely.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Codex exec resolves repo root eagerly.** All 12 `codex exec` commands across `/codex`, `/autoplan`, and 4 resolver functions now resolve `_REPO_ROOT` at the top of each bash block and reference the stored value in `-C`. No more inline evaluation that races with other workspaces.
|
||||
- **`codex review` also gets cwd protection.** `codex review` doesn't support `-C`, so it now gets `cd "$_REPO_ROOT"` before invocation. Same class of bug, different command.
|
||||
- **Silent fallback replaced with hard fail.** The `|| pwd` fallback silently used whatever random cwd was available. Now it errors out with a clear message if not in a git repo.
|
||||
|
||||
### Removed
|
||||
|
||||
@@ -14,7 +16,22 @@ When you run gstack in Conductor with multiple workspaces open, Codex could sile
|
||||
|
||||
### Added
|
||||
|
||||
- **Regression test** that scans all `.tmpl` and resolver `.ts` source files for `codex exec` commands using inline `$(git rev-parse --show-toplevel)` in the `-C` flag. Prevents reintroduction.
|
||||
- **Regression test** that scans all `.tmpl`, resolver `.ts`, and generated `SKILL.md` files for codex commands using inline `$(git rev-parse --show-toplevel)`. Prevents reintroduction.
|
||||
|
||||
## [0.12.6.0] - 2026-03-27 — Sidebar Knows What Page You're On
|
||||
|
||||
The Chrome sidebar agent used to navigate to the wrong page when you asked it to do something. If you'd manually browsed to a site, the sidebar would ignore that and go to whatever Playwright last saw (often Hacker News from the demo). Now it works.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Sidebar uses the real tab URL.** The Chrome extension now captures the actual page URL via `chrome.tabs.query()` and sends it to the server. Previously the sidebar agent used Playwright's stale `page.url()`, which didn't update when you navigated manually in headed mode.
|
||||
- **URL sanitization.** The extension-provided URL is validated (http/https only, control characters stripped, 2048 char limit) before being used in the Claude system prompt. Prevents prompt injection via crafted URLs.
|
||||
- **Stale sidebar agents killed on reconnect.** Each `/connect-chrome` now kills leftover sidebar-agent processes before starting a new one. Old agents had stale auth tokens and would silently fail, causing the sidebar to freeze.
|
||||
|
||||
### Added
|
||||
|
||||
- **Pre-flight cleanup for `/connect-chrome`.** Kills stale browse servers and cleans Chromium profile locks before connecting. Prevents "already connected" false positives after crashes.
|
||||
- **Sidebar agent test suite (36 tests).** Four layers: unit tests for URL sanitization, integration tests for server HTTP endpoints, mock-Claude round-trip tests, and E2E tests with real Claude. All free except layer 4.
|
||||
|
||||
## [0.12.5.1] - 2026-03-27 — Eng Review Now Tells You What to Parallelize
|
||||
|
||||
|
||||
@@ -185,6 +185,18 @@ Sidebar agent writes structured messages to `.context/sidebar-inbox/`. Workspace
|
||||
**Priority:** P3
|
||||
**Depends on:** Headed mode (shipped)
|
||||
|
||||
### Sidebar agent needs Write tool + better error visibility
|
||||
|
||||
**What:** Two issues with the sidebar agent (`sidebar-agent.ts`): (1) `--allowedTools` is hardcoded to `Bash,Read,Glob,Grep`, missing `Write`. Claude can't create files (like CSVs) when asked. (2) When Claude errors or returns empty, the sidebar UI shows nothing, just a green dot. No error message, no "I tried but failed", nothing.
|
||||
|
||||
**Why:** Users ask "write this to a CSV" and the sidebar silently can't. Then they think it's broken. The UI needs to surface errors visibly, and Claude needs the tools to actually do what's asked.
|
||||
|
||||
**Context:** `sidebar-agent.ts:163` hardcodes `--allowedTools`. The event relay (`handleStreamEvent`) handles `agent_done` and `agent_error` but the extension's sidepanel.js may not be rendering error states. The sidebar should show "Error: ..." or "Claude finished but produced no output" instead of staying on the green dot forever.
|
||||
|
||||
**Effort:** S (human: ~2h / CC: ~10min)
|
||||
**Priority:** P1
|
||||
**Depends on:** None
|
||||
|
||||
### Chrome Web Store publishing
|
||||
|
||||
**What:** Publish the gstack browse Chrome extension to Chrome Web Store for easier install.
|
||||
|
||||
+43
-3
@@ -511,8 +511,27 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up Chromium profile locks (can persist after crashes)
|
||||
// Kill orphaned Chromium processes that may still hold the profile lock.
|
||||
// The server PID is the Bun process; Chromium is a child that can outlive it
|
||||
// if the server is killed abruptly (SIGKILL, crash, manual rm of state file).
|
||||
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
|
||||
try {
|
||||
const singletonLock = path.join(profileDir, 'SingletonLock');
|
||||
const lockTarget = fs.readlinkSync(singletonLock); // e.g. "hostname-12345"
|
||||
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
|
||||
if (orphanPid && isProcessAlive(orphanPid)) {
|
||||
try { process.kill(orphanPid, 'SIGTERM'); } catch {}
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
if (isProcessAlive(orphanPid)) {
|
||||
try { process.kill(orphanPid, 'SIGKILL'); } catch {}
|
||||
await new Promise(resolve => setTimeout(resolve, 500));
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// No lock symlink or not readable — nothing to kill
|
||||
}
|
||||
|
||||
// Clean up Chromium profile locks (can persist after crashes)
|
||||
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
|
||||
try { fs.unlinkSync(path.join(profileDir, lockFile)); } catch {}
|
||||
}
|
||||
@@ -545,17 +564,38 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
console.log(`Connected to real Chrome\n${status}`);
|
||||
|
||||
// Auto-start sidebar agent
|
||||
const agentScript = path.resolve(__dirname, 'sidebar-agent.ts');
|
||||
// __dirname is inside $bunfs in compiled binaries — resolve from execPath instead
|
||||
let agentScript = path.resolve(__dirname, 'sidebar-agent.ts');
|
||||
if (!fs.existsSync(agentScript)) {
|
||||
agentScript = path.resolve(path.dirname(process.execPath), '..', 'src', 'sidebar-agent.ts');
|
||||
}
|
||||
try {
|
||||
if (!fs.existsSync(agentScript)) {
|
||||
throw new Error(`sidebar-agent.ts not found at ${agentScript}`);
|
||||
}
|
||||
// Clear old agent queue
|
||||
const agentQueue = path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
|
||||
try { fs.writeFileSync(agentQueue, ''); } catch {}
|
||||
|
||||
// Resolve browse binary path the same way — execPath-relative
|
||||
let browseBin = path.resolve(__dirname, '..', 'dist', 'browse');
|
||||
if (!fs.existsSync(browseBin)) {
|
||||
browseBin = process.execPath; // the compiled binary itself
|
||||
}
|
||||
|
||||
// Kill any existing sidebar-agent processes before starting a new one.
|
||||
// Old agents have stale auth tokens and will silently fail to relay events,
|
||||
// causing the server to mark the agent as "hung".
|
||||
try {
|
||||
const { spawnSync } = require('child_process');
|
||||
spawnSync('pkill', ['-f', 'sidebar-agent\\.ts'], { stdio: 'ignore', timeout: 3000 });
|
||||
} catch {}
|
||||
|
||||
const agentProc = Bun.spawn(['bun', 'run', agentScript], {
|
||||
cwd: config.projectDir,
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_BIN: path.resolve(__dirname, '..', 'dist', 'browse'),
|
||||
BROWSE_BIN: browseBin,
|
||||
BROWSE_STATE_FILE: config.stateFile,
|
||||
BROWSE_SERVER_PORT: String(newState.port),
|
||||
},
|
||||
|
||||
+35
-16
@@ -18,6 +18,7 @@ import { handleReadCommand } from './read-commands';
|
||||
import { handleWriteCommand } from './write-commands';
|
||||
import { handleMetaCommand } from './meta-commands';
|
||||
import { handleCookiePickerRoute } from './cookie-picker-routes';
|
||||
import { sanitizeExtensionUrl } from './sidebar-utils';
|
||||
import { COMMAND_DESCRIPTIONS } from './commands';
|
||||
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
||||
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
||||
@@ -123,7 +124,7 @@ let sidebarSession: SidebarSession | null = null;
|
||||
let agentProcess: ChildProcess | null = null;
|
||||
let agentStatus: 'idle' | 'processing' | 'hung' = 'idle';
|
||||
let agentStartTime: number | null = null;
|
||||
let messageQueue: Array<{message: string, ts: string}> = [];
|
||||
let messageQueue: Array<{message: string, ts: string, extensionUrl?: string | null}> = [];
|
||||
let currentMessage: string | null = null;
|
||||
let chatBuffer: ChatEntry[] = [];
|
||||
let chatNextId = 0;
|
||||
@@ -371,18 +372,27 @@ function processAgentEvent(event: any): void {
|
||||
}
|
||||
}
|
||||
|
||||
function spawnClaude(userMessage: string): void {
|
||||
function spawnClaude(userMessage: string, extensionUrl?: string | null): void {
|
||||
agentStatus = 'processing';
|
||||
agentStartTime = Date.now();
|
||||
currentMessage = userMessage;
|
||||
|
||||
const pageUrl = browserManager.getCurrentUrl() || 'about:blank';
|
||||
// Prefer the URL from the Chrome extension (what the user actually sees)
|
||||
// over Playwright's page.url() which can be stale in headed mode.
|
||||
const sanitizedExtUrl = sanitizeExtensionUrl(extensionUrl);
|
||||
const playwrightUrl = browserManager.getCurrentUrl() || 'about:blank';
|
||||
const pageUrl = sanitizedExtUrl || playwrightUrl;
|
||||
const B = BROWSE_BIN;
|
||||
const systemPrompt = [
|
||||
'You are a browser assistant running in a Chrome sidebar.',
|
||||
`Current page: ${pageUrl}`,
|
||||
`The user is currently viewing: ${pageUrl}`,
|
||||
`Browse binary: ${B}`,
|
||||
'',
|
||||
'IMPORTANT: You are controlling a SHARED browser. The user may have navigated',
|
||||
'manually. Always run `' + B + ' url` first to check the actual current URL.',
|
||||
'If it differs from above, the user navigated — work with the ACTUAL page.',
|
||||
'Do NOT navigate away from the user\'s current page unless they ask you to.',
|
||||
'',
|
||||
'Commands (run via bash):',
|
||||
` ${B} goto <url> ${B} click <@ref> ${B} fill <@ref> <text>`,
|
||||
` ${B} snapshot -i ${B} text ${B} screenshot`,
|
||||
@@ -404,8 +414,8 @@ function spawnClaude(userMessage: string): void {
|
||||
// fails with ENOENT on everything, including /bin/bash). Instead,
|
||||
// write the command to a queue file that the sidebar-agent process
|
||||
// (running as non-compiled bun) picks up and spawns claude.
|
||||
const gstackDir = path.join(process.env.HOME || '/tmp', '.gstack');
|
||||
const agentQueue = path.join(gstackDir, 'sidebar-agent-queue.jsonl');
|
||||
const agentQueue = process.env.SIDEBAR_QUEUE_PATH || path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
|
||||
const gstackDir = path.dirname(agentQueue);
|
||||
const entry = JSON.stringify({
|
||||
ts: new Date().toISOString(),
|
||||
message: userMessage,
|
||||
@@ -414,6 +424,7 @@ function spawnClaude(userMessage: string): void {
|
||||
stateFile: config.stateFile,
|
||||
cwd: (sidebarSession as any)?.worktreePath || process.cwd(),
|
||||
sessionId: sidebarSession?.claudeSessionId || null,
|
||||
pageUrl: pageUrl,
|
||||
});
|
||||
try {
|
||||
fs.mkdirSync(gstackDir, { recursive: true });
|
||||
@@ -781,12 +792,16 @@ async function start() {
|
||||
const port = await findPort();
|
||||
|
||||
// Launch browser (headless or headed with extension)
|
||||
const headed = process.env.BROWSE_HEADED === '1';
|
||||
if (headed) {
|
||||
await browserManager.launchHeaded();
|
||||
console.log(`[browse] Launched headed Chromium with extension`);
|
||||
} else {
|
||||
await browserManager.launch();
|
||||
// BROWSE_HEADLESS_SKIP=1 skips browser launch entirely (for HTTP-only testing)
|
||||
const skipBrowser = process.env.BROWSE_HEADLESS_SKIP === '1';
|
||||
if (!skipBrowser) {
|
||||
const headed = process.env.BROWSE_HEADED === '1';
|
||||
if (headed) {
|
||||
await browserManager.launchHeaded();
|
||||
console.log(`[browse] Launched headed Chromium with extension`);
|
||||
} else {
|
||||
await browserManager.launch();
|
||||
}
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
@@ -935,17 +950,21 @@ async function start() {
|
||||
if (!msg) {
|
||||
return new Response(JSON.stringify({ error: 'Empty message' }), { status: 400, headers: { 'Content-Type': 'application/json' } });
|
||||
}
|
||||
// The Chrome extension sends the active tab's URL — prefer it over
|
||||
// Playwright's page.url() which can be stale in headed mode when
|
||||
// the user navigates manually.
|
||||
const extensionUrl = body.activeTabUrl || null;
|
||||
const ts = new Date().toISOString();
|
||||
addChatEntry({ ts, role: 'user', message: msg });
|
||||
if (sidebarSession) { sidebarSession.lastActiveAt = ts; saveSession(); }
|
||||
|
||||
if (agentStatus === 'idle') {
|
||||
spawnClaude(msg);
|
||||
spawnClaude(msg, extensionUrl);
|
||||
return new Response(JSON.stringify({ ok: true, processing: true }), {
|
||||
status: 200, headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
} else if (messageQueue.length < MAX_QUEUE) {
|
||||
messageQueue.push({ message: msg, ts });
|
||||
messageQueue.push({ message: msg, ts, extensionUrl });
|
||||
return new Response(JSON.stringify({ ok: true, queued: true, position: messageQueue.length }), {
|
||||
status: 200, headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
@@ -979,7 +998,7 @@ async function start() {
|
||||
// Process next in queue
|
||||
if (messageQueue.length > 0) {
|
||||
const next = messageQueue.shift()!;
|
||||
spawnClaude(next.message);
|
||||
spawnClaude(next.message, next.extensionUrl);
|
||||
}
|
||||
return new Response(JSON.stringify({ ok: true }), { status: 200, headers: { 'Content-Type': 'application/json' } });
|
||||
}
|
||||
@@ -1065,7 +1084,7 @@ async function start() {
|
||||
// Process next queued message
|
||||
if (messageQueue.length > 0) {
|
||||
const next = messageQueue.shift()!;
|
||||
spawnClaude(next.message);
|
||||
spawnClaude(next.message, next.extensionUrl);
|
||||
} else {
|
||||
agentStatus = 'idle';
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import { spawn } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const QUEUE = path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
|
||||
const QUEUE = process.env.SIDEBAR_QUEUE_PATH || path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
|
||||
const SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '34567', 10);
|
||||
const SERVER_URL = `http://127.0.0.1:${SERVER_PORT}`;
|
||||
const POLL_MS = 500; // Fast polling — server already did the user-facing response
|
||||
@@ -205,14 +205,15 @@ async function askClaude(queueEntry: any): Promise<void> {
|
||||
});
|
||||
});
|
||||
|
||||
// Timeout after 300 seconds (5 min — multi-page tasks need time)
|
||||
// Timeout (default 300s / 5 min — multi-page tasks need time)
|
||||
const timeoutMs = parseInt(process.env.SIDEBAR_AGENT_TIMEOUT || '300000', 10);
|
||||
setTimeout(() => {
|
||||
try { proc.kill(); } catch {}
|
||||
sendEvent({ type: 'agent_error', error: 'Timed out after 300s' }).then(() => {
|
||||
sendEvent({ type: 'agent_error', error: `Timed out after ${timeoutMs / 1000}s` }).then(() => {
|
||||
isProcessing = false;
|
||||
resolve();
|
||||
});
|
||||
}, 300000);
|
||||
}, timeoutMs);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Shared sidebar utilities — extracted for testability.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Sanitize a URL from the Chrome extension before embedding in a prompt.
|
||||
* Only accepts http/https, strips control characters, truncates to 2048 chars.
|
||||
* Returns null if the URL is invalid or uses a non-http scheme.
|
||||
*/
|
||||
/**
 * Sanitize a URL reported by the Chrome extension before it is embedded in a prompt.
 *
 * @param url - Candidate URL. Typed as a string, but it arrives from a JSON
 *   request body, so the runtime value is checked as well.
 * @returns The normalized `href` with ASCII control characters removed and
 *   truncated to 2048 characters, or `null` when the input is missing, is not
 *   a string, cannot be parsed, or uses a scheme other than http/https.
 */
export function sanitizeExtensionUrl(url: string | null | undefined): string | null {
  // `new URL()` stringifies non-string input (e.g. ["https://x"] becomes
  // "https://x"), so reject anything that is not a non-empty string up front.
  if (typeof url !== 'string' || url === '') return null;

  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not an absolute, parseable URL.
    return null;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;

  // Defensive strip of control characters, then cap the length so a huge URL
  // cannot bloat the prompt.
  return parsed.href.replace(/[\x00-\x1f\x7f]/g, '').slice(0, 2048);
}
|
||||
@@ -0,0 +1,226 @@
|
||||
/**
|
||||
* Layer 3: Sidebar agent round-trip tests.
|
||||
* Starts server + sidebar-agent together. Mocks the `claude` binary with a shell
|
||||
* script that outputs canned stream-json. Verifies events flow end-to-end:
|
||||
* POST /sidebar-command → queue → sidebar-agent → mock claude → events → /sidebar-chat
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { spawn, type Subprocess } from 'bun';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
|
||||
let serverProc: Subprocess | null = null;
|
||||
let agentProc: Subprocess | null = null;
|
||||
let serverPort: number = 0;
|
||||
let authToken: string = '';
|
||||
let tmpDir: string = '';
|
||||
let stateFile: string = '';
|
||||
let queueFile: string = '';
|
||||
let mockBinDir: string = '';
|
||||
|
||||
/**
 * Issue an HTTP request against the test server on 127.0.0.1:`serverPort`.
 *
 * Sends `Content-Type: application/json` by default; any headers in
 * `opts.headers` override the defaults. When the caller did not supply an
 * `Authorization` header and `authToken` is non-empty, a bearer token header
 * is added.
 *
 * @param pathname - Path (and optional query string) appended to the server origin.
 * @param opts - Extra `fetch` options; spread into the request as-is.
 * @returns The raw `fetch` response.
 */
async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
...(opts.headers as Record<string, string> || {}),
|
||||
};
|
||||
// Only inject the bearer token when the caller has not set Authorization itself.
if (!headers['Authorization'] && authToken) {
|
||||
headers['Authorization'] = `Bearer ${authToken}`;
|
||||
}
|
||||
return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
|
||||
}
|
||||
|
||||
/**
 * Reset shared state between tests: POSTs to `/sidebar-session/new` on the
 * server, then truncates the queue file to empty.
 */
async function resetState() {
|
||||
await api('/sidebar-session/new', { method: 'POST' });
|
||||
fs.writeFileSync(queueFile, '');
|
||||
}
|
||||
|
||||
/**
 * Poll `/sidebar-chat?after=0` until `predicate` accepts the returned entries
 * or the timeout elapses.
 *
 * @param predicate - Called with `data.entries` from each poll; return true to stop.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds (default 10000).
 * @returns The entries that satisfied the predicate. On timeout, the entries
 *   from one final fetch, which is NOT checked against the predicate, so
 *   callers must assert on the result themselves.
 */
async function pollChatUntil(
|
||||
predicate: (entries: any[]) => boolean,
|
||||
timeoutMs = 10000,
|
||||
): Promise<any[]> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const resp = await api('/sidebar-chat?after=0');
|
||||
const data = await resp.json();
|
||||
if (predicate(data.entries)) return data.entries;
|
||||
// Wait 300 ms between polls.
await new Promise(r => setTimeout(r, 300));
|
||||
}
|
||||
// Return whatever we have on timeout
|
||||
const resp = await api('/sidebar-chat?after=0');
|
||||
return (await resp.json()).entries;
|
||||
}
|
||||
|
||||
/**
 * Write `script` to `<mockBinDir>/claude` with mode 0o755, replacing any
 * existing file there.
 *
 * @param script - Full file contents of the mock `claude` script, including the shebang line.
 */
function writeMockClaude(script: string) {
|
||||
const mockPath = path.join(mockBinDir, 'claude');
|
||||
fs.writeFileSync(mockPath, script, { mode: 0o755 });
|
||||
}
|
||||
|
||||
// Suite setup: creates an isolated temp directory (state file, queue file,
// mock `claude` bin dir), starts the server with the browser launch skipped,
// waits for the state file to report a port and token, then starts the
// sidebar-agent with the mock `claude` first on PATH.
// The trailing 20000 is presumably the hook timeout in ms — confirm against bun:test.
beforeAll(async () => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-roundtrip-'));
|
||||
stateFile = path.join(tmpDir, 'browse.json');
|
||||
queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
|
||||
mockBinDir = path.join(tmpDir, 'bin');
|
||||
fs.mkdirSync(mockBinDir, { recursive: true });
|
||||
fs.mkdirSync(path.dirname(queueFile), { recursive: true });
|
||||
|
||||
// Write default mock claude that outputs canned events
|
||||
writeMockClaude(`#!/bin/bash
|
||||
echo '{"type":"system","session_id":"mock-session-123"}'
|
||||
echo '{"type":"assistant","message":{"content":[{"type":"text","text":"I can see the page. It looks like a test fixture."}]}}'
|
||||
echo '{"type":"result","result":"Done."}'
|
||||
`);
|
||||
|
||||
// Start server (no browser)
|
||||
const serverScript = path.resolve(__dirname, '..', 'src', 'server.ts');
|
||||
serverProc = spawn(['bun', 'run', serverScript], {
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_STATE_FILE: stateFile,
|
||||
// Tells the server to skip launching a browser (HTTP-only testing).
BROWSE_HEADLESS_SKIP: '1',
|
||||
// NOTE(review): '0' presumably requests an OS-assigned port — confirm in server's findPort().
BROWSE_PORT: '0',
|
||||
// Points the server's agent queue at the temp file instead of ~/.gstack.
SIDEBAR_QUEUE_PATH: queueFile,
|
||||
BROWSE_IDLE_TIMEOUT: '300',
|
||||
},
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
// Wait for server
|
||||
// Polls every 100 ms for up to 15 s until the state file holds both a port and a token.
const deadline = Date.now() + 15000;
|
||||
while (Date.now() < deadline) {
|
||||
if (fs.existsSync(stateFile)) {
|
||||
try {
|
||||
const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
|
||||
if (state.port && state.token) {
|
||||
serverPort = state.port;
|
||||
authToken = state.token;
|
||||
break;
|
||||
}
|
||||
// Read/parse errors are ignored; the loop simply retries on the next tick.
} catch {}
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 100));
|
||||
}
|
||||
if (!serverPort) throw new Error('Server did not start in time');
|
||||
|
||||
// Start sidebar-agent with mock claude on PATH
|
||||
const agentScript = path.resolve(__dirname, '..', 'src', 'sidebar-agent.ts');
|
||||
agentProc = spawn(['bun', 'run', agentScript], {
|
||||
env: {
|
||||
...process.env,
|
||||
// Prepend the mock bin dir so `claude` resolves to the mock script first.
PATH: `${mockBinDir}:${process.env.PATH}`,
|
||||
BROWSE_SERVER_PORT: String(serverPort),
|
||||
BROWSE_STATE_FILE: stateFile,
|
||||
SIDEBAR_QUEUE_PATH: queueFile,
|
||||
// Agent-side task timeout in milliseconds (10 s here).
SIDEBAR_AGENT_TIMEOUT: '10000',
|
||||
BROWSE_BIN: 'browse', // doesn't matter, mock claude doesn't use it
|
||||
},
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
// Give sidebar-agent time to start polling
|
||||
await new Promise(r => setTimeout(r, 1000));
|
||||
}, 20000);
|
||||
|
||||
// Suite teardown, best-effort: stop the agent, then the server, then delete the
// temp workspace. Failures are ignored on purpose so cleanup never fails the run.
afterAll(() => {
  for (const proc of [agentProc, serverProc]) {
    if (!proc) continue;
    try { proc.kill(); } catch {}
  }
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
});
|
||||
|
||||
// End-to-end flow: HTTP command → queue → sidebar-agent → mock claude → chat buffer.
describe('sidebar-agent round-trip', () => {
  test('full message round-trip with mock claude', async () => {
    await resetState();

    // Send a command
    const resp = await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({
        message: 'what is on this page?',
        activeTabUrl: 'https://example.com/test',
      }),
    });
    expect(resp.status).toBe(200);

    // Wait for mock claude to process and events to arrive
    const entries = await pollChatUntil(
      (entries) => entries.some((e: any) => e.type === 'agent_done'),
      15000,
    );

    // Verify the flow: user message → agent text → agent_done
    const userEntry = entries.find((e: any) => e.role === 'user');
    expect(userEntry).toBeDefined();
    expect(userEntry.message).toBe('what is on this page?');

    // The mock claude outputs text — check for any agent text entry
    const textEntries = entries.filter(
      (e: any) => e.role === 'agent' && (e.type === 'text' || e.type === 'result'),
    );
    expect(textEntries.length).toBeGreaterThan(0);

    const doneEntry = entries.find((e: any) => e.type === 'agent_done');
    expect(doneEntry).toBeDefined();

    // Agent should be back to idle
    const session = await (await api('/sidebar-session')).json();
    expect(session.agent.status).toBe('idle');
  }, 20000);

  test('claude crash produces agent_error', async () => {
    await resetState();

    // Replace mock claude with one that writes to stderr and exits non-zero
    writeMockClaude(`#!/bin/bash
echo '{"type":"system","session_id":"crash-test"}' >&2
exit 1
`);

    const isTerminal = (e: any) => e.type === 'agent_done' || e.type === 'agent_error';

    try {
      await api('/sidebar-command', {
        method: 'POST',
        body: JSON.stringify({ message: 'crash test' }),
      });

      // Wait for a terminal event (agent_done or agent_error)
      const entries = await pollChatUntil((entries) => entries.some(isTerminal), 15000);

      // pollChatUntil returns silently on timeout, so verify that a terminal
      // event really arrived instead of relying on the status check alone.
      expect(entries.some(isTerminal)).toBe(true);

      // Agent should recover to idle
      const session = await (await api('/sidebar-session')).json();
      expect(session.agent.status).toBe('idle');
    } finally {
      // Restore a working mock even when an assertion above fails, so the
      // crashing script cannot leak into later tests.
      writeMockClaude(`#!/bin/bash
echo '{"type":"assistant","message":{"content":[{"type":"text","text":"recovered"}]}}'
`);
    }
  }, 20000);

  test('sequential queue drain', async () => {
    await resetState();

    // Restore working mock
    // NOTE(review): `$*` sits inside single quotes in the script below, so bash
    // prints it literally rather than expanding the arguments — confirm intent.
    writeMockClaude(`#!/bin/bash
echo '{"type":"assistant","message":{"content":[{"type":"text","text":"response to: '"'"'$*'"'"'"}]}}'
`);

    // Send two messages rapidly — first processes, second queues
    await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({ message: 'first message' }),
    });
    await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({ message: 'second message' }),
    });

    // Wait for both to complete (two agent_done events)
    const entries = await pollChatUntil(
      (entries) => entries.filter((e: any) => e.type === 'agent_done').length >= 2,
      20000,
    );

    // Both user messages should be in chat
    const userEntries = entries.filter((e: any) => e.role === 'user');
    expect(userEntries.length).toBeGreaterThanOrEqual(2);
  }, 25000);
});
|
||||
@@ -0,0 +1,320 @@
|
||||
/**
|
||||
* Layer 2: Server HTTP integration tests for sidebar endpoints.
|
||||
* Starts the browse server as a subprocess (no browser via BROWSE_HEADLESS_SKIP),
|
||||
* exercises sidebar HTTP endpoints with fetch(). No Chrome, no Claude, no sidebar-agent.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { spawn, type Subprocess } from 'bun';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
|
||||
// Shared suite state; every value below is assigned in beforeAll.
let serverProc: Subprocess | null = null;
let serverPort = 0;
let authToken = '';
let tmpDir = '';
let stateFile = '';
let queueFile = '';
|
||||
|
||||
/**
 * Sends a request to the browse server under test.
 *
 * `Content-Type` defaults to `application/json`, and the suite's bearer token
 * is attached unless the caller supplies a non-empty `Authorization` header of
 * its own or passes `noAuth: true`.
 *
 * @param pathname Path (plus optional query string) appended to the server origin.
 * @param opts     Standard fetch options, plus `noAuth` to omit the bearer token.
 * @returns The raw fetch `Response`; status codes are not checked here.
 */
async function api(pathname: string, opts: RequestInit & { noAuth?: boolean } = {}): Promise<Response> {
  const { noAuth, ...fetchOpts } = opts;
  // `Headers` accepts every HeadersInit shape (plain object, tuple array, or
  // another Headers instance) and matches names case-insensitively, so a
  // caller-supplied `authorization` / `content-type` is honoured, not duplicated.
  const headers = new Headers(fetchOpts.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }
  if (!noAuth && authToken && !headers.get('Authorization')) {
    headers.set('Authorization', `Bearer ${authToken}`);
  }
  return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...fetchOpts, headers });
}
|
||||
|
||||
// Suite setup: create a temp workspace, start the browse server, wait for its state file.
beforeAll(async () => {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-integ-'));
  stateFile = path.join(tmpDir, 'browse.json');
  queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');

  // Ensure queue dir exists
  fs.mkdirSync(path.dirname(queueFile), { recursive: true });

  // BROWSE_PORT=0 is passed and the real port is read back from the state file.
  const serverEnv = {
    ...process.env,
    BROWSE_STATE_FILE: stateFile,
    BROWSE_HEADLESS_SKIP: '1',
    BROWSE_PORT: '0',
    SIDEBAR_QUEUE_PATH: queueFile,
    BROWSE_IDLE_TIMEOUT: '300',
  };
  const serverScript = path.resolve(__dirname, '..', 'src', 'server.ts');
  serverProc = spawn(['bun', 'run', serverScript], {
    env: serverEnv,
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // Yields the parsed state once it contains both a port and a token, else null.
  const readReadyState = () => {
    if (!fs.existsSync(stateFile)) return null;
    try {
      const parsed = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
      return parsed.port && parsed.token ? parsed : null;
    } catch {
      // Unreadable or unparsable right now — try again on the next tick.
      return null;
    }
  };

  // Wait up to 15s for the state file, checking every 100ms.
  for (const giveUpAt = Date.now() + 15000; Date.now() < giveUpAt; ) {
    const ready = readReadyState();
    if (ready) {
      serverPort = ready.port;
      authToken = ready.token;
      break;
    }
    await new Promise(r => setTimeout(r, 100));
  }
  if (!serverPort) throw new Error('Server did not start in time');
}, 20000);
|
||||
|
||||
// Suite teardown, best-effort: stop the server and delete the temp workspace.
// Failures are ignored on purpose so cleanup never fails the run.
afterAll(() => {
  try {
    if (serverProc) serverProc.kill();
  } catch {}
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {}
});
|
||||
|
||||
/**
 * Resets state between tests: asks the server to open a new sidebar session,
 * then truncates the on-disk queue file to zero bytes.
 */
async function resetState() {
  const newSessionRequest = { method: 'POST' };
  await api('/sidebar-session/new', newSessionRequest);
  fs.writeFileSync(queueFile, '');
}
|
||||
|
||||
// Bearer-token enforcement on POST /sidebar-command.
describe('sidebar auth', () => {
  // Every case posts the same kind of command; only the credentials differ.
  const sendCommand = (message: string, extra: RequestInit & { noAuth?: boolean } = {}) =>
    api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({ message }),
      ...extra,
    });

  test('rejects request without auth token', async () => {
    const resp = await sendCommand('test', { noAuth: true });
    expect(resp.status).toBe(401);
  });

  test('rejects request with wrong token', async () => {
    const resp = await sendCommand('test', {
      headers: { 'Authorization': 'Bearer wrong-token' },
    });
    expect(resp.status).toBe(401);
  });

  test('accepts request with correct token', async () => {
    const resp = await sendCommand('hello');
    expect(resp.status).toBe(200);
    // Clean up
    await api('/sidebar-agent/kill', { method: 'POST' });
  });
});
|
||||
|
||||
// POST /sidebar-command should append an entry to the queue file with a resolved page URL.
describe('sidebar-command → queue', () => {
  const postCommand = (payload: Record<string, unknown>) =>
    api('/sidebar-command', { method: 'POST', body: JSON.stringify(payload) });

  // Waits briefly for the server to write, then parses the last non-empty queue line.
  const readLastQueueEntry = async () => {
    await new Promise(r => setTimeout(r, 100));
    const queued = fs.readFileSync(queueFile, 'utf-8').trim().split('\n').filter(Boolean);
    expect(queued.length).toBeGreaterThan(0);
    return JSON.parse(queued[queued.length - 1]);
  };

  const killAgent = () => api('/sidebar-agent/kill', { method: 'POST' });

  test('writes queue entry with activeTabUrl', async () => {
    await resetState();

    const resp = await postCommand({
      message: 'what is on this page?',
      activeTabUrl: 'https://example.com/test-page',
    });
    expect(resp.status).toBe(200);
    const data = await resp.json();
    expect(data.ok).toBe(true);

    const entry = await readLastQueueEntry();
    expect(entry.pageUrl).toBe('https://example.com/test-page');
    expect(entry.prompt).toContain('https://example.com/test-page');

    await killAgent();
  });

  test('falls back when activeTabUrl is null', async () => {
    await resetState();

    await postCommand({ message: 'test', activeTabUrl: null });

    const entry = await readLastQueueEntry();
    // No browser → playwright URL is 'about:blank'
    expect(entry.pageUrl).toBe('about:blank');

    await killAgent();
  });

  test('rejects chrome:// activeTabUrl and falls back', async () => {
    await resetState();

    await postCommand({ message: 'test', activeTabUrl: 'chrome://extensions' });

    const entry = await readLastQueueEntry();
    expect(entry.pageUrl).toBe('about:blank');

    await killAgent();
  });

  test('rejects empty message', async () => {
    const resp = await postCommand({ message: '' });
    expect(resp.status).toBe(400);
  });
});
|
||||
|
||||
// Events posted to /sidebar-agent/event should show up in the chat buffer and drive agent status.
describe('sidebar-agent/event → chat buffer', () => {
  const postJson = (pathname: string, payload: unknown) =>
    api(pathname, { method: 'POST', body: JSON.stringify(payload) });

  const agentStatus = async () => {
    const session = await (await api('/sidebar-session')).json();
    return session.agent.status;
  };

  test('agent events appear in /sidebar-chat', async () => {
    await resetState();

    // Post mock agent events using Claude's streaming format
    await postJson('/sidebar-agent/event', {
      type: 'assistant',
      message: { content: [{ type: 'text', text: 'Hello from mock agent' }] },
    });

    const chatResp = await api('/sidebar-chat?after=0');
    const { entries } = await chatResp.json();
    const textEntry = entries.find((e: any) => e.type === 'text');
    expect(textEntry).toBeDefined();
    expect(textEntry.text).toBe('Hello from mock agent');
  });

  test('agent_done transitions status to idle', async () => {
    await resetState();

    // Start a command so agent is processing
    await postJson('/sidebar-command', { message: 'test' });
    expect(await agentStatus()).toBe('processing');

    // Send agent_done
    await postJson('/sidebar-agent/event', { type: 'agent_done' });
    expect(await agentStatus()).toBe('idle');
  });
});
|
||||
|
||||
// While one command is processing, further commands are queued, up to a limit.
describe('message queuing', () => {
  const send = (message: string) =>
    api('/sidebar-command', { method: 'POST', body: JSON.stringify({ message }) });

  const killAgent = () => api('/sidebar-agent/kill', { method: 'POST' });

  test('queues message when agent is processing', async () => {
    await resetState();

    // First message starts processing
    await send('first');

    // Second message gets queued
    const queuedResp = await send('second');
    const body = await queuedResp.json();
    expect(body.ok).toBe(true);
    expect(body.queued).toBe(true);
    expect(body.position).toBe(1);

    await killAgent();
  });

  test('returns 429 when queue is full', async () => {
    await resetState();

    // First message starts processing
    await send('first');

    // Fill queue (max 5) — sent one at a time, in order
    for (let slot = 0; slot < 5; slot += 1) {
      await send(`fill-${slot}`);
    }

    // 7th message should be rejected
    const overflowResp = await send('overflow');
    expect(overflowResp.status).toBe(429);

    await killAgent();
  });
});
|
||||
|
||||
// POST /sidebar-chat/clear should leave the chat buffer empty.
describe('chat clear', () => {
  test('clears chat buffer', async () => {
    await resetState();

    // Add some entries
    const seedEvent = { type: 'text', text: 'to be cleared' };
    await api('/sidebar-agent/event', { method: 'POST', body: JSON.stringify(seedEvent) });

    await api('/sidebar-chat/clear', { method: 'POST' });

    const chatResp = await api('/sidebar-chat?after=0');
    const { entries, total } = await chatResp.json();
    expect(entries.length).toBe(0);
    expect(total).toBe(0);
  });
});
|
||||
|
||||
// POST /sidebar-agent/kill should record an error entry and return the agent to idle.
describe('agent kill', () => {
  const agentStatus = async () => {
    const session = await (await api('/sidebar-session')).json();
    return session.agent.status;
  };

  test('kill adds error entry and returns to idle', async () => {
    await resetState();

    // Start a command so agent is processing
    await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({ message: 'kill me' }),
    });
    expect(await agentStatus()).toBe('processing');

    // Kill the agent
    const killResp = await api('/sidebar-agent/kill', { method: 'POST' });
    expect(killResp.status).toBe(200);

    // Check chat for error entry
    const { entries } = await (await api('/sidebar-chat?after=0')).json();
    const errorEntry = entries.find((e: any) => e.error === 'Killed by user');
    expect(errorEntry).toBeDefined();

    // Agent should be idle (no queue items to auto-process)
    expect(await agentStatus()).toBe('idle');
  });
});
|
||||
@@ -0,0 +1,96 @@
|
||||
/**
|
||||
* Layer 1: Unit tests for sidebar utilities.
|
||||
* Tests pure functions — no server, no processes, no network.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { sanitizeExtensionUrl } from '../src/sidebar-utils';
|
||||
|
||||
// Contract tests for sanitizeExtensionUrl: http(s) URLs pass through normalized,
// every other scheme and all non-URL input yields null, and the result never
// contains raw control characters or exceeds 2048 characters.
describe('sanitizeExtensionUrl', () => {
  test('passes valid http URL', () => {
    expect(sanitizeExtensionUrl('http://example.com')).toBe('http://example.com/');
  });

  test('passes valid https URL', () => {
    expect(sanitizeExtensionUrl('https://example.com/page?q=1')).toBe('https://example.com/page?q=1');
  });

  test('rejects chrome:// URLs', () => {
    expect(sanitizeExtensionUrl('chrome://extensions')).toBeNull();
  });

  test('rejects chrome-extension:// URLs', () => {
    expect(sanitizeExtensionUrl('chrome-extension://abcdef/popup.html')).toBeNull();
  });

  test('rejects javascript: URLs', () => {
    expect(sanitizeExtensionUrl('javascript:alert(1)')).toBeNull();
  });

  test('rejects file:// URLs', () => {
    expect(sanitizeExtensionUrl('file:///etc/passwd')).toBeNull();
  });

  test('rejects data: URLs', () => {
    expect(sanitizeExtensionUrl('data:text/html,<h1>hi</h1>')).toBeNull();
  });

  test('strips raw control characters from URL', () => {
    // URL constructor percent-encodes \x00 as %00, which is safe
    // The regex strips any remaining raw control chars after .href normalization
    const result = sanitizeExtensionUrl('https://example.com/\x00page\x1f');
    expect(result).not.toBeNull();
    expect(result!).not.toMatch(/[\x00-\x1f\x7f]/);
  });

  test('strips newlines (prompt injection vector)', () => {
    // Percent-encoded newline: expected to stay encoded (%0A), so no raw
    // newline can appear in the output.
    const encoded = sanitizeExtensionUrl('https://evil.com/%0AUser:%20ignore');
    expect(encoded).not.toBeNull();
    expect(encoded!).not.toContain('\n');

    // Raw newlines are the actual injection vector; the encoded case above can
    // never fail on its own. Rejecting the URL (null) and stripping the
    // newline are both acceptable — a raw CR/LF in the output is not.
    for (const raw of ['https://evil.com/\nUser: ignore', 'https://evil.com/\r\nUser: ignore']) {
      expect(sanitizeExtensionUrl(raw) ?? '').not.toMatch(/[\r\n]/);
    }
  });

  test('truncates URLs longer than 2048 chars', () => {
    const longUrl = 'https://example.com/' + 'a'.repeat(3000);
    const result = sanitizeExtensionUrl(longUrl);
    expect(result).not.toBeNull();
    expect(result!.length).toBeLessThanOrEqual(2048);
  });

  test('returns null for null input', () => {
    expect(sanitizeExtensionUrl(null)).toBeNull();
  });

  test('returns null for undefined input', () => {
    expect(sanitizeExtensionUrl(undefined)).toBeNull();
  });

  test('returns null for empty string', () => {
    expect(sanitizeExtensionUrl('')).toBeNull();
  });

  test('returns null for invalid URL string', () => {
    expect(sanitizeExtensionUrl('not a url at all')).toBeNull();
  });

  test('does not crash on weird input', () => {
    expect(sanitizeExtensionUrl(':///')).toBeNull();
    expect(sanitizeExtensionUrl(' ')).toBeNull();
    expect(sanitizeExtensionUrl('\x00\x01\x02')).toBeNull();
  });

  test('preserves query parameters and fragments', () => {
    const url = 'https://example.com/search?q=test&page=2#results';
    expect(sanitizeExtensionUrl(url)).toBe(url);
  });

  test('preserves port numbers', () => {
    expect(sanitizeExtensionUrl('http://localhost:3000/api')).toBe('http://localhost:3000/api');
  });

  test('handles URL with auth (user:pass@host)', () => {
    const result = sanitizeExtensionUrl('https://user:pass@example.com/');
    expect(result).not.toBeNull();
    expect(result).toContain('example.com');
  });
});
|
||||
+112
-46
@@ -343,21 +343,49 @@ If `NEEDS_SETUP`:
|
||||
2. Run: `cd <SKILL_DIR> && ./setup`
|
||||
3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
|
||||
|
||||
## Step 0: Pre-flight cleanup
|
||||
|
||||
Before connecting, kill any stale browse servers and clean up lock files that
|
||||
may have persisted from a crash. This prevents "already connected" false
|
||||
positives and Chromium profile lock conflicts.
|
||||
|
||||
```bash
|
||||
# Kill any existing browse server
|
||||
if [ -f "$(git rev-parse --show-toplevel 2>/dev/null)/.gstack/browse.json" ]; then
|
||||
_OLD_PID=$(cat "$(git rev-parse --show-toplevel)/.gstack/browse.json" 2>/dev/null | grep -o '"pid":[0-9]*' | grep -o '[0-9]*')
|
||||
[ -n "$_OLD_PID" ] && kill "$_OLD_PID" 2>/dev/null || true
|
||||
sleep 1
|
||||
[ -n "$_OLD_PID" ] && kill -9 "$_OLD_PID" 2>/dev/null || true
|
||||
rm -f "$(git rev-parse --show-toplevel)/.gstack/browse.json"
|
||||
fi
|
||||
# Clean Chromium profile locks (can persist after crashes)
|
||||
_PROFILE_DIR="$HOME/.gstack/chromium-profile"
|
||||
for _LF in SingletonLock SingletonSocket SingletonCookie; do
|
||||
rm -f "$_PROFILE_DIR/$_LF" 2>/dev/null || true
|
||||
done
|
||||
echo "Pre-flight cleanup done"
|
||||
```
|
||||
|
||||
## Step 1: Connect
|
||||
|
||||
```bash
|
||||
$B connect
|
||||
```
|
||||
|
||||
This launches your system Chrome via Playwright with:
|
||||
- A visible window (headed mode, not headless)
|
||||
- The gstack Chrome extension pre-loaded
|
||||
- A green shimmer line + "gstack" pill so you know which window is controlled
|
||||
This launches Playwright's bundled Chromium in headed mode with:
|
||||
- A visible window you can watch (not your regular Chrome — it stays untouched)
|
||||
- The gstack Chrome extension auto-loaded via `launchPersistentContext`
|
||||
- A golden shimmer line at the top of every page so you know which window is controlled
|
||||
- A sidebar agent process for chat commands
|
||||
|
||||
If Chrome is already running, the server restarts in headed mode with a fresh
|
||||
Chrome instance. Your regular Chrome stays untouched.
|
||||
The `connect` command auto-discovers the extension from the gstack install
|
||||
directory. It always uses port **34567** so the extension can auto-connect.
|
||||
|
||||
After connecting, print the output to the user.
|
||||
After connecting, print the full output to the user. Confirm you see
|
||||
`Mode: headed` in the output.
|
||||
|
||||
If the output shows an error or the mode is not `headed`, run `$B status` and
|
||||
share the output with the user before proceeding.
|
||||
|
||||
## Step 2: Verify
|
||||
|
||||
@@ -365,27 +393,41 @@ After connecting, print the output to the user.
|
||||
$B status
|
||||
```
|
||||
|
||||
Confirm the output shows `Mode: cdp`. Print the port number — the user may need
|
||||
it for the Side Panel.
|
||||
Confirm the output shows `Mode: headed`. Read the port from the state file:
|
||||
|
||||
```bash
|
||||
cat "$(git rev-parse --show-toplevel 2>/dev/null)/.gstack/browse.json" 2>/dev/null | grep -o '"port":[0-9]*' | grep -o '[0-9]*'
|
||||
```
|
||||
|
||||
The port should be **34567**. If it's different, note it — the user may need it
|
||||
for the Side Panel.
|
||||
|
||||
Also find the extension path so you can help the user if they need to load it manually:
|
||||
|
||||
```bash
|
||||
_EXT_PATH=""
|
||||
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
|
||||
[ -n "$_ROOT" ] && [ -f "$_ROOT/.claude/skills/gstack/extension/manifest.json" ] && _EXT_PATH="$_ROOT/.claude/skills/gstack/extension"
|
||||
[ -z "$_EXT_PATH" ] && [ -f "$HOME/.claude/skills/gstack/extension/manifest.json" ] && _EXT_PATH="$HOME/.claude/skills/gstack/extension"
|
||||
echo "EXTENSION_PATH: ${_EXT_PATH:-NOT FOUND}"
|
||||
```
|
||||
|
||||
## Step 3: Guide the user to the Side Panel
|
||||
|
||||
Use AskUserQuestion:
|
||||
|
||||
> Chrome is launched with gstack control. You should see a green shimmer line at the
|
||||
> top of the Chrome window and a small "gstack" pill in the bottom-right corner.
|
||||
> Chrome is launched with gstack control. You should see Playwright's Chromium
|
||||
> (not your regular Chrome) with a golden shimmer line at the top of the page.
|
||||
>
|
||||
> The Side Panel extension is pre-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in Chrome's toolbar
|
||||
> 2. Click it → find **gstack browse** → click the **pin icon** to pin it
|
||||
> 3. Click the **gstack icon** in the toolbar
|
||||
> 4. Click **Open Side Panel**
|
||||
> The Side Panel extension should be auto-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in the toolbar — it may
|
||||
> already show the gstack icon if the extension loaded successfully
|
||||
> 2. Click the **puzzle piece** → find **gstack browse** → click the **pin icon**
|
||||
> 3. Click the pinned **gstack icon** in the toolbar
|
||||
> 4. The Side Panel should open on the right showing a live activity feed
|
||||
>
|
||||
> The Side Panel shows a live feed of every browse command in real time.
|
||||
>
|
||||
> **Port:** The browse server is on port {PORT} — the extension auto-detects it
|
||||
> if you're using the Playwright-controlled Chrome. If the badge stays gray, click
|
||||
> the gstack icon and enter port {PORT} manually.
|
||||
> **Port:** 34567 (auto-detected — the extension connects automatically in the
|
||||
> Playwright-controlled Chrome).
|
||||
|
||||
Options:
|
||||
- A) I can see the Side Panel — let's go!
|
||||
@@ -393,22 +435,34 @@ Options:
|
||||
- C) Something went wrong
|
||||
|
||||
If B: Tell the user:
|
||||
> The extension should be auto-loaded, but Chrome sometimes doesn't show it
|
||||
> immediately. Try:
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for "gstack browse" — it should be listed and enabled
|
||||
> 3. If not listed, click "Load unpacked" → navigate to the extension folder
|
||||
> (press Cmd+Shift+G in the file picker, paste this path):
|
||||
> `{EXTENSION_PATH}`
|
||||
>
|
||||
> Then pin it from the puzzle piece icon and open the Side Panel.
|
||||
|
||||
If C: Run `$B status` and show the output. Check if the server is healthy.
|
||||
> The extension is loaded into Playwright's Chromium at launch time, but
|
||||
> sometimes it doesn't appear immediately. Try these steps:
|
||||
>
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for **"gstack browse"** — it should be listed and enabled
|
||||
> 3. If it's there but not pinned, go back to any page, click the puzzle piece
|
||||
> icon, and pin it
|
||||
> 4. If it's NOT listed at all, click **"Load unpacked"** and navigate to:
|
||||
> - Press **Cmd+Shift+G** in the file picker dialog (macOS) to type a path directly
|
||||
> - Paste this path: `{EXTENSION_PATH}` (use the path from Step 2)
|
||||
> - Click **Select**
|
||||
>
|
||||
> After loading, pin it and click the icon to open the Side Panel.
|
||||
>
|
||||
> If the Side Panel badge stays gray (disconnected), click the gstack icon
|
||||
> and enter port **34567** manually.
|
||||
|
||||
If C:
|
||||
|
||||
1. Run `$B status` and show the output
|
||||
2. If the server is not healthy, re-run Step 0 cleanup + Step 1 connect
|
||||
3. If the server IS healthy but the browser isn't visible, try `$B focus`
|
||||
4. If that fails, ask the user what they see (error message, blank screen, etc.)
|
||||
|
||||
## Step 4: Demo
|
||||
|
||||
After the user confirms the Side Panel is working, run a quick demo so they
|
||||
can see the activity feed in action:
|
||||
After the user confirms the Side Panel is working, run a quick demo:
|
||||
|
||||
```bash
|
||||
$B goto https://news.ycombinator.com
|
||||
@@ -421,7 +475,7 @@ $B snapshot -i
|
||||
```
|
||||
|
||||
Tell the user: "Check the Side Panel — you should see the `goto` and `snapshot`
|
||||
commands appear in the activity feed. Every command Claude runs will show up here
|
||||
commands appear in the activity feed. Every command Claude runs shows up here
|
||||
in real time."
|
||||
|
||||
## Step 5: Sidebar chat
|
||||
@@ -429,8 +483,9 @@ in real time."
|
||||
After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
> The Side Panel also has a **chat tab**. Try typing a message like "take a
|
||||
> snapshot and describe this page." A child Claude instance will execute your
|
||||
> request in the browser — you'll see the commands appear in the activity feed.
|
||||
> snapshot and describe this page." A sidebar agent (a child Claude instance)
|
||||
> executes your request in the browser — you'll see the commands appear in
|
||||
> the activity feed as they happen.
|
||||
>
|
||||
> The sidebar agent can navigate pages, click buttons, fill forms, and read
|
||||
> content. Each task gets up to 5 minutes. It runs in an isolated session, so
|
||||
@@ -440,17 +495,28 @@ After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
Tell the user:
|
||||
|
||||
> You're all set! Chrome is under Claude's control with the Side Panel showing
|
||||
> live activity and a chat sidebar for direct commands. Here's what you can do:
|
||||
> You're all set! Here's what you can do with the connected Chrome:
|
||||
>
|
||||
> - **Chat in the sidebar** — type natural language instructions and Claude
|
||||
> executes them in the browser
|
||||
> - **Run any browse command** — `$B goto`, `$B click`, `$B snapshot` — and
|
||||
> watch it happen in Chrome + the Side Panel
|
||||
> - **Use /qa or /design-review** — they'll run in the visible Chrome window
|
||||
> instead of headless. No cookie import needed.
|
||||
> - **`$B focus`** — bring Chrome to the foreground anytime
|
||||
> - **`$B disconnect`** — return to headless mode when done
|
||||
> **Watch Claude work in real time:**
|
||||
> - Run any gstack skill (`/qa`, `/design-review`, `/benchmark`) and watch
|
||||
> every action happen in the visible Chrome window + Side Panel feed
|
||||
> - No cookie import needed — the Playwright browser keeps its own session
|
||||
>
|
||||
> **Control the browser directly:**
|
||||
> - **Sidebar chat** — type natural language in the Side Panel and the sidebar
|
||||
> agent executes it (e.g., "fill in the login form and submit")
|
||||
> - **Browse commands** — `$B goto <url>`, `$B click <sel>`, `$B fill <sel> <val>`,
|
||||
> `$B snapshot -i` — all visible in Chrome + Side Panel
|
||||
>
|
||||
> **Window management:**
|
||||
> - `$B focus` — bring Chrome to the foreground anytime
|
||||
> - `$B disconnect` — close headed Chrome and return to headless mode
|
||||
>
|
||||
> **What skills look like in headed mode:**
|
||||
> - `/qa` runs its full test suite in the visible browser — you see every page
|
||||
> load, every click, every assertion
|
||||
> - `/design-review` takes screenshots in the real browser — same pixels you see
|
||||
> - `/benchmark` measures performance in the headed browser
|
||||
|
||||
Then proceed with whatever the user asked to do. If they didn't specify a task,
|
||||
ask what they'd like to test or browse.
|
||||
|
||||
+112
-46
@@ -23,21 +23,49 @@ You see every click, every navigation, every action in real time.
|
||||
|
||||
{{BROWSE_SETUP}}
|
||||
|
||||
## Step 0: Pre-flight cleanup
|
||||
|
||||
Before connecting, kill any stale browse servers and clean up lock files that
|
||||
may have persisted from a crash. This prevents "already connected" false
|
||||
positives and Chromium profile lock conflicts.
|
||||
|
||||
```bash
|
||||
# Kill any existing browse server recorded in this repo's state file.
# Resolve the repo root once so every path below refers to the same directory,
# and skip the state-file cleanup entirely when not inside a git repo.
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || true)
_STATE_FILE="$_ROOT/.gstack/browse.json"
if [ -n "$_ROOT" ] && [ -f "$_STATE_FILE" ]; then
  # Allow optional whitespace after the colon so pretty-printed JSON also matches.
  _OLD_PID=$(grep -o '"pid":[[:space:]]*[0-9]*' "$_STATE_FILE" 2>/dev/null | grep -o '[0-9][0-9]*' | head -n 1)
  if [ -n "$_OLD_PID" ]; then
    # Ask politely first (SIGTERM), then give the process a moment to exit.
    kill "$_OLD_PID" 2>/dev/null || true
    sleep 1
    # Escalate to SIGKILL only if the process is still alive.
    kill -0 "$_OLD_PID" 2>/dev/null && kill -9 "$_OLD_PID" 2>/dev/null || true
  fi
  rm -f "$_STATE_FILE"
fi
# Clean Chromium profile locks (can persist after crashes)
_PROFILE_DIR="$HOME/.gstack/chromium-profile"
for _LF in SingletonLock SingletonSocket SingletonCookie; do
  rm -f "$_PROFILE_DIR/$_LF" 2>/dev/null || true
done
echo "Pre-flight cleanup done"
|
||||
```
|
||||
|
||||
## Step 1: Connect
|
||||
|
||||
```bash
|
||||
$B connect
|
||||
```
|
||||
|
||||
This launches your system Chrome via Playwright with:
|
||||
- A visible window (headed mode, not headless)
|
||||
- The gstack Chrome extension pre-loaded
|
||||
- A green shimmer line + "gstack" pill so you know which window is controlled
|
||||
This launches Playwright's bundled Chromium in headed mode with:
|
||||
- A visible window you can watch (not your regular Chrome — it stays untouched)
|
||||
- The gstack Chrome extension auto-loaded via `launchPersistentContext`
|
||||
- A golden shimmer line at the top of every page so you know which window is controlled
|
||||
- A sidebar agent process for chat commands
|
||||
|
||||
If Chrome is already running, the server restarts in headed mode with a fresh
|
||||
Chrome instance. Your regular Chrome stays untouched.
|
||||
The `connect` command auto-discovers the extension from the gstack install
|
||||
directory. It always uses port **34567** so the extension can auto-connect.
|
||||
|
||||
After connecting, print the output to the user.
|
||||
After connecting, print the full output to the user. Confirm you see
|
||||
`Mode: headed` in the output.
|
||||
|
||||
If the output shows an error or the mode is not `headed`, run `$B status` and
|
||||
share the output with the user before proceeding.
|
||||
|
||||
## Step 2: Verify
|
||||
|
||||
@@ -45,27 +73,41 @@ After connecting, print the output to the user.
|
||||
$B status
|
||||
```
|
||||
|
||||
Confirm the output shows `Mode: cdp`. Print the port number — the user may need
|
||||
it for the Side Panel.
|
||||
Confirm the output shows `Mode: headed`. Read the port from the state file:
|
||||
|
||||
```bash
|
||||
# Print the port recorded in the browse state file (tolerates whitespace after the colon).
grep -o '"port":[[:space:]]*[0-9]*' "$(git rev-parse --show-toplevel 2>/dev/null)/.gstack/browse.json" 2>/dev/null | grep -o '[0-9][0-9]*'
|
||||
```
|
||||
|
||||
The port should be **34567**. If it's different, note it — the user may need it
|
||||
for the Side Panel.
|
||||
|
||||
Also find the extension path so you can help the user if they need to load it manually:
|
||||
|
||||
```bash
|
||||
# Locate the unpacked extension: prefer the copy inside the current repo,
# otherwise fall back to the copy under the user's home directory.
_EXT_PATH=""
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
if [ -n "$_ROOT" ] && [ -f "$_ROOT/.claude/skills/gstack/extension/manifest.json" ]; then
  _EXT_PATH="$_ROOT/.claude/skills/gstack/extension"
elif [ -f "$HOME/.claude/skills/gstack/extension/manifest.json" ]; then
  _EXT_PATH="$HOME/.claude/skills/gstack/extension"
fi
echo "EXTENSION_PATH: ${_EXT_PATH:-NOT FOUND}"
|
||||
```
|
||||
|
||||
## Step 3: Guide the user to the Side Panel
|
||||
|
||||
Use AskUserQuestion:
|
||||
|
||||
> Chrome is launched with gstack control. You should see a green shimmer line at the
|
||||
> top of the Chrome window and a small "gstack" pill in the bottom-right corner.
|
||||
> Chrome is launched with gstack control. You should see Playwright's Chromium
|
||||
> (not your regular Chrome) with a golden shimmer line at the top of the page.
|
||||
>
|
||||
> The Side Panel extension is pre-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in Chrome's toolbar
|
||||
> 2. Click it → find **gstack browse** → click the **pin icon** to pin it
|
||||
> 3. Click the **gstack icon** in the toolbar
|
||||
> 4. Click **Open Side Panel**
|
||||
> The Side Panel extension should be auto-loaded. To open it:
|
||||
> 1. Look for the **puzzle piece icon** (Extensions) in the toolbar — it may
|
||||
> already show the gstack icon if the extension loaded successfully
|
||||
> 2. Click the **puzzle piece** → find **gstack browse** → click the **pin icon**
|
||||
> 3. Click the pinned **gstack icon** in the toolbar
|
||||
> 4. The Side Panel should open on the right showing a live activity feed
|
||||
>
|
||||
> The Side Panel shows a live feed of every browse command in real time.
|
||||
>
|
||||
> **Port:** The browse server is on port {PORT} — the extension auto-detects it
|
||||
> if you're using the Playwright-controlled Chrome. If the badge stays gray, click
|
||||
> the gstack icon and enter port {PORT} manually.
|
||||
> **Port:** 34567 (auto-detected — the extension connects automatically in the
|
||||
> Playwright-controlled Chrome).
|
||||
|
||||
Options:
|
||||
- A) I can see the Side Panel — let's go!
|
||||
@@ -73,22 +115,34 @@ Options:
|
||||
- C) Something went wrong
|
||||
|
||||
If B: Tell the user:
|
||||
> The extension should be auto-loaded, but Chrome sometimes doesn't show it
|
||||
> immediately. Try:
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for "gstack browse" — it should be listed and enabled
|
||||
> 3. If not listed, click "Load unpacked" → navigate to the extension folder
|
||||
> (press Cmd+Shift+G in the file picker, paste this path):
|
||||
> `{EXTENSION_PATH}`
|
||||
>
|
||||
> Then pin it from the puzzle piece icon and open the Side Panel.
|
||||
|
||||
If C: Run `$B status` and show the output. Check if the server is healthy.
|
||||
> The extension is loaded into Playwright's Chromium at launch time, but
|
||||
> sometimes it doesn't appear immediately. Try these steps:
|
||||
>
|
||||
> 1. Type `chrome://extensions` in the address bar
|
||||
> 2. Look for **"gstack browse"** — it should be listed and enabled
|
||||
> 3. If it's there but not pinned, go back to any page, click the puzzle piece
|
||||
> icon, and pin it
|
||||
> 4. If it's NOT listed at all, click **"Load unpacked"** and select the extension folder:
|
||||
> - Press **Cmd+Shift+G** in the file picker dialog
|
||||
> - Paste this path: `{EXTENSION_PATH}` (use the path from Step 2)
|
||||
> - Click **Select**
|
||||
>
|
||||
> After loading, pin it and click the icon to open the Side Panel.
|
||||
>
|
||||
> If the Side Panel badge stays gray (disconnected), click the gstack icon
|
||||
> and enter port **34567** manually.
|
||||
|
||||
If C:
|
||||
|
||||
1. Run `$B status` and show the output
|
||||
2. If the server is not healthy, re-run Step 0 cleanup + Step 1 connect
|
||||
3. If the server IS healthy but the browser isn't visible, try `$B focus`
|
||||
4. If that fails, ask the user what they see (error message, blank screen, etc.)
|
||||
|
||||
## Step 4: Demo
|
||||
|
||||
After the user confirms the Side Panel is working, run a quick demo so they
|
||||
can see the activity feed in action:
|
||||
After the user confirms the Side Panel is working, run a quick demo:
|
||||
|
||||
```bash
|
||||
$B goto https://news.ycombinator.com
|
||||
@@ -101,7 +155,7 @@ $B snapshot -i
|
||||
```
|
||||
|
||||
Tell the user: "Check the Side Panel — you should see the `goto` and `snapshot`
|
||||
commands appear in the activity feed. Every command Claude runs will show up here
|
||||
commands appear in the activity feed. Every command Claude runs shows up here
|
||||
in real time."
|
||||
|
||||
## Step 5: Sidebar chat
|
||||
@@ -109,8 +163,9 @@ in real time."
|
||||
After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
> The Side Panel also has a **chat tab**. Try typing a message like "take a
|
||||
> snapshot and describe this page." A child Claude instance will execute your
|
||||
> request in the browser — you'll see the commands appear in the activity feed.
|
||||
> snapshot and describe this page." A sidebar agent (a child Claude instance)
|
||||
> executes your request in the browser — you'll see the commands appear in
|
||||
> the activity feed as they happen.
|
||||
>
|
||||
> The sidebar agent can navigate pages, click buttons, fill forms, and read
|
||||
> content. Each task gets up to 5 minutes. It runs in an isolated session, so
|
||||
@@ -120,17 +175,28 @@ After the activity feed demo, tell the user about the sidebar chat:
|
||||
|
||||
Tell the user:
|
||||
|
||||
> You're all set! Chrome is under Claude's control with the Side Panel showing
|
||||
> live activity and a chat sidebar for direct commands. Here's what you can do:
|
||||
> You're all set! Here's what you can do with the connected Chrome:
|
||||
>
|
||||
> - **Chat in the sidebar** — type natural language instructions and Claude
|
||||
> executes them in the browser
|
||||
> - **Run any browse command** — `$B goto`, `$B click`, `$B snapshot` — and
|
||||
> watch it happen in Chrome + the Side Panel
|
||||
> - **Use /qa or /design-review** — they'll run in the visible Chrome window
|
||||
> instead of headless. No cookie import needed.
|
||||
> - **`$B focus`** — bring Chrome to the foreground anytime
|
||||
> - **`$B disconnect`** — return to headless mode when done
|
||||
> **Watch Claude work in real time:**
|
||||
> - Run any gstack skill (`/qa`, `/design-review`, `/benchmark`) and watch
|
||||
> every action happen in the visible Chrome window + Side Panel feed
|
||||
> - No cookie import needed — the Playwright browser keeps its own session
|
||||
>
|
||||
> **Control the browser directly:**
|
||||
> - **Sidebar chat** — type natural language in the Side Panel and the sidebar
|
||||
> agent executes it (e.g., "fill in the login form and submit")
|
||||
> - **Browse commands** — `$B goto <url>`, `$B click <sel>`, `$B fill <sel> <val>`,
|
||||
> `$B snapshot -i` — all visible in Chrome + Side Panel
|
||||
>
|
||||
> **Window management:**
|
||||
> - `$B focus` — bring Chrome to the foreground anytime
|
||||
> - `$B disconnect` — close headed Chrome and return to headless mode
|
||||
>
|
||||
> **What skills look like in headed mode:**
|
||||
> - `/qa` runs its full test suite in the visible browser — you see every page
|
||||
> load, every click, every assertion
|
||||
> - `/design-review` takes screenshots in the real browser — same pixels you see
|
||||
> - `/benchmark` measures performance in the headed browser
|
||||
|
||||
Then proceed with whatever the user asked to do. If they didn't specify a task,
|
||||
ask what they'd like to test or browse.
|
||||
|
||||
+17
-11
@@ -194,17 +194,23 @@ chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
|
||||
sendResponse({ error: 'Not connected' });
|
||||
return true;
|
||||
}
|
||||
fetch(`${base}/sidebar-command`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${authToken}`,
|
||||
},
|
||||
body: JSON.stringify({ message: msg.message }),
|
||||
})
|
||||
.then(r => r.json())
|
||||
.then(data => sendResponse(data))
|
||||
.catch(err => sendResponse({ error: err.message }));
|
||||
// Capture the active tab's URL so the sidebar agent knows what page
|
||||
// the user is actually looking at (Playwright's page.url() can be stale
|
||||
// if the user navigated manually in headed mode).
|
||||
chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => {
|
||||
const activeTabUrl = tabs?.[0]?.url || null;
|
||||
fetch(`${base}/sidebar-command`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${authToken}`,
|
||||
},
|
||||
body: JSON.stringify({ message: msg.message, activeTabUrl }),
|
||||
})
|
||||
.then(r => r.json())
|
||||
.then(data => sendResponse(data))
|
||||
.catch(err => sendResponse({ error: err.message }));
|
||||
});
|
||||
return true;
|
||||
}
|
||||
});
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "gstack",
|
||||
"version": "0.12.5.0",
|
||||
"version": "0.12.7.0",
|
||||
"description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
|
||||
@@ -1655,9 +1655,10 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
// The fix is to resolve _REPO_ROOT eagerly at the top of each bash block.
|
||||
|
||||
// Scan all source files that could contain codex commands
|
||||
// Use Bun.Glob to avoid ELOOP from .claude/skills/gstack symlink back to ROOT
|
||||
const tmplGlob = new Bun.Glob('**/*.tmpl');
|
||||
const sourceFiles = [
|
||||
...fs.readdirSync(ROOT, { recursive: true })
|
||||
.filter((f): f is string => typeof f === 'string' && f.endsWith('.tmpl') && !f.includes('node_modules')),
|
||||
...Array.from(tmplGlob.scanSync({ cwd: ROOT, followSymlinks: false })),
|
||||
...fs.readdirSync(path.join(ROOT, 'scripts/resolvers'))
|
||||
.filter(f => f.endsWith('.ts'))
|
||||
.map(f => `scripts/resolvers/${f}`),
|
||||
@@ -1688,6 +1689,8 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
typeof f === 'string' &&
|
||||
f.endsWith('SKILL.md') &&
|
||||
!f.includes('node_modules') &&
|
||||
!f.includes('.claude') &&
|
||||
!f.includes('.agents') &&
|
||||
!f.includes('.tmpl'));
|
||||
for (const rel of skillMdFiles) {
|
||||
const abs = path.join(ROOT, rel);
|
||||
|
||||
@@ -141,6 +141,10 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'benchmark-workflow': ['benchmark/**', 'browse/src/**'],
|
||||
'setup-deploy-workflow': ['setup-deploy/**', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
// Sidebar agent
|
||||
'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'],
|
||||
'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'],
|
||||
|
||||
// Autoplan
|
||||
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
|
||||
|
||||
@@ -262,6 +266,10 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'benchmark-workflow': 'gate',
|
||||
'setup-deploy-workflow': 'gate',
|
||||
|
||||
// Sidebar agent
|
||||
'sidebar-navigate': 'periodic',
|
||||
'sidebar-url-accuracy': 'periodic',
|
||||
|
||||
// Autoplan — periodic (not yet implemented)
|
||||
'autoplan-core': 'periodic',
|
||||
|
||||
|
||||
@@ -0,0 +1,279 @@
|
||||
/**
|
||||
* Layer 4: E2E tests for the sidebar agent.
|
||||
*
|
||||
* sidebar-url-accuracy: Deterministic test that verifies the activeTabUrl fix.
|
||||
* Starts server (no browser), POSTs to /sidebar-command with different activeTabUrl
|
||||
* values, reads the queue file, and verifies the prompt uses the extension URL.
|
||||
* No real Claude needed — this is a fast, cheap, deterministic test.
|
||||
*
|
||||
* sidebar-navigate: Full E2E with real Claude (requires ANTHROPIC_API_KEY).
|
||||
* Starts server + sidebar-agent, sends a message, waits for Claude to respond.
|
||||
* Tests the complete message flow through the queue.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { spawn, type Subprocess } from 'bun';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
ROOT,
|
||||
describeIfSelected, testIfSelected,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
|
||||
// Eval collector shared by both suites in this file; each suite records its
// result via addTest() and passes the collector to finalizeEvalCollector()
// in its own afterAll hook.
// NOTE(review): the same collector is therefore finalized once per suite —
// confirm finalizeEvalCollector tolerates being called twice.
const evalCollector = createEvalCollector('e2e-sidebar');
|
||||
|
||||
// --- Sidebar URL Accuracy (deterministic, no Claude) ---
|
||||
|
||||
/**
 * Deterministic suite (no Claude involved): starts the browse server with the
 * browser skipped, POSTs to /sidebar-command with different `activeTabUrl`
 * values, and inspects the entry written to the sidebar queue file.
 */
describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
  /** Subset of a queue entry that this suite asserts on. */
  interface QueueEntry {
    pageUrl?: string;
    prompt?: string;
  }

  let serverProc: Subprocess | null = null;
  let serverPort = 0;
  let authToken = '';
  let tmpDir = '';
  let stateFile = '';
  let queueFile = '';

  const sleep = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));

  /** Calls the test server, adding JSON content type and the bearer token unless overridden. */
  async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...(opts.headers as Record<string, string> || {}),
    };
    if (!headers['Authorization'] && authToken) {
      headers['Authorization'] = `Bearer ${authToken}`;
    }
    return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
  }

  /**
   * Polls the queue file until it contains at least one parseable line.
   * @returns the last queue entry, or null if none appeared within `timeoutMs`.
   */
  async function waitForQueueEntry(timeoutMs: number): Promise<QueueEntry | null> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      await sleep(100);
      if (!fs.existsSync(queueFile)) continue;
      const lines = fs.readFileSync(queueFile, 'utf-8').trim().split('\n').filter(Boolean);
      if (lines.length === 0) continue;
      try {
        return JSON.parse(lines[lines.length - 1]) as QueueEntry;
      } catch {
        // Last line is not valid JSON yet; keep polling until the deadline.
      }
    }
    return null;
  }

  beforeAll(async () => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-url-'));
    stateFile = path.join(tmpDir, 'browse.json');
    queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
    fs.mkdirSync(path.dirname(queueFile), { recursive: true });

    const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
    serverProc = spawn(['bun', 'run', serverScript], {
      env: {
        ...process.env,
        BROWSE_STATE_FILE: stateFile,
        BROWSE_HEADLESS_SKIP: '1',
        BROWSE_PORT: '0',
        SIDEBAR_QUEUE_PATH: queueFile,
        BROWSE_IDLE_TIMEOUT: '300',
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Wait (up to 15s) for the server to publish its port and token in the state file.
    const startupDeadline = Date.now() + 15000;
    while (Date.now() < startupDeadline) {
      if (fs.existsSync(stateFile)) {
        try {
          const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
          if (state.port && state.token) {
            serverPort = state.port;
            authToken = state.token;
            break;
          }
        } catch {
          // Ignore read/parse failures and retry on the next tick.
        }
      }
      await sleep(100);
    }
    if (!serverPort) throw new Error('Server did not start in time');
  }, 20000);

  afterAll(() => {
    if (serverProc) { try { serverProc.kill(); } catch {} }
    finalizeEvalCollector(evalCollector);
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  });

  testIfSelected('sidebar-url-accuracy', async () => {
    const startedAt = Date.now();

    // Fresh session
    await api('/sidebar-session/new', { method: 'POST' });
    fs.writeFileSync(queueFile, '');

    const extensionUrl = 'https://example.com/user-navigated-here';
    const resp = await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({
        message: 'What page am I on?',
        activeTabUrl: extensionUrl,
      }),
    });
    expect(resp.status).toBe(200);

    const firstEntry = await waitForQueueEntry(5000);
    expect(firstEntry).not.toBeNull();
    // Extension URL should be used, not the Playwright fallback
    expect(firstEntry?.pageUrl).toBe(extensionUrl);
    expect(firstEntry?.prompt).toContain(extensionUrl);
    expect(firstEntry?.pageUrl).not.toBe('about:blank');

    // Also test: chrome:// URL should be rejected, falling back to about:blank
    await api('/sidebar-agent/kill', { method: 'POST' });
    fs.writeFileSync(queueFile, '');

    await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({
        message: 'test',
        activeTabUrl: 'chrome://settings',
      }),
    });

    // Poll instead of sleeping a fixed 200ms, and require the entry to exist so
    // the fallback assertion cannot be skipped silently.
    const secondEntry = await waitForQueueEntry(5000);
    expect(secondEntry).not.toBeNull();
    expect(secondEntry?.pageUrl).toBe('about:blank');

    // Only reached when every expectation above held.
    evalCollector?.addTest({
      name: 'sidebar-url-accuracy', suite: 'Sidebar URL accuracy E2E', tier: 'e2e',
      passed: true,
      duration_ms: Date.now() - startedAt,
      cost_usd: 0,
      exit_reason: 'success',
    });
  }, 30_000);
});
|
||||
|
||||
// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---
|
||||
|
||||
/**
 * Full message-flow suite: starts the browse server and the sidebar agent,
 * sends one chat message, and polls /sidebar-chat until an `agent_done`
 * entry appears (or 90s elapse).
 */
describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {
  /** Subset of a /sidebar-chat entry that this suite inspects. */
  interface ChatEntry {
    type?: string;
    role?: string;
    text?: string;
  }

  let serverProc: Subprocess | null = null;
  let agentProc: Subprocess | null = null;
  let serverPort = 0;
  let authToken = '';
  let tmpDir = '';
  let stateFile = '';
  let queueFile = '';

  const sleep = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));

  /** Calls the test server, adding JSON content type and the bearer token unless overridden. */
  async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...(opts.headers as Record<string, string> || {}),
    };
    if (!headers['Authorization'] && authToken) {
      headers['Authorization'] = `Bearer ${authToken}`;
    }
    return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
  }

  beforeAll(async () => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-nav-'));
    stateFile = path.join(tmpDir, 'browse.json');
    queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
    fs.mkdirSync(path.dirname(queueFile), { recursive: true });

    // Start the server with the headless browser skipped (BROWSE_HEADLESS_SKIP=1):
    // the prompt used below only asks for a fixed reply, so no browse commands are needed.
    const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
    serverProc = spawn(['bun', 'run', serverScript], {
      env: {
        ...process.env,
        BROWSE_STATE_FILE: stateFile,
        BROWSE_HEADLESS_SKIP: '1', // Still skip browser — Claude uses curl/fetch instead
        BROWSE_PORT: '0',
        SIDEBAR_QUEUE_PATH: queueFile,
        BROWSE_IDLE_TIMEOUT: '300',
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Wait (up to 15s) for the server to publish its port and token in the state file.
    const startupDeadline = Date.now() + 15000;
    while (Date.now() < startupDeadline) {
      if (fs.existsSync(stateFile)) {
        try {
          const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
          if (state.port && state.token) {
            serverPort = state.port;
            authToken = state.token;
            break;
          }
        } catch {
          // Ignore read/parse failures and retry on the next tick.
        }
      }
      await sleep(100);
    }
    if (!serverPort) throw new Error('Server did not start in time');

    // Start sidebar-agent
    const agentScript = path.resolve(ROOT, 'browse', 'src', 'sidebar-agent.ts');
    agentProc = spawn(['bun', 'run', agentScript], {
      env: {
        ...process.env,
        BROWSE_SERVER_PORT: String(serverPort),
        BROWSE_STATE_FILE: stateFile,
        SIDEBAR_QUEUE_PATH: queueFile,
        SIDEBAR_AGENT_TIMEOUT: '90000',
        BROWSE_BIN: 'echo', // browse commands won't work, but Claude can use curl
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Give the agent process a moment to come up before the test sends a message.
    await sleep(1500);
  }, 25000);

  afterAll(() => {
    if (agentProc) { try { agentProc.kill(); } catch {} }
    if (serverProc) { try { serverProc.kill(); } catch {} }
    finalizeEvalCollector(evalCollector);
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  });

  testIfSelected('sidebar-navigate', async () => {
    await api('/sidebar-session/new', { method: 'POST' });
    fs.writeFileSync(queueFile, '');
    const startTime = Date.now();

    // Ask Claude a simple question — it doesn't need browse commands for this
    const resp = await api('/sidebar-command', {
      method: 'POST',
      body: JSON.stringify({
        message: 'Say exactly "SIDEBAR_TEST_OK" and nothing else.',
        activeTabUrl: 'https://example.com',
      }),
    });
    expect(resp.status).toBe(200);

    // Poll for agent_done
    const deadline = Date.now() + 90000;
    let entries: ChatEntry[] = [];
    while (Date.now() < deadline) {
      const chatResp = await api('/sidebar-chat?after=0');
      const data = (await chatResp.json()) as { entries?: ChatEntry[] };
      // A response without an `entries` array is treated as "nothing yet" so the
      // loop keeps polling instead of throwing a TypeError on `.some`.
      entries = Array.isArray(data?.entries) ? data.entries : [];
      if (entries.some(e => e.type === 'agent_done')) break;
      await sleep(2000);
    }

    const duration = Date.now() - startTime;
    const doneEntry = entries.find(e => e.type === 'agent_done');
    expect(doneEntry).toBeDefined();

    // Claude should have responded with something
    const agentText = entries
      .filter(e => e.role === 'agent' && (e.type === 'text' || e.type === 'result'))
      .map(e => e.text || '')
      .join(' ');
    expect(agentText.length).toBeGreaterThan(0);

    evalCollector?.addTest({
      name: 'sidebar-navigate', suite: 'Sidebar navigate E2E', tier: 'e2e',
      passed: !!doneEntry && agentText.length > 0,
      duration_ms: duration,
      cost_usd: 0,
      exit_reason: doneEntry ? 'success' : 'timeout',
    });
  }, 120_000);
});
|
||||
Reference in New Issue
Block a user