Files
gstack/browse/test/compare-board.test.ts
Garry Tan 1868636f49 refactor: extract TabSession for per-tab state isolation (v0.15.16.0) (#873)
* plan: batch command endpoint + multi-tab parallel execution for GStack Browser

* refactor: extract TabSession from BrowserManager for per-tab state

Move per-tab state (refMap, lastSnapshot, frame) into a new TabSession
class. BrowserManager delegates to the active TabSession via
getActiveSession(). Zero behavior change — all existing tests pass.

This is the foundation for the /batch endpoint: both /command and /batch
will use the same handler functions with TabSession, eliminating shared
state races during parallel tab execution.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: update handler signatures to use TabSession

Change handleReadCommand and handleSnapshot to take TabSession instead of
BrowserManager. Change handleWriteCommand to take both TabSession (per-tab
ops) and BrowserManager (global ops like viewport, headers, dialog).
handleMetaCommand keeps BrowserManager for tab management.

Tests use thin wrapper functions that bridge the old 3-arg call pattern to
the new signatures via bm.getActiveSession().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: add POST /batch endpoint for parallel multi-tab execution

Execute multiple commands across tabs in a single HTTP request.
Commands targeting different tabs run concurrently via Promise.allSettled.
Commands targeting the same tab run sequentially within that group.

Features:
- Batch-safe command subset (text, goto, click, snapshot, screenshot, etc.)
- newtab/closetab as special commands within batch
- SSE streaming mode (stream: true) for partial results
- Per-command error isolation (one tab failing doesn't abort the batch)
- Max 50 commands per batch, soft batch-level timeout

A 143-page crawl drops from ~45 min (serial HTTP) to ~5 min (20 tabs
in parallel, batched commands).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test: add batch endpoint integration tests

10 tests covering:
- Multi-tab parallel execution (goto + text on different tabs)
- Same-tab sequential ordering
- Per-command error isolation (one tab fails, others succeed)
- Page-scoped refs (snapshot refs are per-session, not global)
- Per-tab lastSnapshot (snapshot -D with independent baselines)
- getSession/getActiveSession API
- Batch-safe command subset validation
- closeTab via page.close preserves at-least-one-page invariant
- Parallel goto on 3 tabs simultaneously

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: harden codex-review E2E — extract SKILL.md section, bump maxTurns to 25

The test was copying the full 55KB/1075-line codex SKILL.md into the fixture,
requiring 8 Read calls just to consume it and exhausting the 15-turn budget
before reaching the actual codex review command. Now extracts only the
review-relevant section (~6KB/148 lines), reducing Read calls from 8 to 1.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: move batch endpoint plan into BROWSER.md as feature documentation

The batch endpoint is implemented — document it as an actual feature in
BROWSER.md (architecture, API shape, design decisions, usage pattern)
and remove the standalone plan file.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* chore: bump version and changelog (v0.15.16.0)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: gstack <ship@gstack.dev>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 00:23:36 -07:00

348 lines
12 KiB
TypeScript

/**
* Integration test for the design comparison board feedback loop.
*
* Tests the DOM polling pattern that plan-design-review, office-hours,
* and design-consultation use to read user feedback from the comparison board.
*
* Flow: generate board HTML → open in browser → verify DOM elements →
* simulate user interaction → verify structured JSON feedback.
*
* No LLM involved — this is a deterministic functional test.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { BrowserManager } from '../src/browser-manager';
import { handleReadCommand as _handleReadCommand } from '../src/read-commands';
import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
/**
 * Test-side adapter that keeps the three-argument call shape
 * `(cmd, args, manager)` while forwarding to the real read handler, which
 * takes the manager's active session instead of the manager itself.
 */
const handleReadCommand = (cmd: string, args: string[], b: BrowserManager) => {
  const activeSession = b.getActiveSession();
  return _handleReadCommand(cmd, args, activeSession);
};
/**
 * Test-side adapter that keeps the three-argument call shape
 * `(cmd, args, manager)` while forwarding to the real write handler, which
 * takes both the manager's active session and the manager itself.
 */
const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) => {
  const activeSession = b.getActiveSession();
  return _handleWriteCommand(cmd, args, activeSession, b);
};
import { generateCompareHtml } from '../../design/src/compare';
import * as fs from 'fs';
import * as path from 'path';
// Shared fixtures for the whole file. All four are assigned in beforeAll.

// Browser manager driving the page; constructed and launched in beforeAll.
let bm: BrowserManager;
// URL of the local HTTP server serving the board (http://localhost:<port>).
let boardUrl: string;
// Bun HTTP server that answers every request with the generated board HTML.
let server: ReturnType<typeof Bun.serve>;
// Directory holding the variant PNG fixtures; removed again in afterAll.
let tmpDir: string;
/**
 * Writes a minimal valid PNG (a single 1x1 8-bit RGBA pixel) to `filePath`,
 * for use as a stand-in design variant image.
 *
 * @param filePath Destination path; the file is created or overwritten.
 */
function createTestPng(filePath: string): void {
  // The complete PNG file, base64-encoded so no binary fixture is needed.
  const encodedPng =
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/58BAwAI/AL+hc2rNAAAAABJRU5ErkJggg==';
  fs.writeFileSync(filePath, Buffer.from(encodedPng, 'base64'));
}
/**
 * Suite setup: writes three variant PNGs, renders the comparison board from
 * them with the real compare module, serves that HTML over HTTP, then
 * launches the browser and opens the board.
 */
beforeAll(async () => {
  // Fixture directory, made unique per run by a timestamp suffix.
  tmpDir = `/tmp/compare-board-test-${Date.now()}`;
  fs.mkdirSync(tmpDir, { recursive: true });

  // One PNG per variant; the list order is the order passed to the board generator.
  const variantPaths = ['variant-A.png', 'variant-B.png', 'variant-C.png'].map(
    (fileName) => path.join(tmpDir, fileName)
  );
  for (const variantPath of variantPaths) {
    createTestPng(variantPath);
  }

  // Build the board HTML with the real compare module.
  const html = generateCompareHtml(variantPaths);

  // Serve over HTTP (browse blocks file:// URLs for security).
  // Port 0 requests an ephemeral port; every request receives the same HTML.
  server = Bun.serve({
    port: 0,
    fetch: () => new Response(html, { headers: { 'Content-Type': 'text/html' } }),
  });
  boardUrl = `http://localhost:${server.port}`;

  // Start the browser and load the board.
  bm = new BrowserManager();
  await bm.launch();
  await handleWriteCommand('goto', [boardUrl], bm);
});
/**
 * Suite teardown: stops the HTTP server, deletes the fixture directory and
 * schedules a forced process exit.
 */
afterAll(() => {
  const forcedExitDelayMs = 500;

  try {
    server.stop();
  } catch {
    // Best-effort: a failure to stop the server must not block cleanup.
  }

  fs.rmSync(tmpDir, { force: true, recursive: true });

  // NOTE(review): presumably needed because the launched browser keeps the
  // process alive after the tests finish — confirm before removing.
  setTimeout(() => process.exit(0), forcedExitDelayMs);
});
// ─── DOM Structure ──────────────────────────────────────────────
describe('Comparison board DOM structure', () => {
  // One row per test: [title, expression sent through the `js` read command,
  // expected result]. Results are compared as strings, hence 'true' and '3'.
  const domChecks: ReadonlyArray<readonly [string, string, string]> = [
    ['has hidden status element', 'document.getElementById("status").textContent', ''],
    [
      'has hidden feedback-result element',
      'document.getElementById("feedback-result").textContent',
      '',
    ],
    ['has submit button', '!!document.getElementById("submit-btn")', 'true'],
    ['has regenerate button', '!!document.getElementById("regen-btn")', 'true'],
    ['has 3 variant cards', 'document.querySelectorAll(".variant").length', '3'],
    [
      'has pick radio buttons for each variant',
      'document.querySelectorAll("input[name=\\"preferred\\"]").length',
      '3',
    ],
    ['has star ratings for each variant', 'document.querySelectorAll(".stars").length', '3'],
  ];

  // Register the tests in table order so titles and ordering stay stable.
  for (const [title, expression, expected] of domChecks) {
    test(title, async () => {
      const actual = await handleReadCommand('js', [expression], bm);
      expect(actual).toBe(expected);
    });
  }
});
// ─── Submit Flow ────────────────────────────────────────────────
describe('Submit feedback flow', () => {
  /** Sends `expression` through the `js` read command on the shared browser. */
  const runJs = (expression: string) => handleReadCommand('js', [expression], bm);
  /** Navigates back to the board URL to start from a fresh page. */
  const reloadBoard = () => handleWriteCommand('goto', [boardUrl], bm);
  /** Reads the text of the #status element. */
  const readStatus = () => runJs('document.getElementById("status").textContent');
  /** Reads the #feedback-result text and parses it as JSON. */
  const readFeedback = async () =>
    JSON.parse(await runJs('document.getElementById("feedback-result").textContent'));

  test('submit without interaction returns empty preferred', async () => {
    await reloadBoard();

    // Submit straight away, with nothing picked or rated.
    await runJs('document.getElementById("submit-btn").click()');

    expect(await readStatus()).toBe('submitted');

    const feedback = await readFeedback();
    expect(feedback.preferred).toBeNull();
    expect(feedback.regenerated).toBe(false);
    expect(feedback.ratings).toBeDefined();
  });

  test('submit with pick + rating + comment returns structured JSON', async () => {
    await reloadBoard();

    // Simulated user actions, applied one after another in this order.
    const userActions = [
      // Pick variant B
      'document.querySelectorAll("input[name=\\"preferred\\"]")[1].click()',
      // Variant A: 4 stars (the 4th star)
      'document.querySelectorAll(".stars")[0].querySelectorAll(".star")[3].click()',
      // Variant B: 5 stars
      'document.querySelectorAll(".stars")[1].querySelectorAll(".star")[4].click()',
      // Comment on variant A
      'document.querySelectorAll(".feedback-input")[0].value = "Good spacing but wrong colors"',
      // Overall feedback
      'document.getElementById("overall-feedback").value = "Go with B, make the CTA bigger"',
      // Submit
      'document.getElementById("submit-btn").click()',
    ];
    for (const action of userActions) {
      await runJs(action);
    }

    expect(await readStatus()).toBe('submitted');

    const feedback = await readFeedback();
    expect(feedback.preferred).toBe('B');
    expect(feedback.ratings.A).toBe(4);
    expect(feedback.ratings.B).toBe(5);
    expect(feedback.comments.A).toBe('Good spacing but wrong colors');
    expect(feedback.overall).toBe('Go with B, make the CTA bigger');
    expect(feedback.regenerated).toBe(false);
  });

  // The next two tests do not reload: they inspect the page as the previous
  // submission left it, so they depend on running after it.
  test('submit button is disabled after submission', async () => {
    const isDisabled = await runJs('document.getElementById("submit-btn").disabled');
    expect(isDisabled).toBe('true');
  });

  test('success message is visible after submission', async () => {
    const displayValue = await runJs('document.getElementById("success-msg").style.display');
    expect(displayValue).toBe('block');
  });
});
// ─── Regenerate Flow ────────────────────────────────────────────
describe('Regenerate flow', () => {
  /** Sends `expression` through the `js` read command on the shared browser. */
  const runJs = (expression: string) => handleReadCommand('js', [expression], bm);
  /** Navigates back to the board URL to start from a fresh page. */
  const reloadBoard = () => handleWriteCommand('goto', [boardUrl], bm);
  /** Reads the text of the #status element. */
  const readStatus = () => runJs('document.getElementById("status").textContent');
  /** Reads the #feedback-result text and parses it as JSON. */
  const readFeedback = async () =>
    JSON.parse(await runJs('document.getElementById("feedback-result").textContent'));

  test('regenerate button sets status to "regenerate"', async () => {
    await reloadBoard();

    // Select the "Totally different" chiclet, then press regenerate.
    await runJs('document.querySelector(".regen-chiclet[data-action=\\"different\\"]").click()');
    await runJs('document.getElementById("regen-btn").click()');

    expect(await readStatus()).toBe('regenerate');

    // The feedback payload must carry the chosen regenerate action.
    const feedback = await readFeedback();
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('different');
  });

  test('"More like this" sets regenerate with variant reference', async () => {
    await reloadBoard();

    // "More like this" on the second variant (B).
    await runJs('document.querySelectorAll(".more-like-this")[1].click()');

    expect(await readStatus()).toBe('regenerate');

    const feedback = await readFeedback();
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('more_like_B');
  });

  test('regenerate with custom text', async () => {
    await reloadBoard();

    // Fill in free-form regeneration text, then press regenerate with no
    // chiclet selected so the custom text is used as the action.
    await runJs('document.getElementById("regen-custom-input").value = "V3 layout with V1 colors"');
    await runJs('document.getElementById("regen-btn").click()');

    const feedback = await readFeedback();
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('V3 layout with V1 colors');
  });
});
// ─── Agent Polling Pattern ──────────────────────────────────────
describe('Agent polling pattern (simulates what $B eval does)', () => {
  /** Sends `expression` through the `js` read command on the shared browser. */
  const runJs = (expression: string) => handleReadCommand('js', [expression], bm);
  /** Navigates back to the board URL to start from a fresh page. */
  const reloadBoard = () => handleWriteCommand('goto', [boardUrl], bm);
  /** One poll: reads the text of the #status element. */
  const pollStatus = () => runJs('document.getElementById("status").textContent');

  test('status is empty before user action', async () => {
    // Fresh page: this is what the agent's first poll sees.
    await reloadBoard();
    expect(await pollStatus()).toBe('');
  });

  test('full polling cycle: empty → submitted → read JSON', async () => {
    await reloadBoard();

    // Poll 1: nothing yet, the user has not acted.
    const beforeAction = await pollStatus();
    expect(beforeAction).toBe('');

    // The user picks variant A and submits.
    await runJs('document.querySelectorAll("input[name=\\"preferred\\"]")[0].click()');
    await runJs('document.getElementById("submit-btn").click()');

    // Poll 2: the status has flipped to "submitted".
    const afterAction = await pollStatus();
    expect(afterAction).toBe('submitted');

    // After seeing "submitted" the agent reads the feedback payload.
    const payload = await runJs('document.getElementById("feedback-result").textContent');
    const feedback = JSON.parse(payload);
    expect(feedback.preferred).toBe('A');
    expect(typeof feedback.ratings).toBe('object');
    expect(typeof feedback.comments).toBe('object');
  });
});