mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 11:17:50 +02:00
1868636f49
* plan: batch command endpoint + multi-tab parallel execution for GStack Browser * refactor: extract TabSession from BrowserManager for per-tab state Move per-tab state (refMap, lastSnapshot, frame) into a new TabSession class. BrowserManager delegates to the active TabSession via getActiveSession(). Zero behavior change — all existing tests pass. This is the foundation for the /batch endpoint: both /command and /batch will use the same handler functions with TabSession, eliminating shared state races during parallel tab execution. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: update handler signatures to use TabSession Change handleReadCommand and handleSnapshot to take TabSession instead of BrowserManager. Change handleWriteCommand to take both TabSession (per-tab ops) and BrowserManager (global ops like viewport, headers, dialog). handleMetaCommand keeps BrowserManager for tab management. Tests use thin wrapper functions that bridge the old 3-arg call pattern to the new signatures via bm.getActiveSession(). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add POST /batch endpoint for parallel multi-tab execution Execute multiple commands across tabs in a single HTTP request. Commands targeting different tabs run concurrently via Promise.allSettled. Commands targeting the same tab run sequentially within that group. Features: - Batch-safe command subset (text, goto, click, snapshot, screenshot, etc.) - newtab/closetab as special commands within batch - SSE streaming mode (stream: true) for partial results - Per-command error isolation (one tab failing doesn't abort the batch) - Max 50 commands per batch, soft batch-level timeout A 143-page crawl drops from ~45 min (serial HTTP) to ~5 min (20 tabs in parallel, batched commands). 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: add batch endpoint integration tests 10 tests covering: - Multi-tab parallel execution (goto + text on different tabs) - Same-tab sequential ordering - Per-command error isolation (one tab fails, others succeed) - Page-scoped refs (snapshot refs are per-session, not global) - Per-tab lastSnapshot (snapshot -D with independent baselines) - getSession/getActiveSession API - Batch-safe command subset validation - closeTab via page.close preserves at-least-one-page invariant - Parallel goto on 3 tabs simultaneously Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: harden codex-review E2E — extract SKILL.md section, bump maxTurns to 25 The test was copying the full 55KB/1075-line codex SKILL.md into the fixture, requiring 8 Read calls just to consume it and exhausting the 15-turn budget before reaching the actual codex review command. Now extracts only the review-relevant section (~6KB/148 lines), reducing Read calls from 8 to 1. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: move batch endpoint plan into BROWSER.md as feature documentation The batch endpoint is implemented — document it as an actual feature in BROWSER.md (architecture, API shape, design decisions, usage pattern) and remove the standalone plan file. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.15.16.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: gstack <ship@gstack.dev> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
348 lines
12 KiB
TypeScript
348 lines
12 KiB
TypeScript
/**
 * Integration test for the design comparison board feedback loop.
 *
 * Tests the DOM polling pattern that plan-design-review, office-hours,
 * and design-consultation use to read user feedback from the comparison board.
 *
 * Flow: generate board HTML → open in browser → verify DOM elements →
 * simulate user interaction → verify structured JSON feedback.
 *
 * No LLM involved — this is a deterministic functional test.
 */
|
|
|
|
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
|
import { BrowserManager } from '../src/browser-manager';
|
|
import { handleReadCommand as _handleReadCommand } from '../src/read-commands';
|
|
import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
|
|
|
|
/**
 * Test-local adapter around the `handleReadCommand` export of
 * `../src/read-commands`.
 *
 * Takes a BrowserManager as its third argument and forwards the manager's
 * active session (`b.getActiveSession()`) to the underlying handler.
 *
 * @param cmd - Command name, forwarded unchanged.
 * @param args - Command arguments, forwarded unchanged.
 * @param b - Manager whose active session the command is run against.
 * @returns Whatever the underlying read handler returns.
 */
const handleReadCommand = (cmd: string, args: string[], b: BrowserManager) =>
  _handleReadCommand(cmd, args, b.getActiveSession());
|
|
/**
 * Test-local adapter around the `handleWriteCommand` export of
 * `../src/write-commands`.
 *
 * Takes a BrowserManager as its third argument and forwards both the
 * manager's active session (`b.getActiveSession()`) and the manager itself
 * to the underlying handler.
 *
 * @param cmd - Command name, forwarded unchanged.
 * @param args - Command arguments, forwarded unchanged.
 * @param b - Manager supplying the active session; also passed through as-is.
 * @returns Whatever the underlying write handler returns.
 */
const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) =>
  _handleWriteCommand(cmd, args, b.getActiveSession(), b);
|
|
import { generateCompareHtml } from '../../design/src/compare';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
|
|
// Suite-wide state. All four bindings are assigned in beforeAll and shared
// by every test in this file.

/** Browser manager that the read/write command adapters operate on. */
let bm: BrowserManager;

/** Base URL of the local HTTP server that serves the generated board. */
let boardUrl: string;

/** Local Bun HTTP server returning the board HTML; stopped in afterAll. */
let server: ReturnType<typeof Bun.serve>;

/** Scratch directory holding the generated variant PNGs; removed in afterAll. */
let tmpDir: string;
|
|
|
|
/**
 * Writes a minimal valid PNG (a single 1x1 pixel) to `filePath`, to be used
 * as a placeholder image for a test variant.
 *
 * The image is decoded from an inline base64 literal, so no fixture file is
 * needed. The write is synchronous and overwrites any existing file.
 *
 * @param filePath - Destination path; its parent directory must already exist.
 */
function createTestPng(filePath: string): void {
  // Minimal valid PNG: 1x1 red pixel
  const encodedPixel =
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/58BAwAI/AL+hc2rNAAAAABJRU5ErkJggg==';
  fs.writeFileSync(filePath, Buffer.from(encodedPixel, 'base64'));
}
|
|
|
|
/**
 * One-time suite setup:
 *  1. creates a scratch directory with three placeholder variant PNGs,
 *  2. renders the comparison board HTML from those files,
 *  3. serves that HTML from a local HTTP server on an OS-assigned port,
 *  4. launches the browser and navigates it to the board.
 */
beforeAll(async () => {
  // Create test PNG files. The timestamp suffix keeps separate runs apart.
  tmpDir = '/tmp/compare-board-test-' + Date.now();
  fs.mkdirSync(tmpDir, { recursive: true });

  // Build the three variant paths once (A, B, C) and reuse them for both
  // file creation and board generation so the two lists cannot drift apart.
  const variantPaths = ['A', 'B', 'C'].map((label) =>
    path.join(tmpDir, `variant-${label}.png`),
  );
  for (const variantPath of variantPaths) {
    createTestPng(variantPath);
  }

  // Generate comparison board HTML using the real compare module
  const html = generateCompareHtml(variantPaths);

  // Serve the board via HTTP (browse blocks file:// URLs for security).
  // port: 0 lets the OS pick a free port; every request gets the same HTML.
  server = Bun.serve({
    port: 0,
    fetch() {
      return new Response(html, { headers: { 'Content-Type': 'text/html' } });
    },
  });
  boardUrl = `http://localhost:${server.port}`;

  // Launch browser and navigate to the board
  bm = new BrowserManager();
  await bm.launch();
  await handleWriteCommand('goto', [boardUrl], bm);
});
|
|
|
|
/**
 * Suite teardown: stops the local HTTP server, deletes the scratch
 * directory, then schedules a forced process exit.
 */
afterAll(() => {
  // Best-effort: a failure to stop the server must not fail the suite.
  try {
    server.stop();
  } catch {}

  fs.rmSync(tmpDir, { recursive: true, force: true });

  // NOTE(review): the browser is never closed explicitly here; presumably the
  // delayed exit exists because it would otherwise keep the process alive —
  // confirm before changing.
  setTimeout(() => process.exit(0), 500);
});
|
|
|
|
// ─── DOM Structure ──────────────────────────────────────────────
|
|
|
|
/**
 * Checks that the freshly loaded board exposes the elements the feedback
 * loop relies on. Results of the `js` command are compared as strings.
 */
describe('Comparison board DOM structure', () => {
  /** Evaluates `expr` in the page via the `js` read command. */
  const evalJs = (expr: string) => handleReadCommand('js', [expr], bm);

  // The two "hidden" tests below only assert that the element's text starts
  // out empty; visibility itself is not checked.
  test('has hidden status element', async () => {
    const status = await evalJs('document.getElementById("status").textContent');
    expect(status).toBe('');
  });

  test('has hidden feedback-result element', async () => {
    const result = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    expect(result).toBe('');
  });

  test('has submit button', async () => {
    const exists = await evalJs('!!document.getElementById("submit-btn")');
    expect(exists).toBe('true');
  });

  test('has regenerate button', async () => {
    const exists = await evalJs('!!document.getElementById("regen-btn")');
    expect(exists).toBe('true');
  });

  // The board was generated from three variant images, so each per-variant
  // control is expected exactly three times.
  test('has 3 variant cards', async () => {
    const count = await evalJs('document.querySelectorAll(".variant").length');
    expect(count).toBe('3');
  });

  test('has pick radio buttons for each variant', async () => {
    const count = await evalJs(
      'document.querySelectorAll("input[name=\\"preferred\\"]").length',
    );
    expect(count).toBe('3');
  });

  test('has star ratings for each variant', async () => {
    const count = await evalJs('document.querySelectorAll(".stars").length');
    expect(count).toBe('3');
  });
});
|
|
|
|
// ─── Submit Flow ────────────────────────────────────────────────
|
|
|
|
/**
 * Exercises the submit path: clicking the submit button must set the status
 * element to "submitted" and publish the feedback as JSON in the
 * feedback-result element.
 */
describe('Submit feedback flow', () => {
  /** Evaluates `expr` in the page via the `js` read command. */
  const evalJs = (expr: string) => handleReadCommand('js', [expr], bm);

  test('submit without interaction returns empty preferred', async () => {
    // Reset page state
    await handleWriteCommand('goto', [boardUrl], bm);

    // Click submit without picking anything
    await evalJs('document.getElementById("submit-btn").click()');

    // Status should be "submitted"
    const status = await evalJs('document.getElementById("status").textContent');
    expect(status).toBe('submitted');

    // Read feedback JSON
    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);
    expect(feedback.preferred).toBeNull();
    expect(feedback.regenerated).toBe(false);
    expect(feedback.ratings).toBeDefined();
  });

  test('submit with pick + rating + comment returns structured JSON', async () => {
    // Fresh page
    await handleWriteCommand('goto', [boardUrl], bm);

    // Pick variant B
    await evalJs(
      'document.querySelectorAll("input[name=\\"preferred\\"]")[1].click()',
    );

    // Rate variant A: 4 stars (click the 4th star)
    await evalJs(
      'document.querySelectorAll(".stars")[0].querySelectorAll(".star")[3].click()',
    );

    // Rate variant B: 5 stars
    await evalJs(
      'document.querySelectorAll(".stars")[1].querySelectorAll(".star")[4].click()',
    );

    // Add comment on variant A
    await evalJs(
      'document.querySelectorAll(".feedback-input")[0].value = "Good spacing but wrong colors"',
    );

    // Add overall feedback
    await evalJs(
      'document.getElementById("overall-feedback").value = "Go with B, make the CTA bigger"',
    );

    // Submit
    await evalJs('document.getElementById("submit-btn").click()');

    // Verify status
    const status = await evalJs('document.getElementById("status").textContent');
    expect(status).toBe('submitted');

    // Read and verify structured feedback
    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);

    expect(feedback.preferred).toBe('B');
    expect(feedback.ratings.A).toBe(4);
    expect(feedback.ratings.B).toBe(5);
    expect(feedback.comments.A).toBe('Good spacing but wrong colors');
    expect(feedback.overall).toBe('Go with B, make the CTA bigger');
    expect(feedback.regenerated).toBe(false);
  });

  // NOTE: the next two tests do not reload the page. They inspect the state
  // left behind by the submission in the previous test, so they rely on the
  // tests in this describe running in declaration order.
  test('submit button is disabled after submission', async () => {
    const disabled = await evalJs('document.getElementById("submit-btn").disabled');
    expect(disabled).toBe('true');
  });

  test('success message is visible after submission', async () => {
    const display = await evalJs(
      'document.getElementById("success-msg").style.display',
    );
    expect(display).toBe('block');
  });
});
|
|
|
|
// ─── Regenerate Flow ────────────────────────────────────────────
|
|
|
|
/**
 * Exercises the three ways of requesting regeneration (preset chiclet,
 * per-variant "More like this", free-form text). Each must mark the feedback
 * as regenerated and record the requested action in `regenerateAction`.
 */
describe('Regenerate flow', () => {
  /** Evaluates `expr` in the page via the `js` read command. */
  const evalJs = (expr: string) => handleReadCommand('js', [expr], bm);

  test('regenerate button sets status to "regenerate"', async () => {
    // Fresh page
    await handleWriteCommand('goto', [boardUrl], bm);

    // Click "Totally different" chiclet then regenerate
    await evalJs(
      'document.querySelector(".regen-chiclet[data-action=\\"different\\"]").click()',
    );
    await evalJs('document.getElementById("regen-btn").click()');

    const status = await evalJs('document.getElementById("status").textContent');
    expect(status).toBe('regenerate');

    // Verify regenerate action in feedback
    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('different');
  });

  test('"More like this" sets regenerate with variant reference', async () => {
    // Fresh page
    await handleWriteCommand('goto', [boardUrl], bm);

    // Click "More like this" on variant B (index 1); no separate click on
    // the regenerate button is made in this test.
    await evalJs('document.querySelectorAll(".more-like-this")[1].click()');

    const status = await evalJs('document.getElementById("status").textContent');
    expect(status).toBe('regenerate');

    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('more_like_B');
  });

  test('regenerate with custom text', async () => {
    // Fresh page
    await handleWriteCommand('goto', [boardUrl], bm);

    // Type custom regeneration text
    await evalJs(
      'document.getElementById("regen-custom-input").value = "V3 layout with V1 colors"',
    );

    // Click regenerate (no chiclet selected = custom)
    await evalJs('document.getElementById("regen-btn").click()');

    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);
    expect(feedback.regenerated).toBe(true);
    expect(feedback.regenerateAction).toBe('V3 layout with V1 colors');
  });
});
|
|
|
|
// ─── Agent Polling Pattern ──────────────────────────────────────
|
|
|
|
/**
 * Replays the sequence an agent follows when waiting for feedback: read the
 * status element until it is non-empty, then read the feedback JSON.
 */
describe('Agent polling pattern (simulates what $B eval does)', () => {
  /** Evaluates `expr` in the page via the `js` read command. */
  const evalJs = (expr: string) => handleReadCommand('js', [expr], bm);

  /** Reads the current text of the status element. */
  const pollStatus = () => evalJs('document.getElementById("status").textContent');

  test('status is empty before user action', async () => {
    // Fresh page — simulates agent's first poll
    await handleWriteCommand('goto', [boardUrl], bm);

    const status = await pollStatus();
    expect(status).toBe('');
  });

  test('full polling cycle: empty → submitted → read JSON', async () => {
    await handleWriteCommand('goto', [boardUrl], bm);

    // Poll 1: empty (user hasn't acted)
    const poll1 = await pollStatus();
    expect(poll1).toBe('');

    // User acts: pick A, submit
    await evalJs(
      'document.querySelectorAll("input[name=\\"preferred\\"]")[0].click()',
    );
    await evalJs('document.getElementById("submit-btn").click()');

    // Poll 2: submitted
    const poll2 = await pollStatus();
    expect(poll2).toBe('submitted');

    // Read feedback (what the agent does after seeing "submitted")
    const raw = await evalJs(
      'document.getElementById("feedback-result").textContent',
    );
    const feedback = JSON.parse(raw);
    expect(feedback.preferred).toBe('A');
    expect(typeof feedback.ratings).toBe('object');
    expect(typeof feedback.comments).toBe('object');
  });
});
|