mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
1868636f49
* plan: batch command endpoint + multi-tab parallel execution for GStack Browser * refactor: extract TabSession from BrowserManager for per-tab state Move per-tab state (refMap, lastSnapshot, frame) into a new TabSession class. BrowserManager delegates to the active TabSession via getActiveSession(). Zero behavior change — all existing tests pass. This is the foundation for the /batch endpoint: both /command and /batch will use the same handler functions with TabSession, eliminating shared state races during parallel tab execution. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: update handler signatures to use TabSession Change handleReadCommand and handleSnapshot to take TabSession instead of BrowserManager. Change handleWriteCommand to take both TabSession (per-tab ops) and BrowserManager (global ops like viewport, headers, dialog). handleMetaCommand keeps BrowserManager for tab management. Tests use thin wrapper functions that bridge the old 3-arg call pattern to the new signatures via bm.getActiveSession(). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add POST /batch endpoint for parallel multi-tab execution Execute multiple commands across tabs in a single HTTP request. Commands targeting different tabs run concurrently via Promise.allSettled. Commands targeting the same tab run sequentially within that group. Features: - Batch-safe command subset (text, goto, click, snapshot, screenshot, etc.) - newtab/closetab as special commands within batch - SSE streaming mode (stream: true) for partial results - Per-command error isolation (one tab failing doesn't abort the batch) - Max 50 commands per batch, soft batch-level timeout A 143-page crawl drops from ~45 min (serial HTTP) to ~5 min (20 tabs in parallel, batched commands). 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: add batch endpoint integration tests 10 tests covering: - Multi-tab parallel execution (goto + text on different tabs) - Same-tab sequential ordering - Per-command error isolation (one tab fails, others succeed) - Page-scoped refs (snapshot refs are per-session, not global) - Per-tab lastSnapshot (snapshot -D with independent baselines) - getSession/getActiveSession API - Batch-safe command subset validation - closeTab via page.close preserves at-least-one-page invariant - Parallel goto on 3 tabs simultaneously Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: harden codex-review E2E — extract SKILL.md section, bump maxTurns to 25 The test was copying the full 55KB/1075-line codex SKILL.md into the fixture, requiring 8 Read calls just to consume it and exhausting the 15-turn budget before reaching the actual codex review command. Now extracts only the review-relevant section (~6KB/148 lines), reducing Read calls from 8 to 1. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: move batch endpoint plan into BROWSER.md as feature documentation The batch endpoint is implemented — document it as an actual feature in BROWSER.md (architecture, API shape, design decisions, usage pattern) and remove the standalone plan file. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.15.16.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: gstack <ship@gstack.dev> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
239 lines
8.2 KiB
TypeScript
239 lines
8.2 KiB
TypeScript
/**
 * Tests for handoff/resume commands — headless-to-headed browser switching.
 *
 * Unit tests cover saveState/restoreState, failure tracking, and edge cases.
 * Integration tests cover the full handoff flow with real Playwright browsers.
 */
|
|
|
|
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
|
import { startTestServer } from './test-server';
|
|
import { BrowserManager, type BrowserState } from '../src/browser-manager';
|
|
import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
|
|
import { handleMetaCommand } from '../src/meta-commands';
|
|
|
|
/**
 * Test-side adapter around the imported write-command handler.
 *
 * Lets the tests use a three-argument call shape (command, args, manager)
 * while forwarding both the manager's active session and the manager itself
 * to the underlying handler.
 */
const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) => {
  const activeSession = b.getActiveSession();
  return _handleWriteCommand(cmd, args, activeSession, b);
};
|
|
|
|
// Fixtures shared by the suites below; all three are assigned in beforeAll.
let testServer: ReturnType<typeof startTestServer>;
let bm: BrowserManager;
let baseUrl: string;

beforeAll(async () => {
  // Start the fixture server first and remember the URL it reports.
  testServer = startTestServer(0);
  baseUrl = testServer.url;

  // One BrowserManager is launched here and reused by the non-integration suites.
  bm = new BrowserManager();
  await bm.launch();
});
|
|
|
|
afterAll(() => {
  // Best-effort shutdown of the fixture server; any error is deliberately ignored.
  try {
    testServer.server.stop();
  } catch {}

  // The shared BrowserManager is not closed here; the process is force-exited
  // with status 0 half a second later instead.
  // NOTE(review): presumably a workaround for a slow/hanging browser close — confirm.
  const forceExit = () => process.exit(0);
  setTimeout(forceExit, 500);
});
|
|
|
|
// ─── Unit Tests: Failure Tracking (no browser needed) ────────────
|
|
|
|
describe('failure tracking', () => {
  /** Calls `incrementFailures()` on the given manager `count` times in a row. */
  const recordFailures = (tracker: BrowserManager, count: number): void => {
    for (let i = 0; i < count; i += 1) {
      tracker.incrementFailures();
    }
  };

  // Two failures are not enough to produce a hint.
  test('getFailureHint returns null when below threshold', () => {
    const tracker = new BrowserManager();
    recordFailures(tracker, 2);
    expect(tracker.getFailureHint()).toBeNull();
  });

  // The third failure yields a hint mentioning "handoff" and the count.
  test('getFailureHint returns hint after 3 consecutive failures', () => {
    const tracker = new BrowserManager();
    recordFailures(tracker, 3);

    const hint = tracker.getFailureHint();
    expect(hint).not.toBeNull();
    expect(hint).toContain('handoff');
    expect(hint).toContain('3');
  });

  // With the headed flag forced on, no hint is produced even after three failures.
  test('hint suppressed when already headed', () => {
    const tracker = new BrowserManager();
    (tracker as any).isHeaded = true;
    recordFailures(tracker, 3);
    expect(tracker.getFailureHint()).toBeNull();
  });

  // A reset after reaching the hint makes the hint disappear again.
  test('resetFailures clears the counter', () => {
    const tracker = new BrowserManager();
    recordFailures(tracker, 3);
    expect(tracker.getFailureHint()).not.toBeNull();

    tracker.resetFailures();
    expect(tracker.getFailureHint()).toBeNull();
  });

  // A freshly constructed (never launched) manager reports headless.
  test('getIsHeaded returns false by default', () => {
    const tracker = new BrowserManager();
    expect(tracker.getIsHeaded()).toBe(false);
  });
});
|
|
|
|
// ─── Unit Tests: State Save/Restore (shared browser) ─────────────
|
|
|
|
/**
 * saveState() tests. All of them run against the shared `bm` launched in
 * beforeAll, so each test must leave the manager in a reusable state.
 */
describe('saveState', () => {
  // A cookie set through the write handler and the visited URL both show up in the snapshot.
  test('captures cookies and page URLs', async () => {
    await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm);
    await handleWriteCommand('cookie', ['testcookie=testvalue'], bm);

    const state = await bm.saveState();

    expect(state.cookies.length).toBeGreaterThan(0);
    expect(state.cookies.some(c => c.name === 'testcookie')).toBe(true);
    expect(state.pages.length).toBeGreaterThanOrEqual(1);
    expect(state.pages.some(p => p.url.includes('/basic.html'))).toBe(true);
  }, 15000);

  // Values written to both web storages on the active page are present in its saved entry.
  test('captures localStorage and sessionStorage', async () => {
    await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm);
    const page = bm.getPage();
    await page.evaluate(() => {
      localStorage.setItem('lsKey', 'lsValue');
      sessionStorage.setItem('ssKey', 'ssValue');
    });

    const state = await bm.saveState();
    const activePage = state.pages.find(p => p.isActive);

    expect(activePage).toBeDefined();
    expect(activePage!.storage).not.toBeNull();
    expect(activePage!.storage!.localStorage).toHaveProperty('lsKey', 'lsValue');
    expect(activePage!.storage!.sessionStorage).toHaveProperty('ssKey', 'ssValue');
  }, 15000);

  // With two tabs open, both are saved and the most recently opened one is marked active.
  test('captures multiple tabs', async () => {
    // Start from exactly one tab so the page count below is deterministic.
    while (bm.getTabCount() > 1) {
      await bm.closeTab();
    }
    await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm);
    await handleMetaCommand('newtab', [baseUrl + '/form.html'], bm, () => {});

    try {
      const state = await bm.saveState();
      expect(state.pages.length).toBe(2);

      const activePage = state.pages.find(p => p.isActive);
      expect(activePage).toBeDefined();
      expect(activePage!.url).toContain('/form.html');
    } finally {
      // Close the extra tab even when an assertion fails, so later tests that
      // share `bm` do not inherit a stray tab.
      await bm.closeTab();
    }
  }, 15000);
});
|
|
|
|
describe('restoreState', () => {
  // A cookie set before recreateContext() must still be reported by saveState() afterwards.
  test('state survives recreateContext round-trip', async () => {
    const pageUrl = baseUrl + '/basic.html';
    await handleWriteCommand('goto', [pageUrl], bm);
    await handleWriteCommand('cookie', ['restored=yes'], bm);

    // Baseline: the cookie is visible before the context is rebuilt.
    const stateBefore = await bm.saveState();
    const presentBefore = stateBefore.cookies.some(cookie => cookie.name === 'restored');
    expect(presentBefore).toBe(true);

    await bm.recreateContext();

    // After the rebuild the cookie and at least one page are still there.
    const stateAfter = await bm.saveState();
    const presentAfter = stateAfter.cookies.some(cookie => cookie.name === 'restored');
    expect(presentAfter).toBe(true);
    expect(stateAfter.pages.length).toBeGreaterThanOrEqual(1);
  }, 30000);
});
|
|
|
|
// ─── Unit Tests: Handoff Edge Cases ──────────────────────────────
|
|
|
|
/**
 * Edge cases for handoff/resume, exercised on the shared `bm`.
 * No real headed browser is launched by these tests.
 */
describe('handoff edge cases', () => {
  // handoff() short-circuits with a message when the manager is already flagged as headed.
  test('handoff when already headed returns no-op', async () => {
    (bm as any).isHeaded = true;
    try {
      const result = await bm.handoff('test');
      expect(result).toContain('Already in headed mode');
    } finally {
      // Always clear the forced flag: `bm` is shared, and leaving it set after
      // a failed assertion would make later tests see a headed manager.
      (bm as any).isHeaded = false;
    }
  }, 10000);

  // After resume(), the failure hint is gone and the ref count is zero.
  test('resume clears refs and resets failures', () => {
    bm.incrementFailures();
    bm.incrementFailures();
    bm.incrementFailures();

    bm.resume();

    expect(bm.getFailureHint()).toBeNull();
    expect(bm.getRefCount()).toBe(0);
  });

  // The `resume` meta command succeeds even when no handoff happened first.
  test('resume without prior handoff works via meta command', async () => {
    await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm);
    const result = await handleMetaCommand('resume', [], bm, () => {});
    expect(result).toContain('RESUMED');
  }, 15000);
});
|
|
|
|
// ─── Integration Tests: Full Handoff Flow ────────────────────────
// Each handoff test creates its own BrowserManager since handoff swaps the browser.
// These tests run sequentially (one browser at a time) to avoid resource issues.
|
|
|
|
/**
 * End-to-end handoff tests. Each test launches its own BrowserManager
 * and closes it afterwards, independent of the shared `bm`.
 */
describe('handoff integration', () => {
  /**
   * Launches a dedicated BrowserManager, runs `body` against it, and closes
   * the manager afterwards whether or not `body` throws.
   */
  const withLaunchedManager = async (
    body: (hbm: BrowserManager) => Promise<void>,
  ): Promise<void> => {
    const hbm = new BrowserManager();
    await hbm.launch();
    try {
      await body(hbm);
    } finally {
      await hbm.close();
    }
  };

  test('full handoff: cookies preserved, headed mode active, commands work', async () => {
    await withLaunchedManager(async (hbm) => {
      // Set up state
      await handleWriteCommand('goto', [baseUrl + '/basic.html'], hbm);
      await handleWriteCommand('cookie', ['handoff_test=preserved'], hbm);

      // Handoff
      const result = await hbm.handoff('Testing handoff');
      expect(result).toContain('HANDOFF:');
      expect(result).toContain('Testing handoff');
      expect(result).toContain('resume');
      expect(hbm.getIsHeaded()).toBe(true);

      // Read handlers are given the manager's active session, mirroring what
      // the write adapter in this file passes. The session is looked up after
      // the handoff, at each call.
      // NOTE(review): assumes handleReadCommand's third parameter is the tab
      // session — confirm against ../src/read-commands.
      const { handleReadCommand } = await import('../src/read-commands');

      // Verify cookies survived
      const cookiesResult = await handleReadCommand('cookies', [], hbm.getActiveSession());
      expect(cookiesResult).toContain('handoff_test');

      // Verify commands still work
      const text = await handleReadCommand('text', [], hbm.getActiveSession());
      expect(text.length).toBeGreaterThan(0);

      // Resume
      const resumeResult = await handleMetaCommand('resume', [], hbm, () => {});
      expect(resumeResult).toContain('RESUMED');
    });
  }, 45000);

  // Two tabs before the handoff must still be two tabs afterwards, now in headed mode.
  test('multi-tab handoff preserves all tabs', async () => {
    await withLaunchedManager(async (hbm) => {
      await handleWriteCommand('goto', [baseUrl + '/basic.html'], hbm);
      await handleMetaCommand('newtab', [baseUrl + '/form.html'], hbm, () => {});
      expect(hbm.getTabCount()).toBe(2);

      await hbm.handoff('multi-tab test');
      expect(hbm.getTabCount()).toBe(2);
      expect(hbm.getIsHeaded()).toBe(true);
    });
  }, 45000);

  // The meta command's result contains the args joined with a space.
  test('handoff meta command joins args as message', async () => {
    await withLaunchedManager(async (hbm) => {
      await handleWriteCommand('goto', [baseUrl + '/basic.html'], hbm);
      const result = await handleMetaCommand('handoff', ['CAPTCHA', 'stuck'], hbm, () => {});
      expect(result).toContain('CAPTCHA stuck');
    });
  }, 45000);
});
|