feat: browse handoff — headless-to-headed browser switching

Add `handoff` and `resume` commands that let users take over a visible
Chrome when the headless browser gets stuck (CAPTCHAs, auth walls, MFA).

Architecture: launch-first-close-second for safe rollback. State transfer
via extracted saveState()/restoreState() helpers (DRY with recreateContext).
Auto-handoff hint after 3 consecutive command failures.
This commit is contained in:
Garry Tan
2026-03-18 22:15:30 -07:00
parent c4f679d829
commit 58d654c09a
4 changed files with 244 additions and 69 deletions
+221 -68
View File
@@ -15,7 +15,7 @@
* restores state. Falls back to clean slate on any failure.
*/
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator } from 'playwright';
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator, type Cookie } from 'playwright';
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
export interface RefEntry {
@@ -24,6 +24,15 @@ export interface RefEntry {
name: string;
}
export interface BrowserState {
cookies: Cookie[];
pages: Array<{
url: string;
isActive: boolean;
storage: { localStorage: Record<string, string>; sessionStorage: Record<string, string> } | null;
}>;
}
export class BrowserManager {
private browser: Browser | null = null;
private context: BrowserContext | null = null;
@@ -47,6 +56,10 @@ export class BrowserManager {
private dialogAutoAccept: boolean = true;
private dialogPromptText: string | null = null;
// ─── Handoff State ─────────────────────────────────────────
private isHeaded: boolean = false;
private consecutiveFailures: number = 0;
async launch() {
this.browser = await chromium.launch({ headless: true });
@@ -77,7 +90,11 @@ export class BrowserManager {
if (this.browser) {
// Remove disconnect handler to avoid exit during intentional close
this.browser.removeAllListeners('disconnected');
await this.browser.close();
// Timeout: headed browser.close() can hang on macOS
await Promise.race([
this.browser.close(),
new Promise(resolve => setTimeout(resolve, 5000)),
]).catch(() => {});
this.browser = null;
}
}
@@ -269,6 +286,92 @@ export class BrowserManager {
return this.customUserAgent;
}
// ─── State Save/Restore (shared by recreateContext + handoff) ─
/**
* Capture browser state: cookies, localStorage, sessionStorage, URLs, active tab.
* Skips pages that fail storage reads (e.g., already closed).
*/
async saveState(): Promise<BrowserState> {
if (!this.context) throw new Error('Browser not launched');
const cookies = await this.context.cookies();
const pages: BrowserState['pages'] = [];
for (const [id, page] of this.pages) {
const url = page.url();
let storage = null;
try {
storage = await page.evaluate(() => ({
localStorage: { ...localStorage },
sessionStorage: { ...sessionStorage },
}));
} catch {}
pages.push({
url: url === 'about:blank' ? '' : url,
isActive: id === this.activeTabId,
storage,
});
}
return { cookies, pages };
}
/**
* Restore browser state into the current context: cookies, pages, storage.
* Navigates to saved URLs, restores storage, wires page events.
* Failures on individual pages are swallowed — partial restore is better than none.
*/
async restoreState(state: BrowserState): Promise<void> {
if (!this.context) throw new Error('Browser not launched');
// Restore cookies
if (state.cookies.length > 0) {
await this.context.addCookies(state.cookies);
}
// Re-create pages
let activeId: number | null = null;
for (const saved of state.pages) {
const page = await this.context.newPage();
const id = this.nextTabId++;
this.pages.set(id, page);
this.wirePageEvents(page);
if (saved.url) {
await page.goto(saved.url, { waitUntil: 'domcontentloaded', timeout: 15000 }).catch(() => {});
}
if (saved.storage) {
try {
await page.evaluate((s: { localStorage: Record<string, string>; sessionStorage: Record<string, string> }) => {
if (s.localStorage) {
for (const [k, v] of Object.entries(s.localStorage)) {
localStorage.setItem(k, v);
}
}
if (s.sessionStorage) {
for (const [k, v] of Object.entries(s.sessionStorage)) {
sessionStorage.setItem(k, v);
}
}
}, saved.storage);
} catch {}
}
if (saved.isActive) activeId = id;
}
// If no pages were saved, create a blank one
if (this.pages.size === 0) {
await this.newTab();
} else {
this.activeTabId = activeId ?? [...this.pages.keys()][0];
}
// Clear refs — pages are new, locators are stale
this.clearRefs();
}
/**
* Recreate the browser context to apply user agent changes.
* Saves and restores cookies, localStorage, sessionStorage, and open pages.
@@ -280,25 +383,8 @@ export class BrowserManager {
}
try {
// 1. Save state from current context
const savedCookies = await this.context.cookies();
const savedPages: Array<{ url: string; isActive: boolean; storage: { localStorage: Record<string, string>; sessionStorage: Record<string, string> } | null }> = [];
for (const [id, page] of this.pages) {
const url = page.url();
let storage = null;
try {
storage = await page.evaluate(() => ({
localStorage: { ...localStorage },
sessionStorage: { ...sessionStorage },
}));
} catch {}
savedPages.push({
url: url === 'about:blank' ? '' : url,
isActive: id === this.activeTabId,
storage,
});
}
// 1. Save state
const state = await this.saveState();
// 2. Close old pages and context
for (const page of this.pages.values()) {
@@ -320,53 +406,8 @@ export class BrowserManager {
await this.context.setExtraHTTPHeaders(this.extraHeaders);
}
// 4. Restore cookies
if (savedCookies.length > 0) {
await this.context.addCookies(savedCookies);
}
// 5. Re-create pages
let activeId: number | null = null;
for (const saved of savedPages) {
const page = await this.context.newPage();
const id = this.nextTabId++;
this.pages.set(id, page);
this.wirePageEvents(page);
if (saved.url) {
await page.goto(saved.url, { waitUntil: 'domcontentloaded', timeout: 15000 }).catch(() => {});
}
// 6. Restore storage
if (saved.storage) {
try {
await page.evaluate((s: { localStorage: Record<string, string>; sessionStorage: Record<string, string> }) => {
if (s.localStorage) {
for (const [k, v] of Object.entries(s.localStorage)) {
localStorage.setItem(k, v);
}
}
if (s.sessionStorage) {
for (const [k, v] of Object.entries(s.sessionStorage)) {
sessionStorage.setItem(k, v);
}
}
}, saved.storage);
} catch {}
}
if (saved.isActive) activeId = id;
}
// If no pages were saved, create a blank one
if (this.pages.size === 0) {
await this.newTab();
} else {
this.activeTabId = activeId ?? [...this.pages.keys()][0];
}
// Clear refs — pages are new, locators are stale
this.clearRefs();
// 4. Restore state
await this.restoreState(state);
return null; // success
} catch (err: unknown) {
@@ -391,6 +432,118 @@ export class BrowserManager {
}
}
// ─── Handoff: Headless → Headed ─────────────────────────────
/**
* Hand off browser control to the user by relaunching in headed mode.
*
* Flow (launch-first-close-second for safe rollback):
* 1. Save state from current headless browser
* 2. Launch NEW headed browser
* 3. Restore state into new browser
* 4. Close OLD headless browser
* If step 2 fails → return error, headless browser untouched
*/
async handoff(message: string): Promise<string> {
if (this.isHeaded) {
return `HANDOFF: Already in headed mode at ${this.getCurrentUrl()}`;
}
if (!this.browser || !this.context) {
throw new Error('Browser not launched');
}
// 1. Save state from current browser
const state = await this.saveState();
const currentUrl = this.getCurrentUrl();
// 2. Launch new headed browser (try-catch — if this fails, headless stays running)
let newBrowser: Browser;
try {
newBrowser = await chromium.launch({ headless: false, timeout: 15000 });
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
return `ERROR: Cannot open headed browser — ${msg}. Headless browser still running.`;
}
// 3. Create context and restore state into new headed browser
try {
const contextOptions: BrowserContextOptions = {
viewport: { width: 1280, height: 720 },
};
if (this.customUserAgent) {
contextOptions.userAgent = this.customUserAgent;
}
const newContext = await newBrowser.newContext(contextOptions);
if (Object.keys(this.extraHeaders).length > 0) {
await newContext.setExtraHTTPHeaders(this.extraHeaders);
}
// Swap to new browser/context before restoreState (it uses this.context)
const oldBrowser = this.browser;
const oldContext = this.context;
this.browser = newBrowser;
this.context = newContext;
this.pages.clear();
// Register crash handler on new browser
this.browser.on('disconnected', () => {
console.error('[browse] FATAL: Chromium process crashed or was killed. Server exiting.');
console.error('[browse] Console/network logs flushed to .gstack/browse-*.log');
process.exit(1);
});
await this.restoreState(state);
this.isHeaded = true;
// 4. Close old headless browser (fire-and-forget — close() can hang
// when another Playwright instance is active, so we don't await it)
oldBrowser.removeAllListeners('disconnected');
oldBrowser.close().catch(() => {});
return [
`HANDOFF: Browser opened at ${currentUrl}`,
`MESSAGE: ${message}`,
`STATUS: Waiting for user. Run 'resume' when done.`,
].join('\n');
} catch (err: unknown) {
// Restore failed — close the new browser, keep old one
await newBrowser.close().catch(() => {});
const msg = err instanceof Error ? err.message : String(err);
return `ERROR: Handoff failed during state restore — ${msg}. Headless browser still running.`;
}
}
/**
* Resume AI control after user handoff.
* Clears stale refs and resets failure counter.
* The meta-command handler calls handleSnapshot() after this.
*/
resume(): void {
this.clearRefs();
this.resetFailures();
}
getIsHeaded(): boolean {
return this.isHeaded;
}
// ─── Auto-handoff Hint (consecutive failure tracking) ───────
incrementFailures(): void {
this.consecutiveFailures++;
}
resetFailures(): void {
this.consecutiveFailures = 0;
}
getFailureHint(): string | null {
if (this.consecutiveFailures >= 3 && !this.isHeaded) {
return `HINT: ${this.consecutiveFailures} consecutive failures. Consider using 'handoff' to let the user help.`;
}
return null;
}
// ─── Console/Network/Dialog/Ref Wiring ────────────────────
private wirePageEvents(page: Page) {
// Clear ref map on navigation — refs point to stale elements after page change
+4
View File
@@ -30,6 +30,7 @@ export const META_COMMANDS = new Set([
'screenshot', 'pdf', 'responsive',
'chain', 'diff',
'url', 'snapshot',
'handoff', 'resume',
]);
export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]);
@@ -94,6 +95,9 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
// Meta
'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs', usage: 'snapshot [flags]' },
'chain': { category: 'Meta', description: 'Run commands from JSON stdin. Format: [["cmd","arg1",...],...]' },
// Handoff
'handoff': { category: 'Server', description: 'Open visible Chrome at current page for user takeover', usage: 'handoff [message]' },
'resume': { category: 'Server', description: 'Re-snapshot after user takeover, return control to AI', usage: 'resume' },
};
// Load-time validation: descriptions must cover exactly the command sets
+13
View File
@@ -246,6 +246,19 @@ export async function handleMetaCommand(
return await handleSnapshot(args, bm);
}
// ─── Handoff ────────────────────────────────────
case 'handoff': {
const message = args.join(' ') || 'User takeover requested';
return await bm.handoff(message);
}
case 'resume': {
bm.resume();
// Re-snapshot to capture current page state after human interaction
const snapshot = await handleSnapshot(['-i'], bm);
return `RESUMED\n${snapshot}`;
}
default:
throw new Error(`Unknown meta command: ${command}`);
}
+6 -1
View File
@@ -249,12 +249,17 @@ async function handleCommand(body: any): Promise<Response> {
});
}
browserManager.resetFailures();
return new Response(result, {
status: 200,
headers: { 'Content-Type': 'text/plain' },
});
} catch (err: any) {
return new Response(JSON.stringify({ error: wrapError(err) }), {
browserManager.incrementFailures();
let errorMsg = wrapError(err);
const hint = browserManager.getFailureHint();
if (hint) errorMsg += '\n' + hint;
return new Response(JSON.stringify({ error: errorMsg }), {
status: 500,
headers: { 'Content-Type': 'application/json' },
});