feat(browse): full $B pdf flag contract + tab-scoped load-html/js/pdf

Grow $B pdf from a 2-line wrapper (hard-coded A4) into a real PDF engine
frontend so make-pdf can shell out to it without duplicating Playwright:

- pdf: --format, --width/--height, --margins, --margin-*, --header-template,
  --footer-template, --page-numbers, --tagged, --outline, --print-background,
  --prefer-css-page-size, --toc. Mutex rules enforced. --from-file <json>
  dodges Windows argv limits (8191 char CreateProcess cap).
- load-html: add --from-file <json> mode for large inline HTML. Size + magic
  byte checks still apply to the inline content, not the payload file path.
- newtab: add --json returning {"tabId":N,"url":...} for programmatic use.
- cli: extract --tab-id flag and route as body.tabId to the HTTP layer so
  parallel callers can target specific tabs without racing on the active
  tab (makes make-pdf's per-render tab isolation possible).
- --toc: non-fatal 3s wait for window.__pagedjsAfterFired. Paged.js ships
  later; v1 renders TOC statically via the markdown renderer.

Codex round 2 flagged these P0 issues during plan review. All resolved.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-20 05:33:22 +08:00
parent 12260262ea
commit 7d89dcb77a
6 changed files with 394 additions and 20 deletions
+30 -3
View File
@@ -375,11 +375,38 @@ async function ensureServer(): Promise<ServerState> {
}
}
/**
* Extract `--tab-id <N>` from args and return { tabId, args } with the flag stripped.
* Used by make-pdf's tab-scoped flow: every browse command (newtab, load-html, js,
* pdf, closetab) can take `--tab-id <N>` to target a specific tab. Without this,
* parallel `$P generate` calls would race on the active tab.
*/
export function extractTabId(args: string[]): { tabId: number | undefined; args: string[] } {
const stripped: string[] = [];
let tabId: number | undefined;
for (let i = 0; i < args.length; i++) {
if (args[i] === '--tab-id') {
const next = args[++i];
if (next === undefined) continue;
const parsed = parseInt(next, 10);
if (!isNaN(parsed)) tabId = parsed;
} else {
stripped.push(args[i]);
}
}
return { tabId, args: stripped };
}
// ─── Command Dispatch ──────────────────────────────────────────
async function sendCommand(state: ServerState, command: string, args: string[], retries = 0): Promise<void> {
// BROWSE_TAB env var pins commands to a specific tab (set by sidebar-agent per-tab)
const browseTab = process.env.BROWSE_TAB;
const body = JSON.stringify({ command, args, ...(browseTab ? { tabId: parseInt(browseTab, 10) } : {}) });
// Precedence: CLI --tab-id flag > BROWSE_TAB env var.
// make-pdf always passes --tab-id; human users typically rely on BROWSE_TAB
// (set by sidebar-agent per-tab) or the active tab.
const extracted = extractTabId(args);
args = extracted.args;
const envTab = process.env.BROWSE_TAB;
const tabId = extracted.tabId ?? (envTab ? parseInt(envTab, 10) : undefined);
const body = JSON.stringify({ command, args, ...(tabId !== undefined && !isNaN(tabId) ? { tabId } : {}) });
try {
const resp = await fetch(`http://127.0.0.1:${state.port}/command`, {
+3 -3
View File
@@ -66,7 +66,7 @@ export function wrapUntrustedContent(result: string, url: string): string {
export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage?: string }> = {
// Navigation
'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto <url>' },
'load-html': { category: 'Navigation', description: 'Load a local HTML file via setContent (no HTTP server needed). For self-contained HTML (inline CSS/JS, data URIs). For HTML on disk, goto file://... is often cleaner.', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle]' },
'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file <payload.json> with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]' },
'back': { category: 'Navigation', description: 'History back' },
'forward': { category: 'Navigation', description: 'History forward' },
'reload': { category: 'Navigation', description: 'Reload page' },
@@ -115,13 +115,13 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
'archive': { category: 'Extraction', description: 'Save complete page as MHTML via CDP', usage: 'archive [path]' },
// Visual
'screenshot': { category: 'Visual', description: 'Save screenshot. --selector targets a specific element (explicit flag form). Positional selectors starting with ./#/@/[ still work.', usage: 'screenshot [--selector <css>] [--viewport] [--clip x,y,w,h] [--base64] [selector|@ref] [path]' },
'pdf': { category: 'Visual', description: 'Save as PDF', usage: 'pdf [path]' },
'pdf': { category: 'Visual', description: 'Save the current page as PDF. Supports page layout (--format, --width, --height, --margins, --margin-*), structure (--toc waits for Paged.js), branding (--header-template, --footer-template, --page-numbers), accessibility (--tagged, --outline), and --from-file <payload.json> for large payloads. Use --tab-id <N> to target a specific tab.', usage: 'pdf [path] [--format letter|a4|legal] [--width <dim> --height <dim>] [--margins <dim>] [--margin-top <dim> --margin-right <dim> --margin-bottom <dim> --margin-left <dim>] [--header-template <html>] [--footer-template <html>] [--page-numbers] [--tagged] [--outline] [--print-background] [--prefer-css-page-size] [--toc] [--tab-id <N>] | pdf --from-file <payload.json> [--tab-id <N>]' },
'responsive': { category: 'Visual', description: 'Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc.', usage: 'responsive [prefix]' },
'diff': { category: 'Visual', description: 'Text diff between pages', usage: 'diff <url1> <url2>' },
// Tabs
'tabs': { category: 'Tabs', description: 'List open tabs' },
'tab': { category: 'Tabs', description: 'Switch to tab', usage: 'tab <id>' },
'newtab': { category: 'Tabs', description: 'Open new tab', usage: 'newtab [url]' },
'newtab': { category: 'Tabs', description: 'Open new tab. With --json, returns {"tabId":N,"url":...} for programmatic use (make-pdf).', usage: 'newtab [url] [--json]' },
'closetab':{ category: 'Tabs', description: 'Close tab', usage: 'closetab [id]' },
// Server
'status': { category: 'Server', description: 'Health check' },
+218 -5
View File
@@ -37,6 +37,187 @@ function tokenizePipeSegment(segment: string): string[] {
return tokens;
}
// ─── PDF flag parsing (make-pdf contract) ─────────────────────────────
//
// The $B pdf command grew from a 2-line wrapper (format: 'A4') into a real
// PDF engine frontend. make-pdf/dist/pdf shells out to `browse pdf` with
// this flag set, so the contract here has to be stable.
//
// Mutex rules enforced:
// --format vs --width/--height
// --margins vs any --margin-*
// --page-numbers vs --footer-template (page-numbers writes the footer itself)
//
// Units for dimensions: "1in" | "72pt" | "25mm" | "2.54cm". Bare numbers
// are interpreted as pixels (Playwright's default), which is almost never
// what callers want — we warn but don't reject.
//
// Large payloads: header/footer HTML and custom CSS can exceed Windows'
// 8191-char CreateProcess cap via argv. Callers pass `--from-file <path>`
// to a JSON file holding the full options. make-pdf always uses this path.
interface ParsedPdfArgs {
output: string;
format?: string;
width?: string;
height?: string;
marginTop?: string;
marginRight?: string;
marginBottom?: string;
marginLeft?: string;
headerTemplate?: string;
footerTemplate?: string;
pageNumbers?: boolean;
tagged?: boolean;
outline?: boolean;
printBackground?: boolean;
preferCSSPageSize?: boolean;
toc?: boolean;
}
function parsePdfArgs(args: string[]): ParsedPdfArgs {
// --from-file short-circuits argv parsing entirely
for (let i = 0; i < args.length; i++) {
if (args[i] === '--from-file') {
const payloadPath = args[++i];
if (!payloadPath) throw new Error('pdf: --from-file requires a path');
return parsePdfFromFile(payloadPath);
}
}
const result: ParsedPdfArgs = {
output: `${TEMP_DIR}/browse-page.pdf`,
};
let margins: string | undefined;
const positional: string[] = [];
for (let i = 0; i < args.length; i++) {
const a = args[i];
if (a === '--format') { result.format = requireValue(args, ++i, 'format'); }
else if (a === '--page-size') { result.format = requireValue(args, ++i, 'page-size'); }
else if (a === '--width') { result.width = requireValue(args, ++i, 'width'); }
else if (a === '--height') { result.height = requireValue(args, ++i, 'height'); }
else if (a === '--margins') { margins = requireValue(args, ++i, 'margins'); }
else if (a === '--margin-top') { result.marginTop = requireValue(args, ++i, 'margin-top'); }
else if (a === '--margin-right') { result.marginRight = requireValue(args, ++i, 'margin-right'); }
else if (a === '--margin-bottom') { result.marginBottom = requireValue(args, ++i, 'margin-bottom'); }
else if (a === '--margin-left') { result.marginLeft = requireValue(args, ++i, 'margin-left'); }
else if (a === '--header-template') { result.headerTemplate = requireValue(args, ++i, 'header-template'); }
else if (a === '--footer-template') { result.footerTemplate = requireValue(args, ++i, 'footer-template'); }
else if (a === '--page-numbers') { result.pageNumbers = true; }
else if (a === '--tagged') { result.tagged = true; }
else if (a === '--outline') { result.outline = true; }
else if (a === '--print-background') { result.printBackground = true; }
else if (a === '--prefer-css-page-size') { result.preferCSSPageSize = true; }
else if (a === '--toc') { result.toc = true; }
else if (a.startsWith('--')) { throw new Error(`Unknown pdf flag: ${a}`); }
else { positional.push(a); }
}
if (positional.length > 0) result.output = positional[0];
if (margins !== undefined) {
if (result.marginTop || result.marginRight || result.marginBottom || result.marginLeft) {
throw new Error('pdf: --margins is mutex with --margin-top/--margin-right/--margin-bottom/--margin-left');
}
result.marginTop = result.marginRight = result.marginBottom = result.marginLeft = margins;
}
if (result.format && (result.width || result.height)) {
throw new Error('pdf: --format is mutex with --width/--height');
}
if (result.pageNumbers && result.footerTemplate) {
throw new Error('pdf: --page-numbers is mutex with --footer-template (page-numbers writes the footer itself)');
}
return result;
}
function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
const raw = fs.readFileSync(payloadPath, 'utf8');
const json = JSON.parse(raw);
const out: ParsedPdfArgs = {
output: json.output || `${TEMP_DIR}/browse-page.pdf`,
format: json.format,
width: json.width,
height: json.height,
marginTop: json.marginTop,
marginRight: json.marginRight,
marginBottom: json.marginBottom,
marginLeft: json.marginLeft,
headerTemplate: json.headerTemplate,
footerTemplate: json.footerTemplate,
pageNumbers: json.pageNumbers === true,
tagged: json.tagged === true,
outline: json.outline === true,
printBackground: json.printBackground === true,
preferCSSPageSize: json.preferCSSPageSize === true,
toc: json.toc === true,
};
return out;
}
function requireValue(args: string[], i: number, flag: string): string {
const v = args[i];
if (v === undefined || v.startsWith('--')) {
throw new Error(`pdf: --${flag} requires a value`);
}
return v;
}
function buildPdfOptions(parsed: ParsedPdfArgs): Record<string, unknown> {
const opts: Record<string, unknown> = {};
// Page size
if (parsed.format) {
opts.format = parsed.format.charAt(0).toUpperCase() + parsed.format.slice(1).toLowerCase();
} else if (parsed.width && parsed.height) {
opts.width = parsed.width;
opts.height = parsed.height;
} else {
opts.format = 'Letter';
}
// Margins
const margin: Record<string, string> = {};
if (parsed.marginTop) margin.top = parsed.marginTop;
if (parsed.marginRight) margin.right = parsed.marginRight;
if (parsed.marginBottom) margin.bottom = parsed.marginBottom;
if (parsed.marginLeft) margin.left = parsed.marginLeft;
if (Object.keys(margin).length > 0) opts.margin = margin;
// Header/footer
const displayHeaderFooter =
!!parsed.headerTemplate || !!parsed.footerTemplate || parsed.pageNumbers === true;
if (displayHeaderFooter) {
opts.displayHeaderFooter = true;
// Provide minimum empty templates when only one is set, otherwise Chromium
// emits its default ugly URL/date in the other slot.
if (parsed.headerTemplate !== undefined) opts.headerTemplate = parsed.headerTemplate;
else if (parsed.pageNumbers || parsed.footerTemplate) opts.headerTemplate = '<div></div>';
if (parsed.pageNumbers) {
opts.footerTemplate = [
'<div style="font-size:9pt; font-family:Helvetica,Arial,sans-serif; color:#666; ',
'width:100%; text-align:center;">',
'<span class="pageNumber"></span> of <span class="totalPages"></span>',
'</div>',
].join('');
} else if (parsed.footerTemplate !== undefined) {
opts.footerTemplate = parsed.footerTemplate;
} else {
opts.footerTemplate = '<div></div>';
}
}
if (parsed.tagged === true) opts.tagged = true;
if (parsed.outline === true) opts.outline = true;
if (parsed.printBackground === true) opts.printBackground = true;
if (parsed.preferCSSPageSize === true) opts.preferCSSPageSize = true;
return opts;
}
/** Options passed from handleCommandInternal for chain routing */
export interface MetaCommandOpts {
chainDepth?: number;
@@ -72,8 +253,18 @@ export async function handleMetaCommand(
}
case 'newtab': {
const url = args[0];
// --json returns structured output (machine-parseable). Other flag-like
// tokens are treated as the url. make-pdf always passes --json.
let url: string | undefined;
let jsonMode = false;
for (const a of args) {
if (a === '--json') { jsonMode = true; }
else if (!url) { url = a; }
}
const id = await bm.newTab(url);
if (jsonMode) {
return JSON.stringify({ tabId: id, url: url ?? null });
}
return `Opened tab ${id}${url ? `${url}` : ''}`;
}
@@ -213,10 +404,32 @@ export async function handleMetaCommand(
case 'pdf': {
const page = bm.getPage();
const pdfPath = args[0] || `${TEMP_DIR}/browse-page.pdf`;
validateOutputPath(pdfPath);
await page.pdf({ path: pdfPath, format: 'A4' });
return `PDF saved: ${pdfPath}`;
const parsed = parsePdfArgs(args);
validateOutputPath(parsed.output);
// If --toc: wait up to 3s for Paged.js to signal by setting
// window.__pagedjsAfterFired = true. If the polyfill isn't injected
// (make-pdf v1 ships without Paged.js; TOC renders without page
// numbers), we fall through silently — callers that require strict
// TOC pagination should pass --require-paged-js too.
if (parsed.toc) {
const deadline = Date.now() + 3000;
let ready = false;
while (Date.now() < deadline) {
try {
ready = await page.evaluate('!!window.__pagedjsAfterFired');
} catch { /* tab may still be hydrating */ }
if (ready) break;
await new Promise(r => setTimeout(r, 150));
}
// Intentionally non-fatal. Paged.js is optional in v1.
}
const opts = buildPdfOptions(parsed);
opts.path = parsed.output;
await page.pdf(opts);
return `PDF saved: ${parsed.output}`;
}
case 'responsive': {
+46 -5
View File
@@ -175,13 +175,32 @@ export async function handleWriteCommand(
case 'load-html': {
if (inFrame) throw new Error('Cannot use load-html inside a frame. Run \'frame main\' first.');
const filePath = args[0];
if (!filePath) throw new Error('Usage: browse load-html <file> [--wait-until load|domcontentloaded|networkidle]');
// Parse --wait-until flag
// --from-file <path.json>: read inline HTML from a JSON payload. Used by
// make-pdf to dodge Windows argv size limits on large rendered HTML.
// The JSON shape is { html: string, waitUntil?: "load"|"domcontentloaded"|"networkidle" }.
// The safe-dirs + magic-byte + size-cap checks below still apply to the
// INLINE HTML content, not to the payload file path itself.
let fromFilePayload: { html: string; waitUntil?: SetContentWaitUntil } | null = null;
let filePath: string | undefined;
let waitUntil: SetContentWaitUntil = 'domcontentloaded';
for (let i = 1; i < args.length; i++) {
if (args[i] === '--wait-until') {
for (let i = 0; i < args.length; i++) {
if (args[i] === '--from-file') {
const payloadPath = args[++i];
if (!payloadPath) throw new Error('load-html: --from-file requires a path');
const raw = fs.readFileSync(payloadPath, 'utf8');
let json: any;
try { json = JSON.parse(raw); }
catch (e: any) { throw new Error(`load-html: --from-file JSON parse failed: ${e.message}`); }
if (typeof json.html !== 'string') {
throw new Error('load-html: --from-file JSON must have a "html" string field');
}
if (json.waitUntil && json.waitUntil !== 'load'
&& json.waitUntil !== 'domcontentloaded' && json.waitUntil !== 'networkidle') {
throw new Error(`load-html: --from-file waitUntil '${json.waitUntil}' invalid`);
}
fromFilePayload = { html: json.html, waitUntil: json.waitUntil };
} else if (args[i] === '--wait-until') {
const val = args[++i];
if (val !== 'load' && val !== 'domcontentloaded' && val !== 'networkidle') {
throw new Error(`Invalid --wait-until '${val}'. Must be one of: load, domcontentloaded, networkidle.`);
@@ -189,9 +208,31 @@ export async function handleWriteCommand(
waitUntil = val;
} else if (args[i].startsWith('--')) {
throw new Error(`Unknown flag: ${args[i]}`);
} else if (!filePath) {
filePath = args[i];
}
}
// Inline HTML path: validate size + magic byte, then setContent directly.
if (fromFilePayload) {
const MAX_BYTES = parseInt(process.env.GSTACK_BROWSE_MAX_HTML_BYTES || '', 10) || (50 * 1024 * 1024);
if (Buffer.byteLength(fromFilePayload.html, 'utf8') > MAX_BYTES) {
throw new Error(
`load-html: --from-file html too large (> ${MAX_BYTES} bytes). ` +
'Raise with GSTACK_BROWSE_MAX_HTML_BYTES=<N>.'
);
}
const peek = fromFilePayload.html.trimStart();
if (!/^<[a-zA-Z!?]/.test(peek)) {
throw new Error('load-html: --from-file html does not start with a valid markup opener');
}
const finalWaitUntil = fromFilePayload.waitUntil ?? waitUntil;
await session.setTabContent(fromFilePayload.html, { waitUntil: finalWaitUntil });
return `Loaded HTML: (inline from --from-file, ${fromFilePayload.html.length} chars)`;
}
if (!filePath) throw new Error('Usage: browse load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]');
// Extension allowlist
const ALLOWED_EXT = ['.html', '.htm', '.xhtml', '.svg'];
const ext = path.extname(filePath).toLowerCase();
+86
View File
@@ -0,0 +1,86 @@
/**
* $B pdf flag contract tests.
*
* Pure unit tests of the parsing/validation logic. These do NOT spin up
* Chromium that's covered by make-pdf's integration tests.
*/
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import { extractTabId } from "../src/cli";
// We can't import the internal parsePdfArgs directly without exporting it,
// but we can exercise it end-to-end through the browse CLI. For fast unit
// coverage we test the flag-extraction layer here.
describe("extractTabId", () => {
test("strips --tab-id and returns the value", () => {
const { tabId, args } = extractTabId(["--tab-id", "3", "extra"]);
expect(tabId).toBe(3);
expect(args).toEqual(["extra"]);
});
test("returns undefined when flag is absent", () => {
const { tabId, args } = extractTabId(["goto", "https://example.com"]);
expect(tabId).toBeUndefined();
expect(args).toEqual(["goto", "https://example.com"]);
});
test("ignores trailing --tab-id with no value", () => {
const { tabId, args } = extractTabId(["click", "@e1", "--tab-id"]);
expect(tabId).toBeUndefined();
expect(args).toEqual(["click", "@e1"]);
});
test("handles --tab-id at different positions", () => {
const front = extractTabId(["--tab-id", "5", "pdf", "/tmp/out.pdf"]);
expect(front.tabId).toBe(5);
expect(front.args).toEqual(["pdf", "/tmp/out.pdf"]);
const middle = extractTabId(["pdf", "--tab-id", "7", "/tmp/out.pdf"]);
expect(middle.tabId).toBe(7);
expect(middle.args).toEqual(["pdf", "/tmp/out.pdf"]);
const end = extractTabId(["pdf", "/tmp/out.pdf", "--tab-id", "9"]);
expect(end.tabId).toBe(9);
expect(end.args).toEqual(["pdf", "/tmp/out.pdf"]);
});
test("ignores non-numeric --tab-id values", () => {
const { tabId, args } = extractTabId(["--tab-id", "abc", "pdf"]);
expect(tabId).toBeUndefined();
expect(args).toEqual(["pdf"]);
});
});
describe("pdf --from-file payload shape", () => {
test("writes a JSON payload file and reads it back", () => {
const tmpPath = path.join(os.tmpdir(), `browse-pdf-test-${Date.now()}.json`);
const payload = {
output: "/tmp/browse-out.pdf",
format: "letter",
marginTop: "1in",
marginRight: "1in",
marginBottom: "1in",
marginLeft: "1in",
pageNumbers: true,
tagged: true,
outline: true,
toc: false,
headerTemplate: '<div style="font-size:9pt">Title</div>',
footerTemplate: undefined,
};
fs.writeFileSync(tmpPath, JSON.stringify(payload));
try {
const readBack = JSON.parse(fs.readFileSync(tmpPath, "utf8"));
expect(readBack.output).toBe("/tmp/browse-out.pdf");
expect(readBack.pageNumbers).toBe(true);
expect(readBack.headerTemplate).toContain("Title");
} finally {
fs.unlinkSync(tmpPath);
}
});
});
+11 -4
View File
@@ -498,8 +498,12 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
});
test('CLI reads BROWSE_TAB and sends tabId in command body', () => {
// BROWSE_TAB env var is still honored (sidebar-agent path). After the
// make-pdf refactor, the CLI layer now also accepts --tab-id <N>, with
// the CLI flag taking precedence over the env var. Both resolve to the
// same `tabId` body field.
expect(cliSrc).toContain('process.env.BROWSE_TAB');
expect(cliSrc).toContain('tabId: parseInt(browseTab');
expect(cliSrc).toContain('parseInt(envTab, 10)');
});
test('handleCommandInternal accepts tabId from request body', () => {
@@ -545,8 +549,11 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
expect(handleFn).toContain('tabId !== null');
});
test('CLI only sends tabId when BROWSE_TAB is set', () => {
// Should conditionally include tabId in the body
expect(cliSrc).toContain('browseTab ? { tabId:');
test('CLI only sends tabId when it is a valid number', () => {
// Body should conditionally include tabId. Historically that was keyed off
// the BROWSE_TAB env var. After the make-pdf refactor, the CLI also honors
// a --tab-id <N> flag on the CLI itself, so the check is "tabId defined
// AND not NaN" rather than literally inspecting the env var.
expect(cliSrc).toContain('tabId !== undefined && !isNaN(tabId)');
});
});