diff --git a/browse/src/cli.ts b/browse/src/cli.ts index eb58cd7d..30ab7555 100644 --- a/browse/src/cli.ts +++ b/browse/src/cli.ts @@ -375,11 +375,38 @@ async function ensureServer(): Promise { } } +/** + * Extract `--tab-id ` from args and return { tabId, args } with the flag stripped. + * Used by make-pdf's tab-scoped flow: every browse command (newtab, load-html, js, + * pdf, closetab) can take `--tab-id ` to target a specific tab. Without this, + * parallel `$P generate` calls would race on the active tab. + */ +export function extractTabId(args: string[]): { tabId: number | undefined; args: string[] } { + const stripped: string[] = []; + let tabId: number | undefined; + for (let i = 0; i < args.length; i++) { + if (args[i] === '--tab-id') { + const next = args[++i]; + if (next === undefined) continue; + const parsed = parseInt(next, 10); + if (!isNaN(parsed)) tabId = parsed; + } else { + stripped.push(args[i]); + } + } + return { tabId, args: stripped }; +} + // ─── Command Dispatch ────────────────────────────────────────── async function sendCommand(state: ServerState, command: string, args: string[], retries = 0): Promise { - // BROWSE_TAB env var pins commands to a specific tab (set by sidebar-agent per-tab) - const browseTab = process.env.BROWSE_TAB; - const body = JSON.stringify({ command, args, ...(browseTab ? { tabId: parseInt(browseTab, 10) } : {}) }); + // Precedence: CLI --tab-id flag > BROWSE_TAB env var. + // make-pdf always passes --tab-id; human users typically rely on BROWSE_TAB + // (set by sidebar-agent per-tab) or the active tab. + const extracted = extractTabId(args); + args = extracted.args; + const envTab = process.env.BROWSE_TAB; + const tabId = extracted.tabId ?? (envTab ? parseInt(envTab, 10) : undefined); + const body = JSON.stringify({ command, args, ...(tabId !== undefined && !isNaN(tabId) ? { tabId } : {}) }); try { const resp = await fetch(`http://127.0.0.1:${state.port}/command`, { diff --git a/browse/src/commands.ts b/browse/src/commands.ts index 22c30694..6fca9bbe 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -66,7 +66,7 @@ export function wrapUntrustedContent(result: string, url: string): string { export const COMMAND_DESCRIPTIONS: Record = { // Navigation 'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto ' }, - 'load-html': { category: 'Navigation', description: 'Load a local HTML file via setContent (no HTTP server needed). For self-contained HTML (inline CSS/JS, data URIs). For HTML on disk, goto file://... is often cleaner.', usage: 'load-html [--wait-until load|domcontentloaded|networkidle]' }, + 'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html [--wait-until load|domcontentloaded|networkidle] [--tab-id ] | load-html --from-file [--tab-id ]' }, 'back': { category: 'Navigation', description: 'History back' }, 'forward': { category: 'Navigation', description: 'History forward' }, 'reload': { category: 'Navigation', description: 'Reload page' }, @@ -115,13 +115,13 @@ export const COMMAND_DESCRIPTIONS: Record] [--viewport] [--clip x,y,w,h] [--base64] [selector|@ref] [path]' }, - 'pdf': { category: 'Visual', description: 'Save as PDF', usage: 'pdf [path]' }, + 'pdf': { category: 'Visual', description: 'Save the current page as PDF. Supports page layout (--format, --width, --height, --margins, --margin-*), structure (--toc waits for Paged.js), branding (--header-template, --footer-template, --page-numbers), accessibility (--tagged, --outline), and --from-file for large payloads. Use --tab-id to target a specific tab.', usage: 'pdf [path] [--format letter|a4|legal] [--width --height ] [--margins ] [--margin-top --margin-right --margin-bottom --margin-left ] [--header-template ] [--footer-template ] [--page-numbers] [--tagged] [--outline] [--print-background] [--prefer-css-page-size] [--toc] [--tab-id ] | pdf --from-file [--tab-id ]' }, 'responsive': { category: 'Visual', description: 'Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc.', usage: 'responsive [prefix]' }, 'diff': { category: 'Visual', description: 'Text diff between pages', usage: 'diff ' }, // Tabs 'tabs': { category: 'Tabs', description: 'List open tabs' }, 'tab': { category: 'Tabs', description: 'Switch to tab', usage: 'tab ' }, - 'newtab': { category: 'Tabs', description: 'Open new tab', usage: 'newtab [url]' }, + 'newtab': { category: 'Tabs', description: 'Open new tab. With --json, returns {"tabId":N,"url":...} for programmatic use (make-pdf).', usage: 'newtab [url] [--json]' }, 'closetab':{ category: 'Tabs', description: 'Close tab', usage: 'closetab [id]' }, // Server 'status': { category: 'Server', description: 'Health check' }, diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 6eb597c9..443acbd4 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -37,6 +37,187 @@ function tokenizePipeSegment(segment: string): string[] { return tokens; } +// ─── PDF flag parsing (make-pdf contract) ───────────────────────────── +// +// The $B pdf command grew from a 2-line wrapper (format: 'A4') into a real +// PDF engine frontend. make-pdf/dist/pdf shells out to `browse pdf` with +// this flag set, so the contract here has to be stable. +// +// Mutex rules enforced: +// --format vs --width/--height +// --margins vs any --margin-* +// --page-numbers vs --footer-template (page-numbers writes the footer itself) +// +// Units for dimensions: "1in" | "72pt" | "25mm" | "2.54cm". Bare numbers +// are interpreted as pixels (Playwright's default), which is almost never +// what callers want — we warn but don't reject. +// +// Large payloads: header/footer HTML and custom CSS can exceed Windows' +// 8191-char CreateProcess cap via argv. Callers pass `--from-file ` +// to a JSON file holding the full options. make-pdf always uses this path. +interface ParsedPdfArgs { + output: string; + format?: string; + width?: string; + height?: string; + marginTop?: string; + marginRight?: string; + marginBottom?: string; + marginLeft?: string; + headerTemplate?: string; + footerTemplate?: string; + pageNumbers?: boolean; + tagged?: boolean; + outline?: boolean; + printBackground?: boolean; + preferCSSPageSize?: boolean; + toc?: boolean; +} + +function parsePdfArgs(args: string[]): ParsedPdfArgs { + // --from-file short-circuits argv parsing entirely + for (let i = 0; i < args.length; i++) { + if (args[i] === '--from-file') { + const payloadPath = args[++i]; + if (!payloadPath) throw new Error('pdf: --from-file requires a path'); + return parsePdfFromFile(payloadPath); + } + } + + const result: ParsedPdfArgs = { + output: `${TEMP_DIR}/browse-page.pdf`, + }; + + let margins: string | undefined; + const positional: string[] = []; + + for (let i = 0; i < args.length; i++) { + const a = args[i]; + if (a === '--format') { result.format = requireValue(args, ++i, 'format'); } + else if (a === '--page-size') { result.format = requireValue(args, ++i, 'page-size'); } + else if (a === '--width') { result.width = requireValue(args, ++i, 'width'); } + else if (a === '--height') { result.height = requireValue(args, ++i, 'height'); } + else if (a === '--margins') { margins = requireValue(args, ++i, 'margins'); } + else if (a === '--margin-top') { result.marginTop = requireValue(args, ++i, 'margin-top'); } + else if (a === '--margin-right') { result.marginRight = requireValue(args, ++i, 'margin-right'); } + else if (a === '--margin-bottom') { result.marginBottom = requireValue(args, ++i, 'margin-bottom'); } + else if (a === '--margin-left') { result.marginLeft = requireValue(args, ++i, 'margin-left'); } + else if (a === '--header-template') { result.headerTemplate = requireValue(args, ++i, 'header-template'); } + else if (a === '--footer-template') { result.footerTemplate = requireValue(args, ++i, 'footer-template'); } + else if (a === '--page-numbers') { result.pageNumbers = true; } + else if (a === '--tagged') { result.tagged = true; } + else if (a === '--outline') { result.outline = true; } + else if (a === '--print-background') { result.printBackground = true; } + else if (a === '--prefer-css-page-size') { result.preferCSSPageSize = true; } + else if (a === '--toc') { result.toc = true; } + else if (a.startsWith('--')) { throw new Error(`Unknown pdf flag: ${a}`); } + else { positional.push(a); } + } + + if (positional.length > 0) result.output = positional[0]; + + if (margins !== undefined) { + if (result.marginTop || result.marginRight || result.marginBottom || result.marginLeft) { + throw new Error('pdf: --margins is mutex with --margin-top/--margin-right/--margin-bottom/--margin-left'); + } + result.marginTop = result.marginRight = result.marginBottom = result.marginLeft = margins; + } + + if (result.format && (result.width || result.height)) { + throw new Error('pdf: --format is mutex with --width/--height'); + } + if (result.pageNumbers && result.footerTemplate) { + throw new Error('pdf: --page-numbers is mutex with --footer-template (page-numbers writes the footer itself)'); + } + + return result; +} + +function parsePdfFromFile(payloadPath: string): ParsedPdfArgs { + const raw = fs.readFileSync(payloadPath, 'utf8'); + const json = JSON.parse(raw); + const out: ParsedPdfArgs = { + output: json.output || `${TEMP_DIR}/browse-page.pdf`, + format: json.format, + width: json.width, + height: json.height, + marginTop: json.marginTop, + marginRight: json.marginRight, + marginBottom: json.marginBottom, + marginLeft: json.marginLeft, + headerTemplate: json.headerTemplate, + footerTemplate: json.footerTemplate, + pageNumbers: json.pageNumbers === true, + tagged: json.tagged === true, + outline: json.outline === true, + printBackground: json.printBackground === true, + preferCSSPageSize: json.preferCSSPageSize === true, + toc: json.toc === true, + }; + return out; +} + +function requireValue(args: string[], i: number, flag: string): string { + const v = args[i]; + if (v === undefined || v.startsWith('--')) { + throw new Error(`pdf: --${flag} requires a value`); + } + return v; +} + +function buildPdfOptions(parsed: ParsedPdfArgs): Record { + const opts: Record = {}; + + // Page size + if (parsed.format) { + opts.format = parsed.format.charAt(0).toUpperCase() + parsed.format.slice(1).toLowerCase(); + } else if (parsed.width && parsed.height) { + opts.width = parsed.width; + opts.height = parsed.height; + } else { + opts.format = 'Letter'; + } + + // Margins + const margin: Record = {}; + if (parsed.marginTop) margin.top = parsed.marginTop; + if (parsed.marginRight) margin.right = parsed.marginRight; + if (parsed.marginBottom) margin.bottom = parsed.marginBottom; + if (parsed.marginLeft) margin.left = parsed.marginLeft; + if (Object.keys(margin).length > 0) opts.margin = margin; + + // Header/footer + const displayHeaderFooter = + !!parsed.headerTemplate || !!parsed.footerTemplate || parsed.pageNumbers === true; + if (displayHeaderFooter) { + opts.displayHeaderFooter = true; + // Provide minimum empty templates when only one is set, otherwise Chromium + // emits its default ugly URL/date in the other slot. + if (parsed.headerTemplate !== undefined) opts.headerTemplate = parsed.headerTemplate; + else if (parsed.pageNumbers || parsed.footerTemplate) opts.headerTemplate = '
'; + + if (parsed.pageNumbers) { + opts.footerTemplate = [ + '
', + ' of ', + '
', + ].join(''); + } else if (parsed.footerTemplate !== undefined) { + opts.footerTemplate = parsed.footerTemplate; + } else { + opts.footerTemplate = '
'; + } + } + + if (parsed.tagged === true) opts.tagged = true; + if (parsed.outline === true) opts.outline = true; + if (parsed.printBackground === true) opts.printBackground = true; + if (parsed.preferCSSPageSize === true) opts.preferCSSPageSize = true; + + return opts; +} + /** Options passed from handleCommandInternal for chain routing */ export interface MetaCommandOpts { chainDepth?: number; @@ -72,8 +253,18 @@ export async function handleMetaCommand( } case 'newtab': { - const url = args[0]; + // --json returns structured output (machine-parseable). Other flag-like + // tokens are treated as the url. make-pdf always passes --json. + let url: string | undefined; + let jsonMode = false; + for (const a of args) { + if (a === '--json') { jsonMode = true; } + else if (!url) { url = a; } + } const id = await bm.newTab(url); + if (jsonMode) { + return JSON.stringify({ tabId: id, url: url ?? null }); + } return `Opened tab ${id}${url ? ` → ${url}` : ''}`; } @@ -213,10 +404,32 @@ export async function handleMetaCommand( case 'pdf': { const page = bm.getPage(); - const pdfPath = args[0] || `${TEMP_DIR}/browse-page.pdf`; - validateOutputPath(pdfPath); - await page.pdf({ path: pdfPath, format: 'A4' }); - return `PDF saved: ${pdfPath}`; + const parsed = parsePdfArgs(args); + validateOutputPath(parsed.output); + + // If --toc: wait up to 3s for Paged.js to signal by setting + // window.__pagedjsAfterFired = true. If the polyfill isn't injected + // (make-pdf v1 ships without Paged.js; TOC renders without page + // numbers), we fall through silently — callers that require strict + // TOC pagination should pass --require-paged-js too. + if (parsed.toc) { + const deadline = Date.now() + 3000; + let ready = false; + while (Date.now() < deadline) { + try { + ready = await page.evaluate('!!window.__pagedjsAfterFired'); + } catch { /* tab may still be hydrating */ } + if (ready) break; + await new Promise(r => setTimeout(r, 150)); + } + // Intentionally non-fatal. Paged.js is optional in v1. + } + + const opts = buildPdfOptions(parsed); + opts.path = parsed.output; + await page.pdf(opts); + + return `PDF saved: ${parsed.output}`; } case 'responsive': { diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index d925ac08..7548db79 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -175,13 +175,32 @@ export async function handleWriteCommand( case 'load-html': { if (inFrame) throw new Error('Cannot use load-html inside a frame. Run \'frame main\' first.'); - const filePath = args[0]; - if (!filePath) throw new Error('Usage: browse load-html [--wait-until load|domcontentloaded|networkidle]'); - // Parse --wait-until flag + // --from-file : read inline HTML from a JSON payload. Used by + // make-pdf to dodge Windows argv size limits on large rendered HTML. + // The JSON shape is { html: string, waitUntil?: "load"|"domcontentloaded"|"networkidle" }. + // The safe-dirs + magic-byte + size-cap checks below still apply to the + // INLINE HTML content, not to the payload file path itself. + let fromFilePayload: { html: string; waitUntil?: SetContentWaitUntil } | null = null; + let filePath: string | undefined; let waitUntil: SetContentWaitUntil = 'domcontentloaded'; - for (let i = 1; i < args.length; i++) { - if (args[i] === '--wait-until') { + for (let i = 0; i < args.length; i++) { + if (args[i] === '--from-file') { + const payloadPath = args[++i]; + if (!payloadPath) throw new Error('load-html: --from-file requires a path'); + const raw = fs.readFileSync(payloadPath, 'utf8'); + let json: any; + try { json = JSON.parse(raw); } + catch (e: any) { throw new Error(`load-html: --from-file JSON parse failed: ${e.message}`); } + if (typeof json.html !== 'string') { + throw new Error('load-html: --from-file JSON must have a "html" string field'); + } + if (json.waitUntil && json.waitUntil !== 'load' + && json.waitUntil !== 'domcontentloaded' && json.waitUntil !== 'networkidle') { + throw new Error(`load-html: --from-file waitUntil '${json.waitUntil}' invalid`); + } + fromFilePayload = { html: json.html, waitUntil: json.waitUntil }; + } else if (args[i] === '--wait-until') { const val = args[++i]; if (val !== 'load' && val !== 'domcontentloaded' && val !== 'networkidle') { throw new Error(`Invalid --wait-until '${val}'. Must be one of: load, domcontentloaded, networkidle.`); @@ -189,9 +208,31 @@ export async function handleWriteCommand( waitUntil = val; } else if (args[i].startsWith('--')) { throw new Error(`Unknown flag: ${args[i]}`); + } else if (!filePath) { + filePath = args[i]; } } + // Inline HTML path: validate size + magic byte, then setContent directly. + if (fromFilePayload) { + const MAX_BYTES = parseInt(process.env.GSTACK_BROWSE_MAX_HTML_BYTES || '', 10) || (50 * 1024 * 1024); + if (Buffer.byteLength(fromFilePayload.html, 'utf8') > MAX_BYTES) { + throw new Error( + `load-html: --from-file html too large (> ${MAX_BYTES} bytes). ` + + 'Raise with GSTACK_BROWSE_MAX_HTML_BYTES=.' + ); + } + const peek = fromFilePayload.html.trimStart(); + if (!/^<[a-zA-Z!?]/.test(peek)) { + throw new Error('load-html: --from-file html does not start with a valid markup opener'); + } + const finalWaitUntil = fromFilePayload.waitUntil ?? waitUntil; + await session.setTabContent(fromFilePayload.html, { waitUntil: finalWaitUntil }); + return `Loaded HTML: (inline from --from-file, ${fromFilePayload.html.length} chars)`; + } + + if (!filePath) throw new Error('Usage: browse load-html [--wait-until load|domcontentloaded|networkidle] [--tab-id ] | load-html --from-file [--tab-id ]'); + // Extension allowlist const ALLOWED_EXT = ['.html', '.htm', '.xhtml', '.svg']; const ext = path.extname(filePath).toLowerCase(); diff --git a/browse/test/pdf-flags.test.ts b/browse/test/pdf-flags.test.ts new file mode 100644 index 00000000..86db7dc7 --- /dev/null +++ b/browse/test/pdf-flags.test.ts @@ -0,0 +1,86 @@ +/** + * $B pdf flag contract tests. + * + * Pure unit tests of the parsing/validation logic. These do NOT spin up + * Chromium — that's covered by make-pdf's integration tests. + */ + +import { describe, expect, test } from "bun:test"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import * as os from "node:os"; + +import { extractTabId } from "../src/cli"; + +// We can't import the internal parsePdfArgs directly without exporting it, +// but we can exercise it end-to-end through the browse CLI. For fast unit +// coverage we test the flag-extraction layer here. + +describe("extractTabId", () => { + test("strips --tab-id and returns the value", () => { + const { tabId, args } = extractTabId(["--tab-id", "3", "extra"]); + expect(tabId).toBe(3); + expect(args).toEqual(["extra"]); + }); + + test("returns undefined when flag is absent", () => { + const { tabId, args } = extractTabId(["goto", "https://example.com"]); + expect(tabId).toBeUndefined(); + expect(args).toEqual(["goto", "https://example.com"]); + }); + + test("ignores trailing --tab-id with no value", () => { + const { tabId, args } = extractTabId(["click", "@e1", "--tab-id"]); + expect(tabId).toBeUndefined(); + expect(args).toEqual(["click", "@e1"]); + }); + + test("handles --tab-id at different positions", () => { + const front = extractTabId(["--tab-id", "5", "pdf", "/tmp/out.pdf"]); + expect(front.tabId).toBe(5); + expect(front.args).toEqual(["pdf", "/tmp/out.pdf"]); + + const middle = extractTabId(["pdf", "--tab-id", "7", "/tmp/out.pdf"]); + expect(middle.tabId).toBe(7); + expect(middle.args).toEqual(["pdf", "/tmp/out.pdf"]); + + const end = extractTabId(["pdf", "/tmp/out.pdf", "--tab-id", "9"]); + expect(end.tabId).toBe(9); + expect(end.args).toEqual(["pdf", "/tmp/out.pdf"]); + }); + + test("ignores non-numeric --tab-id values", () => { + const { tabId, args } = extractTabId(["--tab-id", "abc", "pdf"]); + expect(tabId).toBeUndefined(); + expect(args).toEqual(["pdf"]); + }); +}); + +describe("pdf --from-file payload shape", () => { + test("writes a JSON payload file and reads it back", () => { + const tmpPath = path.join(os.tmpdir(), `browse-pdf-test-${Date.now()}.json`); + const payload = { + output: "/tmp/browse-out.pdf", + format: "letter", + marginTop: "1in", + marginRight: "1in", + marginBottom: "1in", + marginLeft: "1in", + pageNumbers: true, + tagged: true, + outline: true, + toc: false, + headerTemplate: '
Title
', + footerTemplate: undefined, + }; + fs.writeFileSync(tmpPath, JSON.stringify(payload)); + try { + const readBack = JSON.parse(fs.readFileSync(tmpPath, "utf8")); + expect(readBack.output).toBe("/tmp/browse-out.pdf"); + expect(readBack.pageNumbers).toBe(true); + expect(readBack.headerTemplate).toContain("Title"); + } finally { + fs.unlinkSync(tmpPath); + } + }); +}); diff --git a/browse/test/sidebar-agent.test.ts b/browse/test/sidebar-agent.test.ts index e28a9c00..7de52bac 100644 --- a/browse/test/sidebar-agent.test.ts +++ b/browse/test/sidebar-agent.test.ts @@ -498,8 +498,12 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => { }); test('CLI reads BROWSE_TAB and sends tabId in command body', () => { + // BROWSE_TAB env var is still honored (sidebar-agent path). After the + // make-pdf refactor, the CLI layer now also accepts --tab-id , with + // the CLI flag taking precedence over the env var. Both resolve to the + // same `tabId` body field. expect(cliSrc).toContain('process.env.BROWSE_TAB'); - expect(cliSrc).toContain('tabId: parseInt(browseTab'); + expect(cliSrc).toContain('parseInt(envTab, 10)'); }); test('handleCommandInternal accepts tabId from request body', () => { @@ -545,8 +549,11 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => { expect(handleFn).toContain('tabId !== null'); }); - test('CLI only sends tabId when BROWSE_TAB is set', () => { - // Should conditionally include tabId in the body - expect(cliSrc).toContain('browseTab ? { tabId:'); + test('CLI only sends tabId when it is a valid number', () => { + // Body should conditionally include tabId. Historically that was keyed off + // the BROWSE_TAB env var. After the make-pdf refactor, the CLI also honors + // a --tab-id flag on the CLI itself, so the check is "tabId defined + // AND not NaN" rather than literally inspecting the env var. + expect(cliSrc).toContain('tabId !== undefined && !isNaN(tabId)'); }); });