From 26a7cab26ddbfdec4aba34e50061dc6c9dcd2a20 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 12 Jun 2026 10:01:48 -0700 Subject: [PATCH] =?UTF-8?q?fix(make-pdf):=20adversarial-review=20wave=20?= =?UTF-8?q?=E2=80=94=20offline=20posture=20enforced,=20symlink-aware=20con?= =?UTF-8?q?finement,=20bounded=20reads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex adversarial + structured review findings: - Remote images are now BLOCKED with a visible placeholder instead of warn-and-keep — leaving the tag meant Chromium fetched the URL at print time anyway, so the offline posture was a lie (tracking pixels and internal-URL probes ran without --allow-network). - The out-of-tree read check compares REAL paths: a symlink inside the input dir pointing at ~/.ssh/... passed the string-prefix check, including under --strict. Ordered after the existence check (realpath of a missing file false-positives on macOS /var → /private/var). - Image reads are bounded BEFORE reading: statSync first, non-regular files (fifo/device/dir) and >64MB files degrade to placeholders instead of hanging or exhausting memory; malformed percent-encoding (foo%zz.png) degrades to missing-image instead of crashing decodeURIComponent. - browse shell-outs get a 120s timeout — a wedged daemon or hostile mermaid source fails the run instead of hanging it. - TOC entries link to the heading's ACTUAL id (pre-id'd raw-HTML headings previously got dead #toc-N links); per-side margins compose into the CSS @page shorthand so a landscape promotion flipping preferCSSPageSize no longer silently reverts --margin-left/right to defaults (Codex P2). - The image memo is a typed object — literal NUL-byte separators had made diagram-prepass.ts register as binary to text tooling. Codex structured review GATE: PASS (no P1). Co-Authored-By: Claude Fable 5 --- make-pdf/src/browseClient.ts | 3 + make-pdf/src/diagram-prepass.ts | 86 +++++++++++++++++++++------ make-pdf/src/orchestrator.ts | 4 ++ make-pdf/src/render.ts | 63 ++++++++++++++++---- make-pdf/test/diagram-prepass.test.ts | 37 +++++++++++- make-pdf/test/render.test.ts | 7 +++ 6 files changed, 168 insertions(+), 32 deletions(-) diff --git a/make-pdf/src/browseClient.ts b/make-pdf/src/browseClient.ts index c8206c804..da25677e5 100644 --- a/make-pdf/src/browseClient.ts +++ b/make-pdf/src/browseClient.ts @@ -176,6 +176,9 @@ function runBrowse(args: string[]): string { encoding: "utf8", maxBuffer: 16 * 1024 * 1024, // 16MB; tab content can be large stdio: ["ignore", "pipe", "pipe"], + // A wedged daemon (or a hostile mermaid source spinning the renderer) + // must fail the run, not hang it forever. + timeout: 120_000, }); } catch (err: any) { const exitCode = typeof err.status === "number" ? err.status : 1; diff --git a/make-pdf/src/diagram-prepass.ts b/make-pdf/src/diagram-prepass.ts index 8d6c6a48c..bf12249bb 100644 --- a/make-pdf/src/diagram-prepass.ts +++ b/make-pdf/src/diagram-prepass.ts @@ -80,6 +80,8 @@ export interface PrepassImageOptions { * 2 × contentWidth × 300dpi down to contentWidth × 300dpi. */ const PRINT_DPI = 300; const DOWNSCALE_FACTOR = 2; +/** Per-image read ceiling — bounds memory before any policy runs. */ +const MAX_IMAGE_BYTES = 64 * 1024 * 1024; export class StrictModeError extends Error { constructor(msg: string) { @@ -584,7 +586,7 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri const targetPx = Math.round(opts.contentWidthIn * PRINT_DPI); // An image referenced N times is read/probed/downscaled once; the same data // URI string is reused (also dedupes memory until the final join). - const memo = new Map(); + const memo = new Map(); return html.replace(IMG_TAG_RE, (tag) => { const srcMatch = tag.match(SRC_RE); @@ -601,36 +603,73 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri // Absolute URL with a scheme (http, https, file, …) if (opts.allowNetwork && /^https?:/i.test(src)) return tag; if (/^https?:/i.test(src)) { - const msg = `remote image not fetched (offline posture): ${src}`; + const msg = `remote image blocked (offline posture): ${src}`; if (opts.strict) throw new StrictModeError(msg + " — re-run without --strict or pass --allow-network"); opts.warn(msg); - return tag; + // Leaving the tag would make Chromium fetch it at print time anyway — + // the warn would be a lie. Replace with a visible placeholder. + return buildBlockedRemotePlaceholder(src); } // file:// and friends fall through to the local path branch if (!src.startsWith("file:")) return tag; } + // decodeURIComponent throws on malformed escapes (foo%zz.png) — a broken + // URL must degrade to the missing-image path, not crash the run. + let decodedSrc = src; + try { + decodedSrc = decodeURIComponent(src); + } catch { /* keep raw src */ } + const filePath = src.startsWith("file:") ? fileURLToPath(src) : isDrivePath ? path.resolve(src) - : path.resolve(opts.inputDir, decodeURIComponent(src)); + : path.resolve(opts.inputDir, decodedSrc); const cached = memo.get(filePath); if (cached !== undefined) return rewriteImgTag(tag, cached); + if (!fs.existsSync(filePath)) { + const msg = `image not found: ${src} (resolved to ${filePath})`; + if (opts.strict) throw new StrictModeError(msg); + opts.warn(msg); + return buildMissingImagePlaceholder(src); + } + // Out-of-tree reads are legal (local CLI semantics — like pandoc) but // never silent: an agent PDF-ing untrusted markdown should not quietly // embed ~/.ssh/config into a shareable document. --strict makes it fatal. - const inputRoot = path.resolve(opts.inputDir) + path.sep; - if (!filePath.startsWith(inputRoot)) { - const msg = `image resolves OUTSIDE the input directory: ${src} → ${filePath}`; + // Compare REAL paths — a symlink inside the input dir pointing outside + // would otherwise pass a string-prefix check (Codex adversarial finding). + // Runs after the existence check: realpath of a missing file can't + // resolve, and on macOS /var vs /private/var would false-positive. + const inputRoot = safeRealpath(path.resolve(opts.inputDir)) + path.sep; + const realFilePath = safeRealpath(filePath); + if (!realFilePath.startsWith(inputRoot)) { + const msg = `image resolves OUTSIDE the input directory: ${src} → ${realFilePath}`; if (opts.strict) throw new StrictModeError(msg + " — move it under the markdown's directory or drop --strict"); opts.warn(msg); } - if (!fs.existsSync(filePath)) { - const msg = `image not found: ${src} (resolved to ${filePath})`; + // Bound the read BEFORE reading: a markdown image pointing at a special + // file (fifo, device) would hang readFileSync, and a multi-GB file would + // exhaust memory before any policy ran. + let stat: fs.Stats; + try { + stat = fs.statSync(filePath); + } catch { + opts.warn(`image unreadable: ${src}`); + return buildMissingImagePlaceholder(src); + } + if (!stat.isFile()) { + const msg = `image is not a regular file: ${src}`; + if (opts.strict) throw new StrictModeError(msg); + opts.warn(msg); + return buildMissingImagePlaceholder(src); + } + if (stat.size > MAX_IMAGE_BYTES) { + const msg = `image exceeds ${Math.round(MAX_IMAGE_BYTES / 1024 / 1024)}MB cap: ${src} (${Math.round(stat.size / 1024 / 1024)}MB)`; if (opts.strict) throw new StrictModeError(msg); opts.warn(msg); return buildMissingImagePlaceholder(src); @@ -665,20 +704,17 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri const attrs = dims ? ` data-gstack-px-width="${Math.round(dims.width)}" data-gstack-px-height="${Math.round(dims.height)}"` : ""; - memo.set(filePath, `${dataUri}${attrs}`); + memo.set(filePath, { dataUri, attrs }); return rewriteImgTag(tag, memo.get(filePath)!); }); } -/** Apply a memoized `dataUriattrs` payload to an img tag. */ -function rewriteImgTag(tag: string, memoEntry: string): string { - const sep = memoEntry.indexOf(""); - const dataUri = memoEntry.slice(0, sep); - const attrs = memoEntry.slice(sep + 1); +/** Apply a memoized inline result to an img tag. */ +function rewriteImgTag(tag: string, entry: { dataUri: string; attrs: string }): string { // Function replacement: data URIs are user-content-derived; string-form // replace() would expand $-patterns inside them. - let out = tag.replace(SRC_RE, () => `src="${dataUri}"`); - if (attrs) out = out.replace(/^ ` `src="${entry.dataUri}"`); + if (entry.attrs) out = out.replace(/^ `` + + `[remote image blocked (use --allow-network): ${escapeHtml(src)}]` + ); +} + +/** realpath that degrades to the input path when resolution fails. */ +function safeRealpath(p: string): string { + try { + return fs.realpathSync(p); + } catch { + return p; + } +} + function mimeFromExtension(p: string): string { switch (path.extname(p).toLowerCase()) { case ".png": return "image/png"; diff --git a/make-pdf/src/orchestrator.ts b/make-pdf/src/orchestrator.ts index 87e97f793..12a21570d 100644 --- a/make-pdf/src/orchestrator.ts +++ b/make-pdf/src/orchestrator.ts @@ -112,6 +112,10 @@ export async function generate(opts: GenerateOptions): Promise { confidential: opts.confidential, pageSize: opts.pageSize, margins: opts.margins, + marginTop: opts.marginTop, + marginRight: opts.marginRight, + marginBottom: opts.marginBottom, + marginLeft: opts.marginLeft, pageNumbers: opts.pageNumbers, footerTemplate: opts.footerTemplate, }); diff --git a/make-pdf/src/render.ts b/make-pdf/src/render.ts index 862efde39..514fbbc89 100644 --- a/make-pdf/src/render.ts +++ b/make-pdf/src/render.ts @@ -35,6 +35,14 @@ export interface RenderOptions { // Page layout pageSize?: "letter" | "a4" | "legal" | "tabloid"; margins?: string; + // Per-side margins (override `margins`). Must reach the CSS @page rule: + // when a landscape promotion flips preferCSSPageSize on, the CSS margins + // are the ones Chromium honors — dropping per-side flags there would + // silently change the whole document's layout (Codex P2). + marginTop?: string; + marginRight?: string; + marginBottom?: string; + marginLeft?: string; // Footer behavior. pageNumbers defaults to true. When footerTemplate is set, // CSS page numbers are suppressed so the custom Chromium footer wins cleanly. @@ -97,7 +105,9 @@ export function render(opts: RenderOptions): RenderResult { confidential: opts.confidential !== false, runningHeader: derivedTitle, pageSize: opts.pageSize, - margins: opts.margins, + // Compose per-side margins into the CSS shorthand so @page stays the + // single source of truth even under preferCSSPageSize. + margins: composeMargins(opts), pageNumbers: showPageNumbers, }; const css = printCss(cssOptions); @@ -114,11 +124,14 @@ export function render(opts: RenderOptions): RenderResult { // TOC anchors must resolve: assign id="toc-N" to each H1-H3 in the same // order buildTocBlock scans them, or every TOC link is a dead href (masked - // in PDFs by Chromium outline bookmarks, glaring in --to html). - const anchoredHtml = opts.toc ? addHeadingIds(typographicHtml) : typographicHtml; + // in PDFs by Chromium outline bookmarks, glaring in --to html). Headings + // that already carry an id keep it — the ids array records the ACTUAL id + // per heading so TOC entries always link to something real. + const anchored = opts.toc ? addHeadingIds(typographicHtml) : { html: typographicHtml, ids: [] }; + const anchoredHtml = anchored.html; const tocBlock = opts.toc - ? buildTocBlock(anchoredHtml) + ? buildTocBlock(anchoredHtml, anchored.ids) : ""; // Wrap body in .chapter sections at H1 boundaries if chapter breaks are on. @@ -267,13 +280,13 @@ function buildCoverBlock(opts: { * Page numbers are filled in by Paged.js (when --toc is passed and Paged.js * polyfill is injected). */ -function buildTocBlock(html: string): string { +function buildTocBlock(html: string, ids: string[] = []): string { const headings = extractHeadings(html); if (headings.length === 0) return ""; const items = headings.map((h, i) => { const level = h.level >= 2 ? "level-2" : "level-1"; - const id = `toc-${i}`; + const id = ids[i] ?? `toc-${i}`; return [ `
  • `, ` ${escapeHtml(h.text)}`, @@ -296,16 +309,23 @@ function buildTocBlock(html: string): string { /** * Assign id="toc-N" to every H1-H3 in document order — the same order * extractHeadings/buildTocBlock use, so anchors and entries line up by index. - * Headings that already carry an id keep it AND gain nothing (the TOC link - * targets toc-N, so we only skip tagging when one exists to avoid dupes). + * A heading that already carries an id keeps it, and the returned ids array + * records the actual id for that slot so the TOC links to the real anchor + * instead of a nonexistent toc-N. */ -function addHeadingIds(html: string): string { - let i = 0; - return html.replace(/<(h[1-3])([^>]*)>/gi, (full, tag: string, attrs: string) => { - const id = `toc-${i++}`; - if (/\bid\s*=/i.test(attrs)) return full; +function addHeadingIds(html: string): { html: string; ids: string[] } { + const ids: string[] = []; + const out = html.replace(/<(h[1-3])([^>]*)>/gi, (full, tag: string, attrs: string) => { + const existing = attrs.match(/\bid\s*=\s*["']([^"']*)["']/i)?.[1]; + if (existing) { + ids.push(existing); + return full; + } + const id = `toc-${ids.length}`; + ids.push(id); return `<${tag}${attrs} id="${id}">`; }); + return { html: out, ids }; } function extractHeadings(html: string): Array<{ level: number; text: string }> { @@ -378,6 +398,23 @@ function decodeTextEntities(s: string): string { .replace(/&/g, "&"); } +/** Compose `margin: top right bottom left` from per-side overrides + base. */ +function composeMargins(opts: { + margins?: string; marginTop?: string; marginRight?: string; + marginBottom?: string; marginLeft?: string; +}): string | undefined { + const base = opts.margins ?? "1in"; + if (!opts.marginTop && !opts.marginRight && !opts.marginBottom && !opts.marginLeft) { + return opts.margins; + } + return [ + opts.marginTop ?? base, + opts.marginRight ?? base, + opts.marginBottom ?? base, + opts.marginLeft ?? base, + ].join(" "); +} + function stripTags(html: string): string { return html.replace(/<[^>]+>/g, ""); } diff --git a/make-pdf/test/diagram-prepass.test.ts b/make-pdf/test/diagram-prepass.test.ts index c589dbdf1..eac3e645d 100644 --- a/make-pdf/test/diagram-prepass.test.ts +++ b/make-pdf/test/diagram-prepass.test.ts @@ -277,14 +277,47 @@ describe("inlineLocalImages", () => { ).toThrow(StrictModeError); }); - test("remote image warns and is left untouched (offline posture)", () => { + test("remote image is BLOCKED with a visible placeholder (offline posture)", () => { + // Leaving the tag would make Chromium fetch it at print time anyway — + // the offline posture must remove the src, not just warn about it. const warnings: string[] = []; const tag = ``; const out = inlineLocalImages(tag, { ...base, warn: (m) => warnings.push(m) }); - expect(out).toBe(tag); + expect(out).not.toContain("https://example.com/x.png\""); + expect(out).toContain("remote image blocked"); expect(warnings[0]).toContain("offline"); }); + test("symlink escaping the input dir is caught by the realpath check", () => { + const outside = fs.mkdtempSync(path.join(os.tmpdir(), "prepass-symlink-")); + fs.writeFileSync(path.join(outside, "secret.png"), tinyPng(5, 5)); + const link = path.join(dir, "innocent.png"); + try { + fs.symlinkSync(path.join(outside, "secret.png"), link); + const warnings: string[] = []; + inlineLocalImages(``, { ...base, warn: (m) => warnings.push(m) }); + expect(warnings.some((w) => w.includes("OUTSIDE the input directory"))).toBe(true); + } finally { + try { fs.unlinkSync(link); } catch { /* ignore */ } + fs.rmSync(outside, { recursive: true, force: true }); + } + }); + + test("special files and oversized images degrade to placeholders, never hang", () => { + // Directory masquerading as an image — not a regular file. + fs.mkdirSync(path.join(dir, "dir.png"), { recursive: true }); + const warnings: string[] = []; + const out = inlineLocalImages(``, { ...base, warn: (m) => warnings.push(m) }); + expect(out).toContain("image-missing"); + expect(warnings.some((w) => w.includes("not a regular file"))).toBe(true); + }); + + test("malformed percent-encoding degrades to missing-image, never throws", () => { + const warnings: string[] = []; + const out = inlineLocalImages(``, { ...base, warn: (m) => warnings.push(m) }); + expect(out).toContain("image-missing"); + }); + test("remote image + --allow-network passes silently", () => { const warnings: string[] = []; const tag = ``; diff --git a/make-pdf/test/render.test.ts b/make-pdf/test/render.test.ts index 4f5575b79..b54085ecb 100644 --- a/make-pdf/test/render.test.ts +++ b/make-pdf/test/render.test.ts @@ -264,6 +264,13 @@ describe("printCss", () => { expect(css).toContain("margin: 72pt"); }); + test("per-side margins reach the CSS @page rule (preferCSSPageSize parity)", () => { + // Under a landscape promotion Chromium honors the CSS margins, not the + // CDP per-side options — render() must compose them into the shorthand. + const r = render({ markdown: "# T", marginLeft: "0.5in", marginRight: "0.5in" }); + expect(r.printCss).toContain("margin: 1in 0.5in 1in 0.5in"); + }); + test("emits letter page size by default", () => { const css = printCss(); expect(css).toContain("size: letter");