From 9db479a38dbee03336835950abcc919947186548 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 12 Jun 2026 07:57:42 -0700 Subject: [PATCH] =?UTF-8?q?fix(make-pdf):=20pre-landing=20review=20wave=20?= =?UTF-8?q?=E2=80=94=20fence=20fidelity,=20injection=20hardening,=20Window?= =?UTF-8?q?s=20paths,=20transport=20rework?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review army (6 specialists + red team) findings, all fixed: - Indented fences replay byte-for-byte and indented diagram fences are NOT extracted (red-team conf-9: the pre-pass reconstructed fences at column 0, splitting any list containing fenced code — every ordinary document). - String.replace $-pattern injection killed at every seam: substituteSlots, mergeStyle, img/src rewrites all use function replacements (a diagram label containing $' duplicated the document tail). - Big-expression transport reworked: browse `eval ` (one spawn, any size, Windows-safe) replaces the 64KB chunked window-buffer eval — fixes the per-chunk spawn cost, the char-vs-byte argv units, AND the Windows 32,767-char command-line ceiling in one move. - Staged-bundle trust: content verified by hash even when the file exists, and the rename-failure path re-hashes the survivor (sticky-bit /tmp EPERM would otherwise ride a pre-planted file past the check). - Windows drive-letter img srcs (C:/x.png) reach the local-path branch instead of being swallowed as unknown URL schemes. - DOCX rasterize-failure now embeds the decoded source as visible text — returning the figure made diagrams vanish silently (converter drops svg). - Fence source preserved as base64 data-gstack-source attribute (the comment encoding corrupted every '-->' arrow); decodeFigureSource() round-trips. - inlineLocalImages memoizes per path; file:// uses fileURLToPath; preview prints a divergence note for fences/local images; --to docx strips the watermark div and warns about print-only flags; TOC links resolve in html/docx (heading ids assigned); waitForExpression sleeps instead of busy-spinning; escapeHtml/svg-dims deduped to single definitions; typography stragglers (blockquote 12pt, footnotes 10pt, 42em screen measure); bundle BUILD_INFO gains srcSha256 for no-node_modules drift detection; MAX_TARGET_PX shared guard. Co-Authored-By: Claude Fable 5 --- lib/diagram-render/dist/BUILD_INFO.json | 5 +- lib/diagram-render/dist/diagram-render.html | 682 ++++++++++---------- lib/diagram-render/scripts/build.ts | 8 + lib/diagram-render/src/entry.ts | 16 +- make-pdf/src/browseClient.ts | 21 +- make-pdf/src/diagram-prepass.ts | 237 ++++--- make-pdf/src/image-policy.ts | 29 +- make-pdf/src/image-size.ts | 17 +- make-pdf/src/orchestrator.ts | 30 +- make-pdf/src/print-css.ts | 27 +- make-pdf/src/render.ts | 28 +- 11 files changed, 625 insertions(+), 475 deletions(-) diff --git a/lib/diagram-render/dist/BUILD_INFO.json b/lib/diagram-render/dist/BUILD_INFO.json index 5ae8527ce..b664e6309 100644 --- a/lib/diagram-render/dist/BUILD_INFO.json +++ b/lib/diagram-render/dist/BUILD_INFO.json @@ -1,7 +1,8 @@ { "name": "gstack-diagram-render", - "sha256": "0ee91aef5a8da85c8941c26ebf2991bbeba82412644bb070d5c5dd2e23538b81", - "bytes": 9645503, + "sha256": "da9c363071afbe79e06807bd1e67dbacc1123187db7b99e2608dd4a1a9567e94", + "srcSha256": "07238fae312bc0444f62b0a0a3404a8a38c45cef505aa1528c60a0ded17cbe06", + "bytes": 9645479, "bunVersion": "1.3.13", "deps": { "@excalidraw/excalidraw": "0.18.0", diff --git a/lib/diagram-render/dist/diagram-render.html b/lib/diagram-render/dist/diagram-render.html index e953a32ed..25de82b9f 100644 --- a/lib/diagram-render/dist/diagram-render.html +++ b/lib/diagram-render/dist/diagram-render.html @@ -20,11 +20,11 @@ window.addEventListener("unhandledrejection", function (e) {
loading
diff --git a/lib/diagram-render/scripts/build.ts b/lib/diagram-render/scripts/build.ts index b140bfd54..2bd2caf99 100644 --- a/lib/diagram-render/scripts/build.ts +++ b/lib/diagram-render/scripts/build.ts @@ -78,9 +78,17 @@ const html = head + inlineJs + tail; await Bun.write(DIST_HTML, html); const sha256 = createHash("sha256").update(html).digest("hex"); +// Source fingerprint: lets the drift test catch "edited src, forgot to +// rebuild dist" WITHOUT needing node_modules for a full rebuild (the deep +// rebuild check only runs where deps are installed). +const srcSha256 = createHash("sha256") + .update(await Bun.file(ENTRY).text()) + .update(await Bun.file(import.meta.path).text()) + .digest("hex"); const info = { name: "gstack-diagram-render", sha256, + srcSha256, bytes: Buffer.byteLength(html), bunVersion: Bun.version, deps, diff --git a/lib/diagram-render/src/entry.ts b/lib/diagram-render/src/entry.ts index c9e98707e..503c23ed3 100644 --- a/lib/diagram-render/src/entry.ts +++ b/lib/diagram-render/src/entry.ts @@ -100,10 +100,16 @@ window.__excalidrawToSvg = async (sceneJson: string): Promise => { * targetWidthPx = placed physical width (in) × 300dpi (eng-review D6.5) — * the bundle never guesses a viewport. */ -window.__rasterize = async (svgText: string, targetWidthPx: number): Promise => { - if (!(targetWidthPx > 0 && targetWidthPx <= 10000)) { - throw new Error(`targetWidthPx out of range: ${targetWidthPx}`); +/** Shared ceiling for rasterization targets (both window functions). */ +const MAX_TARGET_PX = 10_000; +function assertTargetWidth(px: number): void { + if (!(px > 0 && px <= MAX_TARGET_PX)) { + throw new Error(`targetWidthPx out of range: ${px}`); } +} + +window.__rasterize = async (svgText: string, targetWidthPx: number): Promise => { + assertTargetWidth(targetWidthPx); const blob = new Blob([svgText], { type: "image/svg+xml;charset=utf-8" }); const url = URL.createObjectURL(blob); try { @@ -164,9 +170,7 @@ window.__downscaleRaster = async ( targetWidthPx: number, mime: string, ): Promise => { - if (!(targetWidthPx > 0 && targetWidthPx <= 10000)) { - throw new Error(`targetWidthPx out of range: ${targetWidthPx}`); - } + assertTargetWidth(targetWidthPx); const img = new Image(); await new Promise((resolve, reject) => { img.onload = () => resolve(); diff --git a/make-pdf/src/browseClient.ts b/make-pdf/src/browseClient.ts index 9c70c716c..c8206c804 100644 --- a/make-pdf/src/browseClient.ts +++ b/make-pdf/src/browseClient.ts @@ -290,6 +290,19 @@ export function js(opts: JsOptions): string { ]).trim(); } +/** + * Evaluate a JS file in a tab (`browse eval `): the argv-safe transport + * for expressions too large for a command-line element. The file must live + * under browse's safe dirs (/tmp or cwd). + */ +export function evalFile(opts: { file: string; tabId: number }): string { + return runBrowse([ + "eval", + opts.file, + "--tab-id", String(opts.tabId), + ]).trim(); +} + /** * Poll a boolean JS expression until it evaluates to true, or timeout. * Returns true if it succeeded, false if timed out. @@ -311,9 +324,11 @@ export function waitForExpression(opts: { } const wait = Math.min(poll, Math.max(0, deadline - Date.now())); if (wait <= 0) break; - // Synchronous sleep is fine — this only runs once per PDF render - const end = Date.now() + wait; - while (Date.now() < end) { /* busy wait */ } + // Real sleep, not a busy-wait: this poll now runs on every diagram-render + // bundle load (and after every fence render error), exactly while Chromium + // is parsing a 9MB page on the same machine — spinning a core competes + // with the work being awaited. + Bun.sleepSync(wait); } return false; } diff --git a/make-pdf/src/diagram-prepass.ts b/make-pdf/src/diagram-prepass.ts index e8a40392c..361806799 100644 --- a/make-pdf/src/diagram-prepass.ts +++ b/make-pdf/src/diagram-prepass.ts @@ -28,9 +28,10 @@ import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; import * as crypto from "node:crypto"; +import { fileURLToPath } from "node:url"; import * as browseClient from "./browseClient"; -import { sanitizeUntrustedHtml } from "./render"; +import { escapeHtml, sanitizeUntrustedHtml } from "./render"; import { imageDims } from "./image-size"; // ─── Types ──────────────────────────────────────────────────────────── @@ -92,10 +93,17 @@ export class StrictModeError extends Error { const DIAGRAM_LANGS = new Set(["mermaid", "excalidraw"]); /** - * Extract top-level ```mermaid / ```excalidraw fences, replacing each with a + * Extract column-0 ```mermaid / ```excalidraw fences, replacing each with a * unique placeholder token paragraph. Backtick and tilde fences, any length * >= 3; closers must be at least as long as the opener (CommonMark). Fences - * with `render=false` in the info string are left untouched. + * with `render=false` are left untouched. + * + * Two deliberate conservatisms (red-team finding — the original version + * reconstructed fences at column 0 and restructured lists): + * - Non-diagram fences replay as their ORIGINAL raw lines, byte-for-byte + * (only a render=false flag is removed, in place, preserving indent). + * - INDENTED diagram fences (inside lists/quotes) are NOT extracted — a + * column-0 placeholder would split the list. They replay verbatim as code. */ export function extractDiagramFences(markdown: string): FenceExtraction { const lines = markdown.split("\n"); @@ -104,7 +112,10 @@ export function extractDiagramFences(markdown: string): FenceExtraction { const runId = crypto.randomBytes(4).toString("hex"); let i = 0; - let openFence: { char: string; len: number; info: string; body: string[] } | null = null; + let openFence: { + char: string; len: number; indent: number; info: string; + rawOpener: string; body: string[]; + } | null = null; let ordinal = 0; while (i < lines.length) { @@ -114,7 +125,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction { const close = matchFenceLine(line); if (close && close.char === openFence.char && close.len >= openFence.len && close.info === "") { const info = parseInfoString(openFence.info); - if (DIAGRAM_LANGS.has(info.lang) && info.render) { + if (DIAGRAM_LANGS.has(info.lang) && info.render && openFence.indent === 0) { ordinal++; const token = `gstack-diagram-slot-${runId}-${ordinal}`; fences.push({ @@ -128,10 +139,9 @@ export function extractDiagramFences(markdown: string): FenceExtraction { }); out.push("", token, ""); } else { - // Not a diagram fence (or render=false): replay verbatim, but strip - // the render=false flag so it never leaks into highlighted output. - const infoOut = info.render ? openFence.info : info.lang; - out.push(`${openFence.char.repeat(openFence.len)}${infoOut}`); + // Not extracted (other language, render=false, or indented): replay + // the ORIGINAL lines verbatim; only strip a render=false flag. + out.push(stripRenderFalse(openFence.rawOpener)); out.push(...openFence.body); out.push(line); } @@ -146,7 +156,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction { const open = matchFenceLine(line); if (open && open.info !== "") { - openFence = { char: open.char, len: open.len, info: open.info, body: [] }; + openFence = { ...open, rawOpener: line, body: [] }; i++; continue; } @@ -171,17 +181,22 @@ export function extractDiagramFences(markdown: string): FenceExtraction { // Unclosed fence at EOF: replay verbatim (CommonMark treats it as code to EOF). if (openFence) { - out.push(`${openFence.char.repeat(openFence.len)}${openFence.info}`); + out.push(openFence.rawOpener); out.push(...openFence.body); } return { markdown: out.join("\n"), fences }; } -function matchFenceLine(line: string): { char: string; len: number; info: string } | null { - const m = line.match(/^ {0,3}(`{3,}|~{3,})\s*(.*)$/); +function matchFenceLine(line: string): { char: string; len: number; indent: number; info: string } | null { + const m = line.match(/^( {0,3})(`{3,}|~{3,})\s*(.*)$/); if (!m) return null; - return { char: m[1][0], len: m[1].length, info: m[2].trim() }; + return { indent: m[1].length, char: m[2][0], len: m[2].length, info: m[3].trim() }; +} + +/** Remove a render=false flag from a raw opener line, preserving everything else. */ +function stripRenderFalse(rawOpener: string): string { + return rawOpener.replace(/\s*\brender\s*=\s*false\b/i, ""); } /** Parse a fence info string: `mermaid`, `mermaid render=false`, @@ -208,12 +223,12 @@ export function parseInfoString(info: string): { export function substituteSlots(html: string, slots: Map): string { let s = html; for (const [token, slotHtml] of slots) { + // Function replacement is load-bearing: slot HTML carries user/LLM-authored + // diagram label text, and string-form replace() expands $&, $', $` patterns + // inside it — a label containing "$'" would duplicate the document tail. const wrapped = new RegExp(`

\\s*${token}\\s*

`, "g"); - if (wrapped.test(s)) { - s = s.replace(new RegExp(`

\\s*${token}\\s*

`, "g"), slotHtml); - } else { - s = s.split(token).join(slotHtml); - } + const replaced = s.replace(wrapped, () => slotHtml); + s = replaced !== s ? replaced : s.split(token).join(slotHtml); } return s; } @@ -227,14 +242,19 @@ export function buildDiagnosticBlock(fence: DiagramFence, errorMessage: string): const excerpt = fence.source.split("\n").slice(0, 8).join("\n"); const truncated = fence.source.split("\n").length > 8 ? "\n…" : ""; return [ - `