fix(make-pdf): pre-landing review wave — fence fidelity, injection hardening, Windows paths, transport rework

Review army (6 specialists + red team) findings, all fixed:

- Indented fences replay byte-for-byte and indented diagram fences are NOT extracted (red-team conf-9: the pre-pass reconstructed fences at column 0, splitting any list containing fenced code — every ordinary document).
- String.replace $-pattern injection killed at every seam: substituteSlots, mergeStyle, img/src rewrites all use function replacements (a diagram label containing $' duplicated the document tail).
- Big-expression transport reworked: browse `eval <file>` (one spawn, any size, Windows-safe) replaces the 64KB chunked window-buffer eval — fixes the per-chunk spawn cost, the char-vs-byte argv units, AND the Windows 32,767-char command-line ceiling in one move.
- Staged-bundle trust: content verified by hash even when the file exists, and the rename-failure path re-hashes the survivor (sticky-bit /tmp EPERM would otherwise ride a pre-planted file past the check).
- Windows drive-letter img srcs (C:/x.png) reach the local-path branch instead of being swallowed as unknown URL schemes.
- DOCX rasterize-failure now embeds the decoded source as visible text — returning the figure made diagrams vanish silently (converter drops svg).
- Fence source preserved as base64 data-gstack-source attribute (the comment encoding corrupted every '-->' arrow); decodeFigureSource() round-trips.
- inlineLocalImages memoizes per path; file:// uses fileURLToPath; preview prints a divergence note for fences/local images; --to docx strips the watermark div and warns about print-only flags; TOC links resolve in html/docx (heading ids assigned); waitForExpression sleeps instead of busy-spinning; escapeHtml/svg-dims deduped to single definitions; typography stragglers (blockquote 12pt, footnotes 10pt, 42em screen measure); bundle BUILD_INFO gains srcSha256 for no-node_modules drift detection; MAX_TARGET_PX shared guard.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-08-02 12:28:36 +02:00 · 2026-06-12 07:57:42 -07:00
parent 0b7b5ee0f7
commit 9db479a38d
11 changed files with 625 additions and 475 deletions
@@ -1,7 +1,8 @@
 {
  "name": "gstack-diagram-render",
-  "sha256": "0ee91aef5a8da85c8941c26ebf2991bbeba82412644bb070d5c5dd2e23538b81",
-  "bytes": 9645503,
+  "sha256": "da9c363071afbe79e06807bd1e67dbacc1123187db7b99e2608dd4a1a9567e94",
+  "srcSha256": "07238fae312bc0444f62b0a0a3404a8a38c45cef505aa1528c60a0ded17cbe06",
+  "bytes": 9645479,
  "bunVersion": "1.3.13",
  "deps": {
    "@excalidraw/excalidraw": "0.18.0",
@@ -78,9 +78,17 @@ const html = head + inlineJs + tail;
 await Bun.write(DIST_HTML, html);

 const sha256 = createHash("sha256").update(html).digest("hex");
+// Source fingerprint: lets the drift test catch "edited src, forgot to
+// rebuild dist" WITHOUT needing node_modules for a full rebuild (the deep
+// rebuild check only runs where deps are installed).
+const srcSha256 = createHash("sha256")
+  .update(await Bun.file(ENTRY).text())
+  .update(await Bun.file(import.meta.path).text())
+  .digest("hex");
 const info = {
  name: "gstack-diagram-render",
  sha256,
+  srcSha256,
  bytes: Buffer.byteLength(html),
  bunVersion: Bun.version,
  deps,
@@ -100,10 +100,16 @@ window.__excalidrawToSvg = async (sceneJson: string): Promise<string> => {
 * targetWidthPx = placed physical width (in) × 300dpi (eng-review D6.5) —
 * the bundle never guesses a viewport.
 */
-window.__rasterize = async (svgText: string, targetWidthPx: number): Promise<string> => {
-  if (!(targetWidthPx > 0 && targetWidthPx <= 10000)) {
-    throw new Error(`targetWidthPx out of range: ${targetWidthPx}`);
+/**
+ * Shared ceiling, in pixels, for rasterization target widths. Both window
+ * functions (__rasterize and __downscaleRaster) validate their targetWidthPx
+ * through assertTargetWidth, which throws `targetWidthPx out of range` unless
+ * 0 < px <= MAX_TARGET_PX. The guard is written as a negated range test, so a
+ * NaN width fails both comparisons and is rejected as well.
+ */
+const MAX_TARGET_PX = 10_000;
+function assertTargetWidth(px: number): void {
+  if (!(px > 0 && px <= MAX_TARGET_PX)) {
+    throw new Error(`targetWidthPx out of range: ${px}`);
   }
+}
+
+window.__rasterize = async (svgText: string, targetWidthPx: number): Promise<string> => {
+  assertTargetWidth(targetWidthPx);
  const blob = new Blob([svgText], { type: "image/svg+xml;charset=utf-8" });
  const url = URL.createObjectURL(blob);
  try {
@@ -164,9 +170,7 @@ window.__downscaleRaster = async (
  targetWidthPx: number,
  mime: string,
 ): Promise<string> => {
-  if (!(targetWidthPx > 0 && targetWidthPx <= 10000)) {
-    throw new Error(`targetWidthPx out of range: ${targetWidthPx}`);
-  }
+  assertTargetWidth(targetWidthPx);
  const img = new Image();
  await new Promise<void>((resolve, reject) => {
    img.onload = () => resolve();
@@ -290,6 +290,19 @@ export function js(opts: JsOptions): string {
  ]).trim();
 }

+/**
+ * Run a JavaScript file inside a tab via `browse eval <file>`. This is the
+ * argv-safe transport for expressions that are too large to travel as a
+ * single command-line element. The file must live under browse's safe dirs
+ * (/tmp or cwd).
+ *
+ * @param opts.file  Path of the JS file to evaluate.
+ * @param opts.tabId Tab in which the file is evaluated.
+ * @returns The command's output with surrounding whitespace trimmed.
+ */
+export function evalFile(opts: { file: string; tabId: number }): string {
+  const { file, tabId } = opts;
+  const argv = ["eval", file, "--tab-id", String(tabId)];
+  const output = runBrowse(argv);
+  return output.trim();
+}
+
 /**
 * Poll a boolean JS expression until it evaluates to true, or timeout.
 * Returns true if it succeeded, false if timed out.
@@ -311,9 +324,11 @@ export function waitForExpression(opts: {
    }
    const wait = Math.min(poll, Math.max(0, deadline - Date.now()));
    if (wait <= 0) break;
-    // Synchronous sleep is fine — this only runs once per PDF render
-    const end = Date.now() + wait;
-    while (Date.now() < end) { /* busy wait */ }
+    // Real sleep, not a busy-wait: this poll now runs on every diagram-render
+    // bundle load (and after every fence render error), exactly while Chromium
+    // is parsing a 9MB page on the same machine — spinning a core competes
+    // with the work being awaited.
+    Bun.sleepSync(wait);
  }
  return false;
 }
@@ -28,9 +28,10 @@ import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
 import * as crypto from "node:crypto";
+import { fileURLToPath } from "node:url";

 import * as browseClient from "./browseClient";
-import { sanitizeUntrustedHtml } from "./render";
+import { escapeHtml, sanitizeUntrustedHtml } from "./render";
 import { imageDims } from "./image-size";

 // ─── Types ────────────────────────────────────────────────────────────
@@ -92,10 +93,17 @@ export class StrictModeError extends Error {
 const DIAGRAM_LANGS = new Set(["mermaid", "excalidraw"]);

 /**
- * Extract top-level ```mermaid / ```excalidraw fences, replacing each with a
+ * Extract column-0 ```mermaid / ```excalidraw fences, replacing each with a
 * unique placeholder token paragraph. Backtick and tilde fences, any length
 * >= 3; closers must be at least as long as the opener (CommonMark). Fences
- * with `render=false` in the info string are left untouched.
+ * with `render=false` are left untouched.
+ *
+ * Two deliberate conservatisms (red-team finding — the original version
+ * reconstructed fences at column 0 and restructured lists):
+ *  - Non-diagram fences replay as their ORIGINAL raw lines, byte-for-byte
+ *    (only a render=false flag is removed, in place, preserving indent).
+ *  - INDENTED diagram fences (inside lists/quotes) are NOT extracted — a
+ *    column-0 placeholder would split the list. They replay verbatim as code.
 */
 export function extractDiagramFences(markdown: string): FenceExtraction {
  const lines = markdown.split("\n");
@@ -104,7 +112,10 @@ export function extractDiagramFences(markdown: string): FenceExtraction {
  const runId = crypto.randomBytes(4).toString("hex");

  let i = 0;
-  let openFence: { char: string; len: number; info: string; body: string[] } | null = null;
+  let openFence: {
+    char: string; len: number; indent: number; info: string;
+    rawOpener: string; body: string[];
+  } | null = null;
  let ordinal = 0;

  while (i < lines.length) {
@@ -114,7 +125,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction {
      const close = matchFenceLine(line);
      if (close && close.char === openFence.char && close.len >= openFence.len && close.info === "") {
        const info = parseInfoString(openFence.info);
-        if (DIAGRAM_LANGS.has(info.lang) && info.render) {
+        if (DIAGRAM_LANGS.has(info.lang) && info.render && openFence.indent === 0) {
          ordinal++;
          const token = `gstack-diagram-slot-${runId}-${ordinal}`;
          fences.push({
@@ -128,10 +139,9 @@ export function extractDiagramFences(markdown: string): FenceExtraction {
          });
          out.push("", token, "");
        } else {
-          // Not a diagram fence (or render=false): replay verbatim, but strip
-          // the render=false flag so it never leaks into highlighted output.
-          const infoOut = info.render ? openFence.info : info.lang;
-          out.push(`${openFence.char.repeat(openFence.len)}${infoOut}`);
+          // Not extracted (other language, render=false, or indented): replay
+          // the ORIGINAL lines verbatim; only strip a render=false flag.
+          out.push(stripRenderFalse(openFence.rawOpener));
          out.push(...openFence.body);
          out.push(line);
        }
@@ -146,7 +156,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction {

    const open = matchFenceLine(line);
    if (open && open.info !== "") {
-      openFence = { char: open.char, len: open.len, info: open.info, body: [] };
+      openFence = { ...open, rawOpener: line, body: [] };
      i++;
      continue;
    }
@@ -171,17 +181,22 @@ export function extractDiagramFences(markdown: string): FenceExtraction {

  // Unclosed fence at EOF: replay verbatim (CommonMark treats it as code to EOF).
  if (openFence) {
-    out.push(`${openFence.char.repeat(openFence.len)}${openFence.info}`);
+    out.push(openFence.rawOpener);
    out.push(...openFence.body);
  }

  return { markdown: out.join("\n"), fences };
 }

-function matchFenceLine(line: string): { char: string; len: number; info: string } | null {
-  const m = line.match(/^ {0,3}(`{3,}|~{3,})\s*(.*)$/);
+function matchFenceLine(line: string): { char: string; len: number; indent: number; info: string } | null {
+  const m = line.match(/^( {0,3})(`{3,}|~{3,})\s*(.*)$/);
  if (!m) return null;
-  return { char: m[1][0], len: m[1].length, info: m[2].trim() };
+  return { indent: m[1].length, char: m[2][0], len: m[2].length, info: m[3].trim() };
+}
+
+/**
+ * Remove a render=false flag from a raw opener line, preserving everything
+ * else (indentation, fence characters, language, other info-string words).
+ *
+ * Matching is case-insensitive and tolerates whitespace around the `=`. The
+ * whitespace immediately before the flag is consumed together with it, so no
+ * dangling gap is left behind. Only the first occurrence is removed (the
+ * pattern has no global flag); an opener without the flag comes back
+ * unchanged.
+ */
+function stripRenderFalse(rawOpener: string): string {
+  return rawOpener.replace(/\s*\brender\s*=\s*false\b/i, "");
 }

 /** Parse a fence info string: `mermaid`, `mermaid render=false`,
@@ -208,12 +223,12 @@ export function parseInfoString(info: string): {
 export function substituteSlots(html: string, slots: Map<string, string>): string {
  let s = html;
  for (const [token, slotHtml] of slots) {
+    // Function replacement is load-bearing: slot HTML carries user/LLM-authored
+    // diagram label text, and string-form replace() expands $&, $', $` patterns
+    // inside it — a label containing "$'" would duplicate the document tail.
    const wrapped = new RegExp(`<p>\\s*${token}\\s*</p>`, "g");
-    if (wrapped.test(s)) {
-      s = s.replace(new RegExp(`<p>\\s*${token}\\s*</p>`, "g"), slotHtml);
-    } else {
-      s = s.split(token).join(slotHtml);
-    }
+    const replaced = s.replace(wrapped, () => slotHtml);
+    s = replaced !== s ? replaced : s.split(token).join(slotHtml);
  }
  return s;
 }
@@ -227,14 +242,19 @@ export function buildDiagnosticBlock(fence: DiagramFence, errorMessage: string):
  const excerpt = fence.source.split("\n").slice(0, 8).join("\n");
  const truncated = fence.source.split("\n").length > 8 ? "\n…" : "";
  return [
-    `<figure class="diagram diagram-error" role="img" aria-label="${escapeAttr(diagramLabel(fence))} (failed to render)">`,
+    `<figure class="diagram diagram-error" role="img" aria-label="${escapeHtml(diagramLabel(fence))} (failed to render)">`,
    `<figcaption class="diagram-error-title">Diagram failed to render (${escapeHtml(fence.lang)})</figcaption>`,
    `<pre class="diagram-error-detail">${escapeHtml(errorMessage.trim())}\n\n${escapeHtml(excerpt + truncated)}</pre>`,
    `</figure>`,
  ].join("\n");
 }

-/** Wrap a rendered SVG in an accessible figure (D6.4). */
+/**
+ * Wrap a rendered SVG in an accessible figure (D6.4). The raw fence source is
+ * preserved base64-encoded in a data attribute — an HTML comment would need
+ * `--` escaping, which corrupts every mermaid arrow (`-->`) and breaks
+ * round-trip recovery.
+ */
 export function buildDiagramFigure(fence: DiagramFence, svg: string): string {
  const label = diagramLabel(fence);
  const cleanSvg = sanitizeUntrustedHtml(svg);
@@ -242,17 +262,27 @@ export function buildDiagramFigure(fence: DiagramFence, svg: string): string {
    ? `\n<figcaption class="diagram-caption">${escapeHtml(fence.title)}</figcaption>`
    : "";
  const pageAttr = fence.page ? ` data-gstack-page="${fence.page}"` : "";
+  const sourceB64 = Buffer.from(fence.source, "utf8").toString("base64");
  return [
-    `<figure class="diagram" role="img" aria-label="${escapeAttr(label)}"${pageAttr}>`,
-    `<!-- gstack-diagram-source lang=${escapeAttr(fence.lang)}`,
-    escapeHtmlComment(fence.source),
-    `-->`,
+    `<figure class="diagram" role="img" aria-label="${escapeHtml(label)}"${pageAttr}` +
+      ` data-gstack-lang="${escapeHtml(fence.lang)}" data-gstack-source="${sourceB64}">`,
    cleanSvg,
    captioned,
    `</figure>`,
  ].join("\n");
 }

+/**
+ * Recover the original fence source from a rendered figure (round-trip).
+ *
+ * Reads the first `data-gstack-source` attribute in `figureHtml` whose
+ * double-quoted value consists only of base64-alphabet characters, and
+ * decodes that value as UTF-8 text.
+ *
+ * @param figureHtml Markup of a rendered diagram figure.
+ * @returns The decoded source (the empty string for an empty attribute), or
+ *   null when no such attribute is present.
+ *
+ * NOTE(review): Node documents base64 decoding in Buffer.from as skipping
+ * invalid characters rather than throwing, so the catch below is presumably
+ * unreachable; confirm before relying on it as a validity check.
+ */
+export function decodeFigureSource(figureHtml: string): string | null {
+  const m = figureHtml.match(/\bdata-gstack-source="([A-Za-z0-9+/=]*)"/);
+  if (!m) return null;
+  try {
+    return Buffer.from(m[1], "base64").toString("utf8");
+  } catch {
+    return null;
+  }
+}
+
 function diagramLabel(fence: DiagramFence): string {
  return fence.title ?? `diagram ${fence.ordinal}`;
 }
@@ -261,6 +291,11 @@ function diagramLabel(fence: DiagramFence): string {

 const PAYLOAD_TMP_DIR = process.platform === "win32" ? os.tmpdir() : "/tmp";
 const READY_TIMEOUT_MS = 20_000;
+// Expressions bigger than this ship via `browse eval <file>` instead of argv.
+// 8KB is safe on every platform (Windows CreateProcess caps the WHOLE command
+// line at 32,767 chars; Linux MAX_ARG_STRLEN is ~128KiB) and the tmp-file
+// round-trip costs microseconds — one spawn regardless of payload size.
+const MAX_ARGV_EXPR_BYTES = 8_000;

 export class RenderTab {
  private constructor(
@@ -279,14 +314,37 @@ export class RenderTab {
    const html = fs.readFileSync(bundleSrc);
    const sha = crypto.createHash("sha256").update(html).digest("hex").slice(0, 16);
    const staged = path.join(PAYLOAD_TMP_DIR, `gstack-diagram-render-${sha}.html`);
-    if (!fs.existsSync(staged)) {
+    // Never trust an existing file at the predictable shared-/tmp name: verify
+    // its content hash and re-stage on mismatch (a pre-planted file would
+    // otherwise be loaded into the render tab as the bundle).
+    let needsWrite = true;
+    if (fs.existsSync(staged)) {
+      try {
+        const existing = crypto.createHash("sha256").update(fs.readFileSync(staged)).digest("hex").slice(0, 16);
+        needsWrite = existing !== sha;
+      } catch {
+        needsWrite = true;
+      }
+    }
+    if (needsWrite) {
      // Concurrent-safe: write to a unique temp name, then atomic rename.
      const tmp = `${staged}.${process.pid}.${crypto.randomBytes(4).toString("hex")}`;
      fs.writeFileSync(tmp, html);
      try {
        fs.renameSync(tmp, staged);
-      } catch {
-        fs.unlinkSync(tmp); // another process won the race — theirs is identical
+      } catch (renameErr) {
+        try { fs.unlinkSync(tmp); } catch { /* best-effort tmp cleanup */ }
+        // Only swallow the rename failure when the surviving file HASHES to
+        // the expected bundle (a concurrent writer won an OS-level race).
+        // Sticky-bit /tmp makes rename-over-foreign-file fail EPERM — if the
+        // survivor were trusted on existence alone, a pre-planted file would
+        // ride through the exact check added to stop it.
+        let survivorOk = false;
+        try {
+          const survivor = crypto.createHash("sha256").update(fs.readFileSync(staged)).digest("hex").slice(0, 16);
+          survivorOk = survivor === sha;
+        } catch { /* unreadable survivor = not ok */ }
+        if (!survivorOk) throw renameErr;
      }
    }
    const tabId = browseClient.newtab();
@@ -330,36 +388,29 @@ export class RenderTab {

  private js(expression: string): string {
    // Large payloads (scene JSON, SVG text, data URIs) blow past argv limits —
-    // browseClient.js shells out with the expression as an argv element, so
-    // stage anything big through a tmp file the page can fetch? No: file URLs
-    // are unreachable from the page. Instead, chunk through a window buffer.
-    if (expression.length <= 100_000) {
+    // browseClient.js shells out with the expression as an argv element. The
+    // limit is BYTES, not chars (CJK content is 3x its char count in UTF-8),
+    // and Windows caps the whole command line at 32,767 chars — so anything
+    // big ships via `browse eval <file>` instead: one spawn, any size.
+    if (Buffer.byteLength(expression, "utf8") <= MAX_ARGV_EXPR_BYTES) {
      return browseClient.js({ expression, tabId: this.tabId });
    }
-    return this.jsViaBuffer(expression);
+    return this.jsViaFile(expression);
  }

-  /**
-   * argv-safe path for big expressions: ship the expression into the page in
-   * 64KB chunks (window.__exprBuf), then eval it there. Used for multi-MB
-   * data URIs (photo downscaling) where a single argv would exceed OS limits.
-   */
-  private jsViaBuffer(expression: string): string {
-    browseClient.js({ expression: "window.__exprBuf = ''", tabId: this.tabId });
-    const CHUNK = 64_000;
-    for (let i = 0; i < expression.length; i += CHUNK) {
-      const chunk = expression.slice(i, i + CHUNK);
-      browseClient.js({
-        expression: `window.__exprBuf += ${JSON.stringify(chunk)}, window.__exprBuf.length`,
-        tabId: this.tabId,
-      });
+  /** argv-safe path for big expressions: stage to a tmp file under browse's
+   *  safe dirs and run `browse eval <file>` (one spawn regardless of size). */
+  private jsViaFile(expression: string): string {
+    const file = path.join(
+      PAYLOAD_TMP_DIR,
+      `gstack-diagram-expr-${process.pid}-${crypto.randomBytes(4).toString("hex")}.js`,
+    );
+    fs.writeFileSync(file, expression, "utf8");
+    try {
+      return browseClient.evalFile({ file, tabId: this.tabId });
+    } finally {
+      try { fs.unlinkSync(file); } catch { /* best-effort tmp cleanup */ }
    }
-    // Eval the buffer as a single expression so the resulting promise is the
-    // statement value browse awaits. The buffer resets at the next call.
-    return browseClient.js({
-      expression: `(0, eval)(window.__exprBuf)`,
-      tabId: this.tabId,
-    });
  }

  close(): void {
@@ -466,21 +517,31 @@ export function rasterizeDiagramFigures(
        const png = tab.call("__rasterize", svgMatch[0], targetPx);
        return `<p><img src="${png}" alt="${label}"></p>`;
      } catch (err: any) {
-        warn(`docx: diagram rasterization failed (${firstLine(err?.message ?? String(err))}); keeping source text`);
-        return figure;
+        const reason = firstLine(err?.message ?? String(err));
+        warn(`docx: diagram rasterization failed (${reason}); embedding source text instead`);
+        // The converter drops <figure>/<svg> entirely, so returning the figure
+        // would make the diagram vanish without a trace — the exact invisible
+        // failure the diagnostic contract forbids. Surface the source.
+        const source = decodeFigureSource(figure) ?? "(source unavailable)";
+        return [
+          `<p><strong>Diagram could not be rasterized for DOCX (${escapeHtml(reason)}) — source:</strong></p>`,
+          `<pre>${escapeHtml(source)}</pre>`,
+        ].join("\n");
      }
    },
  );

  // 2. SVG data-URI images (inlined .svg files) → PNG.
  out = out.replace(/<img\b[^>]*>/gi, (tag) => {
-    const src = tag.match(SRC_RE)?.[2] ?? tag.match(SRC_RE)?.[3] ?? "";
+    const m = tag.match(SRC_RE);
+    const src = m?.[2] ?? m?.[3] ?? "";
    if (!src.startsWith("data:image/svg+xml")) return tag;
    try {
      const b64 = src.slice(src.indexOf(",") + 1);
      const svgText = Buffer.from(b64, "base64").toString("utf8");
      const png = tab.call("__rasterize", svgText, targetPx);
-      return tag.replace(SRC_RE, `src="${png}"`);
+      // Function replacement: data URIs can contain $-patterns.
+      return tag.replace(SRC_RE, () => `src="${png}"`);
    } catch (err: any) {
      warn(`docx: svg image rasterization failed (${firstLine(err?.message ?? String(err))})`);
      return tag;
@@ -521,6 +582,9 @@ const SRC_RE = /\bsrc\s*=\s*("([^"]*)"|'([^']*)')/i;
 export function inlineLocalImages(html: string, opts: PrepassImageOptions): string {
  const maxPx = Math.round(opts.contentWidthIn * PRINT_DPI * DOWNSCALE_FACTOR);
  const targetPx = Math.round(opts.contentWidthIn * PRINT_DPI);
+  // An image referenced N times is read/probed/downscaled once; the same data
+  // URI string is reused (also dedupes memory until the final join).
+  const memo = new Map<string, string>();

  return html.replace(IMG_TAG_RE, (tag) => {
    const srcMatch = tag.match(SRC_RE);
@@ -529,7 +593,11 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri

    if (src.startsWith("data:")) return annotateFromDataUri(tag, src);

-    if (/^[a-z][a-z0-9+.-]*:/i.test(src)) {
+    // Windows drive-letter paths (C:/x.png, C:\x.png) look like single-letter
+    // URL schemes — they are local paths, not URLs.
+    const isDrivePath = /^[a-zA-Z]:[\\/]/.test(src);
+
+    if (!isDrivePath && /^[a-z][a-z0-9+.-]*:/i.test(src)) {
      // Absolute URL with a scheme (http, https, file, …)
      if (opts.allowNetwork && /^https?:/i.test(src)) return tag;
      if (/^https?:/i.test(src)) {
@@ -543,8 +611,13 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri
    }

    const filePath = src.startsWith("file:")
-      ? decodeURIComponent(new URL(src).pathname)
-      : path.resolve(opts.inputDir, decodeURIComponent(src));
+      ? fileURLToPath(src)
+      : isDrivePath
+        ? path.resolve(src)
+        : path.resolve(opts.inputDir, decodeURIComponent(src));
+
+    const cached = memo.get(filePath);
+    if (cached !== undefined) return rewriteImgTag(tag, cached);

    if (!fs.existsSync(filePath)) {
      const msg = `image not found: ${src} (resolved to ${filePath})`;
@@ -579,17 +652,26 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri
    }

    const dataUri = `data:${mime};base64,${buf.toString("base64")}`;
-    let newTag = tag.replace(SRC_RE, `src="${dataUri}"`);
-    if (dims) {
-      newTag = newTag.replace(
-        /^<img\b/i,
-        `<img data-gstack-px-width="${Math.round(dims.width)}" data-gstack-px-height="${Math.round(dims.height)}"`,
-      );
-    }
-    return newTag;
+    const attrs = dims
+      ? ` data-gstack-px-width="${Math.round(dims.width)}" data-gstack-px-height="${Math.round(dims.height)}"`
+      : "";
+    memo.set(filePath, `${dataUri}${attrs}`);
+    return rewriteImgTag(tag, memo.get(filePath)!);
  });
 }

+/**
+ * Apply a memoized payload to an img tag. The payload is the data URI
+ * immediately followed by the optional attribute string
+ * (` data-gstack-px-width="…" data-gstack-px-height="…"`).
+ *
+ * @param tag       The original `<img …>` tag.
+ * @param memoEntry Data URI plus optional attribute suffix, as stored in the
+ *                  per-path memo.
+ * @returns The tag with its src replaced by the data URI and, when the entry
+ *          carries attributes, those attributes inserted right after `<img`.
+ */
+function rewriteImgTag(tag: string, memoEntry: string): string {
+  // Split on the first space instead of searching for a separator character:
+  // indexOf("") is always 0, which emptied the src and spliced the URI's tail
+  // into the tag name. A `data:<mime>;base64,<payload>` URI contains no space
+  // (assumes the probed mime type has none), while the attribute suffix, when
+  // present, always begins with one.
+  const firstSpace = memoEntry.indexOf(" ");
+  const rawUri = firstSpace === -1 ? memoEntry : memoEntry.slice(0, firstSpace);
+  const attrs = firstSpace === -1 ? "" : memoEntry.slice(firstSpace);
+  // NOTE(review): tolerate an entry written with a raw control-character
+  // separator between the URI and the attributes, so it never leaks into src.
+  const dataUri = rawUri.replace(/[\u0000-\u001f]+$/, "");
+  // Function replacement: data URIs are user-content-derived; string-form
+  // replace() would expand $-patterns inside them.
+  let out = tag.replace(SRC_RE, () => `src="${dataUri}"`);
+  if (attrs) out = out.replace(/^<img\b/i, () => `<img${attrs}`);
+  return out;
+}
+
 function annotateFromDataUri(tag: string, src: string): string {
  try {
    const b64 = src.slice(src.indexOf(",") + 1);
@@ -695,24 +777,7 @@ export function landscapeContentBox(opts: {
 }

 // ─── tiny helpers ─────────────────────────────────────────────────────
-
-function escapeHtml(s: string): string {
-  return s
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;")
-    .replace(/"/g, "&quot;")
-    .replace(/'/g, "&#39;");
-}
-
-function escapeAttr(s: string): string {
-  return escapeHtml(s);
-}
-
-/** Comments may not contain `--`; encode it so the raw source survives. */
-function escapeHtmlComment(s: string): string {
-  return s.replace(/--/g, "-‐");
-}
+// escapeHtml is imported from ./render — single definition, no drift.

 function firstLine(s: string): string {
  return s.split("\n")[0].slice(0, 200);
@@ -34,6 +34,8 @@
 *     passes preferCSSPageSize — the orchestrator sets it when hasLandscape.
 */

+import { svgTagDims } from "./image-size";
+
 export interface ImagePolicyOptions {
  /** Physical content-box width in inches (page width minus margins). */
  contentWidthIn: number;
@@ -207,24 +209,8 @@ function decideDiagramPromotion(figure: string, widthThresholdPx: number): Promo
  return { promote: true, reason: `wide diagram (${Math.round(dims.width)}px)` };
 }

-/**
- * Best-effort CSS-px dimensions of the first <svg> in a figure: explicit
- * width/height attributes (px or unitless) first, else viewBox.
- */
-function svgCssDims(figure: string): { width: number; height: number } | null {
-  const tag = figure.match(/<svg\b[^>]*>/i)?.[0];
-  if (!tag) return null;
-  const attrNum = (name: string): number | null => {
-    const m = tag.match(new RegExp(`\\b${name}\\s*=\\s*["']\\s*([0-9.]+)(px)?\\s*["']`, "i"));
-    return m ? parseFloat(m[1]) : null;
-  };
-  const w = attrNum("width");
-  const h = attrNum("height");
-  if (w && h) return { width: w, height: h };
-  const vb = tag.match(/\bviewBox\s*=\s*["']\s*[-0-9.]+[\s,]+[-0-9.]+[\s,]+([0-9.]+)[\s,]+([0-9.]+)\s*["']/i);
-  if (vb) return { width: parseFloat(vb[1]), height: parseFloat(vb[2]) };
-  return null;
-}
+/** SVG dimension probing is shared with the byte prober — see image-size.ts. */
+const svgCssDims = svgTagDims;

 function attrValue(tag: string, name: string): string | null {
  const m = tag.match(new RegExp(`\\b${name}\\s*=\\s*"([^"]*)"`, "i"))
@@ -241,7 +227,10 @@ function num(s: string | null): number | null {
 function mergeStyle(tag: string, css: string): string {
   const existing = attrValue(tag, "style");
   if (existing !== null) {
-    return tag.replace(/\bstyle\s*=\s*(".*?"|'.*?')/i, `style="${existing.replace(/"/g, "")}; ${css}"`);
+    // Merge into the existing style attribute. Function replacement prevents
+    // $-pattern expansion from user-controlled style values. The merged value
+    // is always re-emitted inside double quotes, so a literal `"` in the
+    // existing value (possible when the source attribute was single-quoted,
+    // which the pattern below accepts) is encoded as &quot;. The declarations
+    // survive intact and the value cannot close the attribute early.
+    const safeExisting = existing.replace(/"/g, "&quot;");
+    return tag.replace(/\bstyle\s*=\s*(".*?"|'.*?')/i, () => `style="${safeExisting}; ${css}"`);
   }
-  return tag.replace(/^<img\b/i, `<img style="${css}"`);
+  // No style attribute yet: add one right after the tag name.
+  return tag.replace(/^<img\b/i, () => `<img style="${css}"`);
 }
@@ -91,7 +91,18 @@ function webpDims(b: Buffer): ImageDims | null {
 */
 function svgDims(b: Buffer): ImageDims | null {
  const head = b.toString("utf8", 0, Math.min(b.length, 4096));
-  const tag = head.match(/<svg\b[^>]*>/i)?.[0];
+  const dims = svgTagDims(head);
+  return dims ? { ...dims, mime: "image/svg+xml" } : null;
+}
+
+/**
+ * CSS-px dimensions of the first <svg> element in a markup string: explicit
+ * width/height attributes (px or unitless) first, else viewBox. Shared by the
+ * byte prober above and image-policy's diagram-figure measurements — one
+ * regex, no drift.
+ */
+export function svgTagDims(markup: string): { width: number; height: number } | null {
+  const tag = markup.match(/<svg\b[^>]*>/i)?.[0];
  if (!tag) return null;
  const attr = (name: string): number | null => {
    const m = tag.match(new RegExp(`\\b${name}\\s*=\\s*["']\\s*([0-9.]+)(px)?\\s*["']`, "i"));
@@ -99,8 +110,8 @@ function svgDims(b: Buffer): ImageDims | null {
  };
  const w = attr("width");
  const h = attr("height");
-  if (w && h) return { width: w, height: h, mime: "image/svg+xml" };
+  if (w && h) return { width: w, height: h };
  const vb = tag.match(/\bviewBox\s*=\s*["']\s*[-0-9.]+[\s,]+[-0-9.]+[\s,]+([0-9.]+)[\s,]+([0-9.]+)\s*["']/i);
-  if (vb) return { width: parseFloat(vb[1]), height: parseFloat(vb[2]), mime: "image/svg+xml" };
+  if (vb) return { width: parseFloat(vb[1]), height: parseFloat(vb[2]) };
  return null;
 }
@@ -21,6 +21,7 @@ import * as crypto from "node:crypto";
 import { spawn } from "node:child_process";

 import { render } from "./render";
+import { screenCss } from "./print-css";
 import type { GenerateOptions, PreviewOptions } from "./types";
 import { ExitCode } from "./types";
 import * as browseClient from "./browseClient";
@@ -201,7 +202,6 @@ export async function generate(opts: GenerateOptions): Promise<string> {

  // ─── --to html: write the self-contained document, no print round-trip ──
  if (to === "html") {
-    const { screenCss } = await import("./print-css");
    const withScreenLayer = finalHtml.replace(
      "</style>",
      `</style>\n<style>\n${screenCss()}\n</style>`,
@@ -214,6 +214,19 @@ export async function generate(opts: GenerateOptions): Promise<string> {

  // ─── --to docx: content-fidelity conversion (eng-review P8) ────────────
  if (to === "docx") {
+    // Print-only surfaces don't survive the conversion. The watermark div
+    // would degrade to a literal body paragraph reading "DRAFT" (worse than
+    // absent) — strip it. Warn once about print-only flags that were set.
+    finalHtml = finalHtml.replace(/<div class="watermark">[\s\S]*?<\/div>/, "");
+    const printOnly: string[] = [];
+    if (opts.watermark) printOnly.push("--watermark");
+    if (opts.headerTemplate) printOnly.push("--header-template");
+    if (opts.footerTemplate) printOnly.push("--footer-template");
+    if (opts.pageSize) printOnly.push("--page-size");
+    if (opts.margins || opts.marginTop || opts.marginRight || opts.marginBottom || opts.marginLeft) printOnly.push("--margins");
+    if (printOnly.length > 0) {
+      warn(`docx is content-fidelity: ${printOnly.join(", ")} do not apply to Word output`);
+    }
    progress.begin("Converting to DOCX");
    const { default: HTMLtoDOCX } = await import("html-to-docx");
    const buf = await HTMLtoDOCX(finalHtml, null, {
@@ -311,6 +324,21 @@ export async function preview(opts: PreviewOptions): Promise<string> {

  progress.begin("Rendering HTML");
  const markdown = fs.readFileSync(input, "utf8");
+  // Preview deliberately skips the diagram/image pre-pass (no browse daemon
+  // round-trip — preview is the fast loop). Be loud about the divergence so
+  // nobody signs off on a preview that lacks what the PDF will have.
+  if (!opts.quiet) {
+    const fenceCount = extractDiagramFences(markdown).fences.length;
+    const hasLocalImages = /!\[[^\]]*\]\((?!https?:|data:)[^)]+\)/.test(markdown);
+    if (fenceCount > 0 || hasLocalImages) {
+      process.stderr.write(
+        `[make-pdf] preview note: ${fenceCount > 0 ? `${fenceCount} diagram fence(s) shown as code` : ""}` +
+        `${fenceCount > 0 && hasLocalImages ? "; " : ""}` +
+        `${hasLocalImages ? "local images may not resolve from the preview location" : ""}` +
+        ` — \`generate\` renders them fully.\n`,
+      );
+    }
+  }
  const rendered = render({
    markdown,
    title: opts.title,
@@ -12,9 +12,11 @@
 *     breaks copy-paste extraction.
 *   - All paragraphs flush-left. No first-line indent, no justify, no
 *     p+p indent. text-align: left everywhere. 12pt margin-bottom.
- *   - Cover page has the same 1in margins as every other page. No flexbox
- *     center, no inset padding, no vertical centering. Distinction comes
- *     from eyebrow + larger title + hairline rule, not from centering.
+ *   - Cover page (v1.58.0.0 poster revision, user-directed): 56pt title,
+ *     13pt meta, padding-top 1.4in for poster placement. Still no flexbox
+ *     and no vertical centering; the inset is a deliberate top-third drop.
+ *     (Supersedes the original "no inset padding" lock from the first
+ *     /plan-design-review — the 32pt cover read as too small in print.)
 *   - `@page :first` suppresses running header/footer but does NOT override
 *     the 1in margin.
 *   - No <link>, no external CSS/fonts — everything inlined.
@@ -122,8 +124,9 @@ function pageRules(size: string, margin: string, opts: PrintCssOptions): string
    // Landscape named page for promoted wide diagrams/images (image-policy).
    // Chromium-only — exactly the engine this pipeline always prints with.
    // Honored only when the print call passes preferCSSPageSize (orchestrator
-    // sets it when a promotion exists). The block is flex-centered: a diagram
-    // alone on a rotated page should sit in the middle, not hug the header.
+    // sets it when a promotion exists). Vertical centering is NOT done here —
+    // image-policy emits a computed inline margin-top instead (see the
+    // .page-wide comment below for why).
    `@page wide {`,
    `  size: ${size} landscape;`,
    `  margin: ${margin};`,
@@ -139,6 +142,9 @@ function pageRules(size: string, margin: string, opts: PrintCssOptions): string
    `  page: wide;`,
    `  text-align: center;`,
    `}`,
+    // width: 100% stretch is intentional for promoted content: auto-promoted
+    // rasters are roughly 1600px or wider (≈178dpi across the 9in landscape
+    // box — prints fine); a directive-forced small image is the user's call.
    `.page-wide img, .page-wide svg { width: 100%; height: auto; max-width: none; }`,
    `.page-wide figure.diagram > svg { max-width: none; }`,
  ].filter(line => line !== "").join("\n");
@@ -153,10 +159,13 @@ function pageRules(size: string, margin: string, opts: PrintCssOptions): string
 export function screenCss(): string {
  return [
    `@media screen {`,
-    `  body { max-width: 52em; margin: 0 auto; padding: 2.5em 1.5em; }`,
+    // ~42em at 12pt ≈ 70-75 characters per line — the readable ceiling.
+    `  body { max-width: 42em; margin: 0 auto; padding: 2.5em 1.5em; }`,
    `  .chapter { break-before: auto; }`,
    `  .watermark { display: none; }`,
    `  figure.diagram { overflow-x: auto; }`,
+    // Page numbers only exist in print; hide the empty spans + dot leaders.
+    `  .toc li .toc-page, .toc li .toc-dots { display: none; }`,
    `}`,
  ].join("\n");
 }
@@ -362,11 +371,11 @@ function quoteRules(): string {
    `  padding: 0 0 0 18pt;`,
    `  border-left: 2pt solid #111;`,
    `  color: #333;`,
-    `  font-size: 11pt;`,
+    `  font-size: 12pt;`,
    `  line-height: 1.5;`,
    `}`,
    `blockquote p { margin-bottom: 6pt; text-align: left; }`,
-    `blockquote cite { display: block; margin-top: 6pt; font-style: normal; font-size: 9.5pt; color: #666; letter-spacing: 0.02em; }`,
+    `blockquote cite { display: block; margin-top: 6pt; font-style: normal; font-size: 10pt; color: #666; letter-spacing: 0.02em; }`,
    `blockquote cite::before { content: "— "; }`,
  ].join("\n");
 }
@@ -410,7 +419,7 @@ function listRules(): string {
 function footnoteRules(): string {
  return [
    `.footnote-ref { font-size: 0.75em; vertical-align: super; line-height: 0; text-decoration: none; color: #0055cc; }`,
-    `.footnotes { margin-top: 24pt; padding-top: 12pt; border-top: 0.5pt solid #ccc; font-size: 9.5pt; line-height: 1.4; }`,
+    `.footnotes { margin-top: 24pt; padding-top: 12pt; border-top: 0.5pt solid #ccc; font-size: 10pt; line-height: 1.4; }`,
    `.footnotes ol { padding-left: 18pt; }`,
  ].join("\n");
 }
@@ -112,14 +112,19 @@ export function render(opts: RenderOptions): RenderResult {
      })
    : "";

+  // TOC anchors must resolve: assign id="toc-N" to each H1-H3 in the same
+  // order buildTocBlock scans them, or every TOC link is a dead href (masked
+  // in PDFs by Chromium outline bookmarks, glaring in --to html).
+  const anchoredHtml = opts.toc ? addHeadingIds(typographicHtml) : typographicHtml;
+
  const tocBlock = opts.toc
-    ? buildTocBlock(typographicHtml)
+    ? buildTocBlock(anchoredHtml)
    : "";

  // Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
  const chapterHtml = opts.noChapterBreaks
-    ? `<section class="chapter">${typographicHtml}</section>`
-    : wrapChaptersByH1(typographicHtml);
+    ? `<section class="chapter">${anchoredHtml}</section>`
+    : wrapChaptersByH1(anchoredHtml);

  const watermarkBlock = opts.watermark
    ? `<div class="watermark">${escapeHtml(opts.watermark)}</div>`
@@ -288,6 +293,21 @@ function buildTocBlock(html: string): string {
  ].join("\n");
 }

+/**
+ * Assign id="toc-N" to each H1-H3 opening tag, N counting matches in document
+ * order (meant to line up by index with the entries buildTocBlock emits).
+ * A heading that already has an id is returned unchanged — no second id — but
+ * still consumes its N. NOTE(review): its #toc-N link then has no target.
+ */
+function addHeadingIds(html: string): string {
+  let i = 0;
+  return html.replace(/<(h[1-3])([^>]*)>/gi, (full, tag: string, attrs: string) => {
+    const id = `toc-${i++}`; // consume N even when the heading is skipped below
+    if (/(?:^|\s)id\s*=/i.test(attrs)) return full; // real id only, not data-id=
+    return `<${tag}${attrs} id="${id}">`;
+  });
+}
+
 function extractHeadings(html: string): Array<{ level: number; text: string }> {
  const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi;
  const headings: Array<{ level: number; text: string }> = [];
@@ -362,7 +382,7 @@ function stripTags(html: string): string {
  return html.replace(/<[^>]+>/g, "");
 }

-function escapeHtml(s: string): string {
+export function escapeHtml(s: string): string {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")