/**
* Diagram + image pre-pass. Runs between "read markdown" and render() in the
* orchestrator, and owns everything that needs the diagram-render bundle.
*
* markdown ─▶ extractDiagramFences() ──▶ render() (marked+sanitize+smarty)
* │ fences → placeholder tokens │
* │ ▼
* └─▶ renderFenceSlots() ───────────▶ substituteSlots(html, slots)
* one browse render tab/run │
* error ⇒ diagnostic block + page reload ▼
* inlineLocalImages(html)
* data URIs, probe dims from bytes,
* downscale >2x content box @300dpi,
* remote warn / missing placeholder /
* --strict hard-fail
*
* Placeholders survive marked, the sanitizer, and smartypants because they are
* plain hyphenated lowercase tokens with no quotes or HTML. Slot HTML is run
* through the same sanitizer as user content before substitution (the bundle
* renders with securityLevel strict — the sanitizer is the second layer).
*
* Reset contract (eng-review D6.2): each fence renders with a fresh
* mermaid.render id; after ANY render error the bundle page is reloaded before
* the next fence so a poisoned global can't corrupt diagram N+1.
*/
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import * as crypto from "node:crypto";
import * as browseClient from "./browseClient";
import { sanitizeUntrustedHtml } from "./render";
import { imageDims } from "./image-size";
// ─── Types ────────────────────────────────────────────────────────────
export interface DiagramFence {
/** "mermaid" | "excalidraw" */
lang: string;
/** Fence body (the diagram source). */
source: string;
/** Optional title="..." from the fence info string (a11y label, D6.4). */
title?: string;
/** Optional page=landscape|portrait fence directive (image-policy override). */
page?: "landscape" | "portrait";
/** render=false → leave as a plain code block (escape hatch, D6.3). */
render: boolean;
/** Placeholder token substituted into the markdown. */
token: string;
/** 1-based ordinal among rendered fences (unique ids, aria fallback). */
ordinal: number;
}
export interface FenceExtraction {
markdown: string;
fences: DiagramFence[];
}
export interface PrepassWarnings {
warn: (msg: string) => void;
}
export interface PrepassImageOptions {
/** Directory of the source markdown — relative image paths resolve here. */
inputDir: string;
/** Hard-fail on missing/remote images instead of warn (D6.1). */
strict: boolean;
/** Remote images are left untouched when network is explicitly allowed. */
allowNetwork: boolean;
/** Physical content-box width in inches (page width minus margins). */
contentWidthIn: number;
warn: (msg: string) => void;
/** Lazily provides a ready bundle tab (only opened when needed). */
getTab: () => RenderTab | null;
}
/** Print-resolution policy (eng-review D4): downscale rasters wider than
* 2 × contentWidth × 300dpi down to contentWidth × 300dpi. */
const PRINT_DPI = 300;
const DOWNSCALE_FACTOR = 2;
export class StrictModeError extends Error {
constructor(msg: string) {
super(msg);
this.name = "StrictModeError";
}
}
// ─── Fence extraction (pure) ──────────────────────────────────────────
const DIAGRAM_LANGS = new Set(["mermaid", "excalidraw"]);
/**
* Extract top-level ```mermaid / ```excalidraw fences, replacing each with a
* unique placeholder token paragraph. Backtick and tilde fences, any length
* >= 3; closers must be at least as long as the opener (CommonMark). Fences
* with `render=false` in the info string are left untouched.
*/
export function extractDiagramFences(markdown: string): FenceExtraction {
const lines = markdown.split("\n");
const out: string[] = [];
const fences: DiagramFence[] = [];
const runId = crypto.randomBytes(4).toString("hex");
let i = 0;
let openFence: { char: string; len: number; info: string; body: string[] } | null = null;
let ordinal = 0;
while (i < lines.length) {
const line = lines[i];
if (openFence) {
const close = matchFenceLine(line);
if (close && close.char === openFence.char && close.len >= openFence.len && close.info === "") {
const info = parseInfoString(openFence.info);
if (DIAGRAM_LANGS.has(info.lang) && info.render) {
ordinal++;
const token = `gstack-diagram-slot-${runId}-${ordinal}`;
fences.push({
lang: info.lang,
source: openFence.body.join("\n"),
title: info.title,
page: info.page,
render: true,
token,
ordinal,
});
out.push("", token, "");
} else {
// Not a diagram fence (or render=false): replay verbatim, but strip
// the render=false flag so it never leaks into highlighted output.
const infoOut = info.render ? openFence.info : info.lang;
out.push(`${openFence.char.repeat(openFence.len)}${infoOut}`);
out.push(...openFence.body);
out.push(line);
}
openFence = null;
i++;
continue;
}
openFence.body.push(line);
i++;
continue;
}
const open = matchFenceLine(line);
if (open && open.info !== "") {
openFence = { char: open.char, len: open.len, info: open.info, body: [] };
i++;
continue;
}
if (open) {
// Anonymous fence (plain code block) — copy through to its closer so a
// ```mermaid example INSIDE a plain fence is never extracted.
out.push(line);
i++;
while (i < lines.length) {
const l = lines[i];
const close = matchFenceLine(l);
out.push(l);
i++;
if (close && close.char === open.char && close.len >= open.len && close.info === "") break;
}
continue;
}
out.push(line);
i++;
}
// Unclosed fence at EOF: replay verbatim (CommonMark treats it as code to EOF).
if (openFence) {
out.push(`${openFence.char.repeat(openFence.len)}${openFence.info}`);
out.push(...openFence.body);
}
return { markdown: out.join("\n"), fences };
}
function matchFenceLine(line: string): { char: string; len: number; info: string } | null {
const m = line.match(/^ {0,3}(`{3,}|~{3,})\s*(.*)$/);
if (!m) return null;
return { char: m[1][0], len: m[1].length, info: m[2].trim() };
}
/** Parse a fence info string: `mermaid`, `mermaid render=false`,
* `mermaid title="Auth flow"`, `mermaid page=landscape`. */
export function parseInfoString(info: string): {
lang: string; render: boolean; title?: string; page?: "landscape" | "portrait";
} {
const lang = (info.match(/^\S+/)?.[0] ?? "").toLowerCase();
const render = !/\brender\s*=\s*false\b/i.test(info);
const title = info.match(/\btitle\s*=\s*"([^"]*)"/i)?.[1]
?? info.match(/\btitle\s*=\s*'([^']*)'/i)?.[1];
const pageRaw = info.match(/\bpage\s*=\s*(landscape|portrait)\b/i)?.[1]?.toLowerCase();
const page = pageRaw === "landscape" || pageRaw === "portrait" ? pageRaw : undefined;
return { lang, render, title, page };
}
// ─── Slot substitution (pure) ─────────────────────────────────────────
/**
* Replace placeholder tokens in rendered HTML with their final slot HTML.
* marked wraps the bare token line in
…
; replace the wrapper too so
* the figure isn't nested inside a paragraph.
*/
export function substituteSlots(html: string, slots: Map): string {
let s = html;
for (const [token, slotHtml] of slots) {
const wrapped = new RegExp(`
\\s*${token}\\s*
`, "g");
if (wrapped.test(s)) {
s = s.replace(new RegExp(`
\\s*${token}\\s*
`, "g"), slotHtml);
} else {
s = s.split(token).join(slotHtml);
}
}
return s;
}
/**
* Visible diagnostic block for a failed fence render — never silent raw code
* (eng-review: explicit error blocks). Sanitizer-safe: all dynamic content is
* HTML-escaped.
*/
export function buildDiagnosticBlock(fence: DiagramFence, errorMessage: string): string {
const excerpt = fence.source.split("\n").slice(0, 8).join("\n");
const truncated = fence.source.split("\n").length > 8 ? "\n…" : "";
return [
``,
`Diagram failed to render (${escapeHtml(fence.lang)})`,
`
`,
``,
].join("\n");
}
/** Wrap a rendered SVG in an accessible figure (D6.4). */
export function buildDiagramFigure(fence: DiagramFence, svg: string): string {
const label = diagramLabel(fence);
const cleanSvg = sanitizeUntrustedHtml(svg);
const captioned = fence.title
? `\n${escapeHtml(fence.title)}`
: "";
const pageAttr = fence.page ? ` data-gstack-page="${fence.page}"` : "";
return [
``,
``,
cleanSvg,
captioned,
``,
].join("\n");
}
function diagramLabel(fence: DiagramFence): string {
return fence.title ?? `diagram ${fence.ordinal}`;
}
// ─── Render tab (bundle page lifecycle) ───────────────────────────────
const PAYLOAD_TMP_DIR = process.platform === "win32" ? os.tmpdir() : "/tmp";
const READY_TIMEOUT_MS = 20_000;
export class RenderTab {
private constructor(
public readonly tabId: number,
private readonly stagedBundlePath: string,
) {}
/**
* Open a tab and load the diagram-render bundle. The bundle HTML is staged
* under /tmp (content-addressed, reused across runs — load-html only reads
* inside its safe dirs) and loaded by PATH, not --from-file: a 9MB JSON
* round-trip per run would be pure waste.
*/
static open(): RenderTab {
const bundleSrc = resolveBundlePath();
const html = fs.readFileSync(bundleSrc);
const sha = crypto.createHash("sha256").update(html).digest("hex").slice(0, 16);
const staged = path.join(PAYLOAD_TMP_DIR, `gstack-diagram-render-${sha}.html`);
if (!fs.existsSync(staged)) {
// Concurrent-safe: write to a unique temp name, then atomic rename.
const tmp = `${staged}.${process.pid}.${crypto.randomBytes(4).toString("hex")}`;
fs.writeFileSync(tmp, html);
try {
fs.renameSync(tmp, staged);
} catch {
fs.unlinkSync(tmp); // another process won the race — theirs is identical
}
}
const tabId = browseClient.newtab();
const tab = new RenderTab(tabId, staged);
tab.loadBundle();
return tab;
}
/** (Re)load the bundle page — also the reset path after a render error. */
loadBundle(): void {
browseClient.loadHtmlFile({ file: this.stagedBundlePath, tabId: this.tabId });
const ready = browseClient.waitForExpression({
expression: "document.getElementById('status') !== null && document.getElementById('status').textContent === 'ready'",
tabId: this.tabId,
timeoutMs: READY_TIMEOUT_MS,
});
if (!ready) {
throw new Error(
"diagram-render bundle did not become ready in the browse tab " +
`(${READY_TIMEOUT_MS}ms). Check \`browse js "window.__errors"\` on tab ${this.tabId}.`,
);
}
}
/**
* Call one of the bundle's async window functions with JSON-safe string
* args. Errors come back as a recognizable ERR: prefix so a render failure
* is data, not a thrown browse exit.
*/
call(fn: string, ...args: Array): string {
const argList = args.map((a) => JSON.stringify(a)).join(",");
const expression =
`window.${fn}(${argList})` +
`.then(r => "OK:" + r)` +
`.catch(e => "ERR:" + String((e && e.message) || e))`;
const result = this.js(expression);
if (result.startsWith("OK:")) return result.slice(3);
if (result.startsWith("ERR:")) throw new RenderCallError(result.slice(4));
throw new RenderCallError(`unexpected bundle result: ${result.slice(0, 200)}`);
}
private js(expression: string): string {
// Large payloads (scene JSON, SVG text, data URIs) blow past argv limits —
// browseClient.js shells out with the expression as an argv element, so
// stage anything big through a tmp file the page can fetch? No: file URLs
// are unreachable from the page. Instead, chunk through a window buffer.
if (expression.length <= 100_000) {
return browseClient.js({ expression, tabId: this.tabId });
}
return this.jsViaBuffer(expression);
}
/**
* argv-safe path for big expressions: ship the expression into the page in
* 64KB chunks (window.__exprBuf), then eval it there. Used for multi-MB
* data URIs (photo downscaling) where a single argv would exceed OS limits.
*/
private jsViaBuffer(expression: string): string {
browseClient.js({ expression: "window.__exprBuf = ''", tabId: this.tabId });
const CHUNK = 64_000;
for (let i = 0; i < expression.length; i += CHUNK) {
const chunk = expression.slice(i, i + CHUNK);
browseClient.js({
expression: `window.__exprBuf += ${JSON.stringify(chunk)}, window.__exprBuf.length`,
tabId: this.tabId,
});
}
// Eval the buffer as a single expression so the resulting promise is the
// statement value browse awaits. The buffer resets at the next call.
return browseClient.js({
expression: `(0, eval)(window.__exprBuf)`,
tabId: this.tabId,
});
}
close(): void {
try {
browseClient.closetab(this.tabId);
} catch {
// best-effort: orchestrator finally path
}
}
}
export class RenderCallError extends Error {
constructor(msg: string) {
super(msg);
this.name = "RenderCallError";
}
}
/** Resolve dist/diagram-render.html: env override → repo-relative (dev) → global install. */
export function resolveBundlePath(env: NodeJS.ProcessEnv = process.env): string {
const candidates = [
env.GSTACK_DIAGRAM_BUNDLE,
// dev: make-pdf/src/* → repo root lib/. (In a compiled binary this is the
// virtual /$bunfs/root and simply never exists — harmless.)
path.resolve(import.meta.dir, "../../lib/diagram-render/dist/diagram-render.html"),
// compiled binary at /make-pdf/dist/pdf → /lib/… — same shape
// in the repo and in the ~/.claude/skills/gstack global install. argv[0]
// is the literal string "bun" in compiled binaries; execPath is real.
path.resolve(path.dirname(process.execPath), "../../lib/diagram-render/dist/diagram-render.html"),
path.join(os.homedir(), ".claude/skills/gstack/lib/diagram-render/dist/diagram-render.html"),
].filter((p): p is string => !!p);
for (const p of candidates) {
if (fs.existsSync(p)) return p;
}
throw new Error(
"diagram-render bundle not found. Tried:\n" +
candidates.map((c) => ` - ${c}`).join("\n") +
"\nRun `bun run build:diagram-render` (repo) or re-run ./setup (install).",
);
}
// ─── Fence rendering ──────────────────────────────────────────────────
/**
* Render every extracted fence to its slot HTML. One bundle tab serves all
* fences; a failed fence yields a diagnostic block and a bundle reload
* (reset contract) before the next fence renders.
*/
export function renderFenceSlots(
fences: DiagramFence[],
tab: RenderTab,
warn: (msg: string) => void,
): Map {
const slots = new Map();
for (const fence of fences) {
try {
let svg: string;
if (fence.lang === "mermaid") {
svg = tab.call("__renderMermaid", `mermaid-fence-${fence.ordinal}`, fence.source);
} else {
JSON.parse(fence.source); // fail fast with a JSON diagnostic, not a bundle stack
svg = tab.call("__excalidrawToSvg", fence.source);
}
slots.set(fence.token, buildDiagramFigure(fence, svg));
} catch (err: any) {
const msg = err?.message ?? String(err);
warn(`diagram ${fence.ordinal} (${fence.lang}) failed to render: ${firstLine(msg)}`);
slots.set(fence.token, buildDiagnosticBlock(fence, msg));
// Reset contract: a poisoned page must not corrupt the next fence.
try {
tab.loadBundle();
} catch (reloadErr: any) {
warn(`bundle reload after render error failed: ${firstLine(reloadErr?.message ?? String(reloadErr))}`);
}
}
}
return slots;
}
// ─── DOCX rasterization (eng-review D6.5, P8) ─────────────────────────
/**
* Replace inline diagram SVGs (and svg data-URI images) with PNG tags
* for the DOCX export — Word's SVG support is unreliable, so the content-
* fidelity contract embeds rasters at 300dpi of the placed width (the
* content box). Diagnostic blocks keep their text form.
*/
export function rasterizeDiagramFigures(
html: string,
tab: RenderTab,
contentWidthIn: number,
warn: (msg: string) => void,
): string {
const targetPx = Math.round(contentWidthIn * PRINT_DPI);
// 1. Rendered diagram figures → with the figure's aria-label as alt.
let out = html.replace(
/]*>[\s\S]*?<\/figure>/gi,
(figure) => {
const svgMatch = figure.match(/