mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-26 19:49:57 +02:00
feat(make-pdf): --to html|docx output formats
--to html writes the assembled self-contained document directly (no print round-trip): inline vector diagrams, data-URI images, zero network references, plus an @media screen layer for browser reading. --to docx is the content-fidelity export (eng-review P8): html-to-docx@1.8.0 (exact pin; pure JS, bun-compile-verified) maps headings/tables/code/lists; diagrams and SVG images rasterize at 300dpi of the content-box width via the render tab; diagnostic figures convert to plain p/pre so the converter can't silently drop an error. --format keeps its page-size-alias meaning; --to is the output format, and the CLI says so when confused. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -439,6 +439,73 @@ export function renderFenceSlots(
|
||||
return slots;
|
||||
}
|
||||
|
||||
// ─── DOCX rasterization (eng-review D6.5, P8) ─────────────────────────
|
||||
|
||||
/**
 * Decode the payload of an SVG data URI into SVG markup.
 *
 * Data URIs carry either a base64 payload (header ends in `;base64`) or a
 * percent-encoded/plain-text payload (RFC 2397). Both forms are handled;
 * a URI without the `,` separator is rejected.
 *
 * @param src - The full `data:image/svg+xml…` URI.
 * @returns The decoded SVG text.
 * @throws Error when the URI has no `,` separator; URIError when a
 *   non-base64 payload contains a malformed percent escape.
 */
function decodeSvgDataUri(src: string): string {
  const comma = src.indexOf(",");
  if (comma < 0) throw new Error("malformed svg data URI: missing ',' separator");
  const header = src.slice(0, comma);
  const payload = src.slice(comma + 1);
  return /;\s*base64\s*$/i.test(header)
    ? Buffer.from(payload, "base64").toString("utf8")
    : decodeURIComponent(payload);
}

/**
 * Extract a printable message from an arbitrary thrown value: the `message`
 * property when it is a string, otherwise the value's string form.
 */
function describeRasterError(err: unknown): string {
  const message = (err as { message?: unknown } | null | undefined)?.message;
  return typeof message === "string" ? message : String(err);
}

/**
 * Replace inline diagram SVGs (and SVG data-URI images) with PNG <img> tags
 * for the DOCX export — Word's SVG support is unreliable, so the content-
 * fidelity contract embeds rasters sized to the placed width (the content
 * box) at print resolution. Diagnostic blocks keep their text form.
 *
 * Rasterization is delegated to the render tab via
 * `tab.call("__rasterize", svgText, targetPx)`; the returned value is
 * interpolated directly into the `src` attribute.
 * NOTE(review): this assumes the call is synchronous and returns a PNG data
 * URI string — confirm against the RenderTab implementation.
 *
 * @param html - Assembled document HTML.
 * @param tab - Render tab used to rasterize SVG markup.
 * @param contentWidthIn - Content-box width in inches; multiplied by
 *   `PRINT_DPI` and rounded to get the raster width in pixels.
 * @param warn - Receives one message per element that fails to rasterize.
 * @returns The HTML with rasterizable diagrams/SVG images swapped for PNG
 *   images. Any element that fails is left exactly as it was.
 */
export function rasterizeDiagramFigures(
  html: string,
  tab: RenderTab,
  contentWidthIn: number,
  warn: (msg: string) => void,
): string {
  const targetPx = Math.round(contentWidthIn * PRINT_DPI);

  // 1. Rendered diagram figures → <img> with the figure's aria-label as alt.
  //    The pattern requires class="diagram" exactly, so diagnostic figures
  //    (class="diagram diagram-error") are not touched here.
  let out = html.replace(
    /<figure class="diagram"[^>]*>[\s\S]*?<\/figure>/gi,
    (figure: string): string => {
      // Greedy on purpose: spans from the first <svg to the last </svg> in
      // the figure, so nested <svg> elements stay inside the capture.
      const svgMatch = figure.match(/<svg\b[\s\S]*<\/svg>/i);
      if (!svgMatch) return figure;
      // The label is lifted verbatim from an attribute value, so it is
      // already attribute-safe when re-embedded as alt text.
      const label = figure.match(/\baria-label\s*=\s*"([^"]*)"/i)?.[1] ?? "diagram";
      try {
        const png = tab.call("__rasterize", svgMatch[0], targetPx);
        return `<p><img src="${png}" alt="${label}"></p>`;
      } catch (err: unknown) {
        warn(`docx: diagram rasterization failed (${firstLine(describeRasterError(err))}); keeping source text`);
        return figure;
      }
    },
  );

  // 2. SVG data-URI images (inlined .svg files) → PNG.
  out = out.replace(/<img\b[^>]*>/gi, (tag: string): string => {
    const srcMatch = tag.match(SRC_RE);
    const src = srcMatch?.[2] ?? srcMatch?.[3] ?? "";
    if (!src.startsWith("data:image/svg+xml")) return tag;
    try {
      // Honors both base64 and percent-encoded payloads; blindly base64-
      // decoding a percent-encoded SVG would hand garbage to the rasterizer.
      const svgText = decodeSvgDataUri(src);
      const png = tab.call("__rasterize", svgText, targetPx);
      // Function replacer: the payload is inserted literally, never parsed
      // for `$&`/`$1`-style substitution patterns.
      return tag.replace(SRC_RE, () => `src="${png}"`);
    } catch (err: unknown) {
      warn(`docx: svg image rasterization failed (${firstLine(describeRasterError(err))})`);
      return tag;
    }
  });

  return out;
}
|
||||
|
||||
/**
 * Flatten diagnostic (error) figures into a bold title paragraph followed by
 * a <pre> block, because the DOCX converter drops <figure> elements it can't
 * map — and a diagnostic that silently vanishes is exactly what the
 * diagnostic contract forbids. Pure string transform; no render tab needed.
 *
 * @param html - Document HTML that may contain
 *   `<figure class="diagram diagram-error">` blocks.
 * @returns The HTML with each such figure replaced by
 *   `<p><strong>title</strong></p>` + newline + `<pre>detail</pre>`. The
 *   title is the figcaption's inner HTML (default "Diagram failed to
 *   render" when there is no figcaption); the detail is the first <pre>'s
 *   inner HTML (empty when there is none).
 */
export function convertDiagnosticsForDocx(html: string): string {
  const errorFigureRe = /<figure class="diagram diagram-error"[^>]*>([\s\S]*?)<\/figure>/gi;
  const captionRe = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i;
  const detailRe = /<pre[^>]*>([\s\S]*?)<\/pre>/i;

  return html.replace(errorFigureRe, (_whole: string, inner: string): string => {
    const captionHit = captionRe.exec(inner);
    const detailHit = detailRe.exec(inner);
    // `??` (not `||`): an empty-but-present figcaption stays empty.
    const heading = captionHit?.[1] ?? "Diagram failed to render";
    const text = detailHit?.[1] ?? "";
    return "<p><strong>" + heading + "</strong></p>\n<pre>" + text + "</pre>";
  });
}
|
||||
|
||||
// ─── Image inlining (eng-review D1 + D4 + D6.1) ───────────────────────
|
||||
|
||||
// Matches each `<img …>` tag (case-insensitive) up to its first `>`.
// The `g` flag makes this regex stateful (`lastIndex`) under `exec`/`test`;
// NOTE(review): callers are outside this view — confirm it is only used with
// `replace`/`matchAll`-style APIs that manage `lastIndex` themselves.
const IMG_TAG_RE = /<img\b[^>]*>/gi;
|
||||
|
||||
Reference in New Issue
Block a user