mirror of
https://github.com/garrytan/gstack.git
synced 2026-07-05 15:47:57 +02:00
fix(make-pdf): adversarial-review wave — offline posture enforced, symlink-aware confinement, bounded reads
Codex adversarial + structured review findings:

- Remote images are now BLOCKED with a visible placeholder instead of warn-and-keep — leaving the tag meant Chromium fetched the URL at print time anyway, so the offline posture was a lie (tracking pixels and internal-URL probes ran without --allow-network).
- The out-of-tree read check compares REAL paths: a symlink inside the input dir pointing at ~/.ssh/... passed the string-prefix check, including under --strict. Ordered after the existence check (realpath of a missing file false-positives on macOS /var → /private/var).
- Image reads are bounded BEFORE reading: statSync first, non-regular files (fifo/device/dir) and >64MB files degrade to placeholders instead of hanging or exhausting memory; malformed percent-encoding (foo%zz.png) degrades to missing-image instead of crashing decodeURIComponent.
- browse shell-outs get a 120s timeout — a wedged daemon or hostile mermaid source fails the run instead of hanging it.
- TOC entries link to the heading's ACTUAL id (pre-id'd raw-HTML headings previously got dead #toc-N links); per-side margins compose into the CSS @page shorthand so a landscape promotion flipping preferCSSPageSize no longer silently reverts --margin-left/right to defaults (Codex P2).
- The image memo is a typed object — literal NUL-byte separators had made diagram-prepass.ts register as binary to text tooling.

Codex structured review GATE: PASS (no P1).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -176,6 +176,9 @@ function runBrowse(args: string[]): string {
|
|||||||
encoding: "utf8",
|
encoding: "utf8",
|
||||||
maxBuffer: 16 * 1024 * 1024, // 16MB; tab content can be large
|
maxBuffer: 16 * 1024 * 1024, // 16MB; tab content can be large
|
||||||
stdio: ["ignore", "pipe", "pipe"],
|
stdio: ["ignore", "pipe", "pipe"],
|
||||||
|
// A wedged daemon (or a hostile mermaid source spinning the renderer)
|
||||||
|
// must fail the run, not hang it forever.
|
||||||
|
timeout: 120_000,
|
||||||
});
|
});
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
const exitCode = typeof err.status === "number" ? err.status : 1;
|
const exitCode = typeof err.status === "number" ? err.status : 1;
|
||||||
|
|||||||
@@ -80,6 +80,8 @@ export interface PrepassImageOptions {
|
|||||||
* 2 × contentWidth × 300dpi down to contentWidth × 300dpi. */
|
* 2 × contentWidth × 300dpi down to contentWidth × 300dpi. */
|
||||||
const PRINT_DPI = 300;
|
const PRINT_DPI = 300;
|
||||||
const DOWNSCALE_FACTOR = 2;
|
const DOWNSCALE_FACTOR = 2;
|
||||||
|
/** Per-image read ceiling — bounds memory before any policy runs. */
|
||||||
|
const MAX_IMAGE_BYTES = 64 * 1024 * 1024;
|
||||||
|
|
||||||
export class StrictModeError extends Error {
|
export class StrictModeError extends Error {
|
||||||
constructor(msg: string) {
|
constructor(msg: string) {
|
||||||
@@ -584,7 +586,7 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri
|
|||||||
const targetPx = Math.round(opts.contentWidthIn * PRINT_DPI);
|
const targetPx = Math.round(opts.contentWidthIn * PRINT_DPI);
|
||||||
// An image referenced N times is read/probed/downscaled once; the same data
|
// An image referenced N times is read/probed/downscaled once; the same data
|
||||||
// URI string is reused (also dedupes memory until the final join).
|
// URI string is reused (also dedupes memory until the final join).
|
||||||
const memo = new Map<string, string>();
|
const memo = new Map<string, { dataUri: string; attrs: string }>();
|
||||||
|
|
||||||
return html.replace(IMG_TAG_RE, (tag) => {
|
return html.replace(IMG_TAG_RE, (tag) => {
|
||||||
const srcMatch = tag.match(SRC_RE);
|
const srcMatch = tag.match(SRC_RE);
|
||||||
@@ -601,36 +603,73 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri
|
|||||||
// Absolute URL with a scheme (http, https, file, …)
|
// Absolute URL with a scheme (http, https, file, …)
|
||||||
if (opts.allowNetwork && /^https?:/i.test(src)) return tag;
|
if (opts.allowNetwork && /^https?:/i.test(src)) return tag;
|
||||||
if (/^https?:/i.test(src)) {
|
if (/^https?:/i.test(src)) {
|
||||||
const msg = `remote image not fetched (offline posture): ${src}`;
|
const msg = `remote image blocked (offline posture): ${src}`;
|
||||||
if (opts.strict) throw new StrictModeError(msg + " — re-run without --strict or pass --allow-network");
|
if (opts.strict) throw new StrictModeError(msg + " — re-run without --strict or pass --allow-network");
|
||||||
opts.warn(msg);
|
opts.warn(msg);
|
||||||
return tag;
|
// Leaving the tag would make Chromium fetch it at print time anyway —
|
||||||
|
// the warn would be a lie. Replace with a visible placeholder.
|
||||||
|
return buildBlockedRemotePlaceholder(src);
|
||||||
}
|
}
|
||||||
// file:// and friends fall through to the local path branch
|
// file:// and friends fall through to the local path branch
|
||||||
if (!src.startsWith("file:")) return tag;
|
if (!src.startsWith("file:")) return tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decodeURIComponent throws on malformed escapes (foo%zz.png) — a broken
|
||||||
|
// URL must degrade to the missing-image path, not crash the run.
|
||||||
|
let decodedSrc = src;
|
||||||
|
try {
|
||||||
|
decodedSrc = decodeURIComponent(src);
|
||||||
|
} catch { /* keep raw src */ }
|
||||||
|
|
||||||
const filePath = src.startsWith("file:")
|
const filePath = src.startsWith("file:")
|
||||||
? fileURLToPath(src)
|
? fileURLToPath(src)
|
||||||
: isDrivePath
|
: isDrivePath
|
||||||
? path.resolve(src)
|
? path.resolve(src)
|
||||||
: path.resolve(opts.inputDir, decodeURIComponent(src));
|
: path.resolve(opts.inputDir, decodedSrc);
|
||||||
|
|
||||||
const cached = memo.get(filePath);
|
const cached = memo.get(filePath);
|
||||||
if (cached !== undefined) return rewriteImgTag(tag, cached);
|
if (cached !== undefined) return rewriteImgTag(tag, cached);
|
||||||
|
|
||||||
|
if (!fs.existsSync(filePath)) {
|
||||||
|
const msg = `image not found: ${src} (resolved to ${filePath})`;
|
||||||
|
if (opts.strict) throw new StrictModeError(msg);
|
||||||
|
opts.warn(msg);
|
||||||
|
return buildMissingImagePlaceholder(src);
|
||||||
|
}
|
||||||
|
|
||||||
// Out-of-tree reads are legal (local CLI semantics — like pandoc) but
|
// Out-of-tree reads are legal (local CLI semantics — like pandoc) but
|
||||||
// never silent: an agent PDF-ing untrusted markdown should not quietly
|
// never silent: an agent PDF-ing untrusted markdown should not quietly
|
||||||
// embed ~/.ssh/config into a shareable document. --strict makes it fatal.
|
// embed ~/.ssh/config into a shareable document. --strict makes it fatal.
|
||||||
const inputRoot = path.resolve(opts.inputDir) + path.sep;
|
// Compare REAL paths — a symlink inside the input dir pointing outside
|
||||||
if (!filePath.startsWith(inputRoot)) {
|
// would otherwise pass a string-prefix check (Codex adversarial finding).
|
||||||
const msg = `image resolves OUTSIDE the input directory: ${src} → ${filePath}`;
|
// Runs after the existence check: realpath of a missing file can't
|
||||||
|
// resolve, and on macOS /var vs /private/var would false-positive.
|
||||||
|
const inputRoot = safeRealpath(path.resolve(opts.inputDir)) + path.sep;
|
||||||
|
const realFilePath = safeRealpath(filePath);
|
||||||
|
if (!realFilePath.startsWith(inputRoot)) {
|
||||||
|
const msg = `image resolves OUTSIDE the input directory: ${src} → ${realFilePath}`;
|
||||||
if (opts.strict) throw new StrictModeError(msg + " — move it under the markdown's directory or drop --strict");
|
if (opts.strict) throw new StrictModeError(msg + " — move it under the markdown's directory or drop --strict");
|
||||||
opts.warn(msg);
|
opts.warn(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fs.existsSync(filePath)) {
|
// Bound the read BEFORE reading: a markdown image pointing at a special
|
||||||
const msg = `image not found: ${src} (resolved to ${filePath})`;
|
// file (fifo, device) would hang readFileSync, and a multi-GB file would
|
||||||
|
// exhaust memory before any policy ran.
|
||||||
|
let stat: fs.Stats;
|
||||||
|
try {
|
||||||
|
stat = fs.statSync(filePath);
|
||||||
|
} catch {
|
||||||
|
opts.warn(`image unreadable: ${src}`);
|
||||||
|
return buildMissingImagePlaceholder(src);
|
||||||
|
}
|
||||||
|
if (!stat.isFile()) {
|
||||||
|
const msg = `image is not a regular file: ${src}`;
|
||||||
|
if (opts.strict) throw new StrictModeError(msg);
|
||||||
|
opts.warn(msg);
|
||||||
|
return buildMissingImagePlaceholder(src);
|
||||||
|
}
|
||||||
|
if (stat.size > MAX_IMAGE_BYTES) {
|
||||||
|
const msg = `image exceeds ${Math.round(MAX_IMAGE_BYTES / 1024 / 1024)}MB cap: ${src} (${Math.round(stat.size / 1024 / 1024)}MB)`;
|
||||||
if (opts.strict) throw new StrictModeError(msg);
|
if (opts.strict) throw new StrictModeError(msg);
|
||||||
opts.warn(msg);
|
opts.warn(msg);
|
||||||
return buildMissingImagePlaceholder(src);
|
return buildMissingImagePlaceholder(src);
|
||||||
@@ -665,20 +704,17 @@ export function inlineLocalImages(html: string, opts: PrepassImageOptions): stri
|
|||||||
const attrs = dims
|
const attrs = dims
|
||||||
? ` data-gstack-px-width="${Math.round(dims.width)}" data-gstack-px-height="${Math.round(dims.height)}"`
|
? ` data-gstack-px-width="${Math.round(dims.width)}" data-gstack-px-height="${Math.round(dims.height)}"`
|
||||||
: "";
|
: "";
|
||||||
memo.set(filePath, `${dataUri} | |||||||