/**
 * Find one of the poppler companion binaries (pdffonts, pdfimages, pdftoppm)
 * that the emoji render gate shells out to.
 *
 * Lookup order (intended to follow the same order as resolvePdftotext):
 *   1. the GSTACK_<TOOL>_BIN environment override, e.g. GSTACK_PDFFONTS_BIN
 *   2. the PATH taken from `env`, searched with Bun.which
 *   3. a fixed list of POSIX install directories (Homebrew, then distro)
 *
 * @param tool - name of the poppler binary to look for
 * @param env - environment to read the override and PATH from
 * @returns the resolved executable, or null when nothing was found. This
 *   function itself never throws for a missing tool, so callers such as the
 *   emoji gate can skip instead of failing on a machine without the full
 *   poppler-utils package.
 */
export function resolvePopplerTool(
  tool: "pdffonts" | "pdfimages" | "pdftoppm",
  env: NodeJS.ProcessEnv = process.env,
): string | null {
  // 1. Explicit per-tool override wins over everything else.
  const overrideVar = `GSTACK_${tool.toUpperCase()}_BIN`;
  const fromOverride = resolveOverride(env[overrideVar], env);
  if (fromOverride) return fromOverride;

  // 2. PATH lookup; `Path` is consulted as a fallback spelling of the variable.
  const searchPath = env.PATH ?? env.Path ?? "";
  const fromPath = Bun.which(tool, { PATH: searchPath });
  if (fromPath) return fromPath;

  // 3. Well-known install directories, first hit wins.
  const fallbackDirs = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];
  for (const binDir of fallbackDirs) {
    const found = findExecutable(path.join(binDir, tool));
    if (found) return found;
  }

  return null;
}
Because text extraction is a FALSE oracle for + * emoji: Skia preserves the Unicode in the text cluster even when the displayed + * glyph is .notdef, so pdftotext can report the emoji survived on a render that + * actually drew tofu. Verified empirically on macOS — pdftotext extracts 😀 + * regardless of whether a color font was available. + * + * Two assertions that DO distinguish a real render from tofu: + * 1. pdffonts shows an emoji family embedded in the PDF (the cascade selected + * a real emoji font — AppleColorEmoji as Type 3 on macOS, NotoColorEmoji + * on Linux). Missing-fallback => no emoji font embedded. + * 2. pdftoppm rasterizes the page and we count saturated (colored) pixels. + * A color-emoji render has hundreds (measured: ~1650 at 100dpi); a tofu + * render is a monochrome black outline on white (~0 saturated). Tolerant + * threshold, not an exact-pixel fixture diff, to dodge cross-platform AA + * and font-version variance. + * + * Note: pdfimages -list is intentionally NOT used — macOS embeds color emoji as + * Type 3 fonts, so pdfimages lists nothing even on a correct render. + * + * Gating: runs only when the compiled binary + browse + pdffonts + pdftoppm are + * available AND a color-emoji font is installed for Chromium to fall back to. + * Skipped cleanly otherwise (local dev before ./setup installs the font). + */ + +import { describe, expect, test } from "bun:test"; +import { execFileSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as path from "node:path"; + +import { resolvePopplerTool } from "../../src/pdftotext"; + +const FIXTURE = path.resolve(__dirname, "../fixtures/emoji-gate.md"); +const ROOT = path.resolve(__dirname, "../../.."); +const PDF_BIN = path.join(ROOT, "make-pdf/dist/pdf"); +const BROWSE_BIN = path.join(ROOT, "browse/dist/browse"); + +// Saturated-pixel floor. Measured ~1650 at 100dpi for the fixture's color +// emoji; a tofu render yields ~0. 200 sits well clear of both. 
const SATURATED_PIXEL_FLOOR = 200;
// Channel-spread threshold: a pixel counts as "colored" once max(R,G,B) minus
// min(R,G,B) is above this value. Black text, gray rules and the white page
// background stay close to 0, while color emoji produce large spreads.
const SATURATION_DELTA = 40;

/**
 * Report whether a color-emoji font is present for Chromium to fall back to.
 *
 * - macOS: checks for the system Apple Color Emoji font file.
 * - Linux: asks fontconfig (`fc-match`) which font it would pick for U+1F600
 *   and whether that font is flagged as a color font.
 * - any other platform: always false.
 */
function emojiFontAvailable(): boolean {
  switch (process.platform) {
    case "darwin":
      return fs.existsSync("/System/Library/Fonts/Apple Color Emoji.ttc");

    case "linux": {
      const fcMatchBin = Bun.which("fc-match");
      if (!fcMatchBin) return false;

      let matchOutput: string;
      try {
        matchOutput = execFileSync(
          fcMatchBin,
          ["-f", "%{color}\n", ":lang=und-zsye:charset=1F600"],
          { encoding: "utf8" },
        );
      } catch {
        // fc-match failing is treated the same as "no color font".
        return false;
      }
      return /true/i.test(matchOutput);
    }

    default:
      return false;
  }
}

/**
 * Decide whether the emoji gate can run on this machine.
 *
 * The checks run in a fixed order and stop at the first failure, so the
 * returned reason always names the earliest missing prerequisite.
 *
 * @returns `{ ok: true }` when everything is present, otherwise
 *   `{ ok: false, reason }` with a human-readable explanation.
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  // Each entry pairs a lazily evaluated check with the message used on failure.
  const requirements: ReadonlyArray<readonly [() => boolean, string]> = [
    [() => fs.existsSync(PDF_BIN), `make-pdf binary missing (${PDF_BIN}). Run bun run build.`],
    [() => fs.existsSync(BROWSE_BIN), `browse binary missing (${BROWSE_BIN}).`],
    [() => fs.existsSync(FIXTURE), `fixture missing (${FIXTURE}).`],
    [() => Boolean(resolvePopplerTool("pdffonts")), "pdffonts not found (install poppler-utils)."],
    [() => Boolean(resolvePopplerTool("pdftoppm")), "pdftoppm not found (install poppler-utils)."],
    [() => emojiFontAvailable(), "no color-emoji font installed; run ./setup (Linux) or install one."],
  ];

  for (const [isSatisfied, reason] of requirements) {
    if (!isSatisfied()) return { ok: false, reason };
  }
  return { ok: true };
}
/**
 * Count the pixels of a binary (P6) PPM whose RGB channel spread, i.e. the
 * difference between the largest and the smallest of R, G and B, is strictly
 * greater than `delta`.
 *
 * The header is parsed as whitespace-separated tokens (magic, width, height,
 * maxval); `#` comments between header tokens are skipped. Only 8-bit
 * samples (maxval 1..255) are handled, so `delta` is compared against raw
 * byte values.
 *
 * @param ppmPath - path of the PPM file to read
 * @param delta - channel-spread threshold; a pixel counts when spread > delta
 * @returns number of pixels above the threshold
 * @throws Error when the file is not P6, a header field is not a
 *   non-negative integer, maxval is outside 1..255, or the file holds fewer
 *   pixel bytes than the header announces. Failing loudly here avoids
 *   reporting a misleadingly low count for a corrupt or truncated image.
 */
function countSaturatedPixels(ppmPath: string, delta: number): number {
  const data = fs.readFileSync(ppmPath);
  let pos = 0;

  const isSpace = (byte: number | undefined): boolean =>
    byte === 0x20 || byte === 0x0a || byte === 0x09 || byte === 0x0d;

  // Read the next whitespace-delimited header token, skipping `#` comments
  // (which run to the end of their line).
  const nextToken = (): string => {
    for (;;) {
      while (pos < data.length && isSpace(data[pos])) pos++;
      if (data[pos] !== 0x23) break;
      while (pos < data.length && data[pos] !== 0x0a && data[pos] !== 0x0d) pos++;
    }
    const start = pos;
    while (pos < data.length && !isSpace(data[pos])) pos++;
    return data.subarray(start, pos).toString("ascii");
  };

  const digitsOnly = /^\d+$/;
  const headerInt = (label: string): number => {
    const raw = nextToken();
    if (!digitsOnly.test(raw)) {
      throw new Error(`invalid PPM ${label}: "${raw}"`);
    }
    return Number(raw);
  };

  const magic = nextToken();
  if (magic !== "P6") throw new Error(`expected P6 PPM, got ${magic}`);

  const width = headerInt("width");
  const height = headerInt("height");
  const maxval = headerInt("maxval");
  if (maxval < 1 || maxval > 255) {
    // maxval > 255 means two bytes per sample, which this reader does not decode.
    throw new Error(`unsupported PPM maxval ${maxval} (only 8-bit samples are handled)`);
  }

  pos++; // exactly one whitespace byte separates the maxval from the raster

  const expectedBytes = width * height * 3;
  const availableBytes = Math.max(0, data.length - pos);
  if (availableBytes < expectedBytes) {
    throw new Error(
      `truncated PPM: expected ${expectedBytes} pixel bytes, found ${availableBytes}`,
    );
  }

  const raster = data.subarray(pos, pos + expectedBytes);
  let saturated = 0;
  for (let offset = 0; offset < raster.length; offset += 3) {
    const r = raster.readUInt8(offset);
    const g = raster.readUInt8(offset + 1);
    const b = raster.readUInt8(offset + 2);
    if (Math.max(r, g, b) - Math.min(r, g, b) > delta) saturated++;
  }
  return saturated;
}
describe("emoji render gate", () => {
  const avail = prerequisitesAvailable();
  // Any embedded font whose pdffonts listing mentions "emoji" counts as an emoji family.
  const emojiFamily = /emoji/i;

  test.skipIf(!avail.ok)("emoji render as color glyphs, not tofu", () => {
    if (!avail.ok) return; // type narrowing

    // All scratch artifacts share one pid-scoped prefix under /tmp.
    const scratchBase = `/tmp/make-pdf-emoji-gate-${process.pid}`;
    const outputPdf = `${scratchBase}.pdf`;
    const ppmPath = `${scratchBase}.ppm`;

    try {
      // Render the fixture through the compiled make-pdf binary.
      execFileSync(PDF_BIN, ["generate", FIXTURE, outputPdf, "--quiet"], {
        encoding: "utf8",
        env: { ...process.env, BROWSE_BIN },
        stdio: ["ignore", "pipe", "pipe"],
      });
      expect(fs.existsSync(outputPdf)).toBe(true);

      // Assertion 1: the PDF embeds an emoji font family, i.e. the font
      // cascade picked a real emoji font rather than falling through to .notdef.
      const pdffontsBin = resolvePopplerTool("pdffonts")!;
      const embeddedFonts = execFileSync(pdffontsBin, [outputPdf], { encoding: "utf8" });
      const hasEmojiFamily = emojiFamily.test(embeddedFonts);
      if (!hasEmojiFamily) {
        // Dump the font table so a failure is diagnosable from the test log.
        process.stderr.write(`\n--- pdffonts ---\n${embeddedFonts}\n--- END ---\n`);
      }
      expect(hasEmojiFamily).toBe(true);

      // Assertion 2: the rasterized page contains enough colored pixels; a
      // tofu box would be a monochrome outline instead.
      const pdftoppmBin = resolvePopplerTool("pdftoppm")!;
      execFileSync(pdftoppmBin, ["-r", "100", "-singlefile", outputPdf, scratchBase], {
        stdio: ["ignore", "pipe", "pipe"],
      });
      expect(fs.existsSync(ppmPath)).toBe(true);

      const coloredPixels = countSaturatedPixels(ppmPath, SATURATION_DELTA);
      if (coloredPixels < SATURATED_PIXEL_FLOOR) {
        process.stderr.write(`\n[emoji-gate] saturated pixels: ${coloredPixels} (floor ${SATURATED_PIXEL_FLOOR})\n`);
      }
      expect(coloredPixels).toBeGreaterThanOrEqual(SATURATED_PIXEL_FLOOR);
    } finally {
      // Best-effort cleanup; a missing file is not an error here.
      for (const leftover of [outputPdf, ppmPath]) {
        try {
          fs.unlinkSync(leftover);
        } catch {
          /* ignore */
        }
      }
    }
  }, 30000);

  // When the gate is skipped, surface the reason through a trivially passing test.
  if (!avail.ok) {
    test("prerequisites check", () => {
      console.warn(`[skip] ${avail.reason}`);
    });
  }
});