mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 15:20:11 +02:00
test(make-pdf): emoji render gate (pdffonts + pixel proof)
pdftotext is a false oracle for emoji: Skia preserves the Unicode in the text cluster even when the glyph drew as .notdef tofu, so extraction passes on a broken render. The gate instead asserts (1) pdffonts shows an emoji family embedded and (2) pdftoppm rasterizes the page to color (measured ~1650 saturated pixels vs ~0 for tofu). pdfimages is not used: macOS embeds color emoji as Type 3 fonts, so it lists nothing even on a correct render. Adds resolvePopplerTool() (DRY resolver, returns null for clean skips) and a fixture exercising FE0F variation-selector emoji. Skips cleanly when poppler tools or a color-emoji font are unavailable. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -114,6 +114,34 @@ export function resolvePdftotext(env: NodeJS.ProcessEnv = process.env): Pdftotex
|
||||
].join("\n"));
|
||||
}
|
||||
|
||||
/**
 * Find the absolute path of one of the poppler helper binaries that the emoji
 * render gate shells out to.
 *
 * Lookup proceeds in three stages and stops at the first hit:
 *   1. the `GSTACK_<TOOL>_BIN` environment override (for example
 *      `GSTACK_PDFTOPPM_BIN`), passed through `resolveOverride`
 *   2. a PATH lookup via `Bun.which`, using `env.PATH` (or `env.Path`)
 *   3. a fixed list of POSIX install directories (Homebrew first, then distro)
 *
 * @param tool  Name of the poppler binary to locate.
 * @param env   Environment to read the override and PATH from; defaults to
 *              `process.env`.
 * @returns The resolved path, or `null` when every stage comes up empty. This
 *          function itself never throws for a missing tool, so callers can
 *          skip the gate instead of failing on machines without poppler-utils.
 */
export function resolvePopplerTool(
  tool: "pdffonts" | "pdfimages" | "pdftoppm",
  env: NodeJS.ProcessEnv = process.env,
): string | null {
  // Stage 1: explicit per-tool override from the environment.
  const overrideKey = `GSTACK_${tool.toUpperCase()}_BIN`;
  const fromOverride = resolveOverride(env[overrideKey], env);
  if (fromOverride) {
    return fromOverride;
  }

  // Stage 2: whatever the caller-supplied PATH resolves to.
  const searchPath = env.PATH ?? env.Path ?? "";
  const fromPath = Bun.which(tool, { PATH: searchPath });
  if (fromPath) {
    return fromPath;
  }

  // Stage 3: well-known install locations, probed in priority order.
  const fallbackDirs = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];
  for (const dir of fallbackDirs) {
    const found = findExecutable(path.join(dir, tool));
    if (found) {
      return found;
    }
  }

  return null;
}
|
||||
|
||||
function isExecutable(p: string): boolean {
|
||||
try {
|
||||
fs.accessSync(p, fs.constants.X_OK);
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* Emoji render gate — proves emoji code points render as real color glyphs in
|
||||
* the output PDF instead of .notdef tofu boxes (▯). This is the regression gate
|
||||
* for fix/make-pdf-emoji-tofu.
|
||||
*
|
||||
* Why not just check pdftotext? Because text extraction is a FALSE oracle for
|
||||
* emoji: Skia preserves the Unicode in the text cluster even when the displayed
|
||||
* glyph is .notdef, so pdftotext can report the emoji survived on a render that
|
||||
* actually drew tofu. Verified empirically on macOS — pdftotext extracts 😀
|
||||
* regardless of whether a color font was available.
|
||||
*
|
||||
* Two assertions that DO distinguish a real render from tofu:
|
||||
* 1. pdffonts shows an emoji family embedded in the PDF (the cascade selected
|
||||
* a real emoji font — AppleColorEmoji as Type 3 on macOS, NotoColorEmoji
|
||||
* on Linux). Missing-fallback => no emoji font embedded.
|
||||
* 2. pdftoppm rasterizes the page and we count saturated (colored) pixels.
|
||||
* A color-emoji render has hundreds (measured: ~1650 at 100dpi); a tofu
|
||||
* render is a monochrome black outline on white (~0 saturated). Tolerant
|
||||
* threshold, not an exact-pixel fixture diff, to dodge cross-platform AA
|
||||
* and font-version variance.
|
||||
*
|
||||
* Note: pdfimages -list is intentionally NOT used — macOS embeds color emoji as
|
||||
* Type 3 fonts, so pdfimages lists nothing even on a correct render.
|
||||
*
|
||||
* Gating: runs only when the compiled binary + browse + pdffonts + pdftoppm are
|
||||
* available AND a color-emoji font is installed for Chromium to fall back to.
|
||||
* Skipped cleanly otherwise (local dev before ./setup installs the font).
|
||||
*/
|
||||
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
import { resolvePopplerTool } from "../../src/pdftotext";
|
||||
|
||||
const FIXTURE = path.resolve(__dirname, "../fixtures/emoji-gate.md");
|
||||
const ROOT = path.resolve(__dirname, "../../..");
|
||||
const PDF_BIN = path.join(ROOT, "make-pdf/dist/pdf");
|
||||
const BROWSE_BIN = path.join(ROOT, "browse/dist/browse");
|
||||
|
||||
// Saturated-pixel floor. Measured ~1650 at 100dpi for the fixture's color
|
||||
// emoji; a tofu render yields ~0. 200 sits well clear of both.
|
||||
const SATURATED_PIXEL_FLOOR = 200;
|
||||
// A pixel is "colored" when its max-min channel spread exceeds this. Black text,
|
||||
// gray rules, and white background all stay near 0; color emoji spike high.
|
||||
const SATURATION_DELTA = 40;
|
||||
|
||||
/**
 * Report whether this machine has a color-emoji font that Chromium can fall
 * back to when rendering the fixture.
 *
 * - macOS: checks that the system Apple Color Emoji font file exists.
 * - Linux: asks `fc-match` for the `color` property of the best match for an
 *   emoji-language pattern covering U+1F600, and accepts any output containing
 *   "true" (case-insensitive). A missing `fc-match` or a failing invocation
 *   counts as "not available".
 * - Any other platform: always `false`.
 */
function emojiFontAvailable(): boolean {
  switch (process.platform) {
    case "darwin":
      return fs.existsSync("/System/Library/Fonts/Apple Color Emoji.ttc");

    case "linux": {
      const fcMatchBin = Bun.which("fc-match");
      if (!fcMatchBin) {
        return false;
      }

      let colorFlag: string;
      try {
        colorFlag = execFileSync(
          fcMatchBin,
          ["-f", "%{color}\n", ":lang=und-zsye:charset=1F600"],
          { encoding: "utf8" },
        );
      } catch {
        // fc-match exited non-zero or could not be spawned.
        return false;
      }
      return /true/i.test(colorFlag);
    }

    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Decide whether the emoji gate can run on this machine.
 *
 * The checks run in a fixed order and stop at the first failure, so `reason`
 * always names the earliest missing prerequisite: the make-pdf binary, the
 * browse binary, the fixture file, pdffonts, pdftoppm, then a color-emoji font.
 *
 * @returns `{ ok: true }` when everything is present, otherwise
 *          `{ ok: false, reason }` with a human-readable explanation.
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  // Each entry pairs a lazily evaluated probe with the message reported when it fails.
  const checks: ReadonlyArray<readonly [() => boolean, string]> = [
    [() => fs.existsSync(PDF_BIN), `make-pdf binary missing (${PDF_BIN}). Run bun run build.`],
    [() => fs.existsSync(BROWSE_BIN), `browse binary missing (${BROWSE_BIN}).`],
    [() => fs.existsSync(FIXTURE), `fixture missing (${FIXTURE}).`],
    [() => Boolean(resolvePopplerTool("pdffonts")), "pdffonts not found (install poppler-utils)."],
    [() => Boolean(resolvePopplerTool("pdftoppm")), "pdftoppm not found (install poppler-utils)."],
    [() => emojiFontAvailable(), "no color-emoji font installed; run ./setup (Linux) or install one."],
  ];

  for (const [probe, reason] of checks) {
    if (!probe()) {
      return { ok: false, reason };
    }
  }
  return { ok: true };
}
|
||||
|
||||
/**
 * Count the pixels in a binary (P6) PPM file whose RGB channel spread
 * (largest channel minus smallest channel) is strictly greater than `delta`.
 *
 * The header is parsed per the Netpbm PPM layout: magic, width, height and
 * maxval separated by whitespace, with `#` comments allowed between header
 * tokens, followed by exactly one whitespace byte and then the raw raster.
 *
 * `delta` is expressed on a 0-255 scale. When the file's maxval is below 255
 * the spread is scaled up before comparison; for the usual maxval of 255 the
 * comparison is simply `spread > delta`.
 *
 * @param ppmPath  Path of the PPM file to read.
 * @param delta    Minimum channel spread (exclusive) for a pixel to count.
 * @returns Number of pixels whose spread exceeds `delta`.
 * @throws Error if the magic is not `P6`, a header field is not a decimal
 *         integer, maxval is outside 1..255 (two-byte samples are not
 *         supported), or the raster is shorter than width * height * 3 bytes.
 */
function countSaturatedPixels(ppmPath: string, delta: number): number {
  const bytes = fs.readFileSync(ppmPath);
  let pos = 0;

  // Netpbm whitespace: space, TAB, LF, VT, FF, CR.
  const isSpace = (c: number | undefined): boolean =>
    c === 0x20 || c === 0x09 || c === 0x0a || c === 0x0b || c === 0x0c || c === 0x0d;

  // Return the next whitespace-delimited header token, skipping `#` comments.
  const nextToken = (): string => {
    for (;;) {
      while (pos < bytes.length && isSpace(bytes[pos])) pos++;
      if (bytes[pos] !== 0x23) break; // not a '#': a real token starts here
      // A comment runs to the end of its line.
      while (pos < bytes.length && bytes[pos] !== 0x0a && bytes[pos] !== 0x0d) pos++;
    }
    const start = pos;
    while (pos < bytes.length && !isSpace(bytes[pos])) pos++;
    return bytes.subarray(start, pos).toString("ascii");
  };

  // Read a header field that must be a plain non-negative decimal integer.
  const nextInt = (field: string): number => {
    const raw = nextToken();
    if (!/^[0-9]+$/.test(raw)) {
      throw new Error(`invalid PPM ${field}: "${raw}"`);
    }
    return Number(raw);
  };

  const magic = nextToken();
  if (magic !== "P6") throw new Error(`expected P6 PPM, got ${magic}`);

  const width = nextInt("width");
  const height = nextInt("height");
  const maxval = nextInt("maxval");
  if (maxval < 1 || maxval > 255) {
    // maxval > 255 means two bytes per sample, which this reader does not decode.
    throw new Error(`unsupported PPM maxval ${maxval} (expected 1..255)`);
  }

  pos++; // single whitespace byte after the maxval precedes the pixel block

  const rasterBytes = width * height * 3;
  if (bytes.length - pos < rasterBytes) {
    throw new Error(
      `truncated PPM: expected ${rasterBytes} raster bytes, found ${Math.max(0, bytes.length - pos)}`,
    );
  }

  // Compare spread/maxval against delta/255 without dividing.
  const scaledDelta = delta * maxval;
  const end = pos + rasterBytes;
  let saturated = 0;
  for (let o = pos; o < end; o += 3) {
    const r = bytes[o], g = bytes[o + 1], b = bytes[o + 2];
    const spread = Math.max(r, g, b) - Math.min(r, g, b);
    if (spread * 255 > scaledDelta) saturated++;
  }
  return saturated;
}
|
||||
|
||||
// End-to-end gate: render the fixture with the compiled make-pdf binary, then
// prove the emoji drew as color glyphs by (1) finding an emoji font in the
// pdffonts listing and (2) counting saturated pixels in a pdftoppm raster.
describe("emoji render gate", () => {
  const avail = prerequisitesAvailable();

  test.skipIf(!avail.ok)("emoji render as color glyphs, not tofu", () => {
    if (!avail.ok) return; // type narrowing

    // Both artifacts share one per-process prefix; pdftoppm appends ".ppm" itself.
    const ppmPrefix = `/tmp/make-pdf-emoji-gate-${process.pid}`;
    const outputPdf = `${ppmPrefix}.pdf`;
    const ppmPath = `${ppmPrefix}.ppm`;

    try {
      execFileSync(PDF_BIN, ["generate", FIXTURE, outputPdf, "--quiet"], {
        encoding: "utf8",
        env: { ...process.env, BROWSE_BIN },
        stdio: ["ignore", "pipe", "pipe"],
      });
      expect(fs.existsSync(outputPdf)).toBe(true);

      // 1. An emoji family must be embedded, i.e. the font cascade found a
      //    real emoji font instead of falling through to .notdef.
      const pdffontsBin = resolvePopplerTool("pdffonts")!;
      const fontList = execFileSync(pdffontsBin, [outputPdf], { encoding: "utf8" });
      const hasEmojiFont = /emoji/i.test(fontList);
      if (!hasEmojiFont) {
        // Dump the listing so a failure shows which fonts were embedded.
        process.stderr.write(`\n--- pdffonts ---\n${fontList}\n--- END ---\n`);
      }
      expect(hasEmojiFont).toBe(true);

      // 2. The page must actually rasterize to color, not a monochrome tofu box.
      const pdftoppmBin = resolvePopplerTool("pdftoppm")!;
      execFileSync(pdftoppmBin, ["-r", "100", "-singlefile", outputPdf, ppmPrefix], {
        stdio: ["ignore", "pipe", "pipe"],
      });
      expect(fs.existsSync(ppmPath)).toBe(true);

      const saturated = countSaturatedPixels(ppmPath, SATURATION_DELTA);
      if (saturated < SATURATED_PIXEL_FLOOR) {
        process.stderr.write(`\n[emoji-gate] saturated pixels: ${saturated} (floor ${SATURATED_PIXEL_FLOOR})\n`);
      }
      expect(saturated).toBeGreaterThanOrEqual(SATURATED_PIXEL_FLOOR);
    } finally {
      // Best-effort cleanup; either file may not exist if an earlier step failed.
      for (const leftover of [outputPdf, ppmPath]) {
        try {
          fs.unlinkSync(leftover);
        } catch {
          /* ignore */
        }
      }
    }
  }, 30000);

  // When the gate is skipped, surface the reason through a trivially passing test.
  if (!avail.ok) {
    const skipReason = avail.reason;
    test("prerequisites check", () => {
      console.warn(`[skip] ${skipReason}`);
    });
  }
});
|
||||
Vendored
+12
@@ -0,0 +1,12 @@
|
||||
# Emoji rendering gate 😀
|
||||
|
||||
This fixture exists to prove that emoji code points render as real color
|
||||
glyphs in the output PDF, not as `.notdef` tofu boxes (▯).
|
||||
|
||||
Color emoji on one line: 😀 ❤️ 🚀 ✅ 💡
|
||||
|
||||
A variation-selector sequence (FE0F) renders color: ❤️ — the bare code point
|
||||
❤ is text-style. Both must come from a font in the cascade, never tofu.
|
||||
|
||||
Non-emoji Unicode (unchanged, regression guard): em dash —, times ×, arrow →,
|
||||
bullet •, ellipsis …
|
||||
Reference in New Issue
Block a user