test(browse): regression tests for downloadFile cleanup + parsePdfFromFile guard

Covers PR #1169 bugs #6 and #7:

- security-classifier-download-cleanup.test.ts pins downloadFile error-path
  cleanup against three failure shapes: reader rejects mid-stream, non-2xx
  response, missing body. Asserts the dest file is not created and no
  <dest>.tmp.* siblings remain (glob-matched, not exact path — codex push:
  if the fix later switches to mkdtempSync, the assertion still holds).
  Includes a happy-path case so the cleanup isn't fighting a correct download.

- regression-pr1169-pdf-from-file-invalid-json.test.ts pins parsePdfFromFile
  to throw a helpful error for: invalid JSON, empty file, top-level array,
  top-level number, top-level string, top-level null, top-level boolean.
  Codex push: JSON.parse accepts primitives too, so Array.isArray + typeof
  guard must be tested separately from the JSON.parse try/catch.

Both files use mkdtempSync(process.cwd()/...) for fixture isolation since
SAFE_DIRECTORIES allows TEMP_DIR or cwd; cwd is universal across CI hosts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-17 20:29:08 -07:00
parent 206e114387
commit d3bc545268
2 changed files with 221 additions and 0 deletions
@@ -0,0 +1,83 @@
/**
* Regression test for PR #1169 bug #7 — `pdf --from-file` ran JSON.parse on
* user-supplied file contents with no try/catch. A malformed payload crashed
* the pdf handler with a raw SyntaxError. Codex flagged that JSON.parse
* accepts primitives too (numbers, strings, null) and Array.isArray must be
* checked separately, so the fix added an explicit object-shape gate.
*
* Test surface: parsePdfFromFile, exported for tests at meta-commands.ts:139.
* All fixtures land in process.cwd() (SAFE_DIRECTORIES allows TEMP_DIR or cwd;
* cwd is universally safe on every platform our CI runs on).
*/
import { describe, expect, test, beforeAll, afterAll } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
import { parsePdfFromFile } from "../src/meta-commands";
const FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-pdf-"));
beforeAll(() => {
// mkdtempSync already created the dir
});
afterAll(() => {
fs.rmSync(FIXTURE_DIR, { recursive: true, force: true });
});
function writeFixture(name: string, body: string): string {
const p = path.join(FIXTURE_DIR, name);
fs.writeFileSync(p, body);
return p;
}
describe("parsePdfFromFile — invalid JSON regression (PR #1169 bug #7)", () => {
test("invalid JSON: throws with file path AND parser detail", () => {
const p = writeFixture("invalid.json", "{ not-json");
expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/);
expect(() => parsePdfFromFile(p)).toThrow(p);
});
test("empty file: throws JSON-parse style error", () => {
const p = writeFixture("empty.json", "");
// Empty string is invalid JSON per ECMA-404.
expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/);
});
test("top-level array: throws 'must be a JSON object' with type", () => {
const p = writeFixture("array.json", JSON.stringify(["a", "b"]));
expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
expect(() => parsePdfFromFile(p)).toThrow(/array/);
});
test("top-level number: throws with 'number' type label", () => {
const p = writeFixture("number.json", "42");
expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
expect(() => parsePdfFromFile(p)).toThrow(/number/);
});
test("top-level string: throws with 'string' type label", () => {
const p = writeFixture("string.json", JSON.stringify("hello"));
expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
expect(() => parsePdfFromFile(p)).toThrow(/string/);
});
test("top-level null: throws with 'object' type label (JS null typeof === object)", () => {
const p = writeFixture("null.json", "null");
// null passes typeof === 'object' but the fix's `=== null` branch catches it.
expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
});
test("top-level boolean: throws with 'boolean' type label", () => {
const p = writeFixture("bool.json", "true");
expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/);
expect(() => parsePdfFromFile(p)).toThrow(/boolean/);
});
test("valid object: parses successfully (happy-path regression)", () => {
const p = writeFixture("valid.json", JSON.stringify({ format: "A4", pageNumbers: true }));
const result = parsePdfFromFile(p);
expect(result.format).toBe("A4");
expect(result.pageNumbers).toBe(true);
});
});
@@ -0,0 +1,138 @@
/**
* Regression test for PR #1169 bug #6 — downloadFile opened a WriteStream to
* `<dest>.tmp.<pid>` but never closed it on error paths. If the reader or
* writer threw mid-download, the FD leaked and the half-written tmp could
* be promoted by a retry's renameSync.
*
* The fix wraps the read loop in try/catch and runs `writer.destroy()` +
* `fs.unlinkSync(tmp)` before rethrowing.
*
* Per codex's pushback, this test must exercise BOTH the reader-throws path
* and the non-2xx-response path, and it must NOT assume the specific tmp
* filename — only that no `<dest>.tmp.*` sibling remains.
*/
import { describe, expect, test, beforeAll, afterAll, beforeEach, afterEach } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
import { downloadFile } from "../src/security-classifier";
function tmpSiblings(destDir: string, destBase: string): string[] {
if (!fs.existsSync(destDir)) return [];
return fs.readdirSync(destDir).filter((f) =>
f.startsWith(destBase + ".tmp.")
);
}
let FIXTURE_DIR = "";
let originalFetch: typeof fetch;
beforeAll(() => {
FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-dl-"));
});
afterAll(() => {
if (FIXTURE_DIR) {
fs.rmSync(FIXTURE_DIR, { recursive: true, force: true });
}
});
beforeEach(() => {
originalFetch = globalThis.fetch;
});
afterEach(() => {
globalThis.fetch = originalFetch;
});
describe("downloadFile error-path cleanup (PR #1169 bug #6)", () => {
test("reader rejects mid-stream: throws, no dest, no tmp sibling left", async () => {
const dest = path.join(FIXTURE_DIR, "reader-fail-model.bin");
const destDir = path.dirname(dest);
const destBase = path.basename(dest);
// Build a ReadableStream that emits one chunk then errors on second pull.
const body = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(new Uint8Array([1, 2, 3, 4]));
},
pull(controller) {
// Second pull triggers the failure path the fix protects against.
controller.error(new Error("simulated mid-stream read failure"));
},
});
// @ts-expect-error — overwrite global fetch for the test
globalThis.fetch = async () =>
new Response(body, { status: 200, statusText: "OK" });
await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
/simulated mid-stream read failure/
);
expect(fs.existsSync(dest)).toBe(false);
expect(tmpSiblings(destDir, destBase)).toEqual([]);
});
test("non-2xx response: throws with status, no tmp file created", async () => {
const dest = path.join(FIXTURE_DIR, "http500-model.bin");
const destDir = path.dirname(dest);
const destBase = path.basename(dest);
// @ts-expect-error — overwrite global fetch for the test
globalThis.fetch = async () =>
new Response("server boom", { status: 500, statusText: "Server Error" });
await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
/Failed to fetch.*500/
);
expect(fs.existsSync(dest)).toBe(false);
expect(tmpSiblings(destDir, destBase)).toEqual([]);
});
test("missing body: throws, no tmp file created", async () => {
const dest = path.join(FIXTURE_DIR, "nobody-model.bin");
const destDir = path.dirname(dest);
const destBase = path.basename(dest);
// Response with null body (some upstreams send this on edge errors).
// @ts-expect-error — overwrite global fetch for the test
globalThis.fetch = async () =>
new Response(null, { status: 200, statusText: "OK" });
await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow(
/Failed to fetch/
);
expect(fs.existsSync(dest)).toBe(false);
expect(tmpSiblings(destDir, destBase)).toEqual([]);
});
test("happy path: 2xx body completes, dest exists, no tmp sibling remains", async () => {
const dest = path.join(FIXTURE_DIR, "ok-model.bin");
const destDir = path.dirname(dest);
const destBase = path.basename(dest);
const body = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(new Uint8Array([9, 9, 9, 9]));
controller.close();
},
});
// @ts-expect-error — overwrite global fetch for the test
globalThis.fetch = async () =>
new Response(body, { status: 200, statusText: "OK" });
await downloadFile("https://example.com/model.bin", dest);
expect(fs.existsSync(dest)).toBe(true);
expect(tmpSiblings(destDir, destBase)).toEqual([]);
const written = fs.readFileSync(dest);
expect(Array.from(written)).toEqual([9, 9, 9, 9]);
fs.unlinkSync(dest);
});
});