From d3bc54526828d01a861d74ce4c0cd9a8a22de56d Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 17 May 2026 20:29:08 -0700 Subject: [PATCH] test(browse): regression tests for downloadFile cleanup + parsePdfFromFile guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covers PR #1169 bugs #6 and #7: - security-classifier-download-cleanup.test.ts pins downloadFile error-path cleanup against three failure shapes: reader rejects mid-stream, non-2xx response, missing body. Asserts the dest file is not created and no .tmp.* siblings remain (glob-matched, not exact path — codex push: if the fix later switches to mkdtempSync, the assertion still holds). Includes a happy-path case so the cleanup isn't fighting a correct download. - regression-pr1169-pdf-from-file-invalid-json.test.ts pins parsePdfFromFile to throw a helpful error for: invalid JSON, empty file, top-level array, top-level number, top-level string, top-level null, top-level boolean. Codex push: JSON.parse accepts primitives too, so Array.isArray + typeof guard must be tested separately from the JSON.parse try/catch. Both files use mkdtempSync(process.cwd()/...) for fixture isolation since SAFE_DIRECTORIES allows TEMP_DIR or cwd; cwd is universal across CI hosts. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...-pr1169-pdf-from-file-invalid-json.test.ts | 83 +++++++++++ ...curity-classifier-download-cleanup.test.ts | 138 ++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 browse/test/regression-pr1169-pdf-from-file-invalid-json.test.ts create mode 100644 browse/test/security-classifier-download-cleanup.test.ts diff --git a/browse/test/regression-pr1169-pdf-from-file-invalid-json.test.ts b/browse/test/regression-pr1169-pdf-from-file-invalid-json.test.ts new file mode 100644 index 000000000..834bce078 --- /dev/null +++ b/browse/test/regression-pr1169-pdf-from-file-invalid-json.test.ts @@ -0,0 +1,83 @@ +/** + * Regression test for PR #1169 bug #7 — `pdf --from-file` ran JSON.parse on + * user-supplied file contents with no try/catch. A malformed payload crashed + * the pdf handler with a raw SyntaxError. Codex flagged that JSON.parse + * accepts primitives too (numbers, strings, null) and Array.isArray must be + * checked separately, so the fix added an explicit object-shape gate. + * + * Test surface: parsePdfFromFile, exported for tests at meta-commands.ts:139. + * All fixtures land in process.cwd() (SAFE_DIRECTORIES allows TEMP_DIR or cwd; + * cwd is universally safe on every platform our CI runs on). + */ +import { describe, expect, test, beforeAll, afterAll } from "bun:test"; +import * as fs from "node:fs"; +import * as path from "node:path"; + +import { parsePdfFromFile } from "../src/meta-commands"; + +const FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-pdf-")); + +beforeAll(() => { + // mkdtempSync already created the dir +}); + +afterAll(() => { + fs.rmSync(FIXTURE_DIR, { recursive: true, force: true }); +}); + +function writeFixture(name: string, body: string): string { + const p = path.join(FIXTURE_DIR, name); + fs.writeFileSync(p, body); + return p; +} + +describe("parsePdfFromFile — invalid JSON regression (PR #1169 bug #7)", () => { + test("invalid JSON: throws with file path AND parser detail", () => { + const p = writeFixture("invalid.json", "{ not-json"); + expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/); + expect(() => parsePdfFromFile(p)).toThrow(p); + }); + + test("empty file: throws JSON-parse style error", () => { + const p = writeFixture("empty.json", ""); + // Empty string is invalid JSON per ECMA-404. + expect(() => parsePdfFromFile(p)).toThrow(/not valid JSON/); + }); + + test("top-level array: throws 'must be a JSON object' with type", () => { + const p = writeFixture("array.json", JSON.stringify(["a", "b"])); + expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/); + expect(() => parsePdfFromFile(p)).toThrow(/array/); + }); + + test("top-level number: throws with 'number' type label", () => { + const p = writeFixture("number.json", "42"); + expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/); + expect(() => parsePdfFromFile(p)).toThrow(/number/); + }); + + test("top-level string: throws with 'string' type label", () => { + const p = writeFixture("string.json", JSON.stringify("hello")); + expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/); + expect(() => parsePdfFromFile(p)).toThrow(/string/); + }); + + test("top-level null: throws with 'object' type label (JS null typeof === object)", () => { + const p = writeFixture("null.json", "null"); + // null passes typeof === 'object' but the fix's `=== null` branch catches it. + expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/); + }); + + test("top-level boolean: throws with 'boolean' type label", () => { + const p = writeFixture("bool.json", "true"); + expect(() => parsePdfFromFile(p)).toThrow(/must be a JSON object/); + expect(() => parsePdfFromFile(p)).toThrow(/boolean/); + }); + + test("valid object: parses successfully (happy-path regression)", () => { + const p = writeFixture("valid.json", JSON.stringify({ format: "A4", pageNumbers: true })); + const result = parsePdfFromFile(p); + expect(result.format).toBe("A4"); + expect(result.pageNumbers).toBe(true); + }); +}); diff --git a/browse/test/security-classifier-download-cleanup.test.ts b/browse/test/security-classifier-download-cleanup.test.ts new file mode 100644 index 000000000..af82961f1 --- /dev/null +++ b/browse/test/security-classifier-download-cleanup.test.ts @@ -0,0 +1,138 @@ +/** + * Regression test for PR #1169 bug #6 — downloadFile opened a WriteStream to + * `.tmp.` but never closed it on error paths. If the reader or + * writer threw mid-download, the FD leaked and the half-written tmp could + * be promoted by a retry's renameSync. + * + * The fix wraps the read loop in try/catch and runs `writer.destroy()` + + * `fs.unlinkSync(tmp)` before rethrowing. + * + * Per codex's pushback, this test must exercise BOTH the reader-throws path + * and the non-2xx-response path, and it must NOT assume the specific tmp + * filename — only that no `.tmp.*` sibling remains. + */ +import { describe, expect, test, beforeAll, afterAll, beforeEach, afterEach } from "bun:test"; +import * as fs from "node:fs"; +import * as path from "node:path"; + +import { downloadFile } from "../src/security-classifier"; + +function tmpSiblings(destDir: string, destBase: string): string[] { + if (!fs.existsSync(destDir)) return []; + return fs.readdirSync(destDir).filter((f) => + f.startsWith(destBase + ".tmp.") + ); +} + +let FIXTURE_DIR = ""; +let originalFetch: typeof fetch; + +beforeAll(() => { + FIXTURE_DIR = fs.mkdtempSync(path.join(process.cwd(), "pr1169-dl-")); +}); + +afterAll(() => { + if (FIXTURE_DIR) { + fs.rmSync(FIXTURE_DIR, { recursive: true, force: true }); + } +}); + +beforeEach(() => { + originalFetch = globalThis.fetch; +}); + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("downloadFile error-path cleanup (PR #1169 bug #6)", () => { + test("reader rejects mid-stream: throws, no dest, no tmp sibling left", async () => { + const dest = path.join(FIXTURE_DIR, "reader-fail-model.bin"); + const destDir = path.dirname(dest); + const destBase = path.basename(dest); + + // Build a ReadableStream that emits one chunk then errors on second pull. + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new Uint8Array([1, 2, 3, 4])); + }, + pull(controller) { + // Second pull triggers the failure path the fix protects against. + controller.error(new Error("simulated mid-stream read failure")); + }, + }); + + // @ts-expect-error — overwrite global fetch for the test + globalThis.fetch = async () => + new Response(body, { status: 200, statusText: "OK" }); + + await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow( + /simulated mid-stream read failure/ + ); + + expect(fs.existsSync(dest)).toBe(false); + expect(tmpSiblings(destDir, destBase)).toEqual([]); + }); + + test("non-2xx response: throws with status, no tmp file created", async () => { + const dest = path.join(FIXTURE_DIR, "http500-model.bin"); + const destDir = path.dirname(dest); + const destBase = path.basename(dest); + + // @ts-expect-error — overwrite global fetch for the test + globalThis.fetch = async () => + new Response("server boom", { status: 500, statusText: "Server Error" }); + + await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow( + /Failed to fetch.*500/ + ); + + expect(fs.existsSync(dest)).toBe(false); + expect(tmpSiblings(destDir, destBase)).toEqual([]); + }); + + test("missing body: throws, no tmp file created", async () => { + const dest = path.join(FIXTURE_DIR, "nobody-model.bin"); + const destDir = path.dirname(dest); + const destBase = path.basename(dest); + + // Response with null body (some upstreams send this on edge errors). + // @ts-expect-error — overwrite global fetch for the test + globalThis.fetch = async () => + new Response(null, { status: 200, statusText: "OK" }); + + await expect(downloadFile("https://example.com/model.bin", dest)).rejects.toThrow( + /Failed to fetch/ + ); + + expect(fs.existsSync(dest)).toBe(false); + expect(tmpSiblings(destDir, destBase)).toEqual([]); + }); + + test("happy path: 2xx body completes, dest exists, no tmp sibling remains", async () => { + const dest = path.join(FIXTURE_DIR, "ok-model.bin"); + const destDir = path.dirname(dest); + const destBase = path.basename(dest); + + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new Uint8Array([9, 9, 9, 9])); + controller.close(); + }, + }); + + // @ts-expect-error — overwrite global fetch for the test + globalThis.fetch = async () => + new Response(body, { status: 200, statusText: "OK" }); + + await downloadFile("https://example.com/model.bin", dest); + + expect(fs.existsSync(dest)).toBe(true); + expect(tmpSiblings(destDir, destBase)).toEqual([]); + const written = fs.readFileSync(dest); + expect(Array.from(written)).toEqual([9, 9, 9, 9]); + + fs.unlinkSync(dest); + }); +}); +