From 4aeecd0fdb2dd91f1fbdf0fe63f22cfe7f2474bf Mon Sep 17 00:00:00 2001 From: tdurieux Date: Mon, 4 May 2026 11:34:54 +0200 Subject: [PATCH] fix: recognize LICENSE / COPYING / etc. as text files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit istextorbinary returns null for filenames with no extension, and the isTextFile() guard treated null as "not text" — so terms in LICENSE, COPYING, AUTHORS, README (extensionless), CHANGELOG, NOTICE, and similar conventional filenames went through the binary passthrough in AnonymizeTransformer and were never anonymized. Add a small whitelist of these names ahead of the istextorbinary call. Fixes #493. --- src/core/anonymize-utils.ts | 26 ++++++++++++++++++++++++++ test/is-text-file.test.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 test/is-text-file.test.js diff --git a/src/core/anonymize-utils.ts b/src/core/anonymize-utils.ts index 7e7ddb5..7d54e90 100644 --- a/src/core/anonymize-utils.ts +++ b/src/core/anonymize-utils.ts @@ -22,6 +22,29 @@ export function streamToString(stream: Readable): Promise { }); } +// Common conventional plaintext filenames that have no extension. The +// istextorbinary package returns null (unknown) for these, which our +// `=== true` check then treats as binary — so terms in LICENSE, COPYING, +// etc. silently went through unchanged (#493). +const KNOWN_TEXT_FILENAMES = new Set( + [ + "license", + "licence", + "copying", + "copyright", + "authors", + "contributors", + "readme", + "changelog", + "changes", + "notice", + "install", + "todo", + "version", + "manifest", + ] +); + export function isTextFile(filePath: string, content?: Buffer) { const filename = basename(filePath); const extensions = filename.split(".").reverse(); @@ -29,6 +52,9 @@ export function isTextFile(filePath: string, content?: Buffer) { if (config.additionalExtensions.includes(extension)) { return true; } + if (KNOWN_TEXT_FILENAMES.has(filename.toLowerCase())) { + return true; + } if (isText(filename)) { return true; } diff --git a/test/is-text-file.test.js b/test/is-text-file.test.js new file mode 100644 index 0000000..8c94dac --- /dev/null +++ b/test/is-text-file.test.js @@ -0,0 +1,30 @@ +const { expect } = require("chai"); +require("ts-node/register/transpile-only"); +const { isTextFile } = require("../src/core/anonymize-utils"); + +describe("isTextFile", function () { + // #493 — istextorbinary returns null for files with no extension, so a + // bare LICENSE / COPYING / etc. used to be classified as binary and + // never anonymized. Whitelist the conventional plaintext filenames. + it("recognizes conventional no-extension plaintext filenames", function () { + expect(isTextFile("LICENSE")).to.equal(true); + expect(isTextFile("license")).to.equal(true); + expect(isTextFile("COPYING")).to.equal(true); + expect(isTextFile("AUTHORS")).to.equal(true); + expect(isTextFile("README")).to.equal(true); + expect(isTextFile("CHANGELOG")).to.equal(true); + expect(isTextFile("NOTICE")).to.equal(true); + expect(isTextFile("path/to/LICENSE")).to.equal(true); + }); + + it("still recognizes well-known text extensions", function () { + expect(isTextFile("foo.txt")).to.equal(true); + expect(isTextFile("foo.md")).to.equal(true); + expect(isTextFile("foo.js")).to.equal(true); + }); + + it("does not classify binary files as text", function () { + expect(isTextFile("foo.png")).to.equal(false); + expect(isTextFile("foo.zip")).to.equal(false); + }); +});