fix: recognize LICENSE / COPYING / etc. as text files

istextorbinary returns null for filenames with no extension, and the
isTextFile() guard treated null as "not text" — so extensionless files
such as LICENSE, COPYING, AUTHORS, README, CHANGELOG, NOTICE, and
similar conventional filenames went through the binary passthrough
in AnonymizeTransformer, and the terms in them were never anonymized.

Add a small whitelist of these names ahead of the istextorbinary call.

Fixes #493.
This commit is contained in:
tdurieux
2026-05-04 11:34:54 +02:00
parent f91db91cee
commit 4aeecd0fdb
2 changed files with 56 additions and 0 deletions
+30
View File
@@ -0,0 +1,30 @@
const { expect } = require("chai");
require("ts-node/register/transpile-only");
const { isTextFile } = require("../src/core/anonymize-utils");
// Test suite for isTextFile(), which classifies a file as text or not
// from its name/path alone.
describe("isTextFile", function () {
  // Regression coverage for #493: istextorbinary yields null when a
  // filename has no extension, which previously caused bare LICENSE /
  // COPYING / etc. to be treated as binary and skipped by anonymization.
  // The conventional plaintext filenames are whitelisted instead.
  it("recognizes conventional no-extension plaintext filenames", function () {
    const conventionalNames = [
      "LICENSE",
      "license", // lowercase variant must match too
      "COPYING",
      "AUTHORS",
      "README",
      "CHANGELOG",
      "NOTICE",
      "path/to/LICENSE", // directory prefix must not defeat the match
    ];
    for (const candidate of conventionalNames) {
      expect(isTextFile(candidate)).to.equal(true);
    }
  });

  // The whitelist must not change the outcome for extensions that were
  // already recognized as text.
  it("still recognizes well-known text extensions", function () {
    const textNames = ["foo.txt", "foo.md", "foo.js"];
    for (const candidate of textNames) {
      expect(isTextFile(candidate)).to.equal(true);
    }
  });

  // Binary extensions must keep being rejected (strictly `false`).
  it("does not classify binary files as text", function () {
    const binaryNames = ["foo.png", "foo.zip"];
    for (const candidate of binaryNames) {
      expect(isTextFile(candidate)).to.equal(false);
    }
  });
});