fix: recognize LICENSE / COPYING / etc. as text files

istextorbinary returns null for filenames with no extension, and the
isTextFile() guard treated null as "not text" — so terms in LICENSE,
COPYING, AUTHORS, README (extensionless), CHANGELOG, NOTICE, and
similar conventional filenames went through the binary passthrough
in AnonymizeTransformer and were never anonymized.

Add a small whitelist of these names, matched case-insensitively against the file's basename, ahead of the istextorbinary call.

Fixes #493.
This commit is contained in:
tdurieux
2026-05-04 11:34:54 +02:00
parent f91db91cee
commit 4aeecd0fdb
2 changed files with 56 additions and 0 deletions
+26
View File
@@ -22,6 +22,29 @@ export function streamToString(stream: Readable): Promise<string> {
});
}
/**
 * Lowercase basenames of conventional plaintext files that carry no
 * extension (LICENSE, COPYING, README, ...).
 *
 * istextorbinary reports null ("unknown") for such names, and a null
 * answer is not accepted as text, so terms inside these files used to
 * pass through without being anonymized (#493). Entries are stored in
 * lowercase; callers are expected to lowercase the basename before the
 * lookup.
 */
const KNOWN_TEXT_FILENAMES = new Set<string>([
  // Licensing and legal notices.
  "license",
  "licence",
  "copying",
  "copyright",

  // People credited by the project.
  "authors",
  "contributors",

  // General project documentation and metadata.
  "readme",
  "changelog",
  "changes",
  "notice",
  "install",
  "todo",
  "version",
  "manifest",
]);
export function isTextFile(filePath: string, content?: Buffer) {
const filename = basename(filePath);
const extensions = filename.split(".").reverse();
@@ -29,6 +52,9 @@ export function isTextFile(filePath: string, content?: Buffer) {
if (config.additionalExtensions.includes(extension)) {
return true;
}
if (KNOWN_TEXT_FILENAMES.has(filename.toLowerCase())) {
return true;
}
if (isText(filename)) {
return true;
}