mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-15 22:48:00 +02:00
fix: recognize LICENSE / COPYING / etc. as text files
istextorbinary returns null for filenames with no extension, and the isTextFile() guard treated null as "not text" — so terms in LICENSE, COPYING, AUTHORS, README (extensionless), CHANGELOG, NOTICE, and similar conventional filenames went through the binary passthrough in AnonymizeTransformer and were never anonymized. Add a small whitelist of these names ahead of the istextorbinary call. Fixes #493.
This commit is contained in:
@@ -22,6 +22,29 @@ export function streamToString(stream: Readable): Promise<string> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Common conventional plaintext filenames that have no extension. The
|
||||||
|
// istextorbinary package returns null (unknown) for these, which our
|
||||||
|
// `=== true` check then treats as binary — so terms in LICENSE, COPYING,
|
||||||
|
// etc. silently went through unchanged (#493).
|
||||||
|
const KNOWN_TEXT_FILENAMES = new Set(
|
||||||
|
[
|
||||||
|
"license",
|
||||||
|
"licence",
|
||||||
|
"copying",
|
||||||
|
"copyright",
|
||||||
|
"authors",
|
||||||
|
"contributors",
|
||||||
|
"readme",
|
||||||
|
"changelog",
|
||||||
|
"changes",
|
||||||
|
"notice",
|
||||||
|
"install",
|
||||||
|
"todo",
|
||||||
|
"version",
|
||||||
|
"manifest",
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
export function isTextFile(filePath: string, content?: Buffer) {
|
export function isTextFile(filePath: string, content?: Buffer) {
|
||||||
const filename = basename(filePath);
|
const filename = basename(filePath);
|
||||||
const extensions = filename.split(".").reverse();
|
const extensions = filename.split(".").reverse();
|
||||||
@@ -29,6 +52,9 @@ export function isTextFile(filePath: string, content?: Buffer) {
|
|||||||
if (config.additionalExtensions.includes(extension)) {
|
if (config.additionalExtensions.includes(extension)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (KNOWN_TEXT_FILENAMES.has(filename.toLowerCase())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (isText(filename)) {
|
if (isText(filename)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
const { expect } = require("chai");
|
||||||
|
require("ts-node/register/transpile-only");
|
||||||
|
const { isTextFile } = require("../src/core/anonymize-utils");
|
||||||
|
|
||||||
|
describe("isTextFile", function () {
|
||||||
|
// #493 — istextorbinary returns null for files with no extension, so a
|
||||||
|
// bare LICENSE / COPYING / etc. used to be classified as binary and
|
||||||
|
// never anonymized. Whitelist the conventional plaintext filenames.
|
||||||
|
it("recognizes conventional no-extension plaintext filenames", function () {
|
||||||
|
expect(isTextFile("LICENSE")).to.equal(true);
|
||||||
|
expect(isTextFile("license")).to.equal(true);
|
||||||
|
expect(isTextFile("COPYING")).to.equal(true);
|
||||||
|
expect(isTextFile("AUTHORS")).to.equal(true);
|
||||||
|
expect(isTextFile("README")).to.equal(true);
|
||||||
|
expect(isTextFile("CHANGELOG")).to.equal(true);
|
||||||
|
expect(isTextFile("NOTICE")).to.equal(true);
|
||||||
|
expect(isTextFile("path/to/LICENSE")).to.equal(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("still recognizes well-known text extensions", function () {
|
||||||
|
expect(isTextFile("foo.txt")).to.equal(true);
|
||||||
|
expect(isTextFile("foo.md")).to.equal(true);
|
||||||
|
expect(isTextFile("foo.js")).to.equal(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not classify binary files as text", function () {
|
||||||
|
expect(isTextFile("foo.png")).to.equal(false);
|
||||||
|
expect(isTextFile("foo.zip")).to.equal(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
Reference in New Issue
Block a user