mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-15 22:48:00 +02:00
improve binary file detection: content sniffing + jsonl support
Files like .jsonl that mime-types doesn't know fell through to application/octet-stream and rendered as "Unsupported binary file" in the viewer. Replace istextorbinary with isbinaryfile for content-based detection, and use mime-types for name-based classification with a textual application/* allowlist. The streaming transformer now defers classification when the name is inconclusive and sniffs the first chunk before emitting "transform", so route.ts and AnonymizedFile.ts get a content-aware Content-Type. Whitelists .jsonl and .ndjson to short-circuit dataset files. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Generated
+18
-109
@@ -28,7 +28,7 @@
|
||||
"express-slow-down": "^2.0.1",
|
||||
"got": "^11.8.6",
|
||||
"inquirer": "^8.2.6",
|
||||
"istextorbinary": "^9.5.0",
|
||||
"isbinaryfile": "^6.0.0",
|
||||
"marked": "^5.1.2",
|
||||
"mime-types": "^2.1.35",
|
||||
"mongoose": "^7.6.10",
|
||||
@@ -8150,20 +8150,6 @@
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/binaryextensions": {
|
||||
"version": "6.11.0",
|
||||
"resolved": "https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz",
|
||||
"integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==",
|
||||
"dependencies": {
|
||||
"editions": "^6.21.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/bl": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
|
||||
@@ -9292,20 +9278,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/editions": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/editions/-/editions-6.21.0.tgz",
|
||||
"integrity": "sha512-ofkXJtn7z0urokN62DI3SBo/5xAtF0rR7tn+S/bSYV79Ka8pTajIIl+fFQ1q88DQEImymmo97M4azY3WX/nUdg==",
|
||||
"dependencies": {
|
||||
"version-range": "^4.13.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/ee-first": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
@@ -11378,6 +11350,18 @@
|
||||
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
|
||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
|
||||
},
|
||||
"node_modules/isbinaryfile": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-6.0.0.tgz",
|
||||
"integrity": "sha512-2FN2B8MAqKv6d5TaKsLvMrwMcghxwHTpcKy0L5mhNbRqjNqo2++SpCqN6eG1lCC1GmTQgvrYJYXv2+Chvyevag==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 24.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/gjtorikian/"
|
||||
}
|
||||
},
|
||||
"node_modules/isobject": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
|
||||
@@ -11387,22 +11371,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/istextorbinary": {
|
||||
"version": "9.5.0",
|
||||
"resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz",
|
||||
"integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==",
|
||||
"dependencies": {
|
||||
"binaryextensions": "^6.11.0",
|
||||
"editions": "^6.21.0",
|
||||
"textextensions": "^6.11.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/jiti": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
|
||||
@@ -13910,20 +13878,6 @@
|
||||
"streamx": "^2.12.5"
|
||||
}
|
||||
},
|
||||
"node_modules/textextensions": {
|
||||
"version": "6.11.0",
|
||||
"resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz",
|
||||
"integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==",
|
||||
"dependencies": {
|
||||
"editions": "^6.21.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/through": {
|
||||
"version": "2.3.8",
|
||||
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
||||
@@ -14392,17 +14346,6 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/version-range": {
|
||||
"version": "4.14.0",
|
||||
"resolved": "https://registry.npmjs.org/version-range/-/version-range-4.14.0.tgz",
|
||||
"integrity": "sha512-gjb0ARm9qlcBAonU4zPwkl9ecKkas+tC2CGwFfptTCWWIVTWY1YUbT2zZKsOAF1jR/tNxxyLwwG0cb42XlYcTg==",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://bevry.me/fund"
|
||||
}
|
||||
},
|
||||
"node_modules/vinyl": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/vinyl/-/vinyl-3.0.0.tgz",
|
||||
@@ -20607,14 +20550,6 @@
|
||||
"integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==",
|
||||
"dev": true
|
||||
},
|
||||
"binaryextensions": {
|
||||
"version": "6.11.0",
|
||||
"resolved": "https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz",
|
||||
"integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==",
|
||||
"requires": {
|
||||
"editions": "^6.21.0"
|
||||
}
|
||||
},
|
||||
"bl": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
|
||||
@@ -21431,14 +21366,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"editions": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/editions/-/editions-6.21.0.tgz",
|
||||
"integrity": "sha512-ofkXJtn7z0urokN62DI3SBo/5xAtF0rR7tn+S/bSYV79Ka8pTajIIl+fFQ1q88DQEImymmo97M4azY3WX/nUdg==",
|
||||
"requires": {
|
||||
"version-range": "^4.13.0"
|
||||
}
|
||||
},
|
||||
"ee-first": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
@@ -22919,22 +22846,17 @@
|
||||
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
|
||||
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
|
||||
},
|
||||
"isbinaryfile": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-6.0.0.tgz",
|
||||
"integrity": "sha512-2FN2B8MAqKv6d5TaKsLvMrwMcghxwHTpcKy0L5mhNbRqjNqo2++SpCqN6eG1lCC1GmTQgvrYJYXv2+Chvyevag=="
|
||||
},
|
||||
"isobject": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
|
||||
"integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
|
||||
"dev": true
|
||||
},
|
||||
"istextorbinary": {
|
||||
"version": "9.5.0",
|
||||
"resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz",
|
||||
"integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==",
|
||||
"requires": {
|
||||
"binaryextensions": "^6.11.0",
|
||||
"editions": "^6.21.0",
|
||||
"textextensions": "^6.11.0"
|
||||
}
|
||||
},
|
||||
"jiti": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
|
||||
@@ -24719,14 +24641,6 @@
|
||||
"streamx": "^2.12.5"
|
||||
}
|
||||
},
|
||||
"textextensions": {
|
||||
"version": "6.11.0",
|
||||
"resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz",
|
||||
"integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==",
|
||||
"requires": {
|
||||
"editions": "^6.21.0"
|
||||
}
|
||||
},
|
||||
"through": {
|
||||
"version": "2.3.8",
|
||||
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
||||
@@ -25046,11 +24960,6 @@
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
"integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="
|
||||
},
|
||||
"version-range": {
|
||||
"version": "4.14.0",
|
||||
"resolved": "https://registry.npmjs.org/version-range/-/version-range-4.14.0.tgz",
|
||||
"integrity": "sha512-gjb0ARm9qlcBAonU4zPwkl9ecKkas+tC2CGwFfptTCWWIVTWY1YUbT2zZKsOAF1jR/tNxxyLwwG0cb42XlYcTg=="
|
||||
},
|
||||
"vinyl": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/vinyl/-/vinyl-3.0.0.tgz",
|
||||
|
||||
+1
-1
@@ -51,7 +51,7 @@
|
||||
"express-slow-down": "^2.0.1",
|
||||
"got": "^11.8.6",
|
||||
"inquirer": "^8.2.6",
|
||||
"istextorbinary": "^9.5.0",
|
||||
"isbinaryfile": "^6.0.0",
|
||||
"marked": "^5.1.2",
|
||||
"mime-types": "^2.1.35",
|
||||
"mongoose": "^7.6.10",
|
||||
|
||||
@@ -69,6 +69,8 @@ const config: Config = {
|
||||
"out",
|
||||
"sol",
|
||||
"in",
|
||||
"jsonl",
|
||||
"ndjson",
|
||||
],
|
||||
STORAGE: "filesystem",
|
||||
STREAMER_ENTRYPOINT: null,
|
||||
|
||||
+104
-43
@@ -1,7 +1,8 @@
|
||||
import { basename } from "path";
|
||||
import { Transform, Readable } from "stream";
|
||||
import { StringDecoder } from "string_decoder";
|
||||
import { isText } from "istextorbinary";
|
||||
import { isBinaryFileSync } from "isbinaryfile";
|
||||
import { lookup as lookupMime } from "mime-types";
|
||||
|
||||
import config from "../config";
|
||||
import {
|
||||
@@ -22,47 +23,93 @@ export function streamToString(stream: Readable): Promise<string> {
|
||||
});
|
||||
}
|
||||
|
||||
// Common conventional plaintext filenames that have no extension. The
|
||||
// istextorbinary package returns null (unknown) for these, which our
|
||||
// `=== true` check then treats as binary — so terms in LICENSE, COPYING,
|
||||
// etc. silently went through unchanged (#493).
|
||||
const KNOWN_TEXT_FILENAMES = new Set(
|
||||
[
|
||||
"license",
|
||||
"licence",
|
||||
"copying",
|
||||
"copyright",
|
||||
"authors",
|
||||
"contributors",
|
||||
"readme",
|
||||
"changelog",
|
||||
"changes",
|
||||
"notice",
|
||||
"install",
|
||||
"todo",
|
||||
"version",
|
||||
"manifest",
|
||||
]
|
||||
);
|
||||
// Common conventional plaintext filenames that have no extension and no MIME
|
||||
// match. Without this whitelist a bare LICENSE / COPYING / etc. would fall
|
||||
// through to content sniffing, which is fine for non-empty files but reports
|
||||
// a zero-byte or missing buffer as binary — so we short-circuit them here (#493).
|
||||
const KNOWN_TEXT_FILENAMES = new Set([
|
||||
"license",
|
||||
"licence",
|
||||
"copying",
|
||||
"copyright",
|
||||
"authors",
|
||||
"contributors",
|
||||
"readme",
|
||||
"changelog",
|
||||
"changes",
|
||||
"notice",
|
||||
"install",
|
||||
"todo",
|
||||
"version",
|
||||
"manifest",
|
||||
]);
|
||||
|
||||
export function isTextFile(filePath: string, content?: Buffer) {
|
||||
const filename = basename(filePath);
|
||||
const extensions = filename.split(".").reverse();
|
||||
const extension = extensions[0].toLowerCase();
|
||||
if (config.additionalExtensions.includes(extension)) {
|
||||
return true;
|
||||
}
|
||||
if (KNOWN_TEXT_FILENAMES.has(filename.toLowerCase())) {
|
||||
return true;
|
||||
}
|
||||
if (isText(filename)) {
|
||||
return true;
|
||||
}
|
||||
return isText(filename, content);
|
||||
// Application/* MIME types that carry text payloads. text/* is always text,
|
||||
// application/* needs an allowlist (most are binary: zip, pdf, octet-stream).
|
||||
const TEXTUAL_APPLICATION_MIMES = new Set([
|
||||
"application/json",
|
||||
"application/ld+json",
|
||||
"application/xml",
|
||||
"application/javascript",
|
||||
"application/ecmascript",
|
||||
"application/typescript",
|
||||
"application/toml",
|
||||
"application/sql",
|
||||
"application/x-sql",
|
||||
"application/x-sh",
|
||||
"application/x-csh",
|
||||
"application/x-yaml",
|
||||
"application/yaml",
|
||||
"application/x-httpd-php",
|
||||
"application/graphql",
|
||||
"application/x-tex",
|
||||
"application/x-latex",
|
||||
"application/x-perl",
|
||||
"application/x-ruby",
|
||||
"application/x-python",
|
||||
]);
|
||||
|
||||
function isTextualMime(mime: string): boolean {
|
||||
if (mime.startsWith("text/")) return true;
|
||||
if (TEXTUAL_APPLICATION_MIMES.has(mime)) return true;
|
||||
// application/*+json, application/*+xml, application/*+yaml
|
||||
return /\+(json|xml|yaml)$/.test(mime);
|
||||
}
|
||||
|
||||
// Name-only classification: returns true (known text), false (known binary),
|
||||
// or null when the name alone is inconclusive. The streaming transformer
|
||||
// resolves null by sniffing the first chunk with isbinaryfile.
|
||||
function classifyByName(filePath: string): boolean | null {
|
||||
const name = basename(filePath);
|
||||
const extension = name.split(".").reverse()[0].toLowerCase();
|
||||
if (config.additionalExtensions.includes(extension)) return true;
|
||||
if (KNOWN_TEXT_FILENAMES.has(name.toLowerCase())) return true;
|
||||
const mime = lookupMime(name);
|
||||
if (mime === false) return null;
|
||||
// mime-types treats `.ts` as video/mp2t; route.ts already special-cases it.
|
||||
// Prefer text for the ambiguous extension since it matches our typical use.
|
||||
if (extension === "ts") return true;
|
||||
return isTextualMime(mime);
|
||||
}
|
||||
|
||||
export function isTextFile(filePath: string, content?: Buffer): boolean {
|
||||
const byName = classifyByName(filePath);
|
||||
if (byName === true) return true;
|
||||
if (byName === false) return false;
|
||||
// Name was inconclusive — sniff the buffer if we have one. isbinaryfile
|
||||
// checks for null bytes / non-printable ratio in the first 512 bytes
|
||||
// and returns a decisive boolean. With no buffer (or an empty one) there is nothing to sniff, so the fallback below reports binary.
|
||||
if (content && content.length > 0) return !isBinaryFileSync(content);
|
||||
return false;
|
||||
}
|
||||
|
||||
export class AnonymizeTransformer extends Transform {
|
||||
public isText: boolean;
|
||||
// Set in the constructor for known extensions; left unassigned until the first
|
||||
// chunk arrives for unknown extensions, where it's resolved by sniffing.
|
||||
// Consumers of the "transform" event always see a resolved boolean — we
|
||||
// sniff before emitting.
|
||||
public isText!: boolean;
|
||||
private nameVerdict: boolean | null;
|
||||
anonimizer: ContentAnonimizer;
|
||||
private decoder = new StringDecoder("utf8");
|
||||
// Trailing decoded text held back between chunks so that terms, URLs, or
|
||||
@@ -84,11 +131,13 @@ export class AnonymizeTransformer extends Transform {
|
||||
} & ConstructorParameters<typeof ContentAnonimizer>[0]
|
||||
) {
|
||||
super();
|
||||
// isTextFile may return null for unknown extensions; treat unknown as
|
||||
// binary. Sniffing from chunk content is unsafe — split archives,
|
||||
// compressed blobs, etc. can have an ASCII-looking first 64 KB and get
|
||||
// misclassified as text, which then UTF-8-round-trips and corrupts them.
|
||||
this.isText = isTextFile(this.opt.filePath) === true;
|
||||
// Tri-state: name-based check returns true (known text), false (known
|
||||
// binary), or null (name inconclusive). For null we defer to a content
|
||||
// sniff on the first chunk in _transform — known binary extensions
|
||||
// (archives, compressed blobs, images) are resolved here and never
|
||||
// reach the sniff path (#493).
|
||||
this.nameVerdict = classifyByName(this.opt.filePath);
|
||||
if (this.nameVerdict !== null) this.isText = this.nameVerdict;
|
||||
this.anonimizer = new ContentAnonimizer(this.opt);
|
||||
}
|
||||
|
||||
@@ -105,6 +154,12 @@ export class AnonymizeTransformer extends Transform {
|
||||
}
|
||||
|
||||
_transform(chunk: Buffer, encoding: string, callback: () => void) {
|
||||
if (this.nameVerdict === null) {
|
||||
// Name didn't decide. isbinaryfile inspects the first 512 bytes for
|
||||
// null bytes and non-printable ratio and returns a decisive boolean.
|
||||
this.isText = chunk.length === 0 ? true : !isBinaryFileSync(chunk);
|
||||
this.nameVerdict = this.isText;
|
||||
}
|
||||
if (!this.isText) {
|
||||
this.emit("transform", {
|
||||
isText: this.isText,
|
||||
@@ -166,6 +221,12 @@ export class AnonymizeTransformer extends Transform {
|
||||
}
|
||||
|
||||
_flush(callback: () => void) {
|
||||
// Empty file with an unknown extension: no chunk arrived to trigger
|
||||
// sniffing. Treat as text — there's nothing to corrupt.
|
||||
if (this.nameVerdict === null) {
|
||||
this.isText = true;
|
||||
this.nameVerdict = true;
|
||||
}
|
||||
if (this.isText) {
|
||||
this.pending += this.decoder.end();
|
||||
if (this.pending) {
|
||||
|
||||
@@ -499,8 +499,9 @@ describe("ContentAnonimizer", function () {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Mirror of isTextFile that relies on the file extension only — the real
|
||||
// impl additionally calls istextorbinary, but for these tests checking the
|
||||
// suffix is enough to demonstrate the constructor-vs-post-assignment bug.
|
||||
// impl additionally consults mime-types and isbinaryfile, but for these
|
||||
// tests checking the suffix is enough to demonstrate the
|
||||
// constructor-vs-post-assignment bug.
|
||||
function _isTextFileFromPath(filePath) {
|
||||
if (!filePath) return false;
|
||||
const ext = String(filePath).split(".").pop().toLowerCase();
|
||||
|
||||
@@ -3,9 +3,9 @@ require("ts-node/register/transpile-only");
|
||||
const { isTextFile } = require("../src/core/anonymize-utils");
|
||||
|
||||
describe("isTextFile", function () {
|
||||
// #493 — istextorbinary returns null for files with no extension, so a
|
||||
// bare LICENSE / COPYING / etc. used to be classified as binary and
|
||||
// never anonymized. Whitelist the conventional plaintext filenames.
|
||||
// #493 — bare LICENSE / COPYING / etc. have no extension and no MIME, so
|
||||
// we whitelist the conventional plaintext filenames to short-circuit them
|
||||
// before falling through to content sniffing (which fails on empty files).
|
||||
it("recognizes conventional no-extension plaintext filenames", function () {
|
||||
expect(isTextFile("LICENSE")).to.equal(true);
|
||||
expect(isTextFile("license")).to.equal(true);
|
||||
@@ -27,4 +27,27 @@ describe("isTextFile", function () {
|
||||
expect(isTextFile("foo.png")).to.equal(false);
|
||||
expect(isTextFile("foo.zip")).to.equal(false);
|
||||
});
|
||||
|
||||
it("recognizes jsonl-family dataset extensions", function () {
|
||||
expect(isTextFile("data.jsonl")).to.equal(true);
|
||||
expect(isTextFile("data.ndjson")).to.equal(true);
|
||||
});
|
||||
|
||||
it("falls back to content sniffing for unknown extensions", function () {
|
||||
expect(
|
||||
isTextFile("foo.unknown", Buffer.from("hello world\nline two\n", "utf8"))
|
||||
).to.equal(true);
|
||||
expect(
|
||||
isTextFile("foo.unknown", Buffer.from([0x00, 0x01, 0x02, 0x03, 0x00, 0x05]))
|
||||
).to.equal(false);
|
||||
const random = Buffer.alloc(512);
|
||||
for (let i = 0; i < random.length; i++) random[i] = (i * 31 + 7) % 32;
|
||||
expect(isTextFile("foo.unknown", random)).to.equal(false);
|
||||
});
|
||||
|
||||
it("does not let content sniffing override a known binary extension", function () {
|
||||
expect(
|
||||
isTextFile("foo.png", Buffer.from("plain ascii pretending to be a png"))
|
||||
).to.equal(false);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user