improve binary file detection: content sniffing + jsonl support

Files such as .jsonl, which mime-types doesn't recognize, fell through
to application/octet-stream and were rendered as "Unsupported binary
file" in the viewer. Replace istextorbinary with isbinaryfile for
content-based detection, and use mime-types for name-based
classification with an allowlist of textual application/* types.

The streaming transformer now defers classification when the name is
inconclusive and sniffs the first chunk before emitting "transform",
so route.ts and AnonymizedFile.ts get a content-aware Content-Type.
Also allowlists .jsonl and .ndjson so dataset files short-circuit detection.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
tdurieux
2026-05-06 07:52:48 +03:00
parent 18ce39e019
commit 79f555769d
6 changed files with 154 additions and 158 deletions
+18 -109
View File
@@ -28,7 +28,7 @@
"express-slow-down": "^2.0.1",
"got": "^11.8.6",
"inquirer": "^8.2.6",
"istextorbinary": "^9.5.0",
"isbinaryfile": "^6.0.0",
"marked": "^5.1.2",
"mime-types": "^2.1.35",
"mongoose": "^7.6.10",
@@ -8150,20 +8150,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/binaryextensions": {
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz",
"integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==",
"dependencies": {
"editions": "^6.21.0"
},
"engines": {
"node": ">=4"
},
"funding": {
"url": "https://bevry.me/fund"
}
},
"node_modules/bl": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
@@ -9292,20 +9278,6 @@
"node": ">=0.10.0"
}
},
"node_modules/editions": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/editions/-/editions-6.21.0.tgz",
"integrity": "sha512-ofkXJtn7z0urokN62DI3SBo/5xAtF0rR7tn+S/bSYV79Ka8pTajIIl+fFQ1q88DQEImymmo97M4azY3WX/nUdg==",
"dependencies": {
"version-range": "^4.13.0"
},
"engines": {
"node": ">=4"
},
"funding": {
"url": "https://bevry.me/fund"
}
},
"node_modules/ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@@ -11378,6 +11350,18 @@
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
},
"node_modules/isbinaryfile": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-6.0.0.tgz",
"integrity": "sha512-2FN2B8MAqKv6d5TaKsLvMrwMcghxwHTpcKy0L5mhNbRqjNqo2++SpCqN6eG1lCC1GmTQgvrYJYXv2+Chvyevag==",
"license": "MIT",
"engines": {
"node": ">= 24.0.0"
},
"funding": {
"url": "https://github.com/sponsors/gjtorikian/"
}
},
"node_modules/isobject": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
@@ -11387,22 +11371,6 @@
"node": ">=0.10.0"
}
},
"node_modules/istextorbinary": {
"version": "9.5.0",
"resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz",
"integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==",
"dependencies": {
"binaryextensions": "^6.11.0",
"editions": "^6.21.0",
"textextensions": "^6.11.0"
},
"engines": {
"node": ">=4"
},
"funding": {
"url": "https://bevry.me/fund"
}
},
"node_modules/jiti": {
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
@@ -13910,20 +13878,6 @@
"streamx": "^2.12.5"
}
},
"node_modules/textextensions": {
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz",
"integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==",
"dependencies": {
"editions": "^6.21.0"
},
"engines": {
"node": ">=4"
},
"funding": {
"url": "https://bevry.me/fund"
}
},
"node_modules/through": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
@@ -14392,17 +14346,6 @@
"node": ">= 0.8"
}
},
"node_modules/version-range": {
"version": "4.14.0",
"resolved": "https://registry.npmjs.org/version-range/-/version-range-4.14.0.tgz",
"integrity": "sha512-gjb0ARm9qlcBAonU4zPwkl9ecKkas+tC2CGwFfptTCWWIVTWY1YUbT2zZKsOAF1jR/tNxxyLwwG0cb42XlYcTg==",
"engines": {
"node": ">=4"
},
"funding": {
"url": "https://bevry.me/fund"
}
},
"node_modules/vinyl": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/vinyl/-/vinyl-3.0.0.tgz",
@@ -20607,14 +20550,6 @@
"integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==",
"dev": true
},
"binaryextensions": {
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/binaryextensions/-/binaryextensions-6.11.0.tgz",
"integrity": "sha512-sXnYK/Ij80TO3lcqZVV2YgfKN5QjUWIRk/XSm2J/4bd/lPko3lvk0O4ZppH6m+6hB2/GTu+ptNwVFe1xh+QLQw==",
"requires": {
"editions": "^6.21.0"
}
},
"bl": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
@@ -21431,14 +21366,6 @@
}
}
},
"editions": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/editions/-/editions-6.21.0.tgz",
"integrity": "sha512-ofkXJtn7z0urokN62DI3SBo/5xAtF0rR7tn+S/bSYV79Ka8pTajIIl+fFQ1q88DQEImymmo97M4azY3WX/nUdg==",
"requires": {
"version-range": "^4.13.0"
}
},
"ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@@ -22919,22 +22846,17 @@
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
},
"isbinaryfile": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-6.0.0.tgz",
"integrity": "sha512-2FN2B8MAqKv6d5TaKsLvMrwMcghxwHTpcKy0L5mhNbRqjNqo2++SpCqN6eG1lCC1GmTQgvrYJYXv2+Chvyevag=="
},
"isobject": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
"integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
"dev": true
},
"istextorbinary": {
"version": "9.5.0",
"resolved": "https://registry.npmjs.org/istextorbinary/-/istextorbinary-9.5.0.tgz",
"integrity": "sha512-5mbUj3SiZXCuRf9fT3ibzbSSEWiy63gFfksmGfdOzujPjW3k+z8WvIBxcJHBoQNlaZaiyB25deviif2+osLmLw==",
"requires": {
"binaryextensions": "^6.11.0",
"editions": "^6.21.0",
"textextensions": "^6.11.0"
}
},
"jiti": {
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
@@ -24719,14 +24641,6 @@
"streamx": "^2.12.5"
}
},
"textextensions": {
"version": "6.11.0",
"resolved": "https://registry.npmjs.org/textextensions/-/textextensions-6.11.0.tgz",
"integrity": "sha512-tXJwSr9355kFJI3lbCkPpUH5cP8/M0GGy2xLO34aZCjMXBaK3SoPnZwr/oWmo1FdCnELcs4npdCIOFtq9W3ruQ==",
"requires": {
"editions": "^6.21.0"
}
},
"through": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
@@ -25046,11 +24960,6 @@
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
"integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="
},
"version-range": {
"version": "4.14.0",
"resolved": "https://registry.npmjs.org/version-range/-/version-range-4.14.0.tgz",
"integrity": "sha512-gjb0ARm9qlcBAonU4zPwkl9ecKkas+tC2CGwFfptTCWWIVTWY1YUbT2zZKsOAF1jR/tNxxyLwwG0cb42XlYcTg=="
},
"vinyl": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/vinyl/-/vinyl-3.0.0.tgz",