fix: end stream for files <= 150 bytes in resolveLfsPointer

The LFS-pointer probe buffered up to 150 bytes before deciding whether
to forward the blob or swap to the raw URL. For blobs that fit entirely
in the probe, decide() ran from the source's end event and attached
data/end listeners to an already-ended stream, so out.end() was never
called. The response hung until upstream timed out and storage.write
left an incomplete cached copy, which then forced a re-fetch on every
subsequent read.
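
The root cause is a standard Node.js stream gotcha, sketched standalone
below (illustrative only, not this repo's code): once a Readable has
emitted "end", listeners attached afterwards never fire.

    import { PassThrough } from "stream";

    const src = new PassThrough();
    src.end(Buffer.from("tiny blob")); // whole payload fits in the probe
    src.on("end", () => {
      // Pre-fix, decide() ran here and re-subscribed to the finished
      // source, which is exactly this dead pattern:
      src.on("data", (c) => process.stdout.write(c)); // never fires
      src.on("end", () => console.log("out.end()")); // never fires either
    });
    src.resume(); // drain the buffered data so "end" is emitted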

Pass a sourceEnded flag through decide() and end the output directly
when the source has already finished. Also skip the GitHub blob fetch
when the tree size is already over MAX_FILE_SIZE, surfacing
file_too_big instead of a translated 422.
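
A simplified, runnable stand-in for the fixed probe (the name
probeAndForward and its wiring are illustrative, not this repo's API)
shows the small-blob path now closing the output:

    import { PassThrough, Readable, Writable } from "stream";

    function probeAndForward(blob: Readable, out: Writable, probeBytes = 150) {
      let buffered = Buffer.alloc(0);
      let decided = false;
      const decide = (extra?: Buffer, sourceEnded = false) => {
        if (decided) return;
        decided = true;
        out.write(buffered);
        if (extra && extra.length) out.write(extra);
        if (sourceEnded) {
          out.end(); // source already finished: end directly
          return;
        }
        blob.on("data", (c) => out.write(c));
        blob.on("end", () => out.end());
      };
      blob.on("data", (chunk: Buffer) => {
        const remaining = probeBytes - buffered.length;
        if (chunk.length < remaining) {
          buffered = Buffer.concat([buffered, chunk]);
        } else {
          buffered = Buffer.concat([buffered, chunk.subarray(0, remaining)]);
          decide(chunk.subarray(remaining));
        }
      });
      blob.on("end", () => decide(undefined, true));
    }

    const blob = new PassThrough();
    const out = new PassThrough();
    probeAndForward(blob, out);
    out.on("data", () => {});
    out.on("end", () => console.log("done")); // hung before the fix
    blob.end(Buffer.from("hello")); // 5 bytes, well under the probe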
Author: tdurieux
Date:   2026-05-06 13:10:25 +03:00
Parent: 7dd6d872e9
Commit: 371693dc3b
Diff:   +25 -8

@@ -15,6 +15,8 @@ import { octokit } from "../GitHubUtils";
 import FileModel from "../model/files/files.model";
 import { IFile } from "../model/files/files.types";
 import { createLogger, serializeError } from "../logger";
+import config from "../../config";
 const logger = createLogger("gh-stream");
@@ -85,7 +87,7 @@ export default class GitHubStream extends GitHubBase {
     const PROBE_BYTES = 150;
     const LFS_PREFIX = "version https://git-lfs.github.com/spec/";
-    const decide = (extra?: Buffer) => {
+    const decide = (extra?: Buffer, sourceEnded = false) => {
       if (decided) return;
       decided = true;
       const head = probe.toString(
@@ -98,13 +100,17 @@ export default class GitHubStream extends GitHubBase {
         const lfsStream = this.downloadFileViaRaw(token, filePath);
         lfsStream.on("error", (err) => out.destroy(err));
         lfsStream.pipe(out);
-      } else {
-        out.write(probe);
-        if (extra && extra.length) out.write(extra);
-        blobStream.on("data", (c) => out.write(c));
-        blobStream.on("end", () => out.end());
-        blobStream.on("error", (err) => out.destroy(err));
+        return;
       }
+      out.write(probe);
+      if (extra && extra.length) out.write(extra);
+      if (sourceEnded) {
+        out.end();
+        return;
+      }
+      blobStream.on("data", (c) => out.write(c));
+      blobStream.on("end", () => out.end());
+      blobStream.on("error", (err) => out.destroy(err));
     };
     blobStream.on("data", (chunk: Buffer) => {
@@ -118,7 +124,7 @@ export default class GitHubStream extends GitHubBase {
         decide(chunk.slice(remaining));
       }
     });
-    blobStream.on("end", () => decide());
+    blobStream.on("end", () => decide(undefined, true));
     blobStream.on("error", (err) => {
       // Always propagate — pre-decision this is the only listener; once a
       // non-LFS decision is made, the inner branch attaches its own
@@ -172,6 +178,17 @@ export default class GitHubStream extends GitHubBase {
         object: filePath,
       });
     }
+    // GitHub's blob API rejects blobs larger than 100 MB with HTTP 422.
+    // Skip the download entirely when the tree already tells us the file is
+    // over the cap, so we surface a clean `file_too_big` instead of paying
+    // the round-trip just to translate a 422.
+    if (expected.size != null && expected.size > config.MAX_FILE_SIZE) {
+      throw new AnonymousError("file_too_big", {
+        httpStatus: 413,
+        object: filePath,
+      });
+    }
     const token = await this.data.getToken();
     const blobStream = this.downloadFile(token, expected.sha);
     // If the blob is a Git LFS pointer, swap to a raw-URL fetch so the