From 371693dc3bda753398fc45f715ec894206e170c3 Mon Sep 17 00:00:00 2001 From: tdurieux Date: Wed, 6 May 2026 13:10:25 +0300 Subject: [PATCH] fix: end stream for files <= 150 bytes in resolveLfsPointer The LFS-pointer probe buffered up to 150 bytes before deciding whether to forward the blob or swap to the raw URL. For blobs that fit entirely in the probe, decide() ran from the source's end event and attached data/end listeners to an already-ended stream, so out.end() was never called. The response hung until upstream timed out and storage.write left an incomplete cached copy, which then forced a re-fetch on every subsequent read. Pass a sourceEnded flag through decide() and end the output directly when the source has already finished. Also skip the GitHub blob fetch when the tree size is already over MAX_FILE_SIZE, surfacing file_too_big instead of a translated 422. --- src/core/source/GitHubStream.ts | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/core/source/GitHubStream.ts b/src/core/source/GitHubStream.ts index 14436fb..603a7a0 100644 --- a/src/core/source/GitHubStream.ts +++ b/src/core/source/GitHubStream.ts @@ -15,6 +15,8 @@ import { octokit } from "../GitHubUtils"; import FileModel from "../model/files/files.model"; import { IFile } from "../model/files/files.types"; import { createLogger, serializeError } from "../logger"; +import config from "../../config"; + const logger = createLogger("gh-stream"); @@ -85,7 +87,7 @@ export default class GitHubStream extends GitHubBase { const PROBE_BYTES = 150; const LFS_PREFIX = "version https://git-lfs.github.com/spec/"; - const decide = (extra?: Buffer) => { + const decide = (extra?: Buffer, sourceEnded = false) => { if (decided) return; decided = true; const head = probe.toString( @@ -98,13 +100,17 @@ export default class GitHubStream extends GitHubBase { const lfsStream = this.downloadFileViaRaw(token, filePath); lfsStream.on("error", (err) => out.destroy(err)); lfsStream.pipe(out); - } else { - out.write(probe); - if (extra && extra.length) out.write(extra); - blobStream.on("data", (c) => out.write(c)); - blobStream.on("end", () => out.end()); - blobStream.on("error", (err) => out.destroy(err)); + return; } + out.write(probe); + if (extra && extra.length) out.write(extra); + if (sourceEnded) { + out.end(); + return; + } + blobStream.on("data", (c) => out.write(c)); + blobStream.on("end", () => out.end()); + blobStream.on("error", (err) => out.destroy(err)); }; blobStream.on("data", (chunk: Buffer) => { @@ -118,7 +124,7 @@ export default class GitHubStream extends GitHubBase { decide(chunk.slice(remaining)); } }); - blobStream.on("end", () => decide()); + blobStream.on("end", () => decide(undefined, true)); blobStream.on("error", (err) => { // Always propagate — pre-decision this is the only listener; once a // non-LFS decision is made, the inner branch attaches its own @@ -172,6 +178,17 @@ export default class GitHubStream extends GitHubBase { object: filePath, }); } + + // GitHub's blob API rejects blobs larger than 100 MB with HTTP 422. + // Skip the download entirely when the tree already tells us the file is + // over the cap, so we surface a clean `file_too_big` instead of paying + // the round-trip just to translate a 422. + if (expected.size != null && expected.size > config.MAX_FILE_SIZE) { + throw new AnonymousError("file_too_big", { + httpStatus: 413, + object: filePath, + }); + } const token = await this.data.getToken(); const blobStream = this.downloadFile(token, expected.sha); // If the blob is a Git LFS pointer, swap to a raw-URL fetch so the