mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-15 06:30:26 +02:00
feat: resolve Git LFS pointers via the raw URL endpoint
Files tracked by Git LFS used to come out as the pointer text:

    version https://git-lfs.github.com/spec/v1
    oid sha256:...
    size ...

…because GitHub's blob API returns the pointer, not the resolved
content. Detect that prefix on the first ~150 bytes of the blob stream
and switch to a fresh fetch via the web raw URL
(github.com/<owner>/<repo>/raw/<commit>/<path>), which auto-redirects
to media.githubusercontent.com and resolves the LFS object — auth
header carries through. Non-LFS files are forwarded through the
existing pipeline unchanged.
Fixes #95.
This commit is contained in:
@@ -51,6 +51,77 @@ export default class GitHubStream extends GitHubBase {
|
||||
}
|
||||
}
|
||||
|
||||
// GitHub's web raw URL auto-resolves Git LFS pointers via redirect to
|
||||
// media.githubusercontent.com, with the auth header carried through. The
|
||||
// blob endpoint above returns the raw pointer text instead, so we use this
|
||||
// as the fallback for LFS files (#95).
|
||||
/**
 * Streams a file through GitHub's web "raw" URL
 * (`github.com/<owner>/<repo>/raw/<commit>/<path>`) instead of the blob API.
 * Used as the fallback for Git LFS files, where the blob API only yields the
 * pointer text.
 *
 * @param token GitHub token sent in the `authorization` header.
 * @param filePath Repository-relative, `/`-separated path of the file.
 * @returns The stream produced by `got.stream` for the raw URL (redirects
 *   are followed).
 */
private downloadFileViaRaw(token: string, filePath: string) {
  // Percent-encode every path segment individually so that characters with a
  // special meaning in URLs (`#`, `?`, `%`, spaces, ...) stay part of the
  // path; the `/` separators themselves must remain unencoded.
  const encodedPath = filePath
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${encodedPath}`;
  console.log("[GHStream] Downloading via raw URL (LFS)", url);
  return got.stream(url, {
    headers: { authorization: `token ${token}` },
    followRedirect: true,
  });
}
|
||||
|
||||
// Wrap a blob stream so that if its first ~150 bytes look like a Git LFS
|
||||
// pointer, the bytes are dropped and replaced by a fresh fetch from the
|
||||
// raw URL endpoint (which resolves LFS automatically). Non-LFS files are
|
||||
// forwarded unchanged.
|
||||
/**
 * Returns a stream carrying the real content of a file: the bytes of
 * `blobStream` when it is a regular file, or a fresh download through
 * `downloadFileViaRaw` when the blob starts with the Git LFS pointer prefix.
 *
 * Up to `PROBE_BYTES` bytes are buffered before deciding; nothing is written
 * to the returned stream until the decision is made.
 *
 * @param blobStream Stream of the blob as returned by the blob endpoint.
 * @param token GitHub token forwarded to the raw download if needed.
 * @param filePath Repository-relative path used to build the raw URL.
 * @returns A `PassThrough` that emits the resolved content and is destroyed
 *   with the underlying error if either source stream fails.
 */
private resolveLfsPointer(
  blobStream: stream.Readable,
  token: string,
  filePath: string
): stream.Readable {
  const PROBE_BYTES = 150;
  const LFS_PREFIX = "version https://git-lfs.github.com/spec/";

  const out = new stream.PassThrough();
  let probe = Buffer.alloc(0);
  let decided = false;
  let switchedToLfs = false;

  // `rest` holds the bytes of the current chunk beyond the probe window;
  // `sourceEnded` is true when the decision is triggered by the "end" event.
  const decide = (rest: Buffer | undefined, sourceEnded: boolean) => {
    if (decided) return;
    decided = true;
    // Probing is over: from here on the data is either piped or discarded.
    blobStream.off("data", onData);
    blobStream.off("end", onEnd);

    const isPointer =
      probe.length >= LFS_PREFIX.length &&
      probe.toString("utf8", 0, LFS_PREFIX.length) === LFS_PREFIX;

    if (isPointer) {
      switchedToLfs = true;
      blobStream.destroy();
      const lfsStream = this.downloadFileViaRaw(token, filePath);
      lfsStream.on("error", (err) => out.destroy(err));
      lfsStream.pipe(out);
      return;
    }

    if (probe.length > 0) out.write(probe);
    if (rest !== undefined && rest.length > 0) out.write(rest);

    if (sourceEnded) {
      // The source already emitted "end" (file shorter than the probe
      // window). A listener registered now would never be called, so the
      // output has to be closed explicitly or consumers would hang forever.
      out.end();
    } else {
      // pipe() forwards the remaining bytes, ends `out` when the source
      // ends, and honours backpressure from slow consumers.
      blobStream.pipe(out);
    }
  };

  const onData = (chunk: Buffer) => {
    const remaining = PROBE_BYTES - probe.length;
    if (chunk.length < remaining) {
      // Still below the probe window: keep buffering.
      probe = Buffer.concat([probe, chunk]);
      return;
    }
    probe = Buffer.concat([probe, chunk.subarray(0, remaining)]);
    decide(chunk.subarray(remaining), false);
  };
  const onEnd = () => decide(undefined, true);

  blobStream.on("data", onData);
  blobStream.on("end", onEnd);
  // pipe() does not propagate errors, so this listener stays attached for the
  // whole lifetime of the blob stream.
  blobStream.on("error", (err) => {
    // Once the LFS download has taken over, the blob stream is abandoned and
    // its errors are irrelevant to the output.
    if (switchedToLfs) return;
    decided = true;
    out.destroy(err);
  });

  return out;
}
|
||||
|
||||
async getFileContentCache(
|
||||
filePath: string,
|
||||
repoId: string,
|
||||
@@ -65,10 +136,12 @@ export default class GitHubStream extends GitHubBase {
|
||||
object: filePath,
|
||||
});
|
||||
}
|
||||
const content = this.downloadFile(
|
||||
await this.data.getToken(),
|
||||
await fileSha()
|
||||
);
|
||||
const token = await this.data.getToken();
|
||||
const blobStream = this.downloadFile(token, await fileSha());
|
||||
// If the blob is a Git LFS pointer, swap to a raw-URL fetch so the
|
||||
// file content (not the pointer text) makes it into the pipeline. See
|
||||
// #95 — Support for Git LFS.
|
||||
const content = this.resolveLfsPointer(blobStream, token, filePath);
|
||||
|
||||
// duplicate the stream to write it to the storage
|
||||
const stream1 = content.pipe(new stream.PassThrough());
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
const { expect } = require("chai");
|
||||
const { Readable } = require("stream");
|
||||
|
||||
// Standalone test of the LFS-pointer detection shape used in
|
||||
// GitHubStream#resolveLfsPointer. We can't easily import that method (it's
|
||||
// private and the file pulls in heavy GitHub plumbing), so this mirrors the
|
||||
// detection logic to confirm the head-bytes check.
|
||||
|
||||
// Leading bytes shared by every Git LFS pointer file, up to (but excluding)
// the spec version.
const LFS_PREFIX = "version https://git-lfs.github.com/spec/";

/**
 * Tells whether a buffer begins with the Git LFS pointer prefix.
 *
 * @param buf Leading bytes of a file (a Buffer).
 * @returns true only when `buf` holds at least the whole prefix and its
 *   first bytes decode to exactly that prefix.
 */
function isLfsPointer(buf) {
  const prefixLength = LFS_PREFIX.length;
  // Too short to contain the complete prefix.
  if (buf.length < prefixLength) {
    return false;
  }
  const head = buf.toString("utf8", 0, prefixLength);
  return head === LFS_PREFIX;
}
|
||||
|
||||
describe("LFS pointer detection (#95)", function () {
  // One entry per scenario: the test title, the bytes handed to
  // isLfsPointer, and the detection result that is expected.
  const scenarios = [
    {
      title: "recognizes the standard pointer prefix",
      input: Buffer.from(
        "version https://git-lfs.github.com/spec/v1\n" +
          "oid sha256:abc123\nsize 12345\n"
      ),
      expected: true,
    },
    {
      title: "doesn't false-positive on plain text starting with 'version'",
      input: Buffer.from(
        "version 1.2.3\nA short release notes file mentioning git-lfs.\n"
      ),
      expected: false,
    },
    {
      title: "doesn't false-positive on binary headers",
      // ELF magic number followed by 100 zero bytes.
      input: Buffer.concat([
        Buffer.from([0x7f, 0x45, 0x4c, 0x46]),
        Buffer.alloc(100),
      ]),
      expected: false,
    },
    {
      title: "handles short streams below the prefix length",
      input: Buffer.from("vers"),
      expected: false,
    },
  ];

  for (const { title, input, expected } of scenarios) {
    it(title, function () {
      expect(isLfsPointer(input)).to.equal(expected);
    });
  }
});
|
||||
Reference in New Issue
Block a user