Fix downloads for repositories created with the old GitHubDownload source

This commit is contained in:
tdurieux
2026-05-06 19:37:16 +03:00
parent da78708b7b
commit 67cb2538b1
6 changed files with 154 additions and 22 deletions
+18 -8
View File
@@ -593,7 +593,9 @@ angular
if (dir) {
output += `<a ng-click="openFolder('${path}', $event)">${name}</a>`;
} else {
output += `<a href='/r/${$scope.repoId}${path}'>${name}</a>`;
output += `<a href='/r/${$scope.repoId}${encodePathForUrl(
path
)}'>${name}</a>`;
}
if (truncated) {
output += `<span class="truncated-warning" title="{{ 'WARNINGS.folder_truncated' | translate }}"><i class="fas fa-exclamation-triangle"></i></span>`;
@@ -2255,7 +2257,9 @@ angular
}
// redirect to readme
$location.url(uri + readmeCandidates[best_match]);
$location.url(
uri + encodePathForUrl(readmeCandidates[best_match])
);
}
}
$scope.getFiles = async function (path) {
@@ -2353,11 +2357,15 @@ angular
// server returns a fresh ETag on first hit either way.
const sha = (fileInfo && fileInfo.sha) || "0";
$http
.get(`/api/repo/${$scope.repoId}/file/${path}?v=` + sha, {
transformResponse: (data) => {
return data;
},
})
.get(
`/api/repo/${$scope.repoId}/file/${encodePathForUrl(path)}?v=` +
sha,
{
transformResponse: (data) => {
return data;
},
}
)
.then(
(res) => {
$scope.type = originalType;
@@ -2427,7 +2435,9 @@ angular
if ($scope.file && $scope.file.sha) {
fileVersion = $scope.file.sha;
}
$scope.url = `/api/repo/${$scope.repoId}/file/${$scope.filePath}?v=${fileVersion}`;
$scope.url = `/api/repo/${$scope.repoId}/file/${encodePathForUrl(
$scope.filePath
)}?v=${fileVersion}`;
let extension = $scope.filePath.toLowerCase();
const extensionIndex = extension.lastIndexOf(".");
+2 -2
View File
File diff suppressed because one or more lines are too long
+7
View File
@@ -15,6 +15,13 @@ function saveFilterPrefs(key, value) {
}
}
// Percent-encode every path segment of a repo-relative path while keeping
// the "/" separators intact, so characters such as spaces, "#" and "?"
// survive when the path is embedded in a URL. Falsy inputs (null,
// undefined, "") yield "".
function encodePathForUrl(path) {
  const segments = String(path || "").split("/");
  const encoded = [];
  for (const segment of segments) {
    encoded.push(encodeURIComponent(segment));
  }
  return encoded.join("/");
}
function humanFileSize(bytes, si = false, dp = 1) {
const thresh = si ? 1000 : 1024;
+15 -1
View File
@@ -141,7 +141,21 @@ export default class Repository {
force: false,
}
): Promise<IFile[]> {
const hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
let hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
// Files created by GitHubDownload don't carry a valid 40-char GitHub
// blob SHA. When the source type later switches to GitHubStream the
// stale entries cause blob-API 404s. Detect this by sampling a file
// with a sha and checking its length; force a re-fetch if it doesn't
// look like a GitHub SHA.
if (hasFile && this.source instanceof GitHubStream) {
const sample = await FileModel.findOne(
{ repoId: this.repoId, sha: { $exists: true, $ne: null } },
{ sha: 1 }
).exec();
if (sample?.sha && sample.sha.length !== 40) {
hasFile = null;
}
}
if (!hasFile || opt.force) {
await FileModel.deleteMany({ repoId: this.repoId }).exec();
const files = await this.source.getFiles(opt.progress);
+81 -6
View File
@@ -20,6 +20,21 @@ import config from "../../config";
const logger = createLogger("gh-stream");
/**
 * Build the github.com "raw" download URL for a file at a given commit.
 *
 * Owner, repo and commit are each percent-encoded as single URL segments;
 * the file path is encoded segment-by-segment so its "/" separators are
 * preserved while special characters inside each segment are escaped.
 *
 * @param owner    repository owner/organization login
 * @param repo     repository name
 * @param commit   commit SHA (or ref) to download from
 * @param filePath repo-relative file path, "/"-separated
 * @returns fully-encoded https://github.com/... raw file URL
 */
export function githubRawFileUrl(
  owner: string,
  repo: string,
  commit: string,
  filePath: string
): string {
  const enc = encodeURIComponent;
  const encodedPath = filePath.split("/").map(enc).join("/");
  return `https://github.com/${enc(owner)}/${enc(repo)}/raw/${enc(
    commit
  )}/${encodedPath}`;
}
export default class GitHubStream extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
@@ -64,7 +79,12 @@ export default class GitHubStream extends GitHubBase {
// blob endpoint above returns the raw pointer text instead, so we use this
// as the fallback for LFS files (#95).
private downloadFileViaRaw(token: string, filePath: string) {
const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${filePath}`;
const url = githubRawFileUrl(
this.data.organization,
this.data.repoName,
this.data.commit,
filePath
);
logger.debug("downloading via raw URL (LFS)", { url });
return got.stream(url, {
headers: { authorization: `token ${token}` },
@@ -72,6 +92,58 @@ export default class GitHubStream extends GitHubBase {
});
}
// Try the blob API, then fall back to the raw URL on statuses where the
// path-based endpoint can still succeed. 422 is the blob endpoint's size
// cap; 404 can happen with stale/invalid blob SHAs while the path still
// exists at the requested commit.
/**
 * Download a file's content, preferring the blob API and falling back to
 * the path-based raw URL when the blob endpoint reports a recoverable
 * status (404: stale/invalid blob SHA while the path may still exist at
 * the commit; 422: the blob endpoint's size cap).
 *
 * The returned promise always resolves (never rejects): unrecoverable
 * errors are delivered through a destroyed PassThrough so the caller's
 * normal stream error handling takes over.
 *
 * @param token    GitHub access token used for both endpoints
 * @param sha      blob SHA to request from the blob API first
 * @param filePath repo-relative path used for the raw-URL fallback
 * @returns a readable stream of the file content
 */
private downloadWithFallback(
  token: string,
  sha: string,
  filePath: string
): Promise<stream.Readable> {
  return new Promise<stream.Readable>((resolve) => {
    const blobStream = this.downloadFile(token, sha);
    // Both "error" and "response" can fire on the same stream; this flag
    // guarantees we resolve the promise exactly once.
    let settled = false;
    const fallbackStatuses = new Set([404, 422]);
    // Switch to the path-based raw URL; statusCode is only for logging.
    const fallbackToRaw = (statusCode?: number) => {
      settled = true;
      logger.info("blob API failed, falling back to raw URL", {
        filePath,
        statusCode,
      });
      resolve(this.downloadFileViaRaw(token, filePath));
    };
    blobStream.on("error", (err) => {
      if (settled) return;
      // got attaches the HTTP response to thrown errors; pull the status
      // out without committing to got's full error type.
      const statusCode = (
        err as { response?: { statusCode?: number } }
      )?.response?.statusCode;
      if (statusCode && fallbackStatuses.has(statusCode)) {
        fallbackToRaw(statusCode);
        return;
      }
      // Other errors: let the normal pipeline handle them.
      // A destroyed PassThrough re-emits the error to whoever consumes
      // the resolved stream, so the promise itself never rejects.
      settled = true;
      const passthrough = new stream.PassThrough();
      passthrough.destroy(err);
      resolve(passthrough);
    });
    blobStream.on("response", (response) => {
      if (settled) return;
      // Recoverable statuses may arrive as a response rather than an
      // error; drop the blob stream before starting the raw fetch.
      if (fallbackStatuses.has(response.statusCode || 0)) {
        blobStream.destroy();
        fallbackToRaw(response.statusCode);
        return;
      }
      settled = true;
      // Success path: still route through the LFS-pointer check so
      // pointer files are replaced by their actual content.
      resolve(this.resolveLfsPointer(blobStream, token, filePath));
    });
  });
}
// Wrap a blob stream so that if its first ~150 bytes look like a Git LFS
// pointer, the bytes are dropped and replaced by a fresh fetch from the
// raw URL endpoint (which resolves LFS automatically). Non-LFS files are
@@ -190,11 +262,14 @@ export default class GitHubStream extends GitHubBase {
});
}
const token = await this.data.getToken();
const blobStream = this.downloadFile(token, expected.sha);
// If the blob is a Git LFS pointer, swap to a raw-URL fetch so the
// file content (not the pointer text) makes it into the pipeline. See
// #95 — Support for Git LFS.
const content = this.resolveLfsPointer(blobStream, token, filePath);
// Try the blob API first, but fall back to the raw URL on recoverable
// blob misses/caps while still preserving LFS pointer handling.
const content = await this.downloadWithFallback(
token,
expected.sha,
filePath
);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
+31 -5
View File
@@ -6,14 +6,40 @@ import { fileETag } from "./file-etag";
export const router = express.Router();
/**
 * Best-effort percent-decoding of a single URL path segment.
 *
 * Tries `decodeURIComponent` first, then `decodeURI`; if both throw
 * (malformed escape sequences such as a lone "%"), the segment is
 * returned unchanged instead of propagating a URIError.
 */
function decodePathSegment(segment: string): string {
  for (const decode of [decodeURIComponent, decodeURI]) {
    try {
      return decode(segment);
    } catch {
      // malformed escape — try the next, more lenient decoder
    }
  }
  return segment;
}
/**
 * Extract the repo-relative file path from a request URL of the form
 * `/:repoId/file/<path>`.
 *
 * The pathname (still percent-encoded) is stripped of the
 * `/<repoId>/file/` prefix — matched first against the encoded repoId,
 * then against the raw repoId as a fallback — and each remaining segment
 * is decoded individually so "/" separators are preserved.
 *
 * @param reqUrl   the request URL (path + query), possibly relative
 * @param protocol request protocol, used only to build a base for URL()
 * @param hostname request host, used only to build a base for URL()
 * @param repoId   repository id expected as the first path segment
 * @returns the decoded file path relative to the repo root
 */
export function filePathFromRequestUrl(
  reqUrl: string,
  protocol: string,
  hostname: string,
  repoId: string
): string {
  const { pathname } = new URL(reqUrl, `${protocol}://${hostname}`);
  const encodedPrefix = `/${encodeURIComponent(repoId)}/file/`;
  let rawPath: string;
  if (pathname.startsWith(encodedPrefix)) {
    rawPath = pathname.substring(encodedPrefix.length);
  } else {
    rawPath = pathname.replace(`/${repoId}/file/`, "");
  }
  return rawPath.split("/").map(decodePathSegment).join("/");
}
router.get(
"/:repoId/file/:path*",
async (req: express.Request, res: express.Response) => {
const anonymizedPath = decodeURI(
new URL(req.url, `${req.protocol}://${req.hostname}`).pathname.replace(
`/${req.params.repoId}/file/`,
""
)
const anonymizedPath = filePathFromRequestUrl(
req.url,
req.protocol,
req.hostname,
req.params.repoId
);
if (anonymizedPath.endsWith("/")) {
return handleError(