mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-15 06:30:26 +02:00
fix old github download repos
This commit is contained in:
+18
-8
@@ -593,7 +593,9 @@ angular
|
||||
if (dir) {
|
||||
output += `<a ng-click="openFolder('${path}', $event)">${name}</a>`;
|
||||
} else {
|
||||
output += `<a href='/r/${$scope.repoId}${path}'>${name}</a>`;
|
||||
output += `<a href='/r/${$scope.repoId}${encodePathForUrl(
|
||||
path
|
||||
)}'>${name}</a>`;
|
||||
}
|
||||
if (truncated) {
|
||||
output += `<span class="truncated-warning" title="{{ 'WARNINGS.folder_truncated' | translate }}"><i class="fas fa-exclamation-triangle"></i></span>`;
|
||||
@@ -2255,7 +2257,9 @@ angular
|
||||
}
|
||||
|
||||
// redirect to readme
|
||||
$location.url(uri + readmeCandidates[best_match]);
|
||||
$location.url(
|
||||
uri + encodePathForUrl(readmeCandidates[best_match])
|
||||
);
|
||||
}
|
||||
}
|
||||
$scope.getFiles = async function (path) {
|
||||
@@ -2353,11 +2357,15 @@ angular
|
||||
// server returns a fresh ETag on first hit either way.
|
||||
const sha = (fileInfo && fileInfo.sha) || "0";
|
||||
$http
|
||||
.get(`/api/repo/${$scope.repoId}/file/${path}?v=` + sha, {
|
||||
transformResponse: (data) => {
|
||||
return data;
|
||||
},
|
||||
})
|
||||
.get(
|
||||
`/api/repo/${$scope.repoId}/file/${encodePathForUrl(path)}?v=` +
|
||||
sha,
|
||||
{
|
||||
transformResponse: (data) => {
|
||||
return data;
|
||||
},
|
||||
}
|
||||
)
|
||||
.then(
|
||||
(res) => {
|
||||
$scope.type = originalType;
|
||||
@@ -2427,7 +2435,9 @@ angular
|
||||
if ($scope.file && $scope.file.sha) {
|
||||
fileVersion = $scope.file.sha;
|
||||
}
|
||||
$scope.url = `/api/repo/${$scope.repoId}/file/${$scope.filePath}?v=${fileVersion}`;
|
||||
$scope.url = `/api/repo/${$scope.repoId}/file/${encodePathForUrl(
|
||||
$scope.filePath
|
||||
)}?v=${fileVersion}`;
|
||||
|
||||
let extension = $scope.filePath.toLowerCase();
|
||||
const extensionIndex = extension.lastIndexOf(".");
|
||||
|
||||
Vendored
+2
-2
File diff suppressed because one or more lines are too long
@@ -15,6 +15,13 @@ function saveFilterPrefs(key, value) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Percent-encode a file path so it can be embedded in a URL.
 *
 * The path is split on "/" so the separators survive, and every segment is
 * escaped with encodeURIComponent. The characters ! ' ( ) * are escaped as
 * well: encodeURIComponent leaves them untouched, and a raw apostrophe would
 * terminate a single-quoted HTML attribute (e.g. href='...') when the result
 * is interpolated into markup.
 *
 * @param path File path; null, undefined and "" all yield "".
 * @returns The path with each segment percent-encoded and "/" preserved.
 */
function encodePathForUrl(path) {
  return String(path || "")
    .split("/")
    .map((segment) =>
      encodeURIComponent(segment).replace(
        /[!'()*]/g,
        // Turn the leftover reserved characters into %XX escapes.
        (ch) => "%" + ch.charCodeAt(0).toString(16).toUpperCase()
      )
    )
    .join("/");
}
|
||||
|
||||
function humanFileSize(bytes, si = false, dp = 1) {
|
||||
const thresh = si ? 1000 : 1024;
|
||||
|
||||
|
||||
+15
-1
@@ -141,7 +141,21 @@ export default class Repository {
|
||||
force: false,
|
||||
}
|
||||
): Promise<IFile[]> {
|
||||
const hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
|
||||
let hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
|
||||
// Files created by GitHubDownload don't carry a valid 40-char GitHub
|
||||
// blob SHA. When the source type later switches to GitHubStream the
|
||||
// stale entries cause blob-API 404s. Detect this by sampling a file
|
||||
// with a sha and checking its length; force a re-fetch if it doesn't
|
||||
// look like a GitHub SHA.
|
||||
if (hasFile && this.source instanceof GitHubStream) {
|
||||
const sample = await FileModel.findOne(
|
||||
{ repoId: this.repoId, sha: { $exists: true, $ne: null } },
|
||||
{ sha: 1 }
|
||||
).exec();
|
||||
if (sample?.sha && sample.sha.length !== 40) {
|
||||
hasFile = null;
|
||||
}
|
||||
}
|
||||
if (!hasFile || opt.force) {
|
||||
await FileModel.deleteMany({ repoId: this.repoId }).exec();
|
||||
const files = await this.source.getFiles(opt.progress);
|
||||
|
||||
@@ -20,6 +20,21 @@ import config from "../../config";
|
||||
|
||||
const logger = createLogger("gh-stream");
|
||||
|
||||
/**
 * Build the github.com "raw" URL of a file at a given commit.
 *
 * Owner, repository and commit are percent-encoded as single path
 * components. The file path is encoded segment by segment so its "/"
 * separators are preserved. Leading slashes on the file path are dropped so
 * the result never contains an empty "//" segment after the commit.
 *
 * @param owner Repository owner (user or organization).
 * @param repo Repository name.
 * @param commit Commit identifier placed after "/raw/".
 * @param filePath Path of the file inside the repository.
 * @returns The absolute https://github.com/... raw URL.
 */
export function githubRawFileUrl(
  owner: string,
  repo: string,
  commit: string,
  filePath: string
): string {
  const encodedPath = filePath
    .replace(/^\/+/, "")
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  const encodedOwner = encodeURIComponent(owner);
  const encodedRepo = encodeURIComponent(repo);
  const encodedCommit = encodeURIComponent(commit);
  return `https://github.com/${encodedOwner}/${encodedRepo}/raw/${encodedCommit}/${encodedPath}`;
}
|
||||
|
||||
export default class GitHubStream extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
|
||||
|
||||
@@ -64,7 +79,12 @@ export default class GitHubStream extends GitHubBase {
|
||||
// blob endpoint above returns the raw pointer text instead, so we use this
|
||||
// as the fallback for LFS files (#95).
|
||||
private downloadFileViaRaw(token: string, filePath: string) {
|
||||
const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${filePath}`;
|
||||
const url = githubRawFileUrl(
|
||||
this.data.organization,
|
||||
this.data.repoName,
|
||||
this.data.commit,
|
||||
filePath
|
||||
);
|
||||
logger.debug("downloading via raw URL (LFS)", { url });
|
||||
return got.stream(url, {
|
||||
headers: { authorization: `token ${token}` },
|
||||
@@ -72,6 +92,58 @@ export default class GitHubStream extends GitHubBase {
|
||||
});
|
||||
}
|
||||
|
||||
// Try the blob API, then fall back to the raw URL on statuses where the
// path-based endpoint can still succeed. 422 is the blob endpoint's size
// cap; 404 can happen with stale/invalid blob SHAs while the path still
// exists at the requested commit.
//
// The returned promise always resolves with a readable stream and never
// rejects: other failures are delivered as an 'error' on the resolved
// stream so the consumer's stream pipeline can handle them.
private downloadWithFallback(
  token: string,
  sha: string,
  filePath: string
): Promise<stream.Readable> {
  return new Promise<stream.Readable>((resolve) => {
    const blobStream = this.downloadFile(token, sha);
    // Guards against resolving twice when both 'response' and 'error' fire.
    let settled = false;

    const fallbackStatuses = new Set([404, 422]);
    const fallbackToRaw = (statusCode?: number) => {
      settled = true;
      logger.info("blob API failed, falling back to raw URL", {
        filePath,
        statusCode,
      });
      resolve(this.downloadFileViaRaw(token, filePath));
    };

    blobStream.on("error", (err) => {
      if (settled) return;
      const statusCode = (
        err as { response?: { statusCode?: number } }
      )?.response?.statusCode;
      if (statusCode && fallbackStatuses.has(statusCode)) {
        fallbackToRaw(statusCode);
        return;
      }
      // Other errors: let the normal pipeline handle them. The stream is
      // handed over first and destroyed on a later turn of the event loop:
      // destroying it synchronously would emit 'error' before the awaiting
      // caller has had a chance to attach any listener to the stream.
      settled = true;
      const passthrough = new stream.PassThrough();
      resolve(passthrough);
      setImmediate(() => passthrough.destroy(err));
    });

    blobStream.on("response", (response) => {
      if (settled) return;
      if (fallbackStatuses.has(response.statusCode || 0)) {
        // Abandon the blob request before switching endpoints.
        blobStream.destroy();
        fallbackToRaw(response.statusCode);
        return;
      }
      settled = true;
      resolve(this.resolveLfsPointer(blobStream, token, filePath));
    });
  });
}
|
||||
|
||||
// Wrap a blob stream so that if its first ~150 bytes look like a Git LFS
|
||||
// pointer, the bytes are dropped and replaced by a fresh fetch from the
|
||||
// raw URL endpoint (which resolves LFS automatically). Non-LFS files are
|
||||
@@ -190,11 +262,14 @@ export default class GitHubStream extends GitHubBase {
|
||||
});
|
||||
}
|
||||
const token = await this.data.getToken();
|
||||
const blobStream = this.downloadFile(token, expected.sha);
|
||||
// If the blob is a Git LFS pointer, swap to a raw-URL fetch so the
|
||||
// file content (not the pointer text) makes it into the pipeline. See
|
||||
// #95 — Support for Git LFS.
|
||||
const content = this.resolveLfsPointer(blobStream, token, filePath);
|
||||
|
||||
// Try the blob API first, but fall back to the raw URL on recoverable
|
||||
// blob misses/caps while still preserving LFS pointer handling.
|
||||
const content = await this.downloadWithFallback(
|
||||
token,
|
||||
expected.sha,
|
||||
filePath
|
||||
);
|
||||
|
||||
// duplicate the stream to write it to the storage
|
||||
const stream1 = content.pipe(new stream.PassThrough());
|
||||
|
||||
@@ -6,14 +6,40 @@ import { fileETag } from "./file-etag";
|
||||
|
||||
export const router = express.Router();
|
||||
|
||||
/**
 * Percent-decode a single URL path segment without ever throwing.
 *
 * Well-formed segments are decoded with decodeURIComponent. If the segment
 * contains a malformed escape (for example a literal "%" sent by an old
 * link), decoding falls back to a best-effort pass: each run of well-formed
 * %XX escapes is decoded on its own and anything that cannot be decoded is
 * kept verbatim.
 *
 * @param segment One "/"-delimited piece of a request path.
 * @returns The decoded segment, or the undecodable parts left as received.
 */
function decodePathSegment(segment: string): string {
  try {
    return decodeURIComponent(segment);
  } catch {
    // decodeURI rejects exactly the same malformed input, so it is no use as
    // a fallback; salvage the decodable escape runs individually instead.
    return segment.replace(/(?:%[0-9A-Fa-f]{2})+/g, (run) => {
      try {
        return decodeURIComponent(run);
      } catch {
        return run;
      }
    });
  }
}
|
||||
|
||||
/**
 * Extract the decoded file path from a "/:repoId/file/..." request URL.
 *
 * The URL is resolved against `${protocol}://${hostname}` and only its
 * pathname is used. The "/<repoId>/file/" prefix is removed, matching the
 * percent-encoded repo id at the start of the pathname first and otherwise
 * removing the first occurrence of the raw-id form. Each remaining segment
 * is then decoded with decodePathSegment.
 *
 * @param reqUrl Request URL (path plus optional query string).
 * @param protocol Protocol used to build the base URL for parsing.
 * @param hostname Host name used to build the base URL for parsing.
 * @param repoId Repository id expected in the first path segment.
 * @returns The file path with every segment decoded, joined by "/".
 */
export function filePathFromRequestUrl(
  reqUrl: string,
  protocol: string,
  hostname: string,
  repoId: string
): string {
  const base = `${protocol}://${hostname}`;
  const { pathname } = new URL(reqUrl, base);
  const encodedPrefix = `/${encodeURIComponent(repoId)}/file/`;

  let encodedFilePath: string;
  if (pathname.startsWith(encodedPrefix)) {
    encodedFilePath = pathname.slice(encodedPrefix.length);
  } else {
    encodedFilePath = pathname.replace(`/${repoId}/file/`, "");
  }

  const decodedSegments: string[] = [];
  for (const part of encodedFilePath.split("/")) {
    decodedSegments.push(decodePathSegment(part));
  }
  return decodedSegments.join("/");
}
|
||||
|
||||
router.get(
|
||||
"/:repoId/file/:path*",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const anonymizedPath = decodeURI(
|
||||
new URL(req.url, `${req.protocol}://${req.hostname}`).pathname.replace(
|
||||
`/${req.params.repoId}/file/`,
|
||||
""
|
||||
)
|
||||
const anonymizedPath = filePathFromRequestUrl(
|
||||
req.url,
|
||||
req.protocol,
|
||||
req.hostname,
|
||||
req.params.repoId
|
||||
);
|
||||
if (anonymizedPath.endsWith("/")) {
|
||||
return handleError(
|
||||
|
||||
Reference in New Issue
Block a user