Files
anonymous_github/src/core/source/GitHubRepository.ts
T
tdurieux c4182b5b2a fix: prevent E11000 on concurrent getRepositoryFromGitHub
Replace check-then-insert with atomic findOneAndUpdate upsert keyed on
externalId, plus a single E11000 retry fallback. Eliminates the duplicate
key race when two requests resolve the same gh_<id> concurrently.
2026-05-06 12:22:42 +03:00

384 lines
10 KiB
TypeScript

import { Branch } from "../types";
import * as gh from "parse-github-url";
import { RestEndpointMethodTypes } from "@octokit/rest";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../../server/database";
import { octokit } from "../GitHubUtils";
import { IRepositoryDocument } from "../model/repositories/repositories.types";
import RepositoryModel from "../model/repositories/repositories.model";
/**
 * Wrapper around a (possibly partial) repository document that describes a
 * GitHub repository.
 *
 * The wrapped data is the source for the simple accessors (`fullName`, `id`,
 * `size`, ...). The async helpers query the GitHub API through `octokit` and
 * cache what they fetch on the wrapped data; `branches()` also persists to
 * `RepositoryModel` when `isConnected` is truthy, while `readme()` always
 * goes through `RepositoryModel`.
 */
export class GitHubRepository {
  private _data: Partial<{
    [P in keyof IRepositoryDocument]: IRepositoryDocument[P];
  }>;

  /**
   * @param data Repository fields to wrap. The object is stored by reference,
   *   not copied, so later cache updates are written onto it.
   */
  constructor(
    data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
  ) {
    this._data = data;
  }

  /**
   * Plain-object summary of the repository.
   *
   * @throws AnonymousError `invalid_repo` (through the `owner`/`repo`
   *   getters) when the stored name is missing or cannot be parsed.
   */
  toJSON() {
    return {
      id: this.model._id,
      externalId: this._data.externalId,
      repo: this.repo,
      owner: this.owner,
      hasPage: this._data.hasPage,
      pageSource: this._data.pageSource,
      fullName: this.fullName,
      defaultBranch: this._data.defaultBranch,
      size: this.size,
    };
  }

  /** The wrapped repository data (same object that was given to the constructor). */
  get model() {
    return this._data;
  }

  /** The stored `name` field (split into owner and repo by the getters below). */
  public get fullName(): string | undefined {
    return this._data.name;
  }

  /** The stored `externalId`, used as the lookup key in `RepositoryModel`. */
  public get id(): string | undefined {
    return this._data.externalId;
  }

  /** The stored `size` field. */
  public get size(): number | undefined {
    return this._data.size;
  }

  /**
   * Fetches the GitHub commit object for `sha`.
   *
   * @param sha Commit reference passed to GitHub as `ref`.
   * @param opt.accessToken Token used to build the octokit client.
   * @returns The `data` payload of the GitHub "get commit" response.
   * @throws AnonymousError `commit_not_found` or `repo_not_found` (HTTP 404)
   *   when GitHub answers 404; any other error is rethrown untouched.
   */
  async getCommitInfo(
    sha: string,
    opt: {
      accessToken: string;
    }
  ) {
    const oct = octokit(opt.accessToken);
    try {
      const commit = await oct.repos.getCommit({
        owner: this.owner,
        repo: this.repo,
        ref: sha,
      });
      return commit.data;
    } catch (error) {
      const status = (error as { status?: number }).status;
      if (status !== 404) {
        throw error;
      }
      // A 404 is ambiguous: probe the repository itself to tell a missing
      // commit from a missing repository. Any failure of the probe is
      // treated as "the repository does not exist".
      let repoExists: boolean;
      try {
        await oct.repos.get({ owner: this.owner, repo: this.repo });
        repoExists = true;
      } catch {
        repoExists = false;
      }
      throw new AnonymousError(
        repoExists ? "commit_not_found" : "repo_not_found",
        {
          httpStatus: 404,
          cause: error as Error,
          object: this,
        }
      );
    }
  }

  /**
   * Lists the branches of the repository.
   *
   * When nothing is cached, or `opt.force` is `true`, the list is fetched
   * from GitHub (paginated, 100 per page), cached on the wrapped data and,
   * when `isConnected` is truthy, written to `RepositoryModel`. README
   * contents already cached for a branch of the same name are carried over.
   * Otherwise, when `isConnected` is truthy, the cache is refreshed from
   * `RepositoryModel`.
   *
   * @param opt.accessToken Token used to build the octokit client.
   * @param opt.force Set to `true` to bypass the cache.
   * @returns The branch list, or an empty array when none is available.
   * @throws AnonymousError `repo_not_found` when anything in the refresh
   *   path fails (GitHub request or database update).
   */
  async branches(opt: {
    accessToken: string;
    force?: boolean;
  }): Promise<Branch[]> {
    const cached = this._data.branches;
    if (!cached || cached.length === 0 || opt?.force === true) {
      // Fetch the list of branches from GitHub.
      const oct = octokit(opt.accessToken);
      try {
        const remote = await oct.paginate(
          "GET /repos/{owner}/{repo}/branches",
          {
            owner: this.owner,
            repo: this.repo,
            per_page: 100,
          }
        );
        const branches = remote.map((b) => {
          return {
            name: b.name,
            commit: b.commit.sha,
            // Keep the README previously cached for this branch name.
            readme: cached?.find((f: Branch) => f.name === b.name)?.readme,
          } as Branch;
        });
        this._data.branches = branches;
        if (isConnected) {
          await RepositoryModel.updateOne(
            { externalId: this.id },
            { $set: { branches } }
          );
        }
      } catch (error) {
        throw new AnonymousError("repo_not_found", {
          httpStatus: (error as { status?: number }).status,
          cause: error as Error,
          object: this,
        });
      }
    } else if (isConnected) {
      const q = await RepositoryModel.findOne({ externalId: this.id }).select(
        "branches"
      );
      this._data.branches = q?.branches;
    }
    return this._data.branches || [];
  }

  /**
   * Returns the README of a branch, fetching it from GitHub when it is not
   * cached yet or when `opt.force` is `true`, and saving it on the stored
   * repository document.
   *
   * @param opt.branch Branch name; falls back to the stored default branch,
   *   then to `"master"`. The caller's object is not modified.
   * @param opt.force Set to `true` to refetch branches and README.
   * @param opt.accessToken Token used to build the octokit client.
   * @returns The README decoded as UTF-8 text.
   * @throws AnonymousError `repo_not_found` (404) when no document with this
   *   `externalId` exists in `RepositoryModel`; `readme_not_available` (404)
   *   when the branch is unknown or fetching/saving the README fails.
   */
  async readme(opt: {
    branch?: string;
    force?: boolean;
    accessToken: string;
  }): Promise<string | undefined> {
    const branchName = opt.branch || this._data.defaultBranch || "master";
    const model = await RepositoryModel.findOne({
      externalId: this.id,
    }).select("branches");
    if (!model) {
      throw new AnonymousError("repo_not_found", { httpStatus: 404 });
    }
    this._data.branches = await this.branches(opt);
    model.branches = this._data.branches;
    const selected = model.branches.find((f) => f.name === branchName);
    if (!selected) {
      throw new AnonymousError("readme_not_available", {
        httpStatus: 404,
        object: this,
      });
    }
    if (!selected.readme || opt?.force === true) {
      // Fetch the README from GitHub at the commit the branch points to.
      const oct = octokit(opt.accessToken);
      try {
        const ghRes = await oct.repos.getReadme({
          owner: this.owner,
          repo: this.repo,
          ref: selected.commit,
        });
        selected.readme = Buffer.from(
          ghRes.data.content,
          ghRes.data.encoding as BufferEncoding
        ).toString("utf-8");
        await model.save();
      } catch (error) {
        throw new AnonymousError("readme_not_available", {
          httpStatus: 404,
          cause: error as Error,
          object: this,
        });
      }
    }
    return selected.readme;
  }

  /**
   * Validates and parses the stored full name; shared by `owner` and `repo`.
   *
   * @throws AnonymousError `invalid_repo` (400) when the name is missing or
   *   the parser returns nothing.
   */
  private parseFullName() {
    const fullName = this.fullName;
    if (!fullName) {
      throw new AnonymousError("invalid_repo", {
        httpStatus: 400,
        object: this,
      });
    }
    const parsed = gh(fullName);
    if (!parsed) {
      throw new AnonymousError("invalid_repo", {
        httpStatus: 400,
        object: this,
      });
    }
    return { fullName, parsed };
  }

  /**
   * Owner part of the full name; falls back to the full name itself when the
   * parser yields no owner.
   */
  public get owner(): string {
    const { fullName, parsed } = this.parseFullName();
    return parsed.owner || fullName;
  }

  /**
   * Repository part of the full name; falls back to the full name itself
   * when the parser yields no name.
   */
  public get repo(): string {
    const { fullName, parsed } = this.parseFullName();
    return parsed.name || fullName;
  }
}
/**
 * Resolves a GitHub repository to a `GitHubRepository`, using the stored
 * document when possible and refreshing it from the GitHub API otherwise.
 *
 * Steps:
 *  1. Strip a trailing `.git` from `opt.repo`.
 *  2. When `isConnected` is truthy, look the document up by `repositoryID`
 *     (taking owner/repo from its stored name) or by `owner/repo`. If found
 *     and `opt.force` is not set, return it without contacting GitHub.
 *  3. Fetch the repository from GitHub by name. If that 404s and a stored
 *     `gh_<id>` external id is known, fetch by numeric id instead (the
 *     repository was probably renamed).
 *  4. If the repository has Pages enabled, fetch its Pages source.
 *  5. Not connected: return an unsaved model. Connected: upsert the document
 *     keyed on `externalId` and return it.
 *
 * Note: `opt.owner` and `opt.repo` are rewritten in place by steps 1 and 2.
 *
 * @param opt.owner Repository owner (login).
 * @param opt.repo Repository name, with or without a `.git` suffix.
 * @param opt.repositoryID Optional database id of the stored document.
 * @param opt.accessToken Token used to build the octokit client.
 * @param opt.force When truthy, refresh from GitHub even if a document exists.
 * @throws AnonymousError `repo_access_limited` (403) when GitHub reports
 *   OAuth App access restrictions; `repo_not_found` when the repository
 *   cannot be fetched or the upserted document cannot be read back.
 */
export async function getRepositoryFromGitHub(opt: {
  owner: string;
  repo: string;
  repositoryID?: string;
  accessToken: string;
  force?: boolean;
}) {
  if (opt.repo.endsWith(".git")) {
    opt.repo = opt.repo.slice(0, -4);
  }
  let dbModel;
  if (opt.repositoryID) {
    dbModel = isConnected
      ? await RepositoryModel.findById(opt.repositoryID)
      : null;
    // Prefer the owner/repo recorded on the stored document.
    opt.owner = dbModel?.name?.split("/")[0] || opt.owner;
    opt.repo = dbModel?.name?.split("/")[1] || opt.repo;
  } else {
    dbModel = isConnected
      ? await RepositoryModel.findOne({
          name: opt.owner + "/" + opt.repo,
        })
      : null;
  }
  if (dbModel && !opt.force) {
    return new GitHubRepository(dbModel);
  }
  const oct = octokit(opt.accessToken);
  let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
  // Recover the numeric GitHub repository id from `externalId` (stored as
  // "gh_<id>") so we can re-fetch by id if the owner/repo we cached has
  // since been renamed on GitHub — see #409.
  const ghIdFromExternal =
    typeof dbModel?.externalId === "string" &&
    dbModel.externalId.startsWith("gh_")
      ? dbModel.externalId.slice(3)
      : null;
  try {
    r = (
      await oct.repos.get({
        owner: opt.owner,
        repo: opt.repo,
      })
    ).data;
  } catch (error) {
    if (
      error instanceof Error &&
      error.message.includes(
        "organization has enabled OAuth App access restrictions"
      )
    ) {
      throw new AnonymousError("repo_access_limited", {
        httpStatus: 403,
        object: {
          owner: opt.owner,
          repo: opt.repo,
        },
        cause: error as Error,
      });
    }
    // If the name 404s but we know the GitHub repo id, the repo was
    // probably renamed. Look it up by id and continue with the new name.
    const status = (error as { status?: number }).status;
    if (status === 404 && ghIdFromExternal) {
      try {
        r = (
          await oct.request("GET /repositories/{id}", {
            id: ghIdFromExternal,
          })
        ).data as RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
      } catch (idError) {
        throw new AnonymousError("repo_not_found", {
          httpStatus: (idError as { status?: number }).status || 404,
          object: { owner: opt.owner, repo: opt.repo },
          cause: idError as Error,
        });
      }
    } else {
      throw new AnonymousError("repo_not_found", {
        httpStatus: status,
        object: {
          owner: opt.owner,
          repo: opt.repo,
        },
        cause: error as Error,
      });
    }
  }
  if (!r)
    throw new AnonymousError("repo_not_found", {
      httpStatus: 404,
      object: {
        owner: opt.owner,
        repo: opt.repo,
      },
    });
  const externalId = "gh_" + r.id;
  let pageSource:
    | RestEndpointMethodTypes["repos"]["getPages"]["response"]["data"]["source"]
    | undefined;
  if (r.has_pages) {
    // Use the canonical owner/name from the repository response rather than
    // `opt.owner`/`opt.repo`: after the by-id fallback above, the requested
    // name is the stale one that just returned 404.
    const ghPageRes = await oct.repos.getPages({
      owner: r.owner.login,
      repo: r.name,
    });
    pageSource = ghPageRes.data.source;
  }
  if (!isConnected) {
    // No database: build an in-memory model only, nothing is saved.
    const model = dbModel || new RepositoryModel({ externalId });
    model.name = r.full_name;
    model.url = r.html_url;
    model.size = r.size;
    model.defaultBranch = r.default_branch;
    model.hasPage = r.has_pages;
    if (pageSource) model.pageSource = pageSource;
    return new GitHubRepository(model);
  }
  // Atomic upsert keyed on externalId so concurrent requests for the same
  // GitHub repo can't both insert and trip the unique-index race (E11000).
  const update: Record<string, unknown> = {
    $set: {
      name: r.full_name,
      url: r.html_url,
      size: r.size,
      defaultBranch: r.default_branch,
      hasPage: r.has_pages,
      ...(pageSource ? { pageSource } : {}),
    },
    $setOnInsert: { externalId },
  };
  let model: IRepositoryDocument | null;
  try {
    model = await RepositoryModel.findOneAndUpdate({ externalId }, update, {
      upsert: true,
      new: true,
      setDefaultsOnInsert: true,
    });
  } catch (error) {
    // Mongo can still raise E11000 on a concurrent upsert insert; retry
    // once with a plain find — the winning insert is now visible.
    if ((error as { code?: number }).code === 11000) {
      model = await RepositoryModel.findOne({ externalId });
    } else {
      throw error;
    }
  }
  if (!model) {
    throw new AnonymousError("repo_not_found", {
      httpStatus: 404,
      object: { owner: opt.owner, repo: opt.repo },
    });
  }
  return new GitHubRepository(model);
}