feat: adds OpenTelemetry support

This commit is contained in:
tdurieux
2024-03-27 11:17:56 +00:00
parent 803720e2ea
commit 0caf786c9c
24 changed files with 4522 additions and 1187 deletions

View File

@@ -10,7 +10,7 @@ import GitHubBase from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { FILE_TYPE, RepositoryStatus, SourceBase } from "../types";
import AnonymousError from "../AnonymousError";
import { tryCatch } from "bullmq";
import { trace } from "@opentelemetry/api";
export default class GitHubDownload extends GitHubBase implements SourceBase {
constructor(
@@ -40,115 +40,129 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
}
/**
 * Downloads the repository archive from GitHub and extracts it into the
 * repository's original cache path.
 *
 * The whole operation is wrapped in a "GHDownload.download" span that is
 * always ended, whichever path exits the method.
 *
 * @param token GitHub token used to resolve the archive URL. When it is
 *   missing, the token returned by `this.getToken()` is used instead.
 * @throws AnonymousError `repo_in_download` (404) when the repository status
 *   is "download" and its status date is less than five minutes old.
 * @throws AnonymousError `repo_not_accessible` (404) when the archive URL
 *   cannot be resolved, including after the `config.GITHUB_TOKEN` retry.
 * @throws AnonymousError `unable_to_download` (500) when fetching or
 *   extracting the archive fails.
 */
async download(token?: string) {
  const span = trace.getTracer("ano-file").startSpan("GHDownload.download");
  span.setAttribute("repoId", this.repository.repoId);
  try {
    // Refuse to start a second download while a recent one is still running.
    const fiveMinuteAgo = new Date();
    fiveMinuteAgo.setMinutes(fiveMinuteAgo.getMinutes() - 5);
    if (
      this.repository.status == "download" &&
      this.repository.model.statusDate > fiveMinuteAgo
    )
      throw new AnonymousError("repo_in_download", {
        httpStatus: 404,
        object: this.repository,
      });

    let response: OctokitResponse<unknown, number>;
    try {
      if (!token) {
        token = await this.getToken();
      }
      response = await this._getZipUrl(token);
    } catch (error) {
      span.recordException(error as Error);
      if ((error as any).status == 401 && config.GITHUB_TOKEN) {
        // The supplied token was rejected: retry once with the configured one.
        try {
          response = await this._getZipUrl(config.GITHUB_TOKEN);
        } catch (fallbackError) {
          span.recordException(fallbackError as Error);
          await this.repository.resetSate(
            RepositoryStatus.ERROR,
            "repo_not_accessible"
          );
          throw new AnonymousError("repo_not_accessible", {
            httpStatus: 404,
            cause: fallbackError as Error,
            object: this.repository,
          });
        }
      } else {
        await this.repository.resetSate(
          RepositoryStatus.ERROR,
          "repo_not_accessible"
        );
        throw new AnonymousError("repo_not_accessible", {
          httpStatus: 404,
          object: this.repository,
          cause: error as Error,
        });
      }
    }

    await this.repository.updateStatus(RepositoryStatus.DOWNLOAD);
    const originalPath = this.repository.originalCachePath;
    await storage.mk(originalPath);

    let progress: { transferred: number } | undefined = undefined;
    let progressTimeout: ReturnType<typeof setTimeout> | undefined;
    let inDownload = true;

    // Publishes the number of transferred bytes every 1.5s while downloading.
    const updateProgress = async (): Promise<void> => {
      if (!inDownload) return;
      try {
        if (progress && this.repository.status == RepositoryStatus.DOWNLOAD) {
          await this.repository.updateStatus(
            this.repository.status,
            progress.transferred.toString()
          );
        }
      } catch (error) {
        // Progress reporting is best effort: a failed status update must not
        // surface as an unhandled rejection of this fire-and-forget call.
        span.recordException(error as Error);
      }
      // The download may have finished while the status update was awaited;
      // do not re-arm the timer in that case.
      if (inDownload) {
        progressTimeout = setTimeout(updateProgress, 1500);
      }
    };
    void updateProgress();

    try {
      const downloadStream = got.stream(response.url);
      downloadStream.addListener("downloadProgress", (p) => {
        progress = p;
      });
      await storage.extractZip(originalPath, downloadStream, undefined, this);
    } catch (error) {
      span.recordException(error as Error);
      await this.repository.updateStatus(
        RepositoryStatus.ERROR,
        "unable_to_download"
      );
      throw new AnonymousError("unable_to_download", {
        httpStatus: 500,
        cause: error as Error,
        object: this.repository,
      });
    } finally {
      inDownload = false;
      clearTimeout(progressTimeout);
    }

    this.repository.model.isReseted = false;
    try {
      await this.repository.updateStatus(RepositoryStatus.READY);
    } catch (error) {
      // The archive is already extracted; a failed status update is only
      // recorded on the span and does not fail the download.
      span.recordException(error as Error);
    }
  } finally {
    span.end();
  }
}
/**
 * Returns a readable stream of the original content of `file`, downloading
 * the repository first when the file is not yet in the cache.
 *
 * The call is traced by a "GHDownload.getFileContent" span; any failure is
 * recorded on the span before being rethrown.
 *
 * @param file the anonymized file whose original content is requested.
 * @returns the stream produced by `storage.read` for the cached file.
 * @throws AnonymousError `folder_not_supported` (400) when the cached path
 *   is a folder.
 */
async getFileContent(file: AnonymizedFile): Promise<Readable> {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHDownload.getFileContent");
  span.setAttribute("repoId", this.repository.repoId);
  try {
    const exists = await storage.exists(file.originalCachePath);
    if (exists === FILE_TYPE.FILE) {
      // `await` keeps the span open until the read has settled.
      return await storage.read(file.originalCachePath);
    } else if (exists === FILE_TYPE.FOLDER) {
      throw new AnonymousError("folder_not_supported", {
        httpStatus: 400,
        object: file,
      });
    }
    // will throw an error if the file is not in the repository
    await file.originalPath();

    // the cache is not ready, we need to download the repository
    await this.download();
    return await storage.read(file.originalCachePath);
  } catch (error) {
    span.recordException(error as Error);
    throw error;
  } finally {
    span.end();
  }
}
async getFiles() {

View File

@@ -5,6 +5,7 @@ import { Octokit, RestEndpointMethodTypes } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../database/database";
import { trace } from "@opentelemetry/api";
export class GitHubRepository {
private _data: Partial<{
@@ -50,64 +51,81 @@ export class GitHubRepository {
accessToken?: string;
}
) {
const octokit = new Octokit({ auth: opt.accessToken });
const commit = await octokit.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: sha,
});
return commit.data;
const span = trace
.getTracer("ano-file")
.startSpan("GHRepository.getCommitInfo");
span.setAttribute("owner", this.owner);
span.setAttribute("repo", this.repo);
try {
const octokit = new Octokit({ auth: opt.accessToken });
const commit = await octokit.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: sha,
});
return commit.data;
} finally {
span.end();
}
}
/**
 * Returns the branches of the repository.
 *
 * When no branches are held in memory, or when `opt.force` is true, the list
 * is fetched from the GitHub API (100 per page), the readme already known
 * for a branch of the same name is carried over, and the result is stored in
 * the database when a connection is available. Otherwise, when connected,
 * the branches stored in the database are loaded.
 *
 * The call is traced by a "GHRepository.branches" span.
 *
 * @param opt.accessToken token used to authenticate the GitHub request.
 * @param opt.force when true, always refresh the list from GitHub.
 * @returns the known branches, or an empty array when none are available.
 * @throws AnonymousError `repo_not_found` when refreshing the list fails;
 *   `httpStatus` is the `status` of the underlying error.
 */
async branches(opt: {
  accessToken?: string;
  force?: boolean;
}): Promise<Branch[]> {
  const span = trace.getTracer("ano-file").startSpan("GHRepository.branches");
  span.setAttribute("owner", this.owner);
  span.setAttribute("repo", this.repo);
  try {
    if (
      !this._data.branches ||
      this._data.branches.length == 0 ||
      opt?.force === true
    ) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      try {
        // Index the readmes already known by branch name (first entry wins)
        // so each fetched branch is resolved with a single lookup.
        const knownReadmes = new Map<string, Branch["readme"]>();
        for (const known of this._data.branches ?? []) {
          if (!knownReadmes.has(known.name)) {
            knownReadmes.set(known.name, known.readme);
          }
        }

        const fetched = await octokit.paginate(
          "GET /repos/{owner}/{repo}/branches",
          {
            owner: this.owner,
            repo: this.repo,
            per_page: 100,
          }
        );
        const branches = fetched.map((b) => {
          return {
            name: b.name,
            commit: b.commit.sha,
            readme: knownReadmes.get(b.name),
          } as Branch;
        });

        this._data.branches = branches;
        if (isConnected) {
          await RepositoryModel.updateOne(
            { externalId: this.id },
            { $set: { branches } }
          );
        }
      } catch (error) {
        span.recordException(error as Error);
        throw new AnonymousError("repo_not_found", {
          httpStatus: (error as any).status,
          cause: error as Error,
          object: this,
        });
      }
    } else if (isConnected) {
      const q = await RepositoryModel.findOne({ externalId: this.id }).select(
        "branches"
      );
      this._data.branches = q?.branches;
    }

    return this._data.branches || [];
  } finally {
    span.end();
  }
}
async readme(opt: {
@@ -115,52 +133,60 @@ export class GitHubRepository {
force?: boolean;
accessToken?: string;
}): Promise<string | undefined> {
if (!opt.branch) opt.branch = this._data.defaultBranch || "master";
const span = trace.getTracer("ano-file").startSpan("GHRepository.readme");
span.setAttribute("owner", this.owner);
span.setAttribute("repo", this.repo);
try {
if (!opt.branch) opt.branch = this._data.defaultBranch || "master";
const model = await RepositoryModel.findOne({
externalId: this.id,
}).select("branches");
const model = await RepositoryModel.findOne({
externalId: this.id,
}).select("branches");
if (!model) {
throw new AnonymousError("repo_not_found", { httpStatus: 404 });
}
if (!model) {
throw new AnonymousError("repo_not_found", { httpStatus: 404 });
}
this._data.branches = await this.branches(opt);
model.branches = this._data.branches;
this._data.branches = await this.branches(opt);
model.branches = this._data.branches;
const selected = model.branches.filter((f) => f.name == opt.branch)[0];
if (selected && (!selected.readme || opt?.force === true)) {
// get the list of repo from github
const octokit = new Octokit({ auth: opt.accessToken });
try {
const ghRes = await octokit.repos.getReadme({
owner: this.owner,
repo: this.repo,
ref: selected?.commit,
});
const readme = Buffer.from(
ghRes.data.content,
ghRes.data.encoding as BufferEncoding
).toString("utf-8");
selected.readme = readme;
await model.save();
} catch (error) {
const selected = model.branches.filter((f) => f.name == opt.branch)[0];
if (selected && (!selected.readme || opt?.force === true)) {
// get the list of repo from github
const octokit = new Octokit({ auth: opt.accessToken });
try {
const ghRes = await octokit.repos.getReadme({
owner: this.owner,
repo: this.repo,
ref: selected?.commit,
});
const readme = Buffer.from(
ghRes.data.content,
ghRes.data.encoding as BufferEncoding
).toString("utf-8");
selected.readme = readme;
await model.save();
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("readme_not_available", {
httpStatus: 404,
cause: error as Error,
object: this,
});
}
}
if (!selected) {
throw new AnonymousError("readme_not_available", {
httpStatus: 404,
cause: error as Error,
object: this,
});
}
}
if (!selected) {
throw new AnonymousError("readme_not_available", {
httpStatus: 404,
object: this,
});
return selected.readme;
} finally {
span.end();
}
return selected.readme;
}
public get owner(): string {
@@ -203,57 +229,69 @@ export async function getRepositoryFromGitHub(opt: {
repo: string;
accessToken: string;
}) {
if (opt.repo.indexOf(".git") > -1) {
opt.repo = opt.repo.replace(".git", "");
}
const octokit = new Octokit({ auth: opt.accessToken });
let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
const span = trace
.getTracer("ano-file")
.startSpan("GHRepository.getRepositoryFromGitHub");
span.setAttribute("owner", opt.owner);
span.setAttribute("repo", opt.repo);
try {
r = (
await octokit.repos.get({
owner: opt.owner,
repo: opt.repo,
})
).data;
} catch (error) {
throw new AnonymousError("repo_not_found", {
httpStatus: (error as any).status,
object: {
owner: opt.owner,
repo: opt.repo,
},
cause: error as Error,
});
}
if (!r)
throw new AnonymousError("repo_not_found", {
httpStatus: 404,
object: {
owner: opt.owner,
repo: opt.repo,
},
});
let model = new RepositoryModel({ externalId: "gh_" + r.id });
if (isConnected) {
const dbModel = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
if (dbModel) {
model = dbModel;
if (opt.repo.indexOf(".git") > -1) {
opt.repo = opt.repo.replace(".git", "");
}
const octokit = new Octokit({ auth: opt.accessToken });
let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
try {
r = (
await octokit.repos.get({
owner: opt.owner,
repo: opt.repo,
})
).data;
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("repo_not_found", {
httpStatus: (error as any).status,
object: {
owner: opt.owner,
repo: opt.repo,
},
cause: error as Error,
});
}
if (!r)
throw new AnonymousError("repo_not_found", {
httpStatus: 404,
object: {
owner: opt.owner,
repo: opt.repo,
},
});
let model = new RepositoryModel({ externalId: "gh_" + r.id });
if (isConnected) {
const dbModel = await RepositoryModel.findOne({
externalId: "gh_" + r.id,
});
if (dbModel) {
model = dbModel;
}
}
model.name = r.full_name;
model.url = r.html_url;
model.size = r.size;
model.defaultBranch = r.default_branch;
model.hasPage = r.has_pages;
if (model.hasPage) {
const ghPageRes = await octokit.repos.getPages({
owner: opt.owner,
repo: opt.repo,
});
model.pageSource = ghPageRes.data.source;
}
if (isConnected) {
await model.save();
}
return new GitHubRepository(model);
} finally {
span.end();
}
model.name = r.full_name;
model.url = r.html_url;
model.size = r.size;
model.defaultBranch = r.default_branch;
model.hasPage = r.has_pages;
if (model.hasPage) {
const ghPageRes = await octokit.repos.getPages({
owner: opt.owner,
repo: opt.repo,
});
model.pageSource = ghPageRes.data.source;
}
if (isConnected) {
await model.save();
}
return new GitHubRepository(model);
}

View File

@@ -9,6 +9,7 @@ import * as path from "path";
import * as stream from "stream";
import AnonymousError from "../AnonymousError";
import config from "../../config";
import { trace } from "@opentelemetry/api";
export default class GitHubStream extends GitHubBase implements SourceBase {
constructor(
@@ -26,67 +27,83 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
/**
 * Fetches the content of `file` from the GitHub blob API, writes it to the
 * original cache path and returns it as a readable stream.
 *
 * The call runs inside an active "GHStream.getFileContent" span. The span is
 * ended on every exit path, including failures that happen before the blob
 * request (token or sha resolution), and the failure is recorded on it.
 *
 * @param file the anonymized file whose original content is requested.
 * @returns a stream over the decoded blob content (empty for empty files).
 * @throws AnonymousError `file_not_accessible` (404) when the file has no sha.
 * @throws AnonymousError `file_not_found` when the blob request, or anything
 *   after it, fails with a 404 status.
 * @throws AnonymousError `file_too_big` for any other failure of the blob
 *   request or of the caching steps that follow it.
 */
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
  return trace
    .getTracer("ano-file")
    .startActiveSpan("GHStream.getFileContent", async (span) => {
      try {
        span.setAttribute("path", file.anonymizedPath);
        const octokit = new Octokit({
          auth: await this.getToken(),
        });

        const file_sha = await file.sha();
        if (!file_sha) {
          // Thrown outside the inner try on purpose: it must not be rewrapped
          // as "file_not_found" by the catch below.
          throw new AnonymousError("file_not_accessible", {
            httpStatus: 404,
            object: file,
          });
        }

        try {
          const ghRes = await octokit.rest.git.getBlob({
            owner: this.githubRepository.owner,
            repo: this.githubRepository.repo,
            file_sha,
          });
          if (!ghRes.data.content && ghRes.data.size != 0) {
            throw new AnonymousError("file_not_accessible", {
              httpStatus: 404,
              object: file,
            });
          }
          // empty file
          let content: Buffer;
          if (ghRes.data.content) {
            content = Buffer.from(
              ghRes.data.content,
              ghRes.data.encoding as BufferEncoding
            );
          } else {
            content = Buffer.from("");
          }
          await storage.write(file.originalCachePath, content, file, this);
          this.repository.model.isReseted = false;
          await this.repository.model.save();
          if (this.repository.status !== RepositoryStatus.READY)
            await this.repository.updateStatus(RepositoryStatus.READY);
          return stream.Readable.from(content);
        } catch (error) {
          if (
            (error as any).status === 404 ||
            (error as any).httpStatus === 404
          ) {
            throw new AnonymousError("file_not_found", {
              httpStatus: (error as any).status || (error as any).httpStatus,
              cause: error as Error,
              object: file,
            });
          }
          throw new AnonymousError("file_too_big", {
            httpStatus: (error as any).status || (error as any).httpStatus,
            cause: error as Error,
            object: file,
          });
        }
      } catch (error) {
        span.recordException(error as Error);
        throw error;
      } finally {
        span.end();
      }
    });
}
/**
 * Returns the file tree of the repository at the selected commit.
 *
 * The commit is taken from the current branch and falls back to the commit
 * stored in the repository model source. The call is traced by a
 * "GHStream.getFiles" span that stays open until `getTree` has settled.
 *
 * @returns the tree produced by `getTree` for that commit.
 */
async getFiles() {
  const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
  span.setAttribute("repoId", this.repository.repoId);
  try {
    let commit = this.branch?.commit;
    if (!commit && this.repository.model.source.commit) {
      commit = this.repository.model.source.commit;
    }
    // `await` so the span covers the asynchronous tree retrieval and is not
    // ended as soon as the promise is created.
    return await this.getTree(commit);
  } catch (error) {
    span.recordException(error as Error);
    throw error;
  } finally {
    span.end();
  }
}
private async getTree(
@@ -98,6 +115,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
request: 0,
}
) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
span.setAttribute("repoId", this.repository.repoId);
span.setAttribute("sha", sha);
this.repository.model.truckedFileList = false;
let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
@@ -105,11 +125,13 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
count.request++;
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
span.recordException(error as Error);
if ((error as any).status == 409) {
// empty tree
if (this.repository.status != RepositoryStatus.READY)
await this.repository.updateStatus(RepositoryStatus.READY);
// cannot be empty otherwise it would try to download it again
span.end();
return { __: {} };
} else {
console.log(
@@ -121,7 +143,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
RepositoryStatus.ERROR,
"repo_not_accessible"
);
throw new AnonymousError("repo_not_accessible", {
const err = new AnonymousError("repo_not_accessible", {
httpStatus: (error as any).status,
cause: error as Error,
object: {
@@ -130,6 +152,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
tree_sha: sha,
},
});
span.recordException(err);
span.end();
throw err;
}
}
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
@@ -139,6 +164,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
if (this.repository.status !== RepositoryStatus.READY)
await this.repository.updateStatus(RepositoryStatus.READY);
span.end();
return tree;
}
@@ -165,9 +191,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
},
depth = 0
) {
console.log(
`sha ${sha}, countFiles: ${count.file} countRequest: ${count.request}, parentPath: "${parentPath}"`
);
count.request++;
let data = null;