perf: improve the perf of Anonymous GitHub

This commit is contained in:
tdurieux
2023-02-08 09:49:24 +01:00
parent 73f7582fd2
commit 2e36b72a7f
12 changed files with 115 additions and 53 deletions

View File

@@ -38,7 +38,7 @@ export default class AnonymizedFile {
repository: Repository;
anonymizedPath: string;
sha?: string;
_sha?: string;
constructor(data: { repository: Repository; anonymizedPath: string }) {
this.repository = data.repository;
@@ -50,6 +50,12 @@ export default class AnonymizedFile {
this.anonymizedPath = data.anonymizedPath;
}
/**
 * Resolve the sha of this file.
 *
 * When `_sha` is already set it is returned directly. Otherwise
 * `originalPath()` is awaited first and whatever `_sha` holds afterwards is
 * returned (presumably `originalPath()` fills `_sha` while resolving the
 * path; the result is `undefined` if it does not).
 *
 * @returns the cached or freshly resolved sha, possibly `undefined`
 */
async sha() {
  if (!this._sha) {
    // Not cached yet: resolving the original path is what is relied on to
    // populate `_sha`.
    await this.originalPath();
  }
  return this._sha;
}
/**
* De-anonymize the path
*
@@ -118,7 +124,7 @@ export default class AnonymizedFile {
const file: TreeFile = currentAnonymized as TreeFile;
this.fileSize = file.size;
this.sha = file.sha;
this._sha = file.sha;
if (isAmbiguous) {
// it should never happen
@@ -134,16 +140,15 @@ export default class AnonymizedFile {
} else {
this._originalPath = currentOriginalPath;
}
return this._originalPath;
}
async extension() {
const filename = basename(await this.originalPath());
extension() {
const filename = basename(this.anonymizedPath);
const extensions = filename.split(".").reverse();
return extensions[0].toLowerCase();
}
async isImage(): Promise<boolean> {
const extension = await this.extension();
isImage() {
const extension = this.extension();
return [
"png",
"jpg",
@@ -160,18 +165,21 @@ export default class AnonymizedFile {
"heic",
].includes(extension);
}
async isFileSupported() {
const extension = await this.extension();
isFileSupported() {
const extension = this.extension();
if (!this.repository.options.pdf && extension == "pdf") {
return false;
}
if (!this.repository.options.image && (await this.isImage())) {
if (!this.repository.options.image && this.isImage()) {
return false;
}
return true;
}
async content(): Promise<Readable> {
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
await this.originalPath();
}
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
throw new AnonymousError("file_too_big", {
object: this,
@@ -185,9 +193,8 @@ export default class AnonymizedFile {
}
async anonymizedContent() {
await this.originalPath();
const rs = await this.content();
return rs.pipe(anonymizeStream(await this.originalPath(), this.repository));
return rs.pipe(anonymizeStream(this));
}
get originalCachePath() {
@@ -196,14 +203,25 @@ export default class AnonymizedFile {
object: this,
httpStatus: 400,
});
if (!this._originalPath) {
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
throw new AnonymousError("path_not_defined", {
object: this,
httpStatus: 400,
});
} else {
return join(this.repository.originalCachePath, this.anonymizedPath);
}
}
return join(this.repository.originalCachePath, this._originalPath);
}
async send(res: Response): Promise<void> {
const pipe = promisify(pipeline);
try {
if (await this.extension()) {
res.contentType(await this.extension());
if (this.extension()) {
res.contentType(this.extension());
}
await pipe(await this.anonymizedContent(), res);
} catch (error) {

View File

@@ -16,6 +16,7 @@ import ConferenceModel from "./database/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { downloadQueue } from "./queue";
import { isConnected } from "./database/database";
import AnonymizedFile from "./AnonymizedFile";
export default class Repository {
private _model: IAnonymizedRepositoryDocument;
@@ -48,12 +49,17 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The anonymized file tree
*/
async anonymizedFiles(opt?: {
/** Force to refresh the file tree */
force?: boolean;
/** Include the file sha in the response */
includeSha: boolean;
}): Promise<Tree> {
async anonymizedFiles(
opt: {
/** Force to refresh the file tree */
force?: boolean;
/** Include the file sha in the response */
includeSha: boolean;
} = {
force: false,
includeSha: false,
}
): Promise<Tree> {
const terms = this._model.options.terms || [];
function anonymizeTreeRecursive(tree: TreeElement): TreeElement {
@@ -78,11 +84,11 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The file tree
*/
async files(opt?: { force?: boolean }) {
async files(opt: { force?: boolean } = { force: false }): Promise<Tree> {
if (
this._model.originalFiles &&
Object.keys(this._model.originalFiles).length !== 0 &&
!opt?.force
!opt.force
) {
return this._model.originalFiles;
}
@@ -90,9 +96,6 @@ export default class Repository {
this._model.originalFiles = files;
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
await this._model.save();
this._model.originalFiles = files;
return files;
}
@@ -140,8 +143,13 @@ export default class Repository {
zip(): Readable {
return storage.archive(this.originalCachePath, {
format: "zip",
fileTransformer: (filename) =>
anonymizeStream(filename, this) as Transformer,
fileTransformer: (filename: string) =>
anonymizeStream(
new AnonymizedFile({
repository: this,
anonymizedPath: filename,
})
) as Transformer,
});
}

View File

@@ -119,9 +119,14 @@ export default class User {
*/
async getRepositories() {
const repositories = (
await AnonymizedRepositoryModel.find({
owner: this.id,
}).exec()
await AnonymizedRepositoryModel.find(
{
owner: this.id,
},
{
originalFiles: 0,
}
).exec()
).map((d) => new Repository(d));
const promises = [];
for (let repo of repositories) {

View File

@@ -5,6 +5,7 @@ import { isText } from "istextorbinary";
import { basename } from "path";
import { Transform } from "stream";
import { Readable } from "stream";
import AnonymizedFile from "./AnonymizedFile";
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
@@ -31,7 +32,7 @@ export function isTextFile(filePath: string, content: Buffer) {
return isText(filename, content);
}
export function anonymizeStream(filename: string, repository: Repository) {
export function anonymizeStream(file: AnonymizedFile) {
const ts = new Transform();
var chunks = [],
len = 0,
@@ -43,8 +44,8 @@ export function anonymizeStream(filename: string, repository: Repository) {
if (pos === 1) {
let data: any = Buffer.concat(chunks, len);
if (isTextFile(filename, data)) {
data = anonymizeContent(data.toString(), repository);
if (isTextFile(file.anonymizedPath, data)) {
data = anonymizeContent(data.toString(), file.repository);
}
chunks = [];
@@ -60,8 +61,8 @@ export function anonymizeStream(filename: string, repository: Repository) {
ts._flush = function _flush(cb) {
if (chunks.length) {
let data: any = Buffer.concat(chunks, len);
if (isText(filename, data)) {
data = anonymizeContent(data.toString(), repository);
if (isText(file.anonymizedPath, data)) {
data = anonymizeContent(data.toString(), file.repository);
}
this.push(data);

View File

@@ -21,14 +21,25 @@ export async function connect() {
return database;
}
export async function getRepository(repoId: string) {
export async function getRepository(
repoId: string,
opts: {
includeFiles: boolean;
} = {
includeFiles: true,
}
) {
if (!repoId || repoId == "undefined") {
throw new AnonymousError("repo_not_found", {
object: repoId,
httpStatus: 404,
});
}
const data = await AnonymizedRepositoryModel.findOne({ repoId });
const project: any = {};
if (!opts.includeFiles) {
project.originalFiles = 0;
}
const data = await AnonymizedRepositoryModel.findOne({ repoId }, project);
if (!data)
throw new AnonymousError("repo_not_found", {
object: repoId,

View File

@@ -19,12 +19,12 @@ router.get(
try {
await repo.countView();
const f = new AnonymizedFile({
repository: repo,
anonymizedPath,
});
if (!(await f.isFileSupported())) {
if (!f.isFileSupported()) {
throw new AnonymousError("file_not_supported", {
httpStatus: 403,
object: f,
@@ -34,7 +34,7 @@ router.get(
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
);
// cache the file for 5min
res.header('Cache-Control', 'max-age=300');
res.header("Cache-Control", "max-age=300");
await f.send(res);
} catch (error) {
return handleError(error, res, req);

View File

@@ -109,7 +109,10 @@ router.post(
"/:repoId/refresh",
async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
if (
@@ -133,7 +136,10 @@ router.post(
router.delete(
"/:repoId/",
async (req: express.Request, res: express.Response) => {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
// if (repo.status == "removing") return res.json({ status: repo.status });
try {
@@ -237,7 +243,10 @@ router.get(
// get repository information
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
const user = await getUser(req);
@@ -327,7 +336,10 @@ router.post(
"/:repoId/",
async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
const user = await getUser(req);

View File

@@ -76,7 +76,7 @@ router.get(
async (req: express.Request, res: express.Response) => {
try {
res.header("Cache-Control", "no-cache");
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, { nocheck: true, includeFiles: false });
if (!repo) return;
let redirectURL = null;
if (

View File

@@ -37,11 +37,16 @@ export async function getPullRequest(
export async function getRepo(
req: express.Request,
res: express.Response,
opt?: { nocheck?: boolean }
opt: { nocheck?: boolean; includeFiles?: boolean } = {
nocheck: false,
includeFiles: true,
}
) {
try {
const repo = await db.getRepository(req.params.repoId);
if (opt?.nocheck == true) {
const repo = await db.getRepository(req.params.repoId, {
includeFiles: opt.includeFiles,
});
if (opt.nocheck == true) {
} else {
// redirect if the repository is expired
if (

View File

@@ -78,8 +78,7 @@ async function webView(req: express.Request, res: express.Response) {
}
let best_match = null;
indexSelector:
for (const p of indexPriority) {
indexSelector: for (const p of indexPriority) {
for (let filename in currentAnonymized) {
if (filename.toLowerCase() == p) {
best_match = filename;
@@ -96,13 +95,13 @@ async function webView(req: express.Request, res: express.Response) {
}
}
if (!(await f.isFileSupported())) {
if (!f.isFileSupported()) {
throw new AnonymousError("file_not_supported", {
httpStatus: 400,
object: f,
});
}
if ((await f.extension()) == "md") {
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
res.contentType("html").send(marked.marked(content));
} else {

View File

@@ -39,7 +39,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
const ghRes = await octokit.rest.git.getBlob({
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
file_sha: file.sha,
file_sha: await file.sha(),
});
if (!ghRes.data.content && ghRes.data.size != 0) {
throw new AnonymousError("file_not_accessible", {

View File

@@ -27,6 +27,9 @@ export default class S3Storage implements StorageBase {
endpoint: config.S3_ENDPOINT,
accessKeyId: config.S3_CLIENT_ID,
secretAccessKey: config.S3_CLIENT_SECRET,
httpOptions: {
timeout: 1000 * 60 * 60 * 2, // 2 hours (in milliseconds)
}
});
}