mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-12 18:32:44 +00:00
perf: improve the perf of Anonymous GitHub
This commit is contained in:
@@ -38,7 +38,7 @@ export default class AnonymizedFile {
|
||||
|
||||
repository: Repository;
|
||||
anonymizedPath: string;
|
||||
sha?: string;
|
||||
_sha?: string;
|
||||
|
||||
constructor(data: { repository: Repository; anonymizedPath: string }) {
|
||||
this.repository = data.repository;
|
||||
@@ -50,6 +50,12 @@ export default class AnonymizedFile {
|
||||
this.anonymizedPath = data.anonymizedPath;
|
||||
}
|
||||
|
||||
async sha() {
|
||||
if (this._sha) return this._sha;
|
||||
await this.originalPath();
|
||||
return this._sha;
|
||||
}
|
||||
|
||||
/**
|
||||
* De-anonymize the path
|
||||
*
|
||||
@@ -118,7 +124,7 @@ export default class AnonymizedFile {
|
||||
|
||||
const file: TreeFile = currentAnonymized as TreeFile;
|
||||
this.fileSize = file.size;
|
||||
this.sha = file.sha;
|
||||
this._sha = file.sha;
|
||||
|
||||
if (isAmbiguous) {
|
||||
// it should never happen
|
||||
@@ -134,16 +140,15 @@ export default class AnonymizedFile {
|
||||
} else {
|
||||
this._originalPath = currentOriginalPath;
|
||||
}
|
||||
|
||||
return this._originalPath;
|
||||
}
|
||||
async extension() {
|
||||
const filename = basename(await this.originalPath());
|
||||
extension() {
|
||||
const filename = basename(this.anonymizedPath);
|
||||
const extensions = filename.split(".").reverse();
|
||||
return extensions[0].toLowerCase();
|
||||
}
|
||||
async isImage(): Promise<boolean> {
|
||||
const extension = await this.extension();
|
||||
isImage() {
|
||||
const extension = this.extension();
|
||||
return [
|
||||
"png",
|
||||
"jpg",
|
||||
@@ -160,18 +165,21 @@ export default class AnonymizedFile {
|
||||
"heic",
|
||||
].includes(extension);
|
||||
}
|
||||
async isFileSupported() {
|
||||
const extension = await this.extension();
|
||||
isFileSupported() {
|
||||
const extension = this.extension();
|
||||
if (!this.repository.options.pdf && extension == "pdf") {
|
||||
return false;
|
||||
}
|
||||
if (!this.repository.options.image && (await this.isImage())) {
|
||||
if (!this.repository.options.image && this.isImage()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async content(): Promise<Readable> {
|
||||
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
|
||||
await this.originalPath();
|
||||
}
|
||||
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
|
||||
throw new AnonymousError("file_too_big", {
|
||||
object: this,
|
||||
@@ -185,9 +193,8 @@ export default class AnonymizedFile {
|
||||
}
|
||||
|
||||
async anonymizedContent() {
|
||||
await this.originalPath();
|
||||
const rs = await this.content();
|
||||
return rs.pipe(anonymizeStream(await this.originalPath(), this.repository));
|
||||
return rs.pipe(anonymizeStream(this));
|
||||
}
|
||||
|
||||
get originalCachePath() {
|
||||
@@ -196,14 +203,25 @@ export default class AnonymizedFile {
|
||||
object: this,
|
||||
httpStatus: 400,
|
||||
});
|
||||
if (!this._originalPath) {
|
||||
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
|
||||
throw new AnonymousError("path_not_defined", {
|
||||
object: this,
|
||||
httpStatus: 400,
|
||||
});
|
||||
} else {
|
||||
return join(this.repository.originalCachePath, this.anonymizedPath);
|
||||
}
|
||||
}
|
||||
|
||||
return join(this.repository.originalCachePath, this._originalPath);
|
||||
}
|
||||
|
||||
async send(res: Response): Promise<void> {
|
||||
const pipe = promisify(pipeline);
|
||||
try {
|
||||
if (await this.extension()) {
|
||||
res.contentType(await this.extension());
|
||||
if (this.extension()) {
|
||||
res.contentType(this.extension());
|
||||
}
|
||||
await pipe(await this.anonymizedContent(), res);
|
||||
} catch (error) {
|
||||
|
||||
@@ -16,6 +16,7 @@ import ConferenceModel from "./database/conference/conferences.model";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { downloadQueue } from "./queue";
|
||||
import { isConnected } from "./database/database";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
|
||||
export default class Repository {
|
||||
private _model: IAnonymizedRepositoryDocument;
|
||||
@@ -48,12 +49,17 @@ export default class Repository {
|
||||
* @param opt force to get an updated list of files
|
||||
* @returns The anonymized file tree
|
||||
*/
|
||||
async anonymizedFiles(opt?: {
|
||||
/** Force to refresh the file tree */
|
||||
force?: boolean;
|
||||
/** Include the file sha in the response */
|
||||
includeSha: boolean;
|
||||
}): Promise<Tree> {
|
||||
async anonymizedFiles(
|
||||
opt: {
|
||||
/** Force to refresh the file tree */
|
||||
force?: boolean;
|
||||
/** Include the file sha in the response */
|
||||
includeSha: boolean;
|
||||
} = {
|
||||
force: false,
|
||||
includeSha: false,
|
||||
}
|
||||
): Promise<Tree> {
|
||||
const terms = this._model.options.terms || [];
|
||||
|
||||
function anonymizeTreeRecursive(tree: TreeElement): TreeElement {
|
||||
@@ -78,11 +84,11 @@ export default class Repository {
|
||||
* @param opt force to get an updated list of files
|
||||
* @returns The file tree
|
||||
*/
|
||||
async files(opt?: { force?: boolean }) {
|
||||
async files(opt: { force?: boolean } = { force: false }): Promise<Tree> {
|
||||
if (
|
||||
this._model.originalFiles &&
|
||||
Object.keys(this._model.originalFiles).length !== 0 &&
|
||||
!opt?.force
|
||||
!opt.force
|
||||
) {
|
||||
return this._model.originalFiles;
|
||||
}
|
||||
@@ -90,9 +96,6 @@ export default class Repository {
|
||||
this._model.originalFiles = files;
|
||||
this._model.size = { storage: 0, file: 0 };
|
||||
await this.computeSize();
|
||||
await this._model.save();
|
||||
|
||||
this._model.originalFiles = files;
|
||||
return files;
|
||||
}
|
||||
|
||||
@@ -140,8 +143,13 @@ export default class Repository {
|
||||
zip(): Readable {
|
||||
return storage.archive(this.originalCachePath, {
|
||||
format: "zip",
|
||||
fileTransformer: (filename) =>
|
||||
anonymizeStream(filename, this) as Transformer,
|
||||
fileTransformer: (filename: string) =>
|
||||
anonymizeStream(
|
||||
new AnonymizedFile({
|
||||
repository: this,
|
||||
anonymizedPath: filename,
|
||||
})
|
||||
) as Transformer,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
11
src/User.ts
11
src/User.ts
@@ -119,9 +119,14 @@ export default class User {
|
||||
*/
|
||||
async getRepositories() {
|
||||
const repositories = (
|
||||
await AnonymizedRepositoryModel.find({
|
||||
owner: this.id,
|
||||
}).exec()
|
||||
await AnonymizedRepositoryModel.find(
|
||||
{
|
||||
owner: this.id,
|
||||
},
|
||||
{
|
||||
originalFiles: 0,
|
||||
}
|
||||
).exec()
|
||||
).map((d) => new Repository(d));
|
||||
const promises = [];
|
||||
for (let repo of repositories) {
|
||||
|
||||
@@ -5,6 +5,7 @@ import { isText } from "istextorbinary";
|
||||
import { basename } from "path";
|
||||
import { Transform } from "stream";
|
||||
import { Readable } from "stream";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
|
||||
const urlRegex =
|
||||
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
@@ -31,7 +32,7 @@ export function isTextFile(filePath: string, content: Buffer) {
|
||||
return isText(filename, content);
|
||||
}
|
||||
|
||||
export function anonymizeStream(filename: string, repository: Repository) {
|
||||
export function anonymizeStream(file: AnonymizedFile) {
|
||||
const ts = new Transform();
|
||||
var chunks = [],
|
||||
len = 0,
|
||||
@@ -43,8 +44,8 @@ export function anonymizeStream(filename: string, repository: Repository) {
|
||||
|
||||
if (pos === 1) {
|
||||
let data: any = Buffer.concat(chunks, len);
|
||||
if (isTextFile(filename, data)) {
|
||||
data = anonymizeContent(data.toString(), repository);
|
||||
if (isTextFile(file.anonymizedPath, data)) {
|
||||
data = anonymizeContent(data.toString(), file.repository);
|
||||
}
|
||||
|
||||
chunks = [];
|
||||
@@ -60,8 +61,8 @@ export function anonymizeStream(filename: string, repository: Repository) {
|
||||
ts._flush = function _flush(cb) {
|
||||
if (chunks.length) {
|
||||
let data: any = Buffer.concat(chunks, len);
|
||||
if (isText(filename, data)) {
|
||||
data = anonymizeContent(data.toString(), repository);
|
||||
if (isText(file.anonymizedPath, data)) {
|
||||
data = anonymizeContent(data.toString(), file.repository);
|
||||
}
|
||||
|
||||
this.push(data);
|
||||
|
||||
@@ -21,14 +21,25 @@ export async function connect() {
|
||||
return database;
|
||||
}
|
||||
|
||||
export async function getRepository(repoId: string) {
|
||||
export async function getRepository(
|
||||
repoId: string,
|
||||
opts: {
|
||||
includeFiles: boolean;
|
||||
} = {
|
||||
includeFiles: true,
|
||||
}
|
||||
) {
|
||||
if (!repoId || repoId == "undefined") {
|
||||
throw new AnonymousError("repo_not_found", {
|
||||
object: repoId,
|
||||
httpStatus: 404,
|
||||
});
|
||||
}
|
||||
const data = await AnonymizedRepositoryModel.findOne({ repoId });
|
||||
const project: any = {};
|
||||
if (!opts.includeFiles) {
|
||||
project.originalFiles = 0;
|
||||
}
|
||||
const data = await AnonymizedRepositoryModel.findOne({ repoId }, project);
|
||||
if (!data)
|
||||
throw new AnonymousError("repo_not_found", {
|
||||
object: repoId,
|
||||
|
||||
@@ -19,12 +19,12 @@ router.get(
|
||||
|
||||
try {
|
||||
await repo.countView();
|
||||
|
||||
|
||||
const f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath,
|
||||
});
|
||||
if (!(await f.isFileSupported())) {
|
||||
if (!f.isFileSupported()) {
|
||||
throw new AnonymousError("file_not_supported", {
|
||||
httpStatus: 403,
|
||||
object: f,
|
||||
@@ -34,7 +34,7 @@ router.get(
|
||||
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
|
||||
);
|
||||
// cache the file for 5min
|
||||
res.header('Cache-Control', 'max-age=300');
|
||||
res.header("Cache-Control", "max-age=300");
|
||||
await f.send(res);
|
||||
} catch (error) {
|
||||
return handleError(error, res, req);
|
||||
|
||||
@@ -109,7 +109,10 @@ router.post(
|
||||
"/:repoId/refresh",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
|
||||
if (
|
||||
@@ -133,7 +136,10 @@ router.post(
|
||||
router.delete(
|
||||
"/:repoId/",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
// if (repo.status == "removing") return res.json({ status: repo.status });
|
||||
try {
|
||||
@@ -237,7 +243,10 @@ router.get(
|
||||
// get repository information
|
||||
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
|
||||
const user = await getUser(req);
|
||||
@@ -327,7 +336,10 @@ router.post(
|
||||
"/:repoId/",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
const user = await getUser(req);
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ router.get(
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
res.header("Cache-Control", "no-cache");
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
const repo = await getRepo(req, res, { nocheck: true, includeFiles: false });
|
||||
if (!repo) return;
|
||||
let redirectURL = null;
|
||||
if (
|
||||
|
||||
@@ -37,11 +37,16 @@ export async function getPullRequest(
|
||||
export async function getRepo(
|
||||
req: express.Request,
|
||||
res: express.Response,
|
||||
opt?: { nocheck?: boolean }
|
||||
opt: { nocheck?: boolean; includeFiles?: boolean } = {
|
||||
nocheck: false,
|
||||
includeFiles: true,
|
||||
}
|
||||
) {
|
||||
try {
|
||||
const repo = await db.getRepository(req.params.repoId);
|
||||
if (opt?.nocheck == true) {
|
||||
const repo = await db.getRepository(req.params.repoId, {
|
||||
includeFiles: opt.includeFiles,
|
||||
});
|
||||
if (opt.nocheck == true) {
|
||||
} else {
|
||||
// redirect if the repository is expired
|
||||
if (
|
||||
|
||||
@@ -78,8 +78,7 @@ async function webView(req: express.Request, res: express.Response) {
|
||||
}
|
||||
|
||||
let best_match = null;
|
||||
indexSelector:
|
||||
for (const p of indexPriority) {
|
||||
indexSelector: for (const p of indexPriority) {
|
||||
for (let filename in currentAnonymized) {
|
||||
if (filename.toLowerCase() == p) {
|
||||
best_match = filename;
|
||||
@@ -96,13 +95,13 @@ async function webView(req: express.Request, res: express.Response) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!(await f.isFileSupported())) {
|
||||
if (!f.isFileSupported()) {
|
||||
throw new AnonymousError("file_not_supported", {
|
||||
httpStatus: 400,
|
||||
object: f,
|
||||
});
|
||||
}
|
||||
if ((await f.extension()) == "md") {
|
||||
if (f.extension() == "md") {
|
||||
const content = await streamToString(await f.anonymizedContent());
|
||||
res.contentType("html").send(marked.marked(content));
|
||||
} else {
|
||||
|
||||
@@ -39,7 +39,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
const ghRes = await octokit.rest.git.getBlob({
|
||||
owner: this.githubRepository.owner,
|
||||
repo: this.githubRepository.repo,
|
||||
file_sha: file.sha,
|
||||
file_sha: await file.sha(),
|
||||
});
|
||||
if (!ghRes.data.content && ghRes.data.size != 0) {
|
||||
throw new AnonymousError("file_not_accessible", {
|
||||
|
||||
@@ -27,6 +27,9 @@ export default class S3Storage implements StorageBase {
|
||||
endpoint: config.S3_ENDPOINT,
|
||||
accessKeyId: config.S3_CLIENT_ID,
|
||||
secretAccessKey: config.S3_CLIENT_SECRET,
|
||||
httpOptions: {
|
||||
timeout: 1000 * 60 * 60 * 2, // 2 hour
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user