From 4d12641c7e692d072a3ffbe05c0e4634a37296e1 Mon Sep 17 00:00:00 2001 From: tdurieux Date: Wed, 3 Apr 2024 11:13:01 +0100 Subject: [PATCH] feat: introduce streamers that handle the stream and anonymization from github --- Dockerfile | 4 +- docker-compose.yml | 45 +++++--- index.ts | 7 -- nginx.conf | 15 --- package.json | 7 +- cli.ts => src/cli/index.ts | 12 +- config.ts => src/config.ts | 24 ++-- src/{ => core}/AnonymizedFile.ts | 107 +++++++++++++----- src/{ => core}/AnonymousError.ts | 0 src/{ => core}/Conference.ts | 4 +- src/{ => core}/GitHubUtils.ts | 5 +- src/{ => core}/PullRequest.ts | 6 +- src/{ => core}/Repository.ts | 14 +-- src/{ => core}/User.ts | 11 +- src/{ => core}/anonymize-utils.ts | 13 +-- .../anonymizedPullRequests.model.ts | 0 .../anonymizedPullRequests.schema.ts | 0 .../anonymizedPullRequests.types.ts | 0 .../anonymizedRepositories.model.ts | 0 .../anonymizedRepositories.schema.ts | 0 .../anonymizedRepositories.types.ts | 0 .../model}/conference/conferences.model.ts | 0 .../model}/conference/conferences.schema.ts | 0 .../model}/conference/conferences.types.ts | 0 .../model}/repositories/repositories.model.ts | 0 .../repositories/repositories.schema.ts | 0 .../model}/repositories/repositories.types.ts | 0 .../model}/users/users.model.ts | 0 .../model}/users/users.schema.ts | 0 .../model}/users/users.types.ts | 0 src/{ => core}/source/GitHubBase.ts | 16 +-- src/{ => core}/source/GitHubDownload.ts | 4 +- src/{ => core}/source/GitHubRepository.ts | 9 +- src/{ => core}/source/GitHubStream.ts | 38 +++---- src/core/source/Source.ts | 24 ++++ src/{ => core}/source/Zip.ts | 5 +- src/{ => core}/storage.ts | 0 src/{ => core}/storage/FileSystem.ts | 8 ++ src/{ => core}/storage/S3.ts | 11 +- src/{ => core}/storage/Storage.ts | 4 + src/{ => core}/types.ts | 10 -- src/{queue.ts => queue/index.ts} | 8 +- .../processes/downloadRepository.ts | 8 +- src/{ => queue}/processes/removeCache.ts | 6 +- src/{ => queue}/processes/removeRepository.ts | 8 +- 
src/{database => server}/database.ts | 12 +- src/{server.ts => server/index.ts} | 16 ++- src/{ => server}/routes/admin.ts | 14 +-- src/{ => server}/routes/conference.ts | 8 +- src/{ => server}/routes/connection.ts | 6 +- src/{ => server}/routes/file.ts | 6 +- src/{ => server}/routes/index.ts | 0 src/{ => server}/routes/option.ts | 0 .../routes/pullRequest-private.ts | 10 +- src/{ => server}/routes/pullRequest-public.ts | 2 +- src/{ => server}/routes/repository-private.ts | 28 ++--- src/{ => server}/routes/repository-public.ts | 6 +- src/{ => server}/routes/route-utils.ts | 19 +++- src/{ => server}/routes/user.ts | 4 +- src/{ => server}/routes/webview.ts | 8 +- src/{ => server}/schedule.ts | 8 +- src/streamer/index.ts | 31 +++++ src/streamer/route.ts | 63 +++++++++++ tsconfig.json | 2 +- 64 files changed, 419 insertions(+), 257 deletions(-) delete mode 100644 index.ts delete mode 100644 nginx.conf rename cli.ts => src/cli/index.ts (90%) rename config.ts => src/config.ts (82%) rename src/{ => core}/AnonymizedFile.ts (75%) rename src/{ => core}/AnonymousError.ts (100%) rename src/{ => core}/Conference.ts (94%) rename src/{ => core}/GitHubUtils.ts (96%) rename src/{ => core}/PullRequest.ts (97%) rename src/{ => core}/Repository.ts (97%) rename src/{ => core}/User.ts (93%) rename src/{ => core}/anonymize-utils.ts (97%) rename src/{database => core/model}/anonymizedPullRequests/anonymizedPullRequests.model.ts (100%) rename src/{database => core/model}/anonymizedPullRequests/anonymizedPullRequests.schema.ts (100%) rename src/{database => core/model}/anonymizedPullRequests/anonymizedPullRequests.types.ts (100%) rename src/{database => core/model}/anonymizedRepositories/anonymizedRepositories.model.ts (100%) rename src/{database => core/model}/anonymizedRepositories/anonymizedRepositories.schema.ts (100%) rename src/{database => core/model}/anonymizedRepositories/anonymizedRepositories.types.ts (100%) rename src/{database => core/model}/conference/conferences.model.ts 
(100%) rename src/{database => core/model}/conference/conferences.schema.ts (100%) rename src/{database => core/model}/conference/conferences.types.ts (100%) rename src/{database => core/model}/repositories/repositories.model.ts (100%) rename src/{database => core/model}/repositories/repositories.schema.ts (100%) rename src/{database => core/model}/repositories/repositories.types.ts (100%) rename src/{database => core/model}/users/users.model.ts (100%) rename src/{database => core/model}/users/users.schema.ts (100%) rename src/{database => core/model}/users/users.types.ts (100%) rename src/{ => core}/source/GitHubBase.ts (66%) rename src/{ => core}/source/GitHubDownload.ts (100%) rename src/{ => core}/source/GitHubRepository.ts (97%) rename src/{ => core}/source/GitHubStream.ts (90%) create mode 100644 src/core/source/Source.ts rename src/{ => core}/source/Zip.ts (92%) rename src/{ => core}/storage.ts (100%) rename src/{ => core}/storage/FileSystem.ts (96%) rename src/{ => core}/storage/S3.ts (96%) rename src/{ => core}/storage/Storage.ts (95%) rename src/{ => core}/types.ts (64%) rename src/{queue.ts => queue/index.ts} (89%) rename src/{ => queue}/processes/downloadRepository.ts (89%) rename src/{ => queue}/processes/removeCache.ts (88%) rename src/{ => queue}/processes/removeRepository.ts (86%) rename src/{database => server}/database.ts (80%) rename src/{server.ts => server/index.ts} (94%) rename src/{ => server}/routes/admin.ts (93%) rename src/{ => server}/routes/conference.ts (96%) rename src/{ => server}/routes/connection.ts (94%) rename src/{ => server}/routes/file.ts (90%) rename src/{ => server}/routes/index.ts (100%) rename src/{ => server}/routes/option.ts (100%) rename src/{ => server}/routes/pullRequest-private.ts (94%) rename src/{ => server}/routes/pullRequest-public.ts (97%) rename src/{ => server}/routes/repository-private.ts (94%) rename src/{ => server}/routes/repository-public.ts (96%) rename src/{ => server}/routes/route-utils.ts (86%) rename 
src/{ => server}/routes/user.ts (97%) rename src/{ => server}/routes/webview.ts (92%) rename src/{ => server}/schedule.ts (84%) create mode 100644 src/streamer/index.ts create mode 100644 src/streamer/route.ts diff --git a/Dockerfile b/Dockerfile index 47d2716..aeff2b5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,10 +13,8 @@ COPY healthcheck.js . COPY src ./src COPY public ./public -COPY index.ts . -COPY config.ts . RUN npm install && npm run build && npm cache clean --force COPY opentelemetry.js . -CMD [ "node", "--require", "./opentelemetry.js", "./build/index.js"] \ No newline at end of file +CMD [ "node", "--require", "./opentelemetry.js", "./build/server/index.js"] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2b2d161..79f5fd0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,17 +5,17 @@ services: build: . restart: always image: tdurieux/anonymous_github:v2 - deploy: - mode: replicated - replicas: 4 - endpoint_mode: vip + ports: + - $EXPOSED_PORT:5000 env_file: - ./.env volumes: - ./repositories:/app/build/repositories/ environment: + - PORT=5000 - REDIS_HOSTNAME=redis - DB_HOSTNAME=mongodb + - STREAMER_ENTRYPOINT=http://streamer:5000/ healthcheck: test: - CMD @@ -27,22 +27,37 @@ services: links: - mongodb - redis - - opentelemetry + - streamer - nginx: - image: nginx:stable-alpine + streamer: + build: . 
+ restart: always + image: tdurieux/anonymous_github:v2 + deploy: + mode: replicated + replicas: 4 + endpoint_mode: vip + entrypoint: ["node", "./build/streamer/index.js"] env_file: - ./.env volumes: - - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro - depends_on: - - anonymous_github - ports: - - "$EXPOSED_PORT:4000" + - ./repositories:/app/build/repositories/ + environment: + - PORT=5000 + healthcheck: + test: + - CMD + - node + - healthcheck.js + interval: 10s + timeout: 10s + retries: 5 redis: image: "redis:alpine" restart: always + ports: + - 127.0.0.1:6379:6379 healthcheck: test: - CMD @@ -101,13 +116,15 @@ services: image: tiredofit/db-backup links: - mongodb + env_file: + - ./.env volumes: - ./db_backups:/backup environment: - DB_TYPE=mongo - DB_HOST=mongodb - - DB_DUMP_FREQ=60 - - DB_CLEANUP_TIME=240 + - DB_DUMP_FREQ=120 + - DB_CLEANUP_TIME=500 - COMPRESSION=XZ - DB_USER=$DB_USERNAME - DB_PASS=$DB_PASSWORD diff --git a/index.ts b/index.ts deleted file mode 100644 index fbd7cd1..0000000 --- a/index.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { config } from "dotenv"; -config(); - -import server from "./src/server"; - -// start the server -server(); diff --git a/nginx.conf b/nginx.conf deleted file mode 100644 index c744f4d..0000000 --- a/nginx.conf +++ /dev/null @@ -1,15 +0,0 @@ -upstream backend { - server anonymous_github:5000; -} - -server { - listen 4000; - - resolver 127.0.0.11 valid=5s; - - include /etc/nginx/mime.types; - - location / { - proxy_pass http://backend/; - } -} \ No newline at end of file diff --git a/package.json b/package.json index fe815a2..3410b66 100644 --- a/package.json +++ b/package.json @@ -3,13 +3,12 @@ "version": "2.2.0", "description": "Anonymise Github repositories for double-anonymous reviews", "bin": { - "anonymous_github": "build/cli.js" + "anonymous_github": "build/cli/index.js" }, "scripts": { "test": "mocha --reporter spec", - "start": "node --inspect=5858 -r ts-node/register ./index.ts", - "dev": "nodemon --transpile-only 
index.ts", - "migrateDB": "ts-node --transpile-only migrateDB.ts", + "start": "node --inspect=5858 -r ts-node/register ./src/server/index.ts", + "dev": "nodemon --transpile-only ./src/server/index.ts", "build": "rm -rf build && tsc", "knip": "knip" }, diff --git a/cli.ts b/src/cli/index.ts similarity index 90% rename from cli.ts rename to src/cli/index.ts index f9907a4..acfbb3c 100644 --- a/cli.ts +++ b/src/cli/index.ts @@ -10,12 +10,12 @@ import { join } from "path"; import * as gh from "parse-github-url"; import * as inquirer from "inquirer"; -import server from "./src/server"; -import config from "./config"; -import GitHubDownload from "./src/source/GitHubDownload"; -import Repository from "./src/Repository"; -import AnonymizedRepositoryModel from "./src/database/anonymizedRepositories/anonymizedRepositories.model"; -import { getRepositoryFromGitHub } from "./src/source/GitHubRepository"; +import server from "../server"; +import config from "../config"; +import GitHubDownload from "../core/source/GitHubDownload"; +import Repository from "../core/Repository"; +import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model"; +import { getRepositoryFromGitHub } from "../core/source/GitHubRepository"; function generateRandomFileName(size: number) { const characters = diff --git a/config.ts b/src/config.ts similarity index 82% rename from config.ts rename to src/config.ts index a3b6896..079524e 100644 --- a/config.ts +++ b/src/config.ts @@ -18,6 +18,7 @@ interface Config { * Allow to download repository and files */ ENABLE_DOWNLOAD: boolean; + STREAMER_ENTRYPOINT: string | null; ANONYMIZATION_MASK: string; PORT: number; APP_HOSTNAME: string; @@ -26,11 +27,11 @@ interface Config { DB_HOSTNAME: string; FOLDER: string; additionalExtensions: string[]; - S3_BUCKET?: string; - S3_CLIENT_ID?: string; - S3_CLIENT_SECRET?: string; - S3_ENDPOINT?: string; - S3_REGION?: string; + S3_BUCKET: string | null; + S3_CLIENT_ID: string | null; 
+ S3_CLIENT_SECRET: string | null; + S3_ENDPOINT: string | null; + S3_REGION: string | null; STORAGE: "filesystem" | "s3"; TRUST_PROXY: number; RATE_LIMIT: number; @@ -58,7 +59,7 @@ const config: Config = { DB_HOSTNAME: "mongodb", REDIS_HOSTNAME: "redis", REDIS_PORT: 6379, - FOLDER: resolve(__dirname, "repositories"), + FOLDER: resolve(__dirname, "..", "repositories"), additionalExtensions: [ "license", "dockerfile", @@ -70,11 +71,12 @@ const config: Config = { "in", ], STORAGE: "filesystem", - S3_BUCKET: process.env.S3_BUCKET, - S3_CLIENT_ID: process.env.S3_CLIENT_ID, - S3_CLIENT_SECRET: process.env.S3_CLIENT_SECRET, - S3_ENDPOINT: process.env.S3_ENDPOINT, - S3_REGION: process.env.S3_REGION, + STREAMER_ENTRYPOINT: null, + S3_BUCKET: null, + S3_CLIENT_ID: null, + S3_CLIENT_SECRET: null, + S3_ENDPOINT: null, + S3_REGION: null, }; for (let conf in process.env) { diff --git a/src/AnonymizedFile.ts b/src/core/AnonymizedFile.ts similarity index 75% rename from src/AnonymizedFile.ts rename to src/core/AnonymizedFile.ts index bbf8a47..e45dd5a 100644 --- a/src/AnonymizedFile.ts +++ b/src/core/AnonymizedFile.ts @@ -2,13 +2,15 @@ import { join, basename } from "path"; import { Response } from "express"; import { Readable } from "stream"; import { trace } from "@opentelemetry/api"; +import { lookup } from "mime-types"; + import Repository from "./Repository"; import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types"; import config from "../config"; import { anonymizePath, isTextFile } from "./anonymize-utils"; import AnonymousError from "./AnonymousError"; -import { handleError } from "./routes/route-utils"; -import { lookup } from "mime-types"; +import { handleError } from "../server/routes/route-utils"; +import got from "got"; /** * Represent a file in a anonymized repository @@ -197,7 +199,7 @@ export default class AnonymizedFile { httpStatus: 403, }); } - const out = await this.repository.source?.getFileContent(this); + const content = await 
this.repository.source?.getFileContent(this); if ( !this.repository.model.isReseted || this.repository.status != RepositoryStatus.READY @@ -205,7 +207,7 @@ export default class AnonymizedFile { this.repository.model.isReseted = false; await this.repository.updateStatus(RepositoryStatus.READY); } - return out; + return content; } finally { span.end(); } @@ -213,19 +215,32 @@ export default class AnonymizedFile { } async anonymizedContent() { - return trace - .getTracer("ano-file") - .startActiveSpan("anonymizedContent", async (span) => { - span.setAttribute("anonymizedPath", this.anonymizedPath); - const content = await this.content(); - return content - .pipe( - this.repository.generateAnonymizeTransformer(this.anonymizedPath) - ) - .on("close", () => { - span.end(); - }); + const span = trace.getTracer("ano-file").startSpan("Repository.conference"); + span.setAttribute("anonymizedPath", this.anonymizedPath); + const anonymizer = this.repository.generateAnonymizeTransformer( + this.anonymizedPath + ); + if (!config.STREAMER_ENTRYPOINT) { + // collect the content locally + const content = await this.content(); + return content.pipe(anonymizer).on("close", () => { + span.end(); }); + } + // use the streamer service + return got.stream(join(config.STREAMER_ENTRYPOINT, "api"), { + method: "POST", + json: { + token: await this.repository.getToken(), + repoFullName: this.repository.model.source.repositoryName, + commit: this.repository.model.source.commit, + branch: this.repository.model.source.branch, + repoId: this.repository.repoId, + filePath: this.filePath, + sha: await this.sha(), + anonymizerOptions: anonymizer.opt, + }, + }); } get filePath() { @@ -243,6 +258,9 @@ export default class AnonymizedFile { } async send(res: Response): Promise { + const anonymizer = this.repository.generateAnonymizeTransformer( + this.anonymizedPath + ); return trace .getTracer("ano-file") .startActiveSpan("AnonymizedFile.send", async (span) => { @@ -250,6 +268,39 @@ export default 
class AnonymizedFile { span.setAttribute("anonymizedPath", this.anonymizedPath); return new Promise(async (resolve, reject) => { try { + if (config.STREAMER_ENTRYPOINT) { + // use the streamer service + got + .stream(join(config.STREAMER_ENTRYPOINT, "api"), { + method: "POST", + json: { + token: await this.repository.getToken(), + repoFullName: this.repository.model.source.repositoryName, + commit: this.repository.model.source.commit, + branch: this.repository.model.source.branch, + repoId: this.repository.repoId, + filePath: this.filePath, + sha: await this.sha(), + anonymizerOptions: anonymizer.opt, + }, + }) + .on("error", () => { + handleError( + new AnonymousError("file_not_found", { + object: this, + httpStatus: 404, + }), + res + ); + }) + .pipe(res) + .on("close", () => { + span.end(); + resolve(); + }); + return; + } + const mime = lookup(this.anonymizedPath); if (mime && this.extension() != "ts") { res.contentType(mime); @@ -257,9 +308,6 @@ export default class AnonymizedFile { res.contentType("text/plain"); } res.header("Accept-Ranges", "none"); - const anonymizer = this.repository.generateAnonymizeTransformer( - this.anonymizedPath - ); anonymizer.once("transform", (data) => { if (!mime && data.isText) { res.contentType("text/plain"); @@ -269,26 +317,27 @@ export default class AnonymizedFile { res.header("Content-Length", this.fileSize.toString()); } }); - const content = await this.content(); + function handleStreamError(error: Error) { + if (!content.closed && !content.destroyed) { + content.destroy(); + } + span.recordException(error); + span.end(); + reject(error); + // handleError(error, res); + } content + .on("error", handleStreamError) .pipe(anonymizer) .pipe(res) + .on("error", handleStreamError) .on("close", () => { if (!content.closed && !content.destroyed) { content.destroy(); } span.end(); resolve(); - }) - .on("error", (error) => { - if (!content.closed && !content.destroyed) { - content.destroy(); - } - span.recordException(error); - 
span.end(); - reject(error); - handleError(error, res); }); } catch (error) { handleError(error, res); diff --git a/src/AnonymousError.ts b/src/core/AnonymousError.ts similarity index 100% rename from src/AnonymousError.ts rename to src/core/AnonymousError.ts diff --git a/src/Conference.ts b/src/core/Conference.ts similarity index 94% rename from src/Conference.ts rename to src/core/Conference.ts index 3d654c9..80769af 100644 --- a/src/Conference.ts +++ b/src/core/Conference.ts @@ -1,5 +1,5 @@ -import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; -import { IConferenceDocument } from "./database/conference/conferences.types"; +import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model"; +import { IConferenceDocument } from "./model/conference/conferences.types"; import Repository from "./Repository"; import { ConferenceStatus } from "./types"; diff --git a/src/GitHubUtils.ts b/src/core/GitHubUtils.ts similarity index 96% rename from src/GitHubUtils.ts rename to src/core/GitHubUtils.ts index 6353007..ac072db 100644 --- a/src/GitHubUtils.ts +++ b/src/core/GitHubUtils.ts @@ -1,7 +1,8 @@ import { trace } from "@opentelemetry/api"; -import Repository from "./Repository"; import { Octokit } from "@octokit/rest"; -import UserModel from "./database/users/users.model"; + +import Repository from "./Repository"; +import UserModel from "./model/users/users.model"; import config from "../config"; export function octokit(token: string) { diff --git a/src/PullRequest.ts b/src/core/PullRequest.ts similarity index 97% rename from src/PullRequest.ts rename to src/core/PullRequest.ts index 93eff74..11ffcfe 100644 --- a/src/PullRequest.ts +++ b/src/core/PullRequest.ts @@ -1,10 +1,10 @@ import { RepositoryStatus } from "./types"; import User from "./User"; -import UserModel from "./database/users/users.model"; +import UserModel from "./model/users/users.model"; import Conference from 
"./Conference"; -import ConferenceModel from "./database/conference/conferences.model"; +import ConferenceModel from "./model/conference/conferences.model"; import AnonymousError from "./AnonymousError"; -import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types"; +import { IAnonymizedPullRequestDocument } from "./model/anonymizedPullRequests/anonymizedPullRequests.types"; import config from "../config"; import got from "got"; import { octokit } from "./GitHubUtils"; diff --git a/src/Repository.ts b/src/core/Repository.ts similarity index 97% rename from src/Repository.ts rename to src/core/Repository.ts index 11b72a5..e91e762 100644 --- a/src/Repository.ts +++ b/src/core/Repository.ts @@ -6,16 +6,16 @@ import GitHubStream from "./source/GitHubStream"; import GitHubDownload from "./source/GitHubDownload"; import Zip from "./source/Zip"; import { anonymizePath } from "./anonymize-utils"; -import UserModel from "./database/users/users.model"; -import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types"; +import UserModel from "./model/users/users.model"; +import { IAnonymizedRepositoryDocument } from "./model/anonymizedRepositories/anonymizedRepositories.types"; import { AnonymizeTransformer } from "./anonymize-utils"; import GitHubBase from "./source/GitHubBase"; import Conference from "./Conference"; -import ConferenceModel from "./database/conference/conferences.model"; +import ConferenceModel from "./model/conference/conferences.model"; import AnonymousError from "./AnonymousError"; -import { downloadQueue } from "./queue"; -import { isConnected } from "./database/database"; -import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; +import { downloadQueue } from "../queue"; +import { isConnected } from "../server/database"; +import AnonymizedRepositoryModel from 
"./model/anonymizedRepositories/anonymizedRepositories.model"; import { GitHubRepository } from "./source/GitHubRepository"; import { trace } from "@opentelemetry/api"; import { getToken } from "./GitHubUtils"; @@ -59,7 +59,7 @@ export default class Repository { private checkedToken: boolean = false; - private async getToken() { + async getToken() { if (this.checkedToken) return this._model.source.accessToken as string; const originalToken = this._model.source.accessToken; const token = await getToken(this); diff --git a/src/User.ts b/src/core/User.ts similarity index 93% rename from src/User.ts rename to src/core/User.ts index c93d276..a743131 100644 --- a/src/User.ts +++ b/src/core/User.ts @@ -1,11 +1,12 @@ -import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; -import RepositoryModel from "./database/repositories/repositories.model"; -import { IUserDocument } from "./database/users/users.types"; +import { trace } from "@opentelemetry/api"; + +import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model"; +import RepositoryModel from "./model/repositories/repositories.model"; +import { IUserDocument } from "./model/users/users.types"; import Repository from "./Repository"; import { GitHubRepository } from "./source/GitHubRepository"; import PullRequest from "./PullRequest"; -import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model"; -import { trace } from "@opentelemetry/api"; +import AnonymizedPullRequestModel from "./model/anonymizedPullRequests/anonymizedPullRequests.model"; import { octokit } from "./GitHubUtils"; /** diff --git a/src/anonymize-utils.ts b/src/core/anonymize-utils.ts similarity index 97% rename from src/anonymize-utils.ts rename to src/core/anonymize-utils.ts index b0a4905..ab07df0 100644 --- a/src/anonymize-utils.ts +++ b/src/core/anonymize-utils.ts @@ -1,10 +1,10 @@ -import config from "../config"; -import { 
isText } from "istextorbinary"; import { basename } from "path"; -import { Transform } from "stream"; -import { Readable } from "stream"; +import { Transform, Readable } from "stream"; +import { isText } from "istextorbinary"; import { trace } from "@opentelemetry/api"; +import config from "../config"; + const urlRegex = /?/g; @@ -35,7 +35,7 @@ export class AnonymizeTransformer extends Transform { anonimizer: ContentAnonimizer; constructor( - private readonly opt: { + readonly opt: { filePath: string; } & ConstructorParameters[0] ) { @@ -56,7 +56,6 @@ export class AnonymizeTransformer extends Transform { if (this.isText === null) { this.isText = isTextFile(this.opt.filePath, chunk); } - if (this.isText) { const content = this.anonimizer.anonymize(chunk.toString()); if (this.anonimizer.wasAnonymized) { @@ -171,7 +170,7 @@ export class ContentAnonimizer { }); // remove the term in the text - return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => { + content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => { this.wasAnonymized = true; return mask; }); diff --git a/src/database/anonymizedPullRequests/anonymizedPullRequests.model.ts b/src/core/model/anonymizedPullRequests/anonymizedPullRequests.model.ts similarity index 100% rename from src/database/anonymizedPullRequests/anonymizedPullRequests.model.ts rename to src/core/model/anonymizedPullRequests/anonymizedPullRequests.model.ts diff --git a/src/database/anonymizedPullRequests/anonymizedPullRequests.schema.ts b/src/core/model/anonymizedPullRequests/anonymizedPullRequests.schema.ts similarity index 100% rename from src/database/anonymizedPullRequests/anonymizedPullRequests.schema.ts rename to src/core/model/anonymizedPullRequests/anonymizedPullRequests.schema.ts diff --git a/src/database/anonymizedPullRequests/anonymizedPullRequests.types.ts b/src/core/model/anonymizedPullRequests/anonymizedPullRequests.types.ts similarity index 100% rename from 
src/database/anonymizedPullRequests/anonymizedPullRequests.types.ts rename to src/core/model/anonymizedPullRequests/anonymizedPullRequests.types.ts diff --git a/src/database/anonymizedRepositories/anonymizedRepositories.model.ts b/src/core/model/anonymizedRepositories/anonymizedRepositories.model.ts similarity index 100% rename from src/database/anonymizedRepositories/anonymizedRepositories.model.ts rename to src/core/model/anonymizedRepositories/anonymizedRepositories.model.ts diff --git a/src/database/anonymizedRepositories/anonymizedRepositories.schema.ts b/src/core/model/anonymizedRepositories/anonymizedRepositories.schema.ts similarity index 100% rename from src/database/anonymizedRepositories/anonymizedRepositories.schema.ts rename to src/core/model/anonymizedRepositories/anonymizedRepositories.schema.ts diff --git a/src/database/anonymizedRepositories/anonymizedRepositories.types.ts b/src/core/model/anonymizedRepositories/anonymizedRepositories.types.ts similarity index 100% rename from src/database/anonymizedRepositories/anonymizedRepositories.types.ts rename to src/core/model/anonymizedRepositories/anonymizedRepositories.types.ts diff --git a/src/database/conference/conferences.model.ts b/src/core/model/conference/conferences.model.ts similarity index 100% rename from src/database/conference/conferences.model.ts rename to src/core/model/conference/conferences.model.ts diff --git a/src/database/conference/conferences.schema.ts b/src/core/model/conference/conferences.schema.ts similarity index 100% rename from src/database/conference/conferences.schema.ts rename to src/core/model/conference/conferences.schema.ts diff --git a/src/database/conference/conferences.types.ts b/src/core/model/conference/conferences.types.ts similarity index 100% rename from src/database/conference/conferences.types.ts rename to src/core/model/conference/conferences.types.ts diff --git a/src/database/repositories/repositories.model.ts 
b/src/core/model/repositories/repositories.model.ts similarity index 100% rename from src/database/repositories/repositories.model.ts rename to src/core/model/repositories/repositories.model.ts diff --git a/src/database/repositories/repositories.schema.ts b/src/core/model/repositories/repositories.schema.ts similarity index 100% rename from src/database/repositories/repositories.schema.ts rename to src/core/model/repositories/repositories.schema.ts diff --git a/src/database/repositories/repositories.types.ts b/src/core/model/repositories/repositories.types.ts similarity index 100% rename from src/database/repositories/repositories.types.ts rename to src/core/model/repositories/repositories.types.ts diff --git a/src/database/users/users.model.ts b/src/core/model/users/users.model.ts similarity index 100% rename from src/database/users/users.model.ts rename to src/core/model/users/users.model.ts diff --git a/src/database/users/users.schema.ts b/src/core/model/users/users.schema.ts similarity index 100% rename from src/database/users/users.schema.ts rename to src/core/model/users/users.schema.ts diff --git a/src/database/users/users.types.ts b/src/core/model/users/users.types.ts similarity index 100% rename from src/database/users/users.types.ts rename to src/core/model/users/users.types.ts diff --git a/src/source/GitHubBase.ts b/src/core/source/GitHubBase.ts similarity index 66% rename from src/source/GitHubBase.ts rename to src/core/source/GitHubBase.ts index 6b65ed1..6b03a40 100644 --- a/src/source/GitHubBase.ts +++ b/src/core/source/GitHubBase.ts @@ -2,6 +2,7 @@ import { Readable } from "stream"; import AnonymizedFile from "../AnonymizedFile"; import { Tree } from "../types"; +import { SourceBase } from "./Source"; export interface GitHubBaseData { getToken: () => string | Promise; @@ -11,21 +12,6 @@ export interface GitHubBaseData { commit: string; } -export interface SourceBase { - readonly type: string; - - /** - * Retrieve the fie content - * @param file the 
file of the content to retrieve - */ - getFileContent(file: AnonymizedFile): Promise; - - /** - * Get all the files from a specific source - */ - getFiles(progress?: (status: string) => void): Promise; -} - export default abstract class GitHubBase implements SourceBase { abstract type: "GitHubDownload" | "GitHubStream" | "Zip"; accessToken: string | undefined; diff --git a/src/source/GitHubDownload.ts b/src/core/source/GitHubDownload.ts similarity index 100% rename from src/source/GitHubDownload.ts rename to src/core/source/GitHubDownload.ts index b73e1aa..4465b71 100644 --- a/src/source/GitHubDownload.ts +++ b/src/core/source/GitHubDownload.ts @@ -4,11 +4,11 @@ import { OctokitResponse } from "@octokit/types"; import storage from "../storage"; import GitHubBase, { GitHubBaseData } from "./GitHubBase"; -import AnonymizedFile from "../AnonymizedFile"; -import AnonymousError from "../AnonymousError"; import { trace } from "@opentelemetry/api"; import { FILE_TYPE } from "../storage/Storage"; import { octokit } from "../GitHubUtils"; +import AnonymousError from "../AnonymousError"; +import AnonymizedFile from "../AnonymizedFile"; export default class GitHubDownload extends GitHubBase { type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload"; diff --git a/src/source/GitHubRepository.ts b/src/core/source/GitHubRepository.ts similarity index 97% rename from src/source/GitHubRepository.ts rename to src/core/source/GitHubRepository.ts index 6b9ee08..935a655 100644 --- a/src/source/GitHubRepository.ts +++ b/src/core/source/GitHubRepository.ts @@ -1,12 +1,13 @@ import { Branch } from "../types"; import * as gh from "parse-github-url"; -import { IRepositoryDocument } from "../database/repositories/repositories.types"; import { RestEndpointMethodTypes } from "@octokit/rest"; -import RepositoryModel from "../database/repositories/repositories.model"; -import AnonymousError from "../AnonymousError"; -import { isConnected } from "../database/database"; import { trace } 
from "@opentelemetry/api"; + +import AnonymousError from "../AnonymousError"; +import { isConnected } from "../../server/database"; import { octokit } from "../GitHubUtils"; +import { IRepositoryDocument } from "../model/repositories/repositories.types"; +import RepositoryModel from "../model/repositories/repositories.model"; export class GitHubRepository { private _data: Partial<{ diff --git a/src/source/GitHubStream.ts b/src/core/source/GitHubStream.ts similarity index 90% rename from src/source/GitHubStream.ts rename to src/core/source/GitHubStream.ts index 361af29..370dece 100644 --- a/src/source/GitHubStream.ts +++ b/src/core/source/GitHubStream.ts @@ -82,36 +82,24 @@ export default class GitHubStream extends GitHubBase { object: file, }); } - try { - const content = this.downloadFile(await this.data.getToken(), file_sha); + const content = this.downloadFile(await this.data.getToken(), file_sha); - // duplicate the stream to write it to the storage - const stream1 = content.pipe(new stream.PassThrough()); - const stream2 = content.pipe(new stream.PassThrough()); - storage.write( - file.repository.repoId, - file.filePath, - stream1, - this.type - ); - return stream2; - } catch (error) { - if ( - (error as any).status === 404 || - (error as any).httpStatus === 404 - ) { - throw new AnonymousError("file_not_found", { - httpStatus: (error as any).status || (error as any).httpStatus, - cause: error as Error, - object: file, - }); - } - throw new AnonymousError("file_too_big", { + // duplicate the stream to write it to the storage + const stream1 = content.pipe(new stream.PassThrough()); + const stream2 = content.pipe(new stream.PassThrough()); + + content.on("error", (error) => { + error = new AnonymousError("file_not_found", { httpStatus: (error as any).status || (error as any).httpStatus, cause: error as Error, object: file, }); - } + stream1.emit("error", error); + stream2.emit("error", error); + }); + + storage.write(file.repository.repoId, file.filePath, 
stream1, this.type); + return stream2; } finally { span.end(); } diff --git a/src/core/source/Source.ts b/src/core/source/Source.ts new file mode 100644 index 0000000..638404a --- /dev/null +++ b/src/core/source/Source.ts @@ -0,0 +1,24 @@ +import { Readable } from "stream"; + +import AnonymizedFile from "../AnonymizedFile"; +import { Tree } from "../types"; +import GitHubDownload from "./GitHubDownload"; +import GitHubStream from "./GitHubStream"; +import Zip from "./Zip"; + +export type Source = GitHubDownload | GitHubStream | Zip; + +export interface SourceBase { + readonly type: string; + + /** + * Retrieve the file content + * @param file the file of the content to retrieve + */ + getFileContent(file: AnonymizedFile): Promise<Readable>; + + /** + * Get all the files from a specific source + */ + getFiles(progress?: (status: string) => void): Promise<Tree>; +} diff --git a/src/source/Zip.ts b/src/core/source/Zip.ts similarity index 92% rename from src/source/Zip.ts rename to src/core/source/Zip.ts index bead36f..f914b98 100644 --- a/src/source/Zip.ts +++ b/src/core/source/Zip.ts @@ -1,7 +1,8 @@ +import * as stream from "stream"; + import AnonymizedFile from "../AnonymizedFile"; import storage from "../storage"; -import * as stream from "stream"; -import { SourceBase } from "./GitHubBase"; +import { SourceBase } from "./Source"; export default class Zip implements SourceBase { type = "Zip"; diff --git a/src/storage.ts b/src/core/storage.ts similarity index 100% rename from src/storage.ts rename to src/core/storage.ts diff --git a/src/storage/FileSystem.ts b/src/core/storage/FileSystem.ts similarity index 96% rename from src/storage/FileSystem.ts rename to src/core/storage/FileSystem.ts index ccd8a3b..03171e7 100644 --- a/src/storage/FileSystem.ts +++ b/src/core/storage/FileSystem.ts @@ -83,7 +83,15 @@ export default class FileSystem extends StorageBase { span.setAttribute("path", fullPath); try { await this.mk(repoId, dirname(p)); + if (data instanceof Readable) { + 
data.on("error", (err) => { + this.rm(repoId, p); + }); + } return await fs.promises.writeFile(fullPath, data, "utf-8"); + } catch (err: any) { + span.recordException(err); + // throw err; } finally { span.end(); } diff --git a/src/storage/S3.ts b/src/core/storage/S3.ts similarity index 96% rename from src/storage/S3.ts rename to src/core/storage/S3.ts index 4d6d743..c5323a2 100644 --- a/src/storage/S3.ts +++ b/src/core/storage/S3.ts @@ -32,6 +32,8 @@ export default class S3Storage extends StorageBase { private client(timeout = 10000) { if (!config.S3_CLIENT_ID) throw new Error("S3_CLIENT_ID not set"); if (!config.S3_CLIENT_SECRET) throw new Error("S3_CLIENT_SECRET not set"); + if (!config.S3_REGION) throw new Error("S3_REGION not set"); + if (!config.S3_ENDPOINT) throw new Error("S3_ENDPOINT not set"); return new S3({ credentials: { accessKeyId: config.S3_CLIENT_ID, @@ -212,6 +214,14 @@ export default class S3Storage extends StorageBase { try { if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set"); + if (data instanceof Readable) { + data.on("error", (err) => { + console.error(`[ERROR] S3 write ${path}`, err); + span.recordException(err as Error); + this.rm(repoId, path); + }); + } + const params: PutObjectCommandInput = { Bucket: config.S3_BUCKET, Key: join(this.repoPath(repoId), path), @@ -229,7 +239,6 @@ export default class S3Storage extends StorageBase { }); await parallelUploads3.done(); - return; } finally { span.end(); } diff --git a/src/storage/Storage.ts b/src/core/storage/Storage.ts similarity index 95% rename from src/storage/Storage.ts rename to src/core/storage/Storage.ts index e80b761..458136c 100644 --- a/src/storage/Storage.ts +++ b/src/core/storage/Storage.ts @@ -4,6 +4,10 @@ import * as archiver from "archiver"; import { Response } from "express"; import { Tree } from "../types"; +import S3Storage from "./S3"; +import FileSystem from "./FileSystem"; + +export type Storage = S3Storage | FileSystem; export enum FILE_TYPE { FILE = "file", 
diff --git a/src/types.ts b/src/core/types.ts similarity index 64% rename from src/types.ts rename to src/core/types.ts index 402aa88..9d5394f 100644 --- a/src/types.ts +++ b/src/core/types.ts @@ -1,13 +1,3 @@ -import GitHubDownload from "./source/GitHubDownload"; -import GitHubStream from "./source/GitHubStream"; -import Zip from "./source/Zip"; -import S3Storage from "./storage/S3"; -import FileSystem from "./storage/FileSystem"; - -export type Source = GitHubDownload | GitHubStream | Zip; - -export type Storage = S3Storage | FileSystem; - export interface Branch { name: string; commit: string; diff --git a/src/queue.ts b/src/queue/index.ts similarity index 89% rename from src/queue.ts rename to src/queue/index.ts index 00277f6..b0ebff6 100644 --- a/src/queue.ts +++ b/src/queue/index.ts @@ -1,6 +1,6 @@ import { Queue, Worker } from "bullmq"; import config from "../config"; -import Repository from "./Repository"; +import Repository from "../core/Repository"; import * as path from "path"; export let cacheQueue: Queue; @@ -37,7 +37,7 @@ export function startWorker() { }); const cacheWorker = new Worker( cacheQueue.name, - path.resolve("build/src/processes/removeCache.js"), + path.resolve("build/queue/processes/removeCache.js"), { concurrency: 5, connection, @@ -49,7 +49,7 @@ export function startWorker() { }); const removeWorker = new Worker( removeQueue.name, - path.resolve("build/src/processes/removeRepository.js"), + path.resolve("build/queue/processes/removeRepository.js"), { concurrency: 5, connection, @@ -62,7 +62,7 @@ export function startWorker() { const downloadWorker = new Worker( downloadQueue.name, - path.resolve("build/src/processes/downloadRepository.js"), + path.resolve("build/queue/processes/downloadRepository.js"), { concurrency: 3, connection, diff --git a/src/processes/downloadRepository.ts b/src/queue/processes/downloadRepository.ts similarity index 89% rename from src/processes/downloadRepository.ts rename to 
src/queue/processes/downloadRepository.ts index 688eedb..1a70bd6 100644 --- a/src/processes/downloadRepository.ts +++ b/src/queue/processes/downloadRepository.ts @@ -1,10 +1,10 @@ +import { Exception, trace } from "@opentelemetry/api"; import { SandboxedJob } from "bullmq"; import { config } from "dotenv"; config(); -import Repository from "../Repository"; -import { getRepository as getRepositoryImport } from "../database/database"; -import { RepositoryStatus } from "../types"; -import { Exception, trace } from "@opentelemetry/api"; +import Repository from "../../core/Repository"; +import { getRepository as getRepositoryImport } from "../../server/database"; +import { RepositoryStatus } from "../../core/types"; export default async function (job: SandboxedJob) { const { diff --git a/src/processes/removeCache.ts b/src/queue/processes/removeCache.ts similarity index 88% rename from src/processes/removeCache.ts rename to src/queue/processes/removeCache.ts index 1b4ded1..996004a 100644 --- a/src/processes/removeCache.ts +++ b/src/queue/processes/removeCache.ts @@ -1,7 +1,7 @@ -import { SandboxedJob } from "bullmq"; -import Repository from "../Repository"; -import { getRepository as getRepositoryImport } from "../database/database"; import { Exception, trace } from "@opentelemetry/api"; +import { SandboxedJob } from "bullmq"; +import Repository from "../../core/Repository"; +import { getRepository as getRepositoryImport } from "../../server/database"; export default async function (job: SandboxedJob) { const { diff --git a/src/processes/removeRepository.ts b/src/queue/processes/removeRepository.ts similarity index 86% rename from src/processes/removeRepository.ts rename to src/queue/processes/removeRepository.ts index 7f91697..03b36c1 100644 --- a/src/processes/removeRepository.ts +++ b/src/queue/processes/removeRepository.ts @@ -1,8 +1,8 @@ -import { SandboxedJob } from "bullmq"; -import Repository from "../Repository"; -import { getRepository as getRepositoryImport } 
from "../database/database"; -import { RepositoryStatus } from "../types"; import { trace } from "@opentelemetry/api"; +import { SandboxedJob } from "bullmq"; +import Repository from "../../core/Repository"; +import { getRepository as getRepositoryImport } from "../../server/database"; +import { RepositoryStatus } from "../../core/types"; export default async function (job: SandboxedJob) { const { diff --git a/src/database/database.ts b/src/server/database.ts similarity index 80% rename from src/database/database.ts rename to src/server/database.ts index bf1caae..5420497 100644 --- a/src/database/database.ts +++ b/src/server/database.ts @@ -1,10 +1,10 @@ import mongoose, { ConnectOptions } from "mongoose"; -import Repository from "../Repository"; -import config from "../../config"; -import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model"; -import AnonymousError from "../AnonymousError"; -import AnonymizedPullRequestModel from "./anonymizedPullRequests/anonymizedPullRequests.model"; -import PullRequest from "../PullRequest"; +import Repository from "../core/Repository"; +import config from "../config"; +import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model"; +import AnonymousError from "../core/AnonymousError"; +import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model"; +import PullRequest from "../core/PullRequest"; const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`; diff --git a/src/server.ts b/src/server/index.ts similarity index 94% rename from src/server.ts rename to src/server/index.ts index 2af4c4b..34bf24c 100644 --- a/src/server.ts +++ b/src/server/index.ts @@ -1,3 +1,6 @@ +import { config as dotenv } from "dotenv"; +dotenv(); + import { createClient } from "redis"; import { resolve, join } from "path"; import { existsSync } from "fs"; @@ -7,16 +10,15 @@ import 
RedisStore from "rate-limit-redis"; import * as express from "express"; import * as compression from "compression"; import * as passport from "passport"; - -import config from "../config"; -import { connect } from "./database/database"; +import { connect } from "./database"; import { initSession, router as connectionRouter } from "./routes/connection"; import router from "./routes"; -import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; +import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model"; import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule"; -import { startWorker } from "./queue"; -import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model"; +import { startWorker } from "../queue"; +import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model"; import { getUser } from "./routes/route-utils"; +import config from "../config"; function indexResponse(req: express.Request, res: express.Response) { if ( @@ -214,3 +216,5 @@ export default async function start() { app.listen(config.PORT); console.log("Database connected and Server started on port: " + config.PORT); } + +start(); diff --git a/src/routes/admin.ts b/src/server/routes/admin.ts similarity index 93% rename from src/routes/admin.ts rename to src/server/routes/admin.ts index 545fb83..033b982 100644 --- a/src/routes/admin.ts +++ b/src/server/routes/admin.ts @@ -1,12 +1,12 @@ import { Queue } from "bullmq"; import * as express from "express"; -import AnonymousError from "../AnonymousError"; -import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model"; -import ConferenceModel from "../database/conference/conferences.model"; -import UserModel from "../database/users/users.model"; -import { cacheQueue, downloadQueue, removeQueue } from "../queue"; -import 
Repository from "../Repository"; -import User from "../User"; +import AnonymousError from "../../core/AnonymousError"; +import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model"; +import ConferenceModel from "../../core/model/conference/conferences.model"; +import UserModel from "../../core/model/users/users.model"; +import { cacheQueue, downloadQueue, removeQueue } from "../../queue"; +import Repository from "../../core/Repository"; +import User from "../../core/User"; import { ensureAuthenticated } from "./connection"; import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils"; diff --git a/src/routes/conference.ts b/src/server/routes/conference.ts similarity index 96% rename from src/routes/conference.ts rename to src/server/routes/conference.ts index ea19ab9..19d8cfc 100644 --- a/src/routes/conference.ts +++ b/src/server/routes/conference.ts @@ -1,10 +1,10 @@ import * as express from "express"; -import AnonymousError from "../AnonymousError"; -import Conference from "../Conference"; -import ConferenceModel from "../database/conference/conferences.model"; +import AnonymousError from "../../core/AnonymousError"; +import Conference from "../../core/Conference"; +import ConferenceModel from "../../core/model/conference/conferences.model"; import { ensureAuthenticated } from "./connection"; import { handleError, getUser, isOwnerOrAdmin } from "./route-utils"; -import { IConferenceDocument } from "../database/conference/conferences.types"; +import { IConferenceDocument } from "../../core/model/conference/conferences.types"; const router = express.Router(); diff --git a/src/routes/connection.ts b/src/server/routes/connection.ts similarity index 94% rename from src/routes/connection.ts rename to src/server/routes/connection.ts index 1ec4052..45f4401 100644 --- a/src/routes/connection.ts +++ b/src/server/routes/connection.ts @@ -7,9 +7,9 @@ import { Profile, Strategy } from "passport-github2"; import 
* as express from "express"; import config from "../../config"; -import UserModel from "../database/users/users.model"; -import { IUserDocument } from "../database/users/users.types"; -import AnonymousError from "../AnonymousError"; +import UserModel from "../../core/model/users/users.model"; +import { IUserDocument } from "../../core/model/users/users.types"; +import AnonymousError from "../../core/AnonymousError"; export function ensureAuthenticated( req: express.Request, diff --git a/src/routes/file.ts b/src/server/routes/file.ts similarity index 90% rename from src/routes/file.ts rename to src/server/routes/file.ts index 1f877f0..5602c0e 100644 --- a/src/routes/file.ts +++ b/src/server/routes/file.ts @@ -1,6 +1,6 @@ import * as express from "express"; -import AnonymizedFile from "../AnonymizedFile"; -import AnonymousError from "../AnonymousError"; +import AnonymizedFile from "../../core/AnonymizedFile"; +import AnonymousError from "../../core/AnonymousError"; import { getRepo, handleError } from "./route-utils"; export const router = express.Router(); @@ -49,7 +49,7 @@ router.get( // cache the file for 5min res.header("Cache-Control", "max-age=300"); await repo.countView(); - f.send(res); + await f.send(res); } catch (error) { return handleError(error, res, req); } diff --git a/src/routes/index.ts b/src/server/routes/index.ts similarity index 100% rename from src/routes/index.ts rename to src/server/routes/index.ts diff --git a/src/routes/option.ts b/src/server/routes/option.ts similarity index 100% rename from src/routes/option.ts rename to src/server/routes/option.ts diff --git a/src/routes/pullRequest-private.ts b/src/server/routes/pullRequest-private.ts similarity index 94% rename from src/routes/pullRequest-private.ts rename to src/server/routes/pullRequest-private.ts index f70d591..4b8c8ea 100644 --- a/src/routes/pullRequest-private.ts +++ b/src/server/routes/pullRequest-private.ts @@ -7,11 +7,11 @@ import { handleError, isOwnerOrAdmin, } from 
"./route-utils"; -import AnonymousError from "../AnonymousError"; -import { IAnonymizedPullRequestDocument } from "../database/anonymizedPullRequests/anonymizedPullRequests.types"; -import PullRequest from "../PullRequest"; -import AnonymizedPullRequestModel from "../database/anonymizedPullRequests/anonymizedPullRequests.model"; -import { RepositoryStatus } from "../types"; +import AnonymousError from "../../core/AnonymousError"; +import { IAnonymizedPullRequestDocument } from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.types"; +import PullRequest from "../../core/PullRequest"; +import AnonymizedPullRequestModel from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.model"; +import { RepositoryStatus } from "../../core/types"; const router = express.Router(); diff --git a/src/routes/pullRequest-public.ts b/src/server/routes/pullRequest-public.ts similarity index 97% rename from src/routes/pullRequest-public.ts rename to src/server/routes/pullRequest-public.ts index 8224358..390830f 100644 --- a/src/routes/pullRequest-public.ts +++ b/src/server/routes/pullRequest-public.ts @@ -1,7 +1,7 @@ import * as express from "express"; import { getPullRequest, handleError } from "./route-utils"; -import AnonymousError from "../AnonymousError"; +import AnonymousError from "../../core/AnonymousError"; const router = express.Router(); diff --git a/src/routes/repository-private.ts b/src/server/routes/repository-private.ts similarity index 94% rename from src/routes/repository-private.ts rename to src/server/routes/repository-private.ts index 0de6364..19e5f56 100644 --- a/src/routes/repository-private.ts +++ b/src/server/routes/repository-private.ts @@ -1,22 +1,22 @@ import * as express from "express"; import { ensureAuthenticated } from "./connection"; -import * as db from "../database/database"; +import * as db from "../database"; import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils"; -import { getRepositoryFromGitHub } 
from "../source/GitHubRepository"; +import { getRepositoryFromGitHub } from "../../core/source/GitHubRepository"; import gh = require("parse-github-url"); -import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model"; -import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types"; -import Repository from "../Repository"; -import UserModel from "../database/users/users.model"; -import ConferenceModel from "../database/conference/conferences.model"; -import AnonymousError from "../AnonymousError"; -import { downloadQueue, removeQueue } from "../queue"; -import RepositoryModel from "../database/repositories/repositories.model"; -import User from "../User"; -import { RepositoryStatus } from "../types"; -import { IUserDocument } from "../database/users/users.types"; -import { checkToken } from "../GitHubUtils"; +import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model"; +import { IAnonymizedRepositoryDocument } from "../../core/model/anonymizedRepositories/anonymizedRepositories.types"; +import Repository from "../../core/Repository"; +import UserModel from "../../core/model/users/users.model"; +import ConferenceModel from "../../core/model/conference/conferences.model"; +import AnonymousError from "../../core/AnonymousError"; +import { downloadQueue, removeQueue } from "../../queue"; +import RepositoryModel from "../../core/model/repositories/repositories.model"; +import User from "../../core/User"; +import { RepositoryStatus } from "../../core/types"; +import { IUserDocument } from "../../core/model/users/users.types"; +import { checkToken } from "../../core/GitHubUtils"; const router = express.Router(); diff --git a/src/routes/repository-public.ts b/src/server/routes/repository-public.ts similarity index 96% rename from src/routes/repository-public.ts rename to src/server/routes/repository-public.ts index 2f56c45..0021b69 
100644 --- a/src/routes/repository-public.ts +++ b/src/server/routes/repository-public.ts @@ -4,9 +4,9 @@ import * as stream from "stream"; import config from "../../config"; import { getRepo, handleError } from "./route-utils"; -import AnonymousError from "../AnonymousError"; -import { downloadQueue } from "../queue"; -import { RepositoryStatus } from "../types"; +import AnonymousError from "../../core/AnonymousError"; +import { downloadQueue } from "../../queue"; +import { RepositoryStatus } from "../../core/types"; const router = express.Router(); diff --git a/src/routes/route-utils.ts b/src/server/routes/route-utils.ts similarity index 86% rename from src/routes/route-utils.ts rename to src/server/routes/route-utils.ts index 4f09122..407055d 100644 --- a/src/routes/route-utils.ts +++ b/src/server/routes/route-utils.ts @@ -1,8 +1,9 @@ import * as express from "express"; -import AnonymousError from "../AnonymousError"; -import * as db from "../database/database"; -import UserModel from "../database/users/users.model"; -import User from "../User"; +import AnonymousError from "../../core/AnonymousError"; +import * as db from "../database"; +import UserModel from "../../core/model/users/users.model"; +import User from "../../core/User"; +import { HTTPError } from "got"; export async function getPullRequest( req: express.Request, @@ -85,6 +86,11 @@ function printError(error: any, req?: express.Request) { if (req.originalUrl === "/api/repo/undefined/options") return; } console.error(message); + } else if (error instanceof HTTPError) { + let message = `[ERROR] HTTP.${ + error.code + } ${error.message.toString()} ${error.stack?.split("\n")[1].trim()}`; + console.error(message); } else if (error instanceof Error) { console.error(error); } else { @@ -107,7 +113,10 @@ export function handleError( status = error.httpStatus; } else if (error.$metadata?.httpStatusCode) { status = error.$metadata.httpStatusCode; - } else if (message && message.indexOf("not_found") > -1) { + } 
else if ( + message && + (message.indexOf("not_found") > -1 || message.indexOf("(Not Found)") > -1) + ) { status = 404; } else if (message && message.indexOf("not_connected") > -1) { status = 401; diff --git a/src/routes/user.ts b/src/server/routes/user.ts similarity index 97% rename from src/routes/user.ts rename to src/server/routes/user.ts index 74238b2..6ba5b1e 100644 --- a/src/routes/user.ts +++ b/src/server/routes/user.ts @@ -2,8 +2,8 @@ import * as express from "express"; import config from "../../config"; import { ensureAuthenticated } from "./connection"; import { handleError, getUser, isOwnerOrAdmin } from "./route-utils"; -import UserModel from "../database/users/users.model"; -import User from "../User"; +import UserModel from "../../core/model/users/users.model"; +import User from "../../core/User"; const router = express.Router(); diff --git a/src/routes/webview.ts b/src/server/routes/webview.ts similarity index 92% rename from src/routes/webview.ts rename to src/server/routes/webview.ts index fc55996..8ef2fa8 100644 --- a/src/routes/webview.ts +++ b/src/server/routes/webview.ts @@ -1,11 +1,11 @@ import * as express from "express"; import { getRepo, handleError } from "./route-utils"; import * as path from "path"; -import AnonymizedFile from "../AnonymizedFile"; -import AnonymousError from "../AnonymousError"; -import { Tree, TreeElement } from "../types"; +import AnonymizedFile from "../../core/AnonymizedFile"; +import AnonymousError from "../../core/AnonymousError"; +import { Tree, TreeElement } from "../../core/types"; import * as marked from "marked"; -import { streamToString } from "../anonymize-utils"; +import { streamToString } from "../../core/anonymize-utils"; const router = express.Router(); diff --git a/src/schedule.ts b/src/server/schedule.ts similarity index 84% rename from src/schedule.ts rename to src/server/schedule.ts index a58e916..5db791e 100644 --- a/src/schedule.ts +++ b/src/server/schedule.ts @@ -1,8 +1,8 @@ import * as schedule 
from "node-schedule"; -import Conference from "./Conference"; -import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; -import ConferenceModel from "./database/conference/conferences.model"; -import Repository from "./Repository"; +import Conference from "../core/Conference"; +import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model"; +import ConferenceModel from "../core/model/conference/conferences.model"; +import Repository from "../core/Repository"; export function conferenceStatusCheck() { // check every 6 hours the status of the conferences diff --git a/src/streamer/index.ts b/src/streamer/index.ts new file mode 100644 index 0000000..255aa82 --- /dev/null +++ b/src/streamer/index.ts @@ -0,0 +1,31 @@ +import { config as dotenv } from "dotenv"; +dotenv(); + +import * as express from "express"; +import * as compression from "compression"; + +import config from "../config"; +import router from "./route"; +import { handleError } from "../server/routes/route-utils"; +import AnonymousError from "../core/AnonymousError"; + +const app = express(); +app.use(express.json()); + +app.use(compression()); + +app.use("/api", router); + +app.all("*", (req, res) => { + handleError( + new AnonymousError("file_not_found", { + httpStatus: 404, + object: req.originalUrl, + }), + res, + req + ); +}); +app.listen(config.PORT, () => { + console.log(`Server started on http://streamer:${config.PORT}`); +}); diff --git a/src/streamer/route.ts b/src/streamer/route.ts new file mode 100644 index 0000000..9e1f575 --- /dev/null +++ b/src/streamer/route.ts @@ -0,0 +1,63 @@ +import * as express from "express"; +import GitHubStream from "../core/source/GitHubStream"; +import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils"; +import { handleError } from "../server/routes/route-utils"; +import { contentType } from "mime-types"; + +export const router = express.Router(); + 
+router.post("/", async (req: express.Request, res: express.Response) => { + req.body = req.body || {}; + const token: string = req.body.token; + const repoFullName = req.body.repoFullName.split("/"); + const repoId = req.body.repoId; + const branch = req.body.branch; + const fileSha = req.body.sha; + const commit = req.body.commit; + const filePath = req.body.filePath; + const anonymizerOptions = req.body.anonymizerOptions; + const anonymizer = new AnonymizeTransformer(anonymizerOptions); + + const source = new GitHubStream({ + repoId, + organization: repoFullName[0], + repoName: repoFullName[1], + commit: commit, + getToken: () => token, + }); + const content = source.downloadFile(token, fileSha); + try { + const mime = contentType(filePath); + if (mime && !filePath.endsWith(".ts")) { + res.contentType(mime); + } else if (isTextFile(filePath)) { + res.contentType("text/plain"); + } + res.header("Accept-Ranges", "none"); + anonymizer.once("transform", (data) => { + if (!mime && data.isText) { + res.contentType("text/plain"); + } + }); + function handleStreamError(error: Error) { + if (!content.closed && !content.destroyed) { + content.destroy(); + } + handleError(error, res); + } + content + .on("error", handleStreamError) + .pipe(anonymizer) + .pipe(res) + .on("error", handleStreamError) + .on("close", () => { + if (!content.closed && !content.destroyed) { + content.destroy(); + } + }); + } catch (error) { + handleError(error, res); + } +}); + +export default router; diff --git a/tsconfig.json b/tsconfig.json index b8729d6..c07ae26 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -17,6 +17,6 @@ "esModuleInterop": false, "incremental": true }, - "include": ["src/**/*.ts", "index.ts", "cli.ts"], + "include": ["src/**/*.ts", "index.ts", "src/cli/index.ts"], "exclude": ["node_modules", ".vscode"] }