feat: introduce streamers that handle the stream and anonymization from github

This commit is contained in:
tdurieux
2024-04-03 11:13:01 +01:00
parent 73019c1b44
commit 4d12641c7e
64 changed files with 419 additions and 257 deletions

View File

@@ -13,10 +13,8 @@ COPY healthcheck.js .
COPY src ./src
COPY public ./public
COPY index.ts .
COPY config.ts .
RUN npm install && npm run build && npm cache clean --force
COPY opentelemetry.js .
CMD [ "node", "--require", "./opentelemetry.js", "./build/index.js"]
CMD [ "node", "--require", "./opentelemetry.js", "./build/server/index.js"]

View File

@@ -5,17 +5,17 @@ services:
build: .
restart: always
image: tdurieux/anonymous_github:v2
deploy:
mode: replicated
replicas: 4
endpoint_mode: vip
ports:
- $EXPOSED_PORT:5000
env_file:
- ./.env
volumes:
- ./repositories:/app/build/repositories/
environment:
- PORT=5000
- REDIS_HOSTNAME=redis
- DB_HOSTNAME=mongodb
- STREAMER_ENTRYPOINT=http://streamer:5000/
healthcheck:
test:
- CMD
@@ -27,22 +27,37 @@ services:
links:
- mongodb
- redis
- opentelemetry
- streamer
nginx:
image: nginx:stable-alpine
streamer:
build: .
restart: always
image: tdurieux/anonymous_github:v2
deploy:
mode: replicated
replicas: 4
endpoint_mode: vip
entrypoint: ["node", "./build/streamer/index.js"]
env_file:
- ./.env
volumes:
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
depends_on:
- anonymous_github
ports:
- "$EXPOSED_PORT:4000"
- ./repositories:/app/build/repositories/
environment:
- PORT=5000
healthcheck:
test:
- CMD
- node
- healthcheck.js
interval: 10s
timeout: 10s
retries: 5
redis:
image: "redis:alpine"
restart: always
ports:
- 127.0.0.1:6379:6379
healthcheck:
test:
- CMD
@@ -101,13 +116,15 @@ services:
image: tiredofit/db-backup
links:
- mongodb
env_file:
- ./.env
volumes:
- ./db_backups:/backup
environment:
- DB_TYPE=mongo
- DB_HOST=mongodb
- DB_DUMP_FREQ=60
- DB_CLEANUP_TIME=240
- DB_DUMP_FREQ=120
- DB_CLEANUP_TIME=500
- COMPRESSION=XZ
- DB_USER=$DB_USERNAME
- DB_PASS=$DB_PASSWORD

View File

@@ -1,7 +0,0 @@
import { config } from "dotenv";
config();
import server from "./src/server";
// start the server
server();

View File

@@ -1,15 +0,0 @@
upstream backend {
server anonymous_github:5000;
}
server {
listen 4000;
resolver 127.0.0.11 valid=5s;
include /etc/nginx/mime.types;
location / {
proxy_pass http://backend/;
}
}

View File

@@ -3,13 +3,12 @@
"version": "2.2.0",
"description": "Anonymise Github repositories for double-anonymous reviews",
"bin": {
"anonymous_github": "build/cli.js"
"anonymous_github": "build/cli/index.js"
},
"scripts": {
"test": "mocha --reporter spec",
"start": "node --inspect=5858 -r ts-node/register ./index.ts",
"dev": "nodemon --transpile-only index.ts",
"migrateDB": "ts-node --transpile-only migrateDB.ts",
"start": "node --inspect=5858 -r ts-node/register ./src/server/index.ts",
"dev": "nodemon --transpile-only ./src/server/index.ts",
"build": "rm -rf build && tsc",
"knip": "knip"
},

View File

@@ -10,12 +10,12 @@ import { join } from "path";
import * as gh from "parse-github-url";
import * as inquirer from "inquirer";
import server from "./src/server";
import config from "./config";
import GitHubDownload from "./src/source/GitHubDownload";
import Repository from "./src/Repository";
import AnonymizedRepositoryModel from "./src/database/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "./src/source/GitHubRepository";
import server from "../server";
import config from "../config";
import GitHubDownload from "../core/source/GitHubDownload";
import Repository from "../core/Repository";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "../core/source/GitHubRepository";
function generateRandomFileName(size: number) {
const characters =

View File

@@ -18,6 +18,7 @@ interface Config {
* Allow downloading repositories and files
*/
ENABLE_DOWNLOAD: boolean;
STREAMER_ENTRYPOINT: string | null;
ANONYMIZATION_MASK: string;
PORT: number;
APP_HOSTNAME: string;
@@ -26,11 +27,11 @@ interface Config {
DB_HOSTNAME: string;
FOLDER: string;
additionalExtensions: string[];
S3_BUCKET?: string;
S3_CLIENT_ID?: string;
S3_CLIENT_SECRET?: string;
S3_ENDPOINT?: string;
S3_REGION?: string;
S3_BUCKET: string | null;
S3_CLIENT_ID: string | null;
S3_CLIENT_SECRET: string | null;
S3_ENDPOINT: string | null;
S3_REGION: string | null;
STORAGE: "filesystem" | "s3";
TRUST_PROXY: number;
RATE_LIMIT: number;
@@ -58,7 +59,7 @@ const config: Config = {
DB_HOSTNAME: "mongodb",
REDIS_HOSTNAME: "redis",
REDIS_PORT: 6379,
FOLDER: resolve(__dirname, "repositories"),
FOLDER: resolve(__dirname, "..", "repositories"),
additionalExtensions: [
"license",
"dockerfile",
@@ -70,11 +71,12 @@ const config: Config = {
"in",
],
STORAGE: "filesystem",
S3_BUCKET: process.env.S3_BUCKET,
S3_CLIENT_ID: process.env.S3_CLIENT_ID,
S3_CLIENT_SECRET: process.env.S3_CLIENT_SECRET,
S3_ENDPOINT: process.env.S3_ENDPOINT,
S3_REGION: process.env.S3_REGION,
STREAMER_ENTRYPOINT: null,
S3_BUCKET: null,
S3_CLIENT_ID: null,
S3_CLIENT_SECRET: null,
S3_ENDPOINT: null,
S3_REGION: null,
};
for (let conf in process.env) {

View File

@@ -2,13 +2,15 @@ import { join, basename } from "path";
import { Response } from "express";
import { Readable } from "stream";
import { trace } from "@opentelemetry/api";
import { lookup } from "mime-types";
import Repository from "./Repository";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import config from "../config";
import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "./routes/route-utils";
import { lookup } from "mime-types";
import { handleError } from "../server/routes/route-utils";
import got from "got";
/**
* Represents a file in an anonymized repository
@@ -197,7 +199,7 @@ export default class AnonymizedFile {
httpStatus: 403,
});
}
const out = await this.repository.source?.getFileContent(this);
const content = await this.repository.source?.getFileContent(this);
if (
!this.repository.model.isReseted ||
this.repository.status != RepositoryStatus.READY
@@ -205,7 +207,7 @@ export default class AnonymizedFile {
this.repository.model.isReseted = false;
await this.repository.updateStatus(RepositoryStatus.READY);
}
return out;
return content;
} finally {
span.end();
}
@@ -213,19 +215,32 @@ export default class AnonymizedFile {
}
async anonymizedContent() {
return trace
.getTracer("ano-file")
.startActiveSpan("anonymizedContent", async (span) => {
span.setAttribute("anonymizedPath", this.anonymizedPath);
const content = await this.content();
return content
.pipe(
this.repository.generateAnonymizeTransformer(this.anonymizedPath)
)
.on("close", () => {
span.end();
});
const span = trace.getTracer("ano-file").startSpan("Repository.conference");
span.setAttribute("anonymizedPath", this.anonymizedPath);
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
if (!config.STREAMER_ENTRYPOINT) {
// collect the content locally
const content = await this.content();
return content.pipe(anonymizer).on("close", () => {
span.end();
});
}
// use the streamer service
return got.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
json: {
token: await this.repository.getToken(),
repoFullName: this.repository.model.source.repositoryName,
commit: this.repository.model.source.commit,
branch: this.repository.model.source.branch,
repoId: this.repository.repoId,
filePath: this.filePath,
sha: await this.sha(),
anonymizerOptions: anonymizer.opt,
},
});
}
get filePath() {
@@ -243,6 +258,9 @@ export default class AnonymizedFile {
}
async send(res: Response): Promise<void> {
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
return trace
.getTracer("ano-file")
.startActiveSpan("AnonymizedFile.send", async (span) => {
@@ -250,6 +268,39 @@ export default class AnonymizedFile {
span.setAttribute("anonymizedPath", this.anonymizedPath);
return new Promise<void>(async (resolve, reject) => {
try {
if (config.STREAMER_ENTRYPOINT) {
// use the streamer service
got
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
json: {
token: await this.repository.getToken(),
repoFullName: this.repository.model.source.repositoryName,
commit: this.repository.model.source.commit,
branch: this.repository.model.source.branch,
repoId: this.repository.repoId,
filePath: this.filePath,
sha: await this.sha(),
anonymizerOptions: anonymizer.opt,
},
})
.on("error", () => {
handleError(
new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
}),
res
);
})
.pipe(res)
.on("close", () => {
span.end();
resolve();
});
return;
}
const mime = lookup(this.anonymizedPath);
if (mime && this.extension() != "ts") {
res.contentType(mime);
@@ -257,9 +308,6 @@ export default class AnonymizedFile {
res.contentType("text/plain");
}
res.header("Accept-Ranges", "none");
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
anonymizer.once("transform", (data) => {
if (!mime && data.isText) {
res.contentType("text/plain");
@@ -269,26 +317,27 @@ export default class AnonymizedFile {
res.header("Content-Length", this.fileSize.toString());
}
});
const content = await this.content();
function handleStreamError(error: Error) {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.recordException(error);
span.end();
reject(error);
// handleError(error, res);
}
content
.on("error", handleStreamError)
.pipe(anonymizer)
.pipe(res)
.on("error", handleStreamError)
.on("close", () => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.end();
resolve();
})
.on("error", (error) => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.recordException(error);
span.end();
reject(error);
handleError(error, res);
});
} catch (error) {
handleError(error, res);

View File

@@ -1,5 +1,5 @@
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { IConferenceDocument } from "./database/conference/conferences.types";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { IConferenceDocument } from "./model/conference/conferences.types";
import Repository from "./Repository";
import { ConferenceStatus } from "./types";

View File

@@ -1,7 +1,8 @@
import { trace } from "@opentelemetry/api";
import Repository from "./Repository";
import { Octokit } from "@octokit/rest";
import UserModel from "./database/users/users.model";
import Repository from "./Repository";
import UserModel from "./model/users/users.model";
import config from "../config";
export function octokit(token: string) {

View File

@@ -1,10 +1,10 @@
import { RepositoryStatus } from "./types";
import User from "./User";
import UserModel from "./database/users/users.model";
import UserModel from "./model/users/users.model";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types";
import { IAnonymizedPullRequestDocument } from "./model/anonymizedPullRequests/anonymizedPullRequests.types";
import config from "../config";
import got from "got";
import { octokit } from "./GitHubUtils";

View File

@@ -6,16 +6,16 @@ import GitHubStream from "./source/GitHubStream";
import GitHubDownload from "./source/GitHubDownload";
import Zip from "./source/Zip";
import { anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
import UserModel from "./model/users/users.model";
import { IAnonymizedRepositoryDocument } from "./model/anonymizedRepositories/anonymizedRepositories.types";
import { AnonymizeTransformer } from "./anonymize-utils";
import GitHubBase from "./source/GitHubBase";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { downloadQueue } from "./queue";
import { isConnected } from "./database/database";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { downloadQueue } from "../queue";
import { isConnected } from "../server/database";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { GitHubRepository } from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
@@ -59,7 +59,7 @@ export default class Repository {
private checkedToken: boolean = false;
private async getToken() {
async getToken() {
if (this.checkedToken) return this._model.source.accessToken as string;
const originalToken = this._model.source.accessToken;
const token = await getToken(this);

View File

@@ -1,11 +1,12 @@
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./database/repositories/repositories.model";
import { IUserDocument } from "./database/users/users.types";
import { trace } from "@opentelemetry/api";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./model/repositories/repositories.model";
import { IUserDocument } from "./model/users/users.types";
import Repository from "./Repository";
import { GitHubRepository } from "./source/GitHubRepository";
import PullRequest from "./PullRequest";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { trace } from "@opentelemetry/api";
import AnonymizedPullRequestModel from "./model/anonymizedPullRequests/anonymizedPullRequests.model";
import { octokit } from "./GitHubUtils";
/**

View File

@@ -1,10 +1,10 @@
import config from "../config";
import { isText } from "istextorbinary";
import { basename } from "path";
import { Transform } from "stream";
import { Readable } from "stream";
import { Transform, Readable } from "stream";
import { isText } from "istextorbinary";
import { trace } from "@opentelemetry/api";
import config from "../config";
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
@@ -35,7 +35,7 @@ export class AnonymizeTransformer extends Transform {
anonimizer: ContentAnonimizer;
constructor(
private readonly opt: {
readonly opt: {
filePath: string;
} & ConstructorParameters<typeof ContentAnonimizer>[0]
) {
@@ -56,7 +56,6 @@ export class AnonymizeTransformer extends Transform {
if (this.isText === null) {
this.isText = isTextFile(this.opt.filePath, chunk);
}
if (this.isText) {
const content = this.anonimizer.anonymize(chunk.toString());
if (this.anonimizer.wasAnonymized) {
@@ -171,7 +170,7 @@ export class ContentAnonimizer {
});
// remove the term in the text
return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
this.wasAnonymized = true;
return mask;
});

View File

@@ -2,6 +2,7 @@ import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import { SourceBase } from "./Source";
export interface GitHubBaseData {
getToken: () => string | Promise<string>;
@@ -11,21 +12,6 @@ export interface GitHubBaseData {
commit: string;
}
export interface SourceBase {
readonly type: string;
/**
* Retrieve the fie content
* @param file the file of the content to retrieve
*/
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
*/
getFiles(progress?: (status: string) => void): Promise<Tree>;
}
export default abstract class GitHubBase implements SourceBase {
abstract type: "GitHubDownload" | "GitHubStream" | "Zip";
accessToken: string | undefined;

View File

@@ -4,11 +4,11 @@ import { OctokitResponse } from "@octokit/types";
import storage from "../storage";
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import { trace } from "@opentelemetry/api";
import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../AnonymizedFile";
export default class GitHubDownload extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload";

View File

@@ -1,12 +1,13 @@
import { Branch } from "../types";
import * as gh from "parse-github-url";
import { IRepositoryDocument } from "../database/repositories/repositories.types";
import { RestEndpointMethodTypes } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../database/database";
import { trace } from "@opentelemetry/api";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../../server/database";
import { octokit } from "../GitHubUtils";
import { IRepositoryDocument } from "../model/repositories/repositories.types";
import RepositoryModel from "../model/repositories/repositories.model";
export class GitHubRepository {
private _data: Partial<{

View File

@@ -82,36 +82,24 @@ export default class GitHubStream extends GitHubBase {
object: file,
});
}
try {
const content = this.downloadFile(await this.data.getToken(), file_sha);
const content = this.downloadFile(await this.data.getToken(), file_sha);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
storage.write(
file.repository.repoId,
file.filePath,
stream1,
this.type
);
return stream2;
} catch (error) {
if (
(error as any).status === 404 ||
(error as any).httpStatus === 404
) {
throw new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
throw new AnonymousError("file_too_big", {
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
content.on("error", (error) => {
error = new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
stream1.emit("error", error);
stream2.emit("error", error);
});
storage.write(file.repository.repoId, file.filePath, stream1, this.type);
return stream2;
} finally {
span.end();
}

24
src/core/source/Source.ts Normal file
View File

@@ -0,0 +1,24 @@
import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import GitHubDownload from "./GitHubDownload";
import GitHubStream from "./GitHubStream";
import Zip from "./Zip";
/** Union of the concrete source classes imported above: GitHubDownload, GitHubStream and Zip. */
export type Source = GitHubDownload | GitHubStream | Zip;
/**
 * Contract for a repository source: it exposes a type identifier, can
 * retrieve the content of a single file and can list its files.
 */
export interface SourceBase {
/** Identifier of the source kind (a plain string at this level). */
readonly type: string;
/**
 * Retrieve the file content
 * @param file the file of the content to retrieve
 * @returns a promise resolving to a readable stream of the content
 */
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
 * Get all the files from a specific source
 * @param progress optional callback that receives a status string
 * @returns a promise resolving to the tree of files
 */
getFiles(progress?: (status: string) => void): Promise<Tree>;
}

View File

@@ -1,7 +1,8 @@
import * as stream from "stream";
import AnonymizedFile from "../AnonymizedFile";
import storage from "../storage";
import * as stream from "stream";
import { SourceBase } from "./GitHubBase";
import { SourceBase } from "./Source";
export default class Zip implements SourceBase {
type = "Zip";

View File

@@ -83,7 +83,15 @@ export default class FileSystem extends StorageBase {
span.setAttribute("path", fullPath);
try {
await this.mk(repoId, dirname(p));
if (data instanceof Readable) {
data.on("error", (err) => {
this.rm(repoId, p);
});
}
return await fs.promises.writeFile(fullPath, data, "utf-8");
} catch (err: any) {
span.recordException(err);
// throw err;
} finally {
span.end();
}

View File

@@ -32,6 +32,8 @@ export default class S3Storage extends StorageBase {
private client(timeout = 10000) {
if (!config.S3_CLIENT_ID) throw new Error("S3_CLIENT_ID not set");
if (!config.S3_CLIENT_SECRET) throw new Error("S3_CLIENT_SECRET not set");
if (!config.S3_REGION) throw new Error("S3_REGION not set");
if (!config.S3_ENDPOINT) throw new Error("S3_ENDPOINT not set");
return new S3({
credentials: {
accessKeyId: config.S3_CLIENT_ID,
@@ -212,6 +214,14 @@ export default class S3Storage extends StorageBase {
try {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
if (data instanceof Readable) {
data.on("error", (err) => {
console.error(`[ERROR] S3 write ${path}`, err);
span.recordException(err as Error);
this.rm(repoId, path);
});
}
const params: PutObjectCommandInput = {
Bucket: config.S3_BUCKET,
Key: join(this.repoPath(repoId), path),
@@ -229,7 +239,6 @@ export default class S3Storage extends StorageBase {
});
await parallelUploads3.done();
return;
} finally {
span.end();
}

View File

@@ -4,6 +4,10 @@ import * as archiver from "archiver";
import { Response } from "express";
import { Tree } from "../types";
import S3Storage from "./S3";
import FileSystem from "./FileSystem";
/** Union of the storage backend classes imported above: S3Storage and FileSystem. */
export type Storage = S3Storage | FileSystem;
export enum FILE_TYPE {
FILE = "file",

View File

@@ -1,13 +1,3 @@
import GitHubDownload from "./source/GitHubDownload";
import GitHubStream from "./source/GitHubStream";
import Zip from "./source/Zip";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
export type Source = GitHubDownload | GitHubStream | Zip;
export type Storage = S3Storage | FileSystem;
export interface Branch {
name: string;
commit: string;

View File

@@ -1,6 +1,6 @@
import { Queue, Worker } from "bullmq";
import config from "../config";
import Repository from "./Repository";
import Repository from "../core/Repository";
import * as path from "path";
export let cacheQueue: Queue<Repository>;
@@ -37,7 +37,7 @@ export function startWorker() {
});
const cacheWorker = new Worker<Repository>(
cacheQueue.name,
path.resolve("build/src/processes/removeCache.js"),
path.resolve("build/queue/processes/removeCache.js"),
{
concurrency: 5,
connection,
@@ -49,7 +49,7 @@ export function startWorker() {
});
const removeWorker = new Worker<Repository>(
removeQueue.name,
path.resolve("build/src/processes/removeRepository.js"),
path.resolve("build/queue/processes/removeRepository.js"),
{
concurrency: 5,
connection,
@@ -62,7 +62,7 @@ export function startWorker() {
const downloadWorker = new Worker<Repository>(
downloadQueue.name,
path.resolve("build/src/processes/downloadRepository.js"),
path.resolve("build/queue/processes/downloadRepository.js"),
{
concurrency: 3,
connection,

View File

@@ -1,10 +1,10 @@
import { Exception, trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import { config } from "dotenv";
config();
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
import { Exception, trace } from "@opentelemetry/api";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
import { RepositoryStatus } from "../../core/types";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,7 +1,7 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { Exception, trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,8 +1,8 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
import { trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
import { RepositoryStatus } from "../../core/types";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,10 +1,10 @@
import mongoose, { ConnectOptions } from "mongoose";
import Repository from "../Repository";
import config from "../../config";
import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model";
import AnonymousError from "../AnonymousError";
import AnonymizedPullRequestModel from "./anonymizedPullRequests/anonymizedPullRequests.model";
import PullRequest from "../PullRequest";
import Repository from "../core/Repository";
import config from "../config";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import AnonymousError from "../core/AnonymousError";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import PullRequest from "../core/PullRequest";
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;

View File

@@ -1,3 +1,6 @@
import { config as dotenv } from "dotenv";
dotenv();
import { createClient } from "redis";
import { resolve, join } from "path";
import { existsSync } from "fs";
@@ -7,16 +10,15 @@ import RedisStore from "rate-limit-redis";
import * as express from "express";
import * as compression from "compression";
import * as passport from "passport";
import config from "../config";
import { connect } from "./database/database";
import { connect } from "./database";
import { initSession, router as connectionRouter } from "./routes/connection";
import router from "./routes";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
import { startWorker } from "./queue";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { startWorker } from "../queue";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import { getUser } from "./routes/route-utils";
import config from "../config";
function indexResponse(req: express.Request, res: express.Response) {
if (
@@ -214,3 +216,5 @@ export default async function start() {
app.listen(config.PORT);
console.log("Database connected and Server started on port: " + config.PORT);
}
start();

View File

@@ -1,12 +1,12 @@
import { Queue } from "bullmq";
import * as express from "express";
import AnonymousError from "../AnonymousError";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../database/conference/conferences.model";
import UserModel from "../database/users/users.model";
import { cacheQueue, downloadQueue, removeQueue } from "../queue";
import Repository from "../Repository";
import User from "../User";
import AnonymousError from "../../core/AnonymousError";
import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../../core/model/conference/conferences.model";
import UserModel from "../../core/model/users/users.model";
import { cacheQueue, downloadQueue, removeQueue } from "../../queue";
import Repository from "../../core/Repository";
import User from "../../core/User";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";

View File

@@ -1,10 +1,10 @@
import * as express from "express";
import AnonymousError from "../AnonymousError";
import Conference from "../Conference";
import ConferenceModel from "../database/conference/conferences.model";
import AnonymousError from "../../core/AnonymousError";
import Conference from "../../core/Conference";
import ConferenceModel from "../../core/model/conference/conferences.model";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import { IConferenceDocument } from "../database/conference/conferences.types";
import { IConferenceDocument } from "../../core/model/conference/conferences.types";
const router = express.Router();

View File

@@ -7,9 +7,9 @@ import { Profile, Strategy } from "passport-github2";
import * as express from "express";
import config from "../../config";
import UserModel from "../database/users/users.model";
import { IUserDocument } from "../database/users/users.types";
import AnonymousError from "../AnonymousError";
import UserModel from "../../core/model/users/users.model";
import { IUserDocument } from "../../core/model/users/users.types";
import AnonymousError from "../../core/AnonymousError";
export function ensureAuthenticated(
req: express.Request,

View File

@@ -1,6 +1,6 @@
import * as express from "express";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { getRepo, handleError } from "./route-utils";
export const router = express.Router();
@@ -49,7 +49,7 @@ router.get(
// cache the file for 5min
res.header("Cache-Control", "max-age=300");
await repo.countView();
f.send(res);
await f.send(res);
} catch (error) {
return handleError(error, res, req);
}

View File

@@ -7,11 +7,11 @@ import {
handleError,
isOwnerOrAdmin,
} from "./route-utils";
import AnonymousError from "../AnonymousError";
import { IAnonymizedPullRequestDocument } from "../database/anonymizedPullRequests/anonymizedPullRequests.types";
import PullRequest from "../PullRequest";
import AnonymizedPullRequestModel from "../database/anonymizedPullRequests/anonymizedPullRequests.model";
import { RepositoryStatus } from "../types";
import AnonymousError from "../../core/AnonymousError";
import { IAnonymizedPullRequestDocument } from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.types";
import PullRequest from "../../core/PullRequest";
import AnonymizedPullRequestModel from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import { RepositoryStatus } from "../../core/types";
const router = express.Router();

View File

@@ -1,7 +1,7 @@
import * as express from "express";
import { getPullRequest, handleError } from "./route-utils";
import AnonymousError from "../AnonymousError";
import AnonymousError from "../../core/AnonymousError";
const router = express.Router();

View File

@@ -1,22 +1,22 @@
import * as express from "express";
import { ensureAuthenticated } from "./connection";
import * as db from "../database/database";
import * as db from "../database";
import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils";
import { getRepositoryFromGitHub } from "../source/GitHubRepository";
import { getRepositoryFromGitHub } from "../../core/source/GitHubRepository";
import gh = require("parse-github-url");
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../Repository";
import UserModel from "../database/users/users.model";
import ConferenceModel from "../database/conference/conferences.model";
import AnonymousError from "../AnonymousError";
import { downloadQueue, removeQueue } from "../queue";
import RepositoryModel from "../database/repositories/repositories.model";
import User from "../User";
import { RepositoryStatus } from "../types";
import { IUserDocument } from "../database/users/users.types";
import { checkToken } from "../GitHubUtils";
import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { IAnonymizedRepositoryDocument } from "../../core/model/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../../core/Repository";
import UserModel from "../../core/model/users/users.model";
import ConferenceModel from "../../core/model/conference/conferences.model";
import AnonymousError from "../../core/AnonymousError";
import { downloadQueue, removeQueue } from "../../queue";
import RepositoryModel from "../../core/model/repositories/repositories.model";
import User from "../../core/User";
import { RepositoryStatus } from "../../core/types";
import { IUserDocument } from "../../core/model/users/users.types";
import { checkToken } from "../../core/GitHubUtils";
const router = express.Router();

View File

@@ -4,9 +4,9 @@ import * as stream from "stream";
import config from "../../config";
import { getRepo, handleError } from "./route-utils";
import AnonymousError from "../AnonymousError";
import { downloadQueue } from "../queue";
import { RepositoryStatus } from "../types";
import AnonymousError from "../../core/AnonymousError";
import { downloadQueue } from "../../queue";
import { RepositoryStatus } from "../../core/types";
const router = express.Router();

View File

@@ -1,8 +1,9 @@
import * as express from "express";
import AnonymousError from "../AnonymousError";
import * as db from "../database/database";
import UserModel from "../database/users/users.model";
import User from "../User";
import AnonymousError from "../../core/AnonymousError";
import * as db from "../database";
import UserModel from "../../core/model/users/users.model";
import User from "../../core/User";
import { HTTPError } from "got";
export async function getPullRequest(
req: express.Request,
@@ -85,6 +86,11 @@ function printError(error: any, req?: express.Request) {
if (req.originalUrl === "/api/repo/undefined/options") return;
}
console.error(message);
} else if (error instanceof HTTPError) {
let message = `[ERROR] HTTP.${
error.code
} ${error.message.toString()} ${error.stack?.split("\n")[1].trim()}`;
console.error(message);
} else if (error instanceof Error) {
console.error(error);
} else {
@@ -107,7 +113,10 @@ export function handleError(
status = error.httpStatus;
} else if (error.$metadata?.httpStatusCode) {
status = error.$metadata.httpStatusCode;
} else if (message && message.indexOf("not_found") > -1) {
} else if (
message &&
(message.indexOf("not_found") > -1 || message.indexOf("(Not Found)") > -1)
) {
status = 404;
} else if (message && message.indexOf("not_connected") > -1) {
status = 401;

View File

@@ -2,8 +2,8 @@ import * as express from "express";
import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import UserModel from "../database/users/users.model";
import User from "../User";
import UserModel from "../../core/model/users/users.model";
import User from "../../core/User";
const router = express.Router();

View File

@@ -1,11 +1,11 @@
import * as express from "express";
import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import { Tree, TreeElement } from "../types";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { Tree, TreeElement } from "../../core/types";
import * as marked from "marked";
import { streamToString } from "../anonymize-utils";
import { streamToString } from "../../core/anonymize-utils";
const router = express.Router();

View File

@@ -1,8 +1,8 @@
import * as schedule from "node-schedule";
import Conference from "./Conference";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "./database/conference/conferences.model";
import Repository from "./Repository";
import Conference from "../core/Conference";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../core/model/conference/conferences.model";
import Repository from "../core/Repository";
export function conferenceStatusCheck() {
// check every 6 hours the status of the conferences

31
src/streamer/index.ts Normal file
View File

@@ -0,0 +1,31 @@
import { config as dotenv } from "dotenv";
dotenv();
import * as express from "express";
import * as compression from "compression";
import config from "../config";
import router from "./route";
import { handleError } from "../server/routes/route-utils";
import AnonymousError from "../core/AnonymousError";
/**
 * Catch-all handler for requests that matched no route.
 *
 * Wraps the requested URL in a 404 "file_not_found" AnonymousError and
 * delegates the response to the shared handleError helper.
 */
function replyFileNotFound(req: express.Request, res: express.Response) {
  const notFound = new AnonymousError("file_not_found", {
    httpStatus: 404,
    object: req.originalUrl,
  });
  handleError(notFound, res, req);
}

// Streamer HTTP application.
const app = express();

// Registration order matters: JSON body parsing and response compression
// are installed before the API router, and the catch-all comes last so it
// only sees requests nothing else handled.
app.use(express.json());
app.use(compression());
app.use("/api", router);
app.all("*", replyFileNotFound);

// Start listening on the configured port and log the service address.
app.listen(config.PORT, () => {
  console.log(`Server started on http://streamer:${config.PORT}`);
});

63
src/streamer/route.ts Normal file
View File

@@ -0,0 +1,63 @@
import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { contentType } from "mime-types";
export const router = express.Router();

/**
 * POST / — download one file from GitHub and stream it back anonymized.
 *
 * Fields read from the JSON body:
 *  - token: GitHub token used for the download
 *  - repoFullName: "organization/name"
 *  - repoId, commit: forwarded to GitHubStream
 *  - sha: blob sha of the file to download
 *  - filePath: used only to choose the response Content-Type
 *  - anonymizerOptions: forwarded to AnonymizeTransformer
 *
 * Every failure, whether during set-up or while streaming, is reported
 * through handleError.
 */
router.post("/", async (req: express.Request, res: express.Response) => {
  try {
    // All request parsing lives inside the try: an exception thrown outside
    // it (e.g. a missing repoFullName) would reject this async handler's
    // promise instead of reaching handleError.
    req.body = req.body || {};
    const token: string = req.body.token;
    const repoFullName = req.body.repoFullName.split("/");
    const repoId = req.body.repoId;
    const fileSha = req.body.sha;
    const commit = req.body.commit;
    const filePath = req.body.filePath;
    const anonymizerOptions = req.body.anonymizerOptions;

    const anonymizer = new AnonymizeTransformer(anonymizerOptions);
    const source = new GitHubStream({
      repoId,
      organization: repoFullName[0],
      repoName: repoFullName[1],
      commit: commit,
      getToken: () => token,
    });

    // Pick the Content-Type from the file name. ".ts" is excluded from the
    // mime lookup — presumably because the mime database maps it to a video
    // type (TODO confirm) — so such files fall through to the text check.
    const mime = contentType(filePath);
    if (mime && !filePath.endsWith(".ts")) {
      res.contentType(mime);
    } else if (isTextFile(filePath)) {
      res.contentType("text/plain");
    }
    res.header("Accept-Ranges", "none");

    // When the name gave no mime type, rely on the transformer's own text
    // detection, reported on its first "transform" event.
    anonymizer.once("transform", (data) => {
      if (!mime && data.isText) {
        res.contentType("text/plain");
      }
    });

    // Start the download only after the header logic above has succeeded, so
    // a failure there does not leave an unconsumed upstream stream behind.
    const content = source.downloadFile(token, fileSha);

    // Stop the upstream download if it is still active.
    const releaseContent = () => {
      if (!content.closed && !content.destroyed) {
        content.destroy();
      }
    };

    // Shared failure path for every stage of the pipeline.
    const handleStreamError = (error: Error) => {
      releaseContent();
      handleError(error, res);
    };

    // pipe() does not forward errors downstream, so each stage needs its own
    // listener — including the anonymizer, whose errors would otherwise be
    // emitted with no handler attached.
    content
      .on("error", handleStreamError)
      .pipe(anonymizer)
      .on("error", handleStreamError)
      .pipe(res)
      .on("error", handleStreamError)
      .on("close", releaseContent);
  } catch (error) {
    handleError(error, res);
  }
});

export default router;

View File

@@ -17,6 +17,6 @@
"esModuleInterop": false,
"incremental": true
},
"include": ["src/**/*.ts", "index.ts", "cli.ts"],
"include": ["src/**/*.ts", "index.ts", "src/cli/index.ts"],
"exclude": ["node_modules", ".vscode"]
}