feat: introduce streamers that handle the stream and anonymization from github

This commit is contained in:
tdurieux
2024-04-03 11:13:01 +01:00
parent 73019c1b44
commit 4d12641c7e
64 changed files with 419 additions and 257 deletions

123
src/cli/index.ts Normal file
View File

@@ -0,0 +1,123 @@
#!/usr/bin/env node
import { config as dot } from "dotenv";
dot();
process.env.STORAGE = "filesystem";
import { writeFile } from "fs/promises";
import { join } from "path";
import * as gh from "parse-github-url";
import * as inquirer from "inquirer";
import server from "../server";
import config from "../config";
import GitHubDownload from "../core/source/GitHubDownload";
import Repository from "../core/Repository";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "../core/source/GitHubRepository";
/**
 * Build a random alphanumeric string, used here to name the generated archive.
 *
 * Characters are drawn with `Math.random()` from the 62 ASCII letters and
 * digits, so the result is fine for a throwaway file name but is not
 * cryptographically secure.
 *
 * @param size number of characters to generate; zero or a negative value
 *   yields an empty string
 * @returns the generated string, made only of characters in `[A-Za-z0-9]`
 */
function generateRandomFileName(size: number) {
  const alphabet =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  const picked: string[] = [];
  for (let count = 0; count < size; count += 1) {
    // one random draw per character
    const index = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet.charAt(index));
  }
  return picked.join("");
}
/**
 * Interactive command-line flow that anonymizes one GitHub repository and
 * saves it as a zip archive on the local disk.
 *
 * Steps:
 *  1. force the filesystem storage backend;
 *  2. prompt for a GitHub token, the repository URL, the terms to remove and
 *     the output folder;
 *  3. resolve the repository and the branch to download through the GitHub
 *     helpers;
 *  4. download the repository, anonymize it and write the archive to
 *     `<output>/<8 random characters>.zip`.
 *
 * @throws Error("Invalid GitHub URL") when no owner or repository name can be
 *   parsed from the URL that was entered
 */
async function main() {
  config.STORAGE = "filesystem";

  const inq = await inquirer.prompt([
    {
      type: "string",
      name: "token",
      message: `Enter your GitHub token. You can create one at https://github.com/settings/personal-access-tokens/new.`,
      default: process.env.GITHUB_TOKEN,
    },
    {
      type: "string",
      name: "repo",
      message: `URL of the repository to anonymize (if you want to download a specific branch or commit use the GitHub URL of that branch or commit).`,
    },
    {
      type: "string",
      name: "terms",
      message: `Terms to remove from your repository (separated with comma).`,
    },
    {
      type: "string",
      name: "output",
      message: `The output folder where to save the zipped repository.`,
      default: process.cwd(),
    },
  ]);

  // fall back to an empty description so the validation below can reject it
  const ghURL = gh(inq.repo) || {
    owner: undefined,
    name: undefined,
    branch: undefined,
    commit: undefined,
  };
  if (!ghURL.owner || !ghURL.name) {
    throw new Error("Invalid GitHub URL");
  }

  const ghRepo = await getRepositoryFromGitHub({
    accessToken: inq.token,
    owner: ghURL.owner,
    repo: ghURL.name,
  });
  const branches = await ghRepo.branches({
    accessToken: inq.token,
    force: true,
  });

  // Only trust the parsed branch when it literally appears in the URL that was
  // typed; otherwise use the repository default branch, then "master".
  // NOTE(review): presumably the URL parser fills in a default branch name —
  // confirm against parse-github-url.
  const branchToFind = inq.repo.includes(ghURL.branch)
    ? ghURL.branch
    : ghRepo.model.defaultBranch || "master";
  const branch = branches.find((b) => b.name === branchToFind);

  // "foo, bar" must yield ["foo", "bar"] and an empty answer must yield no
  // term at all: terms end up inside a `\b<term>\b` regular expression, where
  // surrounding blanks or an empty string would not match what the user meant.
  const terms = String(inq.terms ?? "")
    .split(",")
    .map((term) => term.trim())
    .filter((term) => term.length > 0);

  const repository = new Repository(
    new AnonymizedRepositoryModel({
      repoId: `${ghURL.name}-${branch?.name}`,
      source: {
        type: "GitHubDownload",
        accessToken: inq.token,
        branch: branchToFind,
        // when the branch is unknown, let GitHub resolve the latest commit
        commit: branch?.commit || "HEAD",
        repositoryName: `${ghURL.owner}/${ghURL.name}`,
      },
      options: {
        terms,
        expirationMode: "never",
        update: false,
        image: true,
        pdf: true,
        notebook: true,
        link: true,
        page: false,
      },
    })
  );

  console.info(
    `[INFO] Downloading repository: ${repository.model.source.repositoryName} from branch ${repository.model.source.branch} and commit ${repository.model.source.commit}...`
  );
  await (repository.source as GitHubDownload).download(inq.token);

  const outputFileName = join(inq.output, generateRandomFileName(8) + ".zip");
  console.info("[INFO] Anonymizing repository and creation zip file...");
  await writeFile(outputFileName, await repository.zip());
  console.log(`Anonymized repository saved at ${outputFileName}`);
}
// Run only when this file is the program entry point, not when it is imported.
if (require.main === module) {
  // `server` as first argument starts the server, anything else runs the
  // interactive command-line interface
  const entryPoint = process.argv[2] === "server" ? server : main;
  // Both entry points are asynchronous: report a failure explicitly and exit
  // with a non-zero status instead of leaving the promise unhandled.
  Promise.resolve(entryPoint()).catch((error: unknown) => {
    console.error(error);
    process.exit(1);
  });
}

88
src/config.ts Normal file
View File

@@ -0,0 +1,88 @@
import { resolve } from "path";
/**
 * Shape of the application configuration object exported by this module.
 */
interface Config {
  // --- web server ---------------------------------------------------------
  /** Port the HTTP server listens on. */
  PORT: number;
  APP_HOSTNAME: string;
  SESSION_SECRET: string;
  TRUST_PROXY: number;
  RATE_LIMIT: number;

  // --- GitHub -------------------------------------------------------------
  CLIENT_ID: string;
  CLIENT_SECRET: string;
  GITHUB_TOKEN: string;
  AUTH_CALLBACK: string;

  // --- quotas and size limits ---------------------------------------------
  DEFAULT_QUOTA: number;
  MAX_FILE_FOLDER: number;
  /** Expressed in bytes according to the note next to the default value. */
  MAX_FILE_SIZE: number;
  /** Expressed in kb according to the note next to the default value. */
  MAX_REPO_SIZE: number;
  /** Expressed in kb according to the note next to the default value. */
  AUTO_DOWNLOAD_REPO_SIZE: number;
  /** Expressed in kb according to the note next to the default value. */
  FREE_DOWNLOAD_REPO_SIZE: number;

  // --- anonymization ------------------------------------------------------
  /**
   * Allow to download repository and files
   */
  ENABLE_DOWNLOAD: boolean;
  /**
   * Base URL of the streamer service; `null` means that no streamer service
   * is configured.
   */
  STREAMER_ENTRYPOINT: null | string;
  ANONYMIZATION_MASK: string;
  additionalExtensions: Array<string>;

  // --- databases ----------------------------------------------------------
  DB_USERNAME: string;
  DB_PASSWORD: string;
  DB_HOSTNAME: string;
  REDIS_HOSTNAME: string;
  REDIS_PORT: number;

  // --- storage ------------------------------------------------------------
  /** Storage backend to use. */
  STORAGE: "s3" | "filesystem";
  FOLDER: string;
  S3_BUCKET: null | string;
  S3_CLIENT_ID: null | string;
  S3_CLIENT_SECRET: null | string;
  S3_ENDPOINT: null | string;
  S3_REGION: null | string;
}
/**
 * Default configuration values.
 *
 * These are only defaults: the loop that follows this object replaces any key
 * for which an environment variable of the same name exists.
 */
const config: Config = {
SESSION_SECRET: "SESSION_SECRET",
CLIENT_ID: "CLIENT_ID",
CLIENT_SECRET: "CLIENT_SECRET",
GITHUB_TOKEN: "",
// NOTE(review): 2 * 1024^3 * 8; the unit is not stated here — confirm
DEFAULT_QUOTA: 2 * 1024 * 1024 * 1024 * 8,
MAX_FILE_FOLDER: 1000,
MAX_FILE_SIZE: 100 * 1024 * 1024, // in bytes; evaluates to 100 MiB (this note used to say 10MB — confirm the intended limit)
MAX_REPO_SIZE: 60000, // in kb, 60MB
AUTO_DOWNLOAD_REPO_SIZE: 150, // in kb, 150kb
FREE_DOWNLOAD_REPO_SIZE: 150, // in kb, 150kb
ENABLE_DOWNLOAD: true,
AUTH_CALLBACK: "http://localhost:5000/github/auth",
ANONYMIZATION_MASK: "XXXX",
PORT: 5000,
TRUST_PROXY: 1,
RATE_LIMIT: 350,
APP_HOSTNAME: "anonymous.4open.science",
DB_USERNAME: "admin",
DB_PASSWORD: "password",
DB_HOSTNAME: "mongodb",
REDIS_HOSTNAME: "redis",
REDIS_PORT: 6379,
// "repositories" directory located one level above this file's directory
FOLDER: resolve(__dirname, "..", "repositories"),
additionalExtensions: [
"license",
"dockerfile",
"sbt",
"ipynb",
"gp",
"out",
"sol",
"in",
],
STORAGE: "filesystem",
// the optional settings default to null so that the override loop below,
// which skips undefined keys, can still replace them
STREAMER_ENTRYPOINT: null,
S3_BUCKET: null,
S3_CLIENT_ID: null,
S3_CLIENT_SECRET: null,
S3_ENDPOINT: null,
S3_REGION: null,
};
/**
 * Convert a raw environment string to the runtime type of the default value it
 * replaces, so that an overridden setting keeps the type declared in `Config`.
 *
 * @param name name of the setting, only used in the warning message
 * @param current the default value currently stored for the setting
 * @param raw the value read from the environment (always a string)
 * @returns a number for numeric defaults, a boolean for boolean defaults, a
 *   list of trimmed non-empty items for array defaults, and `raw` unchanged
 *   otherwise (string and `null` defaults)
 */
function coerceEnvValue(name: string, current: unknown, raw: string): unknown {
  if (typeof current === "number") {
    const parsed = Number(raw);
    if (raw.trim() === "" || Number.isNaN(parsed)) {
      // keep the default rather than storing NaN or a non-numeric string
      console.warn(
        `[WARN] Ignoring non-numeric value for ${name}, keeping ${current}`
      );
      return current;
    }
    return parsed;
  }
  if (typeof current === "boolean") {
    // without this, the string "false" would be stored and read as truthy
    return ["true", "1", "yes", "on"].includes(raw.trim().toLowerCase());
  }
  if (Array.isArray(current)) {
    return raw
      .split(",")
      .map((item) => item.trim())
      .filter((item) => item.length > 0);
  }
  return raw;
}

// Override the defaults with the environment variables that carry the name of
// an existing configuration key; unrelated variables are ignored.
for (const [name, raw] of Object.entries(process.env)) {
  if (raw === undefined) continue;
  if (!Object.prototype.hasOwnProperty.call(config, name)) continue;
  (config as any)[name] = coerceEnvValue(name, (config as any)[name], raw);
}
export default config;

View File

@@ -2,13 +2,15 @@ import { join, basename } from "path";
import { Response } from "express";
import { Readable } from "stream";
import { trace } from "@opentelemetry/api";
import { lookup } from "mime-types";
import Repository from "./Repository";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import config from "../config";
import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "./routes/route-utils";
import { lookup } from "mime-types";
import { handleError } from "../server/routes/route-utils";
import got from "got";
/**
* Represents a file in an anonymized repository
@@ -197,7 +199,7 @@ export default class AnonymizedFile {
httpStatus: 403,
});
}
const out = await this.repository.source?.getFileContent(this);
const content = await this.repository.source?.getFileContent(this);
if (
!this.repository.model.isReseted ||
this.repository.status != RepositoryStatus.READY
@@ -205,7 +207,7 @@ export default class AnonymizedFile {
this.repository.model.isReseted = false;
await this.repository.updateStatus(RepositoryStatus.READY);
}
return out;
return content;
} finally {
span.end();
}
@@ -213,19 +215,32 @@ export default class AnonymizedFile {
}
async anonymizedContent() {
return trace
.getTracer("ano-file")
.startActiveSpan("anonymizedContent", async (span) => {
span.setAttribute("anonymizedPath", this.anonymizedPath);
const content = await this.content();
return content
.pipe(
this.repository.generateAnonymizeTransformer(this.anonymizedPath)
)
.on("close", () => {
span.end();
});
const span = trace.getTracer("ano-file").startSpan("Repository.conference");
span.setAttribute("anonymizedPath", this.anonymizedPath);
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
if (!config.STREAMER_ENTRYPOINT) {
// collect the content locally
const content = await this.content();
return content.pipe(anonymizer).on("close", () => {
span.end();
});
}
// use the streamer service
return got.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
json: {
token: await this.repository.getToken(),
repoFullName: this.repository.model.source.repositoryName,
commit: this.repository.model.source.commit,
branch: this.repository.model.source.branch,
repoId: this.repository.repoId,
filePath: this.filePath,
sha: await this.sha(),
anonymizerOptions: anonymizer.opt,
},
});
}
get filePath() {
@@ -243,6 +258,9 @@ export default class AnonymizedFile {
}
async send(res: Response): Promise<void> {
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
return trace
.getTracer("ano-file")
.startActiveSpan("AnonymizedFile.send", async (span) => {
@@ -250,6 +268,39 @@ export default class AnonymizedFile {
span.setAttribute("anonymizedPath", this.anonymizedPath);
return new Promise<void>(async (resolve, reject) => {
try {
if (config.STREAMER_ENTRYPOINT) {
// use the streamer service
got
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
json: {
token: await this.repository.getToken(),
repoFullName: this.repository.model.source.repositoryName,
commit: this.repository.model.source.commit,
branch: this.repository.model.source.branch,
repoId: this.repository.repoId,
filePath: this.filePath,
sha: await this.sha(),
anonymizerOptions: anonymizer.opt,
},
})
.on("error", () => {
handleError(
new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
}),
res
);
})
.pipe(res)
.on("close", () => {
span.end();
resolve();
});
return;
}
const mime = lookup(this.anonymizedPath);
if (mime && this.extension() != "ts") {
res.contentType(mime);
@@ -257,9 +308,6 @@ export default class AnonymizedFile {
res.contentType("text/plain");
}
res.header("Accept-Ranges", "none");
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
);
anonymizer.once("transform", (data) => {
if (!mime && data.isText) {
res.contentType("text/plain");
@@ -269,26 +317,27 @@ export default class AnonymizedFile {
res.header("Content-Length", this.fileSize.toString());
}
});
const content = await this.content();
function handleStreamError(error: Error) {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.recordException(error);
span.end();
reject(error);
// handleError(error, res);
}
content
.on("error", handleStreamError)
.pipe(anonymizer)
.pipe(res)
.on("error", handleStreamError)
.on("close", () => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.end();
resolve();
})
.on("error", (error) => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
span.recordException(error);
span.end();
reject(error);
handleError(error, res);
});
} catch (error) {
handleError(error, res);

View File

@@ -1,5 +1,5 @@
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { IConferenceDocument } from "./database/conference/conferences.types";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { IConferenceDocument } from "./model/conference/conferences.types";
import Repository from "./Repository";
import { ConferenceStatus } from "./types";

View File

@@ -1,7 +1,8 @@
import { trace } from "@opentelemetry/api";
import Repository from "./Repository";
import { Octokit } from "@octokit/rest";
import UserModel from "./database/users/users.model";
import Repository from "./Repository";
import UserModel from "./model/users/users.model";
import config from "../config";
export function octokit(token: string) {

View File

@@ -1,10 +1,10 @@
import { RepositoryStatus } from "./types";
import User from "./User";
import UserModel from "./database/users/users.model";
import UserModel from "./model/users/users.model";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types";
import { IAnonymizedPullRequestDocument } from "./model/anonymizedPullRequests/anonymizedPullRequests.types";
import config from "../config";
import got from "got";
import { octokit } from "./GitHubUtils";

View File

@@ -6,16 +6,16 @@ import GitHubStream from "./source/GitHubStream";
import GitHubDownload from "./source/GitHubDownload";
import Zip from "./source/Zip";
import { anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
import UserModel from "./model/users/users.model";
import { IAnonymizedRepositoryDocument } from "./model/anonymizedRepositories/anonymizedRepositories.types";
import { AnonymizeTransformer } from "./anonymize-utils";
import GitHubBase from "./source/GitHubBase";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { downloadQueue } from "./queue";
import { isConnected } from "./database/database";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { downloadQueue } from "../queue";
import { isConnected } from "../server/database";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { GitHubRepository } from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
@@ -59,7 +59,7 @@ export default class Repository {
private checkedToken: boolean = false;
private async getToken() {
async getToken() {
if (this.checkedToken) return this._model.source.accessToken as string;
const originalToken = this._model.source.accessToken;
const token = await getToken(this);

View File

@@ -1,11 +1,12 @@
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./database/repositories/repositories.model";
import { IUserDocument } from "./database/users/users.types";
import { trace } from "@opentelemetry/api";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./model/repositories/repositories.model";
import { IUserDocument } from "./model/users/users.types";
import Repository from "./Repository";
import { GitHubRepository } from "./source/GitHubRepository";
import PullRequest from "./PullRequest";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { trace } from "@opentelemetry/api";
import AnonymizedPullRequestModel from "./model/anonymizedPullRequests/anonymizedPullRequests.model";
import { octokit } from "./GitHubUtils";
/**

View File

@@ -1,10 +1,10 @@
import config from "../config";
import { isText } from "istextorbinary";
import { basename } from "path";
import { Transform } from "stream";
import { Readable } from "stream";
import { Transform, Readable } from "stream";
import { isText } from "istextorbinary";
import { trace } from "@opentelemetry/api";
import config from "../config";
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
@@ -35,7 +35,7 @@ export class AnonymizeTransformer extends Transform {
anonimizer: ContentAnonimizer;
constructor(
private readonly opt: {
readonly opt: {
filePath: string;
} & ConstructorParameters<typeof ContentAnonimizer>[0]
) {
@@ -56,7 +56,6 @@ export class AnonymizeTransformer extends Transform {
if (this.isText === null) {
this.isText = isTextFile(this.opt.filePath, chunk);
}
if (this.isText) {
const content = this.anonimizer.anonymize(chunk.toString());
if (this.anonimizer.wasAnonymized) {
@@ -171,7 +170,7 @@ export class ContentAnonimizer {
});
// remove the term in the text
return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
this.wasAnonymized = true;
return mask;
});

View File

@@ -2,6 +2,7 @@ import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import { SourceBase } from "./Source";
export interface GitHubBaseData {
getToken: () => string | Promise<string>;
@@ -11,21 +12,6 @@ export interface GitHubBaseData {
commit: string;
}
export interface SourceBase {
readonly type: string;
/**
* Retrieve the fie content
* @param file the file of the content to retrieve
*/
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
*/
getFiles(progress?: (status: string) => void): Promise<Tree>;
}
export default abstract class GitHubBase implements SourceBase {
abstract type: "GitHubDownload" | "GitHubStream" | "Zip";
accessToken: string | undefined;

View File

@@ -4,11 +4,11 @@ import { OctokitResponse } from "@octokit/types";
import storage from "../storage";
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import { trace } from "@opentelemetry/api";
import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../AnonymizedFile";
export default class GitHubDownload extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload";

View File

@@ -1,12 +1,13 @@
import { Branch } from "../types";
import * as gh from "parse-github-url";
import { IRepositoryDocument } from "../database/repositories/repositories.types";
import { RestEndpointMethodTypes } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../database/database";
import { trace } from "@opentelemetry/api";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../../server/database";
import { octokit } from "../GitHubUtils";
import { IRepositoryDocument } from "../model/repositories/repositories.types";
import RepositoryModel from "../model/repositories/repositories.model";
export class GitHubRepository {
private _data: Partial<{

View File

@@ -82,36 +82,24 @@ export default class GitHubStream extends GitHubBase {
object: file,
});
}
try {
const content = this.downloadFile(await this.data.getToken(), file_sha);
const content = this.downloadFile(await this.data.getToken(), file_sha);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
storage.write(
file.repository.repoId,
file.filePath,
stream1,
this.type
);
return stream2;
} catch (error) {
if (
(error as any).status === 404 ||
(error as any).httpStatus === 404
) {
throw new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
throw new AnonymousError("file_too_big", {
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
content.on("error", (error) => {
error = new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
stream1.emit("error", error);
stream2.emit("error", error);
});
storage.write(file.repository.repoId, file.filePath, stream1, this.type);
return stream2;
} finally {
span.end();
}

24
src/core/source/Source.ts Normal file
View File

@@ -0,0 +1,24 @@
import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import GitHubDownload from "./GitHubDownload";
import GitHubStream from "./GitHubStream";
import Zip from "./Zip";
/**
 * Union of the concrete source implementations imported above.
 */
export type Source = GitHubDownload | GitHubStream | Zip;
/**
 * Contract of a source: something that can list the files of a repository and
 * provide the content of one of them.
 */
export interface SourceBase {
/**
* Name identifying the kind of source
*/
readonly type: string;
/**
* Retrieve the file content
* @param file the file of the content to retrieve
* @returns a promise resolving to a readable stream of the content
*/
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
* @param progress optional callback receiving a status string; presumably
* called to report progress while the files are collected — confirm against
* the implementations
* @returns a promise resolving to the tree of files
*/
getFiles(progress?: (status: string) => void): Promise<Tree>;
}

View File

@@ -1,7 +1,8 @@
import * as stream from "stream";
import AnonymizedFile from "../AnonymizedFile";
import storage from "../storage";
import * as stream from "stream";
import { SourceBase } from "./GitHubBase";
import { SourceBase } from "./Source";
export default class Zip implements SourceBase {
type = "Zip";

View File

@@ -83,7 +83,15 @@ export default class FileSystem extends StorageBase {
span.setAttribute("path", fullPath);
try {
await this.mk(repoId, dirname(p));
if (data instanceof Readable) {
data.on("error", (err) => {
this.rm(repoId, p);
});
}
return await fs.promises.writeFile(fullPath, data, "utf-8");
} catch (err: any) {
span.recordException(err);
// throw err;
} finally {
span.end();
}

View File

@@ -32,6 +32,8 @@ export default class S3Storage extends StorageBase {
private client(timeout = 10000) {
if (!config.S3_CLIENT_ID) throw new Error("S3_CLIENT_ID not set");
if (!config.S3_CLIENT_SECRET) throw new Error("S3_CLIENT_SECRET not set");
if (!config.S3_REGION) throw new Error("S3_REGION not set");
if (!config.S3_ENDPOINT) throw new Error("S3_ENDPOINT not set");
return new S3({
credentials: {
accessKeyId: config.S3_CLIENT_ID,
@@ -212,6 +214,14 @@ export default class S3Storage extends StorageBase {
try {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
if (data instanceof Readable) {
data.on("error", (err) => {
console.error(`[ERROR] S3 write ${path}`, err);
span.recordException(err as Error);
this.rm(repoId, path);
});
}
const params: PutObjectCommandInput = {
Bucket: config.S3_BUCKET,
Key: join(this.repoPath(repoId), path),
@@ -229,7 +239,6 @@ export default class S3Storage extends StorageBase {
});
await parallelUploads3.done();
return;
} finally {
span.end();
}

View File

@@ -4,6 +4,10 @@ import * as archiver from "archiver";
import { Response } from "express";
import { Tree } from "../types";
import S3Storage from "./S3";
import FileSystem from "./FileSystem";
export type Storage = S3Storage | FileSystem;
export enum FILE_TYPE {
FILE = "file",

View File

@@ -1,13 +1,3 @@
import GitHubDownload from "./source/GitHubDownload";
import GitHubStream from "./source/GitHubStream";
import Zip from "./source/Zip";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
export type Source = GitHubDownload | GitHubStream | Zip;
export type Storage = S3Storage | FileSystem;
export interface Branch {
name: string;
commit: string;

View File

@@ -1,6 +1,6 @@
import { Queue, Worker } from "bullmq";
import config from "../config";
import Repository from "./Repository";
import Repository from "../core/Repository";
import * as path from "path";
export let cacheQueue: Queue<Repository>;
@@ -37,7 +37,7 @@ export function startWorker() {
});
const cacheWorker = new Worker<Repository>(
cacheQueue.name,
path.resolve("build/src/processes/removeCache.js"),
path.resolve("build/queue/processes/removeCache.js"),
{
concurrency: 5,
connection,
@@ -49,7 +49,7 @@ export function startWorker() {
});
const removeWorker = new Worker<Repository>(
removeQueue.name,
path.resolve("build/src/processes/removeRepository.js"),
path.resolve("build/queue/processes/removeRepository.js"),
{
concurrency: 5,
connection,
@@ -62,7 +62,7 @@ export function startWorker() {
const downloadWorker = new Worker<Repository>(
downloadQueue.name,
path.resolve("build/src/processes/downloadRepository.js"),
path.resolve("build/queue/processes/downloadRepository.js"),
{
concurrency: 3,
connection,

View File

@@ -1,10 +1,10 @@
import { Exception, trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import { config } from "dotenv";
config();
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
import { Exception, trace } from "@opentelemetry/api";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
import { RepositoryStatus } from "../../core/types";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,7 +1,7 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { Exception, trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,8 +1,8 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
import { trace } from "@opentelemetry/api";
import { SandboxedJob } from "bullmq";
import Repository from "../../core/Repository";
import { getRepository as getRepositoryImport } from "../../server/database";
import { RepositoryStatus } from "../../core/types";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -1,10 +1,10 @@
import mongoose, { ConnectOptions } from "mongoose";
import Repository from "../Repository";
import config from "../../config";
import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model";
import AnonymousError from "../AnonymousError";
import AnonymizedPullRequestModel from "./anonymizedPullRequests/anonymizedPullRequests.model";
import PullRequest from "../PullRequest";
import Repository from "../core/Repository";
import config from "../config";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import AnonymousError from "../core/AnonymousError";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import PullRequest from "../core/PullRequest";
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;

View File

@@ -1,3 +1,6 @@
import { config as dotenv } from "dotenv";
dotenv();
import { createClient } from "redis";
import { resolve, join } from "path";
import { existsSync } from "fs";
@@ -7,16 +10,15 @@ import RedisStore from "rate-limit-redis";
import * as express from "express";
import * as compression from "compression";
import * as passport from "passport";
import config from "../config";
import { connect } from "./database/database";
import { connect } from "./database";
import { initSession, router as connectionRouter } from "./routes/connection";
import router from "./routes";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
import { startWorker } from "./queue";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { startWorker } from "../queue";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import { getUser } from "./routes/route-utils";
import config from "../config";
function indexResponse(req: express.Request, res: express.Response) {
if (
@@ -214,3 +216,5 @@ export default async function start() {
app.listen(config.PORT);
console.log("Database connected and Server started on port: " + config.PORT);
}
start();

View File

@@ -1,12 +1,12 @@
import { Queue } from "bullmq";
import * as express from "express";
import AnonymousError from "../AnonymousError";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../database/conference/conferences.model";
import UserModel from "../database/users/users.model";
import { cacheQueue, downloadQueue, removeQueue } from "../queue";
import Repository from "../Repository";
import User from "../User";
import AnonymousError from "../../core/AnonymousError";
import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../../core/model/conference/conferences.model";
import UserModel from "../../core/model/users/users.model";
import { cacheQueue, downloadQueue, removeQueue } from "../../queue";
import Repository from "../../core/Repository";
import User from "../../core/User";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";

View File

@@ -1,10 +1,10 @@
import * as express from "express";
import AnonymousError from "../AnonymousError";
import Conference from "../Conference";
import ConferenceModel from "../database/conference/conferences.model";
import AnonymousError from "../../core/AnonymousError";
import Conference from "../../core/Conference";
import ConferenceModel from "../../core/model/conference/conferences.model";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import { IConferenceDocument } from "../database/conference/conferences.types";
import { IConferenceDocument } from "../../core/model/conference/conferences.types";
const router = express.Router();

View File

@@ -7,9 +7,9 @@ import { Profile, Strategy } from "passport-github2";
import * as express from "express";
import config from "../../config";
import UserModel from "../database/users/users.model";
import { IUserDocument } from "../database/users/users.types";
import AnonymousError from "../AnonymousError";
import UserModel from "../../core/model/users/users.model";
import { IUserDocument } from "../../core/model/users/users.types";
import AnonymousError from "../../core/AnonymousError";
export function ensureAuthenticated(
req: express.Request,

View File

@@ -1,6 +1,6 @@
import * as express from "express";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { getRepo, handleError } from "./route-utils";
export const router = express.Router();
@@ -49,7 +49,7 @@ router.get(
// cache the file for 5min
res.header("Cache-Control", "max-age=300");
await repo.countView();
f.send(res);
await f.send(res);
} catch (error) {
return handleError(error, res, req);
}

View File

@@ -7,11 +7,11 @@ import {
handleError,
isOwnerOrAdmin,
} from "./route-utils";
import AnonymousError from "../AnonymousError";
import { IAnonymizedPullRequestDocument } from "../database/anonymizedPullRequests/anonymizedPullRequests.types";
import PullRequest from "../PullRequest";
import AnonymizedPullRequestModel from "../database/anonymizedPullRequests/anonymizedPullRequests.model";
import { RepositoryStatus } from "../types";
import AnonymousError from "../../core/AnonymousError";
import { IAnonymizedPullRequestDocument } from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.types";
import PullRequest from "../../core/PullRequest";
import AnonymizedPullRequestModel from "../../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import { RepositoryStatus } from "../../core/types";
const router = express.Router();

View File

@@ -1,7 +1,7 @@
import * as express from "express";
import { getPullRequest, handleError } from "./route-utils";
import AnonymousError from "../AnonymousError";
import AnonymousError from "../../core/AnonymousError";
const router = express.Router();

View File

@@ -1,22 +1,22 @@
import * as express from "express";
import { ensureAuthenticated } from "./connection";
import * as db from "../database/database";
import * as db from "../database";
import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils";
import { getRepositoryFromGitHub } from "../source/GitHubRepository";
import { getRepositoryFromGitHub } from "../../core/source/GitHubRepository";
import gh = require("parse-github-url");
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../Repository";
import UserModel from "../database/users/users.model";
import ConferenceModel from "../database/conference/conferences.model";
import AnonymousError from "../AnonymousError";
import { downloadQueue, removeQueue } from "../queue";
import RepositoryModel from "../database/repositories/repositories.model";
import User from "../User";
import { RepositoryStatus } from "../types";
import { IUserDocument } from "../database/users/users.types";
import { checkToken } from "../GitHubUtils";
import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { IAnonymizedRepositoryDocument } from "../../core/model/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../../core/Repository";
import UserModel from "../../core/model/users/users.model";
import ConferenceModel from "../../core/model/conference/conferences.model";
import AnonymousError from "../../core/AnonymousError";
import { downloadQueue, removeQueue } from "../../queue";
import RepositoryModel from "../../core/model/repositories/repositories.model";
import User from "../../core/User";
import { RepositoryStatus } from "../../core/types";
import { IUserDocument } from "../../core/model/users/users.types";
import { checkToken } from "../../core/GitHubUtils";
const router = express.Router();

View File

@@ -4,9 +4,9 @@ import * as stream from "stream";
import config from "../../config";
import { getRepo, handleError } from "./route-utils";
import AnonymousError from "../AnonymousError";
import { downloadQueue } from "../queue";
import { RepositoryStatus } from "../types";
import AnonymousError from "../../core/AnonymousError";
import { downloadQueue } from "../../queue";
import { RepositoryStatus } from "../../core/types";
const router = express.Router();

View File

@@ -1,8 +1,9 @@
import * as express from "express";
import AnonymousError from "../AnonymousError";
import * as db from "../database/database";
import UserModel from "../database/users/users.model";
import User from "../User";
import AnonymousError from "../../core/AnonymousError";
import * as db from "../database";
import UserModel from "../../core/model/users/users.model";
import User from "../../core/User";
import { HTTPError } from "got";
export async function getPullRequest(
req: express.Request,
@@ -85,6 +86,11 @@ function printError(error: any, req?: express.Request) {
if (req.originalUrl === "/api/repo/undefined/options") return;
}
console.error(message);
} else if (error instanceof HTTPError) {
let message = `[ERROR] HTTP.${
error.code
} ${error.message.toString()} ${error.stack?.split("\n")[1].trim()}`;
console.error(message);
} else if (error instanceof Error) {
console.error(error);
} else {
@@ -107,7 +113,10 @@ export function handleError(
status = error.httpStatus;
} else if (error.$metadata?.httpStatusCode) {
status = error.$metadata.httpStatusCode;
} else if (message && message.indexOf("not_found") > -1) {
} else if (
message &&
(message.indexOf("not_found") > -1 || message.indexOf("(Not Found)") > -1)
) {
status = 404;
} else if (message && message.indexOf("not_connected") > -1) {
status = 401;

View File

@@ -2,8 +2,8 @@ import * as express from "express";
import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import UserModel from "../database/users/users.model";
import User from "../User";
import UserModel from "../../core/model/users/users.model";
import User from "../../core/User";
const router = express.Router();

View File

@@ -1,11 +1,11 @@
import * as express from "express";
import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import AnonymousError from "../AnonymousError";
import { Tree, TreeElement } from "../types";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { Tree, TreeElement } from "../../core/types";
import * as marked from "marked";
import { streamToString } from "../anonymize-utils";
import { streamToString } from "../../core/anonymize-utils";
const router = express.Router();

View File

@@ -1,8 +1,8 @@
import * as schedule from "node-schedule";
import Conference from "./Conference";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "./database/conference/conferences.model";
import Repository from "./Repository";
import Conference from "../core/Conference";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../core/model/conference/conferences.model";
import Repository from "../core/Repository";
export function conferenceStatusCheck() {
// check every 6 hours the status of the conferences

31
src/streamer/index.ts Normal file
View File

@@ -0,0 +1,31 @@
import { config as dotenv } from "dotenv";
dotenv();
import * as express from "express";
import * as compression from "compression";
import config from "../config";
import router from "./route";
import { handleError } from "../server/routes/route-utils";
import AnonymousError from "../core/AnonymousError";
// Streamer HTTP application: a small express server dedicated to the
// streaming routes mounted under /api.
const app = express();

// Body parsing and response compression are registered before the routes.
app.use(express.json());
app.use(compression());

// Every streamer endpoint is mounted under the /api prefix.
app.use("/api", router);

// Fallback for any method/path not matched above: report it through the
// shared error handler as a "file_not_found" error carrying the requested URL.
app.all("*", (req, res) => {
  const notFound = new AnonymousError("file_not_found", {
    httpStatus: 404,
    object: req.originalUrl,
  });
  handleError(notFound, res, req);
});

// Start listening on the configured port and log once the server is up.
app.listen(config.PORT, () => {
  console.log(`Server started on http://streamer:${config.PORT}`);
});

63
src/streamer/route.ts Normal file
View File

@@ -0,0 +1,63 @@
import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { contentType } from "mime-types";
export const router = express.Router();

/**
 * POST / — download one blob from GitHub and stream it back to the caller
 * through the anonymizer.
 *
 * Fields read from the JSON body:
 *  - token: GitHub token used for the download
 *  - repoFullName: "<organization>/<repository>"
 *  - repoId, commit: forwarded to the GitHubStream source
 *  - sha: sha of the blob to download
 *  - filePath: path of the file, only used to choose the Content-Type
 *  - anonymizerOptions: forwarded as-is to AnonymizeTransformer
 *
 * Any failure (invalid body, synchronous error while creating the stream,
 * error emitted by one of the streams) is reported through handleError.
 */
router.post("/", async (req: express.Request, res: express.Response) => {
  try {
    // Everything, including body parsing, lives inside the try block: this is
    // an async handler, so an exception thrown outside of it would become an
    // unhandled rejection and the request would never be answered.
    req.body = req.body || {};
    if (typeof req.body.repoFullName !== "string") {
      throw new Error("repoFullName_not_specified");
    }
    const token: string = req.body.token;
    const repoFullName = req.body.repoFullName.split("/");
    const repoId = req.body.repoId;
    const fileSha = req.body.sha;
    const commit = req.body.commit;
    const filePath = req.body.filePath;
    const anonymizerOptions = req.body.anonymizerOptions;

    const anonymizer = new AnonymizeTransformer(anonymizerOptions);

    const source = new GitHubStream({
      repoId,
      organization: repoFullName[0],
      repoName: repoFullName[1],
      commit: commit,
      getToken: () => token,
    });
    const content = source.downloadFile(token, fileSha);

    // Release the upstream download if it is still open.
    const destroyContent = () => {
      if (!content.closed && !content.destroyed) {
        content.destroy();
      }
    };
    // Shared error path for every stage of the pipeline.
    const handleStreamError = (error: Error) => {
      destroyContent();
      handleError(error, res);
    };

    const mime = contentType(filePath);
    // NOTE(review): ".ts" is excluded presumably because the mime database
    // maps that extension to a video type rather than TypeScript — confirm.
    if (mime && !filePath.endsWith(".ts")) {
      res.contentType(mime);
    } else if (isTextFile(filePath)) {
      res.contentType("text/plain");
    }
    res.header("Accept-Ranges", "none");

    // When no mime type could be derived from the path, fall back to
    // text/plain if the anonymizer reports the first chunk as text.
    anonymizer.once("transform", (data) => {
      if (!mime && data.isText) {
        res.contentType("text/plain");
      }
    });

    // pipe() does not forward errors from one stream to the next, so each
    // stage needs its own listener; an "error" event without a listener
    // would otherwise be thrown as an uncaught exception.
    content.on("error", handleStreamError);
    anonymizer.on("error", handleStreamError);
    content
      .pipe(anonymizer)
      .pipe(res)
      .on("error", handleStreamError)
      // the response is closed (finished or client gone): stop downloading
      .on("close", destroyContent);
  } catch (error) {
    handleError(error, res);
  }
});

export default router;