migrate JavaScript to TypeScript

This commit is contained in:
tdurieux
2021-08-11 18:18:45 +02:00
parent ee4a20286d
commit caeff49ab0
58 changed files with 6034 additions and 3096 deletions

168
src/AnonymizedFile.ts Normal file
View File

@@ -0,0 +1,168 @@
import * as path from "path";
import * as express from "express";
import * as stream from "stream";
import Repository from "./Repository";
import { Tree, TreeFile } from "./types";
import storage from "./storage";
import config from "../config";
import { anonymizeStream } from "./anonymize-utils";
/**
* Represent a file in a anonymized repository
*/
/**
 * Represents a file in an anonymized repository.
 */
export default class AnonymizedFile {
  repository: Repository;
  sha?: string;
  size?: number;
  path?: string;
  // path of the file with all the configured terms masked
  anonymizedPath: string;

  constructor(
    repository: Repository,
    data: {
      path?: string;
      anonymizedPath: string;
      sha?: string;
      size?: number;
    }
  ) {
    this.repository = repository;
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    this.anonymizedPath = data.anonymizedPath;
    if (data.path) {
      this.path = data.path;
    }
    if (!data.anonymizedPath && this.path) {
      // no anonymized path provided: derive it by masking every term
      // of the original path
      this.anonymizedPath = this.path;
      for (const term of this.repository.options.terms) {
        if (term.trim() == "") {
          continue;
        }
        // NOTE(review): the term is interpolated into a RegExp without
        // escaping, so regex metacharacters in a term are interpreted —
        // confirm whether this is intended.
        this.anonymizedPath = this.anonymizedPath.replace(
          new RegExp(term, "gi"),
          config.ANONYMIZATION_MASK
        );
      }
    }
    // sha and size are necessarily undefined at this point, so the
    // original `if (!this.sha)` / `if (!this.size)` guards were dead
    this.sha = data.sha;
    this.size = data.size;
  }
  /**
   * Stream the anonymized content of the file to the response.
   * Errors are reported as a 500 JSON payload.
   */
  async send(res: express.Response): Promise<void> {
    try {
      const s = await this.anonymizedContent();
      s.on("error", (err) => {
        console.log(err);
        res.status(500).send({ error: err.message });
      });
      s.pipe(res);
    } catch (error) {
      console.log("Error during anonymization", error);
      // the caught value may not be an Error instance
      const message = error instanceof Error ? error.message : String(error);
      res.status(500).send({ error: message });
    }
  }
  /**
   * Check whether the file type is allowed by the repository options
   * (pdf and images can be disabled).
   */
  async isFileSupported() {
    this.path = await this.getOriginalPath();
    const filename = path.basename(this.path);
    // last dot-separated segment; the whole name if there is no dot
    const extension = filename.split(".").pop()!.toLowerCase();
    if (!this.repository.options.pdf && extension == "pdf") {
      return false;
    }
    if (
      !this.repository.options.image &&
      (extension == "png" ||
        extension == "ico" ||
        extension == "jpg" ||
        extension == "jpeg" ||
        extension == "gif")
    ) {
      return false;
    }
    return true;
  }
  /** Location of the original file in the local cache. */
  get originalCachePath() {
    // throw a proper Error (the original threw a bare string, which has
    // no stack and no `.message` for the handlers above)
    if (!this.path) throw new Error("path_not_defined");
    return path.join(this.repository.originalCachePath, this.path);
  }
  /**
   * Get a stream of the original content, from the cache when
   * available, from the source otherwise.
   */
  async content(): Promise<stream.Readable> {
    if (this.size && this.size > config.MAX_FILE_SIZE) {
      throw new Error("file_too_big");
    }
    if (await storage.exists(this.originalCachePath)) {
      return storage.read(this.originalCachePath);
    }
    // `source` is always set (Repository's constructor throws on an
    // unsupported source), so the original optional chaining could only
    // hide an undefined return
    return this.repository.source.getFileContent(this);
  }
  /** Get a stream of the anonymized content. */
  async anonymizedContent() {
    await this.getOriginalPath();
    if (!this.path) throw new Error("path_not_specified");
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    const rs = await this.content();
    return rs.pipe(anonymizeStream(this.path, this.repository));
  }
  /**
   * De-anonymize the path
   *
   * @returns the original relative path of the file
   */
  async getOriginalPath(): Promise<string> {
    if (!this.anonymizedPath) throw new Error("path_not_specified");
    const files = await this.repository.files();
    // walk the anonymized tree following each segment of the path
    const paths = this.anonymizedPath.trim().split("/");
    let current: any = await this.repository.anonymizedFiles();
    for (let i = 0; i < paths.length; i++) {
      const fileName = paths[i];
      if (fileName == "") {
        continue;
      }
      if (current[fileName]) {
        current = current[fileName];
      } else {
        throw new Error("file_not_found");
      }
    }
    // index the original tree by sha so the anonymized entry can be
    // mapped back to its original path
    function tree2sha(
      tree: any,
      output: { [key: string]: string } = {},
      parent: string = ""
    ): { [key: string]: string } {
      for (let i in tree) {
        const sha = tree[i].sha as string;
        const size = tree[i].size as number;
        if (sha != null && size != null) {
          // leaf: a file entry has both a sha and a size
          output[sha] = path.join(parent, i);
        } else if (tree[i].child) {
          tree2sha(tree[i].child as Tree, output, path.join(parent, i));
        } else {
          tree2sha(tree[i] as Tree, output, path.join(parent, i));
        }
      }
      return output;
    }
    const shaTree = tree2sha(files);
    if (!current.sha || !shaTree[current.sha]) {
      throw new Error("file_not_found");
    }
    this.path = shaTree[current.sha];
    this.sha = current.sha;
    if ((current as TreeFile).size) this.size = (current as TreeFile).size;
    return this.path;
  }
}

280
src/Repository.ts Normal file
View File

@@ -0,0 +1,280 @@
import * as path from "path";
import storage from "./storage";
import { RepositoryStatus, Source, Tree } from "./types";
import * as stream from "stream";
import User from "./User";
import GitHubStream from "./source/GitHubStream";
import GitHubDownload from "./source/GitHubDownload";
import Zip from "./source/ZIP";
import { anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
import { anonymizeStream } from "./anonymize-utils";
import GitHubBase from "./source/GitHubBase";
/**
 * Represents an anonymized repository and the operations on its
 * underlying database document.
 */
export default class Repository {
  private _model: IAnonymizedRepositoryDocument;
  source: Source;
  owner: User;

  constructor(data: IAnonymizedRepositoryDocument) {
    this._model = data;
    // instantiate the source implementation that matches the document
    switch (data.source.type) {
      case "GitHubDownload":
        this.source = new GitHubDownload(data.source, this);
        break;
      case "GitHubStream":
        this.source = new GitHubStream(data.source, this);
        break;
      case "Zip":
        this.source = new Zip(data.source, this);
        break;
      default:
        throw new Error("unsupported_source");
    }
    this.owner = new User(new UserModel({ username: data.owner }));
  }
  /**
   * Get the anonymized file tree
   * @param opt force to get an updated list of files
   * @returns The anonymized file tree
   */
  async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> {
    const terms = this._model.options.terms || [];
    // recursively mask every path of the tree; leaves are recognized
    // by their integer `size`
    function anonymizeTreeRecursive(tree: Tree): any {
      if (Number.isInteger(tree.size)) {
        return tree;
      }
      const output: any = {};
      let current: any = tree;
      if (current.child) {
        current = current.child;
      }
      for (const file in current) {
        const anonymizedPath = anonymizePath(file, terms);
        output[anonymizedPath] = anonymizeTreeRecursive(current[file]);
      }
      return output;
    }
    return anonymizeTreeRecursive(await this.files(opt));
  }
  /**
   * Get the file tree
   *
   * @param opt force to get an updated list of files
   * @returns The file tree
   */
  async files(opt?: { force?: boolean }) {
    if (
      this._model.originalFiles &&
      Object.keys(this._model.originalFiles).length !== 0 &&
      !opt?.force
    ) {
      return this._model.originalFiles;
    }
    const files = await this.source.getFiles();
    this._model.originalFiles = files;
    // reset the cached size; computeSize() recomputes it lazily
    this._model.size = 0;
    await this._model.save();
    return files;
  }
  /**
   * Check that the repository is accessible, and throw otherwise.
   */
  check() {
    if (this._model.options.expirationMode != "never") {
      // the repository is expired once its expiration date has passed.
      // The original `>` comparison was inverted and marked
      // *unexpired* repositories as expired (see the `<` check in
      // User.getRepositories for the intended semantics).
      if (this._model.options.expirationDate < new Date()) {
        // fire-and-forget: updateStatus assigns the status
        // synchronously before awaiting save, so the checks below
        // already see the new value
        this.updateStatus("expired");
      }
    }
    if (this._model.status == "expired") {
      throw new Error("repository_expired");
    }
    if (this._model.status == "removed") {
      // NOTE(review): removed repositories also report
      // "repository_expired" — kept as-is since callers may match on it
      throw new Error("repository_expired");
    }
    if (this._model.status != "ready") {
      throw new Error("repository_not_ready");
    }
  }
  /**
   * Compress and anonymize the repository
   *
   * @returns A stream of anonymized repository compressed
   */
  zip(): stream.Readable {
    return storage.archive(this.originalCachePath, {
      format: "zip",
      // the original cast to the undeclared `Transformer` type is not
      // needed: anonymizeStream returns a stream.Transform
      fileTransformer: (filename) => anonymizeStream(filename, this),
    });
  }
  /**
   * Update the repository if a new commit exists
   *
   * @returns void
   */
  async updateIfNeeded(): Promise<void> {
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);
    // update at most once a day
    if (this._model.options.update && this._model.lastView < yesterday) {
      if (this._model.status != "ready") {
        throw new Error("repo_not_ready");
      }
      // Only GitHubBase can be updated for the moment
      if (this.source instanceof GitHubBase) {
        const branches = await this.source.githubRepository.branches({
          force: true,
          accessToken: await this.source.getToken(),
        });
        const branch = this.source.branch;
        // look the remote branch up once (the original filtered twice
        // and dereferenced [0] without checking it exists)
        const remoteBranch = branches.filter((f) => f.name == branch.name)[0];
        if (!remoteBranch) {
          throw new Error("branch_not_found");
        }
        if (branch.commit == remoteBranch.commit) {
          console.log(`${this._model.repoId} is up to date`);
          return;
        }
        this._model.source.commit = remoteBranch.commit;
        this._model.anonymizeDate = new Date();
        await this.updateStatus("preparing");
        console.log(
          `${this._model.repoId} will be updated to ${this._model.source.commit}`
        );
        await this.resetState();
        await this.anonymize();
      }
    }
  }
  /**
   * Download the required state for the repository to work
   *
   * @returns void
   */
  async anonymize() {
    if (this._model.status == "ready") return;
    await this.updateStatus("queue");
    await this.files();
    await this.updateStatus("ready");
  }
  /**
   * Update the last view and view count
   */
  async countView() {
    this._model.lastView = new Date();
    this._model.pageView = (this._model.pageView || 0) + 1;
    await this._model.save();
  }
  /**
   * Update the status of the repository
   * @param status the new status
   * @param errorMessage a potential error message to display
   */
  async updateStatus(status: RepositoryStatus, errorMessage?: string) {
    // the original assigned `status` twice; once is enough
    this._model.status = status;
    this._model.errorMessage = errorMessage;
    await this._model.save();
  }
  /**
   * Expire the repository
   */
  async expire() {
    await this.updateStatus("expired");
    await this.resetState();
  }
  /**
   * Remove the repository
   * NOTE(review): the status is not set to "removed" here — presumably
   * done by the caller; confirm.
   */
  async remove() {
    this._model.size = 0;
    await this.resetState();
  }
  /**
   * Reset/delete the cached state of the repository.
   * (renamed from the `resetSate` typo; the method is private so no
   * external caller is affected)
   */
  private async resetState() {
    this._model.size = 0;
    this._model.originalFiles = null;
    await this._model.save();
    await storage.rm(this._model.repoId + "/");
  }
  /**
   * Compute the size of the repository in bytes.
   *
   * @returns The size of the repository in bytes
   */
  async computeSize(): Promise<number> {
    if (this._model.status != "ready") return 0;
    // the size is cached in the document once computed
    if (this._model.size) return this._model.size;
    // sum the sizes of every file of the (possibly nested) tree
    function recursiveCount(files): number {
      let total = 0;
      for (const name in files) {
        const file = files[name];
        if (file.size) {
          total += file.size as number;
        } else if (typeof file == "object") {
          total += recursiveCount(file);
        }
      }
      return total;
    }
    const files = await this.files({ force: false });
    this._model.size = recursiveCount(files);
    await this._model.save();
    return this._model.size;
  }
  /***** Getters ********/
  get repoId() {
    return this._model.repoId;
  }
  get options() {
    return this._model.options;
  }
  get model() {
    return this._model;
  }
  get originalCachePath() {
    return path.join(this._model.repoId, "original") + "/";
  }
  get status() {
    return this._model.status;
  }
  toJSON() {
    return {
      repoId: this._model.repoId,
      options: this._model.options,
      anonymizeDate: this._model.anonymizeDate,
      status: this._model.status,
      source: this.source.toJSON(),
      lastView: this._model.lastView,
      pageView: this._model.pageView,
      size: this._model.size,
    };
  }
}

110
src/User.ts Normal file
View File

@@ -0,0 +1,110 @@
import { Octokit } from "@octokit/rest";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./database/repositories/repositories.model";
import { IUserDocument } from "./database/users/users.types";
import Repository from "./Repository";
import { GitHubRepository } from "./source/GitHubRepository";
/**
 * Wrapper around a user document exposing the GitHub-related
 * operations of the application.
 */
export default class User {
  private _model: IUserDocument;
  constructor(model: IUserDocument) {
    this._model = model;
  }
  get username(): string {
    return this._model.username;
  }
  get accessToken(): string {
    return this._model.accessToken;
  }
  get photo(): string {
    return this._model.photo;
  }
  // default anonymization options of the user
  get default() {
    return this._model.default;
  }
  set default(d) {
    this._model.default = d;
  }
  /**
   * List the GitHub repositories of the user.
   *
   * The list is cached in the user document; pass `force: true` (or
   * have no cache yet) to refresh it from the GitHub API.
   */
  async getGitHubRepositories(opt?: {
    force: boolean;
  }): Promise<GitHubRepository[]> {
    if (!this._model.repositories || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: this.accessToken });
      const repositories = (
        await octokit.paginate(octokit.repos.listForAuthenticatedUser, {
          visibility: "all",
          sort: "pushed",
          per_page: 100,
        })
      ).map((r) => {
        return new RepositoryModel({
          externalId: "gh_" + r.id,
          name: r.full_name,
          url: r.html_url,
          size: r.size,
          defaultBranch: r.default_branch,
        });
      });
      // save only the repositories that are not yet in the database
      const finds = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("externalId")
      ).map((m) => m.externalId);
      await Promise.all(
        repositories
          .filter((r) => finds.indexOf(r.externalId) == -1)
          .map((r) => r.save())
      );
      // cache the database ids of the repositories in the user document
      this._model.repositories = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("id")
      ).map((m) => m.id);
      await this._model.save();
      return repositories.map((r) => new GitHubRepository(r));
    } else {
      // serve from the cached ids
      return (
        await RepositoryModel.find({ _id: { $in: this._model.repositories } })
      ).map((i) => new GitHubRepository(i));
    }
  }
  /**
   * List the anonymized repositories owned by the user, expiring the
   * ones whose expiration date has passed.
   */
  async getRepositories() {
    const repositories = (
      await AnonymizedRepositoryModel.find({
        owner: this.username,
      }).exec()
    ).map((d) => new Repository(d));
    for (let repo of repositories) {
      if (repo.options.expirationDate) {
        repo.options.expirationDate = new Date(repo.options.expirationDate);
      }
      if (
        repo.options.expirationMode != "never" &&
        repo.options.expirationDate != null &&
        repo.options.expirationDate < new Date()
      ) {
        await repo.expire()
      }
    }
    return repositories;
  }
  toJSON() {
    return this._model.toJSON();
  }
}

136
src/anonymize-utils.ts Normal file
View File

@@ -0,0 +1,136 @@
import config from "../config";
import Repository from "./Repository";
import GitHubBase from "./source/GitHubBase";
import { isText } from "istextorbinary";
import * as path from "path";
import * as stream from "stream";
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
/**
 * Determine whether a file should be treated as text (and therefore
 * anonymized), based on its extension and, as a fallback, its content.
 *
 * @param filePath path of the file (only the basename is used)
 * @param content file content used for binary/text sniffing
 */
export function isTextFile(filePath: string, content: Buffer) {
  const filename = path.basename(filePath);
  // last dot-separated segment; the whole name if there is no dot
  const extension = filename.split(".").pop()!.toLowerCase();
  if (config.additionalExtensions.includes(extension)) {
    return true;
  }
  // isText checks the file name first and only then sniffs the content,
  // so a single call covers both checks of the original code
  return isText(filename, content);
}
/**
 * Create a Transform stream that anonymizes the content flowing
 * through it. Chunks are buffered in pairs so that a term spanning a
 * chunk boundary is still matched.
 */
export function anonymizeStream(filename: string, repository: Repository) {
  const ts = new stream.Transform();
  let chunks: Buffer[] = [];
  let len = 0;
  let pos = 0;
  ts._transform = function _transform(chunk, enc, cb) {
    chunks.push(chunk);
    len += chunk.length;
    // only flush every second chunk so that terms split across two
    // chunks are still detected
    if (pos === 1) {
      let data: any = Buffer.concat(chunks, len);
      if (isTextFile(filename, data)) {
        data = anonymizeContent(data.toString(), repository);
      }
      chunks = [];
      len = 0;
      this.push(data);
    }
    pos = 1 ^ pos;
    cb(null);
  };
  ts._flush = function _flush(cb) {
    if (chunks.length) {
      let data: any = Buffer.concat(chunks, len);
      // use isTextFile (not bare isText) so that extensions from
      // config.additionalExtensions are also anonymized in the final
      // chunk, consistently with _transform above
      if (isTextFile(filename, data)) {
        data = anonymizeContent(data.toString(), repository);
      }
      this.push(data);
    }
    cb(null);
  };
  return ts;
}
/**
 * Anonymize the given text content according to the repository options:
 * markdown images, links, GitHub URLs of the original repository, and
 * the configured terms are masked.
 */
export function anonymizeContent(content: string, repository: Repository) {
  if (repository.options?.image === false) {
    // remove image in markdown
    content = content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      ""
    );
  }
  if (!repository.options?.link) {
    // remove all links
    content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
  }
  if (repository.source instanceof GitHubBase) {
    // rewrite links pointing to the original GitHub repository
    // (blob/, tree/, then the bare repository URL) so they target the
    // anonymized site instead
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/blob/${repository.source.branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/tree/${(repository.source as GitHubBase).branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${repository.source.githubRepository.fullName}`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
  }
  // NOTE(review): the terms are interpolated into regular expressions
  // without escaping, so regex metacharacters in a term are interpreted
  // — confirm whether this is intended.
  for (let term of repository.options.terms || []) {
    if (term.trim() == "") {
      continue;
    }
    // remove whole url if it contains the term
    content = content.replace(urlRegex, (match) => {
      if (new RegExp(`\\b${term}\\b`, "gi").test(match))
        return config.ANONYMIZATION_MASK;
      return match;
    });
    // remove the term in the text
    content = content.replace(
      new RegExp(`\\b${term}\\b`, "gi"),
      config.ANONYMIZATION_MASK
    );
  }
  return content;
}
/**
 * Mask every non-empty term in the provided path.
 */
export function anonymizePath(path: string, terms: string[]) {
  return terms
    .filter((term) => term.trim() != "")
    .reduce(
      (anonymized, term) =>
        anonymized.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK),
      path
    );
}

View File

@@ -0,0 +1,15 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import {
IAnonymizedRepositoryDocument,
IAnonymizedRepositoryModel,
} from "./anonymizedRepositories.types";
import AnonymizedRepositorySchema from "./anonymizedRepositories.schema";
// Mongoose model for anonymized repositories
const AnonymizedRepositoryModel = model<IAnonymizedRepositoryDocument>(
  "AnonymizedRepository",
  AnonymizedRepositorySchema
) as IAnonymizedRepositoryModel;
export default AnonymizedRepositoryModel;

View File

@@ -0,0 +1,54 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Schema of an anonymized repository document
const AnonymizedRepositorySchema = new Schema({
  repoId: {
    type: String,
    index: { unique: true },
  },
  // lifecycle status of the repository (see RepositoryStatus)
  status: {
    type: String,
    default: "preparing",
  },
  errorMessage: String,
  anonymizeDate: Date,
  lastView: Date,
  pageView: Number,
  accessToken: String,
  owner: String,
  conference: String,
  // description of where the original repository comes from
  source: {
    type: { type: String },
    branch: String,
    commit: String,
    repositoryId: String,
    repositoryName: String,
    accessToken: String,
  },
  // cached file tree of the original repository
  originalFiles: mongoose.Schema.Types.Mixed,
  // anonymization options
  options: {
    terms: [String],
    expirationMode: { type: String },
    expirationDate: Date,
    update: Boolean,
    image: Boolean,
    pdf: Boolean,
    notebook: Boolean,
    link: Boolean,
    page: Boolean,
    pageSource: {
      branch: String,
      path: String,
    },
  },
  dateOfEntry: {
    type: Date,
    // use a factory so the default is evaluated per document; the
    // original `new Date()` was evaluated once at module load, giving
    // every document the same creation date
    default: () => new Date(),
  },
  size: {
    type: Number,
    default: 0,
  },
});
export default AnonymizedRepositorySchema;

View File

@@ -0,0 +1,46 @@
import * as mongoose from "mongoose";
import { RepositoryStatus, Tree } from "../../types";
/**
 * Database representation of an anonymized repository.
 */
export interface IAnonymizedRepository {
  repoId: string;
  status?: RepositoryStatus;
  errorMessage?: string;
  anonymizeDate: Date;
  // where the original content comes from
  source: {
    type: "GitHubDownload" | "GitHubStream" | "Zip";
    branch?: string;
    commit?: string;
    repositoryId?: string;
    repositoryName?: string;
    accessToken?: string;
  };
  owner: string;
  // cached file tree of the original repository
  originalFiles: Tree;
  conference: string;
  // anonymization options
  options: {
    terms: string[];
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    pdf: boolean;
    notebook: boolean;
    link: boolean;
    page: boolean;
    pageSource?: {
      branch: string;
      path: string;
    };
  };
  pageView: number;
  lastView: Date;
  size: number;
}
export interface IAnonymizedRepositoryDocument
  extends IAnonymizedRepository,
    mongoose.Document {
  setLastUpdated: (this: IAnonymizedRepositoryDocument) => Promise<void>;
}
export interface IAnonymizedRepositoryModel
  extends mongoose.Model<IAnonymizedRepositoryDocument> {}

28
src/database/database.ts Normal file
View File

@@ -0,0 +1,28 @@
import * as mongoose from "mongoose";
import Repository from "../Repository";
import config from "../../config";
import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model";
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;
export const database = mongoose.connection;
/**
 * Connect mongoose to the database and return the connection.
 * NOTE(review): the database name "test" is hard-coded — confirm it
 * should not come from the configuration.
 */
export async function connect() {
  mongoose.set("useNewUrlParser", true);
  mongoose.set("useFindAndModify", true);
  mongoose.set("useUnifiedTopology", true);
  await mongoose.connect(MONGO_URL + "test", {
    authSource: "admin",
    useCreateIndex: true,
    useFindAndModify: true,
  });
  return database;
}
/**
 * Load the anonymized repository with the given id.
 * @throws repo_not_found when no repository matches
 */
export async function getRepository(repoId: string) {
  const data = await AnonymizedRepositoryModel.findOne({ repoId });
  if (!data) throw new Error("repo_not_found");
  return new Repository(data);
}

View File

@@ -0,0 +1,12 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import { IRepositoryDocument, IRepositoryModel } from "./repositories.types";
import RepositorySchema from "./repositories.schema";
// Mongoose model for cached GitHub repositories
const RepositoryModel = model<IRepositoryDocument>(
  "Repository",
  RepositorySchema
) as IRepositoryModel;
export default RepositoryModel;

View File

@@ -0,0 +1,42 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Schema of a cached GitHub repository document
const RepositorySchema = new Schema({
  // external identifier, prefixed by the source (e.g. "gh_<id>")
  externalId: {
    type: String,
    index: { unique: true },
  },
  // full name "owner/repo"
  name: {
    type: String,
    index: true,
  },
  url: String,
  source: {
    type: String,
    default: "github",
  },
  hasPage: { type: Boolean, default: false },
  pageSource: {
    branch: { type: String },
    path: String,
  },
  branches: [
    {
      name: { type: String },
      commit: String,
      readme: String,
    },
  ],
  defaultBranch: String,
  size: Number,
  status: {
    type: String,
    default: "ready",
  },
  dateOfEntry: {
    type: Date,
    // use a factory so the default is evaluated per document; the
    // original `new Date()` was evaluated once at module load
    default: () => new Date(),
  },
});
export default RepositorySchema;

View File

@@ -0,0 +1,25 @@
import * as mongoose from "mongoose";
/**
 * Database representation of a (non-anonymized) GitHub repository.
 */
export interface IRepository {
  // external identifier, prefixed by the source (e.g. "gh_<id>")
  externalId: string;
  // full name "owner/repo"
  name: string;
  url?: string;
  source: "github";
  // size reported by the GitHub API (in kilobytes, per the check in
  // the repository-creation route)
  size?: number;
  defaultBranch?: string;
  hasPage: boolean;
  pageSource?: {
    branch: string;
    path: string;
  };
  branches?: {
    name: string;
    commit: string;
    readme?: string;
  }[];
}
export interface IRepositoryDocument extends IRepository, mongoose.Document {
  setLastUpdated: (this: IRepositoryDocument) => Promise<void>;
}
export interface IRepositoryModel extends mongoose.Model<IRepositoryDocument> {}

View File

@@ -0,0 +1,10 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import { IUserDocument, IUserModel } from "./users.types";
import UserSchema from "./users.schema";
// Mongoose model for application users
const UserModel = model<IUserDocument>("user", UserSchema) as IUserModel;
// add the missing semicolon for consistency with the sibling modules
export default UserModel;

View File

@@ -0,0 +1,36 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Schema of an application user document
const UserSchema = new Schema({
  accessToken: String,
  username: {
    type: String,
    index: { unique: true },
  },
  email: String,
  photo: String,
  // ids of the cached repositories of the user
  repositories: [String],
  // default anonymization options of the user
  default: {
    terms: [String],
    options: {
      expirationMode: { type: String },
      update: Boolean,
      image: Boolean,
      pdf: Boolean,
      notebook: Boolean,
      loc: Boolean,
      link: Boolean,
      page: { type: String },
    },
  },
  status: {
    type: String,
    default: "active",
  },
  dateOfEntry: {
    type: Date,
    // use a factory so the default is evaluated per document; the
    // original `new Date()` was evaluated once at module load
    default: () => new Date(),
  },
});
export default UserSchema;

View File

@@ -0,0 +1,32 @@
import * as mongoose from "mongoose";
/**
 * A GitHub user of the application.
 */
export interface IUser {
  accessToken: string;
  username: string;
  email: string;
  photo?: string;
  // ids of the cached Repository documents of the user; the schema
  // declares them as [String] and User.getGitHubRepositories stores
  // mongoose document ids, so the original `number[]` was incorrect
  repositories?: string[];
  // default anonymization options of the user
  default?: {
    terms: string[];
    options: {
      expirationMode: "never" | "redirect" | "";
      update: boolean;
      image: boolean;
      pdf: boolean;
      notebook: boolean;
      loc: boolean;
      link: boolean;
      page: string | null;
    };
  };
  status?: "active" | "removed";
  dateOfEntry?: Date;
  lastUpdated?: Date;
}
export interface IUserDocument extends IUser, mongoose.Document {
  setLastUpdated: (this: IUserDocument) => Promise<void>;
}
export interface IUserModel extends mongoose.Model<IUserDocument> {}

107
src/routes/connection.ts Normal file
View File

@@ -0,0 +1,107 @@
import * as redis from "redis";
import * as passport from "passport";
import * as session from "express-session";
import * as connectRedis from "connect-redis";
import * as OAuth2Strategy from "passport-oauth2";
import { Profile, Strategy } from "passport-github2";
import * as express from "express";
import config from "../../config";
import UserModel from "../database/users/users.model";
const RedisStore = connectRedis(session);
/**
 * Express middleware that rejects unauthenticated requests with a
 * 401 JSON error and lets authenticated ones through.
 */
export function ensureAuthenticated(
  req: express.Request,
  res: express.Response,
  next: express.NextFunction
) {
  if (!req.isAuthenticated()) {
    res.status(401).json({ error: "not_connected" });
    return;
  }
  return next();
}
/**
 * Passport verify callback: create or update the user matching the
 * GitHub profile, and always invoke `done` with the session payload.
 */
const verify = async (
  accessToken: string,
  refreshToken: string,
  profile: Profile,
  done: OAuth2Strategy.VerifyCallback
): Promise<void> => {
  let user;
  try {
    user = await UserModel.findOne({ username: profile.username });
    // `emails`/`photos` may be absent from the profile (e.g. no public
    // email on GitHub); the original `profile.emails[0]` crashed in
    // that case and the user was silently not saved
    const email = profile.emails?.[0]?.value;
    const photo = profile.photos?.[0]?.value;
    if (user) {
      user.accessToken = accessToken;
      user.email = email;
      user.photo = photo;
      await user.save();
    } else {
      user = await new UserModel({
        username: profile.username,
        accessToken: accessToken,
        email,
        photo,
      }).save();
    }
  } catch (error) {
    console.error(error);
  } finally {
    // always complete the authentication, even if the DB update failed
    done(null, {
      username: profile.username,
      accessToken,
      refreshToken,
      profile,
      user,
    });
  }
};
passport.use(
  new Strategy(
    {
      clientID: config.CLIENT_ID,
      clientSecret: config.CLIENT_SECRET,
      callbackURL: config.AUTH_CALLBACK,
    },
    verify
  )
);
// the whole user object is stored in the session as-is
passport.serializeUser((user: Express.User, done) => {
  done(null, user);
});
passport.deserializeUser((user: Express.User, done) => {
  done(null, user);
});
// session middleware backed by redis
// NOTE(review): the session secret is hard-coded ("keyboard cat" is
// the express-session example value) — it should come from the
// configuration/environment.
export const appSession = session({
  secret: "keyboard cat",
  store: new RedisStore({
    client: redis.createClient({
      port: config.REDIS_PORT,
      host: config.REDIS_HOSTNAME,
    }),
  }),
  saveUninitialized: false,
  resave: false,
});
export const router = express.Router();
// start the GitHub OAuth flow
router.get(
  "/login",
  passport.authenticate("github", { scope: ["repo"] }), // Note the scope here
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);
// OAuth callback endpoint
router.get(
  "/auth",
  passport.authenticate("github", { failureRedirect: "/" }),
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);

38
src/routes/file.ts Normal file
View File

@@ -0,0 +1,38 @@
import * as express from "express";
import AnonymizedFile from "../AnonymizedFile";
import { getRepo, handleError } from "./route-utils";
export const router = express.Router();
/**
 * Download a single anonymized file of a repository.
 * The file path is everything after `/file/` in the URL.
 */
router.get(
  "/:repoId/file/:path*",
  async (req: express.Request, res: express.Response) => {
    // reassemble the full path: `:path` captures the first segment and
    // params[0] the wildcard remainder
    let anonymizedPath = req.params.path;
    if (req.params[0]) {
      anonymizedPath += req.params[0];
    }
    // (the original had a dead self-assignment here, removed)
    const repo = await getRepo(req, res);
    if (!repo) return;
    await repo.countView();
    try {
      const f = new AnonymizedFile(repo, {
        anonymizedPath,
      });
      if (!(await f.isFileSupported())) {
        return res.status(500).send({ error: "file_not_supported" });
      }
      // serve the file as an attachment named like the original file
      res.attachment(
        anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
      );
      await f.send(res);
    } catch (error) {
      return handleError(error, res);
    }
  }
);
export default router;

13
src/routes/index.ts Normal file
View File

@@ -0,0 +1,13 @@
import repositoryPrivate from "./repository-private";
import repositoryPublic from "./repository-public";
import file from "./file";
import webview from "./webview";
import user from "./user";
// all the API routers of the application
export default {
  repositoryPrivate,
  repositoryPublic,
  file,
  webview,
  user,
};

View File

@@ -0,0 +1,270 @@
import * as express from "express";
import { ensureAuthenticated } from "./connection";
import * as db from "../database/database";
import { getRepo, getUser, handleError } from "./route-utils";
import RepositoryModel from "../database/repositories/repositories.model";
import {
GitHubRepository,
getRepositoryFromGitHub,
} from "../source/GitHubRepository";
import gh = require("parse-github-url");
import GitHubBase from "../source/GitHubBase";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import config from "../../config";
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../Repository";
const router = express.Router();
// user needs to be connected for all user API
router.use(ensureAuthenticated);
// claim a repository: attach an existing anonymized repository to the
// connected user, after checking that the user can access the original
// GitHub repository
router.post("/claim", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  try {
    if (!req.body.repoId) {
      return res.status(500).json({ error: "repoId_not_defined" });
    }
    if (!req.body.repoUrl) {
      return res.status(500).json({ error: "repoUrl_not_defined" });
    }
    const repoConfig = await db.getRepository(req.body.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    const r = gh(req.body.repoUrl);
    const repo = await getRepositoryFromGitHub({
      owner: r.owner,
      repo: r.name,
      accessToken: user.accessToken,
    });
    // the claimed repository must match the original GitHub repository
    if ((repoConfig.source as GitHubBase).githubRepository.id != repo.id) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    console.log(`${user.username} claims ${r.repository}.`);
    repoConfig.owner = user;
    await AnonymizedRepositoryModel.updateOne(
      { repoId: repoConfig.repoId },
      { $set: { owner: user.username } }
    );
    return res.send("Ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// refresh a repository (owner only): re-run the anonymization
router.post(
  "/:repoId/refresh",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.anonymize();
    res.end("ok");
  }
);
// delete a repository (owner only)
router.delete(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: false });
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.remove();
    console.log(`${req.params.repoId} is removed`);
    return res.json("ok");
  }
);
// details of a GitHub repository, as seen by the connected user
router.get(
  "/:owner/:repo/",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    try {
      const repo = await getRepositoryFromGitHub({
        owner: req.params.owner,
        repo: req.params.repo,
        accessToken: user.accessToken,
      });
      res.json(repo.toJSON());
    } catch (error) {
      handleError(error, res);
    }
  }
);
// branches of a GitHub repository; `?force=1` bypasses the cache
router.get(
  "/:owner/:repo/branches",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    try {
      const repository = await getRepositoryFromGitHub({
        accessToken: user.accessToken,
        owner: req.params.owner,
        repo: req.params.repo,
      });
      return res.json(
        await repository.branches({
          accessToken: user.accessToken,
          force: req.query.force == "1",
        })
      );
    } catch (error) {
      handleError(error, res);
    }
  }
);
router.get(
"/:owner/:repo/readme",
async (req: express.Request, res: express.Response) => {
const user = await getUser(req);
const repo = await RepositoryModel.findOne({
name: `${req.params.owner}/${req.params.repo}`,
});
if (!repo) return res.status(404).send({ error: "repo_not_found" });
const repository = new GitHubRepository(repo);
return res.send(
await repository.readme({
accessToken: user.accessToken,
force: req.query.force == "1",
branch: req.query.branch as string,
})
);
}
);
/**
 * Validate the payload used to create or update an anonymized
 * repository. Throws an Error with a machine-readable message.
 *
 * @throws invalid_repoId | branch_not_specified | options_not_provided
 *         | invalid_terms_format | invalid_commit_format
 */
function validateNewRepo(repoUpdate) {
  // the `\-`/`\_` escapes of the original were unnecessary
  const validCharacters = /^[0-9a-zA-Z_-]+$/;
  // also reject a missing or non-string repoId with a clean error
  // instead of crashing on `.match` with a TypeError
  if (
    typeof repoUpdate.repoId != "string" ||
    !repoUpdate.repoId.match(validCharacters) ||
    repoUpdate.repoId.length < 3
  ) {
    throw new Error("invalid_repoId");
  }
  if (!repoUpdate.branch) {
    throw new Error("branch_not_specified");
  }
  if (!repoUpdate.options) {
    throw new Error("options_not_provided");
  }
  if (!Array.isArray(repoUpdate.terms)) {
    throw new Error("invalid_terms_format");
  }
  if (!/^[a-f0-9]+$/.test(repoUpdate.commit)) {
    throw new Error("invalid_commit_format");
  }
}
/**
 * Copy the (already validated) update payload into the repository
 * document; the repoId itself is never modified here.
 */
function updateRepoModel(model: IAnonymizedRepositoryDocument, repoUpdate) {
  model.source.commit = repoUpdate.commit;
  model.source.branch = repoUpdate.branch;
  model.conference = repoUpdate.conference;
  model.options = {
    terms: repoUpdate.terms,
    expirationMode: repoUpdate.options.expirationMode,
    expirationDate: repoUpdate.options.expirationDate
      ? new Date(repoUpdate.options.expirationDate)
      : null,
    update: repoUpdate.options.update,
    image: repoUpdate.options.image,
    pdf: repoUpdate.options.pdf,
    notebook: repoUpdate.options.notebook,
    link: repoUpdate.options.link,
    page: repoUpdate.options.page,
    pageSource: repoUpdate.options.pageSource,
  };
}
// update a repository (owner only)
router.post(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: true });
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    const repoUpdate = req.body;
    try {
      validateNewRepo(repoUpdate);
    } catch (error) {
      return handleError(error, res);
    }
    // a new commit invalidates the anonymization date
    if (repoUpdate.commit != repo.model.source.commit) {
      repo.model.anonymizeDate = new Date();
      repo.model.source.commit = repoUpdate.commit;
    }
    updateRepoModel(repo.model, repoUpdate);
    await repo.updateStatus("preparing");
    await repo.model.save();
    res.send("ok");
    // fire-and-forget: the anonymization runs after the response is
    // sent — NOTE(review): rejections of this promise are unhandled
    repo.anonymize();
  }
);
// add repository
router.post("/", async (req: express.Request, res: express.Response) => {
  try {
    // getUser throws not_connected for invalid sessions; keep it inside the
    // try block so the error becomes a 401 instead of an unhandled rejection
    const user = await getUser(req);
    const repoUpdate = req.body;
    validateNewRepo(repoUpdate);
    const r = gh(repoUpdate.fullName);
    // guard the parse result: gh() returns null for malformed names
    if (!r || !r.owner || !r.name) {
      throw new Error("repo_not_found");
    }
    const repository = await getRepositoryFromGitHub({
      accessToken: user.accessToken,
      owner: r.owner,
      repo: r.name,
    });
    const repo = new AnonymizedRepositoryModel();
    repo.repoId = repoUpdate.repoId;
    repo.anonymizeDate = new Date();
    repo.owner = user.username;
    repo.source = {
      type:
        repoUpdate.options.mode == "download"
          ? "GitHubDownload"
          : "GitHubStream",
      accessToken: user.accessToken,
      repositoryId: repository.model.id,
      repositoryName: repoUpdate.fullName,
    };
    if (repo.source.type == "GitHubDownload") {
      // details.size is in kilobytes
      if (repository.size > config.MAX_REPO_SIZE) {
        // NOTE(review): "invalid_mode"/500 looks wrong for an oversize repo
        // (a 4xx with a size-related message would fit better) — kept as-is
        // to preserve the existing client contract.
        return res.status(500).send({ error: "invalid_mode" });
      }
    }
    updateRepoModel(repo, repoUpdate);
    await repo.save();
    // answer immediately; the anonymization runs in the background
    res.send("ok");
    new Repository(repo).anonymize();
  } catch (error) {
    if (res.headersSent) {
      return console.error(error);
    }
    return handleError(error, res);
  }
});
export default router;

View File

@@ -0,0 +1,43 @@
import * as express from "express";
import * as db from "../database/database";
import { getRepo, getUser, handleError } from "./route-utils";
const router = express.Router();
// repository information
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
  const repo = await getRepo(req, res, { nocheck: true });
  if (!repo) return;
  // getRepo already loaded this repository from the database: reuse it
  // instead of issuing a second identical db.getRepository query.
  res.json(repo.toJSON());
});
// download the anonymized repository as a zip archive
router.get(
  "/:repoId/zip",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    res.attachment(`${repo.repoId}.zip`);
    // log archive failures instead of crashing the process with an
    // unhandled stream error; headers may already be sent, so just end
    const archive = repo.zip();
    archive.on("error", (error: Error) => {
      console.error(error);
      res.end();
    });
    archive.pipe(res);
  }
);
// list the anonymized file tree of the repository
router.get(
  "/:repoId/files",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (repo == null) return;
    const files = await repo.anonymizedFiles({ force: true });
    res.json(files);
  }
);
// return the anonymization options, refreshing the repository if needed
router.get(
  "/:repoId/options",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (repo == null) return;
    await repo.updateIfNeeded();
    res.json(repo.options);
  }
);
export default router;

63
src/routes/route-utils.ts Normal file
View File

@@ -0,0 +1,63 @@
import * as express from "express";
import * as db from "../database/database";
import UserModel from "../database/users/users.model";
import User from "../User";
/**
 * Load the repository referenced by `req.params.repoId`.
 *
 * On failure the HTTP response is produced here (error or redirect) and
 * `null` is returned so callers can simply `return`.
 *
 * @param opt.nocheck skip the expiration/validity checks (owner-only routes)
 */
export async function getRepo(
  req: express.Request,
  res: express.Response,
  opt?: { nocheck?: boolean }
) {
  try {
    const repo = await db.getRepository(req.params.repoId);
    // previously written as `if (nocheck == true) {} else {...}` — same
    // behavior, without the empty branch
    if (opt?.nocheck !== true) {
      // redirect if the repository is expired
      if (
        repo.status == "expired" &&
        repo.options.expirationMode == "redirect" &&
        repo.source.url
      ) {
        res.redirect(repo.source.url);
        return null;
      }
      // throws when the repository is not in a servable state
      repo.check();
    }
    return repo;
  } catch (error) {
    handleError(error, res);
    return null;
  }
}
/**
 * Translate an internal error into an HTTP response.
 *
 * Errors are signaled across the code base either as Error instances or as
 * plain strings; the message is pattern-matched to pick a status code
 * (not_found → 400, not_connected → 401, anything else → 500).
 */
export function handleError(error: any, res: express.Response) {
  console.log(error);
  let message = error;
  if (error instanceof Error) {
    message = error.message;
  }
  let status = 500;
  // guard the indexOf calls: a thrown non-string, non-Error value would
  // otherwise crash inside the error handler itself
  if (typeof message === "string") {
    if (message.indexOf("not_found") > -1) {
      // NOTE(review): 404 would be the conventional status here — kept 400
      // to preserve the existing client contract.
      status = 400;
    } else if (message.indexOf("not_connected") > -1) {
      status = 401;
    }
  }
  res.status(status).send({ error: message });
  return;
}
/**
 * Resolve the authenticated user of the request.
 *
 * @throws Error("not_connected") when the session has no valid user; the
 *         session is also logged out so the client restarts authentication.
 */
export async function getUser(req: express.Request) {
  // req.user is absent on unauthenticated requests — use optional chaining
  // so we report not_connected instead of crashing with a TypeError
  const user = (req.user as any)?.user;
  if (!user) {
    req.logout();
    throw new Error("not_connected");
  }
  const model = await UserModel.findById(user._id);
  if (!model) {
    // the session references a user that no longer exists
    req.logout();
    throw new Error("not_connected");
  }
  return new User(model);
}

96
src/routes/user.ts Normal file
View File

@@ -0,0 +1,96 @@
import * as express from "express";
import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser } from "./route-utils";
const router = express.Router();
// user needs to be connected for all user API
router.use(ensureAuthenticated);
// terminate the session and send the user back to the landing page
router.get("/logout", async (req: express.Request, res: express.Response) => {
  try {
    req.logout();
    res.redirect("/");
  } catch (error) {
    handleError(error, res);
  }
});
// basic profile information of the connected user
router.get("/", async (req: express.Request, res: express.Response) => {
  try {
    const user = await getUser(req);
    const { username, photo } = user;
    res.json({ username, photo });
  } catch (error) {
    handleError(error, res);
  }
});
// storage quota: total size of all "ready" repositories of the user
router.get("/quota", async (req: express.Request, res: express.Response) => {
  try {
    const user = await getUser(req);
    const repositories = await user.getRepositories();
    const ready = repositories.filter((r) => r.status == "ready");
    // compute all sizes in parallel
    const sizes = await Promise.all(ready.map((r) => r.computeSize()));
    let used = 0;
    for (const size of sizes) {
      used += size;
    }
    res.json({ used, total: config.DEFAULT_QUOTA });
  } catch (error) {
    handleError(error, res);
  }
});
// default anonymization options of the user
router.get("/default", async (req: express.Request, res: express.Response) => {
  try {
    // getUser may throw not_connected: keep it inside the try block so the
    // error becomes a 401 instead of an unhandled promise rejection
    const user = await getUser(req);
    res.json(user.default);
  } catch (error) {
    handleError(error, res);
  }
});
// save the default anonymization options of the user
router.post("/default", async (req: express.Request, res: express.Response) => {
  try {
    const user = await getUser(req);
    user.default = req.body;
    res.send("ok");
  } catch (error) {
    handleError(error, res);
  }
});
router.get(
"/anonymized_repositories",
async (req: express.Request, res: express.Response) => {
const user = await getUser(req);
res.json(
(await user.getRepositories()).map((x) => {
return x.toJSON();
})
);
}
);
router.get(
"/all_repositories",
async (req: express.Request, res: express.Response) => {
const user = await getUser(req);
const repos = await user.getGitHubRepositories({
force: req.query.force == "1",
});
res.json(
repos.map((x) => {
return {
fullName: x.fullName,
id: x.id,
};
})
);
}
);
export default router;

54
src/routes/webview.ts Normal file
View File

@@ -0,0 +1,54 @@
import * as express from "express";
import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import GitHubDownload from "../source/GitHubDownload";
const router = express.Router();
/**
 * Serve a file of the anonymized repository as a static website
 * (GitHub-Pages-like behavior).
 */
async function webView(req: express.Request, res: express.Response) {
  const repo = await getRepo(req, res);
  if (!repo) return;
  try {
    // the page feature requires both the flag and a configured source
    if (!repo.options.page || !repo.options.pageSource) {
      throw "page_not_activated";
    }
    if (
      repo.options.pageSource.branch !=
      (repo.source as GitHubDownload).branch.name
    ) {
      throw "page_not_supported_on_different_branch";
    }
    // path of the requested resource relative to the page source folder
    let requestPath = path.join(
      repo.options.pageSource.path,
      req.path.substring(
        req.path.indexOf(req.params.repoId) + req.params.repoId.length
      )
    );
    // directory request: serve its index.html
    if (requestPath[requestPath.length - 1] == "/") {
      requestPath = path.join(requestPath, "index.html");
    }
    // (removed a dead self-assignment `requestPath = requestPath;`)
    const f = new AnonymizedFile(repo, {
      anonymizedPath: requestPath,
    });
    if (!(await f.isFileSupported())) {
      return res.status(500).send({ error: "file_not_supported" });
    }
    f.send(res);
  } catch (error) {
    handleError(error, res);
  }
}
// serve any file of the anonymized repository page
router.get("/:repoId/*", webView);
// normalize "/w/:repoId" to "/w/:repoId/" so relative links resolve
router.get("/:repoId", (req: express.Request, res: express.Response) => {
  res.redirect("/w" + req.url + "/");
});
export default router;

95
src/server.ts Normal file
View File

@@ -0,0 +1,95 @@
import * as path from "path";
import * as ofs from "fs";
import * as redis from "redis";
import * as rateLimit from "express-rate-limit";
import * as RedisStore from "rate-limit-redis";
import * as express from "express";
import * as compression from "compression";
import * as db from "./database/database";
import config from "../config";
import * as passport from "passport";
import * as connection from "./routes/connection";
import router from "./routes";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
/**
 * Serve the single-page-application entry point.
 *
 * If the request targets a repository route but does not accept HTML, it is
 * most likely the browser resolving a relative asset of an anonymized page:
 * redirect it to the corresponding file API endpoint instead.
 */
function indexResponse(req: express.Request, res: express.Response) {
  if (
    req.params.repoId &&
    req.headers["accept"] &&
    req.headers["accept"].indexOf("text/html") == -1
  ) {
    // NOTE(review): the repoId is re-parsed from path segment 2 instead of
    // using req.params.repoId — assumes routes shaped like /r/:repoId/*;
    // confirm both always agree.
    const repoId = req.path.split("/")[2];
    // if it is not an html request, it assumes that the browser try to load a different type of resource
    return res.redirect(
      `/api/repo/${repoId}/file/${req.path.substring(
        req.path.indexOf(repoId) + repoId.length + 1
      )}`
    );
  }
  res.sendFile(path.resolve(__dirname, "..", "public", "index.html"));
}
/**
 * Configure the Express application, connect to the database, and start
 * listening on config.PORT.
 *
 * Middleware order matters: session/passport must be installed before the
 * routers, and the static/catch-all handlers must come last.
 */
export default async function start() {
  const app = express();
  app.use(express.json());
  app.use(compression());
  // behind a reverse proxy: trust X-Forwarded-* headers
  app.set("trust proxy", 1);
  // handle session and connection
  app.use(connection.appSession);
  app.use(passport.initialize());
  app.use(passport.session());
  // Redis-backed rate limiter so limits are shared across instances
  const rate = rateLimit({
    store: new RedisStore({
      client: redis.createClient({
        host: config.REDIS_HOSTNAME,
        port: config.REDIS_PORT,
      }),
    }),
    windowMs: 15 * 60 * 1000, // 15 minutes
    max: 200, // limit each IP
    // delayMs: 0, // disable delaying - full speed until the max limit is reached
  });
  app.use("/github", rate, connection.router);
  // app routes
  app.use("/api/user", rate, router.user);
  app.use("/api/repo", rate, router.repositoryPublic);
  app.use("/api/repo", rate, router.file);
  app.use("/api/repo", rate, router.repositoryPrivate);
  app.use("/w/", rate, router.webview);
  // optional maintenance banner served from a file next to the app
  app.get("/api/message", async (_, res) => {
    if (ofs.existsSync("./message.txt")) {
      return res.sendFile(path.resolve(__dirname, "..", "message.txt"));
    }
    res.sendStatus(404);
  });
  // public usage statistics (approximate document count)
  app.get("/api/stat", async (_, res) => {
    const nbRepositories =
      await AnonymizedRepositoryModel.estimatedDocumentCount();
    const nbUsers = (await AnonymizedRepositoryModel.distinct("owner")).length;
    res.json({ nbRepositories, nbUsers });
  });
  // all SPA entry routes serve the same index.html
  app
    .get("/", indexResponse)
    .get("/404", indexResponse)
    .get("/anonymize", indexResponse)
    .get("/r/:repoId/?*", indexResponse)
    .get("/repository/:repoId/?*", indexResponse);
  app.use(express.static(path.join(__dirname, "..", "public")));
  // catch-all: client-side routing handles unknown paths
  app.get("*", indexResponse);
  await db.connect();
  app.listen(config.PORT);
  console.log("Database connected and Server started on port: " + config.PORT);
}

83
src/source/GitHubBase.ts Normal file
View File

@@ -0,0 +1,83 @@
import AnonymizedFile from "../AnonymizedFile";
import { Branch, Tree } from "../types";
import { GitHubRepository } from "./GitHubRepository";
import config from "../../config";
import { OAuthApp } from "@octokit/oauth-app";
import Repository from "../Repository";
import * as stream from "stream";
import UserModel from "../database/users/users.model";
/**
 * Shared behavior of the GitHub-backed sources (download and stream).
 *
 * Holds the targeted repository/branch information and resolves which
 * GitHub token to use for API calls.
 */
export default abstract class GitHubBase {
  // which concrete source this instance represents
  type: "GitHubDownload" | "GitHubStream" | "Zip";
  // GitHub-side metadata of the repository
  githubRepository: GitHubRepository;
  // branch (name + commit) that is anonymized
  branch: Branch;
  // token of the user who anonymized the repository (may have expired)
  accessToken: string;
  repository: Repository;
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    this.type = data.type;
    this.accessToken = data.accessToken;
    // seed the repository metadata with the single pinned branch
    this.githubRepository = new GitHubRepository({
      name: data.repositoryName,
      externalId: data.repositoryId,
      branches: [{ commit: data.commit, name: data.branch }],
    });
    this.repository = repository;
    this.branch = { commit: data.commit, name: data.branch };
  }
  // abstract by convention: concrete sources override these two methods
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    throw new Error("Method not implemented.");
  }
  getFiles(): Promise<Tree> {
    throw new Error("Method not implemented.");
  }
  /**
   * Pick an access token for GitHub API calls, in order of preference:
   * the token of `owner` (if given and known), the stored user token
   * (if it still validates), and finally the application-wide token.
   */
  async getToken(owner?: string) {
    if (owner) {
      const user = await UserModel.findOne({ username: owner });
      if (user && user.accessToken) {
        return user.accessToken as string;
      }
    }
    if (this.accessToken) {
      try {
        const app = new OAuthApp({
          clientType: "github-app",
          clientId: config.CLIENT_ID,
          clientSecret: config.CLIENT_SECRET,
        });
        // validate the stored token against GitHub before using it
        await app.checkToken({
          token: this.accessToken,
        });
        return this.accessToken;
      } catch (error) {
        // console.debug("Token is invalid.", error);
        // invalid token: fall back to the application token from now on
        this.accessToken = config.GITHUB_TOKEN;
      }
    }
    return config.GITHUB_TOKEN;
  }
  // public GitHub URL of the source repository
  get url() {
    return "https://github.com/" + this.githubRepository.fullName;
  }
  toJSON(): any {
    return {
      type: this.type,
      fullName: this.githubRepository.fullName?.toString(),
      branch: this.branch,
    };
  }
}

View File

@@ -0,0 +1,75 @@
import { Octokit } from "@octokit/rest";
import * as path from "path";
import config from "../../config";
import storage from "../storage";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { SourceBase } from "../types";
import * as got from "got";
import * as stream from "stream";
import { OctokitResponse } from "@octokit/types";
/**
 * Source that downloads the complete repository archive from GitHub and
 * extracts it into the storage cache.
 */
export default class GitHubDownload extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }
  /**
   * Resolve the tarball URL for the pinned commit.
   * A HEAD request is used: only the redirect target (response.url) matters.
   */
  private async _getZipUrl(
    auth?: string
  ): Promise<OctokitResponse<unknown, 302>> {
    const octokit = new Octokit({ auth });
    return octokit.rest.repos.downloadTarballArchive({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      ref: this.branch?.commit || "HEAD",
      method: "HEAD",
    });
  }
  /**
   * Download and extract the repository tarball into the original cache.
   * Falls back to the application token when the user token is rejected.
   * @throws Error("repo_not_accessible") when neither token works
   */
  async download() {
    let response: OctokitResponse<unknown, number>;
    try {
      response = await this._getZipUrl(await this.getToken());
    } catch (error) {
      if (error.status == 401 && config.GITHUB_TOKEN) {
        try {
          response = await this._getZipUrl(config.GITHUB_TOKEN);
        } catch (error) {
          throw new Error("repo_not_accessible");
        }
      } else {
        throw new Error("repo_not_accessible");
      }
    }
    const originalPath = this.repository.originalCachePath;
    await storage.mk(originalPath);
    // stream the tarball straight into the storage backend
    await storage.extractTar(originalPath, got.stream(response.url));
  }
  /**
   * Read one file; the whole archive is (re)downloaded first.
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    await this.download();
    // update the file list
    await this.repository.files({ force: true });
    return storage.read(file.originalCachePath);
  }
  // list the cached files, downloading the archive on a cache miss
  async getFiles() {
    const folder = this.repository.originalCachePath;
    if (!(await storage.exists(folder))) {
      await this.download();
    }
    return storage.listFiles(folder);
  }
}

View File

@@ -0,0 +1,171 @@
import { Branch } from "../types";
import * as gh from "parse-github-url";
import { IRepositoryDocument } from "../database/repositories/repositories.types";
import { Octokit } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
/**
 * Wrapper around the cached GitHub repository metadata (RepositoryModel).
 * Branch lists and readmes are fetched from GitHub on demand and cached in
 * the database.
 */
export class GitHubRepository {
  // partial mongoose document data backing this wrapper
  private _data: Partial<
    { [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }
  >;
  constructor(
    data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
  ) {
    this._data = data;
  }
  toJSON() {
    return {
      repo: this.repo,
      owner: this.owner,
      hasPage: this._data.hasPage,
      pageSource: this._data.pageSource,
      fullName: this.fullName,
      defaultBranch: this._data.defaultBranch,
      size: this.size,
    };
  }
  // raw backing data (used e.g. to read the mongoose document id)
  get model() {
    return this._data;
  }
  // "owner/repo" as stored in the model's name field
  public get fullName(): string {
    return this._data.name;
  }
  public get id(): string {
    return this._data.externalId;
  }
  // repository size — per the GitHub API this is in kilobytes
  public get size(): number {
    return this._data.size;
  }
  /**
   * Branch list of the repository; fetched from GitHub when unknown or when
   * opt.force is set, otherwise read back from the database cache.
   */
  async branches(opt: {
    accessToken?: string;
    force?: boolean;
  }): Promise<Branch[]> {
    if (
      !this._data.branches ||
      this._data.branches.length == 0 ||
      opt?.force === true
    ) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      const branches = (
        await octokit.paginate(octokit.repos.listBranches, {
          owner: this.owner,
          repo: this.repo,
          per_page: 100,
        })
      ).map((b) => {
        return {
          name: b.name,
          commit: b.commit.sha,
          // preserve a previously cached readme for the same branch
          readme: this._data.branches?.filter(
            (f: Branch) => f.name == b.name
          )[0]?.readme,
        } as Branch;
      });
      this._data.branches = branches;
      await RepositoryModel.updateOne(
        { externalId: this.id },
        { $set: { branches } }
      );
    } else {
      // NOTE(review): findOne may return null if the document was removed —
      // this would throw on `.branches`; confirm callers always have a
      // persisted repository here.
      this._data.branches = (
        await RepositoryModel.findOne({ externalId: this.id }).select(
          "branches"
        )
      ).branches;
    }
    return this._data.branches;
  }
  /**
   * Readme of `opt.branch` (default branch when omitted); fetched from
   * GitHub on a cache miss or when opt.force is set, then cached.
   */
  async readme(opt: {
    branch?: string;
    force?: boolean;
    accessToken?: string;
  }): Promise<string> {
    if (!opt.branch) opt.branch = this._data.defaultBranch || "master";
    const model = await RepositoryModel.findOne({
      externalId: this.id,
    }).select("branches");
    this._data.branches = await this.branches(opt);
    model.branches = this._data.branches;
    const selected = model.branches.filter((f) => f.name == opt.branch)[0];
    if (!selected?.readme || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      const ghRes = await octokit.repos.getReadme({
        owner: this.owner,
        repo: this.repo,
        ref: selected?.commit,
      });
      // GitHub returns the content base64-encoded
      const readme = Buffer.from(
        ghRes.data.content,
        ghRes.data.encoding as BufferEncoding
      ).toString("utf-8");
      selected.readme = readme;
      await model.save();
    }
    return selected.readme;
  }
  // owner part of fullName ("owner/repo")
  public get owner(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.owner || this.fullName;
  }
  // repo part of fullName ("owner/repo")
  public get repo(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.name || this.fullName;
  }
}
/**
 * Fetch repository metadata from GitHub and upsert it into the local
 * RepositoryModel cache (externalId = "gh_" + GitHub numeric id).
 *
 * @throws Error("repo_not_found") when GitHub does not return the repository
 */
export async function getRepositoryFromGitHub(opt: {
  owner: string;
  repo: string;
  accessToken: string;
}) {
  const octokit = new Octokit({ auth: opt.accessToken });
  const r = (
    await octokit.repos.get({
      owner: opt.owner,
      repo: opt.repo,
    })
  ).data;
  if (!r) throw new Error("repo_not_found");
  // upsert: reuse the cached document when one exists
  let model = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
  if (!model) {
    model = new RepositoryModel({ externalId: "gh_" + r.id });
  }
  model.name = r.full_name;
  model.url = r.html_url;
  model.size = r.size;
  model.defaultBranch = r.default_branch;
  model.hasPage = r.has_pages;
  if (model.hasPage) {
    // also record where the GitHub Pages site is built from
    const ghPageRes = await octokit.repos.getPages({
      owner: opt.owner,
      repo: opt.repo,
    });
    model.pageSource = ghPageRes.data.source;
  }
  await model.save();
  return new GitHubRepository(model);
}

171
src/source/GitHubStream.ts Normal file
View File

@@ -0,0 +1,171 @@
import { Octokit } from "@octokit/rest";
import AnonymizedFile from "../AnonymizedFile";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import storage from "../storage";
import { SourceBase, Tree } from "../types";
import * as path from "path";
import * as stream from "stream";
/**
 * Source that fetches files on demand through the GitHub API (git trees and
 * blobs) instead of downloading the whole repository archive.
 */
export default class GitHubStream extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }
  /**
   * Download a single file as a git blob and cache its content on storage.
   * @throws Error("file_sha_not_provided") when the file has no blob sha
   * @throws Error("file_too_big") when GitHub rejects the blob (403)
   * @throws Error("file_not_accessible") for any other failure
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    if (!file.sha) throw new Error("file_sha_not_provided");
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    try {
      const ghRes = await octokit.rest.git.getBlob({
        owner: this.githubRepository.owner,
        repo: this.githubRepository.repo,
        file_sha: file.sha,
      });
      if (!ghRes.data.content && ghRes.data.size != 0) {
        throw new Error("file_not_accessible");
      }
      // empty file
      let content: Buffer;
      if (ghRes.data.content) {
        content = Buffer.from(
          ghRes.data.content,
          ghRes.data.encoding as BufferEncoding
        );
      } else {
        content = Buffer.from("");
      }
      // cache the decoded content for later requests
      await storage.write(file.originalCachePath, content);
      return stream.Readable.from(content.toString());
    } catch (error) {
      if (error.status == 403) {
        throw new Error("file_too_big");
      }
      console.error(error);
    }
    throw new Error("file_not_accessible");
  }
  // list all files of the pinned commit
  async getFiles() {
    return this.getTree(this.branch.commit);
  }
  /**
   * Fetch the git tree `sha` recursively and merge it into `truncatedTree`.
   * GitHub truncates very large recursive listings; the missing parts are
   * then completed by getTruncatedTree.
   */
  private async getTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
      recursive: "1",
    });
    const tree = this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    if (ghRes.data.truncated) {
      await this.getTruncatedTree(sha, tree, parentPath);
    }
    return tree;
  }
  /**
   * Complete a truncated listing: walk the tree one level (non-recursive)
   * and recurse into the sub-trees that are still missing from
   * `truncatedTree`.
   */
  private async getTruncatedTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
    });
    const tree = ghRes.data.tree;
    for (let elem of tree) {
      if (!elem.path) continue;
      if (elem.type == "tree") {
        const elementPath = path.join(parentPath, elem.path);
        const paths = elementPath.split("/");
        let current = truncatedTree;
        for (let i = 0; i < paths.length; i++) {
          let p = paths[i];
          if (!current[p]) {
            // this folder is missing from the truncated result: fetch it
            if (elem.sha)
              await this.getTree(elem.sha, truncatedTree, elementPath);
            break;
          }
          current = current[p] as Tree;
        }
      }
    }
    this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    return truncatedTree;
  }
  /**
   * Convert the flat list of GitHub tree entries into the nested Tree
   * structure used by the application.
   */
  private tree2Tree(
    tree: {
      path?: string;
      mode?: string;
      type?: string;
      sha?: string;
      size?: number;
      url?: string;
    }[],
    partialTree: Tree = {},
    parentPath: string = ""
  ) {
    for (let elem of tree) {
      let current = partialTree;
      if (!elem.path) continue;
      const paths = path.join(parentPath, elem.path).split("/");
      // if elem is a folder iterate on all folders if it is a file stop before the filename
      const end = elem.type == "tree" ? paths.length : paths.length - 1;
      for (let i = 0; i < end; i++) {
        let p = paths[i];
        // escape names starting with "$" — presumably to avoid clashing
        // with MongoDB operator keys; confirm
        if (p[0] == "$") {
          p = "\\" + p;
        }
        if (!current[p]) {
          current[p] = {};
        }
        current = current[p] as Tree;
      }
      // if elem is a file add the file size in the file list
      if (elem.type == "blob") {
        let p = paths[end];
        if (p[0] == "$") {
          p = "\\" + p;
        }
        current[p] = {
          size: elem.size || 0, // size in bytes (as reported by the GitHub tree API)
          sha: elem.sha || "",
        };
      }
    }
    return partialTree;
  }
}

31
src/source/Zip.ts Normal file
View File

@@ -0,0 +1,31 @@
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import Repository from "../Repository";
import storage from "../storage";
import { SourceBase } from "../types";
import * as stream from "stream";
/**
 * Source for repositories uploaded as an archive; the content is assumed to
 * be already extracted into the repository's original cache.
 */
export default class Zip implements SourceBase {
  type = "Zip";
  repository: Repository;
  url?: string;

  constructor(data: any, repository: Repository) {
    this.repository = repository;
    this.url = data.url;
  }

  /** List the files from the original cache of the repository. */
  async getFiles() {
    const cachePath = this.repository.originalCachePath;
    return storage.listFiles(cachePath);
  }

  /** Read one file from the cache. */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    return storage.read(file.originalCachePath);
  }

  toJSON(): any {
    return { type: this.type };
  }
}

7
src/storage.ts Normal file
View File

@@ -0,0 +1,7 @@
// Storage backend selection for the whole application.
import FileSystem from "./storage/FileSystem";
// NOTE(review): S3Storage is imported but never used — presumably the
// backend is meant to be switchable here (e.g. via configuration); confirm.
import S3Storage from "./storage/S3";
import { StorageBase } from "./types";
// active backend: the local file system
const storage = new FileSystem();
export default storage as StorageBase;

136
src/storage/FileSystem.ts Normal file
View File

@@ -0,0 +1,136 @@
import { StorageBase, Tree } from "../types";
import * as fs from "fs";
import * as tar from "tar-fs";
import * as path from "path";
import * as express from "express";
import config from "../../config";
import * as stream from "stream";
import * as gunzip from "gunzip-maybe";
import * as archiver from "archiver";
/**
 * Storage backend that keeps all data on the local file system.
 * All paths are resolved relative to config.FOLDER.
 */
export default class FileSystem implements StorageBase {
  type = "FileSystem";
  constructor() {}
  /** @override */
  async exists(p: string): Promise<boolean> {
    return fs.existsSync(path.join(config.FOLDER, p));
  }
  /** @override */
  send(p: string, res: express.Response) {
    // dotfiles must be allowed: anonymized repositories may contain them
    res.sendFile(path.join(config.FOLDER, p), { dotfiles: "allow" });
  }
  /** @override */
  read(p: string): stream.Readable {
    return fs.createReadStream(path.join(config.FOLDER, p));
  }
  /** @override */
  async write(p: string, data: Buffer): Promise<void> {
    // create the parent folder when needed
    if (!(await this.exists(path.dirname(p)))) {
      await fs.promises.mkdir(path.dirname(path.join(config.FOLDER, p)), {
        recursive: true,
      });
    }
    return fs.promises.writeFile(path.join(config.FOLDER, p), data);
  }
  /** @override */
  async rm(p: string): Promise<void> {
    // Resolve against config.FOLDER like every other method of this class.
    // The previous implementation removed the raw path (the parameter also
    // shadowed the `path` module), which was inconsistent with exists/read/
    // write/mk and could delete outside the storage folder.
    await fs.promises.rm(path.join(config.FOLDER, p), {
      force: true,
      recursive: true,
    });
  }
  /** @override */
  async mk(dir: string): Promise<void> {
    // await the creation so callers can rely on the folder existing
    if (!(await this.exists(dir)))
      await fs.promises.mkdir(path.join(config.FOLDER, dir), {
        recursive: true,
      });
  }
  /**
   * Recursively list the files below `dir` as a Tree.
   * @param opt.root base folder (defaults to config.FOLDER)
   * @param opt.onEntry callback invoked for every regular file found
   * @override
   */
  async listFiles(
    dir: string,
    opt: {
      root?: string;
      onEntry?: (file: { path: string; size: number }) => void;
    } = {}
  ): Promise<Tree> {
    if (opt.root == null) {
      opt.root = config.FOLDER;
    }
    let files = await fs.promises.readdir(path.join(opt.root, dir));
    const output: Tree = {};
    for (let file of files) {
      let filePath = path.join(dir, file);
      try {
        const stats = await fs.promises.stat(path.join(opt.root, filePath));
        // escape names starting with "$" — presumably to avoid clashing
        // with MongoDB operator keys; confirm
        if (file[0] == "$") {
          file = "\\" + file;
        }
        if (stats.isDirectory()) {
          output[file] = await this.listFiles(filePath, opt);
        } else if (stats.isFile()) {
          if (opt.onEntry) {
            opt.onEntry({
              path: filePath,
              size: stats.size,
            });
          }
          // the inode number stands in for a content hash
          output[file] = { size: stats.size, sha: stats.ino.toString() };
        }
      } catch (error) {
        // a file disappearing mid-listing is not fatal
        console.error(error);
      }
    }
    return output;
  }
  /**
   * Extract a (possibly gzipped) tar stream into `p`, stripping the
   * archive's single top-level folder.
   * @override
   */
  async extractTar(p: string, data: stream.Readable): Promise<void> {
    return new Promise((resolve, reject) => {
      data
        .pipe(gunzip())
        .pipe(
          tar.extract(path.join(config.FOLDER, p), {
            map: (header) => {
              header.name = header.name.substr(header.name.indexOf("/") + 1);
              return header;
            },
          })
        )
        .on("finish", resolve)
        .on("error", reject);
    });
  }
  /**
   * Build an archive of `dir`, optionally transforming each file stream.
   * The archive is returned immediately and filled asynchronously.
   * @override
   */
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?;
    }
  ) {
    const archive = archiver(opt?.format, {});
    this.listFiles(dir, {
      onEntry: (file) => {
        let rs = this.read(file.path);
        if (opt?.fileTransformer) {
          // apply transformation on the stream
          rs = rs.pipe(opt.fileTransformer(file.path));
        }
        const f = file.path.replace(dir, "");
        archive.append(rs, {
          name: path.basename(f),
          prefix: path.dirname(f),
        });
      },
    })
      .then(() => {
        archive.finalize();
      })
      // surface listing failures on the archive stream instead of leaving
      // an unhandled promise rejection
      .catch((error) => archive.emit("error", error));
    return archive;
  }
}

225
src/storage/S3.ts Normal file
View File

@@ -0,0 +1,225 @@
import { StorageBase, Tree, TreeFile } from "../types";
import { S3 } from "aws-sdk";
import config from "../../config";
import * as stream from "stream";
import { ArchiveStreamToS3 } from "archive-stream-to-s3";
import * as express from "express";
import * as mime from "mime-types";
import * as flow from "xml-flow";
import * as archiver from "archiver";
import * as path from "path";
import * as gunzip from "gunzip-maybe";
// keep a reference to the original onEntry so extractTar can wrap it
const originalArchiveStreamToS3Entry: Function = (ArchiveStreamToS3 as any)
  .prototype.onEntry;
/**
 * Storage backend that keeps all data in an S3 bucket (config.S3_BUCKET).
 * Keys mirror the relative paths used by the FileSystem backend.
 */
export default class S3Storage implements StorageBase {
  type = "AWS";
  client: S3;
  constructor() {
    if (!config.S3_BUCKET) throw new Error("s3_config_not_provided");
    this.client = new S3({
      region: config.S3_REGION,
      endpoint: config.S3_ENDPOINT,
      accessKeyId: config.S3_CLIENT_ID,
      secretAccessKey: config.S3_CLIENT_SECRET,
    });
  }
  /** @override */
  async exists(path: string): Promise<boolean> {
    try {
      await this.client
        .headObject({
          Bucket: config.S3_BUCKET,
          Key: path,
        })
        .promise();
      return true;
    } catch (err) {
      // headObject rejects for missing keys (and also for access errors)
      return false;
    }
  }
  /** @override */
  async mk(dir: string): Promise<void> {
    // S3 has no real folders: create an empty marker object "dir/"
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: dir,
      })
      .promise();
  }
  /**
   * Remove every object below `dir`; recurses while the listing is
   * truncated (more than one page of results).
   * @override
   */
  async rm(dir: string): Promise<void> {
    const data = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();
    const params = { Bucket: config.S3_BUCKET, Delete: { Objects: [] } };
    data.Contents.forEach(function (content) {
      params.Delete.Objects.push({ Key: content.Key });
    });
    if (params.Delete.Objects.length == 0) {
      // nothing to remove
      return;
    }
    await this.client.deleteObjects(params).promise();
    if (data.IsTruncated) {
      await this.rm(dir);
    }
  }
  /**
   * Stream an object directly into the HTTP response, forwarding the
   * status and content headers from S3.
   * @override
   */
  send(p: string, res: express.Response) {
    const s = this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: p,
      })
      .on("httpHeaders", (statusCode, headers, response) => {
        res.status(statusCode);
        if (statusCode < 300) {
          res.set("Content-Length", headers["content-length"]);
          res.set("Content-Type", headers["content-type"]);
        }
        (
          response.httpResponse.createUnbufferedStream() as stream.Readable
        ).pipe(res);
      });
    s.send();
  }
  /** @override */
  read(path: string): stream.Readable {
    return this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: path,
      })
      .createReadStream();
  }
  /** @override */
  async write(path: string, data: Buffer): Promise<void> {
    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: path,
        Body: data,
        ContentType: mime.lookup(path).toString(),
      })
      .promise();
    return;
  }
  /**
   * List the objects below `dir` as a nested Tree.
   * NOTE(review): only the first listObjectsV2 page is read — a prefix with
   * more than 1000 keys would be listed partially; confirm.
   * @override
   */
  async listFiles(dir: string): Promise<Tree> {
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const out: Tree = {};
    const req = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();
    if (!req.Contents) return out;
    for (const f of req.Contents) {
      if (!f.Key) continue;
      f.Key = f.Key.replace(dir, "");
      const paths = f.Key.split("/");
      let current: Tree = out;
      // create the intermediate folders of the key
      for (let i = 0; i < paths.length - 1; i++) {
        let p = paths[i];
        if (!p) continue;
        if (!(current[p] as Tree)) {
          current[p] = {} as Tree;
        }
        current = current[p] as Tree;
      }
      // the object ETag stands in for a content hash
      const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
      const fileName = paths[paths.length - 1];
      if (fileName) current[fileName] = fileInfo;
    }
    return out;
  }
  /**
   * Extract a (possibly gzipped) tar stream into the bucket under `p`,
   * stripping the archive's single top-level folder.
   * NOTE(review): this patches ArchiveStreamToS3.prototype.onEntry, i.e.
   * shared mutable state — concurrent extracts would interfere; confirm
   * extraction is never concurrent or patch per-instance instead.
   * @override
   */
  async extractTar(p: string, data: stream.Readable): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const toS3 = new ArchiveStreamToS3(config.S3_BUCKET, p, this.client);
      let rootFolder = null;
      (ArchiveStreamToS3 as any).prototype.onEntry = function (
        header: any,
        stream: any,
        next: any
      ) {
        if (rootFolder == null) {
          rootFolder = header.name.substr(0, header.name.indexOf("/") + 1);
        }
        header.name = header.name.replace(rootFolder, "");
        originalArchiveStreamToS3Entry.call(toS3, header, stream, next);
      };
      toS3.on("finish", (result) => {
        resolve(result);
      });
      toS3.on("error", (e) => {
        reject(e);
      });
      data.pipe(gunzip()).pipe(toS3);
    });
  }
  /**
   * Build an archive of the objects below `dir`, optionally transforming
   * each file stream. The archive is returned immediately and filled as the
   * (streamed) listing is parsed.
   * @override
   */
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?;
    }
  ) {
    const archive = archiver(opt?.format, {});
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const req = this.client.listObjectsV2({
      Bucket: config.S3_BUCKET,
      Prefix: dir,
    });
    // parse the raw XML listing as a stream to avoid buffering large lists
    const filesStream = req.createReadStream();
    const xmlStream = flow(filesStream);
    const that = this;
    xmlStream.on("tag:contents", function (file) {
      let rs = that.read(file.key);
      file.key = file.key.replace(dir, "");
      const filename = path.basename(file.key);
      // skip folder marker objects
      if (filename == "") return;
      if (opt?.fileTransformer) {
        rs = rs.pipe(opt.fileTransformer(filename));
      }
      archive.append(rs, {
        name: filename,
        prefix: path.dirname(file.key),
      });
    });
    xmlStream.on("end", () => {
      archive.finalize();
    });
    return archive;
  }
}

98
src/types.ts Normal file
View File

@@ -0,0 +1,98 @@
import GitHubDownload from "./source/GitHubDownload";
import GitHubStream from "./source/GitHubStream";
import Zip from "./source/ZIP";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
import AnonymizedFile from "./AnonymizedFile";
import * as stream from "stream";
import * as archiver from "archiver";
/** Common contract of all repository sources (GitHub download/stream, zip). */
export interface SourceBase {
  readonly type: string;
  /**
   * The url of the source
   */
  url?: string;
  /**
   * Retrieve the file content
   * @param file the file of the content to retrieve
   */
  getFileContent(file: AnonymizedFile): Promise<stream.Readable>;
  /**
   * Get all the files from a specific source
   */
  getFiles(): Promise<Tree>;
  toJSON(): any;
}
export type Source = GitHubDownload | GitHubStream | Zip;
/** Common contract of the storage backends (file system, S3). */
export interface StorageBase {
  type: string;
  exists(path: string): Promise<boolean>;
  read(path: string): stream.Readable;
  write(path: string, data: Buffer): Promise<void>;
  listFiles(dir: string): Promise<Tree>;
  extractTar(p: string, data: stream.Readable): Promise<void>;
  rm(path: string): Promise<void>;
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      // NOTE(review): `Transformer` is not imported or declared in this
      // file view — confirm it resolves (stream.Transform?).
      fileTransformer?: (p: any) => Transformer;
    }
  ): archiver.Archiver;
  mk(path: string): Promise<void>;
}
export type Storage = S3Storage | FileSystem;
/** A git branch: name, head commit, and optionally its cached readme. */
export interface Branch {
  name: string;
  commit: string;
  readme?: string;
}
/** Lifecycle states of an anonymized repository. */
export type RepositoryStatus =
  | "ready"
  | "preparing"
  | "expired"
  | "removed"
  | "download"
  | "queue";
export type SourceStatus = "available" | "unavailable";
/** Nested file listing: folder names map to sub-trees, files to TreeFile. */
export interface Tree {
  [key: string]: TreeElement;
}
export type TreeElement = Tree | TreeFile;
/** Leaf of a Tree: a file's content identifier and size. */
export interface TreeFile {
  sha: string;
  size: number;
}
/** Lines-of-code statistics, overall and per language. */
export interface Loc {
  info: { total: number; code: number; commit: number };
  languages: {
    [key: string]: {
      total: number;
      code: number;
      commit: number;
      sum: number;
    };
  };
}