migrate JavaScript to TypeScript

This commit is contained in:
tdurieux
2021-08-11 18:18:45 +02:00
parent ee4a20286d
commit caeff49ab0
58 changed files with 6034 additions and 3096 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
.env
repositories/
/repositories
repo/
db_backups
message.txt

View File

@@ -1,16 +0,0 @@
// Legacy JavaScript configuration module (replaced by config.ts in this
// commit). Defaults below can be overridden by environment variables of
// the same name.
const config = {
  CLIENT_ID: null,
  CLIENT_SECRET: null,
  GITHUB_TOKEN: null,
  MAX_FILE_SIZE: 10 * 1024 * 1024, // in b (bytes)
  MAX_REPO_SIZE: 8 * 1024, // in kb (kilobytes)
  AUTH_CALLBACK: "http://localhost:5000/github/auth",
  ANONYMIZATION_MASK: "XXXX",
  PORT: 5000,
};
// Override any known key from the environment. Note: process.env values
// are always strings, so numeric defaults become strings when overridden.
for (let conf in process.env) {
  if (config[conf] !== undefined) {
    config[conf] = process.env[conf];
  }
}
module.exports = config;

61
config.ts Normal file
View File

@@ -0,0 +1,61 @@
import { resolve } from "path";
interface Config {
REDIS_PORT: number;
REDIS_HOSTNAME: string;
CLIENT_ID: string;
CLIENT_SECRET: string;
GITHUB_TOKEN: string;
DEFAULT_QUOTA: number;
MAX_FILE_SIZE: number;
MAX_REPO_SIZE: number;
AUTH_CALLBACK: string;
ANONYMIZATION_MASK: string;
PORT: number;
HOSTNAME: string;
DB_USERNAME: string;
DB_PASSWORD: string;
DB_HOSTNAME: string;
FOLDER: string;
additionalExtensions: string[];
S3_BUCKET?: string;
S3_CLIENT_ID?: string;
S3_CLIENT_SECRET?: string;
S3_ENDPOINT?: string;
S3_REGION?: string;
}
const config: Config = {
CLIENT_ID: "CLIENT_ID",
CLIENT_SECRET: "CLIENT_SECRET",
GITHUB_TOKEN: "",
DEFAULT_QUOTA: 2 * 1024 * 1024 * 1024 * 8,
MAX_FILE_SIZE: 10 * 1024 * 1024, // in b
MAX_REPO_SIZE: 8 * 1024, // in kb
AUTH_CALLBACK: "http://localhost:5000/github/auth",
ANONYMIZATION_MASK: "XXXX",
PORT: 5000,
HOSTNAME: "anonymous.4open.science",
DB_USERNAME: "admin",
DB_PASSWORD: "password",
DB_HOSTNAME: "mongodb",
REDIS_HOSTNAME: "redis",
REDIS_PORT: 6379,
FOLDER: resolve(__dirname, "repositories"),
additionalExtensions: [
"license",
"dockerfile",
"sbt",
"ipynb",
"gp",
"out",
"sol",
"in",
],
};
for (let conf in process.env) {
if ((config as any)[conf] !== undefined) {
(config as any)[conf] = process.env[conf];
}
}
export default config;

View File

@@ -9,6 +9,9 @@ services:
container_name: anonymous_github
env_file:
- ./.env
environment:
- REDIS_HOSTNAME=redis
- DB_HOSTNAME=mongodb
volumes:
- .:/app
# - ./repositories:/app/repositories

109
index.js
View File

@@ -1,109 +0,0 @@
// Legacy server entry point (replaced by index.ts/src/server in this
// commit). Wires up Express, sessions, Redis-backed rate limiting, the
// API routes, and the SPA fallback, then starts listening once the
// database is connected.
const path = require("path");
const ofs = require("fs");
const redis = require("redis");
const RateLimit = require("express-rate-limit");
const RedisStore = require("rate-limit-redis");
const express = require("express");
const compression = require("compression");
const bodyParser = require("body-parser");
const config = require("./config");
// Redis client shared by the rate limiter store.
const rediscli = redis.createClient({
  host: "redis",
  ttl: 260,
});
const connection = require("./routes/connection");
const db = require("./utils/database");
const fileUtils = require("./utils/file");
const app = express();
app.use(bodyParser.json());
app.use(compression());
// Behind a reverse proxy: trust X-Forwarded-* for client IPs (rate limit).
app.set("trust proxy", 1);
// handle session and connection
app.use(connection.session);
app.use(connection.passport.initialize());
app.use(connection.passport.session());
// Rate limiter shared across all API routes, counting per IP in Redis.
const rateLimit = new RateLimit({
  store: new RedisStore({
    client: rediscli,
  }),
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 200, // limit each IP to 200 requests per windowMs
  // delayMs: 0, // disable delaying - full speed until the max limit is reached
});
app.use("/github", rateLimit, connection.router);
// app routes
app.use("/api/user", rateLimit, require("./routes/user"));
app.use("/api/repo", rateLimit, require("./routes/file"));
app.use("/api/repo", rateLimit, require("./routes/repository"));
// website view
app.use("/w/", rateLimit, require("./routes/webview"));
// List of file extensions treated as text (built-in list + project extras).
app.get("/api/supportedTypes", async (_, res) => {
  res.json(
    require("textextensions")
      .default.concat(fileUtils.additionalExtensions)
      .sort()
  );
});
// Optional broadcast message shown by the frontend; 404 when absent.
app.get("/api/message", async (_, res) => {
  if (ofs.existsSync("./message.txt")) {
    return res.sendFile(path.resolve(__dirname, "message.txt"));
  }
  res.sendStatus(404);
});
// Public statistics: number of anonymized repositories and distinct owners.
app.get("/api/stat", async (_, res) => {
  const nbRepositories = await db
    .get("anonymized_repositories")
    .estimatedDocumentCount();
  const nbUsers = (await db.get("anonymized_repositories").distinct("owner"))
    .length; //await db.get("users").estimatedDocumentCount();
  res.json({ nbRepositories, nbUsers });
});
// SPA fallback: serve index.html for HTML requests; for non-HTML requests
// on a repository URL, redirect to the file API so assets resolve.
function indexResponse(req, res) {
  if (
    req.params.repoId &&
    req.headers["accept"] &&
    req.headers["accept"].indexOf("text/html") == -1
  ) {
    // Path shape is /r/:repoId/... or /repository/:repoId/..., so the
    // repoId is the second path segment.
    const repoId = req.path.split("/")[2];
    // if it is not an html request, it assumes that the browser try to load a different type of resource
    return res.redirect(
      `/api/repo/${repoId}/file/${req.path.substring(
        req.path.indexOf(repoId) + repoId.length + 1
      )}`
    );
  }
  res.sendFile(path.resolve(__dirname, "public", "index.html"));
}
app
  .get("/", indexResponse)
  .get("/404", indexResponse)
  .get("/anonymize", indexResponse)
  .get("/r/:repoId/?*", indexResponse)
  .get("/repository/:repoId/?*", indexResponse);
app.use(express.static(__dirname + "/public"));
app.get("*", indexResponse);
// Only start accepting requests once the database connection is up.
db.connect().then((_) => {
  app.listen(config.PORT, () => {
    console.log(
      "Database connected and Server started on port: " + config.PORT
    );
  });
});

6
index.ts Normal file
View File

@@ -0,0 +1,6 @@
// Entry point: load .env into process.env before any module (notably
// config.ts) reads it — hence require() instead of a hoisted import.
require("dotenv").config();
import server from "./src/server";
// start the server
server();

178
migrateDB.ts Normal file
View File

@@ -0,0 +1,178 @@
import * as mongoose from "mongoose";
import config from "./config";
import * as database from "./src/database/database";
import RepositoryModel from "./src/database/repositories/repositories.model";
import AnonymizedRepositoryModel from "./src/database/anonymizedRepositories/anonymizedRepositories.model";
import UserModel from "./src/database/users/users.model";
import { IRepositoryDocument } from "./src/database/repositories/repositories.types";
import { LexRuntime } from "aws-sdk";
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;
/**
 * Open a second mongoose connection to the legacy database `db` on the same
 * MongoDB host, so the old collections can be read while the new models
 * write through the default connection (opened by database.connect()).
 */
async function connect(db) {
  // Dedicated Mongoose instance: the default instance is already bound to
  // the new database.
  const t = new mongoose.Mongoose();
  t.set("useNewUrlParser", true);
  t.set("useFindAndModify", true);
  t.set("useUnifiedTopology", true);
  // NOTE(review): this local shadows the `database` module imported at the
  // top of the file — consider renaming.
  const database = t.connection;
  await t.connect(MONGO_URL + db, {
    authSource: "admin",
    useCreateIndex: true,
    // NOTE(review): useFindAndModify is already set via t.set() above.
    useFindAndModify: true,
  });
  return database;
}
// One-shot migration script: copies users, repositories, and anonymized
// repositories from the legacy database ("anonymous_github") into the new
// mongoose models, then exits.
//
// Pattern used in all three phases: the cursor's async forEach callback
// registers a promise in `promises` and resolves it when the document is
// done, so the phase can wait on all documents afterwards.
// NOTE(review): this relies on forEach not completing before the last
// callback has pushed its promise (batchSize(1) pacing) — fragile; verify.
(async () => {
  await database.connect();
  const oldDB = await connect("anonymous_github");
  console.log("Import Users");
  await new Promise(async (resolve) => {
    const promises = [];
    await oldDB
      .collection("users")
      .find()
      .batchSize(1)
      .forEach(async (r) => {
        let localResolve = null;
        const p = new Promise((r) => (localResolve = r));
        promises.push(p);
        const repositoryModels: Promise<IRepositoryDocument>[] = [];
        // Repositories already imported (matched by "gh_<id>") are skipped.
        const finds = (
          await RepositoryModel.find({
            externalId: {
              $in: r.repositories.map((repo) => "gh_" + repo.id),
            },
          }).select("externalId")
        ).map((m) => m.externalId);
        for (const repo of r.repositories.filter(
          (f) => finds.indexOf("gh_" + f.id) == -1
        )) {
          repositoryModels.push(
            new RepositoryModel({
              externalId: "gh_" + repo.id,
              name: repo.full_name,
              url: repo.html_url,
              size: repo.size,
              defaultBranch: repo.default_branch,
            }).save()
          );
        }
        // Create the user, linking it to the ids of its saved repositories.
        const user = await new UserModel({
          accessToken: r.accessToken,
          username: r.username,
          email: r.profile.emails[0]?.value,
          photo: r.profile.photos[0]?.value,
          repositories: (await Promise.all(repositoryModels)).map((d) => d._id),
          default: {
            terms: r.default.terms,
            options: r.default.options,
          },
        }).save();
        localResolve(user);
      });
    Promise.all(promises).then(resolve);
  });
  console.log("Import Repositories");
  let promises = [];
  await oldDB
    .collection("repositories")
    .find({})
    .batchSize(1)
    .forEach(async (r) => {
      if (!r.id) return;
      let localResolve = null;
      const p = new Promise((r) => (localResolve = r));
      promises.push(p);
      // Reuse the document created during the user phase when it exists.
      let find = await RepositoryModel.findOne({
        externalId: "gh_" + r.id,
      });
      // console.log("gh_" + r.id, find != null);
      if (find == null) {
        find = new RepositoryModel({
          externalId: "gh_" + r.id,
          name: r.fullName,
          url: r.html_url,
          size: r.size,
          defaultBranch: r.default_branch,
        });
      }
      // Flatten the legacy branch map into the new array shape; the README
      // is only attached to the default branch.
      const branches = [...Object.values(r.branches)].map((b: any) => {
        const o: any = { name: b.name, commit: b.commit.sha };
        if (b.name == find.defaultBranch) {
          o.readme = r.readme;
        }
        return o;
      });
      find.branches = branches;
      await find.save();
      localResolve();
    });
  await Promise.all(promises);
  console.log("Import Anonymized Repositories");
  promises = [];
  await oldDB
    .collection("anonymized_repositories")
    .find({})
    .forEach(async (r) => {
      let localResolve = null;
      const p = new Promise((r) => (localResolve = r));
      promises.push(p);
      // Resolve the source repository by name, falling back to the legacy
      // collection's id when the repository was renamed.
      let repo = await RepositoryModel.findOne({ name: r.fullName });
      if (repo == null) {
        const tmp = await oldDB
          .collection("repositories")
          .findOne({ fullName: r.fullName });
        if (tmp) {
          repo = await RepositoryModel.findOne({ externalId: "gh_" + tmp.id });
        } else {
          console.error(`Repository ${r.fullName} is not found (renamed)`);
        }
      }
      // Map the legacy flat document onto the new source/options layout;
      // legacy "download" mode becomes GitHubDownload, anything else streams.
      await new AnonymizedRepositoryModel({
        repoId: r.repoId,
        status: r.status,
        anonymizeDate: r.anonymizeDate,
        lastView: r.lastView,
        pageView: r.pageView,
        owner: r.owner,
        source: {
          accessToken: r.token,
          type:
            r.options.mode == "download" ? "GitHubDownload" : "GitHubStream",
          branch: r.branch,
          commit: r.commit,
          repositoryId: repo?.id,
          repositoryName: r.fullName,
        },
        options: {
          terms: r.terms,
          expirationMode: r.options.expirationMode,
          expirationDate: r.options.expirationDate
            ? new Date(r.options.expirationDate)
            : null,
          update: r.options.update,
          image: r.options.image,
          pdf: r.options.pdf,
          notebook: r.options.notebook,
          loc: r.options.loc,
          link: r.options.link,
          page: r.options.page,
          pageSource: r.options.pageSource,
        },
      }).save();
      localResolve();
    });
  await Promise.all(promises);
  console.log("Import finished!");
  // Leave a grace period for pending writes before terminating the process.
  setTimeout(() => process.exit(), 5000);
})();

3776
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,14 @@
{
"name": "anonymous_github",
"version": "2.0.0",
"version": "2.1.0",
"description": "Anonymise Github repositories for double-anonymous reviews",
"main": "index.js",
"main": "index.ts",
"scripts": {
"test": "mocha --reporter spec",
"start": "node index.js",
"dev": "nodemon index.js"
"start": "node --inspect=5858 -r ts-node/register ./index.ts",
"dev": "nodemon --transpile-only index.ts",
"migrateDB": "ts-node --transpile-only migrateDB.ts",
"build": "tsc"
},
"repository": {
"type": "git",
@@ -21,27 +23,50 @@
"dependencies": {
"@octokit/oauth-app": "^3.3.2",
"@octokit/rest": "^18.5.3",
"array-equal": "^1.0.0",
"archive-stream-to-s3": "^1.1.3",
"archiver": "^5.3.0",
"aws-sdk": "^2.958.0",
"compression": "^1.7.4",
"connect-redis": "^5.1.0",
"dotenv": "^10.0.0",
"express": "^4.17.1",
"express-rate-limit": "^5.2.6",
"express-session": "^1.17.1",
"extract-zip": "^2.0.1",
"github-linguist": "^2.3.0",
"got": "^9.6.0",
"gunzip-maybe": "^1.4.2",
"istextorbinary": "^5.12.0",
"mongodb": "^3.6.6",
"mime-types": "^2.1.30",
"mongoose": "^5.13.5",
"parse-github-url": "^1.0.2",
"passport": "^0.4.1",
"passport-github2": "^0.1.12",
"rate-limit-redis": "^2.1.0",
"redis": "^3.1.2",
"textextensions": "^5.12.0"
"tar-fs": "^2.1.1",
"textextensions": "^5.12.0",
"xml-flow": "^1.0.4"
},
"devDependencies": {
"@types/archiver": "^5.1.1",
"@types/compression": "^1.7.1",
"@types/connect-redis": "^0.0.17",
"@types/express": "^4.17.13",
"@types/express-rate-limit": "^5.1.3",
"@types/express-session": "^1.17.4",
"@types/got": "^9.6.12",
"@types/mime-types": "^2.1.0",
"@types/parse-github-url": "^1.0.0",
"@types/passport": "^1.0.7",
"@types/passport-github2": "^1.2.5",
"@types/rate-limit-redis": "^1.7.2",
"@types/redis": "^2.8.31",
"@types/tar-fs": "^2.0.1",
"@types/xml-flow": "^1.0.1",
"chai": "^4.3.4",
"mocha": "^8.3.2",
"nodemon": "^2.0.4"
"nodemon": "^2.0.7",
"ts-node": "^10.1.0",
"typescript": "^4.3.5"
},
"nodemonConfig": {
"ignore": [

View File

@@ -335,8 +335,8 @@
name="mode"
ng-model="options.mode"
>
<option value="stream" selected>Stream</option>
<option value="download">Download</option>
<option value="GitHubStream" selected>Stream</option>
<option value="GitHubDownload">Download</option>
</select>
<small class="form-text text-muted"
>How the repository will be anonymized. Stream mode will
@@ -367,22 +367,6 @@
https://anonymous.4open.science/w/{{repoId}}</small
>
</div>
<div class="form-check">
<input
class="form-check-input"
type="checkbox"
id="loc"
name="loc"
ng-model="options.loc"
/>
<label class="form-check-label" for="page"
>Line of code</label
>
<small id="termsHelp" class="form-text text-muted"
>Display the number of line of code in the
repository</small
>
</div>
</div>
</div>
</div>

View File

@@ -73,6 +73,8 @@
/>
<label class="form-check-label" for="removed"> Removed </label>
</div>
<h5>Quota</h5>
{{quota.used | humanFileSize}}/{{quota.total| humanFileSize}}
</div>
<div class="col-md h-100 overflow-auto body">
<div class="row">
@@ -95,6 +97,9 @@
<th scope="col" class="text-center d-none d-xl-table-cell">
Anonymize date
</th>
<th scope="col" class="text-center d-none d-xl-table-cell">
Size
</th>
<th scope="col" class="text-center d-none d-xl-table-cell">
# Views
</th>
@@ -113,32 +118,30 @@
{{$index + 1}}
</th>
<td class="align-middle">
<a href="/r/{{repo.repoId}}"
>{{repo.repoId}}</a
>
<a href="/r/{{repo.repoId}}">{{repo.repoId}}</a>
</td>
<td
class="align-middle"
title="Commit: {{repo.commit}}"
title="Commit: {{repo.source.branch.commit}}"
data-toggle="tooltip"
data-placement="bottom"
>
<a
href="https://github.com/{{repo.fullName}}/commit/{{repo.commit}}"
>{{repo.fullName}}</a
href="https://github.com/{{repo.source.fullName}}/commit/{{repo.source.branch.commit}}"
>{{repo.source.fullName}}</a
>
</td>
<td
title="Commit: {{repo.commit}}"
title="Commit: {{repo.source.branch.commit}}"
data-toggle="tooltip"
data-placement="bottom"
class="align-middle d-none d-lg-table-cell"
>
{{repo.branch}}
{{repo.source.branch.name}}
</td>
<!-- <td>{{repo.commit.substring(0, 6)}}</td> -->
<td class="text-center align-middle d-none d-lg-table-cell">
{{repo.terms.length}}
{{repo.options.terms.length}}
</td>
<td
class="text-center align-middle"
@@ -162,7 +165,12 @@
<td class="text-center align-middle d-none d-xl-table-cell">
{{repo.anonymizeDate | date}}
</td>
<td class="text-center align-middle d-none d-xl-table-cell">{{repo.pageView}}</td>
<td class="text-center align-middle d-none d-xl-table-cell">
{{repo.size | humanFileSize}}
</td>
<td class="text-center align-middle d-none d-xl-table-cell">
{{repo.pageView}}
</td>
<td class="text-center align-middle d-none d-xl-table-cell">
{{repo.lastView | date}}
</td>
@@ -214,10 +222,7 @@
>
<i class="fa fa-remove" aria-hidden="true"></i> Remove
</a>
<a
class="dropdown-item"
href="/r/{{repo.repoId}}/"
>
<a class="dropdown-item" href="/r/{{repo.repoId}}/">
<i class="fa fa-eye" aria-hidden="true"></i> View Repo
</a>
<a

View File

@@ -4,14 +4,24 @@
<tree class="files" file="files"></tree>
</div>
<div class="col-md h-100 overflow-auto p-0 d-flex flex-column">
<nav aria-label="breadcrumb">
<nav aria-label="repository menu">
<ol class="breadcrumb shadow paths">
<li class="breadcrumb-item" ng-repeat="p in paths" ng-bind="p">
Loading...
</li>
</ol>
<a ng-href="{{url}}" target="__self" class="btn btn-primary"
>Download file</a
>
<a
ng-href="/api/repo/{{repoId}}/zip"
target="__self"
class="btn btn-primary"
>Download Repository</a
>
</nav>
<loc stats="stats" ng-if="stats"></loc>
<div class="align-items-stretch h-100 w-100 overflow-auto">
<ng-include src="'./partials/pageView.htm'"></ng-include>
</div>

View File

@@ -81,6 +81,30 @@ angular
$locationProvider.html5Mode(true);
})
.run(["Analytics", function (Analytics) {}])
// Angular filter: format a size as a human-readable string.
// si=true uses decimal units (kB, 1000-based); default is binary (KiB).
// dp is the number of decimal places.
.filter("humanFileSize", function () {
  return function humanFileSize(bytes, si = false, dp = 1) {
    const thresh = si ? 1000 : 1024;
    // NOTE(review): the input appears to be in bits (the server quota is
    // stored ×8); dividing by 8 converts to bytes — confirm with the API.
    bytes = bytes / 8;
    if (Math.abs(bytes) < thresh) {
      return bytes + " B";
    }
    const units = si
      ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
      : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
    let u = -1;
    const r = 10 ** dp;
    // Divide until below the threshold (or out of units), then round to dp.
    do {
      bytes /= thresh;
      ++u;
    } while (Math.round(Math.abs(bytes) * r) / r >= thresh && u < units.length - 1);
    return bytes.toFixed(dp) + " " + units[u];
  };
})
.filter("title", function () {
return function (str) {
if (!str) return str;
@@ -370,7 +394,7 @@ angular
notebook: true,
loc: true,
link: true,
mode: "download",
mode: "GitHubDownload",
};
function getDefault() {
@@ -500,6 +524,13 @@ angular
}
getRepositories();
function getQuota() {
$http.get("/api/user/quota").then((res) => {
$scope.quota = res.data;
}, console.error);
}
getQuota();
$scope.removeRepository = (repo) => {
if (
confirm(
@@ -596,9 +627,8 @@ angular
image: true,
pdf: true,
notebook: true,
loc: true,
link: true,
mode: "download",
mode: "GitHubDownload",
};
$scope.options.expirationDate.setDate(
$scope.options.expirationDate.getDate() + 90
@@ -630,10 +660,10 @@ angular
$scope.repoId = $routeParams.repoId;
$http.get("/api/repo/" + $scope.repoId).then(
async (res) => {
$scope.repoUrl = "https://github.com/" + res.data.fullName;
$scope.repoUrl = "https://github.com/" + res.data.source.fullName;
$scope.terms = res.data.terms.join("\n");
$scope.branch = res.data.branch;
$scope.terms = res.data.options.terms.join("\n");
$scope.branch = res.data.source.branch.name;
$scope.options = res.data.options;
$scope.conference = res.data.conference;
if (res.data.options.expirationDate) {
@@ -648,11 +678,11 @@ angular
}
$scope.details = (
await $http.get(`/api/repo/${res.data.fullName}/`)
await $http.get(`/api/repo/${res.data.source.fullName}/`)
).data;
await getReadme();
await $scope.getBranches();
await getReadme();
anonymize();
$scope.$apply();
},
@@ -709,24 +739,32 @@ angular
};
$('[data-toggle="tooltip"]').tooltip();
$scope.$watch("branch", (v) => {
if ($scope.branches && $scope.branches[$scope.branch]) {
$scope.commit = $scope.branches[$scope.branch].commit.sha;
}
if ($scope.details && $scope.details.has_page) {
$scope.anonymize.page.disabled(false);
$scope.$watch("branch", async (v) => {
const selected = $scope.branches.filter(
(f) => f.name == $scope.branch
)[0];
if ($scope.details && $scope.details.hasPage) {
$scope.anonymize.page.$$element[0].disabled = false;
if ($scope.details.pageSource.branch != $scope.branch) {
$scope.anonymize.page.disabled(true);
$scope.anonymize.page.$$element[0].disabled = true;
}
}
if (selected) {
$scope.commit = selected.commit;
$scope.readme = selected.readme;
await getReadme();
anonymize();
$scope.$apply();
}
});
$scope.$watch("options.mode", (v) => {
if (v == "stream") {
$scope.options.loc = false;
$scope.anonymize.loc.$$element[0].disabled = true;
if (v == "GitHubStream") {
$scope.options.page = false;
$scope.anonymize.page.$$element[0].disabled = true;
} else {
$scope.anonymize.loc.$$element[0].disabled = false;
$scope.anonymize.page.$$element[0].disabled = false;
}
});
@@ -749,10 +787,12 @@ angular
);
$scope.branches = branches.data;
if (!$scope.branch) {
$scope.branch = $scope.details.default_branch;
$scope.branch = $scope.details.defaultBranch;
}
if ($scope.branches[$scope.branch]) {
$scope.commit = $scope.branches[$scope.branch].commit.sha;
const selected = $scope.branches.filter((b) => b.name == $scope.branch);
if (selected.length > 0) {
$scope.commit = selected[0].commit;
$scope.readme = selected[0].readme;
}
$scope.$apply();
};
@@ -771,12 +811,10 @@ angular
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/`);
$scope.details = res.data;
if ($scope.details.size > 1024 * 8) {
$scope.options.mode = "stream";
$scope.options.loc = false;
$scope.options.mode = "GitHubStream";
$scope.anonymize.mode.$$element[0].disabled = true;
$scope.anonymize.loc.$$element[0].disabled = true;
}
$scope.repoId = $scope.details.name + "-" + generateRandomId(4);
$scope.repoId = $scope.details.repo + "-" + generateRandomId(4);
await $scope.getBranches();
} catch (error) {
if (error.data) {
@@ -790,13 +828,16 @@ angular
}
}
async function getReadme() {
async function getReadme(force) {
if ($scope.readme) return $scope.readme;
const o = parseGithubUrl($scope.repoUrl);
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/readme`);
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/readme`, {
params: { force: force === true ? "1" : "0", branch: $scope.branch },
});
$scope.readme = res.data;
}
async function anonymize() {
function anonymize() {
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
let content = $scope.readme;
@@ -890,6 +931,7 @@ angular
function getRepo() {
const o = parseGithubUrl($scope.repoUrl);
$scope.options.pageSource = $scope.details.pageSource;
return {
repoId: $scope.repoId,
terms: $scope.terms.trim().split("\n"),
@@ -1200,7 +1242,7 @@ angular
getFiles(() => {
updateContent();
if (options.mode == "download") {
if (options.mode == "GitHubDownload") {
getStats();
}
});

View File

@@ -1,102 +0,0 @@
// Legacy authentication module (pre-TypeScript): GitHub OAuth via passport
// with sessions stored in Redis. Exports the session middleware, passport,
// the OAuth router, and an auth guard.
const redis = require("redis");
const passport = require("passport");
const session = require("express-session");
const redisStore = require("connect-redis")(session);
const GitHubStrategy = require("passport-github2").Strategy;
const express = require("express");
const router = express.Router();
const db = require("../utils/database");
const config = require("../config");
// Express middleware: let authenticated requests through, otherwise answer
// 401 with a JSON error marker the frontend understands.
function ensureAuthenticated(req, res, next) {
  if (req.isAuthenticated()) {
    return next();
  }
  res.status(401).json({ error: "not_connected" });
}
// Store the whole user object in the session, dropping the raw GitHub
// profile payload (_json) to keep the session small.
passport.serializeUser(function(user, done) {
  delete user.profile._json;
  done(null, user);
});
passport.deserializeUser(function(obj, done) {
  done(null, obj);
});
// GitHub OAuth strategy: upsert the user record on every login, then hand
// passport a session object carrying the tokens and profile.
passport.use(
  new GitHubStrategy(
    {
      clientID: config.CLIENT_ID,
      clientSecret: config.CLIENT_SECRET,
      callbackURL: config.AUTH_CALLBACK,
    },
    async (accessToken, refreshToken, profile, done) => {
      try {
        await db
          .get()
          .collection("users")
          .updateOne(
            { username: profile.username },
            {
              $set: {
                username: profile.username,
                profile,
                accessToken,
                refreshToken,
              },
            },
            { upsert: true }
          );
      } catch (error) {
        // A failed DB write is logged but does not block the login.
        console.error(error);
      } finally {
        done(null, {
          username: profile.username,
          accessToken,
          refreshToken,
          profile,
        });
      }
    }
  )
);
const rediscli = redis.createClient({
  host: "redis",
  ttl: 260,
});
const appSession = session({
  // SECURITY NOTE(review): hard-coded session secret; this should come
  // from configuration/environment in production.
  secret: "keyboard cat",
  store: new redisStore({
    client: rediscli,
  }),
  saveUninitialized: false,
  resave: false,
});
// /github/login starts the OAuth dance; the "repo" scope is needed to read
// private repositories on the user's behalf.
router.get(
  "/login",
  passport.authenticate("github", { scope: ["repo"] }), // Note the scope here
  function(req, res) {
    res.redirect("/");
  }
);
// /github/auth is the OAuth callback URL (config.AUTH_CALLBACK).
router.get(
  "/auth",
  passport.authenticate("github", { failureRedirect: "/" }),
  function(req, res) {
    res.redirect("/");
  }
);
module.exports.ensureAuthenticated = ensureAuthenticated;
module.exports.passport = passport;
module.exports.session = appSession;
module.exports.router = router;

View File

@@ -1,166 +0,0 @@
const express = require("express");
const path = require("path");
const db = require("../utils/database");
const fileUtils = require("../utils/file");
const repoUtils = require("../utils/repository");
const githubUtils = require("../utils/github");
const router = express.Router();
/**
 * Anonymize the repository identified by options.repoConfig (or looked up
 * via options.repoId).
 *
 * - removed/expired repositories are left untouched;
 * - repositories past their expiration date are marked expired and removed;
 * - when updates are enabled and the repo was last viewed more than a day
 *   ago, the anonymized copy is refreshed; otherwise it is (re)downloaded.
 *
 * Throws string error codes ("repo_not_found", "repository_expired") that
 * the routes below forward as JSON error payloads — kept as strings for
 * backward compatibility with those handlers.
 */
async function anonymizeRepository(options) {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  if (repoConfig.status == "removed" || repoConfig.status == "expired") {
    return;
  }
  if (repoConfig.options.expirationMode != "never") {
    if (repoConfig.options.expirationDate <= new Date()) {
      console.log(repoConfig.repoId, "The repository is expired");
      await repoUtils.updateStatus(repoConfig, "expired");
      await repoUtils.removeRepository(repoConfig);
      throw "repository_expired";
    }
  }
  const lastView = repoConfig.lastView;
  const yesterday = new Date();
  yesterday.setDate(yesterday.getDate() - 1);
  if (repoConfig.options.update && lastView < yesterday) {
    console.log(repoConfig.repoId, "check for updates in the repository.");
    try {
      await repoUtils.updateAnonymizedRepository(repoConfig);
    } catch (error) {
      // Bug fix: the original had an empty try block (making this catch
      // unreachable, with the update call outside it) and the catch
      // referenced the undefined `req`, which would itself throw a
      // ReferenceError. Log with what is actually in scope.
      console.error("Error while updating the repository.");
      console.error(repoConfig.repoId, error);
    }
  } else {
    await githubUtils.downloadRepoAndAnonymize(repoConfig);
  }
}
// GET /:repoId/files — file listing of a ready anonymized repository.
router.get("/:repoId/files", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  if (repoConfig.status != "ready") {
    return res.status(500).json({ error: "repo_not_ready" });
  }
  try {
    const files = await fileUtils.getFileList({ repoConfig });
    return res.json(files);
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// GET /:repoId/stats — language statistics; rejected in stream mode since
// the repository content is not fully materialized on disk.
router.get("/:repoId/stats", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  if (repoConfig.status != "ready") {
    return res.status(500).json({ error: "repo_not_ready" });
  }
  if (repoConfig.options.mode == "stream") {
    return res.status(500).json({ error: "stream_not_supported" });
  }
  try {
    const stats = await fileUtils.getStats({ repoConfig });
    return res.json(stats.languages);
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// GET /:repoId/options — anonymization options; also triggers the
// (re-)anonymization so the first page load prepares the repository.
router.get("/:repoId/options", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  try {
    try {
      await anonymizeRepository({ repoConfig });
    } catch (error) {
      // Anonymization failures surface through the status checks below.
      console.log("Error during the anonymization of the repository");
      console.error(req.path, error);
    }
    if (repoConfig.status == "removed") {
      throw "repository_expired";
    }
    if (repoConfig.status == "expired") {
      // In redirect mode, expired repositories point back to GitHub.
      if (repoConfig.options.expirationMode == "redirect") {
        repoConfig.options.url = "https://github.com/" + repoConfig.fullName;
      } else {
        throw "repository_expired";
      }
    } else if (repoConfig.status != "ready") {
      throw "repository_not_ready";
    }
    return res.json(repoConfig.options);
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// GET /:repoId/file/:path* — serve one anonymized file, bumping the view
// counters on success.
router.get("/:repoId/file/:path*", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  if (repoConfig.status != "ready") {
    return res.status(500).json({ error: "repo_not_ready" });
  }
  // Rebuild the full requested path (":path*" splits it across params).
  let requestPath = req.params.path;
  if (req.params[0]) {
    requestPath += req.params[0];
  }
  try {
    // NOTE(review): presumably isFilePathValid also guards against path
    // escapes outside the anonymized folder — confirm in utils/file.
    const isValid = await fileUtils.isFilePathValid({
      repoConfig,
      path: requestPath,
    });
    if (isValid) {
      await db
        .get("anonymized_repositories")
        .updateOne(
          { repoId: repoConfig.repoId },
          { $set: { lastView: new Date() }, $inc: { pageView: 1 } }
        );
      const ppath = path.join(
        repoUtils.getAnonymizedPath(repoConfig.repoId),
        requestPath
      );
      return res.sendFile(ppath, { dotfiles: "allow" });
    } else {
      return res.status(404).json({ error: "file_not_found" });
    }
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).send({ error });
  }
});
module.exports = router;

View File

@@ -1,366 +0,0 @@
// Legacy user API routes (pre-TypeScript): everything here requires an
// authenticated session (see router.use below).
const ofs = require("fs");
const fs = require("fs").promises;
const express = require("express");
const gh = require("parse-github-url");
const arrayEquals = require("array-equal");
const connection = require("./connection");
const githubUtils = require("../utils/github");
const db = require("../utils/database");
const repoUtils = require("../utils/repository");
const config = require("../config");
const router = express.Router();
// user needs to be connected for all user API
router.use(connection.ensureAuthenticated);
// claim a repository
// POST /claim — attach an existing anonymized repository to the logged-in
// user, after checking the supplied GitHub URL matches the repository.
router.post("/claim", async (req, res) => {
  try {
    if (!req.body.repoId) {
      return res.status(500).json({ error: "repoId_not_defined" });
    }
    if (!req.body.repoUrl) {
      return res.status(500).json({ error: "repoUrl_not_defined" });
    }
    const repoConfig = await repoUtils.getConfig(req.body.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    const repo = gh(req.body.repoUrl);
    if (repoConfig.fullName != repo.repository) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    console.log(`${req.user.username} claims ${repoConfig.fullName}.`);
    await db
      .get("anonymized_repositories")
      .updateOne(
        { repoId: repoConfig.repoId },
        { $set: { owner: req.user.username } }
      );
    return res.send("Ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// GET /:repoId/ — details of one anonymized repository of the current user.
router.get("/:repoId/", async (req, res) => {
  try {
    const repository = await repoUtils.getAnonymizedRepoDetails(
      req.params.repoId,
      req.user
    );
    if (repository) {
      return res.json(repository);
    }
    res.status(404).send({error: "repo_not_found"});
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// update a repository
// POST /:repoId/ — update the configuration of an anonymized repository
// owned by the authenticated user, then re-anonymize it. Responds "ok"
// before the (slow) anonymization runs; failures there are recorded on the
// repository status instead.
router.post("/:repoId/", async (req, res) => {
  const repoUpdate = req.body;
  let repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  if (repoConfig.owner != req.user.username) {
    return res.status(401).json({ error: "not_authorized" });
  }
  // Validate the payload before touching anything.
  if (!repoUpdate.branch) {
    return res.status(500).json({ error: "branch_not_specified" });
  }
  if (!repoUpdate.options) {
    return res.status(500).json({ error: "options_not_provided" });
  }
  if (!Array.isArray(repoUpdate.terms)) {
    return res.status(500).send({ error: "invalid_terms_format" });
  }
  if (!/^[a-f0-9]+$/.test(repoUpdate.commit)) {
    return res.status(500).send({ error: "invalid_commit_format" });
  }
  try {
    const details = await repoUtils.getRepoDetails({
      repoConfig,
      force: true,
      token: req.user.accessToken,
    });
    if (repoUpdate.options.mode == "download") {
      // details.size is in kilobytes
      if (details.size > config.MAX_REPO_SIZE) {
        return res.status(500).send({ error: "invalid_mode" });
      }
    }
    // A new target commit invalidates the current anonymized copy.
    if (repoUpdate.commit != repoConfig.commit) {
      repoUpdate.anonymizeDate = new Date();
      await repoUtils.removeRepository(repoConfig);
    }
    // Changed terms or link/image options also require re-anonymization.
    if (
      !arrayEquals(repoUpdate.terms, repoConfig.terms) ||
      repoUpdate.options.link != repoConfig.options.link ||
      repoUpdate.options.image != repoConfig.options.image
    ) {
      repoUpdate.anonymizeDate = new Date();
      if (ofs.existsSync(repoUtils.getAnonymizedPath(repoConfig.repoId))) {
        await fs.rm(repoUtils.getAnonymizedPath(repoConfig.repoId), {
          recursive: true,
          force: true,
        });
      }
    }
    // Whitelist the fields that may be updated.
    const data = {
      terms: repoUpdate.terms,
      branch: repoUpdate.branch,
      commit: repoUpdate.commit,
      options: {
        expirationMode: repoUpdate.options.expirationMode,
        expirationDate: repoUpdate.options.expirationDate,
        update: repoUpdate.options.update,
        image: repoUpdate.options.image,
        pdf: repoUpdate.options.pdf,
        notebook: repoUpdate.options.notebook,
        loc: repoUpdate.options.loc,
        link: repoUpdate.options.link,
        mode: repoUpdate.options.mode,
        page: repoUpdate.options.page,
      },
    };
    if (repoUpdate.options.page) {
      data.options.pageSource = details.pageSource;
    }
    await db.get("anonymized_repositories").updateOne(
      {
        repoId: repoConfig.repoId,
      },
      {
        $set: data,
      }
    );
    // Bug fix: reload the config with the route's repoId. The original read
    // repoUpdate.repoId from the unvalidated request body; when the body
    // omits repoId, repoConfig becomes null and the status updates below
    // crash.
    repoConfig = await repoUtils.getConfig(req.params.repoId);
    await repoUtils.updateStatus(repoConfig, "preparing");
    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
    return res.status(500).json({ error });
  }
  // Anonymize after responding; failures are recorded on the repository.
  try {
    await githubUtils.downloadRepoAndAnonymize(repoConfig);
    await repoUtils.updateStatus(repoConfig, "ready");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
  }
});
// refresh a repository
router.post("/:repoId/refresh", async (req, res) => {
try {
const repoConfig = await repoUtils.getConfig(req.params.repoId);
if (repoConfig == null) {
return res.status(500).json({ error: "repo_not_found" });
}
if (repoConfig.owner != req.user.username) {
return res.status(401).json({ error: "not_authorized" });
}
await repoUtils.updateAnonymizedRepository(repoConfig);
return res.send("ok");
} catch (error) {
console.error(req.path, error);
return res.status(500).json({ error });
}
});
// Delete an anonymized repository and its cached content (owner only).
router.delete("/:repoId/", async (req, res) => {
  try {
    const config = await repoUtils.getConfig(req.params.repoId);
    if (config == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    if (req.user.username != config.owner) {
      return res.status(401).json({ error: "not_authorized" });
    }
    // Flag the repository as removed before deleting its content.
    await repoUtils.updateStatus(config, "removed");
    await repoUtils.removeRepository(config);
    console.log(`${req.params.repoId} is removed`);
    return res.json("ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// Fetch the details of a GitHub repository (use ?force=1 to bypass cache).
router.get("/:owner/:repo/", async (req, res) => {
  const { owner, repo } = req.params;
  try {
    const details = await repoUtils.getRepoDetails({
      owner,
      repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (!details) {
      return res.status(404).send({ error: "repo_not_found" });
    }
    return res.json(details);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// List the branches of a GitHub repository (use ?force=1 to bypass cache).
router.get("/:owner/:repo/branches", async (req, res) => {
  const { owner, repo } = req.params;
  try {
    const branches = await repoUtils.getRepoBranches({
      owner,
      repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (!branches) {
      return res.status(404).send({ error: "repo_not_found" });
    }
    return res.json(branches);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// Fetch the README of a GitHub repository (use ?force=1 to bypass cache).
router.get("/:owner/:repo/readme", async (req, res) => {
  try {
    const readme = await repoUtils.getRepoReadme({
      owner: req.params.owner,
      repo: req.params.repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (readme) {
      return res.send(readme);
    }
    res.status(404).send({ error: "repo_not_found" });
  } catch (error) {
    // Log the error with the request path, consistently with the other
    // handlers (the log call was missing here).
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// Anonymize a new repository.
//
// Validates the submitted configuration (repoId format, branch, options,
// terms, commit), stores it in the database, then starts the download and
// anonymization in the background.
router.post("/", async (req, res) => {
  const repoConfig = req.body;
  let data = null;
  try {
    const repository = await repoUtils.getConfig(repoConfig.repoId);
    const cacheExist = ofs.existsSync(
      repoUtils.getOriginalPath(repoConfig.repoId)
    );
    // The id is taken only if a configuration AND its cache exist.
    if (repository && cacheExist) {
      return res.status(500).send({ error: "repoId_already_used" });
    }
    const validCharacters = /^[0-9a-zA-Z\-\_]+$/;
    if (
      !repoConfig.repoId.match(validCharacters) ||
      repoConfig.repoId.length < 3
    ) {
      return res.status(500).send({ error: "invalid_repoId" });
    }
    if (!repoConfig.branch) {
      return res.status(500).json({ error: "branch_not_specified" });
    }
    if (!repoConfig.options) {
      return res.status(500).json({ error: "options_not_provided" });
    }
    if (!Array.isArray(repoConfig.terms)) {
      return res.status(500).send({ error: "invalid_terms_format" });
    }
    // The commit is optional (the branch head is used as fallback below),
    // so only validate its format when it is provided. Previously the
    // regex ran on undefined and rejected commit-less requests.
    if (repoConfig.commit && !/^[a-f0-9]+$/.test(repoConfig.commit)) {
      return res.status(500).send({ error: "invalid_commit_format" });
    }
    await repoUtils.getRepoBranches({
      repoConfig,
      token: req.user.accessToken,
    });
    const details = await repoUtils.getRepoDetails({
      repoConfig,
      token: req.user.accessToken,
    });
    if (details.branches[repoConfig.branch] == null) {
      return res.status(500).send({ error: "invalid_branch" });
    }
    if (repoConfig.options.mode == "download") {
      // details.size is in kilobytes
      if (details.size > config.MAX_REPO_SIZE) {
        return res.status(500).send({ error: "non_supported_mode" });
      }
    }
    data = {
      repoId: repoConfig.repoId,
      fullName: repoConfig.fullName,
      status: "preparing",
      terms: repoConfig.terms,
      owner: req.user.profile.username,
      token: req.user.accessToken,
      branch: repoConfig.branch,
      conference: repoConfig.conference,
      // Default to the head of the selected branch when no commit is given.
      commit: repoConfig.commit
        ? repoConfig.commit
        : details.branches[repoConfig.branch].commit.sha,
      anonymizeDate: new Date(),
      options: {
        expirationMode: repoConfig.options.expirationMode,
        expirationDate: repoConfig.options.expirationDate,
        update: repoConfig.options.update,
        image: repoConfig.options.image,
        pdf: repoConfig.options.pdf,
        notebook: repoConfig.options.notebook,
        loc: repoConfig.options.loc,
        link: repoConfig.options.link,
        mode: repoConfig.options.mode,
        page: repoConfig.options.page,
      },
    };
    if (repoConfig.options.page) {
      data.options.pageSource = details.pageSource;
    }
    await db.get("anonymized_repositories").updateOne(
      {
        repoId: data.repoId,
      },
      {
        $set: data,
      },
      { upsert: true }
    );
    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
    return res.status(500).json({ error });
  }
  // The response is already sent: anonymize in the background and record
  // the outcome in the repository status.
  try {
    await githubUtils.downloadRepoAndAnonymize(data);
    await repoUtils.updateStatus(repoConfig, "ready");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", "unable_to_anonymize");
  }
});
module.exports = router;

View File

@@ -1,134 +0,0 @@
const express = require("express");
const { Octokit } = require("@octokit/rest");
const connection = require("./connection");
const db = require("../utils/database");
const repoUtils = require("../utils/repository");
const router = express.Router();
// user needs to be connected for all user API
router.use(connection.ensureAuthenticated);
// Log the current user out of the session and return to the home page.
router.get("/logout", async (req, res) => {
  try {
    // NOTE(review): passport >= 0.6 requires a callback for req.logout() —
    // confirm the installed passport version supports the synchronous form.
    req.logout();
    res.redirect("/");
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// Return the profile (username and avatar) of the connected user.
router.get("/", async (req, res) => {
  try {
    const photos = req.user.profile.photos;
    // The GitHub profile may not include any photo; previously the bare
    // `.length` access crashed when `photos` was undefined.
    const photo = photos && photos.length ? photos[0].value : null;
    res.json({ username: req.user.profile.username, photo });
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// Return the default anonymization options of the connected user.
router.get("/default", async (req, res) => {
  try {
    const d = await db
      .get("users")
      .findOne({ username: req.user.username }, { projection: { default: 1 } });
    // The user record may not exist yet: answer null instead of crashing
    // on `d.default` (which previously produced a 500).
    res.json(d ? d.default : null);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// Save the default anonymization options of the connected user.
router.post("/default", async (req, res) => {
  try {
    const newDefault = req.body;
    await db
      .get("users")
      .updateOne(
        { username: req.user.username },
        { $set: { default: newDefault } }
      );
    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// List the anonymized repositories of the connected user, marking as
// expired the ones whose expiration date has passed.
router.get("/anonymized_repositories", async (req, res) => {
  try {
    // Tokens and file trees are heavy/sensitive: keep them out of the answer.
    const projection = { token: 0, files: 0, originalFiles: 0 };
    const repos = await db
      .get("anonymized_repositories")
      .find({ owner: req.user.username }, { projection })
      .toArray();
    for (const repo of repos) {
      const opts = repo.options;
      if (opts.expirationDate) {
        opts.expirationDate = new Date(opts.expirationDate);
      }
      const isExpired =
        opts.expirationMode != "never" &&
        opts.expirationDate != null &&
        opts.expirationDate < new Date();
      if (isExpired) {
        await repoUtils.updateStatus({ repoId: repo.repoId }, "expired");
        repo.status = "expired";
      }
    }
    res.json(repos);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
// List all the GitHub repositories of the connected user.
// The list is cached on the user record; use ?force=1 to refresh it.
router.get("/all_repositories", async (req, res) => {
  try {
    const user = await db
      .get()
      .collection("users")
      .findOne(
        { username: req.user.username },
        { projection: { repositories: 1 } }
      );
    if (!user) {
      // The missing `return` here previously let the handler continue
      // after the 401 and crash on `user.repositories`.
      return res.status(401).send({ error: "user_not_found" });
    }
    if (user.repositories && req.query.force !== "1") {
      return res.json(user.repositories);
    }
    // Cache miss (or forced refresh): fetch the full list from GitHub.
    const octokit = new Octokit({ auth: req.user.accessToken });
    const repositories = await octokit.paginate(
      octokit.repos.listForAuthenticatedUser,
      {
        visibility: "all",
        sort: "pushed",
        per_page: 100,
      }
    );
    try {
      await db
        .get()
        .collection("users")
        .updateOne(
          { username: req.user.profile.username },
          { $set: { repositories } }
        );
      res.json(repositories);
    } catch (error) {
      res.status(500).send(error);
    }
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
module.exports = router;

View File

@@ -1,79 +0,0 @@
const express = require("express");
const path = require("path");
const fileUtils = require("../utils/file");
const repoUtils = require("../utils/repository");
const router = express.Router();
/**
 * Serve a file of the anonymized repository as a static website.
 *
 * The requested path is resolved inside the configured page-source folder
 * of the repository; the file is only served when the page option is
 * activated and the repository is neither expired nor removed.
 */
async function webView(req, res) {
  try {
    const repoId = req.params.repoId;
    const repoConfig = await repoUtils.getConfig(repoId);
    if (!repoConfig.options.page) {
      throw "page_not_activated";
    }
    if (!repoConfig.options.pageSource) {
      throw "page_not_activated";
    }
    if (repoConfig.status == "expired") {
      throw "repository_expired";
    }
    if (repoConfig.status == "removed") {
      throw "repository_expired";
    }
    // The page files are only available for the anonymized branch.
    if (repoConfig.options.pageSource.branch != repoConfig.branch) {
      throw "page_not_supported_on_different_branch";
    }
    // Path of the requested file, relative to the repository root
    // (everything after the repoId in the request URL).
    let requestPath = req.path.substring(
      req.path.indexOf(repoId) + repoId.length
    );
    // Folder request: serve its index.html.
    if (requestPath[requestPath.length - 1] == "/") {
      requestPath = path.join(requestPath, "index.html");
    }
    // TODO: handle website that are not in the docs folder (master, docs, gh-pages)
    requestPath = path.join(repoConfig.options.pageSource.path, requestPath);
    if (await fileUtils.isFilePathValid({ repoConfig, path: requestPath })) {
      const ppath = path.join(
        repoUtils.getAnonymizedPath(repoConfig.repoId),
        requestPath
      );
      // dotfiles: "allow" lets hidden files be served as well.
      return res.sendFile(ppath, { dotfiles: "allow" }, (err) => {
        if (err) {
          if (err.path) {
            // sendFile failed for that disk path: retry by redirecting to
            // the same path expressed relative to the docs folder.
            // NOTE(review): the "docs" folder is hard-coded here — confirm
            // this matches the TODO above.
            const newPath = path.join(
              req.path,
              err.path.replace(
                path.join(
                  repoUtils.getAnonymizedPath(repoConfig.repoId),
                  "docs"
                ),
                ""
              )
            );
            if (newPath != req.path) {
              return res.redirect(newPath);
            }
          }
        }
        console.log(err);
      });
    }
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).send({ error });
  }
  // Invalid or unknown path.
  return res.status(404).send("file_not_found");
}
// Serve any file of the anonymized website.
router.get("/:repoId/*", webView);
// Normalize /w/:repoId to /w/:repoId/ so relative links resolve correctly.
router.get("/:repoId", (req, res) => {
  res.redirect("/w" + req.url + "/")
});
module.exports = router;

168
src/AnonymizedFile.ts Normal file
View File

@@ -0,0 +1,168 @@
import * as path from "path";
import * as express from "express";
import * as stream from "stream";
import Repository from "./Repository";
import { Tree, TreeFile } from "./types";
import storage from "./storage";
import config from "../config";
import { anonymizeStream } from "./anonymize-utils";
/**
 * Represent a file in an anonymized repository.
 *
 * A file is identified either by its original `path` or by its
 * `anonymizedPath` (the path with the anonymization terms masked).
 */
export default class AnonymizedFile {
  repository: Repository;
  // Git blob sha of the file, when known.
  sha?: string;
  // File size, when known (unit as reported by the source tree).
  size?: number;
  // Original (de-anonymized) path; resolved lazily by getOriginalPath().
  path?: string;
  // Path with the anonymization terms masked.
  anonymizedPath: string;

  constructor(
    repository: Repository,
    data: {
      path?: string;
      anonymizedPath: string;
      sha?: string;
      size?: number;
    }
  ) {
    this.repository = repository;
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    this.anonymizedPath = data.anonymizedPath;
    if (data.path) {
      this.path = data.path;
    }
    if (!data.anonymizedPath && this.path) {
      // anonymize the path
      this.anonymizedPath = this.path;
      for (let term of this.repository.options.terms) {
        if (term.trim() == "") {
          continue;
        }
        this.anonymizedPath = this.anonymizedPath.replace(
          new RegExp(term, "gi"),
          config.ANONYMIZATION_MASK
        );
      }
    }
    // NOTE(review): this.sha and this.size are always undefined at this
    // point, so both guards are effectively no-ops — verify intent.
    if (!this.sha) this.sha = data.sha;
    if (!this.size) this.size = data.size;
  }

  /**
   * Stream the anonymized content of the file into an HTTP response.
   */
  async send(res: express.Response): Promise<void> {
    try {
      const s = await this.anonymizedContent();
      s.on("error", (err) => {
        console.log(err);
        res.status(500).send({ error: err.message });
      });
      s.pipe(res);
    } catch (error) {
      console.log("Error during anonymization", error);
      res.status(500).send({ error: error.message });
    }
  }

  /**
   * Check whether the file type is allowed by the repository options
   * (pdf and image files can be disabled per repository).
   */
  async isFileSupported() {
    this.path = await this.getOriginalPath();
    const filename = path.basename(this.path);
    const extensions = filename.split(".").reverse();
    const extension = extensions[0].toLowerCase();
    if (!this.repository.options.pdf && extension == "pdf") {
      return false;
    }
    if (
      !this.repository.options.image &&
      (extension == "png" ||
        extension == "ico" ||
        extension == "jpg" ||
        extension == "jpeg" ||
        extension == "gif")
    ) {
      return false;
    }
    return true;
  }

  // Location of the original (non-anonymized) file in the cache storage.
  // Requires the original path to be resolved first.
  get originalCachePath() {
    if (!this.path) throw "path_not_defined";
    return path.join(
      this.repository.originalCachePath,
      this.path
    );
  }

  /**
   * Stream the original content: from the storage cache when available,
   * otherwise from the repository source.
   *
   * @throws file_too_big when the file exceeds MAX_FILE_SIZE
   */
  async content(): Promise<stream.Readable> {
    if (this.size && this.size > config.MAX_FILE_SIZE) {
      throw new Error("file_too_big");
    }
    if (await storage.exists(this.originalCachePath)) {
      return storage.read(this.originalCachePath);
    } else {
      // NOTE(review): source may be undefined here, in which case this
      // resolves to undefined despite the Readable return type — verify.
      return await this.repository.source?.getFileContent(this);
    }
  }

  /**
   * Stream the anonymized content of the file.
   */
  async anonymizedContent() {
    await this.getOriginalPath();
    if (!this.path) throw new Error("path_not_specified");
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    const rs = await this.content();
    const contentStream = rs.pipe(anonymizeStream(this.path, this.repository));
    return contentStream;
  }

  /**
   * De-anonymize the path
   *
   * Walks the anonymized tree following anonymizedPath to find the file's
   * sha, then maps the sha back to its original path via the original tree.
   *
   * @returns the origin relative path of the file
   */
  async getOriginalPath(): Promise<string> {
    if (!this.anonymizedPath) throw new Error("path_not_specified");
    const files = await this.repository.files();
    const paths = this.anonymizedPath.trim().split("/");
    // Walk the anonymized tree one path component at a time.
    let current: any = await this.repository.anonymizedFiles();
    for (let i = 0; i < paths.length; i++) {
      const fileName = paths[i];
      if (fileName == "") {
        continue;
      }
      if (current[fileName]) {
        current = current[fileName];
      } else {
        throw new Error("file_not_found");
      }
    }
    // Build a sha -> original path index over the whole original tree.
    function tree2sha(
      tree: any,
      output: { [key: string]: string } = {},
      parent: string = ""
    ): { [key: string]: string } {
      for (let i in tree) {
        const sha = tree[i].sha as string;
        const size = tree[i].size as number;
        // A leaf (file) carries both sha and size.
        if (sha != null && size != null) {
          output[sha] = path.join(parent, i);
        } else if (tree[i].child) {
          tree2sha(tree[i].child as Tree, output, path.join(parent, i));
        } else {
          tree2sha(tree[i] as Tree, output, path.join(parent, i));
        }
      }
      return output;
    }
    const shaTree = tree2sha(files);
    if (!current.sha || !shaTree[current.sha]) {
      throw new Error("file_not_found");
    }
    this.path = shaTree[current.sha];
    this.sha = current.sha;
    if ((current as TreeFile).size) this.size = (current as TreeFile).size;
    return this.path;
  }
}

280
src/Repository.ts Normal file
View File

@@ -0,0 +1,280 @@
import * as path from "path";
import storage from "./storage";
import { RepositoryStatus, Source, Tree } from "./types";
import * as stream from "stream";
import User from "./User";
import GitHubStream from "./source/GitHubStream";
import GitHubDownload from "./source/GitHubDownload";
import Zip from "./source/ZIP";
import { anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
import { anonymizeStream } from "./anonymize-utils";
import GitHubBase from "./source/GitHubBase";
/**
 * An anonymized repository: wraps the database document and delegates
 * content retrieval to the configured source (GitHub download/stream or
 * uploaded zip archive).
 */
export default class Repository {
  private _model: IAnonymizedRepositoryDocument;
  source: Source;
  owner: User;

  constructor(data: IAnonymizedRepositoryDocument) {
    this._model = data;
    // Instantiate the source strategy declared in the document.
    switch (data.source.type) {
      case "GitHubDownload":
        this.source = new GitHubDownload(data.source, this);
        break;
      case "GitHubStream":
        this.source = new GitHubStream(data.source, this);
        break;
      case "Zip":
        this.source = new Zip(data.source, this);
        break;
      default:
        throw new Error("unsupported_source");
    }
    this.owner = new User(new UserModel({ username: data.owner }));
  }

  /**
   * Get the anonymized file tree
   * @param opt force to get an updated list of files
   * @returns The anonymized file tree
   */
  async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> {
    const terms = this._model.options.terms || [];
    // Recursively rebuild the tree with every path component masked.
    function anonymizeTreeRecursive(tree: Tree): any {
      // A leaf (file) carries an integer size: keep it as-is.
      if (Number.isInteger(tree.size)) {
        return tree;
      }
      const output: any = {};
      let current: any = tree;
      if (current.child) {
        current = current.child;
      }
      for (const file in current) {
        const anonymizedPath = anonymizePath(file, terms);
        output[anonymizedPath] = anonymizeTreeRecursive(current[file]);
      }
      return output;
    }
    return anonymizeTreeRecursive(await this.files(opt));
  }

  /**
   * Get the file tree
   *
   * @param opt force to get an updated list of files
   * @returns The file tree
   */
  async files(opt?: { force?: boolean }) {
    // Serve the cached tree unless a refresh is forced.
    if (
      this._model.originalFiles &&
      Object.keys(this._model.originalFiles).length !== 0 &&
      !opt?.force
    ) {
      return this._model.originalFiles;
    }
    const files = await this.source.getFiles();
    this._model.originalFiles = files;
    // Invalidate the cached size; computeSize() recomputes it lazily.
    this._model.size = 0;
    await this._model.save();
    return files;
  }

  /**
   * Check that the repository is accessible.
   *
   * Expires the repository if its expiration date has passed.
   *
   * @throws repository_expired when the repository is expired or removed
   * @throws repository_not_ready when the repository is not ready yet
   */
  check() {
    // Fixed: the comparison was inverted (`expirationDate > new Date()`),
    // which expired every repository whose date was still in the future
    // (see the matching `<` comparison in User.getRepositories).
    if (
      this._model.options.expirationMode != "never" &&
      this._model.options.expirationDate != null &&
      this._model.options.expirationDate < new Date()
    ) {
      // Fire-and-forget: the status is set synchronously on the model
      // before the save, so the checks below see "expired".
      this.updateStatus("expired");
    }
    if (this._model.status == "expired") {
      throw new Error("repository_expired");
    }
    if (this._model.status == "removed") {
      throw new Error("repository_expired");
    }
    if (this._model.status != "ready") {
      throw new Error("repository_not_ready");
    }
  }

  /**
   * Compress and anonymize the repository
   *
   * @returns A stream of anonymized repository compressed
   */
  zip(): stream.Readable {
    return storage.archive(this.originalCachePath, {
      format: "zip",
      // NOTE(review): 'Transformer' is not imported in this file — confirm
      // the type actually resolves in the project.
      fileTransformer: (filename) =>
        anonymizeStream(filename, this) as Transformer,
    });
  }

  /**
   * Update the repository if a new commit exists.
   *
   * Runs at most once per day, only when the update option is enabled;
   * only GitHub-based sources are currently supported.
   *
   * @returns void
   */
  async updateIfNeeded(): Promise<void> {
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);
    if (this._model.options.update && this._model.lastView < yesterday) {
      if (this._model.status != "ready") {
        throw new Error("repo_not_ready");
      }
      // Only GitHubBase can be updated for the moment.
      if (this.source instanceof GitHubBase) {
        const branches = await this.source.githubRepository.branches({
          force: true,
          accessToken: await this.source.getToken(),
        });
        const branch = this.source.branch;
        const remote = branches.filter((f) => f.name == branch.name)[0];
        if (remote == null) {
          // The branch no longer exists upstream: nothing to update
          // (previously this crashed on `[0].commit`).
          console.log(`${this._model.repoId}: branch not found upstream`);
          return;
        }
        if (branch.commit == remote.commit) {
          console.log(`${this._model.repoId} is up to date`);
          return;
        }
        this._model.source.commit = remote.commit;
        this._model.anonymizeDate = new Date();
        await this.updateStatus("preparing");
        console.log(
          `${this._model.repoId} will be updated to ${this._model.source.commit}`
        );
        // Drop the cached state and re-anonymize from the new commit.
        await this.resetState();
        await this.anonymize();
      }
    }
  }

  /**
   * Download the required state for the repository to be served.
   *
   * @returns void
   */
  async anonymize() {
    if (this._model.status == "ready") return;
    await this.updateStatus("queue");
    await this.files();
    await this.updateStatus("ready");
  }

  /**
   * Update the last view and view count
   */
  async countView() {
    this._model.lastView = new Date();
    this._model.pageView = (this._model.pageView || 0) + 1;
    await this._model.save();
  }

  /**
   * Update the status of the repository
   * @param status the new status
   * @param errorMessage a potential error message to display
   */
  async updateStatus(status: RepositoryStatus, errorMessage?: string) {
    this._model.status = status;
    this._model.errorMessage = errorMessage;
    await this._model.save();
  }

  /**
   * Expire the repository
   */
  async expire() {
    await this.updateStatus("expired");
    await this.resetState();
  }

  /**
   * Remove the repository
   */
  async remove() {
    this._model.size = 0;
    await this.resetState();
  }

  /**
   * Reset/delete the cached state of the repository (file tree in the
   * database and content in the storage).
   */
  private async resetState() {
    this._model.size = 0;
    this._model.originalFiles = null;
    await this._model.save();
    await storage.rm(this._model.repoId + "/");
  }

  /**
   * Compute the size of the repository (sum of the file sizes in the
   * tree); the result is cached on the document.
   *
   * @returns The size of the repository
   */
  async computeSize(): Promise<number> {
    if (this._model.status != "ready") return 0;
    if (this._model.size) return this._model.size;
    // Sum the sizes of every file of the (nested) tree.
    function recursiveCount(files: any): number {
      let total = 0;
      for (const name in files) {
        const file = files[name];
        if (file.size) {
          total += file.size as number;
        } else if (typeof file == "object") {
          total += recursiveCount(file);
        }
      }
      return total;
    }
    const files = await this.files({ force: false });
    this._model.size = recursiveCount(files);
    await this._model.save();
    return this._model.size;
  }

  /***** Getters ********/

  get repoId() {
    return this._model.repoId;
  }
  get options() {
    return this._model.options;
  }
  get model() {
    return this._model;
  }
  // Storage prefix of the original (non-anonymized) content.
  get originalCachePath() {
    return path.join(this._model.repoId, "original") + "/";
  }
  get status() {
    return this._model.status;
  }

  toJSON() {
    return {
      repoId: this._model.repoId,
      options: this._model.options,
      anonymizeDate: this._model.anonymizeDate,
      status: this._model.status,
      source: this.source.toJSON(),
      lastView: this._model.lastView,
      pageView: this._model.pageView,
      size: this._model.size,
    };
  }
}

110
src/User.ts Normal file
View File

@@ -0,0 +1,110 @@
import { Octokit } from "@octokit/rest";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./database/repositories/repositories.model";
import { IUserDocument } from "./database/users/users.types";
import Repository from "./Repository";
import { GitHubRepository } from "./source/GitHubRepository";
/**
 * A connected user: wraps the user document and exposes its GitHub and
 * anonymized repositories.
 */
export default class User {
  // Backing mongoose document.
  private _model: IUserDocument;
  constructor(model: IUserDocument) {
    this._model = model;
  }
  get username(): string {
    return this._model.username;
  }
  // GitHub OAuth access token of the user.
  get accessToken(): string {
    return this._model.accessToken;
  }
  get photo(): string {
    return this._model.photo;
  }
  // Default anonymization options of the user.
  get default() {
    return this._model.default;
  }
  set default(d) {
    this._model.default = d;
  }
  /**
   * List the GitHub repositories of the user.
   *
   * The repository ids are cached on the user record; pass `force: true`
   * to refresh the list from the GitHub API.
   */
  async getGitHubRepositories(opt?: {
    force: boolean;
  }): Promise<GitHubRepository[]> {
    if (!this._model.repositories || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: this.accessToken });
      const repositories = (
        await octokit.paginate(octokit.repos.listForAuthenticatedUser, {
          visibility: "all",
          sort: "pushed",
          per_page: 100,
        })
      ).map((r) => {
        return new RepositoryModel({
          externalId: "gh_" + r.id,
          name: r.full_name,
          url: r.html_url,
          size: r.size,
          defaultBranch: r.default_branch,
        });
      });
      // externalIds already present in the database.
      const finds = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("externalId")
      ).map((m) => m.externalId);
      // Save only the repositories that are not in the database yet.
      await Promise.all(
        repositories
          .filter((r) => finds.indexOf(r.externalId) == -1)
          .map((r) => r.save())
      );
      // Cache the database ids of the repositories on the user record.
      this._model.repositories = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("id")
      ).map((m) => m.id);
      await this._model.save();
      return repositories.map((r) => new GitHubRepository(r));
    } else {
      // Cache hit: load the repositories from the database.
      return (
        await RepositoryModel.find({ _id: { $in: this._model.repositories } })
      ).map((i) => new GitHubRepository(i));
    }
  }
  /**
   * List the anonymized repositories owned by the user, expiring the ones
   * whose expiration date has passed.
   */
  async getRepositories() {
    const repositories = (
      await AnonymizedRepositoryModel.find({
        owner: this.username,
      }).exec()
    ).map((d) => new Repository(d));
    for (let repo of repositories) {
      if (repo.options.expirationDate) {
        repo.options.expirationDate = new Date(repo.options.expirationDate);
      }
      if (
        repo.options.expirationMode != "never" &&
        repo.options.expirationDate != null &&
        repo.options.expirationDate < new Date()
      ) {
        await repo.expire()
      }
    }
    return repositories;
  }
  toJSON() {
    return this._model.toJSON();
  }
}

136
src/anonymize-utils.ts Normal file
View File

@@ -0,0 +1,136 @@
import config from "../config";
import Repository from "./Repository";
import GitHubBase from "./source/GitHubBase";
import { isText } from "istextorbinary";
import * as path from "path";
import * as stream from "stream";
// Matches http(s)/ftp/file URLs, optionally wrapped in <...>.
const urlRegex =
  /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
/**
 * Decide whether a file should be treated as text (and therefore
 * anonymized), based on its name and, as a last resort, its content.
 */
export function isTextFile(filePath, content) {
  const filename = path.basename(filePath);
  // Extensions not known to istextorbinary but that we treat as text.
  const extension = filename.split(".").pop().toLowerCase();
  if (config.additionalExtensions.includes(extension)) {
    return true;
  }
  // First check by name only; then let the library inspect the content.
  return isText(filename) || isText(filename, content);
}
/**
 * Create a transform stream that anonymizes text content on the fly.
 *
 * Chunks are buffered in pairs before being anonymized, which reduces (but
 * does not eliminate — NOTE(review)) the chance of missing a term split
 * across a chunk boundary.
 */
export function anonymizeStream(filename: string, repository: Repository) {
  const ts = new stream.Transform();
  var chunks = [],
    len = 0,
    pos = 0;
  ts._transform = function _transform(chunk, enc, cb) {
    chunks.push(chunk);
    len += chunk.length;
    // Process the buffer every second chunk.
    if (pos === 1) {
      let data: any = Buffer.concat(chunks, len);
      if (isTextFile(filename, data)) {
        data = anonymizeContent(data.toString(), repository);
      }
      chunks = [];
      len = 0;
      this.push(data);
    }
    pos = 1 ^ pos;
    cb(null);
  };
  ts._flush = function _flush(cb) {
    // Process whatever remains buffered (for small files this is the
    // whole content).
    if (chunks.length) {
      let data: any = Buffer.concat(chunks, len);
      // Fixed: use isTextFile (which also honors the additional
      // extensions from the configuration) instead of the bare isText,
      // so files handled entirely in _flush are anonymized too.
      if (isTextFile(filename, data)) {
        data = anonymizeContent(data.toString(), repository);
      }
      this.push(data);
    }
    cb(null);
  };
  return ts;
}
/**
 * Anonymize a text content according to the repository options: strip
 * markdown images and links when disabled, rewrite GitHub URLs of the
 * repository to the anonymized host, and mask every anonymization term.
 */
export function anonymizeContent(content: string, repository: Repository) {
  if (repository.options?.image === false) {
    // remove image in markdown
    content = content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      ""
    );
  }
  if (!repository.options?.link) {
    // remove all links
    content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
  }
  if (repository.source instanceof GitHubBase) {
    // Rewrite GitHub URLs pointing to this repository so they target the
    // anonymized view instead (most specific forms first: blob, tree,
    // then the plain repository URL).
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/blob/${repository.source.branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/tree/${(repository.source as GitHubBase).branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${repository.source.githubRepository.fullName}`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
  }
  for (let term of repository.options.terms || []) {
    if (term.trim() == "") {
      continue;
    }
    // remove whole url if it contains the term
    content = content.replace(urlRegex, (match) => {
      if (new RegExp(`\\b${term}\\b`, "gi").test(match))
        return config.ANONYMIZATION_MASK;
      return match;
    });
    // remove the term in the text
    content = content.replace(
      new RegExp(`\\b${term}\\b`, "gi"),
      config.ANONYMIZATION_MASK
    );
  }
  return content;
}
/**
 * Mask every non-empty anonymization term that appears in a path.
 */
export function anonymizePath(path: string, terms: string[]) {
  let anonymized = path;
  for (const term of terms) {
    if (term.trim() == "") {
      continue;
    }
    anonymized = anonymized.replace(
      new RegExp(term, "gi"),
      config.ANONYMIZATION_MASK
    );
  }
  return anonymized;
}

View File

@@ -0,0 +1,15 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import {
IAnonymizedRepositoryDocument,
IAnonymizedRepositoryModel,
} from "./anonymizedRepositories.types";
import AnonymizedRepositorySchema from "./anonymizedRepositories.schema";
// Mongoose model for the anonymized repositories collection.
const AnonymizedRepositoryModel = model<IAnonymizedRepositoryDocument>(
  "AnonymizedRepository",
  AnonymizedRepositorySchema
) as IAnonymizedRepositoryModel;
export default AnonymizedRepositoryModel;

View File

@@ -0,0 +1,54 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Mongoose schema of an anonymized repository.
const AnonymizedRepositorySchema = new Schema({
  // Public identifier of the anonymized repository.
  repoId: {
    type: String,
    index: { unique: true },
  },
  status: {
    type: String,
    default: "preparing",
  },
  errorMessage: String,
  anonymizeDate: Date,
  lastView: Date,
  pageView: Number,
  accessToken: String,
  owner: String,
  conference: String,
  // Where/how the original content is retrieved.
  source: {
    type: { type: String },
    branch: String,
    commit: String,
    repositoryId: String,
    repositoryName: String,
    accessToken: String,
  },
  // Original (non-anonymized) file tree.
  originalFiles: mongoose.Schema.Types.Mixed,
  // Anonymization options.
  options: {
    terms: [String],
    expirationMode: { type: String },
    expirationDate: Date,
    update: Boolean,
    image: Boolean,
    pdf: Boolean,
    notebook: Boolean,
    link: Boolean,
    page: Boolean,
    pageSource: {
      branch: String,
      path: String,
    },
  },
  dateOfEntry: {
    type: Date,
    // Fixed: use Date.now (the function) so the date is evaluated at
    // insertion time; `new Date()` was evaluated once at module load and
    // shared by every document.
    default: Date.now,
  },
  size: {
    type: Number,
    default: 0,
  },
});
export default AnonymizedRepositorySchema;

View File

@@ -0,0 +1,46 @@
import * as mongoose from "mongoose";
import { RepositoryStatus, Tree } from "../../types";
/**
 * Database shape of an anonymized repository.
 */
export interface IAnonymizedRepository {
  // Public identifier of the anonymized repository.
  repoId: string;
  status?: RepositoryStatus;
  errorMessage?: string;
  anonymizeDate: Date;
  // Where/how the original content is retrieved.
  source: {
    type: "GitHubDownload" | "GitHubStream" | "Zip";
    branch?: string;
    commit?: string;
    repositoryId?: string;
    repositoryName?: string;
    accessToken?: string;
  };
  owner: string;
  // Original (non-anonymized) file tree.
  originalFiles: Tree;
  conference: string;
  // Anonymization options of the repository.
  options: {
    terms: string[];
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    pdf: boolean;
    notebook: boolean;
    link: boolean;
    page: boolean;
    pageSource?: {
      branch: string;
      path: string;
    };
  };
  pageView: number;
  lastView: Date;
  // Total size of the stored files (computed lazily).
  size: number;
}
export interface IAnonymizedRepositoryDocument
  extends IAnonymizedRepository,
    mongoose.Document {
  setLastUpdated: (this: IAnonymizedRepositoryDocument) => Promise<void>;
}
export interface IAnonymizedRepositoryModel
  extends mongoose.Model<IAnonymizedRepositoryDocument> {}

28
src/database/database.ts Normal file
View File

@@ -0,0 +1,28 @@
import * as mongoose from "mongoose";
import Repository from "../Repository";
import config from "../../config";
import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model";
// Connection string built from the configuration (default MongoDB port).
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;
export const database = mongoose.connection;
/**
 * Connect to the MongoDB database.
 *
 * @returns the mongoose connection
 */
export async function connect() {
  mongoose.set("useNewUrlParser", true);
  mongoose.set("useFindAndModify", true);
  mongoose.set("useUnifiedTopology", true);
  // NOTE(review): the database name is hard-coded to "test" — confirm this
  // is intended outside of development.
  await mongoose.connect(MONGO_URL + "test", {
    authSource: "admin",
    useCreateIndex: true,
    useFindAndModify: true,
  });
  return database;
}
/**
 * Load an anonymized repository by its public id.
 *
 * @throws repo_not_found when the id is unknown
 */
export async function getRepository(repoId: string) {
  const data = await AnonymizedRepositoryModel.findOne({ repoId });
  if (!data) throw new Error("repo_not_found");
  return new Repository(data);
}

View File

@@ -0,0 +1,12 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import { IRepositoryDocument, IRepositoryModel } from "./repositories.types";
import RepositorySchema from "./repositories.schema";
// Mongoose model for the (GitHub) repositories collection.
const RepositoryModel = model<IRepositoryDocument>(
  "Repository",
  RepositorySchema
) as IRepositoryModel;
export default RepositoryModel;

View File

@@ -0,0 +1,42 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Mongoose schema of a (GitHub) repository known to the system.
const RepositorySchema = new Schema({
  // External identifier, e.g. "gh_<github id>".
  externalId: {
    type: String,
    index: { unique: true },
  },
  name: {
    type: String,
    index: true,
  },
  url: String,
  source: {
    type: String,
    default: "github",
  },
  hasPage: { type: Boolean, default: false },
  pageSource: {
    branch: { type: String },
    path: String,
  },
  branches: [
    {
      name: { type: String },
      commit: String,
      readme: String,
    },
  ],
  defaultBranch: String,
  size: Number,
  status: {
    type: String,
    default: "ready",
  },
  dateOfEntry: {
    type: Date,
    // Fixed: use Date.now (the function) so the date is evaluated at
    // insertion time; `new Date()` was evaluated once at module load and
    // shared by every document.
    default: Date.now,
  },
});
export default RepositorySchema;

View File

@@ -0,0 +1,25 @@
import * as mongoose from "mongoose";
/**
 * Database shape of a (GitHub) repository known to the system.
 */
export interface IRepository {
  // External identifier, e.g. "gh_<github id>".
  externalId: string;
  // Full name, e.g. "owner/repo".
  name: string;
  url?: string;
  source: "github";
  // Size as reported by GitHub (in kilobytes).
  size?: number;
  defaultBranch?: string;
  hasPage: boolean;
  pageSource?: {
    branch: string;
    path: string;
  };
  branches?: {
    name: string;
    commit: string;
    readme?: string;
  }[];
}
export interface IRepositoryDocument extends IRepository, mongoose.Document {
  setLastUpdated: (this: IRepositoryDocument) => Promise<void>;
}
export interface IRepositoryModel extends mongoose.Model<IRepositoryDocument> {}

View File

@@ -0,0 +1,10 @@
import * as mongoose from "mongoose";
const { model } = mongoose;
import { IUserDocument, IUserModel } from "./users.types";
import UserSchema from "./users.schema";
// Mongoose model for the users collection.
const UserModel = model<IUserDocument>("user", UserSchema) as IUserModel;
export default UserModel

View File

@@ -0,0 +1,36 @@
import * as mongoose from "mongoose";
const { Schema } = mongoose;
// Mongoose schema of a connected user.
const UserSchema = new Schema({
  // GitHub OAuth access token.
  accessToken: String,
  username: {
    type: String,
    index: { unique: true },
  },
  email: String,
  photo: String,
  // Cached ids of the user's GitHub repositories.
  repositories: [String],
  // Default anonymization options applied to new repositories.
  default: {
    terms: [String],
    options: {
      expirationMode: { type: String },
      update: Boolean,
      image: Boolean,
      pdf: Boolean,
      notebook: Boolean,
      loc: Boolean,
      link: Boolean,
      page: { type: String },
    },
  },
  status: {
    type: String,
    default: "active",
  },
  dateOfEntry: {
    type: Date,
    // Fixed: use Date.now (the function) so the date is evaluated at
    // insertion time; `new Date()` was evaluated once at module load and
    // shared by every document.
    default: Date.now,
  },
});
export default UserSchema;

View File

@@ -0,0 +1,32 @@
import * as mongoose from "mongoose";
/**
 * Database shape of a connected user.
 */
export interface IUser {
  // GitHub OAuth access token.
  accessToken: string;
  username: string;
  email: string;
  photo?: string;
  // Cached ids of the user's GitHub repositories.
  // NOTE(review): UserSchema declares this field as [String] — confirm the
  // number[] type here.
  repositories?: number[];
  // Default anonymization options applied to new repositories.
  default?: {
    terms: string[];
    options: {
      expirationMode: "never" | "redirect" | "";
      update: boolean;
      image: boolean;
      pdf: boolean;
      notebook: boolean;
      loc: boolean;
      link: boolean;
      page: string | null;
    };
  };
  status?: "active" | "removed";
  dateOfEntry?: Date;
  lastUpdated?: Date;
}
export interface IUserDocument extends IUser, mongoose.Document {
  setLastUpdated: (this: IUserDocument) => Promise<void>;
}
export interface IUserModel extends mongoose.Model<IUserDocument> {}

107
src/routes/connection.ts Normal file
View File

@@ -0,0 +1,107 @@
import * as redis from "redis";
import * as passport from "passport";
import * as session from "express-session";
import * as connectRedis from "connect-redis";
import * as OAuth2Strategy from "passport-oauth2";
import { Profile, Strategy } from "passport-github2";
import * as express from "express";
import config from "../../config";
import UserModel from "../database/users/users.model";
// Redis-backed session store constructor bound to express-session.
const RedisStore = connectRedis(session);
/**
 * Guard middleware for authenticated-only routes: forwards requests with a
 * valid session, answers 401 with a machine-readable error otherwise.
 */
export function ensureAuthenticated(
  req: express.Request,
  res: express.Response,
  next: express.NextFunction
) {
  if (!req.isAuthenticated()) {
    res.status(401).json({ error: "not_connected" });
    return;
  }
  return next();
}
/**
 * GitHub OAuth verify callback: upserts the user record keyed by username
 * and hands the session payload to passport. DB errors are logged but never
 * abort the login (best-effort persistence, `done` always runs in finally).
 */
const verify = async (
  accessToken: string,
  refreshToken: string,
  profile: Profile,
  done: OAuth2Strategy.VerifyCallback
): Promise<void> => {
  let user;
  try {
    // GitHub profiles may have no public email or photo: `emails`/`photos`
    // can be undefined, so the whole chain must be optional.
    const email = profile.emails?.[0]?.value;
    const photo = profile.photos?.[0]?.value;
    user = await UserModel.findOne({ username: profile.username });
    if (user) {
      // Existing user: refresh the token and profile details.
      user.accessToken = accessToken;
      user.email = email;
      user.photo = photo;
      await user.save();
    } else {
      user = await new UserModel({
        username: profile.username,
        accessToken: accessToken,
        email,
        photo,
      }).save();
    }
  } catch (error) {
    console.error(error);
  } finally {
    done(null, {
      username: profile.username,
      accessToken,
      refreshToken,
      profile,
      user,
    });
  }
};
// Register the GitHub OAuth2 strategy with the application credentials.
passport.use(
  new Strategy(
    {
      clientID: config.CLIENT_ID,
      clientSecret: config.CLIENT_SECRET,
      callbackURL: config.AUTH_CALLBACK,
    },
    verify
  )
);
// The full user payload is stored in (and restored from) the session as-is;
// no DB lookup happens on deserialization.
passport.serializeUser((user: Express.User, done) => {
  done(null, user);
});
passport.deserializeUser((user: Express.User, done) => {
  done(null, user);
});
// Session middleware backed by Redis. The session secret must come from the
// environment in production; the literal is only a development fallback kept
// for backward compatibility.
export const appSession = session({
  secret: process.env.SESSION_SECRET ?? "keyboard cat",
  store: new RedisStore({
    client: redis.createClient({
      port: config.REDIS_PORT,
      host: config.REDIS_HOSTNAME,
    }),
  }),
  saveUninitialized: false,
  resave: false,
});
export const router = express.Router();
// Starts the OAuth dance; the "repo" scope is required to read private repos.
router.get(
  "/login",
  passport.authenticate("github", { scope: ["repo"] }), // Note the scope here
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);
// OAuth callback endpoint (config.AUTH_CALLBACK points here).
router.get(
  "/auth",
  passport.authenticate("github", { failureRedirect: "/" }),
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);

38
src/routes/file.ts Normal file
View File

@@ -0,0 +1,38 @@
import * as express from "express";
import AnonymizedFile from "../AnonymizedFile";
import { getRepo, handleError } from "./route-utils";
export const router = express.Router();

/**
 * GET /:repoId/file/:path* — download a single anonymized file.
 * The wildcard tail of the URL (req.params[0]) is appended to :path to
 * rebuild the full path inside the repository.
 */
router.get(
  "/:repoId/file/:path*",
  async (req: express.Request, res: express.Response) => {
    let anonymizedPath = req.params.path;
    if (req.params[0]) {
      anonymizedPath += req.params[0];
    }
    // (removed a no-op self-assignment of anonymizedPath here)
    const repo = await getRepo(req, res);
    if (!repo) return;
    await repo.countView();
    try {
      const f = new AnonymizedFile(repo, {
        anonymizedPath,
      });
      if (!(await f.isFileSupported())) {
        return res.status(500).send({ error: "file_not_supported" });
      }
      // Serve as an attachment named after the file itself.
      res.attachment(
        anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
      );
      await f.send(res);
    } catch (error) {
      return handleError(error, res);
    }
  }
);
export default router;

13
src/routes/index.ts Normal file
View File

@@ -0,0 +1,13 @@
import repositoryPrivate from "./repository-private";
import repositoryPublic from "./repository-public";
import file from "./file";
import webview from "./webview";
import user from "./user";
// Aggregated route modules, mounted by src/server.ts.
export default {
  repositoryPrivate,
  repositoryPublic,
  file,
  webview,
  user,
};

View File

@@ -0,0 +1,270 @@
import * as express from "express";
import { ensureAuthenticated } from "./connection";
import * as db from "../database/database";
import { getRepo, getUser, handleError } from "./route-utils";
import RepositoryModel from "../database/repositories/repositories.model";
import {
GitHubRepository,
getRepositoryFromGitHub,
} from "../source/GitHubRepository";
import gh = require("parse-github-url");
import GitHubBase from "../source/GitHubBase";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import config from "../../config";
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../Repository";
const router = express.Router();
// user needs to be connected for all user API
router.use(ensureAuthenticated);
// claim a repository
// POST /claim — let the connected user claim ownership of an existing
// anonymized repository by proving access to the original GitHub repo.
router.post("/claim", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  try {
    if (!req.body.repoId) {
      return res.status(500).json({ error: "repoId_not_defined" });
    }
    if (!req.body.repoUrl) {
      return res.status(500).json({ error: "repoUrl_not_defined" });
    }
    const repoConfig = await db.getRepository(req.body.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    const r = gh(req.body.repoUrl);
    const repo = await getRepositoryFromGitHub({
      owner: r.owner,
      repo: r.name,
      accessToken: user.accessToken,
    });
    // NOTE(review): githubRepository.id is the stored external id while
    // repo.id comes from getRepositoryFromGitHub ("gh_"-prefixed) — confirm
    // both sides use the same id format for this comparison.
    if ((repoConfig.source as GitHubBase).githubRepository.id != repo.id) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    console.log(`${user.username} claims ${r.repository}.`);
    // NOTE(review): assigns a User object here while the DB update below
    // stores only the username — verify repoConfig.owner expects this.
    repoConfig.owner = user;
    await AnonymizedRepositoryModel.updateOne(
      { repoId: repoConfig.repoId },
      { $set: { owner: user.username } }
    );
    return res.send("Ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
// refresh a repository
// POST /:repoId/refresh — re-run the anonymization; owner only.
router.post(
  "/:repoId/refresh",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.anonymize();
    res.end("ok");
  }
);
// delete a repository
// DELETE /:repoId/ — remove the anonymized repository; owner only.
router.delete(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: false });
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.remove();
    console.log(`${req.params.repoId} is removed`);
    return res.json("ok");
  }
);
// GET /:owner/:repo/ — GitHub metadata of a repository visible to the user.
router.get(
  "/:owner/:repo/",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    try {
      const repo = await getRepositoryFromGitHub({
        owner: req.params.owner,
        repo: req.params.repo,
        accessToken: user.accessToken,
      });
      res.json(repo.toJSON());
    } catch (error) {
      handleError(error, res);
    }
  }
);
// GET /:owner/:repo/branches — branch list (cached; ?force=1 refreshes).
router.get(
  "/:owner/:repo/branches",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    try {
      const repository = await getRepositoryFromGitHub({
        accessToken: user.accessToken,
        owner: req.params.owner,
        repo: req.params.repo,
      });
      return res.json(
        await repository.branches({
          accessToken: user.accessToken,
          force: req.query.force == "1",
        })
      );
    } catch (error) {
      handleError(error, res);
    }
  }
);
// GET /:owner/:repo/readme — README of a branch (cached; ?force=1 refreshes).
// NOTE(review): unlike the routes above, this handler has no try/catch —
// a failing readme() fetch becomes an unhandled rejection; confirm intended.
router.get(
  "/:owner/:repo/readme",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    const repo = await RepositoryModel.findOne({
      name: `${req.params.owner}/${req.params.repo}`,
    });
    if (!repo) return res.status(404).send({ error: "repo_not_found" });
    const repository = new GitHubRepository(repo);
    return res.send(
      await repository.readme({
        accessToken: user.accessToken,
        force: req.query.force == "1",
        branch: req.query.branch as string,
      })
    );
  }
);
/**
 * Validate the user-supplied payload for creating/updating an anonymized
 * repository. Throws an Error with a machine-readable message (picked up by
 * handleError) on the first violation; returns normally when acceptable.
 *
 * @param repoUpdate request body carrying repoId, branch, options, terms, commit
 * @throws Error invalid_repoId | branch_not_specified | options_not_provided |
 *               invalid_terms_format | invalid_commit_format
 */
function validateNewRepo(repoUpdate: any): void {
  const validCharacters = /^[0-9a-zA-Z\-\_]+$/;
  // Guard against a missing/non-string repoId: previously a missing field
  // crashed with a TypeError instead of reporting invalid_repoId.
  if (
    typeof repoUpdate.repoId !== "string" ||
    !repoUpdate.repoId.match(validCharacters) ||
    repoUpdate.repoId.length < 3
  ) {
    throw new Error("invalid_repoId");
  }
  if (!repoUpdate.branch) {
    throw new Error("branch_not_specified");
  }
  if (!repoUpdate.options) {
    throw new Error("options_not_provided");
  }
  if (!Array.isArray(repoUpdate.terms)) {
    throw new Error("invalid_terms_format");
  }
  // Commits must be lowercase hexadecimal SHAs.
  if (!/^[a-f0-9]+$/.test(repoUpdate.commit)) {
    throw new Error("invalid_commit_format");
  }
}
// Copy the (already validated) update payload onto the repository document.
// NOTE(review): repoUpdate is untyped; validateNewRepo does not check the
// individual option fields copied here — confirm they are sanitized upstream.
function updateRepoModel(model: IAnonymizedRepositoryDocument, repoUpdate) {
  model.source.commit = repoUpdate.commit;
  model.source.branch = repoUpdate.branch;
  model.conference = repoUpdate.conference;
  model.options = {
    terms: repoUpdate.terms,
    expirationMode: repoUpdate.options.expirationMode,
    expirationDate: repoUpdate.options.expirationDate
      ? new Date(repoUpdate.options.expirationDate)
      : null,
    update: repoUpdate.options.update,
    image: repoUpdate.options.image,
    pdf: repoUpdate.options.pdf,
    notebook: repoUpdate.options.notebook,
    link: repoUpdate.options.link,
    page: repoUpdate.options.page,
    pageSource: repoUpdate.options.pageSource,
  };
}
// update a repository
// update a repository
// POST /:repoId/ — update an anonymized repository's configuration (owner
// only) and re-anonymize it in the background.
router.post(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: true });
    if (!repo) return;
    const user = await getUser(req);
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    const repoUpdate = req.body;
    try {
      validateNewRepo(repoUpdate);
    } catch (error) {
      return handleError(error, res);
    }
    // A new commit resets the anonymization date.
    if (repoUpdate.commit != repo.model.source.commit) {
      repo.model.anonymizeDate = new Date();
      repo.model.source.commit = repoUpdate.commit;
    }
    updateRepoModel(repo.model, repoUpdate);
    await repo.updateStatus("preparing");
    await repo.model.save();
    res.send("ok");
    // Fire-and-forget: anonymization continues after the response is sent.
    repo.anonymize();
  }
);
// add repository
// POST / — create a new anonymized repository for the connected user.
router.post("/", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  const repoUpdate = req.body;
  try {
    validateNewRepo(repoUpdate);
  } catch (error) {
    return handleError(error, res);
  }
  const r = gh(repoUpdate.fullName);
  const repository = await getRepositoryFromGitHub({
    accessToken: user.accessToken,
    owner: r.owner,
    repo: r.name,
  });
  const repo = new AnonymizedRepositoryModel();
  repo.repoId = repoUpdate.repoId;
  repo.anonymizeDate = new Date();
  repo.owner = user.username;
  repo.source = {
    type:
      repoUpdate.options.mode == "download" ? "GitHubDownload" : "GitHubStream",
    accessToken: user.accessToken,
    // NOTE(review): repository.model.id is the mongoose document id, while
    // GitHubRepository.id exposes externalId ("gh_<id>") — confirm which one
    // this field is supposed to store.
    repositoryId: repository.model.id,
    repositoryName: repoUpdate.fullName,
  };
  if (repo.source.type == "GitHubDownload") {
    // details.size is in kilobytes
    if (repository.size > config.MAX_REPO_SIZE) {
      return res.status(500).send({ error: "invalid_mode" });
    }
  }
  updateRepoModel(repo, repoUpdate);
  await repo.save();
  res.send("ok");
  // Fire-and-forget: anonymization continues after the response is sent.
  new Repository(repo).anonymize();
});
export default router;

View File

@@ -0,0 +1,43 @@
import * as express from "express";
import * as db from "../database/database";
import { getRepo, getUser, handleError } from "./route-utils";
const router = express.Router();

// GET /:repoId/ — repository metadata (nocheck: allowed even when expired).
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
  const repo = await getRepo(req, res, { nocheck: true });
  if (!repo) return;
  // getRepo already loaded this repository; reuse it instead of issuing a
  // second, redundant db.getRepository round-trip.
  res.json(repo.toJSON());
});
// GET /:repoId/zip — stream the anonymized repository as a zip archive.
router.get(
  "/:repoId/zip",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    res.attachment(`${repo.repoId}.zip`);
    repo.zip().pipe(res);
  }
);
// GET /:repoId/files — the anonymized file tree (always recomputed).
router.get(
  "/:repoId/files",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    res.json(await repo.anonymizedFiles({ force: true }));
  }
);
// GET /:repoId/options — anonymization options, refreshing the repo if stale.
router.get(
  "/:repoId/options",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    await repo.updateIfNeeded();
    res.json(repo.options);
  }
);
export default router;

63
src/routes/route-utils.ts Normal file
View File

@@ -0,0 +1,63 @@
import * as express from "express";
import * as db from "../database/database";
import UserModel from "../database/users/users.model";
import User from "../User";
/**
 * Fetch the repository for req.params.repoId and apply access checks.
 * Responds (redirect or error) and returns null when the repository cannot
 * be served; callers must bail out on a null return.
 */
export async function getRepo(
  req: express.Request,
  res: express.Response,
  opt?: { nocheck?: boolean }
) {
  try {
    const repo = await db.getRepository(req.params.repoId);
    if (opt?.nocheck == true) {
      // Caller accepts any repository state (e.g. owner/metadata views).
    } else {
      // redirect if the repository is expired
      if (
        repo.status == "expired" &&
        repo.options.expirationMode == "redirect" &&
        repo.source.url
      ) {
        res.redirect(repo.source.url);
        return null;
      }
      // Throws when the repository is not in a servable state.
      repo.check();
    }
    return repo;
  } catch (error) {
    handleError(error, res);
    return null;
  }
}
/**
 * Map an error to an HTTP status and send it as a JSON payload.
 * Messages containing "not_found" map to 400 (kept as-is for API
 * compatibility), "not_connected" to 401, everything else to 500.
 */
export function handleError(error: any, res: express.Response) {
  console.log(error);
  let message = error;
  if (error instanceof Error) {
    message = error.message;
  }
  let status = 500;
  // Only strings can be searched: a non-string payload (e.g. a plain object
  // thrown somewhere) previously crashed here on message.indexOf.
  if (typeof message === "string") {
    if (message.indexOf("not_found") > -1) {
      status = 400;
    } else if (message.indexOf("not_connected") > -1) {
      status = 401;
    }
  }
  res.status(status).send({ error: message });
  return;
}
/**
 * Resolve the authenticated user from the session into a fresh User wrapper.
 * Logs the session out and throws "not_connected" when the session payload
 * or the underlying DB record is gone.
 */
export async function getUser(req: express.Request) {
  const user = (req.user as any).user;
  if (!user) {
    req.logout();
    throw new Error("not_connected");
  }
  // Re-read the user from the DB so stale session data is never trusted.
  const model = await UserModel.findById(user._id);
  if (!model) {
    req.logout();
    throw new Error("not_connected");
  }
  return new User(model);
}

96
src/routes/user.ts Normal file
View File

@@ -0,0 +1,96 @@
import * as express from "express";
import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser } from "./route-utils";
const router = express.Router();
// user needs to be connected for all user API
router.use(ensureAuthenticated);
// GET /logout — terminate the session and go back to the landing page.
router.get("/logout", async (req: express.Request, res: express.Response) => {
  try {
    req.logout();
    res.redirect("/");
  } catch (error) {
    handleError(error, res);
  }
});
// GET / — minimal profile of the connected user.
router.get("/", async (req: express.Request, res: express.Response) => {
  try {
    const user = await getUser(req);
    res.json({ username: user.username, photo: user.photo });
  } catch (error) {
    handleError(error, res);
  }
});
// GET /quota — storage used by the user's ready repositories vs. the quota.
router.get("/quota", async (req: express.Request, res: express.Response) => {
  try {
    const user = await getUser(req);
    // Compute all repository sizes in parallel.
    const sizes = await Promise.all(
      (await user.getRepositories())
        .filter((r) => r.status == "ready")
        .map((r) => r.computeSize())
    );
    res.json({
      used: sizes.reduce((sum, i) => sum + i, 0),
      total: config.DEFAULT_QUOTA,
    });
  } catch (error) {
    handleError(error, res);
  }
});
// GET /default — the user's default anonymization settings.
router.get("/default", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  try {
    res.json(user.default);
  } catch (error) {
    handleError(error, res);
  }
});
// POST /default — replace the user's default anonymization settings.
// NOTE(review): the body is stored without validation — confirm User.default
// setter sanitizes it.
router.post("/default", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  try {
    const d = req.body;
    user.default = d;
    res.send("ok");
  } catch (error) {
    handleError(error, res);
  }
});
// GET /anonymized_repositories — the user's anonymized repositories.
router.get(
  "/anonymized_repositories",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    res.json(
      (await user.getRepositories()).map((x) => {
        return x.toJSON();
      })
    );
  }
);
// GET /all_repositories — the user's GitHub repositories (?force=1 refreshes).
router.get(
  "/all_repositories",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    const repos = await user.getGitHubRepositories({
      force: req.query.force == "1",
    });
    res.json(
      repos.map((x) => {
        return {
          fullName: x.fullName,
          id: x.id,
        };
      })
    );
  }
);
export default router;

54
src/routes/webview.ts Normal file
View File

@@ -0,0 +1,54 @@
import * as express from "express";
import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import GitHubDownload from "../source/GitHubDownload";
const router = express.Router();

/**
 * Serve a repository's GitHub-Pages-like site from its anonymized content.
 * Rejects when pages are disabled or configured on a different branch than
 * the anonymized one; directory URLs fall back to index.html.
 */
async function webView(req: express.Request, res: express.Response) {
  const repo = await getRepo(req, res);
  if (!repo) return;
  try {
    if (!repo.options.page) {
      throw "page_not_activated";
    }
    if (!repo.options.pageSource) {
      throw "page_not_activated";
    }
    if (
      repo.options.pageSource?.branch !=
      (repo.source as GitHubDownload).branch.name
    ) {
      throw "page_not_supported_on_different_branch";
    }
    // Path inside the page source = URL tail after the repoId segment.
    let requestPath = path.join(
      repo.options.pageSource?.path,
      req.path.substring(
        req.path.indexOf(req.params.repoId) + req.params.repoId.length
      )
    );
    if (requestPath[requestPath.length - 1] == "/") {
      requestPath = path.join(requestPath, "index.html");
    }
    // (removed a no-op self-assignment of requestPath here)
    const f = new AnonymizedFile(repo, {
      anonymizedPath: requestPath,
    });
    if (!(await f.isFileSupported())) {
      return res.status(500).send({ error: "file_not_supported" });
    }
    f.send(res);
  } catch (error) {
    handleError(error, res);
  }
}
router.get("/:repoId/*", webView);
// Normalize /w/:repoId to a trailing-slash URL so relative links resolve.
router.get("/:repoId", (req: express.Request, res: express.Response) => {
  res.redirect("/w" + req.url + "/");
});
export default router;

95
src/server.ts Normal file
View File

@@ -0,0 +1,95 @@
import * as path from "path";
import * as ofs from "fs";
import * as redis from "redis";
import * as rateLimit from "express-rate-limit";
import * as RedisStore from "rate-limit-redis";
import * as express from "express";
import * as compression from "compression";
import * as db from "./database/database";
import config from "../config";
import * as passport from "passport";
import * as connection from "./routes/connection";
import router from "./routes";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
/**
 * Serve the SPA entry point. Non-HTML requests on a repository page are
 * treated as asset requests that the SPA cannot answer and are forwarded
 * to the file API instead.
 */
function indexResponse(req: express.Request, res: express.Response) {
  const accept = req.headers["accept"];
  if (req.params.repoId && accept && !accept.includes("text/html")) {
    const repoId = req.path.split("/")[2];
    const filePath = req.path.substring(
      req.path.indexOf(repoId) + repoId.length + 1
    );
    return res.redirect(`/api/repo/${repoId}/file/${filePath}`);
  }
  res.sendFile(path.resolve(__dirname, "..", "public", "index.html"));
}
/**
 * Configure the express application (sessions, rate limiting, routes,
 * static assets), connect to the database, and start listening.
 */
export default async function start() {
  const app = express();
  app.use(express.json());
  app.use(compression());
  // Behind a reverse proxy: trust X-Forwarded-* headers.
  app.set("trust proxy", 1);
  // handle session and connection
  app.use(connection.appSession);
  app.use(passport.initialize());
  app.use(passport.session());
  // Redis-backed rate limiter shared by all API routes.
  const rate = rateLimit({
    store: new RedisStore({
      client: redis.createClient({
        host: config.REDIS_HOSTNAME,
        port: config.REDIS_PORT,
      }),
    }),
    windowMs: 15 * 60 * 1000, // 15 minutes
    max: 200, // limit each IP
    // delayMs: 0, // disable delaying - full speed until the max limit is reached
  });
  app.use("/github", rate, connection.router);
  // app routes
  app.use("/api/user", rate, router.user);
  app.use("/api/repo", rate, router.repositoryPublic);
  app.use("/api/repo", rate, router.file);
  app.use("/api/repo", rate, router.repositoryPrivate);
  app.use("/w/", rate, router.webview);
  // Optional maintenance banner read from message.txt when present.
  app.get("/api/message", async (_, res) => {
    if (ofs.existsSync("./message.txt")) {
      return res.sendFile(path.resolve(__dirname, "..", "message.txt"));
    }
    res.sendStatus(404);
  });
  // Public usage statistics.
  app.get("/api/stat", async (_, res) => {
    const nbRepositories =
      await AnonymizedRepositoryModel.estimatedDocumentCount();
    const nbUsers = (await AnonymizedRepositoryModel.distinct("owner")).length;
    res.json({ nbRepositories, nbUsers });
  });
  // SPA entry points; everything else falls through to static then the SPA.
  app
    .get("/", indexResponse)
    .get("/404", indexResponse)
    .get("/anonymize", indexResponse)
    .get("/r/:repoId/?*", indexResponse)
    .get("/repository/:repoId/?*", indexResponse);
  app.use(express.static(path.join(__dirname, "..", "public")));
  app.get("*", indexResponse);
  await db.connect();
  app.listen(config.PORT);
  console.log("Database connected and Server started on port: " + config.PORT);
}

83
src/source/GitHubBase.ts Normal file
View File

@@ -0,0 +1,83 @@
import AnonymizedFile from "../AnonymizedFile";
import { Branch, Tree } from "../types";
import { GitHubRepository } from "./GitHubRepository";
import config from "../../config";
import { OAuthApp } from "@octokit/oauth-app";
import Repository from "../Repository";
import * as stream from "stream";
import UserModel from "../database/users/users.model";
/**
 * Shared base for GitHub-backed sources. Stores the repository metadata,
 * the anonymized branch/commit and the access token, and resolves a usable
 * GitHub token with fallback to the server-wide token.
 */
export default abstract class GitHubBase {
  // Concrete source strategy implemented by the subclass.
  type: "GitHubDownload" | "GitHubStream" | "Zip";
  githubRepository: GitHubRepository;
  // Branch (name + commit) selected for anonymization.
  branch: Branch;
  // Token of the user who created the anonymized repository.
  accessToken: string;
  repository: Repository;
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    this.type = data.type;
    this.accessToken = data.accessToken;
    // Rebuild the GitHubRepository wrapper from the persisted fields.
    this.githubRepository = new GitHubRepository({
      name: data.repositoryName,
      externalId: data.repositoryId,
      branches: [{ commit: data.commit, name: data.branch }],
    });
    this.repository = repository;
    this.branch = { commit: data.commit, name: data.branch };
  }
  // Subclasses must override; the base implementations only throw.
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    throw new Error("Method not implemented.");
  }
  getFiles(): Promise<Tree> {
    throw new Error("Method not implemented.");
  }
  /**
   * Resolve a GitHub token: prefer the owner's stored token, then this
   * source's token (validated against the OAuth app), falling back to the
   * server-wide GITHUB_TOKEN when the user token is missing or invalid.
   */
  async getToken(owner?: string) {
    if (owner) {
      const user = await UserModel.findOne({ username: owner });
      if (user && user.accessToken) {
        return user.accessToken as string;
      }
    }
    if (this.accessToken) {
      try {
        const app = new OAuthApp({
          clientType: "github-app",
          clientId: config.CLIENT_ID,
          clientSecret: config.CLIENT_SECRET,
        });
        // Throws when the token has been revoked or expired.
        await app.checkToken({
          token: this.accessToken,
        });
        return this.accessToken;
      } catch (error) {
        // console.debug("Token is invalid.", error);
        // Invalid user token: degrade to the shared server token.
        this.accessToken = config.GITHUB_TOKEN;
      }
    }
    return config.GITHUB_TOKEN;
  }
  // Public GitHub URL of the original repository.
  get url() {
    return "https://github.com/" + this.githubRepository.fullName;
  }
  toJSON(): any {
    return {
      type: this.type,
      fullName: this.githubRepository.fullName?.toString(),
      branch: this.branch,
    };
  }
}

View File

@@ -0,0 +1,75 @@
import { Octokit } from "@octokit/rest";
import * as path from "path";
import config from "../../config";
import storage from "../storage";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { SourceBase } from "../types";
import * as got from "got";
import * as stream from "stream";
import { OctokitResponse } from "@octokit/types";
/**
 * Source that downloads the complete repository tarball from GitHub into
 * the local cache and serves files from there.
 */
export default class GitHubDownload extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }
  // Ask GitHub for the tarball location of the anonymized commit.
  // NOTE(review): method "HEAD" is used so the response carries the final
  // URL without the body — confirm response.url is the followed redirect.
  private async _getZipUrl(
    auth?: string
  ): Promise<OctokitResponse<unknown, 302>> {
    const octokit = new Octokit({ auth });
    return octokit.rest.repos.downloadTarballArchive({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      ref: this.branch?.commit || "HEAD",
      method: "HEAD",
    });
  }
  /**
   * Download and extract the repository tarball into the original cache.
   * Retries once with the server token when the user token is rejected.
   */
  async download() {
    let response: OctokitResponse<unknown, number>;
    try {
      response = await this._getZipUrl(await this.getToken());
    } catch (error) {
      if (error.status == 401 && config.GITHUB_TOKEN) {
        try {
          response = await this._getZipUrl(config.GITHUB_TOKEN);
        } catch (error) {
          throw new Error("repo_not_accessible");
        }
      } else {
        throw new Error("repo_not_accessible");
      }
    }
    const originalPath = this.repository.originalCachePath;
    await storage.mk(originalPath);
    // Stream the tarball straight into the cache folder.
    await storage.extractTar(originalPath, got.stream(response.url));
  }
  /** @override Serve a file from the cache, (re)downloading the repo first. */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    await this.download();
    // update the file list
    await this.repository.files({ force: true });
    return storage.read(file.originalCachePath);
  }
  /** @override List cached files, downloading the repository when absent. */
  async getFiles() {
    const folder = this.repository.originalCachePath;
    if (!(await storage.exists(folder))) {
      await this.download();
    }
    return storage.listFiles(folder);
  }
}

View File

@@ -0,0 +1,171 @@
import { Branch } from "../types";
import * as gh from "parse-github-url";
import { IRepositoryDocument } from "../database/repositories/repositories.types";
import { Octokit } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
/**
 * Wrapper around a (partial) repository DB record that lazily fetches and
 * caches branch and README data from the GitHub API.
 */
export class GitHubRepository {
  // Partial repository document; fields may be missing depending on how
  // the instance was constructed.
  private _data: Partial<
    { [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }
  >;
  constructor(
    data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
  ) {
    this._data = data;
  }
  toJSON() {
    return {
      repo: this.repo,
      owner: this.owner,
      hasPage: this._data.hasPage,
      pageSource: this._data.pageSource,
      fullName: this.fullName,
      defaultBranch: this._data.defaultBranch,
      size: this.size,
    };
  }
  // Raw underlying record (DB document or plain object).
  get model() {
    return this._data;
  }
  public get fullName(): string {
    return this._data.name;
  }
  // External id as stored in the DB (e.g. "gh_<github id>").
  public get id(): string {
    return this._data.externalId;
  }
  // Repository size in kilobytes, as reported by GitHub.
  public get size(): number {
    return this._data.size;
  }
  /**
   * Branch list; fetched from GitHub (and persisted) when not cached or
   * when opt.force is set, otherwise read back from the DB.
   */
  async branches(opt: {
    accessToken?: string;
    force?: boolean;
  }): Promise<Branch[]> {
    if (
      !this._data.branches ||
      this._data.branches.length == 0 ||
      opt?.force === true
    ) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      const branches = (
        await octokit.paginate(octokit.repos.listBranches, {
          owner: this.owner,
          repo: this.repo,
          per_page: 100,
        })
      ).map((b) => {
        return {
          name: b.name,
          commit: b.commit.sha,
          // Keep the cached README of a branch we already knew about.
          readme: this._data.branches?.filter(
            (f: Branch) => f.name == b.name
          )[0]?.readme,
        } as Branch;
      });
      this._data.branches = branches;
      await RepositoryModel.updateOne(
        { externalId: this.id },
        { $set: { branches } }
      );
    } else {
      this._data.branches = (
        await RepositoryModel.findOne({ externalId: this.id }).select(
          "branches"
        )
      ).branches;
    }
    return this._data.branches;
  }
  /**
   * README of a branch (default branch when unspecified); fetched from
   * GitHub and cached on the branch record when missing or forced.
   */
  async readme(opt: {
    branch?: string;
    force?: boolean;
    accessToken?: string;
  }): Promise<string> {
    if (!opt.branch) opt.branch = this._data.defaultBranch || "master";
    const model = await RepositoryModel.findOne({
      externalId: this.id,
    }).select("branches");
    this._data.branches = await this.branches(opt);
    model.branches = this._data.branches;
    const selected = model.branches.filter((f) => f.name == opt.branch)[0];
    if (!selected?.readme || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      // NOTE(review): if opt.branch matches no branch, selected is undefined
      // and `selected.readme = readme` below throws — confirm callers always
      // pass a valid branch.
      const ghRes = await octokit.repos.getReadme({
        owner: this.owner,
        repo: this.repo,
        ref: selected?.commit,
      });
      const readme = Buffer.from(
        ghRes.data.content,
        ghRes.data.encoding as BufferEncoding
      ).toString("utf-8");
      selected.readme = readme;
      await model.save();
    }
    return selected.readme;
  }
  // Owner segment of the full name ("owner/repo").
  public get owner(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.owner || this.fullName;
  }
  // Repository segment of the full name ("owner/repo").
  public get repo(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.name || this.fullName;
  }
}
/**
 * Fetch a repository from the GitHub API and upsert its DB record (keyed by
 * externalId "gh_<id>"), including pages configuration when enabled.
 * Throws "repo_not_found" when GitHub returns nothing.
 */
export async function getRepositoryFromGitHub(opt: {
  owner: string;
  repo: string;
  accessToken: string;
}) {
  const octokit = new Octokit({ auth: opt.accessToken });
  const r = (
    await octokit.repos.get({
      owner: opt.owner,
      repo: opt.repo,
    })
  ).data;
  if (!r) throw new Error("repo_not_found");
  let model = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
  if (!model) {
    model = new RepositoryModel({ externalId: "gh_" + r.id });
  }
  // Refresh the mutable metadata on every call.
  model.name = r.full_name;
  model.url = r.html_url;
  model.size = r.size;
  model.defaultBranch = r.default_branch;
  model.hasPage = r.has_pages;
  if (model.hasPage) {
    const ghPageRes = await octokit.repos.getPages({
      owner: opt.owner,
      repo: opt.repo,
    });
    model.pageSource = ghPageRes.data.source;
  }
  await model.save();
  return new GitHubRepository(model);
}

171
src/source/GitHubStream.ts Normal file
View File

@@ -0,0 +1,171 @@
import { Octokit } from "@octokit/rest";
import AnonymizedFile from "../AnonymizedFile";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import storage from "../storage";
import { SourceBase, Tree } from "../types";
import * as path from "path";
import * as stream from "stream";
/**
 * Source that streams individual blobs and trees from the GitHub API on
 * demand instead of downloading the whole repository.
 */
export default class GitHubStream extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }
  /**
   * @override Fetch a single blob by sha, cache the raw bytes, and return
   * them as a readable stream. Throws file_too_big on GitHub's 403 blob
   * limit and file_not_accessible otherwise.
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    if (!file.sha) throw new Error("file_sha_not_provided");
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    try {
      const ghRes = await octokit.rest.git.getBlob({
        owner: this.githubRepository.owner,
        repo: this.githubRepository.repo,
        file_sha: file.sha,
      });
      if (!ghRes.data.content && ghRes.data.size != 0) {
        throw new Error("file_not_accessible");
      }
      // empty file
      let content: Buffer;
      if (ghRes.data.content) {
        content = Buffer.from(
          ghRes.data.content,
          ghRes.data.encoding as BufferEncoding
        );
      } else {
        content = Buffer.from("");
      }
      // Cache the original bytes for later requests.
      await storage.write(file.originalCachePath, content);
      return stream.Readable.from(content.toString());
    } catch (error) {
      if (error.status == 403) {
        throw new Error("file_too_big");
      }
      console.error(error);
    }
    throw new Error("file_not_accessible");
  }
  /** @override File tree of the anonymized commit. */
  async getFiles() {
    return this.getTree(this.branch.commit);
  }
  /**
   * Fetch a git tree recursively. GitHub truncates very large recursive
   * trees; in that case the missing subtrees are fetched one by one.
   */
  private async getTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
      recursive: "1",
    });
    const tree = this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    if (ghRes.data.truncated) {
      // Completion pass: walk the non-recursive listing for missing parts.
      await this.getTruncatedTree(sha, tree, parentPath);
    }
    return tree;
  }
  /**
   * Non-recursive listing of `sha`; recurses (via getTree) into any
   * subdirectory that is not yet present in the accumulated tree.
   */
  private async getTruncatedTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
    });
    const tree = ghRes.data.tree;
    for (let elem of tree) {
      if (!elem.path) continue;
      if (elem.type == "tree") {
        const elementPath = path.join(parentPath, elem.path);
        const paths = elementPath.split("/");
        let current = truncatedTree;
        // Descend along the known tree; fetch the subtree at the first gap.
        for (let i = 0; i < paths.length; i++) {
          let p = paths[i];
          if (!current[p]) {
            if (elem.sha)
              await this.getTree(elem.sha, truncatedTree, elementPath);
            break;
          }
          current = current[p] as Tree;
        }
      }
    }
    this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    return truncatedTree;
  }
  /**
   * Merge a flat GitHub tree listing into the nested Tree structure.
   * Names starting with "$" are escaped with a leading backslash to avoid
   * clashing with special keys.
   */
  private tree2Tree(
    tree: {
      path?: string;
      mode?: string;
      type?: string;
      sha?: string;
      size?: number;
      url?: string;
    }[],
    partialTree: Tree = {},
    parentPath: string = ""
  ) {
    for (let elem of tree) {
      let current = partialTree;
      if (!elem.path) continue;
      const paths = path.join(parentPath, elem.path).split("/");
      // if elem is a folder iterate on all folders if it is a file stop before the filename
      const end = elem.type == "tree" ? paths.length : paths.length - 1;
      for (let i = 0; i < end; i++) {
        let p = paths[i];
        if (p[0] == "$") {
          p = "\\" + p;
        }
        if (!current[p]) {
          current[p] = {};
        }
        current = current[p] as Tree;
      }
      // if elem is a file add the file size in the file list
      if (elem.type == "blob") {
        let p = paths[end];
        if (p[0] == "$") {
          p = "\\" + p;
        }
        current[p] = {
          size: elem.size || 0, // size in bit
          sha: elem.sha || "",
        };
      }
    }
    return partialTree;
  }
}

31
src/source/Zip.ts Normal file
View File

@@ -0,0 +1,31 @@
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import Repository from "../Repository";
import storage from "../storage";
import { SourceBase } from "../types";
import * as stream from "stream";
/**
 * Source implementation for user-uploaded zip archives. The archive is
 * extracted into the repository's original cache beforehand; this class
 * only reads from that cache.
 */
export default class Zip implements SourceBase {
  type = "Zip";
  // The anonymized repository this source belongs to.
  repository: Repository;
  // Optional original URL of the uploaded archive.
  url?: string;

  constructor(data: any, repository: Repository) {
    this.repository = repository;
    this.url = data.url;
  }

  /** List every file available in the extracted archive. */
  async getFiles() {
    const cacheRoot = this.repository.originalCachePath;
    return storage.listFiles(cacheRoot);
  }

  /** Open a read stream on the cached original content of `file`. */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    return storage.read(file.originalCachePath);
  }

  /** Serialized form persisted in the repository configuration. */
  toJSON(): any {
    return {
      type: this.type,
    };
  }
}

7
src/storage.ts Normal file
View File

@@ -0,0 +1,7 @@
import FileSystem from "./storage/FileSystem";
import S3Storage from "./storage/S3";
import { StorageBase } from "./types";
// Storage backend singleton. FileSystem is the active implementation; swap
// in S3Storage here to persist repositories on S3 instead.
const storage = new FileSystem();
export default storage as StorageBase;

136
src/storage/FileSystem.ts Normal file
View File

@@ -0,0 +1,136 @@
import { StorageBase, Tree } from "../types";
import * as fs from "fs";
import * as tar from "tar-fs";
import * as path from "path";
import * as express from "express";
import config from "../../config";
import * as stream from "stream";
import * as gunzip from "gunzip-maybe";
import * as archiver from "archiver";
export default class FileSystem implements StorageBase {
type = "FileSystem";
constructor() {}
  /** @override True when `p` (relative to the storage root) exists. */
  async exists(p: string): Promise<boolean> {
    return fs.existsSync(path.join(config.FOLDER, p));
  }
  /** @override Send the file at `p` over HTTP (dotfiles included). */
  send(p: string, res: express.Response) {
    res.sendFile(path.join(config.FOLDER, p), { dotfiles: "allow" });
  }
  /** @override Open a read stream on `p` (relative to the storage root). */
  read(p: string): stream.Readable {
    return fs.createReadStream(path.join(config.FOLDER, p));
  }
  /** @override Write `data` to `p`, creating parent directories if needed. */
  async write(p: string, data: Buffer): Promise<void> {
    if (!(await this.exists(path.dirname(p)))) {
      await fs.promises.mkdir(path.dirname(path.join(config.FOLDER, p)), {
        recursive: true,
      });
    }
    return fs.promises.writeFile(path.join(config.FOLDER, p), data);
  }
/** @override */
async rm(path: string): Promise<void> {
await fs.promises.rm(path, { force: true, recursive: true });
}
/** @override */
async mk(dir: string): Promise<void> {
if (!(await this.exists(dir)))
fs.promises.mkdir(path.join(config.FOLDER, dir), { recursive: true });
}
/** @override */
async listFiles(
dir: string,
opt: {
root?: string;
onEntry?: (file: { path: string; size: number }) => void;
} = {}
): Promise<Tree> {
if (opt.root == null) {
opt.root = config.FOLDER;
}
let files = await fs.promises.readdir(path.join(opt.root, dir));
const output: Tree = {};
for (let file of files) {
let filePath = path.join(dir, file);
try {
const stats = await fs.promises.stat(path.join(opt.root, filePath));
if (file[0] == "$") {
file = "\\" + file;
}
if (stats.isDirectory()) {
output[file] = await this.listFiles(filePath, opt);
} else if (stats.isFile()) {
if (opt.onEntry) {
opt.onEntry({
path: filePath,
size: stats.size,
});
}
output[file] = { size: stats.size, sha: stats.ino.toString() };
}
} catch (error) {
console.error(error);
}
}
return output;
}
/** @override */
async extractTar(p: string, data: stream.Readable): Promise<void> {
return new Promise((resolve, reject) => {
data
.pipe(gunzip())
.pipe(
tar.extract(path.join(config.FOLDER, p), {
map: (header) => {
header.name = header.name.substr(header.name.indexOf("/") + 1);
return header;
},
})
)
.on("finish", resolve)
.on("error", reject);
});
}
/** @override */
archive(
dir: string,
opt?: {
format?: "zip" | "tar";
fileTransformer?;
}
) {
const archive = archiver(opt?.format, {});
this.listFiles(dir, {
onEntry: (file) => {
let rs = this.read(file.path);
if (opt?.fileTransformer) {
// apply transformation on the stream
rs = rs.pipe(opt.fileTransformer(file.path));
}
const f = file.path.replace(dir, "");
archive.append(rs, {
name: path.basename(f),
prefix: path.dirname(f),
});
},
}).then(() => {
archive.finalize();
});
return archive;
}
}

225
src/storage/S3.ts Normal file
View File

@@ -0,0 +1,225 @@
import { StorageBase, Tree, TreeFile } from "../types";
import { S3 } from "aws-sdk";
import config from "../../config";
import * as stream from "stream";
import { ArchiveStreamToS3 } from "archive-stream-to-s3";
import * as express from "express";
import * as mime from "mime-types";
import * as flow from "xml-flow";
import * as archiver from "archiver";
import * as path from "path";
import * as gunzip from "gunzip-maybe";
// Keep a reference to the library's original onEntry handler; extractTar()
// in S3Storage wraps it to strip the archive's root folder from entry names.
const originalArchiveStreamToS3Entry: Function = (ArchiveStreamToS3 as any)
  .prototype.onEntry;
/**
 * Storage backend that keeps repository files in an S3 bucket.
 * Every path/key given to the public methods is relative to the bucket
 * root; directory keys always end with a "/".
 */
export default class S3Storage implements StorageBase {
  type = "AWS";
  client: S3;

  constructor() {
    if (!config.S3_BUCKET) throw new Error("s3_config_not_provided");
    this.client = new S3({
      region: config.S3_REGION,
      endpoint: config.S3_ENDPOINT,
      accessKeyId: config.S3_CLIENT_ID,
      secretAccessKey: config.S3_CLIENT_SECRET,
    });
  }

  /** @override Check the existence of a key with a HEAD request. */
  async exists(path: string): Promise<boolean> {
    try {
      await this.client
        .headObject({
          Bucket: config.S3_BUCKET,
          Key: path,
        })
        .promise();
      return true;
    } catch (err) {
      return false;
    }
  }

  /** @override Create an empty "directory" marker object. */
  async mk(dir: string): Promise<void> {
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: dir,
      })
      .promise();
  }

  /** @override Remove every object under the prefix `dir`. */
  async rm(dir: string): Promise<void> {
    const data = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();
    const params = { Bucket: config.S3_BUCKET, Delete: { Objects: [] } };
    // Bug fix: Contents is undefined when the prefix matches nothing.
    (data.Contents || []).forEach(function (content) {
      params.Delete.Objects.push({ Key: content.Key });
    });
    if (params.Delete.Objects.length == 0) {
      // nothing to remove
      return;
    }
    await this.client.deleteObjects(params).promise();
    if (data.IsTruncated) {
      // listObjectsV2 returns at most 1000 keys: loop until the prefix is empty
      await this.rm(dir);
    }
  }

  /** @override Stream the object at `p` to the HTTP response, forwarding headers. */
  send(p: string, res: express.Response) {
    const s = this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: p,
      })
      .on("httpHeaders", (statusCode, headers, response) => {
        res.status(statusCode);
        if (statusCode < 300) {
          res.set("Content-Length", headers["content-length"]);
          res.set("Content-Type", headers["content-type"]);
        }
        (
          response.httpResponse.createUnbufferedStream() as stream.Readable
        ).pipe(res);
      });
    s.send();
  }

  /** @override Open a read stream on the object at `path`. */
  read(path: string): stream.Readable {
    return this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: path,
      })
      .createReadStream();
  }

  /** @override Upload `data` at `path` with a best-effort Content-Type. */
  async write(path: string, data: Buffer): Promise<void> {
    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: path,
        Body: data,
        // Bug fix: mime.lookup() returns false for unknown extensions; the
        // previous code then sent the literal string "false" as Content-Type.
        ContentType: mime.lookup(path) || "application/octet-stream",
      })
      .promise();
    return;
  }

  /** @override List the objects under `dir` as a nested Tree. */
  async listFiles(dir: string): Promise<Tree> {
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const out: Tree = {};
    const req = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();
    if (!req.Contents) return out;
    for (const f of req.Contents) {
      if (!f.Key) continue;
      f.Key = f.Key.replace(dir, "");
      const paths = f.Key.split("/");
      let current: Tree = out;
      // create the intermediate folder nodes
      for (let i = 0; i < paths.length - 1; i++) {
        let p = paths[i];
        if (!p) continue;
        if (!(current[p] as Tree)) {
          current[p] = {} as Tree;
        }
        current = current[p] as Tree;
      }
      // the ETag doubles as a content identifier
      const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
      const fileName = paths[paths.length - 1];
      if (fileName) current[fileName] = fileInfo;
    }
    return out;
  }

  /** @override Extract a (possibly gzipped) tar stream into the bucket under `p`. */
  async extractTar(p: string, data: stream.Readable): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const toS3 = new ArchiveStreamToS3(config.S3_BUCKET, p, this.client);
      let rootFolder: string | null = null;
      // Bug fix: override onEntry on the instance instead of on the shared
      // prototype; the previous prototype patch leaked the closure of the
      // latest call into every concurrent extraction.
      (toS3 as any).onEntry = function (header: any, stream: any, next: any) {
        if (rootFolder == null) {
          // tarballs wrap everything in a single root folder: drop it
          rootFolder = header.name.substr(0, header.name.indexOf("/") + 1);
        }
        header.name = header.name.replace(rootFolder, "");
        originalArchiveStreamToS3Entry.call(toS3, header, stream, next);
      };
      toS3.on("finish", (result) => {
        resolve(result);
      });
      toS3.on("error", (e) => {
        reject(e);
      });
      data.pipe(gunzip()).pipe(toS3);
    });
  }

  /** @override Build an archive (zip by default, or tar) of every object under `dir`. */
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?;
    }
  ) {
    // Bug fix: default the format so a missing option does not crash archiver.
    const archive = archiver(opt?.format || "zip", {});
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const req = this.client.listObjectsV2({
      Bucket: config.S3_BUCKET,
      Prefix: dir,
    });
    const filesStream = req.createReadStream();
    const xmlStream = flow(filesStream);
    const that = this;
    xmlStream.on("tag:contents", function (file) {
      let rs = that.read(file.key);
      file.key = file.key.replace(dir, "");
      const filename = path.basename(file.key);
      // skip the "directory" marker objects
      if (filename == "") return;
      if (opt?.fileTransformer) {
        rs = rs.pipe(opt.fileTransformer(filename));
      }
      archive.append(rs, {
        name: filename,
        prefix: path.dirname(file.key),
      });
    });
    xmlStream.on("end", () => {
      archive.finalize();
    });
    return archive;
  }
}

98
src/types.ts Normal file
View File

@@ -0,0 +1,98 @@
import GitHubDownload from "./source/GitHubDownload";
import GitHubStream from "./source/GitHubStream";
import Zip from "./source/ZIP";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
import AnonymizedFile from "./AnonymizedFile";
import * as stream from "stream";
import * as archiver from "archiver";
export interface SourceBase {
readonly type: string;
/**
* The url of the source
*/
url?: string;
/**
* Retrieve the fie content
* @param file the file of the content to retrieve
*/
getFileContent(file: AnonymizedFile): Promise<stream.Readable>;
/**
* Get all the files from a specific source
*/
getFiles(): Promise<Tree>;
toJSON(): any;
}
export type Source = GitHubDownload | GitHubStream | Zip;
export interface StorageBase {
type: string;
exists(path: string): Promise<boolean>;
read(path: string): stream.Readable;
write(path: string, data: Buffer): Promise<void>;
listFiles(dir: string): Promise<Tree>;
extractTar(p: string, data: stream.Readable): Promise<void>;
rm(path: string): Promise<void>;
archive(
dir: string,
opt?: {
format?: "zip" | "tar";
fileTransformer?: (p: any) => Transformer;
}
): archiver.Archiver;
mk(path: string): Promise<void>;
}
export type Storage = S3Storage | FileSystem;
export interface Branch {
name: string;
commit: string;
readme?: string;
}
export type RepositoryStatus =
| "ready"
| "preparing"
| "expired"
| "removed"
| "download"
| "queue";
export type SourceStatus = "available" | "unavailable";
export interface Tree {
[key: string]: TreeElement;
}
export type TreeElement = Tree | TreeFile;
export interface TreeFile {
sha: string;
size: number;
}
export interface Loc {
info: { total: number; code: number; commit: number };
languages: {
[key: string]: {
total: number;
code: number;
commit: number;
sum: number;
};
};
}

View File

@@ -1,17 +0,0 @@
// Mocha test suite for the file utilities (legacy JavaScript tests).
var expect = require("chai").expect;
var assert = require("chai").assert;
const fs = require("fs");
const githubUtils = require("../utils/github");
const fileUtils = require("../utils/file");
const repoUtils = require("../utils/repository");
const db = require("../utils/database");
describe("Test Files Utils", async function() {
  describe("List all files", function() {
    // Regression test: GitHub's tree API truncates above a certain number of
    // entries, so listing this large repository exercises the truncated path.
    it("Get all file from repo with more than 1000 files", async function() {
      const fullName = "TQRG/BugSwarm";
      await fileUtils.getTree({ fullName });
    });
  });
});

View File

@@ -1,31 +0,0 @@
// Mocha test suite for the GitHub utilities (legacy JavaScript tests).
// NOTE(review): these tests hit the live GitHub API and write to /tmp.
var expect = require("chai").expect;
var assert = require("chai").assert;
const fs = require("fs");
const githubUtils = require("../utils/github");
const repoUtils = require("../utils/repository");
const db = require("../utils/database");
describe("Test GitHub Utils", async function() {
  describe("Download Repository", function() {
    const target = "/tmp/repo.zip";
    it("Download an exisiting repo to a folder", async function() {
      await repoUtils.downloadRepoZip(
        { fullName: "tdurieux/binance-trade-bot" },
        target
      );
      expect(fs.existsSync(target)).to.equal(true, `${target} should exist`);
      fs.unlinkSync(target);
    });
    it("Download a non-exisiting repo to a folder", async function() {
      try {
        await repoUtils.downloadRepoZip(
          { fullName: "tdurieux/missing" },
          target
        );
        fs.unlinkSync(target);
        assert.fail("Should trigger an exception");
      } catch (error) {}
    });
  });
});

10
tsconfig.json Normal file
View File

@@ -0,0 +1,10 @@
{
  // tsconfig.json is parsed as JSONC by tsc, so comments are allowed here.
  "compilerOptions": {
    "target": "es6",
    "module": "commonjs",
    "outDir": "dist", // compiled JavaScript output folder
    "sourceMap": true // emit .js.map files for debugging
  },
  "include": ["src/**/*.ts", "index.ts", "tests3.ts"],
  "exclude": ["node_modules", ".vscode"]
}

View File

@@ -1,116 +0,0 @@
const fs = require("fs").promises;
const ofs = require("fs");
const path = require("path");
const fileUtils = require("./file");
const config = require("../config")
/**
 * Anonymize the textual content of a file according to the repository
 * configuration: strips markdown images/links when asked, rewrites GitHub
 * links of the repository to their anonymized counterpart, and masks every
 * configured term.
 * @param content the file content as a string
 * @param repoConfig the anonymized repository configuration
 * @returns the anonymized content
 */
const anonymizeContent = (content, repoConfig) => {
  const urlRegex = /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
  // Bug fix: escape user-provided values before interpolating them into a
  // RegExp; a term such as "c++" previously threw a SyntaxError, and regex
  // metacharacters could alter the match.
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  if (repoConfig.options.image === false) {
    // remove image in markdown
    content = content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      ""
    );
  }
  if (!repoConfig.options.link) {
    // remove all links
    content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
  }
  // NOTE(review): the anonymized host is hard-coded here; consider reading
  // it from the configuration.
  content = content.replace(
    new RegExp(
      `https://github.com/${escapeRegExp(repoConfig.fullName)}/blob/${escapeRegExp(repoConfig.branch)}\\b`,
      "gi"
    ),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  content = content.replace(
    new RegExp(
      `https://github.com/${escapeRegExp(repoConfig.fullName)}/tree/${escapeRegExp(repoConfig.branch)}\\b`,
      "gi"
    ),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  content = content.replace(
    new RegExp(`https://github.com/${escapeRegExp(repoConfig.fullName)}`, "gi"),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  for (let term of repoConfig.terms) {
    if (term.trim() == "") {
      continue;
    }
    // remove whole url if it contains the term
    content = content.replace(urlRegex, (match) => {
      if (new RegExp(`\\b${escapeRegExp(term)}\\b`, "gi").test(match))
        return config.ANONYMIZATION_MASK;
      return match;
    });
    // remove the term in the text
    content = content.replace(
      new RegExp(`\\b${escapeRegExp(term)}\\b`, "gi"),
      config.ANONYMIZATION_MASK
    );
  }
  return content;
};
/**
 * Anonymize a file path by masking every configured term.
 * @param path the path to anonymize
 * @param repoConfig the anonymized repository configuration
 * @returns the anonymized path
 */
const anonymizePath = (path, repoConfig) => {
  for (let term of repoConfig.terms) {
    if (term.trim() == "") {
      continue;
    }
    // Bug fix: escape the term so regex metacharacters (e.g. "c++") cannot
    // throw or alter the match.
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    path = path.replace(new RegExp(escaped, "gi"), config.ANONYMIZATION_MASK);
  }
  return path;
};
/**
 * Recursively yield the full paths of all regular files under `dir`.
 * Directories are traversed depth-first; non-file entries are skipped.
 */
async function* walk(dir) {
  const handle = await fs.opendir(dir);
  for await (const dirent of handle) {
    const fullPath = path.join(dir, dirent.name);
    if (dirent.isFile()) {
      yield fullPath;
    } else if (dirent.isDirectory()) {
      yield* walk(fullPath);
    }
  }
}
/**
 * Recursively anonymize every file of `root` into `destination`.
 * On failure the partially written destination is removed before rethrowing.
 */
const anonymizeFolder = async (root, destination, repoConfig) => {
  if (!ofs.existsSync(destination)) {
    await fs.mkdir(destination, { recursive: true });
  }
  try {
    for await (const originalFilePath of walk(root)) {
      // compute the anonymized relative path inside the destination folder
      const destinationFilePath = path.join(
        destination,
        anonymizePath(originalFilePath.replace(root, ""), repoConfig)
      );
      const destinationFolder = path.dirname(destinationFilePath);
      if (!ofs.existsSync(destinationFolder)) {
        await fs.mkdir(destinationFolder, { recursive: true });
      }
      await anonymizeFile(originalFilePath, destinationFilePath, repoConfig);
    }
  } catch (error) {
    // Bug fix: await the cleanup so the partial output is really gone
    // before the caller observes the failure.
    await fs.rm(destination, { recursive: true, force: true });
    throw error;
  }
};
/**
 * Anonymize a single file: text files are rewritten with their content
 * anonymized, any other file is copied verbatim to `target`.
 */
const anonymizeFile = async (filePath, target, repoConfig) => {
  const targetFolder = path.dirname(target);
  if (!ofs.existsSync(targetFolder)) {
    await fs.mkdir(targetFolder, { recursive: true });
  }
  if (!fileUtils.isText(filePath)) {
    // binary file: copy as-is
    await fs.copyFile(filePath, target);
    return;
  }
  const raw = await fs.readFile(filePath);
  await fs.writeFile(target, anonymizeContent(raw.toString(), repoConfig));
};
// Public API of the anonymization utilities.
module.exports.anonymizeFile = anonymizeFile;
module.exports.anonymizePath = anonymizePath;
module.exports.anonymizeFolder = anonymizeFolder;
module.exports.anonymizeContent = anonymizeContent;

View File

@@ -1,39 +0,0 @@
const config = require("../config");
var MongoClient = require("mongodb").MongoClient;
// NOTE(review): the MongoDB credentials are hard-coded here even though
// `config` is imported above — presumably they should come from the
// configuration/environment; confirm before shipping.
const MONGO_URL = "mongodb://root:rootpassword@mongodb:27017/?authSource=admin";
// Lazily initialized by connect(); null until then.
let mongoClient = null;
let DB = null;
// Return the database handle, or a specific collection when a name is given.
// connect() must have been called first.
module.exports.get = (collection) =>
  collection ? DB.collection(collection) : DB;
/**
 * Open the MongoDB connection and create the indexes the application
 * relies on. Must be called once at startup before any get() call.
 * @returns the "anonymous_github" database handle
 */
module.exports.connect = async () => {
  mongoClient = await MongoClient.connect(
    MONGO_URL,
    { useNewUrlParser: true, useUnifiedTopology: true }
  );
  DB = mongoClient.db("anonymous_github");
  // one anonymized repository per repoId
  await DB.collection("anonymized_repositories").createIndex(
    { repoId: 1 },
    { unique: true, name: "repoId" }
  );
  // several anonymized repositories may share the same GitHub fullName
  await DB.collection("anonymized_repositories").createIndex(
    { fullName: 1 },
    { name: "fullName" }
  );
  await DB.collection("repositories").createIndex(
    { fullName: 1 },
    { unique: true, name: "fullName" }
  );
  await DB.collection("users").createIndex(
    { username: 1 },
    { unique: true, name: "username" }
  );
  return DB;
};
// Close the MongoDB connection previously opened by connect().
module.exports.close = async () => mongoClient.close();

View File

@@ -1,491 +0,0 @@
const ofs = require("fs");
const fs = require("fs").promises;
const path = require("path");
const { Octokit } = require("@octokit/rest");
const gh = require("parse-github-url");
const loc = require("github-linguist").default;
const { isText } = require("istextorbinary");
const db = require("./database");
const repoUtils = require("./repository");
const githubUtils = require("./github");
const anonymizeUtils = require("./anonymize");
const config = require("../config");
/**
 * Recursively build the tree of `dir` on the local filesystem.
 * Directories get a `child` map (plus the inode as `sha`), files get
 * `{ size, sha }`. Names starting with "$" are escaped with a backslash so
 * they stay safe as MongoDB keys. Unreadable entries are logged and skipped.
 */
async function walk(dir, root) {
  if (root == null) {
    root = dir;
  }
  const output = { child: {} };
  const entries = await fs.readdir(dir);
  for (const entry of entries) {
    const entryPath = path.join(dir, entry);
    try {
      const stats = await fs.stat(entryPath);
      // "$" is a reserved operator prefix in MongoDB keys: escape it
      const key = entry[0] == "$" ? "\\" + entry : entry;
      if (stats.isDirectory()) {
        const subtree = await walk(entryPath, root);
        subtree.sha = stats.ino;
        output.child[key] = subtree;
      } else if (stats.isFile()) {
        output.child[key] = { size: stats.size, sha: stats.ino };
      }
    } catch (error) {
      // keep going on permission errors or races with deletions
      console.error(error);
    }
  }
  return output;
}
/**
 * Convert the flat entry array returned by the GitHub tree API into the
 * nested `{ child: {...} }` representation used by the rest of the code.
 * Names starting with "$" are backslash-escaped for MongoDB safety.
 * @param tree array of GitHub entries ({ path, type, size, sha })
 * @param partialTree existing tree to merge into (created when omitted)
 * @param parentPath path prefix prepended to every entry
 */
function tree2tree(tree, partialTree, parentPath) {
  if (!parentPath) parentPath = "";
  if (partialTree == null) {
    partialTree = { child: Object.create(null) };
  }
  const escape = (name) => (name[0] == "$" ? "\\" + name : name);
  for (const elem of tree) {
    const segments = path.join(parentPath, elem.path).split("/");
    const isBlob = elem.type == "blob";
    // walk/create the folder nodes; for a blob, stop before the filename
    const depth = isBlob ? segments.length - 1 : segments.length;
    let node = partialTree;
    for (let i = 0; i < depth; i++) {
      const key = escape(segments[i]);
      if (!node.child[key]) {
        node.child[key] = { child: Object.create(null) };
      }
      node = node.child[key];
    }
    if (isBlob) {
      // leaf: record the blob's size and sha under its filename
      node.child[escape(segments[depth])] = {
        size: elem.size,
        sha: elem.sha,
      };
    } else {
      node.sha = elem.sha;
    }
  }
  return partialTree;
}
/**
 * Complete a truncated tree: fetch one (non-recursive) level of the tree at
 * `sha`, and for every sub-folder missing from `truncatedTree`, recurse via
 * getTree() to fill it in. Mutates and returns `truncatedTree`.
 * @param repoConfig the anonymized repository configuration (commit may be set)
 * @param truncatedTree the partially filled tree to complete
 * @param sha the git sha of the tree level to fetch ("HEAD" fallback)
 * @param parentPath path prefix of this tree level
 */
async function getTruncatedTree(repoConfig, truncatedTree, sha, parentPath) {
  const repo = gh(repoConfig.fullName);
  // sanitize the sha: fall back to the configured commit, then to HEAD
  if (!sha || !/^[a-f0-9]+$/.test(sha)) {
    if (repoConfig.commit && /^[a-f0-9]+$/.test(repoConfig.commit)) {
      sha = repoConfig.commit;
    } else {
      sha = "HEAD";
    }
    repoConfig.commit = sha;
  }
  const octokit = new Octokit({
    auth: await githubUtils.getToken(repoConfig),
  });
  // non-recursive listing of this level only
  const ghRes = await octokit.git.getTree({
    owner: repo.owner,
    repo: repo.name,
    tree_sha: sha,
  });
  const tree = ghRes.data.tree;
  for (let elem of tree) {
    if (elem.type == "tree") {
      const elementPath = path.join(parentPath, elem.path);
      const paths = elementPath.split("/");
      let current = truncatedTree;
      // check whether this sub-folder is already present in the tree;
      // if any segment is missing, fetch the whole subtree recursively
      for (let i = 0; i < paths.length; i++) {
        let p = paths[i];
        if (!current.child[p]) {
          await module.exports.getTree(
            repoConfig,
            elem.sha,
            truncatedTree,
            elementPath
          );
          break;
        }
        current = current.child[p];
      }
    }
  }
  // merge this level's entries into the accumulated tree
  tree2tree(ghRes.data.tree, truncatedTree, parentPath);
  return truncatedTree;
}
/**
 * Fetch the full file tree of the repository at `sha` from the GitHub API
 * (recursive listing), falling back to getTruncatedTree() when GitHub
 * truncates the response. Updates repoConfig.commit with the resolved sha.
 * @param repoConfig the anonymized repository configuration
 * @param sha the commit/tree sha to list ("HEAD" fallback when invalid)
 * @param truncatedTree optional existing tree to merge into
 * @param parentPath optional path prefix for the fetched entries
 */
module.exports.getTree = async (repoConfig, sha, truncatedTree, parentPath) => {
  const repo = gh(repoConfig.fullName);
  // sanitize the sha: fall back to the configured commit, then to HEAD
  if (!sha || !/^[a-f0-9]+$/.test(sha)) {
    if (repoConfig.commit && /^[a-f0-9]+$/.test(repoConfig.commit)) {
      sha = repoConfig.commit;
    } else {
      sha = "HEAD";
    }
  }
  if (!parentPath) parentPath = "";
  const token = await githubUtils.getToken(repoConfig);
  const octokit = new Octokit({
    auth: token,
  });
  const ghRes = await octokit.git.getTree({
    owner: repo.owner,
    repo: repo.name,
    tree_sha: sha,
    recursive: true,
  });
  // remember the resolved sha (e.g. when "HEAD" was used)
  sha = ghRes.data.sha;
  repoConfig.commit = sha;
  const tree = tree2tree(ghRes.data.tree, truncatedTree, parentPath);
  if (ghRes.data.truncated) {
    // the API capped the listing: complete the tree level by level
    await getTruncatedTree(repoConfig, tree, sha, parentPath);
  }
  return tree;
};
/**
 * Return the anonymized file list of a repository, computing and caching it
 * in the database on first access. In "stream" mode the list comes from the
 * GitHub tree API; in "download" mode it is walked from the local clone.
 * @param options { repoConfig } or { repoId }
 * @throws "repo_not_found" | "non_supported_mode"
 */
module.exports.getFileList = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  // cached list from a previous call?
  const r = await db.get("anonymized_repositories").findOne(
    { repoId: repoConfig.repoId },
    {
      projection: { files: 1 },
    }
  );
  if (r && r.files) {
    return r.files;
  }
  if (repoConfig.options.mode == "stream") {
    // get file list from github
    const tree = await module.exports.getTree(repoConfig, repoConfig.commit);
    const files = anonymizeTree(tree, repoConfig);
    // cache both the original and the anonymized trees
    await db.get("anonymized_repositories").updateOne(
      { repoId: repoConfig.repoId },
      {
        $set: {
          commit: repoConfig.commit,
          originalFiles: tree.child,
          files,
        },
      },
      { upsert: true }
    );
    return files;
  } else if (repoConfig.options.mode == "download") {
    // walk the locally downloaded clone
    const originalFiles = await walk(
      repoUtils.getOriginalPath(repoConfig.repoId)
    );
    const files = anonymizeTree(originalFiles, repoConfig);
    await db.get("anonymized_repositories").updateOne(
      { repoId: repoConfig.repoId },
      {
        $set: {
          originalFiles: originalFiles.child,
          files,
        },
      },
      { upsert: true }
    );
    return files;
  } else {
    throw "non_supported_mode";
  }
};
/**
 * Recursively anonymize every file/folder name of a tree.
 * File leaves (detected by their integer `size`) are returned unchanged;
 * folder nodes are rebuilt with anonymized keys (and without the `child`
 * wrapper in the output).
 */
function anonymizeTree(tree, repoConfig) {
  // a leaf node { size, sha }: nothing to rename inside
  if (Number.isInteger(tree.size)) {
    return tree;
  }
  const anonymized = {};
  for (const name in tree.child) {
    const maskedName = anonymizeUtils.anonymizePath(name, repoConfig);
    anonymized[maskedName] = anonymizeTree(tree.child[name], repoConfig);
  }
  return anonymized;
}
/**
 * Flatten a tree into a `{ sha -> path }` lookup table, used to recover the
 * original (non-anonymized) path of a file from its sha.
 */
function tree2sha(tree, output, parent) {
  if (!output) {
    output = {};
    parent = "";
  }
  for (const name in tree) {
    const node = tree[name];
    const nodePath = path.join(parent, name);
    if (node.sha) {
      output[node.sha] = nodePath;
    }
    if (node.child) {
      // recurse into the folder with the accumulated path
      tree2sha(node.child, output, nodePath);
    }
  }
  return output;
}
/**
 * Look up a node in a tree by its slash-separated path.
 * Accepts trees with or without the top-level `child` wrapper; empty
 * segments (leading or doubled slashes) are ignored.
 * @returns the matching node, or null when any segment is missing
 */
function getFile(tree, elementPath) {
  let node = tree.child ? tree : { child: tree };
  for (const segment of elementPath.trim().split("/")) {
    if (segment == "") {
      continue;
    }
    const children = node.child ? node.child : node;
    if (!children[segment]) {
      return null;
    }
    node = children[segment];
  }
  return node;
}
// Extensions (lowercase, without the dot) always treated as text even when
// the generic text detection does not recognize them.
module.exports.additionalExtensions = [
  "license",
  "dockerfile",
  "sbt",
  "ipynb",
  "gp",
  "out",
  "sol",
  "in",
];
/**
 * Decide whether the file at `p` should be treated as text.
 * Checks the whitelist of extra extensions first, then istextorbinary on
 * the file name alone, and finally on the file content when it exists.
 */
module.exports.isText = (p) => {
  const extension = path.basename(p).split(".").reverse()[0].toLowerCase();
  if (module.exports.additionalExtensions.includes(extension)) {
    return true;
  }
  if (isText(p)) {
    return true;
  }
  // name-based detection was inconclusive: look at the actual bytes
  return ofs.existsSync(p) && !!isText(p, ofs.readFileSync(p));
};
module.exports.isFileSupported = (repoConfig, p) => {
if (module.exports.isText(p)) {
return true;
}
const filename = path.basename(p);
const extensions = filename.split(".").reverse();
const extension = extensions[0].toLowerCase();
if (repoConfig.options.pdf && extension == "pdf") {
return true;
}
if (
repoConfig.options.image &&
(extension == "png" ||
extension == "ico" ||
extension == "jpg" ||
extension == "jpeg" ||
extension == "gif")
) {
return true;
}
return false;
};
/**
 * Check that `options.path` points to a valid, accessible file of the
 * anonymized repository, materializing it on demand (anonymizing the cached
 * original, or downloading the blob from GitHub in "stream" mode).
 * @param options { path, repoConfig? | repoId? }
 * @returns true when the anonymized file exists or was produced
 * @throws string error codes: "invalid_path", "repo_not_found",
 *   "repository_expired", "repository_not_ready", "is_folder",
 *   "file_not_found", "file_too_big", "file_not_accessible",
 *   "unable_to_write_file", "file_not_supported"
 */
module.exports.isFilePathValid = async (options) => {
  if (options.path == null) {
    throw "invalid_path";
  }
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  if (repoConfig.status == "expired") {
    throw "repository_expired";
  }
  if (repoConfig.status == "removed") {
    throw "repository_expired";
  }
  if (repoConfig.status != "ready") {
    throw "repository_not_ready";
  }
  const anonymizedFilePath = path.join(
    repoUtils.getAnonymizedPath(repoConfig.repoId),
    options.path
  );
  // fast path: the anonymized file was already produced
  if (ofs.existsSync(anonymizedFilePath)) {
    if (ofs.lstatSync(anonymizedFilePath).isDirectory()) {
      throw "is_folder";
    }
    return true;
  }
  let unanonymizePath = options.path;
  const files = await module.exports.getFileList({ repoConfig });
  const file = getFile(files, options.path);
  if (file == null) {
    throw "file_not_found";
  }
  if (file) {
    // map the anonymized path back to the original one through the sha
    const r = await db
      .get("anonymized_repositories")
      .findOne(
        { repoId: repoConfig.repoId },
        { projection: { originalFiles: 1 } }
      );
    const shatree = tree2sha(r.originalFiles);
    if (shatree[file.sha]) {
      unanonymizePath = shatree[file.sha];
    }
  }
  const originalFilePath = path.join(
    repoUtils.getOriginalPath(repoConfig.repoId),
    unanonymizePath
  );
  // the original is cached locally: anonymize it on the fly
  if (ofs.existsSync(originalFilePath)) {
    if (ofs.lstatSync(originalFilePath).isDirectory()) {
      throw "is_folder";
    }
    if (!module.exports.isFileSupported(repoConfig, originalFilePath)) {
      throw "file_not_supported";
    }
    await anonymizeUtils.anonymizeFile(
      originalFilePath,
      anonymizedFilePath,
      repoConfig
    );
    return true;
  }
  // if stream mode check download the file
  if (repoConfig.options.mode == "stream") {
    if (!file.sha) {
      throw "is_folder";
    }
    if (file.size > config.MAX_FILE_SIZE) {
      // file bigger than the configured limit
      throw "file_too_big";
    }
    const octokit = new Octokit({
      auth: await githubUtils.getToken(repoConfig),
    });
    let ghRes = null;
    try {
      const repo = gh(repoConfig.fullName);
      ghRes = await octokit.request(
        "GET /repos/{owner}/{repo}/git/blobs/{file_sha}",
        {
          owner: repo.owner,
          repo: repo.name,
          file_sha: file.sha,
        }
      );
    } catch (error) {
      if (error.status == 403) {
        throw "file_too_big";
      }
      console.error(error);
      throw "file_not_accessible";
    }
    if (!ghRes.data.content && ghRes.data.size != 0) {
      throw "file_not_accessible";
    }
    // empty file
    let content = "";
    if (ghRes.data.content) {
      // Bug fix: Buffer.from is a factory function, not a constructor;
      // calling it with `new` only worked by accident.
      content = Buffer.from(ghRes.data.content, ghRes.data.encoding);
    }
    try {
      await fs.mkdir(path.dirname(originalFilePath), { recursive: true });
    } catch (_) {
      // ignore: the folder may already exist
    }
    try {
      await fs.writeFile(originalFilePath, content, { encoding: "utf-8" });
    } catch (error) {
      console.error(error);
      throw "unable_to_write_file";
    }
    if (!module.exports.isFileSupported(repoConfig, originalFilePath)) {
      throw "file_not_supported";
    }
    await anonymizeUtils.anonymizeFile(
      originalFilePath,
      anonymizedFilePath,
      repoConfig
    );
    return true;
  }
  return false;
};
/**
 * Compute (and cache in the database) lines-of-code statistics for a
 * repository in "download" mode, using github-linguist on the local clone.
 * @param options { repoConfig } or { repoId }
 * @throws "repo_not_found" | "stats_unsupported"
 */
module.exports.getStats = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  // stats require the full clone on disk
  if (repoConfig.options.mode != "download") {
    throw "stats_unsupported";
  }
  // cached from a previous computation
  if (repoConfig.loc) {
    return repoConfig.loc;
  }
  const repoCache = repoUtils.getOriginalPath(repoConfig.repoId);
  try {
    await fs.access(repoCache, ofs.constants.R_OK);
  } catch (error) {
    throw "repo_not_found";
  }
  const o = await loc(repoCache);
  // drop the per-file details: only aggregates are stored
  delete o.files;
  await db.get("anonymized_repositories").updateOne(
    { repoId: repoConfig.repoId },
    {
      $set: {
        loc: o,
      },
    },
    { upsert: true }
  );
  return o;
};

View File

@@ -1,75 +0,0 @@
const ofs = require("fs");
const { OAuthApp } = require("@octokit/oauth-app");
const db = require("./database");
const repoUtils = require("./repository");
const fileUtils = require("./file");
const config = require("../config");
// OAuth application used to validate user-provided GitHub tokens.
const app = new OAuthApp({
  clientType: "github-app",
  clientId: config.CLIENT_ID,
  clientSecret: config.CLIENT_SECRET,
});
/**
 * Pick the best GitHub token for this repository: the owner's stored access
 * token first, then the token saved in the repository config (when still
 * valid), and finally the application-wide fallback token.
 */
module.exports.getToken = async (repoConfig) => {
  // 1. the repository owner's personal access token
  if (repoConfig.owner) {
    const projection = { projection: { accessToken: 1 } };
    const owner = await db
      .get()
      .collection("users")
      .findOne({ username: repoConfig.owner }, projection);
    if (owner && owner.accessToken) return owner.accessToken;
  }
  // 2. the token stored with the anonymized repository, when still valid
  if (repoConfig.token) {
    try {
      await app.checkToken({ token: repoConfig.token });
      return repoConfig.token;
    } catch (error) {
      console.debug("Token is invalid.", error);
      delete repoConfig.token; // forget the stale token
    }
  }
  // 3. the application-wide fallback token
  return config.GITHUB_TOKEN;
};
/**
 * Prepare an anonymized repository: in "download" mode the whole repository
 * is downloaded to the original cache; in "stream" mode only the file list
 * is fetched from GitHub. Files are anonymized lazily on access.
 * @returns true when the repository was (or already is) prepared,
 *   false for an unsupported mode
 */
module.exports.downloadRepoAndAnonymize = async (repoConfig) => {
  const cachePath = repoUtils.getAnonymizedPath(repoConfig.repoId);
  const originalPath = repoUtils.getOriginalPath(repoConfig.repoId);
  // already prepared: nothing to do
  if (ofs.existsSync(cachePath) || ofs.existsSync(originalPath)) {
    return true;
  }
  if (repoConfig.options.mode == "download") {
    // if cache folder does not exist download and anonymize it
    const originalPath = repoUtils.getOriginalPath(repoConfig.repoId);
    await repoUtils.updateStatus(repoConfig, "downloading");
    await repoUtils.downloadOriginalRepo(repoConfig, originalPath);
    await repoUtils.updateStatus(repoConfig, "ready");
    // anonymize all the files
    // await repoUtils.updateStatus(repoConfig, "anonymize");
    // await anonymizeUtils.anonymizeFolder(originalPath, cachePath, repoConfig);
    // await repoUtils.updateStatus(repoConfig, "anonymized");
    // clean up
    // await fs.rm(originalPath, { recursive: true, force: true });
    return true;
  } else if (repoConfig.options.mode == "stream") {
    // in stream mode only download the list of file from github
    await repoUtils.updateStatus(repoConfig, "downloading");
    await fileUtils.getFileList({ repoConfig });
    await repoUtils.updateStatus(repoConfig, "ready");
    return true;
  }
  return false;
};

View File

@@ -1,369 +0,0 @@
const fs = require("fs").promises;
const ofs = require("fs");
const path = require("path");
const gh = require("parse-github-url");
const { Octokit } = require("@octokit/rest");
const extract = require("extract-zip");
const db = require("./database");
const githubUtils = require("./github");
const config = require("../config");
module.exports.getPath = (repoId) => {
return path.resolve(__dirname, "..", "repositories", repoId);
};
module.exports.getOriginalPath = (repoId) => {
return path.resolve(__dirname, "..", "repositories", repoId, "original");
};
module.exports.getAnonymizedPath = (repoId) => {
return path.resolve(__dirname, "..", "repositories", repoId, "cache");
};
/**
 * Load the configuration of an anonymized repository from the database,
 * projecting only the fields the rest of the code needs and reviving the
 * date fields.
 * @returns the configuration document, or null when unknown
 */
module.exports.getConfig = async (repoId) => {
  const repo = await db
    .get()
    .collection("anonymized_repositories")
    .findOne(
      { repoId },
      {
        projection: {
          // files: 1,
          token: 1,
          branch: 1,
          commit: 1,
          owner: 1,
          fullName: 1,
          repoId: 1,
          terms: 1,
          options: 1,
          loc: 1,
          status: 1,
          lastView: 1,
        },
      }
    );
  // revive dates: the driver may return them as strings
  if (repo && repo.options.expirationDate) {
    repo.options.expirationDate = new Date(repo.options.expirationDate);
    repo.lastView = new Date(repo.lastView);
  }
  return repo;
};
/**
 * Fetch the details of a GitHub repository, served from the database cache
 * unless `options.force` is true. Retries once with the application token
 * on authentication failure.
 * @param options { fullName | repoConfig | owner+repo, token?, force? }
 * @throws "invalid_options" | "repo_not_accessible" | "repo_not_found"
 */
module.exports.getRepoDetails = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw "invalid_options";
  }
  // serve from the cache unless a refresh is forced
  if (options.force !== true) {
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 0 } });
    if (repository && repository.id) return repository;
  }
  try {
    const repo = gh(options.fullName);
    const octokit = new Octokit({ auth: options.token });
    let ghRes = await octokit.repos.get({
      owner: repo.owner,
      repo: repo.name,
    });
    ghRes.data.fullName = ghRes.data.full_name;
    if (ghRes.data.fullName != query.fullName) {
      // repo renamed keep the old name
      ghRes.data.fullName = query.fullName;
    }
    if (ghRes.data.has_pages) {
      // Bug fix: ghPageRes was assigned without a declaration, leaking an
      // implicit global (and throwing a ReferenceError in strict mode).
      const ghPageRes = await octokit.request(
        "GET /repos/{owner}/{repo}/pages",
        {
          owner: repo.owner,
          repo: repo.name,
        }
      );
      ghRes.data.pageSource = ghPageRes.data.source;
    }
    delete ghRes.data.full_name;
    await db
      .get("repositories")
      .updateOne(query, { $set: ghRes.data }, { upsert: true });
    return ghRes.data;
  } catch (error) {
    console.log(query, error);
    // retry once with the application-wide token on auth failure
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoDetails(options);
    } else if (error.status == 403) {
      throw "repo_not_accessible";
    }
    throw "repo_not_found";
  }
};
/**
 * Download the zipball of the repository at the configured commit into
 * `target`, retrying once with the application token on auth failure.
 * @throws "repo_not_accessible"
 */
module.exports.downloadRepoZip = async (repoConfig, target) => {
  const repo = gh(repoConfig.fullName);
  // helper: fetch the zipball with a specific token
  async function getZip(token) {
    const octokit = new Octokit({ auth: token });
    return await octokit.request("GET /repos/{owner}/{repo}/zipball/{ref}", {
      owner: repo.owner,
      repo: repo.name,
      ref: repoConfig.commit,
    });
  }
  let response = null;
  try {
    response = await getZip(await githubUtils.getToken(repoConfig));
  } catch (error) {
    // the repository token may have expired: retry with the app token
    if (error.status == 401 && config.GITHUB_TOKEN) {
      try {
        response = await getZip(config.GITHUB_TOKEN);
      } catch (error) {
        throw "repo_not_accessible";
      }
    } else {
      throw "repo_not_accessible";
    }
  }
  await fs.mkdir(path.dirname(target), { recursive: true });
  await fs.writeFile(target, Buffer.from(response.data), {
    encoding: "binary",
  });
};
/**
 * Update the status (and optional error message) of an anonymized
 * repository, both on the in-memory config and in the database.
 */
module.exports.updateStatus = async (repoConfig, status, errorMessage) => {
  repoConfig.status = status;
  repoConfig.errorMessage = errorMessage;
  const update = errorMessage
    ? { $set: { status, errorMessage } }
    : // no message: clear any previous one stored in the document
      { $set: { status }, $unset: { errorMessage: "" } };
  await db
    .get("anonymized_repositories")
    .updateOne({ repoId: repoConfig.repoId }, update);
};
/**
 * Download the zip of the repository, extract it, and move its content to
 * `destination`. The temporary zip and extraction folders are cleaned up.
 */
module.exports.downloadOriginalRepo = async (repoConfig, destination) => {
  const zipPath = path.join(
    module.exports.getPath(repoConfig.repoId),
    "content.zip"
  );
  const destinationZip = destination + "_zip";
  // download the repository and unzip it
  await module.exports.downloadRepoZip(repoConfig, zipPath);
  await extract(zipPath, { dir: destinationZip });
  // the zip contains a single root folder: move it to the destination
  const folders = await fs.readdir(destinationZip);
  if (ofs.existsSync(destination)) {
    await fs.rm(destination, { force: true, recursive: true });
  }
  // Bug fix: the rename was not awaited, so the recursive rm below could
  // race with the move and delete the folder while it was still in flight.
  await fs.rename(path.join(destinationZip, folders[0]), destination);
  await fs.rm(zipPath);
  await fs.rm(destinationZip, { recursive: true });
};
/**
 * Fetch the configuration of an anonymized repository owned by `user`,
 * without the sensitive or bulky fields (token, file lists, loc stats).
 */
module.exports.getAnonymizedRepoDetails = async (repoId, user) => {
  const query = { repoId, owner: user.username };
  const projection = { token: 0, files: 0, originalFiles: 0, loc: 0 };
  return db.get("anonymized_repositories").findOne(query, { projection });
};
/**
 * Resolve the commit sha of the repository's configured branch, served from
 * the cached branch list unless `options.force` is true.
 * @param options { repoConfig? | repoId?, force? }
 * @throws "repo_not_found" | "branch_not_found"
 */
module.exports.getRepoCommit = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await module.exports.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  if (options.force !== true) {
    // look up the cached branch list first
    const query = { fullName: repoConfig.fullName };
    query["branches." + repoConfig.branch + ""] = { $exists: true };
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    if (
      repository &&
      repository.branches &&
      repository.branches[repoConfig.branch]
    )
      return repository.branches[repoConfig.branch].commit.sha;
  }
  // cache miss (or forced refresh): fetch the branches from GitHub
  const branches = await module.exports.getRepoBranches({
    repoConfig,
    token: await githubUtils.getToken(repoConfig),
    force: options.force,
  });
  if (!branches[repoConfig.branch]) {
    throw "branch_not_found";
  }
  return branches[repoConfig.branch].commit.sha;
};
/**
 * List the branches of a repository, keyed by branch name.
 * Accepts either `fullName`, a `repoConfig`, or `owner`+`repo` in the
 * options. Results are cached in the `repositories` collection; pass
 * `force === true` to bypass the cache. A 401 from GitHub triggers one
 * retry with the application token.
 * @throws "repo_not_found" on a 404, "branches_not_found" otherwise
 */
module.exports.getRepoBranches = async (options) => {
  // normalize the three accepted option shapes down to a fullName
  const fullName =
    options.fullName ||
    (options.repoConfig && options.repoConfig.fullName) ||
    (options.owner && options.repo
      ? `${options.owner}/${options.repo}`
      : null);
  if (!fullName) {
    throw new Error("Invalid options");
  }
  options.fullName = fullName;
  const query = { fullName };
  if (options.force !== true) {
    const cached = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    if (cached && cached.branches) {
      return cached.branches;
    }
  }
  try {
    const repo = gh(fullName);
    const octokit = new Octokit({ auth: options.token });
    const data = await octokit.paginate(octokit.repos.listBranches, {
      owner: repo.owner,
      repo: repo.name,
      per_page: 100,
    });
    const branches = {};
    data.forEach((branch) => {
      branches[branch.name] = branch;
    });
    // refresh the cache for the next caller
    await db
      .get("repositories")
      .updateOne(query, { $set: { branches } }, { upsert: true });
    return branches;
  } catch (error) {
    // on an authentication failure, retry once with the app token
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoBranches(options);
    }
    if (error.status == 404) {
      throw "repo_not_found";
    }
    console.error(error);
    throw "branches_not_found";
  }
};
/**
 * Fetch the README of a repository, cached in the `repositories`
 * collection unless `options.force === true`. Accepts the same option
 * shapes as getRepoBranches: `fullName`, `repoConfig`, or `owner`+`repo`.
 * A 401 from GitHub triggers one retry with the application token,
 * consistent with getRepoBranches.
 * @throws "readme_not_available" when the README cannot be retrieved
 */
module.exports.getRepoReadme = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw new Error("Invalid options");
  }
  if (options.force !== true) {
    let repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 1 } });
    if (repository && repository.readme) return repository.readme;
  }
  try {
    const repo = gh(options.fullName);
    const octokit = new Octokit({ auth: options.token });
    const ghRes = await octokit.repos.getReadme({
      owner: repo.owner,
      repo: repo.name,
    });
    // BUG FIX: Buffer.from is a static factory, not a constructor;
    // `new Buffer.from(...)` only worked by accident (a constructor
    // returning an object discards `this`) and trips the Buffer
    // deprecation path.
    const readme = Buffer.from(ghRes.data.content, "base64").toString("utf-8");
    await db
      .get("repositories")
      .updateOne(query, { $set: { readme } }, { upsert: true });
    return readme;
  } catch (error) {
    // retry once with the application token on an auth failure,
    // mirroring getRepoBranches
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoReadme(options);
    }
    // FIX: the error was previously swallowed silently, making
    // failures impossible to diagnose
    console.error(error);
    throw "readme_not_available";
  }
};
/**
 * Re-anonymize a repository when its branch points to a new commit.
 * Returns true when the repository is up to date or once the update
 * completes. On failure the status is set to "error" so the repository
 * does not remain stuck in the "updating" state.
 * @throws "repo_is_updating" if an update is already in progress
 */
module.exports.updateAnonymizedRepository = async (repoConfig) => {
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // re-read the config to reduce the window for a concurrent update
  repoConfig = await module.exports.getConfig(repoConfig.repoId);
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // check new commit
  const commit = await module.exports.getRepoCommit({
    repoConfig,
    force: true,
  });
  if (commit == repoConfig.commit) {
    console.log(`${repoConfig.repoId} is up to date`);
    return true;
  }
  repoConfig.commit = commit;
  console.log(`${repoConfig.repoId} will be updated to ${commit}`);
  await module.exports.updateStatus(repoConfig, "updating");
  await db
    .get("anonymized_repositories")
    .updateOne({ repoId: repoConfig.repoId }, { $set: { commit } });
  try {
    await module.exports.removeRepository(repoConfig);
    await githubUtils.downloadRepoAndAnonymize(repoConfig);
  } catch (error) {
    // BUG FIX: without this, a failed download/anonymization left the
    // repository in "updating" forever, and the guard above then
    // rejected every later update attempt.
    await module.exports.updateStatus(repoConfig, "error", "" + error);
    throw error;
  }
  await module.exports.updateStatus(repoConfig, "ready");
  // FIX: return true on success too, consistent with the up-to-date path
  return true;
};
/**
 * Delete the on-disk copies (original and anonymized) of a repository
 * and drop the cached file listings (files, originalFiles, loc) from
 * the database. Errors are logged and rethrown.
 */
module.exports.removeRepository = async (repoConfig) => {
  try {
    const targets = [
      module.exports.getOriginalPath(repoConfig.repoId),
      module.exports.getAnonymizedPath(repoConfig.repoId),
    ];
    for (const target of targets) {
      if (ofs.existsSync(target)) {
        await fs.rm(target, { recursive: true, force: true });
      }
    }
    await db
      .get("anonymized_repositories")
      .updateOne(
        { repoId: repoConfig.repoId },
        { $unset: { files: "", originalFiles: "", loc: "" } }
      );
  } catch (error) {
    console.log(error);
    throw error;
  }
};