mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-12 18:32:44 +00:00
migrate JavaScript to TypeScript
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,5 +1,5 @@
|
||||
.env
|
||||
repositories/
|
||||
/repositories
|
||||
repo/
|
||||
db_backups
|
||||
message.txt
|
||||
|
||||
16
config.js
16
config.js
@@ -1,16 +0,0 @@
|
||||
const config = {
|
||||
CLIENT_ID: null,
|
||||
CLIENT_SECRET: null,
|
||||
GITHUB_TOKEN: null,
|
||||
MAX_FILE_SIZE: 10 * 1024 * 1024, // in b
|
||||
MAX_REPO_SIZE: 8 * 1024, // in kb
|
||||
AUTH_CALLBACK: "http://localhost:5000/github/auth",
|
||||
ANONYMIZATION_MASK: "XXXX",
|
||||
PORT: 5000,
|
||||
};
|
||||
for (let conf in process.env) {
|
||||
if (config[conf] !== undefined) {
|
||||
config[conf] = process.env[conf];
|
||||
}
|
||||
}
|
||||
module.exports = config;
|
||||
61
config.ts
Normal file
61
config.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import { resolve } from "path";
|
||||
|
||||
interface Config {
|
||||
REDIS_PORT: number;
|
||||
REDIS_HOSTNAME: string;
|
||||
CLIENT_ID: string;
|
||||
CLIENT_SECRET: string;
|
||||
GITHUB_TOKEN: string;
|
||||
DEFAULT_QUOTA: number;
|
||||
MAX_FILE_SIZE: number;
|
||||
MAX_REPO_SIZE: number;
|
||||
AUTH_CALLBACK: string;
|
||||
ANONYMIZATION_MASK: string;
|
||||
PORT: number;
|
||||
HOSTNAME: string;
|
||||
DB_USERNAME: string;
|
||||
DB_PASSWORD: string;
|
||||
DB_HOSTNAME: string;
|
||||
FOLDER: string;
|
||||
additionalExtensions: string[];
|
||||
S3_BUCKET?: string;
|
||||
S3_CLIENT_ID?: string;
|
||||
S3_CLIENT_SECRET?: string;
|
||||
S3_ENDPOINT?: string;
|
||||
S3_REGION?: string;
|
||||
}
|
||||
const config: Config = {
|
||||
CLIENT_ID: "CLIENT_ID",
|
||||
CLIENT_SECRET: "CLIENT_SECRET",
|
||||
GITHUB_TOKEN: "",
|
||||
DEFAULT_QUOTA: 2 * 1024 * 1024 * 1024 * 8,
|
||||
MAX_FILE_SIZE: 10 * 1024 * 1024, // in b
|
||||
MAX_REPO_SIZE: 8 * 1024, // in kb
|
||||
AUTH_CALLBACK: "http://localhost:5000/github/auth",
|
||||
ANONYMIZATION_MASK: "XXXX",
|
||||
PORT: 5000,
|
||||
HOSTNAME: "anonymous.4open.science",
|
||||
DB_USERNAME: "admin",
|
||||
DB_PASSWORD: "password",
|
||||
DB_HOSTNAME: "mongodb",
|
||||
REDIS_HOSTNAME: "redis",
|
||||
REDIS_PORT: 6379,
|
||||
FOLDER: resolve(__dirname, "repositories"),
|
||||
additionalExtensions: [
|
||||
"license",
|
||||
"dockerfile",
|
||||
"sbt",
|
||||
"ipynb",
|
||||
"gp",
|
||||
"out",
|
||||
"sol",
|
||||
"in",
|
||||
],
|
||||
};
|
||||
for (let conf in process.env) {
|
||||
if ((config as any)[conf] !== undefined) {
|
||||
(config as any)[conf] = process.env[conf];
|
||||
}
|
||||
}
|
||||
|
||||
export default config;
|
||||
@@ -9,6 +9,9 @@ services:
|
||||
container_name: anonymous_github
|
||||
env_file:
|
||||
- ./.env
|
||||
environment:
|
||||
- REDIS_HOSTNAME=redis
|
||||
- DB_HOSTNAME=mongodb
|
||||
volumes:
|
||||
- .:/app
|
||||
# - ./repositories:/app/repositories
|
||||
|
||||
109
index.js
109
index.js
@@ -1,109 +0,0 @@
|
||||
const path = require("path");
|
||||
const ofs = require("fs");
|
||||
const redis = require("redis");
|
||||
const RateLimit = require("express-rate-limit");
|
||||
const RedisStore = require("rate-limit-redis");
|
||||
const express = require("express");
|
||||
const compression = require("compression");
|
||||
const bodyParser = require("body-parser");
|
||||
|
||||
const config = require("./config");
|
||||
const rediscli = redis.createClient({
|
||||
host: "redis",
|
||||
ttl: 260,
|
||||
});
|
||||
|
||||
const connection = require("./routes/connection");
|
||||
|
||||
const db = require("./utils/database");
|
||||
const fileUtils = require("./utils/file");
|
||||
|
||||
const app = express();
|
||||
app.use(bodyParser.json());
|
||||
app.use(compression());
|
||||
app.set("trust proxy", 1);
|
||||
|
||||
// handle session and connection
|
||||
app.use(connection.session);
|
||||
app.use(connection.passport.initialize());
|
||||
app.use(connection.passport.session());
|
||||
|
||||
const rateLimit = new RateLimit({
|
||||
store: new RedisStore({
|
||||
client: rediscli,
|
||||
}),
|
||||
windowMs: 15 * 60 * 1000, // 15 minutes
|
||||
max: 200, // limit each IP to 100 requests per windowMs
|
||||
// delayMs: 0, // disable delaying - full speed until the max limit is reached
|
||||
});
|
||||
|
||||
app.use("/github", rateLimit, connection.router);
|
||||
|
||||
// app routes
|
||||
app.use("/api/user", rateLimit, require("./routes/user"));
|
||||
app.use("/api/repo", rateLimit, require("./routes/file"));
|
||||
app.use("/api/repo", rateLimit, require("./routes/repository"));
|
||||
|
||||
// wesite view
|
||||
app.use("/w/", rateLimit, require("./routes/webview"));
|
||||
|
||||
app.get("/api/supportedTypes", async (_, res) => {
|
||||
res.json(
|
||||
require("textextensions")
|
||||
.default.concat(fileUtils.additionalExtensions)
|
||||
.sort()
|
||||
);
|
||||
});
|
||||
|
||||
app.get("/api/message", async (_, res) => {
|
||||
if (ofs.existsSync("./message.txt")) {
|
||||
return res.sendFile(path.resolve(__dirname, "message.txt"));
|
||||
}
|
||||
res.sendStatus(404);
|
||||
});
|
||||
|
||||
app.get("/api/stat", async (_, res) => {
|
||||
const nbRepositories = await db
|
||||
.get("anonymized_repositories")
|
||||
.estimatedDocumentCount();
|
||||
|
||||
const nbUsers = (await db.get("anonymized_repositories").distinct("owner"))
|
||||
.length; //await db.get("users").estimatedDocumentCount();
|
||||
res.json({ nbRepositories, nbUsers });
|
||||
});
|
||||
|
||||
function indexResponse(req, res) {
|
||||
if (
|
||||
req.params.repoId &&
|
||||
req.headers["accept"] &&
|
||||
req.headers["accept"].indexOf("text/html") == -1
|
||||
) {
|
||||
const repoId = req.path.split("/")[2];
|
||||
// if it is not an html request, it assumes that the browser try to load a different type of resource
|
||||
return res.redirect(
|
||||
`/api/repo/${repoId}/file/${req.path.substring(
|
||||
req.path.indexOf(repoId) + repoId.length + 1
|
||||
)}`
|
||||
);
|
||||
}
|
||||
res.sendFile(path.resolve(__dirname, "public", "index.html"));
|
||||
}
|
||||
|
||||
app
|
||||
.get("/", indexResponse)
|
||||
.get("/404", indexResponse)
|
||||
.get("/anonymize", indexResponse)
|
||||
.get("/r/:repoId/?*", indexResponse)
|
||||
.get("/repository/:repoId/?*", indexResponse);
|
||||
|
||||
app.use(express.static(__dirname + "/public"));
|
||||
|
||||
app.get("*", indexResponse);
|
||||
|
||||
db.connect().then((_) => {
|
||||
app.listen(config.PORT, () => {
|
||||
console.log(
|
||||
"Database connected and Server started on port: " + config.PORT
|
||||
);
|
||||
});
|
||||
});
|
||||
6
index.ts
Normal file
6
index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
require("dotenv").config();
|
||||
|
||||
import server from "./src/server";
|
||||
|
||||
// start the server
|
||||
server();
|
||||
178
migrateDB.ts
Normal file
178
migrateDB.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import * as mongoose from "mongoose";
|
||||
import config from "./config";
|
||||
import * as database from "./src/database/database";
|
||||
import RepositoryModel from "./src/database/repositories/repositories.model";
|
||||
import AnonymizedRepositoryModel from "./src/database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import UserModel from "./src/database/users/users.model";
|
||||
import { IRepositoryDocument } from "./src/database/repositories/repositories.types";
|
||||
import { LexRuntime } from "aws-sdk";
|
||||
|
||||
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;
|
||||
|
||||
async function connect(db) {
|
||||
const t = new mongoose.Mongoose();
|
||||
t.set("useNewUrlParser", true);
|
||||
t.set("useFindAndModify", true);
|
||||
t.set("useUnifiedTopology", true);
|
||||
|
||||
const database = t.connection;
|
||||
|
||||
await t.connect(MONGO_URL + db, {
|
||||
authSource: "admin",
|
||||
useCreateIndex: true,
|
||||
useFindAndModify: true,
|
||||
});
|
||||
|
||||
return database;
|
||||
}
|
||||
|
||||
(async () => {
|
||||
await database.connect();
|
||||
const oldDB = await connect("anonymous_github");
|
||||
|
||||
console.log("Import Users");
|
||||
await new Promise(async (resolve) => {
|
||||
const promises = [];
|
||||
await oldDB
|
||||
.collection("users")
|
||||
.find()
|
||||
.batchSize(1)
|
||||
.forEach(async (r) => {
|
||||
let localResolve = null;
|
||||
const p = new Promise((r) => (localResolve = r));
|
||||
promises.push(p);
|
||||
|
||||
const repositoryModels: Promise<IRepositoryDocument>[] = [];
|
||||
const finds = (
|
||||
await RepositoryModel.find({
|
||||
externalId: {
|
||||
$in: r.repositories.map((repo) => "gh_" + repo.id),
|
||||
},
|
||||
}).select("externalId")
|
||||
).map((m) => m.externalId);
|
||||
for (const repo of r.repositories.filter(
|
||||
(f) => finds.indexOf("gh_" + f.id) == -1
|
||||
)) {
|
||||
repositoryModels.push(
|
||||
new RepositoryModel({
|
||||
externalId: "gh_" + repo.id,
|
||||
name: repo.full_name,
|
||||
url: repo.html_url,
|
||||
size: repo.size,
|
||||
defaultBranch: repo.default_branch,
|
||||
}).save()
|
||||
);
|
||||
}
|
||||
const user = await new UserModel({
|
||||
accessToken: r.accessToken,
|
||||
username: r.username,
|
||||
email: r.profile.emails[0]?.value,
|
||||
photo: r.profile.photos[0]?.value,
|
||||
repositories: (await Promise.all(repositoryModels)).map((d) => d._id),
|
||||
default: {
|
||||
terms: r.default.terms,
|
||||
options: r.default.options,
|
||||
},
|
||||
}).save();
|
||||
|
||||
localResolve(user);
|
||||
});
|
||||
Promise.all(promises).then(resolve);
|
||||
});
|
||||
|
||||
console.log("Import Repositories");
|
||||
let promises = [];
|
||||
await oldDB
|
||||
.collection("repositories")
|
||||
.find({})
|
||||
.batchSize(1)
|
||||
.forEach(async (r) => {
|
||||
if (!r.id) return;
|
||||
let localResolve = null;
|
||||
const p = new Promise((r) => (localResolve = r));
|
||||
promises.push(p);
|
||||
|
||||
let find = await RepositoryModel.findOne({
|
||||
externalId: "gh_" + r.id,
|
||||
});
|
||||
// console.log("gh_" + r.id, find != null);
|
||||
if (find == null) {
|
||||
find = new RepositoryModel({
|
||||
externalId: "gh_" + r.id,
|
||||
name: r.fullName,
|
||||
url: r.html_url,
|
||||
size: r.size,
|
||||
defaultBranch: r.default_branch,
|
||||
});
|
||||
}
|
||||
const branches = [...Object.values(r.branches)].map((b: any) => {
|
||||
const o: any = { name: b.name, commit: b.commit.sha };
|
||||
if (b.name == find.defaultBranch) {
|
||||
o.readme = r.readme;
|
||||
}
|
||||
return o;
|
||||
});
|
||||
find.branches = branches;
|
||||
await find.save();
|
||||
localResolve();
|
||||
});
|
||||
await Promise.all(promises);
|
||||
console.log("Import Anonymized Repositories");
|
||||
promises = [];
|
||||
await oldDB
|
||||
.collection("anonymized_repositories")
|
||||
.find({})
|
||||
.forEach(async (r) => {
|
||||
let localResolve = null;
|
||||
const p = new Promise((r) => (localResolve = r));
|
||||
promises.push(p);
|
||||
|
||||
let repo = await RepositoryModel.findOne({ name: r.fullName });
|
||||
if (repo == null) {
|
||||
const tmp = await oldDB
|
||||
.collection("repositories")
|
||||
.findOne({ fullName: r.fullName });
|
||||
if (tmp) {
|
||||
repo = await RepositoryModel.findOne({ externalId: "gh_" + tmp.id });
|
||||
} else {
|
||||
console.error(`Repository ${r.fullName} is not found (renamed)`);
|
||||
}
|
||||
}
|
||||
await new AnonymizedRepositoryModel({
|
||||
repoId: r.repoId,
|
||||
status: r.status,
|
||||
anonymizeDate: r.anonymizeDate,
|
||||
lastView: r.lastView,
|
||||
pageView: r.pageView,
|
||||
owner: r.owner,
|
||||
source: {
|
||||
accessToken: r.token,
|
||||
type:
|
||||
r.options.mode == "download" ? "GitHubDownload" : "GitHubStream",
|
||||
branch: r.branch,
|
||||
commit: r.commit,
|
||||
repositoryId: repo?.id,
|
||||
repositoryName: r.fullName,
|
||||
},
|
||||
options: {
|
||||
terms: r.terms,
|
||||
expirationMode: r.options.expirationMode,
|
||||
expirationDate: r.options.expirationDate
|
||||
? new Date(r.options.expirationDate)
|
||||
: null,
|
||||
update: r.options.update,
|
||||
image: r.options.image,
|
||||
pdf: r.options.pdf,
|
||||
notebook: r.options.notebook,
|
||||
loc: r.options.loc,
|
||||
link: r.options.link,
|
||||
page: r.options.page,
|
||||
pageSource: r.options.pageSource,
|
||||
},
|
||||
}).save();
|
||||
localResolve();
|
||||
});
|
||||
await Promise.all(promises);
|
||||
console.log("Import finished!");
|
||||
setTimeout(() => process.exit(), 5000);
|
||||
})();
|
||||
3776
package-lock.json
generated
3776
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
45
package.json
45
package.json
@@ -1,12 +1,14 @@
|
||||
{
|
||||
"name": "anonymous_github",
|
||||
"version": "2.0.0",
|
||||
"version": "2.1.0",
|
||||
"description": "Anonymise Github repositories for double-anonymous reviews",
|
||||
"main": "index.js",
|
||||
"main": "index.ts",
|
||||
"scripts": {
|
||||
"test": "mocha --reporter spec",
|
||||
"start": "node index.js",
|
||||
"dev": "nodemon index.js"
|
||||
"start": "node --inspect=5858 -r ts-node/register ./index.ts",
|
||||
"dev": "nodemon --transpile-only index.ts",
|
||||
"migrateDB": "ts-node --transpile-only migrateDB.ts",
|
||||
"build": "tsc"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -21,27 +23,50 @@
|
||||
"dependencies": {
|
||||
"@octokit/oauth-app": "^3.3.2",
|
||||
"@octokit/rest": "^18.5.3",
|
||||
"array-equal": "^1.0.0",
|
||||
"archive-stream-to-s3": "^1.1.3",
|
||||
"archiver": "^5.3.0",
|
||||
"aws-sdk": "^2.958.0",
|
||||
"compression": "^1.7.4",
|
||||
"connect-redis": "^5.1.0",
|
||||
"dotenv": "^10.0.0",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^5.2.6",
|
||||
"express-session": "^1.17.1",
|
||||
"extract-zip": "^2.0.1",
|
||||
"github-linguist": "^2.3.0",
|
||||
"got": "^9.6.0",
|
||||
"gunzip-maybe": "^1.4.2",
|
||||
"istextorbinary": "^5.12.0",
|
||||
"mongodb": "^3.6.6",
|
||||
"mime-types": "^2.1.30",
|
||||
"mongoose": "^5.13.5",
|
||||
"parse-github-url": "^1.0.2",
|
||||
"passport": "^0.4.1",
|
||||
"passport-github2": "^0.1.12",
|
||||
"rate-limit-redis": "^2.1.0",
|
||||
"redis": "^3.1.2",
|
||||
"textextensions": "^5.12.0"
|
||||
"tar-fs": "^2.1.1",
|
||||
"textextensions": "^5.12.0",
|
||||
"xml-flow": "^1.0.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/archiver": "^5.1.1",
|
||||
"@types/compression": "^1.7.1",
|
||||
"@types/connect-redis": "^0.0.17",
|
||||
"@types/express": "^4.17.13",
|
||||
"@types/express-rate-limit": "^5.1.3",
|
||||
"@types/express-session": "^1.17.4",
|
||||
"@types/got": "^9.6.12",
|
||||
"@types/mime-types": "^2.1.0",
|
||||
"@types/parse-github-url": "^1.0.0",
|
||||
"@types/passport": "^1.0.7",
|
||||
"@types/passport-github2": "^1.2.5",
|
||||
"@types/rate-limit-redis": "^1.7.2",
|
||||
"@types/redis": "^2.8.31",
|
||||
"@types/tar-fs": "^2.0.1",
|
||||
"@types/xml-flow": "^1.0.1",
|
||||
"chai": "^4.3.4",
|
||||
"mocha": "^8.3.2",
|
||||
"nodemon": "^2.0.4"
|
||||
"nodemon": "^2.0.7",
|
||||
"ts-node": "^10.1.0",
|
||||
"typescript": "^4.3.5"
|
||||
},
|
||||
"nodemonConfig": {
|
||||
"ignore": [
|
||||
|
||||
@@ -335,8 +335,8 @@
|
||||
name="mode"
|
||||
ng-model="options.mode"
|
||||
>
|
||||
<option value="stream" selected>Stream</option>
|
||||
<option value="download">Download</option>
|
||||
<option value="GitHubStream" selected>Stream</option>
|
||||
<option value="GitHubDownload">Download</option>
|
||||
</select>
|
||||
<small class="form-text text-muted"
|
||||
>How the repository will be anonymized. Stream mode will
|
||||
@@ -367,22 +367,6 @@
|
||||
https://anonymous.4open.science/w/{{repoId}}</small
|
||||
>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input
|
||||
class="form-check-input"
|
||||
type="checkbox"
|
||||
id="loc"
|
||||
name="loc"
|
||||
ng-model="options.loc"
|
||||
/>
|
||||
<label class="form-check-label" for="page"
|
||||
>Line of code</label
|
||||
>
|
||||
<small id="termsHelp" class="form-text text-muted"
|
||||
>Display the number of line of code in the
|
||||
repository</small
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -73,6 +73,8 @@
|
||||
/>
|
||||
<label class="form-check-label" for="removed"> Removed </label>
|
||||
</div>
|
||||
<h5>Quota</h5>
|
||||
{{quota.used | humanFileSize}}/{{quota.total| humanFileSize}}
|
||||
</div>
|
||||
<div class="col-md h-100 overflow-auto body">
|
||||
<div class="row">
|
||||
@@ -95,6 +97,9 @@
|
||||
<th scope="col" class="text-center d-none d-xl-table-cell">
|
||||
Anonymize date
|
||||
</th>
|
||||
<th scope="col" class="text-center d-none d-xl-table-cell">
|
||||
Size
|
||||
</th>
|
||||
<th scope="col" class="text-center d-none d-xl-table-cell">
|
||||
# Views
|
||||
</th>
|
||||
@@ -113,32 +118,30 @@
|
||||
{{$index + 1}}
|
||||
</th>
|
||||
<td class="align-middle">
|
||||
<a href="/r/{{repo.repoId}}"
|
||||
>{{repo.repoId}}</a
|
||||
>
|
||||
<a href="/r/{{repo.repoId}}">{{repo.repoId}}</a>
|
||||
</td>
|
||||
<td
|
||||
class="align-middle"
|
||||
title="Commit: {{repo.commit}}"
|
||||
title="Commit: {{repo.source.branch.commit}}"
|
||||
data-toggle="tooltip"
|
||||
data-placement="bottom"
|
||||
>
|
||||
<a
|
||||
href="https://github.com/{{repo.fullName}}/commit/{{repo.commit}}"
|
||||
>{{repo.fullName}}</a
|
||||
href="https://github.com/{{repo.source.fullName}}/commit/{{repo.source.branch.commit}}"
|
||||
>{{repo.source.fullName}}</a
|
||||
>
|
||||
</td>
|
||||
<td
|
||||
title="Commit: {{repo.commit}}"
|
||||
title="Commit: {{repo.source.branch.commit}}"
|
||||
data-toggle="tooltip"
|
||||
data-placement="bottom"
|
||||
class="align-middle d-none d-lg-table-cell"
|
||||
>
|
||||
{{repo.branch}}
|
||||
{{repo.source.branch.name}}
|
||||
</td>
|
||||
<!-- <td>{{repo.commit.substring(0, 6)}}</td> -->
|
||||
<td class="text-center align-middle d-none d-lg-table-cell">
|
||||
{{repo.terms.length}}
|
||||
{{repo.options.terms.length}}
|
||||
</td>
|
||||
<td
|
||||
class="text-center align-middle"
|
||||
@@ -162,7 +165,12 @@
|
||||
<td class="text-center align-middle d-none d-xl-table-cell">
|
||||
{{repo.anonymizeDate | date}}
|
||||
</td>
|
||||
<td class="text-center align-middle d-none d-xl-table-cell">{{repo.pageView}}</td>
|
||||
<td class="text-center align-middle d-none d-xl-table-cell">
|
||||
{{repo.size | humanFileSize}}
|
||||
</td>
|
||||
<td class="text-center align-middle d-none d-xl-table-cell">
|
||||
{{repo.pageView}}
|
||||
</td>
|
||||
<td class="text-center align-middle d-none d-xl-table-cell">
|
||||
{{repo.lastView | date}}
|
||||
</td>
|
||||
@@ -214,10 +222,7 @@
|
||||
>
|
||||
<i class="fa fa-remove" aria-hidden="true"></i> Remove
|
||||
</a>
|
||||
<a
|
||||
class="dropdown-item"
|
||||
href="/r/{{repo.repoId}}/"
|
||||
>
|
||||
<a class="dropdown-item" href="/r/{{repo.repoId}}/">
|
||||
<i class="fa fa-eye" aria-hidden="true"></i> View Repo
|
||||
</a>
|
||||
<a
|
||||
|
||||
@@ -4,14 +4,24 @@
|
||||
<tree class="files" file="files"></tree>
|
||||
</div>
|
||||
<div class="col-md h-100 overflow-auto p-0 d-flex flex-column">
|
||||
<nav aria-label="breadcrumb">
|
||||
<nav aria-label="repository menu">
|
||||
<ol class="breadcrumb shadow paths">
|
||||
<li class="breadcrumb-item" ng-repeat="p in paths" ng-bind="p">
|
||||
Loading...
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<a ng-href="{{url}}" target="__self" class="btn btn-primary"
|
||||
>Download file</a
|
||||
>
|
||||
|
||||
<a
|
||||
ng-href="/api/repo/{{repoId}}/zip"
|
||||
target="__self"
|
||||
class="btn btn-primary"
|
||||
>Download Repository</a
|
||||
>
|
||||
</nav>
|
||||
<loc stats="stats" ng-if="stats"></loc>
|
||||
<div class="align-items-stretch h-100 w-100 overflow-auto">
|
||||
<ng-include src="'./partials/pageView.htm'"></ng-include>
|
||||
</div>
|
||||
|
||||
@@ -81,6 +81,30 @@ angular
|
||||
$locationProvider.html5Mode(true);
|
||||
})
|
||||
.run(["Analytics", function (Analytics) {}])
|
||||
.filter("humanFileSize", function () {
|
||||
return function humanFileSize(bytes, si = false, dp = 1) {
|
||||
const thresh = si ? 1000 : 1024;
|
||||
|
||||
bytes = bytes / 8;
|
||||
|
||||
if (Math.abs(bytes) < thresh) {
|
||||
return bytes + " B";
|
||||
}
|
||||
|
||||
const units = si
|
||||
? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
|
||||
: ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
|
||||
let u = -1;
|
||||
const r = 10 ** dp;
|
||||
|
||||
do {
|
||||
bytes /= thresh;
|
||||
++u;
|
||||
} while (Math.round(Math.abs(bytes) * r) / r >= thresh && u < units.length - 1);
|
||||
|
||||
return bytes.toFixed(dp) + " " + units[u];
|
||||
};
|
||||
})
|
||||
.filter("title", function () {
|
||||
return function (str) {
|
||||
if (!str) return str;
|
||||
@@ -370,7 +394,7 @@ angular
|
||||
notebook: true,
|
||||
loc: true,
|
||||
link: true,
|
||||
mode: "download",
|
||||
mode: "GitHubDownload",
|
||||
};
|
||||
|
||||
function getDefault() {
|
||||
@@ -500,6 +524,13 @@ angular
|
||||
}
|
||||
getRepositories();
|
||||
|
||||
function getQuota() {
|
||||
$http.get("/api/user/quota").then((res) => {
|
||||
$scope.quota = res.data;
|
||||
}, console.error);
|
||||
}
|
||||
getQuota();
|
||||
|
||||
$scope.removeRepository = (repo) => {
|
||||
if (
|
||||
confirm(
|
||||
@@ -596,9 +627,8 @@ angular
|
||||
image: true,
|
||||
pdf: true,
|
||||
notebook: true,
|
||||
loc: true,
|
||||
link: true,
|
||||
mode: "download",
|
||||
mode: "GitHubDownload",
|
||||
};
|
||||
$scope.options.expirationDate.setDate(
|
||||
$scope.options.expirationDate.getDate() + 90
|
||||
@@ -630,10 +660,10 @@ angular
|
||||
$scope.repoId = $routeParams.repoId;
|
||||
$http.get("/api/repo/" + $scope.repoId).then(
|
||||
async (res) => {
|
||||
$scope.repoUrl = "https://github.com/" + res.data.fullName;
|
||||
$scope.repoUrl = "https://github.com/" + res.data.source.fullName;
|
||||
|
||||
$scope.terms = res.data.terms.join("\n");
|
||||
$scope.branch = res.data.branch;
|
||||
$scope.terms = res.data.options.terms.join("\n");
|
||||
$scope.branch = res.data.source.branch.name;
|
||||
$scope.options = res.data.options;
|
||||
$scope.conference = res.data.conference;
|
||||
if (res.data.options.expirationDate) {
|
||||
@@ -648,11 +678,11 @@ angular
|
||||
}
|
||||
|
||||
$scope.details = (
|
||||
await $http.get(`/api/repo/${res.data.fullName}/`)
|
||||
await $http.get(`/api/repo/${res.data.source.fullName}/`)
|
||||
).data;
|
||||
|
||||
await getReadme();
|
||||
await $scope.getBranches();
|
||||
await getReadme();
|
||||
anonymize();
|
||||
$scope.$apply();
|
||||
},
|
||||
@@ -709,24 +739,32 @@ angular
|
||||
};
|
||||
$('[data-toggle="tooltip"]').tooltip();
|
||||
|
||||
$scope.$watch("branch", (v) => {
|
||||
if ($scope.branches && $scope.branches[$scope.branch]) {
|
||||
$scope.commit = $scope.branches[$scope.branch].commit.sha;
|
||||
}
|
||||
if ($scope.details && $scope.details.has_page) {
|
||||
$scope.anonymize.page.disabled(false);
|
||||
$scope.$watch("branch", async (v) => {
|
||||
const selected = $scope.branches.filter(
|
||||
(f) => f.name == $scope.branch
|
||||
)[0];
|
||||
if ($scope.details && $scope.details.hasPage) {
|
||||
$scope.anonymize.page.$$element[0].disabled = false;
|
||||
if ($scope.details.pageSource.branch != $scope.branch) {
|
||||
$scope.anonymize.page.disabled(true);
|
||||
$scope.anonymize.page.$$element[0].disabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (selected) {
|
||||
$scope.commit = selected.commit;
|
||||
$scope.readme = selected.readme;
|
||||
await getReadme();
|
||||
anonymize();
|
||||
$scope.$apply();
|
||||
}
|
||||
});
|
||||
|
||||
$scope.$watch("options.mode", (v) => {
|
||||
if (v == "stream") {
|
||||
$scope.options.loc = false;
|
||||
$scope.anonymize.loc.$$element[0].disabled = true;
|
||||
if (v == "GitHubStream") {
|
||||
$scope.options.page = false;
|
||||
$scope.anonymize.page.$$element[0].disabled = true;
|
||||
} else {
|
||||
$scope.anonymize.loc.$$element[0].disabled = false;
|
||||
$scope.anonymize.page.$$element[0].disabled = false;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -749,10 +787,12 @@ angular
|
||||
);
|
||||
$scope.branches = branches.data;
|
||||
if (!$scope.branch) {
|
||||
$scope.branch = $scope.details.default_branch;
|
||||
$scope.branch = $scope.details.defaultBranch;
|
||||
}
|
||||
if ($scope.branches[$scope.branch]) {
|
||||
$scope.commit = $scope.branches[$scope.branch].commit.sha;
|
||||
const selected = $scope.branches.filter((b) => b.name == $scope.branch);
|
||||
if (selected.length > 0) {
|
||||
$scope.commit = selected[0].commit;
|
||||
$scope.readme = selected[0].readme;
|
||||
}
|
||||
$scope.$apply();
|
||||
};
|
||||
@@ -771,12 +811,10 @@ angular
|
||||
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/`);
|
||||
$scope.details = res.data;
|
||||
if ($scope.details.size > 1024 * 8) {
|
||||
$scope.options.mode = "stream";
|
||||
$scope.options.loc = false;
|
||||
$scope.options.mode = "GitHubStream";
|
||||
$scope.anonymize.mode.$$element[0].disabled = true;
|
||||
$scope.anonymize.loc.$$element[0].disabled = true;
|
||||
}
|
||||
$scope.repoId = $scope.details.name + "-" + generateRandomId(4);
|
||||
$scope.repoId = $scope.details.repo + "-" + generateRandomId(4);
|
||||
await $scope.getBranches();
|
||||
} catch (error) {
|
||||
if (error.data) {
|
||||
@@ -790,13 +828,16 @@ angular
|
||||
}
|
||||
}
|
||||
|
||||
async function getReadme() {
|
||||
async function getReadme(force) {
|
||||
if ($scope.readme) return $scope.readme;
|
||||
const o = parseGithubUrl($scope.repoUrl);
|
||||
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/readme`);
|
||||
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/readme`, {
|
||||
params: { force: force === true ? "1" : "0", branch: $scope.branch },
|
||||
});
|
||||
$scope.readme = res.data;
|
||||
}
|
||||
|
||||
async function anonymize() {
|
||||
function anonymize() {
|
||||
const urlRegex =
|
||||
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
let content = $scope.readme;
|
||||
@@ -890,6 +931,7 @@ angular
|
||||
|
||||
function getRepo() {
|
||||
const o = parseGithubUrl($scope.repoUrl);
|
||||
$scope.options.pageSource = $scope.details.pageSource;
|
||||
return {
|
||||
repoId: $scope.repoId,
|
||||
terms: $scope.terms.trim().split("\n"),
|
||||
@@ -1200,7 +1242,7 @@ angular
|
||||
getFiles(() => {
|
||||
updateContent();
|
||||
|
||||
if (options.mode == "download") {
|
||||
if (options.mode == "GitHubDownload") {
|
||||
getStats();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
const redis = require("redis");
|
||||
|
||||
const passport = require("passport");
|
||||
const session = require("express-session");
|
||||
const redisStore = require("connect-redis")(session);
|
||||
const GitHubStrategy = require("passport-github2").Strategy;
|
||||
|
||||
const express = require("express");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
const db = require("../utils/database");
|
||||
const config = require("../config");
|
||||
|
||||
function ensureAuthenticated(req, res, next) {
|
||||
if (req.isAuthenticated()) {
|
||||
return next();
|
||||
}
|
||||
res.status(401).json({ error: "not_connected" });
|
||||
}
|
||||
|
||||
passport.serializeUser(function(user, done) {
|
||||
delete user.profile._json;
|
||||
done(null, user);
|
||||
});
|
||||
|
||||
passport.deserializeUser(function(obj, done) {
|
||||
done(null, obj);
|
||||
});
|
||||
|
||||
passport.use(
|
||||
new GitHubStrategy(
|
||||
{
|
||||
clientID: config.CLIENT_ID,
|
||||
clientSecret: config.CLIENT_SECRET,
|
||||
callbackURL: config.AUTH_CALLBACK,
|
||||
},
|
||||
async (accessToken, refreshToken, profile, done) => {
|
||||
try {
|
||||
await db
|
||||
.get()
|
||||
.collection("users")
|
||||
.updateOne(
|
||||
{ username: profile.username },
|
||||
{
|
||||
$set: {
|
||||
username: profile.username,
|
||||
profile,
|
||||
accessToken,
|
||||
refreshToken,
|
||||
},
|
||||
},
|
||||
{ upsert: true }
|
||||
);
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
} finally {
|
||||
done(null, {
|
||||
username: profile.username,
|
||||
accessToken,
|
||||
refreshToken,
|
||||
profile,
|
||||
});
|
||||
}
|
||||
}
|
||||
)
|
||||
);
|
||||
|
||||
const rediscli = redis.createClient({
|
||||
host: "redis",
|
||||
ttl: 260,
|
||||
});
|
||||
|
||||
const appSession = session({
|
||||
secret: "keyboard cat",
|
||||
store: new redisStore({
|
||||
client: rediscli,
|
||||
}),
|
||||
saveUninitialized: false,
|
||||
resave: false,
|
||||
});
|
||||
|
||||
router.get(
|
||||
"/login",
|
||||
passport.authenticate("github", { scope: ["repo"] }), // Note the scope here
|
||||
function(req, res) {
|
||||
res.redirect("/");
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
"/auth",
|
||||
passport.authenticate("github", { failureRedirect: "/" }),
|
||||
function(req, res) {
|
||||
res.redirect("/");
|
||||
}
|
||||
);
|
||||
|
||||
module.exports.ensureAuthenticated = ensureAuthenticated;
|
||||
module.exports.passport = passport;
|
||||
module.exports.session = appSession;
|
||||
module.exports.router = router;
|
||||
166
routes/file.js
166
routes/file.js
@@ -1,166 +0,0 @@
|
||||
const express = require("express");
|
||||
const path = require("path");
|
||||
|
||||
const db = require("../utils/database");
|
||||
const fileUtils = require("../utils/file");
|
||||
const repoUtils = require("../utils/repository");
|
||||
const githubUtils = require("../utils/github");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
async function anonymizeRepository(options) {
|
||||
let repoConfig = options.repoConfig;
|
||||
if (!repoConfig) {
|
||||
repoConfig = await repoUtils.getConfig(options.repoId);
|
||||
}
|
||||
|
||||
if (repoConfig == null) {
|
||||
throw "repo_not_found";
|
||||
}
|
||||
if (repoConfig.status == "removed" || repoConfig.status == "expired") {
|
||||
return;
|
||||
}
|
||||
|
||||
if (repoConfig.options.expirationMode != "never") {
|
||||
if (repoConfig.options.expirationDate <= new Date()) {
|
||||
console.log(repoConfig.repoId, "The repository is expired");
|
||||
await repoUtils.updateStatus(repoConfig, "expired");
|
||||
await repoUtils.removeRepository(repoConfig);
|
||||
throw "repository_expired";
|
||||
}
|
||||
}
|
||||
|
||||
const lastView = repoConfig.lastView;
|
||||
|
||||
const yesterday = new Date();
|
||||
yesterday.setDate(yesterday.getDate() - 1);
|
||||
|
||||
if (repoConfig.options.update && lastView < yesterday) {
|
||||
console.log(repoConfig.repoId, "check for updates in the repository.");
|
||||
try {
|
||||
} catch (error) {
|
||||
console.error("Error while updating the repository.");
|
||||
console.error(repoConfig.repoId, req.path, error);
|
||||
}
|
||||
await repoUtils.updateAnonymizedRepository(repoConfig);
|
||||
} else {
|
||||
await githubUtils.downloadRepoAndAnonymize(repoConfig);
|
||||
}
|
||||
}
|
||||
|
||||
router.get("/:repoId/files", async (req, res) => {
|
||||
const repoConfig = await repoUtils.getConfig(req.params.repoId);
|
||||
if (repoConfig == null) {
|
||||
return res.status(500).json({ error: "repo_not_found" });
|
||||
}
|
||||
|
||||
if (repoConfig.status != "ready") {
|
||||
return res.status(500).json({ error: "repo_not_ready" });
|
||||
}
|
||||
|
||||
try {
|
||||
const files = await fileUtils.getFileList({ repoConfig });
|
||||
return res.json(files);
|
||||
} catch (error) {
|
||||
console.error(req.path, error);
|
||||
return res.status(500).json({ error });
|
||||
}
|
||||
});
|
||||
|
||||
// Language statistics for an anonymized repository.
// Not available in "stream" mode (nothing is on disk to analyze).
router.get("/:repoId/stats", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);

  if (repoConfig == null) return res.status(500).json({ error: "repo_not_found" });
  if (repoConfig.status != "ready")
    return res.status(500).json({ error: "repo_not_ready" });
  if (repoConfig.options.mode == "stream")
    return res.status(500).json({ error: "stream_not_supported" });

  try {
    const { languages } = await fileUtils.getStats({ repoConfig });
    return res.json(languages);
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// Return the public options of a repository, triggering (best-effort) its
// anonymization first. Expired repos in "redirect" mode get a GitHub URL.
router.get("/:repoId/options", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  try {
    // Best effort: an anonymization failure is logged but not fatal here.
    try {
      await anonymizeRepository({ repoConfig });
    } catch (error) {
      console.log("Error during the anonymization of the repository");
      console.error(req.path, error);
    }

    switch (repoConfig.status) {
      case "removed":
        throw "repository_expired";
      case "expired":
        if (repoConfig.options.expirationMode != "redirect") {
          throw "repository_expired";
        }
        // Expired but redirectable: expose the original GitHub URL.
        repoConfig.options.url = "https://github.com/" + repoConfig.fullName;
        break;
      case "ready":
        break;
      default:
        throw "repository_not_ready";
    }

    return res.json(repoConfig.options);
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// Serve a single anonymized file. The wildcard segment lets the path
// contain slashes (express puts the remainder in req.params[0]).
router.get("/:repoId/file/:path*", async (req, res) => {
  const repoConfig = await repoUtils.getConfig(req.params.repoId);

  if (repoConfig == null) return res.status(500).json({ error: "repo_not_found" });
  if (repoConfig.status != "ready")
    return res.status(500).json({ error: "repo_not_ready" });

  // Reassemble the full relative path of the requested file.
  let requestPath = req.params.path;
  if (req.params[0]) {
    requestPath += req.params[0];
  }

  try {
    const isValid = await fileUtils.isFilePathValid({
      repoConfig,
      path: requestPath,
    });
    if (!isValid) {
      return res.status(404).json({ error: "file_not_found" });
    }

    // Record the access (view counter + last-view timestamp).
    await db.get("anonymized_repositories").updateOne(
      { repoId: repoConfig.repoId },
      { $set: { lastView: new Date() }, $inc: { pageView: 1 } }
    );

    const ppath = path.join(
      repoUtils.getAnonymizedPath(repoConfig.repoId),
      requestPath
    );
    // dotfiles must be allowed: repositories legitimately contain them.
    return res.sendFile(ppath, { dotfiles: "allow" });
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).send({ error });
  }
});
|
||||
|
||||
module.exports = router;
|
||||
@@ -1,366 +0,0 @@
|
||||
const ofs = require("fs");
|
||||
const fs = require("fs").promises;
|
||||
|
||||
const express = require("express");
|
||||
const gh = require("parse-github-url");
|
||||
const arrayEquals = require("array-equal");
|
||||
|
||||
const connection = require("./connection");
|
||||
const githubUtils = require("../utils/github");
|
||||
const db = require("../utils/database");
|
||||
const repoUtils = require("../utils/repository");
|
||||
const config = require("../config");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
// user needs to be connected for all user API
|
||||
router.use(connection.ensureAuthenticated);
|
||||
|
||||
// claim a repository
|
||||
// Claim ownership of an anonymized repository: the caller must provide the
// repoId and the original GitHub URL, which must match the stored fullName.
router.post("/claim", async (req, res) => {
  try {
    const { repoId, repoUrl } = req.body;
    if (!repoId) {
      return res.status(500).json({ error: "repoId_not_defined" });
    }
    if (!repoUrl) {
      return res.status(500).json({ error: "repoUrl_not_defined" });
    }

    const repoConfig = await repoUtils.getConfig(repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }

    // The claimed URL must designate the same owner/name pair.
    const repo = gh(repoUrl);
    if (repoConfig.fullName != repo.repository) {
      return res.status(500).json({ error: "repo_not_found" });
    }

    console.log(`${req.user.username} claims ${repoConfig.fullName}.`);

    await db
      .get("anonymized_repositories")
      .updateOne(
        { repoId: repoConfig.repoId },
        { $set: { owner: req.user.username } }
      );
    return res.send("Ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// Detailed view of an anonymized repository, restricted to what the
// current user is allowed to see.
router.get("/:repoId/", async (req, res) => {
  try {
    const repository = await repoUtils.getAnonymizedRepoDetails(
      req.params.repoId,
      req.user
    );
    if (!repository) {
      return res.status(404).send({ error: "repo_not_found" });
    }
    return res.json(repository);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// update a repository
|
||||
// Update an anonymized repository (terms, branch, commit, options).
// Only the owner may update; validation failures respond 500 with a code.
// BUG FIX: the config was re-read with `repoUpdate.repoId` (taken from the
// request body, which is not required to contain repoId) instead of the
// route parameter, leaving `repoConfig` null before updateStatus/download.
router.post("/:repoId/", async (req, res) => {
  const repoUpdate = req.body;

  let repoConfig = await repoUtils.getConfig(req.params.repoId);
  if (repoConfig == null) {
    return res.status(500).json({ error: "repo_not_found" });
  }
  if (repoConfig.owner != req.user.username) {
    return res.status(401).json({ error: "not_authorized" });
  }
  if (!repoUpdate.branch) {
    return res.status(500).json({ error: "branch_not_specified" });
  }
  if (!repoUpdate.options) {
    return res.status(500).json({ error: "options_not_provided" });
  }
  if (!Array.isArray(repoUpdate.terms)) {
    return res.status(500).send({ error: "invalid_terms_format" });
  }
  if (!/^[a-f0-9]+$/.test(repoUpdate.commit)) {
    return res.status(500).send({ error: "invalid_commit_format" });
  }

  try {
    const details = await repoUtils.getRepoDetails({
      repoConfig,
      force: true,
      token: req.user.accessToken,
    });
    if (repoUpdate.options.mode == "download") {
      // details.size is in kilobytes
      if (details.size > config.MAX_REPO_SIZE) {
        return res.status(500).send({ error: "invalid_mode" });
      }
    }

    // A new commit invalidates the previous anonymization entirely.
    if (repoUpdate.commit != repoConfig.commit) {
      repoUpdate.anonymizeDate = new Date();
      await repoUtils.removeRepository(repoConfig);
    }
    // Changed terms or content options invalidate the anonymized cache.
    if (
      !arrayEquals(repoUpdate.terms, repoConfig.terms) ||
      repoUpdate.options.link != repoConfig.options.link ||
      repoUpdate.options.image != repoConfig.options.image
    ) {
      repoUpdate.anonymizeDate = new Date();
      if (ofs.existsSync(repoUtils.getAnonymizedPath(repoConfig.repoId))) {
        await fs.rm(repoUtils.getAnonymizedPath(repoConfig.repoId), {
          recursive: true,
          force: true,
        });
      }
    }

    // Whitelist of persisted fields — never trust the raw body directly.
    const data = {
      terms: repoUpdate.terms,
      branch: repoUpdate.branch,
      commit: repoUpdate.commit,
      options: {
        expirationMode: repoUpdate.options.expirationMode,
        expirationDate: repoUpdate.options.expirationDate,
        update: repoUpdate.options.update,
        image: repoUpdate.options.image,
        pdf: repoUpdate.options.pdf,
        notebook: repoUpdate.options.notebook,
        loc: repoUpdate.options.loc,
        link: repoUpdate.options.link,
        mode: repoUpdate.options.mode,
        page: repoUpdate.options.page,
      },
    };
    if (repoUpdate.options.page) {
      data.options.pageSource = details.pageSource;
    }
    await db.get("anonymized_repositories").updateOne(
      {
        repoId: repoConfig.repoId,
      },
      {
        $set: data,
      }
    );

    // Re-read the fresh config (fix: use the route param, not the body).
    repoConfig = await repoUtils.getConfig(req.params.repoId);
    await repoUtils.updateStatus(repoConfig, "preparing");

    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
    return res.status(500).json({ error });
  }
  // The re-anonymization runs after the response has been sent.
  try {
    await githubUtils.downloadRepoAndAnonymize(repoConfig);
    await repoUtils.updateStatus(repoConfig, "ready");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
  }
});
|
||||
|
||||
// refresh a repository
|
||||
// Force a refresh of an anonymized repository (owner only).
router.post("/:repoId/refresh", async (req, res) => {
  try {
    const repoConfig = await repoUtils.getConfig(req.params.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    // Only the owner may trigger a refresh.
    if (repoConfig.owner != req.user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repoUtils.updateAnonymizedRepository(repoConfig);
    return res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// delete a repository
|
||||
// Delete an anonymized repository (owner only): mark it removed, then
// drop its on-disk/DB state.
router.delete("/:repoId/", async (req, res) => {
  try {
    const repoConfig = await repoUtils.getConfig(req.params.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }
    if (repoConfig.owner != req.user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }

    await repoUtils.updateStatus(repoConfig, "removed");
    await repoUtils.removeRepository(repoConfig);
    console.log(`${req.params.repoId} is removed`);
    return res.json("ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// GitHub repository details for the connected user.
// ?force=1 bypasses the cache.
router.get("/:owner/:repo/", async (req, res) => {
  const { owner, repo } = req.params;
  try {
    const repository = await repoUtils.getRepoDetails({
      owner,
      repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (!repository) {
      return res.status(404).send({ error: "repo_not_found" });
    }
    return res.json(repository);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// Branch list of a GitHub repository. ?force=1 bypasses the cache.
router.get("/:owner/:repo/branches", async (req, res) => {
  const { owner, repo } = req.params;
  try {
    const repository = await repoUtils.getRepoBranches({
      owner,
      repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (!repository) {
      return res.status(404).send({ error: "repo_not_found" });
    }
    return res.json(repository);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// README of a GitHub repository. ?force=1 bypasses the cache.
// FIX: the catch block silently swallowed the error; every sibling route
// logs `req.path` + error before responding — made this one consistent.
router.get("/:owner/:repo/readme", async (req, res) => {
  try {
    const readme = await repoUtils.getRepoReadme({
      owner: req.params.owner,
      repo: req.params.repo,
      token: req.user.accessToken,
      force: req.query.force === "1",
    });
    if (readme) {
      return res.send(readme);
    }
    res.status(404).send({ error: "repo_not_found" });
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// Create (anonymize) a new repository from the request body.
// Validates the payload, persists the whitelisted configuration, replies
// "ok", then performs the download/anonymization asynchronously.
// FIX: the commit-format check rejected requests with NO commit
// (`/^[a-f0-9]+$/.test(undefined)` is false), yet the code below
// explicitly supports a missing commit by falling back to the branch head.
// The format is now only validated when a commit is actually provided.
router.post("/", async (req, res) => {
  const repoConfig = req.body;
  let data = null;
  try {
    const repository = await repoUtils.getConfig(repoConfig.repoId);
    const cacheExist = ofs.existsSync(
      repoUtils.getOriginalPath(repoConfig.repoId)
    );
    // A repoId is taken only if both the DB record and its cache exist.
    if (repository && cacheExist) {
      return res.status(500).send({ error: "repoId_already_used" });
    }
    var validCharacters = /^[0-9a-zA-Z\-\_]+$/;
    if (
      !repoConfig.repoId.match(validCharacters) ||
      repoConfig.repoId.length < 3
    ) {
      return res.status(500).send({ error: "invalid_repoId" });
    }
    if (!repoConfig.branch) {
      return res.status(500).json({ error: "branch_not_specified" });
    }
    if (!repoConfig.options) {
      return res.status(500).json({ error: "options_not_provided" });
    }
    if (!Array.isArray(repoConfig.terms)) {
      return res.status(500).send({ error: "invalid_terms_format" });
    }
    // Commit is optional (branch head is used below when absent), but when
    // present it must be a hex sha.
    if (repoConfig.commit && !/^[a-f0-9]+$/.test(repoConfig.commit)) {
      return res.status(500).send({ error: "invalid_commit_format" });
    }

    await repoUtils.getRepoBranches({
      repoConfig,
      token: req.user.accessToken,
    });
    const details = await repoUtils.getRepoDetails({
      repoConfig,
      token: req.user.accessToken,
    });
    if (details.branches[repoConfig.branch] == null) {
      return res.status(500).send({ error: "invalid_branch" });
    }
    if (repoConfig.options.mode == "download") {
      // details.size is in kilobytes
      if (details.size > config.MAX_REPO_SIZE) {
        return res.status(500).send({ error: "non_supported_mode" });
      }
    }

    // Whitelisted document persisted to the DB — never the raw body.
    data = {
      repoId: repoConfig.repoId,
      fullName: repoConfig.fullName,
      status: "preparing",
      terms: repoConfig.terms,
      // NOTE(review): other routes use req.user.username; confirm whether
      // profile.username and username can ever differ.
      owner: req.user.profile.username,
      token: req.user.accessToken,
      branch: repoConfig.branch,
      conference: repoConfig.conference,
      commit: repoConfig.commit
        ? repoConfig.commit
        : details.branches[repoConfig.branch].commit.sha,
      anonymizeDate: new Date(),
      options: {
        expirationMode: repoConfig.options.expirationMode,
        expirationDate: repoConfig.options.expirationDate,
        update: repoConfig.options.update,
        image: repoConfig.options.image,
        pdf: repoConfig.options.pdf,
        notebook: repoConfig.options.notebook,
        loc: repoConfig.options.loc,
        link: repoConfig.options.link,
        mode: repoConfig.options.mode,
        page: repoConfig.options.page,
      },
    };
    if (repoConfig.options.page) {
      data.options.pageSource = details.pageSource;
    }
    await db.get("anonymized_repositories").updateOne(
      {
        repoId: data.repoId,
      },
      {
        $set: data,
      },
      { upsert: true }
    );
    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", error);
    return res.status(500).json({ error });
  }
  // Heavy work happens after the response has been sent.
  try {
    await githubUtils.downloadRepoAndAnonymize(data);
    await repoUtils.updateStatus(repoConfig, "ready");
  } catch (error) {
    console.error(req.path, error);
    await repoUtils.updateStatus(repoConfig, "error", "unable_to_anonymize");
  }
});
|
||||
|
||||
module.exports = router;
|
||||
134
routes/user.js
134
routes/user.js
@@ -1,134 +0,0 @@
|
||||
const express = require("express");
|
||||
const { Octokit } = require("@octokit/rest");
|
||||
|
||||
const connection = require("./connection");
|
||||
const db = require("../utils/database");
|
||||
const repoUtils = require("../utils/repository");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
// user needs to be connected for all user API
|
||||
router.use(connection.ensureAuthenticated);
|
||||
|
||||
// Terminate the session and send the user back to the landing page.
router.get("/logout", async (req, res) => {
  try {
    req.logout();
    res.redirect("/");
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// Minimal profile of the connected user: username and first avatar (or null).
router.get("/", async (req, res) => {
  try {
    const { profile } = req.user;
    const photo = profile.photos.length ? profile.photos[0].value : null;
    res.json({ username: profile.username, photo });
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// Default anonymization settings saved by the user.
// FIX: `findOne` returns null when the user document does not exist;
// `d.default` then threw a TypeError and the route answered 500. We now
// answer with null (no defaults saved) in that case.
router.get("/default", async (req, res) => {
  try {
    const d = await db
      .get("users")
      .findOne({ username: req.user.username }, { projection: { default: 1 } });
    res.json(d ? d.default : null);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// Persist the request body as the user's default anonymization settings.
router.post("/default", async (req, res) => {
  try {
    await db
      .get("users")
      .updateOne(
        { username: req.user.username },
        { $set: { default: req.body } }
      );
    res.send("ok");
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// All anonymized repositories owned by the connected user.
// Expired repositories are lazily marked as such while listing.
router.get("/anonymized_repositories", async (req, res) => {
  try {
    const repos = await db
      .get("anonymized_repositories")
      .find(
        {
          owner: req.user.username,
        },
        // Never leak tokens or file trees to the client.
        { projection: { token: 0, files: 0, originalFiles: 0 } }
      )
      .toArray();

    const now = new Date();
    for (const repo of repos) {
      const opts = repo.options;
      if (opts.expirationDate) {
        // Stored as a plain value; revive it as a Date for comparison.
        opts.expirationDate = new Date(opts.expirationDate);
      }
      const isExpired =
        opts.expirationMode != "never" &&
        opts.expirationDate != null &&
        opts.expirationDate < now;
      if (isExpired) {
        await repoUtils.updateStatus({ repoId: repo.repoId }, "expired");
        repo.status = "expired";
      }
    }
    res.json(repos);
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
// All GitHub repositories of the connected user, cached in the DB.
// ?force=1 refreshes the cache from the GitHub API.
// FIX: the `!user` branch sent a 401 but did not return, so execution
// fell through and dereferenced `user.repositories` (TypeError, and a
// second response attempt). Added the missing `return`.
router.get("/all_repositories", async (req, res) => {
  try {
    const user = await db
      .get()
      .collection("users")
      .findOne(
        { username: req.user.username },
        { projection: { repositories: 1 } }
      );
    if (!user) {
      return res.status(401).send({ error: "user_not_found" });
    }
    if (user.repositories && req.query.force !== "1") {
      // Serve the cached list.
      return res.json(user.repositories);
    } else {
      // Refresh from GitHub (paginated, most recently pushed first).
      const octokit = new Octokit({ auth: req.user.accessToken });
      const repositories = await octokit.paginate(
        octokit.repos.listForAuthenticatedUser,
        {
          visibility: "all",
          sort: "pushed",
          per_page: 100,
        }
      );
      try {
        await db
          .get()
          .collection("users")
          .updateOne(
            { username: req.user.profile.username },
            { $set: { repositories } }
          );
        res.json(repositories);
      } catch (error) {
        res.status(500).send(error);
      }
    }
  } catch (error) {
    console.error(req.path, error);
    res.status(500).json({ error });
  }
});
|
||||
|
||||
module.exports = router;
|
||||
@@ -1,79 +0,0 @@
|
||||
const express = require("express");
|
||||
const path = require("path");
|
||||
|
||||
const fileUtils = require("../utils/file");
|
||||
const repoUtils = require("../utils/repository");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * Serve a GitHub-Pages-like website from an anonymized repository.
 *
 * Preconditions enforced before serving: the "page" option is enabled,
 * a pageSource is configured, the repository is neither expired nor
 * removed, and the pageSource branch matches the anonymized branch.
 * Any failed precondition (or I/O error) is caught and answered as a
 * 500 with the error code; a missing file falls through to a 404.
 */
async function webView(req, res) {
  try {
    const repoId = req.params.repoId;
    const repoConfig = await repoUtils.getConfig(repoId);

    if (!repoConfig.options.page) {
      throw "page_not_activated";
    }
    if (!repoConfig.options.pageSource) {
      throw "page_not_activated";
    }

    if (repoConfig.status == "expired") {
      throw "repository_expired";
    }
    if (repoConfig.status == "removed") {
      throw "repository_expired";
    }

    if (repoConfig.options.pageSource.branch != repoConfig.branch) {
      throw "page_not_supported_on_different_branch";
    }

    // Everything after the repoId in the URL is the path inside the site.
    let requestPath = req.path.substring(
      req.path.indexOf(repoId) + repoId.length
    );
    // Directory request: serve its index.html.
    if (requestPath[requestPath.length - 1] == "/") {
      requestPath = path.join(requestPath, "index.html");
    }
    // TODO: handle website that are not in the docs folder (master, docs, gh-pages)
    requestPath = path.join(repoConfig.options.pageSource.path, requestPath);

    if (await fileUtils.isFilePathValid({ repoConfig, path: requestPath })) {
      const ppath = path.join(
        repoUtils.getAnonymizedPath(repoConfig.repoId),
        requestPath
      );
      return res.sendFile(ppath, { dotfiles: "allow" }, (err) => {
        if (err) {
          if (err.path) {
            // sendFile failed on a concrete path: rebuild a public URL from
            // it (strip the local ".../docs" prefix) and redirect once.
            // NOTE(review): assumes the site lives under "docs" — see TODO.
            const newPath = path.join(
              req.path,
              err.path.replace(
                path.join(
                  repoUtils.getAnonymizedPath(repoConfig.repoId),
                  "docs"
                ),
                ""
              )
            );
            // Guard against a redirect loop to the same URL.
            if (newPath != req.path) {
              return res.redirect(newPath);
            }
          }
        }
        console.log(err);
      });
    }
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).send({ error });
  }
  // Invalid path (or no precondition matched a response): not found.
  return res.status(404).send("file_not_found");
}
|
||||
|
||||
// Any path under a repoId is handled by webView; a bare repoId (no
// trailing slash) is redirected so relative links resolve correctly.
router.get("/:repoId/*", webView);
router.get("/:repoId", (req, res) => {
  res.redirect(`/w${req.url}/`);
});
|
||||
|
||||
module.exports = router;
|
||||
168
src/AnonymizedFile.ts
Normal file
168
src/AnonymizedFile.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import * as path from "path";
|
||||
import * as express from "express";
|
||||
import * as stream from "stream";
|
||||
import Repository from "./Repository";
|
||||
import { Tree, TreeFile } from "./types";
|
||||
import storage from "./storage";
|
||||
import config from "../config";
|
||||
import { anonymizeStream } from "./anonymize-utils";
|
||||
|
||||
/**
|
||||
* Represent a file in a anonymized repository
|
||||
*/
|
||||
/**
 * Represent a file in a anonymized repository.
 *
 * Holds both the original path and its anonymized counterpart, and knows
 * how to stream the file content through the anonymizer.
 */
export default class AnonymizedFile {
  repository: Repository;
  // Git blob sha of the file, when known.
  sha?: string;
  // Size in bytes, when known.
  size?: number;
  // Original (de-anonymized) path, resolved lazily by getOriginalPath().
  path?: string;
  // Path with all anonymization terms masked.
  anonymizedPath: string;

  /**
   * @param repository the repository the file belongs to; its options must
   *   define the anonymization terms.
   * @param data at least one of `path` / `anonymizedPath`; when only `path`
   *   is given the anonymized path is derived by masking every term.
   * @throws Error("terms_not_specified") when the repository has no terms.
   */
  constructor(
    repository: Repository,
    data: {
      path?: string;
      anonymizedPath: string;
      sha?: string;
      size?: number;
    }
  ) {
    this.repository = repository;
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    this.anonymizedPath = data.anonymizedPath;
    if (data.path) {
      this.path = data.path;
    }

    if (!data.anonymizedPath && this.path) {
      // anonymize the path
      this.anonymizedPath = this.path;
      for (let term of this.repository.options.terms) {
        if (term.trim() == "") {
          continue;
        }
        // Case-insensitive, global replacement of each term.
        // NOTE(review): `term` is used as a raw regex; special characters
        // in a term would change the match — confirm terms are escaped
        // upstream.
        this.anonymizedPath = this.anonymizedPath.replace(
          new RegExp(term, "gi"),
          config.ANONYMIZATION_MASK
        );
      }
    }
    // NOTE(review): this.sha/this.size are always undefined here, so these
    // guards are always true; they read as leftovers from a refactor.
    if (!this.sha) this.sha = data.sha;
    if (!this.size) this.size = data.size;
  }

  /**
   * Stream the anonymized content of the file into an HTTP response.
   * Errors (during setup or mid-stream) are answered as 500.
   */
  async send(res: express.Response): Promise<void> {
    try {
      const s = await this.anonymizedContent();
      s.on("error", (err) => {
        console.log(err);
        res.status(500).send({ error: err.message });
      });
      s.pipe(res);
    } catch (error) {
      console.log("Error during anonymization", error);
      res.status(500).send({ error: error.message });
    }
  }

  /**
   * Whether the repository options allow serving this file type
   * (pdf / image extensions can be disabled per repository).
   * Side effect: resolves and caches the original path.
   */
  async isFileSupported() {
    this.path = await this.getOriginalPath();
    const filename = path.basename(this.path);
    const extensions = filename.split(".").reverse();
    const extension = extensions[0].toLowerCase();
    if (!this.repository.options.pdf && extension == "pdf") {
      return false;
    }
    if (
      !this.repository.options.image &&
      (extension == "png" ||
        extension == "ico" ||
        extension == "jpg" ||
        extension == "jpeg" ||
        extension == "gif")
    ) {
      return false;
    }
    return true;
  }

  /**
   * Storage location of the original (non-anonymized) file.
   * @throws "path_not_defined" when the original path is not yet resolved.
   */
  get originalCachePath() {
    if (!this.path) throw "path_not_defined";
    return path.join(
      this.repository.originalCachePath,
      this.path
    );
  }

  /**
   * Raw (non-anonymized) content: from the storage cache when present,
   * otherwise fetched from the repository source.
   * @throws Error("file_too_big") above config.MAX_FILE_SIZE.
   */
  async content(): Promise<stream.Readable> {
    if (this.size && this.size > config.MAX_FILE_SIZE) {
      throw new Error("file_too_big");
    }
    if (await storage.exists(this.originalCachePath)) {
      return storage.read(this.originalCachePath);
    } else {
      // NOTE(review): `source?.` may yield undefined while the declared
      // return type promises a Readable — confirm source is always set.
      return await this.repository.source?.getFileContent(this);
    }
  }

  /**
   * Content piped through the anonymization transform.
   */
  async anonymizedContent() {
    await this.getOriginalPath();
    if (!this.path) throw new Error("path_not_specified");
    if (!this.repository.options.terms) throw new Error("terms_not_specified");
    const rs = await this.content();
    const contentStream = rs.pipe(anonymizeStream(this.path, this.repository));
    return contentStream;
  }

  /**
   * De-anonymize the path
   *
   * Walks the anonymized file tree to find the entry's sha, then maps that
   * sha back to its original path in the non-anonymized tree. Also fills in
   * `this.path`, `this.sha` and `this.size` as a side effect.
   *
   * @returns the origin relative path of the file
   * @throws Error("path_not_specified") when no anonymized path is set
   * @throws Error("file_not_found") when the path or sha cannot be resolved
   */
  async getOriginalPath(): Promise<string> {
    if (!this.anonymizedPath) throw new Error("path_not_specified");

    const files = await this.repository.files();
    const paths = this.anonymizedPath.trim().split("/");

    // Descend the anonymized tree segment by segment.
    let current: any = await this.repository.anonymizedFiles();
    for (let i = 0; i < paths.length; i++) {
      const fileName = paths[i];
      if (fileName == "") {
        continue;
      }
      if (current[fileName]) {
        current = current[fileName];
      } else {
        throw new Error("file_not_found");
      }
    }

    // Build a sha -> original-path index of the whole original tree.
    function tree2sha(
      tree: any,
      output: { [key: string]: string } = {},
      parent: string = ""
    ): { [key: string]: string } {
      for (let i in tree) {
        const sha = tree[i].sha as string;
        const size = tree[i].size as number;
        if (sha != null && size != null) {
          // Leaf: a file entry with both sha and size.
          output[sha] = path.join(parent, i);
        } else if (tree[i].child) {
          tree2sha(tree[i].child as Tree, output, path.join(parent, i));
        } else {
          tree2sha(tree[i] as Tree, output, path.join(parent, i));
        }
      }
      return output;
    }

    const shaTree = tree2sha(files);
    if (!current.sha || !shaTree[current.sha]) {
      throw new Error("file_not_found");
    }
    this.path = shaTree[current.sha];
    this.sha = current.sha;
    if ((current as TreeFile).size) this.size = (current as TreeFile).size;
    return this.path;
  }
}
|
||||
280
src/Repository.ts
Normal file
280
src/Repository.ts
Normal file
@@ -0,0 +1,280 @@
|
||||
import * as path from "path";
|
||||
import storage from "./storage";
|
||||
import { RepositoryStatus, Source, Tree } from "./types";
|
||||
import * as stream from "stream";
|
||||
import User from "./User";
|
||||
import GitHubStream from "./source/GitHubStream";
|
||||
import GitHubDownload from "./source/GitHubDownload";
|
||||
import Zip from "./source/ZIP";
|
||||
import { anonymizePath } from "./anonymize-utils";
|
||||
import UserModel from "./database/users/users.model";
|
||||
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
|
||||
import { anonymizeStream } from "./anonymize-utils";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
|
||||
export default class Repository {
|
||||
  // Backing Mongoose document; all persisted state lives here.
  private _model: IAnonymizedRepositoryDocument;
  // Strategy object that knows how to fetch files (GitHub download/stream, zip).
  source: Source;
  // Owner of the anonymized repository.
  owner: User;

  /**
   * Wrap an anonymized-repository document and instantiate the matching
   * source strategy from `data.source.type`.
   * @throws Error("unsupported_source") for an unknown source type.
   */
  constructor(data: IAnonymizedRepositoryDocument) {
    this._model = data;
    switch (data.source.type) {
      case "GitHubDownload":
        this.source = new GitHubDownload(data.source, this);
        break;
      case "GitHubStream":
        this.source = new GitHubStream(data.source, this);
        break;
      case "Zip":
        this.source = new Zip(data.source, this);
        break;
      default:
        throw new Error("unsupported_source");
    }
    this.owner = new User(new UserModel({ username: data.owner }));
  }
|
||||
|
||||
  /**
   * Get the anonymized file tree
   * @param opt force to get an updated list of files
   * @returns The anonymized file tree
   */
  async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> {
    const terms = this._model.options.terms || [];

    // Rebuild the tree with every path segment passed through anonymizePath.
    function anonymizeTreeRecursive(tree: Tree): any {
      // A node with an integer `size` is a file leaf: return it unchanged.
      if (Number.isInteger(tree.size)) {
        return tree;
      }
      const output: any = {};
      let current: any = tree;
      // Some nodes nest their entries under a `child` wrapper.
      if (current.child) {
        current = current.child;
      }
      for (const file in current) {
        const anonymizedPath = anonymizePath(file, terms);
        output[anonymizedPath] = anonymizeTreeRecursive(current[file]);
      }
      return output;
    }

    return anonymizeTreeRecursive(await this.files(opt));
  }
|
||||
|
||||
/**
|
||||
* Get the file tree
|
||||
*
|
||||
* @param opt force to get an updated list of files
|
||||
* @returns The file tree
|
||||
*/
|
||||
async files(opt?: { force?: boolean }) {
|
||||
if (
|
||||
this._model.originalFiles &&
|
||||
Object.keys(this._model.originalFiles).length !== 0 &&
|
||||
!opt?.force
|
||||
) {
|
||||
return this._model.originalFiles;
|
||||
}
|
||||
const files = await this.source.getFiles();
|
||||
this._model.originalFiles = files;
|
||||
this._model.size = 0;
|
||||
await this._model.save();
|
||||
|
||||
this._model.originalFiles = files;
|
||||
return files;
|
||||
}
|
||||
|
||||
check() {
|
||||
if (this._model.options.expirationMode != "never") {
|
||||
if (this._model.options.expirationDate > new Date()) {
|
||||
this.updateStatus("expired");
|
||||
}
|
||||
}
|
||||
if (this._model.status == "expired") {
|
||||
throw new Error("repository_expired");
|
||||
}
|
||||
if (this._model.status == "removed") {
|
||||
throw new Error("repository_expired");
|
||||
}
|
||||
if (this._model.status != "ready") {
|
||||
throw new Error("repository_not_ready");
|
||||
}
|
||||
}
|
||||
|
||||
  /**
   * Compress and anonymize the repository
   *
   * Each file in the original cache is piped through the anonymizer while
   * being added to the archive.
   *
   * NOTE(review): `Transformer` is not imported in this file — confirm it
   * is globally declared (e.g. by the storage/archiver typings).
   *
   * @returns A stream of anonymized repository compressed
   */
  zip(): stream.Readable {
    return storage.archive(this.originalCachePath, {
      format: "zip",
      fileTransformer: (filename) =>
        anonymizeStream(filename, this) as Transformer,
    });
  }
|
||||
|
||||
  /**
   * Update the repository if a new commit exists
   *
   * Runs only when the "update" option is on and the repository has not
   * been viewed in the last 24 hours. If the tracked branch moved, the
   * state is reset and the repository re-anonymized at the new commit.
   *
   * @returns void
   * @throws Error("repo_not_ready") when an update is due but the
   *   repository is not in the "ready" state
   */
  async updateIfNeeded(): Promise<void> {
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);

    if (this._model.options.update && this._model.lastView < yesterday) {
      if (this._model.status != "ready") {
        throw new Error("repo_not_ready");
      }

      // Only GitHubBase can be update for the moment
      if (this.source instanceof GitHubBase) {
        // Fresh branch list straight from GitHub (cache bypassed).
        const branches = await this.source.githubRepository.branches({
          force: true,
          accessToken: await this.source.getToken(),
        });
        const branch = this.source.branch;
        // NOTE(review): filter(...)[0] assumes the tracked branch still
        // exists upstream — a deleted branch would throw here; confirm.
        if (
          branch.commit ==
          branches.filter((f) => f.name == branch.name)[0].commit
        ) {
          console.log(`${this._model.repoId} is up to date`);
          return;
        }
        // Record the new head commit and restart the anonymization.
        this._model.source.commit = branches.filter(
          (f) => f.name == branch.name
        )[0].commit;
        this._model.anonymizeDate = new Date();
        await this.updateStatus("preparing");
        console.log(
          `${this._model.repoId} will be updated to ${this._model.source.commit}`
        );
        await this.resetSate();
        await this.anonymize();
      }
    }
  }
|
||||
|
||||
/**
|
||||
* Download the require state for the repository to work
|
||||
*
|
||||
* @returns void
|
||||
*/
|
||||
async anonymize() {
|
||||
if (this._model.status == "ready") return;
|
||||
await this.updateStatus("queue");
|
||||
await this.files();
|
||||
await this.updateStatus("ready");
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the last view and view count
|
||||
*/
|
||||
async countView() {
|
||||
this._model.lastView = new Date();
|
||||
this._model.pageView = (this._model.pageView || 0) + 1;
|
||||
await this._model.save();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the status of the repository
|
||||
* @param status the new status
|
||||
* @param errorMessage a potential error message to display
|
||||
*/
|
||||
async updateStatus(status: RepositoryStatus, errorMessage?: string) {
|
||||
this._model.status = status;
|
||||
this._model.errorMessage = errorMessage;
|
||||
this._model.status = status;
|
||||
await this._model.save();
|
||||
}
|
||||
|
||||
  /**
   * Expire the repository
   *
   * Marks the repository "expired" and deletes its cached state from
   * storage.
   */
  async expire() {
    await this.updateStatus("expired");
    await this.resetSate();
  }
|
||||
|
||||
  /**
   * Remove the repository
   *
   * Zeroes the recorded size and deletes the cached state from storage.
   * (The size reset is also performed inside resetSate.)
   */
  async remove() {
    this._model.size = 0;
    await this.resetSate();
  }
|
||||
|
||||
  /**
   * Reset/delete the state of the repository
   *
   * Clears the cached file index and size on the model, then removes the
   * repository's folder from storage. (The name is a historical typo for
   * "resetState"; kept because sibling methods call it by this name.)
   */
  private async resetSate() {
    this._model.size = 0;
    this._model.originalFiles = null;
    await this._model.save();
    await storage.rm(this._model.repoId + "/");
  }
|
||||
|
||||
/**
|
||||
* Compute the size of the repository in bite.
|
||||
*
|
||||
* @returns The size of the repository in bite
|
||||
*/
|
||||
async computeSize(): Promise<number> {
|
||||
if (this._model.status != "ready") return 0;
|
||||
if (this._model.size) return this._model.size;
|
||||
function recursiveCount(files) {
|
||||
let total = 0;
|
||||
for (const name in files) {
|
||||
const file = files[name];
|
||||
if (file.size) {
|
||||
total += file.size as number;
|
||||
} else if (typeof file == "object") {
|
||||
total += recursiveCount(file);
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
const files = await this.files({ force: false });
|
||||
this._model.size = recursiveCount(files);
|
||||
await this._model.save();
|
||||
return this._model.size;
|
||||
}
|
||||
|
||||
  /***** Getters ********/

  /** Public identifier of the anonymized repository. */
  get repoId() {
    return this._model.repoId;
  }

  /** Anonymization options stored on the model. */
  get options() {
    return this._model.options;
  }

  /** Underlying mongoose document. */
  get model() {
    return this._model;
  }

  /** Storage path of the original (non-anonymized) files of this repo. */
  get originalCachePath() {
    return path.join(this._model.repoId, "original") + "/";
  }

  /** Current lifecycle status (e.g. "preparing", "ready", "expired"). */
  get status() {
    return this._model.status;
  }
|
||||
|
||||
  /**
   * Plain-object view of the repository for API responses.
   * Only the fields listed below are exposed (notably not the model's
   * access token).
   */
  toJSON() {
    return {
      repoId: this._model.repoId,
      options: this._model.options,
      anonymizeDate: this._model.anonymizeDate,
      status: this._model.status,
      source: this.source.toJSON(),
      lastView: this._model.lastView,
      pageView: this._model.pageView,
      size: this._model.size,
    };
  }
|
||||
}
|
||||
110
src/User.ts
Normal file
110
src/User.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import RepositoryModel from "./database/repositories/repositories.model";
|
||||
import { IUserDocument } from "./database/users/users.types";
|
||||
import Repository from "./Repository";
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
|
||||
/**
 * Wrapper around a user document: GitHub identity, saved defaults, and
 * queries for the user's GitHub and anonymized repositories.
 */
export default class User {
  private _model: IUserDocument;

  constructor(model: IUserDocument) {
    this._model = model;
  }

  /** GitHub username. */
  get username(): string {
    return this._model.username;
  }

  /** OAuth access token used for GitHub API calls on the user's behalf. */
  get accessToken(): string {
    return this._model.accessToken;
  }

  /** Profile photo URL (taken from the GitHub profile at login). */
  get photo(): string {
    return this._model.photo;
  }

  /** Default anonymization options/terms saved by the user. */
  get default() {
    return this._model.default;
  }

  set default(d) {
    this._model.default = d;
  }

  /**
   * List the user's GitHub repositories.
   *
   * The list is cached on the user document; pass `{ force: true }` to
   * re-fetch from the GitHub API. Repositories not yet known locally are
   * inserted into the Repository collection.
   */
  async getGitHubRepositories(opt?: {
    force: boolean;
  }): Promise<GitHubRepository[]> {
    if (!this._model.repositories || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: this.accessToken });
      const repositories = (
        await octokit.paginate(octokit.repos.listForAuthenticatedUser, {
          visibility: "all",
          sort: "pushed",
          per_page: 100,
        })
      ).map((r) => {
        return new RepositoryModel({
          externalId: "gh_" + r.id,
          name: r.full_name,
          url: r.html_url,
          size: r.size,
          defaultBranch: r.default_branch,
        });
      });

      // Which of these repositories are already known locally?
      const finds = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("externalId")
      ).map((m) => m.externalId);

      // Persist only the repositories we have not seen before.
      await Promise.all(
        repositories
          .filter((r) => finds.indexOf(r.externalId) == -1)
          .map((r) => r.save())
      );

      // Cache the local document ids on the user for subsequent calls.
      this._model.repositories = (
        await RepositoryModel.find({
          externalId: {
            $in: repositories.map((repo) => repo.externalId),
          },
        }).select("id")
      ).map((m) => m.id);
      await this._model.save();
      return repositories.map((r) => new GitHubRepository(r));
    } else {
      // Serve from the cached id list.
      return (
        await RepositoryModel.find({ _id: { $in: this._model.repositories } })
      ).map((i) => new GitHubRepository(i));
    }
  }

  /**
   * List the user's anonymized repositories.
   *
   * As a side effect, repositories whose expiration date has passed are
   * expired here. NOTE(review): expired repositories are still included
   * in the returned list — confirm this is intended.
   */
  async getRepositories() {
    const repositories = (
      await AnonymizedRepositoryModel.find({
        owner: this.username,
      }).exec()
    ).map((d) => new Repository(d));
    for (let repo of repositories) {
      if (repo.options.expirationDate) {
        // Normalize to a Date (presumably the stored value may arrive as a
        // string — TODO confirm against the schema/driver behavior).
        repo.options.expirationDate = new Date(repo.options.expirationDate);
      }
      if (
        repo.options.expirationMode != "never" &&
        repo.options.expirationDate != null &&
        repo.options.expirationDate < new Date()
      ) {
        await repo.expire()
      }
    }
    return repositories;
  }

  /** Serialize the underlying user document. */
  toJSON() {
    return this._model.toJSON();
  }
}
|
||||
136
src/anonymize-utils.ts
Normal file
136
src/anonymize-utils.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import config from "../config";
|
||||
import Repository from "./Repository";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import { isText } from "istextorbinary";
|
||||
import * as path from "path";
|
||||
|
||||
import * as stream from "stream";
|
||||
|
||||
const urlRegex =
|
||||
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
|
||||
export function isTextFile(filePath, content) {
|
||||
const filename = path.basename(filePath);
|
||||
const extensions = filename.split(".").reverse();
|
||||
const extension = extensions[0].toLowerCase();
|
||||
if (config.additionalExtensions.includes(extension)) {
|
||||
return true;
|
||||
}
|
||||
if (isText(filename)) {
|
||||
return true;
|
||||
}
|
||||
return isText(filename, content);
|
||||
}
|
||||
|
||||
export function anonymizeStream(filename: string, repository: Repository) {
|
||||
const ts = new stream.Transform();
|
||||
var chunks = [],
|
||||
len = 0,
|
||||
pos = 0;
|
||||
|
||||
ts._transform = function _transform(chunk, enc, cb) {
|
||||
chunks.push(chunk);
|
||||
len += chunk.length;
|
||||
|
||||
if (pos === 1) {
|
||||
let data: any = Buffer.concat(chunks, len);
|
||||
if (isTextFile(filename, data)) {
|
||||
data = anonymizeContent(data.toString(), repository);
|
||||
}
|
||||
|
||||
chunks = [];
|
||||
len = 0;
|
||||
|
||||
this.push(data);
|
||||
}
|
||||
|
||||
pos = 1 ^ pos;
|
||||
cb(null);
|
||||
};
|
||||
|
||||
ts._flush = function _flush(cb) {
|
||||
if (chunks.length) {
|
||||
let data: any = Buffer.concat(chunks, len);
|
||||
if (isText(filename, data)) {
|
||||
data = anonymizeContent(data.toString(), repository);
|
||||
}
|
||||
|
||||
this.push(data);
|
||||
}
|
||||
|
||||
cb(null);
|
||||
};
|
||||
return ts;
|
||||
}
|
||||
|
||||
/**
 * Anonymize textual content according to the repository's options:
 * strip markdown images, mask URLs, rewrite links to the original GitHub
 * repository so they stay inside the anonymized mirror, and mask every
 * user-supplied term.
 */
export function anonymizeContent(content: string, repository: Repository) {
  if (repository.options?.image === false) {
    // remove image in markdown
    content = content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      ""
    );
  }

  if (!repository.options?.link) {
    // remove all links
    content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
  }

  if (repository.source instanceof GitHubBase) {
    // Rewrite blob/ links, then tree/ links, then the bare repo URL, each
    // to the anonymized host.
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/blob/${repository.source.branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${
          repository.source.githubRepository.fullName
        }/tree/${(repository.source as GitHubBase).branch?.name || "HEAD"}\\b`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
    content = content.replace(
      new RegExp(
        `https://github.com/${repository.source.githubRepository.fullName}`,
        "gi"
      ),
      `https://${config.HOSTNAME}/r/${repository.repoId}`
    );
  }

  for (let term of repository.options.terms || []) {
    if (term.trim() == "") {
      continue;
    }
    // remove whole url if it contains the term
    content = content.replace(urlRegex, (match) => {
      if (new RegExp(`\\b${term}\\b`, "gi").test(match))
        return config.ANONYMIZATION_MASK;
      return match;
    });

    // remove the term in the text
    // NOTE(review): the term is injected into a RegExp unescaped; a term
    // containing metacharacters (e.g. "c++") throws — consider escaping.
    content = content.replace(
      new RegExp(`\\b${term}\\b`, "gi"),
      config.ANONYMIZATION_MASK
    );
  }
  return content;
}
|
||||
|
||||
export function anonymizePath(path: string, terms: string[]) {
|
||||
for (let term of terms) {
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
import * as mongoose from "mongoose";
const { model } = mongoose;

import {
  IAnonymizedRepositoryDocument,
  IAnonymizedRepositoryModel,
} from "./anonymizedRepositories.types";
import AnonymizedRepositorySchema from "./anonymizedRepositories.schema";

// Mongoose model for anonymized repositories. The cast is needed because
// `model()` returns a generic Model; IAnonymizedRepositoryModel narrows it.
const AnonymizedRepositoryModel = model<IAnonymizedRepositoryDocument>(
  "AnonymizedRepository",
  AnonymizedRepositorySchema
) as IAnonymizedRepositoryModel;

export default AnonymizedRepositoryModel;
|
||||
@@ -0,0 +1,54 @@
|
||||
import * as mongoose from "mongoose";
|
||||
const { Schema } = mongoose;
|
||||
|
||||
const AnonymizedRepositorySchema = new Schema({
|
||||
repoId: {
|
||||
type: String,
|
||||
index: { unique: true },
|
||||
},
|
||||
status: {
|
||||
type: String,
|
||||
default: "preparing",
|
||||
},
|
||||
errorMessage: String,
|
||||
anonymizeDate: Date,
|
||||
lastView: Date,
|
||||
pageView: Number,
|
||||
accessToken: String,
|
||||
owner: String,
|
||||
conference: String,
|
||||
source: {
|
||||
type: { type: String },
|
||||
branch: String,
|
||||
commit: String,
|
||||
repositoryId: String,
|
||||
repositoryName: String,
|
||||
accessToken: String,
|
||||
},
|
||||
originalFiles: mongoose.Schema.Types.Mixed,
|
||||
options: {
|
||||
terms: [String],
|
||||
expirationMode: { type: String },
|
||||
expirationDate: Date,
|
||||
update: Boolean,
|
||||
image: Boolean,
|
||||
pdf: Boolean,
|
||||
notebook: Boolean,
|
||||
link: Boolean,
|
||||
page: Boolean,
|
||||
pageSource: {
|
||||
branch: String,
|
||||
path: String,
|
||||
},
|
||||
},
|
||||
dateOfEntry: {
|
||||
type: Date,
|
||||
default: new Date(),
|
||||
},
|
||||
size: {
|
||||
type: Number,
|
||||
default: 0,
|
||||
},
|
||||
});
|
||||
|
||||
export default AnonymizedRepositorySchema;
|
||||
@@ -0,0 +1,46 @@
|
||||
import * as mongoose from "mongoose";
import { RepositoryStatus, Tree } from "../../types";

/** Plain shape of an anonymized repository document. */
export interface IAnonymizedRepository {
  repoId: string;
  status?: RepositoryStatus;
  errorMessage?: string;
  anonymizeDate: Date;
  // Where the original content comes from and at which commit.
  source: {
    type: "GitHubDownload" | "GitHubStream" | "Zip";
    branch?: string;
    commit?: string;
    repositoryId?: string;
    repositoryName?: string;
    accessToken?: string;
  };
  owner: string;
  // Cached original file tree.
  originalFiles: Tree;
  conference: string;
  // User-selected anonymization options.
  options: {
    terms: string[];
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    pdf: boolean;
    notebook: boolean;
    link: boolean;
    page: boolean;
    pageSource?: {
      branch: string;
      path: string;
    };
  };
  pageView: number;
  lastView: Date;
  size: number;
}

export interface IAnonymizedRepositoryDocument
  extends IAnonymizedRepository,
    mongoose.Document {
  // NOTE(review): declared here but no implementation is visible in the
  // schema file — confirm it exists.
  setLastUpdated: (this: IAnonymizedRepositoryDocument) => Promise<void>;
}
export interface IAnonymizedRepositoryModel
  extends mongoose.Model<IAnonymizedRepositoryDocument> {}
|
||||
28
src/database/database.ts
Normal file
28
src/database/database.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import * as mongoose from "mongoose";
|
||||
import Repository from "../Repository";
|
||||
import config from "../../config";
|
||||
import AnonymizedRepositoryModel from "./anonymizedRepositories/anonymizedRepositories.model";
|
||||
|
||||
// Connection string; credentials and host come from the configuration.
const MONGO_URL = `mongodb://${config.DB_USERNAME}:${config.DB_PASSWORD}@${config.DB_HOSTNAME}:27017/`;

export const database = mongoose.connection;

/**
 * Open the mongoose connection and return it.
 *
 * NOTE(review): the database name "test" is hard-coded below — confirm
 * this is intended outside of development.
 */
export async function connect() {
  mongoose.set("useNewUrlParser", true);
  mongoose.set("useFindAndModify", true);
  mongoose.set("useUnifiedTopology", true);

  await mongoose.connect(MONGO_URL + "test", {
    authSource: "admin",
    useCreateIndex: true,
    useFindAndModify: true,
  });

  return database;
}
|
||||
|
||||
export async function getRepository(repoId: string) {
|
||||
const data = await AnonymizedRepositoryModel.findOne({ repoId });
|
||||
if (!data) throw new Error("repo_not_found");
|
||||
return new Repository(data);
|
||||
}
|
||||
12
src/database/repositories/repositories.model.ts
Normal file
12
src/database/repositories/repositories.model.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
import * as mongoose from "mongoose";
const { model } = mongoose;

import { IRepositoryDocument, IRepositoryModel } from "./repositories.types";
import RepositorySchema from "./repositories.schema";

// Mongoose model for (GitHub) source repositories; the cast narrows the
// generic Model to IRepositoryModel.
const RepositoryModel = model<IRepositoryDocument>(
  "Repository",
  RepositorySchema
) as IRepositoryModel;

export default RepositoryModel;
|
||||
42
src/database/repositories/repositories.schema.ts
Normal file
42
src/database/repositories/repositories.schema.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import * as mongoose from "mongoose";
|
||||
const { Schema } = mongoose;
|
||||
|
||||
const RepositorySchema = new Schema({
|
||||
externalId: {
|
||||
type: String,
|
||||
index: { unique: true },
|
||||
},
|
||||
name: {
|
||||
type: String,
|
||||
index: true,
|
||||
},
|
||||
url: String,
|
||||
source: {
|
||||
type: String,
|
||||
default: "github",
|
||||
},
|
||||
hasPage: { type: Boolean, default: false },
|
||||
pageSource: {
|
||||
branch: { type: String },
|
||||
path: String,
|
||||
},
|
||||
branches: [
|
||||
{
|
||||
name: { type: String },
|
||||
commit: String,
|
||||
readme: String,
|
||||
},
|
||||
],
|
||||
defaultBranch: String,
|
||||
size: Number,
|
||||
status: {
|
||||
type: String,
|
||||
default: "ready",
|
||||
},
|
||||
dateOfEntry: {
|
||||
type: Date,
|
||||
default: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
export default RepositorySchema;
|
||||
25
src/database/repositories/repositories.types.ts
Normal file
25
src/database/repositories/repositories.types.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import * as mongoose from "mongoose";

/** Plain shape of a source (GitHub) repository document. */
export interface IRepository {
  externalId: string;
  name: string;
  url?: string;
  source: "github";
  // Size in kilobytes (as reported by the GitHub API — see the size check
  // in the private repository routes).
  size?: number;
  defaultBranch?: string;
  hasPage: boolean;
  pageSource?: {
    branch: string;
    path: string;
  };
  // Known branches with their head commit and (optionally) cached readme.
  branches?: {
    name: string;
    commit: string;
    readme?: string;
  }[];
}

export interface IRepositoryDocument extends IRepository, mongoose.Document {
  setLastUpdated: (this: IRepositoryDocument) => Promise<void>;
}
export interface IRepositoryModel extends mongoose.Model<IRepositoryDocument> {}
|
||||
10
src/database/users/users.model.ts
Normal file
10
src/database/users/users.model.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import * as mongoose from "mongoose";
|
||||
const { model } = mongoose;
|
||||
|
||||
import { IUserDocument, IUserModel } from "./users.types";
|
||||
import UserSchema from "./users.schema";
|
||||
|
||||
const UserModel = model<IUserDocument>("user", UserSchema) as IUserModel;
|
||||
|
||||
export default UserModel
|
||||
|
||||
36
src/database/users/users.schema.ts
Normal file
36
src/database/users/users.schema.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import * as mongoose from "mongoose";
|
||||
const { Schema } = mongoose;
|
||||
|
||||
const UserSchema = new Schema({
|
||||
accessToken: String,
|
||||
username: {
|
||||
type: String,
|
||||
index: { unique: true },
|
||||
},
|
||||
email: String,
|
||||
photo: String,
|
||||
repositories: [String],
|
||||
default: {
|
||||
terms: [String],
|
||||
options: {
|
||||
expirationMode: { type: String },
|
||||
update: Boolean,
|
||||
image: Boolean,
|
||||
pdf: Boolean,
|
||||
notebook: Boolean,
|
||||
loc: Boolean,
|
||||
link: Boolean,
|
||||
page: { type: String },
|
||||
},
|
||||
},
|
||||
status: {
|
||||
type: String,
|
||||
default: "active",
|
||||
},
|
||||
dateOfEntry: {
|
||||
type: Date,
|
||||
default: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
export default UserSchema;
|
||||
32
src/database/users/users.types.ts
Normal file
32
src/database/users/users.types.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import * as mongoose from "mongoose";

/** Plain shape of a user document. */
export interface IUser {
  // GitHub OAuth token used for API calls on the user's behalf.
  accessToken: string;

  username: string;
  email: string;
  photo?: string;

  // Ids of the cached source repositories (see User.getGitHubRepositories).
  // NOTE(review): typed number[] but populated with mongoose `id` values,
  // which are strings — confirm the intended element type.
  repositories?: number[];
  // Default anonymization settings applied to new repositories.
  default?: {
    terms: string[];
    options: {
      expirationMode: "never" | "redirect" | "";
      update: boolean;
      image: boolean;
      pdf: boolean;
      notebook: boolean;
      loc: boolean;
      link: boolean;
      page: string | null;
    };
  };
  status?: "active" | "removed";
  dateOfEntry?: Date;
  lastUpdated?: Date;
}

export interface IUserDocument extends IUser, mongoose.Document {
  setLastUpdated: (this: IUserDocument) => Promise<void>;
}
export interface IUserModel extends mongoose.Model<IUserDocument> {}
|
||||
107
src/routes/connection.ts
Normal file
107
src/routes/connection.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import * as redis from "redis";
|
||||
import * as passport from "passport";
|
||||
import * as session from "express-session";
|
||||
import * as connectRedis from "connect-redis";
|
||||
import * as OAuth2Strategy from "passport-oauth2";
|
||||
import { Profile, Strategy } from "passport-github2";
|
||||
import * as express from "express";
|
||||
|
||||
import config from "../../config";
|
||||
import UserModel from "../database/users/users.model";
|
||||
|
||||
const RedisStore = connectRedis(session);
|
||||
|
||||
export function ensureAuthenticated(
|
||||
req: express.Request,
|
||||
res: express.Response,
|
||||
next: express.NextFunction
|
||||
) {
|
||||
if (req.isAuthenticated()) {
|
||||
return next();
|
||||
}
|
||||
res.status(401).json({ error: "not_connected" });
|
||||
}
|
||||
|
||||
/**
 * passport verify callback: upsert the GitHub user and build the session
 * payload.
 *
 * NOTE(review): errors are only logged and `done` is still called (with a
 * possibly undefined `user`) in the `finally` block, so a DB failure still
 * logs the user in without a persisted record — confirm this is intended.
 */
const verify = async (
  accessToken: string,
  refreshToken: string,
  profile: Profile,
  done: OAuth2Strategy.VerifyCallback
): Promise<void> => {
  let user;
  try {
    user = await UserModel.findOne({ username: profile.username });
    if (user) {
      // Known user: refresh the token and profile details.
      user.accessToken = accessToken;
      user.email = profile.emails[0]?.value;
      user.photo = profile.photos[0]?.value;
      await user.save();
    } else {
      // First login: create the user record.
      user = await new UserModel({
        username: profile.username,
        accessToken: accessToken,
        email: profile.emails[0]?.value,
        photo: profile.photos[0]?.value,
      }).save();
    }
  } catch (error) {
    console.error(error);
  } finally {
    done(null, {
      username: profile.username,
      accessToken,
      refreshToken,
      profile,
      user,
    });
  }
};
|
||||
|
||||
passport.use(
  new Strategy(
    {
      clientID: config.CLIENT_ID,
      clientSecret: config.CLIENT_SECRET,
      callbackURL: config.AUTH_CALLBACK,
    },
    verify
  )
);

// Sessions store the whole user payload; no DB lookup on deserialize.
passport.serializeUser((user: Express.User, done) => {
  done(null, user);
});

passport.deserializeUser((user: Express.User, done) => {
  done(null, user);
});

// Redis-backed session middleware.
// NOTE(review): the session secret is hard-coded ("keyboard cat") — it
// should come from configuration/environment in production.
export const appSession = session({
  secret: "keyboard cat",
  store: new RedisStore({
    client: redis.createClient({
      port: config.REDIS_PORT,
      host: config.REDIS_HOSTNAME,
    }),
  }),
  saveUninitialized: false,
  resave: false,
});

export const router = express.Router();

// Start the GitHub OAuth flow; "repo" scope is requested to read the
// user's repositories.
router.get(
  "/login",
  passport.authenticate("github", { scope: ["repo"] }), // Note the scope here
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);

// OAuth callback endpoint.
router.get(
  "/auth",
  passport.authenticate("github", { failureRedirect: "/" }),
  function (req: express.Request, res: express.Response) {
    res.redirect("/");
  }
);
|
||||
38
src/routes/file.ts
Normal file
38
src/routes/file.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import * as express from "express";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { getRepo, handleError } from "./route-utils";
|
||||
|
||||
export const router = express.Router();
|
||||
|
||||
router.get(
|
||||
"/:repoId/file/:path*",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
let anonymizedPath = req.params.path;
|
||||
if (req.params[0]) {
|
||||
anonymizedPath += req.params[0];
|
||||
}
|
||||
anonymizedPath = anonymizedPath;
|
||||
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
|
||||
await repo.countView();
|
||||
|
||||
try {
|
||||
const f = new AnonymizedFile(repo, {
|
||||
anonymizedPath,
|
||||
});
|
||||
if (!(await f.isFileSupported())) {
|
||||
return res.status(500).send({ error: "file_not_supported" });
|
||||
}
|
||||
res.attachment(
|
||||
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
|
||||
);
|
||||
await f.send(res);
|
||||
} catch (error) {
|
||||
return handleError(error, res);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
export default router;
|
||||
13
src/routes/index.ts
Normal file
13
src/routes/index.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import repositoryPrivate from "./repository-private";
|
||||
import repositoryPublic from "./repository-public";
|
||||
import file from "./file";
|
||||
import webview from "./webview";
|
||||
import user from "./user";
|
||||
|
||||
// Aggregated route modules, keyed by their purpose.
export default {
  repositoryPrivate,
  repositoryPublic,
  file,
  webview,
  user,
};
|
||||
270
src/routes/repository-private.ts
Normal file
270
src/routes/repository-private.ts
Normal file
@@ -0,0 +1,270 @@
|
||||
import * as express from "express";
|
||||
import { ensureAuthenticated } from "./connection";
|
||||
|
||||
import * as db from "../database/database";
|
||||
import { getRepo, getUser, handleError } from "./route-utils";
|
||||
import RepositoryModel from "../database/repositories/repositories.model";
|
||||
import {
|
||||
GitHubRepository,
|
||||
getRepositoryFromGitHub,
|
||||
} from "../source/GitHubRepository";
|
||||
import gh = require("parse-github-url");
|
||||
import GitHubBase from "../source/GitHubBase";
|
||||
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import config from "../../config";
|
||||
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
|
||||
import Repository from "../Repository";
|
||||
|
||||
const router = express.Router();

// user needs to be connected for all user API
router.use(ensureAuthenticated);

// claim a repository: link an existing anonymized repo to the connected
// user, after verifying the user can access the matching GitHub repo.
router.post("/claim", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  try {
    if (!req.body.repoId) {
      return res.status(500).json({ error: "repoId_not_defined" });
    }
    if (!req.body.repoUrl) {
      return res.status(500).json({ error: "repoUrl_not_defined" });
    }

    // NOTE(review): getRepository throws "repo_not_found" instead of
    // returning null (see database.ts), so the null check below is
    // likely dead code.
    const repoConfig = await db.getRepository(req.body.repoId);
    if (repoConfig == null) {
      return res.status(500).json({ error: "repo_not_found" });
    }

    // Resolve the GitHub repository from the provided URL using the
    // user's own token, so the user must have access to it.
    const r = gh(req.body.repoUrl);
    const repo = await getRepositoryFromGitHub({
      owner: r.owner,
      repo: r.name,
      accessToken: user.accessToken,
    });
    // The claimed URL must point at the repository the anonymized repo
    // was created from.
    if ((repoConfig.source as GitHubBase).githubRepository.id != repo.id) {
      return res.status(500).json({ error: "repo_not_found" });
    }

    console.log(`${user.username} claims ${r.repository}.`);
    // NOTE(review): the persisted value below is the username string; this
    // in-memory assignment passes the whole User object — confirm the
    // Repository.owner setter expects that.
    repoConfig.owner = user;

    await AnonymizedRepositoryModel.updateOne(
      { repoId: repoConfig.repoId },
      { $set: { owner: user.username } }
    );
    return res.send("Ok");
  } catch (error) {
    console.error(req.path, error);
    return res.status(500).json({ error });
  }
});
|
||||
|
||||
// refresh a repository: re-run the anonymization (owner only)
router.post(
  "/:repoId/refresh",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res);
    if (!repo) return;
    const user = await getUser(req);
    // Only the owner may refresh.
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.anonymize();
    res.end("ok");
  }
);
|
||||
|
||||
// delete a repository (owner only)
router.delete(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: false });
    if (!repo) return;
    const user = await getUser(req);
    // Only the owner may delete.
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }
    await repo.remove();
    console.log(`${req.params.repoId} is removed`);
    return res.json("ok");
  }
);
|
||||
|
||||
router.get(
|
||||
"/:owner/:repo/",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
try {
|
||||
const repo = await getRepositoryFromGitHub({
|
||||
owner: req.params.owner,
|
||||
repo: req.params.repo,
|
||||
accessToken: user.accessToken,
|
||||
});
|
||||
res.json(repo.toJSON());
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// list the branches of a GitHub repository (force=1 bypasses the cache)
router.get(
  "/:owner/:repo/branches",
  async (req: express.Request, res: express.Response) => {
    const user = await getUser(req);
    try {
      const repository = await getRepositoryFromGitHub({
        accessToken: user.accessToken,
        owner: req.params.owner,
        repo: req.params.repo,
      });
      return res.json(
        await repository.branches({
          accessToken: user.accessToken,
          force: req.query.force == "1",
        })
      );
    } catch (error) {
      handleError(error, res);
    }
  }
);
|
||||
|
||||
router.get(
|
||||
"/:owner/:repo/readme",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
const repo = await RepositoryModel.findOne({
|
||||
name: `${req.params.owner}/${req.params.repo}`,
|
||||
});
|
||||
if (!repo) return res.status(404).send({ error: "repo_not_found" });
|
||||
const repository = new GitHubRepository(repo);
|
||||
return res.send(
|
||||
await repository.readme({
|
||||
accessToken: user.accessToken,
|
||||
force: req.query.force == "1",
|
||||
branch: req.query.branch as string,
|
||||
})
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
function validateNewRepo(repoUpdate) {
|
||||
const validCharacters = /^[0-9a-zA-Z\-\_]+$/;
|
||||
if (
|
||||
!repoUpdate.repoId.match(validCharacters) ||
|
||||
repoUpdate.repoId.length < 3
|
||||
) {
|
||||
throw new Error("invalid_repoId");
|
||||
}
|
||||
if (!repoUpdate.branch) {
|
||||
throw new Error("branch_not_specified");
|
||||
}
|
||||
if (!repoUpdate.options) {
|
||||
throw new Error("options_not_provided");
|
||||
}
|
||||
if (!Array.isArray(repoUpdate.terms)) {
|
||||
throw new Error("invalid_terms_format");
|
||||
}
|
||||
if (!/^[a-f0-9]+$/.test(repoUpdate.commit)) {
|
||||
throw new Error("invalid_commit_format");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Copy the validated request payload onto the anonymized-repository
 * document. Does not save the model; callers persist it afterwards.
 */
function updateRepoModel(model: IAnonymizedRepositoryDocument, repoUpdate) {
  model.source.commit = repoUpdate.commit;
  model.source.branch = repoUpdate.branch;
  model.conference = repoUpdate.conference;
  model.options = {
    terms: repoUpdate.terms,
    expirationMode: repoUpdate.options.expirationMode,
    // Normalize the (serialized) expiration date; null disables expiry.
    expirationDate: repoUpdate.options.expirationDate
      ? new Date(repoUpdate.options.expirationDate)
      : null,
    update: repoUpdate.options.update,
    image: repoUpdate.options.image,
    pdf: repoUpdate.options.pdf,
    notebook: repoUpdate.options.notebook,
    link: repoUpdate.options.link,
    page: repoUpdate.options.page,
    pageSource: repoUpdate.options.pageSource,
  };
}
|
||||
// update a repository
router.post(
  "/:repoId/",
  async (req: express.Request, res: express.Response) => {
    const repo = await getRepo(req, res, { nocheck: true });
    if (!repo) return;
    const user = await getUser(req);

    // Only the owner may update.
    if (repo.owner.username != user.username) {
      return res.status(401).json({ error: "not_authorized" });
    }

    const repoUpdate = req.body;

    try {
      validateNewRepo(repoUpdate);
    } catch (error) {
      return handleError(error, res);
    }

    // A new commit resets the anonymization date.
    if (repoUpdate.commit != repo.model.source.commit) {
      repo.model.anonymizeDate = new Date();
      repo.model.source.commit = repoUpdate.commit;
    }

    updateRepoModel(repo.model, repoUpdate);

    await repo.updateStatus("preparing");

    await repo.model.save();
    res.send("ok");
    // Fire-and-forget: the response is sent before anonymization finishes.
    repo.anonymize();
  }
);
|
||||
|
||||
// add repository
router.post("/", async (req: express.Request, res: express.Response) => {
  const user = await getUser(req);
  const repoUpdate = req.body;

  try {
    validateNewRepo(repoUpdate);
  } catch (error) {
    return handleError(error, res);
  }
  // Resolve the GitHub repository with the user's token.
  const r = gh(repoUpdate.fullName);
  const repository = await getRepositoryFromGitHub({
    accessToken: user.accessToken,
    owner: r.owner,
    repo: r.name,
  });
  const repo = new AnonymizedRepositoryModel();
  repo.repoId = repoUpdate.repoId;
  repo.anonymizeDate = new Date();
  repo.owner = user.username;
  repo.source = {
    type:
      repoUpdate.options.mode == "download" ? "GitHubDownload" : "GitHubStream",
    accessToken: user.accessToken,
    repositoryId: repository.model.id,
    repositoryName: repoUpdate.fullName,
  };

  if (repo.source.type == "GitHubDownload") {
    // details.size is in kilobytes
    // NOTE(review): the condition is a size limit but the error code is
    // "invalid_mode" — consider a more specific code.
    if (repository.size > config.MAX_REPO_SIZE) {
      return res.status(500).send({ error: "invalid_mode" });
    }
  }

  updateRepoModel(repo, repoUpdate);

  await repo.save();
  res.send("ok");
  // Fire-and-forget anonymization after responding.
  new Repository(repo).anonymize();
});
|
||||
|
||||
export default router;
|
||||
43
src/routes/repository-public.ts
Normal file
43
src/routes/repository-public.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import * as express from "express";
|
||||
|
||||
import * as db from "../database/database";
|
||||
import { getRepo, getUser, handleError } from "./route-utils";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
if (!repo) return;
|
||||
res.json((await db.getRepository(req.params.repoId)).toJSON());
|
||||
});
|
||||
|
||||
router.get(
|
||||
"/:repoId/zip",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
res.attachment(`${repo.repoId}.zip`);
|
||||
repo.zip().pipe(res);
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
"/:repoId/files",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
res.json(await repo.anonymizedFiles({ force: true }));
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
"/:repoId/options",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
await repo.updateIfNeeded();
|
||||
res.json(repo.options);
|
||||
}
|
||||
);
|
||||
|
||||
export default router;
|
||||
63
src/routes/route-utils.ts
Normal file
63
src/routes/route-utils.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import * as express from "express";
|
||||
import * as db from "../database/database";
|
||||
import UserModel from "../database/users/users.model";
|
||||
import User from "../User";
|
||||
|
||||
export async function getRepo(
|
||||
req: express.Request,
|
||||
res: express.Response,
|
||||
opt?: { nocheck?: boolean }
|
||||
) {
|
||||
try {
|
||||
const repo = await db.getRepository(req.params.repoId);
|
||||
if (opt?.nocheck == true) {
|
||||
} else {
|
||||
// redirect if the repository is expired
|
||||
if (
|
||||
repo.status == "expired" &&
|
||||
repo.options.expirationMode == "redirect" &&
|
||||
repo.source.url
|
||||
) {
|
||||
res.redirect(repo.source.url);
|
||||
return null;
|
||||
}
|
||||
|
||||
repo.check();
|
||||
}
|
||||
return repo;
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function handleError(error: any, res: express.Response) {
|
||||
console.log(error);
|
||||
let message = error;
|
||||
if (error instanceof Error) {
|
||||
message = error.message;
|
||||
}
|
||||
let status = 500;
|
||||
if (message && message.indexOf("not_found") > -1) {
|
||||
status = 400;
|
||||
} else if (message && message.indexOf("not_connected") > -1) {
|
||||
status = 401;
|
||||
}
|
||||
|
||||
res.status(status).send({ error: message });
|
||||
return;
|
||||
}
|
||||
|
||||
export async function getUser(req: express.Request) {
|
||||
const user = (req.user as any).user;
|
||||
if (!user) {
|
||||
req.logout();
|
||||
throw new Error("not_connected");
|
||||
}
|
||||
const model = await UserModel.findById(user._id);
|
||||
if (!model) {
|
||||
req.logout();
|
||||
throw new Error("not_connected");
|
||||
}
|
||||
return new User(model);
|
||||
}
|
||||
96
src/routes/user.ts
Normal file
96
src/routes/user.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import * as express from "express";
|
||||
import config from "../../config";
|
||||
import { ensureAuthenticated } from "./connection";
|
||||
import { handleError, getUser } from "./route-utils";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
// user needs to be connected for all user API
|
||||
router.use(ensureAuthenticated);
|
||||
|
||||
router.get("/logout", async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
req.logout();
|
||||
res.redirect("/");
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/", async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const user = await getUser(req);
|
||||
res.json({ username: user.username, photo: user.photo });
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/quota", async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const user = await getUser(req);
|
||||
const sizes = await Promise.all(
|
||||
(await user.getRepositories())
|
||||
.filter((r) => r.status == "ready")
|
||||
.map((r) => r.computeSize())
|
||||
);
|
||||
res.json({
|
||||
used: sizes.reduce((sum, i) => sum + i, 0),
|
||||
total: config.DEFAULT_QUOTA,
|
||||
});
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/default", async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
try {
|
||||
res.json(user.default);
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
});
|
||||
|
||||
router.post("/default", async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
try {
|
||||
const d = req.body;
|
||||
user.default = d;
|
||||
res.send("ok");
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
});
|
||||
|
||||
router.get(
|
||||
"/anonymized_repositories",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
res.json(
|
||||
(await user.getRepositories()).map((x) => {
|
||||
return x.toJSON();
|
||||
})
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
router.get(
|
||||
"/all_repositories",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const user = await getUser(req);
|
||||
const repos = await user.getGitHubRepositories({
|
||||
force: req.query.force == "1",
|
||||
});
|
||||
res.json(
|
||||
repos.map((x) => {
|
||||
return {
|
||||
fullName: x.fullName,
|
||||
id: x.id,
|
||||
};
|
||||
})
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
export default router;
|
||||
54
src/routes/webview.ts
Normal file
54
src/routes/webview.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import * as express from "express";
|
||||
import { getRepo, handleError } from "./route-utils";
|
||||
import * as path from "path";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import GitHubDownload from "../source/GitHubDownload";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
async function webView(req: express.Request, res: express.Response) {
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
try {
|
||||
if (!repo.options.page) {
|
||||
throw "page_not_activated";
|
||||
}
|
||||
if (!repo.options.pageSource) {
|
||||
throw "page_not_activated";
|
||||
}
|
||||
|
||||
if (
|
||||
repo.options.pageSource?.branch !=
|
||||
(repo.source as GitHubDownload).branch.name
|
||||
) {
|
||||
throw "page_not_supported_on_different_branch";
|
||||
}
|
||||
|
||||
let requestPath = path.join(
|
||||
repo.options.pageSource?.path,
|
||||
req.path.substring(
|
||||
req.path.indexOf(req.params.repoId) + req.params.repoId.length
|
||||
)
|
||||
);
|
||||
if (requestPath[requestPath.length - 1] == "/") {
|
||||
requestPath = path.join(requestPath, "index.html");
|
||||
}
|
||||
requestPath = requestPath;
|
||||
const f = new AnonymizedFile(repo, {
|
||||
anonymizedPath: requestPath,
|
||||
});
|
||||
if (!(await f.isFileSupported())) {
|
||||
return res.status(500).send({ error: "file_not_supported" });
|
||||
}
|
||||
f.send(res);
|
||||
} catch (error) {
|
||||
handleError(error, res);
|
||||
}
|
||||
}
|
||||
|
||||
router.get("/:repoId/*", webView);
|
||||
router.get("/:repoId", (req: express.Request, res: express.Response) => {
|
||||
res.redirect("/w" + req.url + "/");
|
||||
});
|
||||
|
||||
export default router;
|
||||
95
src/server.ts
Normal file
95
src/server.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import * as path from "path";
|
||||
import * as ofs from "fs";
|
||||
import * as redis from "redis";
|
||||
import * as rateLimit from "express-rate-limit";
|
||||
import * as RedisStore from "rate-limit-redis";
|
||||
import * as express from "express";
|
||||
import * as compression from "compression";
|
||||
import * as db from "./database/database";
|
||||
import config from "../config";
|
||||
import * as passport from "passport";
|
||||
|
||||
import * as connection from "./routes/connection";
|
||||
import router from "./routes";
|
||||
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
|
||||
function indexResponse(req: express.Request, res: express.Response) {
|
||||
if (
|
||||
req.params.repoId &&
|
||||
req.headers["accept"] &&
|
||||
req.headers["accept"].indexOf("text/html") == -1
|
||||
) {
|
||||
const repoId = req.path.split("/")[2];
|
||||
// if it is not an html request, it assumes that the browser try to load a different type of resource
|
||||
return res.redirect(
|
||||
`/api/repo/${repoId}/file/${req.path.substring(
|
||||
req.path.indexOf(repoId) + repoId.length + 1
|
||||
)}`
|
||||
);
|
||||
}
|
||||
res.sendFile(path.resolve(__dirname, "..", "public", "index.html"));
|
||||
}
|
||||
|
||||
export default async function start() {
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
|
||||
app.use(compression());
|
||||
app.set("trust proxy", 1);
|
||||
|
||||
// handle session and connection
|
||||
app.use(connection.appSession);
|
||||
app.use(passport.initialize());
|
||||
app.use(passport.session());
|
||||
|
||||
const rate = rateLimit({
|
||||
store: new RedisStore({
|
||||
client: redis.createClient({
|
||||
host: config.REDIS_HOSTNAME,
|
||||
port: config.REDIS_PORT,
|
||||
}),
|
||||
}),
|
||||
windowMs: 15 * 60 * 1000, // 15 minutes
|
||||
max: 200, // limit each IP
|
||||
// delayMs: 0, // disable delaying - full speed until the max limit is reached
|
||||
});
|
||||
|
||||
app.use("/github", rate, connection.router);
|
||||
|
||||
// app routes
|
||||
app.use("/api/user", rate, router.user);
|
||||
app.use("/api/repo", rate, router.repositoryPublic);
|
||||
app.use("/api/repo", rate, router.file);
|
||||
app.use("/api/repo", rate, router.repositoryPrivate);
|
||||
app.use("/w/", rate, router.webview);
|
||||
|
||||
app.get("/api/message", async (_, res) => {
|
||||
if (ofs.existsSync("./message.txt")) {
|
||||
return res.sendFile(path.resolve(__dirname, "..", "message.txt"));
|
||||
}
|
||||
res.sendStatus(404);
|
||||
});
|
||||
|
||||
app.get("/api/stat", async (_, res) => {
|
||||
const nbRepositories =
|
||||
await AnonymizedRepositoryModel.estimatedDocumentCount();
|
||||
|
||||
const nbUsers = (await AnonymizedRepositoryModel.distinct("owner")).length;
|
||||
res.json({ nbRepositories, nbUsers });
|
||||
});
|
||||
|
||||
app
|
||||
.get("/", indexResponse)
|
||||
.get("/404", indexResponse)
|
||||
.get("/anonymize", indexResponse)
|
||||
.get("/r/:repoId/?*", indexResponse)
|
||||
.get("/repository/:repoId/?*", indexResponse);
|
||||
|
||||
app.use(express.static(path.join(__dirname, "..", "public")));
|
||||
|
||||
app.get("*", indexResponse);
|
||||
|
||||
await db.connect();
|
||||
app.listen(config.PORT);
|
||||
console.log("Database connected and Server started on port: " + config.PORT);
|
||||
}
|
||||
83
src/source/GitHubBase.ts
Normal file
83
src/source/GitHubBase.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Branch, Tree } from "../types";
|
||||
import { GitHubRepository } from "./GitHubRepository";
|
||||
import config from "../../config";
|
||||
import { OAuthApp } from "@octokit/oauth-app";
|
||||
import Repository from "../Repository";
|
||||
import * as stream from "stream";
|
||||
import UserModel from "../database/users/users.model";
|
||||
|
||||
/**
 * Base class for GitHub-backed repository sources.
 *
 * Holds the GitHub repository metadata, the branch/commit the source is
 * pinned to, and the access token used for GitHub API calls. Concrete
 * retrieval strategies (GitHubDownload, GitHubStream) override
 * getFileContent/getFiles.
 */
export default abstract class GitHubBase {
  // discriminates the concrete source implementation
  type: "GitHubDownload" | "GitHubStream" | "Zip";
  githubRepository: GitHubRepository;
  // branch (name + commit) this source is pinned to
  branch: Branch;
  // token used for API calls; getToken() may replace it with the app token
  accessToken: string;
  repository: Repository;

  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    this.type = data.type;
    this.accessToken = data.accessToken;
    this.githubRepository = new GitHubRepository({
      name: data.repositoryName,
      externalId: data.repositoryId,
      branches: [{ commit: data.commit, name: data.branch }],
    });
    this.repository = repository;
    this.branch = { commit: data.commit, name: data.branch };
  }

  // overridden by subclasses with a concrete retrieval strategy
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    throw new Error("Method not implemented.");
  }
  // overridden by subclasses with a concrete retrieval strategy
  getFiles(): Promise<Tree> {
    throw new Error("Method not implemented.");
  }

  /**
   * Resolve the token to use for GitHub API calls, in order of preference:
   * 1. the stored token of `owner` (when provided and present in the DB),
   * 2. this source's accessToken — but only if it is still valid according
   *    to the OAuth app (an invalid token is replaced by the app token),
   * 3. the application-wide config.GITHUB_TOKEN.
   */
  async getToken(owner?: string) {
    if (owner) {
      const user = await UserModel.findOne({ username: owner });
      if (user && user.accessToken) {
        return user.accessToken as string;
      }
    }
    if (this.accessToken) {
      try {
        const app = new OAuthApp({
          clientType: "github-app",
          clientId: config.CLIENT_ID,
          clientSecret: config.CLIENT_SECRET,
        });
        await app.checkToken({
          token: this.accessToken,
        });
        return this.accessToken;
      } catch (error) {
        // console.debug("Token is invalid.", error);
        this.accessToken = config.GITHUB_TOKEN;
      }
    }
    return config.GITHUB_TOKEN;
  }

  // public URL of the source repository on github.com
  get url() {
    return "https://github.com/" + this.githubRepository.fullName;
  }

  // serialized form exposed by the API
  toJSON(): any {
    return {
      type: this.type,
      fullName: this.githubRepository.fullName?.toString(),
      branch: this.branch,
    };
  }
}
|
||||
75
src/source/GitHubDownload.ts
Normal file
75
src/source/GitHubDownload.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import * as path from "path";
|
||||
import config from "../../config";
|
||||
import storage from "../storage";
|
||||
import Repository from "../Repository";
|
||||
|
||||
import GitHubBase from "./GitHubBase";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { SourceBase } from "../types";
|
||||
import * as got from "got";
|
||||
import * as stream from "stream";
|
||||
import { OctokitResponse } from "@octokit/types";
|
||||
|
||||
/**
 * GitHub source that downloads the full repository tarball and extracts it
 * into the repository's original cache folder.
 */
export default class GitHubDownload extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }

  /**
   * Resolve the tarball URL for the pinned commit. Uses a HEAD request;
   * the returned response's `url` field carries the download location.
   */
  private async _getZipUrl(
    auth?: string
  ): Promise<OctokitResponse<unknown, 302>> {
    const octokit = new Octokit({ auth });
    return octokit.rest.repos.downloadTarballArchive({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      ref: this.branch?.commit || "HEAD",
      method: "HEAD",
    });
  }

  /**
   * Download and extract the repository tarball into the original cache.
   * Falls back to the application token when the first token is rejected
   * with 401.
   *
   * @throws Error("repo_not_accessible") when no token grants access
   */
  async download() {
    let response: OctokitResponse<unknown, number>;
    try {
      response = await this._getZipUrl(await this.getToken());
    } catch (error) {
      if (error.status == 401 && config.GITHUB_TOKEN) {
        try {
          response = await this._getZipUrl(config.GITHUB_TOKEN);
        } catch (error) {
          throw new Error("repo_not_accessible");
        }
      } else {
        throw new Error("repo_not_accessible");
      }
    }
    const originalPath = this.repository.originalCachePath;
    await storage.mk(originalPath);
    await storage.extractTar(originalPath, got.stream(response.url));
  }

  /**
   * Return the content of `file` from the cache.
   * NOTE(review): this re-downloads the archive and re-lists the files on
   * every call — confirm callers rely on this refresh behavior.
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    await this.download();
    // update the file list
    await this.repository.files({ force: true });
    return storage.read(file.originalCachePath);
  }

  /** List the cached files, downloading the archive on first access. */
  async getFiles() {
    const folder = this.repository.originalCachePath;
    if (!(await storage.exists(folder))) {
      await this.download();
    }
    return storage.listFiles(folder);
  }
}
|
||||
171
src/source/GitHubRepository.ts
Normal file
171
src/source/GitHubRepository.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
import { Branch } from "../types";
|
||||
import * as gh from "parse-github-url";
|
||||
import { IRepositoryDocument } from "../database/repositories/repositories.types";
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import RepositoryModel from "../database/repositories/repositories.model";
|
||||
|
||||
/**
 * Wrapper around the cached GitHub repository metadata (RepositoryModel
 * document). Provides cached, lazily refreshed access to the branch list
 * and per-branch README content.
 */
export class GitHubRepository {
  // raw (partial) repository document backing this wrapper
  private _data: Partial<
    { [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }
  >;
  constructor(
    data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
  ) {
    this._data = data;
  }

  // plain-object view exposed by the API
  toJSON() {
    return {
      repo: this.repo,
      owner: this.owner,
      hasPage: this._data.hasPage,
      pageSource: this._data.pageSource,
      fullName: this.fullName,
      defaultBranch: this._data.defaultBranch,
      size: this.size,
    };
  }

  // underlying (partial) document
  get model() {
    return this._data;
  }

  // "owner/repo" full name (stored in the `name` field)
  public get fullName(): string {
    return this._data.name;
  }

  // external id, e.g. "gh_<github id>" (see getRepositoryFromGitHub)
  public get id(): string {
    return this._data.externalId;
  }

  // repository size as reported by GitHub (in kilobytes)
  public get size(): number {
    return this._data.size;
  }

  /**
   * List the branches of the repository.
   *
   * When the in-memory cache is empty or `force` is set, the branches are
   * fetched from the GitHub API (preserving any cached per-branch readme)
   * and persisted; otherwise the cached branches are reloaded from the
   * database.
   */
  async branches(opt: {
    accessToken?: string;
    force?: boolean;
  }): Promise<Branch[]> {
    if (
      !this._data.branches ||
      this._data.branches.length == 0 ||
      opt?.force === true
    ) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      const branches = (
        await octokit.paginate(octokit.repos.listBranches, {
          owner: this.owner,
          repo: this.repo,
          per_page: 100,
        })
      ).map((b) => {
        return {
          name: b.name,
          commit: b.commit.sha,
          // keep the readme already cached for this branch, if any
          readme: this._data.branches?.filter(
            (f: Branch) => f.name == b.name
          )[0]?.readme,
        } as Branch;
      });
      this._data.branches = branches;

      await RepositoryModel.updateOne(
        { externalId: this.id },
        { $set: { branches } }
      );
    } else {
      this._data.branches = (
        await RepositoryModel.findOne({ externalId: this.id }).select(
          "branches"
        )
      ).branches;
    }

    return this._data.branches;
  }

  /**
   * Return the README of `opt.branch` (defaults to the default branch, or
   * "master"), fetching it from GitHub and caching it on the branch entry
   * when missing or when `force` is set.
   */
  async readme(opt: {
    branch?: string;
    force?: boolean;
    accessToken?: string;
  }): Promise<string> {
    if (!opt.branch) opt.branch = this._data.defaultBranch || "master";

    const model = await RepositoryModel.findOne({
      externalId: this.id,
    }).select("branches");

    this._data.branches = await this.branches(opt);
    model.branches = this._data.branches;

    const selected = model.branches.filter((f) => f.name == opt.branch)[0];
    if (!selected?.readme || opt?.force === true) {
      // get the list of repo from github
      const octokit = new Octokit({ auth: opt.accessToken });
      const ghRes = await octokit.repos.getReadme({
        owner: this.owner,
        repo: this.repo,
        ref: selected?.commit,
      });
      const readme = Buffer.from(
        ghRes.data.content,
        ghRes.data.encoding as BufferEncoding
      ).toString("utf-8");
      selected.readme = readme;

      await model.save();
    }

    return selected.readme;
  }

  // owner part of fullName; throws "invalid_repo" when not parsable
  public get owner(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.owner || this.fullName;
  }

  // repository-name part of fullName; throws "invalid_repo" when not parsable
  public get repo(): string {
    const repo = gh(this.fullName);
    if (!repo) {
      throw "invalid_repo";
    }
    return repo.name || this.fullName;
  }
}
|
||||
|
||||
/**
 * Fetch repository metadata from GitHub and upsert it into the local
 * RepositoryModel cache, keyed by "gh_<github id>".
 *
 * @param opt owner/repo identify the GitHub repository; accessToken is
 *            used for the API calls
 * @returns a GitHubRepository wrapping the saved model
 * @throws Error("repo_not_found") when GitHub returns no repository
 */
export async function getRepositoryFromGitHub(opt: {
  owner: string;
  repo: string;
  accessToken: string;
}) {
  const octokit = new Octokit({ auth: opt.accessToken });
  const r = (
    await octokit.repos.get({
      owner: opt.owner,
      repo: opt.repo,
    })
  ).data;
  if (!r) throw new Error("repo_not_found");
  let model = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
  if (!model) {
    model = new RepositoryModel({ externalId: "gh_" + r.id });
  }
  model.name = r.full_name;
  model.url = r.html_url;
  model.size = r.size; // size reported by GitHub, in kilobytes
  model.defaultBranch = r.default_branch;
  model.hasPage = r.has_pages;
  if (model.hasPage) {
    // NOTE(review): getPages may require elevated permissions on the repo —
    // confirm it cannot reject for repositories the token can only read.
    const ghPageRes = await octokit.repos.getPages({
      owner: opt.owner,
      repo: opt.repo,
    });
    model.pageSource = ghPageRes.data.source;
  }
  await model.save();
  return new GitHubRepository(model);
}
|
||||
171
src/source/GitHubStream.ts
Normal file
171
src/source/GitHubStream.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import Repository from "../Repository";
|
||||
import GitHubBase from "./GitHubBase";
|
||||
import storage from "../storage";
|
||||
import { SourceBase, Tree } from "../types";
|
||||
import * as path from "path";
|
||||
|
||||
import * as stream from "stream";
|
||||
|
||||
/**
 * GitHub source that fetches file content and the file tree on demand
 * through the GitHub API (git/blobs and git/trees) instead of downloading
 * a full archive.
 */
export default class GitHubStream extends GitHubBase implements SourceBase {
  constructor(
    data: {
      type: "GitHubDownload" | "GitHubStream" | "Zip";
      branch?: string;
      commit?: string;
      repositoryId?: string;
      repositoryName?: string;
      accessToken?: string;
    },
    repository: Repository
  ) {
    super(data, repository);
  }

  /**
   * Fetch the content of one file through the blob API, cache it in
   * storage, and return it as a readable stream.
   *
   * @throws Error("file_sha_not_provided") when the file carries no sha
   * @throws Error("file_too_big") when GitHub rejects the blob (HTTP 403)
   * @throws Error("file_not_accessible") for any other failure
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    if (!file.sha) throw new Error("file_sha_not_provided");
    const octokit = new Octokit({
      auth: await this.getToken(),
    });

    try {
      const ghRes = await octokit.rest.git.getBlob({
        owner: this.githubRepository.owner,
        repo: this.githubRepository.repo,
        file_sha: file.sha,
      });
      // a non-empty blob must come back with content
      if (!ghRes.data.content && ghRes.data.size != 0) {
        throw new Error("file_not_accessible");
      }
      // empty file
      let content: Buffer;
      if (ghRes.data.content) {
        content = Buffer.from(
          ghRes.data.content,
          ghRes.data.encoding as BufferEncoding
        );
      } else {
        content = Buffer.from("");
      }
      // cache the original content before handing it out
      await storage.write(file.originalCachePath, content);
      return stream.Readable.from(content.toString());
    } catch (error) {
      if (error.status == 403) {
        throw new Error("file_too_big");
      }
      console.error(error);
    }
    throw new Error("file_not_accessible");
  }

  // list all files of the pinned commit
  async getFiles() {
    return this.getTree(this.branch.commit);
  }

  /**
   * Fetch the git tree at `sha` (recursive API call) and merge it into
   * `truncatedTree` under `parentPath`. When GitHub truncates the
   * response, the missing subtrees are completed by getTruncatedTree.
   */
  private async getTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
      recursive: "1",
    });

    const tree = this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    if (ghRes.data.truncated) {
      await this.getTruncatedTree(sha, tree, parentPath);
    }
    return tree;
  }

  /**
   * Complete a truncated tree: list `sha` non-recursively and descend into
   * each sub-tree that is not yet present in `truncatedTree`.
   */
  private async getTruncatedTree(
    sha: string,
    truncatedTree: Tree = {},
    parentPath: string = ""
  ) {
    const octokit = new Octokit({
      auth: await this.getToken(),
    });
    const ghRes = await octokit.git.getTree({
      owner: this.githubRepository.owner,
      repo: this.githubRepository.repo,
      tree_sha: sha,
    });
    const tree = ghRes.data.tree;

    for (let elem of tree) {
      if (!elem.path) continue;
      if (elem.type == "tree") {
        const elementPath = path.join(parentPath, elem.path);
        const paths = elementPath.split("/");

        // walk the already-known tree; recurse only into folders
        // that are still missing
        let current = truncatedTree;
        for (let i = 0; i < paths.length; i++) {
          let p = paths[i];
          if (!current[p]) {
            if (elem.sha)
              await this.getTree(elem.sha, truncatedTree, elementPath);
            break;
          }
          current = current[p] as Tree;
        }
      }
    }
    this.tree2Tree(ghRes.data.tree, truncatedTree, parentPath);
    return truncatedTree;
  }

  /**
   * Merge a flat GitHub tree listing into the nested Tree structure.
   * Path segments starting with "$" are escaped with a backslash —
   * presumably to keep them safe as object/DB keys; TODO confirm.
   */
  private tree2Tree(
    tree: {
      path?: string;
      mode?: string;
      type?: string;
      sha?: string;
      size?: number;
      url?: string;
    }[],
    partialTree: Tree = {},
    parentPath: string = ""
  ) {
    for (let elem of tree) {
      let current = partialTree;

      if (!elem.path) continue;

      const paths = path.join(parentPath, elem.path).split("/");

      // if elem is a folder iterate on all folders if it is a file stop before the filename
      const end = elem.type == "tree" ? paths.length : paths.length - 1;
      for (let i = 0; i < end; i++) {
        let p = paths[i];
        if (p[0] == "$") {
          p = "\\" + p;
        }
        if (!current[p]) {
          current[p] = {};
        }
        current = current[p] as Tree;
      }

      // if elem is a file add the file size in the file list
      if (elem.type == "blob") {
        let p = paths[end];
        if (p[0] == "$") {
          p = "\\" + p;
        }
        current[p] = {
          size: elem.size || 0, // size in bit
          sha: elem.sha || "",
        };
      }
    }
    return partialTree;
  }
}
|
||||
31
src/source/Zip.ts
Normal file
31
src/source/Zip.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import * as path from "path";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import Repository from "../Repository";
|
||||
import storage from "../storage";
|
||||
import { SourceBase } from "../types";
|
||||
import * as stream from "stream";
|
||||
|
||||
export default class Zip implements SourceBase {
|
||||
type = "Zip";
|
||||
repository: Repository;
|
||||
url?: string;
|
||||
|
||||
constructor(data: any, repository: Repository) {
|
||||
this.repository = repository;
|
||||
this.url = data.url;
|
||||
}
|
||||
|
||||
async getFiles() {
|
||||
return storage.listFiles(this.repository.originalCachePath);
|
||||
}
|
||||
|
||||
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
|
||||
return storage.read(file.originalCachePath);
|
||||
}
|
||||
|
||||
toJSON(): any {
|
||||
return {
|
||||
type: this.type,
|
||||
};
|
||||
}
|
||||
}
|
||||
7
src/storage.ts
Normal file
7
src/storage.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import FileSystem from "./storage/FileSystem";
|
||||
import S3Storage from "./storage/S3";
|
||||
import { StorageBase } from "./types";
|
||||
|
||||
const storage = new FileSystem();
|
||||
|
||||
export default storage as StorageBase;
|
||||
136
src/storage/FileSystem.ts
Normal file
136
src/storage/FileSystem.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { StorageBase, Tree } from "../types";
|
||||
import * as fs from "fs";
|
||||
import * as tar from "tar-fs";
|
||||
import * as path from "path";
|
||||
import * as express from "express";
|
||||
import config from "../../config";
|
||||
import * as stream from "stream";
|
||||
import * as gunzip from "gunzip-maybe";
|
||||
import * as archiver from "archiver";
|
||||
|
||||
export default class FileSystem implements StorageBase {
|
||||
type = "FileSystem";
|
||||
|
||||
constructor() {}
|
||||
|
||||
/** @override */
|
||||
async exists(p: string): Promise<boolean> {
|
||||
return fs.existsSync(path.join(config.FOLDER, p));
|
||||
}
|
||||
|
||||
/** @override */
|
||||
send(p: string, res: express.Response) {
|
||||
res.sendFile(path.join(config.FOLDER, p), { dotfiles: "allow" });
|
||||
}
|
||||
|
||||
/** @override */
|
||||
read(p: string): stream.Readable {
|
||||
return fs.createReadStream(path.join(config.FOLDER, p));
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async write(p: string, data: Buffer): Promise<void> {
|
||||
if (!(await this.exists(path.dirname(p)))) {
|
||||
await fs.promises.mkdir(path.dirname(path.join(config.FOLDER, p)), {
|
||||
recursive: true,
|
||||
});
|
||||
}
|
||||
return fs.promises.writeFile(path.join(config.FOLDER, p), data);
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async rm(path: string): Promise<void> {
|
||||
await fs.promises.rm(path, { force: true, recursive: true });
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async mk(dir: string): Promise<void> {
|
||||
if (!(await this.exists(dir)))
|
||||
fs.promises.mkdir(path.join(config.FOLDER, dir), { recursive: true });
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async listFiles(
|
||||
dir: string,
|
||||
opt: {
|
||||
root?: string;
|
||||
onEntry?: (file: { path: string; size: number }) => void;
|
||||
} = {}
|
||||
): Promise<Tree> {
|
||||
if (opt.root == null) {
|
||||
opt.root = config.FOLDER;
|
||||
}
|
||||
let files = await fs.promises.readdir(path.join(opt.root, dir));
|
||||
const output: Tree = {};
|
||||
for (let file of files) {
|
||||
let filePath = path.join(dir, file);
|
||||
try {
|
||||
const stats = await fs.promises.stat(path.join(opt.root, filePath));
|
||||
if (file[0] == "$") {
|
||||
file = "\\" + file;
|
||||
}
|
||||
if (stats.isDirectory()) {
|
||||
output[file] = await this.listFiles(filePath, opt);
|
||||
} else if (stats.isFile()) {
|
||||
if (opt.onEntry) {
|
||||
opt.onEntry({
|
||||
path: filePath,
|
||||
size: stats.size,
|
||||
});
|
||||
}
|
||||
output[file] = { size: stats.size, sha: stats.ino.toString() };
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async extractTar(p: string, data: stream.Readable): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
data
|
||||
.pipe(gunzip())
|
||||
.pipe(
|
||||
tar.extract(path.join(config.FOLDER, p), {
|
||||
map: (header) => {
|
||||
header.name = header.name.substr(header.name.indexOf("/") + 1);
|
||||
return header;
|
||||
},
|
||||
})
|
||||
)
|
||||
.on("finish", resolve)
|
||||
.on("error", reject);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * @override
 * Builds a zip/tar archive of every file below `dir` and returns the
 * archiver stream immediately; entries are appended asynchronously while
 * the caller pipes the stream.
 *
 * @param dir directory to archive, relative to the storage root
 * @param opt.format          archive format, forwarded to archiver()
 * @param opt.fileTransformer optional factory returning a transform
 *                            stream applied to each file's content
 */
archive(
  dir: string,
  opt?: {
    format?: "zip" | "tar";
    fileTransformer?;
  }
) {
  const archive = archiver(opt?.format, {});

  this.listFiles(dir, {
    onEntry: (file) => {
      let rs = this.read(file.path);
      if (opt?.fileTransformer) {
        // apply transformation on the stream
        rs = rs.pipe(opt.fileTransformer(file.path));
      }
      // NOTE(review): replace() removes the FIRST occurrence of `dir`
      // anywhere in the path; assumes `dir` only ever appears as the
      // leading prefix — confirm.
      const f = file.path.replace(dir, "");
      archive.append(rs, {
        name: path.basename(f),
        prefix: path.dirname(f),
      });
    },
  }).then(() => {
    // Finalize only once every entry has been appended.
    archive.finalize();
  });
  // NOTE(review): a rejection of listFiles() is unhandled, in which case
  // the archive is never finalized and consumers of the returned stream
  // would hang — confirm.
  return archive;
}
|
||||
}
|
||||
225
src/storage/S3.ts
Normal file
225
src/storage/S3.ts
Normal file
@@ -0,0 +1,225 @@
|
||||
import { StorageBase, Tree, TreeFile } from "../types";
|
||||
import { S3 } from "aws-sdk";
|
||||
import config from "../../config";
|
||||
import * as stream from "stream";
|
||||
import { ArchiveStreamToS3 } from "archive-stream-to-s3";
|
||||
import * as express from "express";
|
||||
import * as mime from "mime-types";
|
||||
import * as flow from "xml-flow";
|
||||
import * as archiver from "archiver";
|
||||
import * as path from "path";
|
||||
import * as gunzip from "gunzip-maybe";
|
||||
|
||||
// Keep a reference to the library's original onEntry implementation;
// extractTar() below monkey-patches the prototype per call and delegates
// back to this original after rewriting the entry name.
const originalArchiveStreamToS3Entry: Function = (ArchiveStreamToS3 as any)
  .prototype.onEntry;

/**
 * StorageBase implementation backed by an S3-compatible object store.
 * All paths/keys are relative to the bucket configured in
 * `config.S3_BUCKET`.
 */
export default class S3Storage implements StorageBase {
  type = "AWS";
  client: S3;

  /**
   * Builds the S3 client from the S3_* settings in config.
   * @throws Error("s3_config_not_provided") when no bucket is configured
   */
  constructor() {
    if (!config.S3_BUCKET) throw new Error("s3_config_not_provided");
    this.client = new S3({
      region: config.S3_REGION,
      endpoint: config.S3_ENDPOINT,
      accessKeyId: config.S3_CLIENT_ID,
      secretAccessKey: config.S3_CLIENT_SECRET,
    });
  }

  /**
   * @override
   * Resolves to true when an object with key `path` exists.
   * NOTE(review): any headObject failure (including auth/network errors,
   * not just 404) is reported as "does not exist" — confirm.
   */
  async exists(path: string): Promise<boolean> {
    try {
      await this.client
        .headObject({
          Bucket: config.S3_BUCKET,
          Key: path,
        })
        .promise();
      return true;
    } catch (err) {
      return false;
    }
  }

  /**
   * @override
   * "Creates" the directory by writing an empty object whose key ends
   * with "/" (S3 has no real directories).
   */
  async mk(dir: string): Promise<void> {
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";

    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: dir,
      })
      .promise();
  }

  /**
   * @override
   * Deletes every object whose key starts with `dir`. listObjectsV2
   * returns pages of at most 1000 keys, so the method recurses while the
   * listing is truncated.
   */
  async rm(dir: string): Promise<void> {
    const data = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();

    const params = { Bucket: config.S3_BUCKET, Delete: { Objects: [] } };

    data.Contents.forEach(function (content) {
      params.Delete.Objects.push({ Key: content.Key });
    });

    if (params.Delete.Objects.length == 0) {
      // nothing to remove
      return;
    }
    await this.client.deleteObjects(params).promise();

    if (data.IsTruncated) {
      await this.rm(dir);
    }
  }

  /**
   * @override
   * Streams the object `p` to an express response, forwarding the S3
   * status code and (on success) the content-length/content-type headers.
   */
  send(p: string, res: express.Response) {
    const s = this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: p,
      })
      .on("httpHeaders", (statusCode, headers, response) => {
        res.status(statusCode);
        if (statusCode < 300) {
          res.set("Content-Length", headers["content-length"]);
          res.set("Content-Type", headers["content-type"]);
        }
        // Pipe the raw HTTP body straight through without buffering.
        (
          response.httpResponse.createUnbufferedStream() as stream.Readable
        ).pipe(res);
      });

    s.send();
  }

  /**
   * @override
   * Opens a readable stream on the object `path`.
   */
  read(path: string): stream.Readable {
    return this.client
      .getObject({
        Bucket: config.S3_BUCKET,
        Key: path,
      })
      .createReadStream();
  }

  /**
   * @override
   * Writes `data` to the object `path`, guessing the content type from
   * the file extension.
   * NOTE(review): mime.lookup() returns false for unknown extensions, so
   * ContentType would become the string "false" — confirm intended.
   */
  async write(path: string, data: Buffer): Promise<void> {
    await this.client
      .putObject({
        Bucket: config.S3_BUCKET,
        Key: path,
        Body: data,
        ContentType: mime.lookup(path).toString(),
      })
      .promise();
    return;
  }

  /**
   * @override
   * Lists objects under the prefix `dir` and rebuilds a nested Tree from
   * the "/"-separated keys; the object's ETag is used as the file `sha`.
   * NOTE(review): unlike rm(), truncated listings (>1000 keys) are NOT
   * paginated here, so large prefixes return a partial tree — confirm.
   */
  async listFiles(dir: string): Promise<Tree> {
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const out: Tree = {};
    const req = await this.client
      .listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: dir,
      })
      .promise();

    if (!req.Contents) return out;
    for (const f of req.Contents) {
      if (!f.Key) continue;
      // Strip the listing prefix, then walk/create the intermediate
      // directory nodes for every path segment but the last.
      f.Key = f.Key.replace(dir, "");
      const paths = f.Key.split("/");
      let current: Tree = out;
      for (let i = 0; i < paths.length - 1; i++) {
        let p = paths[i];
        if (!p) continue;
        if (!(current[p] as Tree)) {
          current[p] = {} as Tree;
        }
        current = current[p] as Tree;
      }

      // NOTE(review): f.ETag may be undefined while TreeFile.sha is
      // declared as string — confirm.
      const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
      const fileName = paths[paths.length - 1];
      // Keys ending in "/" (directory markers) produce an empty last
      // segment and are skipped here.
      if (fileName) current[fileName] = fileInfo;
    }
    return out;
  }

  /**
   * @override
   * Extracts a (possibly gzipped) tar stream into the bucket under the
   * prefix `p`, stripping the archive's top-level folder from each entry.
   * NOTE(review): the prototype of ArchiveStreamToS3 is patched globally
   * per call — two concurrent extractTar() calls would clobber each
   * other's patch (shared `rootFolder`/`toS3` closures) — confirm this is
   * never called concurrently.
   */
  async extractTar(p: string, data: stream.Readable): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const toS3 = new ArchiveStreamToS3(config.S3_BUCKET, p, this.client);

      let rootFolder = null;
      (ArchiveStreamToS3 as any).prototype.onEntry = function (
        header: any,
        stream: any,
        next: any
      ) {
        // Remember the first entry's top-level folder and strip it from
        // every subsequent entry name.
        if (rootFolder == null) {
          rootFolder = header.name.substr(0, header.name.indexOf("/") + 1);
        }
        header.name = header.name.replace(rootFolder, "");
        originalArchiveStreamToS3Entry.call(toS3, header, stream, next);
      };

      toS3.on("finish", (result) => {
        resolve(result);
      });
      toS3.on("error", (e) => {
        reject(e);
      });
      data.pipe(gunzip()).pipe(toS3);
    });
  }

  /**
   * @override
   * Builds a zip/tar archive of every object under `dir` and returns the
   * archiver stream immediately. The object listing is consumed as a raw
   * XML stream (via xml-flow) so entries are appended as they arrive.
   *
   * @param dir prefix to archive
   * @param opt.format          archive format, forwarded to archiver()
   * @param opt.fileTransformer optional factory returning a transform
   *                            stream applied to each file's content
   */
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?;
    }
  ) {
    const archive = archiver(opt?.format, {});
    if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
    const req = this.client.listObjectsV2({
      Bucket: config.S3_BUCKET,
      Prefix: dir,
    });
    const filesStream = req.createReadStream();

    // Parse the ListObjectsV2 XML response incrementally; each <Contents>
    // element describes one object.
    const xmlStream = flow(filesStream);

    const that = this;
    xmlStream.on("tag:contents", function (file) {
      let rs = that.read(file.key);
      file.key = file.key.replace(dir, "");
      const filename = path.basename(file.key);
      // Skip directory-marker keys (trailing "/").
      if (filename == "") return;
      if (opt?.fileTransformer) {
        rs = rs.pipe(opt.fileTransformer(filename));
      }
      archive.append(rs, {
        name: filename,
        prefix: path.dirname(file.key),
      });
    });
    xmlStream.on("end", () => {
      archive.finalize();
    });
    return archive;
  }
}
|
||||
98
src/types.ts
Normal file
98
src/types.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import GitHubDownload from "./source/GitHubDownload";
|
||||
import GitHubStream from "./source/GitHubStream";
|
||||
import Zip from "./source/ZIP";
|
||||
import S3Storage from "./storage/S3";
|
||||
import FileSystem from "./storage/FileSystem";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import * as stream from "stream";
|
||||
import * as archiver from "archiver";
|
||||
|
||||
/**
 * Common contract for repository sources (GitHub download, GitHub stream,
 * uploaded ZIP).
 */
export interface SourceBase {
  /** Discriminator identifying the concrete source implementation. */
  readonly type: string;

  /**
   * The url of the source
   */
  url?: string;

  /**
   * Retrieve the file content
   * @param file the file of the content to retrieve
   */
  getFileContent(file: AnonymizedFile): Promise<stream.Readable>;

  /**
   * Get all the files from a specific source
   */
  getFiles(): Promise<Tree>;

  /** Plain-object representation of the source (for persistence/API). */
  toJSON(): any;
}
|
||||
|
||||
/** Union of all concrete source implementations. */
export type Source = GitHubDownload | GitHubStream | Zip;

/**
 * Common contract for storage back ends (local file system, S3).
 * Paths are interpreted relative to the back end's storage root.
 */
export interface StorageBase {
  /** Discriminator identifying the concrete back end (e.g. "AWS"). */
  type: string;

  /** Resolves to true when `path` exists in the storage. */
  exists(path: string): Promise<boolean>;

  /** Opens a readable stream on the file at `path`. */
  read(path: string): stream.Readable;

  /** Writes `data` to `path`, creating or replacing the file. */
  write(path: string, data: Buffer): Promise<void>;

  /** Recursively lists the files below `dir` as a Tree. */
  listFiles(dir: string): Promise<Tree>;

  /** Extracts a (possibly gzipped) tar stream into `p`. */
  extractTar(p: string, data: stream.Readable): Promise<void>;

  /** Removes `path` (recursively for directories/prefixes). */
  rm(path: string): Promise<void>;

  /**
   * Builds a zip/tar archive of `dir` and returns the archiver stream.
   * NOTE(review): `Transformer` is not imported here, so it resolves to
   * the DOM lib's web-streams `Transformer` type; implementations pipe
   * the returned value, so `stream.Transform` was probably intended —
   * confirm.
   */
  archive(
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?: (p: any) => Transformer;
    }
  ): archiver.Archiver;

  /** Creates the directory `path` if it does not exist yet. */
  mk(path: string): Promise<void>;
}

/** Union of all concrete storage implementations. */
export type Storage = S3Storage | FileSystem;
|
||||
|
||||
/** A branch of the original repository. */
export interface Branch {
  name: string;
  // presumably the sha of the branch's head commit — confirm against
  // the source implementations
  commit: string;
  // optional README content/path associated with the branch — TODO confirm
  readme?: string;
}

/** Lifecycle states of an anonymized repository. */
export type RepositoryStatus =
  | "ready"
  | "preparing"
  | "expired"
  | "removed"
  | "download"
  | "queue";
/** Availability of the original (upstream) source. */
export type SourceStatus = "available" | "unavailable";

/**
 * Recursive file tree: keys are file/directory names, values are either
 * sub-trees (directories) or TreeFile leaves (files).
 */
export interface Tree {
  [key: string]: TreeElement;
}

/** A node of a Tree: directory (Tree) or file (TreeFile). */
export type TreeElement = Tree | TreeFile;

/** Leaf of a Tree: a single file. */
export interface TreeFile {
  // Content identifier; the storage back ends fill this with the inode
  // number (FileSystem) or the object ETag (S3).
  sha: string;
  // file size in bytes
  size: number;
}

/** Lines-of-code statistics — presumably github-linguist output; confirm. */
export interface Loc {
  info: { total: number; code: number; commit: number };
  languages: {
    [key: string]: {
      total: number;
      code: number;
      commit: number;
      sum: number;
    };
  };
}
|
||||
@@ -1,17 +0,0 @@
|
||||
var expect = require("chai").expect;
|
||||
var assert = require("chai").assert;
|
||||
const fs = require("fs");
|
||||
|
||||
const githubUtils = require("../utils/github");
|
||||
const fileUtils = require("../utils/file");
|
||||
const repoUtils = require("../utils/repository");
|
||||
const db = require("../utils/database");
|
||||
|
||||
describe("Test Files Utils", async function() {
|
||||
describe("List all files", function() {
|
||||
it("Get all file from repo with more than 1000 files", async function() {
|
||||
const fullName = "TQRG/BugSwarm";
|
||||
await fileUtils.getTree({ fullName });
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,31 +0,0 @@
|
||||
var expect = require("chai").expect;
|
||||
var assert = require("chai").assert;
|
||||
const fs = require("fs");
|
||||
|
||||
const githubUtils = require("../utils/github");
|
||||
const repoUtils = require("../utils/repository");
|
||||
const db = require("../utils/database");
|
||||
|
||||
describe("Test GitHub Utils", async function() {
|
||||
describe("Download Repository", function() {
|
||||
const target = "/tmp/repo.zip";
|
||||
it("Download an exisiting repo to a folder", async function() {
|
||||
await repoUtils.downloadRepoZip(
|
||||
{ fullName: "tdurieux/binance-trade-bot" },
|
||||
target
|
||||
);
|
||||
expect(fs.existsSync(target)).to.equal(true, `${target} should exist`);
|
||||
fs.unlinkSync(target);
|
||||
});
|
||||
it("Download a non-exisiting repo to a folder", async function() {
|
||||
try {
|
||||
await repoUtils.downloadRepoZip(
|
||||
{ fullName: "tdurieux/missing" },
|
||||
target
|
||||
);
|
||||
fs.unlinkSync(target);
|
||||
assert.fail("Should trigger an exception");
|
||||
} catch (error) {}
|
||||
});
|
||||
});
|
||||
});
|
||||
10
tsconfig.json
Normal file
10
tsconfig.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es6",
|
||||
"module": "commonjs",
|
||||
"outDir": "dist",
|
||||
"sourceMap": true
|
||||
},
|
||||
"include": ["src/**/*.ts", "index.ts", "tests3.ts"],
|
||||
"exclude": ["node_modules", ".vscode"]
|
||||
}
|
||||
@@ -1,116 +0,0 @@
|
||||
const fs = require("fs").promises;
|
||||
const ofs = require("fs");
|
||||
const path = require("path");
|
||||
const fileUtils = require("./file");
|
||||
const config = require("../config")
|
||||
|
||||
const anonymizeContent = (content, repoConfig) => {
|
||||
const urlRegex = /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
|
||||
if (repoConfig.options.image === false) {
|
||||
// remove image in markdown
|
||||
content = content.replace(
|
||||
/!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
|
||||
""
|
||||
);
|
||||
}
|
||||
|
||||
if (!repoConfig.options.link) {
|
||||
// remove all links
|
||||
content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
|
||||
}
|
||||
|
||||
content = content.replace(
|
||||
new RegExp(
|
||||
`https://github.com/${repoConfig.fullName}/blob/${repoConfig.branch}\\b`,
|
||||
"gi"
|
||||
),
|
||||
`https://anonymous.4open.science/r/${repoConfig.repoId}`
|
||||
);
|
||||
content = content.replace(
|
||||
new RegExp(
|
||||
`https://github.com/${repoConfig.fullName}/tree/${repoConfig.branch}\\b`,
|
||||
"gi"
|
||||
),
|
||||
`https://anonymous.4open.science/r/${repoConfig.repoId}`
|
||||
);
|
||||
content = content.replace(
|
||||
new RegExp(`https://github.com/${repoConfig.fullName}`, "gi"),
|
||||
`https://anonymous.4open.science/r/${repoConfig.repoId}`
|
||||
);
|
||||
|
||||
for (let term of repoConfig.terms) {
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
// remove whole url if it contains the term
|
||||
content = content.replace(urlRegex, (match) => {
|
||||
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) return config.ANONYMIZATION_MASK;
|
||||
return match;
|
||||
});
|
||||
|
||||
// remove the term in the text
|
||||
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), config.ANONYMIZATION_MASK);
|
||||
}
|
||||
return content;
|
||||
};
|
||||
|
||||
const anonymizePath = (path, repoConfig) => {
|
||||
for (let term of repoConfig.terms) {
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK);
|
||||
}
|
||||
return path;
|
||||
};
|
||||
|
||||
async function* walk(dir) {
|
||||
for await (const d of await fs.opendir(dir)) {
|
||||
const entry = path.join(dir, d.name);
|
||||
if (d.isDirectory()) yield* await walk(entry);
|
||||
else if (d.isFile()) yield entry;
|
||||
}
|
||||
}
|
||||
|
||||
const anonymizeFolder = async (root, destination, repoConfig) => {
|
||||
if (!ofs.existsSync(destination)) {
|
||||
await fs.mkdir(destination, { recursive: true });
|
||||
}
|
||||
try {
|
||||
for await (const originalFilePath of walk(root)) {
|
||||
const destinationFilePath = path.join(
|
||||
destination,
|
||||
anonymizePath(originalFilePath.replace(root, ""), repoConfig)
|
||||
);
|
||||
const destinationFolder = path.dirname(destinationFilePath);
|
||||
if (!ofs.existsSync(destinationFolder)) {
|
||||
await fs.mkdir(destinationFolder, { recursive: true });
|
||||
}
|
||||
await anonymizeFile(originalFilePath, destinationFilePath, repoConfig);
|
||||
}
|
||||
} catch (error) {
|
||||
fs.rm(destination, { recursive: true, force: true });
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
const anonymizeFile = async (filePath, target, repoConfig) => {
|
||||
if (!ofs.existsSync(path.dirname(target))) {
|
||||
await fs.mkdir(path.dirname(target), { recursive: true });
|
||||
}
|
||||
if (fileUtils.isText(filePath)) {
|
||||
const content = anonymizeContent(
|
||||
(await fs.readFile(filePath)).toString(),
|
||||
repoConfig
|
||||
);
|
||||
await fs.writeFile(target, content);
|
||||
} else {
|
||||
await fs.copyFile(filePath, target);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports.anonymizeFile = anonymizeFile;
|
||||
module.exports.anonymizePath = anonymizePath;
|
||||
module.exports.anonymizeFolder = anonymizeFolder;
|
||||
module.exports.anonymizeContent = anonymizeContent;
|
||||
@@ -1,39 +0,0 @@
|
||||
const config = require("../config");
|
||||
|
||||
var MongoClient = require("mongodb").MongoClient;
|
||||
const MONGO_URL = "mongodb://root:rootpassword@mongodb:27017/?authSource=admin";
|
||||
let mongoClient = null;
|
||||
let DB = null;
|
||||
|
||||
module.exports.get = (collection) => {
|
||||
if (!collection) return DB;
|
||||
return DB.collection(collection);
|
||||
};
|
||||
|
||||
module.exports.connect = async () => {
|
||||
mongoClient = await MongoClient.connect(
|
||||
MONGO_URL,
|
||||
{ useNewUrlParser: true, useUnifiedTopology: true }
|
||||
);
|
||||
DB = mongoClient.db("anonymous_github");
|
||||
await DB.collection("anonymized_repositories").createIndex(
|
||||
{ repoId: 1 },
|
||||
{ unique: true, name: "repoId" }
|
||||
);
|
||||
await DB.collection("anonymized_repositories").createIndex(
|
||||
{ fullName: 1 },
|
||||
{ name: "fullName" }
|
||||
);
|
||||
await DB.collection("repositories").createIndex(
|
||||
{ fullName: 1 },
|
||||
{ unique: true, name: "fullName" }
|
||||
);
|
||||
await DB.collection("users").createIndex(
|
||||
{ username: 1 },
|
||||
{ unique: true, name: "username" }
|
||||
);
|
||||
return DB;
|
||||
};
|
||||
module.exports.close = async () => {
|
||||
return await mongoClient.close();
|
||||
};
|
||||
491
utils/file.js
491
utils/file.js
@@ -1,491 +0,0 @@
|
||||
const ofs = require("fs");
|
||||
const fs = require("fs").promises;
|
||||
const path = require("path");
|
||||
const { Octokit } = require("@octokit/rest");
|
||||
const gh = require("parse-github-url");
|
||||
const loc = require("github-linguist").default;
|
||||
const { isText } = require("istextorbinary");
|
||||
|
||||
const db = require("./database");
|
||||
const repoUtils = require("./repository");
|
||||
const githubUtils = require("./github");
|
||||
const anonymizeUtils = require("./anonymize");
|
||||
const config = require("../config");
|
||||
|
||||
async function walk(dir, root) {
|
||||
if (root == null) {
|
||||
root = dir;
|
||||
}
|
||||
let files = await fs.readdir(dir);
|
||||
const output = { child: {} };
|
||||
for (let file of files) {
|
||||
let filePath = path.join(dir, file);
|
||||
try {
|
||||
const stats = await fs.stat(filePath);
|
||||
if (file[0] == "$") {
|
||||
file = "\\" + file;
|
||||
}
|
||||
if (stats.isDirectory()) {
|
||||
output.child[file] = await walk(filePath, root);
|
||||
output.child[file].sha = stats.ino;
|
||||
} else if (stats.isFile()) {
|
||||
output.child[file] = { size: stats.size, sha: stats.ino };
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
function tree2tree(tree, partialTree, parentPath) {
|
||||
if (!parentPath) parentPath = "";
|
||||
if (partialTree == null) {
|
||||
partialTree = { child: Object.create(null) };
|
||||
}
|
||||
for (let elem of tree) {
|
||||
const paths = path.join(parentPath, elem.path).split("/");
|
||||
let current = partialTree;
|
||||
|
||||
// if elem is a folder iterate on all folders if it is a file stop before the filename
|
||||
const end = elem.type == "tree" ? paths.length : paths.length - 1;
|
||||
for (let i = 0; i < end; i++) {
|
||||
let p = paths[i];
|
||||
if (p[0] == "$") {
|
||||
p = "\\" + p;
|
||||
}
|
||||
if (!current.child[p]) {
|
||||
current.child[p] = {
|
||||
child: Object.create(null),
|
||||
};
|
||||
}
|
||||
current = current.child[p];
|
||||
}
|
||||
|
||||
// if elem is a file add the file size in the file list
|
||||
if (elem.type == "blob") {
|
||||
let p = paths[end];
|
||||
if (p[0] == "$") {
|
||||
p = "\\" + p;
|
||||
}
|
||||
current.child[p] = {
|
||||
size: elem.size,
|
||||
sha: elem.sha,
|
||||
};
|
||||
} else {
|
||||
current.sha = elem.sha;
|
||||
}
|
||||
}
|
||||
return partialTree;
|
||||
}
|
||||
async function getTruncatedTree(repoConfig, truncatedTree, sha, parentPath) {
|
||||
const repo = gh(repoConfig.fullName);
|
||||
|
||||
if (!sha || !/^[a-f0-9]+$/.test(sha)) {
|
||||
if (repoConfig.commit && /^[a-f0-9]+$/.test(repoConfig.commit)) {
|
||||
sha = repoConfig.commit;
|
||||
} else {
|
||||
sha = "HEAD";
|
||||
}
|
||||
repoConfig.commit = sha;
|
||||
}
|
||||
|
||||
const octokit = new Octokit({
|
||||
auth: await githubUtils.getToken(repoConfig),
|
||||
});
|
||||
const ghRes = await octokit.git.getTree({
|
||||
owner: repo.owner,
|
||||
repo: repo.name,
|
||||
tree_sha: sha,
|
||||
});
|
||||
const tree = ghRes.data.tree;
|
||||
|
||||
for (let elem of tree) {
|
||||
if (elem.type == "tree") {
|
||||
const elementPath = path.join(parentPath, elem.path);
|
||||
const paths = elementPath.split("/");
|
||||
|
||||
let current = truncatedTree;
|
||||
for (let i = 0; i < paths.length; i++) {
|
||||
let p = paths[i];
|
||||
if (!current.child[p]) {
|
||||
await module.exports.getTree(
|
||||
repoConfig,
|
||||
elem.sha,
|
||||
truncatedTree,
|
||||
elementPath
|
||||
);
|
||||
break;
|
||||
}
|
||||
current = current.child[p];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tree2tree(ghRes.data.tree, truncatedTree, parentPath);
|
||||
|
||||
return truncatedTree;
|
||||
}
|
||||
module.exports.getTree = async (repoConfig, sha, truncatedTree, parentPath) => {
|
||||
const repo = gh(repoConfig.fullName);
|
||||
|
||||
if (!sha || !/^[a-f0-9]+$/.test(sha)) {
|
||||
if (repoConfig.commit && /^[a-f0-9]+$/.test(repoConfig.commit)) {
|
||||
sha = repoConfig.commit;
|
||||
} else {
|
||||
sha = "HEAD";
|
||||
}
|
||||
}
|
||||
|
||||
if (!parentPath) parentPath = "";
|
||||
|
||||
const token = await githubUtils.getToken(repoConfig);
|
||||
|
||||
const octokit = new Octokit({
|
||||
auth: token,
|
||||
});
|
||||
const ghRes = await octokit.git.getTree({
|
||||
owner: repo.owner,
|
||||
repo: repo.name,
|
||||
tree_sha: sha,
|
||||
recursive: true,
|
||||
});
|
||||
sha = ghRes.data.sha;
|
||||
repoConfig.commit = sha;
|
||||
|
||||
const tree = tree2tree(ghRes.data.tree, truncatedTree, parentPath);
|
||||
if (ghRes.data.truncated) {
|
||||
await getTruncatedTree(repoConfig, tree, sha, parentPath);
|
||||
}
|
||||
return tree;
|
||||
};
|
||||
module.exports.getFileList = async (options) => {
|
||||
let repoConfig = options.repoConfig;
|
||||
if (!repoConfig) {
|
||||
repoConfig = await repoUtils.getConfig(options.repoId);
|
||||
}
|
||||
|
||||
if (repoConfig == null) {
|
||||
throw "repo_not_found";
|
||||
}
|
||||
|
||||
const r = await db.get("anonymized_repositories").findOne(
|
||||
{ repoId: repoConfig.repoId },
|
||||
{
|
||||
projection: { files: 1 },
|
||||
}
|
||||
);
|
||||
if (r && r.files) {
|
||||
return r.files;
|
||||
}
|
||||
|
||||
if (repoConfig.options.mode == "stream") {
|
||||
// get file list from github
|
||||
const tree = await module.exports.getTree(repoConfig, repoConfig.commit);
|
||||
const files = anonymizeTree(tree, repoConfig);
|
||||
await db.get("anonymized_repositories").updateOne(
|
||||
{ repoId: repoConfig.repoId },
|
||||
{
|
||||
$set: {
|
||||
commit: repoConfig.commit,
|
||||
originalFiles: tree.child,
|
||||
files,
|
||||
},
|
||||
},
|
||||
{ upsert: true }
|
||||
);
|
||||
return files;
|
||||
} else if (repoConfig.options.mode == "download") {
|
||||
const originalFiles = await walk(
|
||||
repoUtils.getOriginalPath(repoConfig.repoId)
|
||||
);
|
||||
const files = anonymizeTree(originalFiles, repoConfig);
|
||||
await db.get("anonymized_repositories").updateOne(
|
||||
{ repoId: repoConfig.repoId },
|
||||
{
|
||||
$set: {
|
||||
originalFiles: originalFiles.child,
|
||||
files,
|
||||
},
|
||||
},
|
||||
{ upsert: true }
|
||||
);
|
||||
return files;
|
||||
} else {
|
||||
throw "non_supported_mode";
|
||||
}
|
||||
};
|
||||
function anonymizeTree(tree, repoConfig) {
|
||||
if (Number.isInteger(tree.size)) {
|
||||
return tree;
|
||||
}
|
||||
const output = {};
|
||||
for (let file in tree.child) {
|
||||
const anonymizedPath = anonymizeUtils.anonymizePath(file, repoConfig);
|
||||
output[anonymizedPath] = anonymizeTree(tree.child[file], repoConfig);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
function tree2sha(tree, output, parent) {
|
||||
if (!output) {
|
||||
output = {};
|
||||
parent = "";
|
||||
}
|
||||
for (let i in tree) {
|
||||
if (tree[i].sha) {
|
||||
output[tree[i].sha] = path.join(parent, i);
|
||||
}
|
||||
if (tree[i].child) {
|
||||
tree2sha(tree[i].child, output, path.join(parent, i));
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
function getFile(tree, elementPath) {
|
||||
const paths = elementPath.trim().split("/");
|
||||
let current = tree;
|
||||
if (!tree.child) {
|
||||
current = { child: tree };
|
||||
}
|
||||
for (let i = 0; i < paths.length; i++) {
|
||||
let p = paths[i];
|
||||
if (p == "") {
|
||||
continue;
|
||||
}
|
||||
let tmp = current;
|
||||
if (current.child) {
|
||||
tmp = current.child;
|
||||
}
|
||||
if (!tmp[p]) {
|
||||
return null;
|
||||
}
|
||||
current = tmp[p];
|
||||
}
|
||||
return current;
|
||||
}
|
||||
module.exports.additionalExtensions = [
|
||||
"license",
|
||||
"dockerfile",
|
||||
"sbt",
|
||||
"ipynb",
|
||||
"gp",
|
||||
"out",
|
||||
"sol",
|
||||
"in",
|
||||
];
|
||||
module.exports.isText = (p) => {
|
||||
const filename = path.basename(p);
|
||||
const extensions = filename.split(".").reverse();
|
||||
const extension = extensions[0].toLowerCase();
|
||||
if (module.exports.additionalExtensions.includes(extension)) {
|
||||
return true;
|
||||
}
|
||||
if (isText(p)) {
|
||||
return true;
|
||||
}
|
||||
if (ofs.existsSync(p)) {
|
||||
if (isText(p, ofs.readFileSync(p))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
module.exports.isFileSupported = (repoConfig, p) => {
|
||||
if (module.exports.isText(p)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const filename = path.basename(p);
|
||||
const extensions = filename.split(".").reverse();
|
||||
const extension = extensions[0].toLowerCase();
|
||||
|
||||
if (repoConfig.options.pdf && extension == "pdf") {
|
||||
return true;
|
||||
}
|
||||
if (
|
||||
repoConfig.options.image &&
|
||||
(extension == "png" ||
|
||||
extension == "ico" ||
|
||||
extension == "jpg" ||
|
||||
extension == "jpeg" ||
|
||||
extension == "gif")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
module.exports.isFilePathValid = async (options) => {
|
||||
if (options.path == null) {
|
||||
throw "invalid_path";
|
||||
}
|
||||
let repoConfig = options.repoConfig;
|
||||
if (!repoConfig) {
|
||||
repoConfig = await repoUtils.getConfig(options.repoId);
|
||||
}
|
||||
|
||||
if (repoConfig == null) {
|
||||
throw "repo_not_found";
|
||||
}
|
||||
if (repoConfig.status == "expired") {
|
||||
throw "repository_expired";
|
||||
}
|
||||
if (repoConfig.status == "removed") {
|
||||
throw "repository_expired";
|
||||
}
|
||||
if (repoConfig.status != "ready") {
|
||||
throw "repository_not_ready";
|
||||
}
|
||||
|
||||
const anonymizedFilePath = path.join(
|
||||
repoUtils.getAnonymizedPath(repoConfig.repoId),
|
||||
options.path
|
||||
);
|
||||
|
||||
if (ofs.existsSync(anonymizedFilePath)) {
|
||||
if (ofs.lstatSync(anonymizedFilePath).isDirectory()) {
|
||||
throw "is_folder";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
let unanonymizePath = options.path;
|
||||
const files = await module.exports.getFileList({ repoConfig });
|
||||
|
||||
const file = getFile(files, options.path);
|
||||
if (file == null) {
|
||||
throw "file_not_found";
|
||||
}
|
||||
if (file) {
|
||||
const r = await db
|
||||
.get("anonymized_repositories")
|
||||
.findOne(
|
||||
{ repoId: repoConfig.repoId },
|
||||
{ projection: { originalFiles: 1 } }
|
||||
);
|
||||
|
||||
const shatree = tree2sha(r.originalFiles);
|
||||
if (shatree[file.sha]) {
|
||||
unanonymizePath = shatree[file.sha];
|
||||
}
|
||||
}
|
||||
|
||||
const originalFilePath = path.join(
|
||||
repoUtils.getOriginalPath(repoConfig.repoId),
|
||||
unanonymizePath
|
||||
);
|
||||
|
||||
if (ofs.existsSync(originalFilePath)) {
|
||||
if (ofs.lstatSync(originalFilePath).isDirectory()) {
|
||||
throw "is_folder";
|
||||
}
|
||||
if (!module.exports.isFileSupported(repoConfig, originalFilePath)) {
|
||||
throw "file_not_supported";
|
||||
}
|
||||
await anonymizeUtils.anonymizeFile(
|
||||
originalFilePath,
|
||||
anonymizedFilePath,
|
||||
repoConfig
|
||||
);
|
||||
return true;
|
||||
}
|
||||
// if stream mode check download the file
|
||||
if (repoConfig.options.mode == "stream") {
|
||||
if (!file.sha) {
|
||||
throw "is_folder";
|
||||
}
|
||||
if (file.size > config.MAX_FILE_SIZE) {
|
||||
// file bigger than 10mb
|
||||
throw "file_too_big";
|
||||
}
|
||||
const octokit = new Octokit({
|
||||
auth: await githubUtils.getToken(repoConfig),
|
||||
});
|
||||
|
||||
let ghRes = null;
|
||||
try {
|
||||
const repo = gh(repoConfig.fullName);
|
||||
ghRes = await octokit.request(
|
||||
"GET /repos/{owner}/{repo}/git/blobs/{file_sha}",
|
||||
{
|
||||
owner: repo.owner,
|
||||
repo: repo.name,
|
||||
file_sha: file.sha,
|
||||
}
|
||||
);
|
||||
} catch (error) {
|
||||
if (error.status == 403) {
|
||||
throw "file_too_big";
|
||||
}
|
||||
console.error(error);
|
||||
throw "file_not_accessible";
|
||||
}
|
||||
if (!ghRes.data.content && ghRes.data.size != 0) {
|
||||
throw "file_not_accessible";
|
||||
}
|
||||
// empty file
|
||||
let content = "";
|
||||
if (ghRes.data.content) {
|
||||
content = new Buffer.from(ghRes.data.content, ghRes.data.encoding);
|
||||
}
|
||||
|
||||
try {
|
||||
await fs.mkdir(path.dirname(originalFilePath), { recursive: true });
|
||||
} catch (_) {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
await fs.writeFile(originalFilePath, content, { encoding: "utf-8" });
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
throw "unable_to_write_file";
|
||||
}
|
||||
if (!module.exports.isFileSupported(repoConfig, originalFilePath)) {
|
||||
throw "file_not_supported";
|
||||
}
|
||||
await anonymizeUtils.anonymizeFile(
|
||||
originalFilePath,
|
||||
anonymizedFilePath,
|
||||
repoConfig
|
||||
);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
module.exports.getStats = async (options) => {
|
||||
let repoConfig = options.repoConfig;
|
||||
if (!repoConfig) {
|
||||
repoConfig = await repoUtils.getConfig(options.repoId);
|
||||
}
|
||||
|
||||
if (repoConfig == null) {
|
||||
throw "repo_not_found";
|
||||
}
|
||||
if (repoConfig.options.mode != "download") {
|
||||
throw "stats_unsupported";
|
||||
}
|
||||
|
||||
if (repoConfig.loc) {
|
||||
return repoConfig.loc;
|
||||
}
|
||||
|
||||
const repoCache = repoUtils.getOriginalPath(repoConfig.repoId);
|
||||
try {
|
||||
await fs.access(repoCache, ofs.constants.R_OK);
|
||||
} catch (error) {
|
||||
throw "repo_not_found";
|
||||
}
|
||||
const o = await loc(repoCache);
|
||||
delete o.files;
|
||||
await db.get("anonymized_repositories").updateOne(
|
||||
{ repoId: repoConfig.repoId },
|
||||
{
|
||||
$set: {
|
||||
loc: o,
|
||||
},
|
||||
},
|
||||
{ upsert: true }
|
||||
);
|
||||
return o;
|
||||
};
|
||||
@@ -1,75 +0,0 @@
|
||||
const ofs = require("fs");
|
||||
const { OAuthApp } = require("@octokit/oauth-app");
|
||||
|
||||
const db = require("./database");
|
||||
const repoUtils = require("./repository");
|
||||
const fileUtils = require("./file");
|
||||
|
||||
const config = require("../config");
|
||||
|
||||
const app = new OAuthApp({
|
||||
clientType: "github-app",
|
||||
clientId: config.CLIENT_ID,
|
||||
clientSecret: config.CLIENT_SECRET,
|
||||
});
|
||||
|
||||
module.exports.getToken = async (repoConfig) => {
|
||||
if (repoConfig.owner) {
|
||||
const user = await db
|
||||
.get()
|
||||
.collection("users")
|
||||
.findOne(
|
||||
{ username: repoConfig.owner },
|
||||
{ projection: { accessToken: 1 } }
|
||||
);
|
||||
if (user && user.accessToken) {
|
||||
return user.accessToken;
|
||||
}
|
||||
}
|
||||
if (repoConfig.token) {
|
||||
try {
|
||||
await app.checkToken({
|
||||
token: repoConfig.token,
|
||||
});
|
||||
return repoConfig.token;
|
||||
} catch (error) {
|
||||
console.debug("Token is invalid.", error);
|
||||
delete repoConfig.token;
|
||||
}
|
||||
}
|
||||
return config.GITHUB_TOKEN;
|
||||
};
|
||||
|
||||
/**
 * Make the content of a repository available locally.
 * - In "download" mode the full repository is downloaded to its original path.
 * - In "stream" mode only the file listing is fetched from GitHub.
 * Returns true when the content is (already) available, false for any other mode.
 * Fix: removed a redundant re-declaration of `originalPath` inside the download
 * branch that shadowed the identical outer constant.
 */
module.exports.downloadRepoAndAnonymize = async (repoConfig) => {
  const cachePath = repoUtils.getAnonymizedPath(repoConfig.repoId);
  const originalPath = repoUtils.getOriginalPath(repoConfig.repoId);
  // Nothing to do if the repository was already downloaded or anonymized.
  if (ofs.existsSync(cachePath) || ofs.existsSync(originalPath)) {
    return true;
  }
  if (repoConfig.options.mode == "download") {
    // if cache folder does not exist download and anonymize it
    await repoUtils.updateStatus(repoConfig, "downloading");
    await repoUtils.downloadOriginalRepo(repoConfig, originalPath);
    await repoUtils.updateStatus(repoConfig, "ready");

    // anonymize all the files
    // await repoUtils.updateStatus(repoConfig, "anonymize");
    // await anonymizeUtils.anonymizeFolder(originalPath, cachePath, repoConfig);
    // await repoUtils.updateStatus(repoConfig, "anonymized");

    // clean up
    // await fs.rm(originalPath, { recursive: true, force: true });
    return true;
  } else if (repoConfig.options.mode == "stream") {
    // in stream mode only download the list of file from github
    await repoUtils.updateStatus(repoConfig, "downloading");
    await fileUtils.getFileList({ repoConfig });
    await repoUtils.updateStatus(repoConfig, "ready");
    return true;
  }
  return false;
};
|
||||
@@ -1,369 +0,0 @@
|
||||
const fs = require("fs").promises;
|
||||
const ofs = require("fs");
|
||||
const path = require("path");
|
||||
const gh = require("parse-github-url");
|
||||
const { Octokit } = require("@octokit/rest");
|
||||
const extract = require("extract-zip");
|
||||
|
||||
const db = require("./database");
|
||||
const githubUtils = require("./github");
|
||||
const config = require("../config");
|
||||
|
||||
// Root folder that holds all local data for a given repository.
module.exports.getPath = (repoId) => {
  return path.resolve(__dirname, "..", "repositories", repoId);
};
// Location of the pristine (non-anonymized) checkout of a repository.
// Fix: derive from getPath instead of repeating the path construction.
module.exports.getOriginalPath = (repoId) => {
  return path.join(module.exports.getPath(repoId), "original");
};
// Location of the anonymized copy of a repository.
module.exports.getAnonymizedPath = (repoId) => {
  return path.join(module.exports.getPath(repoId), "cache");
};
|
||||
|
||||
/**
 * Fetch the stored configuration of an anonymized repository by its id.
 * Persisted date fields are revived into Date objects.
 * Returns null when no repository matches `repoId`.
 */
module.exports.getConfig = async (repoId) => {
  // Only the fields the application needs; the (large) file listing is excluded.
  const projection = {
    // files: 1,
    token: 1,
    branch: 1,
    commit: 1,
    owner: 1,
    fullName: 1,
    repoId: 1,
    terms: 1,
    options: 1,
    loc: 1,
    status: 1,
    lastView: 1,
  };
  const repoConfig = await db
    .get()
    .collection("anonymized_repositories")
    .findOne({ repoId }, { projection });

  if (repoConfig && repoConfig.options.expirationDate) {
    // Normalize persisted values back into Date objects.
    repoConfig.options.expirationDate = new Date(
      repoConfig.options.expirationDate
    );
    repoConfig.lastView = new Date(repoConfig.lastView);
  }
  return repoConfig;
};
|
||||
|
||||
/**
 * Retrieve the GitHub metadata of a repository, caching it in the
 * `repositories` collection. The repository is identified by
 * `options.fullName`, `options.repoConfig`, or `options.owner` + `options.repo`.
 * Set `options.force` to true to bypass the cache.
 * Retries once with the server-wide token when the user token is rejected (401).
 * @throws "invalid_options" | "repo_not_accessible" | "repo_not_found"
 * Fix: `ghPageRes` was assigned without declaration (implicit global;
 * ReferenceError under strict mode) — it is now a local constant.
 */
module.exports.getRepoDetails = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw "invalid_options";
  }

  // Serve from the local cache unless a refresh is forced.
  if (options.force !== true) {
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 0 } });
    if (repository && repository.id) return repository;
  }

  try {
    const repo = gh(options.fullName);

    const octokit = new Octokit({ auth: options.token });
    const ghRes = await octokit.repos.get({
      owner: repo.owner,
      repo: repo.name,
    });
    ghRes.data.fullName = ghRes.data.full_name;
    if (ghRes.data.fullName != query.fullName) {
      // repo renamed keep the old name
      ghRes.data.fullName = query.fullName;
    }
    if (ghRes.data.has_pages) {
      const ghPageRes = await octokit.request(
        "GET /repos/{owner}/{repo}/pages",
        {
          owner: repo.owner,
          repo: repo.name,
        }
      );
      ghRes.data.pageSource = ghPageRes.data.source;
    }

    delete ghRes.data.full_name;
    await db
      .get("repositories")
      .updateOne(query, { $set: ghRes.data }, { upsert: true });
    return ghRes.data;
  } catch (error) {
    console.log(query, error);
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      // Retry once with the server-wide token.
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoDetails(options);
    } else if (error.status == 403) {
      throw "repo_not_accessible";
    }
    throw "repo_not_found";
  }
};
|
||||
|
||||
/**
 * Download the zipball of the repository at the pinned commit and write it
 * to `target`. Falls back to the server-wide token when the repository
 * token is unauthorized (401).
 * @throws "repo_not_accessible" when no token grants access.
 */
module.exports.downloadRepoZip = async (repoConfig, target) => {
  const parsed = gh(repoConfig.fullName);

  // Request the zipball of the pinned commit with the given token.
  const fetchZipball = async (token) => {
    const octokit = new Octokit({ auth: token });
    return octokit.request("GET /repos/{owner}/{repo}/zipball/{ref}", {
      owner: parsed.owner,
      repo: parsed.name,
      ref: repoConfig.commit,
    });
  };

  let response = null;
  try {
    response = await fetchZipball(await githubUtils.getToken(repoConfig));
  } catch (error) {
    // On 401, retry once with the server-wide token before giving up.
    if (error.status == 401 && config.GITHUB_TOKEN) {
      try {
        response = await fetchZipball(config.GITHUB_TOKEN);
      } catch (retryError) {
        throw "repo_not_accessible";
      }
    } else {
      throw "repo_not_accessible";
    }
  }

  await fs.mkdir(path.dirname(target), { recursive: true });
  await fs.writeFile(target, Buffer.from(response.data), {
    encoding: "binary",
  });
};
|
||||
|
||||
/**
 * Persist the status (and optional error message) of an anonymized repository.
 * Mutates `repoConfig` in place and mirrors the change in the database;
 * when no error message is given, any stored one is removed.
 */
module.exports.updateStatus = async (repoConfig, status, errorMessage) => {
  repoConfig.status = status;
  repoConfig.errorMessage = errorMessage;

  const update = errorMessage
    ? { $set: { status, errorMessage } }
    : { $set: { status }, $unset: { errorMessage: "" } };

  await db
    .get("anonymized_repositories")
    .updateOne({ repoId: repoConfig.repoId }, update);
};
|
||||
|
||||
/**
 * Download the repository zipball, extract it, and move its content to
 * `destination`. Temporary artifacts (zip file and extraction folder) are
 * removed afterwards.
 * Fix: `fs.rename` was not awaited, so the cleanup of the extraction folder
 * could run while the move was still in progress.
 */
module.exports.downloadOriginalRepo = async (repoConfig, destination) => {
  const zipPath = path.join(
    module.exports.getPath(repoConfig.repoId),
    "content.zip"
  );
  const destinationZip = destination + "_zip";

  // download the repository and unzip it
  await module.exports.downloadRepoZip(repoConfig, zipPath);
  await extract(zipPath, { dir: destinationZip });

  // GitHub zipballs wrap everything in a single top-level folder;
  // move that folder to the destination.
  const folders = await fs.readdir(destinationZip);
  if (ofs.existsSync(destination)) {
    await fs.rm(destination, { force: true, recursive: true });
  }
  await fs.rename(path.join(destinationZip, folders[0]), destination);
  await fs.rm(zipPath);
  await fs.rm(destinationZip, { recursive: true });
};
|
||||
|
||||
/**
 * Fetch an anonymized repository owned by `user`.
 * Sensitive (token) and bulky (files, originalFiles, loc) fields are excluded.
 */
module.exports.getAnonymizedRepoDetails = async (repoId, user) => {
  const filter = { repoId, owner: user.username };
  const projection = { token: 0, files: 0, originalFiles: 0, loc: 0 };
  return db.get("anonymized_repositories").findOne(filter, { projection });
};
|
||||
|
||||
/**
 * Resolve the HEAD commit sha of the configured branch of a repository,
 * using the cached branch data unless `options.force` is true.
 * @throws "repo_not_found" | "branch_not_found"
 */
module.exports.getRepoCommit = async (options) => {
  const repoConfig =
    options.repoConfig || (await module.exports.getConfig(options.repoId));

  if (repoConfig == null) {
    throw "repo_not_found";
  }

  if (options.force !== true) {
    // Look for the branch in the cached repository document first.
    const query = { fullName: repoConfig.fullName };
    query[`branches.${repoConfig.branch}`] = { $exists: true };
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    const cachedBranch =
      repository && repository.branches
        ? repository.branches[repoConfig.branch]
        : null;
    if (cachedBranch) return cachedBranch.commit.sha;
  }

  // Cache miss (or forced refresh): fetch the branch list from GitHub.
  const branches = await module.exports.getRepoBranches({
    repoConfig,
    token: await githubUtils.getToken(repoConfig),
    force: options.force,
  });
  if (!branches[repoConfig.branch]) {
    throw "branch_not_found";
  }
  return branches[repoConfig.branch].commit.sha;
};
|
||||
|
||||
/**
 * List the branches of a repository, keyed by branch name.
 * The repository is identified by `options.fullName`, `options.repoConfig`,
 * or `options.owner` + `options.repo`. Results are cached in the
 * `repositories` collection unless `options.force` is true.
 * Retries once with the server-wide token on a 401 response.
 * @throws Error("Invalid options") | "repo_not_found" | "branches_not_found"
 */
module.exports.getRepoBranches = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw new Error("Invalid options");
  }

  if (options.force !== true) {
    const cached = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    if (cached && cached.branches) return cached.branches;
  }

  try {
    const parsed = gh(options.fullName);

    const octokit = new Octokit({ auth: options.token });
    const data = await octokit.paginate(octokit.repos.listBranches, {
      owner: parsed.owner,
      repo: parsed.name,
      per_page: 100,
    });

    // Index the branch list by branch name before caching it.
    const branches = {};
    for (const branch of data) {
      branches[branch.name] = branch;
    }
    await db
      .get("repositories")
      .updateOne(query, { $set: { branches } }, { upsert: true });
    return branches;
  } catch (error) {
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      // Retry once with the server-wide token.
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoBranches(options);
    }
    if (error.status == 404) {
      throw "repo_not_found";
    }
    console.error(error);
    throw "branches_not_found";
  }
};
|
||||
|
||||
/**
 * Fetch the README of a repository (decoded to UTF-8), caching it in the
 * `repositories` collection unless `options.force` is true. The repository
 * is identified by `options.fullName`, `options.repoConfig`, or
 * `options.owner` + `options.repo`.
 * @throws Error("Invalid options") | "readme_not_available"
 * Fix: `new Buffer.from(...)` misused the `Buffer.from` factory as a
 * constructor; it is now called directly.
 */
module.exports.getRepoReadme = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw new Error("Invalid options");
  }

  if (options.force !== true) {
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 1 } });
    if (repository && repository.readme) return repository.readme;
  }

  try {
    const repo = gh(options.fullName);

    const octokit = new Octokit({ auth: options.token });
    const ghRes = await octokit.repos.getReadme({
      owner: repo.owner,
      repo: repo.name,
    });
    // GitHub returns the README content base64-encoded.
    const readme = Buffer.from(ghRes.data.content, "base64").toString("utf-8");
    await db
      .get("repositories")
      .updateOne(query, { $set: { readme } }, { upsert: true });
    return readme;
  } catch (error) {
    throw "readme_not_available";
  }
};
|
||||
|
||||
/**
 * Re-download an anonymized repository when its branch moved to a new commit.
 * Returns true (no-op) when the repository is already at the latest commit.
 * @throws "repo_is_updating" when an update is already in progress.
 */
module.exports.updateAnonymizedRepository = async (repoConfig) => {
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // Re-read the configuration to guard against a concurrent update.
  repoConfig = await module.exports.getConfig(repoConfig.repoId);
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // check new commit
  const latestCommit = await module.exports.getRepoCommit({
    repoConfig,
    force: true,
  });
  if (latestCommit == repoConfig.commit) {
    console.log(`${repoConfig.repoId} is up to date`);
    return true;
  }
  repoConfig.commit = latestCommit;
  console.log(`${repoConfig.repoId} will be updated to ${latestCommit}`);
  await module.exports.updateStatus(repoConfig, "updating");
  await db
    .get("anonymized_repositories")
    .updateOne(
      { repoId: repoConfig.repoId },
      { $set: { commit: latestCommit } }
    );
  // Drop the stale local copies, then download the new revision.
  await module.exports.removeRepository(repoConfig);
  await githubUtils.downloadRepoAndAnonymize(repoConfig);
  await module.exports.updateStatus(repoConfig, "ready");
};
|
||||
|
||||
/**
 * Delete the local copies (original and anonymized) of a repository and
 * clear the cached file listings and metrics from the database.
 * Errors are logged and rethrown.
 */
module.exports.removeRepository = async (repoConfig) => {
  try {
    const originalPath = module.exports.getOriginalPath(repoConfig.repoId);
    if (ofs.existsSync(originalPath)) {
      await fs.rm(originalPath, { recursive: true, force: true });
    }
    const anonymizedPath = module.exports.getAnonymizedPath(repoConfig.repoId);
    if (ofs.existsSync(anonymizedPath)) {
      await fs.rm(anonymizedPath, { recursive: true, force: true });
    }

    // Remove the cached file trees and line-count metrics from the document.
    await db
      .get("anonymized_repositories")
      .updateOne(
        { repoId: repoConfig.repoId },
        { $unset: { files: "", originalFiles: "", loc: "" } }
      );
  } catch (error) {
    console.log(error);
    throw error;
  }
};
|
||||
Reference in New Issue
Block a user