Compare commits

...

89 Commits

Author SHA1 Message Date
tdurieux
696a465d5c 2.2.0 2023-08-28 14:46:54 +02:00
tdurieux
ecfd69bd37 fix: fix zip extract 2023-08-28 14:36:04 +02:00
tdurieux
d8de3f189a Release 2.1.5 2023-08-28 12:12:16 +02:00
tdurieux
f72a662750 chore: update dependencies 2023-08-28 12:12:13 +02:00
tdurieux
2f5d7a1089 fix: improve S3 reliability 2023-08-28 12:11:43 +02:00
tdurieux
92347fbcfb fix: trust proxy parameter 2023-08-28 12:11:43 +02:00
dependabot[bot]
48ae137f96 chore(deps): bump semver from 5.7.1 to 5.7.2 (#224)
Bumps [semver](https://github.com/npm/node-semver) from 5.7.1 to 5.7.2.
- [Release notes](https://github.com/npm/node-semver/releases)
- [Changelog](https://github.com/npm/node-semver/blob/v5.7.2/CHANGELOG.md)
- [Commits](https://github.com/npm/node-semver/compare/v5.7.1...v5.7.2)

---
updated-dependencies:
- dependency-name: semver
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-08-14 23:11:40 +02:00
tdurieux
84877506a6 fix: fix comments URL API 2023-07-25 12:47:21 +02:00
dependabot[bot]
275d4827a8 chore(deps): bump mongoose from 7.3.1 to 7.3.3 (#226)
Bumps [mongoose](https://github.com/Automattic/mongoose) from 7.3.1 to 7.3.3.
- [Release notes](https://github.com/Automattic/mongoose/releases)
- [Changelog](https://github.com/Automattic/mongoose/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Automattic/mongoose/compare/7.3.1...7.3.3)

---
updated-dependencies:
- dependency-name: mongoose
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-07-18 22:27:07 +02:00
tdurieux
9fea119f50 chore: improve job status reporting 2023-07-05 13:43:57 +02:00
tdurieux
68d96ad82e chore: update dependencies 2023-06-26 18:24:28 +02:00
tdurieux
f54b9f355b fix: make the repoId case insensitive 2023-06-26 18:17:49 +02:00
tdurieux
e24d1b4630 update dependencies & remove google analytics 2023-06-20 09:58:25 +02:00
tdurieux
406330d957 improve error handling during the download process 2023-05-31 11:46:20 +02:00
tdurieux
0997e19d3d fix(#208) update notebook renderer for the last version of marked 2023-05-16 14:34:01 +02:00
tdurieux
897426743f force GitHubStream when updating a repository 2023-05-10 08:05:52 +02:00
tdurieux
2f916c6968 fix: auto select file when a folder is selected 2023-05-08 15:39:17 +02:00
tdurieux
027f14ffbc fix: improve error message for folders 2023-05-08 15:16:56 +02:00
tdurieux
4f6c1d25fc fix: always send a response 2023-05-08 14:14:19 +02:00
tdurieux
6c4363182b chore: update decompress-stream-to-s3 2023-05-08 13:57:16 +02:00
tdurieux
66d5d91e3e fix(#206): make sure that all text files are anonimized 2023-05-08 13:56:33 +02:00
tdurieux
deba2b567e fix: change deprecated property for S3 timeout 2023-05-03 08:34:14 +02:00
tdurieux
e5ffad6364 fix(#205): fix encoded urls 2023-05-02 18:31:57 +02:00
tdurieux
f1d6e4534d change default filter for the admin 2023-05-02 18:22:14 +02:00
tdurieux
dde7fa2d72 feat(#204): display videos in md 2023-05-02 18:21:47 +02:00
tdurieux
abddf10c11 fix: fix anonymization of raw links 2023-05-02 16:00:47 +02:00
tdurieux
53ea31008a fix: download the repository if the file is cached 2023-05-02 08:17:46 +02:00
tdurieux
7d8b087a5d fix: improve reqs on non-existing files 2023-05-01 22:45:18 +02:00
tdurieux
a23f089a8a fix: hotfix for config 2023-05-01 14:54:47 +02:00
tdurieux
ee82d3c12a chore: update dependencies and update AWS S3 lib to v3 2023-05-01 14:53:27 +02:00
tdurieux
6226f32471 fix(#203): recursively get all files for downloaded repositories 2023-05-01 09:47:36 +02:00
tdurieux
3bf6864472 feat: improve error management in front end 2023-04-26 00:27:52 +02:00
tdurieux
083026f168 fix: fix content type for typescript 2023-04-26 00:21:49 +02:00
tdurieux
35d796f871 fix: fix content type for typescript 2023-04-26 00:19:26 +02:00
tdurieux
7e2c490e4b feat(#200): save the commit date of the anonymized commit 2023-04-25 23:40:12 +02:00
tdurieux
c9acb7b899 feat: improve response content type header 2023-04-25 17:42:50 +02:00
tdurieux
8ac3a66a30 feat(#169): add emoji support for markdown 2023-04-21 13:23:34 +02:00
tdurieux
3627096e63 feat(#148): add support for Math expression 2023-04-21 12:29:19 +02:00
tdurieux
4293fa01b2 feat: add media player in content view 2023-04-20 23:30:26 +02:00
tdurieux
13e5e35d46 fix(#199): stop content download when request is canceled and always define contentLength 2023-04-20 23:21:39 +02:00
tdurieux
0a021d6e61 fix: fix authentification for PR download 2023-04-20 13:44:35 +02:00
dependabot[bot]
a07c8d4635 chore(deps): bump xml2js from 0.4.19 to 0.5.0 (#196)
Bumps [xml2js](https://github.com/Leonidas-from-XIV/node-xml2js) from 0.4.19 to 0.5.0.
- [Release notes](https://github.com/Leonidas-from-XIV/node-xml2js/releases)
- [Commits](https://github.com/Leonidas-from-XIV/node-xml2js/compare/0.4.19...0.5.0)

---
updated-dependencies:
- dependency-name: xml2js
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-04-12 10:21:11 +02:00
tdurieux
66341ec410 fix: PR credentials 2023-04-11 09:25:29 +02:00
tdurieux
5d1eb333cf try to resolve unassessible website 2023-04-06 18:25:03 +02:00
tdurieux
9ecfdae9d7 fix: protect against double save 2023-04-06 13:36:36 +02:00
tdurieux
0afcb9733a reenable Github download 2023-04-06 13:17:07 +02:00
tdurieux
3a55a4d5b0 fix: remove quote from sha 2023-04-06 13:09:15 +02:00
tdurieux
e94a5f164a disable download mode for the moment 2023-04-05 12:12:57 +02:00
tdurieux
d29d4281ab fix: fix getUser in rate limit 2023-04-03 11:18:15 +02:00
tdurieux
f8a0315a1d feat: adapt the rate limit to the user 2023-04-03 11:11:08 +02:00
tdurieux
ed0dd82cfb fix: improve error message when too many requests are sent 2023-04-03 10:50:55 +02:00
tdurieux
d3f9e67c62 fix: try to handle duplicate user id error 2023-04-03 10:34:28 +02:00
tdurieux
344ecf2a33 feat: add a flag to know if a repo has been reseted 2023-04-03 10:21:56 +02:00
tdurieux
ef1a2bfa4a feat: check repository size when repo is updated 2023-04-03 10:10:43 +02:00
tdurieux
f1fe8eff14 feat(#171): supports display raw content 2023-03-29 13:05:13 +02:00
tdurieux
38d3e54d0b fix(#186): use a different name for the hostname configuration 2023-03-02 16:40:38 +01:00
tdurieux
74aacd223d fix(#186): use a different name for the hostname configuration 2023-03-02 16:39:01 +01:00
tdurieux
8221b2ee7f feat: dont download notebook if not necessary 2023-02-23 16:21:08 +01:00
tdurieux
c59e202124 feat: list gh repos in user admin 2023-02-22 11:05:37 +01:00
tdurieux
d825cc1d69 fix: fix admin filtering 2023-02-22 10:00:18 +01:00
tdurieux
f3b8860838 fix: fix error message when repository is not found 2023-02-22 09:57:49 +01:00
tdurieux
a558a6c2bd feat: add log in reset state 2023-02-22 09:44:35 +01:00
tdurieux
7422a3a262 fix: save model after the state reset 2023-02-22 09:41:28 +01:00
tdurieux
3c18884de2 fix: fix filter remove button in dashboard 2023-02-22 09:28:27 +01:00
tdurieux
1d4eb7a1b0 fix: fix action menu in the admin 2023-02-22 09:21:14 +01:00
tdurieux
b6049c4ed2 feat: improve the error handling in s3 2023-02-22 08:39:23 +01:00
tdurieux
7dbfdb3056 fix(#181): check if folder exists in S3 2023-02-22 08:00:33 +01:00
Maxim Van de Wynckel
6caca33145 fix(#174): correctly append trailing slash in windows
Windows requires a backslash instead of a forward slash. As this variable is used in a simple string replace, this needs to be platform specific.
2023-02-22 07:44:47 +01:00
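The commit above (6caca33145) notes that Windows paths use `\` rather than `/`, so a plain string replace on the path must be platform specific. A minimal, self-contained sketch of that idea (illustrative only, not the project's actual code):

```ts
import { sep, join } from "path";

// On Windows `sep` is "\\", on POSIX it is "/". A replace that hard-codes "/"
// would silently miss Windows paths, hence the platform-specific separator.
function ensureTrailingSep(dir: string): string {
  return dir.endsWith(sep) ? dir : dir + sep;
}

const cacheRoot = ensureTrailingSep(join("repositories", "cache"));
console.log(cacheRoot); // "repositories/cache/" on POSIX, "repositories\\cache\\" on Windows
```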
tdurieux
8c8f8dbd90 fix: fix user connection 2023-02-16 08:27:03 +01:00
tdurieux
83a9505a11 fix: fix compilation 2023-02-16 08:21:10 +01:00
tdurieux
74d625d6d4 fix: make sure that the user data are uptodate 2023-02-16 08:18:23 +01:00
tdurieux
2b10b10207 fix: check if cache exists before removing it 2023-02-16 08:15:21 +01:00
tdurieux
2a5f22a483 fix: fix new user when it already exists 2023-02-16 08:09:49 +01:00
tdurieux
9cde774273 fix: improve error handling when users conntect 2023-02-16 07:56:50 +01:00
tdurieux
95354292b5 refactor: refactor the queue init function 2023-02-15 20:19:26 +01:00
tdurieux
da194d9d71 fix(#174) improve cli interface to anonimize repositories 2023-02-15 19:23:45 +01:00
tdurieux
fb9bbe105a fix: improve getToken 2023-02-15 19:22:11 +01:00
tdurieux
99f837c3cf fix: fix and optimize Dockerfile 2023-02-13 14:07:57 +01:00
tdurieux
ec6098b3a1 chore: use strict compilation mode 2023-02-13 13:38:57 +01:00
tdurieux
3ab9b0c7a4 fix(#166): fix docker build 2023-02-13 08:15:55 +01:00
tdurieux
cff3636523 remove log 2023-02-08 16:33:06 +01:00
tdurieux
32d1884450 fix: get files for new repos 2023-02-08 16:26:46 +01:00
tdurieux
5c72f54db5 perf: improve the perf of Anonymous GitHub 2023-02-08 15:34:50 +01:00
tdurieux
2e36b72a7f perf: improve the perf of Anonymous GitHub 2023-02-08 09:49:24 +01:00
tdurieux
73f7582fd2 feat: admin to remove repo cache 2023-02-07 13:27:06 +01:00
tdurieux
3eee62d6ad feat: increase slowdown threshold for webview 2023-02-07 09:52:19 +01:00
Thomas Durieux
696b24a648 Update README.md 2023-02-07 09:35:09 +01:00
tdurieux
7c5fcfe069 chore: improve readme 2023-02-07 09:33:42 +01:00
tdurieux
6debb6aa0f fix: do not change repoID 2023-02-07 09:10:15 +01:00
63 changed files with 8250 additions and 3859 deletions

View File

@@ -1,3 +1,5 @@
/repositories
repo/
db_backups
db_backups
build
node_modules
.github

View File

@@ -1,22 +1,25 @@
FROM node:15-slim
FROM node:18-slim
ENV PORT 5000
EXPOSE $PORT
WORKDIR /app
RUN npm install pm2 -g
RUN pm2 install typescript
RUN npm install pm2 -g && pm2 install typescript && npm cache clean --force;
COPY package.json .
COPY package-lock.json .
RUN npm install
COPY tsconfig.json .
COPY ecosystem.config.js .
COPY healthcheck.js .
COPY src .
COPY src ./src
COPY public ./public
COPY index.ts .
COPY public .
COPY config.ts .
RUN npm install && npm run build && npm cache clean --force
CMD [ "pm2-runtime", "ecosystem.config.js"]

View File

@@ -1,46 +1,23 @@
# Anonymous Github
Anonymous Github is a system to anonymize Github repositories before referring to them in a double-anonymous paper submission.
To start using Anonymous Github right now: **[http://anonymous.4open.science/](http://anonymous.4open.science/)**
Anonymous Github is a system that helps anonymize Github repositories for double-anonymous paper submissions. A public instance of Anonymous Github is hosted at https://anonymous.4open.science/.
![screenshot](https://user-images.githubusercontent.com/5577568/217193282-42f608d3-2b46-4ebc-90df-772f248605be.png)
Indeed, in a double-anonymous review process, the open-science data or code that is in the online appendix must be anonymized, similarly to paper anonymization. The authors must
- anonymize URLs: the name of the institution/department/group/authors should not appear in the URLs of the open-science appendix
- anonymize the appendix content itself
Anonymizing an open-science appendix takes some work, but fortunately it can be automated; that is what Anonymous Github is about.
Anonymous Github anonymizes the following:
- the Github owner / organization / repository name
- the content of the repository
- file contents (all extensions, md/txt/java/etc)
- file and directory names
Anonymous Github anonymizes:
- Github repository owner, organization, and name
- File and directory names
- File contents of all extensions, including markdown, text, Java, etc.
## Usage
Question / Feedback / Bug report: please open an issue in this repository.
### Public instance
## Using Anonymous Github
**https://anonymous.4open.science/**
## How to create a new anonymized repository
To use it, open the main page (e.g., [http://anonymous.4open.science/](http://anonymous.4open.science/)), login with GitHub, and click on "Anonymize".
Simply fill in 1. the Github repo URL, 2. the id of the anonymized repository, and 3. the terms to anonymize (which can be updated afterward).
The content is anonymized by replacing all occurrences of the words in a list with "XXXX" (the mask can be changed in the configuration).
The word list is provided by the authors and typically contains the institution name, author names, logins, etc.
The README is anonymized as well as all files of the repository. Even filenames are anonymized.
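As an illustration of the term replacement described above, here is a minimal self-contained sketch; the helper name and the case-insensitive matching are assumptions, not the project's actual implementation:

```ts
// Minimal sketch of list-based anonymization: every occurrence of a term
// is replaced by the mask (the project uses "XXXX" by default).
function anonymizeText(content: string, terms: string[], mask = "XXXX"): string {
  return terms
    .filter((term) => term.trim().length > 0)
    .reduce((text, term) => {
      const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // treat terms literally
      return text.replace(new RegExp(escaped, "gi"), mask);
    }, content);
}

console.log(anonymizeText("Paper by Alice from ACME University", ["Alice", "ACME"]));
// -> "Paper by XXXX from XXXX University"
```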
In a paper under double-anonymous review, instead of putting a link to Github, one puts a link to the Anonymous Github instance (e.g.
<http://anonymous.4open.science/r/840c8c57-3c32-451e-bf12-0e20be300389/> which is an anonymous version of this repo).
To start using Anonymous Github right now, a public instance of anonymous_github is hosted at 4open.science:
**[http://anonymous.4open.science/](http://anonymous.4open.science/)**
## What is the scope of anonymization?
In double-anonymous peer-review, the boundary of anonymization is the paper plus its online appendix, and only that; it is not the whole world. Googling any part of the paper or of the online appendix can be considered a deliberate attempt to break anonymity ([explanation](http://www.monperrus.net/martin/open-science-double-anonymous))
## CLI
### CLI
This CLI tool allows you to anonymize your GitHub repositories locally, generating an anonymized zip file based on your configuration settings.
@@ -51,13 +28,10 @@ npm install -g @tdurieux/anonymous_github
# Run the Anonymous GitHub CLI tool
anonymous_github
```
## How does it work?
Anonymous Github either downloads the complete repository and anonymizes the content of the files, or proxies the requests to GitHub. In both cases, the original and anonymized versions of the files are cached on the server.
### Own instance
## Installing Anonymous Github
1. Clone the repository
#### 1. Clone the repository
```bash
git clone https://github.com/tdurieux/anonymous_github/
@@ -65,9 +39,9 @@ cd anonymous_github
npm i
```
2. Configure the Github token
#### 2. Configure the GitHub token
Create a file `.env` that contains
Create a `.env` file with the following contents:
```env
GITHUB_TOKEN=<GITHUB_TOKEN>
@@ -79,19 +53,27 @@ DB_PASSWORD=
AUTH_CALLBACK=http://localhost:5000/github/auth,
```
`GITHUB_TOKEN` can be generated here: https://github.com/settings/tokens/new with `repo` scope.
`CLIENT_ID` and `CLIENT_SECRET` are the tokens generated when you create a new GitHub app: https://github.com/settings/applications/new.
The callback of the GitHub app needs to be defined as `https://<host>/github/auth` (the same as defined in AUTH_CALLBACK).
- `GITHUB_TOKEN` can be generated here: https://github.com/settings/tokens/new with `repo` scope.
- `CLIENT_ID` and `CLIENT_SECRET` are the tokens generated when you create a new GitHub app: https://github.com/settings/applications/new.
- The callback of the GitHub app needs to be defined as `https://<host>/github/auth` (the same as defined in AUTH_CALLBACK).
3. Run Anonymous Github
#### 3. Start Anonymous Github server
```bash
docker-compose up -d
```
4. Go to Anonymous Github
#### 4. Go to Anonymous Github
By default, Anonymous Github uses port 5000. It can be changed in `docker-compose.yml`.
Go to http://localhost:5000. By default, Anonymous Github uses port 5000; it can be changed in `docker-compose.yml`. I would recommend putting Anonymous GitHub behind nginx to handle the HTTPS certificates.
## What is the scope of anonymization?
In double-anonymous peer-review, the boundary of anonymization is the paper plus its online appendix, and only that; it is not the whole world. Googling any part of the paper or of the online appendix can be considered a deliberate attempt to break anonymity ([explanation](https://www.monperrus.net/martin/open-science-double-blind))
## How does it work?
Anonymous Github either downloads the complete repository and anonymizes the content of the files, or proxies the requests to GitHub. In both cases, the original and anonymized versions of the files are cached on the server.
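A minimal, self-contained sketch of this cache-then-serve idea; all names here are illustrative stand-ins, not the server's real API:

```ts
// Hypothetical sketch: serve an anonymized file, caching both the original
// and the anonymized version, as the paragraph above describes.
const originalCache = new Map<string, string>();
const anonymizedCache = new Map<string, string>();

async function fetchOriginal(key: string): Promise<string> {
  // Stand-in for either downloading the repository or proxying the GitHub API.
  return `original content of ${key}`;
}

function maskTerms(text: string, terms: string[], mask = "XXXX"): string {
  return terms.reduce((t, term) => t.split(term).join(mask), text);
}

async function serveFile(key: string, terms: string[]): Promise<string> {
  if (!anonymizedCache.has(key)) {
    const original = originalCache.get(key) ?? (await fetchOriginal(key));
    originalCache.set(key, original);
    anonymizedCache.set(key, maskTerms(original, terms));
  }
  return anonymizedCache.get(key)!;
}
```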
## Related tools
@@ -102,3 +84,4 @@ By default, Anonymous Github uses port 5000. It can be changed in `docker-compos
## See also
- [Open-science and double-anonymous Peer-Review](https://www.monperrus.net/martin/open-science-double-blind)
- [ACM Policy on Double-Blind Reviewing](https://dl.acm.org/journal/tods/DoubleBlindPolicy)

cli.ts (60 lines changed)
View File

@@ -2,18 +2,20 @@
import { config as dot } from "dotenv";
dot();
process.env.STORAGE = "filesystem";
import { writeFile } from "fs/promises";
import { join } from "path";
import { tmpdir } from "os";
import * as gh from "parse-github-url";
import * as inquirer from "inquirer";
import server from "./src/server";
import config from "./config";
import GitHubDownload from "./src/source/GitHubDownload";
import Repository from "./src/Repository";
import AnonymizedRepositoryModel from "./src/database/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "./src/source/GitHubRepository";
function generateRandomFileName(size: number) {
const characters =
@@ -44,18 +46,47 @@ async function main() {
name: "terms",
message: `Terms to remove from your repository (separated with comma).`,
},
{
type: "string",
name: "output",
message: `The output folder where to save the zipped repository.`,
default: process.cwd(),
},
]);
const ghURL = gh(inq.repo) || { owner: "", name: "", branch: "", commit: "" };
const ghURL = gh(inq.repo) || {
owner: undefined,
name: undefined,
branch: undefined,
commit: undefined,
};
if (!ghURL.owner || !ghURL.name) {
throw new Error("Invalid GitHub URL");
}
const ghRepo = await getRepositoryFromGitHub({
accessToken: inq.token,
owner: ghURL.owner,
repo: ghURL.name,
});
const branches = await ghRepo.branches({
accessToken: inq.token,
force: true,
});
const branchToFind = inq.repo.includes(ghURL.branch)
? ghURL.branch
: ghRepo.model.defaultBranch || "master";
const branch = branches.find((b) => b.name === branchToFind);
const repository = new Repository(
new AnonymizedRepositoryModel({
repoId: "test",
repoId: `${ghURL.name}-${branch?.name}`,
source: {
type: "GitHubDownload",
accessToken: inq.token,
branch: ghURL.branch || "master",
commit: ghURL.branch || "HEAD",
branch: branchToFind,
commit: branch?.commit || "HEAD",
repositoryName: `${ghURL.owner}/${ghURL.name}`,
},
options: {
@@ -71,27 +102,20 @@ async function main() {
})
);
const source = new GitHubDownload(
{
type: "GitHubDownload",
accessToken: inq.token,
repositoryName: inq.repo,
},
repository
console.info(
`[INFO] Downloading repository: ${repository.model.source.repositoryName} from branch ${repository.model.source.branch} and commit ${repository.model.source.commit}...`
);
console.info("[INFO] Downloading repository...");
await source.download(inq.token);
const outputFileName = join(tmpdir(), generateRandomFileName(8) + ".zip");
await (repository.source as GitHubDownload).download(inq.token);
const outputFileName = join(inq.output, generateRandomFileName(8) + ".zip");
console.info("[INFO] Anonymizing repository and creation zip file...");
await writeFile(outputFileName, repository.zip());
await writeFile(outputFileName, await repository.zip());
console.log(`Anonymized repository saved at ${outputFileName}`);
}
if (require.main === module) {
if (process.argv[2] == "server") {
// start the server
require("./src/server").default();
server();
} else {
// use the cli interface
main();

View File

@@ -1,6 +1,7 @@
import { resolve } from "path";
interface Config {
SESSION_SECRET: string;
REDIS_PORT: number;
REDIS_HOSTNAME: string;
CLIENT_ID: string;
@@ -19,7 +20,7 @@ interface Config {
ENABLE_DOWNLOAD: boolean;
ANONYMIZATION_MASK: string;
PORT: number;
HOSTNAME: string;
APP_HOSTNAME: string;
DB_USERNAME: string;
DB_PASSWORD: string;
DB_HOSTNAME: string;
@@ -35,6 +36,7 @@ interface Config {
RATE_LIMIT: number;
}
const config: Config = {
SESSION_SECRET: "SESSION_SECRET",
CLIENT_ID: "CLIENT_ID",
CLIENT_SECRET: "CLIENT_SECRET",
GITHUB_TOKEN: "",
@@ -50,7 +52,7 @@ const config: Config = {
PORT: 5000,
TRUST_PROXY: 1,
RATE_LIMIT: 350,
HOSTNAME: "anonymous.4open.science",
APP_HOSTNAME: "anonymous.4open.science",
DB_USERNAME: "admin",
DB_PASSWORD: "password",
DB_HOSTNAME: "mongodb",
@@ -68,11 +70,11 @@ const config: Config = {
"in",
],
STORAGE: "filesystem",
S3_BUCKET: null,
S3_CLIENT_ID: null,
S3_CLIENT_SECRET: null,
S3_ENDPOINT: null,
S3_REGION: null,
S3_BUCKET: process.env.S3_BUCKET,
S3_CLIENT_ID: process.env.S3_CLIENT_ID,
S3_CLIENT_SECRET: process.env.S3_CLIENT_SECRET,
S3_ENDPOINT: process.env.S3_ENDPOINT,
S3_REGION: process.env.S3_REGION,
};
for (let conf in process.env) {
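The diff above ends just as `config.ts` starts iterating over `process.env`; the loop body itself is not shown. As a generic illustration of that override pattern only (not the repository's actual code), a reduced sketch:

```ts
// Illustrative only: override configuration defaults from environment
// variables. The real config.ts loop is truncated in this diff.
interface DemoConfig {
  PORT: number;
  APP_HOSTNAME: string;
  [key: string]: string | number;
}

const demoConfig: DemoConfig = {
  PORT: 5000,
  APP_HOSTNAME: "anonymous.4open.science",
};

for (const key in process.env) {
  if (!(key in demoConfig)) continue;
  const value = process.env[key];
  if (value === undefined) continue;
  demoConfig[key] = typeof demoConfig[key] === "number" ? parseInt(value, 10) : value;
}
```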

View File

@@ -10,8 +10,6 @@ services:
environment:
- REDIS_HOSTNAME=redis
- DB_HOSTNAME=mongodb
volumes:
- .:/app
ports:
- $PORT:$PORT
healthcheck:
@@ -28,12 +26,19 @@ services:
redis:
image: "redis:alpine"
restart: always
healthcheck:
test:
- CMD
- redis-cli
- ping
interval: 10s
timeout: 10s
retries: 5
mongodb:
image: mongo:latest
restart: on-failure
ports:
- "127.0.0.1:27017:27017"
environment:
MONGO_INITDB_ROOT_USERNAME: $DB_USERNAME
MONGO_INITDB_ROOT_PASSWORD: $DB_PASSWORD

View File

@@ -2,9 +2,9 @@ module.exports = {
apps: [
{
name: "AnonymousGitHub",
script: "./index.ts",
script: "build/index.js",
exec_mode: "fork",
watch: true,
watch: false,
ignore_watch: [
"node_modules",
"repositories",
@@ -12,10 +12,9 @@ module.exports = {
"public",
".git",
"db_backups",
"dist",
"build",
],
interpreter: "node",
interpreter_args: "--require ts-node/register",
},
],
};

package-lock.json (generated, 7023 lines changed)

File diff suppressed because it is too large.

View File

@@ -1,6 +1,6 @@
{
"name": "@tdurieux/anonymous_github",
"version": "2.1.1",
"version": "2.2.0",
"description": "Anonymise Github repositories for double-anonymous reviews",
"bin": {
"anonymous_github": "build/cli.js"
@@ -10,7 +10,7 @@
"start": "node --inspect=5858 -r ts-node/register ./index.ts",
"dev": "nodemon --transpile-only index.ts",
"migrateDB": "ts-node --transpile-only migrateDB.ts",
"build": "tsc"
"build": "rm -rf build && tsc"
},
"repository": {
"type": "git",
@@ -30,58 +30,61 @@
"build"
],
"dependencies": {
"@octokit/oauth-app": "^4.1.0",
"@octokit/rest": "^19.0.5",
"@aws-sdk/client-s3": "^3.374.0",
"@aws-sdk/node-http-handler": "^3.374.0",
"@octokit/oauth-app": "^6.0.0",
"@octokit/plugin-paginate-rest": "^8.0.0",
"@octokit/rest": "^20.0.1",
"@pm2/io": "^5.0.0",
"archiver": "^5.3.1",
"aws-sdk": "^2.1238.0",
"bullmq": "^2.3.2",
"compression": "^1.7.4",
"connect-redis": "^6.1.3",
"decompress-stream-to-s3": "^1.3.1",
"connect-redis": "^7.0.1",
"decompress-stream-to-s3": "^2.1.1",
"dotenv": "^16.0.3",
"express": "^4.18.2",
"express-rate-limit": "^6.6.0",
"express-rate-limit": "^6.8.0",
"express-session": "^1.17.3",
"express-slow-down": "^1.5.0",
"express-slow-down": "^1.6.0",
"got": "^11.8.5",
"inquirer": "^8.2.5",
"istextorbinary": "^6.0.0",
"marked": "^4.1.1",
"marked": "^5.1.2",
"mime-types": "^2.1.35",
"mongoose": "^6.6.7",
"node-schedule": "^2.1.0",
"mongoose": "^7.4.1",
"node-schedule": "^2.1.1",
"parse-github-url": "^1.0.2",
"passport": "^0.6.0",
"passport-github2": "^0.1.12",
"rate-limit-redis": "^3.0.1",
"redis": "^4.3.1",
"textextensions": "^5.15.0",
"ts-custom-error": "^3.3.0",
"rate-limit-redis": "^3.0.2",
"redis": "^4.6.7",
"textextensions": "^5.16.0",
"ts-custom-error": "^3.3.1",
"unzip-stream": "^0.3.1",
"xml-flow": "^1.0.4"
},
"devDependencies": {
"@types/archiver": "^5.3.1",
"@types/compression": "^1.7.1",
"@types/connect-redis": "^0.0.18",
"@types/connect-redis": "^0.0.20",
"@types/express": "^4.17.14",
"@types/express-rate-limit": "^6.0.0",
"@types/express-session": "^1.17.5",
"@types/express-slow-down": "^1.3.2",
"@types/got": "^9.6.12",
"@types/inquirer": "^8.0.0",
"@types/marked": "^4.0.7",
"@types/marked": "^5.0.1",
"@types/mime-types": "^2.1.0",
"@types/node-schedule": "^2.1.0",
"@types/parse-github-url": "^1.0.0",
"@types/passport": "^1.0.11",
"@types/passport-github2": "^1.2.5",
"@types/rate-limit-redis": "^1.7.4",
"@types/tar-fs": "^2.0.1",
"@types/unzip-stream": "^0.3.1",
"@types/xml-flow": "^1.0.1",
"chai": "^4.3.6",
"mocha": "^10.1.0",
"nodemon": "^3.0.1",
"ts-node": "^10.9.1",
"typescript": "^4.8.4"
},

View File

@@ -133,6 +133,12 @@ a:hover {
color: var(--link-hover-color);
}
.markdown-body .emoji {
height: 1.3em;
margin: 0;
vertical-align: -0.1em;
}
.navbar {
background: var(--header-bg-color) !important;
}

View File

@@ -1,5 +1,8 @@
{
"ERRORS": {
"unknown_error": "Unknown error, contact the admin.",
"unreachable": "Anonymous GitHub is unreachable, contact the admin.",
"request_error": "Unable to download the file, check your connection or contact the admin.",
"repo_not_found": "The repository is not found.",
"repo_not_accessible": "Anonymous GitHub is unable to or is forbidden to access the repository.",
"repository_expired": "The repository is expired",

View File

@@ -50,19 +50,23 @@
<script src="/script/external/angular-translate-loader-static-files.min.js"></script>
<script src="/script/external/angular-sanitize.min.js"></script>
<script src="/script/external/angular-route.min.js"></script>
<script src="/script/external/ana.min.js"></script>
<script src="/script/external/jquery-3.4.1.min.js"></script>
<script src="/script/external/popper.min.js"></script>
<script src="/script/external/bootstrap.min.js"></script>
<!-- PDF -->
<script src="/script/external/pdf.compat.js"></script>
<script src="/script/external/pdf.js"></script>
<!-- Code -->
<script src="/script/external/ace/ace.js"></script>
<script src="/script/external/ui-ace.min.js"></script>
<script src="/script/langColors.js"></script>
<!-- Notebook -->
<script src="/script/external/github-emojis.js"></script>
<script src="/script/external/marked-emoji.js"></script>
<script src="/script/external/marked.min.js"></script>
<script src="/script/external/purify.min.js"></script>
<script src="/script/external/ansi_up.min.js"></script>

View File

@@ -148,5 +148,79 @@
There is no job to display.
</li>
</ul>
<h1>Remove Cache</h1>
<ul class="p-0 m-0 w-100">
<li
class="col-12 d-flex px-0 py-3 border-bottom color-border-secondary"
ng-repeat="job in removeCaches as filteredRemoveCache"
>
<div class="w-100">
<div class="">
<h3>
<a target="__blank" ng-href="/r/{{job.id}}" ng-bind="job.id"></a>
<span class="badge" ng-bind="job.progress.status | title"></span>
</h3>
</div>
<div class="color-text-secondary mb-1">
<span ng-if="job.timestamp">
Created on:
<span ng-bind="job.timestamp | humanTime"></span>
</span>
<span ng-if="job.finishedOn">
Finished on:
<span ng-bind="job.finishedOn | humanTime"></span>
</span>
<span ng-if="job.processedOn">
Processed on:
<span ng-bind="job.processedOn | humanTime"></span>
</span>
</div>
<div>
<pre
ng-repeat="stack in job.stacktrace track by $index"
><code ng-bind="stack"></code></pre>
</div>
</div>
<div class="d-flex">
<div class="dropdown">
<button
class="btn black_border dropdown-toggle btn-sm"
type="button"
id="dropdownMenuButton"
data-toggle="dropdown"
aria-haspopup="true"
aria-expanded="false"
>
Actions
</button>
<div class="dropdown-menu" aria-labelledby="dropdownMenuButton">
<a
class="dropdown-item"
href="#"
ng-click="removeJob('remove', job)"
>
<i class="fas fa-trash-alt"></i> Remove
</a>
<a
class="dropdown-item"
href="#"
ng-click="retryJob('remove', job)"
>
<i class="fas fa-sync"></i> Retry
</a>
<a class="dropdown-item" href="/anonymize/{{job.id}}">
<i class="far fa-edit" aria-hidden="true"></i> Edit
</a>
</div>
</div>
</div>
</li>
<li
class="col-12 d-flex px-0 py-3 border-bottom color-border-secondary"
ng-if="filteredRemoveCache.length == 0"
>
There is no job to display.
</li>
</ul>
</div>
</div>

View File

@@ -313,6 +313,9 @@
Actions
</button>
<div class="dropdown-menu" aria-labelledby="dropdownMenuButton">
<a class="dropdown-item" href="#" ng-click="removeCache(repo)">
<i class="fas fa-trash-alt"></i> Remove Cache
</a>
<a class="dropdown-item" href="/anonymize/{{repo.repoId}}">
<i class="far fa-edit" aria-hidden="true"></i> Edit
</a>

View File

@@ -1,9 +1,15 @@
<div class="container page">
<div class="row">
<h1>
<img ng-src="{{userInfo.photo}}" ng-if="userInfo.photo" width="30" height="30" class="rounded-circle ng-scope">
<img
ng-src="{{userInfo.photo}}"
ng-if="userInfo.photo"
width="30"
height="30"
class="rounded-circle ng-scope"
/>
{{userInfo.username}}
<span class="badge"><span ng-bind="userInfo.status | title"></span>
<span class="badge"><span ng-bind="userInfo.status | title"></span></span>
</h1>
<div class="row mb-3 m-0 py-2 border">
<div class="col-2 font-weight-bold">ID</div>
@@ -16,12 +22,47 @@
<div class="col-10">{{userInfo.accessTokens.github}}</div>
<div class="col-2 font-weight-bold">Github</div>
<div class="col-10"><a ng-href="https://github.com/{{userInfo.username}}">{{userInfo.username}}</a></div>
<div class="col-10">
<a ng-href="https://github.com/{{userInfo.username}}"
>{{userInfo.username}}</a
>
</div>
<div class="col-2 font-weight-bold">Github Repositories</div>
<div class="col-10">{{userInfo.repositories.length}}</a></div>
<div class="col-10" ng-click="showRepos =!showRepos">
{{userInfo.repositories.length}}
</div>
<button
class="btn btn-primary m-1 mx-3"
ng-click="getGitHubRepositories()"
>
Regresh Repositories
</button>
<ul class="m-0 col-12" ng-if="showRepos">
<li
class="col-12 d-flex px-0 py-3 border-bottom color-border-secondary"
ng-repeat="repo in userInfo.repositories"
>
<div class="w-100">
<div class="">
{{repo.name}}
</div>
<div class="color-text-secondary mt-2">
<span
class="ml-0 mr-3"
title="Size: {{::repo.size | humanFileSize}}"
data-toggle="tooltip"
data-placement="bottom"
>
<i class="fas fa-database"></i> {{::repo.size |
humanFileSize}}</span
>
</div>
</div>
</li>
</ul>
</div>
<h3>Repositories {{repositories.length}}</h3>
<div class="border-bottom color-border-secondary py-3 w-100">
<div class="d-flex flex-items-start w-100">
@@ -245,6 +286,64 @@
>
</div>
</div>
<div class="d-flex">
<div class="dropdown">
<button
class="btn black_border dropdown-toggle btn-sm"
type="button"
id="dropdownMenuButton"
data-toggle="dropdown"
aria-haspopup="true"
aria-expanded="false"
>
Actions
</button>
<div class="dropdown-menu" aria-labelledby="dropdownMenuButton">
<a class="dropdown-item" href="#" ng-click="removeCache(repo)">
<i class="fas fa-trash-alt"></i> Remove Cache
</a>
<a class="dropdown-item" href="/anonymize/{{repo.repoId}}">
<i class="far fa-edit" aria-hidden="true"></i> Edit
</a>
<a
class="dropdown-item"
href="#"
ng-show="repo.status == 'ready' || repo.status == 'error'"
ng-click="updateRepository(repo)"
>
<i class="fas fa-sync"></i> Force update
</a>
<a
class="dropdown-item"
href="#"
ng-show="repo.status == 'removed'"
ng-click="updateRepository(repo)"
>
<i class="fas fa-check-circle"></i>
Enable
</a>
<a
class="dropdown-item"
href="#"
ng-show="repo.status == 'ready'"
ng-click="removeRepository(repo)"
>
<i class="fas fa-trash-alt"></i> Remove
</a>
<a class="dropdown-item" href="/r/{{repo.repoId}}/">
<i class="fa fa-eye" aria-hidden="true"></i> View Repo
</a>
<a
class="dropdown-item"
href="/w/{{repo.repoId}}/"
target="_self"
ng-if="repo.options.page && repo.status == 'ready'"
>
<i class="fas fa-globe"></i> View Page
</a>
</div>
</div>
</div>
</li>
<li
class="col-12 d-flex px-0 py-3 border-bottom color-border-secondary"

View File

@@ -13,14 +13,9 @@
name="anonymize"
novalidate
>
<h5 class="card-title">Anonymize a repository</h5>
<h6 class="card-subtitle mb-2 text-muted">
Fill the information to anonymize! It will only take 5min.
</h6>
<h2>Source</h2>
<h3 class="card-title mb-3">Anonymize your repository</h3>
<!-- repoUrl -->
<div class="form-group">
<label for="repoUrl">Type the url of your repository</label>
<div class="form-group mb-0">
<input
type="text"
class="form-control"
@@ -28,6 +23,7 @@
id="repoUrl"
ng-class="{'is-invalid': anonymize.repoUrl.$invalid}"
ng-model="repoUrl"
placeholder="URL of your GitHub repository"
ng-model-options="{ debounce: {default: 1000, blur: 0, click: 0}, updateOn: 'default blur click' }"
ng-change="repoSelected()"
/>
@@ -58,37 +54,6 @@
{{repoUrl}} is already anonymized
</div>
</div>
<!-- select repo -->
<div class="form-group" ng-hide="repoUrl">
<label for="repositories">Or select one of your repository</label>
<div class="input-group mb-3">
<select
class="form-control"
id="repositories"
name="repositories"
ng-model="repoUrl"
ng-change="repoSelected()"
>
<option selected value="">None</option>
<option
ng-repeat="repo in repositories|orderBy:'fullName'"
value="https://github.com/{{ repo.fullName }}"
ng-bind="repo.fullName"
></option>
</select>
<div class="input-group-append">
<button
class="btn btn-outline-secondary"
ng-click="getRepositories(true)"
title="Refresh!"
data-toggle="tooltip"
data-placement="bottom"
>
<i class="fa fa-undo"></i>
</button>
</div>
</div>
</div>
<div ng-show="repoUrl">
<!-- Branch -->
<div class="form-group">
@@ -386,29 +351,6 @@
>Display Notebooks</label
>
</div>
<div class="form-group">
<label for="mode">Proxy mode</label>
<select
class="form-control"
id="mode"
name="mode"
ng-model="source.type"
>
<option value="GitHubStream" selected>Stream</option>
<option value="GitHubDownload">Download</option>
</select>
<small class="form-text text-muted"
>How the repository will be anonymized. Stream mode
will request the content on the flight. This is the
only option for repositories bigger than
{{site_options.MAX_REPO_SIZE * 1024| humanFileSize}}.
This repository is {{details.size * 8 *1024 |
humanFileSize}}. Download will download the repository
the repository on the anonymous.4open.science server,
it is faster and offer more features.</small
>
</div>
</div>
<div class="form-group">
<div class="form-check">

View File

@@ -242,7 +242,7 @@
<div class="d-flex d-inline-flex mt-2">
<div class="alert alert-info alert-dismissible my-0 ml-1" ng-show="!v" role="alert" ng-repeat="(f, v) in filters.status">
<strong>{{f | title}}</strong>
<button type="button" class="close" data-dismiss="alert" aria-label="Close">
<button type="button" class="close" data-dismiss="alert" aria-label="Close" ng-click="filters.status[f] = true;">
<span aria-hidden="true">&times;</span>
</button>
</div>

View File

@@ -3,7 +3,12 @@
<div class="leftCol shadow p-1 overflow-auto" ng-show="files">
<tree class="files" file="files"></tree>
<div class="bottom column">
<div class="last-update">
<div
class="last-update"
data-toggle="tooltip"
data-placement="top"
title="{{options.lastUpdateDate}}"
>
Last Update: {{options.lastUpdateDate|date}}
</div>
</div>
@@ -20,6 +25,12 @@
ng-href="{{url}}"
target="__self"
class="btn btn-outline-primary btn-sm"
>View raw</a
>
<a
ng-href="{{url}}?download=true"
target="__self"
class="btn btn-outline-primary btn-sm"
>Download file</a
>
<a

View File

@@ -2,6 +2,7 @@
<div ng-if="type == 'html'" ng-bind-html="content" class="file-content markdown-body"></div>
<div ng-if="type == 'code' && content != null" ui-ace="aceOption" ng-model="content"></div>
<img ng-if="type == 'image'" class="image-content" ng-src="{{url}}"></img>
<iframe class="h-100 overflow-auto w-100 b-0" ng-if="type == 'media'" ng-src="{{url}}"></iframe>
<div class="h-100 overflow-auto" ng-if="type == 'pdf'">
<pdfviewer class="h-100 overflow-auto" src="{{url}}" id="viewer"></pdfviewer>
</div>

View File

@@ -20,22 +20,56 @@ angular
$scope.query = {
page: 1,
limit: 25,
sort: "source.repositoryName",
sort: "lastView",
search: "",
ready: true,
expired: true,
removed: true,
ready: false,
expired: false,
removed: false,
error: true,
preparing: true,
};
$scope.removeCache = (repo) => {
$http.delete("/api/admin/repos/" + repo.repoId).then(
(res) => {
$scope.$apply();
},
(err) => {
console.error(err);
}
);
};
$scope.updateRepository = (repo) => {
const toast = {
title: `Refreshing ${repo.repoId}...`,
date: new Date(),
body: `The repository ${repo.repoId} is going to be refreshed.`,
};
$scope.toasts.push(toast);
repo.s;
$http.post(`/api/repo/${repo.repoId}/refresh`).then(
(res) => {
if (res.data.status == "ready") {
toast.title = `${repo.repoId} is refreshed.`;
} else {
toast.title = `Refreshing of ${repo.repoId}.`;
}
},
(error) => {
toast.title = `Error during the refresh of ${repo.repoId}.`;
toast.body = error.body;
}
);
};
function getRepositories() {
$http.get("/api/admin/repos", { params: $scope.query }).then(
(res) => {
$scope.total = res.data.total;
$scope.totalPage = Math.ceil(res.data.total / $scope.query.limit);
$scope.repositories = res.data.results;
$scope.$apply();
},
(err) => {
console.error(err);
@@ -138,7 +172,7 @@ angular
return false;
};
function getUserRepositories(username) {
$http.get("/api/admin/users/" + username + "/repos", {}).then(
(res) => {
@@ -162,6 +196,51 @@ angular
getUser($routeParams.username);
getUserRepositories($routeParams.username);
$scope.removeCache = (repo) => {
$http.delete("/api/admin/repos/" + repo.repoId).then(
(res) => {
$scope.$apply();
},
(err) => {
console.error(err);
}
);
};
$scope.updateRepository = (repo) => {
const toast = {
title: `Refreshing ${repo.repoId}...`,
date: new Date(),
body: `The repository ${repo.repoId} is going to be refreshed.`,
};
$scope.toasts.push(toast);
repo.s;
$http.post(`/api/repo/${repo.repoId}/refresh`).then(
(res) => {
if (res.data.status == "ready") {
toast.title = `${repo.repoId} is refreshed.`;
} else {
toast.title = `Refreshing of ${repo.repoId}.`;
}
},
(error) => {
toast.title = `Error during the refresh of ${repo.repoId}.`;
toast.body = error.body;
}
);
};
$scope.getGitHubRepositories = (force) => {
$http
.get(`/api/user/${$scope.userInfo.username}/all_repositories`, {
params: { force: "1" },
})
.then((res) => {
$scope.userInfo.repositories = res.data;
});
};
let timeClear = null;
$scope.$watch(
"query",
@@ -247,6 +326,7 @@ angular
(res) => {
$scope.downloadJobs = res.data.downloadQueue;
$scope.removeJobs = res.data.removeQueue;
$scope.removeCaches = res.data.cacheQueue;
},
(err) => {
console.error(err);

View File

@@ -5,17 +5,9 @@ angular
"ui.ace",
"ngPDFViewer",
"pascalprecht.translate",
"angular-google-analytics",
"admin",
])
.config(function (
$routeProvider,
$locationProvider,
$translateProvider,
AnalyticsProvider
) {
AnalyticsProvider.setAccount("UA-5954162-28");
.config(function ($routeProvider, $locationProvider, $translateProvider) {
$translateProvider.useStaticFilesLoader({
prefix: "/i18n/locale-",
suffix: ".json",
@@ -142,7 +134,6 @@ angular
});
$locationProvider.html5Mode(true);
})
.run(["Analytics", function (Analytics) {}])
.filter("humanFileSize", function () {
return function humanFileSize(bytes, si = false, dp = 1) {
const thresh = si ? 1000 : 1024;
@@ -259,7 +250,7 @@ angular
},
link: function (scope, elem, attrs) {
function update() {
elem.html(marked(scope.content, { baseUrl: $location.url() }));
elem.html(renderMD(scope.content, $location.url()));
}
scope.$watch(attrs.terms, update);
scope.$watch("terms", update);
@@ -415,18 +406,30 @@ angular
restrict: "E",
scope: { file: "=" },
controller: function ($element, $scope, $http) {
function renderNotebookJSON(json) {
const notebook = nb.parse(json);
try {
$element.html("");
$element.append(notebook.render());
Prism.highlightAll();
} catch (error) {
$element.html("Unable to render the notebook.");
}
}
function render() {
if (!$scope.file) return;
$http.get($scope.file).then((res) => {
var notebook = nb.parse(res.data);
if ($scope.$parent.content) {
try {
var rendered = notebook.render();
$element.append(rendered);
Prism.highlightAll();
renderNotebookJSON(JSON.parse($scope.$parent.content));
} catch (error) {
$element.html("Unable to render the notebook.");
$element.html(
"Unable to render the notebook invalid notebook format."
);
}
});
} else if ($scope.file) {
$http
.get($scope.file.download_url)
.then((res) => renderNotebookJSON(res.data));
}
}
$scope.$watch("file", (v) => {
render();
@@ -508,7 +511,6 @@ angular
$http.get("/api/user").then(
(res) => {
if (res) $scope.user = res.data;
getQuota();
},
() => {
$scope.user = null;
@@ -528,22 +530,6 @@ angular
);
}
getOptions();
function getQuota() {
$http.get("/api/user/quota").then((res) => {
$scope.quota = res.data;
$scope.quota.storage.percent = $scope.quota.storage.total
? ($scope.quota.storage.used * 100) / $scope.quota.storage.total
: 100;
$scope.quota.file.percent = $scope.quota.file.total
? ($scope.quota.file.used * 100) / $scope.quota.file.total
: 100;
$scope.quota.repository.percent = $scope.quota.repository.total
? ($scope.quota.repository.used * 100) /
$scope.quota.repository.total
: 100;
}, console.error);
}
getQuota();
function getMessage() {
$http.get("/api/message").then(
@@ -703,6 +689,23 @@ angular
};
$scope.orderBy = "-anonymizeDate";
function getQuota() {
$http.get("/api/user/quota").then((res) => {
$scope.quota = res.data;
$scope.quota.storage.percent = $scope.quota.storage.total
? ($scope.quota.storage.used * 100) / $scope.quota.storage.total
: 100;
$scope.quota.file.percent = $scope.quota.file.total
? ($scope.quota.file.used * 100) / $scope.quota.file.total
: 100;
$scope.quota.repository.percent = $scope.quota.repository.total
? ($scope.quota.repository.used * 100) /
$scope.quota.repository.total
: 100;
}, console.error);
}
getQuota();
function getRepositories() {
$http.get("/api/user/anonymized_repositories").then(
(res) => {
@@ -987,9 +990,7 @@ angular
$scope.terms = "";
$scope.defaultTerms = "";
$scope.branches = [];
$scope.repositories = [];
$scope.source = {
type: "GitHubStream",
branch: "",
commit: "",
};
@@ -1067,17 +1068,6 @@ angular
}
});
$scope.getRepositories = (force) => {
$http
.get("/api/user/all_repositories", {
params: { force: force === true ? "1" : "0" },
})
.then((res) => {
$scope.repositories = res.data;
});
};
$scope.getRepositories();
$scope.repoSelected = async () => {
$scope.terms = $scope.defaultTerms;
$scope.repoId = "";
@@ -1164,15 +1154,9 @@ angular
resetValidity();
const res = await $http.get(`/api/repo/${o.owner}/${o.repo}/`);
$scope.details = res.data;
if ($scope.details.size > $scope.site_options.MAX_REPO_SIZE) {
$scope.anonymize.mode.$$element[0].disabled = true;
$scope.$apply(() => {
$scope.source.type = "GitHubStream";
checkSourceType();
});
if (!$scope.repoId) {
$scope.repoId = $scope.details.repo + "-" + generateRandomId(4);
}
$scope.repoId = $scope.details.repo + "-" + generateRandomId(4);
await $scope.getBranches();
} catch (error) {
console.log("here", error);
@@ -1327,7 +1311,7 @@ angular
}
$scope.anonymize_readme = content;
const html = marked($scope.anonymize_readme);
const html = renderMD($scope.anonymize_readme, $location.url());
$scope.html_readme = $sce.trustAsHtml(html);
setTimeout(Prism.highlightAll, 150);
}
@@ -1439,7 +1423,7 @@ angular
const selected = $scope.branches.filter(
(f) => f.name == $scope.source.branch
)[0];
checkSourceType();
checkHasPage();
if (selected) {
$scope.source.commit = selected.commit;
@@ -1450,22 +1434,15 @@ angular
}
});
function checkSourceType() {
if ($scope.source.type == "GitHubStream") {
$scope.options.page = false;
//$scope.anonymize.page.$$element[0].disabled = true;
} else {
if ($scope.details && $scope.details.hasPage) {
$scope.anonymize.page.$$element[0].disabled = false;
if ($scope.details.pageSource.branch != $scope.source.branch) {
$scope.anonymize.page.$$element[0].disabled = true;
}
function checkHasPage() {
if ($scope.details && $scope.details.hasPage) {
$scope.anonymize.page.$$element[0].disabled = false;
if ($scope.details.pageSource.branch != $scope.source.branch) {
$scope.anonymize.page.$$element[0].disabled = true;
}
}
}
$scope.$watch("source.type", checkSourceType);
$scope.$watch("terms", anonymize);
$scope.$watch("options.image", anonymize);
$scope.$watch("options.link", anonymize);
@@ -1502,13 +1479,26 @@ angular
"heif",
"heic",
];
const mediaFiles = [
"wav",
"mp3",
"ogg",
"mp4",
"avi",
"webm",
"mov",
"mpg",
"wma",
];
$scope.$on("$routeUpdate", function (event, current) {
if (($routeParams.path || "") == $scope.filePath) {
return;
}
$scope.filePath = $routeParams.path || "";
$scope.paths = $scope.filePath.split("/");
$scope.paths = $scope.filePath
.split("/")
.filter((f) => f && f.trim().length > 0);
if ($scope.repoId != $routeParams.repoId) {
return init();
@@ -1517,45 +1507,55 @@ angular
updateContent();
});
function selectFile() {
const readmePriority = [
"readme.md",
"readme.txt",
"readme.org",
"readme.1st",
"readme",
];
// find current folder
let currentFolder = $scope.files;
for (const p of $scope.paths) {
if (currentFolder[p]) {
currentFolder = currentFolder[p];
}
}
if (currentFolder.size && Number.isInteger(currentFolder.size)) {
// a file is already selected
return;
}
const readmeCandidates = {};
for (const file in currentFolder) {
if (file.toLowerCase().indexOf("readme") > -1) {
readmeCandidates[file.toLowerCase()] = file;
}
}
let best_match = null;
for (const p of readmePriority) {
if (readmeCandidates[p]) {
best_match = p;
break;
}
}
if (!best_match && Object.keys(readmeCandidates).length > 0)
best_match = Object.keys(readmeCandidates)[0];
if (best_match) {
let uri = $location.url();
if (uri[uri.length - 1] != "/") {
uri += "/";
}
// redirect to readme
$location.url(uri + readmeCandidates[best_match]);
}
}
function getFiles(callback) {
$http.get(`/api/repo/${$scope.repoId}/files/`).then(
(res) => {
$scope.files = res.data;
if ($scope.paths.length == 0 || $scope.paths[0] == "") {
// redirect to readme
const readmeCandidates = {};
for (const file in $scope.files) {
if (file.toLowerCase().indexOf("readme") > -1) {
readmeCandidates[file.toLowerCase()] = file;
}
}
const readmePriority = [
"readme.md",
"readme.txt",
"readme.org",
"readme.1st",
"readme",
];
let best_match = null;
for (const p of readmePriority) {
if (readmeCandidates[p]) {
best_match = p;
break;
}
}
if (!best_match && Object.keys(readmeCandidates).length > 0)
best_match = Object.keys(readmeCandidates)[0];
if (best_match) {
let uri = $location.url();
if (uri[uri.length - 1] != "/") {
uri += "/";
}
// redirect to readme
$location.url(uri + readmeCandidates[best_match]);
}
}
selectFile();
if (callback) {
return callback();
}
@@ -1615,6 +1615,9 @@ angular
if (imageFiles.indexOf(extension) > -1) {
return "image";
}
if (mediaFiles.indexOf(extension) > -1) {
return "media";
}
return "code";
}
@@ -1642,9 +1645,8 @@ angular
}
if ($scope.type == "md") {
const md = contentAbs2Relative(res.data);
$scope.content = $sce.trustAsHtml(
marked(md, { baseUrl: $location.url() })
renderMD(res.data, $location.url())
);
$scope.type = "html";
}
@@ -1668,13 +1670,22 @@ angular
},
(err) => {
$scope.type = "error";
$scope.content = "unknown_error";
try {
err.data = JSON.parse(err.data);
} catch (ignore) {}
if (err.data.error) {
$scope.content = err.data.error;
} else {
$scope.content = err.data;
if (err.data.error) {
$scope.content = err.data.error;
} else {
$scope.content = err.data;
}
} catch (ignore) {
console.log(err);
if (err.status == -1) {
$scope.content = "request_error";
} else if (err.status == 502) {
// cloudflare error
$scope.content = "unreachable";
}
}
}
);

File diff suppressed because one or more lines are too long

public/script/external/github-emojis.js (vendored new file, 1879 lines)

File diff suppressed because it is too large.

public/script/external/marked-emoji.js (vendored new file, 57 lines)
View File

@@ -0,0 +1,57 @@
const defaultOptions = {
// emojis: {}, required
unicode: false,
};
function markedEmoji(options) {
options = {
...defaultOptions,
...options,
};
if (!options.emojis) {
throw new Error("Must provide emojis to markedEmoji");
}
return {
extensions: [
{
name: "emoji",
level: "inline",
start(src) {
return src.indexOf(":");
},
tokenizer(src, tokens) {
const rule = /^:(.+?):/;
const match = rule.exec(src);
if (!match) {
return;
}
const name = match[1];
const emoji = options.emojis[name];
if (!emoji) {
return;
}
return {
type: "emoji",
raw: match[0],
name,
emoji,
};
},
renderer(token) {
if (options.unicode) {
return token.emoji;
} else {
return `<img class="emoji" alt="${token.name}" src="${
token.emoji
}"${this.parser.options.xhtml ? " /" : ""}>`;
}
},
},
],
};
}
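The next file registers this extension with `marked.use(markedEmoji({ emojis: githubEmojis, unicode: false }))`. A small standalone usage sketch (the emoji URL is illustrative; `marked` and `markedEmoji` are browser globals in this app, declared here only so the sketch type-checks):

```ts
// Register the emoji extension above and render a string containing :+1:.
declare const marked: { use(extension: object): void; parse(markdown: string): string };
declare function markedEmoji(options: {
  emojis: Record<string, string>;
  unicode?: boolean;
}): object;

marked.use(
  markedEmoji({
    emojis: { "+1": "https://github.githubassets.com/images/icons/emoji/unicode/1f44d.png" },
    unicode: false, // render an <img class="emoji"> tag instead of a unicode character
  })
);

const html = marked.parse("Thanks for the fix :+1:");
// html contains <img class="emoji" alt="+1" src=".../1f44d.png">
```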

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -109,3 +109,57 @@ function parseGithubUrl(url) {
throw "Invalid url";
}
}
marked.use(
markedEmoji({
emojis: githubEmojis,
unicode: false,
})
);
function renderMD(md, baseUrl) {
md = contentAbs2Relative(md);
const renderer = new marked.Renderer();
// katex
function mathsExpression(expr) {
if (expr.match(/^\$\$[\s\S]*\$\$$/)) {
expr = expr.substr(2, expr.length - 4);
return katex.renderToString(expr, { displayMode: true });
} else if (expr.match(/^\$[\s\S]*\$$/)) {
expr = expr.substr(1, expr.length - 2);
return katex.renderToString(expr, { isplayMode: false });
}
}
const rendererCode = renderer.code;
renderer.code = function (code, lang, escaped) {
if (!lang) {
const math = mathsExpression(code);
if (math) {
return math;
}
}
// call default renderer
return rendererCode.call(this, code, lang, escaped);
};
const rendererCodespan = renderer.codespan;
renderer.codespan = function (text) {
const math = mathsExpression(text);
if (math) {
return math;
}
return rendererCodespan.call(this, text);
};
const rendererLink = renderer.link;
renderer.link = function (href, title, text) {
// wrap videos links (mp4 and mov) with media https://github.blog/2021-05-13-video-uploads-available-github/
if (href.match(/\.mp4$|\.mov$/)) {
return `<div class="media"><video controls title="${title}" src="${href}">${text}</video></div>`;
}
return rendererLink.call(this, href, title, text);
};
return marked.parse(md, { baseUrl, renderer });
}
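Elsewhere in `app.js` this helper is called as `renderMD($scope.anonymize_readme, $location.url())` and `renderMD(res.data, $location.url())`; a small usage sketch (since `marked`, `katex`, and the emoji map are loaded as browser scripts, `renderMD` is only declared here for the sketch):

```ts
// Usage sketch for the renderMD helper defined above.
declare function renderMD(md: string, baseUrl: string): string;

const readme = [
  "# Demo",
  "Math (detected in code spans): `$e^{i\\pi} + 1 = 0$`",
  "[demo clip](https://example.com/clip.mp4)",
  "Thanks :+1:",
].join("\n");

// Produces HTML where the code span is rendered with KaTeX, the .mp4 link is
// wrapped in a <video> element, and the emoji becomes an <img class="emoji"> tag.
document.body.innerHTML = renderMD(readme, "/r/demo-repo/");
```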

View File

@@ -1,44 +1,29 @@
import { join, basename } from "path";
import { Response } from "express";
import { Readable, pipeline } from "stream";
import { promisify } from "util";
import { Readable } from "stream";
import Repository from "./Repository";
import { Tree, TreeElement, TreeFile } from "./types";
import { FILE_TYPE, Tree, TreeElement, TreeFile } from "./types";
import storage from "./storage";
import config from "../config";
import { anonymizePath, anonymizeStream } from "./anonymize-utils";
import {
anonymizePath,
AnonymizeTransformer,
isTextFile,
} from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "./routes/route-utils";
function tree2sha(
tree: any,
output: { [key: string]: string } = {},
parent: string = ""
): { [key: string]: string } {
for (let i in tree) {
const sha = tree[i].sha as string;
const size = tree[i].size as number;
if (sha != null && size != null) {
output[sha] = join(parent, i);
} else if (tree[i].child) {
tree2sha(tree[i].child as Tree, output, join(parent, i));
} else {
tree2sha(tree[i] as Tree, output, join(parent, i));
}
}
return output;
}
import { lookup } from "mime-types";
/**
* Represent a file in a anonymized repository
*/
export default class AnonymizedFile {
private _originalPath: string;
private _originalPath: string | undefined;
private fileSize?: number;
repository: Repository;
anonymizedPath: string;
sha?: string;
_sha?: string;
constructor(data: { repository: Repository; anonymizedPath: string }) {
this.repository = data.repository;
@@ -50,6 +35,12 @@ export default class AnonymizedFile {
this.anonymizedPath = data.anonymizedPath;
}
async sha() {
if (this._sha) return this._sha.replace(/"/g, "");
await this.originalPath();
return this._sha?.replace(/"/g, "");
}
/**
* De-anonymize the path
*
@@ -64,27 +55,16 @@ export default class AnonymizedFile {
});
const paths = this.anonymizedPath.trim().split("/");
let currentAnonymized: TreeElement = await this.repository.anonymizedFiles({
includeSha: true,
});
let currentOriginal: TreeElement = await this.repository.files();
let currentOriginal = (await this.repository.files({
force: false,
})) as TreeElement;
let currentOriginalPath = "";
let isAmbiguous = false;
for (let i = 0; i < paths.length; i++) {
const fileName = paths[i];
if (fileName == "") {
continue;
}
if (!currentAnonymized[fileName]) {
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
}
currentAnonymized = currentAnonymized[fileName];
if (!isAmbiguous && !currentOriginal[fileName]) {
if (!(currentOriginal as Tree)[fileName]) {
// anonymize all the file in the folder and check if there is one that match the current filename
const options = [];
for (let originalFileName in currentOriginal) {
@@ -99,51 +79,65 @@ export default class AnonymizedFile {
// if only one option we found the original filename
if (options.length == 1) {
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = currentOriginal[options[0]];
currentOriginal = (currentOriginal as Tree)[options[0]];
} else if (options.length == 0) {
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
} else {
isAmbiguous = true;
const nextName = paths[i + 1];
if (!nextName) {
// if there is no next name we can't find the file and we return the first option
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = (currentOriginal as Tree)[options[0]];
}
let found = false;
for (const option of options) {
const optionTree = (currentOriginal as Tree)[option];
if ((optionTree as Tree).child) {
const optionTreeChild = (optionTree as Tree).child;
if ((optionTreeChild as Tree)[nextName]) {
currentOriginalPath = join(currentOriginalPath, option);
currentOriginal = optionTreeChild;
found = true;
break;
}
}
}
if (!found) {
// if we didn't find the next name we return the first option
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = (currentOriginal as Tree)[options[0]];
}
}
} else if (!isAmbiguous) {
} else {
currentOriginalPath = join(currentOriginalPath, fileName);
currentOriginal = currentOriginal[fileName];
currentOriginal = (currentOriginal as Tree)[fileName];
}
}
if (
currentAnonymized.sha === undefined ||
currentAnonymized.size === undefined
currentOriginal.sha === undefined ||
currentOriginal.size === undefined
) {
throw new AnonymousError("folder_not_supported", { object: this });
}
const file: TreeFile = currentAnonymized as TreeFile;
const file = currentOriginal as TreeFile;
this.fileSize = file.size;
this.sha = file.sha;
if (isAmbiguous) {
// it should never happen
const shaTree = tree2sha(currentOriginal);
if (!currentAnonymized.sha || !shaTree[file.sha]) {
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
}
this._originalPath = join(currentOriginalPath, shaTree[file.sha]);
} else {
this._originalPath = currentOriginalPath;
}
this._sha = file.sha;
this._originalPath = currentOriginalPath;
return this._originalPath;
}
async extension() {
const filename = basename(await this.originalPath());
extension() {
const filename = basename(this.anonymizedPath);
const extensions = filename.split(".").reverse();
return extensions[0].toLowerCase();
}
async isImage(): Promise<boolean> {
const extension = await this.extension();
isImage() {
const extension = this.extension();
return [
"png",
"jpg",
@@ -160,34 +154,41 @@ export default class AnonymizedFile {
"heic",
].includes(extension);
}
async isFileSupported() {
const extension = await this.extension();
isFileSupported() {
const extension = this.extension();
if (!this.repository.options.pdf && extension == "pdf") {
return false;
}
if (!this.repository.options.image && (await this.isImage())) {
if (!this.repository.options.image && this.isImage()) {
return false;
}
return true;
}
async content(): Promise<Readable> {
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
await this.originalPath();
}
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
throw new AnonymousError("file_too_big", {
object: this,
httpStatus: 403,
});
}
if (await storage.exists(this.originalCachePath)) {
const exist = await storage.exists(this.originalCachePath);
if (exist == FILE_TYPE.FILE) {
return storage.read(this.originalCachePath);
} else if (exist == FILE_TYPE.FOLDER) {
throw new AnonymousError("folder_not_supported", {
object: this,
httpStatus: 400,
});
}
return await this.repository.source?.getFileContent(this);
}
async anonymizedContent() {
await this.originalPath();
const rs = await this.content();
return rs.pipe(anonymizeStream(await this.originalPath(), this.repository));
return (await this.content()).pipe(new AnonymizeTransformer(this));
}
get originalCachePath() {
@@ -196,18 +197,70 @@ export default class AnonymizedFile {
object: this,
httpStatus: 400,
});
if (!this._originalPath) {
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
throw new AnonymousError("path_not_defined", {
object: this,
httpStatus: 400,
});
} else {
return join(this.repository.originalCachePath, this.anonymizedPath);
}
}
return join(this.repository.originalCachePath, this._originalPath);
}
async send(res: Response): Promise<void> {
const pipe = promisify(pipeline);
try {
if (await this.extension()) {
res.contentType(await this.extension());
return new Promise(async (resolve, reject) => {
try {
const content = await this.content();
const mime = lookup(this.anonymizedPath);
if (mime && this.extension() != "ts") {
res.contentType(mime);
} else if (isTextFile(this.anonymizedPath)) {
res.contentType("text/plain");
}
res.header("Accept-Ranges", "none");
let fileInfo: Awaited<ReturnType<typeof storage.fileInfo>>;
try {
fileInfo = await storage.fileInfo(this.originalCachePath);
} catch (error) {
// unable to get file size
console.error(error);
}
const anonymizer = new AnonymizeTransformer(this);
anonymizer.once("transform", (data) => {
if (data.isText && !mime) {
res.contentType("text/plain");
}
if (fileInfo?.size && !data.wasAnonimized) {
// the text files may be anonymized and therefore the size may be different
res.header("Content-Length", fileInfo.size.toString());
}
});
content
.pipe(anonymizer)
.pipe(res)
.on("close", () => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
resolve();
})
.on("error", (error) => {
if (!content.closed && !content.destroyed) {
content.destroy();
}
reject(error);
handleError(error, res);
});
} catch (error) {
handleError(error, res);
}
-      await pipe(await this.anonymizedContent(), res);
-    } catch (error) {
-      handleError(error, res);
-    }
});
}
}
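For orientation, a minimal sketch of the pipe-and-cleanup pattern the reworked send() relies on; the helper name and the generic Transform are illustrative, not part of the project:

import { Readable, Transform } from "stream";
import type { Response } from "express";

// Pipe a source stream through a transformer into the HTTP response and make
// sure the source is destroyed once the response closes or fails.
function streamThrough(
  content: Readable,
  transformer: Transform,
  res: Response
): Promise<void> {
  return new Promise((resolve, reject) => {
    content
      .pipe(transformer)
      .pipe(res)
      .on("close", () => {
        if (!content.destroyed) content.destroy();
        resolve();
      })
      .on("error", (error) => {
        if (!content.destroyed) content.destroy();
        reject(error);
      });
  });
}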

View File

@@ -5,7 +5,7 @@ import { ConferenceStatus } from "./types";
export default class Conference {
private _data: IConferenceDocument;
-  private _repositories: Repository[] = null;
+  private _repositories: Repository[] = [];
constructor(data: IConferenceDocument) {
this._data = data;

View File

@@ -17,61 +17,84 @@ export default class PullRequest {
constructor(data: IAnonymizedPullRequestDocument) {
this._model = data;
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner.model.isNew = false;
}
-  getToken() {
-    if (this.owner && this.owner.accessToken) {
-      return this.owner.accessToken;
+  async getToken() {
+    let owner = this.owner.model;
if (owner && !owner.accessTokens.github) {
const temp = await UserModel.findById(owner._id);
if (temp) {
owner = temp;
}
}
if (owner && owner.accessTokens && owner.accessTokens.github) {
if (owner.accessTokens.github != this._model.source.accessToken) {
this._model.source.accessToken = owner.accessTokens.github;
}
return owner.accessTokens.github;
}
if (this._model.source.accessToken) {
try {
return this._model.source.accessToken;
} catch (error) {
console.debug("[ERROR] Token is invalid", this.pullRequestId);
console.debug(
"[ERROR] Token is invalid",
this._model.source.pullRequestId
);
}
}
return config.GITHUB_TOKEN;
}
async download() {
console.debug("[INFO] Downloading pull request", this.pullRequestId);
const auth = this.getToken();
const octokit = new Octokit({ auth });
console.debug(
"[INFO] Downloading pull request",
this._model.source.pullRequestId
);
const auth = await this.getToken();
const octokit = new Octokit({ auth: auth });
const [owner, repo] = this._model.source.repositoryFullName.split("/");
const pull_number = this._model.source.pullRequestId;
const [prInfo, comments, diff] = await Promise.all([
octokit.rest.pulls.get({
owner,
repo,
pull_number,
}),
-      octokit.rest.issues.listComments({
-        owner,
-        repo,
-        issue_number: pull_number,
-        per_page: 100,
-      }),
+      octokit.paginate(
+        "GET /repos/{owner}/{repo}/issues/{issue_number}/comments",
+        {
+          owner: owner,
+          repo: repo,
+          issue_number: pull_number,
+          per_page: 100,
+        }
+      ),
got(`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`),
]);
this._model.pullRequest = {
diff: diff.body,
title: prInfo.data.title,
-      body: prInfo.data.body,
+      body: prInfo.data.body || "",
      creationDate: new Date(prInfo.data.created_at),
      updatedDate: new Date(prInfo.data.updated_at),
      draft: prInfo.data.draft,
      merged: prInfo.data.merged,
      mergedDate: prInfo.data.merged_at
        ? new Date(prInfo.data.merged_at)
-        : null,
+        : undefined,
      state: prInfo.data.state,
      baseRepositoryFullName: prInfo.data.base.repo.full_name,
-      headRepositoryFullName: prInfo.data.head.repo.full_name,
-      comments: comments.data.map((comment) => ({
-        body: comment.body,
+      headRepositoryFullName: prInfo.data.head.repo?.full_name,
+      comments: comments.map((comment) => ({
+        body: comment.body || "",
        creationDate: new Date(comment.created_at),
        updatedDate: new Date(comment.updated_at),
-        author: comment.user.login,
+        author: comment.user?.login || "",
})),
};
}
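For reference, a reduced sketch of the pagination change above: octokit.paginate() follows the Link headers of the issue-comments endpoint and resolves with the flattened list of all comments, instead of the single 100-item page that listComments() returns (token handling omitted):

import { Octokit } from "@octokit/rest";

async function fetchAllComments(
  auth: string,
  owner: string,
  repo: string,
  issue_number: number
) {
  const octokit = new Octokit({ auth });
  // Resolves once every page has been fetched and concatenated.
  return octokit.paginate(
    "GET /repos/{owner}/{repo}/issues/{issue_number}/comments",
    { owner, repo, issue_number, per_page: 100 }
  );
}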
@@ -82,7 +105,8 @@ export default class PullRequest {
check() {
if (
this._model.options.expirationMode !== "never" &&
this.status == "ready"
this.status == "ready" &&
this._model.options.expirationDate
) {
if (this._model.options.expirationDate <= new Date()) {
this.expire();
@@ -135,10 +159,10 @@ export default class PullRequest {
* @returns void
*/
async anonymize() {
if (this.status == "ready") return;
await this.updateStatus("preparing");
if (this.status === RepositoryStatus.READY) return;
await this.updateStatus(RepositoryStatus.PREPARING);
await this.updateIfNeeded({ force: true });
return this.updateStatus("ready");
return this.updateStatus(RepositoryStatus.READY);
}
/**
@@ -166,18 +190,18 @@ export default class PullRequest {
* Expire the pullRequest
*/
async expire() {
await this.updateStatus("expiring");
await this.updateStatus(RepositoryStatus.EXPIRING);
await this.resetSate();
await this.updateStatus("expired");
await this.updateStatus(RepositoryStatus.EXPIRED);
}
/**
* Remove the pullRequest
*/
async remove() {
await this.updateStatus("removing");
await this.updateStatus(RepositoryStatus.REMOVING);
await this.resetSate();
await this.updateStatus("removed");
await this.updateStatus(RepositoryStatus.REMOVED);
}
/**
@@ -187,7 +211,16 @@ export default class PullRequest {
if (status) this._model.status = status;
if (statusMessage) this._model.statusMessage = statusMessage;
// remove cache
-    this._model.pullRequest = null;
+    this._model.pullRequest.comments = [];
+    this._model.pullRequest.body = "";
+    this._model.pullRequest.title = "";
+    this._model.pullRequest.diff = "";
+    this._model.pullRequest.baseRepositoryFullName = "";
+    this._model.pullRequest.headRepositoryFullName = "";
+    this._model.pullRequest.merged = false;
+    this._model.pullRequest.mergedDate = undefined;
+    this._model.pullRequest.state = "closed";
+    this._model.pullRequest.draft = false;
return Promise.all([this._model.save()]);
}
@@ -222,7 +255,7 @@ export default class PullRequest {
output.body = anonymizeContent(this._model.pullRequest.body, this);
}
if (this.options.comments) {
output.comments = this._model.pullRequest.comments.map((comment) => {
output.comments = this._model.pullRequest.comments?.map((comment) => {
const o: any = {};
if (this.options.body) o.body = anonymizeContent(comment.body, this);
if (this.options.username)

View File

@@ -1,6 +1,13 @@
import { join } from "path";
import storage from "./storage";
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
import {
FILE_TYPE,
RepositoryStatus,
Source,
Tree,
TreeElement,
TreeFile,
} from "./types";
import { Readable } from "stream";
import User from "./User";
import GitHubStream from "./source/GitHubStream";
@@ -9,13 +16,43 @@ import Zip from "./source/Zip";
import { anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import { IAnonymizedRepositoryDocument } from "./database/anonymizedRepositories/anonymizedRepositories.types";
import { anonymizeStream } from "./anonymize-utils";
import { AnonymizeTransformer } from "./anonymize-utils";
import GitHubBase from "./source/GitHubBase";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { downloadQueue } from "./queue";
import { downloadQueue, removeQueue } from "./queue";
import { isConnected } from "./database/database";
import AnonymizedFile from "./AnonymizedFile";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "./source/GitHubRepository";
import config from "../config";
function anonymizeTreeRecursive(
tree: TreeElement,
terms: string[],
opt: {
/** Include the file sha in the response */
includeSha: boolean;
} = {
includeSha: false,
}
): TreeElement {
if (typeof tree.size !== "object" && tree.sha !== undefined) {
if (opt?.includeSha) return tree as TreeFile;
return { size: tree.size } as TreeFile;
}
const output: Tree = {};
Object.getOwnPropertyNames(tree).forEach((file) => {
const anonymizedPath = anonymizePath(file, terms);
output[anonymizedPath] = anonymizeTreeRecursive(
(tree as Tree)[file],
terms,
opt
);
});
return output;
}
export default class Repository {
private _model: IAnonymizedRepositoryDocument;
@@ -41,6 +78,7 @@ export default class Repository {
});
}
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner.model.isNew = false;
}
/**
@@ -48,28 +86,19 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The anonymized file tree
*/
async anonymizedFiles(opt?: {
/** Force to refresh the file tree */
force?: boolean;
/** Include the file sha in the response */
includeSha: boolean;
}): Promise<Tree> {
const terms = this._model.options.terms || [];
-    function anonymizeTreeRecursive(tree: TreeElement): TreeElement {
-      if (Number.isInteger(tree.size) && tree.sha !== undefined) {
-        if (opt?.includeSha) return tree as TreeFile;
-        return { size: tree.size } as TreeFile;
-      }
-      const output: Tree = {};
-      for (const file in tree) {
-        const anonymizedPath = anonymizePath(file, terms);
-        output[anonymizedPath] = anonymizeTreeRecursive(tree[file]);
-      }
-      return output;
async anonymizedFiles(
opt: {
/** Force to refresh the file tree */
force?: boolean;
/** Include the file sha in the response */
includeSha: boolean;
} = {
force: false,
includeSha: false,
}
-    return anonymizeTreeRecursive(await this.files(opt)) as Tree;
): Promise<Tree> {
const terms = this._model.options.terms || [];
return anonymizeTreeRecursive(await this.files(opt), terms, opt) as Tree;
}
/**
@@ -78,11 +107,18 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The file tree
*/
async files(opt?: { force?: boolean }) {
async files(opt: { force?: boolean } = { force: false }): Promise<Tree> {
if (!this._model.originalFiles && !opt.force) {
const res = await AnonymizedRepositoryModel.findById(this._model._id, {
originalFiles: 1,
});
if (!res) throw new AnonymousError("repository_not_found");
this.model.originalFiles = res.originalFiles;
}
if (
this._model.originalFiles &&
Object.keys(this._model.originalFiles).length !== 0 &&
!opt?.force
Object.getOwnPropertyNames(this._model.originalFiles).length !== 0 &&
!opt.force
) {
return this._model.originalFiles;
}
@@ -90,9 +126,6 @@ export default class Repository {
this._model.originalFiles = files;
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
await this._model.save();
this._model.originalFiles = files;
return files;
}
@@ -102,7 +135,8 @@ export default class Repository {
check() {
if (
this._model.options.expirationMode !== "never" &&
this.status == "ready"
this.status == "ready" &&
this._model.options.expirationDate
) {
if (this._model.options.expirationDate <= new Date()) {
this.expire();
@@ -137,11 +171,16 @@ export default class Repository {
*
* @returns A stream of anonymized repository compressed
*/
-  zip(): Readable {
+  zip(): Promise<Readable> {
    return storage.archive(this.originalCachePath, {
      format: "zip",
-      fileTransformer: (filename) =>
-        anonymizeStream(filename, this) as Transformer,
+      fileTransformer: (filename: string) =>
+        new AnonymizeTransformer(
+          new AnonymizedFile({
+            repository: this,
+            anonymizedPath: filename,
+          })
+        ),
});
}
@@ -159,33 +198,65 @@ export default class Repository {
) {
// Only GitHubBase can be update for the moment
if (this.source instanceof GitHubBase) {
const token = await this.source.getToken();
const branches = await this.source.githubRepository.branches({
force: true,
accessToken: await this.source.getToken(),
accessToken: token,
});
const branch = this.source.branch;
const newCommit = branches.filter((f) => f.name == branch.name)[0]
?.commit;
if (branch.commit == newCommit && this.status == "ready") {
console.log(`${this._model.repoId} is up to date`);
console.log(`[UPDATE] ${this._model.repoId} is up to date`);
return;
}
this._model.source.commit = newCommit;
const commitInfo = await this.source.githubRepository.getCommitInfo(
newCommit,
{
accessToken: token,
}
);
if (commitInfo.commit.author?.date) {
this._model.source.commitDate = new Date(
commitInfo.commit.author?.date
);
}
branch.commit = newCommit;
if (!newCommit) {
console.error(
`${branch.name} for ${this.source.githubRepository.fullName} is not found`
);
await this.updateStatus("error", "branch_not_found");
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
await this.resetSate();
throw new AnonymousError("branch_not_found", {
object: this,
});
}
this._model.anonymizeDate = new Date();
console.log(`${this._model.repoId} will be updated to ${newCommit}`);
await this.resetSate("preparing");
console.log(
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
);
if (this.source.type == "GitHubDownload") {
const repository = await getRepositoryFromGitHub({
accessToken: await this.source.getToken(),
owner: this.source.githubRepository.owner,
repo: this.source.githubRepository.repo,
});
if (
repository.size === undefined ||
repository.size > config.MAX_REPO_SIZE
) {
console.log(
`[UPDATE] ${this._model.repoId} will be streamed instead of downloaded`
);
this._model.source.type = "GitHubStream";
}
}
await this.resetSate(RepositoryStatus.PREPARING);
await downloadQueue.add(this.repoId, this, {
jobId: this.repoId,
attempts: 3,
@@ -199,19 +270,19 @@ export default class Repository {
* @returns void
*/
async anonymize() {
if (this.status == "ready") return;
await this.updateStatus("preparing");
if (this.status === RepositoryStatus.READY) return;
await this.updateStatus(RepositoryStatus.PREPARING);
await this.files();
return this.updateStatus("ready");
return this.updateStatus(RepositoryStatus.READY);
}
/**
* Update the last view and view count
*/
async countView() {
if (!isConnected) return this.model;
this._model.lastView = new Date();
this._model.pageView = (this._model.pageView || 0) + 1;
if (!isConnected) return this.model;
return this._model.save();
}
@@ -233,30 +304,33 @@ export default class Repository {
* Expire the repository
*/
async expire() {
await this.updateStatus("expiring");
await this.updateStatus(RepositoryStatus.EXPIRING);
await this.resetSate();
await this.updateStatus("expired");
await this.updateStatus(RepositoryStatus.EXPIRED);
}
/**
* Remove the repository
*/
async remove() {
await this.updateStatus("removing");
await this.updateStatus(RepositoryStatus.REMOVING);
await this.resetSate();
await this.updateStatus("removed");
await this.updateStatus(RepositoryStatus.REMOVED);
}
/**
* Reset/delete the state of the repository
*/
async resetSate(status?: RepositoryStatus, statusMessage?: string) {
const p = this.updateStatus(status, statusMessage);
// remove attribute
this._model.size = { storage: 0, file: 0 };
this._model.originalFiles = null;
this._model.originalFiles = undefined;
if (status) {
await this.updateStatus(status, statusMessage);
}
// remove cache
return Promise.all([p, this.removeCache()]);
await this.removeCache();
console.log(`[RESET] ${this._model.repoId} has been reset`);
}
/**
@@ -264,7 +338,13 @@ export default class Repository {
* @returns
*/
async removeCache() {
-    return storage.rm(this._model.repoId + "/");
+    this.model.isReseted = true;
+    await this.model.save();
+    if (
+      (await storage.exists(this._model.repoId + "/")) !== FILE_TYPE.NOT_FOUND
+    ) {
+      return storage.rm(this._model.repoId + "/");
+    }
}
/**
@@ -282,7 +362,7 @@ export default class Repository {
*/
file: number;
}> {
if (this.status != "ready") return { storage: 0, file: 0 };
if (this.status !== RepositoryStatus.READY) return { storage: 0, file: 0 };
if (this._model.size.file) return this._model.size;
function recursiveCount(files: Tree): { storage: number; file: number } {
const out = { storage: 0, file: 0 };
@@ -337,7 +417,10 @@ export default class Repository {
}
get originalCachePath() {
-    return join(this._model.repoId, "original") + "/";
+    return (
+      join(this._model.repoId, "original") +
+      (process.platform === "win32" ? "\\" : "/")
+    );
}
get status() {

View File

@@ -32,7 +32,7 @@ export default class User {
return this._model.accessTokens.github;
}
get photo(): string {
get photo(): string | undefined {
return this._model.photo;
}
@@ -63,7 +63,7 @@ export default class User {
// get the list of repo from github
const octokit = new Octokit({ auth: this.accessToken });
const repositories = (
await octokit.paginate(octokit.repos.listForAuthenticatedUser, {
await octokit.paginate("GET /user/repos", {
visibility: "all",
sort: "pushed",
per_page: 100,
@@ -119,9 +119,14 @@ export default class User {
*/
async getRepositories() {
const repositories = (
await AnonymizedRepositoryModel.find({
owner: this.id,
}).exec()
await AnonymizedRepositoryModel.find(
{
owner: this.id,
},
{
originalFiles: 0,
}
).exec()
).map((d) => new Repository(d));
const promises = [];
for (let repo of repositories) {

View File

@@ -1,16 +1,16 @@
import config from "../config";
import Repository from "./Repository";
import GitHubBase from "./source/GitHubBase";
import { isText } from "istextorbinary";
import { basename } from "path";
import { Transform } from "stream";
import { Readable } from "stream";
import AnonymizedFile from "./AnonymizedFile";
const urlRegex =
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
export function streamToString(stream: Readable): Promise<string> {
const chunks = [];
const chunks: Buffer[] = [];
return new Promise((resolve, reject) => {
stream.on("data", (chunk) => chunks.push(Buffer.from(chunk)));
stream.on("error", (err) => reject(err));
@@ -18,7 +18,7 @@ export function streamToString(stream: Readable): Promise<string> {
});
}
export function isTextFile(filePath: string, content: Buffer) {
export function isTextFile(filePath: string, content?: Buffer) {
const filename = basename(filePath);
const extensions = filename.split(".").reverse();
const extension = extensions[0].toLowerCase();
@@ -31,45 +31,45 @@ export function isTextFile(filePath: string, content: Buffer) {
return isText(filename, content);
}
export function anonymizeStream(filename: string, repository: Repository) {
const ts = new Transform();
var chunks = [],
len = 0,
pos = 0;
export class AnonymizeTransformer extends Transform {
public wasAnonimized = false;
public isText = false;
ts._transform = function _transform(chunk, enc, cb) {
chunks.push(chunk);
len += chunk.length;
constructor(private readonly file: AnonymizedFile) {
super();
}
if (pos === 1) {
let data: any = Buffer.concat(chunks, len);
if (isTextFile(filename, data)) {
data = anonymizeContent(data.toString(), repository);
_transform(chunk: Buffer, encoding: string, callback: () => void) {
const isText = isTextFile(this.file.anonymizedPath, chunk);
if (isText) {
this.isText = true;
const anonimizer = new ContentAnonimizer(chunk.toString(), {
repoId: this.file.repository.repoId,
image: this.file.repository.options.image,
link: this.file.repository.options.link,
terms: this.file.repository.options.terms,
repoName: (this.file.repository.source as GitHubBase).githubRepository
?.fullName,
branchName:
(this.file.repository.source as GitHubBase).branch?.name || "main",
});
anonimizer.anonymize();
if (anonimizer.wasAnonymized) {
this.wasAnonimized = true;
chunk = Buffer.from(anonimizer.content);
}
chunks = [];
len = 0;
this.push(data);
}
pos = 1 ^ pos;
cb(null);
};
this.emit("transform", {
isText,
wasAnonimized: this.wasAnonimized,
chunk,
});
ts._flush = function _flush(cb) {
if (chunks.length) {
let data: any = Buffer.concat(chunks, len);
if (isText(filename, data)) {
data = anonymizeContent(data.toString(), repository);
}
this.push(data);
}
cb(null);
};
return ts;
this.push(chunk);
callback();
}
}
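The class above is a standard Node Transform; a generic, self-contained sketch of the same chunk-rewriting idea (not the project's class, and note that rewriting chunk by chunk can split a term across chunk boundaries):

import { Transform, TransformCallback } from "stream";

// Applies a string rewrite to every chunk and passes the result downstream.
class RewriteTransform extends Transform {
  constructor(private readonly rewrite: (s: string) => string) {
    super();
  }
  _transform(chunk: Buffer, _enc: BufferEncoding, callback: TransformCallback) {
    this.push(Buffer.from(this.rewrite(chunk.toString("utf8"))));
    callback();
  }
}

// Example: mask the word "secret" in anything piped through stdin.
process.stdin
  .pipe(new RewriteTransform((s) => s.replace(/secret/gi, "XXX")))
  .pipe(process.stdout);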
interface Anonymizationptions {
@@ -86,77 +86,138 @@ interface Anonymizationptions {
};
}
export class ContentAnonimizer {
public wasAnonymized = false;
constructor(
public content: string,
readonly opt: {
image?: boolean;
link?: boolean;
terms?: string[];
repoName?: string;
branchName?: string;
repoId?: string;
}
) {}
private removeImage() {
if (this.opt.image !== false) {
return;
}
// remove image in markdown
this.content = this.content.replace(
/!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
() => {
this.wasAnonymized = true;
return config.ANONYMIZATION_MASK;
}
);
}
private removeLink() {
if (this.opt.link !== false) {
return;
}
// remove image in markdown
this.content = this.content.replace(urlRegex, () => {
this.wasAnonymized = true;
return config.ANONYMIZATION_MASK;
});
}
private replaceGitHubSelfLinks() {
if (!this.opt.repoName || !this.opt.branchName) {
return;
}
const repoName = this.opt.repoName;
const branchName = this.opt.branchName;
const replaceCallback = () => {
this.wasAnonymized = true;
return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
};
this.content = this.content.replace(
new RegExp(
`https://raw.githubusercontent.com/${repoName}/${branchName}\\b`,
"gi"
),
replaceCallback
);
this.content = this.content.replace(
new RegExp(`https://github.com/${repoName}/blob/${branchName}\\b`, "gi"),
replaceCallback
);
this.content = this.content.replace(
new RegExp(`https://github.com/${repoName}/tree/${branchName}\\b`, "gi"),
replaceCallback
);
this.content = this.content.replace(
new RegExp(`https://github.com/${repoName}`, "gi"),
replaceCallback
);
}
private replaceTerms() {
const terms = this.opt.terms || [];
for (let i = 0; i < terms.length; i++) {
let term = terms[i];
if (term.trim() == "") {
continue;
}
const mask = config.ANONYMIZATION_MASK + "-" + (i + 1);
try {
new RegExp(term, "gi");
} catch {
// escape regex characters
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
}
// remove whole url if it contains the term
this.content = this.content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) {
this.wasAnonymized = true;
return mask;
}
return match;
});
// remove the term in the text
this.content = this.content.replace(
new RegExp(`\\b${term}\\b`, "gi"),
() => {
this.wasAnonymized = true;
return mask;
}
);
}
}
anonymize() {
this.removeImage();
this.removeLink();
this.replaceGitHubSelfLinks();
this.replaceTerms();
return this.content;
}
}
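A usage sketch of the class above with made-up inputs: link: false turns link removal on, each term is replaced by the configured mask plus its index, and wasAnonymized reports whether anything changed.

const anonymizer = new ContentAnonimizer(
  "See https://github.com/acme/secret-project for the ProjectX sources.",
  {
    repoId: "demo-repo", // hypothetical anonymized id
    link: false,         // false => URLs are replaced by the mask
    terms: ["ProjectX"], // replaced by <mask>-1
  }
);
const cleaned = anonymizer.anonymize();
console.log(anonymizer.wasAnonymized, cleaned);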
export function anonymizeContent(
content: string,
repository: Anonymizationptions
) {
if (repository.options?.image === false) {
// remove image in markdown
content = content.replace(
/!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
""
);
}
if (!repository.options?.link) {
// remove all links
content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
}
let repoName: string | undefined;
let branchName: string | undefined;
if (repository.source instanceof GitHubBase) {
content = content.replace(
new RegExp(
`https://github.com/${
repository.source.githubRepository.fullName
}/blob/${repository.source.branch?.name || "HEAD"}\\b`,
"gi"
),
`https://${config.HOSTNAME}/r/${repository.repoId}`
);
content = content.replace(
new RegExp(
`https://github.com/${
repository.source.githubRepository.fullName
}/tree/${(repository.source as GitHubBase).branch?.name || "HEAD"}\\b`,
"gi"
),
`https://${config.HOSTNAME}/r/${repository.repoId}`
);
content = content.replace(
new RegExp(
`https://github.com/${repository.source.githubRepository.fullName}`,
"gi"
),
`https://${config.HOSTNAME}/r/${repository.repoId}`
);
repoName = repository.source.githubRepository.fullName;
branchName = repository.source.branch.name;
}
const terms = repository.options.terms || [];
for (let i = 0; i < terms.length; i++) {
let term = terms[i];
if (term.trim() == "") {
continue;
}
try {
new RegExp(term, "gi");
} catch {
// escape regex characters
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
}
// remove whole url if it contains the term
content = content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match))
return config.ANONYMIZATION_MASK + "-" + (i + 1);
return match;
});
// remove the term in the text
content = content.replace(
new RegExp(`\\b${term}\\b`, "gi"),
config.ANONYMIZATION_MASK + "-" + (i + 1)
);
}
-  return content;
+  return new ContentAnonimizer(content, {
+    repoId: repository.repoId,
+    image: repository.options.image,
+    link: repository.options.link,
+    terms: repository.options.terms,
+    repoName,
+    branchName,
+  }).anonymize();
}
export function anonymizePath(path: string, terms: string[]) {

View File

@@ -9,7 +9,7 @@ export interface IAnonymizedPullRequest {
anonymizeDate: Date;
source: {
pullRequestId: number;
repositoryFullName?: string;
repositoryFullName: string;
accessToken?: string;
};
owner: string;

View File

@@ -3,7 +3,7 @@ import { Schema } from "mongoose";
const AnonymizedRepositorySchema = new Schema({
repoId: {
type: String,
index: { unique: true },
index: { unique: true, collation: { locale: "en", strength: 2 } },
},
status: {
type: String,
@@ -15,12 +15,17 @@ const AnonymizedRepositorySchema = new Schema({
lastView: Date,
pageView: Number,
accessToken: String,
owner: Schema.Types.ObjectId,
owner: {
type: Schema.Types.ObjectId,
ref: "user",
index: true,
},
conference: String,
source: {
type: { type: String },
branch: String,
commit: String,
commitDate: Date,
repositoryId: String,
repositoryName: String,
accessToken: String,
@@ -59,6 +64,10 @@ const AnonymizedRepositorySchema = new Schema({
default: 0,
},
},
isReseted: {
type: Boolean,
default: false,
},
});
export default AnonymizedRepositorySchema;
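The collation added to the repoId index is what makes repoId lookups case-insensitive; a small standalone sketch of how the index option and the query-side collation have to match for the index to apply (model name is a placeholder):

import { Schema, model } from "mongoose";

const DemoSchema = new Schema({
  repoId: {
    type: String,
    // strength: 2 => comparisons ignore case for this index
    index: { unique: true, collation: { locale: "en", strength: 2 } },
  },
});
const Demo = model("DemoRepository", DemoSchema);

// The query must request the same collation, otherwise the lookup is
// case-sensitive and cannot use the unique index.
async function findRepo(repoId: string) {
  return Demo.findOne({ repoId }).collation({ locale: "en", strength: 2 });
}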

View File

@@ -11,13 +11,14 @@ export interface IAnonymizedRepository {
type: "GitHubDownload" | "GitHubStream" | "Zip";
branch?: string;
commit?: string;
commitDate?: Date,
repositoryId?: string;
repositoryName?: string;
accessToken?: string;
};
owner: string;
truckedFileList: boolean;
originalFiles: Tree;
originalFiles?: Tree;
conference: string;
options: {
terms: string[];
@@ -40,6 +41,7 @@ export interface IAnonymizedRepository {
storage: number;
file: number;
};
isReseted: boolean;
}
export interface IAnonymizedRepositoryDocument

View File

@@ -13,22 +13,42 @@ export const database = mongoose.connection;
export let isConnected = false;
export async function connect() {
mongoose.set("strictQuery", false);
await mongoose.connect(MONGO_URL + "production", {
authSource: "admin",
appName: "Anonymous GitHub Server",
compressors: "zlib",
} as ConnectOptions);
isConnected = true;
return database;
}
-export async function getRepository(repoId: string) {
+export async function getRepository(
+  repoId: string,
+  opts: {
+    includeFiles: boolean;
+  } = {
+    includeFiles: true,
+  }
+) {
  if (!repoId || repoId == "undefined") {
    throw new AnonymousError("repo_not_found", {
      object: repoId,
      httpStatus: 404,
    });
  }
-  const data = await AnonymizedRepositoryModel.findOne({ repoId });
+  const project: any = {};
+  if (!opts.includeFiles) {
+    project.originalFiles = 0;
+  }
+  const data = await AnonymizedRepositoryModel.findOne(
+    { repoId },
+    project
+  ).collation({
+    locale: "en",
+    strength: 2,
+  });
if (!data)
throw new AnonymousError("repo_not_found", {
object: repoId,

View File

@@ -19,7 +19,12 @@ const UserSchema = new Schema({
],
isAdmin: { type: Boolean, default: false },
photo: String,
repositories: [String],
repositories: [
{
type: String,
ref: "Repository",
},
],
default: {
terms: [String],
options: {

View File

@@ -2,25 +2,39 @@ import { SandboxedJob } from "bullmq";
import { config } from "dotenv";
config();
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
export default async function (job: SandboxedJob<Repository, void>) {
-  const { connect, getRepository } = require("../database/database");
-  console.log(`${job.data.repoId} is going to be downloaded`);
+  const {
+    connect,
+    getRepository,
+  }: {
+    connect: () => Promise<void>;
+    getRepository: typeof getRepositoryImport;
+  } = require("../database/database");
+  console.log(`[QUEUE] ${job.data.repoId} is going to be downloaded`);
try {
await connect();
const repo = await getRepository(job.data.repoId);
job.updateProgress({ status: "get_repo" });
await repo.resetSate("preparing");
job.updateProgress({ status: "resetSate" });
try {
job.updateProgress({ status: "resetSate" });
await repo.resetSate(RepositoryStatus.PREPARING, "");
job.updateProgress({ status: "download" });
await repo.anonymize();
console.log(`[QUEUE] ${job.data.repoId} is downloaded`);
} catch (error) {
await repo.updateStatus("error", error.message);
job.updateProgress({ status: "error" });
if (error instanceof Error) {
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
} else if (typeof error === "string") {
await repo.updateStatus(RepositoryStatus.ERROR, error);
}
throw error;
}
} catch (error) {
console.error(error);
} finally {
console.log(`${job.data.repoId} is downloaded`);
console.log(`[QUEUE] ${job.data.repoId} is finished with an error`);
}
}
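A reduced sketch of the sandboxed-processor shape used here: the worker process opens its own database connection, reports progress, and persists an error status before re-throwing so BullMQ marks the job as failed (module path and status strings follow the diff; everything else is simplified):

import { SandboxedJob } from "bullmq";

export default async function (job: SandboxedJob<{ repoId: string }, void>) {
  // Sandboxed jobs run in a separate process, so the connection is opened here.
  const { connect, getRepository } = require("../database/database");
  await connect();
  const repo = await getRepository(job.data.repoId);
  try {
    await job.updateProgress({ status: "download" });
    await repo.anonymize();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    await repo.updateStatus("error", message);
    throw error; // let BullMQ record the failure
  }
}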

View File

@@ -0,0 +1,29 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
export default async function (job: SandboxedJob<Repository, void>) {
const {
connect,
getRepository,
}: {
connect: () => Promise<void>;
getRepository: typeof getRepositoryImport;
} = require("../database/database");
try {
await connect();
console.log(
`[QUEUE] Cache of ${job.data.repoId} is going to be removed...`
);
const repo = await getRepository(job.data.repoId);
try {
await repo.removeCache();
} catch (error) {
throw error;
}
} catch (error) {
console.error(error);
} finally {
console.log(`[QUEUE] Cache of ${job.data.repoId} is removed.`);
}
}

View File

@@ -1,21 +1,34 @@
import { SandboxedJob } from "bullmq";
import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
export default async function (job: SandboxedJob<Repository, void>) {
const { connect, getRepository } = require("../database/database");
const {
connect,
getRepository,
}: {
connect: () => Promise<void>;
getRepository: typeof getRepositoryImport;
} = require("../database/database");
try {
await connect();
console.log(`${job.data.repoId} is going to be removed`);
console.log(`[QUEUE] ${job.data.repoId} is going to be removed`);
const repo = await getRepository(job.data.repoId);
await repo.updateStatus(RepositoryStatus.REMOVING, "");
try {
await repo.remove();
} catch (error) {
await repo.updateStatus("error", error.message);
if (error instanceof Error) {
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
} else if (typeof error === "string") {
await repo.updateStatus(RepositoryStatus.ERROR, error);
}
throw error;
}
} catch (error) {
console.error(error);
} finally {
console.log(`${job.data.repoId} is removed`);
console.log(`[QUEUE] ${job.data.repoId} is removed`);
}
}

View File

@@ -3,11 +3,23 @@ import config from "../config";
import Repository from "./Repository";
import * as path from "path";
export let cacheQueue: Queue<Repository>;
export let removeQueue: Queue<Repository>;
export let downloadQueue: Queue<Repository>;
// avoid to load the queue outside the main server
export function startWorker() {
const connection = {
host: config.REDIS_HOSTNAME,
port: config.REDIS_PORT,
};
cacheQueue = new Queue<Repository>("cache removal", {
connection,
defaultJobOptions: {
removeOnComplete: true,
},
});
removeQueue = new Queue<Repository>("repository removal", {
connection: {
host: config.REDIS_HOSTNAME,
@@ -18,31 +30,32 @@ export function startWorker() {
},
});
downloadQueue = new Queue<Repository>("repository download", {
connection: {
host: config.REDIS_HOSTNAME,
port: config.REDIS_PORT,
},
connection,
defaultJobOptions: {
removeOnComplete: true,
},
});
const cacheWorker = new Worker<Repository>(
cacheQueue.name,
path.resolve("build/src/processes/removeCache.js"),
{
concurrency: 5,
connection,
autorun: true,
}
);
cacheWorker.on("completed", async (job) => {
await job.remove();
});
const removeWorker = new Worker<Repository>(
removeQueue.name,
path.resolve("build/src/processes/removeRepository.js"),
//removeRepository,
{
concurrency: 5,
connection: {
host: config.REDIS_HOSTNAME,
port: config.REDIS_PORT,
},
connection,
autorun: true,
}
);
removeWorker.on("error", async (error) => {
console.log(error);
});
removeWorker.on("completed", async (job) => {
await job.remove();
});
@@ -50,31 +63,21 @@ export function startWorker() {
const downloadWorker = new Worker<Repository>(
downloadQueue.name,
path.resolve("build/src/processes/downloadRepository.js"),
// downloadRepository,
{
concurrency: 3,
connection: {
host: config.REDIS_HOSTNAME,
port: config.REDIS_PORT,
},
autorun: true
connection,
autorun: true,
}
);
if (!downloadWorker.isRunning) downloadWorker.run();
downloadWorker.on("active", async (job) => {
console.log("active", job.data.repoId);
console.log("[QUEUE] download repository start", job.data.repoId);
});
downloadWorker.on("completed", async (job) => {
console.log("completed", job.data.repoId);
console.log("[QUEUE] download repository completed", job.data.repoId);
});
downloadWorker.on("failed", async (job) => {
console.log("failed", job.data.repoId);
});
downloadWorker.on("closing", async (error) => {
console.log("closing", error);
});
downloadWorker.on("error", async (error) => {
console.log(error);
console.log("download repository failed", job.data.repoId);
});
}
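Condensed, the wiring this function sets up looks like the following: one Redis connection object shared by every queue and worker, removeOnComplete on the queues, a sandboxed processor referenced by file path, and completed jobs removed explicitly (host and port are placeholders):

import { Queue, Worker } from "bullmq";
import * as path from "path";

const connection = { host: "127.0.0.1", port: 6379 }; // placeholder Redis endpoint

const cacheQueue = new Queue("cache removal", {
  connection,
  defaultJobOptions: { removeOnComplete: true },
});

const cacheWorker = new Worker(
  cacheQueue.name,
  path.resolve("build/src/processes/removeCache.js"), // sandboxed processor
  { concurrency: 5, connection, autorun: true }
);

cacheWorker.on("completed", async (job) => {
  await job.remove(); // drop the finished job from Redis
});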

View File

@@ -4,11 +4,12 @@ import AnonymousError from "../AnonymousError";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import ConferenceModel from "../database/conference/conferences.model";
import UserModel from "../database/users/users.model";
import { downloadQueue, removeQueue } from "../queue";
import { cacheQueue, downloadQueue, removeQueue } from "../queue";
import Repository from "../Repository";
import User from "../User";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
import RepositoryModel from "../database/repositories/repositories.model";
const router = express.Router();
@@ -91,10 +92,12 @@ router.get("/queues", async (req, res) => {
"failed",
"delayed",
]),
cacheQueue.getJobs(["waiting", "active", "completed", "failed", "delayed"]),
]);
res.json({
downloadQueue: out[0],
removeQueue: out[1],
cacheQueue: out[2],
});
});
@@ -104,7 +107,7 @@ router.get("/repos", async (req, res) => {
const ready = req.query.ready == "true";
const error = req.query.error == "true";
const preparing = req.query.preparing == "true";
const remove = req.query.remove == "true";
const remove = req.query.removed == "true";
const expired = req.query.expired == "true";
let sort: any = { _id: 1 };
@@ -116,7 +119,7 @@ router.get("/repos", async (req, res) => {
if (req.query.search) {
query.push({ repoId: { $regex: req.query.search } });
}
let status = [];
const status: { status: string }[] = [];
query.push({ $or: status });
if (ready) {
status.push({ status: "ready" });
@@ -137,20 +140,43 @@ router.get("/repos", async (req, res) => {
status.push({ status: "download" });
}
const skipIndex = (page - 1) * limit;
const [total, results] = await Promise.all([
AnonymizedRepositoryModel.find(
{
$and: query,
},
{ originalFiles: 0 }
).countDocuments(),
AnonymizedRepositoryModel.find({ $and: query }, { originalFiles: 0 })
.skip(skipIndex)
.sort(sort)
.limit(limit)
.exec(),
]);
res.json({
query: { $and: query },
page,
-    total: await AnonymizedRepositoryModel.find({
-      $and: query,
-    }).countDocuments(),
+    total,
    sort,
-    results: await AnonymizedRepositoryModel.find({ $and: query })
-      .sort(sort)
-      .limit(limit)
-      .skip(skipIndex),
+    results,
});
});
// delete a repository
router.delete(
"/repos/:repoId/",
async (req: express.Request, res: express.Response) => {
const repo = await getRepo(req, res, { nocheck: true });
if (!repo) return;
try {
await cacheQueue.add(repo.repoId, repo, { jobId: repo.repoId });
return res.json({ status: repo.status });
} catch (error) {
handleError(error, res, req);
}
}
);
router.get("/users", async (req, res) => {
const page = parseInt(req.query.page as string) || 1;
const limit = parseInt(req.query.limit as string) || 10;
@@ -181,7 +207,14 @@ router.get(
"/users/:username",
async (req: express.Request, res: express.Response) => {
try {
const model = await UserModel.findOne({ username: req.params.username });
const model = await UserModel.findOne({
username: req.params.username,
}).populate({
path: "repositories",
model: "Repository",
foreignField: "_id",
localField: "repositories",
});
if (!model) {
req.logout((error) => console.error(error));
throw new AnonymousError("user_not_found", {

View File

@@ -4,6 +4,7 @@ import Conference from "../Conference";
import ConferenceModel from "../database/conference/conferences.model";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import { IConferenceDocument } from "../database/conference/conferences.types";
const router = express.Router();
@@ -66,7 +67,7 @@ router.get("/", async (req: express.Request, res: express.Response) => {
}
});
function validateConferenceForm(conf) {
function validateConferenceForm(conf: any) {
if (!conf.name)
throw new AnonymousError("conf_name_missing", {
object: conf,
@@ -148,11 +149,17 @@ router.post(
async (req: express.Request, res: express.Response) => {
try {
const user = await getUser(req);
let model = new ConferenceModel();
let model: IConferenceDocument = new ConferenceModel();
if (req.params.conferenceID) {
model = await ConferenceModel.findOne({
const queryModel = await ConferenceModel.findOne({
conferenceID: req.params.conferenceID,
});
if (!queryModel) {
throw new AnonymousError("conference_not_found", {
httpStatus: 404,
});
}
model = queryModel;
isOwnerOrAdmin(model.owners, user);
}
validateConferenceForm(req.body);
@@ -197,7 +204,10 @@ router.post(
res.send("ok");
} catch (error) {
if (error.message?.indexOf(" duplicate key") > -1) {
if (
error instanceof Error &&
error.message?.indexOf(" duplicate key") > -1
) {
return handleError(
new AnonymousError("conf_id_used", {
object: req.params.conferenceID,
@@ -219,16 +229,18 @@ router.get(
conferenceID: req.params.conferenceID,
});
if (!data)
throw new AnonymousError("conf_not_found", {
object: req.params.conferenceID,
httpStatus: 404,
});
const user = await getUser(req);
const conference = new Conference(data);
try {
isOwnerOrAdmin(conference.ownerIDs, user);
const o: any = conference.toJSON();
o.repositories = (await conference.repositories()).map((r) => r.toJSON());
o.repositories = (await conference.repositories()).map((r) =>
r.toJSON()
);
res.json(o);
} catch (error) {
return res.json({
@@ -238,7 +250,7 @@ router.get(
startDate: conference.startDate,
endDate: conference.endDate,
options: conference.options,
})
});
}
} catch (error) {
handleError(error, res, req);

View File

@@ -1,7 +1,7 @@
import { createClient } from "redis";
import * as passport from "passport";
import * as session from "express-session";
import * as connectRedis from "connect-redis";
import RedisStore from "connect-redis";
import * as OAuth2Strategy from "passport-oauth2";
import { Profile, Strategy } from "passport-github2";
import * as express from "express";
@@ -9,8 +9,7 @@ import * as express from "express";
import config from "../../config";
import UserModel from "../database/users/users.model";
import { IUserDocument } from "../database/users/users.types";
const RedisStore = connectRedis(session);
import AnonymousError from "../AnonymousError";
export function ensureAuthenticated(
req: express.Request,
@@ -29,7 +28,7 @@ const verify = async (
profile: Profile,
done: OAuth2Strategy.VerifyCallback
): Promise<void> => {
let user: IUserDocument;
let user: IUserDocument | null = null;
try {
user = await UserModel.findOne({ "externalIDs.github": profile.id });
if (user) {
@@ -54,6 +53,11 @@ const verify = async (
await user.save();
} catch (error) {
console.error(error);
throw new AnonymousError("unable_to_connect_user", {
httpStatus: 500,
object: profile,
cause: error as Error,
});
} finally {
done(null, {
username: profile.username,
@@ -84,23 +88,28 @@ passport.deserializeUser((user: Express.User, done) => {
done(null, user);
});
const redisClient = createClient({
legacyMode: true,
socket: {
port: config.REDIS_PORT,
host: config.REDIS_HOSTNAME,
},
});
redisClient.on("error", (err) => console.log("Redis Client Error", err));
redisClient.connect();
export const appSession = session({
secret: "keyboard cat",
store: new RedisStore({
export function initSession() {
const redisClient = createClient({
legacyMode: false,
socket: {
port: config.REDIS_PORT,
host: config.REDIS_HOSTNAME,
},
});
redisClient.on("error", (err) => console.log("Redis Client Error", err));
redisClient.connect();
const redisStore = new RedisStore({
client: redisClient,
}),
saveUninitialized: false,
resave: false,
});
prefix: "anoGH_session:",
});
return session({
secret: config.SESSION_SECRET,
store: redisStore,
saveUninitialized: false,
resave: false,
});
}
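The session setup now uses the connect-redis v7 default export directly instead of wrapping express-session; a standalone sketch of the same wiring with placeholder values:

import { createClient } from "redis";
import * as session from "express-session";
import RedisStore from "connect-redis";

export function buildSession() {
  const redisClient = createClient({
    socket: { host: "127.0.0.1", port: 6379 }, // placeholder endpoint
  });
  redisClient.on("error", (err) => console.log("Redis Client Error", err));
  redisClient.connect();

  return session({
    secret: "placeholder-secret", // read from configuration in practice
    store: new RedisStore({ client: redisClient, prefix: "demo_session:" }),
    saveUninitialized: false,
    resave: false,
  });
}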
export const router = express.Router();

View File

@@ -8,34 +8,47 @@ export const router = express.Router();
router.get(
"/:repoId/file/:path*",
async (req: express.Request, res: express.Response) => {
let anonymizedPath = req.params.path;
if (req.params[0]) {
anonymizedPath += req.params[0];
const anonymizedPath = decodeURI(
new URL(req.url, `${req.protocol}://${req.hostname}`).pathname.replace(
`/${req.params.repoId}/file/`,
""
)
);
if (anonymizedPath.endsWith("/")) {
return handleError(
new AnonymousError("folder_not_supported", {
httpStatus: 404,
object: anonymizedPath,
}),
res
);
}
anonymizedPath = anonymizedPath;
const repo = await getRepo(req, res);
const repo = await getRepo(req, res, {
nocheck: false,
includeFiles: false,
});
if (!repo) return;
try {
await repo.countView();
const f = new AnonymizedFile({
repository: repo,
anonymizedPath,
});
if (!(await f.isFileSupported())) {
if (!f.isFileSupported()) {
throw new AnonymousError("file_not_supported", {
httpStatus: 403,
object: f,
});
}
res.attachment(
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
);
if (req.query.download) {
res.attachment(
anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1)
);
}
// cache the file for 5min
res.header('Cache-Control', 'max-age=300');
await f.send(res);
res.header("Cache-Control", "max-age=300");
await Promise.all([repo.countView(), f.send(res)]);
} catch (error) {
return handleError(error, res, req);
}
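The path extraction above pulls everything after /:repoId/file/ out of the raw request URL instead of relying on route params; the same idea in isolation (the base URL is only needed to parse a relative path):

function extractAnonymizedPath(rawUrl: string, repoId: string): string {
  const pathname = new URL(rawUrl, "http://localhost").pathname;
  return decodeURI(pathname.replace(`/${repoId}/file/`, ""));
}

// extractAnonymizedPath("/my-repo/file/docs/Read%20Me.md?download=1", "my-repo")
// => "docs/Read Me.md"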

View File

@@ -71,6 +71,7 @@ router.get(
},
})
);
pullRequest.owner = user;
await pullRequest.download();
res.json(pullRequest.toJSON());
} catch (error) {
@@ -96,7 +97,7 @@ router.get(
}
);
function validateNewPullRequest(pullRequestUpdate): void {
function validateNewPullRequest(pullRequestUpdate: any): void {
const validCharacters = /^[0-9a-zA-Z\-\_]+$/;
if (
!pullRequestUpdate.pullRequestId.match(validCharacters) ||
@@ -151,7 +152,7 @@ function updatePullRequestModel(
expirationMode: pullRequestUpdate.options.expirationMode,
expirationDate: pullRequestUpdate.options.expirationDate
? new Date(pullRequestUpdate.options.expirationDate)
: null,
: undefined,
update: pullRequestUpdate.options.update,
image: pullRequestUpdate.options.image,
link: pullRequestUpdate.options.link,
@@ -220,7 +221,10 @@ router.post("/", async (req: express.Request, res: express.Response) => {
await pullRequest.anonymize();
res.send(pullRequest.toJSON());
} catch (error) {
if (error.message?.indexOf(" duplicate key") > -1) {
if (
error instanceof Error &&
error.message.indexOf(" duplicate key") > -1
) {
return handleError(
new AnonymousError("pullRequestId_already_used", {
httpStatus: 400,

View File

@@ -16,6 +16,8 @@ import AnonymousError from "../AnonymousError";
import { downloadQueue, removeQueue } from "../queue";
import RepositoryModel from "../database/repositories/repositories.model";
import User from "../User";
import { RepositoryStatus } from "../types";
import { IUserDocument } from "../database/users/users.types";
const router = express.Router();
@@ -31,8 +33,19 @@ async function getTokenForAdmin(user: User, req: express.Request) {
},
{
"source.accessToken": 1,
owner: 1,
}
).exec();
).populate({
path: "owner",
model: UserModel,
});
const user: IUserDocument = existingRepo?.owner as any;
if (user instanceof UserModel) {
const check = await GitHubBase.checkToken(user.accessTokens.github);
if (check) {
return user.accessTokens.github;
}
}
if (existingRepo) {
return existingRepo.source.accessToken;
}
@@ -68,6 +81,12 @@ router.post("/claim", async (req: express.Request, res: express.Response) => {
}
const r = gh(req.body.repoUrl);
if (!r?.owner || !r?.name) {
throw new AnonymousError("repo_not_found", {
object: req.body,
httpStatus: 404,
});
}
const repo = await getRepositoryFromGitHub({
owner: r.owner,
repo: r.name,
@@ -109,7 +128,10 @@ router.post(
"/:repoId/refresh",
async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
if (
@@ -133,7 +155,10 @@ router.post(
router.delete(
"/:repoId/",
async (req: express.Request, res: express.Response) => {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
// if (repo.status == "removing") return res.json({ status: repo.status });
try {
@@ -144,7 +169,7 @@ router.delete(
});
const user = await getUser(req);
isOwnerOrAdmin([repo.owner.id], user);
await repo.updateStatus("removing");
await repo.updateStatus(RepositoryStatus.REMOVING);
await removeQueue.add(repo.repoId, repo, { jobId: repo.repoId });
return res.json({ status: repo.status });
} catch (error) {
@@ -237,7 +262,10 @@ router.get(
// get repository information
router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
const user = await getUser(req);
@@ -248,7 +276,7 @@ router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
}
});
function validateNewRepo(repoUpdate): void {
function validateNewRepo(repoUpdate: any): void {
const validCharacters = /^[0-9a-zA-Z\-\_]+$/;
if (
!repoUpdate.repoId.match(validCharacters) ||
@@ -311,7 +339,7 @@ function updateRepoModel(
expirationMode: repoUpdate.options.expirationMode,
expirationDate: repoUpdate.options.expirationDate
? new Date(repoUpdate.options.expirationDate)
: null,
: undefined,
update: repoUpdate.options.update,
image: repoUpdate.options.image,
pdf: repoUpdate.options.pdf,
@@ -327,7 +355,10 @@ router.post(
"/:repoId/",
async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
const user = await getUser(req);
@@ -344,8 +375,9 @@ router.post(
}
updateRepoModel(repo.model, repoUpdate);
repo.source.type = "GitHubStream";
async function removeRepoFromConference(conferenceID) {
const removeRepoFromConference = async (conferenceID: string) => {
const conf = await ConferenceModel.findOne({
conferenceID,
});
@@ -354,7 +386,7 @@ router.post(
if (r.length == 1) r[0].removeDate = new Date();
await conf.save();
}
}
};
if (!repoUpdate.conference) {
// remove conference
if (repo.model.conference) {
@@ -380,7 +412,7 @@ router.post(
if (f.length) {
// the repository already referenced the conference
f[0].addDate = new Date();
f[0].removeDate = null;
f[0].removeDate = undefined;
} else {
conf.repositories.push({
id: repo.model.id,
@@ -394,7 +426,7 @@ router.post(
}
}
repo.model.conference = repoUpdate.conference;
await repo.updateStatus("preparing");
await repo.updateStatus(RepositoryStatus.PREPARING);
res.json({ status: repo.status });
await downloadQueue.add(repo.repoId, repo, { jobId: repo.repoId });
} catch (error) {
@@ -409,37 +441,70 @@ router.post("/", async (req: express.Request, res: express.Response) => {
const repoUpdate = req.body;
try {
try {
await db.getRepository(repoUpdate.repoId, { includeFiles: false });
throw new AnonymousError("repoId_already_used", {
httpStatus: 400,
object: repoUpdate,
});
} catch (error: any) {
if (error.message == "repo_not_found") {
// the repository does not exist yet
} else {
throw error;
}
}
validateNewRepo(repoUpdate);
const r = gh(repoUpdate.fullName);
if (!r?.owner || !r?.name) {
throw new AnonymousError("repo_not_found", {
object: req.body,
httpStatus: 404,
});
}
const repository = await getRepositoryFromGitHub({
accessToken: user.accessToken,
owner: r.owner,
repo: r.name,
});
if (!repository) {
throw new AnonymousError("repo_not_found", {
object: req.body,
httpStatus: 404,
});
}
const repo = new AnonymizedRepositoryModel();
repo.repoId = repoUpdate.repoId;
repo.anonymizeDate = new Date();
repo.owner = user.id;
updateRepoModel(repo, repoUpdate);
repo.source.type = "GitHubStream";
repo.source.accessToken = user.accessToken;
repo.source.repositoryId = repository.model.id;
repo.source.repositoryName = repoUpdate.fullName;
if (repo.source.type == "GitHubDownload") {
// details.size is in kilobytes
if (repository.size > config.MAX_REPO_SIZE) {
throw new AnonymousError("invalid_mode", {
object: repository,
httpStatus: 400,
});
}
}
if (repository.size < config.AUTO_DOWNLOAD_REPO_SIZE) {
repo.source.type = "GitHubDownload";
}
// if (repo.source.type === "GitHubDownload") {
// // details.size is in kilobytes
// if (
// repository.size === undefined ||
// repository.size > config.MAX_REPO_SIZE
// ) {
// throw new AnonymousError("invalid_mode", {
// object: repository,
// httpStatus: 400,
// });
// }
// }
// if (
// repository.size !== undefined &&
// repository.size < config.AUTO_DOWNLOAD_REPO_SIZE
// ) {
// repo.source.type = "GitHubDownload";
// }
repo.conference = repoUpdate.conference;
await repo.save();
@@ -454,7 +519,7 @@ router.post("/", async (req: express.Request, res: express.Response) => {
new Date() > conf.endDate ||
conf.status !== "ready"
) {
await repo.remove();
await repo.deleteOne();
throw new AnonymousError("conf_not_activated", {
object: conf,
httpStatus: 400,
@@ -474,7 +539,10 @@ router.post("/", async (req: express.Request, res: express.Response) => {
attempts: 3,
});
} catch (error) {
if (error.message?.indexOf(" duplicate key") > -1) {
if (
error instanceof Error &&
error.message?.indexOf(" duplicate key") > -1
) {
return handleError(
new AnonymousError("repoId_already_used", {
httpStatus: 400,

View File

@@ -6,6 +6,7 @@ import config from "../../config";
import { getRepo, handleError } from "./route-utils";
import AnonymousError from "../AnonymousError";
import { downloadQueue } from "../queue";
import { RepositoryStatus } from "../types";
const router = express.Router();
@@ -50,7 +51,7 @@ router.get(
// cache the file for 6 hours
res.header("Cache-Control", "max-age=21600");
await pipeline(repo.zip(), res);
await pipeline(await repo.zip(), res);
} catch (error) {
handleError(error, res, req);
}
@@ -61,7 +62,7 @@ router.get(
"/:repoId/files",
async (req: express.Request, res: express.Response) => {
res.header("Cache-Control", "no-cache");
const repo = await getRepo(req, res);
const repo = await getRepo(req, res, { includeFiles: true });
if (!repo) return;
try {
res.json(await repo.anonymizedFiles({ includeSha: false }));
@@ -76,7 +77,10 @@ router.get(
async (req: express.Request, res: express.Response) => {
try {
res.header("Cache-Control", "no-cache");
const repo = await getRepo(req, res, { nocheck: true });
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
let redirectURL = null;
if (
@@ -105,7 +109,7 @@ router.get(
repo.model.statusDate < fiveMinuteAgo
// && repo.status != "preparing"
) {
await repo.updateStatus("preparing");
await repo.updateStatus(RepositoryStatus.PREPARING);
await downloadQueue.add(repo.repoId, repo, {
jobId: repo.repoId,
attempts: 3,
@@ -149,7 +153,9 @@ router.get(
res.json({
url: redirectURL,
download,
lastUpdateDate: repo.model.statusDate,
lastUpdateDate: repo.model.source.commitDate
? repo.model.source.commitDate
: repo.model.anonymizeDate,
});
} catch (error) {
handleError(error, res, req);

View File

@@ -37,11 +37,16 @@ export async function getPullRequest(
export async function getRepo(
req: express.Request,
res: express.Response,
opt?: { nocheck?: boolean }
opt: { nocheck?: boolean; includeFiles?: boolean } = {
nocheck: false,
includeFiles: false,
}
) {
try {
const repo = await db.getRepository(req.params.repoId);
if (opt?.nocheck == true) {
const repo = await db.getRepository(req.params.repoId, {
includeFiles: opt.includeFiles === true,
});
if (opt.nocheck == true) {
} else {
// redirect if the repository is expired
if (
@@ -74,7 +79,7 @@ function printError(error: any, req?: express.Request) {
io.notifyError(error, error.value);
if (error instanceof AnonymousError) {
let message = `[ERROR] ${error.toString()} ${error.stack
.split("\n")[1]
?.split("\n")[1]
.trim()}`;
if (req) {
message += ` ${req.originalUrl}`;
@@ -102,8 +107,10 @@ export function handleError(
let status = 500;
if (error.httpStatus) {
status = error.httpStatus;
} else if (error.$metadata?.httpStatusCode) {
status = error.$metadata.httpStatusCode;
} else if (message && message.indexOf("not_found") > -1) {
status = 400;
status = 404;
} else if (message && message.indexOf("not_connected") > -1) {
status = 401;
}
@@ -114,19 +121,26 @@ export function handleError(
}
export async function getUser(req: express.Request) {
-  const user = (req.user as any).user;
-  if (!user) {
-    req.logout((error) => console.error(error));
function notConnected(): never {
req.logout((error) => {
if (error) {
console.error(`[ERROR] Error while logging out: ${error}`);
}
});
throw new AnonymousError("not_connected", {
httpStatus: 401,
});
}
if (!req.user) {
notConnected();
}
const user = (req.user as any).user;
if (!user) {
notConnected();
}
const model = await UserModel.findById(user._id);
if (!model) {
-    req.logout((error) => console.error(error));
-    throw new AnonymousError("not_connected", {
-      httpStatus: 401,
-    });
+    notConnected();
}
return new User(model);
}

View File

@@ -1,7 +1,9 @@
import * as express from "express";
import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser } from "./route-utils";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import UserModel from "../database/users/users.model";
import User from "../User";
const router = express.Router();
@@ -10,7 +12,11 @@ router.use(ensureAuthenticated);
router.get("/logout", async (req: express.Request, res: express.Response) => {
try {
req.logout((error) => console.error(error));
req.logout((error) => {
if (error) {
console.error(`[ERROR] Logout error: ${error}`);
}
});
res.redirect("/");
} catch (error) {
handleError(error, res, req);
@@ -113,22 +119,40 @@ router.get(
}
);
async function getAllRepositories(user: User, force: boolean) {
const repos = await user.getGitHubRepositories({
force,
});
return repos.map((x) => {
return {
fullName: x.fullName,
id: x.id,
};
});
}
router.get(
"/all_repositories",
async (req: express.Request, res: express.Response) => {
try {
const user = await getUser(req);
-      const repos = await user.getGitHubRepositories({
-        force: req.query.force == "1",
-      });
-      res.json(
-        repos.map((x) => {
-          return {
-            fullName: x.fullName,
-            id: x.id,
-          };
-        })
-      );
+      res.json(await getAllRepositories(user, req.query.force == "1"));
} catch (error) {
handleError(error, res, req);
}
}
);
router.get(
"/:username/all_repositories",
async (req: express.Request, res: express.Response) => {
try {
const loggedUser = await getUser(req);
isOwnerOrAdmin([req.params.username], loggedUser);
const model = await UserModel.findOne({ username: req.params.username });
if (!model) {
throw new Error("User not found");
}
const user = new User(model);
res.json(await getAllRepositories(user, req.query.force == "1"));
} catch (error) {
handleError(error, res, req);
}

View File

@@ -4,7 +4,7 @@ import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import GitHubDownload from "../source/GitHubDownload";
import AnonymousError from "../AnonymousError";
import { TreeElement } from "../types";
import { Tree, TreeElement } from "../types";
import * as marked from "marked";
import { streamToString } from "../anonymize-utils";
@@ -68,18 +68,17 @@ async function webView(req: express.Request, res: express.Response) {
if (fileName == "") {
continue;
}
if (!currentAnonymized[fileName]) {
if (!(currentAnonymized as Tree)[fileName]) {
throw new AnonymousError("file_not_found", {
object: this,
object: repo,
httpStatus: 404,
});
}
currentAnonymized = currentAnonymized[fileName];
currentAnonymized = (currentAnonymized as Tree)[fileName];
}
let best_match = null;
indexSelector:
for (const p of indexPriority) {
indexSelector: for (const p of indexPriority) {
for (let filename in currentAnonymized) {
if (filename.toLowerCase() == p) {
best_match = filename;
@@ -96,15 +95,17 @@ async function webView(req: express.Request, res: express.Response) {
}
}
if (!(await f.isFileSupported())) {
if (!f.isFileSupported()) {
throw new AnonymousError("file_not_supported", {
httpStatus: 400,
object: f,
});
}
if ((await f.extension()) == "md") {
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
res.contentType("html").send(marked.marked(content));
res
.contentType("html")
.send(marked.marked(content, { headerIds: false, mangle: false }));
} else {
f.send(res);
}
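The webView handler walks the anonymized tree segment by segment and then scans indexPriority for the best landing file. The contents of indexPriority are not shown in this hunk, so the list below is an assumption; the traversal itself mirrors the logic above over a simplified Tree type:

// Simplified Tree: folders map names to sub-trees, files map to metadata.
type TreeFile = { size: number; sha?: string };
type Tree = { [name: string]: Tree | TreeFile };

// Assumed priority order; the real indexPriority list may differ.
const indexPriority = ["index.html", "index.md", "readme.md", "readme.txt"];

/** Walk `pathSegments` into the tree, then pick the best index file. */
export function selectIndexFile(root: Tree, pathSegments: string[]): string | null {
  let current: Tree = root;
  for (const segment of pathSegments) {
    if (segment === "") continue;
    const next = current[segment];
    if (!next || "size" in next) return null; // missing, or a file rather than a folder
    current = next as Tree;
  }
  for (const candidate of indexPriority) {
    for (const filename of Object.keys(current)) {
      if (filename.toLowerCase() === candidate) return filename;
    }
  }
  return null;
}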

View File

@@ -27,7 +27,10 @@ export function repositoryStatusCheck() {
const job = schedule.scheduleJob("0 */6 * * *", async () => {
console.log("[schedule] Check repository status and unused repositories");
(
await AnonymizedRepositoryModel.find({ status: { $eq: "ready" } })
await AnonymizedRepositoryModel.find({
status: { $eq: "ready" },
isReseted: { $eq: false },
})
).forEach((data) => {
const repo = new Repository(data);
try {
@@ -35,13 +38,13 @@ export function repositoryStatusCheck() {
} catch (error) {
console.log(`Repository ${repo.repoId} is expired`);
}
const sixMonthAgo = new Date();
sixMonthAgo.setMonth(sixMonthAgo.getMonth() - 6);
const fourMonthAgo = new Date();
fourMonthAgo.setMonth(fourMonthAgo.getMonth() - 4);
if (repo.model.lastView < sixMonthAgo) {
if (repo.model.lastView < fourMonthAgo) {
repo.removeCache().then(() => {
console.log(
`Repository ${repo.repoId} not visited for 6 months remove the cached files`
`Repository ${repo.repoId} not visited for 4 months remove the cached files`
);
});
}
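This hunk shortens the cache-eviction window from six to four months and skips repositories that are already reset. A reduced sketch of the same pattern with node-schedule; findStaleRepositories and removeCache are hypothetical stand-ins for the Mongoose query and Repository method used above:

import * as schedule from "node-schedule";

// Anything last viewed before this cutoff is considered stale.
export function staleCutoff(months: number): Date {
  const cutoff = new Date();
  cutoff.setMonth(cutoff.getMonth() - months);
  return cutoff;
}

interface StaleRepo {
  repoId: string;
  removeCache(): Promise<void>;
}

// Runs every 6 hours ("0 */6 * * *"), like the status check above.
export function scheduleCacheCleanup(
  findStaleRepositories: (lastViewBefore: Date) => Promise<StaleRepo[]>
) {
  return schedule.scheduleJob("0 */6 * * *", async () => {
    for (const repo of await findStaleRepositories(staleCutoff(4))) {
      await repo.removeCache();
      console.log(`Repository ${repo.repoId} not visited for 4 months, cache removed`);
    }
  });
}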

View File

@@ -1,21 +1,22 @@
import * as path from "path";
import * as ofs from "fs";
import { createClient } from "redis";
import { resolve, join } from "path";
import { existsSync } from "fs";
import rateLimit from "express-rate-limit";
import * as slowDown from "express-slow-down";
import RedisStore from "rate-limit-redis";
import * as express from "express";
import * as compression from "compression";
import * as db from "./database/database";
import config from "../config";
import * as passport from "passport";
import * as connection from "./routes/connection";
import config from "../config";
import { connect } from "./database/database";
import { initSession, router as connectionRouter } from "./routes/connection";
import router from "./routes";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
import { startWorker } from "./queue";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { getUser } from "./routes/route-utils";
function indexResponse(req: express.Request, res: express.Response) {
if (
@@ -31,7 +32,7 @@ function indexResponse(req: express.Request, res: express.Response) {
)}`
);
}
res.sendFile(path.resolve(__dirname, "..", "public", "index.html"));
res.sendFile(resolve("public", "index.html"));
}
export default async function start() {
@@ -39,13 +40,13 @@ export default async function start() {
app.use(express.json());
app.use(compression());
app.set("trust proxy", true);
app.set("trust proxy", config.TRUST_PROXY);
app.set("etag", "strong");
app.get("/ip", (request, response) => response.send(request.ip));
// handle session and connection
app.use(connection.appSession);
app.use(initSession());
app.use(passport.initialize());
app.use(passport.session());
@@ -66,9 +67,22 @@ export default async function start() {
sendCommand: (...args: string[]) => redisClient.sendCommand(args),
}),
windowMs: 15 * 60 * 1000, // 15 minutes
max: config.RATE_LIMIT, // limit each IP
max: async (request: express.Request, response: express.Response) => {
try {
const user = await getUser(request);
if (user && user.isAdmin) return 0;
if (user) return config.RATE_LIMIT;
} catch (_) {
// ignore: user not connected
}
// if not logged in, limit to half the rate
return config.RATE_LIMIT / 2;
},
standardHeaders: true,
legacyHeaders: false,
message: (request: express.Request, response: express.Response) => {
return `You can only make ${config.RATE_LIMIT} requests every 15min. Please try again later.`;
},
});
const speedLimiter = slowDown({
windowMs: 15 * 60 * 1000, // 15 minutes
@@ -77,8 +91,15 @@ export default async function start() {
maxDelayMs: 5000,
headers: true,
});
const webViewSpeedLimiter = slowDown({
windowMs: 15 * 60 * 1000, // 15 minutes
delayAfter: 200,
delayMs: 150,
maxDelayMs: 5000,
headers: true,
});
app.use("/github", rate, speedLimiter, connection.router);
app.use("/github", rate, speedLimiter, connectionRouter);
// api routes
const apiRouter = express.Router();
@@ -95,8 +116,8 @@ export default async function start() {
apiRouter.use("/pr", speedLimiter, router.pullRequestPrivate);
apiRouter.get("/message", async (_, res) => {
if (ofs.existsSync("./message.txt")) {
return res.sendFile(path.resolve(__dirname, "..", "message.txt"));
if (existsSync("./message.txt")) {
return res.sendFile(resolve("message.txt"));
}
res.sendStatus(404);
});
@@ -119,13 +140,13 @@ export default async function start() {
res.json({
nbRepositories,
nbUsers: users.length,
nbPageViews: nbPageViews[0].total,
nbPageViews: nbPageViews[0]?.total || 0,
nbPullRequests,
});
});
// web view
app.use("/w/", rate, speedLimiter, router.webview);
app.use("/w/", rate, webViewSpeedLimiter, router.webview);
app
.get("/", indexResponse)
@@ -135,7 +156,7 @@ export default async function start() {
.get("/repository/:repoId/?*", indexResponse);
app.use(
express.static(path.join(__dirname, "..", "public"), {
express.static(join("public"), {
etag: true,
lastModified: true,
maxAge: 3600, // 1h
@@ -148,7 +169,7 @@ export default async function start() {
conferenceStatusCheck();
repositoryStatusCheck();
await db.connect();
await connect();
app.listen(config.PORT);
console.log("Database connected and Server started on port: " + config.PORT);
}
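The limiter now computes its max per request: admins return 0 (which the express-rate-limit version used here appears to treat as unlimited), authenticated users get the full quota, and anonymous traffic half of it. A minimal sketch under those assumptions, with a hypothetical getUser and a placeholder RATE_LIMIT constant standing in for config.RATE_LIMIT (the Redis store is omitted to keep it self-contained):

import rateLimit from "express-rate-limit";
import type { Request, Response } from "express";

const RATE_LIMIT = 300; // placeholder; the real value comes from config.RATE_LIMIT

// Hypothetical session lookup; the real app resolves the user via getUser(req).
async function getUser(req: Request): Promise<{ isAdmin: boolean } | null> {
  return (req as any).user?.user ?? null;
}

export const rate = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  standardHeaders: true,
  legacyHeaders: false,
  // Per-request limit: 0 exempts admins in this express-rate-limit version,
  // logged-in users get the full quota, anonymous users half of it.
  max: async (req: Request, _res: Response) => {
    const user = await getUser(req).catch(() => null);
    if (user?.isAdmin) return 0;
    if (user) return RATE_LIMIT;
    return RATE_LIMIT / 2;
  },
  message: (_req: Request, _res: Response) =>
    `You can only make ${RATE_LIMIT} requests every 15min. Please try again later.`,
});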

View File

@@ -6,12 +6,13 @@ import Repository from "../Repository";
import { Readable } from "stream";
import UserModel from "../database/users/users.model";
import AnonymousError from "../AnonymousError";
import { Octokit } from "@octokit/rest";
export default abstract class GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip";
githubRepository: GitHubRepository;
branch: Branch;
accessToken: string;
accessToken: string | undefined;
repository: Repository;
constructor(
@@ -27,13 +28,17 @@ export default abstract class GitHubBase {
) {
this.type = data.type;
this.accessToken = data.accessToken;
const branches = [];
if (data.branch && data.commit) {
branches.push({ commit: data.commit, name: data.branch });
}
this.githubRepository = new GitHubRepository({
name: data.repositoryName,
externalId: data.repositoryId,
branches: [{ commit: data.commit, name: data.branch }],
branches,
});
this.repository = repository;
this.branch = { commit: data.commit, name: data.branch };
this.branch = branches[0];
}
async getFileContent(file: AnonymizedFile): Promise<Readable> {
@@ -50,28 +55,32 @@ export default abstract class GitHubBase {
});
}
static async checkToken(token: string) {
const octokit = new Octokit({ auth: token });
try {
await octokit.users.getAuthenticated();
return true;
} catch (error) {
return false;
}
}
async getToken() {
const user = await UserModel.findById(this.repository.owner.id);
if (user && user.accessTokens.github) {
return user.accessTokens.github as string;
}
if (this.accessToken) {
try {
// const app = new OAuthApp({
// clientType: "github-app",
// clientId: config.CLIENT_ID,
// clientSecret: config.CLIENT_SECRET,
// });
// await app.checkToken({
// token: this.accessToken,
// });
const check = await GitHubBase.checkToken(user.accessTokens.github);
if (check) {
this.accessToken = user.accessTokens.github;
return this.accessToken;
} catch (error) {
console.debug("[ERROR] Token is invalid", this.repository.repoId);
this.accessToken = config.GITHUB_TOKEN;
}
}
return config.GITHUB_TOKEN;
if (this.accessToken) {
if (await GitHubBase.checkToken(this.accessToken)) {
return this.accessToken;
}
}
this.accessToken = config.GITHUB_TOKEN;
return this.accessToken;
}
get url() {
@@ -82,8 +91,8 @@ export default abstract class GitHubBase {
return {
type: this.type,
fullName: this.githubRepository.fullName?.toString(),
branch: this.branch.name,
commit: this.branch.commit,
branch: this.branch?.name,
commit: this.branch?.commit,
};
}
}
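getToken() now validates a candidate token with a cheap authenticated call before using it and falls back to the shared application token otherwise. A standalone sketch of that check-and-fallback with @octokit/rest; pickToken and fallbackToken are illustrative names, not the project's API:

import { Octokit } from "@octokit/rest";

/** Returns true if the token can authenticate against the GitHub API. */
export async function checkToken(token: string): Promise<boolean> {
  const octokit = new Octokit({ auth: token });
  try {
    await octokit.users.getAuthenticated();
    return true;
  } catch {
    return false;
  }
}

/** Pick the first candidate token that works, otherwise use the app token. */
export async function pickToken(
  candidates: (string | undefined)[],
  fallbackToken: string
): Promise<string> {
  for (const token of candidates) {
    if (token && (await checkToken(token))) return token;
  }
  return fallbackToken;
}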

View File

@@ -1,15 +1,16 @@
import { Octokit } from "@octokit/rest";
import config from "../../config";
import storage from "../storage";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { SourceBase } from "../types";
import got from "got";
import { Readable } from "stream";
import { OctokitResponse } from "@octokit/types";
import config from "../../config";
import storage from "../storage";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { FILE_TYPE, RepositoryStatus, SourceBase } from "../types";
import AnonymousError from "../AnonymousError";
import { tryCatch } from "bullmq";
export default class GitHubDownload extends GitHubBase implements SourceBase {
constructor(
@@ -56,42 +57,48 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
}
response = await this._getZipUrl(token);
} catch (error) {
if (error.status == 401 && config.GITHUB_TOKEN) {
if ((error as any).status == 401 && config.GITHUB_TOKEN) {
try {
response = await this._getZipUrl(config.GITHUB_TOKEN);
} catch (error) {
await this.repository.resetSate("error", "repo_not_accessible");
await this.repository.resetSate(
RepositoryStatus.ERROR,
"repo_not_accessible"
);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
cause: error,
cause: error as Error,
object: this.repository,
});
}
} else {
await this.repository.resetSate("error", "repo_not_accessible");
await this.repository.resetSate(
RepositoryStatus.ERROR,
"repo_not_accessible"
);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
object: this.repository,
cause: error,
cause: error as Error,
});
}
}
await this.repository.updateStatus("download");
await this.repository.updateStatus(RepositoryStatus.DOWNLOAD);
const originalPath = this.repository.originalCachePath;
await storage.mk(originalPath);
let progress = null;
let progress: { transferred: number } | undefined = undefined;
let progressTimeout;
let inDownload = true;
const that = this;
async function updateProgress() {
if (progress) {
await that.repository.updateStatus(
that.repository.status,
progress.transferred
);
}
if (inDownload) {
if (progress && that.repository.status == RepositoryStatus.DOWNLOAD) {
await that.repository.updateStatus(
that.repository.status,
progress.transferred.toString()
);
}
progressTimeout = setTimeout(updateProgress, 1500);
}
}
@@ -99,13 +106,18 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
try {
const downloadStream = got.stream(response.url);
downloadStream.addListener("downloadProgress", (p) => (progress = p));
await storage.extractZip(originalPath, downloadStream, null, this);
downloadStream.addListener("downloadProgress", async (p) => {
progress = p;
});
await storage.extractZip(originalPath, downloadStream, undefined, this);
} catch (error) {
await this.repository.updateStatus("error", "unable_to_download");
await this.repository.updateStatus(
RepositoryStatus.ERROR,
"unable_to_download"
);
throw new AnonymousError("unable_to_download", {
httpStatus: 500,
cause: error,
cause: error as Error,
object: this.repository,
});
} finally {
@@ -113,22 +125,35 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
clearTimeout(progressTimeout);
}
await this.repository.updateStatus("ready");
this.repository.model.isReseted = false;
try {
await this.repository.updateStatus(RepositoryStatus.READY);
} catch (error) {
console.error(error);
}
}
async getFileContent(file: AnonymizedFile): Promise<Readable> {
if (await storage.exists(file.originalCachePath)) {
const exists = await storage.exists(file.originalCachePath);
if (exists === FILE_TYPE.FILE) {
return storage.read(file.originalCachePath);
} else if (exists === FILE_TYPE.FOLDER) {
throw new AnonymousError("folder_not_supported", {
httpStatus: 400,
object: file,
});
}
// will throw an error if the file is not in the repository
await file.originalPath();
// the cache is not ready, we need to download the repository
await this.download();
// update the file list
await this.repository.files({ force: true });
return storage.read(file.originalCachePath);
}
async getFiles() {
const folder = this.repository.originalCachePath;
if (!(await storage.exists(folder))) {
if ((await storage.exists(folder)) === FILE_TYPE.NOT_FOUND) {
await this.download();
}
return storage.listFiles(folder);
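The download path streams the zipball with got and reports the transferred byte count roughly every 1.5 seconds while the transfer is in flight. A reduced sketch of stream-with-progress; the report callback and the write-to-disk destination are stand-ins for repository.updateStatus and the zip extraction used above:

import got from "got";
import { pipeline } from "stream/promises";
import { createWriteStream } from "fs";

// Download `url` to `destination`, reporting transferred bytes every 1.5s.
export async function downloadWithProgress(
  url: string,
  destination: string,
  report: (transferredBytes: number) => Promise<void>
): Promise<void> {
  let transferred = 0;
  const downloadStream = got.stream(url);
  downloadStream.on("downloadProgress", (p) => {
    transferred = p.transferred;
  });

  const timer = setInterval(() => {
    report(transferred).catch((error) => console.error("[ERROR] progress", error));
  }, 1500);

  try {
    await pipeline(downloadStream, createWriteStream(destination));
  } finally {
    clearInterval(timer);
  }
}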

View File

@@ -1,14 +1,15 @@
import { Branch } from "../types";
import * as gh from "parse-github-url";
import { IRepositoryDocument } from "../database/repositories/repositories.types";
import { Octokit } from "@octokit/rest";
import { Octokit, RestEndpointMethodTypes } from "@octokit/rest";
import RepositoryModel from "../database/repositories/repositories.model";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../database/database";
export class GitHubRepository {
private _data: Partial<
{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }
>;
private _data: Partial<{
[P in keyof IRepositoryDocument]: IRepositoryDocument[P];
}>;
constructor(
data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
) {
@@ -31,18 +32,33 @@ export class GitHubRepository {
return this._data;
}
public get fullName(): string {
public get fullName(): string | undefined {
return this._data.name;
}
public get id(): string {
public get id(): string | undefined {
return this._data.externalId;
}
public get size(): number {
public get size(): number | undefined {
return this._data.size;
}
async getCommitInfo(
sha: string,
opt: {
accessToken?: string;
}
) {
const octokit = new Octokit({ auth: opt.accessToken });
const commit = await octokit.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: sha,
});
return commit.data;
}
async branches(opt: {
accessToken?: string;
force?: boolean;
@@ -54,49 +70,61 @@ export class GitHubRepository {
) {
// get the list of repo from github
const octokit = new Octokit({ auth: opt.accessToken });
const branches = (
await octokit.paginate(octokit.repos.listBranches, {
owner: this.owner,
repo: this.repo,
per_page: 100,
})
).map((b) => {
return {
name: b.name,
commit: b.commit.sha,
readme: this._data.branches?.filter(
(f: Branch) => f.name == b.name
)[0]?.readme,
} as Branch;
});
this._data.branches = branches;
await RepositoryModel.updateOne(
{ externalId: this.id },
{ $set: { branches } }
try {
const branches = (
await octokit.paginate("GET /repos/{owner}/{repo}/branches", {
owner: this.owner,
repo: this.repo,
per_page: 100,
})
).map((b) => {
return {
name: b.name,
commit: b.commit.sha,
readme: this._data.branches?.filter(
(f: Branch) => f.name == b.name
)[0]?.readme,
} as Branch;
});
this._data.branches = branches;
if (isConnected) {
await RepositoryModel.updateOne(
{ externalId: this.id },
{ $set: { branches } }
);
}
} catch (error) {
throw new AnonymousError("repo_not_found", {
httpStatus: (error as any).status,
cause: error as Error,
object: this,
});
}
} else if (isConnected) {
const q = await RepositoryModel.findOne({ externalId: this.id }).select(
"branches"
);
} else {
this._data.branches = (
await RepositoryModel.findOne({ externalId: this.id }).select(
"branches"
)
).branches;
this._data.branches = q?.branches;
}
return this._data.branches;
return this._data.branches || [];
}
async readme(opt: {
branch?: string;
force?: boolean;
accessToken?: string;
}): Promise<string> {
}): Promise<string | undefined> {
if (!opt.branch) opt.branch = this._data.defaultBranch || "master";
const model = await RepositoryModel.findOne({
externalId: this.id,
}).select("branches");
if (!model) {
throw new AnonymousError("repo_not_found", { httpStatus: 404 });
}
this._data.branches = await this.branches(opt);
model.branches = this._data.branches;
@@ -119,7 +147,7 @@ export class GitHubRepository {
} catch (error) {
throw new AnonymousError("readme_not_available", {
httpStatus: 404,
cause: error,
cause: error as Error,
object: this,
});
}
@@ -136,6 +164,12 @@ export class GitHubRepository {
}
public get owner(): string {
if (!this.fullName) {
throw new AnonymousError("invalid_repo", {
httpStatus: 400,
object: this,
});
}
const repo = gh(this.fullName);
if (!repo) {
throw new AnonymousError("invalid_repo", {
@@ -147,6 +181,12 @@ export class GitHubRepository {
}
public get repo(): string {
if (!this.fullName) {
throw new AnonymousError("invalid_repo", {
httpStatus: 400,
object: this,
});
}
const repo = gh(this.fullName);
if (!repo) {
throw new AnonymousError("invalid_repo", {
@@ -167,7 +207,7 @@ export async function getRepositoryFromGitHub(opt: {
opt.repo = opt.repo.replace(".git", "");
}
const octokit = new Octokit({ auth: opt.accessToken });
let r;
let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
try {
r = (
await octokit.repos.get({
@@ -177,12 +217,12 @@ export async function getRepositoryFromGitHub(opt: {
).data;
} catch (error) {
throw new AnonymousError("repo_not_found", {
httpStatus: error.status,
httpStatus: (error as any).status,
object: {
owner: opt.owner,
repo: opt.repo,
},
cause: error,
cause: error as Error,
});
}
if (!r)
@@ -193,9 +233,12 @@ export async function getRepositoryFromGitHub(opt: {
repo: opt.repo,
},
});
let model = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
if (!model) {
model = new RepositoryModel({ externalId: "gh_" + r.id });
let model = new RepositoryModel({ externalId: "gh_" + r.id });
if (isConnected) {
const dbModel = await RepositoryModel.findOne({ externalId: "gh_" + r.id });
if (dbModel) {
model = dbModel;
}
}
model.name = r.full_name;
model.url = r.html_url;
@@ -209,6 +252,8 @@ export async function getRepositoryFromGitHub(opt: {
});
model.pageSource = ghPageRes.data.source;
}
await model.save();
if (isConnected) {
await model.save();
}
return new GitHubRepository(model);
}
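Branch listing now goes through octokit.paginate on the raw route and only persists to MongoDB when a connection is available. A minimal sketch of the pagination call itself; listBranches and BranchInfo are illustrative names:

import { Octokit } from "@octokit/rest";

export interface BranchInfo {
  name: string;
  commit: string;
}

/** List every branch of owner/repo, following pagination (100 per page). */
export async function listBranches(
  owner: string,
  repo: string,
  accessToken?: string
): Promise<BranchInfo[]> {
  const octokit = new Octokit({ auth: accessToken });
  const branches = await octokit.paginate("GET /repos/{owner}/{repo}/branches", {
    owner,
    repo,
    per_page: 100,
  });
  return branches.map((b) => ({ name: b.name, commit: b.commit.sha }));
}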

View File

@@ -3,7 +3,7 @@ import AnonymizedFile from "../AnonymizedFile";
import Repository from "../Repository";
import GitHubBase from "./GitHubBase";
import storage from "../storage";
import { SourceBase, Tree } from "../types";
import { RepositoryStatus, SourceBase, Tree } from "../types";
import * as path from "path";
import * as stream from "stream";
@@ -26,20 +26,22 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
if (!file.sha)
throw new AnonymousError("file_sha_not_provided", {
httpStatus: 400,
object: file,
});
const octokit = new Octokit({
auth: await this.getToken(),
});
const file_sha = await file.sha();
if (!file_sha) {
throw new AnonymousError("file_not_accessible", {
httpStatus: 404,
object: file,
});
}
try {
const ghRes = await octokit.rest.git.getBlob({
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
file_sha: file.sha,
file_sha,
});
if (!ghRes.data.content && ghRes.data.size != 0) {
throw new AnonymousError("file_not_accessible", {
@@ -57,28 +59,34 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
} else {
content = Buffer.from("");
}
if (this.repository.status != "ready")
await this.repository.updateStatus("ready");
await storage.write(file.originalCachePath, content, file, this);
this.repository.model.isReseted = false;
await this.repository.model.save();
if (this.repository.status !== RepositoryStatus.READY)
await this.repository.updateStatus(RepositoryStatus.READY);
return stream.Readable.from(content);
} catch (error) {
if (error.status == 404) {
if ((error as any).status === 404 || (error as any).httpStatus === 404) {
throw new AnonymousError("file_not_found", {
httpStatus: error.status,
cause: error,
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
throw new AnonymousError("file_too_big", {
httpStatus: error.status,
cause: error,
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
}
}
async getFiles() {
return this.getTree(this.branch.commit);
let commit = this.branch?.commit;
if (!commit && this.repository.model.source.commit) {
commit = this.repository.model.source.commit;
}
return this.getTree(commit);
}
private async getTree(
@@ -97,20 +105,25 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
count.request++;
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
if (error.status == 409) {
if ((error as any).status == 409) {
// empty tree
if (this.repository.status != "ready")
await this.repository.updateStatus("ready");
if (this.repository.status != RepositoryStatus.READY)
await this.repository.updateStatus(RepositoryStatus.READY);
// cannot be empty otherwise it would try to download it again
return { __: {} };
} else {
console.log(
`[ERROR] getTree ${this.repository.repoId}@${sha}: ${error.message}`
`[ERROR] getTree ${this.repository.repoId}@${sha}: ${
(error as Error).message
}`
);
await this.repository.resetSate(
RepositoryStatus.ERROR,
"repo_not_accessible"
);
await this.repository.resetSate("error", "repo_not_accessible");
throw new AnonymousError("repo_not_accessible", {
httpStatus: error.status,
cause: error,
httpStatus: (error as any).status,
cause: error as Error,
object: {
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
@@ -124,8 +137,8 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
if (ghRes.truncated) {
await this.getTruncatedTree(sha, tree, parentPath, count);
}
if (this.repository.status != "ready")
await this.repository.updateStatus("ready");
if (this.repository.status !== RepositoryStatus.READY)
await this.repository.updateStatus(RepositoryStatus.READY);
return tree;
}
@@ -163,8 +176,8 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
if (data.tree.length < 100 && count.request < 200) {
const promises: Promise<any>[] = [];
for (const file of data.tree) {
const elementPath = path.join(parentPath, file.path);
if (file.type == "tree") {
if (file.type == "tree" && file.path && file.sha) {
const elementPath = path.join(parentPath, file.path);
promises.push(
this.getTruncatedTree(
file.sha,
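getFileContent now resolves the blob SHA via await file.sha() and fetches the content with octokit.rest.git.getBlob, decoding the base64 payload before streaming it back. A self-contained sketch of that fetch-and-decode step (readBlob is an illustrative name):

import { Octokit } from "@octokit/rest";
import { Readable } from "stream";

/** Fetch a blob by SHA and return its decoded content as a stream. */
export async function readBlob(
  owner: string,
  repo: string,
  fileSha: string,
  accessToken?: string
): Promise<Readable> {
  const octokit = new Octokit({ auth: accessToken });
  const ghRes = await octokit.rest.git.getBlob({ owner, repo, file_sha: fileSha });
  // GitHub returns blob content base64-encoded; an empty file has no content.
  const content = ghRes.data.content
    ? Buffer.from(ghRes.data.content, "base64")
    : Buffer.from("");
  return Readable.from(content);
}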

View File

@@ -1,8 +1,7 @@
import config from "../config";
import FileSystem from "./storage/FileSystem";
import S3Storage from "./storage/S3";
import { StorageBase } from "./types";
const storage = config.STORAGE == "s3" ? new S3Storage() : new FileSystem();
export default storage as StorageBase;
export default (() => {
return config.STORAGE == "s3" ? new S3Storage() : new FileSystem();
})();

View File

@@ -1,14 +1,15 @@
import { SourceBase, StorageBase, Tree } from "../types";
import { FILE_TYPE, SourceBase, StorageBase, Tree } from "../types";
import config from "../../config";
import * as fs from "fs";
import { Extract } from "unzip-stream";
import { join, basename, dirname } from "path";
import { Response } from "express";
import { Readable, pipeline } from "stream";
import { Readable, pipeline, Transform } from "stream";
import * as archiver from "archiver";
import { promisify } from "util";
import AnonymizedFile from "../AnonymizedFile";
import { lookup } from "mime-types";
export default class FileSystem implements StorageBase {
type = "FileSystem";
@@ -16,27 +17,46 @@ export default class FileSystem implements StorageBase {
constructor() {}
/** @override */
async exists(p: string): Promise<boolean> {
return fs.existsSync(join(config.FOLDER, p));
async exists(p: string): Promise<FILE_TYPE> {
try {
const stat = await fs.promises.stat(join(config.FOLDER, p));
if (stat.isDirectory()) return FILE_TYPE.FOLDER;
if (stat.isFile()) return FILE_TYPE.FILE;
} catch (_) {
// ignore file not found or not downloaded
}
return FILE_TYPE.NOT_FOUND;
}
/** @override */
send(p: string, res: Response) {
async send(p: string, res: Response) {
res.sendFile(join(config.FOLDER, p), { dotfiles: "allow" });
}
/** @override */
read(p: string): Readable {
async read(p: string): Promise<Readable> {
return fs.createReadStream(join(config.FOLDER, p));
}
async fileInfo(path: string) {
const info = await fs.promises.stat(join(config.FOLDER, path));
return {
size: info.size,
lastModified: info.mtime,
contentType: info.isDirectory()
? "application/x-directory"
: (lookup(join(config.FOLDER, path)) as string),
};
}
/** @override */
async write(p: string, data: Buffer, file?: AnonymizedFile, source?: SourceBase): Promise<void> {
if (!(await this.exists(dirname(p)))) {
await fs.promises.mkdir(dirname(join(config.FOLDER, p)), {
recursive: true,
});
}
async write(
p: string,
data: Buffer,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void> {
await this.mk(dirname(p));
return fs.promises.writeFile(join(config.FOLDER, p), data);
}
@@ -50,7 +70,7 @@ export default class FileSystem implements StorageBase {
/** @override */
async mk(dir: string): Promise<void> {
if (!(await this.exists(dir)))
if ((await this.exists(dir)) === FILE_TYPE.NOT_FOUND)
fs.promises.mkdir(join(config.FOLDER, dir), { recursive: true });
}
@@ -93,12 +113,17 @@ export default class FileSystem implements StorageBase {
}
/** @override */
async extractZip(p: string, data: Readable, file?: AnonymizedFile, source?: SourceBase): Promise<void> {
async extractZip(
p: string,
data: Readable,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void> {
const pipe = promisify(pipeline);
return pipe(
data,
Extract({
path: join(join(config.FOLDER, p)),
path: join(config.FOLDER, p),
decodeString: (buf) => {
const name = buf.toString();
const newName = name.substr(name.indexOf("/") + 1);
@@ -110,18 +135,18 @@ export default class FileSystem implements StorageBase {
}
/** @override */
archive(
async archive(
dir: string,
opt?: {
format?: "zip" | "tar";
fileTransformer?;
fileTransformer?: (path: string) => Transform;
}
) {
const archive = archiver(opt?.format, {});
const archive = archiver(opt?.format || "zip", {});
this.listFiles(dir, {
onEntry: (file) => {
let rs = this.read(file.path);
onEntry: async (file) => {
let rs = await this.read(file.path);
if (opt?.fileTransformer) {
// apply transformation on the stream
rs = rs.pipe(opt.fileTransformer(file.path));
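exists() now returns a three-valued FILE_TYPE instead of a boolean so callers can tell files, folders, and missing paths apart. A standalone sketch of that classification with fs.promises.stat; pathType and root are illustrative names:

import * as fs from "fs";
import { join } from "path";

export enum FILE_TYPE {
  FILE = "file",
  FOLDER = "folder",
  NOT_FOUND = "not_found",
}

/** Classify a path relative to `root` as file, folder, or missing. */
export async function pathType(root: string, p: string): Promise<FILE_TYPE> {
  try {
    const stat = await fs.promises.stat(join(root, p));
    if (stat.isDirectory()) return FILE_TYPE.FOLDER;
    if (stat.isFile()) return FILE_TYPE.FILE;
  } catch (_) {
    // stat failure (e.g. ENOENT) is treated as "not found"
  }
  return FILE_TYPE.NOT_FOUND;
}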

View File

@@ -1,11 +1,16 @@
import { SourceBase, StorageBase, Tree, TreeFile } from "../types";
import { S3 } from "aws-sdk";
import { FILE_TYPE, SourceBase, StorageBase, Tree, TreeFile } from "../types";
import {
GetObjectCommand,
ListObjectsV2CommandOutput,
PutObjectCommandInput,
S3,
} from "@aws-sdk/client-s3";
import { NodeHttpHandler } from "@aws-sdk/node-http-handler";
import config from "../../config";
import { pipeline, Readable } from "stream";
import { pipeline, Readable, Transform } from "stream";
import ArchiveStreamToS3 from "decompress-stream-to-s3";
import { Response } from "express";
import { lookup } from "mime-types";
import * as flow from "xml-flow";
import * as archiver from "archiver";
import { dirname, basename } from "path";
import AnonymousError from "../AnonymousError";
@@ -21,62 +26,73 @@ export default class S3Storage implements StorageBase {
});
}
get client() {
private client(timeout = 10000) {
if (!config.S3_CLIENT_ID) throw new Error("S3_CLIENT_ID not set");
if (!config.S3_CLIENT_SECRET) throw new Error("S3_CLIENT_SECRET not set");
return new S3({
credentials: {
accessKeyId: config.S3_CLIENT_ID,
secretAccessKey: config.S3_CLIENT_SECRET,
},
region: config.S3_REGION,
endpoint: config.S3_ENDPOINT,
accessKeyId: config.S3_CLIENT_ID,
secretAccessKey: config.S3_CLIENT_SECRET,
requestHandler: new NodeHttpHandler({
requestTimeout: timeout,
connectionTimeout: timeout,
}),
});
}
/** @override */
async exists(path: string): Promise<boolean> {
async exists(path: string): Promise<FILE_TYPE> {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
try {
await this.client
.headObject({
Bucket: config.S3_BUCKET,
Key: path,
})
.promise();
return true;
// if we can get the file info, it is a file
await this.fileInfo(path);
return FILE_TYPE.FILE;
} catch (err) {
return false;
// check if it is a directory
const data = await this.client().listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: path,
MaxKeys: 1,
});
return (data.Contents?.length || 0) > 0
? FILE_TYPE.FOLDER
: FILE_TYPE.NOT_FOUND;
}
}
/** @override */
async mk(dir: string): Promise<void> {
if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
await this.client
.putObject({
Bucket: config.S3_BUCKET,
Key: dir,
})
.promise();
// no need to create folder on S3
}
/** @override */
async rm(dir: string): Promise<void> {
const data = await this.client
.listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: dir,
})
.promise();
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
const data = await this.client(200000).listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: dir,
MaxKeys: 100,
});
const params = { Bucket: config.S3_BUCKET, Delete: { Objects: [] } };
const params = {
Bucket: config.S3_BUCKET,
Delete: { Objects: new Array<{ Key: string }>() },
};
data.Contents.forEach(function (content) {
params.Delete.Objects.push({ Key: content.Key });
data.Contents?.forEach(function (content) {
if (content.Key) {
params.Delete.Objects.push({ Key: content.Key });
}
});
if (params.Delete.Objects.length == 0) {
// nothing to remove
return;
}
await this.client.deleteObjects(params).promise();
await this.client(200000).deleteObjects(params);
if (data.IsTruncated) {
await this.rm(dir);
@@ -84,42 +100,65 @@ export default class S3Storage implements StorageBase {
}
/** @override */
send(p: string, res: Response) {
const s = this.client
.getObject({
async send(p: string, res: Response) {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
try {
const command = new GetObjectCommand({
Bucket: config.S3_BUCKET,
Key: p,
})
.on("error", (error) => {
try {
res.status(error.statusCode);
} catch (err) {
console.error(err);
}
})
.on("httpHeaders", (statusCode, headers, response) => {
res.status(statusCode);
if (statusCode < 300) {
res.set("Content-Length", headers["content-length"]);
res.set("Content-Type", headers["content-type"]);
}
pipeline(
response.httpResponse.createUnbufferedStream() as Readable,
res
);
});
const s = await this.client().send(command);
res.status(200);
if (s.ContentType) {
res.contentType(s.ContentType);
}
if (s.ContentLength) {
res.set("Content-Length", s.ContentLength.toString());
}
if (s.Body) {
(s.Body as Readable)?.pipe(res);
} else {
res.end();
}
} catch (error) {
try {
res.status(500);
} catch (err) {
console.error(`[ERROR] S3 send ${p}`, err);
}
}
}
s.send();
async fileInfo(path: string) {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
const info = await this.client(3000).headObject({
Bucket: config.S3_BUCKET,
Key: path,
});
return {
size: info.ContentLength,
lastModified: info.LastModified,
contentType: info.ContentType
? info.ContentType
: (lookup(path) as string),
};
}
/** @override */
read(path: string): Readable {
return this.client
.getObject({
Bucket: config.S3_BUCKET,
Key: path,
})
.createReadStream();
async read(path: string): Promise<Readable> {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
const command = new GetObjectCommand({
Bucket: config.S3_BUCKET,
Key: path,
});
const res = (await this.client(3000).send(command)).Body;
if (!res) {
throw new AnonymousError("file_not_found", {
httpStatus: 404,
object: path,
});
}
return res as Readable;
}
/** @override */
@@ -129,49 +168,59 @@ export default class S3Storage implements StorageBase {
file?: AnonymizedFile,
source?: SourceBase
): Promise<void> {
const params: S3.PutObjectRequest = {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
const params: PutObjectCommandInput = {
Bucket: config.S3_BUCKET,
Key: path,
Body: data,
ContentType: lookup(path).toString(),
};
if (source) {
params.Tagging = `source=${source.type}`
params.Tagging = `source=${source.type}`;
}
await this.client.putObject(params).promise();
// 30s timeout
await this.client(30000).putObject(params);
return;
}
/** @override */
async listFiles(dir: string): Promise<Tree> {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
const out: Tree = {};
const req = await this.client
.listObjectsV2({
let req: ListObjectsV2CommandOutput;
let nextContinuationToken: string | undefined;
do {
req = await this.client(30000).listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: dir,
})
.promise();
MaxKeys: 250,
ContinuationToken: nextContinuationToken,
});
if (!req.Contents) return out;
nextContinuationToken = req.NextContinuationToken;
if (!req.Contents) return out;
for (const f of req.Contents) {
if (!f.Key) continue;
f.Key = f.Key.replace(dir, "");
const paths = f.Key.split("/");
let current: Tree = out;
for (let i = 0; i < paths.length - 1; i++) {
let p = paths[i];
if (!p) continue;
if (!(current[p] as Tree)) {
current[p] = {} as Tree;
for (const f of req.Contents) {
if (!f.Key) continue;
f.Key = f.Key.replace(dir, "");
const paths = f.Key.split("/");
let current: Tree = out;
for (let i = 0; i < paths.length - 1; i++) {
let p = paths[i];
if (!p) continue;
if (!(current[p] as Tree)) {
current[p] = {} as Tree;
}
current = current[p] as Tree;
}
current = current[p] as Tree;
}
const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
const fileName = paths[paths.length - 1];
if (fileName) current[fileName] = fileInfo;
}
if (f.ETag) {
const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
const fileName = paths[paths.length - 1];
if (fileName) current[fileName] = fileInfo;
}
}
} while (req && req.Contents && req.IsTruncated);
return out;
}
@@ -185,59 +234,75 @@ export default class S3Storage implements StorageBase {
let toS3: ArchiveStreamToS3;
return new Promise((resolve, reject) => {
if (!config.S3_BUCKET) return reject("S3_BUCKET not set");
toS3 = new ArchiveStreamToS3({
bucket: config.S3_BUCKET,
prefix: p,
s3: this.client,
s3: this.client(2 * 60 * 60 * 1000), // 2h timeout
type: "zip",
onEntry: (header) => {
header.name = header.name.substr(header.name.indexOf("/") + 1);
header.name = header.name.substring(header.name.indexOf("/") + 1);
if (source) {
header.Tagging = `source=${source.type}`;
header.Metadata = {
source: source.type,
};
}
},
maxParallel: 10,
});
pipeline(data, toS3, () => {})
pipeline(data, toS3, (err) => {
if (err) {
return reject(err);
}
resolve();
})
.on("finish", resolve)
.on("error", reject);
});
}
/** @override */
archive(
async archive(
dir: string,
opt?: {
format?: "zip" | "tar";
fileTransformer?;
fileTransformer?: (p: string) => Transform;
}
) {
const archive = archiver(opt?.format, {});
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
const archive = archiver(opt?.format || "zip", {});
if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
const req = this.client.listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: dir,
});
const filesStream = req.createReadStream();
const xmlStream = flow(filesStream);
const that = this;
xmlStream.on("tag:contents", function (file) {
let rs = that.read(file.key);
file.key = file.key.replace(dir, "");
const filename = basename(file.key);
if (filename == "") return;
if (opt?.fileTransformer) {
rs = rs.pipe(opt.fileTransformer(filename));
}
archive.append(rs, {
name: filename,
prefix: dirname(file.key),
let req: ListObjectsV2CommandOutput;
let nextContinuationToken: string | undefined;
do {
req = await this.client(30000).listObjectsV2({
Bucket: config.S3_BUCKET,
Prefix: dir,
MaxKeys: 250,
ContinuationToken: nextContinuationToken,
});
});
xmlStream.on("end", () => {
archive.finalize();
});
nextContinuationToken = req.NextContinuationToken;
for (const f of req.Contents || []) {
if (!f.Key) continue;
const filename = basename(f.Key);
const prefix = dirname(f.Key.replace(dir, ""));
let rs = await this.read(f.Key);
if (opt?.fileTransformer) {
// apply transformation on the stream
rs = rs.pipe(opt.fileTransformer(f.Key));
}
archive.append(rs, {
name: filename,
prefix,
});
}
} while (req && req.Contents?.length && req.IsTruncated);
archive.finalize();
return archive;
}
}
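The S3 backend now uses the AWS SDK v3 client and follows NextContinuationToken so listings and archives see more than the first page of objects. A reduced sketch of that pagination loop; listAllKeys is an illustrative helper:

import { S3, ListObjectsV2CommandOutput } from "@aws-sdk/client-s3";

/** Collect all object keys under `prefix`, following continuation tokens. */
export async function listAllKeys(
  client: S3,
  bucket: string,
  prefix: string
): Promise<string[]> {
  const keys: string[] = [];
  let res: ListObjectsV2CommandOutput;
  let continuationToken: string | undefined;
  do {
    res = await client.listObjectsV2({
      Bucket: bucket,
      Prefix: prefix,
      MaxKeys: 250,
      ContinuationToken: continuationToken,
    });
    continuationToken = res.NextContinuationToken;
    for (const obj of res.Contents ?? []) {
      if (obj.Key) keys.push(obj.Key);
    }
  } while (res.IsTruncated);
  return keys;
}

A client built with new S3({ region: "..." }) (credentials resolved from the environment) can then be passed in; the bucket and prefix are placeholders, not values from the project's config.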

View File

@@ -4,8 +4,9 @@ import Zip from "./source/Zip";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
import AnonymizedFile from "./AnonymizedFile";
import * as stream from "stream";
import { Transform, Readable } from "stream";
import * as archiver from "archiver";
import { Response } from "express";
export interface SourceBase {
readonly type: string;
@@ -19,7 +20,7 @@ export interface SourceBase {
* Retrieve the file content
* @param file the file of the content to retrieve
*/
getFileContent(file: AnonymizedFile): Promise<stream.Readable>;
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
@@ -31,6 +32,12 @@ export interface SourceBase {
export type Source = GitHubDownload | GitHubStream | Zip;
export enum FILE_TYPE {
FILE = "file",
FOLDER = "folder",
NOT_FOUND = "not_found",
}
export interface StorageBase {
/**
* The type of storage
@@ -41,13 +48,21 @@ export interface StorageBase {
* check if the path exists
* @param path the path to check
*/
exists(path: string): Promise<boolean>;
exists(path: string): Promise<FILE_TYPE>;
send(p: string, res: Response): Promise<void>;
/**
* Read the content of a file
* @param path the path to the file
*/
read(path: string): stream.Readable;
read(path: string): Promise<Readable>;
fileInfo(path: string): Promise<{
size: number | undefined;
lastModified: Date | undefined;
contentType: string;
}>;
/**
* Write data to a file
@@ -56,7 +71,12 @@ export interface StorageBase {
* @param file the file
* @param source the source of the file
*/
write(path: string, data: Buffer, file?: AnonymizedFile, source?: SourceBase): Promise<void>;
write(
path: string,
data: Buffer,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void>;
/**
* List the files from dir
@@ -71,7 +91,12 @@ export interface StorageBase {
* @param file the file
* @param source the source of the file
*/
extractZip(dir: string, tar: stream.Readable, file?: AnonymizedFile, source?: SourceBase): Promise<void>;
extractZip(
dir: string,
tar: Readable,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void>;
/**
* Remove the path
@@ -94,9 +119,9 @@ export interface StorageBase {
/**
* Transformer to apply on the content of the file
*/
fileTransformer?: (p: any) => Transformer;
fileTransformer?: (p: string) => Transform;
}
): archiver.Archiver;
): Promise<archiver.Archiver>;
/**
* Create a directory
@@ -113,16 +138,17 @@ export interface Branch {
readme?: string;
}
export type RepositoryStatus =
| "queue"
| "preparing"
| "download"
| "ready"
| "expired"
| "expiring"
| "removed"
| "removing"
| "error";
export enum RepositoryStatus {
QUEUE = "queue",
PREPARING = "preparing",
DOWNLOAD = "download",
READY = "ready",
EXPIRED = "expired",
EXPIRING = "expiring",
REMOVED = "removed",
REMOVING = "removing",
ERROR = "error",
}
export type ConferenceStatus = "ready" | "expired" | "removed";
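RepositoryStatus changes from a string union to a string-valued enum; because each member serializes to the same literal, documents already stored with plain strings keep matching. A two-line illustration of that compatibility (only two members shown):

export enum RepositoryStatus {
  READY = "ready",
  ERROR = "error",
}

// Serializes to the same literal the old union used, so persisted documents
// still match queries such as { status: { $eq: RepositoryStatus.READY } }.
console.log(JSON.stringify({ status: RepositoryStatus.READY })); // {"status":"ready"}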

View File

@@ -3,14 +3,18 @@
"transpileOnly": true
},
"compilerOptions": {
"types": ["node"],
"lib": ["es6"],
"target": "es6",
"module": "commonjs",
"module": "CommonJS",
"outDir": "build",
"removeComments": true,
"preserveConstEnums": true,
"forceConsistentCasingInFileNames": true,
"sourceMap": false,
"skipLibCheck": true
"skipLibCheck": true,
"strict": true,
"esModuleInterop": false
},
"include": ["src/**/*.ts", "index.ts", "cli.ts"],
"exclude": ["node_modules", ".vscode"]