This commit is contained in:
tdurieux
2021-03-16 11:23:16 +01:00
parent 141d016aae
commit a2d0f0b212
475 changed files with 23855 additions and 3869 deletions

115
utils/anonymize.js Normal file
View File

@@ -0,0 +1,115 @@
const fs = require("fs").promises;
const ofs = require("fs");
const path = require("path");
const fileUtils = require("./file");
/**
 * Anonymize a text file content: strips images/links when configured,
 * rewrites GitHub URLs of the repository to their
 * anonymous.4open.science equivalent, and replaces every configured
 * term with "XXX".
 * @param {string} content the original file content
 * @param {object} repoConfig the anonymized repository configuration
 *   (uses options.image, options.link, fullName, branch, repoId, terms)
 * @returns {string} the anonymized content
 */
const ananymiseContent = (content, repoConfig) => {
  // Terms are user input: escape regex metacharacters so they are
  // matched literally (a term "a.b" must not match "axb", and "c++"
  // must not throw a SyntaxError when compiled as a pattern).
  const escapeTerm = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const urlRegex = /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
  if (repoConfig.options.image === false) {
    // remove image in markdown
    content = content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      ""
    );
  }
  if (!repoConfig.options.link) {
    // remove all links
    content = content.replace(urlRegex, "XXX");
  }
  content = content.replace(
    new RegExp(
      `https://github.com/${repoConfig.fullName}/blob/${repoConfig.branch}\\b`,
      "gi"
    ),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  content = content.replace(
    new RegExp(
      `https://github.com/${repoConfig.fullName}/tree/${repoConfig.branch}\\b`,
      "gi"
    ),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  content = content.replace(
    new RegExp(`https://github.com/${repoConfig.fullName}`, "gi"),
    `https://anonymous.4open.science/r/${repoConfig.repoId}`
  );
  for (let term of repoConfig.terms) {
    if (term.trim() == "") {
      continue;
    }
    // non-global regex for .test: a /g regex would keep a stateful
    // lastIndex between matches and skip occurrences
    const termTest = new RegExp(`\\b${escapeTerm(term)}\\b`, "i");
    // remove the whole url if it contains the term
    content = content.replace(urlRegex, (match) =>
      termTest.test(match) ? "XXX" : match
    );
    // remove the term in the text
    content = content.replace(
      new RegExp(`\\b${escapeTerm(term)}\\b`, "gi"),
      "XXX"
    );
  }
  return content;
};
/**
 * Anonymize a file path by replacing every configured term with "XXX".
 * Terms are matched literally (regex metacharacters are escaped) and
 * case-insensitively.
 * @param {string} path the path to anonymize
 * @param {object} repoConfig the repository configuration (uses `terms`)
 * @returns {string} the anonymized path
 */
const ananymisePath = (path, repoConfig) => {
  // escape regex metacharacters so a term such as "a.b" or "c++" is
  // treated as plain text instead of a (possibly invalid) pattern
  const escapeTerm = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  for (let term of repoConfig.terms) {
    if (term.trim() == "") {
      continue;
    }
    path = path.replace(new RegExp(escapeTerm(term), "gi"), "XXX");
  }
  return path;
};
/**
 * Recursively yield the absolute path of every regular file below `dir`.
 * @param {string} dir the root directory to traverse
 * @yields {string} absolute path of each file
 */
async function* walk(dir) {
  for await (const d of await fs.opendir(dir)) {
    const entry = path.join(dir, d.name);
    // walk(entry) returns an async generator: delegate to it directly
    // with yield* (the previous `await` on the generator was a no-op)
    if (d.isDirectory()) yield* walk(entry);
    else if (d.isFile()) yield entry;
  }
}
/**
 * Anonymize every file of `root` into `destination`: file paths and
 * text contents are anonymized. On failure the partially written
 * destination folder is removed before the error is rethrown.
 * @param {string} root the folder that contains the original files
 * @param {string} destination the output folder (created if missing)
 * @param {object} repoConfig the repository configuration
 */
const anonymizeFolder = async (root, destination, repoConfig) => {
  if (!ofs.existsSync(destination)) {
    await fs.mkdir(destination, { recursive: true });
  }
  try {
    for await (const originalFilePath of walk(root)) {
      const destinationFilePath = path.join(
        destination,
        ananymisePath(originalFilePath.replace(root, ""), repoConfig)
      );
      const destinationFolder = path.dirname(destinationFilePath);
      if (!ofs.existsSync(destinationFolder)) {
        await fs.mkdir(destinationFolder, { recursive: true });
      }
      await ananymiseFile(originalFilePath, destinationFilePath, repoConfig);
    }
  } catch (error) {
    // clean up the partial output; await the removal (it was a floating
    // promise) but do not let a cleanup failure mask the original error
    await fs.rm(destination, { recursive: true, force: true }).catch(() => {});
    throw error;
  }
};
/**
 * Anonymize a single file: text files have their content anonymized,
 * binary files are copied unchanged.
 * @param {string} filePath the original file
 * @param {string} target the destination of the anonymized file
 * @param {object} repoConfig the repository configuration
 */
const ananymiseFile = async (filePath, target, repoConfig) => {
  const targetFolder = path.dirname(target);
  if (!ofs.existsSync(targetFolder)) {
    await fs.mkdir(targetFolder, { recursive: true });
  }
  if (!fileUtils.isText(filePath)) {
    // binary file: copy as-is
    await fs.copyFile(filePath, target);
    return;
  }
  const original = await fs.readFile(filePath);
  const anonymized = ananymiseContent(original.toString(), repoConfig);
  await fs.writeFile(target, anonymized);
};
// Public API. The historical "ananymise" spelling is kept as-is because
// other modules import these exact names.
module.exports.ananymiseFile = ananymiseFile;
module.exports.ananymisePath = ananymisePath;
module.exports.anonymizeFolder = anonymizeFolder;
module.exports.ananymiseContent = ananymiseContent;

39
utils/database.js Normal file
View File

@@ -0,0 +1,39 @@
const config = require("../config");
var MongoClient = require("mongodb").MongoClient;
// NOTE(review): credentials are hard-coded in the connection string —
// consider reading them from config/environment instead (`config` is
// already required above but unused here).
const MONGO_URL = "mongodb://root:rootpassword@mongodb:27017/?authSource=admin";
// Shared connection state, populated by connect() below.
let mongoClient = null;
let DB = null;
/**
 * Access the database or one of its collections.
 * @param {string} [collection] collection name; when omitted the whole
 *   database handle is returned
 * @returns the collection, or the database handle when no name is given
 */
module.exports.get = (collection) =>
  collection ? DB.collection(collection) : DB;
/**
 * Connect to MongoDB and make sure all the indexes exist.
 * @returns the `anonymous_github` database handle
 */
module.exports.connect = async () => {
  mongoClient = await MongoClient.connect(MONGO_URL, {
    useNewUrlParser: true,
    useUnifiedTopology: true,
  });
  DB = mongoClient.db("anonymous_github");
  // the index creations are independent: run them in parallel
  await Promise.all([
    DB.collection("anonymized_repositories").createIndex(
      { repoId: 1 },
      { unique: true, name: "repoId" }
    ),
    DB.collection("anonymized_repositories").createIndex(
      { fullName: 1 },
      { name: "fullName" }
    ),
    DB.collection("repositories").createIndex(
      { fullName: 1 },
      { unique: true, name: "fullName" }
    ),
    DB.collection("users").createIndex(
      { username: 1 },
      { unique: true, name: "username" }
    ),
  ]);
  return DB;
};
/**
 * Close the MongoDB connection.
 * Safe to call even when connect() was never called (the previous code
 * would throw on the null client).
 */
module.exports.close = async () => {
  if (mongoClient == null) return;
  return await mongoClient.close();
};

489
utils/file.js Normal file
View File

@@ -0,0 +1,489 @@
const ofs = require("fs");
const fs = require("fs").promises;
const path = require("path");
const { Octokit } = require("@octokit/rest");
const gh = require("parse-github-url");
const loc = require("@umijs/linguist");
const { isText } = require("istextorbinary");
const db = require("./database");
const repoUtils = require("./repository");
const githubUtils = require("./github");
const anonymizeUtils = require("./anonymize");
const config = require("../config");
/**
 * Recursively build the file tree of a local directory.
 * Folders become `{ child: {...}, sha }` nodes and files become
 * `{ size, sha }` leaves, where `sha` is the inode number of the entry.
 * A leading "$" in a name is escaped as "\$" in the tree keys.
 * @param {string} dir the directory to traverse
 * @param {string} [root] kept for backward compatibility (unused)
 * @returns {Promise<object>} the tree rooted at `dir`
 */
async function walk(dir, root) {
  if (root == null) {
    root = dir;
  }
  const output = { child: {} };
  for (let name of await fs.readdir(dir)) {
    const entryPath = path.join(dir, name);
    const stats = await fs.stat(entryPath);
    // "$" is a reserved prefix in the stored tree: escape it in the key
    const key = name[0] == "$" ? "\\" + name : name;
    if (stats.isDirectory()) {
      output.child[key] = await walk(entryPath, root);
      output.child[key].sha = stats.ino;
    } else if (stats.isFile()) {
      output.child[key] = { size: stats.size, sha: stats.ino };
    }
  }
  return output;
}
/**
 * Convert a flat GitHub git-tree listing into the nested tree
 * representation used by the application.
 * Blob entries become `{ size, sha }` leaves; tree entries get their
 * `sha` stored on the folder node. Keys starting with "$" are escaped
 * as "\$".
 * @param {Array} tree flat list of {path, type, size, sha} entries
 * @param {object} [partialTree] tree to merge into (created when omitted)
 * @param {string} [parentPath] prefix applied to every entry path
 * @returns {object} the nested tree
 */
function tree2tree(tree, partialTree, parentPath) {
  if (!parentPath) parentPath = "";
  if (partialTree == null) {
    partialTree = { child: Object.create(null) };
  }
  const escape = (name) => (name[0] == "$" ? "\\" + name : name);
  for (const entry of tree) {
    const segments = path.join(parentPath, entry.path).split("/");
    // walk down to the node that will hold the entry: for a folder
    // descend through every segment, for a blob stop at its parent
    const depth = entry.type == "tree" ? segments.length : segments.length - 1;
    let node = partialTree;
    for (let i = 0; i < depth; i++) {
      const key = escape(segments[i]);
      if (!node.child[key]) {
        node.child[key] = { child: Object.create(null) };
      }
      node = node.child[key];
    }
    if (entry.type == "blob") {
      node.child[escape(segments[depth])] = {
        size: entry.size,
        sha: entry.sha,
      };
    } else {
      node.sha = entry.sha;
    }
  }
  return partialTree;
}
// Complete a truncated GitHub tree response: for every sub-tree entry of
// the current level that is not yet present in `truncatedTree`, fetch it
// recursively through getTree, then merge this level into the
// accumulated tree.
// NOTE(review): assumes `parentPath` is provided by the caller (getTree
// defaults it to "") — path.join(undefined, ...) would throw.
async function getTruncatedTree(repoConfig, truncatedTree, sha, parentPath) {
  const repo = gh(repoConfig.fullName);
  if (!sha) {
    // default to the configured commit, or the branch HEAD
    sha = repoConfig.commit ? repoConfig.commit : "HEAD";
  }
  const octokit = new Octokit({
    auth: await githubUtils.getToken(repoConfig),
  });
  // non-recursive listing: only the entries of the current level
  const ghRes = await octokit.git.getTree({
    owner: repo.owner,
    repo: repo.name,
    tree_sha: sha,
  });
  const tree = ghRes.data.tree;
  for (let elem of tree) {
    if (elem.type == "tree") {
      const elementPath = path.join(parentPath, elem.path);
      const paths = elementPath.split("/");
      let current = truncatedTree;
      // descend along the path; any missing segment means this sub-tree
      // was cut off by the truncation and must be fetched
      for (let i = 0; i < paths.length; i++) {
        let p = paths[i];
        if (!current.child[p]) {
          await module.exports.getTree(
            repoConfig,
            elem.sha,
            truncatedTree,
            elementPath
          );
          break;
        }
        current = current.child[p];
      }
    }
  }
  // merge the files/folders of this level into the accumulated tree
  tree2tree(ghRes.data.tree, truncatedTree, parentPath);
  return truncatedTree;
}
/**
 * Fetch the full file tree of a repository from GitHub.
 * Uses the recursive tree API; when GitHub truncates the response the
 * missing sub-trees are completed level by level (see getTruncatedTree).
 * @param {object} repoConfig the repository configuration
 * @param {string} [sha] tree sha (defaults to the configured commit/HEAD)
 * @param {object} [truncatedTree] existing tree to merge into
 * @param {string} [parentPath] path prefix of this tree
 * @returns {Promise<object>} the nested file tree
 */
module.exports.getTree = async (repoConfig, sha, truncatedTree, parentPath) => {
  const repo = gh(repoConfig.fullName);
  if (!sha) {
    sha = repoConfig.commit ? repoConfig.commit : "HEAD";
  }
  if (!parentPath) parentPath = "";
  const octokit = new Octokit({
    auth: await githubUtils.getToken(repoConfig),
  });
  const ghRes = await octokit.git.getTree({
    owner: repo.owner,
    repo: repo.name,
    tree_sha: sha,
    recursive: true,
  });
  const tree = tree2tree(ghRes.data.tree, truncatedTree, parentPath);
  // GitHub truncates very large trees: fetch the missing parts
  if (ghRes.data.truncated) {
    await getTruncatedTree(repoConfig, tree, sha, parentPath);
  }
  return tree;
};
/**
 * Return the anonymized file tree of a repository.
 * The tree is cached in the database; on a cache miss it is built either
 * from the GitHub API (stream mode) or from the locally downloaded files
 * (download mode), and both the original and the anonymized trees are
 * stored back.
 * @param {object} options {repoConfig} or {repoId}
 * @returns {Promise<object>} the anonymized file tree
 * @throws {string} "repo_not_found" or "non_supported_mode"
 */
module.exports.getFileList = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  // cached anonymized tree
  const r = await db.get("anonymized_repositories").findOne(
    { repoId: repoConfig.repoId },
    {
      projection: { files: 1 },
    }
  );
  if (r && r.files) {
    return r.files;
  }
  if (repoConfig.options.mode == "stream") {
    // get file list from github
    const tree = await module.exports.getTree(repoConfig, repoConfig.commit);
    const files = anonymizeTree(tree, repoConfig);
    await db.get("anonymized_repositories").updateOne(
      { repoId: repoConfig.repoId },
      {
        $set: {
          originalFiles: tree.child,
          files,
        },
      },
      { upsert: true }
    );
    return files;
  } else if (repoConfig.options.mode == "download") {
    // build the tree from the locally downloaded repository
    const originalFiles = await walk(
      repoUtils.getOriginalPath(repoConfig.repoId)
    );
    const files = anonymizeTree(originalFiles, repoConfig);
    await db.get("anonymized_repositories").updateOne(
      { repoId: repoConfig.repoId },
      {
        $set: {
          originalFiles: originalFiles.child,
          files,
        },
      },
      { upsert: true }
    );
    return files;
  } else {
    throw "non_supported_mode";
  }
};
/**
 * Anonymize every path of a file tree: keys are anonymized with
 * ananymisePath; leaf nodes ({size, sha}) are returned untouched.
 * @param {object} tree the original tree node
 * @param {object} repoConfig the repository configuration
 * @returns {object} a new tree with anonymized keys
 */
function anonymizeTree(tree, repoConfig) {
  // a file leaf carries an integer size; folders do not
  if (Number.isInteger(tree.size)) {
    return tree;
  }
  const anonymized = {};
  for (const name in tree.child) {
    anonymized[anonymizeUtils.ananymisePath(name, repoConfig)] =
      anonymizeTree(tree.child[name], repoConfig);
  }
  return anonymized;
}
/**
 * Flatten a nested tree into a map from sha to file path.
 * @param {object} tree the `child` map of a tree node
 * @param {object} [output] accumulator (created on the first call)
 * @param {string} [parent] path prefix of `tree`
 * @returns {object} map of sha -> path
 */
function tree2sha(tree, output, parent) {
  if (!output) {
    output = {};
    parent = "";
  }
  for (const name in tree) {
    const node = tree[name];
    if (node.sha) {
      output[node.sha] = path.join(parent, name);
    }
    if (node.child) {
      tree2sha(node.child, output, path.join(parent, name));
    }
  }
  return output;
}
/**
 * Resolve a "/"-separated path inside a tree.
 * @param {object} tree a tree node ({child: ...}) or a bare child map
 * @param {string} elementPath the path to look up; empty segments are
 *   skipped
 * @returns {object|null} the file/folder node, or null when not found
 */
function getFile(tree, elementPath) {
  // accept either a node or a bare child map
  let current = tree.child ? tree : { child: tree };
  for (const segment of elementPath.trim().split("/")) {
    if (segment == "") continue;
    const children = current.child ? current.child : current;
    if (!children[segment]) {
      return null;
    }
    current = children[segment];
  }
  return current;
}
// Extensions (lower-cased, without the dot) that are always treated as
// text even when istextorbinary cannot tell from the file name alone.
// "license" also covers extensionless LICENSE files (the basename itself
// becomes the "extension" when there is no dot).
module.exports.additionalExtensions = [
  "license",
  "dockerfile",
  "sbt",
  "ipynb",
  "gp",
  "out",
];
/**
 * Tell whether a file should be handled as text.
 * Combines istextorbinary's name-based detection with the
 * additionalExtensions whitelist.
 * @param {string} p the file path
 * @returns {boolean} true when the file is considered text
 */
module.exports.isText = (p) => {
  if (isText(p)) {
    return true;
  }
  // last dot-separated segment, lower-cased; the whole basename when
  // there is no dot (e.g. "LICENSE" -> "license")
  const extension = path.basename(p).split(".").pop().toLowerCase();
  return module.exports.additionalExtensions.includes(extension);
};
/**
 * Tell whether a file can be served for this repository.
 * Text files are always supported; pdf and image files depend on the
 * repository options.
 * @param {object} repoConfig the repository configuration
 * @param {string} p the file path
 * @returns {boolean}
 */
module.exports.isFileSupported = (repoConfig, p) => {
  if (module.exports.isText(p)) {
    return true;
  }
  const extension = path.basename(p).split(".").pop().toLowerCase();
  if (extension == "pdf") {
    return !!repoConfig.options.pdf;
  }
  if (["png", "jpg", "jpeg", "gif"].includes(extension)) {
    return !!repoConfig.options.image;
  }
  return false;
};
/**
 * Check that a file of an anonymized repository exists and is supported.
 * When needed (stream mode) the file is downloaded from GitHub and its
 * anonymized version is written to the local cache.
 * @param {object} options {path, repoConfig?} or {path, repoId}
 * @returns {Promise<boolean>} true when the file is available
 * @throws {string} error codes: "invalid_path", "repo_not_found",
 *   "repository_expired", "repository_not_ready", "file_not_supported",
 *   "file_not_found", "is_folder", "file_too_big", "repo_not_accessible",
 *   "file_not_accessible", "content_not_accessible",
 *   "unable_to_write_file"
 */
module.exports.isFilePathValid = async (options) => {
  if (options.path == null) {
    throw "invalid_path";
  }
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  if (repoConfig.status == "expired" || repoConfig.status == "removed") {
    throw "repository_expired";
  }
  if (repoConfig.status != "ready") {
    throw "repository_not_ready";
  }
  const anonymizedFilePath = path.join(
    repoUtils.getAnonymizedPath(repoConfig.repoId),
    options.path
  );
  if (!module.exports.isFileSupported(repoConfig, anonymizedFilePath)) {
    throw "file_not_supported";
  }
  // the requested path is anonymized; when it contains "XXX" map it back
  // to the original path through the sha of the file
  let unanonymizePath = options.path;
  if (unanonymizePath.indexOf("XXX") > -1) {
    const files = await module.exports.getFileList({ repoConfig });
    const file = getFile(files, options.path);
    if (file) {
      const r = await db
        .get("anonymized_repositories")
        .findOne(
          { repoId: repoConfig.repoId },
          { projection: { originalFiles: 1 } }
        );
      const shatree = tree2sha(r.originalFiles);
      if (shatree[file.sha]) {
        unanonymizePath = shatree[file.sha];
      }
    }
  }
  const orignalFilePath = path.join(
    repoUtils.getOriginalPath(repoConfig.repoId),
    unanonymizePath
  );
  // already anonymized and cached
  if (ofs.existsSync(anonymizedFilePath)) {
    return true;
  }
  // the original file is cached locally: anonymize it on the fly
  if (ofs.existsSync(orignalFilePath)) {
    await anonymizeUtils.ananymiseFile(
      orignalFilePath,
      anonymizedFilePath,
      repoConfig
    );
    return true;
  }
  // stream mode: download the file from GitHub on demand
  if (repoConfig.options.mode == "stream") {
    const repo = gh(repoConfig.fullName);
    const files = await module.exports.getFileList({ repoConfig });
    const file = getFile(files, options.path);
    if (file == null) {
      throw "file_not_found";
    }
    if (!file.sha) {
      // folder nodes have no sha in the anonymized tree
      throw "is_folder";
    }
    if (file.size > config.MAX_FILE_SIZE) {
      throw "file_too_big";
    }
    const fetchBlob = (token) => {
      const octokit = new Octokit({ auth: token });
      return octokit.request("GET /repos/{owner}/{repo}/git/blobs/{file_sha}", {
        owner: repo.owner,
        repo: repo.name,
        file_sha: file.sha,
      });
    };
    let ghRes = null;
    try {
      ghRes = await fetchBlob(await githubUtils.getToken(repoConfig));
    } catch (error) {
      if (error.status == 401 && config.GITHUB_TOKEN) {
        // the repository token is rejected: retry with the application
        // token (fixes the previous code that called an undefined
        // getZip() here and discarded the retry result)
        try {
          ghRes = await fetchBlob(config.GITHUB_TOKEN);
        } catch (retryError) {
          throw "repo_not_accessible";
        }
      } else if (error.status == 403) {
        // GitHub answers 403 when the blob exceeds the API size limit
        throw "file_too_big";
      } else {
        console.error(error);
        throw "file_not_accessible";
      }
    }
    if (!ghRes.data.content && ghRes.data.size != 0) {
      throw "content_not_accessible";
    }
    // empty files have no content field
    let content = "";
    if (ghRes.data.content) {
      // Buffer.from is a factory, not a constructor (was `new Buffer.from`)
      content = Buffer.from(ghRes.data.content, ghRes.data.encoding);
    }
    try {
      await fs.mkdir(path.dirname(orignalFilePath), { recursive: true });
    } catch (_) {
      // the folder may already exist
    }
    try {
      await fs.writeFile(orignalFilePath, content, { encoding: "utf-8" });
      await anonymizeUtils.ananymiseFile(
        orignalFilePath,
        anonymizedFilePath,
        repoConfig
      );
    } catch (error) {
      console.error(error);
      throw "unable_to_write_file";
    }
    return true;
  }
  return false;
};
/**
 * Compute (and cache in the database) the lines-of-code statistics of a
 * repository. Only supported in download mode since the files must be
 * available locally.
 * @param {object} options {repoConfig?} or {repoId}
 * @returns the linguist statistics
 * @throws {string} "repo_not_found" or "stats_unsupported"
 */
module.exports.getStats = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await repoUtils.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  // fix: the mode lives in repoConfig.options.mode (as everywhere else
  // in this file); the old check on repoConfig.mode was always undefined
  // so stream repositories were never rejected
  if (repoConfig.options.mode == "stream") {
    throw "stats_unsupported";
  }
  if (repoConfig.loc) {
    return repoConfig.loc;
  }
  const repoCache = repoUtils.getOriginalPath(repoConfig.repoId);
  try {
    await fs.access(repoCache, ofs.constants.R_OK);
  } catch (error) {
    throw "repo_not_found";
  }
  const o = loc(repoCache);
  await db.get("anonymized_repositories").updateOne(
    { repoId: repoConfig.repoId },
    {
      $set: {
        loc: o,
      },
    },
    { upsert: true }
  );
  return o;
};

59
utils/github.js Normal file
View File

@@ -0,0 +1,59 @@
const ofs = require("fs");
const db = require("./database");
const repoUtils = require("./repository");
const fileUtils = require("./file");
const config = require("../config");
/**
 * Resolve the GitHub token to use for a repository: the owner's access
 * token first, then the repository token, then the application token.
 * @param {object} repoConfig the repository configuration
 * @returns {Promise<string>} a GitHub access token
 */
module.exports.getToken = async (repoConfig) => {
  if (repoConfig.owner) {
    const user = await db
      .get()
      .collection("users")
      .findOne(
        { username: repoConfig.owner },
        { projection: { accessToken: 1 } }
      );
    if (user && user.accessToken) {
      return user.accessToken;
    }
  }
  return repoConfig.token ? repoConfig.token : config.GITHUB_TOKEN;
};
/**
 * Make a repository available locally according to its mode.
 * In download mode the whole repository is fetched; in stream mode only
 * the file list is retrieved and files are downloaded on demand.
 * Files are anonymized lazily when they are requested (see utils/file.js).
 * @param {object} repoConfig the repository configuration
 * @returns {Promise<boolean>} true when the repository is available
 */
module.exports.downloadRepoAndAnonymize = async (repoConfig) => {
  const cachePath = repoUtils.getAnonymizedPath(repoConfig.repoId);
  const originalPath = repoUtils.getOriginalPath(repoConfig.repoId);
  if (ofs.existsSync(cachePath) || ofs.existsSync(originalPath)) {
    // already downloaded
    return true;
  }
  if (repoConfig.options.mode == "download") {
    // download and extract the whole repository
    // (the inner re-declaration of originalPath was redundant: it
    // shadowed the identical constant above)
    await repoUtils.updateStatus(repoConfig, "downloading");
    await repoUtils.downloadOriginalRepo(repoConfig, originalPath);
    await repoUtils.updateStatus(repoConfig, "ready");
    return true;
  } else if (repoConfig.options.mode == "stream") {
    // in stream mode only download the list of files from github
    await fileUtils.getFileList({ repoConfig });
    await repoUtils.updateStatus(repoConfig, "ready");
    return true;
  }
  return false;
};

356
utils/repository.js Normal file
View File

@@ -0,0 +1,356 @@
const fs = require("fs").promises;
const ofs = require("fs");
const path = require("path");
const gh = require("parse-github-url");
const { Octokit } = require("@octokit/rest");
const extract = require("extract-zip");
const db = require("./database");
const githubUtils = require("./github");
const config = require("../config");
// All repository data lives under <project>/repositories/<repoId>.
const repositoryPath = (repoId, ...sub) =>
  path.resolve(__dirname, "..", "repositories", repoId, ...sub);
/** Root folder of everything related to a repository. */
module.exports.getPath = (repoId) => repositoryPath(repoId);
/** Folder that contains the original (non-anonymized) files. */
module.exports.getOriginalPath = (repoId) => repositoryPath(repoId, "original");
/** Folder that contains the anonymized (cached) files. */
module.exports.getAnonymizedPath = (repoId) => repositoryPath(repoId, "cache");
/**
 * Load the configuration of an anonymized repository from the database.
 * Only the fields needed by the application are projected (the large
 * cached file trees are excluded).
 * @param {string} repoId the repository id
 * @returns the configuration, or null when the repository is unknown
 */
module.exports.getConfig = async (repoId) => {
  const repo = await db
    .get()
    .collection("anonymized_repositories")
    .findOne(
      { repoId },
      {
        projection: {
          // files: 1,
          token: 1,
          branch: 1,
          commit: 1,
          owner: 1,
          fullName: 1,
          repoId: 1,
          terms: 1,
          options: 1,
          loc: 1,
          status: 1,
          lastView: 1,
        },
      }
    );
  // revive dates stored as primitives
  // NOTE(review): lastView is only revived when an expirationDate is set
  // — confirm this is intentional
  if (repo && repo.options.expirationDate) {
    repo.options.expirationDate = new Date(repo.options.expirationDate);
    repo.lastView = new Date(repo.lastView);
  }
  return repo;
};
/**
 * Fetch (and cache in the database) the GitHub details of a repository.
 * @param {object} options {fullName?|repoConfig?|owner&repo?, token?,
 *   force?} — force bypasses the database cache
 * @returns the repository details
 * @throws {string} "invalid_options" or "repo_not_found"
 */
module.exports.getRepoDetails = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw "invalid_options";
  }
  if (options.force !== true) {
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 0 } });
    if (repository && repository.id) return repository;
  }
  try {
    const repo = gh(options.fullName);
    const octokit = new Octokit({ auth: options.token });
    let ghRes = await octokit.repos.get({
      owner: repo.owner,
      repo: repo.name,
    });
    ghRes.data.fullName = ghRes.data.full_name;
    if (ghRes.data.fullName != query.fullName) {
      // repo renamed: keep the old name
      ghRes.data.fullName = query.fullName;
    }
    if (ghRes.data.has_pages) {
      // fix: ghPageRes was an implicit global (missing declaration)
      const ghPageRes = await octokit.request(
        "GET /repos/{owner}/{repo}/pages",
        {
          owner: repo.owner,
          repo: repo.name,
        }
      );
      ghRes.data.pageSource = ghPageRes.data.source;
    }
    delete ghRes.data.full_name;
    await db
      .get("repositories")
      .updateOne(query, { $set: ghRes.data }, { upsert: true });
    return ghRes.data;
  } catch (error) {
    console.log(query, error);
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      // invalid token: retry once with the application token
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoDetails(options);
    }
    throw "repo_not_found";
  }
};
/**
 * Download the zipball of a repository to `target`.
 * Retries with the application token when the repository token is
 * rejected.
 * @param {object} repoConfig the repository configuration
 * @param {string} target path of the zip file to write
 * @throws {string} "repo_not_accessible"
 */
module.exports.downloadRepoZip = async (repoConfig, target) => {
  const repo = gh(repoConfig.fullName);
  const getZip = async (token) => {
    const octokit = new Octokit({ auth: token });
    return await octokit.request("GET /repos/{owner}/{repo}/zipball/{ref}", {
      owner: repo.owner,
      repo: repo.name,
      ref: repoConfig.commit,
    });
  };
  let response = null;
  try {
    response = await getZip(await githubUtils.getToken(repoConfig));
  } catch (error) {
    // 401: the repository token is invalid, retry with the app token
    if (error.status == 401 && config.GITHUB_TOKEN) {
      try {
        response = await getZip(config.GITHUB_TOKEN);
      } catch (retryError) {
        throw "repo_not_accessible";
      }
    } else {
      throw "repo_not_accessible";
    }
  }
  await fs.mkdir(path.dirname(target), { recursive: true });
  await fs.writeFile(target, Buffer.from(response.data), {
    encoding: "binary",
  });
};
/**
 * Update the status of an anonymized repository, both on the in-memory
 * configuration (mutated) and in the database.
 * @param {object} repoConfig the repository configuration
 * @param {string} status the new status
 */
module.exports.updateStatus = async (repoConfig, status) => {
  repoConfig.status = status;
  const repositories = db.get("anonymized_repositories");
  await repositories.updateOne(
    { repoId: repoConfig.repoId },
    { $set: { status } }
  );
};
/**
 * Download and extract the repository zipball into `destination`.
 * @param {object} repoConfig the repository configuration
 * @param {string} destination folder that will contain the repository
 */
module.exports.downloadOriginalRepo = async (repoConfig, destination) => {
  const zipPath = path.join(
    module.exports.getPath(repoConfig.repoId),
    "content.zip"
  );
  const destinationZip = destination + "_zip";
  // download the repository and unzip it
  await module.exports.downloadRepoZip(repoConfig, zipPath);
  await extract(zipPath, { dir: destinationZip });
  // the zipball contains a single root folder: move it to `destination`
  const folders = await fs.readdir(destinationZip);
  // fix: the rename was a floating promise and could race with the
  // removal of its source folder below
  await fs.rename(path.join(destinationZip, folders[0]), destination);
  await fs.rm(zipPath);
  await fs.rm(destinationZip, { recursive: true });
};
/**
 * Fetch the details of an anonymized repository owned by `user`.
 * @param {string} repoId the repository id
 * @param {object} user the authenticated user (must be the owner)
 * @returns the repository details, or null when not found
 */
module.exports.getAnonymizedRepoDetails = async (repoId, user) => {
  const query = { repoId, owner: user.username };
  // exclude the token and the large cached trees/statistics
  const projection = { token: 0, files: 0, originalFiles: 0, loc: 0 };
  return db.get("anonymized_repositories").findOne(query, { projection });
};
/**
 * Resolve the commit sha of the configured branch of a repository.
 * Uses the cached branch list unless options.force is true.
 * @param {object} options {repoConfig?, repoId?, force?}
 * @returns {Promise<string>} the commit sha
 * @throws {string} "repo_not_found" or "branch_not_found"
 */
module.exports.getRepoCommit = async (options) => {
  let repoConfig = options.repoConfig;
  if (!repoConfig) {
    repoConfig = await module.exports.getConfig(options.repoId);
  }
  if (repoConfig == null) {
    throw "repo_not_found";
  }
  if (options.force !== true) {
    // try the branch cache first
    const query = { fullName: repoConfig.fullName };
    query["branches." + repoConfig.branch + ""] = { $exists: true };
    const repository = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    if (
      repository &&
      repository.branches &&
      repository.branches[repoConfig.branch]
    )
      return repository.branches[repoConfig.branch].commit.sha;
  }
  // cache miss (or forced): fetch the branch list from GitHub
  const branches = await module.exports.getRepoBranches({
    repoConfig,
    token: await githubUtils.getToken(repoConfig),
    force: options.force,
  });
  if (!branches[repoConfig.branch]) {
    console.log(branches, repoConfig.branch);
    throw "branch_not_found";
  }
  return branches[repoConfig.branch].commit.sha;
};
/**
 * List the branches of a GitHub repository, cached in the database.
 * @param {object} options {fullName?|repoConfig?|owner&repo?, token?,
 *   force?} — force bypasses the database cache
 * @returns {Promise<object>} map of branch name -> branch details
 * @throws {string|Error} "repo_not_found", "branches_not_found" or
 *   Error("Invalid options")
 */
module.exports.getRepoBranches = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw new Error("Invalid options");
  }
  if (options.force !== true) {
    let repository = await db
      .get("repositories")
      .findOne(query, { projection: { branches: 1 } });
    if (repository && repository.branches) return repository.branches;
  }
  try {
    const repo = gh(options.fullName);
    const octokit = new Octokit({ auth: options.token });
    // paginate: repositories can have more than 100 branches
    const data = await octokit.paginate(octokit.repos.listBranches, {
      owner: repo.owner,
      repo: repo.name,
      per_page: 100,
    });
    const branches = {};
    for (let b of data) {
      branches[b.name] = b;
    }
    await db
      .get("repositories")
      .updateOne(query, { $set: { branches } }, { upsert: true });
    return branches;
  } catch (error) {
    // invalid token: retry once with the application token
    if (error.status == 401 && options.token != config.GITHUB_TOKEN) {
      options.token = config.GITHUB_TOKEN;
      return await module.exports.getRepoBranches(options);
    }
    if (error.status == 404) {
      throw "repo_not_found";
    }
    console.error(error);
    throw "branches_not_found";
  }
};
/**
 * Fetch (and cache in the database) the README of a repository.
 * @param {object} options {fullName?|repoConfig?|owner&repo?, token?,
 *   force?} — force bypasses the database cache
 * @returns {Promise<string>} the README content (utf-8)
 * @throws {string|Error} "readme_not_available" or
 *   Error("Invalid options")
 */
module.exports.getRepoReadme = async (options) => {
  const query = {};
  if (options.fullName) {
    query.fullName = options.fullName;
  } else if (options.repoConfig) {
    query.fullName = options.repoConfig.fullName;
    options.fullName = query.fullName;
  } else if (options.owner && options.repo) {
    query.fullName = `${options.owner}/${options.repo}`;
    options.fullName = query.fullName;
  } else {
    throw new Error("Invalid options");
  }
  if (options.force !== true) {
    let repository = await db
      .get("repositories")
      .findOne(query, { projection: { readme: 1 } });
    if (repository && repository.readme) return repository.readme;
  }
  try {
    const repo = gh(options.fullName);
    const octokit = new Octokit({ auth: options.token });
    const ghRes = await octokit.repos.getReadme({
      owner: repo.owner,
      repo: repo.name,
    });
    // fix: Buffer.from is a factory, not a constructor
    // (was `new Buffer.from(...)`)
    const readme = Buffer.from(ghRes.data.content, "base64").toString("utf-8");
    await db
      .get("repositories")
      .updateOne(query, { $set: { readme } }, { upsert: true });
    return readme;
  } catch (error) {
    // log the cause instead of silently swallowing it
    console.error(error);
    throw "readme_not_available";
  }
};
/**
 * Update an anonymized repository to the latest commit of its branch.
 * No-op when the repository is already up to date; rejects when an
 * update is already in progress.
 * NOTE: the exported name keeps its historical "Anonimized" spelling
 * because callers import this exact name.
 * @param {object} repoConfig the repository configuration
 * @returns {Promise<boolean|undefined>} true when already up to date
 * @throws {string} "repo_is_updating"
 */
module.exports.updateAnonimizedRepository = async (repoConfig) => {
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // reload the configuration to avoid acting on a stale status
  repoConfig = await module.exports.getConfig(repoConfig.repoId);
  if (repoConfig.status == "updating") {
    throw "repo_is_updating";
  }
  // check new commit
  const commit = await module.exports.getRepoCommit({
    repoConfig,
    force: true,
  });
  if (commit == repoConfig.commit) {
    console.log(`${repoConfig.repoId} is up to date`);
    return true;
  }
  console.log(`${repoConfig.repoId} will be updated to ${commit}`);
  await module.exports.updateStatus(repoConfig, "updating");
  await db
    .get("anonymized_repositories")
    .updateOne({ repoId: repoConfig.repoId }, { $set: { commit } });
  // remove the local files and re-download at the new commit
  await module.exports.removeRepository(repoConfig);
  await githubUtils.downloadRepoAndAnonymize(repoConfig);
  await module.exports.updateStatus(repoConfig, "ready");
};
/**
 * Remove the local files of a repository and unset its cached trees and
 * statistics in the database.
 * @param {object} repoConfig the repository configuration
 * @throws rethrows any filesystem/database error after logging it
 */
module.exports.removeRepository = async (repoConfig) => {
  const removeIfExists = async (folder) => {
    if (ofs.existsSync(folder)) {
      await fs.rm(folder, { recursive: true, force: true });
    }
  };
  try {
    await removeIfExists(module.exports.getOriginalPath(repoConfig.repoId));
    await removeIfExists(module.exports.getAnonymizedPath(repoConfig.repoId));
    await db
      .get("anonymized_repositories")
      .updateOne(
        { repoId: repoConfig.repoId },
        { $unset: { files: "", originalFiles: "", loc: "" } }
      );
  } catch (error) {
    console.log(error);
    throw error;
  }
};