add parameter for anonymization mask

This commit is contained in:
tdurieux
2021-04-07 15:09:59 +02:00
parent c11ae5eb7c
commit d9aa8cce10
5 changed files with 13 additions and 9 deletions

View File

@@ -27,7 +27,7 @@ Using Anonymous Github
To use it, open the main page (e.g., [http://anonymous.4open.science/](http://anonymous.4open.science/)), log in with GitHub, and click on "Anonymize".
Simply fill in 1. the GitHub repo URL, 2. the ID of the anonymized repository, and 3. the terms to anonymize (which can be updated afterward).
The anonymization of the content is done by replacing all occurrences of words in a list by "XXX".
The anonymization of the content is done by replacing all occurrences of the words in a list with "XXXX" (this mask can be changed in the configuration).
The word list is provided by the authors, and typically contains the institution name, author names, logins, etc.
The README is anonymized as well as all files of the repository. Even filenames are anonymized.

View File

@@ -5,6 +5,7 @@ const config = {
MAX_FILE_SIZE: 10 * 1024 * 1024, // in b
MAX_REPO_SIZE: 8 * 1024, // in kb
AUTH_CALLBACK: "http://localhost:5000/github/auth",
ANONYMIZATION_MASK: "XXXX"
};
for (let conf in process.env) {
if (config[conf] !== undefined) {

View File

@@ -454,6 +454,8 @@ angular
if (v == "stream") {
$scope.options.loc = false;
$scope.anonymize.loc.$$element[0].disabled = true;
} else {
$scope.anonymize.loc.$$element[0].disabled = false;
}
});
@@ -530,7 +532,7 @@ angular
);
}
if (!$scope.options.link) {
content = content.replace(urlRegex, "XXX");
content = content.replace(urlRegex, "XXXX");
}
content = content.replace(
@@ -553,12 +555,12 @@ angular
// remove whole url if it contains the term
content = content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) return "XXX";
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) return "XXXX";
return match;
});
// remove the term in the text
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), "XXX");
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), "XXXX");
}
$scope.anonymize_readme = content;

View File

@@ -2,6 +2,7 @@ const fs = require("fs").promises;
const ofs = require("fs");
const path = require("path");
const fileUtils = require("./file");
const config = require("../config")
const anonymizeContent = (content, repoConfig) => {
const urlRegex = /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
@@ -16,7 +17,7 @@ const anonymizeContent = (content, repoConfig) => {
if (!repoConfig.options.link) {
// remove all links
content = content.replace(urlRegex, "XXX");
content = content.replace(urlRegex, config.ANONYMIZATION_MASK);
}
content = content.replace(
@@ -44,12 +45,12 @@ const anonymizeContent = (content, repoConfig) => {
}
// remove whole url if it contains the term
content = content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) return "XXX";
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) return config.ANONYMIZATION_MASK;
return match;
});
// remove the term in the text
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), "XXX");
content = content.replace(new RegExp(`\\b${term}\\b`, "gi"), config.ANONYMIZATION_MASK);
}
return content;
};
@@ -59,7 +60,7 @@ const anonymizePath = (path, repoConfig) => {
if (term.trim() == "") {
continue;
}
path = path.replace(new RegExp(term, "gi"), "XXX");
path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK);
}
return path;
};

View File

@@ -322,7 +322,7 @@ module.exports.isFilePathValid = async (options) => {
}
let anonymizePath = options.path;
if (anonymizePath.indexOf("XXX") > -1) {
if (anonymizePath.indexOf(config.ANONYMIZATION_MASK) > -1) {
const files = await module.exports.getFileList({ repoConfig });
const file = getFile(files, options.path);