fix: add metadata about the source of the files in S3

This commit is contained in:
tdurieux
2022-08-10 13:58:37 +02:00
parent d9225fc5ee
commit 9d9a6879ab
7 changed files with 861 additions and 775 deletions

1577
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -32,7 +32,7 @@
"bullmq": "^1.86.7",
"compression": "^1.7.4",
"connect-redis": "^6.1.3",
"decompress-stream-to-s3": "^1.2.1",
"decompress-stream-to-s3": "^1.3.1",
"dotenv": "^16.0.1",
"express": "^4.18.1",
"express-rate-limit": "^6.4.0",

View File

@@ -97,7 +97,7 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
try {
const downloadStream = got.stream(response.url);
downloadStream.addListener("downloadProgress", (p) => (progress = p));
await storage.extractZip(originalPath, downloadStream);
await storage.extractZip(originalPath, downloadStream, null, this);
} catch (error) {
await this.repository.updateStatus("error", "unable_to_download");
throw new AnonymousError("unable_to_download", {

View File

@@ -58,7 +58,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
if (this.repository.status != "ready")
await this.repository.updateStatus("ready");
await storage.write(file.originalCachePath, content);
await storage.write(file.originalCachePath, content, file, this);
return stream.Readable.from(content);
} catch (error) {
throw new AnonymousError("file_too_big", {

View File

@@ -1,4 +1,4 @@
import { StorageBase, Tree } from "../types";
import { SourceBase, StorageBase, Tree } from "../types";
import config from "../../config";
import * as fs from "fs";
@@ -8,6 +8,7 @@ import { Response } from "express";
import { Readable, pipeline } from "stream";
import * as archiver from "archiver";
import { promisify } from "util";
import AnonymizedFile from "../AnonymizedFile";
export default class FileSystem implements StorageBase {
type = "FileSystem";
@@ -30,7 +31,7 @@ export default class FileSystem implements StorageBase {
}
/** @override */
async write(p: string, data: Buffer): Promise<void> {
async write(p: string, data: Buffer, file?: AnonymizedFile, source?: SourceBase): Promise<void> {
if (!(await this.exists(dirname(p)))) {
await fs.promises.mkdir(dirname(join(config.FOLDER, p)), {
recursive: true,
@@ -92,7 +93,7 @@ export default class FileSystem implements StorageBase {
}
/** @override */
async extractZip(p: string, data: Readable): Promise<void> {
async extractZip(p: string, data: Readable, file?: AnonymizedFile, source?: SourceBase): Promise<void> {
const pipe = promisify(pipeline);
return pipe(
data,

View File

@@ -1,4 +1,4 @@
import { StorageBase, Tree, TreeFile } from "../types";
import { SourceBase, StorageBase, Tree, TreeFile } from "../types";
import { S3 } from "aws-sdk";
import config from "../../config";
import { pipeline, Readable } from "stream";
@@ -9,6 +9,7 @@ import * as flow from "xml-flow";
import * as archiver from "archiver";
import { dirname, basename } from "path";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../AnonymizedFile";
export default class S3Storage implements StorageBase {
type = "AWS";
@@ -122,15 +123,22 @@ export default class S3Storage implements StorageBase {
}
/** @override */
async write(path: string, data: Buffer): Promise<void> {
await this.client
.putObject({
Bucket: config.S3_BUCKET,
Key: path,
Body: data,
ContentType: lookup(path).toString(),
})
.promise();
async write(
  path: string,
  data: Buffer,
  file?: AnonymizedFile,
  source?: SourceBase
): Promise<void> {
  // Uploads `data` to the configured S3 bucket under the key `path`.
  // `file` is accepted for interface compatibility and is not used here.
  // When `source` is provided, the object is tagged with `source=<source.type>`.

  // NOTE(review): assumes `lookup` is the mime-types lookup, which yields a
  // falsy value for unknown extensions - confirm against the file's imports.
  // Calling `.toString()` on that result would send the literal "false" as
  // the content type, so fall back to a generic binary type instead.
  const mimeType = lookup(path);
  const params: S3.PutObjectRequest = {
    Bucket: config.S3_BUCKET,
    Key: path,
    Body: data,
    ContentType: mimeType ? mimeType.toString() : "application/octet-stream",
  };
  if (source) {
    // S3 expects the tag set encoded as URL query parameters.
    params.Tagging = `source=${encodeURIComponent(source.type)}`;
  }
  await this.client.putObject(params).promise();
}
@@ -168,7 +176,12 @@ export default class S3Storage implements StorageBase {
}
/** @override */
async extractZip(p: string, data: Readable): Promise<void> {
async extractZip(
p: string,
data: Readable,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void> {
let toS3: ArchiveStreamToS3;
return new Promise((resolve, reject) => {
@@ -179,6 +192,9 @@ export default class S3Storage implements StorageBase {
type: "zip",
onEntry: (header) => {
header.name = header.name.substr(header.name.indexOf("/") + 1);
if (source) {
header.Tagging = `source=${source.type}`;
}
},
});
pipeline(data, toS3, () => {})

View File

@@ -53,8 +53,10 @@ export interface StorageBase {
* Write data to a file
* @param path the path to the file
* @param data the content of the file
* @param file the anonymized file being written (optional)
* @param source the source of the file
*/
write(path: string, data: Buffer): Promise<void>;
write(path: string, data: Buffer, file?: AnonymizedFile, source?: SourceBase): Promise<void>;
/**
* List the files from dir
@@ -66,8 +68,10 @@ export interface StorageBase {
* Extract the content of the zip archive stream `tar` to dir
* @param dir
* @param tar
* @param file the file
* @param source the source of the file
*/
extractZip(dir: string, tar: stream.Readable): Promise<void>;
extractZip(dir: string, tar: stream.Readable, file?: AnonymizedFile, source?: SourceBase): Promise<void>;
/**
* Remove the path