mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-16 14:59:07 +02:00
f413a30313
Follow-up to the GitHubStream cache fixes. The same class of poisoned-cache
bug existed in the GitHubDownload path and a few related spots (a minimal
sketch of the new marker/size gating follows the list):
- GitHubDownload.download: wipe pre-existing state before extracting
and write a .anon-complete marker only after a successful extract.
On error, rm the partial cache so a retry starts clean. getFileContent
and getFiles now gate on the marker instead of "any file/folder
exists," so a half-extracted tree can never be served as canonical.
- GitHubDownload.getFileContent: validate cached file size against the
upstream FileModel size (via the new AnonymizedFile.size()), same
guard as GitHubStream. getFiles filters the marker from the listing.
- FileSystem.listFiles: drop the bogus stats.ino.toString() as sha.
An inode isn't a content hash; anything comparing it to a Git blob
sha would silently disagree. Leave undefined.
- S3.write: remove the fire-and-forget data.on("error") -> this.rm(...).
Multipart Upload doesn't commit partial objects, so there was nothing
to clean up, and the handler raced retries and could delete a
previously-good object on a transient source-stream hiccup. The
size-validated read path recovers from any other undersized objects.
- GitHubStream.resolveLfsPointer: drop the post-decision early-return
  in blobStream.on("error"). It is currently redundant with the inner
  listener; dropping it removes a future-refactor footgun.
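
In outline, the new contract looks roughly like this (a minimal sketch, not
the actual diff: the import paths, the repoId/filePath parameters, and the
expectedSize argument are illustrative stand-ins; the storage calls and
FILE_TYPE come from the FileSystem backend below, and the .anon-complete
marker plus the upstream-size check are what this commit adds):

    import { Readable } from "stream";
    import FileSystem from "./storage/FileSystem"; // illustrative path
    import { FILE_TYPE } from "./storage/Storage"; // illustrative path

    const MARKER = ".anon-complete";

    async function cacheZip(storage: FileSystem, repoId: string, zip: Readable) {
      await storage.rm(repoId); // wipe pre-existing state before extracting
      try {
        await storage.extractZip(repoId, "", zip);
        await storage.write(repoId, MARKER, "1"); // marker only after a full extract
      } catch (err) {
        await storage.rm(repoId); // drop the partial cache so a retry starts clean
        throw err;
      }
    }

    async function readCached(
      storage: FileSystem,
      repoId: string,
      filePath: string,
      expectedSize: number // upstream FileModel size, i.e. AnonymizedFile.size()
    ): Promise<Readable | undefined> {
      // gate on the marker, not on "any file/folder exists"
      if ((await storage.exists(repoId, MARKER)) !== FILE_TYPE.FILE) return undefined;
      // a cached file is only canonical if its size matches upstream
      const info = await storage.fileInfo(repoId, filePath);
      if (info.size !== expectedSize) return undefined;
      return storage.read(repoId, filePath);
    }

Callers fall back to a fresh download-and-extract whenever readCached returns
undefined, so a half-extracted or truncated cache is never served as canonical.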
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
227 lines
6.9 KiB
TypeScript
import config from "../../config";
import * as fs from "fs";
import { Extract } from "unzip-stream";
import { join, basename, dirname } from "path";
import { Response } from "express";
import { Readable, pipeline, Transform } from "stream";
import * as archiver from "archiver";
import { promisify } from "util";
import { lookup } from "mime-types";
import StorageBase, { FILE_TYPE } from "./Storage";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";

export default class FileSystem extends StorageBase {
  type = "FileSystem";

  constructor() {
    super();
  }

  /** @override */
  async exists(repoId: string, p: string = ""): Promise<FILE_TYPE> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    try {
      const stat = await fs.promises.stat(fullPath);
      if (stat.isDirectory()) return FILE_TYPE.FOLDER;
      if (stat.isFile()) return FILE_TYPE.FILE;
    } catch {
      // ignore file not found or not downloaded
    }
    return FILE_TYPE.NOT_FOUND;
  }

  /** @override */
  async send(repoId: string, p: string, res: Response) {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    res.sendFile(fullPath, { dotfiles: "allow" });
  }

  /** @override */
  async read(repoId: string, p: string): Promise<Readable> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    return fs.createReadStream(fullPath);
  }

  async fileInfo(repoId: string, path: string) {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), path);
    const info = await fs.promises.stat(fullPath);
    return {
      size: info.size,
      lastModified: info.mtime,
      contentType: info.isDirectory()
        ? "application/x-directory"
        : (lookup(fullPath) as string),
    };
  }

  /** @override */
  async write(
    repoId: string,
    p: string,
    data: string | Readable
  ): Promise<void> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    // Atomic write: stream into a sibling .tmp and only rename into place
    // when the source stream finishes successfully. If the source errors
    // mid-flight (transient GitHub 5xx, socket reset, etc.), we drop the
    // tmp and leave any pre-existing cached file untouched. Without this,
    // a partial fetch would commit a 0-byte or truncated cache entry that
    // future reads would happily serve as the file's content.
    await this.mk(repoId, dirname(p));
    const tmpPath = `${fullPath}.tmp.${process.pid}.${Date.now()}.${Math.random()
      .toString(36)
      .slice(2, 8)}`;
    try {
      if (typeof data === "string") {
        await fs.promises.writeFile(tmpPath, data);
      } else {
        await new Promise<void>((resolve, reject) => {
          const ws = fs.createWriteStream(tmpPath);
          let settled = false;
          const finish = (err?: Error) => {
            if (settled) return;
            settled = true;
            if (err) {
              ws.destroy();
              reject(err);
            } else {
              resolve();
            }
          };
          data.on("error", finish);
          ws.on("error", finish);
          ws.on("finish", () => finish());
          data.pipe(ws);
        });
      }
      await fs.promises.rename(tmpPath, fullPath);
    } catch (err) {
      console.error("[ERROR] FileSystem.write failed:", err);
      await fs.promises.rm(tmpPath, { force: true }).catch(() => undefined);
      throw err;
    }
  }

  /** @override */
  async rm(repoId: string, dir: string = ""): Promise<void> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
    await fs.promises.rm(fullPath, {
      force: true,
      recursive: true,
    });
  }

  /** @override */
  async mk(repoId: string, dir: string = ""): Promise<void> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
    try {
      await fs.promises.mkdir(fullPath, {
        recursive: true,
      });
    } catch (err: unknown) {
      if (
        err instanceof Error &&
        (err as NodeJS.ErrnoException).code !== "EEXIST"
      ) {
        throw err;
      }
    }
  }

  /** @override */
  async listFiles(
    repoId: string,
    dir: string = "",
    opt: {
      onEntry?: (file: { path: string; size: number }) => void;
    } = {}
  ): Promise<IFile[]> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
    const files = await fs.promises.readdir(fullPath);
    const output2: IFile[] = [];
    for (const file of files) {
      const filePath = join(fullPath, file);
      try {
        const stats = await fs.promises.stat(filePath);
        if (stats.isDirectory()) {
          output2.push(new FileModel({ name: file, path: dir, repoId }));
          output2.push(
            ...(await this.listFiles(repoId, join(dir, file), opt))
          );
        } else if (stats.isFile()) {
          if (opt.onEntry) {
            opt.onEntry({
              path: join(dir, file),
              size: stats.size,
            });
          }
          // Don't synthesise a sha here. The previous value (stats.ino)
          // wasn't a content hash — just an inode number — and any code
          // that compared it to an upstream Git blob sha would silently
          // disagree. Leave it undefined so callers either look up the
          // real sha from FileModel/GitHub or skip sha-keyed paths.
          output2.push(
            new FileModel({
              name: file,
              path: dir,
              repoId: repoId,
              size: stats.size,
            })
          );
        }
      } catch {
        // ignore stat errors for individual files
      }
    }
    return output2;
  }

  /** @override */
  async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
    const pipe = promisify(pipeline);
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    const extractor = Extract({
      path: fullPath,
      // Zip entries arrive as "<top-level-dir>/path/to/file"; strip the
      // leading directory so files land directly under fullPath. Entries
      // whose name collapses to nothing (the top-level directory itself)
      // are routed to a throwaway ___IGNORE___ entry, removed below.
      decodeString: (buf) => {
        const name = buf.toString();
        const newName = name.substr(name.indexOf("/") + 1);
        if (newName == "") {
          return "___IGNORE___";
        }
        return newName;
      },
    });
    await pipe(data, extractor);
    await this.rm(repoId, join(p, "___IGNORE___"));
  }

  /** @override */
  async archive(
    repoId: string,
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?: (path: string) => Transform;
    }
  ) {
    const archive = archiver(opt?.format || "zip", {});
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);

    await this.listFiles(repoId, dir, {
      onEntry: async (file) => {
        let rs = await this.read(repoId, file.path);
        if (opt?.fileTransformer) {
          // apply transformation on the stream
          rs = rs.pipe(opt.fileTransformer(file.path));
        }
        const f = file.path.replace(fullPath, "");
        archive.append(rs, {
          name: basename(f),
          prefix: dirname(f),
        });
      },
    }).then(() => {
      // no more entries once the directory walk has completed
      archive.finalize();
    });
    // returned immediately so callers can start piping while entries are appended
    return archive;
  }
}