Improve error handling

This commit is contained in:
tdurieux
2026-05-06 18:43:36 +03:00
parent aae6eae6eb
commit da78708b7b
16 changed files with 360 additions and 49 deletions
+83 -7
View File
@@ -13,6 +13,78 @@ import { handleError } from "../server/routes/route-utils";
import FileModel from "./model/files/files.model";
import { IFile } from "./model/files/files.types";
import { FilterQuery } from "mongoose";
import { createLogger, serializeError } from "./logger";
const logger = createLogger("anonymized-file");
// Map a streamer error response to an AnonymousError that preserves the
// upstream status and error code instead of collapsing every failure into a
// generic 404. Without this, a corrupt cache, a 5xx from the streamer, an
// LFS pointer issue, and a missing file all surface to the user as the
// same `file_not_found` — which makes incidents impossible to triage.
// Convert a failed streamer fetch into an AnonymousError while keeping the
// upstream HTTP status and error code intact. Collapsing every failure into a
// generic 404 would make a corrupt cache, a streamer 5xx, an LFS pointer
// problem, and a genuinely missing file all look identical to the user —
// which makes incidents impossible to triage.
function streamerErrorToAnonymous(
  err: Error & { response?: { statusCode?: number; body?: unknown } },
  context: { repoId: string; filePath: string }
): { error: AnonymousError; upstreamStatus?: number; upstreamBody?: string } {
  const upstreamStatus = err?.response?.statusCode;
  let errCode = "file_not_found";
  let httpStatus = 404;
  let upstreamBody: string | undefined;

  const rawBody = err?.response?.body;
  if (rawBody != null) {
    try {
      if (typeof rawBody === "string") {
        upstreamBody = rawBody;
      } else if (Buffer.isBuffer(rawBody)) {
        upstreamBody = rawBody.toString("utf8");
      } else {
        upstreamBody = JSON.stringify(rawBody);
      }
    } catch {
      // ignore body decode failures
    }
    if (upstreamBody) {
      try {
        const parsed = JSON.parse(upstreamBody);
        if (parsed && typeof parsed.error === "string") {
          errCode = parsed.error;
        }
      } catch {
        // body wasn't JSON — keep the default code
      }
    }
  }

  if (typeof upstreamStatus === "number") {
    // Pass 4xx through untouched (client-meaningful: 404 file_not_found,
    // 413 file_too_big, 403 file_not_accessible). Map every 5xx to 502 so
    // browsers don't cache an upstream fault as a missing-file 404.
    httpStatus = upstreamStatus >= 500 ? 502 : upstreamStatus;
    if (upstreamStatus >= 500 && errCode === "file_not_found") {
      errCode = "streamer_upstream_error";
    }
  } else if (errCode === "file_not_found") {
    // No HTTP response at all (connection refused, timeout, DNS): that is a
    // streamer fault, not a missing file.
    errCode = "streamer_unreachable";
    httpStatus = 502;
  }

  logger.warn("streamer fetch failed", {
    repoId: context.repoId,
    filePath: context.filePath,
    upstreamStatus,
    upstreamBody: upstreamBody?.slice(0, 500),
    err: serializeError(err),
  });

  return {
    error: new AnonymousError(errCode, {
      httpStatus,
      cause: err,
    }),
    upstreamStatus,
    upstreamBody,
  };
}
/**
* Represent a file in an anonymized repository
@@ -255,14 +327,18 @@ export default class AnonymizedFile {
anonymizerOptions: anonymizer.opt,
},
})
.on("error", (_err) => {
handleError(
new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
}),
res
.on("error", (err: Error) => {
const { error } = streamerErrorToAnonymous(
err as Error & {
response?: { statusCode?: number; body?: unknown };
},
{
repoId: this.repository.repoId,
filePath: this.anonymizedPath,
}
);
error.value = this;
handleError(error, res);
});
// Forward Content-Type from the streamer's upstream response.
// got.stream(...).pipe(res) forwards body bytes only — without
+34 -4
View File
@@ -5,6 +5,7 @@ import Conference from "./Conference";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { IAnonymizedGistDocument } from "./model/anonymizedGists/anonymizedGists.types";
import AnonymizedGistModel from "./model/anonymizedGists/anonymizedGists.model";
import config from "../config";
import { octokit } from "./GitHubUtils";
import { ContentAnonimizer } from "./anonymize-utils";
@@ -175,7 +176,10 @@ export default class Gist {
await this.download();
this._model.anonymizeDate = new Date();
await this.updateStatus(RepositoryStatus.READY);
await this._model.save();
await AnonymizedGistModel.updateOne(
{ _id: this._model._id },
{ $set: { anonymizeDate: this._model.anonymizeDate } }
).exec();
}
}
@@ -189,14 +193,29 @@ export default class Gist {
async countView() {
this._model.lastView = new Date();
this._model.pageView = (this._model.pageView || 0) + 1;
await this._model.save();
await AnonymizedGistModel.updateOne(
{ _id: this._model._id },
{
$set: { lastView: this._model.lastView },
$inc: { pageView: 1 },
}
).exec();
}
async updateStatus(status: RepositoryStatus, statusMessage?: string) {
this._model.status = status;
this._model.statusDate = new Date();
this._model.statusMessage = statusMessage;
await this._model.save();
await AnonymizedGistModel.updateOne(
{ _id: this._model._id },
{
$set: {
status,
statusDate: this._model.statusDate,
statusMessage,
},
}
).exec();
}
async expire() {
@@ -218,7 +237,18 @@ export default class Gist {
this._model.gist.description = "";
this._model.gist.files = [];
this._model.gist.ownerLogin = "";
await this._model.save();
const set: Record<string, unknown> = {
"gist.comments": [],
"gist.description": "",
"gist.files": [],
"gist.ownerLogin": "",
};
if (status) set.status = status;
if (statusMessage) set.statusMessage = statusMessage;
await AnonymizedGistModel.updateOne(
{ _id: this._model._id },
{ $set: set }
).exec();
}
async conference(): Promise<Conference | null> {
+10 -1
View File
@@ -179,7 +179,16 @@ export async function getToken(repository: Repository) {
} else {
repository.owner.model.accessTokenDates.github = new Date();
}
await repository.owner.model.save();
await UserModel.updateOne(
{ _id: repository.owner.model._id },
{
$set: {
"accessTokens.github": refreshed,
"accessTokenDates.github":
repository.owner.model.accessTokenDates.github,
},
}
).exec();
return refreshed;
}
}
+40 -4
View File
@@ -5,6 +5,7 @@ import Conference from "./Conference";
import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { IAnonymizedPullRequestDocument } from "./model/anonymizedPullRequests/anonymizedPullRequests.types";
import AnonymizedPullRequestModel from "./model/anonymizedPullRequests/anonymizedPullRequests.model";
import config from "../config";
import got, { HTTPError } from "got";
import { octokit } from "./GitHubUtils";
@@ -189,7 +190,10 @@ export default class PullRequest {
await this.download();
this._model.anonymizeDate = new Date();
await this.updateStatus(RepositoryStatus.READY);
await this._model.save();
await AnonymizedPullRequestModel.updateOne(
{ _id: this._model._id },
{ $set: { anonymizeDate: this._model.anonymizeDate } }
).exec();
}
}
/**
@@ -211,7 +215,13 @@ export default class PullRequest {
async countView() {
this._model.lastView = new Date();
this._model.pageView = (this._model.pageView || 0) + 1;
await this._model.save();
await AnonymizedPullRequestModel.updateOne(
{ _id: this._model._id },
{
$set: { lastView: this._model.lastView },
$inc: { pageView: 1 },
}
).exec();
}
/**
@@ -223,7 +233,16 @@ export default class PullRequest {
this._model.status = status;
this._model.statusDate = new Date();
this._model.statusMessage = statusMessage;
await this._model.save();
await AnonymizedPullRequestModel.updateOne(
{ _id: this._model._id },
{
$set: {
status,
statusDate: this._model.statusDate,
statusMessage,
},
}
).exec();
}
/**
@@ -261,7 +280,24 @@ export default class PullRequest {
this._model.pullRequest.mergedDate = undefined;
this._model.pullRequest.state = "closed";
this._model.pullRequest.draft = false;
await this._model.save();
const set: Record<string, unknown> = {
"pullRequest.comments": [],
"pullRequest.body": "",
"pullRequest.title": "",
"pullRequest.diff": "",
"pullRequest.baseRepositoryFullName": "",
"pullRequest.headRepositoryFullName": "",
"pullRequest.merged": false,
"pullRequest.state": "closed",
"pullRequest.draft": false,
};
const unset: Record<string, ""> = { "pullRequest.mergedDate": "" };
if (status) set.status = status;
if (statusMessage) set.statusMessage = statusMessage;
await AnonymizedPullRequestModel.updateOne(
{ _id: this._model._id },
{ $set: set, $unset: unset }
).exec();
}
/**
+21 -4
View File
@@ -75,7 +75,10 @@ export default class Repository {
if (originalToken != token) {
this._model.source.accessToken = token;
if (isConnected) {
await this._model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: this._model._id },
{ $set: { "source.accessToken": token } }
).exec();
}
}
this.checkedToken = true;
@@ -155,7 +158,15 @@ export default class Repository {
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
if (isConnected) {
await this._model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: this._model._id },
{
$set: {
truncatedFolders: this._model.truncatedFolders,
size: this._model.size,
},
}
).exec();
}
}
if (opt.path?.includes(config.ANONYMIZATION_MASK)) {
@@ -306,7 +317,10 @@ export default class Repository {
if (this.model.source.repositoryName !== ghRepo.fullName) {
this.model.source.repositoryName = ghRepo.fullName;
if (isConnected) {
await this._model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: this._model._id },
{ $set: { "source.repositoryName": ghRepo.fullName } }
).exec();
}
}
const branches = await ghRepo.branches({
@@ -521,7 +535,10 @@ export default class Repository {
file: res[0]?.file || 0,
};
if (isConnected) {
await this._model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: this._model._id },
{ $set: { size: this._model.size } }
).exec();
}
return this._model.size;
}
+5 -1
View File
@@ -1,5 +1,6 @@
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import RepositoryModel from "./model/repositories/repositories.model";
import UserModel from "./model/users/users.model";
import { IUserDocument } from "./model/users/users.types";
import Repository from "./Repository";
import { GitHubRepository } from "./source/GitHubRepository";
@@ -106,7 +107,10 @@ export default class User {
.filter((id) => !!id) as unknown as typeof this._model.repositories;
// have the model
await this._model.save();
await UserModel.updateOne(
{ _id: this._model._id },
{ $set: { repositories: this._model.repositories } }
).exec();
return repositories.map((r) => new GitHubRepository(r));
} else {
// Only the fields read by GitHubRepository.toJSON() (and the immediate
+22 -7
View File
@@ -2,7 +2,7 @@ import { createClient, RedisClientType } from "redis";
import config from "../config";
export const ERROR_LOG_KEY = "admin:errors";
export const ERROR_LOG_MAX = 1000;
export const ERROR_LOG_MAX = 5000;
export const ERROR_LOG_HOURLY_PREFIX = "admin:errors:hourly:";
export const ERROR_LOG_DROPPED_KEY = "admin:errors:dropped";
// 48h retention on the hourly counters: stats endpoint reads "last 24h" and
@@ -176,7 +176,6 @@ function persistError(entry: {
droppedInProcess++;
return;
}
const payload = clampPayload(entry);
// Pre-compute the structured fields the stats endpoint needs so the read
// path doesn't have to parse the JSON list at all.
const detail = entry.raw.find(
@@ -191,12 +190,28 @@ function persistError(entry: {
? (detail.code as string)
: "") ||
"_";
// Routine client misuse (401/403/404 and the allowlisted "expected" 4xx
// codes below) drowns out real errors in the recent-entries ring. Keep the
// hourly counters so spikes still show on the admin page, but don't push
// these entries into the bounded list.
const noisyCodes = new Set([
"repository_expired",
"repository_not_ready",
"repoId_already_used",
"invalid_repoId",
"page_not_supported_on_different_branch",
]);
const skipRing = bucket === "info" || noisyCodes.has(code);
const hKey = hourKey(entry.ts);
client
.multi()
.lPush(ERROR_LOG_KEY, payload)
.lTrim(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1)
.hIncrBy(hKey, "total", 1)
const tx = client.multi();
if (!skipRing) {
tx.lPush(ERROR_LOG_KEY, clampPayload(entry)).lTrim(
ERROR_LOG_KEY,
0,
ERROR_LOG_MAX - 1
);
}
tx.hIncrBy(hKey, "total", 1)
.hIncrBy(hKey, `bucket:${bucket}`, 1)
.hIncrBy(hKey, `level:${entry.level}`, 1)
.hIncrBy(hKey, `module:${entry.module}`, 1)
+4 -1
View File
@@ -170,7 +170,10 @@ export class GitHubRepository {
ghRes.data.encoding as BufferEncoding
).toString("utf-8");
selected.readme = readme;
await model.save();
await RepositoryModel.updateOne(
{ externalId: this.id, "branches.name": opt.branch },
{ $set: { "branches.$.readme": readme } }
).exec();
} catch (error) {
throw new AnonymousError("readme_not_available", {
httpStatus: 404,
+3 -1
View File
@@ -235,7 +235,9 @@ export default class GitHubStream extends GitHubBase {
// (`[fs] write failed`). Swallow the rejection here so an upstream error
// (e.g. GitHub 422 on a too-big blob) doesn't surface as an unhandled
// promise rejection and crash the streamer process.
storage.write(repoId, filePath, stream1, this.type).catch(() => {});
storage
.write(repoId, filePath, stream1, this.type, expected.size)
.catch(() => {});
return stream2;
}
+19 -1
View File
@@ -11,6 +11,7 @@ import StorageBase, { FILE_TYPE } from "./Storage";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";
import { createLogger, serializeError } from "../logger";
import AnonymousError from "../AnonymousError";
const logger = createLogger("fs");
@@ -62,7 +63,9 @@ export default class FileSystem extends StorageBase {
async write(
repoId: string,
p: string,
data: string | Readable
data: string | Readable,
_source?: string,
expectedSize?: number
): Promise<void> {
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
// Atomic write: stream into a sibling .tmp and only rename into place
@@ -98,6 +101,21 @@ export default class FileSystem extends StorageBase {
data.pipe(ws);
});
}
// Size guard: stat the tmp before renaming. A short read produces a
// truncated tmp file that we MUST NOT promote — accepting it would
// shadow the real content on every subsequent request. Allow >=
// expectedSize because Git LFS files resolve to content larger than
// the pointer's recorded size.
if (typeof expectedSize === "number" && expectedSize > 0) {
const stat = await fs.promises.stat(tmpPath);
if (stat.size < expectedSize) {
await fs.promises.rm(tmpPath, { force: true }).catch(() => undefined);
throw new AnonymousError("storage_write_size_mismatch", {
httpStatus: 502,
object: { path: p, expected: expectedSize, actual: stat.size },
});
}
}
await fs.promises.rename(tmpPath, fullPath);
} catch (err) {
logger.error("write failed", serializeError(err));
+38 -3
View File
@@ -7,7 +7,7 @@ import {
import { Upload } from "@aws-sdk/lib-storage";
import { NodeHttpHandler } from "@smithy/node-http-handler";
import config from "../../config";
import { pipeline, Readable, Transform } from "stream";
import { pipeline, Readable, Transform, PassThrough } from "stream";
import ArchiveStreamToS3 from "decompress-stream-to-s3";
import { Response } from "express";
import { lookup } from "mime-types";
@@ -174,7 +174,8 @@ export default class S3Storage extends StorageBase {
repoId: string,
path: string,
data: string | Readable,
source?: string
source?: string,
expectedSize?: number
): Promise<void> {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
@@ -186,10 +187,28 @@ export default class S3Storage extends StorageBase {
// recovers from any object that does end up undersized for any
// reason.
// When we know the upstream byte count, count bytes through a
// PassThrough so we can refuse to commit a short read. The Upload
// client itself doesn't expose bytesWritten, and S3 will happily
// PutObject a truncated body if the source stream ends cleanly with
// fewer bytes than expected.
let body: string | Readable = data;
let bytesWritten = -1;
if (typeof expectedSize === "number" && expectedSize > 0 && typeof data !== "string") {
bytesWritten = 0;
const counter = new PassThrough();
counter.on("data", (chunk: Buffer) => {
bytesWritten += chunk.length;
});
data.on("error", (err: Error) => counter.destroy(err));
data.pipe(counter);
body = counter;
}
const params: PutObjectCommandInput = {
Bucket: config.S3_BUCKET,
Key: join(this.repoPath(repoId), path),
Body: data,
Body: body,
ContentType: lookup(path).toString(),
};
if (source) {
@@ -203,6 +222,22 @@ export default class S3Storage extends StorageBase {
});
await parallelUploads3.done();
if (
typeof expectedSize === "number" &&
expectedSize > 0 &&
bytesWritten >= 0 &&
bytesWritten < expectedSize
) {
// The body completed cleanly but produced fewer bytes than the
// tree said — delete the truncated object so the next request
// re-fetches from GitHub instead of serving the short blob.
await this.rm(repoId, path).catch(() => undefined);
throw new AnonymousError("storage_write_size_mismatch", {
httpStatus: 502,
object: { path, expected: expectedSize, actual: bytesWritten },
});
}
}
/** @override */
+10 -1
View File
@@ -55,7 +55,16 @@ export default abstract class StorageBase {
repoId: string,
path: string,
data: string | Readable,
source?: string
source?: string,
/**
* Expected number of bytes for the source. When provided and the
* stream produces fewer bytes (a truncated upstream response, a socket
* reset that didn't surface as an error, etc.), the write is rejected
* and any partial blob is removed instead of being committed. This is
* the load-bearing guard that keeps zero-byte cache entries from
* silently shadowing real files on subsequent reads.
*/
expectedSize?: number
): Promise<void>;
/**
+10 -7
View File
@@ -39,17 +39,20 @@ router.post("/", async (req, res) => {
const plaintext = generateToken();
const tokenHash = hashToken(plaintext);
const model = await UserModel.findById(user.model.id);
if (!model) return res.status(404).json({ error: "user_not_found" });
if (!model.apiTokens) model.apiTokens = [];
model.apiTokens.push({
const entry = {
tokenHash,
name,
createdAt: new Date(),
});
await model.save();
};
const updated = await UserModel.findByIdAndUpdate(
user.model.id,
{ $push: { apiTokens: entry } },
{ new: true }
);
if (!updated) return res.status(404).json({ error: "user_not_found" });
const created = model.apiTokens[model.apiTokens.length - 1];
const tokens = updated.apiTokens || [];
const created = tokens[tokens.length - 1];
res.json({
id: created._id,
name: created.name,
+8 -2
View File
@@ -662,7 +662,10 @@ router.post(
addedAt: new Date(),
});
repo.model.coauthors = list;
await repo.model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: repo.model._id },
{ $set: { coauthors: list } }
).exec();
res.json(repo.model.coauthors);
} catch (error) {
handleError(error, res, req);
@@ -690,7 +693,10 @@ router.delete(
repo.model.coauthors = (repo.model.coauthors || []).filter(
(c) => c.username.toLowerCase() !== target.toLowerCase()
);
await repo.model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: repo.model._id },
{ $set: { coauthors: repo.model.coauthors } }
).exec();
res.json(repo.model.coauthors);
} catch (error) {
handleError(error, res, req);
+42 -3
View File
@@ -9,8 +9,11 @@ import { downloadQueue } from "../../queue";
import { RepositoryStatus } from "../../core/types";
import User from "../../core/User";
import { streamAnonymizedZip } from "../../core/zipStream";
import { createLogger, serializeError } from "../../core/logger";
import gh = require("parse-github-url");
const logger = createLogger("repository-public");
const router = express.Router();
router.get(
@@ -69,11 +72,47 @@ router.get(
},
},
})
.on("error", () => {
.on("error", (err: Error & { response?: { statusCode?: number; body?: unknown } }) => {
const upstreamStatus = err?.response?.statusCode;
let upstreamBody: string | undefined;
let errCode = "zip_not_available";
let httpStatus = 502;
if (err?.response?.body != null) {
try {
upstreamBody =
typeof err.response.body === "string"
? err.response.body
: Buffer.isBuffer(err.response.body)
? err.response.body.toString("utf8")
: JSON.stringify(err.response.body);
} catch {
/* ignore */
}
if (upstreamBody) {
try {
const parsed = JSON.parse(upstreamBody);
if (parsed && typeof parsed.error === "string") {
errCode = parsed.error;
}
} catch {
/* not JSON */
}
}
}
if (typeof upstreamStatus === "number") {
httpStatus = upstreamStatus >= 500 ? 502 : upstreamStatus;
}
logger.warn("streamer zip fetch failed", {
repoId: repo.repoId,
upstreamStatus,
upstreamBody: upstreamBody?.slice(0, 500),
err: serializeError(err),
});
handleError(
new AnonymousError("file_not_found", {
new AnonymousError(errCode, {
url: req.originalUrl,
httpStatus: 404,
httpStatus,
cause: err,
}),
res
);
+11 -2
View File
@@ -3,6 +3,7 @@ import config from "../../config";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin } from "./route-utils";
import UserModel from "../../core/model/users/users.model";
import AnonymizedRepositoryModel from "../../core/model/anonymizedRepositories/anonymizedRepositories.model";
import User from "../../core/User";
import FileModel from "../../core/model/files/files.model";
import { isConnected } from "../database";
@@ -90,7 +91,12 @@ router.get("/quota", async (req: express.Request, res: express.Response) => {
await Promise.all(
ready
.filter((r) => uncachedSet.has(r.repoId))
.map((r) => r.model.save())
.map((r) =>
AnonymizedRepositoryModel.updateOne(
{ _id: r.model._id },
{ $set: { size: r.model.size } }
).exec()
)
);
}
}
@@ -131,7 +137,10 @@ router.post("/default", async (req: express.Request, res: express.Response) => {
const d = req.body;
user.model.default = d;
await user.model.save();
await UserModel.updateOne(
{ _id: user.model._id },
{ $set: { default: d } }
).exec();
res.send("ok");
} catch (error) {
handleError(error, res, req);