error logging improvement, regex fix

This commit is contained in:
tdurieux
2026-05-06 11:09:17 +03:00
parent e34f45522f
commit c2d43164d0
39 changed files with 747 additions and 126 deletions
+5 -2
View File
@@ -8,6 +8,9 @@ import { IAnonymizedGistDocument } from "./model/anonymizedGists/anonymizedGists
import config from "../config";
import { octokit } from "./GitHubUtils";
import { ContentAnonimizer } from "./anonymize-utils";
import { createLogger } from "./logger";
const logger = createLogger("gist");
type GistPayload = {
description: string;
@@ -59,14 +62,14 @@ export default class Gist {
try {
return this._model.source.accessToken;
} catch {
console.debug("[ERROR] Token is invalid", this._model.source.gistId);
logger.warn("invalid token", { gistId: this._model.source.gistId });
}
}
return config.GITHUB_TOKEN;
}
async download() {
console.debug("[INFO] Downloading gist", this._model.source.gistId);
logger.info("downloading gist", { gistId: this._model.source.gistId });
const oct = octokit(await this.getToken());
const gist_id = this._model.source.gistId;
+8 -4
View File
@@ -3,6 +3,9 @@ import { Octokit } from "@octokit/rest";
import Repository from "./Repository";
import UserModel from "./model/users/users.model";
import config from "../config";
import { createLogger } from "./logger";
const logger = createLogger("github");
export function octokit(token: string) {
return new Octokit({
@@ -24,7 +27,7 @@ export async function checkToken(token: string) {
}
export async function getToken(repository: Repository) {
console.log("getToken", repository.repoId);
logger.debug("getToken", { repoId: repository.repoId });
// if (repository.model.source.accessToken) {
// // only check the token if the repo has been visited less than 10 minutes ago
// if (
@@ -101,9 +104,10 @@ export async function getToken(repository: Repository) {
return refreshed;
}
}
console.warn(
`[getToken] refresh failed for ${repository.owner.model.username} (status ${res.status}); falling back`
);
logger.warn("token refresh failed; falling back", {
username: repository.owner.model.username,
status: res.status,
});
// fall through to the checkToken path / config.GITHUB_TOKEN
}
const check = await checkToken(ownerAccessToken);
+15 -16
View File
@@ -9,6 +9,9 @@ import config from "../config";
import got, { HTTPError } from "got";
import { octokit } from "./GitHubUtils";
import { ContentAnonimizer } from "./anonymize-utils";
import { createLogger } from "./logger";
const logger = createLogger("pull-request");
export default class PullRequest {
private _model: IAnonymizedPullRequestDocument;
@@ -38,20 +41,18 @@ export default class PullRequest {
try {
return this._model.source.accessToken;
} catch {
console.debug(
"[ERROR] Token is invalid",
this._model.source.pullRequestId
);
logger.warn("invalid token", {
pullRequestId: this._model.source.pullRequestId,
});
}
}
return config.GITHUB_TOKEN;
}
async download() {
console.debug(
"[INFO] Downloading pull request",
this._model.source.pullRequestId
);
logger.info("downloading pull request", {
pullRequestId: this._model.source.pullRequestId,
});
const oct = octokit(await this.getToken());
const [owner, repo] = this._model.source.repositoryFullName.split("/");
@@ -89,10 +90,9 @@ export default class PullRequest {
user?: { login?: string } | null;
}> => {
if ((err as { status?: number }).status === 404) {
console.warn(
"[WARN] Failed to fetch PR comments (404), continuing without them",
`${owner}/${repo}#${pull_number}`
);
logger.warn("PR comments 404, continuing without them", {
pr: `${owner}/${repo}#${pull_number}`,
});
return [];
}
throw err;
@@ -102,10 +102,9 @@ export default class PullRequest {
`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`
).catch((err) => {
if (err instanceof HTTPError && err.response.statusCode === 404) {
console.warn(
"[WARN] Failed to fetch PR diff (404), continuing without it",
`${owner}/${repo}#${pull_number}`
);
logger.warn("PR diff 404, continuing without it", {
pr: `${owner}/${repo}#${pull_number}`,
});
return { body: "" };
}
throw err;
+14 -9
View File
@@ -23,6 +23,9 @@ import { getToken } from "./GitHubUtils";
import config from "../config";
import FileModel from "./model/files/files.model";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { createLogger, serializeError } from "./logger";
const logger = createLogger("repository");
import { IFile } from "./model/files/files.types";
import AnonymizedFile from "./AnonymizedFile";
import { FilterQuery } from "mongoose";
@@ -316,9 +319,10 @@ export default class Repository {
?.commit;
if (!newCommit) {
console.error(
`${branchName} for ${this.model.source.repositoryName} is not found`
);
logger.error("branch not found", {
branch: branchName,
repo: this.model.source.repositoryName,
});
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
await this.resetSate();
throw new AnonymousError("branch_not_found", {
@@ -330,7 +334,7 @@ export default class Repository {
this.model.source.commit == newCommit &&
this.status == RepositoryStatus.READY
) {
console.log(`[UPDATE] ${this._model.repoId} is up to date`);
logger.info("up to date", { repoId: this._model.repoId });
return;
}
this._model.source.commit = newCommit;
@@ -347,9 +351,10 @@ export default class Repository {
}
this.model.source.commit = newCommit;
this._model.anonymizeDate = new Date();
console.log(
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
);
logger.info("update queued", {
repoId: this._model.repoId,
commit: newCommit,
});
await this.resetSate(RepositoryStatus.PREPARING);
await downloadQueue.add(this.repoId, { repoId: this.repoId }, {
@@ -450,7 +455,7 @@ export default class Repository {
FileModel.deleteMany({ repoId: this.repoId }).exec(),
this.removeCache(),
]);
console.log(`[RESET] ${this._model.repoId} has been reset`);
logger.info("reset", { repoId: this._model.repoId });
}
/**
@@ -464,7 +469,7 @@ export default class Repository {
try {
await this.model.save();
} catch (error) {
console.error("[ERROR] removeCache save", error);
logger.error("removeCache save failed", serializeError(error));
}
}
}
+172
View File
@@ -0,0 +1,172 @@
import { createClient, RedisClientType } from "redis";
import config from "../config";
export const ERROR_LOG_KEY = "admin:errors";
export const ERROR_LOG_MAX = 1000;
export type Logger = {
debug: (...args: unknown[]) => void;
info: (...args: unknown[]) => void;
warn: (...args: unknown[]) => void;
error: (...args: unknown[]) => void;
};
type Level = "debug" | "info" | "warn" | "error";
const LEVEL_ORDER: Record<Level, number> = {
debug: 10,
info: 20,
warn: 30,
error: 40,
};
function resolveThreshold(): number {
const raw = (process.env.LOG_LEVEL || "").toLowerCase() as Level;
if (raw in LEVEL_ORDER) return LEVEL_ORDER[raw];
return process.env.NODE_ENV === "production"
? LEVEL_ORDER.info
: LEVEL_ORDER.debug;
}
const threshold = resolveThreshold();
function formatArg(a: unknown): string {
if (typeof a === "string") return a;
if (a instanceof Error) return JSON.stringify(serializeError(a));
try {
return JSON.stringify(a);
} catch {
return String(a);
}
}
let redisClient: RedisClientType | null = null;
let redisDisabled = false;
function getRedis(): RedisClientType | null {
if (redisDisabled) return null;
if (redisClient) return redisClient;
try {
redisClient = createClient({
socket: {
host: config.REDIS_HOSTNAME,
port: config.REDIS_PORT,
// Give up on first failure — we don't want the redis client's
// reconnect timer keeping the event loop alive (breaks unit tests
// that just import the logger), and we don't want logger.error to
// recursively retrigger if redis is down.
reconnectStrategy: false,
},
}) as RedisClientType;
redisClient.on("error", () => {
if (!redisDisabled) {
redisDisabled = true;
try {
redisClient?.disconnect();
} catch {
/* ignore */
}
}
});
redisClient.connect().catch(() => {
redisDisabled = true;
});
return redisClient;
} catch {
redisDisabled = true;
return null;
}
}
function persistError(entry: {
ts: string;
module: string;
message: string;
raw: unknown[];
}) {
const client = getRedis();
if (!client || !client.isOpen) return;
const payload = JSON.stringify(entry);
client
.multi()
.lPush(ERROR_LOG_KEY, payload)
.lTrim(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1)
.exec()
.catch(() => undefined);
}
function emit(level: Level, module: string, args: unknown[]) {
if (LEVEL_ORDER[level] < threshold) return;
const ts = new Date().toISOString();
const formatted = args.map(formatArg);
const line = `${ts} ${level.toUpperCase()} [${module}] ${formatted.join(" ")}`;
const sink =
level === "error"
? console.error
: level === "warn"
? console.warn
: level === "debug"
? console.debug
: console.log;
sink(line);
if (level === "error") {
persistError({
ts,
module,
message: typeof args[0] === "string" ? args[0] : "",
raw: args.map((a) => {
if (a instanceof Error) return serializeError(a);
return a;
}),
});
}
}
export function createLogger(module: string): Logger {
return {
debug: (...args) => emit("debug", module, args),
info: (...args) => emit("info", module, args),
warn: (...args) => emit("warn", module, args),
error: (...args) => emit("error", module, args),
};
}
type ErrorLike = {
name?: string;
message?: string;
stack?: string;
status?: number;
httpStatus?: number;
code?: string | number;
cause?: unknown;
request?: { url?: string; method?: string };
response?: { url?: string; status?: number };
};
export function serializeError(err: unknown): Record<string, unknown> {
if (err == null) return { value: err };
if (typeof err !== "object") return { value: String(err) };
const e = err as ErrorLike;
const out: Record<string, unknown> = {};
if (e.name) out.name = e.name;
if (e.message) out.message = e.message;
// Octokit RequestError / HTTP-shaped errors: surface status + url + method,
// skip the giant headers/response body dump.
if (typeof e.status === "number") out.status = e.status;
if (e.request?.url) out.url = e.request.url;
if (e.request?.method) out.method = e.request.method;
if (!e.request && e.response?.url) out.url = e.response.url;
// AnonymousError carries an httpStatus and an inner cause.
if (typeof e.httpStatus === "number") out.httpStatus = e.httpStatus;
if (e.code !== undefined && e.code !== e.message) out.code = e.code;
if (e.cause) out.cause = serializeError(e.cause);
// Only include the stack when there's nothing else useful — avoids dumping
// a stack for handled HTTP errors but keeps debuggability for plain Errors.
if (!out.status && !out.httpStatus && e.stack) out.stack = e.stack;
return out;
}
+9 -4
View File
@@ -14,6 +14,9 @@ import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";
import { createLogger, serializeError } from "../logger";
const logger = createLogger("gh-stream");
export default class GitHubStream extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
@@ -36,7 +39,7 @@ export default class GitHubStream extends GitHubBase {
repo: this.data.repoName,
file_sha: sha,
});
console.log("[GHStream] Downloading file", url);
logger.debug("downloading file", { url });
return got.stream(url, {
headers: {
"X-GitHub-Api-Version": "2022-11-28",
@@ -45,7 +48,7 @@ export default class GitHubStream extends GitHubBase {
},
});
} catch (error) {
console.error(error);
logger.error("downloadFile failed", serializeError(error));
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
object: this.data,
@@ -60,7 +63,7 @@ export default class GitHubStream extends GitHubBase {
// as the fallback for LFS files (#95).
private downloadFileViaRaw(token: string, filePath: string) {
const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${filePath}`;
console.log("[GHStream] Downloading via raw URL (LFS)", url);
logger.debug("downloading via raw URL (LFS)", { url });
return got.stream(url, {
headers: { authorization: `token ${token}` },
followRedirect: true,
@@ -267,9 +270,9 @@ export default class GitHubStream extends GitHubBase {
}
output.push(...this.tree2Tree(data.tree, parentPath));
} catch (error) {
console.log(error);
const status = (error as { status?: number }).status;
if (status === 409) {
logger.debug("getTree empty repo", serializeError(error));
throw new AnonymousError("repo_empty", {
httpStatus: 409,
object: this.data,
@@ -277,6 +280,7 @@ export default class GitHubStream extends GitHubBase {
});
}
if (status === 404) {
logger.debug("getTree miss", serializeError(error));
const code = await classifyGitHubMissError(error, this.data);
throw new AnonymousError(code, {
httpStatus: 404,
@@ -284,6 +288,7 @@ export default class GitHubStream extends GitHubBase {
cause: error as Error,
});
}
logger.warn("getTree failed", serializeError(error));
throw new AnonymousError("repo_not_found", {
httpStatus: status || 500,
object: this.data,
+4 -1
View File
@@ -10,6 +10,9 @@ import { lookup } from "mime-types";
import StorageBase, { FILE_TYPE } from "./Storage";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";
import { createLogger, serializeError } from "../logger";
const logger = createLogger("fs");
export default class FileSystem extends StorageBase {
type = "FileSystem";
@@ -97,7 +100,7 @@ export default class FileSystem extends StorageBase {
}
await fs.promises.rename(tmpPath, fullPath);
} catch (err) {
console.error("[ERROR] FileSystem.write failed:", err);
logger.error("write failed", serializeError(err));
await fs.promises.rm(tmpPath, { force: true }).catch(() => undefined);
throw err;
}
+4 -1
View File
@@ -17,6 +17,9 @@ import AnonymousError from "../AnonymousError";
import StorageBase, { FILE_TYPE } from "./Storage";
import { IFile } from "../model/files/files.types";
import FileModel from "../model/files/files.model";
import { createLogger, serializeError } from "../logger";
const logger = createLogger("s3");
export default class S3Storage extends StorageBase {
type = "AWS";
@@ -129,7 +132,7 @@ export default class S3Storage extends StorageBase {
try {
res.status(500).json({ error: "file_not_found" });
} catch (err) {
console.error(`[ERROR] S3 send ${path}`, err);
logger.error("send failed", { path, err: serializeError(err) });
}
}
}
+21
View File
@@ -82,6 +82,7 @@ const UNICODE_ESCAPE_CHARS = new Set(
export function diacriticInsensitive(escapedTerm: string): string {
let out = "";
let i = 0;
let inClass = false;
while (i < escapedTerm.length) {
const c = escapedTerm[i];
if (c === "\\" && i + 1 < escapedTerm.length) {
@@ -96,6 +97,26 @@ export function diacriticInsensitive(escapedTerm: string): string {
i += 2;
continue;
}
// Inside a character class, leave letters alone — expanding them into
// bracketed alternatives would produce nested `[...]` which is a syntax
// error. The user's regex is responsible for its own char-class content.
if (c === "[" && !inClass) {
inClass = true;
out += c;
i += 1;
continue;
}
if (c === "]" && inClass) {
inClass = false;
out += c;
i += 1;
continue;
}
if (inClass) {
out += c;
i += 1;
continue;
}
const lower = c.toLowerCase();
out += DIACRITIC_CLASSES[lower] || c;
i += 1;
+7 -4
View File
@@ -8,6 +8,9 @@ import {
anonymizePathCompiled,
compileTerms,
} from "./anonymize-utils";
import { createLogger, serializeError } from "./logger";
const logger = createLogger("zip-stream");
export interface StreamAnonymizedZipOptions {
repoId: string;
@@ -82,7 +85,7 @@ export async function streamAnonymizedZip(
const downloadStream = got.stream(response.url);
res.on("error", (error) => {
console.error(error);
logger.error("response stream error", serializeError(error));
downloadStream.destroy();
});
res.on("close", () => {
@@ -101,7 +104,7 @@ export async function streamAnonymizedZip(
// bug as #694.
let upstreamSucceeded = false;
const fail = (error: Error) => {
console.error(error);
logger.error("upstream zipball failed", serializeError(error));
archive.abort();
const destroyable = res as unknown as {
destroy?: (err?: Error) => void;
@@ -140,7 +143,7 @@ export async function streamAnonymizedZip(
archive.append(st, { name: fileName });
} catch (error) {
entry.autodrain();
console.error(error);
logger.error("entry transform failed", serializeError(error));
}
} else {
entry.autodrain();
@@ -157,7 +160,7 @@ export async function streamAnonymizedZip(
});
archive.pipe(res).on("error", (error) => {
console.error(error);
logger.error("archive pipe error", serializeError(error));
if (!upstreamSucceeded) {
// archive errored while we were still depending on upstream bytes:
// treat as failure rather than truncating.