mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-16 06:49:09 +02:00
error logging improvement, regex fix
This commit is contained in:
+5
-2
@@ -8,6 +8,9 @@ import { IAnonymizedGistDocument } from "./model/anonymizedGists/anonymizedGists
|
||||
import config from "../config";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
import { ContentAnonimizer } from "./anonymize-utils";
|
||||
import { createLogger } from "./logger";
|
||||
|
||||
const logger = createLogger("gist");
|
||||
|
||||
type GistPayload = {
|
||||
description: string;
|
||||
@@ -59,14 +62,14 @@ export default class Gist {
|
||||
try {
|
||||
return this._model.source.accessToken;
|
||||
} catch {
|
||||
console.debug("[ERROR] Token is invalid", this._model.source.gistId);
|
||||
logger.warn("invalid token", { gistId: this._model.source.gistId });
|
||||
}
|
||||
}
|
||||
return config.GITHUB_TOKEN;
|
||||
}
|
||||
|
||||
async download() {
|
||||
console.debug("[INFO] Downloading gist", this._model.source.gistId);
|
||||
logger.info("downloading gist", { gistId: this._model.source.gistId });
|
||||
const oct = octokit(await this.getToken());
|
||||
|
||||
const gist_id = this._model.source.gistId;
|
||||
|
||||
@@ -3,6 +3,9 @@ import { Octokit } from "@octokit/rest";
|
||||
import Repository from "./Repository";
|
||||
import UserModel from "./model/users/users.model";
|
||||
import config from "../config";
|
||||
import { createLogger } from "./logger";
|
||||
|
||||
const logger = createLogger("github");
|
||||
|
||||
export function octokit(token: string) {
|
||||
return new Octokit({
|
||||
@@ -24,7 +27,7 @@ export async function checkToken(token: string) {
|
||||
}
|
||||
|
||||
export async function getToken(repository: Repository) {
|
||||
console.log("getToken", repository.repoId);
|
||||
logger.debug("getToken", { repoId: repository.repoId });
|
||||
// if (repository.model.source.accessToken) {
|
||||
// // only check the token if the repo has been visited less than 10 minutes ago
|
||||
// if (
|
||||
@@ -101,9 +104,10 @@ export async function getToken(repository: Repository) {
|
||||
return refreshed;
|
||||
}
|
||||
}
|
||||
console.warn(
|
||||
`[getToken] refresh failed for ${repository.owner.model.username} (status ${res.status}); falling back`
|
||||
);
|
||||
logger.warn("token refresh failed; falling back", {
|
||||
username: repository.owner.model.username,
|
||||
status: res.status,
|
||||
});
|
||||
// fall through to the checkToken path / config.GITHUB_TOKEN
|
||||
}
|
||||
const check = await checkToken(ownerAccessToken);
|
||||
|
||||
+15
-16
@@ -9,6 +9,9 @@ import config from "../config";
|
||||
import got, { HTTPError } from "got";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
import { ContentAnonimizer } from "./anonymize-utils";
|
||||
import { createLogger } from "./logger";
|
||||
|
||||
const logger = createLogger("pull-request");
|
||||
|
||||
export default class PullRequest {
|
||||
private _model: IAnonymizedPullRequestDocument;
|
||||
@@ -38,20 +41,18 @@ export default class PullRequest {
|
||||
try {
|
||||
return this._model.source.accessToken;
|
||||
} catch {
|
||||
console.debug(
|
||||
"[ERROR] Token is invalid",
|
||||
this._model.source.pullRequestId
|
||||
);
|
||||
logger.warn("invalid token", {
|
||||
pullRequestId: this._model.source.pullRequestId,
|
||||
});
|
||||
}
|
||||
}
|
||||
return config.GITHUB_TOKEN;
|
||||
}
|
||||
|
||||
async download() {
|
||||
console.debug(
|
||||
"[INFO] Downloading pull request",
|
||||
this._model.source.pullRequestId
|
||||
);
|
||||
logger.info("downloading pull request", {
|
||||
pullRequestId: this._model.source.pullRequestId,
|
||||
});
|
||||
const oct = octokit(await this.getToken());
|
||||
|
||||
const [owner, repo] = this._model.source.repositoryFullName.split("/");
|
||||
@@ -89,10 +90,9 @@ export default class PullRequest {
|
||||
user?: { login?: string } | null;
|
||||
}> => {
|
||||
if ((err as { status?: number }).status === 404) {
|
||||
console.warn(
|
||||
"[WARN] Failed to fetch PR comments (404), continuing without them",
|
||||
`${owner}/${repo}#${pull_number}`
|
||||
);
|
||||
logger.warn("PR comments 404, continuing without them", {
|
||||
pr: `${owner}/${repo}#${pull_number}`,
|
||||
});
|
||||
return [];
|
||||
}
|
||||
throw err;
|
||||
@@ -102,10 +102,9 @@ export default class PullRequest {
|
||||
`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`
|
||||
).catch((err) => {
|
||||
if (err instanceof HTTPError && err.response.statusCode === 404) {
|
||||
console.warn(
|
||||
"[WARN] Failed to fetch PR diff (404), continuing without it",
|
||||
`${owner}/${repo}#${pull_number}`
|
||||
);
|
||||
logger.warn("PR diff 404, continuing without it", {
|
||||
pr: `${owner}/${repo}#${pull_number}`,
|
||||
});
|
||||
return { body: "" };
|
||||
}
|
||||
throw err;
|
||||
|
||||
+14
-9
@@ -23,6 +23,9 @@ import { getToken } from "./GitHubUtils";
|
||||
import config from "../config";
|
||||
import FileModel from "./model/files/files.model";
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { createLogger, serializeError } from "./logger";
|
||||
|
||||
const logger = createLogger("repository");
|
||||
import { IFile } from "./model/files/files.types";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import { FilterQuery } from "mongoose";
|
||||
@@ -316,9 +319,10 @@ export default class Repository {
|
||||
?.commit;
|
||||
|
||||
if (!newCommit) {
|
||||
console.error(
|
||||
`${branchName} for ${this.model.source.repositoryName} is not found`
|
||||
);
|
||||
logger.error("branch not found", {
|
||||
branch: branchName,
|
||||
repo: this.model.source.repositoryName,
|
||||
});
|
||||
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
|
||||
await this.resetSate();
|
||||
throw new AnonymousError("branch_not_found", {
|
||||
@@ -330,7 +334,7 @@ export default class Repository {
|
||||
this.model.source.commit == newCommit &&
|
||||
this.status == RepositoryStatus.READY
|
||||
) {
|
||||
console.log(`[UPDATE] ${this._model.repoId} is up to date`);
|
||||
logger.info("up to date", { repoId: this._model.repoId });
|
||||
return;
|
||||
}
|
||||
this._model.source.commit = newCommit;
|
||||
@@ -347,9 +351,10 @@ export default class Repository {
|
||||
}
|
||||
this.model.source.commit = newCommit;
|
||||
this._model.anonymizeDate = new Date();
|
||||
console.log(
|
||||
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
|
||||
);
|
||||
logger.info("update queued", {
|
||||
repoId: this._model.repoId,
|
||||
commit: newCommit,
|
||||
});
|
||||
|
||||
await this.resetSate(RepositoryStatus.PREPARING);
|
||||
await downloadQueue.add(this.repoId, { repoId: this.repoId }, {
|
||||
@@ -450,7 +455,7 @@ export default class Repository {
|
||||
FileModel.deleteMany({ repoId: this.repoId }).exec(),
|
||||
this.removeCache(),
|
||||
]);
|
||||
console.log(`[RESET] ${this._model.repoId} has been reset`);
|
||||
logger.info("reset", { repoId: this._model.repoId });
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -464,7 +469,7 @@ export default class Repository {
|
||||
try {
|
||||
await this.model.save();
|
||||
} catch (error) {
|
||||
console.error("[ERROR] removeCache save", error);
|
||||
logger.error("removeCache save failed", serializeError(error));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
import { createClient, RedisClientType } from "redis";
|
||||
import config from "../config";
|
||||
|
||||
// Redis list key under which error-level log entries are persisted
// (written by `persistError`, trimmed to ERROR_LOG_MAX entries).
export const ERROR_LOG_KEY = "admin:errors";
// Hard cap on the persisted error list; older entries are trimmed away.
export const ERROR_LOG_MAX = 1000;
|
||||
|
||||
/**
 * Minimal console-like logging façade returned by `createLogger`.
 * Arguments may be strings, Errors, or arbitrary values; rendering is
 * handled by `emit`/`formatArg`, and error-level calls are additionally
 * persisted to redis.
 */
export type Logger = {
  debug: (...args: unknown[]) => void;
  info: (...args: unknown[]) => void;
  warn: (...args: unknown[]) => void;
  error: (...args: unknown[]) => void;
};
|
||||
|
||||
type Level = "debug" | "info" | "warn" | "error";
|
||||
|
||||
const LEVEL_ORDER: Record<Level, number> = {
|
||||
debug: 10,
|
||||
info: 20,
|
||||
warn: 30,
|
||||
error: 40,
|
||||
};
|
||||
|
||||
function resolveThreshold(): number {
|
||||
const raw = (process.env.LOG_LEVEL || "").toLowerCase() as Level;
|
||||
if (raw in LEVEL_ORDER) return LEVEL_ORDER[raw];
|
||||
return process.env.NODE_ENV === "production"
|
||||
? LEVEL_ORDER.info
|
||||
: LEVEL_ORDER.debug;
|
||||
}
|
||||
|
||||
const threshold = resolveThreshold();
|
||||
|
||||
function formatArg(a: unknown): string {
|
||||
if (typeof a === "string") return a;
|
||||
if (a instanceof Error) return JSON.stringify(serializeError(a));
|
||||
try {
|
||||
return JSON.stringify(a);
|
||||
} catch {
|
||||
return String(a);
|
||||
}
|
||||
}
|
||||
|
||||
// Lazily created, module-wide redis client used only for error persistence.
let redisClient: RedisClientType | null = null;
// Once redis fails we stop trying for the lifetime of the process, so the
// logger itself can never become a source of errors or retry loops.
let redisDisabled = false;
|
||||
|
||||
// Lazily create (at most once) the redis client used to persist error log
// entries. Returns null when redis is disabled or creation failed. Note:
// callers must additionally check `client.isOpen`, because connect() is
// fire-and-forget and the socket may still be opening.
function getRedis(): RedisClientType | null {
  if (redisDisabled) return null;
  if (redisClient) return redisClient;
  try {
    redisClient = createClient({
      socket: {
        host: config.REDIS_HOSTNAME,
        port: config.REDIS_PORT,
        // Give up on first failure — we don't want the redis client's
        // reconnect timer keeping the event loop alive (breaks unit tests
        // that just import the logger), and we don't want logger.error to
        // recursively retrigger if redis is down.
        reconnectStrategy: false,
      },
    }) as RedisClientType;
    // Any client-level error permanently disables persistence and tears
    // the connection down; without a handler, node would raise an
    // unhandled 'error' event.
    redisClient.on("error", () => {
      if (!redisDisabled) {
        redisDisabled = true;
        try {
          redisClient?.disconnect();
        } catch {
          /* ignore */
        }
      }
    });
    // Fire-and-forget connect: entries logged before the socket opens are
    // silently dropped (persistError checks isOpen).
    redisClient.connect().catch(() => {
      redisDisabled = true;
    });
    return redisClient;
  } catch {
    redisDisabled = true;
    return null;
  }
}
|
||||
|
||||
function persistError(entry: {
|
||||
ts: string;
|
||||
module: string;
|
||||
message: string;
|
||||
raw: unknown[];
|
||||
}) {
|
||||
const client = getRedis();
|
||||
if (!client || !client.isOpen) return;
|
||||
const payload = JSON.stringify(entry);
|
||||
client
|
||||
.multi()
|
||||
.lPush(ERROR_LOG_KEY, payload)
|
||||
.lTrim(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1)
|
||||
.exec()
|
||||
.catch(() => undefined);
|
||||
}
|
||||
|
||||
function emit(level: Level, module: string, args: unknown[]) {
|
||||
if (LEVEL_ORDER[level] < threshold) return;
|
||||
const ts = new Date().toISOString();
|
||||
const formatted = args.map(formatArg);
|
||||
const line = `${ts} ${level.toUpperCase()} [${module}] ${formatted.join(" ")}`;
|
||||
const sink =
|
||||
level === "error"
|
||||
? console.error
|
||||
: level === "warn"
|
||||
? console.warn
|
||||
: level === "debug"
|
||||
? console.debug
|
||||
: console.log;
|
||||
sink(line);
|
||||
if (level === "error") {
|
||||
persistError({
|
||||
ts,
|
||||
module,
|
||||
message: typeof args[0] === "string" ? args[0] : "",
|
||||
raw: args.map((a) => {
|
||||
if (a instanceof Error) return serializeError(a);
|
||||
return a;
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export function createLogger(module: string): Logger {
|
||||
return {
|
||||
debug: (...args) => emit("debug", module, args),
|
||||
info: (...args) => emit("info", module, args),
|
||||
warn: (...args) => emit("warn", module, args),
|
||||
error: (...args) => emit("error", module, args),
|
||||
};
|
||||
}
|
||||
|
||||
type ErrorLike = {
|
||||
name?: string;
|
||||
message?: string;
|
||||
stack?: string;
|
||||
status?: number;
|
||||
httpStatus?: number;
|
||||
code?: string | number;
|
||||
cause?: unknown;
|
||||
request?: { url?: string; method?: string };
|
||||
response?: { url?: string; status?: number };
|
||||
};
|
||||
|
||||
export function serializeError(err: unknown): Record<string, unknown> {
|
||||
if (err == null) return { value: err };
|
||||
if (typeof err !== "object") return { value: String(err) };
|
||||
|
||||
const e = err as ErrorLike;
|
||||
const out: Record<string, unknown> = {};
|
||||
if (e.name) out.name = e.name;
|
||||
if (e.message) out.message = e.message;
|
||||
|
||||
// Octokit RequestError / HTTP-shaped errors: surface status + url + method,
|
||||
// skip the giant headers/response body dump.
|
||||
if (typeof e.status === "number") out.status = e.status;
|
||||
if (e.request?.url) out.url = e.request.url;
|
||||
if (e.request?.method) out.method = e.request.method;
|
||||
if (!e.request && e.response?.url) out.url = e.response.url;
|
||||
|
||||
// AnonymousError carries an httpStatus and an inner cause.
|
||||
if (typeof e.httpStatus === "number") out.httpStatus = e.httpStatus;
|
||||
if (e.code !== undefined && e.code !== e.message) out.code = e.code;
|
||||
if (e.cause) out.cause = serializeError(e.cause);
|
||||
|
||||
// Only include the stack when there's nothing else useful — avoids dumping
|
||||
// a stack for handled HTTP errors but keeps debuggability for plain Errors.
|
||||
if (!out.status && !out.httpStatus && e.stack) out.stack = e.stack;
|
||||
|
||||
return out;
|
||||
}
|
||||
@@ -14,6 +14,9 @@ import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
import FileModel from "../model/files/files.model";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
import { createLogger, serializeError } from "../logger";
|
||||
|
||||
const logger = createLogger("gh-stream");
|
||||
|
||||
export default class GitHubStream extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
|
||||
@@ -36,7 +39,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
repo: this.data.repoName,
|
||||
file_sha: sha,
|
||||
});
|
||||
console.log("[GHStream] Downloading file", url);
|
||||
logger.debug("downloading file", { url });
|
||||
return got.stream(url, {
|
||||
headers: {
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
@@ -45,7 +48,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
logger.error("downloadFile failed", serializeError(error));
|
||||
throw new AnonymousError("repo_not_accessible", {
|
||||
httpStatus: 404,
|
||||
object: this.data,
|
||||
@@ -60,7 +63,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
// as the fallback for LFS files (#95).
|
||||
private downloadFileViaRaw(token: string, filePath: string) {
|
||||
const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${filePath}`;
|
||||
console.log("[GHStream] Downloading via raw URL (LFS)", url);
|
||||
logger.debug("downloading via raw URL (LFS)", { url });
|
||||
return got.stream(url, {
|
||||
headers: { authorization: `token ${token}` },
|
||||
followRedirect: true,
|
||||
@@ -267,9 +270,9 @@ export default class GitHubStream extends GitHubBase {
|
||||
}
|
||||
output.push(...this.tree2Tree(data.tree, parentPath));
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
const status = (error as { status?: number }).status;
|
||||
if (status === 409) {
|
||||
logger.debug("getTree empty repo", serializeError(error));
|
||||
throw new AnonymousError("repo_empty", {
|
||||
httpStatus: 409,
|
||||
object: this.data,
|
||||
@@ -277,6 +280,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
});
|
||||
}
|
||||
if (status === 404) {
|
||||
logger.debug("getTree miss", serializeError(error));
|
||||
const code = await classifyGitHubMissError(error, this.data);
|
||||
throw new AnonymousError(code, {
|
||||
httpStatus: 404,
|
||||
@@ -284,6 +288,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
cause: error as Error,
|
||||
});
|
||||
}
|
||||
logger.warn("getTree failed", serializeError(error));
|
||||
throw new AnonymousError("repo_not_found", {
|
||||
httpStatus: status || 500,
|
||||
object: this.data,
|
||||
|
||||
@@ -10,6 +10,9 @@ import { lookup } from "mime-types";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
import FileModel from "../model/files/files.model";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
import { createLogger, serializeError } from "../logger";
|
||||
|
||||
const logger = createLogger("fs");
|
||||
|
||||
export default class FileSystem extends StorageBase {
|
||||
type = "FileSystem";
|
||||
@@ -97,7 +100,7 @@ export default class FileSystem extends StorageBase {
|
||||
}
|
||||
await fs.promises.rename(tmpPath, fullPath);
|
||||
} catch (err) {
|
||||
console.error("[ERROR] FileSystem.write failed:", err);
|
||||
logger.error("write failed", serializeError(err));
|
||||
await fs.promises.rm(tmpPath, { force: true }).catch(() => undefined);
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -17,6 +17,9 @@ import AnonymousError from "../AnonymousError";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
import FileModel from "../model/files/files.model";
|
||||
import { createLogger, serializeError } from "../logger";
|
||||
|
||||
const logger = createLogger("s3");
|
||||
|
||||
export default class S3Storage extends StorageBase {
|
||||
type = "AWS";
|
||||
@@ -129,7 +132,7 @@ export default class S3Storage extends StorageBase {
|
||||
try {
|
||||
res.status(500).json({ error: "file_not_found" });
|
||||
} catch (err) {
|
||||
console.error(`[ERROR] S3 send ${path}`, err);
|
||||
logger.error("send failed", { path, err: serializeError(err) });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,6 +82,7 @@ const UNICODE_ESCAPE_CHARS = new Set(
|
||||
export function diacriticInsensitive(escapedTerm: string): string {
|
||||
let out = "";
|
||||
let i = 0;
|
||||
let inClass = false;
|
||||
while (i < escapedTerm.length) {
|
||||
const c = escapedTerm[i];
|
||||
if (c === "\\" && i + 1 < escapedTerm.length) {
|
||||
@@ -96,6 +97,26 @@ export function diacriticInsensitive(escapedTerm: string): string {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
// Inside a character class, leave letters alone — expanding them into
|
||||
// bracketed alternatives would produce nested `[...]` which is a syntax
|
||||
// error. The user's regex is responsible for its own char-class content.
|
||||
if (c === "[" && !inClass) {
|
||||
inClass = true;
|
||||
out += c;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (c === "]" && inClass) {
|
||||
inClass = false;
|
||||
out += c;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (inClass) {
|
||||
out += c;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
const lower = c.toLowerCase();
|
||||
out += DIACRITIC_CLASSES[lower] || c;
|
||||
i += 1;
|
||||
|
||||
@@ -8,6 +8,9 @@ import {
|
||||
anonymizePathCompiled,
|
||||
compileTerms,
|
||||
} from "./anonymize-utils";
|
||||
import { createLogger, serializeError } from "./logger";
|
||||
|
||||
const logger = createLogger("zip-stream");
|
||||
|
||||
export interface StreamAnonymizedZipOptions {
|
||||
repoId: string;
|
||||
@@ -82,7 +85,7 @@ export async function streamAnonymizedZip(
|
||||
const downloadStream = got.stream(response.url);
|
||||
|
||||
res.on("error", (error) => {
|
||||
console.error(error);
|
||||
logger.error("response stream error", serializeError(error));
|
||||
downloadStream.destroy();
|
||||
});
|
||||
res.on("close", () => {
|
||||
@@ -101,7 +104,7 @@ export async function streamAnonymizedZip(
|
||||
// bug as #694.
|
||||
let upstreamSucceeded = false;
|
||||
const fail = (error: Error) => {
|
||||
console.error(error);
|
||||
logger.error("upstream zipball failed", serializeError(error));
|
||||
archive.abort();
|
||||
const destroyable = res as unknown as {
|
||||
destroy?: (err?: Error) => void;
|
||||
@@ -140,7 +143,7 @@ export async function streamAnonymizedZip(
|
||||
archive.append(st, { name: fileName });
|
||||
} catch (error) {
|
||||
entry.autodrain();
|
||||
console.error(error);
|
||||
logger.error("entry transform failed", serializeError(error));
|
||||
}
|
||||
} else {
|
||||
entry.autodrain();
|
||||
@@ -157,7 +160,7 @@ export async function streamAnonymizedZip(
|
||||
});
|
||||
|
||||
archive.pipe(res).on("error", (error) => {
|
||||
console.error(error);
|
||||
logger.error("archive pipe error", serializeError(error));
|
||||
if (!upstreamSucceeded) {
|
||||
// archive errored while we were still depending on upstream bytes:
|
||||
// treat as failure rather than truncating.
|
||||
|
||||
Reference in New Issue
Block a user