Improve error dashboard

This commit is contained in:
tdurieux
2026-05-06 16:12:37 +03:00
parent 6f418d6332
commit 873c910dd3
18 changed files with 1606 additions and 318 deletions
+15 -5
View File
@@ -27,10 +27,9 @@ export default class AnonymousError extends CustomError {
this.cause = opt?.cause;
}
detail(): string | undefined {
url(): string | undefined {
if (this.value == null) return undefined;
try {
if (this.value instanceof Repository) return this.value.repoId;
if (this.value instanceof AnonymizedFile) {
const repoId = this.value.repository?.repoId;
// anonymizedPath getter can throw if the file isn't initialized;
@@ -43,6 +42,17 @@ export default class AnonymousError extends CustomError {
}
return repoId ? `/r/${repoId}/${p ?? ""}` : p;
}
} catch {
/* ignore */
}
return undefined;
}
detail(): string | undefined {
if (this.value == null) return undefined;
try {
if (this.value instanceof Repository) return this.value.repoId;
if (this.value instanceof AnonymizedFile) return undefined;
if (this.value instanceof GitHubRepository) return this.value.fullName;
if (this.value instanceof User) return this.value.username;
if (this.value instanceof GitHubBase) {
@@ -57,9 +67,9 @@ export default class AnonymousError extends CustomError {
toString(): string {
let out = this.message;
const detail = this.detail();
if (detail) {
out += `: ${detail}`;
const info = this.url() ?? this.detail();
if (info) {
out += `: ${info}`;
}
if (this.cause) {
out += `\n\tCause by ${this.cause}\n${this.cause.stack}`;
+5 -1
View File
@@ -465,9 +465,13 @@ export default class Repository {
async removeCache() {
await storage.rm(this.repoId);
this.model.isReseted = true;
this.model.size = { storage: 0, file: 0 };
if (isConnected) {
try {
await this.model.save();
await AnonymizedRepositoryModel.updateOne(
{ _id: this._model._id },
{ $set: { isReseted: true, size: this._model.size } }
).exec();
} catch (error) {
logger.error("removeCache save failed", serializeError(error));
}
+134 -4
View File
@@ -3,6 +3,15 @@ import config from "../config";
export const ERROR_LOG_KEY = "admin:errors";
export const ERROR_LOG_MAX = 1000;
export const ERROR_LOG_HOURLY_PREFIX = "admin:errors:hourly:";
export const ERROR_LOG_DROPPED_KEY = "admin:errors:dropped";
// 48h retention on the hourly counters: stats endpoint reads "last 24h" and
// "previous 24h" buckets — anything older has nothing to compare against.
export const ERROR_LOG_HOURLY_TTL = 48 * 60 * 60;
// Hard cap on the JSON payload stored per entry. The recent detail() change
// (commit 6f418d6) can produce kilobyte payloads; without a cap the read
// path pulls multiple MB on every poll.
const MAX_PAYLOAD_BYTES = 4096;
export type Logger = {
debug: (...args: unknown[]) => void;
@@ -77,21 +86,132 @@ function getRedis(): RedisClientType | null {
}
}
// In-process counter for entries that couldn't be persisted (no Redis client,
// disconnected, or Redis-side rejection). Mirrors `admin:errors:dropped` once
// Redis is back. Read by /admin/errors/stats so the admin page surfaces
// "you're losing logs" instead of silently rendering an empty table.
let droppedInProcess = 0;
// Snapshot of how many log entries this process has failed to persist so
// far. The value is process-local; it does not include counts held in Redis.
export function getInProcessDropped(): number {
  const dropped = droppedInProcess;
  return dropped;
}
// Shorten an over-long stack trace. Strings longer than 800 characters are
// cut to their first 800 characters followed by a "…[truncated]" marker;
// shorter strings and non-string values are returned untouched.
function trimStack(s: unknown): unknown {
  const limit = 800;
  if (typeof s !== "string") return s;
  return s.length > limit ? `${s.slice(0, limit)}…[truncated]` : s;
}
// Prepare one logged argument for storage. Non-objects (and null) pass
// through unchanged. An object carrying a string `stack` is shallow-copied
// with that stack shortened by trimStack; any other object is returned as-is.
function trimRawArg(a: unknown): unknown {
  if (typeof a !== "object" || a === null) return a;
  const record = a as Record<string, unknown>;
  const { stack } = record;
  return typeof stack === "string"
    ? { ...record, stack: trimStack(stack) }
    : record;
}
// Serialize a log entry to JSON, shrinking it step by step until it fits in
// MAX_PAYLOAD_BYTES (measured as UTF-8 bytes, which is what Redis stores).
//
// The caller's `entry` is never modified: all trimming happens on a shallow
// copy, so code that inspects `entry.raw` after this call still sees the
// original arguments.
//
// Shrinking steps, applied in order until the payload fits:
//   0. keep at most the first 3 raw args, with long stacks trimmed;
//   1. keep only the first raw arg;
//   2. replace raw with a `{ truncated, originalBytes }` placeholder;
//   3. additionally shorten `message` (only reached when the message alone
//      is larger than the cap).
//
// JSON.stringify may throw on values it cannot serialize (e.g. circular
// structures); that exception is propagated to the caller.
function clampPayload(entry: {
  ts: string;
  level: "warn" | "error";
  module: string;
  message: string;
  raw: unknown[];
}): string {
  const byteSize = (json: string): number => Buffer.byteLength(json, "utf8");
  const fits = (json: string): boolean => byteSize(json) <= MAX_PAYLOAD_BYTES;

  // Spread keeps the original key order (ts, level, module, message, raw).
  const clamped = { ...entry, raw: entry.raw.slice(0, 3).map(trimRawArg) };
  let s = JSON.stringify(clamped);
  if (fits(s)) return s;

  // Step 1: keep just the first arg.
  clamped.raw = clamped.raw.slice(0, 1);
  s = JSON.stringify(clamped);
  if (fits(s)) return s;

  // Step 2: replace the payload with a placeholder so the entry still shows
  // up in the list but doesn't blow the cap.
  clamped.raw = [{ truncated: true, originalBytes: byteSize(s) }];
  s = JSON.stringify(clamped);
  if (fits(s)) return s;

  // Step 3: the message itself is what exceeds the cap; shorten it too.
  clamped.message = clamped.message.slice(0, 512) + "…[truncated]";
  return JSON.stringify(clamped);
}
// Classify a logged entry into the severity bucket shown by the admin UI.
//
// A numeric `httpStatus` on the detail object wins over a numeric `status`.
// When a status is available: >= 500 is "error"; 401, 403 and 404 are
// "info"; any other status >= 400 is "warn". Without a usable status (or
// with one below 400) the bucket falls back to the log level.
function bucketFor(
  detail: Record<string, unknown> | undefined,
  level: "warn" | "error"
): "error" | "warn" | "info" {
  const httpStatus = detail?.httpStatus;
  const plainStatus = detail?.status;

  let status: number | null = null;
  if (typeof httpStatus === "number") {
    status = httpStatus;
  } else if (typeof plainStatus === "number") {
    status = plainStatus;
  }

  if (status !== null) {
    if (status >= 500) return "error";
    if ([401, 403, 404].includes(status)) return "info";
    if (status >= 400) return "warn";
  }

  if (level === "error") return "error";
  return "warn";
}
// Build the Redis key of the hourly counter hash that `ts` falls into:
// ERROR_LOG_HOURLY_PREFIX followed by YYYYMMDDHH, all fields taken in UTC
// so keys sort lexicographically in time order.
function hourKey(ts: string): string {
  const when = new Date(ts);
  const pad2 = (n: number): string => String(n).padStart(2, "0");
  const stamp = [
    String(when.getUTCFullYear()),
    pad2(when.getUTCMonth() + 1),
    pad2(when.getUTCDate()),
    pad2(when.getUTCHours()),
  ].join("");
  return ERROR_LOG_HOURLY_PREFIX + stamp;
}
// Best-effort persistence of a warn/error log entry to Redis.
//
// In a single MULTI it pushes the (size-capped) JSON payload onto the
// ERROR_LOG_KEY list, trims that list to ERROR_LOG_MAX entries, and bumps
// the counters of the entry's hourly hash (total, bucket, level, module and
// bucket+code), refreshing the hash TTL.
//
// This function never throws and never awaits: every failure is counted as
// a "dropped" entry instead. Each drop is counted exactly once — in Redis
// (ERROR_LOG_DROPPED_KEY) when Redis can be reached, otherwise in the
// in-process counter — because the stats endpoint adds the two together.
function persistError(entry: {
  ts: string;
  level: "warn" | "error";
  module: string;
  message: string;
  raw: unknown[];
}) {
  const client = getRedis();
  if (!client || !client.isOpen) {
    droppedInProcess++;
    return;
  }

  // Pre-compute the structured fields the stats endpoint needs so the read
  // path doesn't have to parse the JSON list at all. This is done BEFORE
  // clamping so the bucket/code come from the real arguments rather than
  // from a truncated payload or its placeholder.
  const detail = entry.raw.find(
    (a) => a && typeof a === "object" && !Array.isArray(a)
  ) as Record<string, unknown> | undefined;
  const bucket = bucketFor(detail, entry.level);
  const detailMessage = detail?.message;
  const detailCode = detail?.code;
  const code =
    (typeof detailMessage === "string" ? detailMessage : "") ||
    (typeof detailCode === "string" ? detailCode : "") ||
    "_";
  const hKey = hourKey(entry.ts);

  let payload: string;
  try {
    payload = clampPayload(entry);
  } catch {
    // Serialization failed (e.g. an argument JSON.stringify cannot handle).
    // Logging must not throw into the caller; record the loss and move on.
    droppedInProcess++;
    return;
  }

  client
    .multi()
    .lPush(ERROR_LOG_KEY, payload)
    .lTrim(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1)
    .hIncrBy(hKey, "total", 1)
    .hIncrBy(hKey, `bucket:${bucket}`, 1)
    .hIncrBy(hKey, `level:${entry.level}`, 1)
    .hIncrBy(hKey, `module:${entry.module}`, 1)
    .hIncrBy(hKey, `cb:${bucket}:${code}`, 1)
    .expire(hKey, ERROR_LOG_HOURLY_TTL)
    .exec()
    .catch(() => {
      // Prefer the shared Redis counter so the admin UI sees the same number
      // across processes; fall back to the in-process counter only when the
      // shared one cannot be updated.
      const c = getRedis();
      if (c && c.isOpen) {
        c.incr(ERROR_LOG_DROPPED_KEY).catch(() => {
          droppedInProcess++;
        });
      } else {
        droppedInProcess++;
      }
    });
}
function emit(level: Level, module: string, args: unknown[]) {
@@ -108,9 +228,10 @@ function emit(level: Level, module: string, args: unknown[]) {
? console.debug
: console.log;
sink(line);
if (level === "error") {
if (level === "error" || level === "warn") {
persistError({
ts,
level,
module,
message: typeof args[0] === "string" ? args[0] : "",
raw: args.map((a) => {
@@ -141,6 +262,7 @@ type ErrorLike = {
request?: { url?: string; method?: string };
response?: { url?: string; status?: number };
detail?: () => string | undefined;
url?: string | (() => string | undefined);
};
export function serializeError(err: unknown): Record<string, unknown> {
@@ -162,6 +284,14 @@ export function serializeError(err: unknown): Record<string, unknown> {
// AnonymousError carries an httpStatus and an inner cause.
if (typeof e.httpStatus === "number") out.httpStatus = e.httpStatus;
if (e.code !== undefined && e.code !== e.message) out.code = e.code;
if (typeof e.url === "function") {
try {
const u = e.url();
if (u) out.url = u;
} catch {
/* ignore */
}
}
if (typeof e.detail === "function") {
try {
const d = e.detail();
@@ -0,0 +1,11 @@
import { model } from "mongoose";
import { IDailyStatsDocument, IDailyStatsModel } from "./dailyStats.types";
import DailyStatsSchema from "./dailyStats.schema";
// Mongoose model registered under the name "DailyStats" and backed by
// DailyStatsSchema. The result is cast to IDailyStatsModel so callers get
// the project's model interface rather than the generic Mongoose type.
const DailyStatsModel = model<IDailyStatsDocument>(
"DailyStats",
DailyStatsSchema
) as IDailyStatsModel;
export default DailyStatsModel;
@@ -0,0 +1,11 @@
import { Schema } from "mongoose";
// Schema for one daily snapshot of the aggregate site counters.
const DailyStatsSchema = new Schema({
// Snapshot day; declared unique (and indexed) so there is one document per
// date value.
date: { type: Date, unique: true, index: true },
// Counter values captured for that day; each defaults to 0 when omitted.
nbRepositories: { type: Number, default: 0 },
nbUsers: { type: Number, default: 0 },
nbPageViews: { type: Number, default: 0 },
nbPullRequests: { type: Number, default: 0 },
});
export default DailyStatsSchema;
@@ -0,0 +1,12 @@
import { Document, Model } from "mongoose";
// Plain shape of one daily stats snapshot (field names mirror the
// DailyStats schema).
export interface IDailyStats {
// Day the snapshot belongs to.
date: Date;
// Counter values recorded for that day.
nbRepositories: number;
nbUsers: number;
nbPageViews: number;
nbPullRequests: number;
}
// A daily stats snapshot as a Mongoose document (IDailyStats fields plus the
// Mongoose Document API).
export interface IDailyStatsDocument extends IDailyStats, Document {}
// Model type for daily stats documents; currently adds nothing on top of
// Mongoose's Model.
export interface IDailyStatsModel extends Model<IDailyStatsDocument> {}
+68
View File
@@ -0,0 +1,68 @@
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import DailyStatsModel from "../core/model/dailyStats/dailyStats.model";
import { createLogger, serializeError } from "../core/logger";
const logger = createLogger("dailyStats");
// Aggregate counters returned by computeStats().
export interface HomeStats {
// Estimated number of anonymized repository documents.
nbRepositories: number;
// Number of distinct `owner` values across anonymized repositories.
nbUsers: number;
// Sum of `pageView` over all anonymized repositories.
nbPageViews: number;
// Estimated number of anonymized pull request documents.
nbPullRequests: number;
}
// Compute the aggregate counters from the database.
//
// Runs four queries in parallel: estimated repository count, number of
// distinct repository owners, total of the `pageView` field, and estimated
// pull-request count. An aggregation that yields no row contributes 0.
export async function computeStats(): Promise<HomeStats> {
  const repositories = AnonymizedRepositoryModel.collection;

  const [repoCount, ownerRows, pageViewRows, pullRequestCount] =
    await Promise.all([
      AnonymizedRepositoryModel.estimatedDocumentCount(),
      // Group by owner server-side, then count the groups.
      repositories
        .aggregate([{ $group: { _id: "$owner" } }, { $count: "n" }])
        .toArray(),
      // Single group summing pageView across every repository.
      repositories
        .aggregate([{ $group: { _id: null, total: { $sum: "$pageView" } } }])
        .toArray(),
      AnonymizedPullRequestModel.estimatedDocumentCount(),
    ]);

  const ownerRow = ownerRows[0] as { n?: number } | undefined;
  const pageViewRow = pageViewRows[0] as { total?: number } | undefined;

  const result: HomeStats = {
    nbRepositories: repoCount,
    nbUsers: ownerRow?.n || 0,
    nbPageViews: pageViewRow?.total || 0,
    nbPullRequests: pullRequestCount,
  };
  return result;
}
// Return a new Date at 00:00:00.000 UTC of the UTC calendar day containing
// `d` (defaults to the current time). The argument is not modified.
function utcMidnight(d: Date = new Date()): Date {
  const year = d.getUTCFullYear();
  const month = d.getUTCMonth();
  const dayOfMonth = d.getUTCDate();
  const startOfDayMs = Date.UTC(year, month, dayOfMonth);
  return new Date(startOfDayMs);
}
// Compute the current counters and upsert them as the snapshot for today's
// UTC date. Never rejects: any failure is logged and swallowed so scheduled
// and startup callers are not disrupted.
export async function computeAndStoreDailyStats(): Promise<void> {
  try {
    const stats = await computeStats();
    // Resolved after the stats are in, so the snapshot is filed under the
    // day on which the computation finished.
    const date = utcMidnight();
    const snapshot = { ...stats, date };
    const filter = { date };
    await DailyStatsModel.updateOne(
      filter,
      { $set: snapshot },
      { upsert: true }
    );
    logger.info("daily stats snapshot stored", { date, ...stats });
  } catch (error) {
    const serialized = serializeError(error);
    logger.error("daily stats snapshot failed", serialized);
  }
}
// Make sure a snapshot exists for today's UTC date: if none is stored yet,
// compute and store one. Never rejects; failures are logged.
export async function ensureTodaySnapshot(): Promise<void> {
  try {
    const today = utcMidnight();
    const alreadyStored = await DailyStatsModel.findOne({ date: today }).lean();
    if (alreadyStored) return;
    await computeAndStoreDailyStats();
  } catch (error) {
    const serialized = serializeError(error);
    logger.error("ensureTodaySnapshot failed", serialized);
  }
}
+45 -31
View File
@@ -14,10 +14,17 @@ import { connect } from "./database";
import { initSession, router as connectionRouter } from "./routes/connection";
import { bearerTokenAuth } from "./routes/token-auth";
import router from "./routes";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
import {
conferenceStatusCheck,
repositoryStatusCheck,
dailyStatsSnapshot,
} from "./schedule";
import { startWorker, recoverStuckPreparing } from "../queue";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import {
computeStats,
ensureTodaySnapshot,
} from "./dailyStatsSnapshot";
import DailyStatsModel from "../core/model/dailyStats/dailyStats.model";
import { getUser } from "./routes/route-utils";
import config from "../config";
import { createLogger, serializeError } from "../core/logger";
@@ -186,9 +193,13 @@ export default async function start() {
});
let stat: Record<string, unknown> = {};
let history: Array<Record<string, unknown>> | null = null;
let historyKey: number | null = null;
setInterval(() => {
stat = {};
history = null;
historyKey = null;
}, 1000 * 60 * 60);
apiRouter.get("/healthcheck", async (_, res) => {
@@ -199,37 +210,36 @@ export default async function start() {
res.json(stat);
return;
}
const [nbRepositories, nbUsersAgg, nbPageViews, nbPullRequests] =
await Promise.all([
AnonymizedRepositoryModel.estimatedDocumentCount(),
// Count distinct owners server-side instead of materializing the full
// list of ObjectIds with `.distinct("owner")` only to take its length.
AnonymizedRepositoryModel.collection
.aggregate([
{ $group: { _id: "$owner" } },
{ $count: "n" },
])
.toArray(),
AnonymizedRepositoryModel.collection
.aggregate([
{
$group: { _id: null, total: { $sum: "$pageView" } },
},
])
.toArray(),
AnonymizedPullRequestModel.estimatedDocumentCount(),
]);
stat = {
nbRepositories,
nbUsers: (nbUsersAgg[0] as { n?: number } | undefined)?.n || 0,
nbPageViews: nbPageViews[0]?.total || 0,
nbPullRequests,
};
stat = { ...(await computeStats()) };
res.json(stat);
});
// Daily snapshot history, oldest first.
//
// Query: `days` — how many days back to include (today counts as one);
// parsed as a base-10 integer, defaulting to 30 and clamped to [1, 365].
// The last response is cached in `history`, keyed by `days`, until the
// surrounding hourly timer clears it.
//
// Failures are logged and answered with a 500 instead of being left as an
// unhandled rejection (which would leave the request hanging).
apiRouter.get("/stat/history", async (req, res) => {
  try {
    const requested = parseInt(String(req.query.days ?? ""), 10);
    const days = Math.min(Math.max(requested || 30, 1), 365);
    if (history && historyKey === days) {
      res.json(history);
      return;
    }

    // Start of the window: UTC midnight, `days - 1` days before today.
    const since = new Date();
    since.setUTCDate(since.getUTCDate() - days + 1);
    since.setUTCHours(0, 0, 0, 0);

    const docs = await DailyStatsModel.find({ date: { $gte: since } })
      .sort({ date: 1 })
      .lean();

    // Expose only the public counters, not Mongo bookkeeping fields.
    history = docs.map((d) => ({
      date: d.date,
      nbRepositories: d.nbRepositories,
      nbUsers: d.nbUsers,
      nbPageViews: d.nbPageViews,
      nbPullRequests: d.nbPullRequests,
    }));
    historyKey = days;
    res.json(history);
  } catch (error) {
    logger.error("stat history failed", serializeError(error));
    // NOTE(review): error body shape chosen locally — align with the
    // project's shared error responder if this router should use it.
    if (!res.headersSent) {
      res.status(500).json({ error: "stat_history_failed" });
    }
  }
});
// web view
app.use("/w/", rate, webViewSpeedLimiter, router.webview);
@@ -253,10 +263,14 @@ export default async function start() {
// start schedules
conferenceStatusCheck();
repositoryStatusCheck();
dailyStatsSnapshot();
await connect();
app.listen(config.PORT);
logger.info("server started", { port: config.PORT });
ensureTodaySnapshot().catch((err) =>
logger.error("ensureTodaySnapshot failed", { err })
);
recoverStuckPreparing().catch((err) =>
logger.error("recoverStuckPreparing failed", { err })
);
+183 -7
View File
@@ -10,7 +10,15 @@ import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
import adminTokensRouter from "./admin-tokens";
import { octokit, getToken } from "../../core/GitHubUtils";
import { createLogger, serializeError, ERROR_LOG_KEY, ERROR_LOG_MAX } from "../../core/logger";
import {
createLogger,
serializeError,
ERROR_LOG_KEY,
ERROR_LOG_MAX,
ERROR_LOG_HOURLY_PREFIX,
ERROR_LOG_DROPPED_KEY,
getInProcessDropped,
} from "../../core/logger";
import { createClient, RedisClientType } from "redis";
import config from "../../config";
@@ -227,14 +235,32 @@ router.get("/queues", async (req, res) => {
});
});
// Errors captured by the logger sink (last ERROR_LOG_MAX entries).
// Errors captured by the logger sink. Server-paginated to avoid pulling
// the full ERROR_LOG_MAX entries on every poll — payloads can be a few KB
// each once detail() enrichment is included.
router.get("/errors", async (req, res) => {
try {
const client = await getErrorLogClient();
if (!client) {
return res.json({ entries: [], max: ERROR_LOG_MAX, available: false });
return res.json({
entries: [],
offset: 0,
limit: 0,
total: 0,
max: ERROR_LOG_MAX,
available: false,
});
}
const raw = await client.lRange(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1);
const offset = Math.max(0, parseInt(String(req.query.offset || "0"), 10) || 0);
const limit = Math.min(
ERROR_LOG_MAX,
Math.max(1, parseInt(String(req.query.limit || "250"), 10) || 250)
);
const stop = offset + limit - 1;
const [raw, total] = await Promise.all([
client.lRange(ERROR_LOG_KEY, offset, stop),
client.lLen(ERROR_LOG_KEY),
]);
const entries = raw.map((s) => {
try {
return JSON.parse(s);
@@ -242,7 +268,141 @@ router.get("/errors", async (req, res) => {
return { ts: null, module: null, message: s, raw: [] };
}
});
res.json({ entries, max: ERROR_LOG_MAX, available: true });
res.json({
entries,
offset,
limit,
total,
max: ERROR_LOG_MAX,
available: true,
});
} catch (error) {
handleError(error, res, req);
}
});
// Aggregated stats from the precomputed hourly counters (HINCRBY on each
// persistError). No JSON parsing of stored entries — O(48 small HGETALLs).
router.get("/errors/stats", async (req, res) => {
try {
const client = await getErrorLogClient();
if (!client) {
return res.json({
available: false,
last24h: 0,
prev24h: 0,
severity: { error: 0, warn: 0, info: 0 },
unique: { error: 0, warn: 0, info: 0 },
buckets: [],
dropped: getInProcessDropped(),
});
}
const now = new Date();
// Build the 48 hour keys to fetch (24 for current window + 24 for prev).
function hourKey(d: Date) {
const y = d.getUTCFullYear();
const m = String(d.getUTCMonth() + 1).padStart(2, "0");
const day = String(d.getUTCDate()).padStart(2, "0");
const h = String(d.getUTCHours()).padStart(2, "0");
return `${ERROR_LOG_HOURLY_PREFIX}${y}${m}${day}${h}`;
}
const currentKeys: string[] = [];
const prevKeys: string[] = [];
const bucketHourTs: number[] = [];
for (let i = 23; i >= 0; i--) {
const d = new Date(now.getTime() - i * 3600 * 1000);
// Anchor each bar at the end of its hour so a "9s ago" event lands in
// the rightmost bar.
const anchor = new Date(
Date.UTC(
d.getUTCFullYear(),
d.getUTCMonth(),
d.getUTCDate(),
d.getUTCHours()
)
);
currentKeys.push(hourKey(anchor));
bucketHourTs.push(anchor.getTime() + 3600 * 1000);
}
for (let i = 47; i >= 24; i--) {
const d = new Date(now.getTime() - i * 3600 * 1000);
const anchor = new Date(
Date.UTC(
d.getUTCFullYear(),
d.getUTCMonth(),
d.getUTCDate(),
d.getUTCHours()
)
);
prevKeys.push(hourKey(anchor));
}
const pipe = client.multi();
for (const k of currentKeys) pipe.hGetAll(k);
for (const k of prevKeys) pipe.hGetAll(k);
pipe.get(ERROR_LOG_DROPPED_KEY);
const results = (await pipe.exec()) as unknown[];
const currentHashes = results.slice(0, currentKeys.length) as Record<
string,
string
>[];
const prevHashes = results.slice(
currentKeys.length,
currentKeys.length + prevKeys.length
) as Record<string, string>[];
const droppedRedis =
parseInt(String(results[results.length - 1] || "0"), 10) || 0;
const buckets: {
hour: number;
error: number;
warn: number;
info: number;
}[] = [];
const sev = { error: 0, warn: 0, info: 0 };
const uniqueCodes: Record<"error" | "warn" | "info", Set<string>> = {
error: new Set(),
warn: new Set(),
info: new Set(),
};
let last24h = 0;
currentHashes.forEach((h, i) => {
const flat = h || {};
const e = parseInt(flat["bucket:error"] || "0", 10) || 0;
const w = parseInt(flat["bucket:warn"] || "0", 10) || 0;
const inf = parseInt(flat["bucket:info"] || "0", 10) || 0;
buckets.push({ hour: bucketHourTs[i], error: e, warn: w, info: inf });
sev.error += e;
sev.warn += w;
sev.info += inf;
last24h += parseInt(flat.total || "0", 10) || 0;
// cb:<bucket>:<code> fields → unique code sets.
for (const k of Object.keys(flat)) {
if (!k.startsWith("cb:")) continue;
const sep = k.indexOf(":", 3);
if (sep < 0) continue;
const b = k.slice(3, sep) as "error" | "warn" | "info";
const code = k.slice(sep + 1);
if (b in uniqueCodes) uniqueCodes[b].add(code);
}
});
let prev24h = 0;
for (const h of prevHashes) {
prev24h += parseInt((h || {}).total || "0", 10) || 0;
}
res.json({
available: true,
last24h,
prev24h,
severity: sev,
unique: {
error: uniqueCodes.error.size,
warn: uniqueCodes.warn.size,
info: uniqueCodes.info.size,
},
buckets,
dropped: droppedRedis + getInProcessDropped(),
});
} catch (error) {
handleError(error, res, req);
}
@@ -253,8 +413,24 @@ router.delete("/errors", async (req, res) => {
const client = await getErrorLogClient();
if (!client) return res.json({ ok: true, cleared: 0 });
const len = await client.lLen(ERROR_LOG_KEY);
await client.del(ERROR_LOG_KEY);
res.json({ ok: true, cleared: len });
// SCAN the hourly counter keys and del them along with the list and
// dropped counter so the admin page comes back to a clean slate.
const hourlyKeys: string[] = [];
let cursor = 0;
do {
const reply = await client.scan(cursor, {
MATCH: `${ERROR_LOG_HOURLY_PREFIX}*`,
COUNT: 100,
});
cursor = Number(reply.cursor);
for (const k of reply.keys) hourlyKeys.push(k);
} while (cursor !== 0);
const pipe = client.multi();
pipe.del(ERROR_LOG_KEY);
pipe.del(ERROR_LOG_DROPPED_KEY);
if (hourlyKeys.length) pipe.del(hourlyKeys);
await pipe.exec();
res.json({ ok: true, cleared: len, hourlyCleared: hourlyKeys.length });
} catch (error) {
handleError(error, res, req);
}
+11 -2
View File
@@ -118,10 +118,19 @@ export function isOwnerCoauthorOrAdmin(repo: Repository, user: User) {
function printError(error: any, req?: express.Request) {
if (error instanceof AnonymousError) {
if (req?.originalUrl === "/api/repo/undefined/options") return;
logger.error("anonymous error", {
const payload = {
...serializeError(error),
url: req?.originalUrl,
});
};
// 4xx are expected client errors (not_found, expired, not_connected) —
// route them to warn so the admin Errors page can split server faults
// (5xx) from client misuse (4xx) cleanly.
const status = error.httpStatus;
if (typeof status === "number" && status >= 400 && status < 500) {
logger.warn("anonymous error", payload);
} else {
logger.error("anonymous error", payload);
}
} else if (error instanceof HTTPError) {
logger.error("http error", {
code: error.code,
+9
View File
@@ -4,6 +4,7 @@ import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anon
import ConferenceModel from "../core/model/conference/conferences.model";
import Repository from "../core/Repository";
import { createLogger, serializeError } from "../core/logger";
import { computeAndStoreDailyStats } from "./dailyStatsSnapshot";
const logger = createLogger("schedule");
@@ -54,3 +55,11 @@ export function repositoryStatusCheck() {
});
});
}
// Schedule the home-page stats snapshot once per day at 00:05 UTC.
//
// The rule is pinned to the UTC timezone explicitly: a bare cron string is
// evaluated in the server's local timezone, which would only match the
// intended "00:05 UTC" on hosts whose local time is UTC.
export function dailyStatsSnapshot() {
  const rule = new schedule.RecurrenceRule();
  rule.hour = 0;
  rule.minute = 5;
  rule.tz = "Etc/UTC";
  schedule.scheduleJob(rule, async () => {
    logger.info("running daily stats snapshot");
    // computeAndStoreDailyStats logs and swallows its own failures, so this
    // callback does not reject.
    await computeAndStoreDailyStats();
  });
}