Improve error dashboard

This commit is contained in:
tdurieux
2026-05-06 16:12:37 +03:00
parent 6f418d6332
commit 873c910dd3
18 changed files with 1606 additions and 318 deletions
+68
View File
@@ -0,0 +1,68 @@
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import DailyStatsModel from "../core/model/dailyStats/dailyStats.model";
import { createLogger, serializeError } from "../core/logger";
const logger = createLogger("dailyStats");
/**
 * Aggregate counters shown on the public home page and persisted in the
 * daily stats snapshots (see computeStats / computeAndStoreDailyStats).
 */
export interface HomeStats {
  /** Total anonymized repositories (estimated collection count). */
  nbRepositories: number;
  /** Number of distinct repository owners. */
  nbUsers: number;
  /** Sum of the per-repository `pageView` counters. */
  nbPageViews: number;
  /** Total anonymized pull requests (estimated collection count). */
  nbPullRequests: number;
}
/**
 * Compute the home-page statistics with one round of parallel queries.
 *
 * Owners are counted server-side with a $group/$count pipeline and page
 * views summed with a $group/$sum pipeline, so no id lists are
 * materialized in the app. Empty aggregation results fall back to 0.
 *
 * @returns the four counters consumed by the home page and snapshots.
 */
export async function computeStats(): Promise<HomeStats> {
  // Kick off all four queries before awaiting any of them.
  const repoCountQuery = AnonymizedRepositoryModel.estimatedDocumentCount();
  const ownerCountQuery = AnonymizedRepositoryModel.collection
    .aggregate([{ $group: { _id: "$owner" } }, { $count: "n" }])
    .toArray();
  const pageViewSumQuery = AnonymizedRepositoryModel.collection
    .aggregate([{ $group: { _id: null, total: { $sum: "$pageView" } } }])
    .toArray();
  const prCountQuery = AnonymizedPullRequestModel.estimatedDocumentCount();

  const [nbRepositories, ownerRows, pageViewRows, nbPullRequests] =
    await Promise.all([
      repoCountQuery,
      ownerCountQuery,
      pageViewSumQuery,
      prCountQuery,
    ]);

  // Each aggregation yields at most one row; missing row means zero.
  const ownerRow = ownerRows[0] as { n?: number } | undefined;
  const pageViewRow = pageViewRows[0] as { total?: number } | undefined;

  return {
    nbRepositories,
    nbUsers: ownerRow?.n || 0,
    nbPageViews: pageViewRow?.total || 0,
    nbPullRequests,
  };
}
/**
 * Truncate a date to 00:00:00.000 UTC of its UTC calendar day.
 * Used as the canonical key for one daily-stats document per day.
 *
 * @param d date to truncate; defaults to "now".
 * @returns a new Date at UTC midnight of the same UTC day as `d`.
 */
function utcMidnight(d: Date = new Date()): Date {
  const [year, month, day] = [
    d.getUTCFullYear(),
    d.getUTCMonth(),
    d.getUTCDate(),
  ];
  return new Date(Date.UTC(year, month, day));
}
/**
 * Snapshot the current home-page stats into the daily-stats collection,
 * upserting under today's UTC-midnight key so re-running on the same day
 * overwrites rather than duplicates.
 *
 * Never throws: any failure is logged via serializeError and swallowed,
 * so a scheduler tick cannot crash the process.
 */
export async function computeAndStoreDailyStats(): Promise<void> {
  try {
    const stats = await computeStats();
    const date = utcMidnight();
    const snapshot = { ...stats, date };
    await DailyStatsModel.updateOne(
      { date },
      { $set: snapshot },
      { upsert: true }
    );
    logger.info("daily stats snapshot stored", snapshot);
  } catch (error) {
    logger.error("daily stats snapshot failed", serializeError(error));
  }
}
/**
 * Ensure a daily-stats document exists for today (UTC), computing and
 * storing one if missing. Intended for server startup so a restart that
 * missed the scheduled snapshot still records one for the day.
 *
 * Never throws: failures are logged via serializeError and swallowed.
 */
export async function ensureTodaySnapshot(): Promise<void> {
  try {
    const date = utcMidnight();
    // Only existence matters here — project just _id instead of pulling
    // the whole snapshot document over the wire.
    const existing = await DailyStatsModel.findOne({ date }, { _id: 1 }).lean();
    if (!existing) {
      await computeAndStoreDailyStats();
    }
  } catch (error) {
    logger.error("ensureTodaySnapshot failed", serializeError(error));
  }
}
+45 -31
View File
@@ -14,10 +14,17 @@ import { connect } from "./database";
import { initSession, router as connectionRouter } from "./routes/connection";
import { bearerTokenAuth } from "./routes/token-auth";
import router from "./routes";
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
import {
conferenceStatusCheck,
repositoryStatusCheck,
dailyStatsSnapshot,
} from "./schedule";
import { startWorker, recoverStuckPreparing } from "../queue";
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
import {
computeStats,
ensureTodaySnapshot,
} from "./dailyStatsSnapshot";
import DailyStatsModel from "../core/model/dailyStats/dailyStats.model";
import { getUser } from "./routes/route-utils";
import config from "../config";
import { createLogger, serializeError } from "../core/logger";
@@ -186,9 +193,13 @@ export default async function start() {
});
let stat: Record<string, unknown> = {};
let history: Array<Record<string, unknown>> | null = null;
let historyKey: number | null = null;
setInterval(() => {
stat = {};
history = null;
historyKey = null;
}, 1000 * 60 * 60);
apiRouter.get("/healthcheck", async (_, res) => {
@@ -199,37 +210,36 @@ export default async function start() {
res.json(stat);
return;
}
const [nbRepositories, nbUsersAgg, nbPageViews, nbPullRequests] =
await Promise.all([
AnonymizedRepositoryModel.estimatedDocumentCount(),
// Count distinct owners server-side instead of materializing the full
// list of ObjectIds with `.distinct("owner")` only to take its length.
AnonymizedRepositoryModel.collection
.aggregate([
{ $group: { _id: "$owner" } },
{ $count: "n" },
])
.toArray(),
AnonymizedRepositoryModel.collection
.aggregate([
{
$group: { _id: null, total: { $sum: "$pageView" } },
},
])
.toArray(),
AnonymizedPullRequestModel.estimatedDocumentCount(),
]);
stat = {
nbRepositories,
nbUsers: (nbUsersAgg[0] as { n?: number } | undefined)?.n || 0,
nbPageViews: nbPageViews[0]?.total || 0,
nbPullRequests,
};
stat = { ...(await computeStats()) };
res.json(stat);
});
// Daily stats history for the dashboard chart. The response is cached in
// the closure variables `history`/`historyKey`, which the hourly
// setInterval above resets together with `stat`, so repeated polls with
// the same `days` value are served from memory.
// NOTE(review): the cache keys on a single `days` value, so alternating
// requests with different `days` thrash it — confirm that is acceptable.
apiRouter.get("/stat/history", async (req, res) => {
  // Clamp ?days= to [1, 365]; non-numeric or missing input defaults to 30.
  const days = Math.min(
    Math.max(parseInt(req.query.days as string) || 30, 1),
    365
  );
  if (history && historyKey === days) {
    res.json(history);
    return;
  }
  // Window start: UTC midnight of (today - days + 1), so the range
  // includes today and spans exactly `days` calendar days.
  const since = new Date();
  since.setUTCDate(since.getUTCDate() - days + 1);
  since.setUTCHours(0, 0, 0, 0);
  const docs = await DailyStatsModel.find({ date: { $gte: since } })
    .sort({ date: 1 })
    .lean();
  // Project down to exactly the fields the chart consumes.
  history = docs.map((d) => ({
    date: d.date,
    nbRepositories: d.nbRepositories,
    nbUsers: d.nbUsers,
    nbPageViews: d.nbPageViews,
    nbPullRequests: d.nbPullRequests,
  }));
  historyKey = days;
  res.json(history);
});
// web view
app.use("/w/", rate, webViewSpeedLimiter, router.webview);
@@ -253,10 +263,14 @@ export default async function start() {
// start schedules
conferenceStatusCheck();
repositoryStatusCheck();
dailyStatsSnapshot();
await connect();
app.listen(config.PORT);
logger.info("server started", { port: config.PORT });
ensureTodaySnapshot().catch((err) =>
logger.error("ensureTodaySnapshot failed", { err })
);
recoverStuckPreparing().catch((err) =>
logger.error("recoverStuckPreparing failed", { err })
);
+183 -7
View File
@@ -10,7 +10,15 @@ import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
import adminTokensRouter from "./admin-tokens";
import { octokit, getToken } from "../../core/GitHubUtils";
import { createLogger, serializeError, ERROR_LOG_KEY, ERROR_LOG_MAX } from "../../core/logger";
import {
createLogger,
serializeError,
ERROR_LOG_KEY,
ERROR_LOG_MAX,
ERROR_LOG_HOURLY_PREFIX,
ERROR_LOG_DROPPED_KEY,
getInProcessDropped,
} from "../../core/logger";
import { createClient, RedisClientType } from "redis";
import config from "../../config";
@@ -227,14 +235,32 @@ router.get("/queues", async (req, res) => {
});
});
// Errors captured by the logger sink (last ERROR_LOG_MAX entries).
// Errors captured by the logger sink. Server-paginated to avoid pulling
// the full ERROR_LOG_MAX entries on every poll — payloads can be a few KB
// each once detail() enrichment is included.
router.get("/errors", async (req, res) => {
try {
const client = await getErrorLogClient();
if (!client) {
return res.json({ entries: [], max: ERROR_LOG_MAX, available: false });
return res.json({
entries: [],
offset: 0,
limit: 0,
total: 0,
max: ERROR_LOG_MAX,
available: false,
});
}
const raw = await client.lRange(ERROR_LOG_KEY, 0, ERROR_LOG_MAX - 1);
const offset = Math.max(0, parseInt(String(req.query.offset || "0"), 10) || 0);
const limit = Math.min(
ERROR_LOG_MAX,
Math.max(1, parseInt(String(req.query.limit || "250"), 10) || 250)
);
const stop = offset + limit - 1;
const [raw, total] = await Promise.all([
client.lRange(ERROR_LOG_KEY, offset, stop),
client.lLen(ERROR_LOG_KEY),
]);
const entries = raw.map((s) => {
try {
return JSON.parse(s);
@@ -242,7 +268,141 @@ router.get("/errors", async (req, res) => {
return { ts: null, module: null, message: s, raw: [] };
}
});
res.json({ entries, max: ERROR_LOG_MAX, available: true });
res.json({
entries,
offset,
limit,
total,
max: ERROR_LOG_MAX,
available: true,
});
} catch (error) {
handleError(error, res, req);
}
});
// Aggregated stats from the precomputed hourly counters (HINCRBY on each
// persistError). No JSON parsing of stored entries — O(48 small HGETALLs).
router.get("/errors/stats", async (req, res) => {
try {
const client = await getErrorLogClient();
if (!client) {
return res.json({
available: false,
last24h: 0,
prev24h: 0,
severity: { error: 0, warn: 0, info: 0 },
unique: { error: 0, warn: 0, info: 0 },
buckets: [],
dropped: getInProcessDropped(),
});
}
const now = new Date();
// Build the 48 hour keys to fetch (24 for current window + 24 for prev).
// Redis key of the hourly error-counter hash covering the UTC hour that
// contains `d`: <ERROR_LOG_HOURLY_PREFIX>YYYYMMDDHH.
function hourKey(d: Date) {
  const pad2 = (v: number) => String(v).padStart(2, "0");
  return (
    ERROR_LOG_HOURLY_PREFIX +
    d.getUTCFullYear() +
    pad2(d.getUTCMonth() + 1) +
    pad2(d.getUTCDate()) +
    pad2(d.getUTCHours())
  );
}
const currentKeys: string[] = [];
const prevKeys: string[] = [];
const bucketHourTs: number[] = [];
for (let i = 23; i >= 0; i--) {
const d = new Date(now.getTime() - i * 3600 * 1000);
// Anchor each bar at the end of its hour so a "9s ago" event lands in
// the rightmost bar.
const anchor = new Date(
Date.UTC(
d.getUTCFullYear(),
d.getUTCMonth(),
d.getUTCDate(),
d.getUTCHours()
)
);
currentKeys.push(hourKey(anchor));
bucketHourTs.push(anchor.getTime() + 3600 * 1000);
}
for (let i = 47; i >= 24; i--) {
const d = new Date(now.getTime() - i * 3600 * 1000);
const anchor = new Date(
Date.UTC(
d.getUTCFullYear(),
d.getUTCMonth(),
d.getUTCDate(),
d.getUTCHours()
)
);
prevKeys.push(hourKey(anchor));
}
const pipe = client.multi();
for (const k of currentKeys) pipe.hGetAll(k);
for (const k of prevKeys) pipe.hGetAll(k);
pipe.get(ERROR_LOG_DROPPED_KEY);
const results = (await pipe.exec()) as unknown[];
const currentHashes = results.slice(0, currentKeys.length) as Record<
string,
string
>[];
const prevHashes = results.slice(
currentKeys.length,
currentKeys.length + prevKeys.length
) as Record<string, string>[];
const droppedRedis =
parseInt(String(results[results.length - 1] || "0"), 10) || 0;
const buckets: {
hour: number;
error: number;
warn: number;
info: number;
}[] = [];
const sev = { error: 0, warn: 0, info: 0 };
const uniqueCodes: Record<"error" | "warn" | "info", Set<string>> = {
error: new Set(),
warn: new Set(),
info: new Set(),
};
let last24h = 0;
currentHashes.forEach((h, i) => {
const flat = h || {};
const e = parseInt(flat["bucket:error"] || "0", 10) || 0;
const w = parseInt(flat["bucket:warn"] || "0", 10) || 0;
const inf = parseInt(flat["bucket:info"] || "0", 10) || 0;
buckets.push({ hour: bucketHourTs[i], error: e, warn: w, info: inf });
sev.error += e;
sev.warn += w;
sev.info += inf;
last24h += parseInt(flat.total || "0", 10) || 0;
// cb:<bucket>:<code> fields → unique code sets.
for (const k of Object.keys(flat)) {
if (!k.startsWith("cb:")) continue;
const sep = k.indexOf(":", 3);
if (sep < 0) continue;
const b = k.slice(3, sep) as "error" | "warn" | "info";
const code = k.slice(sep + 1);
if (b in uniqueCodes) uniqueCodes[b].add(code);
}
});
let prev24h = 0;
for (const h of prevHashes) {
prev24h += parseInt((h || {}).total || "0", 10) || 0;
}
res.json({
available: true,
last24h,
prev24h,
severity: sev,
unique: {
error: uniqueCodes.error.size,
warn: uniqueCodes.warn.size,
info: uniqueCodes.info.size,
},
buckets,
dropped: droppedRedis + getInProcessDropped(),
});
} catch (error) {
handleError(error, res, req);
}
@@ -253,8 +413,24 @@ router.delete("/errors", async (req, res) => {
const client = await getErrorLogClient();
if (!client) return res.json({ ok: true, cleared: 0 });
const len = await client.lLen(ERROR_LOG_KEY);
await client.del(ERROR_LOG_KEY);
res.json({ ok: true, cleared: len });
// SCAN the hourly counter keys and del them along with the list and
// dropped counter so the admin page comes back to a clean slate.
const hourlyKeys: string[] = [];
let cursor = 0;
do {
const reply = await client.scan(cursor, {
MATCH: `${ERROR_LOG_HOURLY_PREFIX}*`,
COUNT: 100,
});
cursor = Number(reply.cursor);
for (const k of reply.keys) hourlyKeys.push(k);
} while (cursor !== 0);
const pipe = client.multi();
pipe.del(ERROR_LOG_KEY);
pipe.del(ERROR_LOG_DROPPED_KEY);
if (hourlyKeys.length) pipe.del(hourlyKeys);
await pipe.exec();
res.json({ ok: true, cleared: len, hourlyCleared: hourlyKeys.length });
} catch (error) {
handleError(error, res, req);
}
+11 -2
View File
@@ -118,10 +118,19 @@ export function isOwnerCoauthorOrAdmin(repo: Repository, user: User) {
function printError(error: any, req?: express.Request) {
if (error instanceof AnonymousError) {
if (req?.originalUrl === "/api/repo/undefined/options") return;
logger.error("anonymous error", {
const payload = {
...serializeError(error),
url: req?.originalUrl,
});
};
// 4xx are expected client errors (not_found, expired, not_connected) —
// route them to warn so the admin Errors page can split server faults
// (5xx) from client misuse (4xx) cleanly.
const status = error.httpStatus;
if (typeof status === "number" && status >= 400 && status < 500) {
logger.warn("anonymous error", payload);
} else {
logger.error("anonymous error", payload);
}
} else if (error instanceof HTTPError) {
logger.error("http error", {
code: error.code,
+9
View File
@@ -4,6 +4,7 @@ import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anon
import ConferenceModel from "../core/model/conference/conferences.model";
import Repository from "../core/Repository";
import { createLogger, serializeError } from "../core/logger";
import { computeAndStoreDailyStats } from "./dailyStatsSnapshot";
const logger = createLogger("schedule");
@@ -54,3 +55,11 @@ export function repositoryStatusCheck() {
});
});
}
/**
 * Register the recurring job that stores a daily home-page stats
 * snapshot, firing at 00:05 via the cron rule "5 0 * * *".
 * NOTE(review): node-schedule evaluates cron rules in server-local time
 * unless a timezone is supplied — confirm the host runs UTC, since the
 * snapshots themselves are keyed by UTC midnight.
 */
export function dailyStatsSnapshot() {
  const runSnapshot = async () => {
    logger.info("running daily stats snapshot");
    await computeAndStoreDailyStats();
  };
  schedule.scheduleJob("5 0 * * *", runSnapshot);
}