feat: add dashboard query functions — pure transforms for team analytics

6 functions: detectRegressions, computeVelocity, computeCostTrend, computeLeaderboard, computeQATrend, computeEvalTrend. All pure, no I/O, with division-by-zero guards. 28 tests. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-05 13:15:24 +02:00 · 2026-03-16 02:43:52 -05:00
parent 6e14689f0e
commit e969c6dadf
2 changed files with 811 additions and 0 deletions
@@ -0,0 +1,368 @@
+/**
+ * Dashboard query/transform functions — pure, no I/O.
+ *
+ * Each function takes arrays of Supabase rows (Record<string, unknown>[]) —
+ * computeLeaderboard receives three of them wrapped in one options object —
+ * and returns a structured result. Used by both the CLI leaderboard
+ * and the shared HTML dashboard.
+ */
+
+// --- Types ---
+
+/** One test flagged as a regression by `detectRegressions`. */
+export interface RegressionEntry {
+  /** Name of the test, taken from the latest run's `tests` entries. */
+  testName: string;
+  /** Percentage (0–100) of this test's results in the earlier runs that passed. */
+  previousRate: number;
+  /** Rate in the latest run; `detectRegressions` always reports 0 because only failures are flagged. */
+  currentRate: number;
+  /** `currentRate - previousRate`; negative for a regression. */
+  delta: number;
+}
+
+/** Result of `detectRegressions`: flagged tests plus overall pass-rate movement. */
+export interface RegressionResult {
+  /** Tests that failed in the latest run but passed in more than 50% of their earlier results. */
+  regressions: RegressionEntry[];
+  /** Mean pass rate (%) of the earlier runs; `null` when none had a computable rate or fewer than two runs were given. */
+  overallPreviousRate: number | null;
+  /** Pass rate (%) of the latest run; `null` when its total is not positive or fewer than two runs were given. */
+  overallCurrentRate: number | null;
+  /** Current minus previous rate; 0 when either rate is `null`. */
+  overallDelta: number;
+}
+
+/** Ship counts for a single user, as produced by `computeVelocity`. */
+export interface VelocityByUser {
+  /** The row's `user_id`, or 'unknown' when it is missing. */
+  userId: string;
+  /** The row's `email`, falling back to the user id and then to 'unknown'. */
+  email: string;
+  /** Ships within the trailing 7 days. */
+  shipsThisWeek: number;
+  /** Ships within the trailing `windowDays` window (30 days by default). */
+  shipsThisMonth: number;
+}
+
+/** Result of `computeVelocity`. */
+export interface VelocityResult {
+  /** Per-user counts, sorted by weekly ships and then by window ships, both descending. */
+  byUser: VelocityByUser[];
+  /** Sums of the per-user weekly and window counts. */
+  teamTotal: { week: number; month: number };
+}
+
+/** Cost aggregate for one week, as produced by `computeCostTrend`. */
+export interface CostWeek {
+  /** `YYYY-MM-DD` date of the UTC Sunday that starts the week. */
+  weekStart: string;
+  /** Sum of `total_cost_usd` over the runs in this week (non-numeric values count as 0). */
+  totalCost: number;
+  /** Number of runs that fell into this week. */
+  runs: number;
+}
+
+/** Result of `computeCostTrend`. */
+export interface CostTrendResult {
+  /** One entry per week that has runs, most recent week first. */
+  weekly: CostWeek[];
+  /** Sum of `total_cost_usd` over every run passed in, including runs that have no timestamp. */
+  totalAllTime: number;
+}
+
+/** One user's row on the leaderboard built by `computeLeaderboard` (trailing 7 days). */
+export interface LeaderboardEntry {
+  /** The row's `user_id`, or 'unknown' when it is missing. */
+  userId: string;
+  /** Email from the first in-window row seen for this user; falls back to the user id, then 'unknown'. */
+  email: string;
+  /** Number of ship logs in the window. */
+  ships: number;
+  /** Number of eval runs in the window. */
+  evalRuns: number;
+  /** Number of sessions in the window. */
+  sessions: number;
+  /** Mean of the per-run pass rates (%) in the window; `null` when no run had a computable rate. */
+  avgPassRate: number | null;
+  /** Sum of `total_cost_usd` over the user's eval runs in the window. */
+  totalCost: number;
+}
+
+/** QA health-score history for one repository, as produced by `computeQATrend`. */
+export interface QARepoTrend {
+  /** The report's `repo_slug`, or 'unknown' when it is missing. */
+  repoSlug: string;
+  /**
+   * One point per report, newest date first. `date` is the first 10 characters
+   * of `created_at`; `score` is `health_score` coerced to a number (0 when
+   * missing or non-numeric).
+   */
+  scores: Array<{ date: string; score: number }>;
+}
+
+/** Result of `computeQATrend`. */
+export interface QATrendResult {
+  /** Per-repository trends, sorted by `repoSlug` ascending. */
+  byRepo: QARepoTrend[];
+}
+
+/** Pass/fail history for one named test, as produced by `computeEvalTrend`. */
+export interface EvalTestTrend {
+  /** The test's `name`. */
+  testName: string;
+  /** One entry per recorded result, ordered by timestamp string ascending (oldest first). */
+  history: Array<{ timestamp: string; passed: boolean }>;
+  /** Percentage (0–100) of `history` entries that passed. */
+  passRate: number;
+  /** True when there are at least 3 results and the pass rate is strictly between 20 and 80. */
+  isFlaky: boolean;
+}
+
+/** Result of `computeEvalTrend`. */
+export interface EvalTrendResult {
+  /** Per-test trends: flaky tests first, then by pass rate ascending (worst first). */
+  byTest: EvalTestTrend[];
+}
+
+// --- Helpers ---
+
+/**
+ * Turn passed/total counts into a percentage without dividing by zero.
+ *
+ * Both inputs are coerced with `Number()`; a value that coerces to NaN or 0
+ * is treated as 0.
+ *
+ * @returns `passed / total * 100`, or `null` when the total is not positive.
+ */
+function safePassRate(passed: unknown, total: unknown): number | null {
+  const passedCount = Number(passed) || 0;
+  const totalCount = Number(total) || 0;
+  if (totalCount <= 0) {
+    return null;
+  }
+  return (passedCount / totalCount) * 100;
+}
+
+/**
+ * Return the `YYYY-MM-DD` date of the UTC Sunday that begins the week
+ * containing `date`. The argument is not modified.
+ *
+ * Throws a RangeError (from `toISOString`) when `date` is an Invalid Date.
+ */
+function weekStart(date: Date): string {
+  const MS_PER_DAY = 86_400_000;
+  const instant = new Date(date);
+  const ms = instant.getTime();
+  // Milliseconds elapsed since UTC midnight; the double modulo keeps the
+  // value non-negative for instants before 1970.
+  const intoDay = ((ms % MS_PER_DAY) + MS_PER_DAY) % MS_PER_DAY;
+  // getUTCDay() is 0 on Sunday, so stepping back that many days lands on Sunday.
+  const sundayMidnight = ms - intoDay - instant.getUTCDay() * MS_PER_DAY;
+  return new Date(sundayMidnight).toISOString().slice(0, 10);
+}
+
+/** ISO-8601 UTC timestamp for the instant `days` days before the time of the call. */
+function daysAgo(days: number): string {
+  const MS_PER_DAY = 86_400_000;
+  const cutoff = new Date(Date.now() - days * MS_PER_DAY);
+  return cutoff.toISOString();
+}
+
+// --- Query functions ---
+
+/**
+ * Detect eval regressions by comparing the most recent run's pass rate
+ * against the average of the previous runs.
+ *
+ * A test is reported as a regression when it failed in the latest run but
+ * passed in more than 50% of its recorded results across the earlier runs.
+ *
+ * @param evalRuns Eval run rows, expected newest first: index 0 is treated as
+ *   the latest run and everything after it as history. Each row is read for
+ *   `passed`, `total_tests` and `tests` (entries carrying `name` / `passed`).
+ * @returns Flagged tests plus the overall rates. With fewer than two runs
+ *   there is nothing to compare, so both overall rates are `null` and the
+ *   delta is 0.
+ */
+export function detectRegressions(evalRuns: Record<string, unknown>[]): RegressionResult {
+  if (evalRuns.length < 2) {
+    return { regressions: [], overallPreviousRate: null, overallCurrentRate: null, overallDelta: 0 };
+  }
+
+  // Rows are untyped: accept `tests` only when it really is an array and drop
+  // entries that are not objects, so a single malformed row (null entry,
+  // object instead of array, ...) cannot make the whole computation throw.
+  const testEntries = (run: Record<string, unknown>): Array<Record<string, unknown>> =>
+    Array.isArray(run.tests)
+      ? run.tests.filter((t): t is Record<string, unknown> => typeof t === 'object' && t !== null)
+      : [];
+
+  // Runs should be sorted by timestamp desc (newest first)
+  const latest = evalRuns[0];
+  const previous = evalRuns.slice(1);
+
+  const currentRate = safePassRate(latest.passed, latest.total_tests);
+  const previousRates = previous
+    .map(r => safePassRate(r.passed, r.total_tests))
+    .filter((r): r is number => r !== null);
+
+  const previousAvg = previousRates.length > 0
+    ? previousRates.reduce((a, b) => a + b, 0) / previousRates.length
+    : null;
+
+  // A delta is only meaningful when both sides have a computable rate.
+  const overallDelta = (currentRate !== null && previousAvg !== null)
+    ? currentRate - previousAvg
+    : 0;
+
+  // Group previous test results by name
+  const previousByName = new Map<string, boolean[]>();
+  for (const run of previous) {
+    for (const t of testEntries(run)) {
+      if (typeof t.name !== 'string' || !t.name) continue;
+      const outcomes = previousByName.get(t.name) ?? [];
+      outcomes.push(!!t.passed);
+      previousByName.set(t.name, outcomes);
+    }
+  }
+
+  // Per-test regression detection
+  const regressions: RegressionEntry[] = [];
+  for (const t of testEntries(latest)) {
+    if (typeof t.name !== 'string' || !t.name || t.passed) continue; // only look at failures
+    const prevResults = previousByName.get(t.name);
+    if (!prevResults || prevResults.length === 0) continue; // no history to regress from
+
+    const prevPassRate = (prevResults.filter(Boolean).length / prevResults.length) * 100;
+    if (prevPassRate > 50) {
+      // Was passing >50% of the time, now failed
+      regressions.push({
+        testName: t.name,
+        previousRate: prevPassRate,
+        currentRate: 0,
+        delta: -prevPassRate,
+      });
+    }
+  }
+
+  return {
+    regressions,
+    overallPreviousRate: previousAvg,
+    overallCurrentRate: currentRate,
+    overallDelta,
+  };
+}
+
+/**
+ * Compute shipping velocity grouped by user.
+ *
+ * Timestamps are parsed to epoch milliseconds before being compared with the
+ * window cutoffs, so rows whose timestamp carries a non-UTC offset are still
+ * bucketed by the instant they describe rather than by how the string sorts.
+ * Rows with a missing or unparseable timestamp count toward neither bucket.
+ *
+ * @param shipLogs Ship log rows; `created_at` (falling back to `timestamp`),
+ *   `user_id` and `email` are read from each row.
+ * @param windowDays Size in days of the "month" window. Defaults to 30.
+ * @returns Per-user counts sorted by weekly ships, then by window ships (both
+ *   descending), plus team-wide totals. Every user seen in `shipLogs` gets an
+ *   entry, even when all of their ships fall outside both windows.
+ */
+export function computeVelocity(shipLogs: Record<string, unknown>[], windowDays = 30): VelocityResult {
+  const MS_PER_DAY = 86_400_000;
+  // Read the clock once so both cutoffs are relative to the same instant.
+  const nowMs = Date.now();
+  const weekCutoff = nowMs - 7 * MS_PER_DAY;
+  const windowCutoff = nowMs - windowDays * MS_PER_DAY;
+
+  const byUser = new Map<string, { email: string; week: number; month: number }>();
+
+  for (const log of shipLogs) {
+    // NaN for missing/unparseable values; NaN fails both comparisons below.
+    const shippedAt = Date.parse(String(log.created_at || log.timestamp || ''));
+    const userId = String(log.user_id || 'unknown');
+
+    let entry = byUser.get(userId);
+    if (!entry) {
+      entry = { email: String(log.email || log.user_id || 'unknown'), week: 0, month: 0 };
+      byUser.set(userId, entry);
+    }
+
+    if (shippedAt >= windowCutoff) entry.month++;
+    if (shippedAt >= weekCutoff) entry.week++;
+  }
+
+  const sorted = [...byUser.entries()]
+    .map(([userId, data]) => ({
+      userId,
+      email: data.email,
+      shipsThisWeek: data.week,
+      shipsThisMonth: data.month,
+    }))
+    .sort((a, b) => b.shipsThisWeek - a.shipsThisWeek || b.shipsThisMonth - a.shipsThisMonth);
+
+  const teamWeek = sorted.reduce((s, u) => s + u.shipsThisWeek, 0);
+  const teamMonth = sorted.reduce((s, u) => s + u.shipsThisMonth, 0);
+
+  return {
+    byUser: sorted,
+    teamTotal: { week: teamWeek, month: teamMonth },
+  };
+}
+
+/**
+ * Compute weekly cost trend from eval runs.
+ *
+ * @param evalRuns Eval run rows; `timestamp` (falling back to `created_at`)
+ *   and `total_cost_usd` are read from each row. Non-numeric costs count as 0.
+ * @returns `weekly`: one entry per UTC week (keyed by its Sunday) that has
+ *   runs, most recent week first. `totalAllTime`: the cost summed over every
+ *   row, including rows left out of `weekly` because their timestamp is
+ *   missing or cannot be parsed.
+ */
+export function computeCostTrend(evalRuns: Record<string, unknown>[]): CostTrendResult {
+  const byWeek = new Map<string, { cost: number; runs: number }>();
+  let totalAllTime = 0;
+
+  for (const run of evalRuns) {
+    const cost = Number(run.total_cost_usd) || 0;
+    totalAllTime += cost;
+
+    const ts = run.timestamp || run.created_at;
+    if (!ts) continue;
+
+    // weekStart() ends in toISOString(), which throws a RangeError on an
+    // Invalid Date. Skip unparseable timestamps the same way missing ones are
+    // skipped, so one bad row cannot abort the whole trend.
+    const when = new Date(String(ts));
+    if (Number.isNaN(when.getTime())) continue;
+
+    const ws = weekStart(when);
+    const entry = byWeek.get(ws) ?? { cost: 0, runs: 0 };
+    entry.cost += cost;
+    entry.runs++;
+    byWeek.set(ws, entry);
+  }
+
+  const weekly = [...byWeek.entries()]
+    .map(([ws, data]) => ({ weekStart: ws, totalCost: data.cost, runs: data.runs }))
+    // YYYY-MM-DD keys sort chronologically as strings; reverse for newest first.
+    .sort((a, b) => b.weekStart.localeCompare(a.weekStart));
+
+  return { weekly, totalAllTime };
+}
+
+/**
+ * Compute team leaderboard for the current week.
+ *
+ * "Current week" is the trailing 7 days measured from the time of the call.
+ * Timestamps are parsed to epoch milliseconds before the window check, so a
+ * row whose timestamp carries a non-UTC offset is judged by the instant it
+ * describes. Rows with a missing or unparseable timestamp are ignored.
+ *
+ * @param opts.evalRuns Eval run rows (`timestamp` or `created_at`, `user_id`,
+ *   `email`, `passed`, `total_tests`, `total_cost_usd`).
+ * @param opts.shipLogs Ship log rows (`created_at` or `timestamp`, `user_id`, `email`).
+ * @param opts.sessions Session rows (`started_at` or `created_at`, `user_id`, `email`).
+ * @returns One entry per user with any in-window activity, sorted by ships,
+ *   then eval runs, then sessions (all descending).
+ */
+export function computeLeaderboard(opts: {
+  evalRuns: Record<string, unknown>[];
+  shipLogs: Record<string, unknown>[];
+  sessions: Record<string, unknown>[];
+}): LeaderboardEntry[] {
+  const { evalRuns, shipLogs, sessions } = opts;
+  const weekCutoff = Date.now() - 7 * 86_400_000;
+
+  // NaN (missing/unparseable) fails the comparison, so such rows are skipped.
+  const isThisWeek = (raw: unknown): boolean => Date.parse(String(raw || '')) >= weekCutoff;
+
+  const users = new Map<string, LeaderboardEntry>();
+
+  // Fetch the row's user entry, creating a zeroed one the first time the user is seen.
+  function userFor(row: Record<string, unknown>): LeaderboardEntry {
+    const userId = String(row.user_id || 'unknown');
+    let entry = users.get(userId);
+    if (!entry) {
+      const email = String(row.email || row.user_id || 'unknown');
+      entry = { userId, email, ships: 0, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 };
+      users.set(userId, entry);
+    }
+    return entry;
+  }
+
+  // Count eval runs this week
+  const passRates = new Map<string, number[]>();
+  for (const r of evalRuns) {
+    if (!isThisWeek(r.timestamp || r.created_at)) continue;
+    const user = userFor(r);
+    user.evalRuns++;
+    user.totalCost += Number(r.total_cost_usd) || 0;
+
+    // Runs without a positive total have no rate and are left out of the average.
+    const rate = safePassRate(r.passed, r.total_tests);
+    if (rate !== null) {
+      const rates = passRates.get(user.userId) ?? [];
+      rates.push(rate);
+      passRates.set(user.userId, rates);
+    }
+  }
+
+  // Count ships this week
+  for (const log of shipLogs) {
+    if (!isThisWeek(log.created_at || log.timestamp)) continue;
+    userFor(log).ships++;
+  }
+
+  // Count sessions this week
+  for (const s of sessions) {
+    if (!isThisWeek(s.started_at || s.created_at)) continue;
+    userFor(s).sessions++;
+  }
+
+  // Compute avg pass rates
+  for (const [userId, rates] of passRates) {
+    const user = users.get(userId);
+    if (user && rates.length > 0) {
+      user.avgPassRate = rates.reduce((a, b) => a + b, 0) / rates.length;
+    }
+  }
+
+  // Sort by ships (primary), then eval runs, then sessions
+  return [...users.values()].sort((a, b) =>
+    b.ships - a.ships || b.evalRuns - a.evalRuns || b.sessions - a.sessions
+  );
+}
+
+/**
+ * Group QA health scores by repository.
+ *
+ * Each report contributes one `{ date, score }` point: `date` is the first 10
+ * characters of `created_at`, `score` is `health_score` coerced to a number
+ * (0 when missing or non-numeric). Reports without a `repo_slug` are filed
+ * under 'unknown'.
+ *
+ * @returns Repositories in ascending slug order, each with its points sorted
+ *   by date descending.
+ */
+export function computeQATrend(qaReports: Record<string, unknown>[]): QATrendResult {
+  const grouped = new Map<string, Array<{ date: string; score: number }>>();
+
+  for (const report of qaReports) {
+    const slug = String(report.repo_slug || 'unknown');
+    const point = {
+      date: String(report.created_at || '').slice(0, 10),
+      score: Number(report.health_score) || 0,
+    };
+
+    const bucket = grouped.get(slug);
+    if (bucket) {
+      bucket.push(point);
+    } else {
+      grouped.set(slug, [point]);
+    }
+  }
+
+  const byRepo = Array.from(grouped, ([repoSlug, scores]): QARepoTrend => ({
+    repoSlug,
+    // Newest day first; points from the same day keep their input order.
+    scores: scores.sort((x, y) => y.date.localeCompare(x.date)),
+  }));
+  byRepo.sort((x, y) => x.repoSlug.localeCompare(y.repoSlug));
+
+  return { byRepo };
+}
+
+/**
+ * Compute per-test pass rate trends and flaky test detection.
+ *
+ * @param evalRuns Eval run rows in any order; `timestamp` (falling back to
+ *   `created_at`) and `tests` are read from each row. A `tests` value that is
+ *   not an array, and entries that are not objects or lack a non-empty string
+ *   `name`, are ignored rather than allowed to throw.
+ * @returns One trend per test name: history sorted by timestamp ascending,
+ *   the pass rate as a percentage, and a flaky flag (at least 3 results and a
+ *   pass rate strictly between 20 and 80). Flaky tests come first, then the
+ *   rest by pass rate ascending (worst first).
+ */
+export function computeEvalTrend(evalRuns: Record<string, unknown>[]): EvalTrendResult {
+  const byTest = new Map<string, Array<{ timestamp: string; passed: boolean }>>();
+
+  for (const run of evalRuns) {
+    const ts = String(run.timestamp || run.created_at || '');
+    // Rows are untyped; only iterate `tests` when it really is an array.
+    const tests: unknown[] = Array.isArray(run.tests) ? run.tests : [];
+
+    for (const t of tests) {
+      if (typeof t !== 'object' || t === null) continue;
+      const { name, passed } = t as { name?: unknown; passed?: unknown };
+      if (typeof name !== 'string' || !name) continue;
+
+      const history = byTest.get(name) ?? [];
+      history.push({ timestamp: ts, passed: !!passed });
+      byTest.set(name, history);
+    }
+  }
+
+  const result: EvalTestTrend[] = [];
+  for (const [testName, history] of byTest) {
+    // Sort by timestamp ascending for trend display
+    history.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
+
+    // A history only exists once a result has been pushed, so length >= 1 here.
+    const passCount = history.filter(h => h.passed).length;
+    const passRate = (passCount / history.length) * 100;
+
+    // Flaky = has both passes and failures, and pass rate between 20-80%
+    const isFlaky = history.length >= 3 && passRate > 20 && passRate < 80;
+
+    result.push({ testName, history, passRate, isFlaky });
+  }
+
+  // Sort: flaky first, then by pass rate ascending (worst first)
+  return {
+    byTest: result.sort((a, b) => {
+      if (a.isFlaky !== b.isFlaky) return a.isFlaky ? -1 : 1;
+      return a.passRate - b.passRate;
+    }),
+  };
+}
@@ -0,0 +1,443 @@
+/**
+ * Tests for dashboard query/transform functions (pure, no network).
+ */
+
+import { describe, test, expect } from 'bun:test';
+import {
+  detectRegressions,
+  computeVelocity,
+  computeCostTrend,
+  computeLeaderboard,
+  computeQATrend,
+  computeEvalTrend,
+} from '../lib/dashboard-queries';
+
+// --- Helpers ---
+
+// Captured once at module load; the default timestamp for every fixture below.
+const now = new Date().toISOString();
+
+/** ISO timestamp `d` days before the moment of the call. */
+function daysAgo(d: number): string {
+  const then = Date.now() - d * 86_400_000;
+  return new Date(then).toISOString();
+}
+
+/** ISO timestamp `h` hours before the moment of the call. */
+function hoursAgo(h: number): string {
+  const then = Date.now() - h * 3_600_000;
+  return new Date(then).toISOString();
+}
+
+/**
+ * Build an eval-run fixture: 8 of 10 tests passed, `total_cost_usd` 1.50,
+ * no per-test entries, timestamped `now`. Fields in `overrides` replace the
+ * defaults of the same name.
+ */
+function makeEvalRun(overrides: Record<string, unknown> = {}) {
+  // Built per call so the `tests` array is never shared between fixtures.
+  const defaults = {
+    timestamp: now,
+    user_id: 'u1',
+    email: 'alice@test.com',
+    branch: 'main',
+    passed: 8,
+    total_tests: 10,
+    total_cost_usd: 1.50,
+    tier: 'e2e',
+    tests: [],
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a ship-log fixture for user `u1`, created `now`. Fields in
+ * `overrides` replace the defaults of the same name.
+ */
+function makeShipLog(overrides: Record<string, unknown> = {}) {
+  const defaults = {
+    created_at: now,
+    user_id: 'u1',
+    email: 'alice@test.com',
+    version: '0.3.10',
+    branch: 'main',
+    pr_url: 'https://github.com/org/repo/pull/1',
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a session fixture for user `u1` that starts and ends `now`. Fields
+ * in `overrides` replace the defaults of the same name.
+ */
+function makeSession(overrides: Record<string, unknown> = {}) {
+  // Built per call so the `tools_used` array is never shared between fixtures.
+  const defaults = {
+    started_at: now,
+    ended_at: now,
+    user_id: 'u1',
+    email: 'alice@test.com',
+    repo_slug: 'org/repo',
+    total_turns: 10,
+    tools_used: ['Edit', 'Bash'],
+    summary: 'Did stuff',
+  };
+  return { ...defaults, ...overrides };
+}
+
+// --- detectRegressions ---
+
+// Covers the short-input guard, the overall delta arithmetic, per-test
+// flagging and the zero-total guard. Fixtures are listed newest first, which
+// is the order detectRegressions assumes (index 0 is the latest run).
+describe('detectRegressions', () => {
+  test('returns empty for < 2 runs', () => {
+    // A single run has nothing to compare against, so even its own rate is reported as null.
+    const result = detectRegressions([makeEvalRun()]);
+    expect(result.regressions).toEqual([]);
+    expect(result.overallDelta).toBe(0);
+    expect(result.overallCurrentRate).toBeNull();
+  });
+
+  test('returns empty for empty array', () => {
+    const result = detectRegressions([]);
+    expect(result.regressions).toEqual([]);
+  });
+
+  test('detects overall regression', () => {
+    const runs = [
+      makeEvalRun({ passed: 5, total_tests: 10 }), // latest: 50%
+      makeEvalRun({ passed: 9, total_tests: 10, timestamp: daysAgo(1) }), // prev: 90%
+      makeEvalRun({ passed: 8, total_tests: 10, timestamp: daysAgo(2) }), // prev: 80%
+    ];
+    const result = detectRegressions(runs);
+    expect(result.overallCurrentRate).toBe(50);
+    expect(result.overallPreviousRate).toBe(85); // avg of 90 and 80
+    expect(result.overallDelta).toBe(-35);
+  });
+
+  test('detects per-test regressions', () => {
+    // test_a passed in both earlier runs and fails in the latest; test_b never fails.
+    const runs = [
+      makeEvalRun({ passed: 1, total_tests: 2, tests: [
+        { name: 'test_a', passed: false },
+        { name: 'test_b', passed: true },
+      ]}),
+      makeEvalRun({ passed: 2, total_tests: 2, timestamp: daysAgo(1), tests: [
+        { name: 'test_a', passed: true },
+        { name: 'test_b', passed: true },
+      ]}),
+      makeEvalRun({ passed: 2, total_tests: 2, timestamp: daysAgo(2), tests: [
+        { name: 'test_a', passed: true },
+        { name: 'test_b', passed: true },
+      ]}),
+    ];
+    const result = detectRegressions(runs);
+    expect(result.regressions.length).toBe(1);
+    expect(result.regressions[0].testName).toBe('test_a');
+    expect(result.regressions[0].previousRate).toBe(100);
+    expect(result.regressions[0].currentRate).toBe(0);
+  });
+
+  test('handles total_tests = 0 gracefully', () => {
+    // The latest run has no tests, so its rate is null and the delta falls back to 0.
+    const runs = [
+      makeEvalRun({ passed: 0, total_tests: 0 }),
+      makeEvalRun({ passed: 5, total_tests: 10, timestamp: daysAgo(1) }),
+    ];
+    const result = detectRegressions(runs);
+    expect(result.overallCurrentRate).toBeNull();
+    expect(result.overallDelta).toBe(0);
+  });
+
+  test('no regression when pass rate improves', () => {
+    const runs = [
+      makeEvalRun({ passed: 10, total_tests: 10 }), // 100%
+      makeEvalRun({ passed: 5, total_tests: 10, timestamp: daysAgo(1) }), // 50%
+    ];
+    const result = detectRegressions(runs);
+    expect(result.overallDelta).toBe(50);
+    expect(result.regressions).toEqual([]);
+  });
+});
+
+// --- computeVelocity ---
+
+// Covers per-user grouping, the week/month split, empty input and ordering.
+// All timestamps are relative to the wall clock at test time.
+describe('computeVelocity', () => {
+  test('groups ships by user', () => {
+    const logs = [
+      makeShipLog({ user_id: 'u1', email: 'alice@test.com', created_at: hoursAgo(1) }),
+      makeShipLog({ user_id: 'u1', email: 'alice@test.com', created_at: hoursAgo(2) }),
+      makeShipLog({ user_id: 'u2', email: 'bob@test.com', created_at: hoursAgo(3) }),
+    ];
+    const result = computeVelocity(logs);
+
+    expect(result.teamTotal.week).toBe(3);
+    expect(result.byUser.length).toBe(2);
+    expect(result.byUser[0].email).toBe('alice@test.com');
+    expect(result.byUser[0].shipsThisWeek).toBe(2);
+    expect(result.byUser[1].email).toBe('bob@test.com');
+    expect(result.byUser[1].shipsThisWeek).toBe(1);
+  });
+
+  test('separates week from month', () => {
+    // The default window is 30 days, so all three logs count toward the month
+    // total while only the first is inside the 7-day week.
+    const logs = [
+      makeShipLog({ created_at: hoursAgo(1) }),       // this week
+      makeShipLog({ created_at: daysAgo(10) }),        // this month
+      makeShipLog({ created_at: daysAgo(20) }),        // this month
+    ];
+    const result = computeVelocity(logs);
+
+    expect(result.teamTotal.week).toBe(1);
+    expect(result.teamTotal.month).toBe(3);
+  });
+
+  test('handles empty array', () => {
+    const result = computeVelocity([]);
+    expect(result.byUser).toEqual([]);
+    expect(result.teamTotal).toEqual({ week: 0, month: 0 });
+  });
+
+  test('sorts by weekly ships descending', () => {
+    // u2 appears after u1 in the input but has 3 ships against u1's 1.
+    const logs = [
+      makeShipLog({ user_id: 'u1', created_at: hoursAgo(1) }),
+      makeShipLog({ user_id: 'u2', created_at: hoursAgo(1) }),
+      makeShipLog({ user_id: 'u2', created_at: hoursAgo(2) }),
+      makeShipLog({ user_id: 'u2', created_at: hoursAgo(3) }),
+    ];
+    const result = computeVelocity(logs);
+    expect(result.byUser[0].userId).toBe('u2');
+    expect(result.byUser[0].shipsThisWeek).toBe(3);
+  });
+});
+
+// --- computeCostTrend ---
+
+// Covers weekly bucketing, empty input and non-numeric costs.
+describe('computeCostTrend', () => {
+  test('groups costs by week', () => {
+    // Weeks are keyed by their UTC Sunday: 03-16 and 03-17 share the week
+    // starting 2026-03-15, while 03-08 is itself a Sunday and starts the week before.
+    const runs = [
+      makeEvalRun({ total_cost_usd: 2.00, timestamp: '2026-03-16T12:00:00Z' }), // Mon
+      makeEvalRun({ total_cost_usd: 3.00, timestamp: '2026-03-17T12:00:00Z' }), // Tue (same week)
+      makeEvalRun({ total_cost_usd: 1.50, timestamp: '2026-03-08T12:00:00Z' }), // prev week
+    ];
+    const result = computeCostTrend(runs);
+
+    expect(result.totalAllTime).toBe(6.50);
+    expect(result.weekly.length).toBe(2);
+    // Most recent week first
+    const firstWeek = result.weekly[0];
+    expect(firstWeek.runs).toBe(2);
+    expect(firstWeek.totalCost).toBe(5.00);
+  });
+
+  test('handles empty array', () => {
+    const result = computeCostTrend([]);
+    expect(result.weekly).toEqual([]);
+    expect(result.totalAllTime).toBe(0);
+  });
+
+  test('handles missing cost values', () => {
+    // undefined and null both coerce to a cost of 0.
+    const runs = [
+      makeEvalRun({ total_cost_usd: undefined }),
+      makeEvalRun({ total_cost_usd: null }),
+    ];
+    const result = computeCostTrend(runs);
+    expect(result.totalAllTime).toBe(0);
+  });
+});
+
+// --- computeLeaderboard ---
+
+// Covers aggregation across the three inputs, ordering, the 7-day window and
+// the zero-total guard. Fixtures default to the `now` timestamp, so they fall
+// inside the window unless a test overrides it.
+describe('computeLeaderboard', () => {
+  test('aggregates across data sources', () => {
+    const result = computeLeaderboard({
+      evalRuns: [
+        makeEvalRun({ user_id: 'u1', email: 'alice@test.com', passed: 8, total_tests: 10 }),
+        makeEvalRun({ user_id: 'u1', email: 'alice@test.com', passed: 10, total_tests: 10 }),
+      ],
+      shipLogs: [
+        makeShipLog({ user_id: 'u1', email: 'alice@test.com' }),
+      ],
+      sessions: [
+        makeSession({ user_id: 'u1', email: 'alice@test.com' }),
+        makeSession({ user_id: 'u1', email: 'alice@test.com' }),
+      ],
+    });
+
+    expect(result.length).toBe(1);
+    expect(result[0].email).toBe('alice@test.com');
+    expect(result[0].ships).toBe(1);
+    expect(result[0].evalRuns).toBe(2);
+    expect(result[0].sessions).toBe(2);
+    expect(result[0].avgPassRate).toBe(90); // avg of 80% and 100%
+    // Two runs at the fixture default of 1.50 each.
+    expect(result[0].totalCost).toBe(3.00);
+  });
+
+  test('sorts by ships, then eval runs, then sessions', () => {
+    // bob has ships but no eval runs; alice has an eval run but no ships. Ships win.
+    const result = computeLeaderboard({
+      evalRuns: [
+        makeEvalRun({ user_id: 'u1', email: 'alice@test.com' }),
+      ],
+      shipLogs: [
+        makeShipLog({ user_id: 'u2', email: 'bob@test.com' }),
+        makeShipLog({ user_id: 'u2', email: 'bob@test.com' }),
+      ],
+      sessions: [],
+    });
+
+    expect(result[0].email).toBe('bob@test.com');
+    expect(result[0].ships).toBe(2);
+    expect(result[1].email).toBe('alice@test.com');
+  });
+
+  test('excludes data older than 7 days', () => {
+    // Out-of-window rows must not even create a user entry.
+    const result = computeLeaderboard({
+      evalRuns: [
+        makeEvalRun({ user_id: 'u1', timestamp: daysAgo(10) }),
+      ],
+      shipLogs: [
+        makeShipLog({ user_id: 'u1', created_at: daysAgo(10) }),
+      ],
+      sessions: [
+        makeSession({ user_id: 'u1', started_at: daysAgo(10) }),
+      ],
+    });
+
+    expect(result.length).toBe(0);
+  });
+
+  test('handles all empty inputs', () => {
+    const result = computeLeaderboard({
+      evalRuns: [],
+      shipLogs: [],
+      sessions: [],
+    });
+    expect(result).toEqual([]);
+  });
+
+  test('handles eval runs with total_tests = 0', () => {
+    // The run still counts as activity, but it contributes no pass rate.
+    const result = computeLeaderboard({
+      evalRuns: [makeEvalRun({ passed: 0, total_tests: 0 })],
+      shipLogs: [],
+      sessions: [],
+    });
+    expect(result.length).toBe(1);
+    expect(result[0].avgPassRate).toBeNull();
+  });
+
+  test('multiple users sorted correctly with ties', () => {
+    const result = computeLeaderboard({
+      evalRuns: [
+        makeEvalRun({ user_id: 'u1', email: 'alice@test.com' }),
+        makeEvalRun({ user_id: 'u2', email: 'bob@test.com' }),
+      ],
+      shipLogs: [
+        makeShipLog({ user_id: 'u1', email: 'alice@test.com' }),
+        makeShipLog({ user_id: 'u2', email: 'bob@test.com' }),
+      ],
+      sessions: [
+        makeSession({ user_id: 'u1', email: 'alice@test.com' }),
+        makeSession({ user_id: 'u1', email: 'alice@test.com' }),
+        makeSession({ user_id: 'u2', email: 'bob@test.com' }),
+      ],
+    });
+
+    // Same ships (1), same eval runs (1), u1 has more sessions
+    expect(result[0].email).toBe('alice@test.com');
+    expect(result[1].email).toBe('bob@test.com');
+  });
+});
+
+// --- computeQATrend ---
+
+// Covers grouping by repo, newest-first ordering within a repo, empty input
+// and the fallback score for a missing health_score.
+describe('computeQATrend', () => {
+  test('groups scores by repo', () => {
+    const reports = [
+      { repo_slug: 'org/app', health_score: 85, created_at: '2026-03-15T12:00:00Z' },
+      { repo_slug: 'org/app', health_score: 90, created_at: '2026-03-14T12:00:00Z' },
+      { repo_slug: 'org/api', health_score: 70, created_at: '2026-03-15T12:00:00Z' },
+    ];
+    const result = computeQATrend(reports);
+
+    expect(result.byRepo.length).toBe(2);
+    const app = result.byRepo.find(r => r.repoSlug === 'org/app')!;
+    expect(app.scores.length).toBe(2);
+    // Most recent first
+    expect(app.scores[0].score).toBe(85);
+    expect(app.scores[1].score).toBe(90);
+  });
+
+  test('handles empty array', () => {
+    const result = computeQATrend([]);
+    expect(result.byRepo).toEqual([]);
+  });
+
+  test('handles missing health_score', () => {
+    // A null score is reported as 0 rather than dropped.
+    const reports = [
+      { repo_slug: 'org/app', health_score: null, created_at: '2026-03-15T12:00:00Z' },
+    ];
+    const result = computeQATrend(reports);
+    expect(result.byRepo[0].scores[0].score).toBe(0);
+  });
+});
+
+// --- computeEvalTrend ---
+
+// Covers per-test pass rates, flaky detection, result ordering, unnamed test
+// entries and the ascending order of each test's history.
+describe('computeEvalTrend', () => {
+  test('computes per-test pass rates', () => {
+    const runs = [
+      makeEvalRun({ timestamp: '2026-03-15T12:00:00Z', tests: [
+        { name: 'test_a', passed: true },
+        { name: 'test_b', passed: false },
+      ]}),
+      makeEvalRun({ timestamp: '2026-03-14T12:00:00Z', tests: [
+        { name: 'test_a', passed: true },
+        { name: 'test_b', passed: true },
+      ]}),
+    ];
+    const result = computeEvalTrend(runs);
+
+    const testA = result.byTest.find(t => t.testName === 'test_a')!;
+    expect(testA.passRate).toBe(100);
+    expect(testA.isFlaky).toBe(false);
+
+    const testB = result.byTest.find(t => t.testName === 'test_b')!;
+    expect(testB.passRate).toBe(50);
+  });
+
+  test('detects flaky tests', () => {
+    // Four alternating results: 50% pass rate with at least 3 data points.
+    const runs = [
+      makeEvalRun({ timestamp: '2026-03-15T12:00:00Z', tests: [{ name: 'flaky', passed: true }] }),
+      makeEvalRun({ timestamp: '2026-03-14T12:00:00Z', tests: [{ name: 'flaky', passed: false }] }),
+      makeEvalRun({ timestamp: '2026-03-13T12:00:00Z', tests: [{ name: 'flaky', passed: true }] }),
+      makeEvalRun({ timestamp: '2026-03-12T12:00:00Z', tests: [{ name: 'flaky', passed: false }] }),
+    ];
+    const result = computeEvalTrend(runs);
+    const flaky = result.byTest.find(t => t.testName === 'flaky')!;
+    expect(flaky.isFlaky).toBe(true);
+    expect(flaky.passRate).toBe(50);
+  });
+
+  test('sorts flaky first, then by worst pass rate', () => {
+    const runs = [
+      makeEvalRun({ tests: [
+        { name: 'good', passed: true },
+        { name: 'flaky', passed: true },
+        { name: 'bad', passed: false },
+      ]}),
+      makeEvalRun({ timestamp: daysAgo(1), tests: [
+        { name: 'good', passed: true },
+        { name: 'flaky', passed: false },
+        { name: 'bad', passed: false },
+      ]}),
+      makeEvalRun({ timestamp: daysAgo(2), tests: [
+        { name: 'good', passed: true },
+        { name: 'flaky', passed: true },
+        { name: 'bad', passed: false },
+      ]}),
+    ];
+    const result = computeEvalTrend(runs);
+
+    // Flaky (passes 2 of 3 runs, about 66.7%, inside the 20-80% band with 3 results) should be first
+    expect(result.byTest[0].testName).toBe('flaky');
+    // Then bad (0%), then good (100%)
+    expect(result.byTest[1].testName).toBe('bad');
+    expect(result.byTest[2].testName).toBe('good');
+  });
+
+  test('handles empty array', () => {
+    const result = computeEvalTrend([]);
+    expect(result.byTest).toEqual([]);
+  });
+
+  test('handles tests without names', () => {
+    // The unnamed entry is skipped; only the named one produces a trend.
+    const runs = [
+      makeEvalRun({ tests: [{ passed: true }, { name: 'named', passed: true }] }),
+    ];
+    const result = computeEvalTrend(runs);
+    expect(result.byTest.length).toBe(1);
+    expect(result.byTest[0].testName).toBe('named');
+  });
+
+  test('history sorted ascending by timestamp', () => {
+    // Input is deliberately out of order (15, 13, 14).
+    const runs = [
+      makeEvalRun({ timestamp: '2026-03-15T12:00:00Z', tests: [{ name: 'a', passed: true }] }),
+      makeEvalRun({ timestamp: '2026-03-13T12:00:00Z', tests: [{ name: 'a', passed: false }] }),
+      makeEvalRun({ timestamp: '2026-03-14T12:00:00Z', tests: [{ name: 'a', passed: true }] }),
+    ];
+    const result = computeEvalTrend(runs);
+    const a = result.byTest.find(t => t.testName === 'a')!;
+    // Should be sorted ascending: 13, 14, 15
+    expect(a.history[0].timestamp).toContain('2026-03-13');
+    expect(a.history[1].timestamp).toContain('2026-03-14');
+    expect(a.history[2].timestamp).toContain('2026-03-15');
+  });
+});