feat: add CLI leaderboard, refactor formatTeamSummary to use dashboard-queries

New `gstack eval leaderboard` subcommand pulls team data and renders weekly stats per contributor.
Refactored formatTeamSummary to use computeVelocity from dashboard-queries (DRY).
5 new tests (1 CLI help check, 4 formatLeaderboard cases).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-05 21:25:27 +02:00 · 2026-03-16 02:44:12 -05:00
parent e969c6dadf
commit 4985c8e7e9
3 changed files with 135 additions and 5 deletions
@@ -29,6 +29,7 @@ import {
 } from '../test/helpers/eval-store';
 import type { EvalResult } from '../test/helpers/eval-store';
 import type { ComparisonResult } from '../test/helpers/eval-store';
+import { computeLeaderboard, type LeaderboardEntry } from './dashboard-queries';

 // --- ANSI color helpers ---

@@ -636,6 +637,74 @@ async function cmdTrend(args: string[]): Promise<void> {
  console.log('');
 }

+// --- Leaderboard ---
+
+/**
+ * Format leaderboard entries as a fixed-width terminal table. Pure function for testing.
+ *
+ * Rows are printed in the order they are passed in: the rank shown is the
+ * array position plus one, and no sorting happens here. Each row shows the
+ * contributor (email, or userId when email is empty), ships, eval runs,
+ * sessions, average pass rate and cost. A footer line sums ships, eval runs
+ * and cost over all rows.
+ *
+ * @param entries - Leaderboard rows to render.
+ * @returns The table joined with newlines, or the single line
+ *   "No activity this week." (with a trailing newline) when `entries` is empty.
+ */
+export function formatLeaderboard(entries: LeaderboardEntry[]): string {
+  if (entries.length === 0) return 'No activity this week.\n'; // empty input: no header, rows or totals
+
+  const lines: string[] = [];
+  lines.push('');
+  lines.push('Team Leaderboard (this week)');
+  lines.push('═'.repeat(85)); // rule is 85 chars; the padded columns below add up to 66 plus the cost text
+  lines.push(
+    '  ' +
+    '#'.padEnd(4) +
+    'Who'.padEnd(22) +
+    'Ships'.padEnd(8) +
+    'Evals'.padEnd(8) +
+    'Sessions'.padEnd(10) +
+    'Pass Rate'.padEnd(12) +
+    'Cost'
+  );
+  lines.push('─'.repeat(85));
+
+  for (let i = 0; i < entries.length; i++) { // one row per entry, same column widths as the header
+    const e = entries[i];
+    const rank = `${i + 1}.`.padEnd(4);
+    const who = (e.email || e.userId).slice(0, 20).padEnd(22); // falsy email falls back to userId; cut to 20 chars so the 22-wide column keeps a gap
+    const ships = String(e.ships).padEnd(8);
+    const evals = String(e.evalRuns).padEnd(8);
+    const sessions = String(e.sessions).padEnd(10);
+    const rate = e.avgPassRate !== null ? `${e.avgPassRate.toFixed(0)}%`.padEnd(12) : '—'.padEnd(12); // null renders as an em dash; assumes avgPassRate is already on a 0-100 scale — TODO confirm
+    const cost = `$${e.totalCost.toFixed(2)}`;
+    lines.push(`  ${rank}${who}${ships}${evals}${sessions}${rate}${cost}`);
+  }
+
+  lines.push('─'.repeat(85));
+  const totalShips = entries.reduce((s, e) => s + e.ships, 0); // footer totals cover exactly the rows passed in
+  const totalEvals = entries.reduce((s, e) => s + e.evalRuns, 0);
+  const totalCost = entries.reduce((s, e) => s + e.totalCost, 0);
+  lines.push(`  ${entries.length} contributors | ${totalShips} ships | ${totalEvals} eval runs | $${totalCost.toFixed(2)} spent`);
+  lines.push('');
+  return lines.join('\n');
+}
+
+/**
+ * `gstack eval leaderboard`: pull the team tables and print the leaderboard.
+ *
+ * Loads the sync modules lazily, then pulls `eval_runs`, `ship_logs` and
+ * `session_transcripts` in parallel, feeds them to `computeLeaderboard` and
+ * prints the result of `formatLeaderboard`. When team sync is not configured
+ * it prints setup hints and returns without pulling anything.
+ *
+ * Any failure (module load, pull, or compute) is reported on stderr and the
+ * process exits with status 1.
+ *
+ * @param args - Remaining CLI arguments; currently unused by this command.
+ */
+async function cmdLeaderboard(args: string[]): Promise<void> {
+  try {
+    const { isSyncConfigured } = await import('./sync-config');
+    const { pullTable } = await import('./sync');
+
+    if (!isSyncConfigured()) {
+      console.log('Team sync not configured. Run: gstack sync setup');
+      console.log('See: docs/TEAM_SYNC_SETUP.md');
+      return;
+    }
+
+    // The three tables are independent, so fetch them concurrently.
+    const [evalRuns, shipLogs, sessions] = await Promise.all([
+      pullTable('eval_runs'),
+      pullTable('ship_logs'),
+      pullTable('session_transcripts'),
+    ]);
+
+    const entries = computeLeaderboard({ evalRuns, shipLogs, sessions });
+    console.log(formatLeaderboard(entries));
+  } catch (err: unknown) {
+    // A thrown value is not guaranteed to be an Error; narrow before reading
+    // `.message` so non-Error throws do not print "undefined".
+    const reason = err instanceof Error ? err.message : String(err);
+    console.error(`Failed to load team data: ${reason}`);
+    process.exit(1);
+  }
+}
+
 function printUsage(): void {
  console.log(`
 gstack eval — eval management CLI
@@ -649,6 +718,7 @@ Commands:
  push <file>                                 Validate + save + sync an eval result
  cost <file>                                 Show per-model cost breakdown
  trend [--limit N] [--tier X] [--test X] [--team]  Per-test pass rate trends
+  leaderboard                                 Weekly team leaderboard
  cache read|write|stats|clear|verify         Manage eval cache
  watch                                       Live E2E test dashboard
 `);
@@ -666,8 +736,9 @@ switch (command) {
  case 'summary': cmdSummary(cmdArgs); break;
  case 'push':    cmdPush(cmdArgs); break;
  case 'cost':    cmdCost(cmdArgs); break;
-  case 'trend':   cmdTrend(cmdArgs); break;
-  case 'cache':   cmdCache(cmdArgs); break;
+  case 'trend':       cmdTrend(cmdArgs); break;
+  case 'leaderboard': cmdLeaderboard(cmdArgs); break;
+  case 'cache':       cmdCache(cmdArgs); break;
  case 'watch':   cmdWatch(); break;
  case '--help': case '-h': case 'help': case undefined:
    printUsage();
@@ -10,6 +10,7 @@ import { runDeviceAuth } from './auth';
 import { pushEvalRun, pushRetro, pushQAReport, pushShipLog, pushGreptileTriage, pushHeartbeat, pullTable, pullTranscripts, drainQueue, getSyncStatus } from './sync';
 import { readJSON, getGitRoot, atomicWriteJSON } from './util';
 import { syncTranscripts } from './transcript-sync';
+import { computeVelocity } from './dashboard-queries';

 // --- Main (only when run directly, not imported) ---

@@ -318,9 +319,9 @@ export function formatTeamSummary(opts: {
  const evalContributors = new Set(recentEvals.map(r => r.user_id).filter(Boolean));
  lines.push(`  Eval runs (7d):   ${recentEvals.length} runs, ${evalContributors.size} contributors`);

-  // Ship velocity (last 7 days)
-  const recentShips = shipLogs.filter(r => (r.created_at as string || r.timestamp as string || '') > weekAgo);
-  lines.push(`  Ship velocity:    ${recentShips.length} PRs this week`);
+  // Ship velocity (via dashboard-queries)
+  const velocity = computeVelocity(shipLogs);
+  lines.push(`  Ship velocity:    ${velocity.teamTotal.week} PRs this week`);

  // Detection rate (from recent evals)
  const detectionRates = recentEvals
@@ -8,6 +8,8 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
+import { formatLeaderboard } from '../lib/cli-eval';
+import type { LeaderboardEntry } from '../lib/dashboard-queries';

 const CLI_PATH = path.resolve(__dirname, '..', 'lib', 'cli-eval.ts');
 const TEST_DIR = path.join(os.tmpdir(), `gstack-cli-eval-test-${Date.now()}`);
@@ -175,4 +177,60 @@ describe('lib/cli-eval', () => {
      expect(stdout).toContain('empty');
    });
  });
+
+  describe('help includes leaderboard', () => {
+    test('usage mentions leaderboard command', () => {
+      const { stdout } = runCli(['--help']); // only inspects the help text; the subcommand itself is not run
+      expect(stdout).toContain('leaderboard');
+    });
+  });
+});
+
+// --- formatLeaderboard (pure function tests) ---
+
+describe('formatLeaderboard', () => {
+  test('formats entries as table', () => {
+    const entries: LeaderboardEntry[] = [
+      { userId: 'u1', email: 'alice@test.com', ships: 5, evalRuns: 3, sessions: 10, avgPassRate: 92, totalCost: 4.50 },
+      { userId: 'u2', email: 'bob@test.com', ships: 3, evalRuns: 2, sessions: 8, avgPassRate: 85, totalCost: 3.00 },
+    ];
+    const output = formatLeaderboard(entries);
+
+    expect(output).toContain('Team Leaderboard');
+    expect(output).toContain('alice@test.com');
+    expect(output).toContain('bob@test.com');
+    expect(output).toContain('5');  // alice's ships — NOTE(review): weak check, '5' also occurs in '85%' and '$4.50'
+    expect(output).toContain('92%');
+    expect(output).toContain('85%');
+    expect(output).toContain('$4.50');
+    expect(output).toContain('2 contributors');
+    expect(output).toContain('8 ships'); // footer total: 5 + 3
+  });
+
+  test('returns message for empty entries', () => {
+    const output = formatLeaderboard([]);
+    expect(output).toContain('No activity');
+  });
+
+  test('handles null avgPassRate', () => {
+    const entries: LeaderboardEntry[] = [
+      { userId: 'u1', email: 'alice@test.com', ships: 1, evalRuns: 0, sessions: 2, avgPassRate: null, totalCost: 0 },
+    ];
+    const output = formatLeaderboard(entries);
+    expect(output).toContain('—'); // em dash placeholder shown in place of a missing pass rate
+    expect(output).not.toContain('null');
+  });
+
+  test('ranks entries in order', () => {
+    const entries: LeaderboardEntry[] = [
+      { userId: 'u1', email: 'first@test.com', ships: 5, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 },
+      { userId: 'u2', email: 'second@test.com', ships: 3, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 },
+    ];
+    const output = formatLeaderboard(entries);
+    const firstIdx = output.indexOf('first@test.com');
+    const secondIdx = output.indexOf('second@test.com');
+    expect(firstIdx).toBeLessThan(secondIdx); // rows keep the order of the input array
+    expect(output).toContain('1.');
+    expect(output).toContain('2.');
+  });
 });