From 4985c8e7e93d4084cb3fcc296d61d0f90fa9a561 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 16 Mar 2026 02:44:12 -0500 Subject: [PATCH] feat: add CLI leaderboard, refactor formatTeamSummary to use dashboard-queries New `gstack eval leaderboard` subcommand pulls team data and renders weekly stats per contributor. Refactored formatTeamSummary to use computeVelocity from dashboard-queries (DRY). 4 new tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cli-eval.ts | 75 +++++++++++++++++++++++++++++++++++++-- lib/cli-sync.ts | 7 ++-- test/lib-eval-cli.test.ts | 58 ++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 5 deletions(-) diff --git a/lib/cli-eval.ts b/lib/cli-eval.ts index 87e8b5b8..41331345 100644 --- a/lib/cli-eval.ts +++ b/lib/cli-eval.ts @@ -29,6 +29,7 @@ import { } from '../test/helpers/eval-store'; import type { EvalResult } from '../test/helpers/eval-store'; import type { ComparisonResult } from '../test/helpers/eval-store'; +import { computeLeaderboard, type LeaderboardEntry } from './dashboard-queries'; // --- ANSI color helpers --- @@ -636,6 +637,74 @@ async function cmdTrend(args: string[]): Promise { console.log(''); } +// --- Leaderboard --- + +/** Format leaderboard entries as a terminal table. Pure function for testing. */ +export function formatLeaderboard(entries: LeaderboardEntry[]): string { + if (entries.length === 0) return 'No activity this week.\n'; + + const lines: string[] = []; + lines.push(''); + lines.push('Team Leaderboard (this week)'); + lines.push('═'.repeat(85)); + lines.push( + ' ' + + '#'.padEnd(4) + + 'Who'.padEnd(22) + + 'Ships'.padEnd(8) + + 'Evals'.padEnd(8) + + 'Sessions'.padEnd(10) + + 'Pass Rate'.padEnd(12) + + 'Cost' + ); + lines.push('─'.repeat(85)); + + for (let i = 0; i < entries.length; i++) { + const e = entries[i]; + const rank = `${i + 1}.`.padEnd(4); + const who = (e.email || e.userId).slice(0, 20).padEnd(22); + const ships = String(e.ships).padEnd(8); + const evals = String(e.evalRuns).padEnd(8); + const sessions = String(e.sessions).padEnd(10); + const rate = e.avgPassRate !== null ? `${e.avgPassRate.toFixed(0)}%`.padEnd(12) : '—'.padEnd(12); + const cost = `$${e.totalCost.toFixed(2)}`; + lines.push(` ${rank}${who}${ships}${evals}${sessions}${rate}${cost}`); + } + + lines.push('─'.repeat(85)); + const totalShips = entries.reduce((s, e) => s + e.ships, 0); + const totalEvals = entries.reduce((s, e) => s + e.evalRuns, 0); + const totalCost = entries.reduce((s, e) => s + e.totalCost, 0); + lines.push(` ${entries.length} contributors | ${totalShips} ships | ${totalEvals} eval runs | $${totalCost.toFixed(2)} spent`); + lines.push(''); + return lines.join('\n'); +} + +async function cmdLeaderboard(args: string[]): Promise { + try { + const { isSyncConfigured } = await import('./sync-config'); + const { pullTable } = await import('./sync'); + + if (!isSyncConfigured()) { + console.log('Team sync not configured. Run: gstack sync setup'); + console.log('See: docs/TEAM_SYNC_SETUP.md'); + return; + } + + const [evalRuns, shipLogs, sessions] = await Promise.all([ + pullTable('eval_runs'), + pullTable('ship_logs'), + pullTable('session_transcripts'), + ]); + + const entries = computeLeaderboard({ evalRuns, shipLogs, sessions }); + console.log(formatLeaderboard(entries)); + } catch (err: any) { + console.error(`Failed to load team data: ${err.message}`); + process.exit(1); + } +} + function printUsage(): void { console.log(` gstack eval — eval management CLI @@ -649,6 +718,7 @@ Commands: push Validate + save + sync an eval result cost Show per-model cost breakdown trend [--limit N] [--tier X] [--test X] [--team] Per-test pass rate trends + leaderboard Weekly team leaderboard cache read|write|stats|clear|verify Manage eval cache watch Live E2E test dashboard `); @@ -666,8 +736,9 @@ switch (command) { case 'summary': cmdSummary(cmdArgs); break; case 'push': cmdPush(cmdArgs); break; case 'cost': cmdCost(cmdArgs); break; - case 'trend': cmdTrend(cmdArgs); break; - case 'cache': cmdCache(cmdArgs); break; + case 'trend': cmdTrend(cmdArgs); break; + case 'leaderboard': cmdLeaderboard(cmdArgs); break; + case 'cache': cmdCache(cmdArgs); break; case 'watch': cmdWatch(); break; case '--help': case '-h': case 'help': case undefined: printUsage(); diff --git a/lib/cli-sync.ts b/lib/cli-sync.ts index f7efab94..bf82abe5 100644 --- a/lib/cli-sync.ts +++ b/lib/cli-sync.ts @@ -10,6 +10,7 @@ import { runDeviceAuth } from './auth'; import { pushEvalRun, pushRetro, pushQAReport, pushShipLog, pushGreptileTriage, pushHeartbeat, pullTable, pullTranscripts, drainQueue, getSyncStatus } from './sync'; import { readJSON, getGitRoot, atomicWriteJSON } from './util'; import { syncTranscripts } from './transcript-sync'; +import { computeVelocity } from './dashboard-queries'; // --- Main (only when run directly, not imported) --- @@ -318,9 +319,9 @@ export function formatTeamSummary(opts: { const evalContributors = new Set(recentEvals.map(r => r.user_id).filter(Boolean)); lines.push(` Eval runs (7d): ${recentEvals.length} runs, ${evalContributors.size} contributors`); - // Ship velocity (last 7 days) - const recentShips = shipLogs.filter(r => (r.created_at as string || r.timestamp as string || '') > weekAgo); - lines.push(` Ship velocity: ${recentShips.length} PRs this week`); + // Ship velocity (via dashboard-queries) + const velocity = computeVelocity(shipLogs); + lines.push(` Ship velocity: ${velocity.teamTotal.week} PRs this week`); // Detection rate (from recent evals) const detectionRates = recentEvals diff --git a/test/lib-eval-cli.test.ts b/test/lib-eval-cli.test.ts index 38814f76..5e67ce2b 100644 --- a/test/lib-eval-cli.test.ts +++ b/test/lib-eval-cli.test.ts @@ -8,6 +8,8 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; +import { formatLeaderboard } from '../lib/cli-eval'; +import type { LeaderboardEntry } from '../lib/dashboard-queries'; const CLI_PATH = path.resolve(__dirname, '..', 'lib', 'cli-eval.ts'); const TEST_DIR = path.join(os.tmpdir(), `gstack-cli-eval-test-${Date.now()}`); @@ -175,4 +177,60 @@ describe('lib/cli-eval', () => { expect(stdout).toContain('empty'); }); }); + + describe('help includes leaderboard', () => { + test('usage mentions leaderboard command', () => { + const { stdout } = runCli(['--help']); + expect(stdout).toContain('leaderboard'); + }); + }); +}); + +// --- formatLeaderboard (pure function tests) --- + +describe('formatLeaderboard', () => { + test('formats entries as table', () => { + const entries: LeaderboardEntry[] = [ + { userId: 'u1', email: 'alice@test.com', ships: 5, evalRuns: 3, sessions: 10, avgPassRate: 92, totalCost: 4.50 }, + { userId: 'u2', email: 'bob@test.com', ships: 3, evalRuns: 2, sessions: 8, avgPassRate: 85, totalCost: 3.00 }, + ]; + const output = formatLeaderboard(entries); + + expect(output).toContain('Team Leaderboard'); + expect(output).toContain('alice@test.com'); + expect(output).toContain('bob@test.com'); + expect(output).toContain('5'); // alice's ships + expect(output).toContain('92%'); + expect(output).toContain('85%'); + expect(output).toContain('$4.50'); + expect(output).toContain('2 contributors'); + expect(output).toContain('8 ships'); + }); + + test('returns message for empty entries', () => { + const output = formatLeaderboard([]); + expect(output).toContain('No activity'); + }); + + test('handles null avgPassRate', () => { + const entries: LeaderboardEntry[] = [ + { userId: 'u1', email: 'alice@test.com', ships: 1, evalRuns: 0, sessions: 2, avgPassRate: null, totalCost: 0 }, + ]; + const output = formatLeaderboard(entries); + expect(output).toContain('—'); + expect(output).not.toContain('null'); + }); + + test('ranks entries in order', () => { + const entries: LeaderboardEntry[] = [ + { userId: 'u1', email: 'first@test.com', ships: 5, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 }, + { userId: 'u2', email: 'second@test.com', ships: 3, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 }, + ]; + const output = formatLeaderboard(entries); + const firstIdx = output.indexOf('first@test.com'); + const secondIdx = output.indexOf('second@test.com'); + expect(firstIdx).toBeLessThan(secondIdx); + expect(output).toContain('1.'); + expect(output).toContain('2.'); + }); });