feat: add CLI leaderboard, refactor formatTeamSummary to use dashboard-queries

New `gstack eval leaderboard` subcommand pulls team data and renders
weekly stats per contributor. Refactored formatTeamSummary to use
computeVelocity from dashboard-queries (DRY). 5 new tests (4 for formatLeaderboard, 1 for the help output).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-16 02:44:12 -05:00
parent e969c6dadf
commit 4985c8e7e9
3 changed files with 135 additions and 5 deletions
+73 -2
View File
@@ -29,6 +29,7 @@ import {
} from '../test/helpers/eval-store';
import type { EvalResult } from '../test/helpers/eval-store';
import type { ComparisonResult } from '../test/helpers/eval-store';
import { computeLeaderboard, type LeaderboardEntry } from './dashboard-queries';
// --- ANSI color helpers ---
@@ -636,6 +637,74 @@ async function cmdTrend(args: string[]): Promise<void> {
console.log('');
}
// --- Leaderboard ---
/**
 * Render the weekly team leaderboard as a fixed-width terminal table.
 *
 * Pure (no I/O), which keeps it directly unit-testable.
 *
 * @param entries - Rows to render, in the order given; the rank shown is each
 *   row's 1-based position in this array.
 * @returns A "no activity" message when `entries` is empty; otherwise the
 *   title, column header, one row per entry, and a totals footer, joined by
 *   newlines.
 */
export function formatLeaderboard(entries: LeaderboardEntry[]): string {
  if (entries.length === 0) {
    return 'No activity this week.\n';
  }

  const heavyRule = '═'.repeat(85);
  const lightRule = '─'.repeat(85);

  // Column widths: rank 4, who 22, ships 8, evals 8, sessions 10, pass rate 12.
  // The trailing cost column is left unpadded.
  const headerCells = [
    '#'.padEnd(4),
    'Who'.padEnd(22),
    'Ships'.padEnd(8),
    'Evals'.padEnd(8),
    'Sessions'.padEnd(10),
    'Pass Rate'.padEnd(12),
    'Cost',
  ];

  const rows = entries.map((entry, index) => {
    // Show the email when present, otherwise the user id; cap at 20 chars so
    // the name never runs into the next column.
    const displayName = (entry.email || entry.userId).slice(0, 20);
    // A null pass rate means "nothing to report" and renders as a dash.
    const passRate = entry.avgPassRate === null ? '—' : `${entry.avgPassRate.toFixed(0)}%`;
    const cells = [
      `${index + 1}.`.padEnd(4),
      displayName.padEnd(22),
      String(entry.ships).padEnd(8),
      String(entry.evalRuns).padEnd(8),
      String(entry.sessions).padEnd(10),
      passRate.padEnd(12),
      `$${entry.totalCost.toFixed(2)}`,
    ];
    return ` ${cells.join('')}`;
  });

  // Footer totals, accumulated in a single pass over the entries.
  let shipSum = 0;
  let evalSum = 0;
  let costSum = 0;
  for (const entry of entries) {
    shipSum += entry.ships;
    evalSum += entry.evalRuns;
    costSum += entry.totalCost;
  }
  const footer = ` ${entries.length} contributors | ${shipSum} ships | ${evalSum} eval runs | $${costSum.toFixed(2)} spent`;

  return [
    '',
    'Team Leaderboard (this week)',
    heavyRule,
    ` ${headerCells.join('')}`,
    lightRule,
    ...rows,
    lightRule,
    footer,
    '',
  ].join('\n');
}
/**
 * `gstack eval leaderboard` — pull team data and print the weekly leaderboard.
 *
 * When team sync is not configured, prints setup instructions and returns
 * without pulling anything. If anything throws while loading or rendering,
 * the reason is written to stderr and the process exits with status 1.
 *
 * @param args - Remaining CLI arguments passed by the command dispatcher.
 *   Currently unused by this command.
 */
async function cmdLeaderboard(args: string[]): Promise<void> {
  try {
    // Sync modules are loaded on demand, inside the try so that an import
    // failure is reported the same way as a pull failure.
    const { isSyncConfigured } = await import('./sync-config');
    const { pullTable } = await import('./sync');

    if (!isSyncConfigured()) {
      console.log('Team sync not configured. Run: gstack sync setup');
      console.log('See: docs/TEAM_SYNC_SETUP.md');
      return;
    }

    // The three pulls do not depend on each other, so run them concurrently.
    const [evalRuns, shipLogs, sessions] = await Promise.all([
      pullTable('eval_runs'),
      pullTable('ship_logs'),
      pullTable('session_transcripts'),
    ]);

    const entries = computeLeaderboard({ evalRuns, shipLogs, sessions });
    console.log(formatLeaderboard(entries));
  } catch (err: unknown) {
    // Anything can be thrown, not just Error instances; narrow before reading
    // `.message` so a thrown string or object never prints as "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`Failed to load team data: ${reason}`);
    process.exit(1);
  }
}
function printUsage(): void {
console.log(`
gstack eval — eval management CLI
@@ -649,6 +718,7 @@ Commands:
push <file> Validate + save + sync an eval result
cost <file> Show per-model cost breakdown
trend [--limit N] [--tier X] [--test X] [--team] Per-test pass rate trends
leaderboard Weekly team leaderboard
cache read|write|stats|clear|verify Manage eval cache
watch Live E2E test dashboard
`);
@@ -666,8 +736,9 @@ switch (command) {
case 'summary': cmdSummary(cmdArgs); break;
case 'push': cmdPush(cmdArgs); break;
case 'cost': cmdCost(cmdArgs); break;
case 'trend': cmdTrend(cmdArgs); break;
case 'cache': cmdCache(cmdArgs); break;
case 'trend': cmdTrend(cmdArgs); break;
case 'leaderboard': cmdLeaderboard(cmdArgs); break;
case 'cache': cmdCache(cmdArgs); break;
case 'watch': cmdWatch(); break;
case '--help': case '-h': case 'help': case undefined:
printUsage();
+4 -3
View File
@@ -10,6 +10,7 @@ import { runDeviceAuth } from './auth';
import { pushEvalRun, pushRetro, pushQAReport, pushShipLog, pushGreptileTriage, pushHeartbeat, pullTable, pullTranscripts, drainQueue, getSyncStatus } from './sync';
import { readJSON, getGitRoot, atomicWriteJSON } from './util';
import { syncTranscripts } from './transcript-sync';
import { computeVelocity } from './dashboard-queries';
// --- Main (only when run directly, not imported) ---
@@ -318,9 +319,9 @@ export function formatTeamSummary(opts: {
const evalContributors = new Set(recentEvals.map(r => r.user_id).filter(Boolean));
lines.push(` Eval runs (7d): ${recentEvals.length} runs, ${evalContributors.size} contributors`);
// Ship velocity (last 7 days)
const recentShips = shipLogs.filter(r => (r.created_at as string || r.timestamp as string || '') > weekAgo);
lines.push(` Ship velocity: ${recentShips.length} PRs this week`);
// Ship velocity (via dashboard-queries)
const velocity = computeVelocity(shipLogs);
lines.push(` Ship velocity: ${velocity.teamTotal.week} PRs this week`);
// Detection rate (from recent evals)
const detectionRates = recentEvals
+58
View File
@@ -8,6 +8,8 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { formatLeaderboard } from '../lib/cli-eval';
import type { LeaderboardEntry } from '../lib/dashboard-queries';
const CLI_PATH = path.resolve(__dirname, '..', 'lib', 'cli-eval.ts');
const TEST_DIR = path.join(os.tmpdir(), `gstack-cli-eval-test-${Date.now()}`);
@@ -175,4 +177,60 @@ describe('lib/cli-eval', () => {
expect(stdout).toContain('empty');
});
});
// The new subcommand must be discoverable from the CLI usage text.
describe('help includes leaderboard', () => {
  test('usage mentions leaderboard command', () => {
    const helpText = runCli(['--help']).stdout;
    expect(helpText).toContain('leaderboard');
  });
});
});
// --- formatLeaderboard (pure function tests) ---
describe('formatLeaderboard', () => {
  /** Build an entry with zeroed stats and no pass rate, overriding only what a test cares about. */
  const makeEntry = (
    userId: string,
    email: string,
    stats: Partial<LeaderboardEntry> = {},
  ): LeaderboardEntry => ({
    userId,
    email,
    ships: 0,
    evalRuns: 0,
    sessions: 0,
    avgPassRate: null,
    totalCost: 0,
    ...stats,
  });

  test('formats entries as table', () => {
    const rendered = formatLeaderboard([
      makeEntry('u1', 'alice@test.com', { ships: 5, evalRuns: 3, sessions: 10, avgPassRate: 92, totalCost: 4.50 }),
      makeEntry('u2', 'bob@test.com', { ships: 3, evalRuns: 2, sessions: 8, avgPassRate: 85, totalCost: 3.00 }),
    ]);

    const expectedFragments = [
      'Team Leaderboard',
      'alice@test.com',
      'bob@test.com',
      '5', // alice's ships
      '92%',
      '85%',
      '$4.50',
      '2 contributors',
      '8 ships',
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  test('returns message for empty entries', () => {
    expect(formatLeaderboard([])).toContain('No activity');
  });

  test('handles null avgPassRate', () => {
    const rendered = formatLeaderboard([
      makeEntry('u1', 'alice@test.com', { ships: 1, sessions: 2 }),
    ]);

    // A missing pass rate is shown as a dash, never as the literal "null".
    expect(rendered).toContain('—');
    expect(rendered).not.toContain('null');
  });

  test('ranks entries in order', () => {
    const rendered = formatLeaderboard([
      makeEntry('u1', 'first@test.com', { ships: 5 }),
      makeEntry('u2', 'second@test.com', { ships: 3 }),
    ]);

    // Rows appear in input order, each prefixed with its 1-based rank.
    const firstPos = rendered.indexOf('first@test.com');
    const secondPos = rendered.indexOf('second@test.com');
    expect(firstPos).toBeLessThan(secondPos);
    expect(rendered).toContain('1.');
    expect(rendered).toContain('2.');
  });
});