mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-05 21:25:27 +02:00
feat: add CLI leaderboard, refactor formatTeamSummary to use dashboard-queries
New `gstack eval leaderboard` subcommand pulls team data and renders weekly stats per contributor. Refactored formatTeamSummary to use computeVelocity from dashboard-queries (DRY). 4 new tests. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+73
-2
@@ -29,6 +29,7 @@ import {
|
||||
} from '../test/helpers/eval-store';
|
||||
import type { EvalResult } from '../test/helpers/eval-store';
|
||||
import type { ComparisonResult } from '../test/helpers/eval-store';
|
||||
import { computeLeaderboard, type LeaderboardEntry } from './dashboard-queries';
|
||||
|
||||
// --- ANSI color helpers ---
|
||||
|
||||
@@ -636,6 +637,74 @@ async function cmdTrend(args: string[]): Promise<void> {
|
||||
console.log('');
|
||||
}
|
||||
|
||||
// --- Leaderboard ---
|
||||
|
||||
/**
 * Format leaderboard entries as a fixed-width terminal table.
 *
 * Pure function (no I/O) so it can be unit-tested directly.
 *
 * Layout: a title, a header row, one row per entry ranked by its position in
 * `entries` (the caller decides the order), and a totals line. The "Who"
 * column shows the e-mail, falling back to the user id when the e-mail is
 * empty, truncated to 20 characters.
 *
 * @param entries Leaderboard rows, already sorted in display order.
 * @returns The rendered table, or a short "no activity" message when
 *   `entries` is empty.
 */
export function formatLeaderboard(entries: LeaderboardEntry[]): string {
  if (entries.length === 0) return 'No activity this week.\n';

  // Width of the horizontal rules framing the table.
  const ruleWidth = 85;

  // "1 ship" / "2 ships" — keeps the totals line grammatical for a count of one.
  const countOf = (count: number, noun: string): string =>
    `${count} ${noun}${count === 1 ? '' : 's'}`;

  const lines: string[] = [];
  lines.push('');
  lines.push('Team Leaderboard (this week)');
  lines.push('═'.repeat(ruleWidth));
  lines.push(
    ' ' +
      '#'.padEnd(4) +
      'Who'.padEnd(22) +
      'Ships'.padEnd(8) +
      'Evals'.padEnd(8) +
      'Sessions'.padEnd(10) +
      'Pass Rate'.padEnd(12) +
      'Cost'
  );
  lines.push('─'.repeat(ruleWidth));

  for (const [index, e] of entries.entries()) {
    const rank = `${index + 1}.`.padEnd(4);
    // `||` (not `??`) on purpose: an empty e-mail should also fall back to the id.
    const who = (e.email || e.userId).slice(0, 20).padEnd(22);
    const ships = String(e.ships).padEnd(8);
    const evals = String(e.evalRuns).padEnd(8);
    const sessions = String(e.sessions).padEnd(10);
    // A missing (null/undefined) or non-finite pass rate is shown as a dash
    // instead of throwing on `.toFixed` or printing "NaN%".
    const hasRate = e.avgPassRate != null && Number.isFinite(e.avgPassRate);
    const rate = (hasRate ? `${e.avgPassRate.toFixed(0)}%` : '—').padEnd(12);
    const cost = `$${e.totalCost.toFixed(2)}`;
    lines.push(` ${rank}${who}${ships}${evals}${sessions}${rate}${cost}`);
  }

  lines.push('─'.repeat(ruleWidth));
  const totalShips = entries.reduce((sum, e) => sum + e.ships, 0);
  const totalEvals = entries.reduce((sum, e) => sum + e.evalRuns, 0);
  const totalCost = entries.reduce((sum, e) => sum + e.totalCost, 0);
  lines.push(
    ` ${countOf(entries.length, 'contributor')} | ${countOf(totalShips, 'ship')} | ${countOf(totalEvals, 'eval run')} | $${totalCost.toFixed(2)} spent`
  );
  lines.push('');
  return lines.join('\n');
}
|
||||
|
||||
/**
 * `gstack eval leaderboard` — pull team data and print the weekly leaderboard.
 *
 * When team sync is not configured, prints setup hints and returns without
 * fetching anything. Any failure while loading or rendering is reported on
 * stderr and terminates the process with exit code 1.
 *
 * @param args Remaining CLI arguments; this command does not read them.
 */
async function cmdLeaderboard(args: string[]): Promise<void> {
  try {
    // Sync modules are loaded on demand inside the command rather than at file top.
    const { isSyncConfigured } = await import('./sync-config');
    const { pullTable } = await import('./sync');

    if (!isSyncConfigured()) {
      console.log('Team sync not configured. Run: gstack sync setup');
      console.log('See: docs/TEAM_SYNC_SETUP.md');
      return;
    }

    // The three tables are independent, so fetch them concurrently.
    const [evalRuns, shipLogs, sessions] = await Promise.all([
      pullTable('eval_runs'),
      pullTable('ship_logs'),
      pullTable('session_transcripts'),
    ]);

    const entries = computeLeaderboard({ evalRuns, shipLogs, sessions });
    console.log(formatLeaderboard(entries));
  } catch (err: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a `.message`.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`Failed to load team data: ${reason}`);
    process.exit(1);
  }
}
|
||||
|
||||
function printUsage(): void {
|
||||
console.log(`
|
||||
gstack eval — eval management CLI
|
||||
@@ -649,6 +718,7 @@ Commands:
|
||||
push <file> Validate + save + sync an eval result
|
||||
cost <file> Show per-model cost breakdown
|
||||
trend [--limit N] [--tier X] [--test X] [--team] Per-test pass rate trends
|
||||
leaderboard Weekly team leaderboard
|
||||
cache read|write|stats|clear|verify Manage eval cache
|
||||
watch Live E2E test dashboard
|
||||
`);
|
||||
@@ -666,8 +736,9 @@ switch (command) {
|
||||
case 'summary': cmdSummary(cmdArgs); break;
|
||||
case 'push': cmdPush(cmdArgs); break;
|
||||
case 'cost': cmdCost(cmdArgs); break;
|
||||
case 'trend': cmdTrend(cmdArgs); break;
|
||||
case 'cache': cmdCache(cmdArgs); break;
|
||||
case 'trend': cmdTrend(cmdArgs); break;
|
||||
case 'leaderboard': cmdLeaderboard(cmdArgs); break;
|
||||
case 'cache': cmdCache(cmdArgs); break;
|
||||
case 'watch': cmdWatch(); break;
|
||||
case '--help': case '-h': case 'help': case undefined:
|
||||
printUsage();
|
||||
|
||||
+4
-3
@@ -10,6 +10,7 @@ import { runDeviceAuth } from './auth';
|
||||
import { pushEvalRun, pushRetro, pushQAReport, pushShipLog, pushGreptileTriage, pushHeartbeat, pullTable, pullTranscripts, drainQueue, getSyncStatus } from './sync';
|
||||
import { readJSON, getGitRoot, atomicWriteJSON } from './util';
|
||||
import { syncTranscripts } from './transcript-sync';
|
||||
import { computeVelocity } from './dashboard-queries';
|
||||
|
||||
// --- Main (only when run directly, not imported) ---
|
||||
|
||||
@@ -318,9 +319,9 @@ export function formatTeamSummary(opts: {
|
||||
const evalContributors = new Set(recentEvals.map(r => r.user_id).filter(Boolean));
|
||||
lines.push(` Eval runs (7d): ${recentEvals.length} runs, ${evalContributors.size} contributors`);
|
||||
|
||||
// Ship velocity (last 7 days)
|
||||
const recentShips = shipLogs.filter(r => (r.created_at as string || r.timestamp as string || '') > weekAgo);
|
||||
lines.push(` Ship velocity: ${recentShips.length} PRs this week`);
|
||||
// Ship velocity (via dashboard-queries)
|
||||
const velocity = computeVelocity(shipLogs);
|
||||
lines.push(` Ship velocity: ${velocity.teamTotal.week} PRs this week`);
|
||||
|
||||
// Detection rate (from recent evals)
|
||||
const detectionRates = recentEvals
|
||||
|
||||
@@ -8,6 +8,8 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { formatLeaderboard } from '../lib/cli-eval';
|
||||
import type { LeaderboardEntry } from '../lib/dashboard-queries';
|
||||
|
||||
const CLI_PATH = path.resolve(__dirname, '..', 'lib', 'cli-eval.ts');
|
||||
const TEST_DIR = path.join(os.tmpdir(), `gstack-cli-eval-test-${Date.now()}`);
|
||||
@@ -175,4 +177,60 @@ describe('lib/cli-eval', () => {
|
||||
expect(stdout).toContain('empty');
|
||||
});
|
||||
});
|
||||
|
||||
describe('help includes leaderboard', () => {
|
||||
test('usage mentions leaderboard command', () => {
|
||||
const { stdout } = runCli(['--help']);
|
||||
expect(stdout).toContain('leaderboard');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// --- formatLeaderboard (pure function tests) ---
|
||||
|
||||
// Unit tests for the pure table formatter. Row-level assertions use
// whitespace-tolerant patterns so they check that each value sits in its own
// column of the right row without depending on exact column widths.
describe('formatLeaderboard', () => {
  test('formats entries as table', () => {
    const entries: LeaderboardEntry[] = [
      { userId: 'u1', email: 'alice@test.com', ships: 5, evalRuns: 3, sessions: 10, avgPassRate: 92, totalCost: 4.50 },
      { userId: 'u2', email: 'bob@test.com', ships: 3, evalRuns: 2, sessions: 8, avgPassRate: 85, totalCost: 3.00 },
    ];
    const output = formatLeaderboard(entries);

    expect(output).toContain('Team Leaderboard');
    // Whole-row matches: a bare toContain('5') would already be satisfied by "85%" or "$4.50".
    expect(output).toMatch(/alice@test\.com\s+5\s+3\s+10\s+92%\s+\$4\.50/);
    expect(output).toMatch(/bob@test\.com\s+3\s+2\s+8\s+85%\s+\$3\.00/);
    expect(output).toContain('2 contributors');
    expect(output).toContain('8 ships');
  });

  test('returns message for empty entries', () => {
    const output = formatLeaderboard([]);
    expect(output).toContain('No activity');
  });

  test('handles null avgPassRate', () => {
    const entries: LeaderboardEntry[] = [
      { userId: 'u1', email: 'alice@test.com', ships: 1, evalRuns: 0, sessions: 2, avgPassRate: null, totalCost: 0 },
    ];
    const output = formatLeaderboard(entries);
    // The dash must appear in the pass-rate column of alice's row.
    expect(output).toMatch(/alice@test\.com\s+1\s+0\s+2\s+—\s+\$0\.00/);
    expect(output).not.toContain('null');
  });

  test('ranks entries in order', () => {
    const entries: LeaderboardEntry[] = [
      { userId: 'u1', email: 'first@test.com', ships: 5, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 },
      { userId: 'u2', email: 'second@test.com', ships: 3, evalRuns: 0, sessions: 0, avgPassRate: null, totalCost: 0 },
    ];
    const output = formatLeaderboard(entries);
    const firstIdx = output.indexOf('first@test.com');
    const secondIdx = output.indexOf('second@test.com');
    expect(firstIdx).toBeGreaterThanOrEqual(0);
    expect(firstIdx).toBeLessThan(secondIdx);
    // Each rank label must sit directly in front of its own contributor.
    expect(output).toMatch(/1\.\s+first@test\.com/);
    expect(output).toMatch(/2\.\s+second@test\.com/);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user