From 7f7035f55a70a6d0317920e5ede61bb5782da3b7 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Sun, 15 Mar 2026 09:39:09 -0500
Subject: [PATCH] feat: add listEvalFiles, loadEvalResults, formatTimestamp to lib/util.ts

DRY up eval I/O duplicated across scripts/eval-list.ts, eval-compare.ts,
and eval-summary.ts. Adds EVAL_DIR constant, formatTimestamp(),
listEvalFiles(), loadEvalResults() with --limit support. 13 new tests.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 lib/util.ts           |  60 ++++++++++++++++++++++
 test/lib-util.test.ts | 118 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 178 insertions(+)

diff --git a/lib/util.ts b/lib/util.ts
index 7dba7f97..39ff2a6a 100644
--- a/lib/util.ts
+++ b/lib/util.ts
@@ -118,6 +118,66 @@ export function getVersion(): string {
   }
 }
 
+// --- Eval I/O ---
+
+/** Default directory holding eval result JSON files: the "evals" folder under GSTACK_DEV_DIR. */
+export const EVAL_DIR = path.join(GSTACK_DEV_DIR, 'evals');
+
+/**
+ * Format an ISO timestamp as "YYYY-MM-DD HH:MM" for display.
+ * Purely textual: the first 'T' becomes a space and the string is cut to 16
+ * characters, so no timezone conversion happens and shorter input is returned as-is.
+ */
+export function formatTimestamp(iso: string): string {
+  return iso.replace('T', ' ').slice(0, 16);
+}
+
+/**
+ * List JSON eval files in the eval directory, sorted by filename descending
+ * (newest first, assuming filenames sort chronologically).
+ * Names starting with "_" (e.g. partial runs) are skipped.
+ *
+ * @param evalDir Directory to scan; defaults to EVAL_DIR when omitted or empty.
+ * @returns Full paths, or an empty array if the directory cannot be read.
+ */
+export function listEvalFiles(evalDir?: string): string[] {
+  const dir = evalDir || EVAL_DIR;
+  try {
+    const files = fs.readdirSync(dir)
+      .filter(f => f.endsWith('.json') && !f.startsWith('_'));
+    files.sort().reverse();
+    return files.map(f => path.join(dir, f));
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Load and parse all eval result JSON files from the eval directory.
+ * Files that cannot be read or parsed are skipped. Results are sorted
+ * newest-first by their `timestamp` field; a missing or non-string timestamp
+ * sorts last instead of breaking the sort.
+ *
+ * @param evalDir Directory to scan; defaults to EVAL_DIR.
+ * @param limit Return only the N most recent results. Omitted or non-positive means no limit.
+ * @returns Parsed file contents, cast to T without validation.
+ */
+export function loadEvalResults<T = Record<string, unknown>>(evalDir?: string, limit?: number): T[] {
+  const files = listEvalFiles(evalDir);
+  const results: Array<{ data: T; timestamp: string }> = [];
+  for (const file of files) {
+    try {
+      const data = JSON.parse(fs.readFileSync(file, 'utf-8'));
+      // localeCompare below needs strings; a numeric timestamp would otherwise throw in sort().
+      const timestamp = typeof data.timestamp === 'string' ? data.timestamp : '';
+      results.push({ data, timestamp });
+    } catch { continue; }
+  }
+  results.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
+  const sliced = limit !== undefined && limit > 0 ? results.slice(0, limit) : results;
+  return sliced.map(r => r.data);
+}
+
 // --- String helpers ---
 
 /** Sanitize a name for use as a filename: strip leading slashes, replace / with - */
diff --git a/test/lib-util.test.ts b/test/lib-util.test.ts
index b085845b..66af3d9f 100644
--- a/test/lib-util.test.ts
+++ b/test/lib-util.test.ts
@@ -15,6 +15,10 @@ import {
   getRemoteSlug,
   getVersion,
   sanitizeForFilename,
+  formatTimestamp,
+  listEvalFiles,
+  loadEvalResults,
+  EVAL_DIR,
 } from '../lib/util';
 
 function tmpDir(): string {
@@ -145,4 +149,118 @@ describe('lib/util', () => {
       expect(sanitizeForFilename('simple')).toBe('simple');
     });
   });
+
+  describe('EVAL_DIR', () => {
+    test('points at an "evals" directory', () => {
+      expect(path.basename(EVAL_DIR)).toBe('evals');
+    });
+  });
+
+  describe('formatTimestamp', () => {
+    test('formats ISO timestamp to date and time', () => {
+      expect(formatTimestamp('2025-05-01T12:30:45.123Z')).toBe('2025-05-01 12:30');
+    });
+
+    test('handles already-formatted strings gracefully', () => {
+      expect(formatTimestamp('2025-05-01 12:30')).toBe('2025-05-01 12:30');
+    });
+
+    test('handles empty string', () => {
+      expect(formatTimestamp('')).toBe('');
+    });
+  });
+
+  describe('listEvalFiles', () => {
+    test('returns empty array for nonexistent dir', () => {
+      expect(listEvalFiles('/nonexistent/dir')).toEqual([]);
+    });
+
+    test('returns sorted JSON files (newest first)', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'a-2025-01.json'), '{}');
+      fs.writeFileSync(path.join(dir, 'b-2025-02.json'), '{}');
+      fs.writeFileSync(path.join(dir, 'c-2025-03.json'), '{}');
+      fs.writeFileSync(path.join(dir, 'not-json.txt'), 'skip');
+
+      const files = listEvalFiles(dir);
+      expect(files.length).toBe(3);
+      // Sorted reverse alphabetically (newest first)
+      expect(path.basename(files[0])).toBe('c-2025-03.json');
+      expect(path.basename(files[2])).toBe('a-2025-01.json');
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('skips _partial files', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'run.json'), '{}');
+      fs.writeFileSync(path.join(dir, '_partial-e2e.json'), '{}');
+
+      const files = listEvalFiles(dir);
+      expect(files.length).toBe(1);
+      expect(path.basename(files[0])).toBe('run.json');
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+  });
+
+  describe('loadEvalResults', () => {
+    test('loads and parses JSON files sorted by timestamp', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'old.json'), JSON.stringify({ timestamp: '2025-01-01T00:00:00Z', value: 'old' }));
+      fs.writeFileSync(path.join(dir, 'new.json'), JSON.stringify({ timestamp: '2025-05-01T00:00:00Z', value: 'new' }));
+
+      const results = loadEvalResults<{ timestamp: string; value: string }>(dir);
+      expect(results.length).toBe(2);
+      expect(results[0].value).toBe('new'); // newest first
+      expect(results[1].value).toBe('old');
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('respects limit parameter', () => {
+      const dir = tmpDir();
+      for (let i = 0; i < 10; i++) {
+        fs.writeFileSync(
+          path.join(dir, `run-${i}.json`),
+          JSON.stringify({ timestamp: `2025-01-${String(i + 1).padStart(2, '0')}T00:00:00Z` }),
+        );
+      }
+
+      const results = loadEvalResults(dir, 3);
+      expect(results.length).toBe(3);
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('treats a non-positive limit as no limit', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'a.json'), JSON.stringify({ timestamp: '2025-01-01T00:00:00Z' }));
+      fs.writeFileSync(path.join(dir, 'b.json'), JSON.stringify({ timestamp: '2025-01-02T00:00:00Z' }));
+
+      expect(loadEvalResults(dir, 0).length).toBe(2);
+      expect(loadEvalResults(dir, -1).length).toBe(2);
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('skips corrupt JSON files', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'good.json'), JSON.stringify({ timestamp: '2025-01-01T00:00:00Z' }));
+      fs.writeFileSync(path.join(dir, 'bad.json'), 'not json');
+
+      const results = loadEvalResults(dir);
+      expect(results.length).toBe(1);
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('tolerates non-string timestamps', () => {
+      const dir = tmpDir();
+      fs.writeFileSync(path.join(dir, 'numeric.json'), JSON.stringify({ timestamp: 1735689600000, value: 'numeric' }));
+      fs.writeFileSync(path.join(dir, 'dated.json'), JSON.stringify({ timestamp: '2025-01-01T00:00:00Z', value: 'dated' }));
+
+      const results = loadEvalResults<{ value: string }>(dir);
+      expect(results.map(r => r.value)).toEqual(['dated', 'numeric']);
+      fs.rmSync(dir, { recursive: true, force: true });
+    });
+
+    test('returns empty for nonexistent dir', () => {
+      expect(loadEvalResults('/nonexistent')).toEqual([]);
+    });
+  });
 });