mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
17c1c06cd9
* feat: diff-based test selection for E2E and LLM-judge evals Each test declares file dependencies in a TOUCHFILES map. The test runner checks git diff against the base branch and only runs tests whose dependencies were modified. Global touchfiles (session-runner, eval-store, gen-skill-docs) trigger all tests. New scripts: test:e2e:all, test:evals:all, eval:select Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: bump version and changelog (v0.6.1.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: plan-design-review-audit eval — bump turns to 30, add efficiency hints The test was flaky at 20 turns because the agent reads a 300-line SKILL.md, navigates, extracts design data, and writes a report. Added hints to skip preamble/batch commands/write early while still testing the real SKILL.md. Now completes in ~13 turns consistently. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
87 lines
3.1 KiB
TypeScript
87 lines
3.1 KiB
TypeScript
#!/usr/bin/env bun
|
|
/**
 * Show which E2E and LLM-judge tests would run based on the current git diff.
 *
 * Usage:
 *   bun run eval:select              # human-readable output
 *   bun run eval:select --json       # machine-readable JSON
 *   bun run eval:select --base main  # override base branch
 */
|
|
|
|
import * as path from 'path';
|
|
import {
|
|
selectTests,
|
|
detectBaseBranch,
|
|
getChangedFiles,
|
|
E2E_TOUCHFILES,
|
|
LLM_JUDGE_TOUCHFILES,
|
|
GLOBAL_TOUCHFILES,
|
|
} from '../test/helpers/touchfiles';
|
|
|
|
// Parent of this script's directory; handed to the touchfiles helpers below.
const ROOT = path.resolve(import.meta.dir, '..');
const args = process.argv.slice(2);
const jsonMode = args.includes('--json');

// `--base <branch>` overrides base-branch detection. The token after `--base`
// is only accepted when it exists and is not itself an option: previously
// `--base --json` used "--json" as the branch name and passed it on to
// getChangedFiles. Git branch names cannot start with a dash, so rejecting
// dash-prefixed values cannot exclude a real branch.
const baseIdx = args.indexOf('--base');
const baseCandidate = baseIdx >= 0 ? args[baseIdx + 1] : undefined;
const baseOverride =
  baseCandidate !== undefined && !baseCandidate.startsWith('-') ? baseCandidate : undefined;
if (baseIdx >= 0 && baseOverride === undefined) {
  // Written to stderr so `--json` output on stdout stays parseable.
  console.error('Warning: --base requires a branch name; falling back to auto-detection.');
}

// Resolution order: explicit override, then detectBaseBranch(), then 'main'.
// `||` is intentional so an empty-string override also falls through.
const baseBranch = baseOverride || detectBaseBranch(ROOT) || 'main';
const changedFiles = getChangedFiles(baseBranch, ROOT);
|
|
|
|
// Empty diff against the base branch: report that every test would run, then
// stop with a success exit code before any per-suite selection happens.
// Note: this JSON payload is a compact summary (`changed_files` is the number 0,
// `e2e` / `llm_judge` are the string 'all'), unlike the detailed report printed
// further down when there are changed files.
if (changedFiles.length === 0) {
  const emptyDiffLines = jsonMode
    ? [
        JSON.stringify({
          base: baseBranch,
          changed_files: 0,
          e2e: 'all',
          llm_judge: 'all',
          reason: 'no diff — would run all tests',
        }),
      ]
    : [`Base: ${baseBranch}`, 'No changed files detected — all tests would run.'];

  for (const line of emptyDiffLines) {
    console.log(line);
  }
  process.exit(0);
}
|
|
|
|
// Work out, per suite, which tests the changed files select. Each call gets the
// suite's own touchfile map plus the shared GLOBAL_TOUCHFILES.
const e2eSelection = selectTests(changedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES);
const llmSelection = selectTests(changedFiles, LLM_JUDGE_TOUCHFILES, GLOBAL_TOUCHFILES);

// Suite sizes are the number of entries in each touchfile map; computed once
// and reused by both output modes.
const e2eTotal = Object.keys(E2E_TOUCHFILES).length;
const llmTotal = Object.keys(LLM_JUDGE_TOUCHFILES).length;

if (jsonMode) {
  // Machine-readable report: pretty-printed with 2-space indentation.
  const report = {
    base: baseBranch,
    changed_files: changedFiles,
    e2e: {
      selected: e2eSelection.selected,
      skipped: e2eSelection.skipped,
      reason: e2eSelection.reason,
      count: `${e2eSelection.selected.length}/${e2eTotal}`,
    },
    llm_judge: {
      selected: llmSelection.selected,
      skipped: llmSelection.skipped,
      reason: llmSelection.reason,
      count: `${llmSelection.selected.length}/${llmTotal}`,
    },
  };
  console.log(JSON.stringify(report, null, 2));
} else {
  console.log(`Base: ${baseBranch}`);
  console.log(`Changed files: ${changedFiles.length}`);

  // Both suites share one report layout, so drive it from data instead of
  // duplicating the branchy printing code per suite.
  const suites = [
    { label: 'E2E', selection: e2eSelection, total: e2eTotal },
    { label: 'LLM-judge', selection: llmSelection, total: llmTotal },
  ];

  for (const { label, selection, total } of suites) {
    const picked = selection.selected.length;

    // A blank line separates each suite section from what precedes it.
    console.log();
    console.log(`${label} (${selection.reason}): ${picked}/${total} tests`);

    if (picked === 0) {
      console.log(` No ${label} tests affected.`);
    } else if (picked < total) {
      // Partial selection: list both sides so the reader sees what is left out.
      console.log(` Selected: ${selection.selected.join(', ')}`);
      console.log(` Skipped: ${selection.skipped.join(', ')}`);
    } else {
      console.log(` All ${label} tests selected.`);
    }
  }
}
|