mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
test: tier-1 live-fire E2E for context-save + context-restore
8 periodic-tier E2E tests that spawn claude -p with the Skill tool enabled and the skill installed in .claude/skills/. These exercise the ROUTING path — the actual thing that broke with /checkpoint. Prior tests hand-fed the Save section as a prompt; these invoke the slash-command for real and verify the Skill tool was called.

Tests (~$0.20-$0.40 each, ~$2 total per run):

1. context-save-routing
   Prompts "/context-save wintermute progress". Asserts the Skill tool was invoked with skill:"context-save" AND a file landed in the checkpoints dir. Guards against future upstream collisions (if Claude Code ships /context-save as a built-in, this fails).

2. context-save-then-restore-roundtrip
   Two slash commands in one session: /context-save <marker>, then /context-restore. Asserts both Skill invocations happened AND restore output contains the magic marker from the save.

3. context-restore-fragment-match
   Seeds three saves (alpha, middle-payments, omega). Runs /context-restore payments. Asserts the payments file loaded and the other two did NOT leak into output. Proves fragment-matching works (previously untested — we only tested the "newest" default).

4. context-restore-empty-state
   No saves seeded. /context-restore should produce a graceful "no saved contexts yet"-style message, not crash or list cwd.

5. context-restore-list-delegates
   /context-restore list should redirect to /context-save list (our explicit design: list lives on the save side). Asserts the output mentions "context-save list".

6. context-restore-legacy-compat
   Seeds a pre-rename save file (old /checkpoint format) in the checkpoints/ dir. Runs /context-restore. Asserts the legacy content loads cleanly. Proves the storage-path stability promise (users' old saves still work).

7. context-save-list-current-branch
   Seeds saves on 3 branches (main, feat/alpha, feat/beta). Current branch is main. Asserts list shows main, hides others.

8. context-save-list-all-branches
   Same seed. /context-save list --all. Asserts all 3 branches show up in output.

touchfiles.ts: all 8 registered in both E2E_TOUCHFILES and E2E_TIERS as 'periodic'. Touchfile deps are scoped per-test (save-only tests don't run when only context-restore changes, etc.).

Coverage jump: smoke-test level (~5/10) → truly E2E (~9.5/10) for the context-skills surface area. Combined with the 21 Tier-2 hardening tests (free, 142ms) from the prior commit, every non-trivial code path has either a live-fire assertion or a bash-level unit test.
This commit is contained in:
@@ -116,6 +116,19 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'context-save-writes-file': ['context-save/**', 'bin/gstack-slug'],
|
||||
'context-restore-loads-latest': ['context-restore/**', 'bin/gstack-slug'],
|
||||
|
||||
// Context skills E2E (live-fire, Skill-tool routing path) — see
|
||||
// test/skill-e2e-context-skills.test.ts. These are periodic-tier because
|
||||
// each one spawns claude -p and costs ~$0.20-$0.40. Collectively they
|
||||
// verify the thing the /checkpoint → /context-save rename was for.
|
||||
'context-save-routing': ['context-save/**', 'scripts/resolvers/preamble.ts'],
|
||||
'context-save-then-restore-roundtrip': ['context-save/**', 'context-restore/**', 'bin/gstack-slug'],
|
||||
'context-restore-fragment-match': ['context-restore/**'],
|
||||
'context-restore-empty-state': ['context-restore/**'],
|
||||
'context-restore-list-delegates': ['context-restore/**'],
|
||||
'context-restore-legacy-compat': ['context-restore/**'],
|
||||
'context-save-list-current-branch': ['context-save/**'],
|
||||
'context-save-list-all-branches': ['context-save/**'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
@@ -259,6 +272,16 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'context-save-writes-file': 'gate', // /context-save writes a file
|
||||
'context-restore-loads-latest': 'gate', // Cross-branch newest-by-filename restore
|
||||
|
||||
// Context skills live-fire — periodic (each test spawns claude -p, ~$0.20-$0.40)
|
||||
'context-save-routing': 'periodic', // Proves /context-save routes via Skill tool
|
||||
'context-save-then-restore-roundtrip': 'periodic', // Full cycle in one session
|
||||
'context-restore-fragment-match': 'periodic', // /context-restore <fragment>
|
||||
'context-restore-empty-state': 'periodic', // Graceful zero-saves message
|
||||
'context-restore-list-delegates': 'periodic', // /context-restore list redirect
|
||||
'context-restore-legacy-compat': 'periodic', // Pre-rename files still load
|
||||
'context-save-list-current-branch': 'periodic', // Default branch filter
|
||||
'context-save-list-all-branches': 'periodic', // --all flag
|
||||
|
||||
// Ship — gate (end-to-end ship path)
|
||||
'ship-base-branch': 'gate',
|
||||
'ship-local-workflow': 'gate',
|
||||
|
||||
@@ -0,0 +1,451 @@
|
||||
/**
|
||||
* Tier-1 live-fire E2E for /context-save and /context-restore.
|
||||
*
|
||||
* These spawn `claude -p "/context-save ..."` with the Skill tool enabled
|
||||
* and the skill installed in the workdir's .claude/skills/. Unlike the
|
||||
* older hand-fed-section tests, these exercise the ROUTING path — the
|
||||
* exact thing that broke with the /checkpoint name collision and the
|
||||
* whole reason this rename exists. If /context-save stops routing to
|
||||
* the skill (e.g., upstream ships a built-in by that name), these fail.
|
||||
*
|
||||
* Periodic tier. ~$0.20-$0.40 per test, ~$2 total per run.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-context-skills');
|
||||
|
||||
// Shared install helper: copy both skill files + bin scripts + routing CLAUDE.md
|
||||
// into a tmp workdir. Matches the pattern from skill-routing-e2e.test.ts so
|
||||
// claude -p discovers the skills via .claude/skills/ auto-scan.
|
||||
/**
 * Create a throwaway git repository under the OS temp dir and install the
 * context-save / context-restore skills, the gstack bin scripts, and a routing
 * CLAUDE.md into it.
 *
 * @param suffix Label embedded in the temp directory name.
 * @returns The workdir path, the GSTACK_HOME path nested inside it, and a slug
 *          derived from the workdir's basename.
 */
function setupWorkdir(suffix: string): { workDir: string; gstackHome: string; slug: string } {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), `skill-e2e-ctx-${suffix}-`));
  const gstackHome = path.join(workDir, '.gstack-home');

  // Git setup is best-effort: the spawnSync results are not inspected.
  const git = (...args: string[]) =>
    spawnSync('git', args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });

  git('init', '-b', 'main');
  git('config', 'user.email', 'test@test.com');
  git('config', 'user.name', 'Test');
  fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
  git('add', '.');
  git('commit', '-m', 'initial');

  // Skills go under .claude/skills/<name>/SKILL.md inside the workdir.
  ['context-save', 'context-restore'].forEach((skillName) => {
    const installDir = path.join(workDir, '.claude', 'skills', skillName);
    fs.mkdirSync(installDir, { recursive: true });
    fs.copyFileSync(path.join(ROOT, skillName, 'SKILL.md'), path.join(installDir, 'SKILL.md'));
  });

  // Bin scripts: copy whichever of these exist in the repo and mark them executable.
  const binDir = path.join(workDir, 'bin');
  fs.mkdirSync(binDir, { recursive: true });
  const binScripts = [
    'gstack-timeline-log', 'gstack-timeline-read', 'gstack-slug',
    'gstack-learnings-log', 'gstack-learnings-search',
    'gstack-update-check', 'gstack-config', 'gstack-repo-mode',
  ];
  for (const scriptName of binScripts) {
    const source = path.join(ROOT, 'bin', scriptName);
    if (!fs.existsSync(source)) continue;
    const target = path.join(binDir, scriptName);
    fs.copyFileSync(source, target);
    fs.chmodSync(target, 0o755);
  }

  // Routing CLAUDE.md: tells the model to reach for the Skill tool first and
  // where GSTACK_HOME, the bin scripts, and the skill files live.
  const claudeMd = [
    '# Project Instructions',
    '',
    '## Skill routing',
    '',
    "When the user's request matches an available skill, ALWAYS invoke it using the Skill",
    'tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.',
    '',
    'Key routing rules:',
    '- Save progress, save state, save my work → invoke context-save',
    '- Resume, where was I, pick up where I left off → invoke context-restore',
    '',
    'Environment:',
    `- Use GSTACK_HOME="${gstackHome}" for all gstack bin scripts.`,
    '- The bin scripts are at ./bin/ (relative to this directory).',
    '- The skill files are at ./.claude/skills/context-save/SKILL.md and',
    './.claude/skills/context-restore/SKILL.md.',
    '',
  ].join('\n');
  fs.writeFileSync(path.join(workDir, 'CLAUDE.md'), claudeMd);

  // Slug = workdir basename with everything outside [a-zA-Z0-9._-] stripped.
  const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
  return { workDir, gstackHome, slug };
}
|
||||
|
||||
// Helper: seed a saved-context file into the storage dir.
|
||||
/**
 * Write a saved-context markdown file into
 * `<gstackHome>/projects/<slug>/checkpoints/<filename>`.
 *
 * The file is a `---`-delimited frontmatter block (one `key: value` line per
 * entry, in insertion order) followed immediately by `body`.
 *
 * @param gstackHome  Root of the gstack state directory.
 * @param slug        Project slug used as the directory name under `projects/`.
 * @param filename    Name of the file to create inside `checkpoints/`.
 * @param frontmatter Key/value pairs rendered verbatim as frontmatter lines.
 * @param body        Text appended after the closing `---` line.
 */
function seedSave(gstackHome: string, slug: string, filename: string, frontmatter: Record<string, string>, body: string) {
  const checkpointsDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
  fs.mkdirSync(checkpointsDir, { recursive: true });

  const fields = Object.keys(frontmatter)
    .map((key) => `${key}: ${frontmatter[key]}`)
    .join('\n');
  // The trailing '' yields the newline after the closing delimiter.
  const header = ['---', fields, '---', ''].join('\n');

  fs.writeFileSync(path.join(checkpointsDir, filename), header + body);
}
|
||||
|
||||
// Helper: extract the list of Skill tool invocations from the transcript.
|
||||
/**
 * Collect the `skill` argument of every Skill tool call in a run result.
 *
 * Calls to other tools are ignored, as are Skill calls whose input is missing
 * or has a falsy `skill` field. Order follows `result.toolCalls`.
 *
 * @param result Object exposing the run's `toolCalls` array.
 * @returns Skill names in invocation order (duplicates preserved).
 */
function skillCalls(result: { toolCalls: Array<{ tool: string; input: any }> }): string[] {
  const names: string[] = [];
  for (const call of result.toolCalls) {
    if (call.tool !== 'Skill') continue;
    const name = call.input?.skill;
    if (name) names.push(name);
  }
  return names;
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
// Live-fire E2E suite
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
|
||||
describeIfSelected('Context Skills E2E (live-fire)', [
|
||||
'context-save-routing',
|
||||
'context-save-then-restore-roundtrip',
|
||||
'context-restore-fragment-match',
|
||||
'context-restore-empty-state',
|
||||
'context-restore-list-delegates',
|
||||
'context-restore-legacy-compat',
|
||||
'context-save-list-current-branch',
|
||||
'context-save-list-all-branches',
|
||||
], () => {
|
||||
afterAll(() => { finalizeEvalCollector(evalCollector); });
|
||||
|
||||
// ── 1. Routing: /context-save actually invokes the Skill tool ────────
|
||||
testConcurrentIfSelected('context-save-routing', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('routing');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    const result = await runSkillTest({
      prompt: `/context-save wintermute progress

Save my current working state with the title "wintermute progress". Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 12,
      allowedTools: ['Skill', 'Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'context-save-routing',
      runId,
    });

    logCost('context-save-routing', result);

    // Routing check: the Skill tool must have been called with skill "context-save".
    const invokedSkills = skillCalls(result);
    const routedToContextSave = invokedSkills.includes('context-save');
    // File should also be written to the storage dir.
    const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
    const files = fs.existsSync(checkpointDir)
      ? fs.readdirSync(checkpointDir).filter((f) => f.endsWith('.md'))
      : [];
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-save routes via Skill tool', 'Context Skills E2E', result, {
      passed: exitOk && routedToContextSave && files.length > 0,
    });

    expect(exitOk).toBe(true);
    expect(routedToContextSave).toBe(true);
    expect(files.length).toBeGreaterThan(0);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 180_000);
|
||||
|
||||
// ── 2. Round-trip: save then restore in the same session ─────────────
|
||||
testConcurrentIfSelected('context-save-then-restore-roundtrip', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('roundtrip');
  const magicMarker = 'wintermute-roundtrip-MX7FQZ';

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Stage a change so /context-save has something to capture.
    fs.writeFileSync(path.join(workDir, 'feature.ts'), `// ${magicMarker}\nexport const X = 1;\n`);
    spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });

    const result = await runSkillTest({
      prompt: `Two steps:

1. Run /context-save ${magicMarker} — save the current state.
2. Run /context-restore — load the most recent saved state and report what it contains.

Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke both skills via the Skill tool. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 20,
      allowedTools: ['Skill', 'Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 180_000,
      testName: 'context-save-then-restore-roundtrip',
      runId,
    });

    logCost('context-save-then-restore-roundtrip', result);

    // Both skills must have been invoked through the Skill tool.
    const invokedSkills = skillCalls(result);
    const bothRouted = invokedSkills.includes('context-save') && invokedSkills.includes('context-restore');
    const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
    const files = fs.existsSync(checkpointDir)
      ? fs.readdirSync(checkpointDir).filter((f) => f.endsWith('.md'))
      : [];
    // Case-insensitive search for the marker in the final output.
    const restoreMentionsTitle = (result.output ?? '').toLowerCase().includes(magicMarker.toLowerCase());
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'save-then-restore round-trip', 'Context Skills E2E', result, {
      passed: exitOk && bothRouted && files.length > 0 && restoreMentionsTitle,
    });

    expect(exitOk).toBe(true);
    expect(bothRouted).toBe(true);
    expect(files.length).toBeGreaterThan(0);
    expect(restoreMentionsTitle).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 240_000);
|
||||
|
||||
// ── 3. /context-restore <fragment> loads the matching save ───────────
|
||||
testConcurrentIfSelected('context-restore-fragment-match', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('fragment');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Seed three saves with distinct titles; each body carries a unique token
    // so the output can be checked for exactly which file was loaded.
    seedSave(gstackHome, slug, '20260101-120000-alpha-feature.md',
      { status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-01-01T12:00:00Z' },
      '## Working on: alpha feature\n\n### Summary\nAlpha content FRAGMATCH_ALPHA_BUILD\n');
    seedSave(gstackHome, slug, '20260202-120000-middle-payments.md',
      { status: 'in-progress', branch: 'feat/payments', timestamp: '2026-02-02T12:00:00Z' },
      '## Working on: middle payments\n\n### Summary\nPayments content FRAGMATCH_PAYMENTS_BUILD\n');
    seedSave(gstackHome, slug, '20260303-120000-omega-release.md',
      { status: 'in-progress', branch: 'feat/omega', timestamp: '2026-03-03T12:00:00Z' },
      '## Working on: omega release\n\n### Summary\nOmega content FRAGMATCH_OMEGA_BUILD\n');

    const result = await runSkillTest({
      prompt: `Run /context-restore payments — load the saved context whose title contains "payments". Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Report the content of the loaded file. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 10,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'context-restore-fragment-match',
      runId,
    });

    logCost('context-restore-fragment-match', result);

    const out = result.output ?? '';
    const loadedPayments = out.includes('FRAGMATCH_PAYMENTS_BUILD');
    // Neither of the non-matching saves may leak into the output.
    const didNotLoadOthers = !out.includes('FRAGMATCH_ALPHA_BUILD') && !out.includes('FRAGMATCH_OMEGA_BUILD');
    const routedToRestore = skillCalls(result).includes('context-restore');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-restore <fragment> match', 'Context Skills E2E', result, {
      passed: exitOk && routedToRestore && loadedPayments && didNotLoadOthers,
    });

    expect(exitOk).toBe(true);
    expect(routedToRestore).toBe(true);
    expect(loadedPayments).toBe(true);
    expect(didNotLoadOthers).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 180_000);
|
||||
|
||||
// ── 4. /context-restore with zero saves → graceful empty-state ───────
|
||||
testConcurrentIfSelected('context-restore-empty-state', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('empty');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Ensure the storage dir is empty or missing — setupWorkdir doesn't seed.
    const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
    expect(fs.existsSync(checkpointDir)).toBe(false);

    const result = await runSkillTest({
      prompt: `Run /context-restore — there are no saved contexts yet. Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 8,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 90_000,
      testName: 'context-restore-empty-state',
      runId,
    });

    logCost('context-restore-empty-state', result);

    const out = result.output ?? '';
    const gracefulMessage = /no saved context|no contexts? yet|nothing to restore|NO_CHECKPOINTS/i.test(out);
    // Mention of "error" in the graceful message is fine. Note this flag only
    // feeds the recorded `passed` value; it is true whenever gracefulMessage is.
    const noCrash = !/error|exception|undefined/i.test(out) || gracefulMessage;
    const routedToRestore = skillCalls(result).includes('context-restore');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-restore empty state', 'Context Skills E2E', result, {
      passed: exitOk && routedToRestore && gracefulMessage && noCrash,
    });

    expect(exitOk).toBe(true);
    expect(routedToRestore).toBe(true);
    expect(gracefulMessage).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 150_000);
|
||||
|
||||
// ── 5. /context-restore list redirects to /context-save list ─────────
|
||||
testConcurrentIfSelected('context-restore-list-delegates', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('delegates');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // One seeded save so the storage dir is not empty for this run.
    seedSave(gstackHome, slug, '20260101-120000-seed.md',
      { status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
      '## Working on: seed\n');

    const result = await runSkillTest({
      prompt: `Run /context-restore list. Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 8,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 90_000,
      testName: 'context-restore-list-delegates',
      runId,
    });

    logCost('context-restore-list-delegates', result);

    const out = result.output ?? '';
    // The skill should tell the user to use /context-save list instead.
    const mentionsSaveList = /context-save list/i.test(out);
    const routedToRestore = skillCalls(result).includes('context-restore');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-restore list delegates', 'Context Skills E2E', result, {
      passed: exitOk && routedToRestore && mentionsSaveList,
    });

    expect(exitOk).toBe(true);
    expect(routedToRestore).toBe(true);
    expect(mentionsSaveList).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 150_000);
|
||||
|
||||
// ── 6. Legacy compat: pre-rename save files still load ───────────────
|
||||
testConcurrentIfSelected('context-restore-legacy-compat', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('legacy');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Seed a save file in the pre-rename format (exactly how old /checkpoint
    // wrote them). The storage dir name is still "checkpoints/" — kept for
    // exactly this reason.
    seedSave(gstackHome, slug, '20260301-120000-legacy-pre-rename-work.md',
      {
        status: 'in-progress',
        branch: 'feat/pre-rename',
        timestamp: '2026-03-01T12:00:00Z',
        session_duration_s: '3600',
      },
      '## Working on: legacy pre-rename work\n\n### Summary\nWork saved by OLD_CHECKPOINT_SKILL_LEGACYCOMPAT before the rename.\n\n### Remaining Work\n1. Item from the before-times.\n');

    const result = await runSkillTest({
      prompt: `Run /context-restore — load the most recent saved context. Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 8,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'context-restore-legacy-compat',
      runId,
    });

    logCost('context-restore-legacy-compat', result);

    const out = result.output ?? '';
    // The unique token from the seeded body must surface in the output.
    const loadedLegacy = out.includes('OLD_CHECKPOINT_SKILL_LEGACYCOMPAT');
    const routedToRestore = skillCalls(result).includes('context-restore');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'legacy /checkpoint file loads via /context-restore', 'Context Skills E2E', result, {
      passed: exitOk && routedToRestore && loadedLegacy,
    });

    expect(exitOk).toBe(true);
    expect(routedToRestore).toBe(true);
    expect(loadedLegacy).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 180_000);
|
||||
|
||||
// ── 7. /context-save list: default filters to current branch ─────────
|
||||
testConcurrentIfSelected('context-save-list-current-branch', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('list-current');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Seed 3 files on 3 different branches. Current branch is "main".
    seedSave(gstackHome, slug, '20260101-120000-main-work.md',
      { status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
      '## Working on: main work LISTCURR_MAIN_TOKEN\n');
    seedSave(gstackHome, slug, '20260202-120000-feat-alpha.md',
      { status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-02-02T12:00:00Z' },
      '## Working on: alpha LISTCURR_ALPHA_TOKEN\n');
    seedSave(gstackHome, slug, '20260303-120000-feat-beta.md',
      { status: 'in-progress', branch: 'feat/beta', timestamp: '2026-03-03T12:00:00Z' },
      '## Working on: beta LISTCURR_BETA_TOKEN\n');

    const result = await runSkillTest({
      prompt: `Run /context-save list — list the saved contexts for the CURRENT branch only (not --all). The current branch is "main". Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 10,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'context-save-list-current-branch',
      runId,
    });

    logCost('context-save-list-current-branch', result);

    const out = result.output ?? '';
    // Should mention the main-branch save, NOT the feat/alpha or feat/beta saves.
    // NOTE(review): the /main/i fallback means any mention of "main" satisfies
    // showsMain; the hides* checks carry most of the weight here.
    const showsMain = /main-work|LISTCURR_MAIN/i.test(out) || /main/i.test(out);
    const hidesAlpha = !/LISTCURR_ALPHA/i.test(out) && !/feat-alpha/i.test(out) && !/alpha/i.test(out);
    const hidesBeta = !/LISTCURR_BETA/i.test(out) && !/feat-beta/i.test(out) && !/beta/i.test(out);
    const routed = skillCalls(result).includes('context-save');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-save list (current branch default)', 'Context Skills E2E', result, {
      passed: exitOk && routed && showsMain && hidesAlpha && hidesBeta,
    });

    expect(exitOk).toBe(true);
    expect(routed).toBe(true);
    expect(showsMain).toBe(true);
    expect(hidesAlpha).toBe(true);
    expect(hidesBeta).toBe(true);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 180_000);
|
||||
|
||||
// ── 8. /context-save list --all: shows every branch ──────────────────
|
||||
testConcurrentIfSelected('context-save-list-all-branches', async () => {
  const { workDir, gstackHome, slug } = setupWorkdir('list-all');

  // try/finally: the temp workdir is removed even when runSkillTest or an
  // assertion throws. Previously cleanup only ran after every expect passed.
  try {
    // Seed one save per branch: main, feat/alpha, feat/beta.
    seedSave(gstackHome, slug, '20260101-120000-main-work.md',
      { status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
      '## Working on: main LISTALL_MAIN_TOKEN\n');
    seedSave(gstackHome, slug, '20260202-120000-feat-alpha.md',
      { status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-02-02T12:00:00Z' },
      '## Working on: alpha LISTALL_ALPHA_TOKEN\n');
    seedSave(gstackHome, slug, '20260303-120000-feat-beta.md',
      { status: 'in-progress', branch: 'feat/beta', timestamp: '2026-03-03T12:00:00Z' },
      '## Working on: beta LISTALL_BETA_TOKEN\n');

    const result = await runSkillTest({
      prompt: `Run /context-save list --all — list saved contexts from ALL branches, not just the current one. Use GSTACK_HOME="${gstackHome}" and the bin scripts at ./bin/. Invoke via the Skill tool. Report the full list. Do NOT use AskUserQuestion.`,
      workingDirectory: workDir,
      maxTurns: 10,
      allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'context-save-list-all-branches',
      runId,
    });

    logCost('context-save-list-all-branches', result);

    const out = result.output ?? '';
    // With --all, output should surface all three branches. Check for branch names.
    const branchesShown = [
      /main/i.test(out),
      /feat[-/]alpha|alpha/i.test(out),
      /feat[-/]beta|beta/i.test(out),
    ].filter(Boolean).length;
    const routed = skillCalls(result).includes('context-save');
    // Hitting the turn cap is tolerated as long as the checks below hold.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, 'context-save list --all', 'Context Skills E2E', result, {
      passed: exitOk && routed && branchesShown === 3,
    });

    expect(exitOk).toBe(true);
    expect(routed).toBe(true);
    expect(branchesShown).toBe(3);
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test result.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  }
}, 180_000);
|
||||
});
|
||||
Reference in New Issue
Block a user