mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-05 21:25:27 +02:00
merge: integrate origin/main (v1.1.3.0) — /checkpoint → /context-save + /context-restore rename
Main shipped v1.1.3.0 fixing Claude Code's native /checkpoint alias shadowing gstack's skill. The old /checkpoint directory is gone, replaced by context-save/ and context-restore/. Storage path (~/.gstack/projects/$SLUG/checkpoints/) is unchanged, so existing saved contexts still load. Conflicts: - VERSION / package.json: kept 1.2.0.0 (above main's 1.1.3.0) - CHANGELOG: preserved 1.2.0.0 at top, inserted 1.1.3.0 below - scripts/resolvers/preamble.ts: same pattern as prior merges — main's side edited the monolithic file inline; I kept the submodule composition root intact (main's inline changes don't apply to this shape) Ported my continuous-checkpoint and context-health submodule prose to reference the new skill names: - generate-continuous-checkpoint.ts: "/checkpoint resume" → "/context-restore" - generate-context-health.ts: "/checkpoint" → "/context-save" Also updated user-facing prose in: - CHANGELOG.md (1.2.0.0 entry): "/checkpoint resume" → "/context-restore (formerly /checkpoint resume pre-v1.1.3)" - README.md Continuous checkpoint section: same rename Storage paths in generate-context-recovery.ts (`$_PROJ/checkpoints/`) left untouched — per main's v1.1.3.0 notes, the storage directory name stays `checkpoints/` to preserve backward-compat with saved files. Touchfiles.ts auto-merged cleanly — main's context-save-writes-file and context-restore-loads-latest replaced my old checkpoint-save-resume entry. Regenerated SKILL.md files. Ship golden fixtures refreshed. 423 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,349 @@
|
||||
/**
|
||||
* Tier-2 hardening tests for context-save + context-restore.
|
||||
*
|
||||
* These exercise the exact bash snippets from the SKILL.md templates,
|
||||
* without spawning claude -p. Free tier, runs in milliseconds.
|
||||
*
|
||||
* Covers the hardening work from commit 3df8ea86:
|
||||
* - Bash-side title sanitizer (allowlist a-z0-9.-, cap 60, default "untitled")
|
||||
* - Collision-safe filenames (random suffix on same-second double-save)
|
||||
* - head -20 cap on the restore-flow directory listing
|
||||
* - Migration HOME unset guard
|
||||
* - Empty-set "NO_CHECKPOINTS" fallback
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Repo root — assumes this test file sits one directory below it (TODO confirm layout).
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// The exact sanitize+collision bash used by context-save/SKILL.md Step 4.
// Kept in sync with context-save/SKILL.md.tmpl. If the template changes
// this helper out of alignment, the title-sanitize tests fail — intended.
//
// Pipeline: lowercase → squeeze whitespace to '-' → strip to the a-z0-9.-
// allowlist → cap at 60 chars, with "untitled" as the fallback for an empty
// result. On a same-second filename collision, a 4-char random suffix is
// appended (PID hex as the /dev/urandom fallback). Emits TITLE_SLUG= and
// FILE= lines for parseKV().
const TITLE_BASH = `
RAW="\${TITLE_RAW:-untitled}"
TITLE_SLUG=$(printf '%s' "$RAW" | tr '[:upper:]' '[:lower:]' | tr -s ' \\t' '-' | tr -cd 'a-z0-9.-' | cut -c1-60)
TITLE_SLUG="\${TITLE_SLUG:-untitled}"
FILE="\${CHECKPOINT_DIR}/\${TIMESTAMP}-\${TITLE_SLUG}.md"
if [ -e "$FILE" ]; then
SUFFIX=$(LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom 2>/dev/null | head -c 4 || printf '%04x' "$$")
FILE="\${CHECKPOINT_DIR}/\${TIMESTAMP}-\${TITLE_SLUG}-\${SUFFIX}.md"
fi
echo "TITLE_SLUG=$TITLE_SLUG"
echo "FILE=$FILE"
`;
|
||||
|
||||
// The exact find + sort + head used by context-restore/SKILL.md Step 1.
// Prints "NO_CHECKPOINTS" when the directory is missing or holds no *.md
// files; otherwise lists at most 20 files, newest-first by filename
// (sort -r on the timestamp-prefixed names — deliberately NOT mtime).
const RESTORE_FIND_BASH = `
if [ ! -d "$CHECKPOINT_DIR" ]; then
echo "NO_CHECKPOINTS"
else
FILES=$(find "$CHECKPOINT_DIR" -maxdepth 1 -name "*.md" -type f 2>/dev/null | sort -r | head -20)
if [ -z "$FILES" ]; then
echo "NO_CHECKPOINTS"
else
echo "$FILES"
fi
fi
`;
|
||||
|
||||
function runBash(script: string, env: Record<string, string>): { stdout: string; stderr: string; exitCode: number } {
|
||||
const result = spawnSync('bash', ['-c', script], {
|
||||
env: { ...process.env, ...env },
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
timeout: 5000,
|
||||
});
|
||||
return {
|
||||
stdout: result.stdout.toString(),
|
||||
stderr: result.stderr.toString(),
|
||||
exitCode: result.status ?? 1,
|
||||
};
|
||||
}
|
||||
|
||||
function parseKV(stdout: string): Record<string, string> {
|
||||
const out: Record<string, string> = {};
|
||||
for (const line of stdout.split('\n')) {
|
||||
const eq = line.indexOf('=');
|
||||
if (eq > 0) out[line.slice(0, eq)] = line.slice(eq + 1);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// ─── Title sanitizer ───────────────────────────────────────────────────────
|
||||
|
||||
// Each test drives the exact TITLE_BASH snippet with a hostile or edge-case
// TITLE_RAW and inspects the TITLE_SLUG / FILE key-value output.
describe('context-save: title sanitizer', () => {
  let tmp: string; // fresh throwaway checkpoint dir per test
  beforeEach(() => { tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'ctx-san-')); });
  // Cleanup is best-effort: a failed rm must not fail the run.
  afterEach(() => { try { fs.rmSync(tmp, { recursive: true, force: true }); } catch {} });

  test('shell metachars stripped to allowlist', () => {
    // Command-injection shaped input: none of the metachars may survive.
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: '$(rm -rf /) `whoami` ; echo pwned',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toMatch(/^[a-z0-9.-]*$/);
    expect(kv.TITLE_SLUG).not.toContain('$');
    expect(kv.TITLE_SLUG).not.toContain('(');
    expect(kv.TITLE_SLUG).not.toContain(';');
    expect(kv.TITLE_SLUG).not.toContain('`');
  });

  test('path traversal attempt stripped', () => {
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: '../../../etc/passwd',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).not.toContain('/');
    // Slashes stripped, dots retained — result is contained within the
    // checkpoint directory (no path escape possible). The exact number of dots
    // depends on the input; what matters is the file stays inside $CHECKPOINT_DIR.
    expect(kv.FILE.startsWith(`${tmp}/`)).toBe(true);
    expect(path.dirname(kv.FILE)).toBe(tmp);
  });

  test('uppercase lowercased', () => {
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'Wintermute Progress',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toBe('wintermute-progress');
  });

  test('whitespace collapsed to single hyphen', () => {
    // tr -s squeezes runs of spaces/tabs into one hyphen.
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'foo bar\t\tbaz',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toBe('foo-bar-baz');
  });

  test('length capped at 60 chars', () => {
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'a'.repeat(200),
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG.length).toBe(60);
  });

  test('empty title falls back to "untitled"', () => {
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: '',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toBe('untitled');
  });

  test('only-special-chars title falls back to "untitled"', () => {
    // Every char is stripped by the allowlist → second :-untitled default kicks in.
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: '!@#$%^&*()+=<>?',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toBe('untitled');
  });

  test('unicode stripped to ASCII allowlist', () => {
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: '日本語 emoji 🚀 test',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toMatch(/^[a-z0-9.-]*$/);
    // Must contain the ASCII words that survived
    expect(kv.TITLE_SLUG).toContain('emoji');
    expect(kv.TITLE_SLUG).toContain('test');
  });

  test('numbers + dots + hyphens preserved', () => {
    // Version-style titles pass through unchanged.
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'v1.0.1-release-notes',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.TITLE_SLUG).toBe('v1.0.1-release-notes');
  });
});
|
||||
|
||||
// ─── Filename collision handling ───────────────────────────────────────────
|
||||
|
||||
// Collision handling: same timestamp + same title must never overwrite a
// prior save (append-only contract); a random suffix disambiguates instead.
describe('context-save: filename collision', () => {
  let tmp: string; // fresh throwaway checkpoint dir per test
  beforeEach(() => { tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'ctx-col-')); });
  afterEach(() => { try { fs.rmSync(tmp, { recursive: true, force: true }); } catch {} });

  test('first save with title uses predictable path', () => {
    // No pre-existing file → no suffix; path is TIMESTAMP-TITLE.md exactly.
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'foo',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    expect(kv.FILE).toBe(`${tmp}/20260419-120000-foo.md`);
  });

  test('second save same-second same-title gets random suffix', () => {
    // Pre-seed: file already exists at the predictable path.
    fs.writeFileSync(`${tmp}/20260419-120000-foo.md`, 'prior save');
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'foo',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    // Path must differ (append-only contract).
    expect(kv.FILE).not.toBe(`${tmp}/20260419-120000-foo.md`);
    // Suffix format: base-XXXX.md where XXXX matches the suffix allowlist.
    // (tmp's '/' and '.' are escaped so the path is matched literally.)
    expect(kv.FILE).toMatch(new RegExp(`^${tmp.replace(/[/.]/g, '\\$&')}/20260419-120000-foo-[a-z0-9]+\\.md$`));
  });

  test('collision suffix preserves append-only — prior file intact', () => {
    const priorPath = `${tmp}/20260419-120000-foo.md`;
    fs.writeFileSync(priorPath, 'critical prior save');
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'foo',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    // Write a new file at the collision-safe path.
    fs.writeFileSync(kv.FILE, 'new save');
    // Prior file must still exist and be untouched.
    expect(fs.readFileSync(priorPath, 'utf-8')).toBe('critical prior save');
    expect(fs.readFileSync(kv.FILE, 'utf-8')).toBe('new save');
    // Directory should have exactly 2 files.
    expect(fs.readdirSync(tmp).length).toBe(2);
  });

  test('different titles same second — no collision, no suffix', () => {
    fs.writeFileSync(`${tmp}/20260419-120000-foo.md`, 'first save');
    const kv = parseKV(runBash(TITLE_BASH, {
      TITLE_RAW: 'bar',
      CHECKPOINT_DIR: tmp,
      TIMESTAMP: '20260419-120000',
    }).stdout);
    // Different title → predictable path, no suffix.
    expect(kv.FILE).toBe(`${tmp}/20260419-120000-bar.md`);
  });
});
|
||||
|
||||
// ─── Restore flow: head-20 cap + empty-set ─────────────────────────────────
|
||||
|
||||
// Restore listing: missing/empty dirs report NO_CHECKPOINTS, at most 20
// entries are returned, and ordering is by filename prefix — never mtime.
describe('context-restore: find + sort + head cap', () => {
  let tmp: string; // fresh throwaway checkpoint dir per test
  beforeEach(() => { tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'ctx-rest-')); });
  afterEach(() => { try { fs.rmSync(tmp, { recursive: true, force: true }); } catch {} });

  test('missing directory → NO_CHECKPOINTS', () => {
    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: `${tmp}/nonexistent`,
    }).stdout;
    expect(out.trim()).toBe('NO_CHECKPOINTS');
  });

  test('empty directory → NO_CHECKPOINTS', () => {
    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: tmp,
    }).stdout;
    expect(out.trim()).toBe('NO_CHECKPOINTS');
  });

  test('directory with non-.md files → NO_CHECKPOINTS', () => {
    // Only *.md counts as a checkpoint; stray files are ignored.
    fs.writeFileSync(`${tmp}/not-a-save.txt`, 'noise');
    fs.writeFileSync(`${tmp}/.DS_Store`, 'macos');
    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: tmp,
    }).stdout;
    expect(out.trim()).toBe('NO_CHECKPOINTS');
  });

  test('50 .md files → only 20 returned, newest first by filename', () => {
    // Seed 50 files with monotonically increasing timestamps.
    for (let i = 0; i < 50; i++) {
      const ts = `20260419-${String(120000 + i).padStart(6, '0')}`;
      fs.writeFileSync(`${tmp}/${ts}-file${i}.md`, `content ${i}`);
    }
    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: tmp,
    }).stdout;
    const lines = out.trim().split('\n').filter(Boolean);
    expect(lines.length).toBe(20);
    // sort -r → newest first by filename. Highest timestamps (files 30-49).
    expect(lines[0]).toContain('file49');
    expect(lines[19]).toContain('file30');
  });

  test('sort is by filename prefix, NOT mtime', () => {
    // Older filename, newer mtime. Sort -r must still put newer filename first.
    const olderByFilename = `${tmp}/20260101-120000-old.md`;
    const newerByFilename = `${tmp}/20260419-120000-new.md`;
    fs.writeFileSync(olderByFilename, 'old content');
    fs.writeFileSync(newerByFilename, 'new content');
    // Scramble mtimes: older filename gets newer mtime.
    const now = Math.floor(Date.now() / 1000);
    fs.utimesSync(olderByFilename, now, now);
    fs.utimesSync(newerByFilename, now - 86400 * 30, now - 86400 * 30);

    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: tmp,
    }).stdout;
    const lines = out.trim().split('\n').filter(Boolean);
    expect(lines[0]).toBe(newerByFilename);
    expect(lines[1]).toBe(olderByFilename);
  });

  test('no listing-cwd fallback when empty (macOS xargs ls gotcha)', () => {
    // On macOS, `find ... | xargs ls -1t` with zero results falls back to
    // listing the current working directory. Our find|sort|head pattern must
    // NOT have that behavior. Running from a dir with many .md files.
    const out = runBash(RESTORE_FIND_BASH, {
      CHECKPOINT_DIR: tmp,
      // Intentionally: working directory is the gstack repo which has many .md files.
    }).stdout;
    expect(out.trim()).toBe('NO_CHECKPOINTS');
    // Must NOT contain any .md filename from cwd.
    expect(out).not.toContain('SKILL.md');
    expect(out).not.toContain('README.md');
  });
});
|
||||
|
||||
// ─── Migration HOME guard ──────────────────────────────────────────────────
|
||||
|
||||
// The migration script must refuse to run when HOME is unset or empty —
// otherwise path construction like "$HOME/.claude/..." would resolve against
// "/" and could delete the wrong tree.
describe('migration v1.1.3.0: HOME guard', () => {
  // NOTE(review): tmp is created/removed but unused by both tests below —
  // candidate for removal; confirm no fixture dependency before deleting.
  let tmp: string;
  const MIGRATION = path.join(ROOT, 'gstack-upgrade', 'migrations', 'v1.1.3.0.sh');

  beforeEach(() => { tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'ctx-home-')); });
  afterEach(() => { try { fs.rmSync(tmp, { recursive: true, force: true }); } catch {} });

  test('HOME unset → exits 0 with diagnostic, no filesystem changes', () => {
    // Create a file that would be wiped by an HOME="" bug: /.claude/skills/gstack/checkpoint
    // (not actually writable by the test, but we verify the script doesn't TRY).
    // Spawn without HOME in env.
    const env = { PATH: process.env.PATH || '/usr/bin:/bin' } as Record<string, string>;
    const result = spawnSync('bash', [MIGRATION], {
      env,
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 5000,
    });
    // Guard is a benign no-op: exit 0, diagnostic on stderr.
    expect(result.status).toBe(0);
    expect(result.stderr.toString()).toContain('HOME is unset');
  });

  test('HOME="" → exits 0 with diagnostic', () => {
    const result = spawnSync('bash', [MIGRATION], {
      env: { HOME: '', PATH: process.env.PATH || '/usr/bin:/bin' },
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 5000,
    });
    expect(result.status).toBe(0);
    expect(result.stderr.toString()).toContain('HOME is unset or empty');
    // Critical: no stdout (no "Removed stale" messages — nothing touched).
    expect(result.stdout.toString().trim()).toBe('');
  });
});
|
||||
+2
-2
@@ -611,7 +611,7 @@ Skill: </skill-name-if-running>
|
||||
- Background discipline — do NOT announce each commit to the user. They can see
|
||||
`git log` whenever they want.
|
||||
|
||||
**When `/checkpoint resume` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
**When `/context-restore` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
commits on the current branch to reconstruct session state. When `/ship` runs, it
|
||||
filter-squashes WIP commits only (preserving non-WIP commits) via
|
||||
`git rebase --autosquash` so the PR contains clean bisectable commits.
|
||||
@@ -629,7 +629,7 @@ During long-running skill sessions, periodically write a brief `[PROGRESS]` summ
|
||||
|
||||
If you notice you're going in circles — repeating the same diagnostic, re-reading the
|
||||
same file, or trying variants of a failed fix — STOP and reassess. Consider escalating
|
||||
or calling /checkpoint to save progress and start fresh.
|
||||
or calling /context-save to save progress and start fresh.
|
||||
|
||||
This is a soft nudge, not a measurable feature. No thresholds, no enforcement. The
|
||||
goal is self-awareness during long sessions. If the session stays short, skip it.
|
||||
|
||||
+2
-2
@@ -600,7 +600,7 @@ Skill: </skill-name-if-running>
|
||||
- Background discipline — do NOT announce each commit to the user. They can see
|
||||
`git log` whenever they want.
|
||||
|
||||
**When `/checkpoint resume` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
**When `/context-restore` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
commits on the current branch to reconstruct session state. When `/ship` runs, it
|
||||
filter-squashes WIP commits only (preserving non-WIP commits) via
|
||||
`git rebase --autosquash` so the PR contains clean bisectable commits.
|
||||
@@ -618,7 +618,7 @@ During long-running skill sessions, periodically write a brief `[PROGRESS]` summ
|
||||
|
||||
If you notice you're going in circles — repeating the same diagnostic, re-reading the
|
||||
same file, or trying variants of a failed fix — STOP and reassess. Consider escalating
|
||||
or calling /checkpoint to save progress and start fresh.
|
||||
or calling /context-save to save progress and start fresh.
|
||||
|
||||
This is a soft nudge, not a measurable feature. No thresholds, no enforcement. The
|
||||
goal is self-awareness during long sessions. If the session stays short, skip it.
|
||||
|
||||
+2
-2
@@ -602,7 +602,7 @@ Skill: </skill-name-if-running>
|
||||
- Background discipline — do NOT announce each commit to the user. They can see
|
||||
`git log` whenever they want.
|
||||
|
||||
**When `/checkpoint resume` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
**When `/context-restore` runs,** it parses `[gstack-context]` blocks from WIP
|
||||
commits on the current branch to reconstruct session state. When `/ship` runs, it
|
||||
filter-squashes WIP commits only (preserving non-WIP commits) via
|
||||
`git rebase --autosquash` so the PR contains clean bisectable commits.
|
||||
@@ -620,7 +620,7 @@ During long-running skill sessions, periodically write a brief `[PROGRESS]` summ
|
||||
|
||||
If you notice you're going in circles — repeating the same diagnostic, re-reading the
|
||||
same file, or trying variants of a failed fix — STOP and reassess. Consider escalating
|
||||
or calling /checkpoint to save progress and start fresh.
|
||||
or calling /context-save to save progress and start fresh.
|
||||
|
||||
This is a soft nudge, not a measurable feature. No thresholds, no enforcement. The
|
||||
goal is self-awareness during long sessions. If the session stays short, skip it.
|
||||
|
||||
@@ -126,6 +126,10 @@ export async function runSkillTest(options: {
|
||||
runId?: string;
|
||||
/** Model to use. Defaults to claude-sonnet-4-6 (overridable via EVALS_MODEL env). */
|
||||
model?: string;
|
||||
/** Extra env vars merged into the spawned claude -p process. Useful for
|
||||
* per-test GSTACK_HOME overrides so the test doesn't have to spell out
|
||||
* env setup in the prompt itself. */
|
||||
env?: Record<string, string>;
|
||||
}): Promise<SkillTestResult> {
|
||||
const {
|
||||
prompt,
|
||||
@@ -135,6 +139,7 @@ export async function runSkillTest(options: {
|
||||
timeout = 120_000,
|
||||
testName,
|
||||
runId,
|
||||
env: extraEnv,
|
||||
} = options;
|
||||
const model = options.model ?? process.env.EVALS_MODEL ?? 'claude-sonnet-4-6';
|
||||
|
||||
@@ -171,6 +176,7 @@ export async function runSkillTest(options: {
|
||||
|
||||
const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
|
||||
cwd: workingDirectory,
|
||||
env: extraEnv ? { ...process.env, ...extraEnv } : undefined,
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
});
|
||||
|
||||
@@ -113,10 +113,24 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// Learnings
|
||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||
|
||||
// Session Intelligence (timeline, context recovery, checkpoint)
|
||||
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
|
||||
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
|
||||
'checkpoint-save-resume': ['checkpoint/**', 'bin/gstack-slug'],
|
||||
// Session Intelligence (timeline, context recovery, /context-save + /context-restore)
|
||||
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
|
||||
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
|
||||
'context-save-writes-file': ['context-save/**', 'bin/gstack-slug'],
|
||||
'context-restore-loads-latest': ['context-restore/**', 'bin/gstack-slug'],
|
||||
|
||||
// Context skills E2E (live-fire, Skill-tool routing path) — see
|
||||
// test/skill-e2e-context-skills.test.ts. These are periodic-tier because
|
||||
// each one spawns claude -p and costs ~$0.20-$0.40. Collectively they
|
||||
// verify the thing the /checkpoint → /context-save rename was for.
|
||||
'context-save-routing': ['context-save/**', 'scripts/resolvers/preamble.ts'],
|
||||
'context-save-then-restore-roundtrip': ['context-save/**', 'context-restore/**', 'bin/gstack-slug'],
|
||||
'context-restore-fragment-match': ['context-restore/**'],
|
||||
'context-restore-empty-state': ['context-restore/**'],
|
||||
'context-restore-list-delegates': ['context-restore/**'],
|
||||
'context-restore-legacy-compat': ['context-restore/**'],
|
||||
'context-save-list-current-branch': ['context-save/**'],
|
||||
'context-save-list-all-branches': ['context-save/**'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
@@ -262,9 +276,20 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'codex-offered-eng-review': 'gate',
|
||||
|
||||
// Session Intelligence — gate for data flow, periodic for agent integration
|
||||
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
|
||||
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
|
||||
'checkpoint-save-resume': 'gate', // Checkpoint round-trip
|
||||
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
|
||||
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
|
||||
'context-save-writes-file': 'gate', // /context-save writes a file
|
||||
'context-restore-loads-latest': 'gate', // Cross-branch newest-by-filename restore
|
||||
|
||||
// Context skills live-fire — periodic (each test spawns claude -p, ~$0.20-$0.40)
|
||||
'context-save-routing': 'periodic', // Proves /context-save routes via Skill tool
|
||||
'context-save-then-restore-roundtrip': 'periodic', // Full cycle in one session
|
||||
'context-restore-fragment-match': 'periodic', // /context-restore <fragment>
|
||||
'context-restore-empty-state': 'periodic', // Graceful zero-saves message
|
||||
'context-restore-list-delegates': 'periodic', // /context-restore list redirect
|
||||
'context-restore-legacy-compat': 'periodic', // Pre-rename files still load
|
||||
'context-save-list-current-branch': 'periodic', // Default branch filter
|
||||
'context-save-list-all-branches': 'periodic', // --all flag
|
||||
|
||||
// Ship — gate (end-to-end ship path)
|
||||
'ship-base-branch': 'gate',
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Repo root (assumes this file sits one level below it — TODO confirm) and
// the absolute path of the v1.1.3.0 migration script under test.
const ROOT = path.resolve(import.meta.dir, '..');
const MIGRATION = path.join(ROOT, 'gstack-upgrade', 'migrations', 'v1.1.3.0.sh');
|
||||
|
||||
function runMigration(tmpHome: string): { exitCode: number; stdout: string; stderr: string } {
|
||||
const result = spawnSync('bash', [MIGRATION], {
|
||||
env: { ...process.env, HOME: tmpHome },
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
timeout: 10_000,
|
||||
});
|
||||
return {
|
||||
exitCode: result.status ?? 1,
|
||||
stdout: result.stdout.toString(),
|
||||
stderr: result.stderr.toString(),
|
||||
};
|
||||
}
|
||||
|
||||
function setupFakeGstackRoot(tmpHome: string): string {
|
||||
// A real target that the gstack symlink can resolve into.
|
||||
const gstackDir = path.join(tmpHome, '.claude', 'skills', 'gstack');
|
||||
fs.mkdirSync(path.join(gstackDir, 'checkpoint'), { recursive: true });
|
||||
fs.writeFileSync(path.join(gstackDir, 'checkpoint', 'SKILL.md'), '# fake gstack checkpoint\n');
|
||||
return gstackDir;
|
||||
}
|
||||
|
||||
// Ownership guard: the migration may only remove a top-level /checkpoint
// install that it can prove is gstack-owned (symlink resolving into the
// gstack dir, or a dir whose SKILL.md symlinks into it). Anything
// user-owned is preserved. Scenarios A-G cover each shape.
describe('migration v1.1.3.0 — checkpoint ownership guard', () => {
  let tmpHome: string; // fake HOME per test; migration runs entirely inside it

  beforeEach(() => {
    tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-migration-ownership-'));
  });

  afterEach(() => {
    try { fs.rmSync(tmpHome, { recursive: true, force: true }); } catch {}
  });

  test('scenario A: directory symlink into gstack → removed', () => {
    setupFakeGstackRoot(tmpHome);
    const skillsDir = path.join(tmpHome, '.claude', 'skills');
    const gstackCheckpoint = path.join(skillsDir, 'gstack', 'checkpoint');
    const topLevel = path.join(skillsDir, 'checkpoint');
    fs.symlinkSync(gstackCheckpoint, topLevel);

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
    expect(fs.existsSync(topLevel)).toBe(false);
    // Also removes the gstack-owned inner copy (Shape 2 cleanup).
    expect(fs.existsSync(gstackCheckpoint)).toBe(false);
    expect(result.stdout).toContain('Removed stale /checkpoint symlink');
  });

  test('scenario B: directory with SKILL.md symlinked into gstack → removed', () => {
    setupFakeGstackRoot(tmpHome);
    const skillsDir = path.join(tmpHome, '.claude', 'skills');
    const gstackSKILL = path.join(skillsDir, 'gstack', 'checkpoint', 'SKILL.md');
    const topLevel = path.join(skillsDir, 'checkpoint');
    fs.mkdirSync(topLevel, { recursive: true });
    fs.symlinkSync(gstackSKILL, path.join(topLevel, 'SKILL.md'));

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
    expect(fs.existsSync(topLevel)).toBe(false);
    expect(result.stdout).toContain('Removed stale /checkpoint install directory');
  });

  test('scenario C: user-owned regular directory with custom content → preserved', () => {
    setupFakeGstackRoot(tmpHome);
    const skillsDir = path.join(tmpHome, '.claude', 'skills');
    const topLevel = path.join(skillsDir, 'checkpoint');
    fs.mkdirSync(topLevel, { recursive: true });
    // User's own custom skill: regular file, not a symlink.
    fs.writeFileSync(path.join(topLevel, 'SKILL.md'), '# my custom /checkpoint\n');
    fs.writeFileSync(path.join(topLevel, 'extra.txt'), 'user content\n');

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
    expect(fs.existsSync(topLevel)).toBe(true);
    expect(fs.existsSync(path.join(topLevel, 'SKILL.md'))).toBe(true);
    expect(fs.existsSync(path.join(topLevel, 'extra.txt'))).toBe(true);
    expect(result.stdout).toContain('Leaving');
    expect(result.stdout).toContain('not a gstack-owned install');
  });

  test('scenario D: symlink pointing outside gstack → preserved', () => {
    setupFakeGstackRoot(tmpHome);
    const skillsDir = path.join(tmpHome, '.claude', 'skills');
    const topLevel = path.join(skillsDir, 'checkpoint');
    // User's own skill elsewhere on the filesystem.
    const userSkillDir = path.join(tmpHome, 'my-own-skill');
    fs.mkdirSync(userSkillDir, { recursive: true });
    fs.writeFileSync(path.join(userSkillDir, 'SKILL.md'), '# my custom /checkpoint\n');
    fs.symlinkSync(userSkillDir, topLevel);

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
    expect(fs.existsSync(topLevel)).toBe(true);
    // The user's underlying dir is untouched.
    expect(fs.existsSync(path.join(userSkillDir, 'SKILL.md'))).toBe(true);
    expect(result.stdout).toContain('Leaving');
    expect(result.stdout).toContain('outside gstack');
  });

  test('scenario E: nothing to do → no-op exit 0 (idempotent)', () => {
    // No checkpoint install at all. First run: nothing removed.
    setupFakeGstackRoot(tmpHome);
    // Delete the inner gstack/checkpoint to simulate post-upgrade state.
    fs.rmSync(path.join(tmpHome, '.claude', 'skills', 'gstack', 'checkpoint'), { recursive: true, force: true });

    const result1 = runMigration(tmpHome);
    expect(result1.exitCode).toBe(0);

    // Second run: still exit 0, still no-op.
    const result2 = runMigration(tmpHome);
    expect(result2.exitCode).toBe(0);
  });

  test('scenario F: gstack not installed → no-op exit 0', () => {
    // No ~/.claude/skills/gstack/ at all. Also no checkpoint install.
    fs.mkdirSync(path.join(tmpHome, '.claude', 'skills'), { recursive: true });

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
  });

  test('scenario G: SKILL.md is a symlink pointing outside gstack → preserved', () => {
    setupFakeGstackRoot(tmpHome);
    const skillsDir = path.join(tmpHome, '.claude', 'skills');
    const topLevel = path.join(skillsDir, 'checkpoint');
    fs.mkdirSync(topLevel, { recursive: true });
    // A directory containing SKILL.md that's a symlink pointing outside gstack.
    const externalSkill = path.join(tmpHome, 'external', 'SKILL.md');
    fs.mkdirSync(path.dirname(externalSkill), { recursive: true });
    fs.writeFileSync(externalSkill, '# external skill\n');
    fs.symlinkSync(externalSkill, path.join(topLevel, 'SKILL.md'));

    const result = runMigration(tmpHome);
    expect(result.exitCode).toBe(0);
    expect(fs.existsSync(topLevel)).toBe(true);
    expect(fs.existsSync(path.join(topLevel, 'SKILL.md'))).toBe(true);
    expect(result.stdout).toContain('Leaving');
  });
});
|
||||
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* Collision Sentinel — insurance policy against upstream slash-command collisions.
|
||||
*
|
||||
* History: in April 2026 Claude Code shipped /checkpoint as a native alias
|
||||
* for /rewind, silently shadowing the gstack /checkpoint skill. Users
|
||||
* typed /checkpoint expecting to save state; agents routed to the built-in
|
||||
* or confabulated "this is a built-in you need to type directly" and nothing
|
||||
* was saved. We found out from users, not from tests.
|
||||
*
|
||||
* This file is the "never again" test. It enumerates every gstack skill name
|
||||
* from every SKILL.md.tmpl file in the repo and cross-checks against a
|
||||
* per-host list of known built-in slash commands. If any gstack skill name
|
||||
* collides with a host built-in, this test fails and names the collision.
|
||||
*
|
||||
* Maintenance: when Claude Code (or any other host we support) ships a new
|
||||
* built-in slash command, add the name to the host's KNOWN_BUILTINS list
|
||||
* below. If a gstack skill needs to coexist with a built-in anyway (e.g.,
|
||||
* we decide the semantic overlap is acceptable), add it to
|
||||
* KNOWN_COLLISIONS_TOLERATED with a written justification.
|
||||
*
|
||||
* Free tier. ~50ms runtime.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// ─── Host built-in registries ──────────────────────────────────────────────
|
||||
//
|
||||
// One const per host we support. Names are the slash-command identifier WITHOUT
|
||||
// the leading slash. Keep sorted alphabetically within each host so diffs are
|
||||
// reviewable. Cite the source (docs URL, release notes, or "observed") in the
|
||||
// comment next to each entry — future maintainers need to know why an entry
|
||||
// is on the list.
|
||||
|
||||
const KNOWN_BUILTINS: Record<string, string[]> = {
|
||||
'claude-code': [
|
||||
// Slash commands observed in 'claude --help' or cited in docs as of 2026-04.
|
||||
// Sources:
|
||||
// https://code.claude.com/docs/en/checkpointing
|
||||
// https://claudelog.com/mechanics/rewind/
|
||||
// claude --help output
|
||||
// Claude Code skill list dumps from live sessions
|
||||
'agents', // Agent config
|
||||
'bare', // Minimal mode
|
||||
'checkpoint', // Alias of /rewind (the collision that started this file)
|
||||
'clear', // Clear the conversation
|
||||
'compact', // Context compaction
|
||||
'config', // Config UI
|
||||
'context', // Context usage display
|
||||
'continue', // --continue / resume last conversation
|
||||
'cost', // Cost display
|
||||
'exit', // Exit shell
|
||||
'help', // Help
|
||||
'init', // Initialize a new CLAUDE.md file
|
||||
'mcp', // MCP server config
|
||||
'model', // Model selection
|
||||
'permissions', // Permission config
|
||||
'plan', // Plan mode toggle (also Shift+Tab)
|
||||
'quit', // Quit
|
||||
'review', // Review a pull request (BUILT-IN shipped in 2026)
|
||||
'rewind', // Conversation rewind
|
||||
'security-review', // Security audit of pending changes
|
||||
'stats', // Session stats
|
||||
'usage', // API usage stats
|
||||
],
|
||||
// Add codex/kiro/opencode/slate/cursor/openclaw/hermes/factory/gbrain
|
||||
// built-in lists when we encounter collisions. Claude Code is the primary
|
||||
// shadow risk because it's the biggest audience and ships the most
|
||||
// frequently; other hosts collide less often.
|
||||
// TODO: codex CLI built-ins (login, logout, exec, review, etc. — but we
|
||||
// invoke codex from gstack, we don't install skills INTO codex the same
|
||||
// way, so this is lower priority).
|
||||
};
|
||||
|
||||
// Collisions we know about and have consciously decided to tolerate. The
|
||||
// justification is mandatory — reviewers need the context next time the
|
||||
// user reports confusion, and blind additions to this map should fail code
|
||||
// review.
|
||||
const KNOWN_COLLISIONS_TOLERATED: Record<string, string> = {
|
||||
// skill name → one-line justification + action plan
|
||||
'review': 'gstack /review (pre-landing diff analysis) pre-dates the Claude Code built-in /review (Review a pull request). The gstack skill is much richer (SQL safety, LLM trust boundary, specialist dispatch). Watch for user confusion reports and consider renaming to /diff-review or /pre-land if the collision bites. TODO: track user-reported incidents in TODOS.md.',
|
||||
};
|
||||
|
||||
// Generic-verb watchlist: skill names that are single common verbs, which
|
||||
// are at higher risk of being claimed by a future host built-in. Advisory
|
||||
// only — the test prints a warning but doesn't fail. If a name here stops
|
||||
// being safe, move it to the appropriate host's KNOWN_BUILTINS list.
|
||||
const GENERIC_VERB_WATCHLIST = [
|
||||
'save', 'load', 'run', 'test', 'build', 'deploy',
|
||||
'fork', 'branch', 'commit', 'push', 'pull', 'merge', 'rebase',
|
||||
'start', 'stop', 'restart', 'reset', 'pause', 'resume',
|
||||
'show', 'list', 'find', 'search', 'view',
|
||||
'create', 'delete', 'remove', 'update', 'rename',
|
||||
'login', 'logout', 'auth',
|
||||
];
|
||||
|
||||
// ─── Enumerator ────────────────────────────────────────────────────────────
|
||||
|
||||
interface GstackSkill {
|
||||
name: string;
|
||||
templatePath: string;
|
||||
}
|
||||
|
||||
function enumerateGstackSkills(): GstackSkill[] {
|
||||
const skills: GstackSkill[] = [];
|
||||
// Scan one level deep for */SKILL.md.tmpl plus root SKILL.md.tmpl.
|
||||
const candidates = [
|
||||
path.join(ROOT, 'SKILL.md.tmpl'),
|
||||
...fs.readdirSync(ROOT, { withFileTypes: true })
|
||||
.filter((d) => d.isDirectory())
|
||||
.map((d) => path.join(ROOT, d.name, 'SKILL.md.tmpl')),
|
||||
];
|
||||
for (const tmpl of candidates) {
|
||||
if (!fs.existsSync(tmpl)) continue;
|
||||
const content = fs.readFileSync(tmpl, 'utf-8');
|
||||
// Parse the 'name:' field from YAML frontmatter.
|
||||
const frontmatter = content.match(/^---\n([\s\S]+?)\n---/);
|
||||
if (!frontmatter) continue;
|
||||
const nameMatch = frontmatter[1].match(/^name:\s*(\S+)/m);
|
||||
if (!nameMatch) continue;
|
||||
skills.push({ name: nameMatch[1].trim(), templatePath: tmpl });
|
||||
}
|
||||
return skills;
|
||||
}
|
||||
|
||||
// ─── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('skill-collision-sentinel', () => {
|
||||
const skills = enumerateGstackSkills();
|
||||
|
||||
test('at least one skill is discovered (sanity)', () => {
|
||||
// If this fails, the enumerator broke, not the collision check.
|
||||
expect(skills.length).toBeGreaterThan(10);
|
||||
});
|
||||
|
||||
test('no duplicate skill names within gstack', () => {
|
||||
const seen = new Map<string, string>();
|
||||
const dupes: string[] = [];
|
||||
for (const { name, templatePath } of skills) {
|
||||
if (seen.has(name)) {
|
||||
dupes.push(`${name} appears in both ${seen.get(name)} and ${templatePath}`);
|
||||
} else {
|
||||
seen.set(name, templatePath);
|
||||
}
|
||||
}
|
||||
if (dupes.length > 0) {
|
||||
throw new Error(`Duplicate skill names:\n ${dupes.join('\n ')}`);
|
||||
}
|
||||
});
|
||||
|
||||
// Hard check: no gstack skill name collides with a known host built-in
|
||||
// unless the collision is explicitly tolerated. This is the test that
|
||||
// would have caught the /checkpoint bug in April 2026.
|
||||
for (const [host, builtins] of Object.entries(KNOWN_BUILTINS)) {
|
||||
test(`no skill name collides with a ${host} built-in (or has written justification)`, () => {
|
||||
const builtinSet = new Set(builtins);
|
||||
const collisions: Array<{ skill: string; builtin: string }> = [];
|
||||
for (const { name } of skills) {
|
||||
if (builtinSet.has(name) && !(name in KNOWN_COLLISIONS_TOLERATED)) {
|
||||
collisions.push({ skill: name, builtin: name });
|
||||
}
|
||||
}
|
||||
if (collisions.length > 0) {
|
||||
const msg = collisions.map(c =>
|
||||
` /${c.skill} collides with ${host} built-in /${c.builtin}.\n` +
|
||||
` Fix: rename the gstack skill (precedent: /checkpoint → /context-save+/context-restore),\n` +
|
||||
` OR add an entry to KNOWN_COLLISIONS_TOLERATED with a written justification.`
|
||||
).join('\n\n');
|
||||
throw new Error(`Found ${collisions.length} unresolved collision(s) with ${host} built-ins:\n\n${msg}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Every KNOWN_COLLISIONS_TOLERATED entry must correspond to a real skill
|
||||
// AND a real built-in. Prevents the exception list from rotting with
|
||||
// stale entries after a rename.
|
||||
test('KNOWN_COLLISIONS_TOLERATED entries are all still active collisions', () => {
|
||||
const skillNames = new Set(skills.map(s => s.name));
|
||||
const allBuiltins = new Set<string>();
|
||||
for (const list of Object.values(KNOWN_BUILTINS)) {
|
||||
for (const name of list) allBuiltins.add(name);
|
||||
}
|
||||
const stale: string[] = [];
|
||||
for (const name of Object.keys(KNOWN_COLLISIONS_TOLERATED)) {
|
||||
if (!skillNames.has(name)) {
|
||||
stale.push(` "${name}" is in KNOWN_COLLISIONS_TOLERATED but no gstack skill has that name — remove the exception`);
|
||||
} else if (!allBuiltins.has(name)) {
|
||||
stale.push(` "${name}" is in KNOWN_COLLISIONS_TOLERATED but no host's KNOWN_BUILTINS lists it — remove the exception`);
|
||||
}
|
||||
}
|
||||
if (stale.length > 0) {
|
||||
throw new Error(`Stale tolerance entries:\n${stale.join('\n')}`);
|
||||
}
|
||||
});
|
||||
|
||||
// Self-check: the /checkpoint rename actually landed. If someone reverts
|
||||
// the rename by accident, this catches it.
|
||||
test('the /checkpoint collision that started this file is actually resolved', () => {
|
||||
const names = new Set(skills.map(s => s.name));
|
||||
expect(names.has('checkpoint')).toBe(false);
|
||||
// And the replacements exist.
|
||||
expect(names.has('context-save')).toBe(true);
|
||||
expect(names.has('context-restore')).toBe(true);
|
||||
});
|
||||
|
||||
// Advisory: print a warning for any skill whose name is a generic verb.
|
||||
// Doesn't fail — just informs reviewers.
|
||||
test('advisory: generic-verb watchlist (informational)', () => {
|
||||
const watchlist = new Set(GENERIC_VERB_WATCHLIST);
|
||||
const flagged: string[] = [];
|
||||
for (const { name } of skills) {
|
||||
if (watchlist.has(name)) flagged.push(name);
|
||||
}
|
||||
if (flagged.length > 0) {
|
||||
console.log(
|
||||
`\n⚠️ advisory: ${flagged.length} skill(s) use generic verbs that may be at risk ` +
|
||||
`of future host built-in collisions: ${flagged.map(n => `/${n}`).join(', ')}\n` +
|
||||
` These are NOT current collisions — they're names to watch. If any become ` +
|
||||
`taken, the per-host test above will fail.\n`
|
||||
);
|
||||
}
|
||||
// Test always passes — this is advisory.
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -70,31 +70,58 @@ Add a new /greet skill that prints a welcome message.
|
||||
// If Codex is unavailable on the test machine, the skill should print
// [codex-unavailable] and still complete the Claude subagent half.
//
// NOTE(merge fix): the previous text contained BOTH sides of an unresolved
// merge — duplicate `const` declarations for out/claudeVoiceFired/
// codexVoiceFired/codexUnavailable/reachedPhase1, old AND new runSkillTest
// option keys (name/workdir/timeoutMs alongside testName/workingDirectory/
// timeout), and doubled logCost/recordE2E calls — which cannot compile
// ("Cannot redeclare block-scoped variable"). Resolved here to the newer,
// hardened side (transcript-scoped matching, helper-based logging).
const result = await runSkillTest({
  testName: 'autoplan-dual-voice',
  workingDirectory: workDir,
  prompt: `/autoplan ${planPath}`,
  evalCollector,
  timeout: 300_000, // 5 min
  // /autoplan spawns subagents and calls codex via Bash; it needs the
  // full tool set to get past Phase 1. Bash+Read+Write alone wasn't
  // enough — the skill stalled trying to invoke Agent/Skill.
  allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent', 'Skill'],
  maxTurns: 30,
  runId,
});

// Accept EITHER outcome as success:
//   (a) Both voices produced output (ideal case)
//   (b) Codex unavailable + Claude voice produced output (graceful degrade)
//
// Search ONLY the tool-call structure — NOT the prompt string that went in.
// Matching against the full transcript is risky because the prompt itself
// contains "plan-ceo-review" and other marker strings that would produce
// false positives regardless of skill behavior. Filter to tool_use /
// tool_result content + assistant messages emitted DURING execution.
const transcript = Array.isArray(result.transcript) ? result.transcript : [];
const executionContent = transcript
  .filter((entry: any) => entry && (entry.type === 'tool_use' || entry.type === 'tool_result' || entry.role === 'assistant'))
  .map((entry: any) => JSON.stringify(entry))
  .join('\n');
const out = (result.output ?? '') + '\n' + executionContent;

// Claude voice: require evidence of a dispatched Agent subagent, not
// merely the literal string "Agent(" (which could appear in any text).
// Task/Agent tool_use entries have name:"Agent" or subagent_type:"..."
const claudeVoiceFired = /"name":\s*"Agent"|"subagent_type":\s*"[^"]/.test(out) ||
  /Claude\s+(CEO|subagent)\s+(review|complete|finished)|claude-subagent\s/i.test(out);
// Codex voice: require evidence of codex CLI invocation (command string in
// a Bash tool_use), not prompt-text mentions.
const codexVoiceFired = /"command":\s*"[^"]*codex\s+(exec|review)/.test(out) ||
  /CODEX SAYS\s*\(/i.test(out);
// Unavailable markers: explicit probe-failure strings emitted by the skill.
const codexUnavailable = /\[codex-unavailable\]|AUTH_FAILED\b|CODEX_NOT_AVAILABLE\b|codex_cli_missing|Codex CLI not found/i.test(out);

expect(claudeVoiceFired).toBe(true);
expect(codexVoiceFired || codexUnavailable).toBe(true);

// Hang protection: require phase COMPLETION evidence, not name mentions.
// "Phase 1 complete" or a phase-transition marker, not "plan-ceo-review"
// as a bare string (which appears in the prompt itself).
const reachedPhase1 = /Phase\s+1\s+(complete|done|finished)|CEO\s+Review\s+(complete|done|approved)|Strategy\s*&\s*Scope\s+(complete|done)|Phase\s+2\s+(started|begin)/i.test(out);
expect(reachedPhase1).toBe(true);

logCost('autoplan-dual-voice', result);
recordE2E(evalCollector, 'autoplan-dual-voice', 'Autoplan dual-voice E2E', result, {
  passed: claudeVoiceFired && (codexVoiceFired || codexUnavailable) && reachedPhase1,
});
},
330_000, // per-test timeout slightly > spawn timeout so cleanup can run
);
|
||||
|
||||
@@ -0,0 +1,514 @@
|
||||
/**
|
||||
* Tier-1 live-fire E2E for /context-save and /context-restore.
|
||||
*
|
||||
* These spawn `claude -p "/context-save ..."` with the Skill tool enabled
|
||||
* and the skill installed in the workdir's .claude/skills/. Unlike the
|
||||
* older hand-fed-section tests, these exercise the ROUTING path — the
|
||||
* exact thing that broke with the /checkpoint name collision and the
|
||||
* whole reason this rename exists. If /context-save stops routing to
|
||||
* the skill (e.g., upstream ships a built-in by that name), these fail.
|
||||
*
|
||||
* Periodic tier. ~$0.20-$0.40 per test, ~$2 total per run.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-context-skills');
|
||||
|
||||
// Shared install helper: copy both skill files + bin scripts + routing CLAUDE.md
|
||||
// into a tmp workdir. Matches the pattern from skill-routing-e2e.test.ts so
|
||||
// claude -p discovers the skills via .claude/skills/ auto-scan.
|
||||
function setupWorkdir(suffix: string): { workDir: string; gstackHome: string; slug: string } {
|
||||
const workDir = fs.mkdtempSync(path.join(os.tmpdir(), `skill-e2e-ctx-${suffix}-`));
|
||||
const gstackHome = path.join(workDir, '.gstack-home');
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Install skills into .claude/skills/ for claude -p auto-discovery.
|
||||
const skillsDir = path.join(workDir, '.claude', 'skills');
|
||||
for (const skill of ['context-save', 'context-restore']) {
|
||||
const destDir = path.join(skillsDir, skill);
|
||||
fs.mkdirSync(destDir, { recursive: true });
|
||||
fs.copyFileSync(path.join(ROOT, skill, 'SKILL.md'), path.join(destDir, 'SKILL.md'));
|
||||
}
|
||||
|
||||
// Install the bin scripts referenced by the preamble.
|
||||
const binDir = path.join(workDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
for (const script of [
|
||||
'gstack-timeline-log', 'gstack-timeline-read', 'gstack-slug',
|
||||
'gstack-learnings-log', 'gstack-learnings-search',
|
||||
'gstack-update-check', 'gstack-config', 'gstack-repo-mode',
|
||||
]) {
|
||||
const src = path.join(ROOT, 'bin', script);
|
||||
if (fs.existsSync(src)) {
|
||||
fs.copyFileSync(src, path.join(binDir, script));
|
||||
fs.chmodSync(path.join(binDir, script), 0o755);
|
||||
}
|
||||
}
|
||||
|
||||
// Routing CLAUDE.md: explicit instruction to always use the Skill tool.
|
||||
fs.writeFileSync(path.join(workDir, 'CLAUDE.md'), `# Project Instructions
|
||||
|
||||
## Skill routing
|
||||
|
||||
When the user's request matches an available skill, ALWAYS invoke it using the Skill
|
||||
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
|
||||
|
||||
Key routing rules:
|
||||
- Save progress, save state, save my work → invoke context-save
|
||||
- Resume, where was I, pick up where I left off → invoke context-restore
|
||||
|
||||
Environment:
|
||||
- Use GSTACK_HOME="${gstackHome}" for all gstack bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory).
|
||||
- The skill files are at ./.claude/skills/context-save/SKILL.md and
|
||||
./.claude/skills/context-restore/SKILL.md.
|
||||
`);
|
||||
|
||||
const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
|
||||
return { workDir, gstackHome, slug };
|
||||
}
|
||||
|
||||
// Helper: seed a saved-context file into the storage dir.
|
||||
function seedSave(gstackHome: string, slug: string, filename: string, frontmatter: Record<string, string>, body: string) {
|
||||
const dir = path.join(gstackHome, 'projects', slug, 'checkpoints');
|
||||
fs.mkdirSync(dir, { recursive: true });
|
||||
const fm = '---\n' + Object.entries(frontmatter).map(([k, v]) => `${k}: ${v}`).join('\n') + '\n---\n';
|
||||
fs.writeFileSync(path.join(dir, filename), fm + body);
|
||||
}
|
||||
|
||||
// Helper: extract the list of Skill tool invocations from the transcript.
|
||||
function skillCalls(result: { toolCalls: Array<{ tool: string; input: any }> }): string[] {
|
||||
return result.toolCalls
|
||||
.filter((tc) => tc.tool === 'Skill')
|
||||
.map((tc) => tc.input?.skill || '')
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
// Build a broader assertion surface: final assistant message + every tool
|
||||
// input and output. The agent often finishes with a tool call instead of a
|
||||
// text response, leaving result.output as an empty string — but the data we
|
||||
// want to assert on (skill invocation args, bash stdout like NO_CHECKPOINTS,
|
||||
// file paths) is all present in the transcript. Search there too.
|
||||
function fullOutputSurface(result: {
|
||||
output?: string;
|
||||
transcript?: any[];
|
||||
toolCalls?: Array<{ tool: string; input: any; output: string }>;
|
||||
}): string {
|
||||
const parts: string[] = [];
|
||||
if (result.output) parts.push(result.output);
|
||||
for (const tc of result.toolCalls || []) {
|
||||
parts.push(JSON.stringify(tc.input || {}));
|
||||
if (tc.output) parts.push(tc.output);
|
||||
}
|
||||
// Also stringify transcript for tool_result / user-message content that
|
||||
// isn't surfaced via toolCalls (e.g., Bash stdout echoed back).
|
||||
for (const entry of result.transcript || []) {
|
||||
try { parts.push(JSON.stringify(entry)); } catch { /* skip */ }
|
||||
}
|
||||
return parts.join('\n');
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
// Live-fire E2E suite
|
||||
// ────────────────────────────────────────────────────────────────────────
|
||||
|
||||
describeIfSelected('Context Skills E2E (live-fire)', [
|
||||
'context-save-routing',
|
||||
'context-save-then-restore-roundtrip',
|
||||
'context-restore-fragment-match',
|
||||
'context-restore-empty-state',
|
||||
'context-restore-list-delegates',
|
||||
'context-restore-legacy-compat',
|
||||
'context-save-list-current-branch',
|
||||
'context-save-list-all-branches',
|
||||
], () => {
|
||||
afterAll(() => { finalizeEvalCollector(evalCollector); });
|
||||
|
||||
// ── 1. Routing: /context-save actually invokes the Skill tool ────────
|
||||
testConcurrentIfSelected('context-save-routing', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('routing');
|
||||
|
||||
// Prompt pattern: the slash command + explicit "invoke via Skill tool"
|
||||
// instruction. The GSTACK_HOME / ./bin bash setup that used to be in
|
||||
// the prompt now comes via env:. Prompt without the Skill-tool hint
|
||||
// causes the agent to interpret /context-save as a shell token and
|
||||
// skip Skill routing entirely — which defeats this test's purpose.
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-save wintermute progress. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 12,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-save-routing',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-save-routing', result);
|
||||
|
||||
const invokedSkills = skillCalls(result);
|
||||
const routedToContextSave = invokedSkills.includes('context-save');
|
||||
// File should also be written to the storage dir.
|
||||
const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
|
||||
const files = fs.existsSync(checkpointDir) ? fs.readdirSync(checkpointDir).filter((f) => f.endsWith('.md')) : [];
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-save routes via Skill tool', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routedToContextSave && files.length > 0,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routedToContextSave).toBe(true);
|
||||
expect(files.length).toBeGreaterThan(0);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
|
||||
// ── 2. Round-trip: save then restore in the same session ─────────────
|
||||
testConcurrentIfSelected('context-save-then-restore-roundtrip', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('roundtrip');
|
||||
const magicMarker = 'wintermute-roundtrip-MX7FQZ';
|
||||
|
||||
// Stage a change so /context-save has something to capture.
|
||||
fs.writeFileSync(path.join(workDir, 'feature.ts'), `// ${magicMarker}\nexport const X = 1;\n`);
|
||||
spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Two steps:
|
||||
1. Run /context-save ${magicMarker} — invoke via the Skill tool.
|
||||
2. Run /context-restore — invoke via the Skill tool. Report what it loaded.
|
||||
Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 25,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 240_000,
|
||||
testName: 'context-save-then-restore-roundtrip',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-save-then-restore-roundtrip', result);
|
||||
|
||||
const invokedSkills = skillCalls(result);
|
||||
const bothRouted = invokedSkills.includes('context-save') && invokedSkills.includes('context-restore');
|
||||
const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
|
||||
const files = fs.existsSync(checkpointDir) ? fs.readdirSync(checkpointDir).filter((f) => f.endsWith('.md')) : [];
|
||||
// Broader surface — agent may stop at restore's Skill call without
|
||||
// echoing the marker into result.output. The marker is also in the
|
||||
// Skill tool input (we passed it as the save title) and in the
|
||||
// file content that restore reads.
|
||||
const restoreMentionsTitle = fullOutputSurface(result).toLowerCase().includes(magicMarker.toLowerCase());
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'save-then-restore round-trip', 'Context Skills E2E', result, {
|
||||
passed: exitOk && bothRouted && files.length > 0 && restoreMentionsTitle,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(bothRouted).toBe(true);
|
||||
expect(files.length).toBeGreaterThan(0);
|
||||
expect(restoreMentionsTitle).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 240_000);
|
||||
|
||||
// ── 3. /context-restore <fragment> loads the matching save ───────────
|
||||
testConcurrentIfSelected('context-restore-fragment-match', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('fragment');
|
||||
|
||||
// Seed three saves with distinct titles.
|
||||
seedSave(gstackHome, slug, '20260101-120000-alpha-feature.md',
|
||||
{ status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-01-01T12:00:00Z' },
|
||||
'## Working on: alpha feature\n\n### Summary\nAlpha content FRAGMATCH_ALPHA_BUILD\n');
|
||||
seedSave(gstackHome, slug, '20260202-120000-middle-payments.md',
|
||||
{ status: 'in-progress', branch: 'feat/payments', timestamp: '2026-02-02T12:00:00Z' },
|
||||
'## Working on: middle payments\n\n### Summary\nPayments content FRAGMATCH_PAYMENTS_BUILD\n');
|
||||
seedSave(gstackHome, slug, '20260303-120000-omega-release.md',
|
||||
{ status: 'in-progress', branch: 'feat/omega', timestamp: '2026-03-03T12:00:00Z' },
|
||||
'## Working on: omega release\n\n### Summary\nOmega content FRAGMATCH_OMEGA_BUILD\n');
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-restore payments — load the saved context whose title contains "payments". Invoke via the Skill tool. Report what was loaded. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-restore-fragment-match',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-restore-fragment-match', result);
|
||||
|
||||
// Broader surface — agent may stop at Skill call without echoing the
|
||||
// body marker. The payments file's body is in tool outputs (Read/Bash).
|
||||
const out = fullOutputSurface(result);
|
||||
const loadedPayments = out.includes('FRAGMATCH_PAYMENTS_BUILD');
|
||||
const didNotLoadOthers = !out.includes('FRAGMATCH_ALPHA_BUILD') && !out.includes('FRAGMATCH_OMEGA_BUILD');
|
||||
const routedToRestore = skillCalls(result).includes('context-restore');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-restore <fragment> match', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routedToRestore && loadedPayments && didNotLoadOthers,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routedToRestore).toBe(true);
|
||||
expect(loadedPayments).toBe(true);
|
||||
expect(didNotLoadOthers).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
|
||||
// ── 4. /context-restore with zero saves → graceful empty-state ───────
|
||||
testConcurrentIfSelected('context-restore-empty-state', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('empty');
|
||||
// Ensure the storage dir is empty or missing — setupWorkdir doesn't seed.
|
||||
const checkpointDir = path.join(gstackHome, 'projects', slug, 'checkpoints');
|
||||
expect(fs.existsSync(checkpointDir)).toBe(false);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-restore — there are no saved contexts yet. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 8,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 90_000,
|
||||
testName: 'context-restore-empty-state',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-restore-empty-state', result);
|
||||
|
||||
// Build broad surface: agent often stops after a tool call with no final
|
||||
// text, so result.output is empty string. The bash "NO_CHECKPOINTS" echo
|
||||
// is in tool outputs; the "no saved contexts yet" phrase may only appear
|
||||
// in tool inputs / transcript entries.
|
||||
const out = fullOutputSurface(result);
|
||||
const gracefulMessage = /no saved context|no contexts? yet|nothing to restore|NO_CHECKPOINTS/i.test(out);
|
||||
const noCrash = !/error|exception|undefined/i.test(out) || gracefulMessage; // mention of "error" in the graceful message is fine
|
||||
const routedToRestore = skillCalls(result).includes('context-restore');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-restore empty state', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routedToRestore && gracefulMessage && noCrash,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routedToRestore).toBe(true);
|
||||
expect(gracefulMessage).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 150_000);
|
||||
|
||||
// ── 5. /context-restore list redirects to /context-save list ─────────
|
||||
testConcurrentIfSelected('context-restore-list-delegates', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('delegates');
|
||||
seedSave(gstackHome, slug, '20260101-120000-seed.md',
|
||||
{ status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
|
||||
'## Working on: seed\n');
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-restore list. Invoke via the Skill tool. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 8,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 90_000,
|
||||
testName: 'context-restore-list-delegates',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-restore-list-delegates', result);
|
||||
|
||||
// Broader surface — agent sometimes stops after the Skill call without
|
||||
// producing text output. The "use /context-save list" hint may only
|
||||
// appear in tool inputs / transcript.
|
||||
const out = fullOutputSurface(result);
|
||||
const mentionsSaveList = /context-save list/i.test(out);
|
||||
const routedToRestore = skillCalls(result).includes('context-restore');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-restore list delegates', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routedToRestore && mentionsSaveList,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routedToRestore).toBe(true);
|
||||
expect(mentionsSaveList).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 150_000);
|
||||
|
||||
// ── 6. Legacy compat: pre-rename save files still load ───────────────
|
||||
testConcurrentIfSelected('context-restore-legacy-compat', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('legacy');
|
||||
|
||||
// Seed a save file in the pre-rename format (exactly how old /checkpoint
|
||||
// wrote them). The storage dir name is still "checkpoints/" — kept for
|
||||
// exactly this reason.
|
||||
seedSave(gstackHome, slug, '20260301-120000-legacy-pre-rename-work.md',
|
||||
{
|
||||
status: 'in-progress',
|
||||
branch: 'feat/pre-rename',
|
||||
timestamp: '2026-03-01T12:00:00Z',
|
||||
session_duration_s: '3600',
|
||||
},
|
||||
'## Working on: legacy pre-rename work\n\n### Summary\nWork saved by OLD_CHECKPOINT_SKILL_LEGACYCOMPAT before the rename.\n\n### Remaining Work\n1. Item from the before-times.\n');
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-restore — load the most recent saved context. Invoke via the Skill tool. Report the content of the loaded file. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 8,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-restore-legacy-compat',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-restore-legacy-compat', result);
|
||||
|
||||
// Check for ANY evidence the legacy file was loaded. The agent may
|
||||
// paraphrase the summary OR stop at a tool call without text output,
|
||||
// so require at least ONE of:
|
||||
// (a) the unique body marker (verbatim pass-through)
|
||||
// (b) the title phrase "legacy pre-rename work"
|
||||
// (c) the filename or its timestamp prefix
|
||||
// (d) the branch name "feat/pre-rename"
|
||||
// Search across the full transcript, not just result.output.
|
||||
const out = fullOutputSurface(result);
|
||||
const loadedLegacy =
|
||||
out.includes('OLD_CHECKPOINT_SKILL_LEGACYCOMPAT') ||
|
||||
/legacy.+pre-rename/i.test(out) ||
|
||||
/20260301-120000-legacy/i.test(out) ||
|
||||
/feat\/pre-rename/i.test(out) ||
|
||||
/pre-rename/i.test(out);
|
||||
const routedToRestore = skillCalls(result).includes('context-restore');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'legacy /checkpoint file loads via /context-restore', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routedToRestore && loadedLegacy,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routedToRestore).toBe(true);
|
||||
expect(loadedLegacy).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
|
||||
// ── 7. /context-save list: default filters to current branch ─────────
|
||||
testConcurrentIfSelected('context-save-list-current-branch', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('list-current');
|
||||
|
||||
// Seed 3 files on 3 different branches. Current branch is "main".
|
||||
seedSave(gstackHome, slug, '20260101-120000-main-work.md',
|
||||
{ status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
|
||||
'## Working on: main work LISTCURR_MAIN_TOKEN\n');
|
||||
seedSave(gstackHome, slug, '20260202-120000-feat-alpha.md',
|
||||
{ status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-02-02T12:00:00Z' },
|
||||
'## Working on: alpha LISTCURR_ALPHA_TOKEN\n');
|
||||
seedSave(gstackHome, slug, '20260303-120000-feat-beta.md',
|
||||
{ status: 'in-progress', branch: 'feat/beta', timestamp: '2026-03-03T12:00:00Z' },
|
||||
'## Working on: beta LISTCURR_BETA_TOKEN\n');
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-save list — list saved contexts for the CURRENT branch only (default, no --all). Invoke via the Skill tool. The current branch is "main". Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-save-list-current-branch',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-save-list-current-branch', result);
|
||||
|
||||
// Broad surface: the list output may only appear in bash tool_result
|
||||
// entries (find output, file reads) rather than the agent's final text.
|
||||
const out = fullOutputSurface(result);
|
||||
// Must show the main-branch save. Hide the other branches' saves.
|
||||
// Match by filename timestamp (stable, unambiguous) plus a looser
|
||||
// prose check.
|
||||
const showsMain = /20260101-120000|main-work/.test(out);
|
||||
const hidesAlpha = !/20260202-120000/.test(out);
|
||||
const hidesBeta = !/20260303-120000/.test(out);
|
||||
const routed = skillCalls(result).includes('context-save');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-save list (current branch default)', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routed && showsMain && hidesAlpha && hidesBeta,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routed).toBe(true);
|
||||
expect(showsMain).toBe(true);
|
||||
expect(hidesAlpha).toBe(true);
|
||||
expect(hidesBeta).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
|
||||
// ── 8. /context-save list --all: shows every branch ──────────────────
|
||||
testConcurrentIfSelected('context-save-list-all-branches', async () => {
|
||||
const { workDir, gstackHome, slug } = setupWorkdir('list-all');
|
||||
|
||||
seedSave(gstackHome, slug, '20260101-120000-main-work.md',
|
||||
{ status: 'in-progress', branch: 'main', timestamp: '2026-01-01T12:00:00Z' },
|
||||
'## Working on: main LISTALL_MAIN_TOKEN\n');
|
||||
seedSave(gstackHome, slug, '20260202-120000-feat-alpha.md',
|
||||
{ status: 'in-progress', branch: 'feat/alpha', timestamp: '2026-02-02T12:00:00Z' },
|
||||
'## Working on: alpha LISTALL_ALPHA_TOKEN\n');
|
||||
seedSave(gstackHome, slug, '20260303-120000-feat-beta.md',
|
||||
{ status: 'in-progress', branch: 'feat/beta', timestamp: '2026-03-03T12:00:00Z' },
|
||||
'## Working on: beta LISTALL_BETA_TOKEN\n');
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /context-save list --all — list saved contexts from ALL branches (not just the current one). Invoke via the Skill tool. Report the full list. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-save-list-all-branches',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-save-list-all-branches', result);
|
||||
|
||||
// Broad surface — same rationale as list-current-branch: the list output
|
||||
// may only be in bash tool_result, not in the agent's final text.
|
||||
const out = fullOutputSurface(result);
|
||||
const filesShown = [
|
||||
/20260101-120000/.test(out),
|
||||
/20260202-120000/.test(out),
|
||||
/20260303-120000/.test(out),
|
||||
].filter(Boolean).length;
|
||||
const routed = skillCalls(result).includes('context-save');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-save list --all', 'Context Skills E2E', result, {
|
||||
passed: exitOk && routed && filesShown === 3,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(routed).toBe(true);
|
||||
expect(filesShown).toBe(3);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
});
|
||||
@@ -15,10 +15,11 @@ const evalCollector = createEvalCollector('e2e-session-intelligence');
|
||||
|
||||
// --- Session Intelligence E2E ---
|
||||
// Tests the core contract: timeline events flow in, context recovery flows out,
|
||||
// checkpoints round-trip.
|
||||
// /context-save + /context-restore round-trip.
|
||||
|
||||
describeIfSelected('Session Intelligence E2E', [
|
||||
'timeline-event-flow', 'context-recovery-artifacts', 'checkpoint-save-resume',
|
||||
'timeline-event-flow', 'context-recovery-artifacts',
|
||||
'context-save-writes-file', 'context-restore-loads-latest',
|
||||
], () => {
|
||||
let workDir: string;
|
||||
let gstackHome: string;
|
||||
@@ -194,28 +195,28 @@ IMPORTANT:
|
||||
console.log(`Context recovery: artifacts=${foundArtifacts}, lastSession=${foundLastSession}, timeline=${foundTimeline}`);
|
||||
}, 180_000);
|
||||
|
||||
// --- Test 3: Checkpoint save and resume ---
|
||||
// Run /checkpoint save via claude -p, verify file created. Then run /checkpoint resume
|
||||
// and verify it reads the checkpoint back.
|
||||
testConcurrentIfSelected('checkpoint-save-resume', async () => {
|
||||
// --- Test 3: /context-save writes a file ---
|
||||
// Hand-feed the save section of context-save/SKILL.md to claude -p and verify
|
||||
// a file gets written to the project's checkpoints dir with valid frontmatter.
|
||||
testConcurrentIfSelected('context-save-writes-file', async () => {
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
fs.mkdirSync(path.join(projectDir, 'checkpoints'), { recursive: true });
|
||||
|
||||
// Copy the /checkpoint skill
|
||||
copyDirSync(path.join(ROOT, 'checkpoint'), path.join(workDir, 'checkpoint'));
|
||||
// Copy the /context-save skill
|
||||
copyDirSync(path.join(ROOT, 'context-save'), path.join(workDir, 'context-save'));
|
||||
|
||||
// Add a staged change so /checkpoint has something to capture
|
||||
// Add a staged change so /context-save has something to capture
|
||||
fs.writeFileSync(path.join(workDir, 'feature.ts'), 'export function newFeature() { return true; }\n');
|
||||
spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
// Extract the checkpoint save section from the skill template
|
||||
const full = fs.readFileSync(path.join(ROOT, 'checkpoint', 'SKILL.md'), 'utf-8');
|
||||
const saveStart = full.indexOf('## Save');
|
||||
const resumeStart = full.indexOf('## Resume');
|
||||
const saveSection = full.slice(saveStart, resumeStart > saveStart ? resumeStart : undefined);
|
||||
// Extract the save section from the skill template (before the List section)
|
||||
const full = fs.readFileSync(path.join(ROOT, 'context-save', 'SKILL.md'), 'utf-8');
|
||||
const saveStart = full.indexOf('## Save flow');
|
||||
const listStart = full.indexOf('## List flow');
|
||||
const saveSection = full.slice(saveStart, listStart > saveStart ? listStart : undefined);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are testing the /checkpoint skill. Follow these instructions to save a checkpoint.
|
||||
prompt: `You are testing the /context-save skill. Follow these instructions to save a context file.
|
||||
|
||||
${saveSection.slice(0, 2000)}
|
||||
|
||||
@@ -223,7 +224,7 @@ IMPORTANT:
|
||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
||||
- Save the checkpoint to ${projectDir}/checkpoints/ with a filename like "20260401-test-checkpoint.md".
|
||||
- Save the file to ${projectDir}/checkpoints/ with a filename like "20260401-test-context.md".
|
||||
- Include YAML frontmatter with status, branch, and timestamp.
|
||||
- Include a summary of what's being worked on (you can see from git status).
|
||||
- Do NOT use AskUserQuestion.`,
|
||||
@@ -231,38 +232,134 @@ IMPORTANT:
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'checkpoint-save-resume',
|
||||
testName: 'context-save-writes-file',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('checkpoint save', result);
|
||||
logCost('context-save', result);
|
||||
|
||||
// Check that a checkpoint file was created
|
||||
// Check that a context file was created
|
||||
const checkpointDir = path.join(projectDir, 'checkpoints');
|
||||
const checkpointFiles = fs.existsSync(checkpointDir)
|
||||
const files = fs.existsSync(checkpointDir)
|
||||
? fs.readdirSync(checkpointDir).filter(f => f.endsWith('.md'))
|
||||
: [];
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
const checkpointCreated = checkpointFiles.length > 0;
|
||||
const fileCreated = files.length > 0;
|
||||
|
||||
let checkpointContent = '';
|
||||
if (checkpointCreated) {
|
||||
checkpointContent = fs.readFileSync(path.join(checkpointDir, checkpointFiles[0]), 'utf-8');
|
||||
let fileContent = '';
|
||||
if (fileCreated) {
|
||||
fileContent = fs.readFileSync(path.join(checkpointDir, files[0]), 'utf-8');
|
||||
}
|
||||
|
||||
// Verify checkpoint has expected structure
|
||||
const hasYamlFrontmatter = checkpointContent.includes('---') && checkpointContent.includes('status:');
|
||||
const hasBranch = checkpointContent.includes('branch:') || checkpointContent.includes('main');
|
||||
const hasYamlFrontmatter = fileContent.includes('---') && fileContent.includes('status:');
|
||||
const hasBranch = fileContent.includes('branch:') || fileContent.includes('main');
|
||||
|
||||
recordE2E(evalCollector, 'checkpoint save-resume', 'Session Intelligence E2E', result, {
|
||||
passed: exitOk && checkpointCreated && hasYamlFrontmatter,
|
||||
recordE2E(evalCollector, 'context-save writes file', 'Session Intelligence E2E', result, {
|
||||
passed: exitOk && fileCreated && hasYamlFrontmatter,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(checkpointCreated).toBe(true);
|
||||
expect(fileCreated).toBe(true);
|
||||
expect(hasYamlFrontmatter).toBe(true);
|
||||
|
||||
console.log(`Checkpoint: ${checkpointFiles.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
|
||||
console.log(`context-save: ${files.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
|
||||
}, 180_000);
|
||||
|
||||
// --- Test 4: /context-restore loads the newest file across branches ---
|
||||
// Seed two saved-context files with different YYYYMMDD-HHMMSS prefixes and
|
||||
// different branches in their frontmatter. Hand-feed the restore section to
|
||||
// claude -p. Verify the agent identifies the newer file (by filename prefix)
|
||||
// and presents its content, regardless of the current branch.
|
||||
testConcurrentIfSelected('context-restore-loads-latest', async () => {
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
const checkpointDir = path.join(projectDir, 'checkpoints');
|
||||
fs.mkdirSync(checkpointDir, { recursive: true });
|
||||
|
||||
// Copy the /context-restore skill
|
||||
copyDirSync(path.join(ROOT, 'context-restore'), path.join(workDir, 'context-restore'));
|
||||
|
||||
// Seed two files: older on branch-a (title "old-work"), newer on branch-b
|
||||
// (title "newer-wintermute-work"). Current branch (main) matches neither.
|
||||
const olderFile = path.join(checkpointDir, '20260101-120000-old-work.md');
|
||||
const newerFile = path.join(checkpointDir, '20260202-130000-newer-wintermute-work.md');
|
||||
fs.writeFileSync(olderFile, `---
|
||||
status: in-progress
|
||||
branch: branch-a
|
||||
timestamp: 2026-01-01T12:00:00-07:00
|
||||
---
|
||||
|
||||
## Working on: old work
|
||||
|
||||
### Summary
|
||||
This is older work on branch-a.
|
||||
|
||||
### Remaining Work
|
||||
1. Should NOT be loaded by default restore.
|
||||
`);
|
||||
fs.writeFileSync(newerFile, `---
|
||||
status: in-progress
|
||||
branch: branch-b
|
||||
timestamp: 2026-02-02T13:00:00-07:00
|
||||
---
|
||||
|
||||
## Working on: newer wintermute work
|
||||
|
||||
### Summary
|
||||
This is the newest saved context. Cross-branch restore should load THIS file.
|
||||
|
||||
### Remaining Work
|
||||
1. Finish the wintermute integration.
|
||||
`);
|
||||
|
||||
// Deliberately scramble mtimes so filesystem mtime DISAGREES with filename
|
||||
// prefix — this proves we're using filename ordering, not ls -1t.
|
||||
const pastOlderMtime = Math.floor(Date.now() / 1000); // now (newest mtime)
|
||||
const pastNewerMtime = pastOlderMtime - 60 * 60 * 24 * 30; // 30 days ago
|
||||
fs.utimesSync(olderFile, pastOlderMtime, pastOlderMtime);
|
||||
fs.utimesSync(newerFile, pastNewerMtime, pastNewerMtime);
|
||||
|
||||
// Extract the restore-flow section from the skill template
|
||||
const full = fs.readFileSync(path.join(ROOT, 'context-restore', 'SKILL.md'), 'utf-8');
|
||||
const restoreStart = full.indexOf('## Restore flow');
|
||||
const importantStart = full.indexOf('## Important Rules', restoreStart);
|
||||
const restoreSection = full.slice(restoreStart, importantStart > restoreStart ? importantStart : undefined);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are testing the /context-restore skill. Follow these instructions to restore the most recent saved context.
|
||||
|
||||
${restoreSection.slice(0, 2500)}
|
||||
|
||||
IMPORTANT:
|
||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||
- Look in ${checkpointDir} for saved context files.
|
||||
- Current branch is "main" — do NOT filter by current branch. Load across all branches.
|
||||
- The newest file by YYYYMMDD-HHMMSS prefix is the canonical "most recent". Filesystem mtime has been scrambled — do not use it.
|
||||
- Do NOT use AskUserQuestion. Just present the content of the newest file.`,
|
||||
workingDirectory: workDir,
|
||||
maxTurns: 8,
|
||||
allowedTools: ['Bash', 'Read', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-restore-loads-latest',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context-restore', result);
|
||||
|
||||
const output = result.output ?? '';
|
||||
const loadedNewer = output.includes('newer wintermute work') || output.includes('wintermute integration');
|
||||
const loadedOlder = output.includes('old work') && !output.includes('newer');
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context-restore loads latest', 'Session Intelligence E2E', result, {
|
||||
passed: exitOk && loadedNewer && !loadedOlder,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(loadedNewer).toBe(true);
|
||||
expect(loadedOlder).toBe(false);
|
||||
|
||||
console.log(`context-restore: loadedNewer=${loadedNewer}, loadedOlder=${loadedOlder}`);
|
||||
}, 180_000);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user