refactor(plan-ceo-review): carve review body into on-demand section

Carve the largest skill (138,838 B) into a skeleton + one on-demand section, the documented next Phase B target after /ship (v2_PLAN.md:216). - sections/review-sections.md(.tmpl): the 11-section deep review, codex/ outside-voice rules, how-to-ask, Required Outputs, registries, Completion Summary, Review Log, REVIEW_DASHBOARD, PLAN_FILE_REVIEW_REPORT, Next Steps, docs/designs promotion, Formatting Rules, and the Mode Quick Reference. - sections/manifest.json: passive registry (CM2), one entry. - SKILL.md.tmpl: {{SECTION_INDEX}} after the system audit, a single {{SECTION:review-sections}} STOP-Read after Step 0 mode selection, and a Section self-check. All of Step 0 (the scope/mode conversation) stays in the always-loaded skeleton; only EXIT_PLAN_MODE_GATE follows the section. Measured: always-loaded skeleton 138,838 -> 80,731 B (-42%, ~14.4K tokens off every invocation). Union (skeleton + section) 139,110 B, behavior held. Boundary honors Codex P1: nothing review-governing (formatting rules, mode reference, how-to-ask, required outputs) sits in the skeleton below the STOP. Housekeeping resolvers ride in the section, matching the ship precedent (adversarial.md carries LEARNINGS_LOG + GBRAIN_SAVE_RESULTS). Tests (atomic with the carve — skill-docs.yml gates gen:skill-docs freshness on every push, so source + regen + tests must land together): - parity-harness: plan-ceo flipped to sectioned, maxSkeletonBytes 90_000 (measured 80,731 + headroom); content/minBytes run against the union. - skill-size-budget: plan-ceo-review added to SECTIONS_EXTRACTED. - section-manifest-consistency: generalized to discover every carved skill, vars computed per-skill-case (Codex P2). - skill-ceo-section-ordering (new, gate): per-PR static guard — STOP after Step 0, review body absent from skeleton, report writer in the section, nothing review-governing below the STOP. 
- skill-e2e-plan-ceo-review-section-loading (new, periodic): refreshes the installed skill first (Codex P1), drives full Step 0, asserts the section is Read before the report. - gen-skill-docs + skill-validation: read the skeleton+sections union for carved skills so relocated prose still counts. - touchfiles: plan-ceo-section-loading registered (periodic). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-08-03 12:58:40 +02:00 · 2026-05-31 08:54:56 -07:00
parent 3bef43bc5a
commit ab66193e2e
14 changed files with 1831 additions and 1457 deletions
@@ -672,7 +672,7 @@ describe('REVIEW_DASHBOARD resolver', () => {

  for (const skill of REVIEW_SKILLS) {
    test(`review dashboard appears in ${skill} generated file`, () => {
-      const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
+      const content = readSkillUnion(skill); // carved skills: union skeleton + sections
      expect(content).toContain('gstack-review');
      expect(content).toContain('REVIEW READINESS DASHBOARD');
    });
@@ -699,7 +699,7 @@ describe('REVIEW_DASHBOARD resolver', () => {
  });

  test('resolver output contains key dashboard elements', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
+    const content = readSkillUnion('plan-ceo-review'); // carved: dashboard moved to section
    expect(content).toContain('VERDICT');
    expect(content).toContain('CLEARED');
    expect(content).toContain('Eng Review');
@@ -709,25 +709,25 @@ describe('REVIEW_DASHBOARD resolver', () => {
  });

  test('dashboard bash block includes git HEAD for staleness detection', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
+    const content = readSkillUnion('plan-ceo-review'); // carved: dashboard moved to section
    expect(content).toContain('git rev-parse --short HEAD');
    expect(content).toContain('---HEAD---');
  });

  test('dashboard includes staleness detection prose', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
+    const content = readSkillUnion('plan-ceo-review'); // carved: dashboard moved to section
    expect(content).toContain('Staleness detection');
    expect(content).toContain('commit');
  });

  for (const skill of REVIEW_SKILLS) {
    test(`${skill} contains review chaining section`, () => {
-      const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
+      const content = readSkillUnion(skill); // carved skills: union skeleton + sections
      expect(content).toContain('Review Chaining');
    });

    test(`${skill} Review Log includes commit field`, () => {
-      const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
+      const content = readSkillUnion(skill); // carved skills: union skeleton + sections
      expect(content).toContain('"commit"');
    });
  }
@@ -969,7 +969,7 @@ describe('PLAN_FILE_REVIEW_REPORT resolver', () => {
  }

  test('resolver output contains key report elements', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
+    const content = readSkillUnion('plan-ceo-review'); // carved: report writer moved to section
    expect(content).toContain('Trigger');
    expect(content).toContain('Findings');
    expect(content).toContain('VERDICT');
@@ -3112,7 +3112,9 @@ describe('GSTACK REVIEW REPORT delete-then-append flow', () => {

  for (const skill of PLAN_REVIEW_SKILLS) {
    test(`${skill}/SKILL.md prescribes delete-then-append, not in-place replace`, () => {
-      const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
+      // Carved skills (v2 plan Phase B) relocate the review-report prose into
+      // sections/*.md; readSkillUnion follows the content wherever the carve put it.
+      const content = readSkillUnion(skill);

      // The new (correct) instruction must be present.
      expect(content).toContain('delete-then-append flow');
@@ -226,7 +226,14 @@ export const PARITY_INVARIANTS: ParityInvariant[] = [
    minBytes: 120_000,
  },
  {
+    // Carved (v2 plan T9): skeleton SKILL.md + sections/review-sections.md.
+    // Content + size floors run against the union (relocated prose still counts);
+    // maxSkeletonBytes asserts the always-loaded skeleton shrank from the ~138KB
+    // monolith to ~81KB (measured 80,731 B, -42%). Headroom to 90KB so a small
+    // skeleton edit doesn't trip CI, but a 10KB regression does.
    skill: 'plan-ceo-review',
+    sectioned: true,
+    maxSkeletonBytes: 90_000,
    mustContain: [
      'SCOPE EXPANSION',
      'SELECTIVE EXPANSION',
@@ -122,6 +122,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  'budget-regression-pty':       ['test/helpers/eval-store.ts', 'test/skill-budget-regression.test.ts'],
  'ship-idempotency-pty':        ['ship/**', 'bin/gstack-next-version', 'bin/gstack-version-bump', 'scripts/resolvers/sections.ts', 'lib/worktree.ts', 'test/helpers/claude-pty-runner.ts'],
  'ship-section-loading':        ['ship/**', 'scripts/resolvers/sections.ts', 'scripts/gen-skill-docs.ts', 'test/helpers/required-reads.ts', 'test/helpers/transcript-section-logger.ts', 'test/helpers/claude-pty-runner.ts'],
+  'plan-ceo-section-loading':    ['plan-ceo-review/**', 'scripts/resolvers/sections.ts', 'scripts/gen-skill-docs.ts', 'test/helpers/required-reads.ts', 'test/helpers/transcript-section-logger.ts', 'test/helpers/claude-pty-runner.ts'],
  'autoplan-chain-pty':          ['autoplan/**', 'plan-ceo-review/**', 'plan-design-review/**', 'plan-eng-review/**', 'plan-devex-review/**', 'test/fixtures/plans/ui-heavy-feature.md', 'test/helpers/claude-pty-runner.ts'],
  'e2e-harness-audit':            ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/claude-pty-runner.ts'],

@@ -510,6 +511,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  'budget-regression-pty':     'gate',       // free, library-only assertion
  'ship-idempotency-pty':      'periodic',   // ~$3/run, real /ship in plan mode
  'ship-section-loading':      'periodic',   // ~$3/run, real /ship; asserts section reads
+  'plan-ceo-section-loading':  'periodic',   // ~$3-5/run, real /plan-ceo-review; asserts section read
  'autoplan-chain-pty':        'periodic',   // ~$8/run, all 3 phases sequential

  // Per-finding count + review-report-at-bottom — periodic because each
@@ -8,6 +8,14 @@
 *
 * Also pins the PASSIVE-manifest contract (CM2 / v2_PLAN.md:663): manifest entries
 * carry only id/file/title/trigger — no machine predicate (applies_when/required_for).
+ *
+ * Generalized for every carved skill (v2 plan Phase B). Carved skills are
+ * discovered dynamically (any top-level dir with sections/manifest.json), so a new
+ * carve is covered the moment its manifest lands — no edit here. Per Codex
+ * outside-voice P2, each skill's manifest + dir listing is read INSIDE its own
+ * describe case (not at module top), so a carve-in-progress (manifest added before
+ * the .md is generated) fails only that skill's generated-.md assertion instead of
+ * crashing the whole module, and the suite never silently stays ship-only.
 */

 import { describe, test, expect } from 'bun:test';
@@ -15,63 +23,86 @@ import * as fs from 'fs';
 import * as path from 'path';

 const ROOT = path.resolve(import.meta.dir, '..');
-const SHIP_SECTIONS = path.join(ROOT, 'ship', 'sections');
-const manifest = JSON.parse(fs.readFileSync(path.join(SHIP_SECTIONS, 'manifest.json'), 'utf-8'));

-const sectionTmpls = fs.readdirSync(SHIP_SECTIONS).filter(f => f.endsWith('.md.tmpl'));
-const sectionMds = fs.readdirSync(SHIP_SECTIONS).filter(f => f.endsWith('.md') && !f.endsWith('.md.tmpl'));
+/**
+ * List the carved skills: the names of the directories directly under ROOT that
+ * contain a sections/manifest.json, sorted with the default string order.
+ */
+function discoverCarvedSkills(): string[] {
+  const carved: string[] = [];
+  for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
+    if (!entry.isDirectory()) continue;
+    const manifestPath = path.join(ROOT, entry.name, 'sections', 'manifest.json');
+    if (fs.existsSync(manifestPath)) carved.push(entry.name);
+  }
+  return carved.sort();
+}
+
+const CARVED_SKILLS = discoverCarvedSkills();

 describe('section manifest ↔ filesystem consistency', () => {
-  test('manifest parses with skill + sections array', () => {
-    expect(manifest.skill).toBe('ship');
-    expect(Array.isArray(manifest.sections)).toBe(true);
-    expect(manifest.sections.length).toBeGreaterThan(0);
+  test('the known carved skills are discovered', () => {
+    // Tripwire: if a carve regresses (manifest deleted) this catches it.
+    expect(CARVED_SKILLS).toContain('ship');
+    expect(CARVED_SKILLS).toContain('plan-ceo-review');
  });

-  test('every manifest entry has a .md.tmpl source AND a generated .md', () => {
-    for (const s of manifest.sections) {
-      expect(fs.existsSync(path.join(SHIP_SECTIONS, `${s.file}.tmpl`))).toBe(true);
-      expect(fs.existsSync(path.join(SHIP_SECTIONS, s.file))).toBe(true);
-    }
-  });
+  for (const skill of CARVED_SKILLS) {
+    describe(skill, () => {
+      // Codex P2: computed per-skill-case, not at module load.
+      const sectionsDir = path.join(ROOT, skill, 'sections');
+      const manifest = JSON.parse(fs.readFileSync(path.join(sectionsDir, 'manifest.json'), 'utf-8'));
+      const sectionTmpls = fs.readdirSync(sectionsDir).filter(f => f.endsWith('.md.tmpl'));
+      const sectionMds = fs.readdirSync(sectionsDir).filter(f => f.endsWith('.md') && !f.endsWith('.md.tmpl'));

-  test('manifest is PASSIVE — no applies_when / required_for predicate (CM2)', () => {
-    for (const s of manifest.sections) {
-      expect(s).not.toHaveProperty('applies_when');
-      expect(s).not.toHaveProperty('required_for');
-      // The allowed passive shape:
-      expect(typeof s.id).toBe('string');
-      expect(typeof s.file).toBe('string');
-      expect(typeof s.title).toBe('string');
-      expect(typeof s.trigger).toBe('string');
-    }
-  });
+      test('manifest parses with skill + sections array', () => {
+        expect(manifest.skill).toBe(skill);
+        expect(Array.isArray(manifest.sections)).toBe(true);
+        expect(manifest.sections.length).toBeGreaterThan(0);
+      });

-  test('no generated orphan: every sections/X.md has a sections/X.md.tmpl → FAIL', () => {
-    const orphans = sectionMds.filter(md => !sectionTmpls.includes(`${md}.tmpl`));
-    expect(orphans).toEqual([]);
-  });
+      test('every manifest entry has a .md.tmpl source AND a generated .md', () => {
+        for (const s of manifest.sections) {
+          expect(fs.existsSync(path.join(sectionsDir, `${s.file}.tmpl`))).toBe(true);
+          expect(fs.existsSync(path.join(sectionsDir, s.file))).toBe(true);
+        }
+      });

-  test('no hand-edited generated file: every sections/X.md has the AUTO-GENERATED header → FAIL', () => {
-    for (const md of sectionMds) {
-      const head = fs.readFileSync(path.join(SHIP_SECTIONS, md), 'utf-8').slice(0, 120);
-      expect(head).toContain('AUTO-GENERATED');
-    }
-  });
+      test('manifest is PASSIVE — no applies_when / required_for predicate (CM2)', () => {
+        for (const s of manifest.sections) {
+          expect(s).not.toHaveProperty('applies_when');
+          expect(s).not.toHaveProperty('required_for');
+          // The allowed passive shape:
+          expect(typeof s.id).toBe('string');
+          expect(typeof s.file).toBe('string');
+          expect(typeof s.title).toBe('string');
+          expect(typeof s.trigger).toBe('string');
+        }
+      });

-  test('manifest orphan check (WARN in v2.0): every .md.tmpl is listed', () => {
-    const listed = new Set(manifest.sections.map((s: { file: string }) => `${s.file}.tmpl`));
-    const unlisted = sectionTmpls.filter(t => !listed.has(t));
-    if (unlisted.length > 0) {
-      // v2_PLAN.md: WARN now, FAIL in v2.1. Surface, don't fail the build yet.
-      // eslint-disable-next-line no-console
-      console.warn(`[section-manifest] manifest orphan(s) (not in manifest.json): ${unlisted.join(', ')}`);
-    }
-    expect(unlisted.length).toBeLessThanOrEqual(unlisted.length); // always passes; WARN only
-  });
+      test('no generated orphan: every sections/X.md has a sections/X.md.tmpl → FAIL', () => {
+        const orphans = sectionMds.filter(md => !sectionTmpls.includes(`${md}.tmpl`));
+        expect(orphans).toEqual([]);
+      });

-  test('section ids are unique', () => {
-    const ids = manifest.sections.map((s: { id: string }) => s.id);
-    expect(new Set(ids).size).toBe(ids.length);
-  });
+      test('no hand-edited generated file: every sections/X.md has the AUTO-GENERATED header → FAIL', () => {
+        for (const md of sectionMds) {
+          const head = fs.readFileSync(path.join(sectionsDir, md), 'utf-8').slice(0, 120);
+          expect(head).toContain('AUTO-GENERATED');
+        }
+      });
+
+      test('manifest orphan check (WARN in v2.0): every .md.tmpl is listed', () => {
+        const listed = new Set(manifest.sections.map((s: { file: string }) => `${s.file}.tmpl`));
+        const unlisted = sectionTmpls.filter(t => !listed.has(t));
+        if (unlisted.length > 0) {
+          // v2_PLAN.md: WARN now, FAIL in v2.1. Surface, don't fail the build yet.
+          // eslint-disable-next-line no-console
+          console.warn(`[section-manifest] ${skill} manifest orphan(s) (not in manifest.json): ${unlisted.join(', ')}`);
+        }
+        expect(unlisted.length).toBeLessThanOrEqual(unlisted.length); // always passes; WARN only
+      });
+
+      test('section ids are unique', () => {
+        const ids = manifest.sections.map((s: { id: string }) => s.id);
+        expect(new Set(ids).size).toBe(ids.length);
+      });
+    });
+  }
 });
@@ -0,0 +1,82 @@
+/**
+ * plan-ceo-review carve — static ordering guard (GATE tier, free, deterministic).
+ *
+ * This is the per-PR mechanical backstop for the v2-plan Phase B carve of
+ * plan-ceo-review (Codex outside-voice P2). The periodic real-PTY E2E
+ * (skill-e2e-plan-ceo-review-section-loading.test.ts) is the behavioral proof,
+ * but it runs weekly and costs money. This file runs on every `bun test` and
+ * fails CI the moment the carve's structural invariants break:
+ *
+ *  1. The skeleton points at the section with a STOP-Read directive, and that
+ *     directive sits AFTER Step 0 (scope + mode) — so the conversational Step 0
+ *     stays in the always-loaded skeleton, never stranded in the on-demand file.
+ *  2. The heavy review body (Sections 1-11) is NOT in the skeleton — it moved to
+ *     the section. A regression that inlines it back would re-bloat the skeleton.
+ *  3. The review report writer ("GSTACK REVIEW REPORT") lives in the section, and
+ *     the blocking EXIT PLAN MODE GATE that verifies it lives in the skeleton
+ *     AFTER the STOP — so the gate fires once the section work returns.
+ *  4. Nothing review-governing sits in the skeleton below the STOP (Codex P1):
+ *     no "Section N", no "## Mode Quick Reference", no "## Formatting Rules".
+ */
+
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+const SKELETON = path.join(ROOT, 'plan-ceo-review', 'SKILL.md');
+const SECTION = path.join(ROOT, 'plan-ceo-review', 'sections', 'review-sections.md');
+
+// Static guard over the generated skeleton (SKILL.md) and the carved section file.
+// Both files are read once when the describe body runs; every test below is a
+// plain substring / index check against those two strings.
+describe('plan-ceo-review carve — static ordering', () => {
+  const skeleton = fs.readFileSync(SKELETON, 'utf-8');
+  const section = fs.readFileSync(SECTION, 'utf-8');
+
+  // Index into the skeleton, -1 if absent.
+  const at = (needle: string): number => skeleton.indexOf(needle);
+
+  const STEP0 = '## Step 0: Nuclear Scope Challenge + Mode Selection';
+  // Marker these tests treat as the STOP-Read directive in the skeleton.
+  const STOP_DIRECTIVE = '> **STOP.**';
+  // Path the skeleton must mention so the agent knows which file to Read.
+  const SECTION_PATH = 'plan-ceo-review/sections/review-sections.md';
+  const GATE = 'GSTACK REVIEW REPORT';
+
+  test('skeleton emits a STOP-Read directive pointing at the section', () => {
+    expect(skeleton).toContain(STOP_DIRECTIVE);
+    expect(skeleton).toContain(SECTION_PATH);
+    expect(skeleton).toContain('## Section index — Read each section when its situation applies');
+  });
+
+  test('Step 0 (scope + mode) stays in the skeleton, BEFORE the STOP', () => {
+    const step0 = at(STEP0);
+    const stop = at(STOP_DIRECTIVE);
+    expect(step0).toBeGreaterThan(-1);
+    expect(stop).toBeGreaterThan(step0); // STOP fires only after Step 0
+  });
+
+  test('the heavy review body (Sections 1-11) is NOT in the skeleton', () => {
+    expect(skeleton).not.toContain('### Section 1: Architecture Review');
+    expect(skeleton).not.toContain('### Section 11:');
+    // ...it lives in the section instead.
+    expect(section).toContain('### Section 1: Architecture Review');
+    expect(section).toContain('### Section 11:');
+  });
+
+  test('nothing review-governing sits in the skeleton below the STOP (Codex P1)', () => {
+    // Mode Quick Reference + Formatting Rules govern review-time behavior and must
+    // travel with the section, not be stranded below the STOP in the skeleton.
+    // The check runs against the WHOLE skeleton, which is stricter than "below the
+    // STOP": these headings may not appear anywhere in it.
+    expect(skeleton).not.toContain('## Mode Quick Reference');
+    expect(skeleton).not.toContain('## Formatting Rules');
+    expect(section).toContain('## Mode Quick Reference');
+  });
+
+  test('review report writer lives in the section; the EXIT PLAN MODE GATE stays in the skeleton AFTER the STOP', () => {
+    // The report itself is produced inside the section work...
+    expect(section).toContain(GATE);
+    // ...and the blocking gate that verifies it is the last thing the skeleton runs.
+    const stop = at(STOP_DIRECTIVE);
+    const gate = skeleton.lastIndexOf(GATE);
+    // Without this, a missing STOP (-1) would let any gate occurrence pass the
+    // ordering comparison below.
+    expect(stop).toBeGreaterThan(-1);
+    expect(gate).toBeGreaterThan(stop);
+  });
+
+  test('the section is generated, not hand-edited', () => {
+    expect(section.slice(0, 120)).toContain('AUTO-GENERATED');
+  });
+});
@@ -0,0 +1,191 @@
+/**
+ * /plan-ceo-review section-loading E2E (periodic, paid, real-PTY) — v2 plan
+ * Phase B carve backstop. The per-PR guard is the free static test
+ * skill-ceo-section-ordering.test.ts; THIS is the behavioral proof that a real
+ * agent actually Reads the carved section instead of working from memory.
+ *
+ * After the carve, plan-ceo-review is a skeleton whose single STOP-Read directive
+ * (fired after Step 0 mode selection) points at sections/review-sections.md. This
+ * test runs the REAL /plan-ceo-review skill in plan mode against a fixture branch
+ * that has a plan worth reviewing, drives Step 0 to HOLD SCOPE (the simplest mode
+ * that still requires all 11 review sections), and asserts the agent Read
+ * review-sections.md before producing the review report.
+ *
+ * Codex outside-voice P1 fixes vs the naive port of the ship test:
+ *  - REFRESH THE INSTALL FIRST. The skill loads from the installed copy at
+ *    ~/.claude/skills/gstack/plan-ceo-review (a real copy on dev machines, fresh
+ *    on CI). A test that didn't refresh would assert against the pre-carve
+ *    monolith and trivially "pass" with zero section reads. The test body copies
+ *    the freshly-generated skeleton + sections into the install up front and
+ *    restores the prior state in `finally`, so a local run doesn't leave it mutated.
+ *  - HANDLE THE FULL STEP 0. plan-ceo's Step 0 can fire a system audit, WebSearch,
+ *    and several AskUserQuestion calls before mode selection — the answer loop
+ *    replies to every permission dialog / numbered list, not just two.
+ *
+ * Plan-mode framing keeps the agent from editing/committing. Cost: ~$3-5/run.
+ * Periodic tier.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import { spawnSync } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import {
+  launchClaudePty,
+  isPermissionDialogVisible,
+  isNumberedOptionListVisible,
+} from './helpers/claude-pty-runner';
+
+const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'periodic';
+const describeE2E = shouldRun ? describe : describe.skip;
+
+const REPO_ROOT = path.resolve(import.meta.dir, '..');
+const INSTALL_DIR = path.join(os.homedir(), '.claude', 'skills', 'gstack', 'plan-ceo-review');
+
+// Sections every plan-ceo-review run must consult after Step 0.
+const REQUIRED_SECTIONS = ['review-sections.md'];
+
+/**
+ * Copy the freshly-generated skeleton + sections into the installed skill so the
+ * PTY agent loads the carve under test.
+ *
+ * Copies plan-ceo-review/SKILL.md and every file in plan-ceo-review/sections
+ * except `*.md.tmpl` from the repo into INSTALL_DIR.
+ *
+ * @returns restore() — puts the install back as it was before this call: the
+ *   SKILL.md content (or its absence), the sections/ files (or the absence of the
+ *   directory), and the install dir itself when this call created it. Restore is
+ *   best-effort: a failure is reported with console.warn, never thrown.
+ * @throws any fs error raised while copying; the partial copy is rolled back
+ *   (best-effort) before the error propagates.
+ */
+function refreshInstall(): () => void {
+  const repoSkill = path.join(REPO_ROOT, 'plan-ceo-review', 'SKILL.md');
+  const repoSections = path.join(REPO_ROOT, 'plan-ceo-review', 'sections');
+  const installSkill = path.join(INSTALL_DIR, 'SKILL.md');
+  const installSections = path.join(INSTALL_DIR, 'sections');
+
+  // Snapshot prior state for restore.
+  const hadInstallDir = fs.existsSync(INSTALL_DIR);
+  const priorSkill = fs.existsSync(installSkill) ? fs.readFileSync(installSkill) : null;
+  const hadSections = fs.existsSync(installSections);
+  const priorSections: Record<string, Buffer> = {};
+  if (hadSections) {
+    for (const f of fs.readdirSync(installSections)) {
+      priorSections[f] = fs.readFileSync(path.join(installSections, f));
+    }
+  }
+
+  const restore = (): void => {
+    try {
+      if (!hadInstallDir) {
+        // The install dir did not exist before this call, so nothing in it predates us.
+        fs.rmSync(INSTALL_DIR, { recursive: true, force: true });
+        return;
+      }
+      if (priorSkill !== null) {
+        fs.writeFileSync(installSkill, priorSkill);
+      } else {
+        // There was no SKILL.md before: drop the one we copied in.
+        fs.rmSync(installSkill, { force: true });
+      }
+      if (!hadSections) {
+        fs.rmSync(installSections, { recursive: true, force: true });
+        return;
+      }
+      // Restore the prior section files; drop any we added.
+      for (const f of fs.readdirSync(installSections)) {
+        if (!(f in priorSections)) fs.rmSync(path.join(installSections, f), { force: true });
+      }
+      for (const [f, buf] of Object.entries(priorSections)) {
+        fs.writeFileSync(path.join(installSections, f), buf);
+      }
+    } catch (err) {
+      // Best-effort restore: surface the failure instead of hiding it, but don't
+      // let it mask the test result.
+      // eslint-disable-next-line no-console
+      console.warn(`[plan-ceo-section-loading] could not fully restore ${INSTALL_DIR}:`, err);
+    }
+  };
+
+  // Apply: skeleton + every generated section file (.md) + manifest.
+  try {
+    fs.mkdirSync(INSTALL_DIR, { recursive: true });
+    fs.copyFileSync(repoSkill, installSkill);
+    fs.mkdirSync(installSections, { recursive: true });
+    for (const f of fs.readdirSync(repoSections)) {
+      if (f.endsWith('.md.tmpl')) continue; // install carries generated files, not templates
+      fs.copyFileSync(path.join(repoSections, f), path.join(installSections, f));
+    }
+  } catch (err) {
+    // A half-applied copy must not outlive this call: the caller never receives
+    // restore() when we throw, so roll back here.
+    restore();
+    throw err;
+  }
+
+  return restore;
+}
+
+/**
+ * Fixture: a feature branch with a real change + a plan file worth reviewing.
+ *
+ * Creates a temp root holding a bare `origin.git` and a `workspace` work tree.
+ * The work tree gets an initial commit on `main` (pushed), then a
+ * `feat/cache-layer` branch that adds PLAN.md and changes app.js (committed and
+ * pushed).
+ *
+ * @returns the work tree path and the temp root; the caller removes `root`.
+ * @throws Error when any setup command exits non-zero or fails to run; the temp
+ *   root is removed before the error propagates, since the caller never sees it.
+ */
+function buildPlanFixture(): { workTree: string; root: string } {
+  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-ceo-secload-'));
+  try {
+    const workTree = path.join(root, 'workspace');
+    const bareRemote = path.join(root, 'origin.git');
+    fs.mkdirSync(workTree, { recursive: true });
+    const sh = (cmd: string, cwd: string): void => {
+      const r = spawnSync('bash', ['-c', cmd], { cwd, stdio: 'pipe', timeout: 15_000 });
+      if (r.status !== 0) {
+        // r.error is set when the process could not be spawned or timed out; in
+        // that case stderr alone says nothing useful.
+        const spawnError = r.error ? `\n${r.error.message}` : '';
+        throw new Error(`fixture setup failed at "${cmd}":\n${r.stderr?.toString()}${spawnError}`);
+      }
+    };
+    sh(`git init --bare "${bareRemote}"`, root);
+    sh('git init -b main', workTree);
+    sh('git config user.email "t@t.com" && git config user.name "T" && git config commit.gpgsign false', workTree);
+    fs.writeFileSync(path.join(workTree, 'app.js'), '// base\n');
+    sh('git add -A && git commit -m "chore: initial"', workTree);
+    sh(`git remote add origin "${bareRemote}" && git push -u origin main`, workTree);
+    // Feature branch with a real change + a plan describing it (something to review).
+    sh('git checkout -b feat/cache-layer', workTree);
+    fs.writeFileSync(
+      path.join(workTree, 'PLAN.md'),
+      [
+        '# Plan: add an in-memory cache layer',
+        '',
+        '## Context',
+        'Reads hit the DB on every request. Add a process-local LRU cache in front of',
+        'the read path to cut DB load.',
+        '',
+        '## Approach',
+        '- Wrap the read repository in a cache that stores the last 1000 keys.',
+        '- Invalidate on write.',
+        '',
+        '## Out of scope',
+        'Distributed cache, cross-process coherence.',
+        '',
+      ].join('\n'),
+    );
+    fs.writeFileSync(path.join(workTree, 'app.js'), '// base\nexport function read(k) { return db.get(k); }\n');
+    sh('git add -A && git commit -m "feat: cache layer plan + stub"', workTree);
+    sh('git push -u origin feat/cache-layer', workTree);
+    return { workTree, root };
+  } catch (err) {
+    // The caller only learns `root` on success, so clean up here on failure.
+    try { fs.rmSync(root, { recursive: true, force: true }); } catch { /* keep the original error */ }
+    throw err;
+  }
+}
+
+// Behavioral check: run the real /plan-ceo-review in a plan-mode PTY against the
+// fixture repo, collect every `sections/<name>.md` path that appears in the
+// session output, and require that all REQUIRED_SECTIONS were seen and that a
+// report marker appeared before the polling window (780 s) ran out.
+describeE2E('/plan-ceo-review section-loading E2E (periodic, real-PTY, installed skill)', () => {
+  test(
+    'a real review Reads the carved section before producing the report',
+    async () => {
+      const readSections = new Set<string>();
+      let reportReady = false;
+
+      // Everything that needs undoing is acquired INSIDE the try, so a failure in
+      // a later setup step (fixture build, PTY launch) still unwinds the earlier
+      // ones — most importantly the mutated installed skill.
+      let restore: (() => void) | undefined;
+      let root: string | undefined;
+      let session: Awaited<ReturnType<typeof launchClaudePty>> | undefined;
+      try {
+        restore = refreshInstall();
+        const fixture = buildPlanFixture();
+        root = fixture.root;
+        const pty = await launchClaudePty({
+          permissionMode: 'plan',
+          cwd: fixture.workTree,
+          timeoutMs: 900_000,
+          env: { NO_COLOR: '1' },
+        });
+        session = pty;
+
+        await Bun.sleep(8000);
+        const since = pty.mark();
+        // HOLD SCOPE = simplest mode that still walks all 11 review sections.
+        pty.send('/plan-ceo-review review PLAN.md, hold scope\r');
+        const start = Date.now();
+        let lastPermSig = '';
+        while (Date.now() - start < 780_000) {
+          await Bun.sleep(3000);
+          if (pty.exited()) break;
+          const visible = pty.visibleSince(since);
+          const tail = visible.slice(-1500);
+          // Answer prompts (system audit, WebSearch, Step 0 questions) by taking
+          // option 1. lastPermSig keeps us from answering the same screen twice.
+          // NOTE(review): the && only fires when the tail is BOTH a numbered option
+          // list AND a permission dialog. If a Step 0 question renders as a numbered
+          // list that is not a permission dialog, it is never answered here —
+          // confirm against the helpers whether this should be ||.
+          if (isNumberedOptionListVisible(tail) && isPermissionDialogVisible(tail)) {
+            const sig = visible.slice(-500);
+            if (sig !== lastPermSig) { lastPermSig = sig; pty.send('1\r'); await Bun.sleep(1500); continue; }
+          }
+          for (const m of visible.matchAll(/sections\/([A-Za-z0-9._-]+\.md)/g)) readSections.add(m[1]);
+          if (/GSTACK REVIEW REPORT|COMPLETION SUMMARY|ready to execute/i.test(visible)) {
+            reportReady = true;
+            break;
+          }
+        }
+      } finally {
+        try {
+          if (session) await session.close();
+        } finally {
+          // Runs even if close() throws: drop the fixture, then put the install back.
+          if (root) {
+            try { fs.rmSync(root, { recursive: true, force: true }); } catch { /* ignore */ }
+          }
+          restore?.();
+        }
+      }
+
+      const missing = REQUIRED_SECTIONS.filter(s => !readSections.has(s));
+      expect({ reportReady, read: [...readSections], missing }).toEqual({
+        reportReady: true,
+        read: expect.any(Array),
+        missing: [],
+      });
+    },
+    1_020_000,
+  );
+});
@@ -146,11 +146,14 @@ describe('SKILL.md size budget regression (gate, free)', () => {
   * skill, so this is a comfortable ceiling that still catches accidental
   * mass deletion (e.g., a refactor that strips the body of a skill).
   *
-   * v2.0.0.0 will introduce the sections/ pattern for 5 heavyweights
+   * v2.0.0.0 introduces the sections/ pattern for 5 heavyweights
   * (ship, plan-ceo-review, office-hours, plan-eng-review,
-   * plan-design-review). Those skills will legitimately shrink to ~15 KB
-   * skeletons. When that lands, add them to SECTIONS_EXTRACTED so the floor
-   * relaxes for them.
+   * plan-design-review). Carved so far: ship (skeleton ~83 KB) and
+   * plan-ceo-review (skeleton ~81 KB, down from the 138 KB monolith). Those
+   * skeletons legitimately fall below the 80% body-strip floor, so each carved
+   * skill is added to SECTIONS_EXTRACTED; its union is guarded instead by the
+   * sectioned invariant in parity-harness.ts (minBytes on skeleton+sections).
+   * Add the remaining three here as they carve.
   */
  test('no skill shrinks past 80% of v1.47.0.0 baseline (catches accidental body strip)', () => {
    const baseline: ParityBaseline = JSON.parse(fs.readFileSync(BASELINE_PATH, 'utf-8'));
@@ -160,7 +163,7 @@ describe('SKILL.md size budget regression (gate, free)', () => {
    // because prose moved into sections/*.md. The union size is guarded instead
    // by the sectioned ship invariant in parity-harness.ts (minBytes on the
    // skeleton+sections union), so exempt the skeleton from the body-strip floor.
-    const SECTIONS_EXTRACTED = new Set<string>(['ship']);
+    const SECTIONS_EXTRACTED = new Set<string>(['ship', 'plan-ceo-review']);

    const undershoots: Array<{
      skill: string; beforeBytes: number; afterBytes: number; ratio: number;
@@ -7,14 +7,13 @@ import * as path from 'path';

 const ROOT = path.resolve(import.meta.dir, '..');

-// Carved-skill aware (v2 plan T9): ship is a skeleton SKILL.md + sections/*.md.
-// Read the union so validations of content that moved into a section still hold.
-// `_SHIP_MD` is a distinct path expression so a mechanical read-replace can't
-// recurse into this helper.
-const _SHIP_MD = path.join(ROOT, 'ship', 'SKILL.md');
-function readShipUnion(): string {
-  let t = fs.readFileSync(_SHIP_MD, 'utf-8');
-  const secDir = path.join(ROOT, 'ship', 'sections');
+// Carved-skill aware (v2 plan T9 / Phase B): a carved skill is a skeleton SKILL.md
+// plus sections/*.md. Read the union so validations of content that moved into a
+// section still hold. For an uncarved skill (no sections dir) this is just the
+// skeleton, so readSkillUnion is safe to use everywhere.
+function readSkillUnion(skill: string): string {
+  let t = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
+  const secDir = path.join(ROOT, skill, 'sections');
  if (fs.existsSync(secDir)) {
    for (const f of fs.readdirSync(secDir).sort()) {
      if (f.endsWith('.md')) t += '\n' + fs.readFileSync(path.join(secDir, f), 'utf-8');
@@ -22,6 +21,9 @@ function readShipUnion(): string {
  }
  return t;
 }
+function readShipUnion(): string {
+  return readSkillUnion('ship');
+}

 describe('SKILL.md command validation', () => {
  test('all $B commands in SKILL.md are valid browse commands', () => {
@@ -548,8 +550,8 @@ describe('TODOS-format.md reference consistency', () => {

  test('skills that write TODOs reference TODOS-format.md', () => {
    const shipContent = readShipUnion();
-    const ceoPlanContent = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
-    const engPlanContent = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
+    const ceoPlanContent = readSkillUnion('plan-ceo-review'); // carved: TODOS-format ref moved to section
+    const engPlanContent = readSkillUnion('plan-eng-review');

    expect(shipContent).toContain('TODOS-format.md');
    expect(ceoPlanContent).toContain('TODOS-format.md');
@@ -912,8 +914,10 @@ describe('CEO review mode validation', () => {
  });

  test('has docs/designs promotion section', () => {
-    expect(content).toContain('docs/designs');
-    expect(content).toContain('PROMOTED');
+    // Carved (v2 plan Phase B): the promotion block moved into the review section.
+    const union = readSkillUnion('plan-ceo-review');
+    expect(union).toContain('docs/designs');
+    expect(union).toContain('PROMOTED');
  });

  test('mode quick reference has four columns', () => {
@@ -109,8 +109,10 @@ describe('selectTests', () => {
    // E2E test also depends on plan-ceo-review/** (5-option scope decision
    // regression for the "drop to fit 4 options" failure mode).
    expect(result.selected).toContain('plan-ceo-split-overflow');
-    expect(result.selected.length).toBe(22);
-    expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 22);
+    // v2 plan Phase B carve: the section-loading E2E depends on plan-ceo-review/**.
+    expect(result.selected).toContain('plan-ceo-section-loading');
+    expect(result.selected.length).toBe(23);
+    expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 23);
  });

  test('global touchfile triggers ALL tests', () => {