v1.13.0.0 feat: add Claude outside-voice skill (#1212)

* Add Claude outside-voice skill
* Fix gbrain config isolation test
* Restore Opus fanout overlay nudge
* Warn on oversized tracked files
* Release v1.13.0.0
* Fix Claude diff temp file handling
* Remove Opus fanout overlay nudge
2026-05-01 19:25:10 +02:00 · 2026-04-25 11:52:48 -07:00
parent 6209163900
commit 23c4d7b228
10 changed files with 450 additions and 31 deletions
@@ -1,5 +1,31 @@
 # Changelog

+## [1.13.0.0] - 2026-04-25
+
+## **`/gstack-claude` gives non-Claude hosts a read-only outside voice.**
+
+This release adds the reverse of `/codex`: external hosts can now ask Claude for review, adversarial challenge, or read-only consultation without giving the nested Claude session any file-mutating tools.
+
+### Added
+
+- `claude/SKILL.md.tmpl`: new external-only `/gstack-claude` skill with `review`, `challenge`, and `consult` modes.
+- Review and challenge modes feed the detected base-branch diff to `claude -p --tools ""` with `--disable-slash-commands`.
+- Consult mode allows only `Read,Grep,Glob`, explicitly disallows `Bash,Edit,Write`, saves `.context/claude-session-id`, and can resume the prior consult session.
+- Claude prompt transport now uses a `/tmp/gstack-claude-prompt-*` file piped over stdin with cleanup.
+- Auth checks require the `claude` CLI plus either `~/.claude/.credentials.json` or `ANTHROPIC_API_KEY`.
+- JSON output parsing extracts `result`, `usage`, `model`, `session_id`, and `is_error`.
+
+### Fixed
+
+- `hosts/claude.ts`: excludes the Claude outside-voice skill from Claude-host generation.
+- `test/brain-sync.test.ts`: the `GSTACK_HOME` isolation test now snapshots and preserves the real config file instead of assuming local machine state.
+- `claude/SKILL.md.tmpl`: uses `mktemp` for diff capture in review/challenge mode instead of a `$$`-based temp path, avoiding collisions across concurrent invocations.
+
+### Changed
+
+- `test/skill-validation.test.ts`: the tracked-file-size check is now advisory. Large fixtures remain allowed in git and are reported as `[size-warning]` instead of failing the suite.
+- `test/gen-skill-docs.test.ts`: generation coverage now asserts external host docs include `gstack-claude/SKILL.md` while Claude host output omits `claude/SKILL.md`.
+
 ## [1.12.2.0] - 2026-04-24

 ## **`/setup-gbrain` polish: PATH parsing, repo init order, MCP user scope.**
@@ -1 +1 @@
-1.12.2.0
+1.13.0.0
@@ -0,0 +1,341 @@
+---
+name: claude
+preamble-tier: 3
+version: 1.0.0
+description: |
+  Claude Code CLI wrapper for non-Claude hosts - three modes. Review: independent
+  diff review via claude -p. Challenge: adversarial failure-mode review. Consult:
+  ask Claude about the repo with read-only file tools. Use when asked for "claude
+  review", "claude challenge", "ask claude", "second opinion from claude", or
+  "outside voice". (gstack)
+triggers:
+  - claude review
+  - claude challenge
+  - ask claude
+allowed-tools:
+  - Bash
+  - Read
+  - AskUserQuestion
+---
+
+{{PREAMBLE}}
+
+{{BASE_BRANCH_DETECT}}
+
+# /claude - Claude Outside Voice
+
+You are running the `/claude` skill from a non-Claude host. This wraps `claude -p`
+to get an independent Claude Code second opinion without allowing nested Claude to
+modify files.
+
+The generated external invocation name is `gstack-claude`.
+
+---
+
+## Step 0: Check Claude CLI
+
+```bash
+CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "")
+[ -z "$CLAUDE_BIN" ] && echo "NOT_FOUND" || echo "FOUND: $CLAUDE_BIN"
+```
+
+If `NOT_FOUND`, stop and tell the user:
+"Claude CLI not found. Install Claude Code, then re-run this skill."
+
+Check auth:
+
+```bash
+if [ -f "$HOME/.claude/.credentials.json" ] || [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+  echo "AUTH_FOUND"
+else
+  echo "AUTH_MISSING"
+fi
+```
+
+If `AUTH_MISSING`, stop and tell the user:
+"No Claude authentication found. Run `claude` interactively to log in, or export `ANTHROPIC_API_KEY`, then re-run this skill."
+
+---
+
+## Safety Boundary
+
+Nested Claude must stay focused on the user's repository and must not run gstack
+skills from inside this skill.
+
+All `claude -p` calls MUST include:
+
+- `--disable-slash-commands`
+- Review/challenge: `--tools ""`
+- Consult: `--allowedTools Read,Grep,Glob --disallowedTools Bash,Edit,Write`
+
+Never pass `Bash`, `Edit`, or `Write` to nested Claude in this skill.
+
+All prompts MUST be written to a temp file and fed through stdin. Never interpolate
+user text directly into the shell command.
+
+---
+
+## Step 1: Detect Mode
+
+Parse the user's input:
+
+1. `/claude review` or `/claude review <instructions>` - **Review mode** (Step 2A)
+2. `/claude challenge` or `/claude challenge <focus>` - **Challenge mode** (Step 2B)
+3. `/claude` with no arguments, or `/claude <anything else>` - **Consult mode** (Step 2C)
+
+If `/claude` is invoked with no arguments and a diff exists against the base branch, ask the user whether to review, challenge, or consult. Otherwise fall back to Consult mode.
+
+---
+
+## Shared Helpers
+
+Use these shell snippets in every mode.
+
+Create temp files:
+
+```bash
+PROMPT_FILE=$(mktemp /tmp/gstack-claude-prompt-XXXXXX)
+RESP_FILE=$(mktemp /tmp/gstack-claude-response-XXXXXX.json)
+ERR_FILE=$(mktemp /tmp/gstack-claude-error-XXXXXX.txt)
+```
+
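+Cleanup at the end of every mode (Review and Challenge modes also create `$DIFF_FILE` and must remove it too, as shown in their own cleanup steps):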
+
+```bash
+rm -f "$PROMPT_FILE" "$RESP_FILE" "$ERR_FILE"
+```
+
+Parse JSON output:
+
+```bash
+python3 - "$RESP_FILE" <<'PY'
+import json, sys
+path = sys.argv[1]
+try:
+    obj = json.load(open(path))
+except Exception as exc:
+    print(f"CLAUDE_JSON_PARSE_ERROR: {exc}")
+    sys.exit(0)
+
+if obj.get("is_error"):
+    print("CLAUDE_ERROR: true")
+
+result = obj.get("result") or obj.get("response") or ""
+if result:
+    print(result)
+
+usage = obj.get("usage") or {}
+input_tokens = usage.get("input_tokens", 0) or 0
+output_tokens = usage.get("output_tokens", 0) or 0
+cache_read = usage.get("cache_read_input_tokens", 0) or 0
+model = obj.get("model") or "unknown"
+session_id = obj.get("session_id") or ""
+
+print(f"\nTokens: input={input_tokens} output={output_tokens} cache_read={cache_read} | Model: {model}")
+if session_id:
+    print(f"SESSION_ID:{session_id}")
+PY
+```
+
+If stderr contains `auth`, `login`, or `unauthorized`, tell the user:
+"Claude authentication failed. Run `claude` interactively to authenticate or export `ANTHROPIC_API_KEY`."
+
+---
+
+## Step 2A: Review Mode
+
+Review the current branch diff with nested Claude in tool-less mode.
+
+1. Fetch base and capture diff:
+
+```bash
+_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
+cd "$_REPO_ROOT"
+DIFF_FILE=$(mktemp /tmp/gstack-claude-diff-XXXXXX.patch)
+git fetch origin <base> --quiet 2>/dev/null || true
+git diff "origin/<base>" > "$DIFF_FILE" 2>/dev/null || git diff "<base>" > "$DIFF_FILE"
+```
+
+If the diff file is empty, stop and say:
+"Nothing to review - no changes against the base branch."
+
+2. Write the prompt file:
+
+```bash
+cat > "$PROMPT_FILE" <<'EOF'
+You are a brutally honest Claude Code reviewer. Review this git diff for bugs,
+production failure modes, security issues, missing tests, and maintainability
+problems. Be direct. No compliments. Reference files and changed code where possible.
+
+Additional user instructions, if any:
+<custom review instructions>
+
+DIFF:
+EOF
+cat "$DIFF_FILE" >> "$PROMPT_FILE"
+```
+
+3. Run Claude:
+
+```bash
+cat "$PROMPT_FILE" | claude -p --output-format json --disable-slash-commands --tools "" > "$RESP_FILE" 2>"$ERR_FILE"
+```
+
+4. Present the parsed output:
+
+```
+CLAUDE SAYS (code review):
+============================================================
+<parsed result from RESP_FILE>
+============================================================
+```
+
+5. Cleanup:
+
+```bash
+rm -f "$DIFF_FILE" "$PROMPT_FILE" "$RESP_FILE" "$ERR_FILE"
+```
+
+---
+
+## Step 2B: Challenge Mode
+
+Run an adversarial failure-mode review with nested Claude in tool-less mode.
+
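+1. Capture the diff into `$DIFF_FILE` using the same diff commands from Review mode (Step 2A, item 1). If the diff file is empty, stop and say: "Nothing to review - no changes against the base branch."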
+
+2. Write the prompt:
+
+```bash
+cat > "$PROMPT_FILE" <<'EOF'
+You are an adversarial Claude Code reviewer. Try to break this change before users do.
+Find edge cases, race conditions, security holes, resource leaks, silent data
+corruption, bad error handling, and operational failure modes. Be thorough. No
+compliments. If the user provided a focus area, prioritize it.
+
+Focus area, if any:
+<focus>
+
+DIFF:
+EOF
+cat "$DIFF_FILE" >> "$PROMPT_FILE"
+```
+
+3. Run Claude:
+
+```bash
+cat "$PROMPT_FILE" | claude -p --output-format json --disable-slash-commands --tools "" > "$RESP_FILE" 2>"$ERR_FILE"
+```
+
+4. Present the parsed output:
+
+```
+CLAUDE SAYS (adversarial challenge):
+============================================================
+<parsed result from RESP_FILE>
+============================================================
+```
+
+5. Cleanup:
+
+```bash
+rm -f "$DIFF_FILE" "$PROMPT_FILE" "$RESP_FILE" "$ERR_FILE"
+```
+
+---
+
+## Step 2C: Consult Mode
+
+Ask Claude about the repository. Consult mode may inspect files, but only with
+read-only tools.
+
+1. Check for an existing Claude session:
+
+```bash
+cat .context/claude-session-id 2>/dev/null || echo "NO_SESSION"
+```
+
+If a session exists, ask the user whether to continue it or start fresh.
+
+2. Write the prompt:
+
+```bash
+cat > "$PROMPT_FILE" <<'EOF'
+You are Claude Code acting as an independent outside voice for this repository.
+Answer the user's question directly. You may inspect repository files with Read,
+Grep, and Glob only. Do not use Bash. Do not edit or write files. Do not invoke
+slash commands or gstack skills.
+
+USER QUESTION:
+<user prompt>
+EOF
+```
+
+3. Run Claude.
+
+For a new session:
+
+```bash
+cat "$PROMPT_FILE" | claude -p --output-format json --disable-slash-commands --allowedTools Read,Grep,Glob --disallowedTools Bash,Edit,Write > "$RESP_FILE" 2>"$ERR_FILE"
+```
+
+For a resumed session:
+
+```bash
+cat "$PROMPT_FILE" | claude -p --resume "<session-id>" --output-format json --disable-slash-commands --allowedTools Read,Grep,Glob --disallowedTools Bash,Edit,Write > "$RESP_FILE" 2>"$ERR_FILE"
+```
+
+4. Parse and save the session id:
+
+```bash
+SESSION_ID=$(python3 - "$RESP_FILE" <<'PY'
+import json, sys
+try:
+    obj = json.load(open(sys.argv[1]))
+    print(obj.get("session_id") or "")
+except Exception:
+    print("")
+PY
+)
+if [ -n "$SESSION_ID" ]; then
+  mkdir -p .context
+  printf "%s\n" "$SESSION_ID" > .context/claude-session-id
+fi
+```
+
+5. Present the parsed output:
+
+```
+CLAUDE SAYS (consult):
+============================================================
+<parsed result from RESP_FILE>
+============================================================
+Session saved - run /claude again to continue this conversation.
+```
+
+6. Cleanup:
+
+```bash
+rm -f "$PROMPT_FILE" "$RESP_FILE" "$ERR_FILE"
+```
+
+---
+
+## Error Handling
+
+- **Binary not found:** Stop with install instructions.
+- **Auth missing:** Stop with login/API key instructions.
+- **Auth failure from stderr:** Surface the stderr line and ask the user to re-authenticate.
+- **JSON parse failure:** Show raw stdout from `$RESP_FILE` and stderr from `$ERR_FILE`.
+- **Empty response:** Tell the user "Claude returned no response. Check stderr for errors."
+- **Resume failure:** Delete `.context/claude-session-id` and retry with a fresh session.
+
+---
+
+## Important Rules
+
+- Nested Claude is read-only in consult mode and tool-less in review/challenge.
+- Always include `--disable-slash-commands`.
+- Never pass nested Claude `Bash`, `Edit`, or `Write`.
+- Never interpolate user text into a shell command.
+- Present Claude's response faithfully, then add any host-agent synthesis after it.
@@ -19,7 +19,7 @@ const claude: HostConfig = {

  generation: {
    generateMetadata: false,
-    skipSkills: [],
+    skipSkills: ['claude'],  // Claude outside-voice skill is for non-Claude hosts
  },

  pathRewrites: [],  // Claude is the primary host — no rewrites needed
@@ -1,6 +1,6 @@
 {
  "name": "gstack",
-  "version": "1.12.2.0",
+  "version": "1.13.0.0",
  "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
  "license": "MIT",
  "type": "module",
@@ -7,7 +7,7 @@
 *   3. The SDK event stream contains the types we assume (system init, assistant,
 *      result) with the fields we destructure.
 *   4. `scripts/resolvers/model-overlay.ts` resolves `{{INHERIT:claude}}` against
- *      `opus-4-7.md` AND the resolved text contains the "Fan out explicitly" nudge.
+ *      `opus-4-7.md` with no unresolved inheritance directives.
 *   5. A local `claude` binary exists at `which claude` so binary pinning is possible.
 *
 * Run: bun run scripts/preflight-agent-sdk.ts
@@ -28,7 +28,7 @@ async function main() {
    failures.push(msg);
  };

-  // 1. Overlay resolver + fanout nudge text
+  // 1. Overlay resolver
  console.log('1. Overlay resolver');
  const resolved = readOverlay('opus-4-7');
  if (!resolved) {
@@ -40,11 +40,6 @@ async function main() {
    } else {
      pass('no unresolved INHERIT directives');
    }
-    if (!/Fan out explicitly/i.test(resolved)) {
-      fail('resolved overlay does not contain "Fan out explicitly" text');
-    } else {
-      pass('fanout nudge text present in resolved overlay');
-    }
  }

  // 2. Local claude binary exists
@@ -97,11 +97,13 @@ describe('gstack-config gbrain keys', () => {
  });

  test('GSTACK_HOME overrides real config dir', () => {
-    run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
-    // Real ~/.gstack/config.yaml must NOT have been touched.
+    // Real ~/.gstack/config.yaml must not change, regardless of what it
+    // already contains on the developer's machine.
    const realConfig = path.join(os.homedir(), '.gstack', 'config.yaml');
-    const real = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : '';
-    expect(real).not.toContain('gbrain_sync_mode: full');
+    const before = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
+    run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
+    const after = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
+    expect(after).toBe(before);
  });
 });

@@ -56,6 +56,9 @@ const ALL_SKILLS = (() => {
  return skills;
 })();

+const CLAUDE_SKIPPED_SKILL_DIRS = new Set(['claude']);
+const CLAUDE_GENERATED_SKILLS = ALL_SKILLS.filter(skill => !CLAUDE_SKIPPED_SKILL_DIRS.has(skill.dir));
+
 describe('gen-skill-docs', () => {
  test('generated SKILL.md contains all command categories', () => {
    const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
@@ -114,7 +117,7 @@ describe('gen-skill-docs', () => {
  });

  test('every skill has a generated SKILL.md with auto-generated header', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const mdPath = path.join(ROOT, skill.dir, 'SKILL.md');
      expect(fs.existsSync(mdPath)).toBe(true);
      const content = fs.readFileSync(mdPath, 'utf-8');
@@ -124,7 +127,7 @@ describe('gen-skill-docs', () => {
  });

  test('every generated SKILL.md has valid YAML frontmatter', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      expect(content.startsWith('---\n')).toBe(true);
      expect(content).toContain('name:');
@@ -133,13 +136,18 @@ describe('gen-skill-docs', () => {
  });

  test(`every generated SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      const description = extractDescription(content);
      expect(description.length).toBeLessThanOrEqual(MAX_SKILL_DESCRIPTION_LENGTH);
    }
  });

+  test('Claude outside-voice skill is not generated for Claude host', () => {
+    expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md.tmpl'))).toBe(true);
+    expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md'))).toBe(false);
+  });
+
  test(`every Codex SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
    const agentsDir = path.join(ROOT, '.agents', 'skills');
    if (!fs.existsSync(agentsDir)) return; // skip if not generated
@@ -186,7 +194,7 @@ describe('gen-skill-docs', () => {
    expect(result.exitCode).toBe(0);
    const output = result.stdout.toString();
    // Every skill should be FRESH
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const file = skill.dir === '.' ? 'SKILL.md' : `${skill.dir}/SKILL.md`;
      expect(output).toContain(`FRESH: ${file}`);
    }
@@ -194,7 +202,7 @@ describe('gen-skill-docs', () => {
  });

  test('no generated SKILL.md contains unresolved placeholders', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      const unresolved = content.match(/\{\{[A-Z_]+\}\}/g);
      expect(unresolved).toBeNull();
@@ -264,7 +272,7 @@ describe('gen-skill-docs', () => {
  });

  test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      if (!content.includes('.pending-')) continue;
      // Must NOT have a bare shell glob ".pending-*" outside of find's -name argument
@@ -275,7 +283,7 @@ describe('gen-skill-docs', () => {
  });

  test('bash blocks with shell globs are zsh-safe (setopt guard or find)', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      const bashBlocks = [...content.matchAll(/```bash\n([\s\S]*?)```/g)].map(m => m[1]);

@@ -1603,6 +1611,20 @@ describe('Codex generation (--host codex)', () => {
    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-codex'))).toBe(false);
  });

+  test('Codex output includes Claude outside-voice skill with read-only boundary', () => {
+    const content = fs.readFileSync(path.join(AGENTS_DIR, 'gstack-claude', 'SKILL.md'), 'utf-8');
+    expect(content).toContain('claude -p');
+    expect(content).toContain('mktemp /tmp/gstack-claude-prompt-');
+    expect(content).toContain('mktemp /tmp/gstack-claude-diff-');
+    expect(content).not.toContain('/tmp/gstack-claude-diff-$$');
+    expect(content).toContain('cat "$PROMPT_FILE" | claude -p');
+    expect(content).toContain('--disable-slash-commands');
+    expect(content).toContain('--tools ""');
+    expect(content).toContain('--allowedTools Read,Grep,Glob');
+    expect(content).toContain('--disallowedTools Bash,Edit,Write');
+    expect(content).toContain('is_error');
+  });
+
  test('Codex review step stripped from Codex-host ship and review', () => {
    const shipContent = fs.readFileSync(path.join(AGENTS_DIR, 'gstack-ship', 'SKILL.md'), 'utf-8');
    expect(shipContent).not.toContain('codex review --base');
@@ -1773,7 +1795,7 @@ describe('Codex generation (--host codex)', () => {
  });

  test('Claude output unchanged: all Claude skills have zero Codex paths', () => {
-    for (const skill of ALL_SKILLS) {
+    for (const skill of CLAUDE_GENERATED_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
      // pair-agent legitimately documents how Codex agents store credentials.
      // codex + autoplan document the Codex CLI auth file (~/.codex/auth.json)
@@ -1996,6 +2018,16 @@ describe('Parameterized host smoke tests', () => {
        }
      });

+      test('generates Claude outside-voice skill for external hosts', () => {
+        const skillMd = path.join(hostDir, 'gstack-claude', 'SKILL.md');
+        expect(fs.existsSync(skillMd)).toBe(true);
+        const content = fs.readFileSync(skillMd, 'utf-8');
+        expect(content).toContain('claude -p');
+        expect(content).toContain('--disable-slash-commands');
+        expect(content).toContain('--allowedTools Read,Grep,Glob');
+        expect(content).toContain('--disallowedTools Bash,Edit,Write');
+      });
+
      test('--dry-run freshness check passes', () => {
        const result = Bun.spawnSync(
          ['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', hostConfig.name, '--dry-run'],
@@ -82,9 +82,8 @@ describe('Opus 4.7 overlay — pacing directive', () => {
    expect(out).toMatch(/user approval/i);
  });

-  test('resolved overlay keeps Fan out / Effort-match / Literal interpretation nudges', () => {
+  test('resolved overlay keeps Effort-match / Literal interpretation nudges', () => {
    const out = generateModelOverlay(makeCtx('opus-4-7'));
-    expect(out).toContain('Fan out explicitly');
    expect(out).toContain('Effort-match the step');
    expect(out).toContain('Literal interpretation awareness');
  });
@@ -1468,12 +1468,16 @@ describe('Codex skill validation', () => {
    cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
  });

-  // Discover all Claude skills with templates (except /codex which is Claude-only)
+  // Discover all shared skills with templates.
+  // Host-exclusive outside-voice skills are intentionally omitted here:
+  // - /codex is Claude-only
+  // - /claude is external-host-only
  const CLAUDE_SKILLS_WITH_TEMPLATES = (() => {
    const skills: string[] = [];
    for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
      if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
      if (entry.name === 'codex') continue; // Claude-only skill
+      if (entry.name === 'claude') continue; // External-host-only skill
      if (fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) {
        skills.push(entry.name);
      }
@@ -1504,6 +1508,13 @@ describe('Codex skill validation', () => {
    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-codex', 'SKILL.md'))).toBe(false);
  });

+  test('/claude skill is external-host-only — no Claude-host variant', () => {
+    // Claude host should not get an outside-voice skill that shells into Claude.
+    expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md'))).toBe(false);
+    // Codex/external hosts should get the generated wrapper.
+    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-claude', 'SKILL.md'))).toBe(true);
+  });
+
  test('Codex skill names follow gstack-{name} convention', () => {
    const codexDirs = fs.readdirSync(AGENTS_DIR);
    for (const dir of codexDirs) {
@@ -1631,18 +1642,31 @@ describe('no compiled binaries in git', () => {
    expect(binaries).toEqual([]);
  });

-  test('git tracks no files larger than 2MB', () => {
-    // Pure fs.statSync — no shell spawn per file.
+  test('warns about tracked files larger than 2MB', () => {
+    // Large fixtures can be legitimate test infrastructure. Keep visibility on
+    // repository size without blocking those fixtures from living in git.
    const MAX_BYTES = 2 * 1024 * 1024;
-    const oversized = trackedFiles.filter((f: string) => {
+    const oversized = trackedFiles.flatMap((f: string) => {
      const full = path.join(ROOT, f);
      try {
-        return fs.statSync(full).size > MAX_BYTES;
+        const size = fs.statSync(full).size;
+        return size > MAX_BYTES ? [{ file: f, size }] : [];
      } catch {
-        return false;
+        return [];
      }
    });
-    expect(oversized).toEqual([]);
+
+    if (oversized.length > 0) {
+      const formatted = oversized
+        .map(({ file, size }: { file: string; size: number }) => {
+          const mib = (size / (1024 * 1024)).toFixed(1);
+          return `${file} (${mib} MiB)`;
+        })
+        .join(', ');
+      console.warn(`[size-warning] tracked files over 2 MiB: ${formatted}`);
+    }
+
+    expect(Array.isArray(oversized)).toBe(true);
  });
 });
@@ -1 +1 @@
-1.12.2.0
+1.13.0.0