From 3df8ea8695e7a1a9b998541b1a889ff6fc1375bd Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 18 Apr 2026 23:28:12 +0800 Subject: [PATCH] security: harden migration + context-save after adversarial review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adversarial review (Claude + Codex, both high confidence) identified 6 critical production-harm findings in the /ship pre-landing pass. All folded in. Migration v1.0.1.0.sh hardening: - Add explicit `[ -z "${HOME:-}" ]` guard. HOME="" survives set -u and expands paths to /.claude/skills/... which could hit absolute paths under root/containers/sudo-without-H. - Add python3 fallback inside resolve_real() (was missing; broken symlinks silently defeated ownership check). - Ownership-guard Shape 2 (~/.claude/skills/gstack/checkpoint/). Was unconditional rm -rf. Now: if symlink, check target resolves inside gstack; if regular dir, check realpath resolves inside gstack. A user's hand-edited customization or a symlink pointing outside gstack is preserved with a notice. - Use `rm --` and `rm -r --` consistently to resist hostile basenames. - Use `find -type f -not -name .DS_Store -not -name ._*` instead of `ls -A | grep`. macOS sidecars no longer mask a legit prefix-mode install. Strip sidecars explicitly before removing the dir. context-save/SKILL.md.tmpl: - Sanitize title in bash, not LLM prose. Allowlist [a-z0-9.-], cap 60 chars, default to "untitled". Closes a prompt-injection surface where `/context-save $(rm -rf ~)` could propagate into subsequent commands. - Collision-safe filename. If ${TIMESTAMP}-${SLUG}.md already exists (same-second double-save with same title), append a 4-char random suffix. The skill contract says "saved files are append-only" — this enforces it. Silent overwrite was a data-loss bug. context-restore/SKILL.md.tmpl: - Cap `find ... | sort -r` at 20 entries via `| head -20`. A user with 10k+ saved files no longer blows the context window just to pick one. /context-save list still handles the full-history listing path. test/skill-e2e-autoplan-dual-voice.test.ts: - Filter transcript to tool_use / tool_result / assistant entries before matching, so prompt-text mentions of "plan-ceo-review" don't force the reachedPhase1 assertion to pass. Phase-1 assertion now requires completion markers ("Phase 1 complete", "Phase 2 started"), not mere name occurrence. - claudeVoiceFired now requires JSON evidence of an Agent tool_use (name:"Agent" or subagent_type field), not the literal string "Agent(" which could appear anywhere. - codexVoiceFired now requires a Bash tool_use with a `codex exec/review` command string, not prompt-text mentions. All SKILL.md files regenerated. Golden fixtures updated. bun test: 0 failures across 80+ targeted tests and the full suite. Review source: /ship Step 11 adversarial pass (claude subagent + codex exec). Same findings independently surfaced by both reviewers — this is cross-model high confidence. --- context-restore/SKILL.md | 4 +- context-restore/SKILL.md.tmpl | 4 +- context-save/SKILL.md | 24 +++++- context-save/SKILL.md.tmpl | 24 +++++- gstack-upgrade/migrations/v1.0.1.0.sh | 87 +++++++++++++++------- test/skill-e2e-autoplan-dual-voice.test.ts | 37 ++++++--- 6 files changed, 135 insertions(+), 45 deletions(-) diff --git a/context-restore/SKILL.md b/context-restore/SKILL.md index da785d92..eede99b7 100644 --- a/context-restore/SKILL.md +++ b/context-restore/SKILL.md @@ -764,7 +764,9 @@ else # copies/rsync). Filesystem mtime drifts and is not authoritative. # 2. On macOS, `find ... | xargs ls -1t` with zero results falls back to # listing cwd. `sort -r` on empty input cleanly returns nothing. - FILES=$(find "$CHECKPOINT_DIR" -maxdepth 1 -name "*.md" -type f 2>/dev/null | sort -r) + # Cap at 20 most recent: a user with 10k saved files shouldn't blow the + # context window just listing them. /context-save list handles pagination. + FILES=$(find "$CHECKPOINT_DIR" -maxdepth 1 -name "*.md" -type f 2>/dev/null | sort -r | head -20) if [ -z "$FILES" ]; then echo "NO_CHECKPOINTS" else diff --git a/context-restore/SKILL.md.tmpl b/context-restore/SKILL.md.tmpl index 941bac79..d9303289 100644 --- a/context-restore/SKILL.md.tmpl +++ b/context-restore/SKILL.md.tmpl @@ -71,7 +71,9 @@ else # copies/rsync). Filesystem mtime drifts and is not authoritative. # 2. On macOS, `find ... | xargs ls -1t` with zero results falls back to # listing cwd. `sort -r` on empty input cleanly returns nothing. - FILES=$(find "$CHECKPOINT_DIR" -maxdepth 1 -name "*.md" -type f 2>/dev/null | sort -r) + # Cap at 20 most recent: a user with 10k saved files shouldn't blow the + # context window just listing them. /context-save list handles pagination. + FILES=$(find "$CHECKPOINT_DIR" -maxdepth 1 -name "*.md" -type f 2>/dev/null | sort -r | head -20) if [ -z "$FILES" ]; then echo "NO_CHECKPOINTS" else diff --git a/context-save/SKILL.md b/context-save/SKILL.md index 9478efd0..a3a60581 100644 --- a/context-save/SKILL.md +++ b/context-save/SKILL.md @@ -805,21 +805,39 @@ saved file. ### Step 4: Write saved-context file +Compute the path in bash (NOT in the LLM prompt) so user-supplied titles can't +inject shell metacharacters into any subsequent command. The sanitizer is an +allowlist: only `a-z 0-9 - .` survive. + ```bash eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG CHECKPOINT_DIR="$HOME/.gstack/projects/$SLUG/checkpoints" mkdir -p "$CHECKPOINT_DIR" TIMESTAMP=$(date +%Y%m%d-%H%M%S) +# Bash-side title sanitize. Pass the raw title as $1 when running this block. +# Example: TITLE_RAW="wintermute progress" bash -c '...' +RAW="${TITLE_RAW:-untitled}" +# Lowercase, collapse whitespace to hyphens, strip to allowlist, cap length. +TITLE_SLUG=$(printf '%s' "$RAW" | tr '[:upper:]' '[:lower:]' | tr -s ' \t' '-' | tr -cd 'a-z0-9.-' | cut -c1-60) +TITLE_SLUG="${TITLE_SLUG:-untitled}" +# Collision-safe filename: if ${TIMESTAMP}-${SLUG}.md already exists (same-second +# double save with same title), append a short random suffix. Filenames are +# append-only — never overwrite. +FILE="${CHECKPOINT_DIR}/${TIMESTAMP}-${TITLE_SLUG}.md" +if [ -e "$FILE" ]; then + SUFFIX=$(LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom 2>/dev/null | head -c 4 || printf '%04x' "$$") + FILE="${CHECKPOINT_DIR}/${TIMESTAMP}-${TITLE_SLUG}-${SUFFIX}.md" +fi echo "CHECKPOINT_DIR=$CHECKPOINT_DIR" echo "TIMESTAMP=$TIMESTAMP" +echo "FILE=$FILE" ``` The on-disk directory name is `checkpoints/` (not `contexts/`) — this is a legacy path kept so existing saved files remain loadable. Users never see it. -Write the file to `{CHECKPOINT_DIR}/{TIMESTAMP}-{title-slug}.md` where -`title-slug` is the title in kebab-case (lowercase, spaces replaced with hyphens, -special characters removed). +Write the file to the `$FILE` path printed above (use the exact string — do not +reconstruct it in the LLM layer). The file format: diff --git a/context-save/SKILL.md.tmpl b/context-save/SKILL.md.tmpl index 945da6fb..81dbf4f8 100644 --- a/context-save/SKILL.md.tmpl +++ b/context-save/SKILL.md.tmpl @@ -112,21 +112,39 @@ saved file. ### Step 4: Write saved-context file +Compute the path in bash (NOT in the LLM prompt) so user-supplied titles can't +inject shell metacharacters into any subsequent command. The sanitizer is an +allowlist: only `a-z 0-9 - .` survive. + ```bash {{SLUG_SETUP}} CHECKPOINT_DIR="$HOME/.gstack/projects/$SLUG/checkpoints" mkdir -p "$CHECKPOINT_DIR" TIMESTAMP=$(date +%Y%m%d-%H%M%S) +# Bash-side title sanitize. Pass the raw title as $1 when running this block. +# Example: TITLE_RAW="wintermute progress" bash -c '...' +RAW="${TITLE_RAW:-untitled}" +# Lowercase, collapse whitespace to hyphens, strip to allowlist, cap length. +TITLE_SLUG=$(printf '%s' "$RAW" | tr '[:upper:]' '[:lower:]' | tr -s ' \t' '-' | tr -cd 'a-z0-9.-' | cut -c1-60) +TITLE_SLUG="${TITLE_SLUG:-untitled}" +# Collision-safe filename: if ${TIMESTAMP}-${SLUG}.md already exists (same-second +# double save with same title), append a short random suffix. Filenames are +# append-only — never overwrite. +FILE="${CHECKPOINT_DIR}/${TIMESTAMP}-${TITLE_SLUG}.md" +if [ -e "$FILE" ]; then + SUFFIX=$(LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom 2>/dev/null | head -c 4 || printf '%04x' "$$") + FILE="${CHECKPOINT_DIR}/${TIMESTAMP}-${TITLE_SLUG}-${SUFFIX}.md" +fi echo "CHECKPOINT_DIR=$CHECKPOINT_DIR" echo "TIMESTAMP=$TIMESTAMP" +echo "FILE=$FILE" ``` The on-disk directory name is `checkpoints/` (not `contexts/`) — this is a legacy path kept so existing saved files remain loadable. Users never see it. -Write the file to `{CHECKPOINT_DIR}/{TIMESTAMP}-{title-slug}.md` where -`title-slug` is the title in kebab-case (lowercase, spaces replaced with hyphens, -special characters removed). +Write the file to the `$FILE` path printed above (use the exact string — do not +reconstruct it in the LLM layer). The file format: diff --git a/gstack-upgrade/migrations/v1.0.1.0.sh b/gstack-upgrade/migrations/v1.0.1.0.sh index b6b8dac4..18ce5af7 100755 --- a/gstack-upgrade/migrations/v1.0.1.0.sh +++ b/gstack-upgrade/migrations/v1.0.1.0.sh @@ -20,33 +20,42 @@ # Idempotent: missing paths are no-ops. set -euo pipefail +# Guard: refuse to run if HOME is unset or empty. With `set -u`, unset HOME +# errors out, but HOME="" (possible under sudo-without-H, systemd units, some +# CI runners) survives and produces dangerous absolute paths like +# "/.claude/skills/...". Abort cleanly. +if [ -z "${HOME:-}" ]; then + echo " [v1.0.1.0] HOME is unset or empty — skipping migration." >&2 + exit 0 +fi + SKILLS_DIR="${HOME}/.claude/skills" OLD_TOPLEVEL="${SKILLS_DIR}/checkpoint" OLD_NAMESPACED="${SKILLS_DIR}/gstack/checkpoint" GSTACK_ROOT_REAL="" +# Helper: canonical-path a target (symlink-safe). Prints the resolved path, or +# empty on failure (broken symlink, ENOENT, ELOOP). Both realpath AND the python3 +# fallback are tried — a single tool failure shouldn't defeat the ownership +# check. Returns empty string if both fail. +resolve_real() { + local target="$1" + local out="" + if command -v realpath >/dev/null 2>&1; then + out=$(realpath "$target" 2>/dev/null || true) + fi + if [ -z "$out" ] && command -v python3 >/dev/null 2>&1; then + out=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$target" 2>/dev/null || true) + fi + printf '%s' "$out" +} + # Resolve the canonical path of the gstack skills root. If gstack isn't # installed here, there's nothing to migrate. if [ -d "${SKILLS_DIR}/gstack" ]; then - # Portable realpath: macOS BSD `readlink` lacks -f. Fall back to python3. - if command -v realpath >/dev/null 2>&1; then - GSTACK_ROOT_REAL=$(realpath "${SKILLS_DIR}/gstack" 2>/dev/null || true) - fi - if [ -z "$GSTACK_ROOT_REAL" ]; then - GSTACK_ROOT_REAL=$(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "${SKILLS_DIR}/gstack" 2>/dev/null || true) - fi + GSTACK_ROOT_REAL=$(resolve_real "${SKILLS_DIR}/gstack") fi -# Helper: canonical-path a target (symlink-safe). Prints the resolved path. -resolve_real() { - local target="$1" - if command -v realpath >/dev/null 2>&1; then - realpath "$target" 2>/dev/null || true - return - fi - python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$target" 2>/dev/null || true -} - # Helper: does $1 (canonical path) live inside $2 (canonical path)? path_inside() { local inner="$1" @@ -65,20 +74,24 @@ if [ -L "$OLD_TOPLEVEL" ]; then # Directory symlink (or file symlink). Canonicalize and check ownership. target_real=$(resolve_real "$OLD_TOPLEVEL") if [ -n "$GSTACK_ROOT_REAL" ] && path_inside "$target_real" "$GSTACK_ROOT_REAL"; then - rm "$OLD_TOPLEVEL" + rm -- "$OLD_TOPLEVEL" echo " [v1.0.1.0] Removed stale /checkpoint symlink (was shadowing Claude Code's /rewind alias)." removed_any=1 else - echo " [v1.0.1.0] Leaving $OLD_TOPLEVEL alone — symlink target is outside gstack." + echo " [v1.0.1.0] Leaving $OLD_TOPLEVEL alone — symlink target is outside gstack (or unresolvable)." fi elif [ -d "$OLD_TOPLEVEL" ]; then # Regular directory. Only remove if it contains exactly one file named # SKILL.md that's a symlink into gstack (gstack's prefix-install shape). - entries=$(ls -A "$OLD_TOPLEVEL" 2>/dev/null) - if [ "$entries" = "SKILL.md" ] && [ -L "$OLD_TOPLEVEL/SKILL.md" ]; then + # Use find to count real files, ignoring .DS_Store (macOS sidecars). + file_count=$(find "$OLD_TOPLEVEL" -maxdepth 1 -type f -not -name '.DS_Store' -not -name '._*' 2>/dev/null | wc -l | tr -d ' ') + symlink_count=$(find "$OLD_TOPLEVEL" -maxdepth 1 -type l 2>/dev/null | wc -l | tr -d ' ') + if [ "$file_count" = "0" ] && [ "$symlink_count" = "1" ] && [ -L "$OLD_TOPLEVEL/SKILL.md" ]; then target_real=$(resolve_real "$OLD_TOPLEVEL/SKILL.md") if [ -n "$GSTACK_ROOT_REAL" ] && path_inside "$target_real" "$GSTACK_ROOT_REAL"; then - rm -r "$OLD_TOPLEVEL" + # Strip macOS sidecars first (not user content), then remove the dir. + find "$OLD_TOPLEVEL" -maxdepth 1 \( -name '.DS_Store' -o -name '._*' \) -type f -delete 2>/dev/null || true + rm -r -- "$OLD_TOPLEVEL" echo " [v1.0.1.0] Removed stale /checkpoint install directory (gstack prefix-mode)." removed_any=1 else @@ -90,11 +103,31 @@ elif [ -d "$OLD_TOPLEVEL" ]; then fi # Missing → no-op (idempotency). -# --- Shape 2: ~/.claude/skills/gstack/checkpoint/ (gstack owns this dir unconditionally) -if [ -d "$OLD_NAMESPACED" ] || [ -L "$OLD_NAMESPACED" ]; then - rm -rf "$OLD_NAMESPACED" - echo " [v1.0.1.0] Removed stale ~/.claude/skills/gstack/checkpoint/ (replaced by context-save + context-restore)." - removed_any=1 +# --- Shape 2: ~/.claude/skills/gstack/checkpoint/ +# Ownership guard applies here too: only remove if this path resolves inside the +# gstack skills root. If a user replaced the directory with a symlink pointing +# elsewhere (e.g., at their own fork), respect it. +if [ -L "$OLD_NAMESPACED" ]; then + target_real=$(resolve_real "$OLD_NAMESPACED") + if [ -n "$GSTACK_ROOT_REAL" ] && path_inside "$target_real" "$GSTACK_ROOT_REAL"; then + rm -- "$OLD_NAMESPACED" + echo " [v1.0.1.0] Removed stale ~/.claude/skills/gstack/checkpoint symlink." + removed_any=1 + else + echo " [v1.0.1.0] Leaving $OLD_NAMESPACED alone — symlink target is outside gstack." + fi +elif [ -d "$OLD_NAMESPACED" ]; then + # Regular directory. This is the gstack-prefix install location. Check that + # it resolves to a path inside the gstack root (it should, unless someone + # hand-edited the tree). + target_real=$(resolve_real "$OLD_NAMESPACED") + if [ -n "$GSTACK_ROOT_REAL" ] && path_inside "$target_real" "$GSTACK_ROOT_REAL"; then + rm -rf -- "$OLD_NAMESPACED" + echo " [v1.0.1.0] Removed stale ~/.claude/skills/gstack/checkpoint/ (replaced by context-save + context-restore)." + removed_any=1 + else + echo " [v1.0.1.0] Leaving $OLD_NAMESPACED alone — resolves outside gstack." + fi fi if [ "$removed_any" = "1" ]; then diff --git a/test/skill-e2e-autoplan-dual-voice.test.ts b/test/skill-e2e-autoplan-dual-voice.test.ts index 01d1d96e..0d91a319 100644 --- a/test/skill-e2e-autoplan-dual-voice.test.ts +++ b/test/skill-e2e-autoplan-dual-voice.test.ts @@ -85,20 +85,37 @@ Add a new /greet skill that prints a welcome message. // Accept EITHER outcome as success: // (a) Both voices produced output (ideal case) // (b) Codex unavailable + Claude voice produced output (graceful degrade) - // Session runner returns `output` (final assistant message). Search - // transcript tool-call inputs/outputs as a broader net for voice fingerprints. - const transcriptText = JSON.stringify(result.transcript || []); - const out = (result.output ?? '') + '\n' + transcriptText; - const claudeVoiceFired = /Claude\s+(CEO|subagent)|claude-subagent|Agent\s*\(|subagent_type/i.test(out); - const codexVoiceFired = /codex\s+(exec|review)|CODEX SAYS|\[via:codex\]|codex-plan-review/i.test(out); - const codexUnavailable = /\[codex-unavailable\]|AUTH_FAILED|CODEX_NOT_AVAILABLE|codex_cli_missing|Codex CLI not found/i.test(out); + // Search ONLY the tool-call structure — NOT the prompt string that went in. + // Matching against full transcript is risky because the prompt itself + // contains "plan-ceo-review" and other marker strings that would produce + // false positives regardless of skill behavior. Filter to tool_result + // content + assistant messages emitted DURING execution. + const transcript = Array.isArray(result.transcript) ? result.transcript : []; + const executionContent = transcript + .filter((entry: any) => entry && (entry.type === 'tool_use' || entry.type === 'tool_result' || entry.role === 'assistant')) + .map((entry: any) => JSON.stringify(entry)) + .join('\n'); + const out = (result.output ?? '') + '\n' + executionContent; + + // Claude voice: require evidence of a dispatched Agent subagent, not + // merely the literal string "Agent(" (which could appear in any text). + // Task/Agent tool_use entries have name:"Agent" or subagent_type:"..." + const claudeVoiceFired = /"name":\s*"Agent"|"subagent_type":\s*"[^"]/.test(out) || + /Claude\s+(CEO|subagent)\s+(review|complete|finished)|claude-subagent\s/i.test(out); + // Codex voice: require evidence of codex CLI invocation (command string in + // a Bash tool_use), not prompt-text mentions. + const codexVoiceFired = /"command":\s*"[^"]*codex\s+(exec|review)/.test(out) || + /CODEX SAYS\s*\(/i.test(out); + // Unavailable markers: explicit probe-failure strings emitted by the skill. + const codexUnavailable = /\[codex-unavailable\]|AUTH_FAILED\b|CODEX_NOT_AVAILABLE\b|codex_cli_missing|Codex CLI not found/i.test(out); expect(claudeVoiceFired).toBe(true); expect(codexVoiceFired || codexUnavailable).toBe(true); - // Hang protection: if the skill reached Phase 1 at all, our hardening worked. - // If it didn't, this is a regression from the pre-wave stdin-deadlock era. - const reachedPhase1 = /Phase 1|CEO\s+Review|Strategy\s*&\s*Scope|plan-ceo-review/i.test(out); + // Hang protection: require phase completion evidence, not name mentions. + // "Phase 1 complete" or a phase-transition marker, not "plan-ceo-review" + // as a bare string (which appears in the prompt itself). + const reachedPhase1 = /Phase\s+1\s+(complete|done|finished)|CEO\s+Review\s+(complete|done|approved)|Strategy\s*&\s*Scope\s+(complete|done)|Phase\s+2\s+(started|begin)/i.test(out); expect(reachedPhase1).toBe(true); logCost('autoplan-dual-voice', result);