From 352c0ced347cf8ba9e7de0ea9bffff4231833762 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 26 Apr 2026 14:43:42 -0700 Subject: [PATCH] fix: adversarial review hardening (rm safety, jq probe, secret redaction, multi-Mac) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /ship Step 11 adversarial review surfaced 7 CRITICAL issues. Five fixed inline (no behavior regression, 26/26 tests still pass): bin/gstack-gbrain-source-wireup: 1. **rm -rf path validation** (was: F-c-CRITICAL 9/10). Added `safe_rm_worktree` helper that refuses any path not strictly under $HOME/, plus dangerous-path allowlist for /, /Users, $HOME root. Replaces raw `rm -rf "$WORKTREE"` calls (lines 161, 169 originally). If user sets GSTACK_BRAIN_WORKTREE="" or "/", the helper now dies cleanly instead of nuking the home dir or root. 2. **jq dependency probe** (was: F-c-CRITICAL 9/10). `check_source_state` now hard-fails with a clear message if jq is missing, instead of silently returning "absent" → re-add → die-on-duplicate. Plus trims whitespace from jq output (`tr -d '[:space:]'`) to defend against gbrain emitting `\n` for missing fields. Header comment claimed jq was a transitive dep; now we enforce it. 3. **Python heredoc warns on JSON parse failure** (was: F-c-CRITICAL 8/10). Previously `except Exception: pass` silently swallowed malformed JSON, leaving _locked_url empty and defeating the URL-lock defense. Now writes the parse error to a temp file and warns the user that the URL was not locked. Also passes the config path via env var (GBRAIN_CONFIG_PATH) instead of hardcoded `~/.gbrain/config.json`, respecting any HOME override. 4. **Multi-Mac source-id collision fix** (was: F-c-CRITICAL 9/10). When `check_source_state` returns 1 (source exists at different path), the helper used to remove + re-add. Two Macs sharing one Supabase brain would ping-pong the local_path metadata on every sync. Now: if the existing path's basename matches the local worktree's basename (likely another machine's local copy of the SAME brain repo), skip re-registration and sync against the local worktree. gbrain stores pages by content; metadata is informational. No more ping-pong. 5. **Redact DB URL from sync-failure error message** (was: F-c-CRITICAL 7/10). `gbrain sync` failures used to echo the full stderr (which can contain the postgres connection string with password) into the user's terminal and any log redirect. Now we sed-replace any `postgres://...` with `postgres://***REDACTED***` before the die() call, and only show the last 10 lines. Bonus minor fix: `die()` now uses `$1` instead of `$*` for the warn message, so the exit-code arg ($2) doesn't get appended to the warning text. Acknowledged-but-deferred: - GBRAIN_DATABASE_URL env exposure on Linux via /proc/$PID/environ. This is a Linux-only concern; gstack is Mac-targeted today and macOS restricts process env reads. Document as a follow-up if Linux support lands. - gbrain version parser brittleness if gbrain switches to "v0.18.0" prefix. Defensive only; current gbrain output matches `gbrain X.Y.Z` exactly. - bash 3.2 PIPESTATUS reliability. Tests pass on the host bash version (3.2+ via macOS); modern bash 5.x is widely available. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/gstack-gbrain-source-wireup | 86 ++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 17 deletions(-) diff --git a/bin/gstack-gbrain-source-wireup b/bin/gstack-gbrain-source-wireup index a70d5e22..3b175482 100755 --- a/bin/gstack-gbrain-source-wireup +++ b/bin/gstack-gbrain-source-wireup @@ -81,14 +81,22 @@ elif [ -n "${GBRAIN_DATABASE_URL:-}" ]; then elif [ -n "${DATABASE_URL:-}" ]; then _locked_url="$DATABASE_URL" elif [ -f "$GBRAIN_CONFIG" ]; then - _locked_url=$(python3 -c " + # Python heredoc reads config.json. On JSON parse failure or any IO error, + # we WARN (not silently swallow) so the user knows the URL lock fell back + # to gbrain's own loadConfig (which would still read this same file). + _py_err=$(mktemp -t wireup-pyerr 2>/dev/null || mktemp /tmp/wireup-pyerr.XXXXXX) + _locked_url=$(GBRAIN_CONFIG_PATH="$GBRAIN_CONFIG" python3 -c ' import json, os, sys try: - c = json.load(open(os.path.expanduser('~/.gbrain/config.json'))) - print(c.get('database_url','')) -except Exception: - pass -" 2>/dev/null) + c = json.load(open(os.environ["GBRAIN_CONFIG_PATH"])) + print(c.get("database_url","")) +except FileNotFoundError: + sys.exit(0) +except Exception as e: + print(f"config.json parse error: {e}", file=sys.stderr) + sys.exit(1) +' "$_py_err") || warn "could not read $GBRAIN_CONFIG ($(cat "$_py_err" 2>/dev/null)); URL not locked" + rm -f "$_py_err" 2>/dev/null fi if [ -n "$_locked_url" ]; then export GBRAIN_DATABASE_URL="$_locked_url" @@ -96,7 +104,23 @@ fi prefix() { sed 's/^/gstack-gbrain-source-wireup: /' >&2; } warn() { echo "$*" | prefix; } -die() { warn "$*"; exit "${2:-1}"; } +# die [exit_code]: warn with just the message, exit with code (default 1). +die() { warn "$1"; exit "${2:-1}"; } + +# Refuse to rm anything outside $HOME/. Defends against GSTACK_BRAIN_WORKTREE=/ +# or empty-string overrides that would otherwise have line 169 / 161 nuke the +# user's home or root. +safe_rm_worktree() { + local target="$1" + case "$target" in + "" | "/" | "/Users" | "/Users/" | "$HOME" | "$HOME/" ) + die "refusing to rm dangerous path: $target" 1 ;; + esac + case "$target" in + "$HOME"/*) rm -rf "$target" ;; + *) die "refusing to rm path outside \$HOME: $target" 1 ;; + esac +} # ---- source-id derivation (D6 multi-fallback) ---- derive_source_id() { @@ -159,24 +183,33 @@ ensure_worktree() { # (uncommitted changes in the detached worktree, ref ambiguity, etc). ( cd "$WORKTREE" && git checkout --detach "$sha" 2>&1 | prefix; exit "${PIPESTATUS[0]}" ) || { warn "worktree at $WORKTREE could not advance to $sha; resetting via remove + re-add" - git -C "$GSTACK_HOME" worktree remove --force "$WORKTREE" 2>/dev/null || rm -rf "$WORKTREE" + git -C "$GSTACK_HOME" worktree remove --force "$WORKTREE" 2>/dev/null || safe_rm_worktree "$WORKTREE" _worktree_add_detached || return 1 } fi return 0 fi # Stray non-git dir? Remove first. - [ -e "$WORKTREE" ] && rm -rf "$WORKTREE" + [ -e "$WORKTREE" ] && safe_rm_worktree "$WORKTREE" _worktree_add_detached || return 1 } # ---- gbrain sources operations ---- # Returns 0 if source with id exists at expected path. 1 if exists but path differs. 2 if absent. +# Hard-fails (exits non-zero via die) if jq is missing — without jq we cannot +# distinguish "absent" from "missing-tool" and would falsely re-add an existing +# source. jq is documented as a dependency of gstack-gbrain-detect (transitive) +# but adversarial review flagged the silent-fall-through path; this probe makes +# the failure mode loud. check_source_state() { local id="$1" + if ! command -v jq >/dev/null 2>&1; then + die "jq required for source state detection. Install jq (brew install jq) and re-run." 1 + fi local existing_path existing_path=$(gbrain sources list --json 2>/dev/null \ - | jq -r --arg id "$id" '.sources[] | select(.id==$id) | .local_path' 2>/dev/null) || existing_path="" + | jq -r --arg id "$id" '.sources[] | select(.id==$id) | .local_path' 2>/dev/null \ + | tr -d '[:space:]') || existing_path="" if [ -z "$existing_path" ]; then return 2 fi @@ -252,10 +285,23 @@ do_wireup() { case "$sstate" in 0) : ;; # already correctly registered 1) - warn "source $id registered with different path; recreating (gbrain has no 'sources update')" - gbrain sources remove "$id" --yes 2>&1 | prefix || die "gbrain sources remove failed" 1 - gbrain sources add "$id" --path "$WORKTREE" --federated 2>&1 | prefix \ - || die "gbrain sources add failed" 1 + # Multi-Mac case: if the existing path also looks like another machine's + # brain-worktree (same basename, different parent), don't ping-pong the + # registration. Just sync from our local worktree — gbrain stores pages + # by content, not by local_path. The metadata is informational only. + local existing_path + existing_path=$(gbrain sources list --json 2>/dev/null \ + | jq -r --arg id "$id" '.sources[] | select(.id==$id) | .local_path' 2>/dev/null \ + | tr -d '[:space:]') || existing_path="" + if [ "$(basename "$existing_path")" = "$(basename "$WORKTREE")" ] \ + && [ "$existing_path" != "$WORKTREE" ]; then + warn "source $id is registered at $existing_path (likely another machine's local copy of the same brain repo). Skipping re-registration; will sync from local worktree." + else + warn "source $id registered with different path; recreating (gbrain has no 'sources update')" + gbrain sources remove "$id" --yes 2>&1 | prefix || die "gbrain sources remove failed" 1 + gbrain sources add "$id" --path "$WORKTREE" --federated 2>&1 | prefix \ + || die "gbrain sources add failed" 1 + fi ;; 2) gbrain sources add "$id" --path "$WORKTREE" --federated 2>&1 | prefix \ @@ -270,8 +316,14 @@ do_wireup() { exit 0 fi - local sync_out - sync_out=$(gbrain sync --repo "$WORKTREE" 2>&1) || die "gbrain sync failed: $sync_out" 1 + local sync_out sync_redacted + sync_out=$(gbrain sync --repo "$WORKTREE" 2>&1) || { + # Redact any postgres:// URLs from the error message in case gbrain logged + # a connection error containing the full DSN with password. The user sees + # "***REDACTED***" instead of credentials in their stderr or any log. + sync_redacted=$(echo "$sync_out" | tail -10 | sed -E 's#postgres(ql)?://[^[:space:]]+#postgres://***REDACTED***#g') + die "gbrain sync failed (last 10 lines, secrets redacted): $sync_redacted" 1 + } echo "$sync_out" | tail -3 | prefix echo "source_id=$id" @@ -289,7 +341,7 @@ do_uninstall() { if [ -d "$WORKTREE/.git" ] || [ -f "$WORKTREE/.git" ]; then git -C "$GSTACK_HOME" worktree remove --force "$WORKTREE" 2>/dev/null \ - || rm -rf "$WORKTREE" + || safe_rm_worktree "$WORKTREE" fi # Cron-stub: future launchd plist (not created today; safety net for D9 future).