From e36ff7dd4ca9493db04e1fd73432ff8b6d86511c Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 25 Apr 2026 23:13:23 -0700 Subject: [PATCH] feat: helper locks GBRAIN_DATABASE_URL at startup, defends against config rewrites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wireup helper previously read ~/.gbrain/config.json on every gbrain subprocess invocation. On Garry's Mac, multiple concurrent test runs and agent integrations were rewriting that file mid-sync, redirecting the wireup at the wrong brain partway through a 4-min initial import. This commit adds a `--database-url <url>` flag to the helper and locks the URL at startup. Precedence: 1. --database-url flag (explicit caller intent) 2. GBRAIN_DATABASE_URL / DATABASE_URL env (CI / manual override) 3. read once from ~/.gbrain/config.json (default) Whichever wins gets exported as GBRAIN_DATABASE_URL for every child `gbrain` invocation. Per gbrain's loadConfig at src/core/config.ts:53, env-var URLs override the file URL — so a process that flips config.json between two of our gbrain calls can't redirect us. Defense-in-depth: once the URL is locked, the wireup completes against the original brain even under hostile filesystem conditions. setup-gbrain/SKILL.md.tmpl Step 7 now reads the URL out of config.json once (via python3 inline) and passes it explicitly with --database-url, so even the very first wireup call is decoupled from config.json mutability. Three new test cases cover the lock behavior: - --database-url flag is exported to child gbrain calls - falls back to ~/.gbrain/config.json when no flag and no env - flag overrides env GBRAIN_DATABASE_URL and config.json values The fake gbrain in the test suite now records GBRAIN_DATABASE_URL alongside each call so tests can assert the helper exported the locked URL. Total test count: 13 → 16 passing. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 2 + bin/gstack-gbrain-source-wireup | 53 ++++++++++++++--- setup-gbrain/SKILL.md | 16 ++++- setup-gbrain/SKILL.md.tmpl | 16 ++++- test/gstack-gbrain-source-wireup.test.ts | 75 +++++++++++++++++++++++- 5 files changed, 152 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36a51e3e..49798029 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ gbrain search "ethos" --source gstack-brain-{user} | head -5 `bin/gstack-gbrain-source-wireup` is the new helper. It derives a per-user source id from `~/.gstack/.git`'s origin URL (with multi-fallback to `~/.gstack-brain-remote.txt` and a `--source-id` flag), creates a detached `git worktree` at `~/.gstack-brain-worktree/`, registers it as a federated source on gbrain, runs initial backfill, and supports `--strict` (Step 7 strictness), `--uninstall` (full teardown including future-launchd plist), and `--probe` (read-only state inspection). All idempotent. The helper depends on `jq` (transitive via `gstack-gbrain-detect`). +The helper locks the database URL at startup (precedence: `--database-url` flag > `GBRAIN_DATABASE_URL`/`DATABASE_URL` env > read once from `~/.gbrain/config.json`) and exports it as `GBRAIN_DATABASE_URL` for every child `gbrain` invocation. This means external rewrites of `~/.gbrain/config.json` mid-sync (e.g., a concurrent `gbrain init --non-interactive` running in another workspace) cannot redirect the wireup at a different brain. Per gbrain's `loadConfig()`, env-var URLs override the file. Step 7 of `/setup-gbrain` reads the URL out of `config.json` once and passes it explicitly via `--database-url`, so the wireup is robust against config flips during the seconds-to-minutes sync window. + `/setup-gbrain` Step 7 now invokes the helper with `--strict` after `gstack-brain-init`. 
`/gstack-upgrade` invokes the helper without `--strict` via `gstack-upgrade/migrations/v1.12.3.0.sh` so missing/old gbrain is a benign skip during batch upgrade. `bin/gstack-brain-restore` invokes the helper after the initial clone so a 2nd Mac gets the wireup automatically. `bin/gstack-brain-uninstall` invokes `--uninstall` plus removes legacy `consumers.json`. `bin/gstack-brain-init` drops 60 lines of dead consumer-registration code (the HTTP POST block, the `consumers.json` writer, the chore commit). `bin/gstack-brain-restore` drops the 18-line `consumers.json` token-rehydration block (the only consumer that used it never had real tokens). `bin/gstack-brain-consumer` is marked deprecated in its header docstring; removal in v1.13.0.0 after one cycle of grace. diff --git a/bin/gstack-gbrain-source-wireup b/bin/gstack-gbrain-source-wireup index 985aa05b..702384e5 100755 --- a/bin/gstack-gbrain-source-wireup +++ b/bin/gstack-gbrain-source-wireup @@ -8,7 +8,9 @@ # # Usage: # gstack-gbrain-source-wireup [--strict] [--source-id <id>] [--no-pull] +# [--database-url <url>] # gstack-gbrain-source-wireup --uninstall [--source-id <id>] +# [--database-url <url>] # gstack-gbrain-source-wireup --probe # gstack-gbrain-source-wireup --help # @@ -25,6 +27,14 @@ # GSTACK_BRAIN_NO_SYNC — skip the gbrain sync step (tests; helper still # ensures source registration) # +# Defense against external rewrites of ~/.gbrain/config.json: +# At helper startup we capture the database URL ONCE — from --database-url, +# from GBRAIN_DATABASE_URL/DATABASE_URL env, or from ~/.gbrain/config.json — +# and export it as GBRAIN_DATABASE_URL for every child `gbrain` invocation. +# That env var overrides whatever's in config.json (per gbrain's loadConfig +# at src/core/config.ts:53), so a process that flips config.json mid-sync +# can't redirect us at a different brain mid-stream. +# # Depends on: jq (transitive via gstack-gbrain-detect). 
set -euo pipefail @@ -36,25 +46,54 @@ GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}" WORKTREE="${GSTACK_BRAIN_WORKTREE:-$HOME/.gstack-brain-worktree}" REMOTE_FILE="$HOME/.gstack-brain-remote.txt" PLIST_PATH="$HOME/Library/LaunchAgents/com.gstack.brain-sync.plist" +GBRAIN_CONFIG="$HOME/.gbrain/config.json" # ---- arg parse ---- MODE="wireup" STRICT=0 NO_PULL=0 SOURCE_ID="" +DATABASE_URL_ARG="" while [ $# -gt 0 ]; do case "$1" in - --uninstall) MODE="uninstall"; shift ;; - --probe) MODE="probe"; shift ;; - --strict) STRICT=1; shift ;; - --no-pull) NO_PULL=1; shift ;; - --source-id) SOURCE_ID="$2"; shift 2 ;; - --help|-h) sed -n '2,28p' "$0" | sed 's/^# \{0,1\}//'; exit 0 ;; - *) echo "Unknown flag: $1" >&2; exit 1 ;; + --uninstall) MODE="uninstall"; shift ;; + --probe) MODE="probe"; shift ;; + --strict) STRICT=1; shift ;; + --no-pull) NO_PULL=1; shift ;; + --source-id) SOURCE_ID="$2"; shift 2 ;; + --database-url) DATABASE_URL_ARG="$2"; shift 2 ;; + --help|-h) sed -n '2,40p' "$0" | sed 's/^# \{0,1\}//'; exit 0 ;; + *) echo "Unknown flag: $1" >&2; exit 1 ;; esac done +# ---- lock the database URL at startup ---- +# Precedence: --database-url flag > existing GBRAIN_DATABASE_URL/DATABASE_URL +# env > read once from ~/.gbrain/config.json. Whichever wins gets exported as +# GBRAIN_DATABASE_URL so every child `gbrain` invocation uses THAT brain even +# if config.json is rewritten by another process during the wireup. 
+_locked_url="" +if [ -n "$DATABASE_URL_ARG" ]; then + _locked_url="$DATABASE_URL_ARG" +elif [ -n "${GBRAIN_DATABASE_URL:-}" ]; then + _locked_url="$GBRAIN_DATABASE_URL" +elif [ -n "${DATABASE_URL:-}" ]; then + _locked_url="$DATABASE_URL" +elif [ -f "$GBRAIN_CONFIG" ]; then + _locked_url=$(python3 -c " +import json, os, sys +try: + c = json.load(open(os.path.expanduser('~/.gbrain/config.json'))) + print(c.get('database_url','')) +except Exception: + pass +" 2>/dev/null) +fi +if [ -n "$_locked_url" ]; then + export GBRAIN_DATABASE_URL="$_locked_url" +fi + prefix() { sed 's/^/gstack-gbrain-source-wireup: /' >&2; } warn() { echo "$*" | prefix; } die() { warn "$*"; exit "${2:-1}"; } diff --git a/setup-gbrain/SKILL.md b/setup-gbrain/SKILL.md index 0914b3b8..698d8c02 100644 --- a/setup-gbrain/SKILL.md +++ b/setup-gbrain/SKILL.md @@ -1370,8 +1370,22 @@ federated source on the user's gbrain (Supabase or PGLite), and runs an initial `gbrain sync`. Local-Mac only. No cloud agent required. Subsequent skill runs trigger incremental sync via the existing skill-end push hook. 
+Capture the database URL out of `~/.gbrain/config.json` first and pass it +explicitly so the wireup is robust against any other process rewriting +`~/.gbrain/config.json` mid-sync (e.g., concurrent `gbrain init` runs +elsewhere on the machine): + ```bash -~/.claude/skills/gstack/bin/gstack-gbrain-source-wireup --strict +GBRAIN_URL=$(python3 -c " +import json, os, sys +try: + c = json.load(open(os.path.expanduser('~/.gbrain/config.json'))) + print(c.get('database_url', '')) +except Exception: + pass +") +~/.claude/skills/gstack/bin/gstack-gbrain-source-wireup --strict \ + ${GBRAIN_URL:+--database-url "$GBRAIN_URL"} ``` `--strict` exits non-zero on missing prereqs (gbrain not installed, < 0.18.0, diff --git a/setup-gbrain/SKILL.md.tmpl b/setup-gbrain/SKILL.md.tmpl index 21c4093c..3bbf9b12 100644 --- a/setup-gbrain/SKILL.md.tmpl +++ b/setup-gbrain/SKILL.md.tmpl @@ -372,8 +372,22 @@ federated source on the user's gbrain (Supabase or PGLite), and runs an initial `gbrain sync`. Local-Mac only. No cloud agent required. Subsequent skill runs trigger incremental sync via the existing skill-end push hook. 
+Capture the database URL out of `~/.gbrain/config.json` first and pass it +explicitly so the wireup is robust against any other process rewriting +`~/.gbrain/config.json` mid-sync (e.g., concurrent `gbrain init` runs +elsewhere on the machine): + ```bash -~/.claude/skills/gstack/bin/gstack-gbrain-source-wireup --strict +GBRAIN_URL=$(python3 -c " +import json, os, sys +try: + c = json.load(open(os.path.expanduser('~/.gbrain/config.json'))) + print(c.get('database_url', '')) +except Exception: + pass +") +~/.claude/skills/gstack/bin/gstack-gbrain-source-wireup --strict \ + ${GBRAIN_URL:+--database-url "$GBRAIN_URL"} ``` `--strict` exits non-zero on missing prereqs (gbrain not installed, < 0.18.0, diff --git a/test/gstack-gbrain-source-wireup.test.ts b/test/gstack-gbrain-source-wireup.test.ts index 1200b6d9..3ca90a99 100644 --- a/test/gstack-gbrain-source-wireup.test.ts +++ b/test/gstack-gbrain-source-wireup.test.ts @@ -42,7 +42,12 @@ function makeFakeGbrain(opts: { const script = `#!/bin/bash LOG="${gbrainCallLog}" STATE="${gbrainStateFile}" -echo "gbrain $@" >> "$LOG" +# Record the call AND any GBRAIN_DATABASE_URL that the parent passed via env. +# Format: "gbrain [GBRAIN_DATABASE_URL=]" so tests can assert +# the wireup helper exported the locked URL into our env. 
+LINE="gbrain $@" +[ -n "\${GBRAIN_DATABASE_URL:-}" ] && LINE="\$LINE [GBRAIN_DATABASE_URL=\$GBRAIN_DATABASE_URL]" +echo "\$LINE" >> "$LOG" # --version if [ "$1" = "--version" ]; then @@ -286,6 +291,74 @@ describe('gstack-gbrain-source-wireup — wireup mode', () => { }); }); +describe('gstack-gbrain-source-wireup — --database-url lock (defends against external config rewrites)', () => { + test('--database-url flag is exported as GBRAIN_DATABASE_URL to child gbrain calls', () => { + setupGstackRepo('git@github.com:user/gstack-brain-user.git'); + makeFakeGbrain({}); + const TARGET = 'postgresql://postgres.abc:pw@aws.pooler.supabase.com:5432/postgres'; + const r = run(['--database-url', TARGET], { env: { GSTACK_BRAIN_NO_SYNC: '1' } }); + expect(r.status).toBe(0); + const calls = gbrainCalls(); + // every gbrain invocation should carry the locked URL + const writingCalls = calls.filter((c) => c.includes('sources') || c.includes('sync')); + expect(writingCalls.length).toBeGreaterThan(0); + for (const c of writingCalls) { + expect(c).toContain(`[GBRAIN_DATABASE_URL=${TARGET}]`); + } + }); + + test('falls back to ~/.gbrain/config.json database_url when no flag and no env', () => { + setupGstackRepo('git@github.com:user/gstack-brain-user.git'); + makeFakeGbrain({}); + const FILE_URL = 'postgresql://postgres.xyz:pw@aws.pooler.supabase.com:5432/postgres'; + fs.mkdirSync(path.join(tmpHome, '.gbrain'), { recursive: true }); + fs.writeFileSync( + path.join(tmpHome, '.gbrain', 'config.json'), + JSON.stringify({ engine: 'postgres', database_url: FILE_URL }) + ); + // Important: don't pass GBRAIN_DATABASE_URL or DATABASE_URL in env; helper + // should read from $HOME/.gbrain/config.json (HOME is tmpHome here). 
+ const r = run([], { + env: { + GSTACK_BRAIN_NO_SYNC: '1', + GBRAIN_DATABASE_URL: '', + DATABASE_URL: '', + }, + }); + expect(r.status).toBe(0); + const calls = gbrainCalls(); + const writingCalls = calls.filter((c) => c.includes('sources add')); + expect(writingCalls.length).toBe(1); + expect(writingCalls[0]).toContain(`[GBRAIN_DATABASE_URL=${FILE_URL}]`); + }); + + test('--database-url overrides env GBRAIN_DATABASE_URL and config.json', () => { + setupGstackRepo('git@github.com:user/gstack-brain-user.git'); + makeFakeGbrain({}); + const FLAG_URL = 'postgresql://postgres.flag:pw@a.b:5432/postgres'; + const ENV_URL = 'postgresql://postgres.env:pw@x.y:5432/postgres'; + const FILE_URL = 'postgresql://postgres.file:pw@p.q:5432/postgres'; + fs.mkdirSync(path.join(tmpHome, '.gbrain'), { recursive: true }); + fs.writeFileSync( + path.join(tmpHome, '.gbrain', 'config.json'), + JSON.stringify({ engine: 'postgres', database_url: FILE_URL }) + ); + const r = run(['--database-url', FLAG_URL], { + env: { + GSTACK_BRAIN_NO_SYNC: '1', + GBRAIN_DATABASE_URL: ENV_URL, + }, + }); + expect(r.status).toBe(0); + const calls = gbrainCalls(); + const writingCalls = calls.filter((c) => c.includes('sources add')); + expect(writingCalls.length).toBe(1); + expect(writingCalls[0]).toContain(`[GBRAIN_DATABASE_URL=${FLAG_URL}]`); + expect(writingCalls[0]).not.toContain(ENV_URL); + expect(writingCalls[0]).not.toContain(FILE_URL); + }); +}); + describe('gstack-gbrain-source-wireup — uninstall mode', () => { test('after wireup: removes source + worktree', () => { setupGstackRepo('git@github.com:user/gstack-brain-user.git');