#!/usr/bin/env bash
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
#
# Reads auq-other free-text events from this project's question-log.jsonl,
# sends them to Claude via the Anthropic SDK, and writes structured proposals
# the user can review via /plan-tune distill. Proposals require explicit
# user Y before applying — never autonomous (Codex #15 trust boundary).
#
# Usage:
#   gstack-distill-free-text                 # sync, prompts at end
#   gstack-distill-free-text --background    # spawn detached; results
#                                            # surface on next /plan-tune
#   gstack-distill-free-text --dry-run       # show prompt, no API call
#   gstack-distill-free-text --status        # show last-run stats
#
# No rate cap — the natural rate of free-text events (rare; user has to type
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
# so even a runaway at one-per-minute would be ~$14/day worst case. The
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl gives full
# auditability via --status when you want it.
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"

# The sibling gstack-slug helper's stdout is eval'd; SLUG is read afterwards
# and falls back to "unknown" when the helper fails or sets nothing.
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"

PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
mkdir -p "$PROJECT_DIR"

MODE="sync"
case "${1:-}" in
  --background) MODE="background" ;;
  --dry-run) MODE="dry-run" ;;
  --status) MODE="status" ;;
  --help|-h)
    # Print the header comment block only: skip the shebang, stop at the
    # `set -euo` line, and strip the leading "# " (portable awk; avoids the
    # GNU-only `\?` in a basic regular expression).
    awk 'NR == 1 { next } /^set -euo/ { exit } { sub(/^# ?/, ""); print }' "$0"
    exit 0
    ;;
  '') ;;
  *) echo "unknown arg: $1" >&2; exit 1 ;;
esac

# --- Status subcommand --------------------------------------------------
if [ "$MODE" = "status" ]; then
  COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
    const fs = require("fs");
    const slug = process.env.SLUG_PATH;
    const path = process.env.COST_LOG_PATH;
    if (!fs.existsSync(path)) {
      console.log("no distill runs yet");
      process.exit(0);
    }
    const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
    // Skip unparseable lines instead of crashing the whole report on one bad entry.
    const mine = [];
    for (const l of lines) {
      let e;
      try { e = JSON.parse(l); } catch { continue; }
      if (e && e.slug === slug) mine.push(e);
    }
    if (mine.length === 0) {
      console.log("no distill runs yet for slug=" + slug);
      process.exit(0);
    }
    const totalUsd = mine.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
    const todayIso = new Date().toISOString().slice(0, 10);
    const today = mine.filter((e) => (e.ts || "").startsWith(todayIso));
    const todayUsd = today.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
    console.log("RUNS: " + mine.length);
    console.log("TODAY: " + today.length + " run(s), $" + todayUsd.toFixed(4));
    console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
    const last = mine[mine.length - 1];
    console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
  '
  exit 0
fi

# --- Background mode: detach + invoke self synchronously ---------------
if [ "$MODE" = "background" ]; then
  # The child runs with no arguments (sync mode) and all output discarded.
  nohup "$0" >/dev/null 2>&1 &
  echo "DISTILL_SPAWNED: pid=$!"
  exit 0
fi

# No rate cap. Natural input rate (free-text events are rare) + Haiku price
# (~$0.01/run) keep this bounded. Use --status to audit spend.

# --- Gather unprocessed auq-other events from this project -------------
if [ ! -f "$LOG_FILE" ]; then
  echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
  exit 0
fi

# Eligible events: source "auq-other", no distilled_at stamp, non-empty
# free_text. Unparseable log lines are ignored.
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
  const fs = require("fs");
  const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").filter(Boolean);
  const out = [];
  for (const l of lines) {
    try {
      const e = JSON.parse(l);
      if (e.source === "auq-other" && !e.distilled_at && e.free_text) {
        out.push({
          ts: e.ts,
          question_id: e.question_id,
          question_summary: e.question_summary,
          free_text: e.free_text,
          session_id: e.session_id,
        });
      }
    } catch {}
  }
  process.stdout.write(JSON.stringify(out));
')

EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const a = JSON.parse(await Bun.stdin.text()); console.log(a.length);')
if [ "$EVENT_COUNT" -eq 0 ]; then
  echo "NO_FREE_TEXT: nothing to distill"
  exit 0
fi

# --- Build distill prompt ---------------------------------------------
# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
# bash parser on apostrophes elsewhere in the script).
DISTILL_PROMPT_FILE=$(mktemp)
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
You are gstack dream-cycle distiller. Below are free-text responses the user
typed into AskUserQuestion prompts (option "Other") across recent gstack
sessions. For each response, extract structured signal that should update the
user plan-tune profile or preferences.

Return strict JSON with this shape:
{
  "proposals": [
    {
      "kind": "preference" | "declared-nudge" | "memory-nugget",
      "confidence": 0.0-1.0,
      "source_quotes": ["", ""],
      "question_id": "",
      "preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
      "dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
      "direction": "up | down",
      "magnitude": "small | medium | large",
      "rationale": "",
      "nugget": "",
      "applies_to_signal_keys": ["scope-appetite", "..."]
    }
  ]
}

Rules:
- Reject any proposal where confidence < 0.7.
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
- A single user response may produce multiple proposals.
- If nothing meaningful to extract, return {"proposals": []}.
- No commentary outside the JSON.
PROMPT
DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")

# --- Dry-run: emit prompt + events, exit ------------------------------
if [ "$MODE" = "dry-run" ]; then
  echo "=== DISTILL PROMPT ==="
  printf '%s\n' "$DISTILL_PROMPT"
  echo
  echo "=== EVENTS ($EVENT_COUNT) ==="
  printf '%s\n' "$EVENTS_JSON" | bun -e 'console.log(JSON.stringify(JSON.parse(await Bun.stdin.text()), null, 2));'
  exit 0
fi

# --- SDK call: fail-loud on missing key -------------------------------
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
  # Quoted heredoc delimiter: the message is emitted literally on stderr.
  cat >&2 <<'EOF'
gstack-distill-free-text: ANTHROPIC_API_KEY not set.

Dream-cycle distillation needs an API key for the SDK call. Set
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
what would be sent without actually calling.

Note: this is a separate billing/auth surface from your interactive
Claude Code session (per Codex correction in D6).
EOF
  exit 1
fi

# Run the SDK call in bun. Emits JSON: {proposals_count, cost_usd_est}.
# Under set -e, a non-zero exit from bun (API error, non-JSON reply) aborts
# the script here, before the cost log is touched.
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
  ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
  bun --cwd "$ROOT_DIR" -e '
  const fs = require("fs");
  const Anthropic = require("@anthropic-ai/sdk").default;
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
  const events = JSON.parse(process.env.EVENTS_JSON);
  const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);

  // Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
  // Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
  const INPUT_PER_TOKEN = 1e-6;
  const OUTPUT_PER_TOKEN = 5e-6;

  const resp = await client.messages.create({
    model: "claude-haiku-4-5-20251001",
    max_tokens: 4096,
    messages: [{ role: "user", content: prompt }],
  });

  const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");
  // Strip optional fenced code blocks the model may wrap JSON in. Trim first
  // so leading whitespace before the fence does not defeat the ^ anchor.
  const stripped = text.trim().replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
  let parsed;
  try {
    parsed = JSON.parse(stripped);
  } catch (e) {
    process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
    process.exit(1);
  }
  // Guard against valid-but-unexpected JSON such as null or a bare array.
  const proposals = parsed && Array.isArray(parsed.proposals) ? parsed.proposals : [];
  // Keep only proposals with confidence >= 0.7 (model is told this rule;
  // double-check in case it slipped).
  const filtered = proposals.filter((p) => p && typeof p.confidence === "number" && p.confidence >= 0.7);

  // Write proposals file (overwrite — only the latest run is reviewable).
  // NOTE(review): proposals from an earlier run that were never reviewed are
  // replaced here while their source events stay stamped distilled_at, so
  // they will not be proposed again — confirm that is acceptable.
  fs.writeFileSync(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
    generated_at: new Date().toISOString(),
    source_event_count: events.length,
    proposals: filtered,
  }, null, 2));

  // Mark source events as distilled_at so they do not re-propose.
  // Update question-log.jsonl in place: read all, rewrite with distilled_at
  // set on the matching events. Match by ts + question_id, and only stamp
  // lines that pass the same eligibility filter used when gathering, so a
  // degenerate key (missing ts and question_id) cannot stamp unrelated lines.
  const logPath = process.env.LOG_FILE_PATH;
  const distilledAt = new Date().toISOString();
  const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
  const lines = fs.readFileSync(logPath, "utf-8").split("\n");
  const out = [];
  for (const ln of lines) {
    // Blank lines (including the trailing one) pass through untouched.
    if (!ln.trim()) { out.push(ln); continue; }
    try {
      const e = JSON.parse(ln);
      const key = (e.ts || "") + "::" + (e.question_id || "");
      const eligible = e.source === "auq-other" && !e.distilled_at && e.free_text;
      if (eligible && matchKeys.has(key)) {
        e.distilled_at = distilledAt;
        out.push(JSON.stringify(e));
      } else {
        out.push(ln);
      }
    } catch {
      // Unparseable lines are preserved verbatim.
      out.push(ln);
    }
  }
  fs.writeFileSync(logPath, out.join("\n"));

  // Cost estimate from usage tokens.
  const usage = resp.usage || {};
  const inTok = usage.input_tokens || 0;
  const outTok = usage.output_tokens || 0;
  const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;

  process.stdout.write(JSON.stringify({
    proposals_count: filtered.length,
    rejected_low_confidence: proposals.length - filtered.length,
    input_tokens: inTok,
    output_tokens: outTok,
    cost_usd_est: cost,
  }));
')

# Append cost log line. The entry is serialized with JSON.stringify rather
# than string concatenation so a slug containing quotes or backslashes cannot
# produce a malformed JSONL line. Key order is ts, slug, then the RESULT fields.
# Best-effort: the distillation itself already succeeded, so a failure here
# only warns.
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
if ! COST_LINE=$(COST_TS="$TS" COST_SLUG="$SLUG" COST_RESULT="$RESULT" bun -e '
  const entry = {
    ts: process.env.COST_TS,
    slug: process.env.COST_SLUG,
    ...JSON.parse(process.env.COST_RESULT),
  };
  process.stdout.write(JSON.stringify(entry));
'); then
  echo "gstack-distill-free-text: warning: could not record cost log entry" >&2
else
  printf '%s\n' "$COST_LINE" >> "$COST_LOG"
fi

echo "DISTILL_COMPLETE:"
echo "  proposals_file: $PROPOSAL_FILE"
echo "  $RESULT"