mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-19 08:10:08 +02:00
e9ad7527ea
Plan-tune cathedral follow-up. The 3/day distill cap was theatrical: at ~$0.01 per Haiku call, even a runaway loop firing every minute would cost ~$14/day, and free-text events are rare enough that the natural input rate self-limits to 1-2 fires/day. Count caps don't protect against runaway bugs (which fire 1000x/second, not 4 times/day) but DO punish heavy users who'd legitimately distill multiple times during a busy week. Removed: 3/day rate cap on bin/gstack-distill-free-text. --status output swapped from "TODAY: N / 3" to "TODAY: N run(s), $X" so users see what they're spending instead of how close they are to a meaningless count. Loosened (caps that exist for real-runaway protection, not normal scope): - EVALS_BUDGET_HARD_CAP_GATE $25 → $200/run - EVALS_BUDGET_HARD_CAP_PERIODIC $70 → $500/run - EVALS_BUDGET_HARD_CAP $30 → $300/run (umbrella fallback) - GSTACK_SIZE_BUDGET_RATIO 1.05 → 1.50 per-skill ratio - plan-review preamble byte budget 40K → 60K Principle: caps exist to catch obvious bugs (infinite retry, model price change, prompt blowup), not to gate legitimate scope growth. Set high enough that real growth never trips them, only bug territory does. Adjusted defaults are 4-8× historical worst case, leaving ample headroom for the next 12 months of legitimate expansion. Tests updated: distill-free-text removes the 3-test rate-cap describe block in favor of "no rate cap" assertion that 10 runs/day pass. Other budget tests still pass because they were never near the old ceilings. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
273 lines
10 KiB
Bash
Executable File
273 lines
10 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
#
# Reads auq-other free-text events from this project's question-log.jsonl,
# sends them to Claude via the Anthropic SDK, and writes structured proposals
# the user can review via /plan-tune distill. Proposals require explicit
# user Y before applying — never autonomous (Codex #15 trust boundary).
#
# Usage:
#   gstack-distill-free-text                # sync, prompts at end
#   gstack-distill-free-text --background   # spawn detached; results
#                                           # surface on next /plan-tune
#   gstack-distill-free-text --dry-run      # show prompt, no API call
#   gstack-distill-free-text --status       # show last-run stats
#
# No rate cap — the natural rate of free-text events (rare; user has to type
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
# so even a runaway at one-per-minute would be ~$14/day worst case. The
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl (falling back
# to $GSTACK_HOME, then ~/.gstack, when unset) gives full auditability via
# --status when you want it.
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
set -euo pipefail
# NOTE: the line above doubles as the end marker for the --help text, so it
# must stay the first non-comment line after the header.

# Locate this script and the repository root (one level above it).
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
ROOT_DIR=$(cd "$SCRIPT_DIR/.." && pwd)

# State root precedence: GSTACK_STATE_ROOT, then an existing GSTACK_HOME,
# then ~/.gstack.
if [ -n "${GSTACK_STATE_ROOT:-}" ]; then
  GSTACK_HOME=$GSTACK_STATE_ROOT
elif [ -z "${GSTACK_HOME:-}" ]; then
  GSTACK_HOME=$HOME/.gstack
fi

# gstack-slug prints shell assignments (expected to set SLUG); a failing or
# missing helper is tolerated and leaves the slug at "unknown".
slug_env=$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)
eval "$slug_env"
: "${SLUG:=unknown}"

# Global cost log plus the per-project files this script reads and writes.
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
mkdir -p "$PROJECT_DIR"

# Parse the single optional mode flag. With no argument the script runs in
# sync mode; any unrecognised argument is reported on stderr and exits 1.
MODE="sync"
case "${1:-}" in
  --background) MODE="background" ;;
  --dry-run)    MODE="dry-run" ;;
  --status)     MODE="status" ;;
  --help|-h)
    # Print the header comment as help text. Start at line 2 so the shebang
    # is not echoed, drop the terminating "set -euo" line, then strip the
    # leading "# ". \{0,1\} is the POSIX BRE form of "optional"; \? is a GNU
    # extension that POSIX leaves undefined, so it is avoided here.
    sed -n '2,/^set -euo/p' "$0" | sed -e '/^set -euo/d' -e 's|^# \{0,1\}||'
    exit 0
    ;;
  '') ;;
  *) echo "unknown arg: $1" >&2; exit 1 ;;
esac

# --- Status subcommand --------------------------------------------------

# Summarise the cost-log entries recorded for this slug, then exit 0.
# Prints: total runs, runs and estimated spend for the current UTC date,
# lifetime estimated spend, and the timestamp + proposal count of the last
# run. Cost-log lines that are not valid JSON are skipped (with a note on
# stderr) instead of aborting the whole report.
if [ "$MODE" = "status" ]; then
  COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
    const fs = require("fs");
    const slug = process.env.SLUG_PATH;
    const path = process.env.COST_LOG_PATH;
    if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }

    // Collect entries for this slug. The log is append-only text, so a
    // truncated or hand-edited line must not take the report down.
    const mine = [];
    let skipped = 0;
    for (const line of fs.readFileSync(path, "utf-8").split("\n")) {
      if (!line.trim()) continue;
      try {
        const entry = JSON.parse(line);
        if (entry !== null && typeof entry === "object" && entry.slug === slug) mine.push(entry);
      } catch {
        skipped += 1;
      }
    }
    if (skipped > 0) {
      process.stderr.write("distill status: skipped " + skipped + " malformed cost-log line(s)\n");
    }
    if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }

    const sumUsd = (entries) => entries.reduce((acc, e) => acc + (e.cost_usd_est || 0), 0);
    // Timestamps are written in UTC, so "today" is the UTC calendar date.
    const todayIso = new Date().toISOString().slice(0, 10);
    const today = mine.filter((e) => String(e.ts || "").startsWith(todayIso));

    console.log("RUNS: " + mine.length);
    console.log("TODAY: " + today.length + " run(s), $" + sumUsd(today).toFixed(4));
    console.log("ESTIMATED_TOTAL_USD: $" + sumUsd(mine).toFixed(4));
    const last = mine[mine.length - 1];
    console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
  '
  exit 0
fi

# --- Background mode: detach + invoke self synchronously ---------------

# Relaunch this script with no arguments (plain sync mode) under nohup with
# stdout and stderr discarded, report the child pid, and return at once.
if [[ "$MODE" == "background" ]]; then
  nohup "$0" &>/dev/null &
  printf 'DISTILL_SPAWNED: pid=%s\n' "$!"
  exit 0
fi

# No rate cap. Natural input rate (free-text events are rare) + Haiku price
# (~$0.01/run) keep this bounded. Use --status to audit spend.

# --- Gather unprocessed auq-other events from this project -------------

# Nothing to do when the project has never logged a question.
if [ ! -f "$LOG_FILE" ]; then
  echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
  exit 0
fi

# EVENTS_JSON: JSON array of the log entries that have source "auq-other",
# carry free_text, and have no distilled_at yet, reduced to the five fields
# the distiller needs. Log lines that fail to parse are ignored.
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
  const { readFileSync } = require("fs");
  const rows = readFileSync(process.env.LOG_FILE_PATH, "utf-8")
    .trim()
    .split("\n")
    .filter(Boolean);
  const pending = rows.flatMap((row) => {
    let ev;
    try { ev = JSON.parse(row); } catch { return []; }
    if (ev?.source !== "auq-other" || ev.distilled_at || !ev.free_text) return [];
    const { ts, question_id, question_summary, free_text, session_id } = ev;
    return [{ ts, question_id, question_summary, free_text, session_id }];
  });
  process.stdout.write(JSON.stringify(pending));
')

# Count the gathered events; exit quietly when there are none.
EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e '
  const pending = JSON.parse(await Bun.stdin.text());
  console.log(pending.length);
')
if [ "$EVENT_COUNT" -eq 0 ]; then
  echo "NO_FREE_TEXT: nothing to distill"
  exit 0
fi

# --- Build distill prompt ---------------------------------------------

# The prompt goes through a temp file instead of $(cat <<'PROMPT' ...):
# that form choked the bash parser on apostrophes elsewhere in the script.
# The temp file is removed on exit by the trap below.
DISTILL_PROMPT_FILE=$(mktemp)
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
cat <<'PROMPT' > "$DISTILL_PROMPT_FILE"
You are gstack dream-cycle distiller. Below are free-text responses the
user typed into AskUserQuestion prompts (option "Other") across recent gstack
sessions. For each response, extract structured signal that should update the
user plan-tune profile or preferences.

Return strict JSON with this shape:
{
"proposals": [
{
"kind": "preference" | "declared-nudge" | "memory-nugget",
"confidence": 0.0-1.0,
"source_quotes": ["<verbatim quote 1>", "<verbatim quote 2>"],
"question_id": "<id>",
"preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
"dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
"direction": "up | down",
"magnitude": "small | medium | large",
"rationale": "<one sentence>",
"nugget": "<one-line memory>",
"applies_to_signal_keys": ["scope-appetite", "..."]
}
]
}

Rules:
- Reject any proposal where confidence < 0.7.
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
- A single user response may produce multiple proposals.
- If nothing meaningful to extract, return {"proposals": []}.
- No commentary outside the JSON.
PROMPT
# Load the prompt text back into a variable (trailing newlines are dropped).
DISTILL_PROMPT=$(<"$DISTILL_PROMPT_FILE")

# --- Dry-run: emit prompt + events, exit ------------------------------

# Show exactly what would be sent (prompt, then the pretty-printed event
# array) without making any API call.
if [ "$MODE" = "dry-run" ]; then
  printf '%s\n' "=== DISTILL PROMPT ===" "$DISTILL_PROMPT" "" "=== EVENTS ($EVENT_COUNT) ==="
  printf '%s\n' "$EVENTS_JSON" | bun -e '
    const events = JSON.parse(await Bun.stdin.text());
    console.log(JSON.stringify(events, null, 2));
  '
  exit 0
fi

# --- SDK call: fail-loud on missing key -------------------------------

# Refuse to continue without an API key: explain on stderr and exit 1.
if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
  cat >&2 <<'EOF'
gstack-distill-free-text: ANTHROPIC_API_KEY not set.

Dream-cycle distillation needs an API key for the SDK call. Set
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
what would be sent without actually calling.

Note: this is a separate billing/auth surface from your interactive
Claude Code session (per Codex correction in D6).
EOF
  exit 1
fi

# Run the SDK call in bun. Emits JSON on stdout: {proposals_count,
# rejected_low_confidence, input_tokens, output_tokens, cost_usd_est}.
# Side effects of the bun script: overwrites $PROPOSAL_FILE with the kept
# proposals and stamps distilled_at on the submitted events in $LOG_FILE.
# Exits non-zero (aborting this script under set -e) when the model reply is
# not valid JSON; in that case neither file is touched.
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
  ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
  bun --cwd "$ROOT_DIR" -e '
  const fs = require("fs");
  const Anthropic = require("@anthropic-ai/sdk").default;
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

  const events = JSON.parse(process.env.EVENTS_JSON);
  const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);

  // Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
  // Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
  const INPUT_PER_TOKEN = 1e-6;
  const OUTPUT_PER_TOKEN = 5e-6;

  const resp = await client.messages.create({
    model: "claude-haiku-4-5-20251001",
    max_tokens: 4096,
    messages: [{ role: "user", content: prompt }],
  });

  // Concatenate the text blocks of the reply; other block types are ignored.
  const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");

  // Strip optional fenced code blocks the model may wrap JSON in.
  const stripped = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
  let parsed;
  try { parsed = JSON.parse(stripped); } catch (e) {
    process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
    process.exit(1);
  }

  // Model output is untrusted: valid JSON can still be null, a bare array,
  // or contain non-object entries. Guard before reading any property so a
  // malformed reply degrades to "no proposals" instead of a TypeError.
  const proposals = Array.isArray(parsed?.proposals) ? parsed.proposals : [];
  // Keep only proposals with confidence >= 0.7 (model is told this rule;
  // double-check in case it slipped).
  const filtered = proposals.filter(
    (p) => p !== null && typeof p === "object" && typeof p.confidence === "number" && p.confidence >= 0.7,
  );

  // Write proposals file (overwrite — only the latest run is reviewable).
  fs.writeFileSync(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
    generated_at: new Date().toISOString(),
    source_event_count: events.length,
    proposals: filtered,
  }, null, 2));

  // Mark source events as distilled_at so they do not re-propose.
  // Rewrite question-log.jsonl: every line is kept verbatim except the
  // submitted events, which gain distilled_at. A line counts as submitted
  // only when it matches a sent event by ts + question_id AND still meets
  // the selection rule used when gathering (auq-other, has free_text, not
  // yet distilled). The second condition keeps unrelated entries that share
  // a key (including entries missing both fields) from being stamped.
  const logPath = process.env.LOG_FILE_PATH;
  const distilledAt = new Date().toISOString();
  const eventKey = (e) => (e.ts || "") + "::" + (e.question_id || "");
  const matchKeys = new Set(events.map(eventKey));
  const out = [];
  for (const ln of fs.readFileSync(logPath, "utf-8").split("\n")) {
    if (!ln.trim()) { out.push(ln); continue; }
    let stamped = ln;
    try {
      const e = JSON.parse(ln);
      const wasSubmitted = e !== null && typeof e === "object"
        && e.source === "auq-other" && !e.distilled_at && Boolean(e.free_text)
        && matchKeys.has(eventKey(e));
      if (wasSubmitted) stamped = JSON.stringify({ ...e, distilled_at: distilledAt });
    } catch {
      // Unparseable line: leave it untouched.
    }
    out.push(stamped);
  }
  fs.writeFileSync(logPath, out.join("\n"));

  // Cost estimate from usage tokens.
  const usage = resp.usage || {};
  const inTok = usage.input_tokens || 0;
  const outTok = usage.output_tokens || 0;
  const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;

  process.stdout.write(JSON.stringify({
    proposals_count: filtered.length,
    rejected_low_confidence: proposals.length - filtered.length,
    input_tokens: inTok,
    output_tokens: outTok,
    cost_usd_est: cost,
  }));
')

# Append cost log line.
# The row is serialised with JSON.stringify instead of splicing strings, so
# a slug containing quotes or backslashes cannot produce an invalid JSONL
# row. Key order (ts, slug, then the RESULT fields) matches earlier rows.
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
COST_ENTRY_TS="$TS" COST_ENTRY_SLUG="$SLUG" COST_ENTRY_RESULT="$RESULT" bun -e '
  const result = JSON.parse(process.env.COST_ENTRY_RESULT);
  const entry = { ts: process.env.COST_ENTRY_TS, slug: process.env.COST_ENTRY_SLUG, ...result };
  process.stdout.write(JSON.stringify(entry) + "\n");
' >> "$COST_LOG"

# Human-readable completion summary on stdout.
echo "DISTILL_COMPLETE:"
echo " proposals_file: $PROPOSAL_FILE"
echo " $RESULT"