feat(hooks): PostToolUse capture hook for AskUserQuestion

Plan-tune cathedral T5. Closes the substrate hole that motivated this entire branch: agent-compliance-only logging produced zero events in weeks of dogfood. PostToolUse hook captures every AUQ fire deterministically. What ships: - hosts/claude/hooks/question-log-hook.ts — TS hook that reads Claude Code's hook stdin, walks tool_input.questions[*], extracts user choice + recommended option from tool_response, spawns gstack-question-log per question. - hosts/claude/hooks/question-log-hook — bash shim Claude Code's hook runner invokes; execs bun against the .ts file. - Marker-first question_id extraction (D18 progressive markers): <gstack-qid:foo-bar> stripped from question text, used as the id. Hash fallback hook-<sha1[:10]> for unmarked questions (observed-only, never used as preference key — D18 hash drift mitigation). - (recommended) label parsing for the user_choice/recommended fields, with refuse-on-ambiguous when two labels are present (D2 safety). - Free-text capture: source=auq-other + free_text field when user picks Other and types (Layer 8 dream cycle input). - Matcher covers both native AskUserQuestion and mcp__*__AskUserQuestion (Codex/Conductor catch from outside voice review). - Crash safety: always exits 0; errors land in ~/.gstack/hook-errors.log so the user's session is never blocked by a hook failure. gstack-question-log extended to: - Accept `source` field (default 'agent', new values: hook, auq-other, auto-decided, codex-import-marker, codex-import-pattern). - Accept `tool_use_id` (<=128 chars) for dedup. - Composite dedup on (source, tool_use_id) across the last 100 lines — protects against hook + preamble both firing on the same tool call (D3 belt+suspenders). - Async fire `gstack-developer-profile --derive` after each successful write so inferred.sample_size actually grows (D17 — without this, the cathedral's "before 0, after >0" metric never moves). - GSTACK_QUESTION_LOG_NO_DERIVE=1 escape hatch for tests. 9 new unit tests covering capture, marker extraction, MCP variant, free-text, dedup, ambiguous-recommended safety, crash paths. All pass plus the existing 88 tests across related files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-19 00:00:13 +02:00 · 2026-05-27 07:42:46 -07:00
parent 2147532c07
commit a8a0447870
4 changed files with 658 additions and 2 deletions
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Bash shim — Claude Code hooks run `command` strings via /bin/sh, so this
+# wrapper makes the TypeScript hook executable via bun. Settings.json
+# references this file directly.
+set -e
+HERE="$(cd "$(dirname "$0")" && pwd)"
+exec bun "$HERE/question-log-hook.ts"
@@ -0,0 +1,286 @@
+#!/usr/bin/env bun
+/**
+ * PostToolUse hook for AskUserQuestion (Claude Code, plan-tune cathedral T5).
+ *
+ * Reads hook stdin JSON, extracts every AUQ question + user choice from the
+ * tool_input/tool_response, and writes them via gstack-question-log so the
+ * substrate captures fires deterministically — no agent compliance required.
+ *
+ * Triggered by ~/.claude/settings.json:
+ *   {
+ *     "hooks": {
+ *       "PostToolUse": [
+ *         {
+ *           "matcher": "(AskUserQuestion|mcp__.*__AskUserQuestion)",
+ *           "hooks": [
+ *             { "type": "command",
+ *               "command": "$CLAUDE_PROJECT_DIR/.claude/skills/gstack/hosts/claude/hooks/question-log-hook",
+ *               "timeout": 5 }
+ *           ]
+ *         }
+ *       ]
+ *     }
+ *   }
+ *
+ * Invariants:
+ *   - Always exits 0. A failing hook MUST NOT block the user's session.
+ *     Errors land in ~/.gstack/hook-errors.log for postmortem.
+ *   - Spawns gstack-question-log as a subprocess; that bin handles
+ *     validation, dedup (source+tool_use_id), async derive.
+ *   - Marker-first question_id (`<gstack-qid:foo-bar>`), hash fallback
+ *     (D18 progressive markers).
+ *
+ * See docs/spikes/claude-code-hook-mutation.md for the protocol contract.
+ */
+import * as crypto from 'crypto';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+interface HookStdin {
+  session_id?: string;
+  hook_event_name?: string;
+  tool_name?: string;
+  tool_use_id?: string;
+  tool_input?: {
+    questions?: Array<{
+      question?: string;
+      options?: Array<string | { label?: string; description?: string }>;
+      multiSelect?: boolean;
+    }>;
+  };
+  tool_response?: unknown;
+  cwd?: string;
+}
+
+interface ExtractedQuestion {
+  question_id: string;
+  question_summary: string;
+  options_count: number;
+  user_choice: string;
+  recommended?: string;
+  free_text?: string;
+  category?: string;
+  door_type?: string;
+}
+
+const MARKER_RE = /<gstack-qid:([a-z0-9-]{1,64})>/i;
+const RECOMMENDED_LABEL_RE = /\(recommended\)\s*$/i;
+
+function logHookError(msg: string): void {
+  try {
+    const stateRoot =
+      process.env.GSTACK_STATE_ROOT ||
+      process.env.GSTACK_HOME ||
+      path.join(os.homedir(), '.gstack');
+    fs.mkdirSync(stateRoot, { recursive: true });
+    fs.appendFileSync(
+      path.join(stateRoot, 'hook-errors.log'),
+      `${new Date().toISOString()} question-log-hook: ${msg}\n`,
+    );
+  } catch {
+    // Last-resort: swallow. Hook must not block.
+  }
+}
+
+function readStdin(): Promise<string> {
+  return new Promise((resolve) => {
+    let buf = '';
+    process.stdin.setEncoding('utf-8');
+    process.stdin.on('data', (chunk) => (buf += chunk));
+    process.stdin.on('end', () => resolve(buf));
+    process.stdin.on('error', () => resolve(buf));
+    // Hard cutoff so we don't hang the user's session waiting for stdin.
+    setTimeout(() => resolve(buf), 2000);
+  });
+}
+
+function hashQuestionId(skill: string, question: string, options: string[]): string {
+  const sorted = [...options].sort().join('|');
+  const h = crypto
+    .createHash('sha1')
+    .update(`${skill}::${question}::${sorted}`)
+    .digest('hex');
+  return `hook-${h.slice(0, 10)}`;
+}
+
+/**
+ * Marker-first id extraction. Returns the marker id (stripped of the
+ * <gstack-qid:...> wrapper) when present, else a hash-based hook- id.
+ * Per D18 progressive markers — hash ids are observed-only, never used
+ * as preference keys.
+ */
+function extractQuestionId(
+  skill: string,
+  questionText: string,
+  options: string[],
+): { id: string; marker_present: boolean; stripped_question: string } {
+  const match = questionText.match(MARKER_RE);
+  if (match) {
+    return {
+      id: match[1],
+      marker_present: true,
+      stripped_question: questionText.replace(MARKER_RE, '').trim(),
+    };
+  }
+  return {
+    id: hashQuestionId(skill, questionText, options),
+    marker_present: false,
+    stripped_question: questionText,
+  };
+}
+
+function optionLabels(opts: Array<string | { label?: string; description?: string }>): string[] {
+  return opts.map((o) => (typeof o === 'string' ? o : o.label || o.description || ''));
+}
+
+/**
+ * Parse "(recommended)" label-first per D2; fall back to "Recommendation: X"
+ * prose match; refuse (return undefined) if ambiguous.
+ */
+function extractRecommended(questionText: string, opts: string[]): string | undefined {
+  const labelMatches = opts.filter((o) => RECOMMENDED_LABEL_RE.test(o));
+  if (labelMatches.length === 1) return labelMatches[0].replace(RECOMMENDED_LABEL_RE, '').trim();
+  if (labelMatches.length > 1) return undefined; // ambiguous
+
+  const m = questionText.match(/Recommendation:\s*([^\n]+)/i);
+  if (!m) return undefined;
+  const recPhrase = m[1].trim();
+  const matchByPrefix = opts.find((o) => o.toLowerCase().startsWith(recPhrase.toLowerCase().slice(0, 12)));
+  return matchByPrefix;
+}
+
+/**
+ * Best-effort extraction of which option the user picked per question.
+ * AUQ tool_response shape varies by Claude Code variant (native vs MCP),
+ * and the hook stdin docs don't pin a single canonical shape. We handle
+ * the common cases gracefully.
+ */
+function extractUserChoices(
+  response: unknown,
+  questionCount: number,
+): Array<{ choice: string; free_text?: string }> {
+  const out: Array<{ choice: string; free_text?: string }> = [];
+  if (!response) {
+    for (let i = 0; i < questionCount; i++) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  // Shape A: { answers: [{option_label, free_text?}] }
+  // Shape B: { questions: [{user_answer}] }
+  // Shape C: { content: [...] } or array.
+  // We probe lazily.
+  const rec = response as Record<string, unknown>;
+  if (Array.isArray(rec.answers)) {
+    for (const a of rec.answers as Array<Record<string, unknown>>) {
+      const choice = (a.option_label || a.label || a.choice || a.answer || '__unknown__') as string;
+      const freeText = (a.free_text || a.other_text) as string | undefined;
+      out.push(freeText ? { choice, free_text: freeText } : { choice });
+    }
+    while (out.length < questionCount) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  if (Array.isArray(rec.questions)) {
+    for (const q of rec.questions as Array<Record<string, unknown>>) {
+      const choice = (q.user_answer || q.answer || q.choice || '__unknown__') as string;
+      out.push({ choice });
+    }
+    while (out.length < questionCount) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  // Fall back: stringify and log first 100 chars to help future debugging.
+  for (let i = 0; i < questionCount; i++) {
+    out.push({ choice: `__response-shape-unknown:${JSON.stringify(response).slice(0, 80)}__` });
+  }
+  return out;
+}
+
+function detectSkill(cwd: string | undefined): string {
+  // Best-effort: cwd often contains the project slug but rarely the running
+  // skill. Without a session-state mechanism, leave as 'unknown' — the
+  // skill marker (<gstack-skill:NAME>) embedded in question text per
+  // future plan-tune work is the durable path.
+  void cwd;
+  return 'unknown';
+}
+
+function spawnLog(payload: Record<string, unknown>): void {
+  // Locate the bin relative to this script's directory.
+  const here = path.dirname(new URL(import.meta.url).pathname);
+  // hosts/claude/hooks/ -> ../../../bin/
+  const repoRoot = path.resolve(here, '..', '..', '..');
+  const bin = path.join(repoRoot, 'bin', 'gstack-question-log');
+  const res = spawnSync(bin, [JSON.stringify(payload)], {
+    encoding: 'utf-8',
+    stdio: ['ignore', 'pipe', 'pipe'],
+    timeout: 3000,
+  });
+  if (res.status !== 0) {
+    logHookError(`gstack-question-log exited ${res.status}: ${res.stderr || res.stdout}`);
+  }
+}
+
+async function main(): Promise<void> {
+  const raw = await readStdin();
+  if (!raw.trim()) {
+    process.exit(0);
+  }
+  let stdin: HookStdin;
+  try {
+    stdin = JSON.parse(raw);
+  } catch (e) {
+    logHookError(`stdin parse failed: ${(e as Error).message}`);
+    process.exit(0);
+  }
+
+  const toolName = stdin.tool_name || '';
+  if (
+    toolName !== 'AskUserQuestion' &&
+    !toolName.match(/^mcp__.+__AskUserQuestion$/)
+  ) {
+    // Matcher should have filtered this out; defensive no-op.
+    process.exit(0);
+  }
+
+  const questions = stdin.tool_input?.questions || [];
+  if (questions.length === 0) {
+    process.exit(0);
+  }
+
+  const skill = detectSkill(stdin.cwd);
+  const choices = extractUserChoices(stdin.tool_response, questions.length);
+
+  for (let i = 0; i < questions.length; i++) {
+    const q = questions[i];
+    const qText = q.question || '';
+    if (!qText) continue;
+
+    const opts = optionLabels(q.options || []);
+    const { id, stripped_question } = extractQuestionId(skill, qText, opts);
+    const recommended = extractRecommended(stripped_question, opts);
+    const summary = stripped_question.slice(0, 200);
+    const choice = choices[i] || { choice: '__unknown__' };
+
+    const payload: Record<string, unknown> = {
+      skill,
+      question_id: id,
+      question_summary: summary,
+      options_count: opts.length,
+      user_choice: String(choice.choice).slice(0, 64),
+      source: choice.free_text ? 'auq-other' : 'hook',
+      session_id: stdin.session_id?.slice(0, 64),
+      tool_use_id: stdin.tool_use_id?.slice(0, 128),
+    };
+    if (recommended) payload.recommended = recommended.slice(0, 64);
+    if (choice.free_text) payload.free_text = String(choice.free_text);
+
+    spawnLog(payload);
+  }
+
+  process.exit(0);
+}
+
+main().catch((e) => {
+  logHookError(`main crash: ${(e as Error).message}`);
+  process.exit(0);
+});