#!/usr/bin/env bash
# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions.
#
# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md).
# gstack skills running on Codex emit Decision Briefs as plain agent_message
# text, and the user's response shows up in the next user_message. This
# importer reconstructs those question/answer pairs from the structured
# JSONL session files under ~/.codex/sessions/.
#
# Usage:
#   gstack-codex-session-import                 # latest session under ~/.codex/sessions/
#   gstack-codex-session-import <path>          # explicit session file
#   gstack-codex-session-import --since <iso>   # all sessions modified after <iso>
#
# Environment:
#   CODEX_SESSIONS_ROOT   directory searched for rollout-*.jsonl
#                         (default: ~/.codex/sessions)
#
# Recovery strategy (two-tier per D5/T4 spike):
#   1. Marker-first: extract the question-id marker from agent_message → stable id.
#   2. Pattern fallback: detect a D<N> header + lettered options → hash id
#      (source=codex-import-pattern, never used as preference key per D18).
#
# Writes via bin/gstack-question-log so source tagging, dedup, and async
# derive all apply uniformly.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# NOTE(review): GSTACK_HOME is not read anywhere in this script. The assignment
# is kept as-is; presumably it matters to child processes when the variable is
# already exported — confirm before removing.
# shellcheck disable=SC2034
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"

# Session files selected for import; filled in by main / collect_sessions_since.
SESSION_FILES=()

# Print a message on stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the header comment of this script (everything between the shebang and
# the "set -euo" line) with the leading "# " removed.
print_usage() {
  awk 'NR == 1 { next } /^set -euo/ { exit } { sub(/^# ?/, ""); print }' "$0"
}

#######################################
# Print the most recently modified rollout-*.jsonl below a directory.
# Arguments: $1 - directory to search (recursively)
# Outputs:   the path on stdout, or an empty line when nothing matches
#######################################
find_latest_session() {
  local root=$1
  local candidate newest=""
  # NUL-delimited so paths containing whitespace survive; mtimes are compared
  # with -nt instead of parsing ls output.
  while IFS= read -r -d '' candidate; do
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done < <(find "$root" -type f -name 'rollout-*.jsonl' -print0 2>/dev/null)
  printf '%s\n' "$newest"
}

#######################################
# Collect every rollout-*.jsonl modified after a timestamp.
# Globals:   SESSION_FILES (written; reset, then one element per match)
# Arguments: $1 - directory to search (recursively)
#            $2 - timestamp understood by find's -newermt test
#######################################
collect_sessions_since() {
  local root=$1 since=$2
  local f
  SESSION_FILES=()
  while IFS= read -r -d '' f; do
    SESSION_FILES+=("$f")
  done < <(find "$root" -type f -name 'rollout-*.jsonl' \
    -newermt "$since" -print0 2>/dev/null)
}

#######################################
# Extract question/answer pairs from one session file and forward each one to
# gstack-question-log (one invocation per pair, JSON passed as argv[1]).
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - path to a Codex session JSONL file
# Outputs:   stdout: one line "<imported> <skipped_no_answer>"
#            stderr: warnings
# NOTE: the program below lives inside a single-quoted shell string, so it
#       must not contain any apostrophes.
#######################################
import_session() {
  local session_file=$1
  SESSION_FILE_PATH="$session_file" \
  QLOG_BIN="$SCRIPT_DIR/gstack-question-log" \
  bun -e '
    const fs = require("fs");
    const path = require("path");
    const { spawnSync } = require("child_process");
    const crypto = require("crypto");

    const sessionPath = process.env.SESSION_FILE_PATH;
    const qlogBin = process.env.QLOG_BIN;

    // NOTE(review): the marker syntax is ASSUMED to be an HTML comment of the
    // form <!-- gstack-qid: some-id -->. Confirm against the marker the skills
    // actually emit before relying on codex-import-marker ids.
    const MARKER_RE = /<!--\s*gstack-qid:\s*([A-Za-z0-9._:-]+)\s*-->/i;
    // "D<N> — summary" header line of a Decision Brief.
    const HEADER_RE = /^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m;

    // Split the JSONL file into the session_meta payload and the event stream.
    const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);
    let meta = null;
    const stream = [];
    for (const ln of lines) {
      try {
        const e = JSON.parse(ln);
        if (e.type === "session_meta") meta = e.payload;
        else stream.push(e);
      } catch {
        // Best-effort: lines that do not parse to a JSON value are skipped.
      }
    }
    if (!meta) {
      console.error("WARN: no session_meta in " + sessionPath);
      console.log("0 0");
      process.exit(0);
    }
    const cwd = meta.cwd || "";
    const sessionId = String(meta.id || path.basename(sessionPath)).slice(0, 64);

    // Map the reply text onto one of the brief options.
    //   1. A leading capital letter that indexes an option picks that option.
    //   2. With no parsed options the bare letter is kept.
    //   3. Otherwise (including a letter beyond the option list, e.g. a reply
    //      starting with "I think") the first option whose leading 12
    //      characters occur in the reply wins.
    //   4. Failing all of that, the reply itself (first 64 chars) is recorded.
    function resolveChoice(answer, options) {
      const letter = answer.match(/^\s*([A-Z])\b/);
      if (letter) {
        const idx = letter[1].charCodeAt(0) - 65;
        if (idx < options.length) return options[idx];
        if (options.length === 0) return letter[1];
      }
      const lower = answer.toLowerCase();
      const hit = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
      return hit || answer.slice(0, 64);
    }

    // Walk for agent_message → next user_message pairs.
    const briefs = [];
    let skippedNoAnswer = 0;
    for (let i = 0; i < stream.length; i++) {
      const e = stream[i];
      if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
      const text = String(e.payload?.message || "");
      if (!text) continue;

      // A message is a brief if it carries a marker or a D-numbered header.
      const markerMatch = text.match(MARKER_RE);
      const dMatch = text.match(HEADER_RE);
      if (!markerMatch && !dMatch) continue;

      // The answer is the next user_message anywhere later in the stream.
      let answer = "";
      for (let j = i + 1; j < stream.length; j++) {
        const e2 = stream[j];
        if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
          answer = String(e2.payload?.message || "").trim();
          break;
        }
      }
      if (!answer) {
        skippedNoAnswer++;
        continue;
      }

      // Options are lines shaped like "A) text", optionally ending in "(recommended)".
      const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
      const options = optMatches.map((m) => m[2].trim());

      // The recommendation is only recorded when exactly one option is labelled.
      const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
      const recommended = recLabel.length === 1 ? recLabel[0][2].trim() : undefined;

      const userChoice = resolveChoice(answer, options);
      const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);

      let questionId, source;
      if (markerMatch) {
        questionId = markerMatch[1];
        source = "codex-import-marker";
      } else {
        // Order-insensitive hash over summary + options gives a repeatable id.
        const sortedOpts = [...options].sort().join("|");
        const h = crypto.createHash("sha1")
          .update("codex::" + summary + "::" + sortedOpts)
          .digest("hex")
          .slice(0, 10);
        questionId = "hook-" + h;
        source = "codex-import-pattern";
      }

      briefs.push({
        skill: "codex",
        question_id: questionId,
        question_summary: summary,
        options_count: options.length || 1,
        user_choice: userChoice.slice(0, 64),
        ...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
        source,
        session_id: sessionId,
        // Event timestamp when present; the key is omitted otherwise.
        ts: e.timestamp || undefined,
      });
    }

    let imported = 0;
    for (const b of briefs) {
      const res = spawnSync(qlogBin, [JSON.stringify(b)], {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
        // Run from the originating cwd so the event lands in the right
        // project. Falls back to the importer cwd if the session cwd no
        // longer exists.
        cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
        timeout: 5000,
      });
      if (res.status === 0) {
        imported++;
      } else {
        const why = res.error ? res.error.message : "exit status " + res.status;
        console.error("WARN: gstack-question-log failed for " + b.question_id + ": " + why);
      }
    }
    console.log(imported + " " + skippedNoAnswer);
  '
}

#######################################
# Entry point: pick the session files, import each one, print a summary.
# Globals:   CODEX_SESSIONS_ROOT (read), SESSION_FILES (written)
# Arguments: none | <path> | --since <iso> | --help
# Outputs:   stdout: "NO_SESSIONS: ..." or "IMPORTED: <n> events from <m> session(s)"
#            stderr: warnings, and the skipped-brief count when non-zero
# Returns:   0 on success; 1 when any session failed to import. Usage errors
#            exit 1 via die.
#######################################
main() {
  local mode="latest" explicit_path="" since_iso=""
  if (( $# > 0 )); then
    case "$1" in
      --since)
        mode="since"
        since_iso="${2:-}"
        ;;
      --help|-h)
        print_usage
        return 0
        ;;
      -*)
        die "unknown flag: $1"
        ;;
      *)
        mode="explicit"
        explicit_path="$1"
        ;;
    esac
  fi

  # Resolve list of session files to process.
  SESSION_FILES=()
  local latest
  case "$mode" in
    explicit)
      [[ -f "$explicit_path" ]] \
        || die "gstack-codex-session-import: file not found: $explicit_path"
      SESSION_FILES=("$explicit_path")
      ;;
    latest)
      if [[ ! -d "$CODEX_SESSIONS_ROOT" ]]; then
        echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
        return 0
      fi
      latest=$(find_latest_session "$CODEX_SESSIONS_ROOT")
      if [[ -z "$latest" ]]; then
        echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
        return 0
      fi
      SESSION_FILES=("$latest")
      ;;
    since)
      [[ -n "$since_iso" ]] || die "--since requires an ISO 8601 timestamp"
      if [[ -d "$CODEX_SESSIONS_ROOT" ]]; then
        # Evaluate the test on the root alone first so a timestamp find cannot
        # parse is reported instead of silently matching nothing.
        find "$CODEX_SESSIONS_ROOT" -maxdepth 0 -newermt "$since_iso" \
          >/dev/null 2>&1 \
          || die "--since: cannot interpret timestamp: $since_iso"
        collect_sessions_since "$CODEX_SESSIONS_ROOT" "$since_iso"
      fi
      ;;
  esac

  if (( ${#SESSION_FILES[@]} == 0 )); then
    echo "NO_SESSIONS: nothing to import"
    return 0
  fi

  command -v bun >/dev/null 2>&1 \
    || die "gstack-codex-session-import: bun is required but was not found on PATH"

  local imported=0 skipped_no_answer=0 failed=0
  local session_file counts imp skip
  for session_file in "${SESSION_FILES[@]}"; do
    # Only stdout is captured; warnings stay on stderr so they cannot corrupt
    # the count line.
    if ! counts=$(import_session "$session_file"); then
      printf 'WARN: import failed for %s\n' "$session_file" >&2
      failed=$((failed + 1))
      continue
    fi
    counts=${counts##*$'\n'}   # the counts are the last line of stdout
    read -r imp skip _ <<<"$counts" || true
    [[ "$imp" =~ ^[0-9]+$ ]] || imp=0
    [[ "$skip" =~ ^[0-9]+$ ]] || skip=0
    imported=$((imported + imp))
    skipped_no_answer=$((skipped_no_answer + skip))
  done

  echo "IMPORTED: $imported events from ${#SESSION_FILES[@]} session(s)"
  if (( skipped_no_answer > 0 )); then
    printf 'SKIPPED_NO_ANSWER: %d brief(s) had no user reply\n' \
      "$skipped_no_answer" >&2
  fi
  if (( failed > 0 )); then
    return 1
  fi
  return 0
}

main "$@"