/**
 * Real-PTY runner for Claude Code plan-mode E2E tests.
 *
 * Spawns the actual `claude` binary via `Bun.spawn({terminal:})`, drives
 * it through stdin/stdout, parses the rendered terminal frames, and exposes
 * primitives the 5 plan-mode tests need. Replaces the SDK-based
 * `runPlanModeSkillTest` from plan-mode-helpers.ts which never worked
 * because plan mode doesn't use the AskUserQuestion tool — it uses its
 * own TTY-rendered native confirmation UI.
 *
 * Why this exists: the SDK harness intercepts `canUseTool` for
 * `AskUserQuestion`. Claude in plan mode renders its "Ready to execute"
 * confirmation as a native option list (1-4 numbered options) without
 * invoking the AskUserQuestion tool. The SDK never sees it. Real PTY
 * does — it shows up as text on screen with `❯` cursor markers.
 *
 * Architecture: pure Bun.spawn — no node-pty, no native modules, no chmod
 * fixes. Bun 1.3.10+ has built-in PTY support via the `terminal:` spawn
 * option. Pattern borrowed from cc-pty-import branch's terminal-agent.ts
 * (the WS/cookie/Origin scaffolding there is for the browser sidebar;
 * tests don't need it).
 */

import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';

/**
 * Strip ANSI escapes for pattern-matching against visible text.
 *
 * Removes, in order: CSI sequences, OSC sequences (terminated by BEL or
 * ST), charset designations (`ESC ( B` etc.) and the single-character
 * escapes `ESC 7`, `ESC 8`, `ESC =`, `ESC >`.
 *
 * @param s Raw terminal output, possibly containing escape sequences.
 * @returns `s` with the recognised escape sequences deleted. Nothing is
 *   inserted in their place, so cursor-movement escapes that looked like
 *   spacing on screen leave no character behind.
 */
export function stripAnsi(s: string): string {
  return s
    // CSI per ECMA-48: ESC [ , parameter bytes 0x30-0x3F, intermediate bytes
    // 0x20-0x2F, one final byte 0x40-0x7E. Covers SGR/cursor moves as well as
    // private-mode sequences such as ESC[?25l that a digits-and-semicolons
    // pattern would leave in the text.
    .replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, '')
    // OSC: ESC ] ... terminated by BEL or ESC \ (ST).
    .replace(/\x1b\][^\x07\x1b]*(\x07|\x1b\\)/g, '')
    // Charset designation: ESC ( X / ESC ) X.
    .replace(/\x1b[()][AB012]/g, '')
    // Save/restore cursor and keypad-mode toggles.
    .replace(/\x1b[78=>]/g, '');
}

/**
 * Find claude on PATH, with fallback locations. Mirrors terminal-agent.ts.
 *
 * Resolution order:
 *   1. `BROWSE_TERMINAL_BINARY` env var, if it names an existing path.
 *   2. `Bun.which('claude')`, when the Bun global (and `which`) is present.
 *   3. A fixed list of well-known install locations, first executable wins.
 *
 * @returns Path to the binary, or `null` when none of the above matched.
 */
export function resolveClaudeBinary(): string | null {
  const override = process.env.BROWSE_TERMINAL_BINARY;
  if (override && fs.existsSync(override)) return override;

  // Look Bun up on globalThis instead of referencing the bare identifier so
  // this does not throw a ReferenceError when the global is absent.
  const bun = (globalThis as { Bun?: { which?: (cmd: string) => string | null } }).Bun;
  const onPath = bun?.which?.('claude');
  if (onPath) return onPath;

  const candidates = ['/opt/homebrew/bin/claude', '/usr/local/bin/claude'];

  // With HOME unset, interpolating it would yield the *relative* path
  // "undefined/.local/bin/claude"; fall back to os.homedir() and skip the
  // per-user locations entirely if no home directory can be determined.
  const home = process.env.HOME || os.homedir();
  if (home) {
    candidates.push(
      path.join(home, '.local', 'bin', 'claude'),
      path.join(home, '.bun', 'bin', 'claude'),
      path.join(home, '.npm-global', 'bin', 'claude'),
    );
  }

  for (const candidate of candidates) {
    try {
      fs.accessSync(candidate, fs.constants.X_OK);
      return candidate;
    } catch {
      /* keep searching */
    }
  }
  return null;
}

/** Options accepted when launching a Claude PTY session. */
export interface ClaudePtyOptions {
  /**
   * Permission mode for the session.
   * - 'plan' (default) — launches with --permission-mode plan
   * - null — no --permission-mode flag at all (regular interactive)
   *   NOTE(review): this bullet previously read `undefined`, which conflicts
   *   with 'plan' being the default for an omitted value and with `null`
   *   being part of the type; confirm against the launcher code.
   * Other valid SDK modes ('default', 'acceptEdits', 'bypassPermissions',
   * 'auto', 'dontAsk') are passed through verbatim.
   */
  permissionMode?: 'plan' | 'default' | 'acceptEdits' | 'bypassPermissions' | 'auto' | 'dontAsk' | null;
  /** Extra args after the permission-mode flag. */
  extraArgs?: string[];
  /** Terminal size. Default 120x40. Plan-mode UI lays out cleanly at this size. */
  cols?: number;
  rows?: number;
  /** Working directory. Default: process.cwd(). The repo cwd has the gstack
   *  skill registry and trusted-folder cookie, so most tests want this. */
  cwd?: string;
  /** Extra env on top of process.env. */
  env?: Record<string, string>;
  /** Total run timeout (ms). Default 240000 (4 min). */
  timeoutMs?: number;
}

/** Handle for driving and observing one running Claude PTY session. */
export interface ClaudePtySession {
  /** Send raw bytes to PTY stdin. Newlines = "\r" in TTY world. */
  send(data: string): void;
  /** Send a key by name. Limited set used by these tests. */
  sendKey(key: 'Enter' | 'Up' | 'Down' | 'Esc' | 'Tab' | 'ShiftTab' | 'CtrlC'): void;
  /** Raw accumulated stdout (with ANSI). For forensics. */
  rawOutput(): string;
  /** Visible (ANSI-stripped) output for the entire session. For pattern matching. */
  visibleText(): string;
  /**
   * Mark the current buffer position. Subsequent waitForAny / visibleSince
   * calls only look at output AFTER this mark. Use to scope assertions to
   * "after I sent the skill command" — avoids matching against the trust
   * dialog or boot banner residue. Returns a marker handle.
   */
  mark(): number;
  /** Visible text since the most recent (or specific) mark. */
  visibleSince(marker?: number): string;
  /**
   * Wait for any of the supplied patterns to appear in visibleText. Resolves
   * with the first match. Throws on timeout (with last 2KB of visible text).
   * If `since` is supplied, only matches text after that mark.
   */
  waitForAny(
    patterns: Array<RegExp | string>,
    opts?: { timeoutMs?: number; pollMs?: number; since?: number },
  ): Promise<{ matched: RegExp | string; index: number }>;
  /**
   * Convenience: single-pattern wait.
   * NOTE(review): resolved value assumed to be `void` — confirm against the
   * implementation.
   */
  waitFor(
    pattern: RegExp | string,
    opts?: { timeoutMs?: number; pollMs?: number; since?: number },
  ): Promise<void>;
  /** Process pid (for debug). */
  pid(): number | undefined;
  /** Whether the underlying process has exited. */
  exited(): boolean;
  /** Exit code, if known. */
  exitCode(): number | null;
  /**
   * Send SIGINT, then SIGKILL after 1s. Always safe to call multiple times.
   * Awaits process exit before resolving.
   */
  close(): Promise<void>;
}

/**
 * Detect the workspace-trust dialog rendering.
 *
 * @param visible ANSI-stripped terminal text.
 * @returns `true` when the text contains the literal phrase
 *   "trust this folder" (case-sensitive substring match).
 */
export function isTrustDialogVisible(visible: string): boolean {
  // Phrase Claude Code prints. Stable across versions in this branch's range.
  return visible.includes('trust this folder');
}

/**
 * Detect plan-mode's native "ready to execute" confirmation. Tests both the
 * spaced and whitespace-collapsed forms because stripAnsi removes cursor-
 * positioning escapes (e.g. `\x1b[40C`) that render visually as spaces but
 * leave no character behind — so "ready to execute" can come through as
 * "readytoexecute" depending on the rendering path.
 *
 * @param visible ANSI-stripped terminal text.
 * @returns `true` when either confirmation phrase is present, matched
 *   case-insensitively, with or without its whitespace.
 */
export function isPlanReadyVisible(visible: string): boolean {
  if (/ready to execute|Would you like to proceed/i.test(visible)) return true;
  // Second pass with all whitespace removed, for frames where the spacing
  // was produced by cursor movement rather than space characters.
  const collapsed = visible.replace(/\s+/g, '');
  return /readytoexecute|Wouldyouliketoproceed/i.test(collapsed);
}

/**
 * Detect the AUTO_DECIDE preamble template firing. The model prints
 * "Auto-decided