test: E2E for Conductor prose + fix auto-decide-preserved GSTACK_HOME bug

- New skill-e2e-conductor-prose (periodic): Conductor env + plan-eng-review
  surfaces a prose decision brief, not a silent skip. Header documents this is
  end-to-end behavior coverage; the deterministic Conductor guard is the
  question-preference-hook unit test (the PTY harness can't register the MCP
  variant — Codex #10).
- Fix the pre-existing bug in auto-decide-preserved: it seeded the never-ask
  preference under GSTACK_HOME=tmpHome but never passed GSTACK_HOME into the
  PTY run, so the spawned claude read the real ~/.gstack and the preference
  was inert (Codex #9). Now passes GSTACK_HOME + CONDUCTOR_WORKSPACE_PATH to
  prove auto-decide still wins over the Conductor prose redirect.
- Register both in touchfiles (periodic tier).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-11 20:19:10 -07:00
parent ec63a2d25b
commit 788f35f021
3 changed files with 84 additions and 1 deletions
+7 -1
View File
@@ -111,7 +111,12 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// written a never-ask preference, AUQ should still auto-decide rather than
// surfacing the question. Touches the question-tuning + preference
// infrastructure plus the resolvers that own the AUTO_DECIDE preamble.
'auto-decide-preserved': ['scripts/resolvers/question-tuning.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'plan-ceo-review/**', 'bin/gstack-question-preference', 'bin/gstack-config', 'bin/gstack-slug', 'test/helpers/claude-pty-runner.ts'],
'auto-decide-preserved': ['scripts/resolvers/question-tuning.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-preamble-bash.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'plan-ceo-review/**', 'bin/gstack-question-preference', 'bin/gstack-config', 'bin/gstack-slug', 'hosts/claude/hooks/question-preference-hook.ts', 'lib/is-conductor.ts', 'test/helpers/claude-pty-runner.ts'],
// Conductor → prose decision brief (Conductor signal makes prose the default;
// the PreToolUse hook denies the flaky tool). Touches the resolver that owns
// the Conductor rule, the preamble signal, the hook, and the detection helper.
'conductor-prose': ['scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-preamble-bash.ts', 'scripts/resolvers/preamble.ts', 'plan-eng-review/**', 'hosts/claude/hooks/question-preference-hook.ts', 'lib/is-conductor.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-conductor-prose.test.ts'],
// Real-PTY E2E batch (#6 new tests on the harness).
// Each one tests behavior the SDK harness can't observe (rendered TTY,
@@ -505,6 +510,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
// v1.21+ auto-mode regression tests
'office-hours-auto-mode': 'gate',
'auto-decide-preserved': 'periodic',
'conductor-prose': 'periodic',
'e2e-harness-audit': 'gate',
// Real-PTY E2E batch — tier classification: