diff --git a/test/e2e-harness-audit.test.ts b/test/e2e-harness-audit.test.ts
index ce709cae..bd3ecf46 100644
--- a/test/e2e-harness-audit.test.ts
+++ b/test/e2e-harness-audit.test.ts
@@ -1,8 +1,11 @@
 /**
  * E2E harness audit — every skill with `interactive: true` in its frontmatter
- * must have at least one test file that uses `canUseTool` via the extended
- * agent-sdk-runner. This prevents future drift where a skill opts into the
- * handshake without adding real coverage.
+ * must have at least one test file that drives a real interactive session.
+ * Two valid coverage paths:
+ *   1. `canUseTool` via the agent-sdk-runner (legacy SDK-based path)
+ *   2. `runPlanSkillObservation` via the claude-pty-runner (real-PTY path
+ *      added when the SDK harness was found unable to observe plan mode's
+ *      native confirmation UI — see test/helpers/claude-pty-runner.ts)
  *
  * Runs as a free unit test (no API calls). Pure filesystem scan.
  */
@@ -76,14 +79,16 @@ function findInteractiveSkills(): string[] {
 }
 
 /**
- * Scan a test file's contents for the canUseTool-via-harness pattern.
- * Either: direct canUseTool usage in runAgentSdkTest, or usage of the
- * shared plan-mode-helpers that wrap it.
+ * Scan a test file's contents for any of the supported real-interactive
+ * coverage patterns. Either: direct canUseTool usage in runAgentSdkTest,
+ * the legacy plan-mode-helpers wrapper, or the new real-PTY observation
+ * helper.
  */
 function hasCanUseToolCoverage(testFile: string): boolean {
   const content = fs.readFileSync(testFile, 'utf-8');
   if (content.includes('canUseTool')) return true;
   if (content.includes('runPlanModeSkillTest')) return true;
+  if (content.includes('runPlanSkillObservation')) return true;
   return false;
 }
 
diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts
index abc76023..fb10f58a 100644
--- a/test/helpers/touchfiles.ts
+++ b/test/helpers/touchfiles.ts
@@ -84,14 +84,15 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
 
   // Plan-mode smoke tests — gate-tier safety regression tests. Each fires when
   // any of: the interactive skill's template, the plan-mode resolver
-  // (completion-status now owns generatePlanModeInfo), preamble composition,
-  // the Agent SDK harness, or the shared plan-mode-helpers change.
-  'plan-ceo-review-plan-mode':    ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
-  'plan-eng-review-plan-mode':    ['plan-eng-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
-  'plan-design-review-plan-mode': ['plan-design-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
-  'plan-devex-review-plan-mode':  ['plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
-  'plan-mode-no-op':              ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
-  'e2e-harness-audit':            ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'],
+  // (completion-status owns generatePlanModeInfo), preamble composition, or
+  // the real-PTY runner (which the tests now use instead of the SDK harness)
+  // change.
+  'plan-ceo-review-plan-mode':    ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/claude-pty-runner.ts'],
+  'plan-eng-review-plan-mode':    ['plan-eng-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/claude-pty-runner.ts'],
+  'plan-design-review-plan-mode': ['plan-design-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/claude-pty-runner.ts'],
+  'plan-devex-review-plan-mode':  ['plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/claude-pty-runner.ts'],
+  'plan-mode-no-op':              ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/claude-pty-runner.ts'],
+  'e2e-harness-audit':            ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/claude-pty-runner.ts'],
   'brain-privacy-gate':           ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-brain-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],
 
   // AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10)
diff --git a/test/skill-e2e-plan-ceo-plan-mode.test.ts b/test/skill-e2e-plan-ceo-plan-mode.test.ts
index 8914967e..8bb6a95b 100644
--- a/test/skill-e2e-plan-ceo-plan-mode.test.ts
+++ b/test/skill-e2e-plan-ceo-plan-mode.test.ts
@@ -1,38 +1,48 @@
 /**
- * plan-ceo-review plan-mode smoke test (gate tier, paid).
+ * plan-ceo-review plan-mode smoke (gate, paid, real-PTY).
  *
- * Asserts: when /plan-ceo-review is invoked with the plan-mode distinctive
- * phrase in the system reminder, the skill goes STRAIGHT to its Step 0
- * scope-mode AskUserQuestion. Specifically:
- *   1. First AskUserQuestion is NOT the old vestigial handshake
- *      (A=exit-and-rerun / C=cancel).
- *   2. No Write or Edit tool fires before the first AskUserQuestion
- *      (catches silent plan-file-write bypass).
- *   3. ExitPlanMode does not fire before the first AskUserQuestion.
+ * Asserts: when /plan-ceo-review is invoked in plan mode, the skill reaches
+ * a terminal outcome that is either:
+ *   - 'asked'      — skill emitted its Step 0 numbered prompt (scope mode
+ *                    selection, or the routing-injection prompt that runs
+ *                    before Step 0)
+ *   - 'plan_ready' — skill ran end-to-end and surfaced claude's native
+ *                    "Ready to execute" confirmation
  *
- * Cost: ~$0.50–$1.00 per run. Gated: EVALS=1 EVALS_TIER=gate.
+ * FAIL conditions: silent Write/Edit before any prompt, claude crash,
+ * timeout.
+ *
+ * Replaces the SDK-based test that never worked: the SDK's canUseTool
+ * interceptor on AskUserQuestion never fires in plan mode because plan
+ * mode renders its native confirmation as TTY UI, not via the
+ * AskUserQuestion tool. The real PTY harness observes the rendered
+ * terminal output directly.
+ *
+ * See test/helpers/claude-pty-runner.ts for runner internals.
  */
 
 import { describe, test, expect } from 'bun:test';
-import {
-  runPlanModeSkillTest,
-  assertNotHandshakeShape,
-} from './helpers/plan-mode-helpers';
+import { runPlanSkillObservation } from './helpers/claude-pty-runner';
 
 const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate';
 const describeE2E = shouldRun ? describe : describe.skip;
 
 describeE2E('plan-ceo-review plan-mode smoke (gate)', () => {
-  test('goes straight to scope-mode question, no handshake, no silent writes', async () => {
-    const result = await runPlanModeSkillTest({
+  test('reaches a terminal outcome (asked or plan_ready) without silent writes', async () => {
+    const obs = await runPlanSkillObservation({
       skillName: 'plan-ceo-review',
-      // Step 0 asks for review mode; HOLD is the cheapest, most-neutral answer.
-      firstAnswerSubstring: 'HOLD',
+      inPlanMode: true,
+      timeoutMs: 300_000,
     });
 
-    expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1);
-    assertNotHandshakeShape(result.askUserQuestions[0]!);
-    expect(result.writeOrEditBeforeAsk).toBe(false);
-    expect(result.exitPlanModeBeforeAsk).toBe(false);
-  }, 120_000);
+    if (obs.outcome === 'silent_write' || obs.outcome === 'exited' || obs.outcome === 'timeout') {
+      throw new Error(
+        `plan-ceo-review plan-mode smoke FAILED: outcome=${obs.outcome}\n` +
+          `summary: ${obs.summary}\n` +
+          `elapsed: ${obs.elapsedMs}ms\n` +
+          `--- evidence (last 2KB visible) ---\n${obs.evidence}`,
+      );
+    }
+    expect(['asked', 'plan_ready']).toContain(obs.outcome);
+  }, 360_000);
 });
diff --git a/test/skill-e2e-plan-design-plan-mode.test.ts b/test/skill-e2e-plan-design-plan-mode.test.ts
index 98d050e1..6fd7881a 100644
--- a/test/skill-e2e-plan-design-plan-mode.test.ts
+++ b/test/skill-e2e-plan-design-plan-mode.test.ts
@@ -1,31 +1,36 @@
 /**
- * plan-design-review plan-mode smoke test (gate tier, paid).
+ * plan-design-review plan-mode smoke (gate, paid, real-PTY).
  *
  * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion
- * contract. Exercises the same assertions against /plan-design-review.
+ * contract. Exercises the same contract against /plan-design-review.
+ *
+ * Note: on no-UI-scope branches plan-design-review legitimately short-
+ * circuits to plan_ready without firing AskUserQuestion. Both 'asked' and
+ * 'plan_ready' are valid pass outcomes.
  */
 
 import { describe, test, expect } from 'bun:test';
-import {
-  runPlanModeSkillTest,
-  assertNotHandshakeShape,
-} from './helpers/plan-mode-helpers';
+import { runPlanSkillObservation } from './helpers/claude-pty-runner';
 
 const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate';
 const describeE2E = shouldRun ? describe : describe.skip;
 
 describeE2E('plan-design-review plan-mode smoke (gate)', () => {
-  test('goes straight to first design question, no handshake, no silent writes', async () => {
-    const result = await runPlanModeSkillTest({
+  test('reaches a terminal outcome (asked or plan_ready) without silent writes', async () => {
+    const obs = await runPlanSkillObservation({
       skillName: 'plan-design-review',
-      // First question for design review varies; pick any reasonable match.
-      // The substring match falls back to the first option if no match.
-      firstAnswerSubstring: '7',
+      inPlanMode: true,
+      timeoutMs: 300_000,
     });
 
-    expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1);
-    assertNotHandshakeShape(result.askUserQuestions[0]!);
-    expect(result.writeOrEditBeforeAsk).toBe(false);
-    expect(result.exitPlanModeBeforeAsk).toBe(false);
-  }, 120_000);
+    if (obs.outcome === 'silent_write' || obs.outcome === 'exited' || obs.outcome === 'timeout') {
+      throw new Error(
+        `plan-design-review plan-mode smoke FAILED: outcome=${obs.outcome}\n` +
+          `summary: ${obs.summary}\n` +
+          `elapsed: ${obs.elapsedMs}ms\n` +
+          `--- evidence (last 2KB visible) ---\n${obs.evidence}`,
+      );
+    }
+    expect(['asked', 'plan_ready']).toContain(obs.outcome);
+  }, 360_000);
 });
diff --git a/test/skill-e2e-plan-devex-plan-mode.test.ts b/test/skill-e2e-plan-devex-plan-mode.test.ts
index 64bc447e..05f1abb3 100644
--- a/test/skill-e2e-plan-devex-plan-mode.test.ts
+++ b/test/skill-e2e-plan-devex-plan-mode.test.ts
@@ -1,30 +1,32 @@
 /**
- * plan-devex-review plan-mode smoke test (gate tier, paid).
+ * plan-devex-review plan-mode smoke (gate, paid, real-PTY).
  *
  * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion
- * contract. Exercises the same assertions against /plan-devex-review.
+ * contract. Exercises the same contract against /plan-devex-review.
  */
 
 import { describe, test, expect } from 'bun:test';
-import {
-  runPlanModeSkillTest,
-  assertNotHandshakeShape,
-} from './helpers/plan-mode-helpers';
+import { runPlanSkillObservation } from './helpers/claude-pty-runner';
 
 const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate';
 const describeE2E = shouldRun ? describe : describe.skip;
 
 describeE2E('plan-devex-review plan-mode smoke (gate)', () => {
-  test('goes straight to DX-mode question, no handshake, no silent writes', async () => {
-    const result = await runPlanModeSkillTest({
+  test('reaches a terminal outcome (asked or plan_ready) without silent writes', async () => {
+    const obs = await runPlanSkillObservation({
       skillName: 'plan-devex-review',
-      // Step 0 asks for DX review mode; TRIAGE is the lightest-weight mode.
-      firstAnswerSubstring: 'TRIAGE',
+      inPlanMode: true,
+      timeoutMs: 300_000,
     });
 
-    expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1);
-    assertNotHandshakeShape(result.askUserQuestions[0]!);
-    expect(result.writeOrEditBeforeAsk).toBe(false);
-    expect(result.exitPlanModeBeforeAsk).toBe(false);
-  }, 120_000);
+    if (obs.outcome === 'silent_write' || obs.outcome === 'exited' || obs.outcome === 'timeout') {
+      throw new Error(
+        `plan-devex-review plan-mode smoke FAILED: outcome=${obs.outcome}\n` +
+          `summary: ${obs.summary}\n` +
+          `elapsed: ${obs.elapsedMs}ms\n` +
+          `--- evidence (last 2KB visible) ---\n${obs.evidence}`,
+      );
+    }
+    expect(['asked', 'plan_ready']).toContain(obs.outcome);
+  }, 360_000);
 });
diff --git a/test/skill-e2e-plan-eng-plan-mode.test.ts b/test/skill-e2e-plan-eng-plan-mode.test.ts
index c57bbee4..93d55ece 100644
--- a/test/skill-e2e-plan-eng-plan-mode.test.ts
+++ b/test/skill-e2e-plan-eng-plan-mode.test.ts
@@ -1,29 +1,32 @@
 /**
- * plan-eng-review plan-mode smoke test (gate tier, paid).
+ * plan-eng-review plan-mode smoke (gate, paid, real-PTY).
  *
  * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion
- * contract. This file exercises the same assertions against /plan-eng-review.
+ * contract. This file exercises the same contract against /plan-eng-review.
  */
 
 import { describe, test, expect } from 'bun:test';
-import {
-  runPlanModeSkillTest,
-  assertNotHandshakeShape,
-} from './helpers/plan-mode-helpers';
+import { runPlanSkillObservation } from './helpers/claude-pty-runner';
 
 const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate';
 const describeE2E = shouldRun ? describe : describe.skip;
 
 describeE2E('plan-eng-review plan-mode smoke (gate)', () => {
-  test('goes straight to scope-mode question, no handshake, no silent writes', async () => {
-    const result = await runPlanModeSkillTest({
+  test('reaches a terminal outcome (asked or plan_ready) without silent writes', async () => {
+    const obs = await runPlanSkillObservation({
       skillName: 'plan-eng-review',
-      firstAnswerSubstring: 'HOLD',
+      inPlanMode: true,
+      timeoutMs: 300_000,
     });
 
-    expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1);
-    assertNotHandshakeShape(result.askUserQuestions[0]!);
-    expect(result.writeOrEditBeforeAsk).toBe(false);
-    expect(result.exitPlanModeBeforeAsk).toBe(false);
-  }, 120_000);
+    if (obs.outcome === 'silent_write' || obs.outcome === 'exited' || obs.outcome === 'timeout') {
+      throw new Error(
+        `plan-eng-review plan-mode smoke FAILED: outcome=${obs.outcome}\n` +
+          `summary: ${obs.summary}\n` +
+          `elapsed: ${obs.elapsedMs}ms\n` +
+          `--- evidence (last 2KB visible) ---\n${obs.evidence}`,
+      );
+    }
+    expect(['asked', 'plan_ready']).toContain(obs.outcome);
+  }, 360_000);
 });
diff --git a/test/skill-e2e-plan-mode-no-op.test.ts b/test/skill-e2e-plan-mode-no-op.test.ts
index 061e9cd8..73999522 100644
--- a/test/skill-e2e-plan-mode-no-op.test.ts
+++ b/test/skill-e2e-plan-mode-no-op.test.ts
@@ -1,47 +1,48 @@
 /**
- * Plan-mode-info no-op regression (gate tier, paid).
+ * Plan-mode-info no-op regression (gate tier, paid, real-PTY).
  *
- * Asserts: when /plan-ceo-review is invoked WITHOUT the plan-mode distinctive
- * phrase in the system reminder, the plan-mode-info preamble section is a
- * no-op. The skill should proceed to its normal Step 0 flow with no
- * AskUserQuestion echoing or referencing the plan-mode reminder text.
+ * Asserts: when /plan-ceo-review is invoked OUTSIDE plan mode (no
+ * --permission-mode plan flag, no plan-mode reminder injected), the skill
+ * still reaches a terminal outcome ('asked' or 'plan_ready'). This is the
+ * negative coverage to the per-skill plan-mode smokes — if the
+ * plan-mode-info preamble section ever starts misfiring for non-plan-mode
+ * sessions (e.g., gating questions on a phrase that isn't there), this
+ * test catches it.
  *
- * This guardrails the "outside plan mode, this block doesn't interfere"
- * case — a different coverage case from the per-skill in-plan-mode smokes.
- * If the plan-mode-info section ever starts misfiring for non-plan-mode
- * sessions, this test catches it.
- *
- * Cost: ~$0.50 per run. Gated: EVALS=1 EVALS_TIER=gate.
+ * Why this matters: outside plan mode, claude doesn't render a native
+ * confirmation UI. The skill must drive its own AskUserQuestion. Same
+ * runner, same outcome contract — just `inPlanMode: false`.
  */
 
 import { describe, test, expect } from 'bun:test';
-import {
-  runPlanModeSkillTest,
-  PLAN_MODE_REMINDER,
-} from './helpers/plan-mode-helpers';
+import { runPlanSkillObservation } from './helpers/claude-pty-runner';
 
 const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate';
 const describeE2E = shouldRun ? describe : describe.skip;
 
 describeE2E('plan-mode-info no-op outside plan mode (gate regression)', () => {
-  test('no AskUserQuestion echoes the plan-mode reminder when absent', async () => {
-    const result = await runPlanModeSkillTest({
+  test('skill reaches a terminal outcome outside plan mode', async () => {
+    const obs = await runPlanSkillObservation({
       skillName: 'plan-ceo-review',
-      firstAnswerSubstring: 'HOLD',
-      omitPlanModeReminder: true,
-      maxTurns: 3,
+      inPlanMode: false,
+      timeoutMs: 300_000,
     });
 
-    // Skill should still hit Step 0 normally outside plan mode.
-    expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1);
-
-    // No AskUserQuestion should echo the plan-mode distinctive phrase.
-    // If one does, the plan-mode-info section is leaking outside plan mode.
-    for (const aq of result.askUserQuestions) {
-      const questions = aq.input.questions as Array<{ question: string }>;
-      for (const q of questions) {
-        expect(q.question).not.toContain(PLAN_MODE_REMINDER);
-      }
+    if (obs.outcome === 'silent_write' || obs.outcome === 'exited' || obs.outcome === 'timeout') {
+      throw new Error(
+        `plan-mode no-op regression FAILED: outcome=${obs.outcome}\n` +
+          `summary: ${obs.summary}\n` +
+          `elapsed: ${obs.elapsedMs}ms\n` +
+          `--- evidence (last 2KB visible) ---\n${obs.evidence}`,
+      );
     }
-  }, 120_000);
+    expect(['asked', 'plan_ready']).toContain(obs.outcome);
+
+    // Negative regression: the rendered output must NOT echo the plan-mode
+    // distinctive reminder phrase. If it does, the plan-mode preamble
+    // section is leaking outside plan mode.
+    const PLAN_MODE_REMINDER =
+      'Plan mode is active. The user indicated that they do not want you to execute yet';
+    expect(obs.evidence).not.toContain(PLAN_MODE_REMINDER);
+  }, 360_000);
 });