From 28b14fbf0ce22102e9e2ab2a33bbf6650e22a594 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Thu, 23 Apr 2026 23:40:50 -0700 Subject: [PATCH] feat: extend agent-sdk-runner with canUseTool for AskUserQuestion interception MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test harness at test/helpers/agent-sdk-runner.ts gains an optional `canUseTool` callback parameter. When a test supplies it, the harness flips `permissionMode` from `bypassPermissions` (overlay-harness default) to `default` so the SDK actually invokes the callback on every tool use, and auto-adds `AskUserQuestion` to `allowedTools` so Claude can fire it at all. Exports a `passThroughNonAskUserQuestion` helper so tests that only want to intercept AskUserQuestion can auto-allow every other tool with one line: `return passThroughNonAskUserQuestion(toolName, input)`. This is the foundation for D14 — every future interactive-skill E2E test can now assert on AskUserQuestion shape and routing. Previous E2E tests at `test/skill-e2e.test.ts` explicitly instructed the model to skip AskUserQuestion ("non-interactive run") which meant no test could actually verify the question content or routing. 
6 new unit tests in test/agent-sdk-runner.test.ts cover: - permissionMode flips to 'default' when canUseTool supplied - permissionMode stays 'bypassPermissions' when canUseTool absent - canUseTool callback reaches the SDK options - AskUserQuestion auto-added to allowedTools when canUseTool supplied - AskUserQuestion NOT added when canUseTool absent - passThroughNonAskUserQuestion helper returns allow+updatedInput Co-Authored-By: Claude Opus 4.7 --- test/agent-sdk-runner.test.ts | 95 ++++++++++++++++++++++++++++++++ test/helpers/agent-sdk-runner.ts | 66 ++++++++++++++++++++-- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/test/agent-sdk-runner.test.ts b/test/agent-sdk-runner.test.ts index eb256092..39c5db81 100644 --- a/test/agent-sdk-runner.test.ts +++ b/test/agent-sdk-runner.test.ts @@ -366,6 +366,101 @@ describe('runAgentSdkTest — options propagation', () => { }); }); +// --------------------------------------------------------------------------- +// canUseTool extension (D10 CEO / D4 eng) +// --------------------------------------------------------------------------- + +describe('runAgentSdkTest — canUseTool extension', () => { + test('permissionMode flips to "default" when canUseTool is supplied', async () => { + freshSem(); + const stub: StubConfig = { + streams: [[systemInit(), assistantTurn([{ type: 'text', text: 'ok' }]), resultSuccess()]], + calls: [], + }; + await runAgentSdkTest({ + ...BASE_OPTS, + queryProvider: makeStubProvider(stub), + canUseTool: async (_toolName, input) => ({ behavior: 'allow', updatedInput: input }), + }); + const opts = stub.calls[0]!.options!; + expect(opts.permissionMode).toBe('default'); + expect(opts.allowDangerouslySkipPermissions).toBe(false); + }); + + test('permissionMode stays "bypassPermissions" when canUseTool is NOT supplied', async () => { + freshSem(); + const stub: StubConfig = { + streams: [[systemInit(), assistantTurn([{ type: 'text', text: 'ok' }]), resultSuccess()]], + calls: [], + }; + await 
runAgentSdkTest({ + ...BASE_OPTS, + queryProvider: makeStubProvider(stub), + }); + const opts = stub.calls[0]!.options!; + expect(opts.permissionMode).toBe('bypassPermissions'); + expect(opts.allowDangerouslySkipPermissions).toBe(true); + }); + + test('canUseTool callback reaches the SDK options', async () => { + freshSem(); + const stub: StubConfig = { + streams: [[systemInit(), assistantTurn([{ type: 'text', text: 'ok' }]), resultSuccess()]], + calls: [], + }; + const cb = async (_toolName: string, input: Record<string, unknown>) => ({ + behavior: 'allow' as const, + updatedInput: input, + }); + await runAgentSdkTest({ + ...BASE_OPTS, + queryProvider: makeStubProvider(stub), + canUseTool: cb, + }); + const opts = stub.calls[0]!.options! as Options & { canUseTool?: unknown }; + expect(typeof opts.canUseTool).toBe('function'); + }); + + test('AskUserQuestion is auto-added to allowedTools when canUseTool is supplied', async () => { + freshSem(); + const stub: StubConfig = { + streams: [[systemInit(), assistantTurn([{ type: 'text', text: 'ok' }]), resultSuccess()]], + calls: [], + }; + await runAgentSdkTest({ + ...BASE_OPTS, + allowedTools: ['Read', 'Grep'], // explicitly omits AskUserQuestion + queryProvider: makeStubProvider(stub), + canUseTool: async (_toolName, input) => ({ behavior: 'allow', updatedInput: input }), + }); + const opts = stub.calls[0]!.options!; + expect(opts.allowedTools).toContain('AskUserQuestion'); + expect(opts.tools).toContain('AskUserQuestion'); + }); + + test('AskUserQuestion is NOT auto-added when canUseTool is absent', async () => { + freshSem(); + const stub: StubConfig = { + streams: [[systemInit(), assistantTurn([{ type: 'text', text: 'ok' }]), resultSuccess()]], + calls: [], + }; + await runAgentSdkTest({ + ...BASE_OPTS, + allowedTools: ['Read', 'Grep'], + queryProvider: makeStubProvider(stub), + }); + const opts = stub.calls[0]!.options!; + expect(opts.allowedTools).not.toContain('AskUserQuestion'); + }); + + test('passThroughNonAskUserQuestion 
helper returns allow+updatedInput', async () => { + const { passThroughNonAskUserQuestion } = await import('../test/helpers/agent-sdk-runner'); + const result = passThroughNonAskUserQuestion('Read', { file_path: '/tmp/x' }); + expect(result.behavior).toBe('allow'); + expect(result.updatedInput).toEqual({ file_path: '/tmp/x' }); + }); +}); + // --------------------------------------------------------------------------- // Rate-limit retry (three shapes) // --------------------------------------------------------------------------- diff --git a/test/helpers/agent-sdk-runner.ts b/test/helpers/agent-sdk-runner.ts index a4df71d9..cea7bf76 100644 --- a/test/helpers/agent-sdk-runner.ts +++ b/test/helpers/agent-sdk-runner.ts @@ -31,6 +31,7 @@ import { type PermissionMode, type SettingSource, type Options, + type CanUseTool, } from '@anthropic-ai/claude-agent-sdk'; import * as fs from 'fs'; import * as path from 'path'; @@ -111,6 +112,43 @@ export interface RunAgentSdkOptions { * retries reuse the original workingDirectory (fine for read-only tests). */ onRetry?: (freshDir: string) => void; + /** + * Optional canUseTool callback. When supplied, the harness flips + * permissionMode from 'bypassPermissions' to 'default' so the SDK actually + * routes tool-use approval decisions through the callback. Without this + * flip, bypassPermissions short-circuits the callback and tests that want + * to assert on AskUserQuestion content silently pass without asserting. + * + * Callback contract matches the SDK: fires on every tool-use approval + * request and on AskUserQuestion invocations. For non-AskUserQuestion + * tools that tests don't care about, use `passThroughNonAskUserQuestion` + * to auto-allow them. + */ + canUseTool?: CanUseTool; +} + +/** + * Pass-through helper: auto-allows any tool_use that isn't AskUserQuestion. 
+ * Most plan-mode handshake tests only care about the handshake AskUserQuestion; + * every other tool (Read, Grep, Bash, Write, Edit, ExitPlanMode) should just + * run. Compose with a test-specific AskUserQuestion handler: + * + * canUseTool: async (toolName, input, options) => { + * if (toolName === 'AskUserQuestion') { + * // custom assertions + canned answer + * return { behavior: 'allow', updatedInput: { questions: input.questions, answers: {...} } }; + * } + * return passThroughNonAskUserQuestion(toolName, input); + * } + */ +export function passThroughNonAskUserQuestion( + toolName: string, + input: Record<string, unknown>, +): { behavior: 'allow'; updatedInput: Record<string, unknown> } { + // SDK requires an allow response to include updatedInput — pass the original + // input through unchanged so the tool runs as the model intended. + void toolName; + return { behavior: 'allow', updatedInput: input }; } export class RateLimitExhaustedError extends Error { @@ -287,19 +325,37 @@ export async function runAgentSdkTest( let terminalResult: SDKResultMessage | null = null; try { + // When canUseTool is supplied, the SDK must route tool-use approval + // decisions through the callback. bypassPermissions short-circuits + // that. Flip to 'default' mode so canUseTool actually fires. Tests + // that want AskUserQuestion interception without this flip would + // silently auto-pass — the exact testability gap D14/D4-eng fix. + const hasCanUseTool = typeof opts.canUseTool === 'function'; + const resolvedPermissionMode: PermissionMode = + opts.permissionMode ?? (hasCanUseTool ? 'default' : 'bypassPermissions'); + + // When canUseTool is supplied, ensure AskUserQuestion is in the allowed + // tools list. Without it, Claude can't invoke AskUserQuestion at all + // and the callback never has a chance to fire on it. + const baseTools = opts.allowedTools ?? ['Read', 'Glob', 'Grep', 'Bash']; + const resolvedTools = + hasCanUseTool && !baseTools.includes('AskUserQuestion') + ? 
[...baseTools, 'AskUserQuestion'] + : baseTools; + const sdkOpts: Options = { model, cwd: opts.workingDirectory, maxTurns: opts.maxTurns ?? 5, - tools: opts.allowedTools ?? ['Read', 'Glob', 'Grep', 'Bash'], + tools: resolvedTools, disallowedTools: opts.disallowedTools, - allowedTools: opts.allowedTools ?? ['Read', 'Glob', 'Grep', 'Bash'], - permissionMode: opts.permissionMode ?? 'bypassPermissions', - allowDangerouslySkipPermissions: - (opts.permissionMode ?? 'bypassPermissions') === 'bypassPermissions', + allowedTools: resolvedTools, + permissionMode: resolvedPermissionMode, + allowDangerouslySkipPermissions: resolvedPermissionMode === 'bypassPermissions', settingSources: opts.settingSources ?? [], env: opts.env, pathToClaudeCodeExecutable: opts.pathToClaudeCodeExecutable, + ...(hasCanUseTool ? { canUseTool: opts.canUseTool } : {}), }; // Empty bare string means "omit entirely" (SDK runs with no override). // Any object or non-empty string is passed through.