mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-18 07:40:09 +02:00
feat(auq): defensive PostToolUse error-fallback hook (OV3:B)
When an AskUserQuestion call returns an error/missing result, this hook injects additionalContext reminding the model to run the prose fallback for the current SESSION_KIND. It does not render prose itself — it guarantees the reminder fires at the moment of failure instead of relying on the model recalling SESSION_KIND. Inert on success and inert if the platform never invokes PostToolUse on tool errors (unverified — could not force the Conductor MCP error in a harness; see the spike doc). The prompt-level fallback covers the case regardless. Decision logic is unit-tested deterministically; registered in setup beside the existing AUQ hooks. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
/**
|
||||
* auq-error-fallback-hook — the OV3:B runtime reliability layer.
|
||||
*
|
||||
* Two layers of testing:
|
||||
* - PURE functions (isErrorResponse, directiveFor): deterministic, the core logic.
|
||||
* - INTEGRATION: spawn the hook as a PostToolUse process with synthetic stdin and
|
||||
* a controlled env, assert it injects the right directive on an error result and
|
||||
* stays inert on a real answer.
|
||||
*
|
||||
* NOTE: whether the Claude Code PLATFORM invokes PostToolUse on an MCP
|
||||
* transport/missing-result error is unverified (could not force the Conductor
|
||||
* bug in a harness — see docs/spikes/claude-code-hook-mutation.md). These tests
|
||||
* pin the hook's BEHAVIOR given it is invoked; the platform trigger is the
|
||||
* documented residual risk. The hook is inert if never invoked.
|
||||
*/
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as path from 'path';
|
||||
import { isErrorResponse, directiveFor } from '../hosts/claude/hooks/auq-error-fallback-hook.ts';
|
||||
|
||||
const HOOK = path.resolve(__dirname, '..', 'hosts', 'claude', 'hooks', 'auq-error-fallback-hook.ts');
|
||||
|
||||
describe('isErrorResponse — only clear failures, never a real answer', () => {
|
||||
test('null / undefined / empty string are failures', () => {
|
||||
expect(isErrorResponse(null)).toBe(true);
|
||||
expect(isErrorResponse(undefined)).toBe(true);
|
||||
expect(isErrorResponse('')).toBe(true);
|
||||
expect(isErrorResponse(' ')).toBe(true);
|
||||
});
|
||||
|
||||
test('the Conductor missing-result string is a failure', () => {
|
||||
expect(isErrorResponse('[Tool result missing due to internal error]')).toBe(true);
|
||||
});
|
||||
|
||||
test('is_error / error object shapes are failures', () => {
|
||||
expect(isErrorResponse({ is_error: true })).toBe(true);
|
||||
expect(isErrorResponse({ isError: true })).toBe(true);
|
||||
expect(isErrorResponse({ error: 'boom' })).toBe(true);
|
||||
expect(isErrorResponse({ content: '...internal error...' })).toBe(true);
|
||||
});
|
||||
|
||||
test('a real answer is NOT a failure (no false trigger)', () => {
|
||||
expect(isErrorResponse({ answers: [{ option_label: 'A' }] })).toBe(false);
|
||||
expect(isErrorResponse('A')).toBe(false);
|
||||
// a choice that coincidentally contains the word "error" must not trip it
|
||||
expect(isErrorResponse({ answers: [{ option_label: 'Fix the error' }] })).toBe(false);
|
||||
expect(isErrorResponse('Investigate the login error')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('directiveFor — per-session-kind instruction', () => {
|
||||
test('interactive directive demands the prose triad', () => {
|
||||
const d = directiveFor('interactive');
|
||||
expect(d).toMatch(/ELI10/);
|
||||
expect(d).toMatch(/Completeness: X\/10/);
|
||||
expect(d).toMatch(/\(recommended\)/);
|
||||
expect(d).toMatch(/reply with a letter/i);
|
||||
expect(d).toMatch(/STOP/);
|
||||
});
|
||||
|
||||
test('headless directive BLOCKs', () => {
|
||||
expect(directiveFor('headless')).toMatch(/BLOCKED — AskUserQuestion unavailable/);
|
||||
});
|
||||
|
||||
test('spawned directive auto-chooses', () => {
|
||||
expect(directiveFor('spawned')).toMatch(/auto-choose/i);
|
||||
});
|
||||
});
|
||||
|
||||
/** Spawn the hook with synthetic stdin + controlled env; parse its JSON stdout. */
|
||||
function runHook(stdin: object, env: Record<string, string>): { additionalContext?: string } {
|
||||
const res = spawnSync('bun', [HOOK], {
|
||||
input: JSON.stringify(stdin),
|
||||
encoding: 'utf-8',
|
||||
env: { PATH: process.env.PATH ?? '/usr/bin:/bin', ...env },
|
||||
});
|
||||
const parsed = JSON.parse(res.stdout || '{}');
|
||||
return parsed.hookSpecificOutput ?? {};
|
||||
}
|
||||
|
||||
describe('hook integration — invoked as PostToolUse', () => {
|
||||
test('error result + headless env → injects BLOCK directive', () => {
|
||||
const out = runHook(
|
||||
{ tool_name: 'mcp__conductor__AskUserQuestion', tool_response: '[Tool result missing due to internal error]' },
|
||||
{ GSTACK_HEADLESS: '1' },
|
||||
);
|
||||
expect(out.additionalContext).toMatch(/BLOCKED — AskUserQuestion unavailable/);
|
||||
});
|
||||
|
||||
test('error result + interactive env → injects prose-triad directive', () => {
|
||||
const out = runHook(
|
||||
{ tool_name: 'AskUserQuestion', tool_response: null },
|
||||
{ CONDUCTOR_PORT: '55010' },
|
||||
);
|
||||
expect(out.additionalContext).toMatch(/render the decision as a PROSE message/i);
|
||||
expect(out.additionalContext).toMatch(/Completeness: X\/10/);
|
||||
});
|
||||
|
||||
test('error result + spawned env → injects auto-choose directive', () => {
|
||||
const out = runHook(
|
||||
{ tool_name: 'AskUserQuestion', tool_response: { is_error: true } },
|
||||
{ OPENCLAW_SESSION: '1' },
|
||||
);
|
||||
expect(out.additionalContext).toMatch(/auto-choose/i);
|
||||
});
|
||||
|
||||
test('SUCCESSFUL answer → no injection (inert on real answers)', () => {
|
||||
const out = runHook(
|
||||
{ tool_name: 'AskUserQuestion', tool_response: { answers: [{ option_label: 'A' }] } },
|
||||
{ GSTACK_HEADLESS: '1' },
|
||||
);
|
||||
expect(out.additionalContext).toBeUndefined();
|
||||
});
|
||||
|
||||
test('non-AUQ tool → defers (no injection)', () => {
|
||||
const out = runHook(
|
||||
{ tool_name: 'Bash', tool_response: null },
|
||||
{ GSTACK_HEADLESS: '1' },
|
||||
);
|
||||
expect(out.additionalContext).toBeUndefined();
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user