mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-06 21:46:40 +02:00
feat: one-way door classifier (belt-and-suspenders safety fallback)
scripts/one-way-doors.ts — secondary keyword-pattern classifier that catches
destructive questions even when the registry doesn't have an entry for them.
The registry's door_type field (from scripts/question-registry.ts) is the
PRIMARY safety gate. This classifier is the fallback for ad-hoc question_ids
that agents generate at runtime.
Classification priority:
1. Registry lookup by question_id → use declared door_type
2. Skill:category fallback (cso:approval, land-and-deploy:approval)
3. Keyword pattern match against question_summary
4. Default: treat as two-way (safer to log the miss than auto-decide unsafely)
Covers 21 destructive patterns across:
- File system (rm -rf, delete, wipe, purge, truncate)
- Database (drop table/database/schema, delete from)
- Git/VCS (force-push, reset --hard, checkout --, branch -D)
- Deploy/infra (kubectl delete, terraform destroy, rollback)
- Credentials (revoke/reset/rotate API key|token|secret|password)
- Architecture (breaking change, schema migration, data model change)
7 new tests in test/plan-tune.test.ts covering: registry-first lookup,
unknown-id fallthrough, keyword matching on destructive phrasings including
embedded filler words ("rotate the API key"), skill-category fallback,
benign questions defaulting to two-way, a size floor on the keyword pattern
list (more than 15 entries), and skill-category set membership.
27 pass, 0 fail. 1270 expect() calls.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,12 @@ import {
|
||||
getRegistryStats,
|
||||
type QuestionDef,
|
||||
} from '../scripts/question-registry';
|
||||
import {
|
||||
classifyQuestion,
|
||||
isOneWayDoor,
|
||||
DESTRUCTIVE_PATTERN_LIST,
|
||||
ONE_WAY_SKILL_CATEGORY_SET,
|
||||
} from '../scripts/one-way-doors';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
@@ -253,6 +259,74 @@ describe('AskUserQuestion template coverage (informational)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// One-way door classifier (belt-and-suspenders keyword fallback)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Suite for the exports of ../scripts/one-way-doors. Each test pins the
// `oneWay` verdict and, where the classifier reports one, the `reason` tag
// that says which stage produced the verdict.
describe('one-way-doors classifier', () => {
  test('registry lookup wins when question_id is known', () => {
    // [question_id, expected oneWay verdict]; both ids must resolve via the registry.
    const registryCases: Array<[string, boolean]> = [
      ['ship-test-failure-triage', true],
      ['ship-changelog-voice-polish', false],
    ];

    for (const [questionId, expectedOneWay] of registryCases) {
      const verdict = classifyQuestion({ question_id: questionId });
      expect(verdict.oneWay).toBe(expectedOneWay);
      expect(verdict.reason).toBe('registry');
    }
  });

  test('unknown question_id falls through to other checks', () => {
    // Only the negative is asserted: an unregistered id must not be attributed to the registry.
    const verdict = classifyQuestion({ question_id: 'some-ad-hoc-question-id' });
    expect(verdict.reason).not.toBe('registry');
  });

  test('keyword fallback catches destructive summaries', () => {
    // No question_id or skill is supplied, so the summary text is the only input.
    const destructiveSummaries = [
      'Delete this directory and all its contents?',
      'Run rm -rf /tmp/scratch — proceed?',
      'Force-push main?',
      'git reset --hard origin/main — ok?',
      'DROP TABLE users — confirm?',
      'kubectl delete namespace prod',
      'terraform destroy the staging cluster',
      'rotate the API key',
      'breaking change to the public API — ship anyway?',
    ];

    destructiveSummaries.forEach((phrase) => {
      const verdict = classifyQuestion({ summary: phrase });
      expect(verdict.oneWay).toBe(true);
      expect(verdict.reason).toBe('keyword');
      // A keyword verdict must also report what it matched.
      expect(verdict.matched).toBeDefined();
    });
  });

  test('skill-category fallback fires for cso:approval and land-and-deploy:approval', () => {
    for (const skill of ['cso', 'land-and-deploy']) {
      expect(isOneWayDoor({ skill, category: 'approval' })).toBe(true);
    }
  });

  test('benign questions default to two-way', () => {
    const harmlessSummaries = [
      'Want to update the changelog voice?',
      'Which mode should plan review use?',
      'Open the essay in your browser?',
    ];

    harmlessSummaries.forEach((phrase) => {
      const verdict = classifyQuestion({ summary: phrase });
      expect(verdict.oneWay).toBe(false);
      expect(verdict.reason).toBe('default-two-way');
    });
  });

  test('keyword patterns are non-empty', () => {
    // Guards against the pattern list being emptied or heavily trimmed: requires more than 15 entries.
    const patternCount = DESTRUCTIVE_PATTERN_LIST.length;
    expect(patternCount).toBeGreaterThan(15);
  });

  test('skill-category set covers security + deploy', () => {
    for (const key of ['cso:approval', 'land-and-deploy:approval']) {
      expect(ONE_WAY_SKILL_CATEGORY_SET.has(key)).toBe(true);
    }
  });
});
|
||||
|
||||
function findAllTemplates(): string[] {
|
||||
const results: string[] = [];
|
||||
function walk(dir: string) {
|
||||
|
||||
Reference in New Issue
Block a user