fix(safety): one-way-door classifier catches "rotate ... password" (#1839)

scripts/one-way-doors.ts is the secondary safety net for ad-hoc AskUserQuestion
ids with no registry entry; a false negative auto-approves a destructive op. The
revoke and reset credential patterns both include `password`, but the rotate
pattern omitted it, so the most common phrasing ("rotate the database password")
was classified as a reversible two-way question.

Add `password` to the rotate alternation so all three verbs are parallel. New test
covers rotate+password, the revoke/reset/rotate parallel, and rotate's other nouns.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-07 22:50:10 -07:00
parent 549f32a8f9
commit 1f768ad142
2 changed files with 33 additions and 1 deletions
+1 -1
View File
@@ -65,7 +65,7 @@ const DESTRUCTIVE_PATTERNS: RegExp[] = [
// Credentials / auth — allow filler words ("the", "my") between verb and noun
/\brevoke\s+[\w\s]*\b(api key|token|credential|access key|password)\b/i,
/\breset\s+[\w\s]*\b(api key|token|password|credential)\b/i,
/\brotate\s+[\w\s]*\b(api key|token|secret|credential|access key)\b/i,
/\brotate\s+[\w\s]*\b(api key|token|secret|credential|access key|password)\b/i,
// Scope / architecture forks (reversible with effort — still deserve confirmation)
/\barchitectur(e|al)\s+(change|fork|shift|decision)\b/i,
+32
View File
@@ -0,0 +1,32 @@
/**
* Unit tests for scripts/one-way-doors.ts keyword safety net.
*
* The keyword layer is the SECONDARY safety net for ad-hoc AskUserQuestion ids
* with no registry entry. A false negative auto-approves a destructive op, so the
* credential-rotation patterns must be parallel across revoke/reset/rotate.
*/
import { describe, test, expect } from "bun:test";
import { classifyQuestion } from "../scripts/one-way-doors";
describe("one-way-door credential keyword net (#1839)", () => {
  // Verbs whose credential patterns are expected to treat "password" the same way.
  const credentialVerbs = ["revoke", "reset", "rotate"];
  // Nouns the rotate pattern already matched before "password" was added to it.
  const existingRotateNouns = ["api key", "token", "secret", "credential", "access key"];

  /** Classifies a bare summary and returns only the one-way verdict. */
  const isOneWay = (summary: string) => classifyQuestion({ summary }).oneWay;

  // Regression for #1839: the rotate alternation lacked "password" even though
  // revoke and reset listed it, so this phrasing was treated as a two-way question.
  test('"rotate the database password" classifies one-way', () => {
    const { oneWay, reason } = classifyQuestion({
      summary: "rotate the database password",
    });

    expect(oneWay).toBe(true);
    expect(reason).toBe("keyword");
  });

  // The same "password" phrasing must be flagged regardless of which verb is used.
  test("revoke/reset/rotate are all parallel for password", () => {
    credentialVerbs.forEach((verb) => {
      expect(isOneWay(`${verb} the production password`)).toBe(true);
    });
  });

  // Guards against the new alternative displacing any noun rotate matched before.
  test("rotate still catches the other credential nouns", () => {
    existingRotateNouns.forEach((noun) => {
      expect(isOneWay(`rotate my ${noun}`)).toBe(true);
    });
  });
});