From 7f9a9a9dfffdc039b74f35549587951d0be1268b Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 7 Jun 2026 22:47:29 -0700 Subject: [PATCH] fix(redact): detect modern sk-proj-/sk-svcacct-/sk-admin- OpenAI keys (#1868) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openai.key (HIGH/block) used /\b(sk-(?:proj-)?[A-Za-z0-9]{32,})\b/, which stops at the first - or _ in the body. Modern OpenAI project/service-account/admin keys use base64url bodies containing - and _, so they never reached the 32-char run and produced ZERO findings — a HIGH credential failing open through /spec, /ship, /cso, and /document-*. Replace with explicit alternation, bare vs prefixed (not a globally-optional prefix, which would match malformed sk--... or separator-less sk-projabc...): sk-{proj,svcacct,admin}- + [A-Za-z0-9_-]{20,} | sk-[A-Za-z0-9]{32,} (legacy) Tests: the three previously-missed shapes now block; FP guards pin that hyphenated prose and malformed sk- strings do NOT match (HIGH tier blocks, so calibration matters). Reported by @jbetala7. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/redact-patterns.ts | 9 +++++++-- test/redact-engine.test.ts | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/redact-patterns.ts b/lib/redact-patterns.ts index a10f78e17..0645bfe1c 100644 --- a/lib/redact-patterns.ts +++ b/lib/redact-patterns.ts @@ -233,8 +233,13 @@ export const PATTERNS: RedactPattern[] = [ id: "openai.key", tier: "HIGH", category: "secret", - description: "OpenAI API key (incl. sk-proj-)", - regex: /\b(sk-(?:proj-)?[A-Za-z0-9]{32,})\b/, + description: "OpenAI API key (incl. sk-proj-/sk-svcacct-/sk-admin-)", + // Two explicit shapes (NOT a globally-optional prefix, which would match + // malformed sk--... 
or separator-less sk-projabc...): + // prefixed: sk-{proj,svcacct,admin}- + base64url-ish body (allows -_) + // bare: sk- + contiguous alphanumeric run (legacy), keeps {32,} floor + regex: + /\b(sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,}|sk-[A-Za-z0-9]{32,})\b/, }, { id: "sendgrid.key", diff --git a/test/redact-engine.test.ts b/test/redact-engine.test.ts index 52c119a19..dbbfd8a3a 100644 --- a/test/redact-engine.test.ts +++ b/test/redact-engine.test.ts @@ -49,6 +49,36 @@ describe("HIGH credential patterns", () => { }); } + // #1868 — modern OpenAI keys use base64url bodies (with - and _). The old + // [A-Za-z0-9]{32,} regex stopped at the first separator and missed them all, + // failing a HIGH credential OPEN through the redaction gate. + test("openai.key flags modern sk-proj-/sk-svcacct-/sk-admin- shapes (#1868)", () => { + const missed = [ + "sk-proj-Ab12_Cd34-Ef56Gh78Ij90Kl12Mn34Op56Qr78St90Uv", + "sk-svcacct-abc_def-ghijklmnopqrstuvwxyz0123456789ABCDEF", + "sk-admin-AAAA_BBBB-CCCC_DDDD-EEEE_FFFF-GGGG_HHHH1234", + ]; + for (const key of missed) { + expect(ids(`OPENAI_API_KEY=${key}`)).toContain("openai.key"); + } + // contiguous sk-proj- body (the form the old regex already matched) still flags + expect(ids("sk-proj-" + "a".repeat(40))).toContain("openai.key"); + }); + + test("openai.key does not over-match prose / malformed sk- strings (#1868 calibration)", () => { + // HIGH tier BLOCKS, so false positives on prose are costly. None of these + // should flag as openai.key. + const benign = [ + "the sk-learning-rate-schedule-was-tuned-carefully", // hyphenated prose + "sk--double-dash-typo-not-a-real-key", + "use sk-proj for the project prefix in docs", // no body + "sk-short", // too short, no prefix + ]; + for (const text of benign) { + expect(ids(text)).not.toContain("openai.key"); + } + }); + test("twilio.auth_token needs an SID nearby", () => { const sid = "AC" + "a".repeat(32); const tok = "b".repeat(32);