fix(redact): detect modern sk-proj-/sk-svcacct-/sk-admin- OpenAI keys (#1868)

openai.key (HIGH/block) used /\b(sk-(?:proj-)?[A-Za-z0-9]{32,})\b/, which stops
at the first - or _ in the body. Modern OpenAI project/service-account/admin keys
use base64url bodies containing - and _, so they never reached the 32-char run and
produced ZERO findings — a HIGH credential failing open through /spec, /ship, /cso,
and /document-*.

Replace with explicit alternation, bare vs prefixed (not a globally-optional prefix,
which would match malformed sk--... or separator-less sk-projabc...):
  sk-{proj,svcacct,admin}- + [A-Za-z0-9_-]{20,}  |  sk-[A-Za-z0-9]{32,} (legacy)

Tests: the three previously-missed shapes now block; FP guards pin that hyphenated
prose and malformed sk- strings do NOT match (HIGH tier blocks, so calibration matters).

Reported by @jbetala7.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-07 22:47:29 -07:00
parent b89b911a4a
commit 7f9a9a9dff
2 changed files with 37 additions and 2 deletions
+7 -2
View File
@@ -233,8 +233,13 @@ export const PATTERNS: RedactPattern[] = [
id: "openai.key",
tier: "HIGH",
category: "secret",
description: "OpenAI API key (incl. sk-proj-)",
regex: /\b(sk-(?:proj-)?[A-Za-z0-9]{32,})\b/,
description: "OpenAI API key (incl. sk-proj-/sk-svcacct-/sk-admin-)",
// Two explicit shapes (NOT a globally-optional prefix, which would match
// malformed sk--... or separator-less sk-projabc...):
// prefixed: sk-{proj,svcacct,admin}- + base64url-ish body (allows -_)
// bare: sk- + contiguous alphanumeric run (legacy), keeps {32,} floor
regex:
/\b(sk-(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,}|sk-[A-Za-z0-9]{32,})\b/,
},
{
id: "sendgrid.key",
+30
View File
@@ -49,6 +49,36 @@ describe("HIGH credential patterns", () => {
});
}
// #1868 — modern OpenAI keys use base64url bodies (with - and _). The old
// [A-Za-z0-9]{32,} regex stopped at the first separator and missed them all,
// failing a HIGH credential OPEN through the redaction gate.
//
// This test pins both alternatives of the openai.key regex:
//   1. prefixed: sk-{proj,svcacct,admin}- followed by a body that may contain - and _
//   2. bare:     sk- followed by a contiguous alphanumeric run (legacy keys)
test("openai.key flags modern sk-proj-/sk-svcacct-/sk-admin- shapes (#1868)", () => {
  // Shapes the previous regex produced zero findings for.
  const missed = [
    "sk-proj-Ab12_Cd34-Ef56Gh78Ij90Kl12Mn34Op56Qr78St90Uv",
    "sk-svcacct-abc_def-ghijklmnopqrstuvwxyz0123456789ABCDEF",
    "sk-admin-AAAA_BBBB-CCCC_DDDD-EEEE_FFFF-GGGG_HHHH1234",
  ];
  for (const key of missed) {
    expect(ids(`OPENAI_API_KEY=${key}`)).toContain("openai.key");
  }
  // Prefixed key with a contiguous alphanumeric body (the one shape the old
  // regex already caught) must keep flagging.
  expect(ids("sk-proj-" + "a".repeat(40))).toContain("openai.key");
  // Bare legacy key (no proj/svcacct/admin prefix) goes through the second
  // regex alternative; assert it separately so that branch cannot regress
  // unnoticed.
  expect(ids("sk-" + "a".repeat(40))).toContain("openai.key");
});
test("openai.key does not over-match prose / malformed sk- strings (#1868 calibration)", () => {
  // A HIGH-tier finding blocks, which makes a false positive on ordinary text
  // expensive. Every sample below must come back without an openai.key id.
  const harmlessSamples: readonly string[] = [
    // hyphenated prose that merely starts with "sk-"
    "the sk-learning-rate-schedule-was-tuned-carefully",
    // doubled separator: a typo, not a key
    "sk--double-dash-typo-not-a-real-key",
    // prefix mentioned in documentation with no key body after it
    "use sk-proj for the project prefix in docs",
    // no recognised prefix and far too short for the bare shape
    "sk-short",
  ];
  harmlessSamples.forEach((sample) => {
    expect(ids(sample)).not.toContain("openai.key");
  });
});
test("twilio.auth_token needs an SID nearby", () => {
const sid = "AC" + "a".repeat(32);
const tok = "b".repeat(32);