// gstack/test/redact-pattern-lint.test.ts

/**
 * ReDoS guard (T10) — fails CI if any taxonomy pattern has a catastrophic-
 * backtracking shape, and asserts the engine's oversize-input path fails CLOSED.
 *
 * We do two things:
 *   1. Static lint: reject nested unbounded quantifiers like (a+)+ / (a*)* /
 *      (a+)* in any pattern source. These are the classic ReDoS forms.
 *   2. Runtime budget: run the full scan over several pathological inputs and
 *      assert each scan finishes within a generous wall-clock budget. This
 *      catches catastrophic forms the static check might miss.
 */
import { describe, test, expect } from "bun:test";
import { PATTERNS } from "../lib/redact-patterns";
import { scan } from "../lib/redact-engine";

// Nested-quantifier ReDoS shapes: a group whose body ends in an unbounded
// quantifier (+, *, {n,} — greedy or lazy) and that is itself immediately
// quantified by +, *, {n} or {n,}.
//   e.g. (x+)+  (x*)*  (x+)*  (?:x+){2,}  (x{1,}){2,}  (x+?)+
//
// Lazy inner quantifiers are included: laziness only changes the order in which
// the engine tries alternatives, not how many there are to try on a failing
// match. A bounded inner quantifier such as (x{1,3})+ is deliberately not flagged.
//
// This is a textual heuristic over the pattern source, not a regex parser: it
// does not see through nested groups, and it treats an escaped "\+" or "\*"
// right before ")" as a quantifier (a conservative false positive).
const NESTED_QUANTIFIER = /\([^)]*(?:[+*]|\{\d+,\})\??\)(?:[+*]|\{\d+,?\})/;

describe("pattern lint — no catastrophic backtracking", () => {
  // True when the given regex source text contains a nested-quantifier shape.
  const flagged = (source: string): boolean => NESTED_QUANTIFIER.test(source);

  // One test per taxonomy entry, so a failure names the offending pattern id.
  for (const { id, regex } of PATTERNS) {
    test(`${id} has no nested unbounded quantifier`, () => {
      expect(flagged(regex.source)).toBe(false);
    });
  }

  test("a planted catastrophic pattern WOULD be caught by the linter", () => {
    // Meta-test: feed the linter known-bad sources to prove it is not a no-op.
    const planted = ["(a+)+", "(\\d*)*"];
    for (const source of planted) {
      expect(flagged(source)).toBe(true);
    }
  });
});

describe("runtime budget — pathological inputs do not hang", () => {
  // Wall-clock ceiling for one full scan of one adversarial input. Generous:
  // the full taxonomy over a ~5KB pathological string should be well under 1s
  // on any CI box. A catastrophic pattern would blow past this.
  const BUDGET_MS = 1000;

  // Cap handed to the engine; far above every input below, so the scan is not
  // expected to take the oversize path.
  const MAX_BYTES = 1024 * 1024;

  // Inputs designed to stress backtracking on the real patterns: long
  // single-character runs, some behind a recognisable prefix, plus one
  // repeated digit-group input.
  const adversarial = [
    "a".repeat(5000) + "!",
    "AKIA" + "A".repeat(5000),
    "eyJ" + "a".repeat(2000) + "." + "b".repeat(2000),
    "x@" + "a".repeat(3000),
    "/Users/" + "a".repeat(4000),
    ("1".repeat(19) + " ").repeat(200),
  ];

  for (const [i, input] of adversarial.entries()) {
    test(`adversarial input #${i} scans within budget`, () => {
      const start = performance.now();
      const result = scan(input, { repoVisibility: "private", maxBytes: MAX_BYTES });
      const elapsed = performance.now() - start;

      // Guard against a vacuous pass: if the engine reported the input as
      // oversize, the elapsed time would say nothing about the patterns.
      expect(result.oversize).not.toBe(true);
      expect(elapsed).toBeLessThan(BUDGET_MS);
    });
  }
});

describe("oversize fails closed (the real ReDoS backstop)", () => {
  test("input over cap returns blocking HIGH, never runs the patterns", () => {
    // The input is well over the maxBytes cap passed to scan().
    const tooBig = "a".repeat(50_000);
    const { oversize, counts, findings } = scan(tooBig, { maxBytes: 10_000 });

    // Expect the oversize flag, exactly one HIGH count, and the engine's own
    // "input too large" finding in first position.
    expect(oversize).toBe(true);
    expect(counts.HIGH).toBe(1);
    expect(findings[0].id).toBe("engine.input_too_large");
  });
});