From b5ff65c9fd7a37ca33f8a72d9eba806b86b642ef Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 29 May 2026 07:06:01 -0700 Subject: [PATCH] feat(redact): bin/gstack-redact CLI shim over the engine Skill-facing CLI wrapping lib/redact-engine. Reads stdin or --from-file, scans, prints JSON (--json) or a human table. Exit codes 0/2/3 gate dispatch/file/edit/commit (WARN never gates). --auto-redact emits the sanitized body + diff for the PII-class one-keystroke path. --allowlist, --self-email, --repo-public-emails, --repo-visibility, --max-bytes. Fails closed on oversize at the CLI boundary before the engine even reads. 9 contract tests: exit codes, JSON shape, auto-redact, allowlist, self-email, from-file, oversize-fail-closed. Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/gstack-redact | 156 +++++++++++++++++++++++++++++++++ test/gstack-redact-cli.test.ts | 97 ++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100755 bin/gstack-redact create mode 100644 test/gstack-redact-cli.test.ts diff --git a/bin/gstack-redact b/bin/gstack-redact new file mode 100755 index 000000000..8f61e6580 --- /dev/null +++ b/bin/gstack-redact @@ -0,0 +1,156 @@ +#!/usr/bin/env bun +/** + * gstack-redact — scan text for secrets/PII/legal content via the shared engine. + * + * Skill-facing CLI over lib/redact-engine.ts. Reads from stdin (default) or + * --from-file, scans, and prints findings as JSON (--json) or a human table. + * + * Exit codes (consumed by skill bash to gate dispatch/file/edit/commit): + * 0 clean (no HIGH, no MEDIUM) + * 2 MEDIUM present (no HIGH) — skill runs the per-finding AskUserQuestion + * 3 HIGH present — skill blocks + * + * WARN findings (tool-fence-degraded credentials) never change the exit code. + * + * Flags: + * --json Emit JSON {findings, counts, repoVisibility, oversize} + * --repo-visibility V public | private | unknown (default unknown=public-strict wording) + * --from-file PATH Read input from PATH instead of stdin + * --allowlist PATH Newline-delimited exact spans to suppress + * --self-email EMAIL Suppress this email (the invoking user's own) + * --repo-public-emails PATH Newline-delimited repo-public emails to suppress + * --auto-redact IDS Comma-separated finding ids to auto-redact; + * prints the redacted body to stdout + diff to stderr. + * --max-bytes N Override the fail-closed size cap (default 1 MiB). + * + * Security note: this is a GUARDRAIL, not airtight enforcement. A determined + * user can always bypass it (direct gh/git). It catches accidents. + */ +import * as fs from "fs"; +import { + scan, + applyRedactions, + exitCodeFor, + type RepoVisibility, + type ScanOptions, + type Finding, +} from "../lib/redact-engine"; + +const MAX_STDIN_BYTES = 16 * 1024 * 1024; // hard ceiling before the engine cap + +function arg(name: string): string | undefined { + const i = process.argv.indexOf(name); + return i >= 0 ? process.argv[i + 1] : undefined; +} +function flag(name: string): boolean { + return process.argv.includes(name); +} + +function readInput(): string { + const file = arg("--from-file"); + if (file) { + const st = fs.statSync(file); + if (st.size > MAX_STDIN_BYTES) { + // Don't even read it — fail closed at the CLI boundary. + process.stderr.write(`gstack-redact: input file too large (${st.size} bytes)\n`); + process.exit(3); + } + return fs.readFileSync(file, "utf8"); + } + // stdin + const chunks: Buffer[] = []; + let total = 0; + const fd = 0; + const buf = Buffer.alloc(65536); + while (true) { + let n = 0; + try { + n = fs.readSync(fd, buf, 0, buf.length, null); + } catch (e: any) { + if (e.code === "EAGAIN") continue; + if (e.code === "EOF") break; + throw e; + } + if (n === 0) break; + total += n; + if (total > MAX_STDIN_BYTES) { + process.stderr.write("gstack-redact: stdin too large\n"); + process.exit(3); + } + chunks.push(Buffer.from(buf.subarray(0, n))); + } + return Buffer.concat(chunks).toString("utf8"); +} + +function readLines(path: string | undefined): string[] | undefined { + if (!path || !fs.existsSync(path)) return undefined; + return fs + .readFileSync(path, "utf8") + .split("\n") + .map((l) => l.trim()) + .filter(Boolean); +} + +function buildOpts(): ScanOptions { + const vis = (arg("--repo-visibility") as RepoVisibility) || "unknown"; + const maxBytes = arg("--max-bytes"); + return { + repoVisibility: ["public", "private", "unknown"].includes(vis) ? vis : "unknown", + allowlist: readLines(arg("--allowlist")), + selfEmail: arg("--self-email"), + repoPublicEmails: readLines(arg("--repo-public-emails")), + ...(maxBytes ? { maxBytes: parseInt(maxBytes, 10) } : {}), + }; +} + +function humanTable(findings: Finding[]): string { + if (!findings.length) return " (no findings)"; + const rows = findings.map( + (f) => + ` ${f.severity.padEnd(6)} ${f.id.padEnd(24)} ${String(f.line).padStart(4)}:${String( + f.col, + ).padEnd(3)} ${f.preview}`, + ); + return rows.join("\n"); +} + +function main() { + const opts = buildOpts(); + const input = readInput(); + + // Auto-redact mode: print redacted body to stdout, diff to stderr, exit 0. + const autoIds = arg("--auto-redact"); + if (autoIds) { + const { body, diff, skipped } = applyRedactions(input, autoIds.split(","), opts); + process.stdout.write(body); + if (diff) process.stderr.write(diff + "\n"); + if (skipped.length) { + process.stderr.write( + `\ngstack-redact: ${skipped.length} finding(s) could not be auto-redacted (structural) — edit manually:\n` + + skipped.map((f) => ` ${f.id} @ ${f.line}:${f.col}`).join("\n") + + "\n", + ); + } + process.exit(0); + } + + const result = scan(input, opts); + const code = exitCodeFor(result); + + if (flag("--json")) { + process.stdout.write(JSON.stringify(result, null, 2) + "\n"); + } else { + const vis = result.repoVisibility.toUpperCase(); + process.stdout.write(`gstack-redact scan — repo ${vis}\n`); + if (result.oversize) { + process.stdout.write(" BLOCKED — input too large to scan safely (fail-closed)\n"); + } else { + process.stdout.write(humanTable(result.findings) + "\n"); + const { HIGH, MEDIUM, LOW, WARN } = result.counts; + process.stdout.write(` HIGH=${HIGH} MEDIUM=${MEDIUM} LOW=${LOW} WARN=${WARN}\n`); + } + } + process.exit(code); +} + +main(); diff --git a/test/gstack-redact-cli.test.ts b/test/gstack-redact-cli.test.ts new file mode 100644 index 000000000..4808ba53b --- /dev/null +++ b/test/gstack-redact-cli.test.ts @@ -0,0 +1,97 @@ +/** + * Contract tests for bin/gstack-redact — exit codes, JSON shape, flags, + * auto-redact mode, oversize fail-closed. Spawns the shim via `bun`. + */ +import { describe, test, expect } from "bun:test"; +import * as path from "path"; +import * as fs from "fs"; +import * as os from "os"; + +const BIN = path.resolve(import.meta.dir, "..", "bin", "gstack-redact"); + +function run( + args: string[], + stdin: string, +): { code: number; stdout: string; stderr: string } { + const proc = Bun.spawnSync(["bun", BIN, ...args], { + stdin: Buffer.from(stdin), + }); + return { + code: proc.exitCode, + stdout: proc.stdout.toString(), + stderr: proc.stderr.toString(), + }; +} + +describe("gstack-redact exit codes", () => { + test("clean → 0", () => { + expect(run([], "just some prose").code).toBe(0); + }); + test("HIGH → 3", () => { + expect(run([], "key AKIA1234567890ABCDEF").code).toBe(3); + }); + test("MEDIUM only → 2", () => { + expect(run(["--repo-visibility", "public"], "mail bob@corp.io").code).toBe(2); + }); +}); + +describe("gstack-redact --json", () => { + test("emits valid JSON with findings + counts", () => { + const { stdout, code } = run(["--json"], "key AKIA1234567890ABCDEF"); + expect(code).toBe(3); + const parsed = JSON.parse(stdout); + expect(parsed.findings[0].id).toBe("aws.access_key"); + expect(parsed.counts.HIGH).toBe(1); + expect(parsed.repoVisibility).toBe("unknown"); + }); +}); + +describe("gstack-redact --auto-redact", () => { + test("prints redacted body to stdout, exits 0", () => { + const { stdout, code } = run(["--auto-redact", "pii.email"], "ping bob@corp.io please"); + expect(code).toBe(0); + expect(stdout).toContain(""); + expect(stdout).not.toContain("bob@corp.io"); + }); +}); + +describe("gstack-redact --allowlist", () => { + test("allowlisted span is suppressed", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "redact-allow-")); + const allow = path.join(dir, "allow.txt"); + fs.writeFileSync(allow, "AKIA1234567890ABCDEF\n"); + const { code } = run(["--allowlist", allow], "key AKIA1234567890ABCDEF"); + expect(code).toBe(0); + fs.rmSync(dir, { recursive: true, force: true }); + }); +}); + +describe("gstack-redact --self-email", () => { + test("own email is not flagged", () => { + const { code } = run( + ["--repo-visibility", "public", "--self-email", "me@garry.dev"], + "from me@garry.dev", + ); + expect(code).toBe(0); + }); +}); + +describe("gstack-redact --from-file", () => { + test("reads input from a file", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "redact-file-")); + const f = path.join(dir, "spec.md"); + fs.writeFileSync(f, "leaked ghp_" + "a".repeat(36)); + const proc = Bun.spawnSync(["bun", BIN, "--from-file", f, "--json"]); + const parsed = JSON.parse(proc.stdout.toString()); + expect(parsed.findings[0].id).toBe("github.pat"); + fs.rmSync(dir, { recursive: true, force: true }); + }); +}); + +describe("gstack-redact oversize fails closed", () => { + test("input over --max-bytes blocks (exit 3)", () => { + const { code, stdout } = run(["--max-bytes", "100"], "a".repeat(500)); + expect(code).toBe(3); + expect(stdout).toContain("too large"); + }); +});