From dd4dd9e1f5b759da3d0b919051fd13f4fc3f22bf Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 29 May 2026 07:21:48 -0700 Subject: [PATCH] feat(ship,document-*): redaction scan-at-sink on PR bodies + generated docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - /ship: scan the composed PR body + title before create AND edit, from a temp file (exact bytes scanned = bytes sent). HIGH blocks the PR (no skip); MEDIUM confirms per finding. Codex/Greptile/eval sections go in tool-attributed fences so example credentials those tools quote WARN-degrade instead of blocking the PR — a live-format credential inside the fence still blocks. - /document-release: scan the PR-body temp file before gh pr edit. - /document-generate: scan the staged doc diff (added lines) before commit — generated docs often carry example credentials; a live-format secret blocks. Tests: ship-template-redaction (incl. tool-fence WARN-degrade contract), document-skills-redaction. All skills stay under the v1.47 size budget. Co-Authored-By: Claude Opus 4.8 (1M context) --- document-generate/SKILL.md | 14 +++++++ document-generate/SKILL.md.tmpl | 14 +++++++ document-release/SKILL.md | 11 +++++- document-release/SKILL.md.tmpl | 11 +++++- ship/SKILL.md | 39 ++++++++++++++++--- ship/SKILL.md.tmpl | 39 ++++++++++++++++--- test/document-skills-redaction.test.ts | 37 ++++++++++++++++++ test/ship-template-redaction.test.ts | 54 ++++++++++++++++++++++++++ 8 files changed, 205 insertions(+), 14 deletions(-) create mode 100644 test/document-skills-redaction.test.ts create mode 100644 test/ship-template-redaction.test.ts diff --git a/document-generate/SKILL.md b/document-generate/SKILL.md index cb89b4ee5..e809a755e 100644 --- a/document-generate/SKILL.md +++ b/document-generate/SKILL.md @@ -1107,6 +1107,20 @@ Fix any failures before proceeding. 1. Stage new documentation files by name (never `git add -A` or `git add .`). +**Redaction scan before commit.** Generated docs frequently contain example +credentials; scan the staged doc content and block on a HIGH credential (a +live-format secret in committed docs is a leak). Example configs belong in +` ```example ` fences won't excuse a live-format secret, but the per-span +placeholder filter passes obvious docs examples (e.g. `AKIAIOSFODNN7EXAMPLE`): + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +git diff --cached --no-color | grep '^+' | sed 's/^+//' | \ + ~/.claude/skills/gstack/bin/gstack-redact --repo-visibility "${REDACT_VIS:-unknown}" --json +# exit 3 (HIGH) → unstage the offending doc, remove the secret, re-stage. Do NOT commit. +``` + 2. Create a commit: ```bash diff --git a/document-generate/SKILL.md.tmpl b/document-generate/SKILL.md.tmpl index ad32619c4..e4ac067ad 100644 --- a/document-generate/SKILL.md.tmpl +++ b/document-generate/SKILL.md.tmpl @@ -378,6 +378,20 @@ Fix any failures before proceeding. 1. Stage new documentation files by name (never `git add -A` or `git add .`). +**Redaction scan before commit.** Generated docs frequently contain example +credentials; scan the staged doc content and block on a HIGH credential (a +live-format secret in committed docs is a leak). Example configs belong in +` ```example ` fences won't excuse a live-format secret, but the per-span +placeholder filter passes obvious docs examples (e.g. `AKIAIOSFODNN7EXAMPLE`): + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +git diff --cached --no-color | grep '^+' | sed 's/^+//' | \ + ~/.claude/skills/gstack/bin/gstack-redact --repo-visibility "${REDACT_VIS:-unknown}" --json +# exit 3 (HIGH) → unstage the offending doc, remove the secret, re-stage. Do NOT commit. +``` + 2. Create a commit: ```bash diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 3fc606e8a..b2391e53b 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -1105,7 +1105,16 @@ glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load( If there are any documentation debt items, suggest adding a `docs-debt` label to the PR. -4. Write the updated body back: +4. Redaction scan-at-sink, then write the updated body back. The body is already + in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so + the bytes scanned are the bytes sent: + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json +# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding. +``` **If GitHub:** ```bash diff --git a/document-release/SKILL.md.tmpl b/document-release/SKILL.md.tmpl index f1635a2af..7367cbf4e 100644 --- a/document-release/SKILL.md.tmpl +++ b/document-release/SKILL.md.tmpl @@ -375,7 +375,16 @@ glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load( If there are any documentation debt items, suggest adding a `docs-debt` label to the PR. -4. Write the updated body back: +4. Redaction scan-at-sink, then write the updated body back. The body is already + in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so + the bytes scanned are the bytes sent: + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json +# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding. +``` **If GitHub:** ```bash diff --git a/ship/SKILL.md b/ship/SKILL.md index 9611072f7..62a5007c6 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -2918,7 +2918,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number): glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" ``` -If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run. +If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body-file "$PR_BODY_FILE"` (GitHub) or `glab mr update -d ...` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run. **Run the same redaction scan-at-sink (PR body + title) as the create path (Step 19) before editing — scan the temp file, then `gh pr edit --body-file` from it.** **Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v : ` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule. @@ -3027,15 +3027,42 @@ you missed it.> 🤖 Generated with [Claude Code](https://claude.com/claude-code) ``` -**If GitHub:** +#### Redaction scan (PR body + title) — runs before create AND edit + +The PR body is world-readable on a public repo. Scan-at-sink before sending: +write the composed body to a temp file, scan THAT file with the shared engine, +and pass the same file to `gh`/`glab`. Wrap any Codex / Greptile / eval output +sections in tool-attributed fences (` ```codex-review ` / ` ```greptile `) so the +engine WARN-degrades the example credentials those tools quote instead of blocking +the PR (a live-format credential inside the fence still blocks). + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +REDACT_VIS="${REDACT_VIS:-unknown}" +PR_BODY_FILE=$(mktemp) +cat > "$PR_BODY_FILE" <<'PR_BODY_EOF' + +PR_BODY_EOF +~/.claude/skills/gstack/bin/gstack-redact --from-file "$PR_BODY_FILE" --repo-visibility "$REDACT_VIS" --self-email "$(git config user.email 2>/dev/null)" --json +case $? in + 3) echo "BLOCKED — credential in PR body. Rotate + redact, do not create the PR."; exit 1 ;; + 2) echo "MEDIUM findings — confirm per finding (sterner on public) before proceeding." ;; +esac +# Also scan the title (short, single-line): +printf '%s' "v$NEW_VERSION : " | ~/.claude/skills/gstack/bin/gstack-redact --repo-visibility "$REDACT_VIS" --json +``` + +HIGH blocks (exit 3, no skip). MEDIUM → AskUserQuestion (PII subset offers +`--auto-redact`). Same scan runs before the `gh pr edit --body` path (Step 17). + +**If GitHub:** create from the SCANNED file (exact bytes scanned = bytes sent): ```bash # PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions. # (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.) -gh pr create --base --title "v$NEW_VERSION : " --body "$(cat <<'EOF' - -EOF -)" +gh pr create --base --title "v$NEW_VERSION : " --body-file "$PR_BODY_FILE" +rm -f "$PR_BODY_FILE" ``` **If GitLab:** diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index 304bd6a1d..ea6d44ee0 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -811,7 +811,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number): glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" ``` -If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run. +If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body-file "$PR_BODY_FILE"` (GitHub) or `glab mr update -d ...` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run. **Run the same redaction scan-at-sink (PR body + title) as the create path (Step 19) before editing — scan the temp file, then `gh pr edit --body-file` from it.** **Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v : ` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule. @@ -920,15 +920,42 @@ you missed it.> 🤖 Generated with [Claude Code](https://claude.com/claude-code) ``` -**If GitHub:** +#### Redaction scan (PR body + title) — runs before create AND edit + +The PR body is world-readable on a public repo. Scan-at-sink before sending: +write the composed body to a temp file, scan THAT file with the shared engine, +and pass the same file to `gh`/`glab`. Wrap any Codex / Greptile / eval output +sections in tool-attributed fences (` ```codex-review ` / ` ```greptile `) so the +engine WARN-degrades the example credentials those tools quote instead of blocking +the PR (a live-format credential inside the fence still blocks). + +```bash +REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null) +[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z') +REDACT_VIS="${REDACT_VIS:-unknown}" +PR_BODY_FILE=$(mktemp) +cat > "$PR_BODY_FILE" <<'PR_BODY_EOF' + +PR_BODY_EOF +~/.claude/skills/gstack/bin/gstack-redact --from-file "$PR_BODY_FILE" --repo-visibility "$REDACT_VIS" --self-email "$(git config user.email 2>/dev/null)" --json +case $? in + 3) echo "BLOCKED — credential in PR body. Rotate + redact, do not create the PR."; exit 1 ;; + 2) echo "MEDIUM findings — confirm per finding (sterner on public) before proceeding." ;; +esac +# Also scan the title (short, single-line): +printf '%s' "v$NEW_VERSION : " | ~/.claude/skills/gstack/bin/gstack-redact --repo-visibility "$REDACT_VIS" --json +``` + +HIGH blocks (exit 3, no skip). MEDIUM → AskUserQuestion (PII subset offers +`--auto-redact`). Same scan runs before the `gh pr edit --body` path (Step 17). + +**If GitHub:** create from the SCANNED file (exact bytes scanned = bytes sent): ```bash # PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions. # (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.) -gh pr create --base --title "v$NEW_VERSION : " --body "$(cat <<'EOF' - -EOF -)" +gh pr create --base --title "v$NEW_VERSION : " --body-file "$PR_BODY_FILE" +rm -f "$PR_BODY_FILE" ``` **If GitLab:** diff --git a/test/document-skills-redaction.test.ts b/test/document-skills-redaction.test.ts new file mode 100644 index 000000000..235d7895b --- /dev/null +++ b/test/document-skills-redaction.test.ts @@ -0,0 +1,37 @@ +/** + * /document-release + /document-generate redaction wiring (T6/T7). + */ +import { describe, test, expect } from "bun:test"; +import * as fs from "fs"; +import * as path from "path"; + +const ROOT = path.resolve(import.meta.dir, ".."); +const RELEASE = fs.readFileSync(path.join(ROOT, "document-release", "SKILL.md.tmpl"), "utf-8"); +const GENERATE = fs.readFileSync(path.join(ROOT, "document-generate", "SKILL.md.tmpl"), "utf-8"); + +describe("/document-release redaction", () => { + test("scans the PR-body temp file before gh pr edit", () => { + const scanIdx = RELEASE.indexOf("gstack-redact --from-file /tmp/gstack-pr-body"); + const editIdx = RELEASE.indexOf("gh pr edit --body-file /tmp/gstack-pr-body"); + expect(scanIdx).toBeGreaterThan(-1); + expect(editIdx).toBeGreaterThan(scanIdx); + }); + test("HIGH blocks the edit", () => { + expect(RELEASE).toMatch(/exit 3 \(HIGH\).*do NOT edit/i); + }); +}); + +describe("/document-generate redaction", () => { + test("scans staged doc diff before commit", () => { + const scanIdx = GENERATE.indexOf("gstack-redact --repo-visibility"); + const commitIdx = GENERATE.indexOf("git commit -m"); + expect(scanIdx).toBeGreaterThan(-1); + expect(commitIdx).toBeGreaterThan(scanIdx); + }); + test("scans added lines of the staged diff", () => { + expect(GENERATE).toMatch(/git diff --cached[\s\S]{0,80}gstack-redact/); + }); + test("HIGH blocks the commit", () => { + expect(GENERATE).toMatch(/Do NOT commit/i); + }); +}); diff --git a/test/ship-template-redaction.test.ts b/test/ship-template-redaction.test.ts new file mode 100644 index 000000000..45a681701 --- /dev/null +++ b/test/ship-template-redaction.test.ts @@ -0,0 +1,54 @@ +/** + * /ship redaction wiring (T5/T11). The PR body + title are scanned at-sink before + * create AND edit; tool output goes in attributed fences so example credentials + * WARN-degrade instead of blocking; create/edit file from the scanned temp file. + */ +import { describe, test, expect } from "bun:test"; +import * as fs from "fs"; +import * as path from "path"; +import { scan } from "../lib/redact-engine"; + +const ROOT = path.resolve(import.meta.dir, ".."); +const TMPL = fs.readFileSync(path.join(ROOT, "ship", "SKILL.md.tmpl"), "utf-8"); + +describe("/ship redaction wiring", () => { + test("scans the PR body via the shared bin before create", () => { + expect(TMPL).toContain("gstack-redact --from-file"); + expect(TMPL).toMatch(/Redaction scan \(PR body \+ title\)/); + }); + test("creates from the scanned temp file (exact bytes)", () => { + expect(TMPL).toMatch(/gh pr create[\s\S]{0,120}--body-file "\$PR_BODY_FILE"/); + }); + test("edit path also scans before sending", () => { + expect(TMPL).toMatch(/gh pr edit --body-file "\$PR_BODY_FILE"/); + expect(TMPL).toMatch(/same redaction scan-at-sink.*before editing/i); + }); + test("HIGH blocks the PR (exit 3), no skip", () => { + expect(TMPL).toMatch(/BLOCKED — credential in PR body/); + }); + test("instructs wrapping tool output in attributed fences (TENSION-3)", () => { + expect(TMPL).toMatch(/tool-attributed fences/); + expect(TMPL).toMatch(/codex-review/); + expect(TMPL).toMatch(/greptile/); + }); + test("scans the title too", () => { + expect(TMPL).toMatch(/scan the title/i); + }); +}); + +describe("tool-attributed fence behavior (engine contract /ship relies on)", () => { + test("a doc-example credential inside a tool fence WARN-degrades, does not block", () => { + const body = "## Codex review\n```codex-review\nflagged your_aws_key AKIAIOSFODNN7EXAMPLE\n```"; + const r = scan(body, { repoVisibility: "public" }); + expect(r.counts.HIGH).toBe(0); + }); + test("a live-format credential inside a tool fence STILL blocks", () => { + const body = "```codex-review\nleaked AKIA1234567890ABCDEF\n```"; + const r = scan(body, { repoVisibility: "public" }); + expect(r.counts.HIGH).toBe(1); + }); + test("a credential in plain PR prose (no fence) blocks", () => { + const body = "We hardcoded AKIA1234567890ABCDEF in the config"; + expect(scan(body, { repoVisibility: "public" }).counts.HIGH).toBe(1); + }); +});