mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-30 19:09:34 +02:00
41799f9891
* feat(ci): switch mirror-to-github job from PAT to per-repo SSH deploy key GitHub fine-grained PATs are capped at 366 days, classic PATs would need 'public_repo' (broader scope than needed). Per-repo SSH deploy keys are tighter: - Can ONLY push to BigBodyCobain/Shadowbroker (no access to anything else, not even other repos owned by the same account). - Never expire. - Rotating == one-click delete on github.com/.../settings/keys. Changes: - New CI/CD variable GITHUB_MIRROR_SSH_KEY (File, Protected) holding the ed25519 private half. Public half lives on the repo's deploy keys with write access enabled. - mirror-to-github before_script writes the key to ~/.ssh/id_ed25519, pins github.com host fingerprints (ed25519 + ecdsa + rsa from the 2023-03-24 rotation) into ~/.ssh/known_hosts so we never trust a MITM, then pushes via git@github.com:... instead of HTTPS. - Job rule now gates on GITHUB_MIRROR_SSH_KEY (the new var) instead of GITHUB_MIRROR_TOKEN (which never existed). After this lands, every commit pushed directly to GitLab main will mirror back to GitHub main automatically — closing the loop on bi-directional sync. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix(secret-scan): exempt SSH known_hosts entries from leaked-key detection PR #331 introduced github.com host fingerprints pinned in .gitlab-ci.yml's mirror-to-github before_script. The scanner flagged them as embedded secrets and blocked CI: BLOCKED: Embedded secrets/tokens found in: .gitlab-ci.yml 133: github.com ssh-ed25519 AAAA... 135: github.com ssh-rsa AAAA... These are PUBLIC host keys — the whole point of pinning known_hosts is to publish the fingerprint widely so a MITM is detectable. They are documented at https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints and committing them is the correct, secure practice. Fix: add a KNOWN_HOSTS_LINE regex to the content-scan block that recognizes `<host-or-ip> [salt] <algo> AAAA...` shape lines (the exact format used in ~/.ssh/known_hosts) and filters them out before flagging the file. Bare `ssh-rsa AAAA...` lines without a host prefix are still caught — only the host-key shape is exempt. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
141 lines
6.5 KiB
Bash
141 lines
6.5 KiB
Bash
#!/usr/bin/env bash
|
|
# scan-secrets.sh — Catch keys, secrets, and credentials before they hit git.
|
|
#
|
|
# Usage:
|
|
# ./backend/scripts/scan-secrets.sh # Scan staged files (pre-commit)
|
|
# ./backend/scripts/scan-secrets.sh --all # Scan entire working tree
|
|
# ./backend/scripts/scan-secrets.sh --staged # Scan staged files only (default)
|
|
#
|
|
# Exit code: 0 = clean, 1 = secrets found
|
|
|
|
set -euo pipefail
|
|
|
|
RED='\033[0;31m'
|
|
YELLOW='\033[1;33m'
|
|
GREEN='\033[0;32m'
|
|
NC='\033[0m'
|
|
|
|
MODE="${1:---staged}"
|
|
FOUND=0
|
|
|
|
# ── Get file list based on mode ─────────────────────────────────────────
|
|
if [[ "$MODE" == "--all" ]]; then
|
|
FILELIST=$(mktemp)
|
|
{ git ls-files 2>/dev/null; git ls-files --others --exclude-standard 2>/dev/null; } > "$FILELIST"
|
|
echo -e "${YELLOW}Scanning entire working tree...${NC}"
|
|
else
|
|
FILELIST=$(mktemp)
|
|
git diff --cached --name-only --diff-filter=ACMR 2>/dev/null > "$FILELIST" || true
|
|
if [[ ! -s "$FILELIST" ]]; then
|
|
echo -e "${GREEN}No staged files to scan.${NC}"
|
|
rm -f "$FILELIST"
|
|
exit 0
|
|
fi
|
|
echo -e "${YELLOW}Scanning $(wc -l < "$FILELIST" | tr -d ' ') staged files...${NC}"
|
|
fi
|
|
|
|
# ── Check 1: Dangerous file extensions ──────────────────────────────────
|
|
KEY_EXT='\.key$|\.pem$|\.p12$|\.pfx$|\.jks$|\.keystore$|\.p8$|\.der$'
|
|
SECRET_EXT='\.secret$|\.secrets$|\.credential$|\.credentials$'
|
|
|
|
HITS=$(grep -iE "$KEY_EXT|$SECRET_EXT" "$FILELIST" 2>/dev/null || true)
|
|
if [[ -n "$HITS" ]]; then
|
|
echo -e "\n${RED}BLOCKED: Key/secret files detected:${NC}"
|
|
echo "$HITS" | while read -r f; do echo -e " ${RED}$f${NC}"; done
|
|
FOUND=1
|
|
fi
|
|
|
|
# ── Check 2: Dangerous filenames ────────────────────────────────────────
|
|
RISKY='id_rsa|id_ed25519|id_ecdsa|private_key|private\.key|secret_key|master\.key'
|
|
RISKY+='|serviceaccount|gcloud.*\.json|firebase.*\.json|\.htpasswd'
|
|
|
|
HITS=$(grep -iE "$RISKY" "$FILELIST" 2>/dev/null || true)
|
|
if [[ -n "$HITS" ]]; then
|
|
echo -e "\n${RED}BLOCKED: Risky filenames detected:${NC}"
|
|
echo "$HITS" | while read -r f; do echo -e " ${RED}$f${NC}"; done
|
|
FOUND=1
|
|
fi
|
|
|
|
# ── Check 3: .env files (not .env.example) ──────────────────────────────
|
|
HITS=$(grep -E '(^|/)\.env(\.[^e].*)?$' "$FILELIST" 2>/dev/null | grep -v '\.example' || true)
|
|
if [[ -n "$HITS" ]]; then
|
|
echo -e "\n${RED}BLOCKED: Environment files detected:${NC}"
|
|
echo "$HITS" | while read -r f; do echo -e " ${RED}$f${NC}"; done
|
|
FOUND=1
|
|
fi
|
|
|
|
# ── Check 4: _domain_keys directory (project-specific) ──────────────────
|
|
HITS=$(grep '_domain_keys/' "$FILELIST" 2>/dev/null || true)
|
|
if [[ -n "$HITS" ]]; then
|
|
echo -e "\n${RED}BLOCKED: Domain keys directory detected:${NC}"
|
|
echo "$HITS" | while read -r f; do echo -e " ${RED}$f${NC}"; done
|
|
FOUND=1
|
|
fi
|
|
|
|
# ── Check 5: Content scan for embedded secrets (single grep pass) ───────
|
|
# Build one mega-pattern and run grep once across all files (fast!)
|
|
SECRET_REGEX='PRIVATE KEY-----|'
|
|
SECRET_REGEX+='ssh-rsa AAAA[0-9A-Za-z+/]|'
|
|
SECRET_REGEX+='ssh-ed25519 AAAA[0-9A-Za-z+/]|'
|
|
SECRET_REGEX+='ghp_[0-9a-zA-Z]{36}|' # GitHub PAT
|
|
SECRET_REGEX+='github_pat_[0-9a-zA-Z]{22}_[0-9a-zA-Z]{59}|' # GitHub fine-grained
|
|
SECRET_REGEX+='gho_[0-9a-zA-Z]{36}|' # GitHub OAuth
|
|
SECRET_REGEX+='sk-[0-9a-zA-Z]{48}|' # OpenAI key
|
|
SECRET_REGEX+='sk-ant-[0-9a-zA-Z-]{90,}|' # Anthropic key
|
|
SECRET_REGEX+='AKIA[0-9A-Z]{16}|' # AWS access key
|
|
SECRET_REGEX+='AIzaSy[0-9A-Za-z_-]{33}|' # Google API key
|
|
SECRET_REGEX+='xox[bpoas]-[0-9a-zA-Z-]+|' # Slack token
|
|
SECRET_REGEX+='npm_[0-9a-zA-Z]{36}|' # npm token
|
|
SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token
|
|
|
|
# Filter to text-like files only (skip binaries by extension + skip this script)
|
|
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
|
|
|
if [[ -n "$TEXT_FILES" ]]; then
|
|
# Known-public exclusions: lines matching `<host-or-ip> ssh-<algo> <key>`
|
|
# are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
|
|
# by definition safe to commit (the whole point of pinning known_hosts
|
|
# is to publish the fingerprint widely so MITM is detectable). Filter
|
|
# these out before flagging the file.
|
|
KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
|
|
|
|
# Use grep with file list, skip missing/binary, limit output
|
|
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
|
if [[ -n "$CONTENT_HITS" ]]; then
|
|
REAL_HITS=""
|
|
REAL_REPORT=""
|
|
while IFS= read -r f; do
|
|
[[ -z "$f" ]] && continue
|
|
# Re-grep this file, but filter out known_hosts-style lines.
|
|
FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "$KNOWN_HOSTS_LINE" || true)
|
|
if [[ -n "$FILE_HITS" ]]; then
|
|
REAL_HITS+="$f"$'\n'
|
|
REAL_REPORT+=" ${RED}$f${NC}"$'\n'
|
|
# Show first 2 matching lines for context
|
|
while IFS= read -r line; do
|
|
[[ -z "$line" ]] && continue
|
|
REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
|
|
done < <(echo "$FILE_HITS" | head -2)
|
|
fi
|
|
done <<< "$CONTENT_HITS"
|
|
if [[ -n "$REAL_HITS" ]]; then
|
|
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
|
echo -en "$REAL_REPORT"
|
|
FOUND=1
|
|
fi
|
|
fi
|
|
fi
|
|
|
|
rm -f "$FILELIST"
|
|
|
|
# ── Result ──────────────────────────────────────────────────────────────
|
|
echo ""
|
|
if [[ $FOUND -eq 1 ]]; then
|
|
echo -e "${RED}Secret scan FAILED. Add these to .gitignore or remove them before committing.${NC}"
|
|
echo -e "${YELLOW}If intentional (e.g. test fixtures): git commit --no-verify${NC}"
|
|
exit 1
|
|
else
|
|
echo -e "${GREEN}Secret scan passed. No keys or secrets detected.${NC}"
|
|
exit 0
|
|
fi
|