From c8fdfbdceb9fb48b7318d3f2522a1300121d5af8 Mon Sep 17 00:00:00 2001
From: BigBodyCobain <43977454+BigBodyCobain@users.noreply.github.com>
Date: Mon, 25 May 2026 03:16:01 -0600
Subject: [PATCH] fix(secret-scan): exempt SSH known_hosts entries from leaked-key detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #331 introduced github.com host fingerprints pinned in
.gitlab-ci.yml's mirror-to-github before_script. The scanner flagged
them as embedded secrets and blocked CI:

    BLOCKED: Embedded secrets/tokens found in:
      .gitlab-ci.yml
        133: github.com ssh-ed25519 AAAA...
        135: github.com ssh-rsa AAAA...

These are PUBLIC host keys — the whole point of pinning known_hosts is
to publish the fingerprint widely so a MITM is detectable. They are
documented at
https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
and committing them is the correct, secure practice.

Fix: add a KNOWN_HOSTS_LINE regex to the content-scan block that
recognizes `<host> [salt] <key-type> AAAA...` shape lines (the exact
format used in ~/.ssh/known_hosts) and filters them out before flagging
the file. Bare `ssh-rsa AAAA...` lines without a host prefix are still
caught — only the host-key shape is exempt.
Co-Authored-By: Claude Opus 4.7
---
 backend/scripts/scan-secrets.sh | 37 +++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/backend/scripts/scan-secrets.sh b/backend/scripts/scan-secrets.sh
index dfd8d38..50910d1 100644
--- a/backend/scripts/scan-secrets.sh
+++ b/backend/scripts/scan-secrets.sh
@@ -92,18 +92,37 @@ SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token

 TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
 if [[ -n "$TEXT_FILES" ]]; then
+  # Known-public exclusions: lines shaped like `<host> [<field>] <key-type> AAAA...`
+  # are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
+  # by definition safe to commit (the whole point of pinning known_hosts
+  # is to publish the fingerprint widely so MITM is detectable). Filter
+  # these out before flagging the file.
+  KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
+
   # Use grep with file list, skip missing/binary, limit output
   CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
   if [[ -n "$CONTENT_HITS" ]]; then
-    echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
-    echo "$CONTENT_HITS" | while read -r f; do
-      echo -e " ${RED}$f${NC}"
-      # Show first matching line for context
-      grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | head -2 | while read -r line; do
-        echo -e " ${YELLOW}$line${NC}"
-      done
-    done
-    FOUND=1
+    REAL_HITS=""
+    REAL_REPORT=""
+    while IFS= read -r f; do
+      [[ -z "$f" ]] && continue
+      # Re-grep; drop known_hosts lines. Filter is anchored past grep -n's "<lineno>:" prefix so the number is not read as the host.
+      FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "^[0-9]+:${KNOWN_HOSTS_LINE#^}" || true)
+      if [[ -n "$FILE_HITS" ]]; then
+        REAL_HITS+="$f"$'\n'
+        REAL_REPORT+=" ${RED}$f${NC}"$'\n'
+        # Show first 2 matching lines for context
+        while IFS= read -r line; do
+          [[ -z "$line" ]] && continue
+          REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
+        done < <(echo "$FILE_HITS" | head -2)
+      fi
+    done <<< "$CONTENT_HITS"
+    if [[ -n "$REAL_HITS" ]]; then
+      echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
+      echo -en "$REAL_REPORT"
+      FOUND=1
+    fi
   fi
 fi
