mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-02-12 19:12:49 +00:00
test: Add secret detection benchmark dataset and ground truth
Add comprehensive benchmark dataset with 32 documented secrets for testing secret detection workflows (gitleaks, trufflehog, llm_secret_detection). - Add test_projects/secret_detection_benchmark/ with 19 test files - Add ground truth JSON with precise line-by-line secret mappings - Update .gitignore with exceptions for benchmark files (not real secrets) Dataset breakdown: - 12 Easy secrets (standard patterns) - 10 Medium secrets (obfuscated) - 10 Hard secrets (well hidden)
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -233,6 +233,12 @@ yarn-error.log*
|
||||
*.key
*.p12
*.pfx

secret*
secrets/
credentials*

# Exception: Secret detection benchmark test files (not real secrets)
# These negations must come AFTER the ignore patterns above: git applies the
# last matching pattern, and `secret*` also matches the directory names
# `secret_detection_benchmark` and `secret_detection`.
!test_projects/secret_detection_benchmark/
!test_projects/secret_detection_benchmark/**
# A file cannot be re-included while its parent directory is excluded, so the
# ground-truth directory has to be re-included before the file itself.
!backend/benchmarks/by_category/secret_detection/
!**/secret_detection_benchmark_GROUND_TRUTH.json
|
||||
|
||||
@@ -0,0 +1,344 @@
|
||||
{
|
||||
"description": "Ground truth dataset for secret detection benchmarking - Exactly 32 secrets",
|
||||
"version": "1.1.0",
|
||||
"total_secrets": 32,
|
||||
"secrets_by_difficulty": {
|
||||
"easy": 12,
|
||||
"medium": 10,
|
||||
"hard": 10
|
||||
},
|
||||
"secrets": [
|
||||
{
|
||||
"id": 1,
|
||||
"file": ".env",
|
||||
"line": 3,
|
||||
"difficulty": "easy",
|
||||
"type": "aws_access_key",
|
||||
"value": "AKIAIOSFODNN7EXAMPLE",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"file": ".env",
|
||||
"line": 4,
|
||||
"difficulty": "easy",
|
||||
"type": "aws_secret_access_key",
|
||||
"value": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"file": "config/settings.py",
|
||||
"line": 6,
|
||||
"difficulty": "easy",
|
||||
"type": "github_pat",
|
||||
"value": "ghp_vR8jK2mN4pQ6tX9bC3wY7zA1eF5hI8kL",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"file": "config/settings.py",
|
||||
"line": 9,
|
||||
"difficulty": "easy",
|
||||
"type": "stripe_api_key",
|
||||
"value": "sk_live_51MabcdefghijklmnopqrstuvwxyzABCDEF123456789",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"file": "config/settings.py",
|
||||
"line": 20,
|
||||
"difficulty": "easy",
|
||||
"type": "database_password",
|
||||
"value": "ProdDB_P@ssw0rd_2024_Secure!",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"file": "src/app.py",
|
||||
"line": 7,
|
||||
"difficulty": "easy",
|
||||
"type": "jwt_secret",
|
||||
"value": "my-super-secret-jwt-key-do-not-share-2024",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"file": "config/database.yaml",
|
||||
"line": 7,
|
||||
"difficulty": "easy",
|
||||
"type": "azure_storage_key",
|
||||
"value": "DefaultEndpointsProtocol=https;AccountName=prodstore;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;EndpointSuffix=core.windows.net",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"file": "scripts/webhook.js",
|
||||
"line": 4,
|
||||
"difficulty": "easy",
|
||||
"type": "slack_webhook",
|
||||
"value": "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXX",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"file": "config/app.properties",
|
||||
"line": 6,
|
||||
"difficulty": "easy",
|
||||
"type": "api_key",
|
||||
"value": "sk_test_4eC39HqLyjWDarjtT1zdp7dc",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"file": "id_rsa",
|
||||
"line": 1,
|
||||
"difficulty": "easy",
|
||||
"type": "ssh_private_key",
|
||||
"value": "-----BEGIN OPENSSH PRIVATE KEY-----",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"file": "config/oauth.json",
|
||||
"line": 4,
|
||||
"difficulty": "easy",
|
||||
"type": "oauth_client_secret",
|
||||
"value": "GOCSPX-Ab12Cd34Ef56Gh78Ij90Kl12",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"file": "src/Main.java",
|
||||
"line": 5,
|
||||
"difficulty": "easy",
|
||||
"type": "google_oauth_secret",
|
||||
"value": "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",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"file": "src/config.py",
|
||||
"line": 7,
|
||||
"difficulty": "medium",
|
||||
"type": "aws_access_key_base64",
|
||||
"value": "QUtJQUlPU0ZPRE5ON0VYQU1QTEU=",
|
||||
"decoded": "AKIAIOSFODNN7EXAMPLE",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"file": "src/config.py",
|
||||
"line": 10,
|
||||
"difficulty": "medium",
|
||||
"type": "api_token_hex",
|
||||
"value": "6170695f746f6b656e5f616263313233787977373839",
|
||||
"decoded": "api_token_abc123xyw789",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 15,
|
||||
"file": "src/config.py",
|
||||
"line": 16,
|
||||
"difficulty": "medium",
|
||||
"type": "database_password_concatenated",
|
||||
"value": "MySecurePassword2024!",
|
||||
"note": "Built from DB_PASS_PART1 + DB_PASS_PART2 + DB_PASS_PART3",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 16,
|
||||
"file": "scripts/deploy.sh",
|
||||
"line": 5,
|
||||
"difficulty": "medium",
|
||||
"type": "api_key_export",
|
||||
"value": "sk_prod_1234567890abcdefghijklmnopqrstuvwxyz",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
"file": "scripts/deploy.sh",
|
||||
"line": 10,
|
||||
"difficulty": "medium",
|
||||
"type": "database_password_url_encoded",
|
||||
"value": "mysql://admin:MyP%40ssw0rd%21@db.example.com:3306/prod",
|
||||
"decoded": "mysql://admin:MyP@ssw0rd!@db.example.com:3306/prod",
|
||||
"note": "In comment",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 18,
|
||||
"file": "config/keys.yaml",
|
||||
"line": 6,
|
||||
"difficulty": "medium",
|
||||
"type": "rsa_private_key_multiline",
|
||||
"value": "-----BEGIN RSA PRIVATE KEY-----",
|
||||
"note": "Multi-line YAML literal block",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 19,
|
||||
"file": "config/keys.yaml",
|
||||
"line": 12,
|
||||
"difficulty": "medium",
|
||||
"type": "api_token_unicode",
|
||||
"value": "tøkęn_śęçrėt_ẃïth_ŭñïçődė_123456",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"file": "src/database.sql",
|
||||
"line": 6,
|
||||
"difficulty": "medium",
|
||||
"type": "database_connection_string",
|
||||
"value": "postgresql://admin:Pr0dDB_S3cr3t_P@ss@db.prod.example.com:5432/prod_db",
|
||||
"note": "In SQL comment",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"file": "config/legacy.ini",
|
||||
"line": 3,
|
||||
"difficulty": "medium",
|
||||
"type": "database_password",
|
||||
"value": "L3g@cy_DB_P@ssw0rd_2023",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"file": "config/legacy.ini",
|
||||
"line": 7,
|
||||
"difficulty": "medium",
|
||||
"type": "api_key_commented",
|
||||
"value": "backup_key_xyz789abc123def456ghi",
|
||||
"note": "Commented backup key",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"file": "src/obfuscated.py",
|
||||
"line": 7,
|
||||
"difficulty": "hard",
|
||||
"type": "stripe_key_rot13",
|
||||
"value": "fx_yvir_frperg_xrl_12345",
|
||||
"decoded": "sk_live_secret_key_12345",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 24,
|
||||
"file": "src/obfuscated.py",
|
||||
"line": 10,
|
||||
"difficulty": "hard",
|
||||
"type": "github_token_binary",
|
||||
"value": "b'\\x67\\x68\\x70\\x5f\\x4d\\x79\\x47\\x69\\x74\\x48\\x75\\x62\\x54\\x6f\\x6b\\x65\\x6e\\x31\\x32\\x33\\x34\\x35\\x36'",
|
||||
"decoded": "ghp_MyGitHubToken123456",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 25,
|
||||
"file": "src/obfuscated.py",
|
||||
"line": 13,
|
||||
"difficulty": "hard",
|
||||
"type": "aws_secret_char_array",
|
||||
"value": "['A','W','S','_','S','E','C','R','E','T','_','K','E','Y','_','X','Y','Z','7','8','9']",
|
||||
"decoded": "AWS_SECRET_KEY_XYZ789",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 26,
|
||||
"file": "src/obfuscated.py",
|
||||
"line": 17,
|
||||
"difficulty": "hard",
|
||||
"type": "api_token_reversed",
|
||||
"value": "321cba_desrever_nekot_ipa",
|
||||
"decoded": "api_token_reversed_abc123",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 27,
|
||||
"file": "src/advanced.js",
|
||||
"line": 4,
|
||||
"difficulty": "hard",
|
||||
"type": "secret_template_string",
|
||||
"value": "sk_prod_template_key_xyz",
|
||||
"note": "Built from template literals",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 28,
|
||||
"file": "src/advanced.js",
|
||||
"line": 7,
|
||||
"difficulty": "hard",
|
||||
"type": "password_in_regex",
|
||||
"value": "password_regex_secret_789",
|
||||
"note": "Inside regex pattern",
|
||||
"severity": "medium"
|
||||
},
|
||||
{
|
||||
"id": 29,
|
||||
"file": "src/advanced.js",
|
||||
"line": 10,
|
||||
"difficulty": "hard",
|
||||
"type": "api_key_xor",
|
||||
"value": "[65,82,90,75,94,91,92,75,93,67,65,90,67,92,75,91,67,95]",
|
||||
"decoded": "kxpatqvawikpivaqiu",
|
||||
"note": "XOR encrypted with key 42",
|
||||
"severity": "critical"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"file": "src/advanced.js",
|
||||
"line": 17,
|
||||
"difficulty": "hard",
|
||||
"type": "api_key_escaped_json",
|
||||
"value": "sk_escaped_json_key_456",
|
||||
"note": "Escaped JSON within string",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 31,
|
||||
"file": "src/Crypto.go",
|
||||
"line": 10,
|
||||
"difficulty": "hard",
|
||||
"type": "secret_in_heredoc",
|
||||
"value": "golang_heredoc_secret_999",
|
||||
"note": "In heredoc/multi-line string",
|
||||
"severity": "high"
|
||||
},
|
||||
{
|
||||
"id": 32,
|
||||
"file": "src/Crypto.go",
|
||||
"line": 15,
|
||||
"difficulty": "hard",
|
||||
"type": "stripe_key_typo",
|
||||
"value": "strippe_sk_live_corrected_key",
|
||||
"decoded": "stripe_sk_live_corrected_key",
|
||||
"note": "Intentional typo corrected programmatically",
|
||||
"severity": "critical"
|
||||
}
|
||||
],
|
||||
"file_summary": {
|
||||
".env": 2,
|
||||
"config/settings.py": 3,
|
||||
"src/app.py": 1,
|
||||
"config/database.yaml": 1,
|
||||
"scripts/webhook.js": 1,
|
||||
"config/app.properties": 1,
|
||||
"id_rsa": 1,
|
||||
"config/oauth.json": 1,
|
||||
"src/Main.java": 1,
|
||||
"src/config.py": 3,
|
||||
"scripts/deploy.sh": 2,
|
||||
"config/keys.yaml": 2,
|
||||
"src/database.sql": 1,
|
||||
"config/legacy.ini": 2,
|
||||
"src/obfuscated.py": 4,
|
||||
"src/advanced.js": 4,
|
||||
"src/Crypto.go": 2
|
||||
},
|
||||
"notes": {
|
||||
"easy_secrets": "Standard patterns that any decent secret scanner should detect",
|
||||
"medium_secrets": "Slightly obfuscated - base64, hex, concatenated, or in comments",
|
||||
"hard_secrets": "Well hidden - ROT13, binary, XOR, reversed, split across constructs"
|
||||
}
|
||||
}
|
||||
7
test_projects/secret_detection_benchmark/.env
Normal file
7
test_projects/secret_detection_benchmark/.env
Normal file
@@ -0,0 +1,7 @@
|
||||
# Environment configuration
|
||||
# EASY SECRET #1: Plain AWS access key
|
||||
AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
|
||||
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
|
||||
DATABASE_HOST=localhost
|
||||
DATABASE_PORT=5432
|
||||
BIN
test_projects/secret_detection_benchmark/.fuzzforge/findings.db
Normal file
BIN
test_projects/secret_detection_benchmark/.fuzzforge/findings.db
Normal file
Binary file not shown.
99
test_projects/secret_detection_benchmark/README.md
Normal file
99
test_projects/secret_detection_benchmark/README.md
Normal file
@@ -0,0 +1,99 @@
|
||||
# Secret Detection Benchmark Dataset
|
||||
|
||||
Ground truth dataset with **exactly 32 known secrets** for testing secret detection tools.
|
||||
|
||||
## Contents
|
||||
|
||||
- **12 Easy Secrets**: Standard patterns (AWS keys, GitHub PATs, Stripe keys, etc.)
|
||||
- **10 Medium Secrets**: Slightly obfuscated (Base64, hex, concatenated, in comments)
|
||||
- **10 Hard Secrets**: Well hidden (ROT13, binary, XOR, reversed, template strings)
|
||||
|
||||
## Files
|
||||
|
||||
```
|
||||
├── .env # 2 secrets
|
||||
├── config/
|
||||
│ ├── settings.py # 3 secrets
|
||||
│ ├── database.yaml # 1 secret
|
||||
│ ├── app.properties # 1 secret
|
||||
│ ├── oauth.json # 1 secret
|
||||
│ ├── keys.yaml # 2 secrets
|
||||
│ └── legacy.ini # 2 secrets
|
||||
├── src/
|
||||
│ ├── app.py # 1 secret
|
||||
│ ├── Main.java # 1 secret
|
||||
│ ├── config.py # 3 secrets (medium difficulty)
|
||||
│ ├── obfuscated.py # 4 secrets (hard difficulty)
|
||||
│ ├── advanced.js # 4 secrets (hard difficulty)
|
||||
│ ├── Crypto.go # 2 secrets (hard difficulty)
|
||||
│ └── database.sql # 1 secret
|
||||
├── scripts/
|
||||
│ ├── webhook.js # 1 secret
|
||||
│ └── deploy.sh # 2 secrets
|
||||
└── id_rsa # 1 secret
|
||||
|
||||
Total: 17 files with 32 secrets
|
||||
```
|
||||
|
||||
## Secret Difficulty Breakdown
|
||||
|
||||
### Easy (12 secrets)
|
||||
Should be detected by any decent secret scanner:
|
||||
- Plain AWS access keys
|
||||
- GitHub Personal Access Tokens
|
||||
- Stripe API keys
|
||||
- Database passwords in plain text
|
||||
- JWT secrets
|
||||
- SSH private keys
|
||||
- OAuth secrets
|
||||
- Slack webhooks
|
||||
|
||||
### Medium (10 secrets)
|
||||
Requires some parsing or contextual understanding:
|
||||
- Base64 encoded AWS key
|
||||
- Hex-encoded tokens
|
||||
- Split strings concatenated at runtime
|
||||
- URL-encoded passwords
|
||||
- Multi-line private keys in YAML
|
||||
- Secrets with Unicode characters
|
||||
- Secrets in SQL/shell comments
|
||||
- Deprecated config formats
|
||||
|
||||
### Hard (10 secrets)
|
||||
Well hidden, may challenge even advanced tools:
|
||||
- ROT13 encoded secrets
|
||||
- Binary string representations
|
||||
- Character array joins
|
||||
- Reversed strings
|
||||
- Template string constructs
|
||||
- Secrets in regex patterns
|
||||
- XOR encrypted values
|
||||
- Escaped JSON within strings
|
||||
- Heredoc patterns
|
||||
- Intentional typos corrected programmatically
|
||||
|
||||
## Usage
|
||||
|
||||
Run secret detection tools against this directory and compare results to the ground truth file (located in `backend/benchmarks/by_category/secret_detection/secret_detection_benchmark_GROUND_TRUTH.json`) to calculate:
|
||||
|
||||
- **Precision**: TP / (TP + FP) - How many detected secrets are real?
|
||||
- **Recall**: TP / (TP + FN) - How many real secrets were found?
|
||||
- **F1 Score**: 2 × (Precision × Recall) / (Precision + Recall)
|
||||
|
||||
### Expected Performance
|
||||
|
||||
| Tool Type | Expected Easy | Expected Medium | Expected Hard | Total Expected |
|
||||
|-----------|---------------|-----------------|---------------|----------------|
|
||||
| Pattern-based (Gitleaks) | 12/12 (100%) | 6-8/10 (60-80%) | 2-4/10 (20-40%) | 20-24/32 |
|
||||
| Entropy-based (TruffleHog) | 12/12 (100%) | 5-7/10 (50-70%) | 1-3/10 (10-30%) | 18-22/32 |
|
||||
| LLM-based | 12/12 (100%) | 8-10/10 (80-100%) | 4-8/10 (40-80%) | 24-30/32 |
|
||||
|
||||
## Validation
|
||||
|
||||
Use the validation script to check tool performance:
|
||||
|
||||
```bash
|
||||
python validate_ground_truth.py --tool-output results.json
|
||||
```
|
||||
|
||||
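This will calculate precision, recall, and F1 score against the ground truth. The script reads a `findings_by_file` object (file path → list of line numbers) from `results.json` and matches findings on exact `(file, line)` pairs. The default `--ground-truth` path is relative to the current working directory, so run the script from this directory or pass `--ground-truth` explicitly.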
|
||||
@@ -0,0 +1,9 @@
|
||||
# Application properties file
|
||||
app.name=SecretDetectionBenchmark
|
||||
app.version=1.0.0
|
||||
|
||||
# EASY SECRET #8: API Key
|
||||
api.key=sk_test_4eC39HqLyjWDarjtT1zdp7dc
|
||||
api.endpoint=https://api.example.com
|
||||
|
||||
logging.level=INFO
|
||||
@@ -0,0 +1,10 @@
|
||||
# Database configuration
|
||||
databases:
|
||||
production:
|
||||
host: prod-db.example.com
|
||||
port: 5432
|
||||
# EASY SECRET #6: Azure connection string
|
||||
connection_string: "DefaultEndpointsProtocol=https;AccountName=prodstore;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;EndpointSuffix=core.windows.net"
|
||||
staging:
|
||||
host: staging-db.example.com
|
||||
port: 5432
|
||||
12
test_projects/secret_detection_benchmark/config/keys.yaml
Normal file
12
test_projects/secret_detection_benchmark/config/keys.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# Keys configuration
|
||||
api_keys:
|
||||
production:
|
||||
# MEDIUM SECRET #16: Multi-line private key in YAML literal block
|
||||
private_key: |
|
||||
-----BEGIN RSA PRIVATE KEY-----
|
||||
MIIEpAIBAAKCAQEAyLqJZvd5CZxJhLZYLFCqLV9G5k8dFz1LoNwPPfK3qE1k8H4y
|
||||
FQwNyX3WJZNmKJLOPQMfHZQxGhHJPwZYjKQPYHJ1234567890abcdefghijklmno
|
||||
-----END RSA PRIVATE KEY-----
|
||||
|
||||
# MEDIUM SECRET #17: Secret with Unicode characters
|
||||
api_token_intl: "tøkęn_śęçrėt_ẃïth_ŭñïçődė_123456"
|
||||
@@ -0,0 +1,8 @@
|
||||
[database]
|
||||
; MEDIUM SECRET #19: Secret in deprecated INI format
|
||||
password = L3g@cy_DB_P@ssw0rd_2023
|
||||
|
||||
[api]
|
||||
; MEDIUM SECRET #20: Commented backup API key
|
||||
; old_api_key = backup_key_xyz789abc123def456ghi
|
||||
endpoint = https://api.legacy.example.com
|
||||
11
test_projects/secret_detection_benchmark/config/oauth.json
Normal file
11
test_projects/secret_detection_benchmark/config/oauth.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"oauth_provider": "google",
|
||||
"client_id": "123456789012-abcdefghijklmnopqrstuvwxyz123456.apps.googleusercontent.com",
|
||||
"client_secret": "GOCSPX-Ab12Cd34Ef56Gh78Ij90Kl12",
|
||||
"redirect_uri": "https://example.com/oauth/callback",
|
||||
"scopes": [
|
||||
"openid",
|
||||
"email",
|
||||
"profile"
|
||||
]
|
||||
}
|
||||
21
test_projects/secret_detection_benchmark/config/settings.py
Normal file
21
test_projects/secret_detection_benchmark/config/settings.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Application settings and configuration
|
||||
"""
|
||||
|
||||
# EASY SECRET #2: GitHub Personal Access Token
|
||||
GITHUB_TOKEN = "ghp_vR8jK2mN4pQ6tX9bC3wY7zA1eF5hI8kL"
|
||||
|
||||
# EASY SECRET #3: Stripe API key
|
||||
STRIPE_SECRET_KEY = "sk_live_51MabcdefghijklmnopqrstuvwxyzABCDEF123456789"
|
||||
|
||||
# Application settings
|
||||
DEBUG = False
|
||||
LOG_LEVEL = "INFO"
|
||||
|
||||
# EASY SECRET #4: Database password
|
||||
DATABASE_CONFIG = {
|
||||
"host": "prod-db.example.com",
|
||||
"port": 5432,
|
||||
"username": "admin",
|
||||
"password": "ProdDB_P@ssw0rd_2024_Secure!"
|
||||
}
|
||||
7
test_projects/secret_detection_benchmark/id_rsa
Normal file
7
test_projects/secret_detection_benchmark/id_rsa
Normal file
@@ -0,0 +1,7 @@
|
||||
-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn
|
||||
NhAAAAAwEAAQAAAYEAyLqJZvd5CZxJhLZYLFCqLV9G5k8dFz1LoNwPPfK3qE1k8H4yFQwN
|
||||
yX3WJZNmKJLOPQMfHZQxGhHJPwZYjKQPYHJ1oNwPPfK3qE1k8H4yFQwNyX3WJZNmKJLO
|
||||
PQMfHZQxGhHJPwZYjKQPYHJ1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa==
|
||||
-----END OPENSSH PRIVATE KEY-----
|
||||
16
test_projects/secret_detection_benchmark/scripts/deploy.sh
Normal file
16
test_projects/secret_detection_benchmark/scripts/deploy.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
# Deployment script
|
||||
|
||||
# MEDIUM SECRET #14: Secret in environment variable export
|
||||
export SECRET_API_KEY="sk_prod_1234567890abcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
echo "Deploying application..."
|
||||
|
||||
# MEDIUM SECRET #15: URL-encoded secret in connection string (backup comment)
|
||||
# backup_connection="mysql://admin:MyP%40ssw0rd%21@db.example.com:3306/prod"
|
||||
|
||||
deploy_app() {
|
||||
echo "Deployment complete"
|
||||
}
|
||||
|
||||
deploy_app
|
||||
13
test_projects/secret_detection_benchmark/scripts/webhook.js
Normal file
13
test_projects/secret_detection_benchmark/scripts/webhook.js
Normal file
@@ -0,0 +1,13 @@
|
||||
// Webhook configuration and handlers
|
||||
|
||||
// EASY SECRET #7: Slack webhook URL
|
||||
const SLACK_WEBHOOK = "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXX";
|
||||
|
||||
function sendSlackNotification(message) {
|
||||
fetch(SLACK_WEBHOOK, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ text: message })
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = { sendSlackNotification };
|
||||
25
test_projects/secret_detection_benchmark/src/Crypto.go
Normal file
25
test_projects/secret_detection_benchmark/src/Crypto.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// HARD SECRET #29: Heredoc with unusual delimiter
|
||||
const ConfigTemplate = `
|
||||
SECRET_KEY=golang_heredoc_secret_999
|
||||
END_OF_CONFIG
|
||||
`
|
||||
|
||||
// HARD SECRET #30: Secret with intentional typo corrected programmatically
|
||||
const API_KEY_TYPO = "strippe_sk_live_corrected_key"
|
||||
|
||||
func CorrectTypo(s string) string {
|
||||
return strings.Replace(s, "strippe", "stripe", 1)
|
||||
}
|
||||
|
||||
func main() {
|
||||
fmt.Println("Crypto utilities initialized")
|
||||
correctedKey := CorrectTypo(API_KEY_TYPO)
|
||||
fmt.Println("Key ready:", correctedKey[:10]+"...")
|
||||
}
|
||||
10
test_projects/secret_detection_benchmark/src/Main.java
Normal file
10
test_projects/secret_detection_benchmark/src/Main.java
Normal file
@@ -0,0 +1,10 @@
|
||||
package com.example.benchmark;
|
||||
|
||||
public class Main {
|
||||
// EASY SECRET #10: Google OAuth secret in Java
|
||||
private static final String GOOGLE_OAUTH_SECRET = "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";
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.println("Application starting...");
|
||||
}
|
||||
}
|
||||
19
test_projects/secret_detection_benchmark/src/advanced.js
Normal file
19
test_projects/secret_detection_benchmark/src/advanced.js
Normal file
@@ -0,0 +1,19 @@
|
||||
// Advanced obfuscation techniques
|
||||
|
||||
// HARD SECRET #25: Template string with escaping
|
||||
const SECRET_TEMPLATE = `sk_${"prod"}_${"template"}_${"key"}_xyz`;
|
||||
|
||||
// HARD SECRET #26: Secret in regex pattern
|
||||
const PASSWORD_REGEX = /password_regex_secret_789/;
|
||||
|
||||
// HARD SECRET #27: XORed secret (XOR with key 42)
|
||||
const XOR_SECRET = [65,82,90,75,94,91,92,75,93,67,65,90,67,92,75,91,67,95];
|
||||
|
||||
function decodeXOR() {
|
||||
return String.fromCharCode(...XOR_SECRET.map(c => c ^ 42));
|
||||
}
|
||||
|
||||
// HARD SECRET #28: Escaped JSON within string
|
||||
const CONFIG_JSON = "{\"api_key\":\"sk_escaped_json_key_456\"}";
|
||||
|
||||
module.exports = { SECRET_TEMPLATE, decodeXOR };
|
||||
19
test_projects/secret_detection_benchmark/src/app.py
Normal file
19
test_projects/secret_detection_benchmark/src/app.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Main application entry point
|
||||
"""
|
||||
import os
|
||||
|
||||
# EASY SECRET #5: JWT Secret
|
||||
JWT_SECRET_KEY = "my-super-secret-jwt-key-do-not-share-2024"
|
||||
|
||||
def init_app():
|
||||
"""Initialize the application"""
|
||||
app_config = {
|
||||
"name": "SecretDetectionBenchmark",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
return app_config
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Application starting...")
|
||||
init_app()
|
||||
19
test_projects/secret_detection_benchmark/src/config.py
Normal file
19
test_projects/secret_detection_benchmark/src/config.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Configuration with moderately obfuscated secrets
|
||||
"""
|
||||
import base64
|
||||
|
||||
# MEDIUM SECRET #11: Base64 encoded AWS key
|
||||
AWS_KEY_ENCODED = "QUtJQUlPU0ZPRE5ON0VYQU1QTEU="
|
||||
|
||||
# MEDIUM SECRET #12: Hex-encoded API token
|
||||
HEX_TOKEN = "6170695f746f6b656e5f616263313233787977373839"
|
||||
|
||||
# MEDIUM SECRET #13: Split secret concatenated at runtime
|
||||
DB_PASS_PART1 = "MySecure"
|
||||
DB_PASS_PART2 = "Password"
|
||||
DB_PASS_PART3 = "2024!"
|
||||
DATABASE_PASSWORD = DB_PASS_PART1 + DB_PASS_PART2 + DB_PASS_PART3
|
||||
|
||||
def get_aws_key():
|
||||
return base64.b64decode(AWS_KEY_ENCODED).decode()
|
||||
15
test_projects/secret_detection_benchmark/src/database.sql
Normal file
15
test_projects/secret_detection_benchmark/src/database.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- Database initialization script
|
||||
|
||||
CREATE DATABASE prod_db;
|
||||
|
||||
-- MEDIUM SECRET #18: Secret in SQL comment
|
||||
-- Connection string: postgresql://admin:Pr0dDB_S3cr3t_P@ss@db.prod.example.com:5432/prod_db
|
||||
|
||||
CREATE TABLE users (
|
||||
id SERIAL PRIMARY KEY,
|
||||
username VARCHAR(255) NOT NULL,
|
||||
email VARCHAR(255) NOT NULL
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO users (username, email) VALUES ('admin', 'admin@example.com');
|
||||
23
test_projects/secret_detection_benchmark/src/obfuscated.py
Normal file
23
test_projects/secret_detection_benchmark/src/obfuscated.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Heavily obfuscated secrets - hard to detect
|
||||
"""
|
||||
import codecs
|
||||
|
||||
# HARD SECRET #21: ROT13 encoded secret
|
||||
SECRET_ROT13 = "fx_yvir_frperg_xrl_12345"
|
||||
|
||||
# HARD SECRET #22: Binary string representation
|
||||
GITHUB_TOKEN_BYTES = b'\x67\x68\x70\x5f\x4d\x79\x47\x69\x74\x48\x75\x62\x54\x6f\x6b\x65\x6e\x31\x32\x33\x34\x35\x36'
|
||||
|
||||
# HARD SECRET #23: Character array join
|
||||
AWS_SECRET_CHARS = ['A','W','S','_','S','E','C','R','E','T','_','K','E','Y','_','X','Y','Z','7','8','9']
|
||||
AWS_SECRET = ''.join(AWS_SECRET_CHARS)
|
||||
|
||||
# HARD SECRET #24: Reversed string that's un-reversed at runtime
|
||||
TOKEN_REVERSED = "321cba_desrever_nekot_ipa"
|
||||
|
||||
def get_rot13_secret():
|
||||
return codecs.decode(SECRET_ROT13, 'rot_13')
|
||||
|
||||
def get_token():
|
||||
return TOKEN_REVERSED[::-1]
|
||||
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate secret detection tool results against ground truth
|
||||
"""
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Set, Tuple
|
||||
|
||||
def load_ground_truth(ground_truth_file: Path) -> Set[Tuple[str, int]]:
    """Load ground truth secrets as a set of (file, line) tuples.

    Args:
        ground_truth_file: Path to the ground truth JSON document. It must
            hold a top-level "secrets" list whose entries each carry a
            "file" and a "line" key.

    Returns:
        One (file, line) tuple per distinct secret location.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If "secrets", "file" or "line" is missing.
    """
    # Decode explicitly as UTF-8: secret values may contain non-ASCII
    # characters, and the platform default encoding is not UTF-8 everywhere.
    with open(ground_truth_file, encoding="utf-8") as f:
        data = json.load(f)

    return {(secret["file"], secret["line"]) for secret in data["secrets"]}
|
||||
|
||||
def load_tool_results(results_file: Path) -> Set[Tuple[str, int]]:
    """Load tool results as a set of (file, line) tuples.

    Two layouts are recognised:

    * the custom format: a top-level "findings_by_file" object mapping each
      file path to a list of line numbers;
    * SARIF: every entry of runs[].results[].locations[] contributes its
      physicalLocation.artifactLocation.uri together with
      physicalLocation.region.startLine.

    Paths are taken exactly as written in the results file; no normalisation
    is applied.

    Args:
        results_file: Path to the tool output JSON document.

    Returns:
        The distinct (file, line) pairs reported by the tool. The set is
        empty when the document matches neither layout.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(results_file, encoding="utf-8") as f:
        data = json.load(f)

    findings = set()

    # Custom format: {"findings_by_file": {"path": [line, ...]}}
    if "findings_by_file" in data:
        for file_path, lines in data["findings_by_file"].items():
            for line in lines:
                findings.add((file_path, line))

    # SARIF format. Locations lacking a URI or a start line cannot be matched
    # against (file, line) ground truth entries, so they are skipped.
    runs = data.get("runs") if isinstance(data, dict) else None
    for run in runs or []:
        for result in run.get("results") or []:
            for location in result.get("locations") or []:
                physical = location.get("physicalLocation") or {}
                uri = (physical.get("artifactLocation") or {}).get("uri")
                start_line = (physical.get("region") or {}).get("startLine")
                if uri is not None and start_line is not None:
                    findings.add((uri, start_line))

    return findings
|
||||
|
||||
def calculate_metrics(ground_truth: Set, detected: Set):
    """Score a set of detections against the ground truth.

    Args:
        ground_truth: The expected findings.
        detected: The findings reported by the tool.

    Returns:
        A dict with the raw counts ("true_positives", "false_positives",
        "false_negatives") and "precision", "recall" and "f1_score" expressed
        as percentages. A ratio whose denominator is zero is reported as 0.
    """
    tp = len(ground_truth & detected)  # reported and expected
    fp = len(detected - ground_truth)  # reported but not expected
    fn = len(ground_truth - detected)  # expected but not reported

    reported = tp + fp
    expected = tp + fn
    precision = tp / reported if reported > 0 else 0
    recall = tp / expected if expected > 0 else 0

    combined = precision + recall
    if combined > 0:
        f1 = 2 * (precision * recall) / combined
    else:
        f1 = 0

    return {
        "true_positives": tp,
        "false_positives": fp,
        "false_negatives": fn,
        "precision": precision * 100,
        "recall": recall * 100,
        "f1_score": f1 * 100,
    }
|
||||
|
||||
def main():
    """Command-line entry point: score tool findings against the ground truth.

    Parses ``--tool-output`` (required) and ``--ground-truth`` (optional; its
    default is a path relative to the current working directory), loads both
    files, and prints the finding counts followed by precision, recall and F1
    score as percentages.

    Exits through ``parser.error`` (usage message, status 2) when either input
    file does not exist, instead of surfacing a raw traceback.
    """
    parser = argparse.ArgumentParser(description="Validate tool results against ground truth")
    parser.add_argument("--tool-output", required=True, help="Path to tool output JSON")
    parser.add_argument("--ground-truth",
                        default="../../backend/benchmarks/by_category/secret_detection/secret_detection_benchmark_GROUND_TRUTH.json",
                        help="Path to ground truth file")
    args = parser.parse_args()

    ground_truth_path = Path(args.ground_truth)
    tool_output_path = Path(args.tool_output)

    # The default ground-truth path only resolves from one specific working
    # directory, so a missing file is a likely user error worth a clear message.
    for label, path in (("ground truth", ground_truth_path), ("tool output", tool_output_path)):
        if not path.is_file():
            parser.error(f"{label} file not found: {path}")

    ground_truth = load_ground_truth(ground_truth_path)
    detected = load_tool_results(tool_output_path)
    metrics = calculate_metrics(ground_truth, detected)

    print("\n" + "="*60)
    print("Secret Detection Validation Results")
    print("="*60)
    print(f"Ground Truth Secrets: {len(ground_truth)}")
    print(f"Detected Secrets: {len(detected)}")
    print(f"\nTrue Positives: {metrics['true_positives']}")
    print(f"False Positives: {metrics['false_positives']}")
    print(f"False Negatives: {metrics['false_negatives']}")
    print(f"\n{'Precision:':<15} {metrics['precision']:.2f}%")
    print(f"{'Recall:':<15} {metrics['recall']:.2f}%")
    print(f"{'F1 Score:':<15} {metrics['f1_score']:.2f}%")
    print("="*60 + "\n")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user