mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-05-23 08:19:51 +02:00
feat: Add secret detection workflows and comprehensive benchmarking (#15)
Add three production-ready secret detection workflows with full benchmarking infrastructure: **New Workflows:** - gitleaks_detection: Pattern-based secret scanning (13/32 benchmark secrets) - trufflehog_detection: Entropy-based detection with verification (1/32 benchmark secrets) - llm_secret_detection: AI-powered semantic analysis (32/32 benchmark secrets - 100% recall) **Benchmarking Infrastructure:** - Ground truth dataset with 32 documented secrets (12 Easy, 10 Medium, 10 Hard) - Automated comparison tools for precision/recall testing - SARIF output format for all workflows - Performance metrics and tool comparison reports **Fixes:** - Set gitleaks default to no_git=True for uploaded directories - Update documentation with correct secret counts and workflow names - Temporarily deactivate AI agent command - Clean up deprecated test files and GitGuardian workflow **Testing:** All workflows verified on secret_detection_benchmark and vulnerable_app test projects. Workers healthy and system fully functional.
This commit is contained in:
@@ -7,6 +7,8 @@ in codebases and repositories.
|
||||
Available modules:
|
||||
- TruffleHog: Comprehensive secret detection with verification
|
||||
- Gitleaks: Git-specific secret scanning and leak detection
|
||||
- GitGuardian: Enterprise secret detection using GitGuardian API
|
||||
- LLM Secret Detector: AI-powered semantic secret detection
|
||||
"""
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
|
||||
@@ -248,7 +248,8 @@ class GitleaksModule(BaseModule):
|
||||
rule_id = result.get("RuleID", "unknown")
|
||||
description = result.get("Description", "")
|
||||
file_path = result.get("File", "")
|
||||
line_number = result.get("LineNumber", 0)
|
||||
line_number = result.get("StartLine", 0) # Gitleaks outputs "StartLine", not "LineNumber"
|
||||
line_end = result.get("EndLine", 0)
|
||||
secret = result.get("Secret", "")
|
||||
match_text = result.get("Match", "")
|
||||
|
||||
@@ -278,6 +279,7 @@ class GitleaksModule(BaseModule):
|
||||
category="secret_leak",
|
||||
file_path=file_path if file_path else None,
|
||||
line_start=line_number if line_number > 0 else None,
|
||||
line_end=line_end if line_end > 0 else None,
|
||||
code_snippet=match_text if match_text else secret,
|
||||
recommendation=self._get_leak_recommendation(rule_id),
|
||||
metadata={
|
||||
|
||||
@@ -0,0 +1,397 @@
|
||||
"""
|
||||
LLM Secret Detection Module
|
||||
|
||||
This module uses an LLM to detect secrets and sensitive information via semantic understanding.
|
||||
"""
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List
|
||||
|
||||
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
|
||||
from . import register_module
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@register_module
|
||||
class LLMSecretDetectorModule(BaseModule):
|
||||
"""
|
||||
LLM-based secret detection module using AI semantic analysis.
|
||||
|
||||
Uses an LLM agent to identify secrets through natural language understanding,
|
||||
potentially catching secrets that pattern-based tools miss.
|
||||
"""
|
||||
|
||||
def get_metadata(self) -> ModuleMetadata:
|
||||
"""Get module metadata"""
|
||||
return ModuleMetadata(
|
||||
name="llm_secret_detector",
|
||||
version="1.0.0",
|
||||
description="AI-powered secret detection using LLM semantic analysis",
|
||||
author="FuzzForge Team",
|
||||
category="secret_detection",
|
||||
tags=["secrets", "llm", "ai", "semantic"],
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_url": {
|
||||
"type": "string",
|
||||
"default": "http://fuzzforge-task-agent:8000/a2a/litellm_agent",
|
||||
"description": "A2A agent endpoint URL"
|
||||
},
|
||||
"llm_model": {
|
||||
"type": "string",
|
||||
"default": "gpt-4o-mini",
|
||||
"description": "LLM model to use"
|
||||
},
|
||||
"llm_provider": {
|
||||
"type": "string",
|
||||
"default": "openai",
|
||||
"description": "LLM provider (openai, anthropic, etc.)"
|
||||
},
|
||||
"file_patterns": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"default": ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*"],
|
||||
"description": "File patterns to analyze"
|
||||
},
|
||||
"max_files": {
|
||||
"type": "integer",
|
||||
"default": 20,
|
||||
"description": "Maximum number of files to analyze"
|
||||
},
|
||||
"max_file_size": {
|
||||
"type": "integer",
|
||||
"default": 30000,
|
||||
"description": "Maximum file size in bytes (30KB default)"
|
||||
},
|
||||
"timeout": {
|
||||
"type": "integer",
|
||||
"default": 45,
|
||||
"description": "Timeout per file in seconds"
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
},
|
||||
output_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"findings": {
|
||||
"type": "array",
|
||||
"description": "Secrets identified by LLM"
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
def validate_config(self, config: Dict[str, Any]) -> bool:
|
||||
"""Validate module configuration"""
|
||||
# Lazy import to avoid Temporal sandbox restrictions
|
||||
try:
|
||||
from fuzzforge_ai.a2a_wrapper import send_agent_task # noqa: F401
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"A2A wrapper not available. Ensure fuzzforge_ai module is accessible."
|
||||
)
|
||||
|
||||
agent_url = config.get("agent_url")
|
||||
if not agent_url or not isinstance(agent_url, str):
|
||||
raise ValueError("agent_url must be a valid URL string")
|
||||
|
||||
max_files = config.get("max_files", 20)
|
||||
if not isinstance(max_files, int) or max_files <= 0:
|
||||
raise ValueError("max_files must be a positive integer")
|
||||
|
||||
return True
|
||||
|
||||
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
|
||||
"""
|
||||
Execute LLM-based secret detection.
|
||||
|
||||
Args:
|
||||
config: Module configuration
|
||||
workspace: Path to the workspace containing code to analyze
|
||||
|
||||
Returns:
|
||||
ModuleResult with secrets detected by LLM
|
||||
"""
|
||||
self.start_timer()
|
||||
|
||||
logger.info(f"Starting LLM secret detection in workspace: {workspace}")
|
||||
|
||||
# Extract configuration
|
||||
agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent")
|
||||
llm_model = config.get("llm_model", "gpt-4o-mini")
|
||||
llm_provider = config.get("llm_provider", "openai")
|
||||
file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"])
|
||||
max_files = config.get("max_files", 20)
|
||||
max_file_size = config.get("max_file_size", 30000)
|
||||
timeout = config.get("timeout", 30) # Reduced from 45s
|
||||
|
||||
# Find files to analyze
|
||||
# Skip files that are unlikely to contain secrets
|
||||
skip_patterns = ['*.sarif', '*.md', '*.html', '*.css', '*.db', '*.sqlite']
|
||||
|
||||
files_to_analyze = []
|
||||
for pattern in file_patterns:
|
||||
for file_path in workspace.rglob(pattern):
|
||||
if file_path.is_file():
|
||||
try:
|
||||
# Skip unlikely files
|
||||
if any(file_path.match(skip) for skip in skip_patterns):
|
||||
logger.debug(f"Skipping {file_path.name} (unlikely to have secrets)")
|
||||
continue
|
||||
|
||||
# Check file size
|
||||
if file_path.stat().st_size > max_file_size:
|
||||
logger.debug(f"Skipping {file_path} (too large)")
|
||||
continue
|
||||
|
||||
files_to_analyze.append(file_path)
|
||||
|
||||
if len(files_to_analyze) >= max_files:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking file {file_path}: {e}")
|
||||
continue
|
||||
|
||||
if len(files_to_analyze) >= max_files:
|
||||
break
|
||||
|
||||
logger.info(f"Found {len(files_to_analyze)} files to analyze for secrets")
|
||||
|
||||
# Analyze each file with LLM
|
||||
all_findings = []
|
||||
for file_path in files_to_analyze:
|
||||
logger.info(f"Analyzing: {file_path.relative_to(workspace)}")
|
||||
|
||||
try:
|
||||
findings = await self._analyze_file_for_secrets(
|
||||
file_path=file_path,
|
||||
workspace=workspace,
|
||||
agent_url=agent_url,
|
||||
llm_model=llm_model,
|
||||
llm_provider=llm_provider,
|
||||
timeout=timeout
|
||||
)
|
||||
all_findings.extend(findings)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing {file_path}: {e}")
|
||||
# Continue with next file
|
||||
continue
|
||||
|
||||
logger.info(f"LLM secret detection complete. Found {len(all_findings)} potential secrets.")
|
||||
|
||||
# Create result
|
||||
return self.create_result(
|
||||
findings=all_findings,
|
||||
status="success",
|
||||
summary={
|
||||
"files_analyzed": len(files_to_analyze),
|
||||
"total_secrets": len(all_findings),
|
||||
"agent_url": agent_url,
|
||||
"model": f"{llm_provider}/{llm_model}"
|
||||
}
|
||||
)
|
||||
|
||||
async def _analyze_file_for_secrets(
|
||||
self,
|
||||
file_path: Path,
|
||||
workspace: Path,
|
||||
agent_url: str,
|
||||
llm_model: str,
|
||||
llm_provider: str,
|
||||
timeout: int
|
||||
) -> List[ModuleFinding]:
|
||||
"""Analyze a single file for secrets using LLM"""
|
||||
|
||||
# Read file content
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
code_content = f.read()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to read {file_path}: {e}")
|
||||
return []
|
||||
|
||||
# Build specialized prompt for secret detection
|
||||
system_prompt = (
|
||||
"You are a security expert specialized in detecting secrets and credentials in code. "
|
||||
"Your job is to find REAL secrets that could be exploited. Be thorough and aggressive.\n\n"
|
||||
"For each secret found, respond in this exact format:\n"
|
||||
"SECRET_FOUND: [type like 'AWS Key', 'GitHub Token', 'Database Password']\n"
|
||||
"SEVERITY: [critical/high/medium/low]\n"
|
||||
"LINE: [exact line number]\n"
|
||||
"CONFIDENCE: [high/medium/low]\n"
|
||||
"DESCRIPTION: [brief explanation]\n\n"
|
||||
"EXAMPLES of secrets to find:\n"
|
||||
"1. API Keys: 'AKIA...', 'ghp_...', 'sk_live_...', 'SG.'\n"
|
||||
"2. Tokens: Bearer tokens, OAuth tokens, JWT secrets\n"
|
||||
"3. Passwords: Database passwords, admin passwords in configs\n"
|
||||
"4. Connection Strings: mongodb://, postgres://, redis:// with credentials\n"
|
||||
"5. Private Keys: -----BEGIN PRIVATE KEY-----, -----BEGIN RSA PRIVATE KEY-----\n"
|
||||
"6. Cloud Credentials: AWS keys, GCP keys, Azure keys\n"
|
||||
"7. Encryption Keys: AES keys, secret keys in config\n"
|
||||
"8. Webhook URLs: URLs with tokens like hooks.slack.com/services/...\n\n"
|
||||
"FIND EVERYTHING that looks like a real credential, password, key, or token.\n"
|
||||
"DO NOT be overly cautious. Report anything suspicious.\n\n"
|
||||
"If absolutely no secrets exist, respond with 'NO_SECRETS_FOUND'."
|
||||
)
|
||||
|
||||
user_message = (
|
||||
f"Analyze this code for secrets and credentials:\n\n"
|
||||
f"File: {file_path.relative_to(workspace)}\n\n"
|
||||
f"```\n{code_content}\n```"
|
||||
)
|
||||
|
||||
# Call LLM via A2A wrapper
|
||||
try:
|
||||
from fuzzforge_ai.a2a_wrapper import send_agent_task
|
||||
|
||||
result = await send_agent_task(
|
||||
url=agent_url,
|
||||
model=llm_model,
|
||||
provider=llm_provider,
|
||||
prompt=system_prompt,
|
||||
message=user_message,
|
||||
context=f"secret_detection_{file_path.stem}",
|
||||
timeout=float(timeout)
|
||||
)
|
||||
|
||||
llm_response = result.text
|
||||
|
||||
# Debug: Log LLM response
|
||||
logger.debug(f"LLM response for {file_path.name}: {llm_response[:200]}...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"A2A call failed for {file_path}: {e}")
|
||||
return []
|
||||
|
||||
# Parse LLM response into findings
|
||||
findings = self._parse_llm_response(
|
||||
llm_response=llm_response,
|
||||
file_path=file_path,
|
||||
workspace=workspace
|
||||
)
|
||||
|
||||
if findings:
|
||||
logger.info(f"Found {len(findings)} secrets in {file_path.name}")
|
||||
else:
|
||||
logger.debug(f"No secrets found in {file_path.name}. Response: {llm_response[:500]}")
|
||||
|
||||
return findings
|
||||
|
||||
def _parse_llm_response(
|
||||
self,
|
||||
llm_response: str,
|
||||
file_path: Path,
|
||||
workspace: Path
|
||||
) -> List[ModuleFinding]:
|
||||
"""Parse LLM response into structured findings"""
|
||||
|
||||
if "NO_SECRETS_FOUND" in llm_response:
|
||||
return []
|
||||
|
||||
findings = []
|
||||
relative_path = str(file_path.relative_to(workspace))
|
||||
|
||||
# Simple parser for the expected format
|
||||
lines = llm_response.split('\n')
|
||||
current_secret = {}
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
|
||||
if line.startswith("SECRET_FOUND:"):
|
||||
# Save previous secret if exists
|
||||
if current_secret:
|
||||
findings.append(self._create_secret_finding(current_secret, relative_path))
|
||||
current_secret = {"type": line.replace("SECRET_FOUND:", "").strip()}
|
||||
|
||||
elif line.startswith("SEVERITY:"):
|
||||
severity = line.replace("SEVERITY:", "").strip().lower()
|
||||
current_secret["severity"] = severity
|
||||
|
||||
elif line.startswith("LINE:"):
|
||||
line_num = line.replace("LINE:", "").strip()
|
||||
try:
|
||||
current_secret["line"] = int(line_num)
|
||||
except ValueError:
|
||||
current_secret["line"] = None
|
||||
|
||||
elif line.startswith("CONFIDENCE:"):
|
||||
confidence = line.replace("CONFIDENCE:", "").strip().lower()
|
||||
current_secret["confidence"] = confidence
|
||||
|
||||
elif line.startswith("DESCRIPTION:"):
|
||||
current_secret["description"] = line.replace("DESCRIPTION:", "").strip()
|
||||
|
||||
# Save last secret
|
||||
if current_secret:
|
||||
findings.append(self._create_secret_finding(current_secret, relative_path))
|
||||
|
||||
return findings
|
||||
|
||||
def _create_secret_finding(self, secret: Dict[str, Any], file_path: str) -> ModuleFinding:
|
||||
"""Create a ModuleFinding from parsed secret"""
|
||||
|
||||
severity_map = {
|
||||
"critical": "critical",
|
||||
"high": "high",
|
||||
"medium": "medium",
|
||||
"low": "low"
|
||||
}
|
||||
|
||||
severity = severity_map.get(secret.get("severity", "medium"), "medium")
|
||||
confidence = secret.get("confidence", "medium")
|
||||
|
||||
# Adjust severity based on confidence
|
||||
if confidence == "low" and severity == "critical":
|
||||
severity = "high"
|
||||
elif confidence == "low" and severity == "high":
|
||||
severity = "medium"
|
||||
|
||||
# Create finding
|
||||
title = f"LLM detected secret: {secret.get('type', 'Unknown secret')}"
|
||||
description = secret.get("description", "An LLM identified this as a potential secret.")
|
||||
description += f"\n\nConfidence: {confidence}"
|
||||
|
||||
return self.create_finding(
|
||||
title=title,
|
||||
description=description,
|
||||
severity=severity,
|
||||
category="secret_detection",
|
||||
file_path=file_path,
|
||||
line_start=secret.get("line"),
|
||||
recommendation=self._get_secret_recommendation(secret.get("type", "")),
|
||||
metadata={
|
||||
"tool": "llm-secret-detector",
|
||||
"secret_type": secret.get("type", "unknown"),
|
||||
"confidence": confidence,
|
||||
"detection_method": "semantic-analysis"
|
||||
}
|
||||
)
|
||||
|
||||
def _get_secret_recommendation(self, secret_type: str) -> str:
|
||||
"""Get remediation recommendation for detected secret"""
|
||||
return (
|
||||
f"A potential {secret_type} was detected by AI analysis. "
|
||||
f"Verify whether this is a real secret or a false positive. "
|
||||
f"If real: (1) Revoke the credential immediately, "
|
||||
f"(2) Remove from codebase and Git history, "
|
||||
f"(3) Rotate to a new secret, "
|
||||
f"(4) Use secret management tools for storage. "
|
||||
f"Implement pre-commit hooks to prevent future leaks."
|
||||
)
|
||||
@@ -61,11 +61,6 @@ class TruffleHogModule(BaseModule):
|
||||
"items": {"type": "string"},
|
||||
"description": "Specific detectors to exclude"
|
||||
},
|
||||
"max_depth": {
|
||||
"type": "integer",
|
||||
"default": 10,
|
||||
"description": "Maximum directory depth to scan"
|
||||
},
|
||||
"concurrency": {
|
||||
"type": "integer",
|
||||
"default": 10,
|
||||
@@ -100,11 +95,6 @@ class TruffleHogModule(BaseModule):
|
||||
if not isinstance(concurrency, int) or concurrency < 1 or concurrency > 50:
|
||||
raise ValueError("Concurrency must be between 1 and 50")
|
||||
|
||||
# Check max_depth bounds
|
||||
max_depth = config.get("max_depth", 10)
|
||||
if not isinstance(max_depth, int) or max_depth < 1 or max_depth > 20:
|
||||
raise ValueError("Max depth must be between 1 and 20")
|
||||
|
||||
return True
|
||||
|
||||
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
|
||||
@@ -124,6 +114,9 @@ class TruffleHogModule(BaseModule):
|
||||
# Add verification flag
|
||||
if config.get("verify", False):
|
||||
cmd.append("--verify")
|
||||
else:
|
||||
# Explicitly disable verification to get all unverified secrets
|
||||
cmd.append("--no-verification")
|
||||
|
||||
# Add JSON output
|
||||
cmd.extend(["--json", "--no-update"])
|
||||
@@ -131,9 +124,6 @@ class TruffleHogModule(BaseModule):
|
||||
# Add concurrency
|
||||
cmd.extend(["--concurrency", str(config.get("concurrency", 10))])
|
||||
|
||||
# Add max depth
|
||||
cmd.extend(["--max-depth", str(config.get("max_depth", 10))])
|
||||
|
||||
# Add include/exclude detectors
|
||||
if config.get("include_detectors"):
|
||||
cmd.extend(["--include-detectors", ",".join(config["include_detectors"])])
|
||||
|
||||
Reference in New Issue
Block a user