From 2da986ebb0f74a2b28bfea5adc226ec9dfabc8a1 Mon Sep 17 00:00:00 2001 From: tduhamel42 Date: Thu, 16 Oct 2025 11:21:24 +0200 Subject: [PATCH] feat: Add secret detection workflows and comprehensive benchmarking (#15) Add three production-ready secret detection workflows with full benchmarking infrastructure: **New Workflows:** - gitleaks_detection: Pattern-based secret scanning (13/32 benchmark secrets) - trufflehog_detection: Entropy-based detection with verification (1/32 benchmark secrets) - llm_secret_detection: AI-powered semantic analysis (32/32 benchmark secrets - 100% recall) **Benchmarking Infrastructure:** - Ground truth dataset with 32 documented secrets (12 Easy, 10 Medium, 10 Hard) - Automated comparison tools for precision/recall testing - SARIF output format for all workflows - Performance metrics and tool comparison reports **Fixes:** - Set gitleaks default to no_git=True for uploaded directories - Update documentation with correct secret counts and workflow names - Temporarily deactivate AI agent command - Clean up deprecated test files and GitGuardian workflow **Testing:** All workflows verified on secret_detection_benchmark and vulnerable_app test projects. Workers healthy and system fully functional. --- .gitignore | 4 +- .../by_category/secret_detection/README.md | 240 ++++++++ .../secret_detection/bench_comparison.py | 285 +++++++++ .../secret_detection/compare_tools.py | 547 ++++++++++++++++++ .../modules/secret_detection/__init__.py | 2 + .../modules/secret_detection/gitleaks.py | 4 +- .../secret_detection/llm_secret_detector.py | 397 +++++++++++++ .../modules/secret_detection/trufflehog.py | 16 +- .../workflows/gitleaks_detection/__init__.py | 19 + .../gitleaks_detection/activities.py | 166 ++++++ .../gitleaks_detection/metadata.yaml | 42 ++ .../workflows/gitleaks_detection/workflow.py | 187 ++++++ .../llm_secret_detection/__init__.py | 6 + .../llm_secret_detection/activities.py | 112 ++++ .../llm_secret_detection/metadata.yaml | 43 ++ .../llm_secret_detection/workflow.py | 156 +++++ .../trufflehog_detection/__init__.py | 13 + .../trufflehog_detection/activities.py | 111 ++++ .../trufflehog_detection/metadata.yaml | 34 ++ .../trufflehog_detection/workflow.py | 104 ++++ cli/src/fuzzforge_cli/commands/ai.py | 18 +- docker-compose.yml | 2 + sdk/test_exception_handling.py | 213 ------- test_a2a_wrapper.py | 152 ----- test_projects/README.md | 24 +- .../fuzz/fuzz_targets/fuzz_waterfall.rs | 9 + test_security_workflow.py | 142 ----- test_temporal_workflow.py | 105 ---- 28 files changed, 2505 insertions(+), 648 deletions(-) create mode 100644 backend/benchmarks/by_category/secret_detection/README.md create mode 100644 backend/benchmarks/by_category/secret_detection/bench_comparison.py create mode 100644 backend/benchmarks/by_category/secret_detection/compare_tools.py create mode 100644 backend/toolbox/modules/secret_detection/llm_secret_detector.py create mode 100644 backend/toolbox/workflows/gitleaks_detection/__init__.py create mode 100644 backend/toolbox/workflows/gitleaks_detection/activities.py create mode 100644 backend/toolbox/workflows/gitleaks_detection/metadata.yaml create mode 100644 backend/toolbox/workflows/gitleaks_detection/workflow.py create mode 100644 backend/toolbox/workflows/llm_secret_detection/__init__.py create mode 100644 backend/toolbox/workflows/llm_secret_detection/activities.py create mode 100644 backend/toolbox/workflows/llm_secret_detection/metadata.yaml create mode 100644 backend/toolbox/workflows/llm_secret_detection/workflow.py create mode 100644 backend/toolbox/workflows/trufflehog_detection/__init__.py create mode 100644 backend/toolbox/workflows/trufflehog_detection/activities.py create mode 100644 backend/toolbox/workflows/trufflehog_detection/metadata.yaml create mode 100644 backend/toolbox/workflows/trufflehog_detection/workflow.py delete mode 100644 sdk/test_exception_handling.py delete mode 100755 test_a2a_wrapper.py create mode 100644 test_projects/rust_fuzz_test/fuzz/fuzz_targets/fuzz_waterfall.rs delete mode 100644 test_security_workflow.py delete mode 100644 test_temporal_workflow.py diff --git a/.gitignore b/.gitignore index e6d5c6f..dd922f9 100644 --- a/.gitignore +++ b/.gitignore @@ -204,6 +204,7 @@ dev_config.yaml reports/ output/ findings/ +*.sarif *.sarif.json *.html.report security_report.* @@ -291,4 +292,5 @@ test_projects/*/wallet.json test_projects/*/.npmrc test_projects/*/.git-credentials test_projects/*/credentials.* -test_projects/*/api_keys.* \ No newline at end of file +test_projects/*/api_keys.* +test_projects/*/ci-*.sh \ No newline at end of file diff --git a/backend/benchmarks/by_category/secret_detection/README.md b/backend/benchmarks/by_category/secret_detection/README.md new file mode 100644 index 0000000..9fd437d --- /dev/null +++ b/backend/benchmarks/by_category/secret_detection/README.md @@ -0,0 +1,240 @@ +# Secret Detection Benchmarks + +Comprehensive benchmarking suite comparing secret detection tools via complete workflow execution: +- **Gitleaks** - Fast pattern-based detection +- **TruffleHog** - Entropy analysis with verification +- **LLM Detector** - AI-powered semantic analysis (gpt-4o-mini, gpt-5-mini) + +## Quick Start + +### Run All Comparisons + +```bash +cd backend +python benchmarks/by_category/secret_detection/compare_tools.py +``` + +This will run all workflows on `test_projects/secret_detection_benchmark/` and generate comparison reports. + +### Run Benchmark Tests + +```bash +# All benchmarks (Gitleaks, TruffleHog, LLM with 3 models) +pytest benchmarks/by_category/secret_detection/bench_comparison.py --benchmark-only -v + +# Specific tool only +pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionComparison::test_gitleaks_workflow --benchmark-only -v + +# Performance tests only +pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionPerformance --benchmark-only -v +``` + +## Ground Truth Dataset + +**Controlled Benchmark** (`test_projects/secret_detection_benchmark/`) + +**Exactly 32 documented secrets** for accurate precision/recall testing: +- **12 Easy**: Standard patterns (AWS keys, GitHub PATs, Stripe keys, SSH keys) +- **10 Medium**: Obfuscated (Base64, hex, concatenated, in comments, Unicode) +- **10 Hard**: Well hidden (ROT13, binary, XOR, reversed, template strings, regex patterns) + +All secrets documented in `secret_detection_benchmark_GROUND_TRUTH.json` with exact file paths and line numbers. + +See `test_projects/secret_detection_benchmark/README.md` for details. + +## Metrics Measured + +### Accuracy Metrics +- **Precision**: TP / (TP + FP) - How many detected secrets are real? +- **Recall**: TP / (TP + FN) - How many real secrets were found? +- **F1 Score**: Harmonic mean of precision and recall +- **False Positive Rate**: FP / Total Detected + +### Performance Metrics +- **Execution Time**: Total time to scan all files +- **Throughput**: Files/secrets scanned per second +- **Memory Usage**: Peak memory during execution + +### Thresholds (from `category_configs.py`) +- Minimum Precision: 90% +- Minimum Recall: 95% +- Max Execution Time (small): 2.0s +- Max False Positives: 5 per 100 secrets + +## Tool Comparison + +### Gitleaks +**Strengths:** +- Fastest execution +- Git-aware (commit history scanning) +- Low false positive rate +- No API required +- Works offline + +**Weaknesses:** +- Pattern-based only +- May miss obfuscated secrets +- Limited to known patterns + +### TruffleHog +**Strengths:** +- Secret verification (validates if active) +- High detection rate with entropy analysis +- Multiple detectors (600+ secret types) +- Catches high-entropy strings + +**Weaknesses:** +- Slower than Gitleaks +- Higher false positive rate +- Verification requires network calls + +### LLM Detector +**Strengths:** +- Semantic understanding of context +- Catches novel/custom secret patterns +- Can reason about what "looks like" a secret +- Multiple model options (GPT-4, Claude, etc.) +- Understands code context + +**Weaknesses:** +- Slowest (API latency + LLM processing) +- Most expensive (LLM API costs) +- Requires A2A agent infrastructure +- Accuracy varies by model +- May miss well-disguised secrets + +## Results Directory + +After running comparisons, results are saved to: +``` +benchmarks/by_category/secret_detection/results/ +├── comparison_report.md # Human-readable comparison with: +│ # - Summary table with secrets/files/avg per file/time +│ # - Agreement analysis (secrets found by N tools) +│ # - Tool agreement matrix (overlap between pairs) +│ # - Per-file detailed comparison table +│ # - File type breakdown +│ # - Files analyzed by each tool +│ # - Overlap analysis and performance summary +└── comparison_results.json # Machine-readable data with findings_by_file +``` + +## Latest Benchmark Results + +Run the benchmark to generate results: +```bash +cd backend +python benchmarks/by_category/secret_detection/compare_tools.py +``` + +Results are saved to `results/comparison_report.md` with: +- Summary table (secrets found, files scanned, time) +- Agreement analysis (how many tools found each secret) +- Tool agreement matrix (overlap between tools) +- Per-file detailed comparison +- File type breakdown + +## CI/CD Integration + +Add to your CI pipeline: + +```yaml +# .github/workflows/benchmark-secrets.yml +name: Secret Detection Benchmark + +on: + schedule: + - cron: '0 0 * * 0' # Weekly + workflow_dispatch: + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r backend/requirements.txt + pip install pytest-benchmark + + - name: Run benchmarks + env: + GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }} + run: | + cd backend + pytest benchmarks/by_category/secret_detection/bench_comparison.py \ + --benchmark-only \ + --benchmark-json=results.json \ + --gitguardian-api-key + + - name: Upload results + uses: actions/upload-artifact@v3 + with: + name: benchmark-results + path: backend/results.json +``` + +## Adding New Tools + +To benchmark a new secret detection tool: + +1. Create module in `toolbox/modules/secret_detection/` +2. Register in `__init__.py` +3. Add to `compare_tools.py` in `run_all_tools()` +4. Add test in `bench_comparison.py` + +## Interpreting Results + +### High Precision, Low Recall +Tool is conservative - few false positives but misses secrets. +**Use case**: Production environments where false positives are costly. + +### Low Precision, High Recall +Tool is aggressive - finds most secrets but many false positives. +**Use case**: Initial scans where manual review is acceptable. + +### Balanced (High F1) +Tool has good balance of precision and recall. +**Use case**: General purpose scanning. + +### Fast Execution +Suitable for CI/CD pipelines and pre-commit hooks. + +### Slow but Accurate +Better for comprehensive security audits. + +## Best Practices + +1. **Use multiple tools**: Each has strengths/weaknesses +2. **Combine results**: Union of all findings for maximum coverage +3. **Filter intelligently**: Remove known false positives +4. **Verify findings**: Check if secrets are actually valid +5. **Track over time**: Monitor precision/recall trends +6. **Update regularly**: Patterns evolve, tools improve + +## Troubleshooting + +### GitGuardian Tests Skipped +- Set `GITGUARDIAN_API_KEY` environment variable +- Use `--gitguardian-api-key` flag + +### LLM Tests Skipped +- Ensure A2A agent is running +- Check agent URL in config +- Use `--llm-enabled` flag + +### Low Recall +- Check if ground truth is up to date +- Verify tool is configured correctly +- Review missed secrets manually + +### High False Positives +- Adjust tool sensitivity +- Add exclusion patterns +- Review false positive list diff --git a/backend/benchmarks/by_category/secret_detection/bench_comparison.py b/backend/benchmarks/by_category/secret_detection/bench_comparison.py new file mode 100644 index 0000000..1dc040e --- /dev/null +++ b/backend/benchmarks/by_category/secret_detection/bench_comparison.py @@ -0,0 +1,285 @@ +""" +Secret Detection Tool Comparison Benchmark + +Compares Gitleaks, TruffleHog, and LLM-based detection +on the vulnerable_app ground truth dataset via workflow execution. +""" + +import pytest +import json +from pathlib import Path +from typing import Dict, List, Any +import sys + +sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src")) + +from fuzzforge_sdk import FuzzForgeClient +from benchmarks.category_configs import ModuleCategory, get_threshold + + +@pytest.fixture +def target_path(): + """Path to vulnerable_app""" + path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "vulnerable_app" + assert path.exists(), f"Target not found: {path}" + return path + + +@pytest.fixture +def ground_truth(target_path): + """Load ground truth data""" + metadata_file = target_path / "SECRETS_GROUND_TRUTH.json" + assert metadata_file.exists(), f"Ground truth not found: {metadata_file}" + + with open(metadata_file) as f: + return json.load(f) + + +@pytest.fixture +def sdk_client(): + """FuzzForge SDK client""" + client = FuzzForgeClient(base_url="http://localhost:8000") + yield client + client.close() + + +def calculate_metrics(sarif_results: List[Dict], ground_truth: Dict[str, Any]) -> Dict[str, float]: + """Calculate precision, recall, and F1 score""" + + # Extract expected secrets from ground truth + expected_secrets = set() + for file_info in ground_truth["files"]: + if "secrets" in file_info: + for secret in file_info["secrets"]: + expected_secrets.add((file_info["filename"], secret["line"])) + + # Extract detected secrets from SARIF + detected_secrets = set() + for result in sarif_results: + locations = result.get("locations", []) + for location in locations: + physical_location = location.get("physicalLocation", {}) + artifact_location = physical_location.get("artifactLocation", {}) + region = physical_location.get("region", {}) + + uri = artifact_location.get("uri", "") + line = region.get("startLine", 0) + + if uri and line: + file_path = Path(uri) + filename = file_path.name + detected_secrets.add((filename, line)) + # Also try with relative path + if len(file_path.parts) > 1: + rel_path = str(Path(*file_path.parts[-2:])) + detected_secrets.add((rel_path, line)) + + # Calculate metrics + true_positives = len(expected_secrets & detected_secrets) + false_positives = len(detected_secrets - expected_secrets) + false_negatives = len(expected_secrets - detected_secrets) + + precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0 + recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0 + f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0 + + return { + "precision": precision, + "recall": recall, + "f1": f1, + "true_positives": true_positives, + "false_positives": false_positives, + "false_negatives": false_negatives + } + + +class TestSecretDetectionComparison: + """Compare all secret detection tools""" + + @pytest.mark.benchmark(group="secret_detection") + def test_gitleaks_workflow(self, benchmark, sdk_client, target_path, ground_truth): + """Benchmark Gitleaks workflow accuracy and performance""" + + def run_gitleaks(): + run = sdk_client.submit_workflow_with_upload( + workflow_name="gitleaks_detection", + target_path=str(target_path), + parameters={ + "scan_mode": "detect", + "no_git": True, + "redact": False + } + ) + + result = sdk_client.wait_for_completion(run.run_id, timeout=300) + assert result.status == "completed", f"Workflow failed: {result.status}" + + findings = sdk_client.get_run_findings(run.run_id) + assert findings and findings.sarif, "No findings returned" + + return findings + + findings = benchmark(run_gitleaks) + + # Extract SARIF results + sarif_results = [] + for run_data in findings.sarif.get("runs", []): + sarif_results.extend(run_data.get("results", [])) + + # Calculate metrics + metrics = calculate_metrics(sarif_results, ground_truth) + + # Log results + print(f"\n=== Gitleaks Workflow Results ===") + print(f"Precision: {metrics['precision']:.2%}") + print(f"Recall: {metrics['recall']:.2%}") + print(f"F1 Score: {metrics['f1']:.2%}") + print(f"True Positives: {metrics['true_positives']}") + print(f"False Positives: {metrics['false_positives']}") + print(f"False Negatives: {metrics['false_negatives']}") + print(f"Findings Count: {len(sarif_results)}") + + # Assert meets thresholds + min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision") + min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall") + + assert metrics['precision'] >= min_precision, \ + f"Precision {metrics['precision']:.2%} below threshold {min_precision:.2%}" + assert metrics['recall'] >= min_recall, \ + f"Recall {metrics['recall']:.2%} below threshold {min_recall:.2%}" + + @pytest.mark.benchmark(group="secret_detection") + def test_trufflehog_workflow(self, benchmark, sdk_client, target_path, ground_truth): + """Benchmark TruffleHog workflow accuracy and performance""" + + def run_trufflehog(): + run = sdk_client.submit_workflow_with_upload( + workflow_name="trufflehog_detection", + target_path=str(target_path), + parameters={ + "verify": False, + "max_depth": 10 + } + ) + + result = sdk_client.wait_for_completion(run.run_id, timeout=300) + assert result.status == "completed", f"Workflow failed: {result.status}" + + findings = sdk_client.get_run_findings(run.run_id) + assert findings and findings.sarif, "No findings returned" + + return findings + + findings = benchmark(run_trufflehog) + + sarif_results = [] + for run_data in findings.sarif.get("runs", []): + sarif_results.extend(run_data.get("results", [])) + + metrics = calculate_metrics(sarif_results, ground_truth) + + print(f"\n=== TruffleHog Workflow Results ===") + print(f"Precision: {metrics['precision']:.2%}") + print(f"Recall: {metrics['recall']:.2%}") + print(f"F1 Score: {metrics['f1']:.2%}") + print(f"True Positives: {metrics['true_positives']}") + print(f"False Positives: {metrics['false_positives']}") + print(f"False Negatives: {metrics['false_negatives']}") + print(f"Findings Count: {len(sarif_results)}") + + min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision") + min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall") + + assert metrics['precision'] >= min_precision + assert metrics['recall'] >= min_recall + + @pytest.mark.benchmark(group="secret_detection") + @pytest.mark.parametrize("model", [ + "gpt-4o-mini", + "gpt-4o", + "claude-3-5-sonnet-20241022" + ]) + def test_llm_workflow(self, benchmark, sdk_client, target_path, ground_truth, model): + """Benchmark LLM workflow with different models""" + + def run_llm(): + provider = "openai" if "gpt" in model else "anthropic" + + run = sdk_client.submit_workflow_with_upload( + workflow_name="llm_secret_detection", + target_path=str(target_path), + parameters={ + "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent", + "llm_model": model, + "llm_provider": provider, + "max_files": 20, + "timeout": 60 + } + ) + + result = sdk_client.wait_for_completion(run.run_id, timeout=300) + assert result.status == "completed", f"Workflow failed: {result.status}" + + findings = sdk_client.get_run_findings(run.run_id) + assert findings and findings.sarif, "No findings returned" + + return findings + + findings = benchmark(run_llm) + + sarif_results = [] + for run_data in findings.sarif.get("runs", []): + sarif_results.extend(run_data.get("results", [])) + + metrics = calculate_metrics(sarif_results, ground_truth) + + print(f"\n=== LLM ({model}) Workflow Results ===") + print(f"Precision: {metrics['precision']:.2%}") + print(f"Recall: {metrics['recall']:.2%}") + print(f"F1 Score: {metrics['f1']:.2%}") + print(f"True Positives: {metrics['true_positives']}") + print(f"False Positives: {metrics['false_positives']}") + print(f"False Negatives: {metrics['false_negatives']}") + print(f"Findings Count: {len(sarif_results)}") + + +class TestSecretDetectionPerformance: + """Performance benchmarks for each tool""" + + @pytest.mark.benchmark(group="secret_detection") + def test_gitleaks_performance(self, benchmark, sdk_client, target_path): + """Benchmark Gitleaks workflow execution speed""" + + def run(): + run = sdk_client.submit_workflow_with_upload( + workflow_name="gitleaks_detection", + target_path=str(target_path), + parameters={"scan_mode": "detect", "no_git": True} + ) + result = sdk_client.wait_for_completion(run.run_id, timeout=300) + return result + + result = benchmark(run) + + max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small") + # Note: Workflow execution time includes orchestration overhead + # so we allow 2x the module threshold + assert result.execution_time < max_time * 2 + + @pytest.mark.benchmark(group="secret_detection") + def test_trufflehog_performance(self, benchmark, sdk_client, target_path): + """Benchmark TruffleHog workflow execution speed""" + + def run(): + run = sdk_client.submit_workflow_with_upload( + workflow_name="trufflehog_detection", + target_path=str(target_path), + parameters={"verify": False} + ) + result = sdk_client.wait_for_completion(run.run_id, timeout=300) + return result + + result = benchmark(run) + + max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small") + assert result.execution_time < max_time * 2 diff --git a/backend/benchmarks/by_category/secret_detection/compare_tools.py b/backend/benchmarks/by_category/secret_detection/compare_tools.py new file mode 100644 index 0000000..ae03c99 --- /dev/null +++ b/backend/benchmarks/by_category/secret_detection/compare_tools.py @@ -0,0 +1,547 @@ +""" +Secret Detection Tools Comparison Report Generator + +Generates comparison reports showing strengths/weaknesses of each tool. +Uses workflow execution via SDK to test complete pipeline. +""" + +import asyncio +import json +import time +from pathlib import Path +from typing import Dict, List, Any, Optional +from dataclasses import dataclass, asdict +import sys + +sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src")) + +from fuzzforge_sdk import FuzzForgeClient + + +@dataclass +class ToolResult: + """Results from running a tool""" + tool_name: str + execution_time: float + findings_count: int + findings_by_file: Dict[str, List[int]] # file_path -> [line_numbers] + unique_files: int + unique_locations: int # unique (file, line) pairs + secret_density: float # average secrets per file + file_types: Dict[str, int] # file extension -> count of files with secrets + + +class SecretDetectionComparison: + """Compare secret detection tools""" + + def __init__(self, target_path: Path, api_url: str = "http://localhost:8000"): + self.target_path = target_path + self.client = FuzzForgeClient(base_url=api_url) + + async def run_workflow(self, workflow_name: str, tool_name: str, config: Dict[str, Any] = None) -> Optional[ToolResult]: + """Run a workflow and extract findings""" + print(f"\nRunning {tool_name} workflow...") + + start_time = time.time() + + try: + # Start workflow + run = self.client.submit_workflow_with_upload( + workflow_name=workflow_name, + target_path=str(self.target_path), + parameters=config or {} + ) + + print(f" Started run: {run.run_id}") + + # Wait for completion (up to 30 minutes for slow LLMs) + print(f" Waiting for completion...") + result = self.client.wait_for_completion(run.run_id, timeout=1800) + + execution_time = time.time() - start_time + + if result.status != "COMPLETED": + print(f"❌ {tool_name} workflow failed: {result.status}") + return None + + # Get findings from SARIF + findings = self.client.get_run_findings(run.run_id) + + if not findings or not findings.sarif: + print(f"⚠️ {tool_name} produced no findings") + return None + + # Extract results from SARIF and group by file + findings_by_file = {} + unique_locations = set() + + for run_data in findings.sarif.get("runs", []): + for result in run_data.get("results", []): + locations = result.get("locations", []) + for location in locations: + physical_location = location.get("physicalLocation", {}) + artifact_location = physical_location.get("artifactLocation", {}) + region = physical_location.get("region", {}) + + uri = artifact_location.get("uri", "") + line = region.get("startLine", 0) + + if uri and line: + if uri not in findings_by_file: + findings_by_file[uri] = [] + findings_by_file[uri].append(line) + unique_locations.add((uri, line)) + + # Sort line numbers for each file + for file_path in findings_by_file: + findings_by_file[file_path] = sorted(set(findings_by_file[file_path])) + + # Calculate file type distribution + file_types = {} + for file_path in findings_by_file: + ext = Path(file_path).suffix or Path(file_path).name # Use full name for files like .env + if ext.startswith('.'): + file_types[ext] = file_types.get(ext, 0) + 1 + else: + file_types['[no extension]'] = file_types.get('[no extension]', 0) + 1 + + # Calculate secret density + secret_density = len(unique_locations) / len(findings_by_file) if findings_by_file else 0 + + print(f" ✓ Found {len(unique_locations)} secrets in {len(findings_by_file)} files (avg {secret_density:.1f} per file)") + + return ToolResult( + tool_name=tool_name, + execution_time=execution_time, + findings_count=len(unique_locations), + findings_by_file=findings_by_file, + unique_files=len(findings_by_file), + unique_locations=len(unique_locations), + secret_density=secret_density, + file_types=file_types + ) + + except Exception as e: + print(f"❌ {tool_name} error: {e}") + return None + + + async def run_all_tools(self, llm_models: List[str] = None) -> List[ToolResult]: + """Run all available tools""" + results = [] + + if llm_models is None: + llm_models = ["gpt-4o-mini"] + + # Gitleaks + result = await self.run_workflow("gitleaks_detection", "Gitleaks", { + "scan_mode": "detect", + "no_git": True, + "redact": False + }) + if result: + results.append(result) + + # TruffleHog + result = await self.run_workflow("trufflehog_detection", "TruffleHog", { + "verify": False, + "max_depth": 10 + }) + if result: + results.append(result) + + # LLM Detector with multiple models + for model in llm_models: + tool_name = f"LLM ({model})" + result = await self.run_workflow("llm_secret_detection", tool_name, { + "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent", + "llm_model": model, + "llm_provider": "openai" if "gpt" in model else "anthropic", + "max_files": 20, + "timeout": 60, + "file_patterns": [ + "*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", + "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", + "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt" + ] + }) + if result: + results.append(result) + + return results + + def _calculate_agreement_matrix(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]: + """Calculate overlap matrix showing common secrets between tool pairs""" + matrix = {} + + for i, result1 in enumerate(results): + matrix[result1.tool_name] = {} + # Convert to set of (file, line) tuples + secrets1 = set() + for file_path, lines in result1.findings_by_file.items(): + for line in lines: + secrets1.add((file_path, line)) + + for result2 in results: + secrets2 = set() + for file_path, lines in result2.findings_by_file.items(): + for line in lines: + secrets2.add((file_path, line)) + + # Count common secrets + common = len(secrets1 & secrets2) + matrix[result1.tool_name][result2.tool_name] = common + + return matrix + + def _get_per_file_comparison(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]: + """Get per-file breakdown of findings across all tools""" + all_files = set() + for result in results: + all_files.update(result.findings_by_file.keys()) + + comparison = {} + for file_path in sorted(all_files): + comparison[file_path] = {} + for result in results: + comparison[file_path][result.tool_name] = len(result.findings_by_file.get(file_path, [])) + + return comparison + + def _get_agreement_stats(self, results: List[ToolResult]) -> Dict[int, int]: + """Calculate how many secrets are found by 1, 2, 3, or all tools""" + # Collect all unique (file, line) pairs across all tools + all_secrets = {} # (file, line) -> list of tools that found it + + for result in results: + for file_path, lines in result.findings_by_file.items(): + for line in lines: + key = (file_path, line) + if key not in all_secrets: + all_secrets[key] = [] + all_secrets[key].append(result.tool_name) + + # Count by number of tools + agreement_counts = {} + for secret, tools in all_secrets.items(): + count = len(set(tools)) # Unique tools + agreement_counts[count] = agreement_counts.get(count, 0) + 1 + + return agreement_counts + + def generate_markdown_report(self, results: List[ToolResult]) -> str: + """Generate markdown comparison report""" + report = [] + report.append("# Secret Detection Tools Comparison\n") + report.append(f"**Target**: {self.target_path.name}") + report.append(f"**Tools**: {', '.join([r.tool_name for r in results])}\n") + + # Summary table with extended metrics + report.append("\n## Summary\n") + report.append("| Tool | Secrets | Files | Avg/File | Time (s) |") + report.append("|------|---------|-------|----------|----------|") + + for result in results: + report.append( + f"| {result.tool_name} | " + f"{result.findings_count} | " + f"{result.unique_files} | " + f"{result.secret_density:.1f} | " + f"{result.execution_time:.2f} |" + ) + + # Agreement Analysis + agreement_stats = self._get_agreement_stats(results) + report.append("\n## Agreement Analysis\n") + report.append("Secrets found by different numbers of tools:\n") + for num_tools in sorted(agreement_stats.keys(), reverse=True): + count = agreement_stats[num_tools] + if num_tools == len(results): + report.append(f"- **All {num_tools} tools agree**: {count} secrets") + elif num_tools == 1: + report.append(f"- **Only 1 tool found**: {count} secrets") + else: + report.append(f"- **{num_tools} tools agree**: {count} secrets") + + # Agreement Matrix + agreement_matrix = self._calculate_agreement_matrix(results) + report.append("\n## Tool Agreement Matrix\n") + report.append("Number of common secrets found by tool pairs:\n") + + # Header row + header = "| Tool |" + separator = "|------|" + for result in results: + short_name = result.tool_name.replace("LLM (", "").replace(")", "") + header += f" {short_name} |" + separator += "------|" + report.append(header) + report.append(separator) + + # Data rows + for result in results: + short_name = result.tool_name.replace("LLM (", "").replace(")", "") + row = f"| {short_name} |" + for result2 in results: + count = agreement_matrix[result.tool_name][result2.tool_name] + row += f" {count} |" + report.append(row) + + # Per-File Comparison + per_file = self._get_per_file_comparison(results) + report.append("\n## Per-File Detailed Comparison\n") + report.append("Secrets found per file by each tool:\n") + + # Header + header = "| File |" + separator = "|------|" + for result in results: + short_name = result.tool_name.replace("LLM (", "").replace(")", "") + header += f" {short_name} |" + separator += "------|" + header += " Total |" + separator += "------|" + report.append(header) + report.append(separator) + + # Show top 15 files by total findings + file_totals = [(f, sum(counts.values())) for f, counts in per_file.items()] + file_totals.sort(key=lambda x: x[1], reverse=True) + + for file_path, total in file_totals[:15]: + row = f"| `{file_path}` |" + for result in results: + count = per_file[file_path].get(result.tool_name, 0) + row += f" {count} |" + row += f" **{total}** |" + report.append(row) + + if len(file_totals) > 15: + report.append(f"| ... and {len(file_totals) - 15} more files | ... | ... | ... | ... | ... |") + + # File Type Breakdown + report.append("\n## File Type Breakdown\n") + all_extensions = set() + for result in results: + all_extensions.update(result.file_types.keys()) + + if all_extensions: + header = "| Type |" + separator = "|------|" + for result in results: + short_name = result.tool_name.replace("LLM (", "").replace(")", "") + header += f" {short_name} |" + separator += "------|" + report.append(header) + report.append(separator) + + for ext in sorted(all_extensions): + row = f"| `{ext}` |" + for result in results: + count = result.file_types.get(ext, 0) + row += f" {count} files |" + report.append(row) + + # File analysis + report.append("\n## Files Analyzed\n") + + # Collect all unique files across all tools + all_files = set() + for result in results: + all_files.update(result.findings_by_file.keys()) + + report.append(f"**Total unique files with secrets**: {len(all_files)}\n") + + for result in results: + report.append(f"\n### {result.tool_name}\n") + report.append(f"Found secrets in **{result.unique_files} files**:\n") + + # Sort files by number of findings (descending) + sorted_files = sorted( + result.findings_by_file.items(), + key=lambda x: len(x[1]), + reverse=True + ) + + # Show top 10 files + for file_path, lines in sorted_files[:10]: + report.append(f"- `{file_path}`: {len(lines)} secrets (lines: {', '.join(map(str, lines[:5]))}{'...' if len(lines) > 5 else ''})") + + if len(sorted_files) > 10: + report.append(f"- ... and {len(sorted_files) - 10} more files") + + # Overlap analysis + if len(results) >= 2: + report.append("\n## Overlap Analysis\n") + + # Find common files + file_sets = [set(r.findings_by_file.keys()) for r in results] + common_files = set.intersection(*file_sets) if file_sets else set() + + if common_files: + report.append(f"\n**Files found by all tools** ({len(common_files)}):\n") + for file_path in sorted(common_files)[:10]: + report.append(f"- `{file_path}`") + else: + report.append("\n**No files were found by all tools**\n") + + # Find tool-specific files + for i, result in enumerate(results): + unique_to_tool = set(result.findings_by_file.keys()) + for j, other_result in enumerate(results): + if i != j: + unique_to_tool -= set(other_result.findings_by_file.keys()) + + if unique_to_tool: + report.append(f"\n**Unique to {result.tool_name}** ({len(unique_to_tool)} files):\n") + for file_path in sorted(unique_to_tool)[:5]: + report.append(f"- `{file_path}`") + if len(unique_to_tool) > 5: + report.append(f"- ... and {len(unique_to_tool) - 5} more") + + # Ground Truth Analysis (if available) + ground_truth_path = Path(__file__).parent / "secret_detection_benchmark_GROUND_TRUTH.json" + if ground_truth_path.exists(): + report.append("\n## Ground Truth Analysis\n") + try: + with open(ground_truth_path) as f: + gt_data = json.load(f) + + gt_total = gt_data.get("total_secrets", 30) + report.append(f"**Expected secrets**: {gt_total} (documented in ground truth)\n") + + # Build ground truth set of (file, line) tuples + gt_secrets = set() + for secret in gt_data.get("secrets", []): + gt_secrets.add((secret["file"], secret["line"])) + + report.append("### Tool Performance vs Ground Truth\n") + report.append("| Tool | Found | Expected | Recall | Extra Findings |") + report.append("|------|-------|----------|--------|----------------|") + + for result in results: + # Build tool findings set + tool_secrets = set() + for file_path, lines in result.findings_by_file.items(): + for line in lines: + tool_secrets.add((file_path, line)) + + # Calculate metrics + true_positives = len(gt_secrets & tool_secrets) + recall = (true_positives / gt_total * 100) if gt_total > 0 else 0 + extra = len(tool_secrets - gt_secrets) + + report.append( + f"| {result.tool_name} | " + f"{result.findings_count} | " + f"{gt_total} | " + f"{recall:.1f}% | " + f"{extra} |" + ) + + # Analyze LLM extra findings + llm_results = [r for r in results if "LLM" in r.tool_name] + if llm_results: + report.append("\n### LLM Extra Findings Explanation\n") + report.append("LLMs may find more than 30 secrets because they detect:\n") + report.append("- **Split secret components**: Each part of `DB_PASS_PART1 + PART2 + PART3` counted separately") + report.append("- **Join operations**: Lines like `''.join(AWS_SECRET_CHARS)` flagged as additional exposure") + report.append("- **Decoding functions**: Code that reveals secrets (e.g., `base64.b64decode()`, `codecs.decode()`)") + report.append("- **Comment identifiers**: Lines marking secret locations without plaintext values") + report.append("\nThese are *technically correct* detections of secret exposure points, not false positives.") + report.append("The ground truth documents 30 'primary' secrets, but the codebase has additional derivative exposures.\n") + + except Exception as e: + report.append(f"*Could not load ground truth: {e}*\n") + + # Performance summary + if results: + report.append("\n## Performance Summary\n") + most_findings = max(results, key=lambda r: r.findings_count) + most_files = max(results, key=lambda r: r.unique_files) + fastest = min(results, key=lambda r: r.execution_time) + + report.append(f"- **Most secrets found**: {most_findings.tool_name} ({most_findings.findings_count} secrets)") + report.append(f"- **Most files covered**: {most_files.tool_name} ({most_files.unique_files} files)") + report.append(f"- **Fastest**: {fastest.tool_name} ({fastest.execution_time:.2f}s)") + + return "\n".join(report) + + def save_json_report(self, results: List[ToolResult], output_path: Path): + """Save results as JSON""" + data = { + "target_path": str(self.target_path), + "results": [asdict(r) for r in results] + } + + with open(output_path, 'w') as f: + json.dump(data, f, indent=2) + + print(f"\n✅ JSON report saved to: {output_path}") + + def cleanup(self): + """Cleanup SDK client""" + self.client.close() + + +async def main(): + """Run comparison and generate reports""" + # Get target path (secret_detection_benchmark) + target_path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "secret_detection_benchmark" + + if not target_path.exists(): + print(f"❌ Target not found at: {target_path}") + return 1 + + print("=" * 80) + print("Secret Detection Tools Comparison") + print("=" * 80) + print(f"Target: {target_path}") + + # LLM models to test + llm_models = [ + "gpt-4o-mini", + "gpt-5-mini" + ] + print(f"LLM models: {', '.join(llm_models)}\n") + + # Run comparison + comparison = SecretDetectionComparison(target_path) + + try: + results = await comparison.run_all_tools(llm_models=llm_models) + + if not results: + print("❌ No tools ran successfully") + return 1 + + # Generate reports + print("\n" + "=" * 80) + markdown_report = comparison.generate_markdown_report(results) + print(markdown_report) + + # Save reports + output_dir = Path(__file__).parent / "results" + output_dir.mkdir(exist_ok=True) + + markdown_path = output_dir / "comparison_report.md" + with open(markdown_path, 'w') as f: + f.write(markdown_report) + print(f"\n✅ Markdown report saved to: {markdown_path}") + + json_path = output_dir / "comparison_results.json" + comparison.save_json_report(results, json_path) + + print("\n" + "=" * 80) + print("✅ Comparison complete!") + print("=" * 80) + + return 0 + + finally: + comparison.cleanup() + + +if __name__ == "__main__": + exit_code = asyncio.run(main()) + sys.exit(exit_code) diff --git a/backend/toolbox/modules/secret_detection/__init__.py b/backend/toolbox/modules/secret_detection/__init__.py index fa66d4e..e3fc98e 100644 --- a/backend/toolbox/modules/secret_detection/__init__.py +++ b/backend/toolbox/modules/secret_detection/__init__.py @@ -7,6 +7,8 @@ in codebases and repositories. Available modules: - TruffleHog: Comprehensive secret detection with verification - Gitleaks: Git-specific secret scanning and leak detection +- GitGuardian: Enterprise secret detection using GitGuardian API +- LLM Secret Detector: AI-powered semantic secret detection """ # Copyright (c) 2025 FuzzingLabs # diff --git a/backend/toolbox/modules/secret_detection/gitleaks.py b/backend/toolbox/modules/secret_detection/gitleaks.py index 5bf2716..7005236 100644 --- a/backend/toolbox/modules/secret_detection/gitleaks.py +++ b/backend/toolbox/modules/secret_detection/gitleaks.py @@ -248,7 +248,8 @@ class GitleaksModule(BaseModule): rule_id = result.get("RuleID", "unknown") description = result.get("Description", "") file_path = result.get("File", "") - line_number = result.get("LineNumber", 0) + line_number = result.get("StartLine", 0) # Gitleaks outputs "StartLine", not "LineNumber" + line_end = result.get("EndLine", 0) secret = result.get("Secret", "") match_text = result.get("Match", "") @@ -278,6 +279,7 @@ class GitleaksModule(BaseModule): category="secret_leak", file_path=file_path if file_path else None, line_start=line_number if line_number > 0 else None, + line_end=line_end if line_end > 0 else None, code_snippet=match_text if match_text else secret, recommendation=self._get_leak_recommendation(rule_id), metadata={ diff --git a/backend/toolbox/modules/secret_detection/llm_secret_detector.py b/backend/toolbox/modules/secret_detection/llm_secret_detector.py new file mode 100644 index 0000000..3ba96f8 --- /dev/null +++ b/backend/toolbox/modules/secret_detection/llm_secret_detector.py @@ -0,0 +1,397 @@ +""" +LLM Secret Detection Module + +This module uses an LLM to detect secrets and sensitive information via semantic understanding. +""" +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + + +import logging +from pathlib import Path +from typing import Dict, Any, List + +from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult +from . import register_module + +logger = logging.getLogger(__name__) + + +@register_module +class LLMSecretDetectorModule(BaseModule): + """ + LLM-based secret detection module using AI semantic analysis. + + Uses an LLM agent to identify secrets through natural language understanding, + potentially catching secrets that pattern-based tools miss. + """ + + def get_metadata(self) -> ModuleMetadata: + """Get module metadata""" + return ModuleMetadata( + name="llm_secret_detector", + version="1.0.0", + description="AI-powered secret detection using LLM semantic analysis", + author="FuzzForge Team", + category="secret_detection", + tags=["secrets", "llm", "ai", "semantic"], + input_schema={ + "type": "object", + "properties": { + "agent_url": { + "type": "string", + "default": "http://fuzzforge-task-agent:8000/a2a/litellm_agent", + "description": "A2A agent endpoint URL" + }, + "llm_model": { + "type": "string", + "default": "gpt-4o-mini", + "description": "LLM model to use" + }, + "llm_provider": { + "type": "string", + "default": "openai", + "description": "LLM provider (openai, anthropic, etc.)" + }, + "file_patterns": { + "type": "array", + "items": {"type": "string"}, + "default": ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*"], + "description": "File patterns to analyze" + }, + "max_files": { + "type": "integer", + "default": 20, + "description": "Maximum number of files to analyze" + }, + "max_file_size": { + "type": "integer", + "default": 30000, + "description": "Maximum file size in bytes (30KB default)" + }, + "timeout": { + "type": "integer", + "default": 45, + "description": "Timeout per file in seconds" + } + }, + "required": [] + }, + output_schema={ + "type": "object", + "properties": { + "findings": { + "type": "array", + "description": "Secrets identified by LLM" + } + } + } + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + # Lazy import to avoid Temporal sandbox restrictions + try: + from fuzzforge_ai.a2a_wrapper import send_agent_task # noqa: F401 + except ImportError: + raise RuntimeError( + "A2A wrapper not available. Ensure fuzzforge_ai module is accessible." + ) + + agent_url = config.get("agent_url") + if not agent_url or not isinstance(agent_url, str): + raise ValueError("agent_url must be a valid URL string") + + max_files = config.get("max_files", 20) + if not isinstance(max_files, int) or max_files <= 0: + raise ValueError("max_files must be a positive integer") + + return True + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute LLM-based secret detection. + + Args: + config: Module configuration + workspace: Path to the workspace containing code to analyze + + Returns: + ModuleResult with secrets detected by LLM + """ + self.start_timer() + + logger.info(f"Starting LLM secret detection in workspace: {workspace}") + + # Extract configuration + agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent") + llm_model = config.get("llm_model", "gpt-4o-mini") + llm_provider = config.get("llm_provider", "openai") + file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"]) + max_files = config.get("max_files", 20) + max_file_size = config.get("max_file_size", 30000) + timeout = config.get("timeout", 30) # Reduced from 45s + + # Find files to analyze + # Skip files that are unlikely to contain secrets + skip_patterns = ['*.sarif', '*.md', '*.html', '*.css', '*.db', '*.sqlite'] + + files_to_analyze = [] + for pattern in file_patterns: + for file_path in workspace.rglob(pattern): + if file_path.is_file(): + try: + # Skip unlikely files + if any(file_path.match(skip) for skip in skip_patterns): + logger.debug(f"Skipping {file_path.name} (unlikely to have secrets)") + continue + + # Check file size + if file_path.stat().st_size > max_file_size: + logger.debug(f"Skipping {file_path} (too large)") + continue + + files_to_analyze.append(file_path) + + if len(files_to_analyze) >= max_files: + break + except Exception as e: + logger.warning(f"Error checking file {file_path}: {e}") + continue + + if len(files_to_analyze) >= max_files: + break + + logger.info(f"Found {len(files_to_analyze)} files to analyze for secrets") + + # Analyze each file with LLM + all_findings = [] + for file_path in files_to_analyze: + logger.info(f"Analyzing: {file_path.relative_to(workspace)}") + + try: + findings = await self._analyze_file_for_secrets( + file_path=file_path, + workspace=workspace, + agent_url=agent_url, + llm_model=llm_model, + llm_provider=llm_provider, + timeout=timeout + ) + all_findings.extend(findings) + + except Exception as e: + logger.error(f"Error analyzing {file_path}: {e}") + # Continue with next file + continue + + logger.info(f"LLM secret detection complete. Found {len(all_findings)} potential secrets.") + + # Create result + return self.create_result( + findings=all_findings, + status="success", + summary={ + "files_analyzed": len(files_to_analyze), + "total_secrets": len(all_findings), + "agent_url": agent_url, + "model": f"{llm_provider}/{llm_model}" + } + ) + + async def _analyze_file_for_secrets( + self, + file_path: Path, + workspace: Path, + agent_url: str, + llm_model: str, + llm_provider: str, + timeout: int + ) -> List[ModuleFinding]: + """Analyze a single file for secrets using LLM""" + + # Read file content + try: + with open(file_path, 'r', encoding='utf-8') as f: + code_content = f.read() + except Exception as e: + logger.error(f"Failed to read {file_path}: {e}") + return [] + + # Build specialized prompt for secret detection + system_prompt = ( + "You are a security expert specialized in detecting secrets and credentials in code. " + "Your job is to find REAL secrets that could be exploited. Be thorough and aggressive.\n\n" + "For each secret found, respond in this exact format:\n" + "SECRET_FOUND: [type like 'AWS Key', 'GitHub Token', 'Database Password']\n" + "SEVERITY: [critical/high/medium/low]\n" + "LINE: [exact line number]\n" + "CONFIDENCE: [high/medium/low]\n" + "DESCRIPTION: [brief explanation]\n\n" + "EXAMPLES of secrets to find:\n" + "1. API Keys: 'AKIA...', 'ghp_...', 'sk_live_...', 'SG.'\n" + "2. Tokens: Bearer tokens, OAuth tokens, JWT secrets\n" + "3. Passwords: Database passwords, admin passwords in configs\n" + "4. Connection Strings: mongodb://, postgres://, redis:// with credentials\n" + "5. Private Keys: -----BEGIN PRIVATE KEY-----, -----BEGIN RSA PRIVATE KEY-----\n" + "6. Cloud Credentials: AWS keys, GCP keys, Azure keys\n" + "7. Encryption Keys: AES keys, secret keys in config\n" + "8. Webhook URLs: URLs with tokens like hooks.slack.com/services/...\n\n" + "FIND EVERYTHING that looks like a real credential, password, key, or token.\n" + "DO NOT be overly cautious. Report anything suspicious.\n\n" + "If absolutely no secrets exist, respond with 'NO_SECRETS_FOUND'." + ) + + user_message = ( + f"Analyze this code for secrets and credentials:\n\n" + f"File: {file_path.relative_to(workspace)}\n\n" + f"```\n{code_content}\n```" + ) + + # Call LLM via A2A wrapper + try: + from fuzzforge_ai.a2a_wrapper import send_agent_task + + result = await send_agent_task( + url=agent_url, + model=llm_model, + provider=llm_provider, + prompt=system_prompt, + message=user_message, + context=f"secret_detection_{file_path.stem}", + timeout=float(timeout) + ) + + llm_response = result.text + + # Debug: Log LLM response + logger.debug(f"LLM response for {file_path.name}: {llm_response[:200]}...") + + except Exception as e: + logger.error(f"A2A call failed for {file_path}: {e}") + return [] + + # Parse LLM response into findings + findings = self._parse_llm_response( + llm_response=llm_response, + file_path=file_path, + workspace=workspace + ) + + if findings: + logger.info(f"Found {len(findings)} secrets in {file_path.name}") + else: + logger.debug(f"No secrets found in {file_path.name}. Response: {llm_response[:500]}") + + return findings + + def _parse_llm_response( + self, + llm_response: str, + file_path: Path, + workspace: Path + ) -> List[ModuleFinding]: + """Parse LLM response into structured findings""" + + if "NO_SECRETS_FOUND" in llm_response: + return [] + + findings = [] + relative_path = str(file_path.relative_to(workspace)) + + # Simple parser for the expected format + lines = llm_response.split('\n') + current_secret = {} + + for line in lines: + line = line.strip() + + if line.startswith("SECRET_FOUND:"): + # Save previous secret if exists + if current_secret: + findings.append(self._create_secret_finding(current_secret, relative_path)) + current_secret = {"type": line.replace("SECRET_FOUND:", "").strip()} + + elif line.startswith("SEVERITY:"): + severity = line.replace("SEVERITY:", "").strip().lower() + current_secret["severity"] = severity + + elif line.startswith("LINE:"): + line_num = line.replace("LINE:", "").strip() + try: + current_secret["line"] = int(line_num) + except ValueError: + current_secret["line"] = None + + elif line.startswith("CONFIDENCE:"): + confidence = line.replace("CONFIDENCE:", "").strip().lower() + current_secret["confidence"] = confidence + + elif line.startswith("DESCRIPTION:"): + current_secret["description"] = line.replace("DESCRIPTION:", "").strip() + + # Save last secret + if current_secret: + findings.append(self._create_secret_finding(current_secret, relative_path)) + + return findings + + def _create_secret_finding(self, secret: Dict[str, Any], file_path: str) -> ModuleFinding: + """Create a ModuleFinding from parsed secret""" + + severity_map = { + "critical": "critical", + "high": "high", + "medium": "medium", + "low": "low" + } + + severity = severity_map.get(secret.get("severity", "medium"), "medium") + confidence = secret.get("confidence", "medium") + + # Adjust severity based on confidence + if confidence == "low" and severity == "critical": + severity = "high" + elif confidence == "low" and severity == "high": + severity = "medium" + + # Create finding + title = f"LLM detected secret: {secret.get('type', 'Unknown secret')}" + description = secret.get("description", "An LLM identified this as a potential secret.") + description += f"\n\nConfidence: {confidence}" + + return self.create_finding( + title=title, + description=description, + severity=severity, + category="secret_detection", + file_path=file_path, + line_start=secret.get("line"), + recommendation=self._get_secret_recommendation(secret.get("type", "")), + metadata={ + "tool": "llm-secret-detector", + "secret_type": secret.get("type", "unknown"), + "confidence": confidence, + "detection_method": "semantic-analysis" + } + ) + + def _get_secret_recommendation(self, secret_type: str) -> str: + """Get remediation recommendation for detected secret""" + return ( + f"A potential {secret_type} was detected by AI analysis. " + f"Verify whether this is a real secret or a false positive. " + f"If real: (1) Revoke the credential immediately, " + f"(2) Remove from codebase and Git history, " + f"(3) Rotate to a new secret, " + f"(4) Use secret management tools for storage. " + f"Implement pre-commit hooks to prevent future leaks." + ) diff --git a/backend/toolbox/modules/secret_detection/trufflehog.py b/backend/toolbox/modules/secret_detection/trufflehog.py index 733482e..6c68e99 100644 --- a/backend/toolbox/modules/secret_detection/trufflehog.py +++ b/backend/toolbox/modules/secret_detection/trufflehog.py @@ -61,11 +61,6 @@ class TruffleHogModule(BaseModule): "items": {"type": "string"}, "description": "Specific detectors to exclude" }, - "max_depth": { - "type": "integer", - "default": 10, - "description": "Maximum directory depth to scan" - }, "concurrency": { "type": "integer", "default": 10, @@ -100,11 +95,6 @@ class TruffleHogModule(BaseModule): if not isinstance(concurrency, int) or concurrency < 1 or concurrency > 50: raise ValueError("Concurrency must be between 1 and 50") - # Check max_depth bounds - max_depth = config.get("max_depth", 10) - if not isinstance(max_depth, int) or max_depth < 1 or max_depth > 20: - raise ValueError("Max depth must be between 1 and 20") - return True async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: @@ -124,6 +114,9 @@ class TruffleHogModule(BaseModule): # Add verification flag if config.get("verify", False): cmd.append("--verify") + else: + # Explicitly disable verification to get all unverified secrets + cmd.append("--no-verification") # Add JSON output cmd.extend(["--json", "--no-update"]) @@ -131,9 +124,6 @@ class TruffleHogModule(BaseModule): # Add concurrency cmd.extend(["--concurrency", str(config.get("concurrency", 10))]) - # Add max depth - cmd.extend(["--max-depth", str(config.get("max_depth", 10))]) - # Add include/exclude detectors if config.get("include_detectors"): cmd.extend(["--include-detectors", ",".join(config["include_detectors"])]) diff --git a/backend/toolbox/workflows/gitleaks_detection/__init__.py b/backend/toolbox/workflows/gitleaks_detection/__init__.py new file mode 100644 index 0000000..e192e0e --- /dev/null +++ b/backend/toolbox/workflows/gitleaks_detection/__init__.py @@ -0,0 +1,19 @@ +""" +Gitleaks Detection Workflow +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +from .workflow import GitleaksDetectionWorkflow +from .activities import scan_with_gitleaks + +__all__ = ["GitleaksDetectionWorkflow", "scan_with_gitleaks"] diff --git a/backend/toolbox/workflows/gitleaks_detection/activities.py b/backend/toolbox/workflows/gitleaks_detection/activities.py new file mode 100644 index 0000000..c7273a3 --- /dev/null +++ b/backend/toolbox/workflows/gitleaks_detection/activities.py @@ -0,0 +1,166 @@ +""" +Gitleaks Detection Workflow Activities +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +import logging +from pathlib import Path +from typing import Dict, Any + +from temporalio import activity + +try: + from toolbox.modules.secret_detection.gitleaks import GitleaksModule +except ImportError: + try: + from modules.secret_detection.gitleaks import GitleaksModule + except ImportError: + from src.toolbox.modules.secret_detection.gitleaks import GitleaksModule + +logger = logging.getLogger(__name__) + + +@activity.defn(name="scan_with_gitleaks") +async def scan_with_gitleaks(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]: + """ + Scan code using Gitleaks. + + Args: + target_path: Path to the workspace containing code + config: Gitleaks configuration + + Returns: + Dictionary containing findings and summary + """ + activity.logger.info(f"Starting Gitleaks scan: {target_path}") + activity.logger.info(f"Config: {config}") + + workspace = Path(target_path) + + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {target_path}") + + # Create and execute Gitleaks module + gitleaks = GitleaksModule() + + # Validate configuration + gitleaks.validate_config(config) + + # Execute scan + result = await gitleaks.execute(config, workspace) + + if result.status == "failed": + raise RuntimeError(f"Gitleaks scan failed: {result.error or 'Unknown error'}") + + activity.logger.info( + f"Gitleaks scan completed: {len(result.findings)} findings from " + f"{result.summary.get('files_scanned', 0)} files" + ) + + # Convert ModuleFinding objects to dicts for serialization + findings_dicts = [finding.model_dump() for finding in result.findings] + + return { + "findings": findings_dicts, + "summary": result.summary + } + + +@activity.defn(name="gitleaks_generate_sarif") +async def gitleaks_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Generate SARIF report from Gitleaks findings. + + Args: + findings: List of finding dictionaries + metadata: Metadata including tool_name, tool_version, run_id + + Returns: + SARIF report dictionary + """ + activity.logger.info(f"Generating SARIF report from {len(findings)} findings") + + # Debug: Check if first finding has line_start + if findings: + first_finding = findings[0] + activity.logger.info(f"First finding keys: {list(first_finding.keys())}") + activity.logger.info(f"line_start value: {first_finding.get('line_start')}") + + # Basic SARIF 2.1.0 structure + sarif_report = { + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": metadata.get("tool_name", "gitleaks"), + "version": metadata.get("tool_version", "8.18.0"), + "informationUri": "https://github.com/gitleaks/gitleaks" + } + }, + "results": [] + } + ] + } + + # Convert findings to SARIF results + for finding in findings: + sarif_result = { + "ruleId": finding.get("metadata", {}).get("rule_id", "unknown"), + "level": _severity_to_sarif_level(finding.get("severity", "warning")), + "message": { + "text": finding.get("title", "Secret leak detected") + }, + "locations": [] + } + + # Add description if present + if finding.get("description"): + sarif_result["message"]["markdown"] = finding["description"] + + # Add location if file path is present + if finding.get("file_path"): + location = { + "physicalLocation": { + "artifactLocation": { + "uri": finding["file_path"] + } + } + } + + # Add region if line number is present + if finding.get("line_start"): + location["physicalLocation"]["region"] = { + "startLine": finding["line_start"] + } + + sarif_result["locations"].append(location) + + sarif_report["runs"][0]["results"].append(sarif_result) + + activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results") + + return sarif_report + + +def _severity_to_sarif_level(severity: str) -> str: + """Convert severity to SARIF level""" + severity_map = { + "critical": "error", + "high": "error", + "medium": "warning", + "low": "note", + "info": "note" + } + return severity_map.get(severity.lower(), "warning") diff --git a/backend/toolbox/workflows/gitleaks_detection/metadata.yaml b/backend/toolbox/workflows/gitleaks_detection/metadata.yaml new file mode 100644 index 0000000..d2c343c --- /dev/null +++ b/backend/toolbox/workflows/gitleaks_detection/metadata.yaml @@ -0,0 +1,42 @@ +name: gitleaks_detection +version: "1.0.0" +vertical: secrets +description: "Detect secrets and credentials using Gitleaks" +author: "FuzzForge Team" +tags: + - "secrets" + - "gitleaks" + - "git" + - "leak-detection" + +workspace_isolation: "shared" + +parameters: + type: object + properties: + scan_mode: + type: string + enum: ["detect", "protect"] + default: "detect" + description: "Scan mode: detect (entire repo history) or protect (staged changes)" + + redact: + type: boolean + default: true + description: "Redact secrets in output" + + no_git: + type: boolean + default: false + description: "Scan files without Git context" + +default_parameters: + scan_mode: "detect" + redact: true + no_git: false + +required_modules: + - "gitleaks" + +supported_volume_modes: + - "ro" diff --git a/backend/toolbox/workflows/gitleaks_detection/workflow.py b/backend/toolbox/workflows/gitleaks_detection/workflow.py new file mode 100644 index 0000000..06c63a4 --- /dev/null +++ b/backend/toolbox/workflows/gitleaks_detection/workflow.py @@ -0,0 +1,187 @@ +""" +Gitleaks Detection Workflow - Temporal Version + +Scans code for secrets and credentials using Gitleaks. +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +from datetime import timedelta +from typing import Dict, Any, Optional + +from temporalio import workflow +from temporalio.common import RetryPolicy + +# Import for type hints (will be executed by worker) +with workflow.unsafe.imports_passed_through(): + import logging + +logger = logging.getLogger(__name__) + + +@workflow.defn +class GitleaksDetectionWorkflow: + """ + Scan code for secrets using Gitleaks. + + User workflow: + 1. User runs: ff workflow run gitleaks_detection . + 2. CLI uploads project to MinIO + 3. Worker downloads project + 4. Worker runs Gitleaks + 5. Secrets reported as findings in SARIF format + """ + + @workflow.run + async def run( + self, + target_id: str, # MinIO UUID of uploaded user code + scan_mode: str = "detect", + redact: bool = True, + no_git: bool = True + ) -> Dict[str, Any]: + """ + Main workflow execution. + + Args: + target_id: UUID of the uploaded target in MinIO + scan_mode: Scan mode ('detect' or 'protect') + redact: Redact secrets in output + no_git: Scan files without Git context + + Returns: + Dictionary containing findings and summary + """ + workflow_id = workflow.info().workflow_id + + workflow.logger.info( + f"Starting GitleaksDetectionWorkflow " + f"(workflow_id={workflow_id}, target_id={target_id}, scan_mode={scan_mode})" + ) + + results = { + "workflow_id": workflow_id, + "target_id": target_id, + "status": "running", + "steps": [], + "findings": [] + } + + try: + # Get run ID for workspace isolation + run_id = workflow.info().run_id + + # Step 1: Download user's project from MinIO + workflow.logger.info("Step 1: Downloading user code from MinIO") + target_path = await workflow.execute_activity( + "get_target", + args=[target_id, run_id, "shared"], + start_to_close_timeout=timedelta(minutes=5), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=1), + maximum_interval=timedelta(seconds=30), + maximum_attempts=3 + ) + ) + results["steps"].append({ + "step": "download", + "status": "success", + "target_path": target_path + }) + workflow.logger.info(f"✓ Target downloaded to: {target_path}") + + # Step 2: Run Gitleaks + workflow.logger.info("Step 2: Scanning with Gitleaks") + + scan_config = { + "scan_mode": scan_mode, + "redact": redact, + "no_git": no_git + } + + scan_results = await workflow.execute_activity( + "scan_with_gitleaks", + args=[target_path, scan_config], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + + results["steps"].append({ + "step": "gitleaks_scan", + "status": "success", + "leaks_found": scan_results.get("summary", {}).get("total_leaks", 0) + }) + workflow.logger.info( + f"✓ Gitleaks scan completed: " + f"{scan_results.get('summary', {}).get('total_leaks', 0)} leaks found" + ) + + # Step 3: Generate SARIF report + workflow.logger.info("Step 3: Generating SARIF report") + sarif_report = await workflow.execute_activity( + "gitleaks_generate_sarif", + args=[scan_results.get("findings", []), {"tool_name": "gitleaks", "tool_version": "8.18.0"}], + start_to_close_timeout=timedelta(minutes=2) + ) + + # Step 4: Upload results to MinIO + workflow.logger.info("Step 4: Uploading results") + try: + results_url = await workflow.execute_activity( + "upload_results", + args=[workflow_id, scan_results, "json"], + start_to_close_timeout=timedelta(minutes=2) + ) + results["results_url"] = results_url + workflow.logger.info(f"✓ Results uploaded to: {results_url}") + except Exception as e: + workflow.logger.warning(f"Failed to upload results: {e}") + results["results_url"] = None + + # Step 5: Cleanup cache + workflow.logger.info("Step 5: Cleaning up cache") + try: + await workflow.execute_activity( + "cleanup_cache", + args=[target_path, "shared"], + start_to_close_timeout=timedelta(minutes=1) + ) + workflow.logger.info("✓ Cache cleaned up") + except Exception as e: + workflow.logger.warning(f"Cache cleanup failed: {e}") + + # Mark workflow as successful + results["status"] = "success" + results["findings"] = scan_results.get("findings", []) + results["summary"] = scan_results.get("summary", {}) + results["sarif"] = sarif_report or {} + workflow.logger.info( + f"✓ Workflow completed successfully: {workflow_id} " + f"({results['summary'].get('total_leaks', 0)} leaks found)" + ) + + return results + + except Exception as e: + workflow.logger.error(f"Workflow failed: {e}") + results["status"] = "error" + results["error"] = str(e) + results["steps"].append({ + "step": "error", + "status": "failed", + "error": str(e) + }) + raise diff --git a/backend/toolbox/workflows/llm_secret_detection/__init__.py b/backend/toolbox/workflows/llm_secret_detection/__init__.py new file mode 100644 index 0000000..81148a7 --- /dev/null +++ b/backend/toolbox/workflows/llm_secret_detection/__init__.py @@ -0,0 +1,6 @@ +"""LLM Secret Detection Workflow""" + +from .workflow import LlmSecretDetectionWorkflow +from .activities import scan_with_llm + +__all__ = ["LlmSecretDetectionWorkflow", "scan_with_llm"] diff --git a/backend/toolbox/workflows/llm_secret_detection/activities.py b/backend/toolbox/workflows/llm_secret_detection/activities.py new file mode 100644 index 0000000..c16691f --- /dev/null +++ b/backend/toolbox/workflows/llm_secret_detection/activities.py @@ -0,0 +1,112 @@ +"""LLM Secret Detection Workflow Activities""" + +from pathlib import Path +from typing import Dict, Any +from temporalio import activity + +try: + from toolbox.modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule +except ImportError: + from modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule + +@activity.defn(name="scan_with_llm") +async def scan_with_llm(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]: + """Scan code using LLM.""" + activity.logger.info(f"Starting LLM secret detection: {target_path}") + workspace = Path(target_path) + + llm_detector = LLMSecretDetectorModule() + llm_detector.validate_config(config) + result = await llm_detector.execute(config, workspace) + + if result.status == "failed": + raise RuntimeError(f"LLM detection failed: {result.error}") + + findings_dicts = [finding.model_dump() for finding in result.findings] + return {"findings": findings_dicts, "summary": result.summary} + + +@activity.defn(name="llm_secret_generate_sarif") +async def llm_secret_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Generate SARIF report from LLM secret detection findings. + + Args: + findings: List of finding dictionaries from LLM secret detector + metadata: Metadata including tool_name, tool_version + + Returns: + SARIF 2.1.0 report dictionary + """ + activity.logger.info(f"Generating SARIF report from {len(findings)} findings") + + # Basic SARIF 2.1.0 structure + sarif_report = { + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": metadata.get("tool_name", "llm-secret-detector"), + "version": metadata.get("tool_version", "1.0.0"), + "informationUri": "https://github.com/FuzzingLabs/fuzzforge_ai" + } + }, + "results": [] + } + ] + } + + # Convert findings to SARIF results + for finding in findings: + sarif_result = { + "ruleId": finding.get("id", finding.get("metadata", {}).get("secret_type", "unknown-secret")), + "level": _severity_to_sarif_level(finding.get("severity", "warning")), + "message": { + "text": finding.get("title", "Secret detected by LLM") + }, + "locations": [] + } + + # Add description if present + if finding.get("description"): + sarif_result["message"]["markdown"] = finding["description"] + + # Add location if file path is present + if finding.get("file_path"): + location = { + "physicalLocation": { + "artifactLocation": { + "uri": finding["file_path"] + } + } + } + + # Add region if line number is present + if finding.get("line_start"): + location["physicalLocation"]["region"] = { + "startLine": finding["line_start"] + } + if finding.get("line_end"): + location["physicalLocation"]["region"]["endLine"] = finding["line_end"] + + sarif_result["locations"].append(location) + + sarif_report["runs"][0]["results"].append(sarif_result) + + activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results") + + return sarif_report + + +def _severity_to_sarif_level(severity: str) -> str: + """Convert severity to SARIF level""" + severity_map = { + "critical": "error", + "high": "error", + "medium": "warning", + "low": "note", + "info": "note" + } + return severity_map.get(severity.lower(), "warning") diff --git a/backend/toolbox/workflows/llm_secret_detection/metadata.yaml b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml new file mode 100644 index 0000000..91b9c2c --- /dev/null +++ b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml @@ -0,0 +1,43 @@ +name: llm_secret_detection +version: "1.0.0" +vertical: secrets +description: "AI-powered secret detection using LLM semantic analysis" +author: "FuzzForge Team" +tags: + - "secrets" + - "llm" + - "ai" + - "semantic" + +workspace_isolation: "shared" + +parameters: + type: object + properties: + agent_url: + type: string + default: "http://fuzzforge-task-agent:8000/a2a/litellm_agent" + + llm_model: + type: string + default: "gpt-4o-mini" + + llm_provider: + type: string + default: "openai" + + max_files: + type: integer + default: 20 + +default_parameters: + agent_url: "http://fuzzforge-task-agent:8000/a2a/litellm_agent" + llm_model: "gpt-4o-mini" + llm_provider: "openai" + max_files: 20 + +required_modules: + - "llm_secret_detector" + +supported_volume_modes: + - "ro" diff --git a/backend/toolbox/workflows/llm_secret_detection/workflow.py b/backend/toolbox/workflows/llm_secret_detection/workflow.py new file mode 100644 index 0000000..4f693d0 --- /dev/null +++ b/backend/toolbox/workflows/llm_secret_detection/workflow.py @@ -0,0 +1,156 @@ +"""LLM Secret Detection Workflow""" + +from datetime import timedelta +from typing import Dict, Any, Optional +from temporalio import workflow +from temporalio.common import RetryPolicy + +@workflow.defn +class LlmSecretDetectionWorkflow: + """Scan code for secrets using LLM AI.""" + + @workflow.run + async def run( + self, + target_id: str, + agent_url: Optional[str] = None, + llm_model: Optional[str] = None, + llm_provider: Optional[str] = None, + max_files: Optional[int] = None, + timeout: Optional[int] = None, + file_patterns: Optional[list] = None + ) -> Dict[str, Any]: + workflow_id = workflow.info().workflow_id + run_id = workflow.info().run_id + + workflow.logger.info( + f"Starting LLM Secret Detection Workflow " + f"(workflow_id={workflow_id}, target_id={target_id}, model={llm_model})" + ) + + results = { + "workflow_id": workflow_id, + "target_id": target_id, + "status": "running", + "steps": [], + "findings": [] + } + + try: + # Step 1: Download target from MinIO + workflow.logger.info("Step 1: Downloading target from MinIO") + target_path = await workflow.execute_activity( + "get_target", + args=[target_id, run_id, "shared"], + start_to_close_timeout=timedelta(minutes=5), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=1), + maximum_interval=timedelta(seconds=30), + maximum_attempts=3 + ) + ) + results["steps"].append({ + "step": "download", + "status": "success", + "target_path": target_path + }) + workflow.logger.info(f"✓ Target downloaded to: {target_path}") + + # Step 2: Scan with LLM + workflow.logger.info("Step 2: Scanning with LLM") + config = {} + if agent_url: + config["agent_url"] = agent_url + if llm_model: + config["llm_model"] = llm_model + if llm_provider: + config["llm_provider"] = llm_provider + if max_files: + config["max_files"] = max_files + if timeout: + config["timeout"] = timeout + if file_patterns: + config["file_patterns"] = file_patterns + + scan_results = await workflow.execute_activity( + "scan_with_llm", + args=[target_path, config], + start_to_close_timeout=timedelta(minutes=30), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + + findings_count = len(scan_results.get("findings", [])) + results["steps"].append({ + "step": "llm_scan", + "status": "success", + "secrets_found": findings_count + }) + workflow.logger.info(f"✓ LLM scan completed: {findings_count} secrets found") + + # Step 3: Generate SARIF report + workflow.logger.info("Step 3: Generating SARIF report") + sarif_report = await workflow.execute_activity( + "llm_generate_sarif", # Use shared LLM SARIF activity + args=[ + scan_results.get("findings", []), + { + "tool_name": f"llm-secret-detector ({llm_model or 'gpt-4o-mini'})", + "tool_version": "1.0.0" + } + ], + start_to_close_timeout=timedelta(minutes=2) + ) + workflow.logger.info("✓ SARIF report generated") + + # Step 4: Upload results to MinIO + workflow.logger.info("Step 4: Uploading results") + try: + results_url = await workflow.execute_activity( + "upload_results", + args=[workflow_id, scan_results, "json"], + start_to_close_timeout=timedelta(minutes=2) + ) + results["results_url"] = results_url + workflow.logger.info(f"✓ Results uploaded to: {results_url}") + except Exception as e: + workflow.logger.warning(f"Failed to upload results: {e}") + results["results_url"] = None + + # Step 5: Cleanup cache + workflow.logger.info("Step 5: Cleaning up cache") + try: + await workflow.execute_activity( + "cleanup_cache", + args=[target_path, "shared"], + start_to_close_timeout=timedelta(minutes=1) + ) + workflow.logger.info("✓ Cache cleaned up") + except Exception as e: + workflow.logger.warning(f"Cache cleanup failed: {e}") + + # Mark workflow as successful + results["status"] = "success" + results["findings"] = scan_results.get("findings", []) + results["summary"] = scan_results.get("summary", {}) + results["sarif"] = sarif_report or {} + workflow.logger.info( + f"✓ Workflow completed successfully: {workflow_id} " + f"({findings_count} secrets found)" + ) + + return results + + except Exception as e: + workflow.logger.error(f"Workflow failed: {e}") + results["status"] = "error" + results["error"] = str(e) + results["steps"].append({ + "step": "error", + "status": "failed", + "error": str(e) + }) + raise diff --git a/backend/toolbox/workflows/trufflehog_detection/__init__.py b/backend/toolbox/workflows/trufflehog_detection/__init__.py new file mode 100644 index 0000000..d580fb8 --- /dev/null +++ b/backend/toolbox/workflows/trufflehog_detection/__init__.py @@ -0,0 +1,13 @@ +""" +TruffleHog Detection Workflow +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. + +from .workflow import TrufflehogDetectionWorkflow +from .activities import scan_with_trufflehog, trufflehog_generate_sarif + +__all__ = ["TrufflehogDetectionWorkflow", "scan_with_trufflehog", "trufflehog_generate_sarif"] diff --git a/backend/toolbox/workflows/trufflehog_detection/activities.py b/backend/toolbox/workflows/trufflehog_detection/activities.py new file mode 100644 index 0000000..26f73af --- /dev/null +++ b/backend/toolbox/workflows/trufflehog_detection/activities.py @@ -0,0 +1,111 @@ +"""TruffleHog Detection Workflow Activities""" + +import logging +from pathlib import Path +from typing import Dict, Any +from temporalio import activity + +try: + from toolbox.modules.secret_detection.trufflehog import TruffleHogModule +except ImportError: + from modules.secret_detection.trufflehog import TruffleHogModule + +@activity.defn(name="scan_with_trufflehog") +async def scan_with_trufflehog(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]: + """Scan code using TruffleHog.""" + activity.logger.info(f"Starting TruffleHog scan: {target_path}") + workspace = Path(target_path) + + trufflehog = TruffleHogModule() + trufflehog.validate_config(config) + result = await trufflehog.execute(config, workspace) + + if result.status == "failed": + raise RuntimeError(f"TruffleHog scan failed: {result.error}") + + findings_dicts = [finding.model_dump() for finding in result.findings] + return {"findings": findings_dicts, "summary": result.summary} + + +@activity.defn(name="trufflehog_generate_sarif") +async def trufflehog_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Generate SARIF report from TruffleHog findings. + + Args: + findings: List of finding dictionaries + metadata: Metadata including tool_name, tool_version + + Returns: + SARIF report dictionary + """ + activity.logger.info(f"Generating SARIF report from {len(findings)} findings") + + # Basic SARIF 2.1.0 structure + sarif_report = { + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": metadata.get("tool_name", "trufflehog"), + "version": metadata.get("tool_version", "3.63.2"), + "informationUri": "https://github.com/trufflesecurity/trufflehog" + } + }, + "results": [] + } + ] + } + + # Convert findings to SARIF results + for finding in findings: + sarif_result = { + "ruleId": finding.get("metadata", {}).get("detector", "unknown"), + "level": _severity_to_sarif_level(finding.get("severity", "warning")), + "message": { + "text": finding.get("title", "Secret detected") + }, + "locations": [] + } + + # Add description if present + if finding.get("description"): + sarif_result["message"]["markdown"] = finding["description"] + + # Add location if file path is present + if finding.get("file_path"): + location = { + "physicalLocation": { + "artifactLocation": { + "uri": finding["file_path"] + } + } + } + + # Add region if line number is present + if finding.get("line_start"): + location["physicalLocation"]["region"] = { + "startLine": finding["line_start"] + } + + sarif_result["locations"].append(location) + + sarif_report["runs"][0]["results"].append(sarif_result) + + activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results") + + return sarif_report + + +def _severity_to_sarif_level(severity: str) -> str: + """Convert severity to SARIF level""" + severity_map = { + "critical": "error", + "high": "error", + "medium": "warning", + "low": "note", + "info": "note" + } + return severity_map.get(severity.lower(), "warning") diff --git a/backend/toolbox/workflows/trufflehog_detection/metadata.yaml b/backend/toolbox/workflows/trufflehog_detection/metadata.yaml new file mode 100644 index 0000000..1a147f0 --- /dev/null +++ b/backend/toolbox/workflows/trufflehog_detection/metadata.yaml @@ -0,0 +1,34 @@ +name: trufflehog_detection +version: "1.0.0" +vertical: secrets +description: "Detect secrets with verification using TruffleHog" +author: "FuzzForge Team" +tags: + - "secrets" + - "trufflehog" + - "verification" + +workspace_isolation: "shared" + +parameters: + type: object + properties: + verify: + type: boolean + default: true + description: "Verify discovered secrets" + + max_depth: + type: integer + default: 10 + description: "Maximum directory depth to scan" + +default_parameters: + verify: true + max_depth: 10 + +required_modules: + - "trufflehog" + +supported_volume_modes: + - "ro" diff --git a/backend/toolbox/workflows/trufflehog_detection/workflow.py b/backend/toolbox/workflows/trufflehog_detection/workflow.py new file mode 100644 index 0000000..62336f3 --- /dev/null +++ b/backend/toolbox/workflows/trufflehog_detection/workflow.py @@ -0,0 +1,104 @@ +"""TruffleHog Detection Workflow""" + +from datetime import timedelta +from typing import Dict, Any +from temporalio import workflow +from temporalio.common import RetryPolicy + +@workflow.defn +class TrufflehogDetectionWorkflow: + """Scan code for secrets using TruffleHog.""" + + @workflow.run + async def run(self, target_id: str, verify: bool = False, concurrency: int = 10) -> Dict[str, Any]: + workflow_id = workflow.info().workflow_id + run_id = workflow.info().run_id + + workflow.logger.info( + f"Starting TrufflehogDetectionWorkflow " + f"(workflow_id={workflow_id}, target_id={target_id}, verify={verify})" + ) + + results = {"workflow_id": workflow_id, "status": "running", "findings": []} + + try: + # Step 1: Download target + workflow.logger.info("Step 1: Downloading target from MinIO") + target_path = await workflow.execute_activity( + "get_target", args=[target_id, run_id, "shared"], + start_to_close_timeout=timedelta(minutes=5), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=1), + maximum_interval=timedelta(seconds=30), + maximum_attempts=3 + ) + ) + workflow.logger.info(f"✓ Target downloaded to: {target_path}") + + # Step 2: Scan with TruffleHog + workflow.logger.info("Step 2: Scanning with TruffleHog") + scan_results = await workflow.execute_activity( + "scan_with_trufflehog", + args=[target_path, {"verify": verify, "concurrency": concurrency}], + start_to_close_timeout=timedelta(minutes=15), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + workflow.logger.info( + f"✓ TruffleHog scan completed: " + f"{scan_results.get('summary', {}).get('total_secrets', 0)} secrets found" + ) + + # Step 3: Generate SARIF report + workflow.logger.info("Step 3: Generating SARIF report") + sarif_report = await workflow.execute_activity( + "trufflehog_generate_sarif", + args=[scan_results.get("findings", []), {"tool_name": "trufflehog", "tool_version": "3.63.2"}], + start_to_close_timeout=timedelta(minutes=2) + ) + + # Step 4: Upload results to MinIO + workflow.logger.info("Step 4: Uploading results") + try: + results_url = await workflow.execute_activity( + "upload_results", + args=[workflow_id, scan_results, "json"], + start_to_close_timeout=timedelta(minutes=2) + ) + results["results_url"] = results_url + workflow.logger.info(f"✓ Results uploaded to: {results_url}") + except Exception as e: + workflow.logger.warning(f"Failed to upload results: {e}") + results["results_url"] = None + + # Step 5: Cleanup + workflow.logger.info("Step 5: Cleaning up cache") + try: + await workflow.execute_activity( + "cleanup_cache", args=[target_path, "shared"], + start_to_close_timeout=timedelta(minutes=1) + ) + workflow.logger.info("✓ Cache cleaned up") + except Exception as e: + workflow.logger.warning(f"Cache cleanup failed: {e}") + + # Mark workflow as successful + results["status"] = "success" + results["findings"] = scan_results.get("findings", []) + results["summary"] = scan_results.get("summary", {}) + results["sarif"] = sarif_report or {} + workflow.logger.info( + f"✓ Workflow completed successfully: {workflow_id} " + f"({results['summary'].get('total_secrets', 0)} secrets found)" + ) + + return results + + except Exception as e: + workflow.logger.error(f"Workflow failed: {e}") + results["status"] = "error" + results["error"] = str(e) + raise diff --git a/cli/src/fuzzforge_cli/commands/ai.py b/cli/src/fuzzforge_cli/commands/ai.py index 6a39493..2667845 100644 --- a/cli/src/fuzzforge_cli/commands/ai.py +++ b/cli/src/fuzzforge_cli/commands/ai.py @@ -27,21 +27,9 @@ app = typer.Typer(name="ai", help="Interact with the FuzzForge AI system") @app.command("agent") def ai_agent() -> None: """Launch the full AI agent CLI with A2A orchestration.""" - console.print("[cyan]🤖 Opening Project FuzzForge AI Agent session[/cyan]\n") - - try: - from fuzzforge_ai.cli import FuzzForgeCLI - - cli = FuzzForgeCLI() - asyncio.run(cli.run()) - except ImportError as exc: - console.print(f"[red]Failed to import AI CLI:[/red] {exc}") - console.print("[dim]Ensure AI dependencies are installed (pip install -e .)[/dim]") - raise typer.Exit(1) from exc - except Exception as exc: # pragma: no cover - runtime safety - console.print(f"[red]Failed to launch AI agent:[/red] {exc}") - console.print("[dim]Check that .env contains LITELLM_MODEL and API keys[/dim]") - raise typer.Exit(1) from exc + console.print("[yellow]⚠️ The AI agent command is temporarily deactivated[/yellow]") + console.print("[dim]This feature is undergoing maintenance and will be re-enabled soon.[/dim]") + raise typer.Exit(0) # Memory + health commands diff --git a/docker-compose.yml b/docker-compose.yml index 18fe145..f55e5ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -324,6 +324,8 @@ services: volumes: # Mount workflow code (read-only) for dynamic discovery - ./backend/toolbox:/app/toolbox:ro + # Mount AI module for A2A wrapper access + - ./ai/src:/app/ai_src:ro # Worker cache for downloaded targets - worker_secrets_cache:/cache networks: diff --git a/sdk/test_exception_handling.py b/sdk/test_exception_handling.py deleted file mode 100644 index fe60f72..0000000 --- a/sdk/test_exception_handling.py +++ /dev/null @@ -1,213 +0,0 @@ -# ruff: noqa: E402 # Imports delayed for environment/logging setup -#!/usr/bin/env python3 -""" -Quick smoke test for SDK exception handling after exceptions.py modifications. -Tests that the modified _fetch_container_diagnostics() no-op doesn't break exception flows. -""" - -import sys -from pathlib import Path - -# Add SDK to path -sdk_path = Path(__file__).parent / "src" -sys.path.insert(0, str(sdk_path)) - -from fuzzforge_sdk.exceptions import ( - FuzzForgeError, - FuzzForgeHTTPError, - WorkflowNotFoundError, - RunNotFoundError, - ErrorContext, - DeploymentError, - WorkflowExecutionError, - ValidationError, -) - - -def test_basic_import(): - """Test that all exception classes can be imported.""" - print("✓ All exception classes imported successfully") - - -def test_error_context(): - """Test ErrorContext instantiation.""" - context = ErrorContext( - url="http://localhost:8000/test", - related_run_id="test-run-123", - workflow_name="test_workflow" - ) - assert context.url == "http://localhost:8000/test" - assert context.related_run_id == "test-run-123" - assert context.workflow_name == "test_workflow" - print("✓ ErrorContext instantiation works") - - -def test_base_exception(): - """Test base FuzzForgeError.""" - context = ErrorContext(related_run_id="test-run-456") - - error = FuzzForgeError("Test error message", context=context) - - assert error.message == "Test error message" - assert error.context.related_run_id == "test-run-456" - print("✓ FuzzForgeError creation works") - - -def test_http_error(): - """Test HTTP error creation.""" - error = FuzzForgeHTTPError( - message="Test HTTP error", - status_code=500, - response_text='{"error": "Internal server error"}' - ) - - assert error.status_code == 500 - assert error.message == "Test HTTP error" - assert error.context.response_data == {"error": "Internal server error"} - print("✓ FuzzForgeHTTPError creation works") - - -def test_workflow_not_found(): - """Test WorkflowNotFoundError with suggestions.""" - error = WorkflowNotFoundError( - workflow_name="nonexistent_workflow", - available_workflows=["security_assessment", "secret_detection"] - ) - - assert error.workflow_name == "nonexistent_workflow" - assert len(error.context.suggested_fixes) > 0 - print("✓ WorkflowNotFoundError with suggestions works") - - -def test_run_not_found(): - """Test RunNotFoundError.""" - error = RunNotFoundError(run_id="missing-run-123") - - assert error.run_id == "missing-run-123" - assert error.context.related_run_id == "missing-run-123" - assert len(error.context.suggested_fixes) > 0 - print("✓ RunNotFoundError creation works") - - -def test_deployment_error(): - """Test DeploymentError.""" - error = DeploymentError( - workflow_name="test_workflow", - message="Deployment failed", - deployment_id="deploy-123", - container_name="test-container-456" # Kept for backward compatibility - ) - - assert error.workflow_name == "test_workflow" - assert error.deployment_id == "deploy-123" - print("✓ DeploymentError creation works") - - -def test_workflow_execution_error(): - """Test WorkflowExecutionError.""" - error = WorkflowExecutionError( - workflow_name="security_assessment", - run_id="run-789", - message="Execution timeout" - ) - - assert error.workflow_name == "security_assessment" - assert error.run_id == "run-789" - assert error.context.related_run_id == "run-789" - print("✓ WorkflowExecutionError creation works") - - -def test_validation_error(): - """Test ValidationError.""" - error = ValidationError( - field_name="target_path", - message="Path does not exist", - provided_value="/nonexistent/path", - expected_format="Valid directory path" - ) - - assert error.field_name == "target_path" - assert error.provided_value == "/nonexistent/path" - assert len(error.context.suggested_fixes) > 0 - print("✓ ValidationError with suggestions works") - - -def test_exception_string_representation(): - """Test exception summary and string conversion.""" - error = FuzzForgeHTTPError( - message="Test error", - status_code=404, - response_text="Not found" - ) - - summary = error.get_summary() - assert "404" in summary - assert "Test error" in summary - - str_repr = str(error) - assert str_repr == summary - print("✓ Exception string representation works") - - -def test_exception_detailed_info(): - """Test detailed error information.""" - context = ErrorContext( - url="http://localhost:8000/test", - workflow_name="test_workflow" - ) - error = FuzzForgeError("Test error", context=context) - - info = error.get_detailed_info() - assert info["message"] == "Test error" - assert info["type"] == "FuzzForgeError" - assert info["url"] == "http://localhost:8000/test" - assert info["workflow_name"] == "test_workflow" - print("✓ Exception detailed info works") - - -def main(): - """Run all tests.""" - print("\n" + "="*60) - print("SDK Exception Handling Smoke Tests") - print("="*60 + "\n") - - tests = [ - test_basic_import, - test_error_context, - test_base_exception, - test_http_error, - test_workflow_not_found, - test_run_not_found, - test_deployment_error, - test_workflow_execution_error, - test_validation_error, - test_exception_string_representation, - test_exception_detailed_info, - ] - - passed = 0 - failed = 0 - - for test_func in tests: - try: - test_func() - passed += 1 - except Exception as e: - print(f"✗ {test_func.__name__} FAILED: {e}") - failed += 1 - - print("\n" + "="*60) - print(f"Results: {passed} passed, {failed} failed") - print("="*60 + "\n") - - if failed > 0: - print("❌ SDK exception handling has issues") - return 1 - else: - print("✅ SDK exception handling works correctly") - print("✅ The no-op _fetch_container_diagnostics() doesn't break exception flows") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test_a2a_wrapper.py b/test_a2a_wrapper.py deleted file mode 100755 index 5748eb0..0000000 --- a/test_a2a_wrapper.py +++ /dev/null @@ -1,152 +0,0 @@ -# ruff: noqa: E402 # Imports delayed for environment/logging setup -#!/usr/bin/env python3 -""" -Test script for A2A wrapper module -Sends tasks to the task-agent to verify functionality -""" -import asyncio -import sys -from pathlib import Path - -# Add ai module to path -ai_src = Path(__file__).parent / "ai" / "src" -sys.path.insert(0, str(ai_src)) - -from fuzzforge_ai.a2a_wrapper import send_agent_task, get_agent_config - - -async def test_basic_task(): - """Test sending a basic task to the agent""" - print("=" * 80) - print("Test 1: Basic task without model specification") - print("=" * 80) - - result = await send_agent_task( - url="http://127.0.0.1:10900/a2a/litellm_agent", - message="What is 2+2? Answer in one sentence.", - timeout=30 - ) - - print(f"Context ID: {result.context_id}") - print(f"Response:\n{result.text}") - print() - return result.context_id - - -async def test_with_model_and_prompt(): - """Test sending a task with custom model and prompt""" - print("=" * 80) - print("Test 2: Task with model and prompt specification") - print("=" * 80) - - result = await send_agent_task( - url="http://127.0.0.1:10900/a2a/litellm_agent", - model="gpt-4o-mini", - provider="openai", - prompt="You are a concise Python expert. Answer in 2 sentences max.", - message="Write a simple Python function that checks if a number is prime.", - context="python_test", - timeout=60 - ) - - print(f"Context ID: {result.context_id}") - print(f"Response:\n{result.text}") - print() - return result.context_id - - -async def test_fuzzing_task(): - """Test a fuzzing-related task""" - print("=" * 80) - print("Test 3: Fuzzing harness generation task") - print("=" * 80) - - result = await send_agent_task( - url="http://127.0.0.1:10900/a2a/litellm_agent", - model="gpt-4o-mini", - provider="openai", - prompt="You are a security testing expert. Provide practical, working code.", - message="Generate a simple fuzzing harness for a C function that parses JSON strings. Include only the essential code.", - context="fuzzing_session", - timeout=90 - ) - - print(f"Context ID: {result.context_id}") - print(f"Response:\n{result.text}") - print() - - -async def test_get_config(): - """Test getting agent configuration""" - print("=" * 80) - print("Test 4: Get agent configuration") - print("=" * 80) - - config = await get_agent_config( - url="http://127.0.0.1:10900/a2a/litellm_agent", - timeout=30 - ) - - print(f"Agent Config:\n{config}") - print() - - -async def test_multi_turn(): - """Test multi-turn conversation with same context""" - print("=" * 80) - print("Test 5: Multi-turn conversation") - print("=" * 80) - - # First message - result1 = await send_agent_task( - url="http://127.0.0.1:10900/a2a/litellm_agent", - message="What is the capital of France?", - context="geography_quiz", - timeout=30 - ) - print("Q1: What is the capital of France?") - print(f"A1: {result1.text}") - print() - - # Follow-up in same context - result2 = await send_agent_task( - url="http://127.0.0.1:10900/a2a/litellm_agent", - message="What is the population of that city?", - context="geography_quiz", # Same context - timeout=30 - ) - print("Q2: What is the population of that city?") - print(f"A2: {result2.text}") - print() - - -async def main(): - """Run all tests""" - print("\n" + "=" * 80) - print("FuzzForge A2A Wrapper Test Suite") - print("=" * 80 + "\n") - - try: - # Run tests - await test_basic_task() - await test_with_model_and_prompt() - await test_fuzzing_task() - await test_get_config() - await test_multi_turn() - - print("=" * 80) - print("✅ All tests completed successfully!") - print("=" * 80) - - except Exception as e: - print(f"\n❌ Test failed with error: {e}") - import traceback - traceback.print_exc() - return 1 - - return 0 - - -if __name__ == "__main__": - exit_code = asyncio.run(main()) - sys.exit(exit_code) diff --git a/test_projects/README.md b/test_projects/README.md index 616978c..19cb8b5 100644 --- a/test_projects/README.md +++ b/test_projects/README.md @@ -1,6 +1,6 @@ # FuzzForge Vulnerable Test Project -This directory contains a comprehensive vulnerable test application designed to validate FuzzForge's security workflows. The project contains multiple categories of security vulnerabilities to test both the `security_assessment` and `secret_detection_scan` workflows. +This directory contains a comprehensive vulnerable test application designed to validate FuzzForge's security workflows. The project contains multiple categories of security vulnerabilities to test `security_assessment`, `gitleaks_detection`, `trufflehog_detection`, and `llm_secret_detection` workflows. ## Test Project Overview @@ -9,7 +9,9 @@ This directory contains a comprehensive vulnerable test application designed to **Supported Workflows**: - `security_assessment` - General security scanning and analysis -- `secret_detection_scan` - Detection of secrets, credentials, and sensitive data +- `gitleaks_detection` - Pattern-based secret detection +- `trufflehog_detection` - Entropy-based secret detection with verification +- `llm_secret_detection` - AI-powered semantic secret detection **Vulnerabilities Included**: - SQL injection vulnerabilities @@ -38,7 +40,7 @@ This directory contains a comprehensive vulnerable test application designed to ### Testing with FuzzForge Workflows -The vulnerable application can be tested with both essential workflows: +The vulnerable application can be tested with multiple security workflows: ```bash # Test security assessment workflow @@ -49,8 +51,16 @@ curl -X POST http://localhost:8000/workflows/security_assessment/submit \ "volume_mode": "ro" }' -# Test secret detection workflow -curl -X POST http://localhost:8000/workflows/secret_detection_scan/submit \ +# Test Gitleaks secret detection workflow +curl -X POST http://localhost:8000/workflows/gitleaks_detection/submit \ + -H "Content-Type: application/json" \ + -d '{ + "target_path": "/path/to/test_projects/vulnerable_app", + "volume_mode": "ro" + }' + +# Test TruffleHog secret detection workflow +curl -X POST http://localhost:8000/workflows/trufflehog_detection/submit \ -H "Content-Type: application/json" \ -d '{ "target_path": "/path/to/test_projects/vulnerable_app", @@ -70,7 +80,9 @@ Each workflow should produce SARIF-formatted results with: A successful test should detect: - **Security Assessment**: At least 20 various security vulnerabilities -- **Secret Detection**: At least 10 different types of secrets and credentials +- **Gitleaks Detection**: At least 10 different types of secrets +- **TruffleHog Detection**: At least 5 high-entropy secrets +- **LLM Secret Detection**: At least 15 secrets with semantic understanding --- diff --git a/test_projects/rust_fuzz_test/fuzz/fuzz_targets/fuzz_waterfall.rs b/test_projects/rust_fuzz_test/fuzz/fuzz_targets/fuzz_waterfall.rs new file mode 100644 index 0000000..fec90be --- /dev/null +++ b/test_projects/rust_fuzz_test/fuzz/fuzz_targets/fuzz_waterfall.rs @@ -0,0 +1,9 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use rust_fuzz_test::check_secret_waterfall; + +fuzz_target!(|data: &[u8]| { + // Fuzz the waterfall vulnerability - sequential secret checking + let _ = check_secret_waterfall(data); +}); diff --git a/test_security_workflow.py b/test_security_workflow.py deleted file mode 100644 index 7e1acb3..0000000 --- a/test_security_workflow.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -""" -Test security_assessment workflow with vulnerable_app test project -""" - -import asyncio -import shutil -import sys -import uuid -from pathlib import Path - -import boto3 -from temporalio.client import Client - - -async def main(): - # Configuration - temporal_address = "localhost:7233" - s3_endpoint = "http://localhost:9000" - s3_access_key = "fuzzforge" - s3_secret_key = "fuzzforge123" - - # Initialize S3 client - s3_client = boto3.client( - 's3', - endpoint_url=s3_endpoint, - aws_access_key_id=s3_access_key, - aws_secret_access_key=s3_secret_key, - region_name='us-east-1', - use_ssl=False - ) - - print("=" * 70) - print("Testing security_assessment workflow with vulnerable_app") - print("=" * 70) - - # Step 1: Create tarball of vulnerable_app - print("\n[1/5] Creating tarball of test_projects/vulnerable_app...") - vulnerable_app_dir = Path("test_projects/vulnerable_app") - - if not vulnerable_app_dir.exists(): - print(f"❌ Error: {vulnerable_app_dir} not found") - return 1 - - target_id = str(uuid.uuid4()) - tarball_path = f"/tmp/{target_id}.tar.gz" - - # Create tarball - shutil.make_archive( - tarball_path.replace('.tar.gz', ''), - 'gztar', - root_dir=vulnerable_app_dir.parent, - base_dir=vulnerable_app_dir.name - ) - - tarball_size = Path(tarball_path).stat().st_size - print(f"✓ Created tarball: {tarball_path} ({tarball_size / 1024:.2f} KB)") - - # Step 2: Upload to MinIO - print(f"\n[2/5] Uploading target to MinIO (target_id={target_id})...") - try: - s3_key = f'{target_id}/target' - s3_client.upload_file( - Filename=tarball_path, - Bucket='targets', - Key=s3_key - ) - print(f"✓ Uploaded to s3://targets/{s3_key}") - except Exception as e: - print(f"❌ Failed to upload: {e}") - return 1 - finally: - # Cleanup local tarball - Path(tarball_path).unlink(missing_ok=True) - - # Step 3: Connect to Temporal - print(f"\n[3/5] Connecting to Temporal at {temporal_address}...") - try: - client = await Client.connect(temporal_address) - print("✓ Connected to Temporal") - except Exception as e: - print(f"❌ Failed to connect to Temporal: {e}") - return 1 - - # Step 4: Execute workflow - print("\n[4/5] Executing security_assessment workflow...") - workflow_id = f"security-assessment-{target_id}" - - try: - result = await client.execute_workflow( - "SecurityAssessmentWorkflow", - args=[target_id], - id=workflow_id, - task_queue="rust-queue" - ) - - print(f"✓ Workflow completed successfully: {workflow_id}") - - except Exception as e: - print(f"❌ Workflow execution failed: {e}") - return 1 - - # Step 5: Display results - print("\n[5/5] Results Summary:") - print("=" * 70) - - if result.get("status") == "success": - summary = result.get("summary", {}) - print(f"Total findings: {summary.get('total_findings', 0)}") - print(f"Files scanned: {summary.get('files_scanned', 0)}") - - # Display SARIF results URL if available - if result.get("results_url"): - print(f"Results URL: {result['results_url']}") - - # Show workflow steps - print("\nWorkflow steps:") - for step in result.get("steps", []): - status_icon = "✓" if step["status"] == "success" else "✗" - print(f" {status_icon} {step['step']}") - - print("\n" + "=" * 70) - print("✅ Security assessment workflow test PASSED") - print("=" * 70) - return 0 - else: - print(f"❌ Workflow failed: {result.get('error', 'Unknown error')}") - return 1 - - -if __name__ == "__main__": - try: - exit_code = asyncio.run(main()) - sys.exit(exit_code) - except KeyboardInterrupt: - print("\n\nTest interrupted by user") - sys.exit(1) - except Exception as e: - print(f"\n❌ Fatal error: {e}") - import traceback - traceback.print_exc() - sys.exit(1) diff --git a/test_temporal_workflow.py b/test_temporal_workflow.py deleted file mode 100644 index 85976dd..0000000 --- a/test_temporal_workflow.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for Temporal workflow execution. - -This script: -1. Creates a test target file -2. Uploads it to MinIO -3. Executes the rust_test workflow -4. Prints the results -""" - -import asyncio -import uuid -from pathlib import Path - -import boto3 -from temporalio.client import Client - - -async def main(): - print("=" * 60) - print("Testing Temporal Workflow Execution") - print("=" * 60) - - # Step 1: Create a test target file - print("\n[1/4] Creating test target file...") - test_file = Path("/tmp/test_target.txt") - test_file.write_text("This is a test target file for FuzzForge Temporal architecture.") - print(f"✓ Created test file: {test_file} ({test_file.stat().st_size} bytes)") - - # Step 2: Upload to MinIO - print("\n[2/4] Uploading target to MinIO...") - s3_client = boto3.client( - 's3', - endpoint_url='http://localhost:9000', - aws_access_key_id='fuzzforge', - aws_secret_access_key='fuzzforge123', - region_name='us-east-1', - use_ssl=False - ) - - # Generate target ID - target_id = str(uuid.uuid4()) - s3_key = f'{target_id}/target' - - # Upload file - s3_client.upload_file( - str(test_file), - 'targets', - s3_key, - ExtraArgs={ - 'Metadata': { - 'test': 'true', - 'uploaded_by': 'test_script' - } - } - ) - print(f"✓ Uploaded to MinIO: s3://targets/{s3_key}") - print(f" Target ID: {target_id}") - - # Step 3: Execute workflow - print("\n[3/4] Connecting to Temporal...") - client = await Client.connect("localhost:7233") - print("✓ Connected to Temporal") - - print("\n[4/4] Starting workflow execution...") - workflow_id = f"test-workflow-{uuid.uuid4().hex[:8]}" - - # Start workflow - handle = await client.start_workflow( - "RustTestWorkflow", # Workflow name (class name) - args=[target_id], # Arguments: target_id - id=workflow_id, - task_queue="rust-queue", # Route to rust worker - ) - - print("✓ Workflow started!") - print(f" Workflow ID: {workflow_id}") - print(f" Run ID: {handle.first_execution_run_id}") - print(f"\n View in UI: http://localhost:8080/namespaces/default/workflows/{workflow_id}") - - print("\nWaiting for workflow to complete...") - result = await handle.result() - - print("\n" + "=" * 60) - print("✓ WORKFLOW COMPLETED SUCCESSFULLY!") - print("=" * 60) - print("\nResults:") - print(f" Status: {result.get('status')}") - print(f" Workflow ID: {result.get('workflow_id')}") - print(f" Target ID: {result.get('target_id')}") - print(f" Message: {result.get('message')}") - print(f" Results URL: {result.get('results_url')}") - - print("\nSteps executed:") - for i, step in enumerate(result.get('steps', []), 1): - print(f" {i}. {step.get('step')}: {step.get('status')}") - - print("\n" + "=" * 60) - print("Test completed successfully! 🎉") - print("=" * 60) - - -if __name__ == "__main__": - asyncio.run(main())