feat: Add secret detection workflows and comprehensive benchmarking (#15)

Add three production-ready secret detection workflows with full benchmarking infrastructure: **New Workflows:** - gitleaks_detection: Pattern-based secret scanning (13/32 benchmark secrets) - trufflehog_detection: Entropy-based detection with verification (1/32 benchmark secrets) - llm_secret_detection: AI-powered semantic analysis (32/32 benchmark secrets - 100% recall) **Benchmarking Infrastructure:** - Ground truth dataset with 32 documented secrets (12 Easy, 10 Medium, 10 Hard) - Automated comparison tools for precision/recall testing - SARIF output format for all workflows - Performance metrics and tool comparison reports **Fixes:** - Set gitleaks default to no_git=True for uploaded directories - Update documentation with correct secret counts and workflow names - Temporarily deactivate AI agent command - Clean up deprecated test files and GitGuardian workflow **Testing:** All workflows verified on secret_detection_benchmark and vulnerable_app test projects. Workers healthy and system fully functional.
2026-07-10 20:23:42 +02:00 · 2025-10-16 11:21:24 +02:00
parent c3ce03e216
commit 2da986ebb0
28 changed files with 2505 additions and 648 deletions
@@ -0,0 +1,19 @@
+"""
+Gitleaks Detection Workflow
+"""
+
+# Copyright (c) 2025 FuzzingLabs
+#
+# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
+# at the root of this repository for details.
+#
+# After the Change Date (four years from publication), this version of the
+# Licensed Work will be made available under the Apache License, Version 2.0.
+# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
+#
+# Additional attribution and requirements are provided in the NOTICE file.
+
+from .workflow import GitleaksDetectionWorkflow
+from .activities import scan_with_gitleaks
+
+__all__ = ["GitleaksDetectionWorkflow", "scan_with_gitleaks"]
@@ -0,0 +1,166 @@
+"""
+Gitleaks Detection Workflow Activities
+"""
+
+# Copyright (c) 2025 FuzzingLabs
+#
+# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
+# at the root of this repository for details.
+#
+# After the Change Date (four years from publication), this version of the
+# Licensed Work will be made available under the Apache License, Version 2.0.
+# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
+#
+# Additional attribution and requirements are provided in the NOTICE file.
+
+import logging
+from pathlib import Path
+from typing import Dict, Any
+
+from temporalio import activity
+
+try:
+    from toolbox.modules.secret_detection.gitleaks import GitleaksModule
+except ImportError:
+    try:
+        from modules.secret_detection.gitleaks import GitleaksModule
+    except ImportError:
+        from src.toolbox.modules.secret_detection.gitleaks import GitleaksModule
+
+logger = logging.getLogger(__name__)
+
+
+@activity.defn(name="scan_with_gitleaks")
+async def scan_with_gitleaks(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Scan code using Gitleaks.
+
+    Args:
+        target_path: Path to the workspace containing code
+        config: Gitleaks configuration
+
+    Returns:
+        Dictionary containing findings and summary
+    """
+    activity.logger.info(f"Starting Gitleaks scan: {target_path}")
+    activity.logger.info(f"Config: {config}")
+
+    workspace = Path(target_path)
+
+    if not workspace.exists():
+        raise FileNotFoundError(f"Workspace not found: {target_path}")
+
+    # Create and execute Gitleaks module
+    gitleaks = GitleaksModule()
+
+    # Validate configuration
+    gitleaks.validate_config(config)
+
+    # Execute scan
+    result = await gitleaks.execute(config, workspace)
+
+    if result.status == "failed":
+        raise RuntimeError(f"Gitleaks scan failed: {result.error or 'Unknown error'}")
+
+    activity.logger.info(
+        f"Gitleaks scan completed: {len(result.findings)} findings from "
+        f"{result.summary.get('files_scanned', 0)} files"
+    )
+
+    # Convert ModuleFinding objects to dicts for serialization
+    findings_dicts = [finding.model_dump() for finding in result.findings]
+
+    return {
+        "findings": findings_dicts,
+        "summary": result.summary
+    }
+
+
+@activity.defn(name="gitleaks_generate_sarif")
+async def gitleaks_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Generate SARIF report from Gitleaks findings.
+
+    Args:
+        findings: List of finding dictionaries
+        metadata: Metadata including tool_name, tool_version, run_id
+
+    Returns:
+        SARIF report dictionary
+    """
+    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")
+
+    # Debug: Check if first finding has line_start
+    if findings:
+        first_finding = findings[0]
+        activity.logger.info(f"First finding keys: {list(first_finding.keys())}")
+        activity.logger.info(f"line_start value: {first_finding.get('line_start')}")
+
+    # Basic SARIF 2.1.0 structure
+    sarif_report = {
+        "version": "2.1.0",
+        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "name": metadata.get("tool_name", "gitleaks"),
+                        "version": metadata.get("tool_version", "8.18.0"),
+                        "informationUri": "https://github.com/gitleaks/gitleaks"
+                    }
+                },
+                "results": []
+            }
+        ]
+    }
+
+    # Convert findings to SARIF results
+    for finding in findings:
+        sarif_result = {
+            "ruleId": finding.get("metadata", {}).get("rule_id", "unknown"),
+            "level": _severity_to_sarif_level(finding.get("severity", "warning")),
+            "message": {
+                "text": finding.get("title", "Secret leak detected")
+            },
+            "locations": []
+        }
+
+        # Add description if present
+        if finding.get("description"):
+            sarif_result["message"]["markdown"] = finding["description"]
+
+        # Add location if file path is present
+        if finding.get("file_path"):
+            location = {
+                "physicalLocation": {
+                    "artifactLocation": {
+                        "uri": finding["file_path"]
+                    }
+                }
+            }
+
+            # Add region if line number is present
+            if finding.get("line_start"):
+                location["physicalLocation"]["region"] = {
+                    "startLine": finding["line_start"]
+                }
+
+            sarif_result["locations"].append(location)
+
+        sarif_report["runs"][0]["results"].append(sarif_result)
+
+    activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results")
+
+    return sarif_report
+
+
+def _severity_to_sarif_level(severity: str) -> str:
+    """Convert severity to SARIF level"""
+    severity_map = {
+        "critical": "error",
+        "high": "error",
+        "medium": "warning",
+        "low": "note",
+        "info": "note"
+    }
+    return severity_map.get(severity.lower(), "warning")
@@ -0,0 +1,42 @@
+name: gitleaks_detection
+version: "1.0.0"
+vertical: secrets
+description: "Detect secrets and credentials using Gitleaks"
+author: "FuzzForge Team"
+tags:
+  - "secrets"
+  - "gitleaks"
+  - "git"
+  - "leak-detection"
+
+workspace_isolation: "shared"
+
+parameters:
+  type: object
+  properties:
+    scan_mode:
+      type: string
+      enum: ["detect", "protect"]
+      default: "detect"
+      description: "Scan mode: detect (entire repo history) or protect (staged changes)"
+
+    redact:
+      type: boolean
+      default: true
+      description: "Redact secrets in output"
+
+    no_git:
+      type: boolean
+      default: false
+      description: "Scan files without Git context"
+
+default_parameters:
+  scan_mode: "detect"
+  redact: true
+  no_git: false
+
+required_modules:
+  - "gitleaks"
+
+supported_volume_modes:
+  - "ro"
@@ -0,0 +1,187 @@
+"""
+Gitleaks Detection Workflow - Temporal Version
+
+Scans code for secrets and credentials using Gitleaks.
+"""
+
+# Copyright (c) 2025 FuzzingLabs
+#
+# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
+# at the root of this repository for details.
+#
+# After the Change Date (four years from publication), this version of the
+# Licensed Work will be made available under the Apache License, Version 2.0.
+# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
+#
+# Additional attribution and requirements are provided in the NOTICE file.
+
+from datetime import timedelta
+from typing import Dict, Any, Optional
+
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+# Import for type hints (will be executed by worker)
+with workflow.unsafe.imports_passed_through():
+    import logging
+
+logger = logging.getLogger(__name__)
+
+
+@workflow.defn
+class GitleaksDetectionWorkflow:
+    """
+    Scan code for secrets using Gitleaks.
+
+    User workflow:
+    1. User runs: ff workflow run gitleaks_detection .
+    2. CLI uploads project to MinIO
+    3. Worker downloads project
+    4. Worker runs Gitleaks
+    5. Secrets reported as findings in SARIF format
+    """
+
+    @workflow.run
+    async def run(
+        self,
+        target_id: str,  # MinIO UUID of uploaded user code
+        scan_mode: str = "detect",
+        redact: bool = True,
+        no_git: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Main workflow execution.
+
+        Args:
+            target_id: UUID of the uploaded target in MinIO
+            scan_mode: Scan mode ('detect' or 'protect')
+            redact: Redact secrets in output
+            no_git: Scan files without Git context
+
+        Returns:
+            Dictionary containing findings and summary
+        """
+        workflow_id = workflow.info().workflow_id
+
+        workflow.logger.info(
+            f"Starting GitleaksDetectionWorkflow "
+            f"(workflow_id={workflow_id}, target_id={target_id}, scan_mode={scan_mode})"
+        )
+
+        results = {
+            "workflow_id": workflow_id,
+            "target_id": target_id,
+            "status": "running",
+            "steps": [],
+            "findings": []
+        }
+
+        try:
+            # Get run ID for workspace isolation
+            run_id = workflow.info().run_id
+
+            # Step 1: Download user's project from MinIO
+            workflow.logger.info("Step 1: Downloading user code from MinIO")
+            target_path = await workflow.execute_activity(
+                "get_target",
+                args=[target_id, run_id, "shared"],
+                start_to_close_timeout=timedelta(minutes=5),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=1),
+                    maximum_interval=timedelta(seconds=30),
+                    maximum_attempts=3
+                )
+            )
+            results["steps"].append({
+                "step": "download",
+                "status": "success",
+                "target_path": target_path
+            })
+            workflow.logger.info(f"✓ Target downloaded to: {target_path}")
+
+            # Step 2: Run Gitleaks
+            workflow.logger.info("Step 2: Scanning with Gitleaks")
+
+            scan_config = {
+                "scan_mode": scan_mode,
+                "redact": redact,
+                "no_git": no_git
+            }
+
+            scan_results = await workflow.execute_activity(
+                "scan_with_gitleaks",
+                args=[target_path, scan_config],
+                start_to_close_timeout=timedelta(minutes=10),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=2),
+                    maximum_interval=timedelta(seconds=60),
+                    maximum_attempts=2
+                )
+            )
+
+            results["steps"].append({
+                "step": "gitleaks_scan",
+                "status": "success",
+                "leaks_found": scan_results.get("summary", {}).get("total_leaks", 0)
+            })
+            workflow.logger.info(
+                f"✓ Gitleaks scan completed: "
+                f"{scan_results.get('summary', {}).get('total_leaks', 0)} leaks found"
+            )
+
+            # Step 3: Generate SARIF report
+            workflow.logger.info("Step 3: Generating SARIF report")
+            sarif_report = await workflow.execute_activity(
+                "gitleaks_generate_sarif",
+                args=[scan_results.get("findings", []), {"tool_name": "gitleaks", "tool_version": "8.18.0"}],
+                start_to_close_timeout=timedelta(minutes=2)
+            )
+
+            # Step 4: Upload results to MinIO
+            workflow.logger.info("Step 4: Uploading results")
+            try:
+                results_url = await workflow.execute_activity(
+                    "upload_results",
+                    args=[workflow_id, scan_results, "json"],
+                    start_to_close_timeout=timedelta(minutes=2)
+                )
+                results["results_url"] = results_url
+                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
+            except Exception as e:
+                workflow.logger.warning(f"Failed to upload results: {e}")
+                results["results_url"] = None
+
+            # Step 5: Cleanup cache
+            workflow.logger.info("Step 5: Cleaning up cache")
+            try:
+                await workflow.execute_activity(
+                    "cleanup_cache",
+                    args=[target_path, "shared"],
+                    start_to_close_timeout=timedelta(minutes=1)
+                )
+                workflow.logger.info("✓ Cache cleaned up")
+            except Exception as e:
+                workflow.logger.warning(f"Cache cleanup failed: {e}")
+
+            # Mark workflow as successful
+            results["status"] = "success"
+            results["findings"] = scan_results.get("findings", [])
+            results["summary"] = scan_results.get("summary", {})
+            results["sarif"] = sarif_report or {}
+            workflow.logger.info(
+                f"✓ Workflow completed successfully: {workflow_id} "
+                f"({results['summary'].get('total_leaks', 0)} leaks found)"
+            )
+
+            return results
+
+        except Exception as e:
+            workflow.logger.error(f"Workflow failed: {e}")
+            results["status"] = "error"
+            results["error"] = str(e)
+            results["steps"].append({
+                "step": "error",
+                "status": "failed",
+                "error": str(e)
+            })
+            raise
@@ -0,0 +1,6 @@
+"""LLM Secret Detection Workflow"""
+
+from .workflow import LlmSecretDetectionWorkflow
+from .activities import scan_with_llm
+
+__all__ = ["LlmSecretDetectionWorkflow", "scan_with_llm"]
@@ -0,0 +1,112 @@
+"""LLM Secret Detection Workflow Activities"""
+
+from pathlib import Path
+from typing import Dict, Any
+from temporalio import activity
+
+try:
+    from toolbox.modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule
+except ImportError:
+    from modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule
+
+@activity.defn(name="scan_with_llm")
+async def scan_with_llm(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
+    """Scan code using LLM."""
+    activity.logger.info(f"Starting LLM secret detection: {target_path}")
+    workspace = Path(target_path)
+
+    llm_detector = LLMSecretDetectorModule()
+    llm_detector.validate_config(config)
+    result = await llm_detector.execute(config, workspace)
+
+    if result.status == "failed":
+        raise RuntimeError(f"LLM detection failed: {result.error}")
+
+    findings_dicts = [finding.model_dump() for finding in result.findings]
+    return {"findings": findings_dicts, "summary": result.summary}
+
+
+@activity.defn(name="llm_secret_generate_sarif")
+async def llm_secret_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Generate SARIF report from LLM secret detection findings.
+
+    Args:
+        findings: List of finding dictionaries from LLM secret detector
+        metadata: Metadata including tool_name, tool_version
+
+    Returns:
+        SARIF 2.1.0 report dictionary
+    """
+    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")
+
+    # Basic SARIF 2.1.0 structure
+    sarif_report = {
+        "version": "2.1.0",
+        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "name": metadata.get("tool_name", "llm-secret-detector"),
+                        "version": metadata.get("tool_version", "1.0.0"),
+                        "informationUri": "https://github.com/FuzzingLabs/fuzzforge_ai"
+                    }
+                },
+                "results": []
+            }
+        ]
+    }
+
+    # Convert findings to SARIF results
+    for finding in findings:
+        sarif_result = {
+            "ruleId": finding.get("id", finding.get("metadata", {}).get("secret_type", "unknown-secret")),
+            "level": _severity_to_sarif_level(finding.get("severity", "warning")),
+            "message": {
+                "text": finding.get("title", "Secret detected by LLM")
+            },
+            "locations": []
+        }
+
+        # Add description if present
+        if finding.get("description"):
+            sarif_result["message"]["markdown"] = finding["description"]
+
+        # Add location if file path is present
+        if finding.get("file_path"):
+            location = {
+                "physicalLocation": {
+                    "artifactLocation": {
+                        "uri": finding["file_path"]
+                    }
+                }
+            }
+
+            # Add region if line number is present
+            if finding.get("line_start"):
+                location["physicalLocation"]["region"] = {
+                    "startLine": finding["line_start"]
+                }
+                if finding.get("line_end"):
+                    location["physicalLocation"]["region"]["endLine"] = finding["line_end"]
+
+            sarif_result["locations"].append(location)
+
+        sarif_report["runs"][0]["results"].append(sarif_result)
+
+    activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results")
+
+    return sarif_report
+
+
+def _severity_to_sarif_level(severity: str) -> str:
+    """Convert severity to SARIF level"""
+    severity_map = {
+        "critical": "error",
+        "high": "error",
+        "medium": "warning",
+        "low": "note",
+        "info": "note"
+    }
+    return severity_map.get(severity.lower(), "warning")
@@ -0,0 +1,43 @@
+name: llm_secret_detection
+version: "1.0.0"
+vertical: secrets
+description: "AI-powered secret detection using LLM semantic analysis"
+author: "FuzzForge Team"
+tags:
+  - "secrets"
+  - "llm"
+  - "ai"
+  - "semantic"
+
+workspace_isolation: "shared"
+
+parameters:
+  type: object
+  properties:
+    agent_url:
+      type: string
+      default: "http://fuzzforge-task-agent:8000/a2a/litellm_agent"
+    
+    llm_model:
+      type: string
+      default: "gpt-4o-mini"
+    
+    llm_provider:
+      type: string
+      default: "openai"
+    
+    max_files:
+      type: integer
+      default: 20
+
+default_parameters:
+  agent_url: "http://fuzzforge-task-agent:8000/a2a/litellm_agent"
+  llm_model: "gpt-4o-mini"
+  llm_provider: "openai"
+  max_files: 20
+
+required_modules:
+  - "llm_secret_detector"
+
+supported_volume_modes:
+  - "ro"
@@ -0,0 +1,156 @@
+"""LLM Secret Detection Workflow"""
+
+from datetime import timedelta
+from typing import Dict, Any, Optional
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+@workflow.defn
+class LlmSecretDetectionWorkflow:
+    """Scan code for secrets using LLM AI."""
+
+    @workflow.run
+    async def run(
+        self,
+        target_id: str,
+        agent_url: Optional[str] = None,
+        llm_model: Optional[str] = None,
+        llm_provider: Optional[str] = None,
+        max_files: Optional[int] = None,
+        timeout: Optional[int] = None,
+        file_patterns: Optional[list] = None
+    ) -> Dict[str, Any]:
+        workflow_id = workflow.info().workflow_id
+        run_id = workflow.info().run_id
+
+        workflow.logger.info(
+            f"Starting LLM Secret Detection Workflow "
+            f"(workflow_id={workflow_id}, target_id={target_id}, model={llm_model})"
+        )
+
+        results = {
+            "workflow_id": workflow_id,
+            "target_id": target_id,
+            "status": "running",
+            "steps": [],
+            "findings": []
+        }
+
+        try:
+            # Step 1: Download target from MinIO
+            workflow.logger.info("Step 1: Downloading target from MinIO")
+            target_path = await workflow.execute_activity(
+                "get_target",
+                args=[target_id, run_id, "shared"],
+                start_to_close_timeout=timedelta(minutes=5),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=1),
+                    maximum_interval=timedelta(seconds=30),
+                    maximum_attempts=3
+                )
+            )
+            results["steps"].append({
+                "step": "download",
+                "status": "success",
+                "target_path": target_path
+            })
+            workflow.logger.info(f"✓ Target downloaded to: {target_path}")
+
+            # Step 2: Scan with LLM
+            workflow.logger.info("Step 2: Scanning with LLM")
+            config = {}
+            if agent_url:
+                config["agent_url"] = agent_url
+            if llm_model:
+                config["llm_model"] = llm_model
+            if llm_provider:
+                config["llm_provider"] = llm_provider
+            if max_files:
+                config["max_files"] = max_files
+            if timeout:
+                config["timeout"] = timeout
+            if file_patterns:
+                config["file_patterns"] = file_patterns
+
+            scan_results = await workflow.execute_activity(
+                "scan_with_llm",
+                args=[target_path, config],
+                start_to_close_timeout=timedelta(minutes=30),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=2),
+                    maximum_interval=timedelta(seconds=60),
+                    maximum_attempts=2
+                )
+            )
+
+            findings_count = len(scan_results.get("findings", []))
+            results["steps"].append({
+                "step": "llm_scan",
+                "status": "success",
+                "secrets_found": findings_count
+            })
+            workflow.logger.info(f"✓ LLM scan completed: {findings_count} secrets found")
+
+            # Step 3: Generate SARIF report
+            workflow.logger.info("Step 3: Generating SARIF report")
+            sarif_report = await workflow.execute_activity(
+                "llm_generate_sarif",  # Use shared LLM SARIF activity
+                args=[
+                    scan_results.get("findings", []),
+                    {
+                        "tool_name": f"llm-secret-detector ({llm_model or 'gpt-4o-mini'})",
+                        "tool_version": "1.0.0"
+                    }
+                ],
+                start_to_close_timeout=timedelta(minutes=2)
+            )
+            workflow.logger.info("✓ SARIF report generated")
+
+            # Step 4: Upload results to MinIO
+            workflow.logger.info("Step 4: Uploading results")
+            try:
+                results_url = await workflow.execute_activity(
+                    "upload_results",
+                    args=[workflow_id, scan_results, "json"],
+                    start_to_close_timeout=timedelta(minutes=2)
+                )
+                results["results_url"] = results_url
+                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
+            except Exception as e:
+                workflow.logger.warning(f"Failed to upload results: {e}")
+                results["results_url"] = None
+
+            # Step 5: Cleanup cache
+            workflow.logger.info("Step 5: Cleaning up cache")
+            try:
+                await workflow.execute_activity(
+                    "cleanup_cache",
+                    args=[target_path, "shared"],
+                    start_to_close_timeout=timedelta(minutes=1)
+                )
+                workflow.logger.info("✓ Cache cleaned up")
+            except Exception as e:
+                workflow.logger.warning(f"Cache cleanup failed: {e}")
+
+            # Mark workflow as successful
+            results["status"] = "success"
+            results["findings"] = scan_results.get("findings", [])
+            results["summary"] = scan_results.get("summary", {})
+            results["sarif"] = sarif_report or {}
+            workflow.logger.info(
+                f"✓ Workflow completed successfully: {workflow_id} "
+                f"({findings_count} secrets found)"
+            )
+
+            return results
+
+        except Exception as e:
+            workflow.logger.error(f"Workflow failed: {e}")
+            results["status"] = "error"
+            results["error"] = str(e)
+            results["steps"].append({
+                "step": "error",
+                "status": "failed",
+                "error": str(e)
+            })
+            raise
@@ -0,0 +1,13 @@
+"""
+TruffleHog Detection Workflow
+"""
+
+# Copyright (c) 2025 FuzzingLabs
+#
+# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
+# at the root of this repository for details.
+
+from .workflow import TrufflehogDetectionWorkflow
+from .activities import scan_with_trufflehog, trufflehog_generate_sarif
+
+__all__ = ["TrufflehogDetectionWorkflow", "scan_with_trufflehog", "trufflehog_generate_sarif"]
@@ -0,0 +1,111 @@
+"""TruffleHog Detection Workflow Activities"""
+
+import logging
+from pathlib import Path
+from typing import Dict, Any
+from temporalio import activity
+
+try:
+    from toolbox.modules.secret_detection.trufflehog import TruffleHogModule
+except ImportError:
+    from modules.secret_detection.trufflehog import TruffleHogModule
+
+@activity.defn(name="scan_with_trufflehog")
+async def scan_with_trufflehog(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
+    """Scan code using TruffleHog."""
+    activity.logger.info(f"Starting TruffleHog scan: {target_path}")
+    workspace = Path(target_path)
+
+    trufflehog = TruffleHogModule()
+    trufflehog.validate_config(config)
+    result = await trufflehog.execute(config, workspace)
+
+    if result.status == "failed":
+        raise RuntimeError(f"TruffleHog scan failed: {result.error}")
+
+    findings_dicts = [finding.model_dump() for finding in result.findings]
+    return {"findings": findings_dicts, "summary": result.summary}
+
+
+@activity.defn(name="trufflehog_generate_sarif")
+async def trufflehog_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Generate SARIF report from TruffleHog findings.
+
+    Args:
+        findings: List of finding dictionaries
+        metadata: Metadata including tool_name, tool_version
+
+    Returns:
+        SARIF report dictionary
+    """
+    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")
+
+    # Basic SARIF 2.1.0 structure
+    sarif_report = {
+        "version": "2.1.0",
+        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "name": metadata.get("tool_name", "trufflehog"),
+                        "version": metadata.get("tool_version", "3.63.2"),
+                        "informationUri": "https://github.com/trufflesecurity/trufflehog"
+                    }
+                },
+                "results": []
+            }
+        ]
+    }
+
+    # Convert findings to SARIF results
+    for finding in findings:
+        sarif_result = {
+            "ruleId": finding.get("metadata", {}).get("detector", "unknown"),
+            "level": _severity_to_sarif_level(finding.get("severity", "warning")),
+            "message": {
+                "text": finding.get("title", "Secret detected")
+            },
+            "locations": []
+        }
+
+        # Add description if present
+        if finding.get("description"):
+            sarif_result["message"]["markdown"] = finding["description"]
+
+        # Add location if file path is present
+        if finding.get("file_path"):
+            location = {
+                "physicalLocation": {
+                    "artifactLocation": {
+                        "uri": finding["file_path"]
+                    }
+                }
+            }
+
+            # Add region if line number is present
+            if finding.get("line_start"):
+                location["physicalLocation"]["region"] = {
+                    "startLine": finding["line_start"]
+                }
+
+            sarif_result["locations"].append(location)
+
+        sarif_report["runs"][0]["results"].append(sarif_result)
+
+    activity.logger.info(f"Generated SARIF report with {len(sarif_report['runs'][0]['results'])} results")
+
+    return sarif_report
+
+
+def _severity_to_sarif_level(severity: str) -> str:
+    """Convert severity to SARIF level"""
+    severity_map = {
+        "critical": "error",
+        "high": "error",
+        "medium": "warning",
+        "low": "note",
+        "info": "note"
+    }
+    return severity_map.get(severity.lower(), "warning")
@@ -0,0 +1,34 @@
+name: trufflehog_detection
+version: "1.0.0"
+vertical: secrets
+description: "Detect secrets with verification using TruffleHog"
+author: "FuzzForge Team"
+tags:
+  - "secrets"
+  - "trufflehog"
+  - "verification"
+
+workspace_isolation: "shared"
+
+parameters:
+  type: object
+  properties:
+    verify:
+      type: boolean
+      default: true
+      description: "Verify discovered secrets"
+
+    max_depth:
+      type: integer
+      default: 10
+      description: "Maximum directory depth to scan"
+
+default_parameters:
+  verify: true
+  max_depth: 10
+
+required_modules:
+  - "trufflehog"
+
+supported_volume_modes:
+  - "ro"
@@ -0,0 +1,104 @@
+"""TruffleHog Detection Workflow"""
+
+from datetime import timedelta
+from typing import Dict, Any
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+@workflow.defn
+class TrufflehogDetectionWorkflow:
+    """Scan code for secrets using TruffleHog."""
+
+    @workflow.run
+    async def run(self, target_id: str, verify: bool = False, concurrency: int = 10) -> Dict[str, Any]:
+        workflow_id = workflow.info().workflow_id
+        run_id = workflow.info().run_id
+
+        workflow.logger.info(
+            f"Starting TrufflehogDetectionWorkflow "
+            f"(workflow_id={workflow_id}, target_id={target_id}, verify={verify})"
+        )
+
+        results = {"workflow_id": workflow_id, "status": "running", "findings": []}
+
+        try:
+            # Step 1: Download target
+            workflow.logger.info("Step 1: Downloading target from MinIO")
+            target_path = await workflow.execute_activity(
+                "get_target", args=[target_id, run_id, "shared"],
+                start_to_close_timeout=timedelta(minutes=5),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=1),
+                    maximum_interval=timedelta(seconds=30),
+                    maximum_attempts=3
+                )
+            )
+            workflow.logger.info(f"✓ Target downloaded to: {target_path}")
+
+            # Step 2: Scan with TruffleHog
+            workflow.logger.info("Step 2: Scanning with TruffleHog")
+            scan_results = await workflow.execute_activity(
+                "scan_with_trufflehog",
+                args=[target_path, {"verify": verify, "concurrency": concurrency}],
+                start_to_close_timeout=timedelta(minutes=15),
+                retry_policy=RetryPolicy(
+                    initial_interval=timedelta(seconds=2),
+                    maximum_interval=timedelta(seconds=60),
+                    maximum_attempts=2
+                )
+            )
+            workflow.logger.info(
+                f"✓ TruffleHog scan completed: "
+                f"{scan_results.get('summary', {}).get('total_secrets', 0)} secrets found"
+            )
+
+            # Step 3: Generate SARIF report
+            workflow.logger.info("Step 3: Generating SARIF report")
+            sarif_report = await workflow.execute_activity(
+                "trufflehog_generate_sarif",
+                args=[scan_results.get("findings", []), {"tool_name": "trufflehog", "tool_version": "3.63.2"}],
+                start_to_close_timeout=timedelta(minutes=2)
+            )
+
+            # Step 4: Upload results to MinIO
+            workflow.logger.info("Step 4: Uploading results")
+            try:
+                results_url = await workflow.execute_activity(
+                    "upload_results",
+                    args=[workflow_id, scan_results, "json"],
+                    start_to_close_timeout=timedelta(minutes=2)
+                )
+                results["results_url"] = results_url
+                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
+            except Exception as e:
+                workflow.logger.warning(f"Failed to upload results: {e}")
+                results["results_url"] = None
+
+            # Step 5: Cleanup
+            workflow.logger.info("Step 5: Cleaning up cache")
+            try:
+                await workflow.execute_activity(
+                    "cleanup_cache", args=[target_path, "shared"],
+                    start_to_close_timeout=timedelta(minutes=1)
+                )
+                workflow.logger.info("✓ Cache cleaned up")
+            except Exception as e:
+                workflow.logger.warning(f"Cache cleanup failed: {e}")
+
+            # Mark workflow as successful
+            results["status"] = "success"
+            results["findings"] = scan_results.get("findings", [])
+            results["summary"] = scan_results.get("summary", {})
+            results["sarif"] = sarif_report or {}
+            workflow.logger.info(
+                f"✓ Workflow completed successfully: {workflow_id} "
+                f"({results['summary'].get('total_secrets', 0)} secrets found)"
+            )
+
+            return results
+
+        except Exception as e:
+            workflow.logger.error(f"Workflow failed: {e}")
+            results["status"] = "error"
+            results["error"] = str(e)
+            raise