mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-05-24 10:14:01 +02:00
feat: Add secret detection workflows and comprehensive benchmarking (#15)
Add three production-ready secret detection workflows with full benchmarking infrastructure: **New Workflows:** - gitleaks_detection: Pattern-based secret scanning (13/32 benchmark secrets) - trufflehog_detection: Entropy-based detection with verification (1/32 benchmark secrets) - llm_secret_detection: AI-powered semantic analysis (32/32 benchmark secrets - 100% recall) **Benchmarking Infrastructure:** - Ground truth dataset with 32 documented secrets (12 Easy, 10 Medium, 10 Hard) - Automated comparison tools for precision/recall testing - SARIF output format for all workflows - Performance metrics and tool comparison reports **Fixes:** - Set gitleaks default to no_git=True for uploaded directories - Update documentation with correct secret counts and workflow names - Temporarily deactivate AI agent command - Clean up deprecated test files and GitGuardian workflow **Testing:** All workflows verified on secret_detection_benchmark and vulnerable_app test projects. Workers healthy and system fully functional.
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Gitleaks Detection Workflow
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
from .workflow import GitleaksDetectionWorkflow
|
||||
from .activities import scan_with_gitleaks
|
||||
|
||||
__all__ = ["GitleaksDetectionWorkflow", "scan_with_gitleaks"]
|
||||
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Gitleaks Detection Workflow Activities
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
|
||||
from temporalio import activity
|
||||
|
||||
try:
|
||||
from toolbox.modules.secret_detection.gitleaks import GitleaksModule
|
||||
except ImportError:
|
||||
try:
|
||||
from modules.secret_detection.gitleaks import GitleaksModule
|
||||
except ImportError:
|
||||
from src.toolbox.modules.secret_detection.gitleaks import GitleaksModule
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@activity.defn(name="scan_with_gitleaks")
async def scan_with_gitleaks(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the Gitleaks module against a workspace directory.

    Args:
        target_path: Path to the workspace containing the code to scan
        config: Gitleaks configuration, validated and then handed to the module

    Returns:
        Dictionary with "findings" (each finding dumped to a plain dict)
        and "summary" (the module's summary, passed through as-is)

    Raises:
        FileNotFoundError: If target_path does not exist
        RuntimeError: If the module reports a "failed" status
    """
    log = activity.logger
    log.info(f"Starting Gitleaks scan: {target_path}")
    log.info(f"Config: {config}")

    scan_root = Path(target_path)
    if not scan_root.exists():
        raise FileNotFoundError(f"Workspace not found: {target_path}")

    # Validate first so a bad configuration is rejected before the scan starts
    module = GitleaksModule()
    module.validate_config(config)
    outcome = await module.execute(config, scan_root)

    if outcome.status == "failed":
        reason = outcome.error or 'Unknown error'
        raise RuntimeError(f"Gitleaks scan failed: {reason}")

    finding_count = len(outcome.findings)
    files_scanned = outcome.summary.get('files_scanned', 0)
    log.info(
        f"Gitleaks scan completed: {finding_count} findings from "
        f"{files_scanned} files"
    )

    # Finding objects are dumped to plain dicts so the result can be serialized
    return {
        "findings": [item.model_dump() for item in outcome.findings],
        "summary": outcome.summary,
    }
|
||||
|
||||
|
||||
@activity.defn(name="gitleaks_generate_sarif")
async def gitleaks_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate a SARIF 2.1.0 report from Gitleaks findings.

    Args:
        findings: List of finding dictionaries (None is treated as empty)
        metadata: Optional overrides for the tool driver; the keys read are
            "tool_name" and "tool_version" (None is treated as empty)

    Returns:
        SARIF report dictionary with a single run
    """
    findings = findings or []
    metadata = metadata or {}
    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")

    # Diagnostic probe of the first finding; kept at debug level so normal
    # runs do not carry troubleshooting output in their logs.
    if findings:
        first_finding = findings[0]
        activity.logger.debug(f"First finding keys: {list(first_finding.keys())}")
        activity.logger.debug(f"line_start value: {first_finding.get('line_start')}")

    sarif_results = []
    for finding in findings:
        # `or` (rather than a .get default) also covers keys that are present
        # but explicitly set to None.
        finding_meta = finding.get("metadata") or {}

        sarif_result = {
            "ruleId": finding_meta.get("rule_id", "unknown"),
            "level": _severity_to_sarif_level(finding.get("severity") or "warning"),
            "message": {
                "text": finding.get("title", "Secret leak detected")
            },
            "locations": []
        }

        # Add description if present
        if finding.get("description"):
            sarif_result["message"]["markdown"] = finding["description"]

        # Add location if file path is present
        if finding.get("file_path"):
            physical_location = {
                "artifactLocation": {
                    "uri": finding["file_path"]
                }
            }

            # Add region if line number is present
            if finding.get("line_start"):
                region = {"startLine": finding["line_start"]}
                if finding.get("line_end"):
                    region["endLine"] = finding["line_end"]
                physical_location["region"] = region

            sarif_result["locations"].append({"physicalLocation": physical_location})

        sarif_results.append(sarif_result)

    # Basic SARIF 2.1.0 structure
    sarif_report = {
        "version": "2.1.0",
        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        "runs": [
            {
                "tool": {
                    "driver": {
                        "name": metadata.get("tool_name", "gitleaks"),
                        "version": metadata.get("tool_version", "8.18.0"),
                        "informationUri": "https://github.com/gitleaks/gitleaks"
                    }
                },
                "results": sarif_results
            }
        ]
    }

    activity.logger.info(f"Generated SARIF report with {len(sarif_results)} results")

    return sarif_report
|
||||
|
||||
|
||||
def _severity_to_sarif_level(severity: str) -> str:
    """
    Convert a finding severity to a SARIF result level.

    Args:
        severity: Severity name, matched case-insensitively. Objects with a
            string ``value`` attribute (e.g. Enum members) are accepted too.

    Returns:
        "error" for critical/high, "warning" for medium, "note" for low/info,
        and "warning" for anything missing or unrecognized.
    """
    severity_map = {
        "critical": "error",
        "high": "error",
        "medium": "warning",
        "low": "note",
        "info": "note"
    }

    # A finding may carry an explicit None or a non-string severity; fall back
    # to the default level instead of raising AttributeError on .lower().
    if not isinstance(severity, str):
        severity = getattr(severity, "value", None)
        if not isinstance(severity, str):
            return "warning"

    return severity_map.get(severity.strip().lower(), "warning")
|
||||
@@ -0,0 +1,42 @@
|
||||
# Workflow metadata for the Gitleaks secret detection workflow.
name: gitleaks_detection
version: "1.0.0"
vertical: secrets
description: "Detect secrets and credentials using Gitleaks"
author: "FuzzForge Team"
tags:
  - "secrets"
  - "gitleaks"
  - "git"
  - "leak-detection"

workspace_isolation: "shared"

parameters:
  type: object
  properties:
    scan_mode:
      type: string
      enum: ["detect", "protect"]
      default: "detect"
      description: "Scan mode: detect (entire repo history) or protect (staged changes)"

    redact:
      type: boolean
      default: true
      description: "Redact secrets in output"

    # Defaults to true to match the workflow's run() signature
    # (no_git: bool = True): targets are uploaded directories.
    no_git:
      type: boolean
      default: true
      description: "Scan files without Git context"

default_parameters:
  scan_mode: "detect"
  redact: true
  no_git: true

required_modules:
  - "gitleaks"

supported_volume_modes:
  - "ro"
|
||||
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
Gitleaks Detection Workflow - Temporal Version
|
||||
|
||||
Scans code for secrets and credentials using Gitleaks.
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from temporalio import workflow
|
||||
from temporalio.common import RetryPolicy
|
||||
|
||||
# Import for type hints (will be executed by worker)
|
||||
with workflow.unsafe.imports_passed_through():
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@workflow.defn
class GitleaksDetectionWorkflow:
    """
    Scan code for secrets using Gitleaks.

    User workflow:
    1. User runs: ff workflow run gitleaks_detection .
    2. CLI uploads project to MinIO
    3. Worker downloads project
    4. Worker runs Gitleaks
    5. Secrets reported as findings in SARIF format
    """

    @workflow.run
    async def run(
        self,
        target_id: str,  # MinIO UUID of uploaded user code
        scan_mode: str = "detect",
        redact: bool = True,
        no_git: bool = True
    ) -> Dict[str, Any]:
        """
        Main workflow execution.

        Runs the download, scan, SARIF, upload and cleanup activities in
        order. Upload and cleanup are best-effort: their failures are logged
        and do not fail the workflow.

        Args:
            target_id: UUID of the uploaded target in MinIO
            scan_mode: Scan mode ('detect' or 'protect')
            redact: Redact secrets in output
            no_git: Scan files without Git context

        Returns:
            Dictionary containing workflow_id, target_id, status, steps,
            findings, summary, sarif and results_url

        Raises:
            Exception: Any failure of the download, scan or SARIF step is
                logged and re-raised after a best-effort cache cleanup.
        """
        workflow_id = workflow.info().workflow_id

        workflow.logger.info(
            f"Starting GitleaksDetectionWorkflow "
            f"(workflow_id={workflow_id}, target_id={target_id}, scan_mode={scan_mode})"
        )

        results = {
            "workflow_id": workflow_id,
            "target_id": target_id,
            "status": "running",
            "steps": [],
            "findings": []
        }

        # Path of a downloaded workspace that still has to be cleaned up.
        # Set after the download, cleared once cleanup has run, so the error
        # path knows whether anything was left behind.
        pending_cleanup: Optional[str] = None

        try:
            # Get run ID for workspace isolation
            run_id = workflow.info().run_id

            # Step 1: Download user's project from MinIO
            workflow.logger.info("Step 1: Downloading user code from MinIO")
            target_path = await workflow.execute_activity(
                "get_target",
                args=[target_id, run_id, "shared"],
                start_to_close_timeout=timedelta(minutes=5),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=1),
                    maximum_interval=timedelta(seconds=30),
                    maximum_attempts=3
                )
            )
            pending_cleanup = target_path
            results["steps"].append({
                "step": "download",
                "status": "success",
                "target_path": target_path
            })
            workflow.logger.info(f"✓ Target downloaded to: {target_path}")

            # Step 2: Run Gitleaks
            workflow.logger.info("Step 2: Scanning with Gitleaks")

            scan_config = {
                "scan_mode": scan_mode,
                "redact": redact,
                "no_git": no_git
            }

            scan_results = await workflow.execute_activity(
                "scan_with_gitleaks",
                args=[target_path, scan_config],
                start_to_close_timeout=timedelta(minutes=10),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=2),
                    maximum_interval=timedelta(seconds=60),
                    maximum_attempts=2
                )
            )

            leaks_found = scan_results.get("summary", {}).get("total_leaks", 0)
            results["steps"].append({
                "step": "gitleaks_scan",
                "status": "success",
                "leaks_found": leaks_found
            })
            workflow.logger.info(
                f"✓ Gitleaks scan completed: "
                f"{leaks_found} leaks found"
            )

            # Step 3: Generate SARIF report
            workflow.logger.info("Step 3: Generating SARIF report")
            sarif_report = await workflow.execute_activity(
                "gitleaks_generate_sarif",
                args=[scan_results.get("findings", []), {"tool_name": "gitleaks", "tool_version": "8.18.0"}],
                start_to_close_timeout=timedelta(minutes=2)
            )

            # Step 4: Upload results to MinIO (best-effort)
            workflow.logger.info("Step 4: Uploading results")
            try:
                results_url = await workflow.execute_activity(
                    "upload_results",
                    args=[workflow_id, scan_results, "json"],
                    start_to_close_timeout=timedelta(minutes=2)
                )
                results["results_url"] = results_url
                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
            except Exception as e:
                workflow.logger.warning(f"Failed to upload results: {e}")
                results["results_url"] = None

            # Step 5: Cleanup cache (best-effort)
            workflow.logger.info("Step 5: Cleaning up cache")
            await self._cleanup_cache(target_path)
            pending_cleanup = None

            # Mark workflow as successful
            results["status"] = "success"
            results["findings"] = scan_results.get("findings", [])
            results["summary"] = scan_results.get("summary", {})
            results["sarif"] = sarif_report or {}
            workflow.logger.info(
                f"✓ Workflow completed successfully: {workflow_id} "
                f"({results['summary'].get('total_leaks', 0)} leaks found)"
            )

            return results

        except Exception as e:
            workflow.logger.error(f"Workflow failed: {e}")
            results["status"] = "error"
            results["error"] = str(e)
            results["steps"].append({
                "step": "error",
                "status": "failed",
                "error": str(e)
            })
            # Do not leave the downloaded workspace behind when a later step
            # fails; the original error is still the one that propagates.
            if pending_cleanup is not None:
                await self._cleanup_cache(pending_cleanup)
            raise

    async def _cleanup_cache(self, target_path: str) -> None:
        """Run the cleanup_cache activity; failures are logged, never raised."""
        try:
            await workflow.execute_activity(
                "cleanup_cache",
                args=[target_path, "shared"],
                start_to_close_timeout=timedelta(minutes=1)
            )
            workflow.logger.info("✓ Cache cleaned up")
        except Exception as e:
            workflow.logger.warning(f"Cache cleanup failed: {e}")
|
||||
@@ -0,0 +1,6 @@
|
||||
"""LLM Secret Detection Workflow"""
|
||||
|
||||
from .workflow import LlmSecretDetectionWorkflow
|
||||
from .activities import scan_with_llm
|
||||
|
||||
__all__ = ["LlmSecretDetectionWorkflow", "scan_with_llm"]
|
||||
@@ -0,0 +1,112 @@
|
||||
"""LLM Secret Detection Workflow Activities"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from temporalio import activity
|
||||
|
||||
try:
|
||||
from toolbox.modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule
|
||||
except ImportError:
|
||||
from modules.secret_detection.llm_secret_detector import LLMSecretDetectorModule
|
||||
|
||||
@activity.defn(name="scan_with_llm")
async def scan_with_llm(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Scan code using the LLM secret detector module.

    Args:
        target_path: Path to the workspace containing the code to scan
        config: Detector configuration, validated and then handed to the module

    Returns:
        Dictionary with "findings" (each finding dumped to a plain dict)
        and "summary" (the module's summary)

    Raises:
        FileNotFoundError: If target_path does not exist
        RuntimeError: If the module reports a "failed" status
    """
    activity.logger.info(f"Starting LLM secret detection: {target_path}")
    workspace = Path(target_path)

    # Fail fast with a clear error rather than letting the module run
    # against a missing directory.
    if not workspace.exists():
        raise FileNotFoundError(f"Workspace not found: {target_path}")

    llm_detector = LLMSecretDetectorModule()
    llm_detector.validate_config(config)
    result = await llm_detector.execute(config, workspace)

    if result.status == "failed":
        # Avoid a literal "None" in the message when no error text was set
        raise RuntimeError(f"LLM detection failed: {result.error or 'Unknown error'}")

    # Convert finding objects to dicts for serialization
    findings_dicts = [finding.model_dump() for finding in result.findings]
    return {"findings": findings_dicts, "summary": result.summary}
|
||||
|
||||
|
||||
@activity.defn(name="llm_secret_generate_sarif")
async def llm_secret_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate SARIF report from LLM secret detection findings.

    Args:
        findings: List of finding dictionaries from LLM secret detector
            (None is treated as empty)
        metadata: Optional overrides for the tool driver; the keys read are
            "tool_name" and "tool_version" (None is treated as empty)

    Returns:
        SARIF 2.1.0 report dictionary with a single run
    """
    findings = findings or []
    metadata = metadata or {}
    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")

    sarif_results = []
    for finding in findings:
        # Resolve metadata up front: the fallback below is evaluated even
        # when "id" is present, so a None metadata value must not crash it.
        finding_meta = finding.get("metadata") or {}

        sarif_result = {
            "ruleId": finding.get("id", finding_meta.get("secret_type", "unknown-secret")),
            "level": _severity_to_sarif_level(finding.get("severity") or "warning"),
            "message": {
                "text": finding.get("title", "Secret detected by LLM")
            },
            "locations": []
        }

        # Add description if present
        if finding.get("description"):
            sarif_result["message"]["markdown"] = finding["description"]

        # Add location if file path is present
        if finding.get("file_path"):
            physical_location = {
                "artifactLocation": {
                    "uri": finding["file_path"]
                }
            }

            # Add region if line number is present; endLine only alongside it
            if finding.get("line_start"):
                region = {"startLine": finding["line_start"]}
                if finding.get("line_end"):
                    region["endLine"] = finding["line_end"]
                physical_location["region"] = region

            sarif_result["locations"].append({"physicalLocation": physical_location})

        sarif_results.append(sarif_result)

    # Basic SARIF 2.1.0 structure
    sarif_report = {
        "version": "2.1.0",
        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        "runs": [
            {
                "tool": {
                    "driver": {
                        "name": metadata.get("tool_name", "llm-secret-detector"),
                        "version": metadata.get("tool_version", "1.0.0"),
                        "informationUri": "https://github.com/FuzzingLabs/fuzzforge_ai"
                    }
                },
                "results": sarif_results
            }
        ]
    }

    activity.logger.info(f"Generated SARIF report with {len(sarif_results)} results")

    return sarif_report
|
||||
|
||||
|
||||
def _severity_to_sarif_level(severity: str) -> str:
    """
    Convert a finding severity to a SARIF result level.

    Args:
        severity: Severity name, matched case-insensitively. Objects with a
            string ``value`` attribute (e.g. Enum members) are accepted too.

    Returns:
        "error" for critical/high, "warning" for medium, "note" for low/info,
        and "warning" for anything missing or unrecognized.
    """
    severity_map = {
        "critical": "error",
        "high": "error",
        "medium": "warning",
        "low": "note",
        "info": "note"
    }

    # A finding may carry an explicit None or a non-string severity; fall back
    # to the default level instead of raising AttributeError on .lower().
    if not isinstance(severity, str):
        severity = getattr(severity, "value", None)
        if not isinstance(severity, str):
            return "warning"

    return severity_map.get(severity.strip().lower(), "warning")
|
||||
@@ -0,0 +1,43 @@
|
||||
# Workflow metadata for the LLM-based secret detection workflow.
name: llm_secret_detection
version: "1.0.0"
vertical: secrets
description: "AI-powered secret detection using LLM semantic analysis"
author: "FuzzForge Team"
tags:
  - "secrets"
  - "llm"
  - "ai"
  - "semantic"

workspace_isolation: "shared"

# NOTE(review): the workflow's run() also accepts `timeout` and
# `file_patterns`, which are not declared here — confirm whether they
# should be exposed as parameters.
parameters:
  type: object
  properties:
    agent_url:
      type: string
      default: "http://fuzzforge-task-agent:8000/a2a/litellm_agent"

    llm_model:
      type: string
      default: "gpt-4o-mini"

    llm_provider:
      type: string
      default: "openai"

    max_files:
      type: integer
      default: 20

# Mirrors the per-property defaults declared above.
default_parameters:
  agent_url: "http://fuzzforge-task-agent:8000/a2a/litellm_agent"
  llm_model: "gpt-4o-mini"
  llm_provider: "openai"
  max_files: 20

required_modules:
  - "llm_secret_detector"

supported_volume_modes:
  - "ro"
|
||||
@@ -0,0 +1,156 @@
|
||||
"""LLM Secret Detection Workflow"""
|
||||
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from temporalio import workflow
|
||||
from temporalio.common import RetryPolicy
|
||||
|
||||
@workflow.defn
class LlmSecretDetectionWorkflow:
    """Scan code for secrets using LLM AI."""

    @workflow.run
    async def run(
        self,
        target_id: str,
        agent_url: Optional[str] = None,
        llm_model: Optional[str] = None,
        llm_provider: Optional[str] = None,
        max_files: Optional[int] = None,
        timeout: Optional[int] = None,
        file_patterns: Optional[list] = None
    ) -> Dict[str, Any]:
        """
        Main workflow execution.

        Runs the download, scan, SARIF, upload and cleanup activities in
        order. Upload and cleanup are best-effort: their failures are logged
        and do not fail the workflow.

        Args:
            target_id: Identifier of the uploaded target, passed to "get_target"
            agent_url: Forwarded to the scan config as "agent_url" when truthy
            llm_model: Forwarded as "llm_model" when truthy; also shown in the
                SARIF tool name
            llm_provider: Forwarded as "llm_provider" when truthy
            max_files: Forwarded as "max_files" when truthy
            timeout: Forwarded as "timeout" when truthy
            file_patterns: Forwarded as "file_patterns" when truthy

        Returns:
            Dictionary containing workflow_id, target_id, status, steps,
            findings, summary, sarif and results_url

        Raises:
            Exception: Any failure of the download, scan or SARIF step is
                logged and re-raised after a best-effort cache cleanup.
        """
        workflow_id = workflow.info().workflow_id
        run_id = workflow.info().run_id

        workflow.logger.info(
            f"Starting LLM Secret Detection Workflow "
            f"(workflow_id={workflow_id}, target_id={target_id}, model={llm_model})"
        )

        results = {
            "workflow_id": workflow_id,
            "target_id": target_id,
            "status": "running",
            "steps": [],
            "findings": []
        }

        # Path of a downloaded workspace that still has to be cleaned up.
        # Set after the download, cleared once cleanup has run, so the error
        # path knows whether anything was left behind.
        pending_cleanup: Optional[str] = None

        try:
            # Step 1: Download target from MinIO
            workflow.logger.info("Step 1: Downloading target from MinIO")
            target_path = await workflow.execute_activity(
                "get_target",
                args=[target_id, run_id, "shared"],
                start_to_close_timeout=timedelta(minutes=5),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=1),
                    maximum_interval=timedelta(seconds=30),
                    maximum_attempts=3
                )
            )
            pending_cleanup = target_path
            results["steps"].append({
                "step": "download",
                "status": "success",
                "target_path": target_path
            })
            workflow.logger.info(f"✓ Target downloaded to: {target_path}")

            # Step 2: Scan with LLM
            workflow.logger.info("Step 2: Scanning with LLM")
            # Only settings the caller actually supplied are forwarded; unset
            # (falsy) ones are omitted from the config.
            optional_settings = {
                "agent_url": agent_url,
                "llm_model": llm_model,
                "llm_provider": llm_provider,
                "max_files": max_files,
                "timeout": timeout,
                "file_patterns": file_patterns,
            }
            config = {key: value for key, value in optional_settings.items() if value}

            scan_results = await workflow.execute_activity(
                "scan_with_llm",
                args=[target_path, config],
                start_to_close_timeout=timedelta(minutes=30),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=2),
                    maximum_interval=timedelta(seconds=60),
                    maximum_attempts=2
                )
            )

            findings_count = len(scan_results.get("findings", []))
            results["steps"].append({
                "step": "llm_scan",
                "status": "success",
                "secrets_found": findings_count
            })
            workflow.logger.info(f"✓ LLM scan completed: {findings_count} secrets found")

            # Step 3: Generate SARIF report
            # NOTE(review): this schedules "llm_generate_sarif", not the
            # "llm_secret_generate_sarif" activity defined next to
            # scan_with_llm — confirm an activity with this name is
            # registered on the worker.
            workflow.logger.info("Step 3: Generating SARIF report")
            sarif_report = await workflow.execute_activity(
                "llm_generate_sarif",  # Use shared LLM SARIF activity
                args=[
                    scan_results.get("findings", []),
                    {
                        "tool_name": f"llm-secret-detector ({llm_model or 'gpt-4o-mini'})",
                        "tool_version": "1.0.0"
                    }
                ],
                start_to_close_timeout=timedelta(minutes=2)
            )
            workflow.logger.info("✓ SARIF report generated")

            # Step 4: Upload results to MinIO (best-effort)
            workflow.logger.info("Step 4: Uploading results")
            try:
                results_url = await workflow.execute_activity(
                    "upload_results",
                    args=[workflow_id, scan_results, "json"],
                    start_to_close_timeout=timedelta(minutes=2)
                )
                results["results_url"] = results_url
                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
            except Exception as e:
                workflow.logger.warning(f"Failed to upload results: {e}")
                results["results_url"] = None

            # Step 5: Cleanup cache (best-effort)
            workflow.logger.info("Step 5: Cleaning up cache")
            await self._cleanup_cache(target_path)
            pending_cleanup = None

            # Mark workflow as successful
            results["status"] = "success"
            results["findings"] = scan_results.get("findings", [])
            results["summary"] = scan_results.get("summary", {})
            results["sarif"] = sarif_report or {}
            workflow.logger.info(
                f"✓ Workflow completed successfully: {workflow_id} "
                f"({findings_count} secrets found)"
            )

            return results

        except Exception as e:
            workflow.logger.error(f"Workflow failed: {e}")
            results["status"] = "error"
            results["error"] = str(e)
            results["steps"].append({
                "step": "error",
                "status": "failed",
                "error": str(e)
            })
            # Do not leave the downloaded workspace behind when a later step
            # fails; the original error is still the one that propagates.
            if pending_cleanup is not None:
                await self._cleanup_cache(pending_cleanup)
            raise

    async def _cleanup_cache(self, target_path: str) -> None:
        """Run the cleanup_cache activity; failures are logged, never raised."""
        try:
            await workflow.execute_activity(
                "cleanup_cache",
                args=[target_path, "shared"],
                start_to_close_timeout=timedelta(minutes=1)
            )
            workflow.logger.info("✓ Cache cleaned up")
        except Exception as e:
            workflow.logger.warning(f"Cache cleanup failed: {e}")
|
||||
@@ -0,0 +1,13 @@
|
||||
"""
|
||||
TruffleHog Detection Workflow
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
|
||||
from .workflow import TrufflehogDetectionWorkflow
|
||||
from .activities import scan_with_trufflehog, trufflehog_generate_sarif
|
||||
|
||||
__all__ = ["TrufflehogDetectionWorkflow", "scan_with_trufflehog", "trufflehog_generate_sarif"]
|
||||
@@ -0,0 +1,111 @@
|
||||
"""TruffleHog Detection Workflow Activities"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from temporalio import activity
|
||||
|
||||
try:
|
||||
from toolbox.modules.secret_detection.trufflehog import TruffleHogModule
|
||||
except ImportError:
|
||||
from modules.secret_detection.trufflehog import TruffleHogModule
|
||||
|
||||
@activity.defn(name="scan_with_trufflehog")
async def scan_with_trufflehog(target_path: str, config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Scan code using the TruffleHog module.

    Args:
        target_path: Path to the workspace containing the code to scan
        config: TruffleHog configuration, validated and then handed to the module

    Returns:
        Dictionary with "findings" (each finding dumped to a plain dict)
        and "summary" (the module's summary)

    Raises:
        FileNotFoundError: If target_path does not exist
        RuntimeError: If the module reports a "failed" status
    """
    activity.logger.info(f"Starting TruffleHog scan: {target_path}")
    workspace = Path(target_path)

    # Fail fast with a clear error rather than letting the module run
    # against a missing directory.
    if not workspace.exists():
        raise FileNotFoundError(f"Workspace not found: {target_path}")

    trufflehog = TruffleHogModule()
    trufflehog.validate_config(config)
    result = await trufflehog.execute(config, workspace)

    if result.status == "failed":
        # Avoid a literal "None" in the message when no error text was set
        raise RuntimeError(f"TruffleHog scan failed: {result.error or 'Unknown error'}")

    # Convert finding objects to dicts for serialization
    findings_dicts = [finding.model_dump() for finding in result.findings]
    return {"findings": findings_dicts, "summary": result.summary}
|
||||
|
||||
|
||||
@activity.defn(name="trufflehog_generate_sarif")
async def trufflehog_generate_sarif(findings: list, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate a SARIF 2.1.0 report from TruffleHog findings.

    Args:
        findings: List of finding dictionaries (None is treated as empty)
        metadata: Optional overrides for the tool driver; the keys read are
            "tool_name" and "tool_version" (None is treated as empty)

    Returns:
        SARIF report dictionary with a single run
    """
    findings = findings or []
    metadata = metadata or {}
    activity.logger.info(f"Generating SARIF report from {len(findings)} findings")

    sarif_results = []
    for finding in findings:
        # `or` (rather than a .get default) also covers keys that are present
        # but explicitly set to None.
        finding_meta = finding.get("metadata") or {}

        sarif_result = {
            "ruleId": finding_meta.get("detector", "unknown"),
            "level": _severity_to_sarif_level(finding.get("severity") or "warning"),
            "message": {
                "text": finding.get("title", "Secret detected")
            },
            "locations": []
        }

        # Add description if present
        if finding.get("description"):
            sarif_result["message"]["markdown"] = finding["description"]

        # Add location if file path is present
        if finding.get("file_path"):
            physical_location = {
                "artifactLocation": {
                    "uri": finding["file_path"]
                }
            }

            # Add region if line number is present
            if finding.get("line_start"):
                region = {"startLine": finding["line_start"]}
                if finding.get("line_end"):
                    region["endLine"] = finding["line_end"]
                physical_location["region"] = region

            sarif_result["locations"].append({"physicalLocation": physical_location})

        sarif_results.append(sarif_result)

    # Basic SARIF 2.1.0 structure
    sarif_report = {
        "version": "2.1.0",
        "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        "runs": [
            {
                "tool": {
                    "driver": {
                        "name": metadata.get("tool_name", "trufflehog"),
                        "version": metadata.get("tool_version", "3.63.2"),
                        "informationUri": "https://github.com/trufflesecurity/trufflehog"
                    }
                },
                "results": sarif_results
            }
        ]
    }

    activity.logger.info(f"Generated SARIF report with {len(sarif_results)} results")

    return sarif_report
|
||||
|
||||
|
||||
def _severity_to_sarif_level(severity: str) -> str:
    """
    Convert a finding severity to a SARIF result level.

    Args:
        severity: Severity name, matched case-insensitively. Objects with a
            string ``value`` attribute (e.g. Enum members) are accepted too.

    Returns:
        "error" for critical/high, "warning" for medium, "note" for low/info,
        and "warning" for anything missing or unrecognized.
    """
    severity_map = {
        "critical": "error",
        "high": "error",
        "medium": "warning",
        "low": "note",
        "info": "note"
    }

    # A finding may carry an explicit None or a non-string severity; fall back
    # to the default level instead of raising AttributeError on .lower().
    if not isinstance(severity, str):
        severity = getattr(severity, "value", None)
        if not isinstance(severity, str):
            return "warning"

    return severity_map.get(severity.strip().lower(), "warning")
|
||||
@@ -0,0 +1,34 @@
|
||||
# Workflow metadata for the TruffleHog secret detection workflow.
name: trufflehog_detection
version: "1.0.0"
vertical: secrets
description: "Detect secrets with verification using TruffleHog"
author: "FuzzForge Team"
tags:
  - "secrets"
  - "trufflehog"
  - "verification"

workspace_isolation: "shared"

# NOTE(review): the workflow's run() signature is
# (target_id, verify=False, concurrency=10). The `verify` default differs
# from the one declared here, and `max_depth` is not a run() parameter —
# confirm which side is intended.
parameters:
  type: object
  properties:
    verify:
      type: boolean
      default: true
      description: "Verify discovered secrets"

    max_depth:
      type: integer
      default: 10
      description: "Maximum directory depth to scan"

# Mirrors the per-property defaults declared above.
default_parameters:
  verify: true
  max_depth: 10

required_modules:
  - "trufflehog"

supported_volume_modes:
  - "ro"
|
||||
@@ -0,0 +1,104 @@
|
||||
"""TruffleHog Detection Workflow"""
|
||||
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Any
|
||||
from temporalio import workflow
|
||||
from temporalio.common import RetryPolicy
|
||||
|
||||
@workflow.defn
class TrufflehogDetectionWorkflow:
    """Scan code for secrets using TruffleHog."""

    @workflow.run
    async def run(self, target_id: str, verify: bool = False, concurrency: int = 10) -> Dict[str, Any]:
        """
        Main workflow execution.

        Runs the download, scan, SARIF, upload and cleanup activities in
        order. Upload and cleanup are best-effort: their failures are logged
        and do not fail the workflow.

        Args:
            target_id: Identifier of the uploaded target, passed to "get_target"
            verify: Forwarded to the scan config as "verify"
            concurrency: Forwarded to the scan config as "concurrency"

        Returns:
            Dictionary containing workflow_id, target_id, status, steps,
            findings, summary, sarif and results_url

        Raises:
            Exception: Any failure of the download, scan or SARIF step is
                logged and re-raised after a best-effort cache cleanup.
        """
        workflow_id = workflow.info().workflow_id
        run_id = workflow.info().run_id

        workflow.logger.info(
            f"Starting TrufflehogDetectionWorkflow "
            f"(workflow_id={workflow_id}, target_id={target_id}, verify={verify})"
        )

        # Same result shape as the sibling secret-detection workflows
        results = {
            "workflow_id": workflow_id,
            "target_id": target_id,
            "status": "running",
            "steps": [],
            "findings": []
        }

        # Path of a downloaded workspace that still has to be cleaned up.
        # Set after the download, cleared once cleanup has run, so the error
        # path knows whether anything was left behind.
        pending_cleanup = None

        try:
            # Step 1: Download target
            workflow.logger.info("Step 1: Downloading target from MinIO")
            target_path = await workflow.execute_activity(
                "get_target", args=[target_id, run_id, "shared"],
                start_to_close_timeout=timedelta(minutes=5),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=1),
                    maximum_interval=timedelta(seconds=30),
                    maximum_attempts=3
                )
            )
            pending_cleanup = target_path
            results["steps"].append({
                "step": "download",
                "status": "success",
                "target_path": target_path
            })
            workflow.logger.info(f"✓ Target downloaded to: {target_path}")

            # Step 2: Scan with TruffleHog
            workflow.logger.info("Step 2: Scanning with TruffleHog")
            scan_results = await workflow.execute_activity(
                "scan_with_trufflehog",
                args=[target_path, {"verify": verify, "concurrency": concurrency}],
                start_to_close_timeout=timedelta(minutes=15),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=2),
                    maximum_interval=timedelta(seconds=60),
                    maximum_attempts=2
                )
            )
            secrets_found = scan_results.get('summary', {}).get('total_secrets', 0)
            results["steps"].append({
                "step": "trufflehog_scan",
                "status": "success",
                "secrets_found": secrets_found
            })
            workflow.logger.info(
                f"✓ TruffleHog scan completed: "
                f"{secrets_found} secrets found"
            )

            # Step 3: Generate SARIF report
            workflow.logger.info("Step 3: Generating SARIF report")
            sarif_report = await workflow.execute_activity(
                "trufflehog_generate_sarif",
                args=[scan_results.get("findings", []), {"tool_name": "trufflehog", "tool_version": "3.63.2"}],
                start_to_close_timeout=timedelta(minutes=2)
            )

            # Step 4: Upload results to MinIO (best-effort)
            workflow.logger.info("Step 4: Uploading results")
            try:
                results_url = await workflow.execute_activity(
                    "upload_results",
                    args=[workflow_id, scan_results, "json"],
                    start_to_close_timeout=timedelta(minutes=2)
                )
                results["results_url"] = results_url
                workflow.logger.info(f"✓ Results uploaded to: {results_url}")
            except Exception as e:
                workflow.logger.warning(f"Failed to upload results: {e}")
                results["results_url"] = None

            # Step 5: Cleanup (best-effort)
            workflow.logger.info("Step 5: Cleaning up cache")
            await self._cleanup_cache(target_path)
            pending_cleanup = None

            # Mark workflow as successful
            results["status"] = "success"
            results["findings"] = scan_results.get("findings", [])
            results["summary"] = scan_results.get("summary", {})
            results["sarif"] = sarif_report or {}
            workflow.logger.info(
                f"✓ Workflow completed successfully: {workflow_id} "
                f"({results['summary'].get('total_secrets', 0)} secrets found)"
            )

            return results

        except Exception as e:
            workflow.logger.error(f"Workflow failed: {e}")
            results["status"] = "error"
            results["error"] = str(e)
            results["steps"].append({
                "step": "error",
                "status": "failed",
                "error": str(e)
            })
            # Do not leave the downloaded workspace behind when a later step
            # fails; the original error is still the one that propagates.
            if pending_cleanup is not None:
                await self._cleanup_cache(pending_cleanup)
            raise

    async def _cleanup_cache(self, target_path: str) -> None:
        """Run the cleanup_cache activity; failures are logged, never raised."""
        try:
            await workflow.execute_activity(
                "cleanup_cache", args=[target_path, "shared"],
                start_to_close_timeout=timedelta(minutes=1)
            )
            workflow.logger.info("✓ Cache cleaned up")
        except Exception as e:
            workflow.logger.warning(f"Cache cleanup failed: {e}")
|
||||
Reference in New Issue
Block a user