fix: add missing secrets worker to repository

The secrets worker was being ignored due to broad gitignore pattern.
Added exception to allow workers/secrets/ directory while still ignoring actual secrets.

Files added:
- workers/secrets/Dockerfile
- workers/secrets/requirements.txt
- workers/secrets/worker.py
This commit is contained in:
tduhamel42
2025-10-22 08:39:20 +02:00
parent 4e14b4207d
commit 75df59ddef
4 changed files with 389 additions and 0 deletions

4
.gitignore vendored
View File

@@ -240,6 +240,10 @@ yarn-error.log*
!**/secret_detection_benchmark_GROUND_TRUTH.json
!**/secret_detection/results/
# Exception: Allow workers/secrets/ directory (secrets detection worker)
# NOTE(review): these negations appear BEFORE the 'secret*' / 'secrets/' ignore
# rules below; gitignore applies the LAST matching pattern, so verify with
# 'git check-ignore -v workers/secrets/worker.py' that the exception actually wins.
!workers/secrets/
!workers/secrets/**
secret*
secrets/
credentials*

View File

@@ -0,0 +1,61 @@
# FuzzForge Vertical Worker: Secret Detection
#
# Pre-installed tools for secret detection:
# - Gitleaks v8.18.0
# - TruffleHog v3.63.2
# - Temporal worker (Python; dependencies in requirements.txt)
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies; apt lists are removed in the same layer to keep
# the image small.
RUN apt-get update && apt-get install -y \
# Build essentials
build-essential \
# Development tools
git \
curl \
wget \
# Cleanup
&& rm -rf /var/lib/apt/lists/*
# Install Gitleaks v8.18.0 (pinned release tarball from GitHub)
RUN wget -q https://github.com/gitleaks/gitleaks/releases/download/v8.18.0/gitleaks_8.18.0_linux_x64.tar.gz && \
tar -xzf gitleaks_8.18.0_linux_x64.tar.gz && \
mv gitleaks /usr/local/bin/ && \
chmod +x /usr/local/bin/gitleaks && \
rm gitleaks_8.18.0_linux_x64.tar.gz
# Install TruffleHog v3.63.2 (pinned release tarball from GitHub)
RUN wget -q https://github.com/trufflesecurity/trufflehog/releases/download/v3.63.2/trufflehog_3.63.2_linux_amd64.tar.gz && \
tar -xzf trufflehog_3.63.2_linux_amd64.tar.gz && \
mv trufflehog /usr/local/bin/ && \
chmod +x /usr/local/bin/trufflehog && \
rm trufflehog_3.63.2_linux_amd64.tar.gz
# Verify installations (fails the build early if either binary is broken)
RUN gitleaks version && trufflehog --version
# Install Python dependencies for Temporal worker
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt && \
rm /tmp/requirements.txt
# Create cache directory for downloaded targets
RUN mkdir -p /cache && chmod 755 /cache
# Copy worker entrypoint
COPY worker.py /app/worker.py
# Add toolbox and AI module to Python path (mounted at runtime — these paths
# do not exist in the image itself)
ENV PYTHONPATH="/app:/app/toolbox:/app/ai_src:${PYTHONPATH}"
ENV PYTHONUNBUFFERED=1
# Healthcheck
# NOTE(review): this only proves the Python interpreter starts; it does not
# verify Temporal connectivity or worker liveness — consider a real probe.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD python3 -c "import sys; sys.exit(0)"
# Run worker
CMD ["python3", "/app/worker.py"]

View File

@@ -0,0 +1,15 @@
# Temporal worker dependencies
temporalio>=1.5.0
pydantic>=2.0.0
# Storage (MinIO/S3)
boto3>=1.34.0
# Configuration
pyyaml>=6.0.0
# HTTP Client (for real-time stats reporting)
httpx>=0.27.0
# A2A Agent Communication (for LLM-based secret detection)
a2a-sdk[all]>=0.1.0

309
workers/secrets/worker.py Normal file
View File

@@ -0,0 +1,309 @@
"""
FuzzForge Vertical Worker: Secret Detection
This worker:
1. Discovers workflows for the 'secrets' vertical from mounted toolbox
2. Dynamically imports and registers workflow classes
3. Connects to Temporal and processes tasks
4. Handles activities for target download/upload from MinIO
"""
import asyncio
import importlib
import inspect
import logging
import os
import sys
from pathlib import Path
from typing import List, Any
import yaml
from temporalio.client import Client
from temporalio.worker import Worker
# Add toolbox to path for workflow and activity imports
sys.path.insert(0, '/app/toolbox')
# Import common storage activities
from toolbox.common.storage_activities import (
get_target_activity,
cleanup_cache_activity,
upload_results_activity
)
# Configure logging: level comes from LOG_LEVEL (default INFO) so verbosity
# can be tuned per deployment without code changes.
logging.basicConfig(
    level=os.getenv('LOG_LEVEL', 'INFO'),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by all functions in this worker
logger = logging.getLogger(__name__)
async def discover_workflows(vertical: str) -> List[Any]:
    """
    Discover workflows for this vertical from the mounted toolbox.

    Scans /app/toolbox/workflows for subdirectories that contain a
    metadata.yaml whose ``vertical`` field matches *vertical*, imports the
    sibling workflow.py module, and collects every class carrying Temporal's
    ``__temporal_workflow_definition`` marker (set by ``@workflow.defn``).
    Errors in any single workflow directory are logged and skipped so one
    broken workflow cannot prevent the others from loading.

    Args:
        vertical: The vertical name (e.g., 'secrets', 'python', 'web')

    Returns:
        List of workflow classes decorated with @workflow.defn. Empty if the
        toolbox mount is missing or no workflow matches this vertical.
    """
    log = logging.getLogger(__name__)
    workflows: List[Any] = []
    toolbox_path = Path("/app/toolbox/workflows")
    if not toolbox_path.exists():
        log.warning(f"Toolbox path does not exist: {toolbox_path}")
        return workflows
    log.info(f"Scanning for workflows in: {toolbox_path}")
    for workflow_dir in toolbox_path.iterdir():
        if not workflow_dir.is_dir():
            continue
        # Skip special directories
        if workflow_dir.name.startswith('.') or workflow_dir.name == '__pycache__':
            continue
        metadata_file = workflow_dir / "metadata.yaml"
        if not metadata_file.exists():
            log.debug(f"No metadata.yaml in {workflow_dir.name}, skipping")
            continue
        try:
            # Parse metadata; explicit encoding so behavior does not depend
            # on the container's locale default.
            with open(metadata_file, encoding="utf-8") as f:
                metadata = yaml.safe_load(f)
            # FIX: an empty or malformed metadata.yaml makes safe_load return
            # None (or a non-dict), which previously raised AttributeError on
            # .get() and was logged as a generic processing error. Skip it
            # cleanly instead.
            if not isinstance(metadata, dict):
                log.warning(
                    f"Workflow {workflow_dir.name} has invalid metadata.yaml, skipping"
                )
                continue
            # Check if workflow is for this vertical
            workflow_vertical = metadata.get("vertical")
            if workflow_vertical != vertical:
                log.debug(
                    f"Workflow {workflow_dir.name} is for vertical '{workflow_vertical}', "
                    f"not '{vertical}', skipping"
                )
                continue
            # Check if workflow.py exists
            workflow_file = workflow_dir / "workflow.py"
            if not workflow_file.exists():
                log.warning(
                    f"Workflow {workflow_dir.name} has metadata but no workflow.py, skipping"
                )
                continue
            # Dynamically import workflow module
            module_name = f"toolbox.workflows.{workflow_dir.name}.workflow"
            log.info(f"Importing workflow module: {module_name}")
            try:
                module = importlib.import_module(module_name)
            except Exception as e:
                log.error(
                    f"Failed to import workflow module {module_name}: {e}",
                    exc_info=True
                )
                continue
            # Find @workflow.defn decorated classes
            found_workflows = False
            for name, obj in inspect.getmembers(module, inspect.isclass):
                # Temporal attaches this attribute via @workflow.defn
                if hasattr(obj, '__temporal_workflow_definition'):
                    workflows.append(obj)
                    found_workflows = True
                    log.info(
                        f"✓ Discovered workflow: {name} from {workflow_dir.name} "
                        f"(vertical: {vertical})"
                    )
            if not found_workflows:
                log.warning(
                    f"Workflow {workflow_dir.name} has no @workflow.defn decorated classes"
                )
        except Exception as e:
            log.error(
                f"Error processing workflow {workflow_dir.name}: {e}",
                exc_info=True
            )
            continue
    log.info(f"Discovered {len(workflows)} workflows for vertical '{vertical}'")
    return workflows
async def discover_activities(workflows_dir: Path) -> List[Any]:
    """
    Discover activities from workflow directories.

    Each workflow directory may ship an activities.py next to its
    workflow.py; every function in it carrying Temporal's
    ``__temporal_activity_definition`` marker (set by ``@activity.defn``)
    is collected for registration with the worker. A failing directory is
    logged and skipped without affecting the others.

    Args:
        workflows_dir: Path to workflows directory

    Returns:
        List of activity functions decorated with @activity.defn
    """
    log = logging.getLogger(__name__)
    discovered: List[Any] = []
    if not workflows_dir.exists():
        log.warning(f"Workflows directory does not exist: {workflows_dir}")
        return discovered
    log.info(f"Scanning for workflow activities in: {workflows_dir}")
    for entry in workflows_dir.iterdir():
        # Only real workflow directories; ignore files, hidden dirs, caches.
        if not entry.is_dir():
            continue
        if entry.name.startswith('.') or entry.name == '__pycache__':
            continue
        if not (entry / "activities.py").exists():
            log.debug(f"No activities.py in {entry.name}, skipping")
            continue
        try:
            module_name = f"toolbox.workflows.{entry.name}.activities"
            log.info(f"Importing activities module: {module_name}")
            try:
                module = importlib.import_module(module_name)
            except Exception as e:
                log.error(
                    f"Failed to import activities module {module_name}: {e}",
                    exc_info=True
                )
                continue
            # Collect decorated functions; compare the count before/after to
            # detect an activities.py that defines none.
            count_before = len(discovered)
            for fn_name, fn in inspect.getmembers(module, inspect.isfunction):
                if hasattr(fn, '__temporal_activity_definition'):
                    discovered.append(fn)
                    log.info(
                        f"✓ Discovered activity: {fn_name} from {entry.name}"
                    )
            if len(discovered) == count_before:
                log.warning(
                    f"Workflow {entry.name} has activities.py but no @activity.defn decorated functions"
                )
        except Exception as e:
            log.error(
                f"Error processing activities from {entry.name}: {e}",
                exc_info=True
            )
            continue
    log.info(f"Discovered {len(discovered)} workflow-specific activities")
    return discovered
async def main():
    """
    Main worker entry point.

    Reads configuration from environment variables, discovers workflows and
    activities from the mounted toolbox, connects to Temporal, and runs the
    worker until interrupted.

    Exits the process with status 1 when no workflows are found, the
    Temporal connection fails, or the Worker cannot be constructed.
    """
    # Get configuration from environment; defaults target a local Temporal
    # server and derive the task queue name from the vertical.
    vertical = os.getenv("WORKER_VERTICAL", "secrets")
    temporal_address = os.getenv("TEMPORAL_ADDRESS", "localhost:7233")
    temporal_namespace = os.getenv("TEMPORAL_NAMESPACE", "default")
    task_queue = os.getenv("WORKER_TASK_QUEUE", f"{vertical}-queue")
    max_concurrent_activities = int(os.getenv("MAX_CONCURRENT_ACTIVITIES", "5"))
    # Startup banner with the effective configuration
    logger.info("=" * 60)
    logger.info(f"FuzzForge Vertical Worker: {vertical}")
    logger.info("=" * 60)
    logger.info(f"Temporal Address: {temporal_address}")
    logger.info(f"Temporal Namespace: {temporal_namespace}")
    logger.info(f"Task Queue: {task_queue}")
    logger.info(f"Max Concurrent Activities: {max_concurrent_activities}")
    logger.info("=" * 60)
    # Discover workflows for this vertical
    logger.info(f"Discovering workflows for vertical: {vertical}")
    workflows = await discover_workflows(vertical)
    # A worker with no workflows can never process a task — fail fast so the
    # container exits visibly instead of idling.
    if not workflows:
        logger.error(f"No workflows found for vertical: {vertical}")
        logger.error("Worker cannot start without workflows. Exiting...")
        sys.exit(1)
    # Discover activities from workflow directories
    logger.info("Discovering workflow-specific activities...")
    workflows_dir = Path("/app/toolbox/workflows")
    workflow_activities = await discover_activities(workflows_dir)
    # Combine the three common storage activities (target download, cache
    # cleanup, result upload) with workflow-specific activities.
    activities = [
        get_target_activity,
        cleanup_cache_activity,
        upload_results_activity
    ] + workflow_activities
    logger.info(
        f"Total activities registered: {len(activities)} "
        f"(3 common + {len(workflow_activities)} workflow-specific)"
    )
    # Connect to Temporal; exit non-zero on failure so orchestration retries.
    logger.info(f"Connecting to Temporal at {temporal_address}...")
    try:
        client = await Client.connect(
            temporal_address,
            namespace=temporal_namespace
        )
        logger.info("✓ Connected to Temporal successfully")
    except Exception as e:
        logger.error(f"Failed to connect to Temporal: {e}", exc_info=True)
        sys.exit(1)
    # Create worker with discovered workflows and activities
    logger.info(f"Creating worker on task queue: {task_queue}")
    try:
        worker = Worker(
            client,
            task_queue=task_queue,
            workflows=workflows,
            activities=activities,
            max_concurrent_activities=max_concurrent_activities
        )
        logger.info("✓ Worker created successfully")
    except Exception as e:
        logger.error(f"Failed to create worker: {e}", exc_info=True)
        sys.exit(1)
    # Start worker: worker.run() blocks until shutdown or error.
    logger.info("=" * 60)
    logger.info(f"🚀 Worker started for vertical '{vertical}'")
    logger.info(f"📦 Registered {len(workflows)} workflows")
    logger.info(f"⚙️ Registered {len(activities)} activities")
    logger.info(f"📨 Listening on task queue: {task_queue}")
    logger.info("=" * 60)
    logger.info("Worker is ready to process tasks...")
    try:
        await worker.run()
    except KeyboardInterrupt:
        logger.info("Shutting down worker (keyboard interrupt)...")
    except Exception as e:
        # Re-raise so the __main__ guard logs it as fatal and exits non-zero.
        logger.error(f"Worker error: {e}", exc_info=True)
        raise
if __name__ == "__main__":
    # Script entrypoint: run the async main loop and translate the outcome
    # into a process exit status for the container runtime.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # SIGINT / Ctrl-C: clean shutdown, implicit exit status 0.
        logger.info("Worker stopped")
    except Exception as e:
        # Any other fatal error: log with traceback and signal failure.
        logger.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)