Mirror of https://github.com/FuzzingLabs/fuzzforge_ai.git (synced 2026-02-13 11:52:47 +00:00)

Compare commits: dev...feat/andro (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 5da3f1e071 |  |

@@ -26,7 +26,7 @@ What type of security workflow is this?
## Files
Please attach or provide links to your workflow files:

- [ ] `workflow.py` - Main Temporal flow implementation
- [ ] `workflow.py` - Main Prefect flow implementation
- [ ] `Dockerfile` - Container definition
- [ ] `metadata.yaml` - Workflow metadata
- [ ] Test files or examples

79 .github/pull_request_template.md (vendored)
@@ -1,79 +0,0 @@
## Description

<!-- Provide a brief description of the changes in this PR -->

## Type of Change

<!-- Mark the appropriate option with an 'x' -->

- [ ] 🐛 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (non-breaking change which adds functionality)
- [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] 📝 Documentation update
- [ ] 🔧 Configuration change
- [ ] ♻️ Refactoring (no functional changes)
- [ ] 🎨 Style/formatting changes
- [ ] ✅ Test additions or updates

## Related Issues

<!-- Link to related issues using #issue_number -->
<!-- Example: Closes #123, Relates to #456 -->

## Changes Made

<!-- List the specific changes made in this PR -->

-
-
-

## Testing

<!-- Describe the tests you ran to verify your changes -->

### Tested Locally

- [ ] All tests pass (`pytest`, `uv build`, etc.)
- [ ] Linting passes (`ruff check`)
- [ ] Code builds successfully

### Worker Changes (if applicable)

- [ ] Docker images build successfully (`docker compose build`)
- [ ] Worker containers start correctly
- [ ] Tested with actual workflow execution

### Documentation

- [ ] Documentation updated (if needed)
- [ ] README updated (if needed)
- [ ] CHANGELOG.md updated (if user-facing changes)

## Pre-Merge Checklist

<!-- Ensure all items are completed before requesting review -->

- [ ] My code follows the project's coding standards
- [ ] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published

### Worker-Specific Checks (if workers/ modified)

- [ ] All worker files properly tracked by git (not gitignored)
- [ ] Worker validation script passes (`.github/scripts/validate-workers.sh`)
- [ ] Docker images build without errors
- [ ] Worker configuration updated in `docker-compose.yml` (if needed)

## Screenshots (if applicable)

<!-- Add screenshots to help explain your changes -->

## Additional Notes

<!-- Any additional information that reviewers should know -->

127 .github/scripts/validate-workers.sh (vendored)
@@ -1,127 +0,0 @@
#!/bin/bash
# Worker Validation Script
# Ensures all workers defined in docker-compose.yml exist in the repository
# and are properly tracked by git.

set -e

echo "🔍 Validating worker completeness..."

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

ERRORS=0
WARNINGS=0

# Extract worker service names from docker-compose.yml
echo ""
echo "📋 Checking workers defined in docker-compose.yml..."
WORKERS=$(grep -E "^\s+worker-" docker-compose.yml | grep -v "#" | cut -d: -f1 | tr -d ' ' | sort -u)

if [ -z "$WORKERS" ]; then
    echo -e "${RED}❌ No workers found in docker-compose.yml${NC}"
    exit 1
fi

echo "Found workers:"
for worker in $WORKERS; do
    echo "  - $worker"
done

# Check each worker
echo ""
echo "🔎 Validating worker files..."
for worker in $WORKERS; do
    WORKER_DIR="workers/${worker#worker-}"

    echo ""
    echo "Checking $worker ($WORKER_DIR)..."

    # Check if directory exists
    if [ ! -d "$WORKER_DIR" ]; then
        echo -e "${RED}  ❌ Directory not found: $WORKER_DIR${NC}"
        ERRORS=$((ERRORS + 1))
        continue
    fi

    # Check Dockerfile (single file or multi-platform pattern)
    if [ -f "$WORKER_DIR/Dockerfile" ]; then
        # Single Dockerfile
        if ! git ls-files --error-unmatch "$WORKER_DIR/Dockerfile" &> /dev/null; then
            echo -e "${RED}  ❌ File not tracked by git: $WORKER_DIR/Dockerfile${NC}"
            echo -e "${YELLOW}     Check .gitignore patterns!${NC}"
            ERRORS=$((ERRORS + 1))
        else
            echo -e "${GREEN}  ✓ Dockerfile (tracked)${NC}"
        fi
    elif compgen -G "$WORKER_DIR/Dockerfile.*" > /dev/null; then
        # Multi-platform Dockerfiles (e.g., Dockerfile.amd64, Dockerfile.arm64)
        PLATFORM_DOCKERFILES=$(ls "$WORKER_DIR"/Dockerfile.* 2>/dev/null)
        DOCKERFILE_FOUND=false
        for dockerfile in $PLATFORM_DOCKERFILES; do
            if git ls-files --error-unmatch "$dockerfile" &> /dev/null; then
                echo -e "${GREEN}  ✓ $(basename "$dockerfile") (tracked)${NC}"
                DOCKERFILE_FOUND=true
            else
                echo -e "${RED}  ❌ File not tracked by git: $dockerfile${NC}"
                ERRORS=$((ERRORS + 1))
            fi
        done
        if [ "$DOCKERFILE_FOUND" = false ]; then
            echo -e "${RED}  ❌ No platform-specific Dockerfiles found${NC}"
            ERRORS=$((ERRORS + 1))
        fi
    else
        echo -e "${RED}  ❌ Missing Dockerfile or Dockerfile.* files${NC}"
        ERRORS=$((ERRORS + 1))
    fi

    # Check other required files
    REQUIRED_FILES=("requirements.txt" "worker.py")
    for file in "${REQUIRED_FILES[@]}"; do
        FILE_PATH="$WORKER_DIR/$file"

        if [ ! -f "$FILE_PATH" ]; then
            echo -e "${RED}  ❌ Missing file: $FILE_PATH${NC}"
            ERRORS=$((ERRORS + 1))
        else
            # Check if file is tracked by git
            if ! git ls-files --error-unmatch "$FILE_PATH" &> /dev/null; then
                echo -e "${RED}  ❌ File not tracked by git: $FILE_PATH${NC}"
                echo -e "${YELLOW}     Check .gitignore patterns!${NC}"
                ERRORS=$((ERRORS + 1))
            else
                echo -e "${GREEN}  ✓ $file (tracked)${NC}"
            fi
        fi
    done
done

# Check for any ignored worker files
echo ""
echo "🚫 Checking for gitignored worker files..."
IGNORED_FILES=$(git check-ignore workers/*/* 2>/dev/null || true)
if [ -n "$IGNORED_FILES" ]; then
    echo -e "${YELLOW}⚠️ Warning: Some worker files are being ignored:${NC}"
    echo "$IGNORED_FILES" | while read -r file; do
        echo -e "${YELLOW}  - $file${NC}"
    done
    WARNINGS=$((WARNINGS + 1))
fi

# Summary
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if [ $ERRORS -eq 0 ] && [ $WARNINGS -eq 0 ]; then
    echo -e "${GREEN}✅ All workers validated successfully!${NC}"
    exit 0
elif [ $ERRORS -eq 0 ]; then
    echo -e "${YELLOW}⚠️ Validation passed with $WARNINGS warning(s)${NC}"
    exit 0
else
    echo -e "${RED}❌ Validation failed with $ERRORS error(s) and $WARNINGS warning(s)${NC}"
    exit 1
fi

165 .github/workflows/benchmark.yml (vendored)
@@ -1,165 +0,0 @@
name: Benchmarks

on:
  # Disabled automatic runs - benchmarks not ready for CI/CD yet
  # schedule:
  #   - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger for testing
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # pull_request:
  #   paths:
  #     - 'backend/benchmarks/**'
  #     - 'backend/toolbox/modules/**'
  #     - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-benchmark pytest-benchmark[histogram]
          pip install -e ../sdk  # Install SDK for benchmarks

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram.svg

      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
          path: ./baseline
          search_artifacts: true

      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)

          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []

          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100

                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"

152 .github/workflows/examples/security-scan.yml (vendored)
@@ -1,152 +0,0 @@
# FuzzForge CI/CD Example - Security Scanning
#
# This workflow demonstrates how to integrate FuzzForge into your CI/CD pipeline
# for automated security testing on pull requests and pushes.
#
# Features:
# - Runs entirely in GitHub Actions (no external infrastructure needed)
# - Auto-starts FuzzForge services on-demand
# - Fails builds on error-level SARIF findings
# - Uploads SARIF results to GitHub Security tab
# - Exports findings as artifacts
#
# Prerequisites:
# - Ubuntu runner with Docker support
# - At least 4GB RAM available
# - ~90 seconds startup time

name: Security Scan Example

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main]

jobs:
  security-scan:
    name: Security Assessment
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start FuzzForge
        run: |
          bash scripts/ci-start.sh

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install FuzzForge CLI
        run: |
          pip install ./cli

      - name: Initialize FuzzForge
        run: |
          ff init --api-url http://localhost:8000 --name "GitHub Actions Security Scan"

      - name: Run Security Assessment
        run: |
          ff workflow run security_assessment . \
            --wait \
            --fail-on error \
            --export-sarif results.sarif

      - name: Upload SARIF to GitHub Security
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

      - name: Upload findings as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: security-findings
          path: results.sarif
          retention-days: 30

      - name: Stop FuzzForge
        if: always()
        run: |
          bash scripts/ci-stop.sh

  secret-scan:
    name: Secret Detection
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      - name: Start FuzzForge
        run: bash scripts/ci-start.sh

      - name: Install CLI
        run: |
          pip install ./cli

      - name: Initialize & Scan
        run: |
          ff init --api-url http://localhost:8000 --name "Secret Detection"
          ff workflow run secret_detection . \
            --wait \
            --fail-on all \
            --export-sarif secrets.sarif

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: secret-scan-results
          path: secrets.sarif
          retention-days: 30

      - name: Cleanup
        if: always()
        run: bash scripts/ci-stop.sh

  # Example: Nightly fuzzing campaign (long-running)
  nightly-fuzzing:
    name: Nightly Fuzzing
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # Only run on schedule
    if: github.event_name == 'schedule'
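    # NOTE: 'on:' above defines no schedule trigger, so this job never fires as
    # committed; a cron entry under 'on: schedule:' would be needed for nightly runs.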

    steps:
      - uses: actions/checkout@v4

      - name: Start FuzzForge
        run: bash scripts/ci-start.sh

      - name: Install CLI
        run: pip install ./cli

      - name: Run Fuzzing Campaign
        run: |
          ff init --api-url http://localhost:8000
          ff workflow run atheris_fuzzing . \
            max_iterations=100000000 \
            timeout_seconds=7200 \
            --wait \
            --export-sarif fuzzing-results.sarif
        # Don't fail on fuzzing findings, just report
        continue-on-error: true

      - name: Upload fuzzing results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fuzzing-results
          path: fuzzing-results.sarif
          retention-days: 90

      - name: Cleanup
        if: always()
        run: bash scripts/ci-stop.sh

248 .github/workflows/test.yml (vendored)
@@ -1,248 +0,0 @@
name: Tests

on:
  push:
    branches: [ main, master, dev, develop, feature/** ]
  pull_request:
    branches: [ main, master, dev, develop ]

jobs:
  validate-workers:
    name: Validate Workers
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Run worker validation
        run: |
          chmod +x .github/scripts/validate-workers.sh
          .github/scripts/validate-workers.sh

  build-workers:
    name: Build Worker Docker Images
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for proper diff

      - name: Check which workers were modified
        id: check-workers
        run: |
          if [ "${{ github.event_name }}" == "pull_request" ]; then
            # For PRs, check changed files
            CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD)
            echo "Changed files:"
            echo "$CHANGED_FILES"
          else
            # For direct pushes, check last commit
            CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD)
          fi

          # Check if docker-compose.yml changed (build all workers)
          if echo "$CHANGED_FILES" | grep -q "^docker-compose.yml"; then
            echo "workers_to_build=worker-python worker-secrets worker-rust worker-android worker-ossfuzz" >> $GITHUB_OUTPUT
            echo "workers_modified=true" >> $GITHUB_OUTPUT
            echo "✅ docker-compose.yml modified - building all workers"
            exit 0
          fi

          # Detect which specific workers changed
          WORKERS_TO_BUILD=""

          if echo "$CHANGED_FILES" | grep -q "^workers/python/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-python"
            echo "✅ Python worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/secrets/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-secrets"
            echo "✅ Secrets worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/rust/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-rust"
            echo "✅ Rust worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/android/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-android"
            echo "✅ Android worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/ossfuzz/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-ossfuzz"
            echo "✅ OSS-Fuzz worker modified"
          fi

          if [ -z "$WORKERS_TO_BUILD" ]; then
            echo "workers_modified=false" >> $GITHUB_OUTPUT
            echo "⏭️ No worker changes detected - skipping build"
          else
            echo "workers_to_build=$WORKERS_TO_BUILD" >> $GITHUB_OUTPUT
            echo "workers_modified=true" >> $GITHUB_OUTPUT
            echo "Building workers:$WORKERS_TO_BUILD"
          fi

      - name: Set up Docker Buildx
        if: steps.check-workers.outputs.workers_modified == 'true'
        uses: docker/setup-buildx-action@v3

      - name: Build worker images
        if: steps.check-workers.outputs.workers_modified == 'true'
        run: |
          WORKERS="${{ steps.check-workers.outputs.workers_to_build }}"
          echo "Building worker Docker images: $WORKERS"
          docker compose build $WORKERS --no-cache
        continue-on-error: false

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ruff mypy

      - name: Run ruff
        run: ruff check backend/src backend/toolbox backend/tests backend/benchmarks --output-format=github

      - name: Run mypy (continue on error)
        run: mypy backend/src backend/toolbox || true
        continue-on-error: true

  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.11', '3.12']

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-cov pytest-xdist

      - name: Run unit tests
        working-directory: ./backend
        run: |
          pytest tests/unit/ \
            -v \
            --cov=toolbox/modules \
            --cov=src \
            --cov-report=xml \
            --cov-report=term \
            --cov-report=html \
            -n auto

      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.11'
        uses: codecov/codecov-action@v4
        with:
          file: ./backend/coverage.xml
          flags: unittests
          name: codecov-backend

      - name: Upload coverage HTML
        if: matrix.python-version == '3.11'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: ./backend/htmlcov/

  # integration-tests:
  #   name: Integration Tests
  #   runs-on: ubuntu-latest
  #   needs: unit-tests
  #
  #   services:
  #     postgres:
  #       image: postgres:15
  #       env:
  #         POSTGRES_USER: postgres
  #         POSTGRES_PASSWORD: postgres
  #         POSTGRES_DB: fuzzforge_test
  #       options: >-
  #         --health-cmd pg_isready
  #         --health-interval 10s
  #         --health-timeout 5s
  #         --health-retries 5
  #       ports:
  #         - 5432:5432
  #
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Set up Python
  #       uses: actions/setup-python@v5
  #       with:
  #         python-version: '3.11'
  #
  #     - name: Set up Docker Buildx
  #       uses: docker/setup-buildx-action@v3
  #
  #     - name: Install Python dependencies
  #       working-directory: ./backend
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install -e ".[dev]"
  #         pip install pytest pytest-asyncio
  #
  #     - name: Start services (Temporal, MinIO)
  #       run: |
  #         docker-compose -f docker-compose.yml up -d temporal minio
  #         sleep 30
  #
  #     - name: Run integration tests
  #       working-directory: ./backend
  #       run: |
  #         pytest tests/integration/ -v --tb=short
  #       env:
  #         DATABASE_URL: postgresql://postgres:postgres@localhost:5432/fuzzforge_test
  #         TEMPORAL_ADDRESS: localhost:7233
  #         MINIO_ENDPOINT: localhost:9000
  #
  #     - name: Shutdown services
  #       if: always()
  #       run: docker-compose down

  test-summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: [validate-workers, lint, unit-tests]
    if: always()
    steps:
      - name: Check test results
        run: |
          if [ "${{ needs.validate-workers.result }}" != "success" ]; then
            echo "Worker validation failed"
            exit 1
          fi
          if [ "${{ needs.unit-tests.result }}" != "success" ]; then
            echo "Unit tests failed"
            exit 1
          fi
          echo "All tests passed!"

26 .gitignore (vendored)
@@ -185,13 +185,6 @@ logs/
# FuzzForge project directories (user projects should manage their own .gitignore)
.fuzzforge/

# Docker volume configs (keep .env.example but ignore actual .env)
volumes/env/.env

# Vendored proxy sources (kept locally for reference)
ai/proxy/bifrost/
ai/proxy/litellm/

# Test project databases and configurations
test_projects/*/.fuzzforge/
test_projects/*/findings.db*
@@ -208,7 +201,6 @@ dev_config.yaml
reports/
output/
findings/
*.sarif
*.sarif.json
*.html.report
security_report.*
@@ -237,17 +229,6 @@ yarn-error.log*
*.key
*.p12
*.pfx

# Exception: Secret detection benchmark test files (not real secrets)
!test_projects/secret_detection_benchmark/
!test_projects/secret_detection_benchmark/**
!**/secret_detection_benchmark_GROUND_TRUTH.json
!**/secret_detection/results/

# Exception: Allow workers/secrets/ directory (secrets detection worker)
!workers/secrets/
!workers/secrets/**

secret*
secrets/
credentials*
@@ -307,9 +288,4 @@ test_projects/*/wallet.json
test_projects/*/.npmrc
test_projects/*/.git-credentials
test_projects/*/credentials.*
test_projects/*/api_keys.*
test_projects/*/ci-*.sh

# -------------------- Internal Documentation --------------------
# Weekly summaries and temporary project documentation
WEEK_SUMMARY*.md
test_projects/*/api_keys.*

@@ -1,121 +0,0 @@
# FuzzForge CI/CD Example - GitLab CI
#
# This file demonstrates how to integrate FuzzForge into your GitLab CI/CD pipeline.
# Copy this to `.gitlab-ci.yml` in your project root to enable security scanning.
#
# Features:
# - Runs entirely in GitLab runners (no external infrastructure)
# - Auto-starts FuzzForge services on-demand
# - Fails pipelines on critical/high severity findings
# - Uploads SARIF reports to GitLab Security Dashboard
# - Exports findings as artifacts
#
# Prerequisites:
# - GitLab Runner with Docker support (docker:dind)
# - At least 4GB RAM available
# - ~90 seconds startup time

stages:
  - security

variables:
  FUZZFORGE_API_URL: "http://localhost:8000"
  DOCKER_DRIVER: overlay2
  DOCKER_TLS_CERTDIR: ""

# Base template for all FuzzForge jobs
.fuzzforge_template:
  image: docker:24
  services:
    - docker:24-dind
  before_script:
    # Install dependencies
    - apk add --no-cache bash curl python3 py3-pip git
    # Start FuzzForge
    - bash scripts/ci-start.sh
    # Install CLI
    - pip3 install ./cli --break-system-packages
    # Initialize project
    - ff init --api-url $FUZZFORGE_API_URL --name "GitLab CI Security Scan"
  after_script:
    # Cleanup
    - bash scripts/ci-stop.sh || true

# Security Assessment - Comprehensive code analysis
security:scan:
  extends: .fuzzforge_template
  stage: security
  timeout: 30 minutes
  script:
    - ff workflow run security_assessment . --wait --fail-on error --export-sarif results.sarif
  artifacts:
    when: always
    reports:
      sast: results.sarif
    paths:
      - results.sarif
    expire_in: 30 days
  only:
    - merge_requests
    - main
    - develop

# Secret Detection - Scan for exposed credentials
security:secrets:
  extends: .fuzzforge_template
  stage: security
  timeout: 15 minutes
  script:
    - ff workflow run secret_detection . --wait --fail-on all --export-sarif secrets.sarif
  artifacts:
    when: always
    paths:
      - secrets.sarif
    expire_in: 30 days
  only:
    - merge_requests
    - main

# Nightly Fuzzing - Long-running fuzzing campaign (scheduled only)
security:fuzzing:
  extends: .fuzzforge_template
  stage: security
  timeout: 2 hours
  script:
    - |
      ff workflow run atheris_fuzzing . \
        max_iterations=100000000 \
        timeout_seconds=7200 \
        --wait \
        --export-sarif fuzzing-results.sarif
  artifacts:
    when: always
    paths:
      - fuzzing-results.sarif
    expire_in: 90 days
  allow_failure: true  # Don't fail pipeline on fuzzing findings
  only:
    - schedules

# OSS-Fuzz Campaign (for supported projects)
security:ossfuzz:
  extends: .fuzzforge_template
  stage: security
  timeout: 1 hour
  script:
    - |
      ff workflow run ossfuzz_campaign . \
        project_name=your-project-name \
        campaign_duration_hours=0.5 \
        --wait \
        --export-sarif ossfuzz-results.sarif
  artifacts:
    when: always
    paths:
      - ossfuzz-results.sarif
    expire_in: 90 days
  allow_failure: true
  only:
    - schedules
  # Uncomment and set your project name
  # when: manual

1020 ARCHITECTURE.md (file diff suppressed because it is too large)

200 CHANGELOG.md
@@ -1,200 +0,0 @@
# Changelog

All notable changes to FuzzForge will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### 📝 Documentation
- Added comprehensive worker startup documentation across all guides
- Added workflow-to-worker mapping tables in README, troubleshooting guide, getting started guide, and docker setup guide
- Fixed broken documentation links in CLI reference
- Added WEEK_SUMMARY*.md pattern to .gitignore

---

## [0.7.3] - 2025-10-30

### 🎯 Major Features

#### Android Static Analysis Workflow
- **Added comprehensive Android security testing workflow** (`android_static_analysis`):
  - Jadx decompiler for APK → Java source code decompilation
  - OpenGrep/Semgrep static analysis with custom Android security rules
  - MobSF integration for comprehensive mobile security scanning
  - SARIF report generation with unified findings format
- Test results: Successfully decompiled 4,145 Java files, found 8 security vulnerabilities
- Full workflow completes in ~1.5 minutes

#### Platform-Aware Worker Architecture
- **ARM64 (Apple Silicon) support**:
  - Automatic platform detection (ARM64 vs x86_64) in CLI using `platform.machine()`
  - Worker metadata convention (`metadata.yaml`) for platform-specific capabilities
  - Multi-Dockerfile support: `Dockerfile.amd64` (full toolchain) and `Dockerfile.arm64` (optimized)
  - Conditional module imports for graceful degradation (MobSF skips on ARM64)
  - Backend path resolution via `FUZZFORGE_HOST_ROOT` for CLI worker management
- **Worker selection logic**:
  - CLI automatically selects the appropriate Dockerfile based on the detected platform (see the sketch below)
  - Multi-strategy path resolution (API → .fuzzforge marker → environment variable)
  - Platform-specific tool availability documented in metadata
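
A minimal sketch of that selection logic (illustrative only; the helper name and fallback behavior are assumptions, not the actual CLI code):

```python
import os
import platform

def pick_dockerfile(worker_dir: str) -> str:
    """Pick a platform-specific Dockerfile, falling back to the generic one."""
    machine = platform.machine().lower()
    # Apple Silicon reports 'arm64' (macOS) or 'aarch64' (Linux)
    suffix = "arm64" if machine in ("arm64", "aarch64") else "amd64"
    candidate = os.path.join(worker_dir, f"Dockerfile.{suffix}")
    return candidate if os.path.exists(candidate) else os.path.join(worker_dir, "Dockerfile")
```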

#### Python SAST Workflow
- **Added Python Static Application Security Testing workflow** (`python_sast`):
  - Bandit for Python security linting (SAST)
  - MyPy for static type checking
  - Safety for dependency vulnerability scanning
  - Integrated SARIF reporter for unified findings format
  - Auto-start Python worker on-demand

### ✨ Enhancements

#### CI/CD Improvements
- Added automated worker validation in CI pipeline
- Docker build checks for all workers before merge
- Worker file change detection for selective builds
- Optimized Docker layer caching for faster builds
- Dev branch testing workflow triggers

#### CLI Improvements
- Fixed live monitoring bug in `ff monitor live` command
- Enhanced `ff findings` command with better table formatting
- Improved `ff monitor` with clearer status displays
- Auto-start workers on-demand when workflows require them
- Better error messages with actionable manual start commands

#### Worker Management
- Standardized worker service names (`worker-python`, `worker-android`, etc.)
- Added missing `worker-secrets` to repository
- Improved worker naming consistency across codebase

#### LiteLLM Integration
- Centralized LLM provider management with proxy
- Governance and request/response routing
- OTEL collector integration for observability
- Environment-based configurable timeouts
- Optional `.env.litellm` configuration

### 🐛 Bug Fixes

- Fixed MobSF API key generation from secret file (SHA256 hash)
- Corrected Temporal activity names (decompile_with_jadx, scan_with_opengrep, scan_with_mobsf)
- Resolved linter errors across codebase
- Fixed unused import issues to pass CI checks
- Removed deprecated workflow parameters
- Docker Compose version compatibility fixes

### 🔧 Technical Changes

- Conditional import pattern for optional dependencies (MobSF on ARM64)
- Multi-platform Dockerfile architecture
- Worker metadata convention for capability declaration
- Improved CI worker build optimization
- Enhanced storage activity error handling

### 📝 Test Projects

- Added `test_projects/android_test/` with BeetleBug.apk and shopnest.apk
- Android workflow validation with real APK samples
- ARM64 platform testing and validation

---

## [0.7.2] - 2025-10-22

### 🐛 Bug Fixes
- Fixed worker naming inconsistencies across codebase
- Improved monitor command consolidation and usability
- Enhanced findings CLI with better formatting and display
- Added missing secrets worker to repository

### 📝 Documentation
- Added benchmark results files to git for secret detection workflows

**Note:** v0.7.1 was re-tagged as v0.7.2 (both point to the same commit)

---

## [0.7.0] - 2025-10-16

### 🎯 Major Features

#### Secret Detection Workflows
- **Added three secret detection workflows**:
  - `gitleaks_detection` - Pattern-based secret scanning
  - `trufflehog_detection` - Entropy-based secret detection with verification
  - `llm_secret_detection` - AI-powered semantic secret detection using LLMs
- **Comprehensive benchmarking infrastructure** (see the sketch below):
  - 32-secret ground truth dataset for precision/recall testing
  - Difficulty levels: 12 Easy, 10 Medium, 10 Hard secrets
  - SARIF-formatted output for all workflows
  - Achieved 100% recall with LLM-based detection on benchmark dataset
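
A minimal sketch of how recall and precision can be computed from such a dataset (the findings file name and both JSON shapes are assumptions for illustration; only the ground-truth file name appears in the repository):

```python
import json

# Hypothetical schemas: a list of {"file", "secret"} records on both sides
with open("secret_detection_benchmark_GROUND_TRUTH.json") as f:
    truth = {(s["file"], s["secret"]) for s in json.load(f)["secrets"]}

with open("findings.json") as f:  # exported workflow findings (hypothetical name)
    found = {(s["file"], s["secret"]) for s in json.load(f)["findings"]}

true_positives = truth & found
recall = len(true_positives) / len(truth)        # e.g. 27/32 ≈ 84.4%
precision = len(true_positives) / len(found) if found else 0.0
print(f"recall={recall:.1%} precision={precision:.1%}")
```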

#### AI Module & Agent Integration
- Added A2A (Agent-to-Agent) wrapper for multi-agent orchestration
- Task agent implementation with Google ADK
- LLM analysis workflow for code security analysis
- Reactivated AI agent command (`ff ai agent`)

#### Temporal Migration Complete
- Fully migrated from Prefect to Temporal for workflow orchestration
- MinIO storage for unified file handling (replaces volume mounts)
- Vertical workers with pre-built security toolchains
- Improved worker lifecycle management

#### CI/CD Integration
- Ephemeral deployment model for testing
- Automated workflow validation in CI pipeline

### ✨ Enhancements

#### Documentation
- Updated README for Temporal + MinIO architecture
- Added `.env` configuration guide for AI agent API keys
- Fixed worker startup instructions with correct service names
- Updated docker compose commands to modern syntax

#### Worker Management
- Added `worker_service` field to API responses for correct service naming
- Improved error messages with actionable manual start commands
- Fixed default parameters for gitleaks (now uses `no_git=True` by default)

### 🐛 Bug Fixes

- Fixed default parameters from metadata.yaml not being applied to workflows when no parameters provided
- Fixed gitleaks workflow failing on uploaded directories without Git history
- Fixed worker startup command suggestions (now uses `docker compose up -d` with service names)
- Fixed missing `cognify_text` method in CogneeProjectIntegration

### 🔧 Technical Changes

- Updated all package versions to 0.7.0
- Improved SARIF output formatting for secret detection workflows
- Enhanced benchmark validation with ground truth JSON
- Better integration between CLI and backend for worker management

### 📝 Test Projects

- Added `secret_detection_benchmark` with 32 documented secrets
- Ground truth JSON for automated precision/recall calculations
- Updated `vulnerable_app` for comprehensive security testing

---

## [0.6.0] - Undocumented

### Features
- Initial Temporal migration
- Fuzzing workflows (Atheris, Cargo, OSS-Fuzz)
- Security assessment workflow
- Basic CLI commands

**Note:** No git tag exists for v0.6.0. Release date undocumented.

---

[0.7.3]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.2...v0.7.3
[0.7.2]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.0...v0.7.2
[0.7.0]: https://github.com/FuzzingLabs/fuzzforge_ai/releases/tag/v0.7.0
[0.6.0]: https://github.com/FuzzingLabs/fuzzforge_ai/tree/v0.6.0

@@ -84,10 +84,9 @@ docs(readme): update installation instructions
```
backend/toolbox/workflows/your_workflow/
├── __init__.py
├── workflow.py        # Main Temporal workflow
├── activities.py      # Workflow activities (optional)
├── metadata.yaml      # Workflow metadata (includes vertical field)
└── requirements.txt   # Additional dependencies (optional)
├── workflow.py        # Main Prefect flow
├── metadata.yaml      # Workflow metadata
└── Dockerfile         # Container definition
```

2. **Register Your Workflow**

@@ -1,421 +0,0 @@
# FuzzForge Temporal Architecture - Quick Start Guide

This guide walks you through starting and testing the new Temporal-based architecture.

## Prerequisites

- Docker and Docker Compose installed
- At least 2GB free RAM (core services only, workers start on-demand)
- Ports available: 7233, 8233, 9000, 9001, 8000

## Step 1: Start Core Services

```bash
# From project root
cd /path/to/fuzzforge_ai

# Start core services (Temporal, MinIO, Backend)
docker-compose up -d

# Workers are pre-built but don't auto-start (saves ~6-7GB RAM)
# They'll start automatically when workflows need them

# Check status
docker-compose ps
```

**Expected output:**
```
NAME                            STATUS    PORTS
fuzzforge-minio                 healthy   0.0.0.0:9000-9001->9000-9001/tcp
fuzzforge-temporal              healthy   0.0.0.0:7233->7233/tcp
fuzzforge-temporal-postgresql   healthy   5432/tcp
fuzzforge-backend               healthy   0.0.0.0:8000->8000/tcp
fuzzforge-minio-setup           exited (0)
# Workers NOT running (will start on-demand)
```

**First startup takes ~30-60 seconds** for health checks to pass.

## Step 2: Verify Worker Discovery

Check worker logs to ensure workflows are discovered:

```bash
docker logs fuzzforge-worker-rust
```

**Expected output:**
```
============================================================
FuzzForge Vertical Worker: rust
============================================================
Temporal Address: temporal:7233
Task Queue: rust-queue
Max Concurrent Activities: 5
============================================================
Discovering workflows for vertical: rust
Importing workflow module: toolbox.workflows.rust_test.workflow
✓ Discovered workflow: RustTestWorkflow from rust_test (vertical: rust)
Discovered 1 workflows for vertical 'rust'
Connecting to Temporal at temporal:7233...
✓ Connected to Temporal successfully
Creating worker on task queue: rust-queue
✓ Worker created successfully
============================================================
🚀 Worker started for vertical 'rust'
📦 Registered 1 workflows
⚙️ Registered 3 activities
📨 Listening on task queue: rust-queue
============================================================
Worker is ready to process tasks...
```

## Step 2.5: Worker Lifecycle Management (New in v0.7.0)

Workers start on-demand when workflows need them:

```bash
# Check worker status (should show Exited or not running)
docker ps -a --filter "name=fuzzforge-worker"

# Run a workflow - worker starts automatically
ff workflow run ossfuzz_campaign . project_name=zlib

# Worker is now running
docker ps --filter "name=fuzzforge-worker-ossfuzz"
```

**Configuration** (`.fuzzforge/config.yaml`):
```yaml
workers:
  auto_start_workers: true     # Default: auto-start
  auto_stop_workers: false     # Default: keep running
  worker_startup_timeout: 60   # Startup timeout in seconds
```

**CLI Control**:
```bash
# Disable auto-start
ff workflow run ossfuzz_campaign . --no-auto-start

# Enable auto-stop after completion
ff workflow run ossfuzz_campaign . --wait --auto-stop
```

## Step 3: Access Web UIs

### Temporal Web UI
- URL: http://localhost:8233
- View workflows, executions, and task queues

### MinIO Console
- URL: http://localhost:9001
- Login: `fuzzforge` / `fuzzforge123`
- View uploaded targets and results

## Step 4: Test Workflow Execution

### Option A: Using Temporal CLI (tctl)

```bash
# Install tctl (if not already installed)
brew install temporal  # macOS
# or download from https://github.com/temporalio/tctl/releases

# Execute test workflow
tctl workflow run \
  --address localhost:7233 \
  --taskqueue rust-queue \
  --workflow_type RustTestWorkflow \
  --input '{"target_id": "test-123", "test_message": "Hello Temporal!"}'
```

### Option B: Using Python Client

Create `test_workflow.py`:

```python
import asyncio
from temporalio.client import Client

async def main():
    # Connect to Temporal
    client = await Client.connect("localhost:7233")

    # Start workflow
    result = await client.execute_workflow(
        "RustTestWorkflow",
        {"target_id": "test-123", "test_message": "Hello Temporal!"},
        id="test-workflow-1",
        task_queue="rust-queue"
    )

    print("Workflow result:", result)

if __name__ == "__main__":
    asyncio.run(main())
```

```bash
python test_workflow.py
```

### Option C: Upload Target and Run (Full Flow)

```python
# upload_and_run.py
import asyncio
import boto3
from pathlib import Path
from temporalio.client import Client

async def main():
    # 1. Upload target to MinIO
    s3 = boto3.client(
        's3',
        endpoint_url='http://localhost:9000',
        aws_access_key_id='fuzzforge',
        aws_secret_access_key='fuzzforge123',
        region_name='us-east-1'
    )

    # Create a test file
    test_file = Path('/tmp/test_target.txt')
    test_file.write_text('This is a test target file')

    # Upload to MinIO
    target_id = 'my-test-target-001'
    s3.upload_file(
        str(test_file),
        'targets',
        f'{target_id}/target'
    )
    print(f"✓ Uploaded target: {target_id}")

    # 2. Run workflow
    client = await Client.connect("localhost:7233")

    result = await client.execute_workflow(
        "RustTestWorkflow",
        {"target_id": target_id, "test_message": "Full flow test!"},
        id=f"workflow-{target_id}",
        task_queue="rust-queue"
    )

    print("✓ Workflow completed!")
    print("Results:", result)

if __name__ == "__main__":
    asyncio.run(main())
```

```bash
# Install dependencies
pip install temporalio boto3

# Run test
python upload_and_run.py
```

## Step 5: Monitor Execution

### View in Temporal UI

1. Open http://localhost:8233
2. Click on "Workflows"
3. Find your workflow by ID
4. Click to see:
   - Execution history
   - Activity results
   - Error stack traces (if any)

### View Logs

```bash
# Worker logs (shows activity execution)
docker logs -f fuzzforge-worker-rust

# Temporal server logs
docker logs -f fuzzforge-temporal
```

### Check MinIO Storage

1. Open http://localhost:9001
2. Login: `fuzzforge` / `fuzzforge123`
3. Browse buckets:
   - `targets/` - Uploaded target files
   - `results/` - Workflow results (if uploaded)
   - `cache/` - Worker cache (temporary)

## Troubleshooting

### Services Not Starting

```bash
# Check logs for all services
docker-compose -f docker-compose.temporal.yaml logs

# Check specific service
docker-compose -f docker-compose.temporal.yaml logs temporal
docker-compose -f docker-compose.temporal.yaml logs minio
docker-compose -f docker-compose.temporal.yaml logs worker-rust
```

### Worker Not Discovering Workflows

**Issue**: Worker logs show "No workflows found for vertical: rust"

**Solution**:
1. Check toolbox mount: `docker exec fuzzforge-worker-rust ls /app/toolbox/workflows`
2. Verify metadata.yaml exists and has `vertical: rust` (minimal example below)
3. Check workflow.py has `@workflow.defn` decorator
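
A minimal pair of files that would satisfy checks 2 and 3 might look like this (the `name` field and the stub body are illustrative assumptions; only `vertical` and the decorator are what the checks above require):

```yaml
# backend/toolbox/workflows/rust_test/metadata.yaml
name: rust_test
vertical: rust   # must match the worker's vertical for discovery
```

```python
# backend/toolbox/workflows/rust_test/workflow.py
from temporalio import workflow

@workflow.defn
class RustTestWorkflow:
    @workflow.run
    async def run(self, params: dict) -> dict:
        # Stub only: a real workflow would call activities here
        return {"status": "ok", "received": params}
```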

### Cannot Connect to Temporal

**Issue**: `Failed to connect to Temporal`

**Solution**:
```bash
# Wait for Temporal to be healthy
docker-compose -f docker-compose.temporal.yaml ps

# Check Temporal health manually
curl http://localhost:8233

# Restart Temporal if needed
docker-compose -f docker-compose.temporal.yaml restart temporal
```

### MinIO Connection Failed

**Issue**: `Failed to download target`

**Solution**:
```bash
# Check MinIO is running
docker ps | grep minio

# Check buckets exist
docker exec fuzzforge-minio mc ls fuzzforge/

# Verify target was uploaded
docker exec fuzzforge-minio mc ls fuzzforge/targets/
```

### Workflow Hangs

**Issue**: Workflow starts but never completes

**Check**:
1. Worker logs for errors: `docker logs fuzzforge-worker-rust`
2. Activity timeouts in workflow code
3. Target file actually exists in MinIO

## Scaling

### Add More Workers

```bash
# Scale rust workers horizontally
docker-compose -f docker-compose.temporal.yaml up -d --scale worker-rust=3

# Verify all workers are running
docker ps | grep worker-rust
```

### Increase Concurrent Activities

Edit `docker-compose.temporal.yaml`:

```yaml
worker-rust:
  environment:
    MAX_CONCURRENT_ACTIVITIES: 10  # Increase from 5
```

```bash
# Apply changes
docker-compose -f docker-compose.temporal.yaml up -d worker-rust
```

## Cleanup

```bash
# Stop all services
docker-compose -f docker-compose.temporal.yaml down

# Remove volumes (WARNING: deletes all data)
docker-compose -f docker-compose.temporal.yaml down -v

# Remove everything including images
docker-compose -f docker-compose.temporal.yaml down -v --rmi all
```

## Next Steps

1. **Add More Workflows**: Create workflows in `backend/toolbox/workflows/`
2. **Add More Verticals**: Create new worker types (android, web, etc.) - see `workers/README.md`
3. **Integrate with Backend**: Update FastAPI backend to use Temporal client
4. **Update CLI**: Modify `ff` CLI to work with Temporal workflows

## Useful Commands

```bash
# View all logs
docker-compose -f docker-compose.temporal.yaml logs -f

# View specific service logs
docker-compose -f docker-compose.temporal.yaml logs -f worker-rust

# Restart a service
docker-compose -f docker-compose.temporal.yaml restart worker-rust

# Check service status
docker-compose -f docker-compose.temporal.yaml ps

# Execute command in worker
docker exec -it fuzzforge-worker-rust bash

# View worker Python environment
docker exec fuzzforge-worker-rust pip list

# Check workflow discovery manually
docker exec fuzzforge-worker-rust python -c "
from pathlib import Path
import yaml
for w in Path('/app/toolbox/workflows').iterdir():
    if w.is_dir():
        meta = w / 'metadata.yaml'
        if meta.exists():
            print(f'{w.name}: {yaml.safe_load(meta.read_text()).get(\"vertical\")}')"
```

## Architecture Overview

```
┌─────────────┐     ┌──────────────┐     ┌──────────────┐
│  Temporal   │────▶│  Task Queue  │────▶│ Worker-Rust  │
│   Server    │     │  rust-queue  │     │ (Long-lived) │
└─────────────┘     └──────────────┘     └──────┬───────┘
       │                                        │
       ▼                                        ▼
┌─────────────┐                         ┌──────────────┐
│  Postgres   │                         │    MinIO     │
│  (State)    │                         │  (Storage)   │
└─────────────┘                         └──────────────┘
                                               │
                                        ┌──────┴──────┐
                                        │             │
                                   ┌────▼────┐   ┌────▼─────┐
                                   │ Targets │   │ Results  │
                                   └─────────┘   └──────────┘
```

## Support

- **Documentation**: See `ARCHITECTURE.md` for detailed design
- **Worker Guide**: See `workers/README.md` for adding verticals
- **Issues**: Open GitHub issue with logs and steps to reproduce

146 README.md
@@ -6,11 +6,11 @@
<p align="center"><strong>AI-powered workflow automation and AI Agents for AppSec, Fuzzing & Offensive Security</strong></p>

<p align="center">
<a href="https://discord.gg/8XEX33UUwZ/"><img src="https://img.shields.io/discord/1420767905255133267?logo=discord&label=Discord" alt="Discord"></a>
<a href="https://discord.com/invite/acqv9FVG"><img src="https://img.shields.io/discord/1420767905255133267?logo=discord&label=Discord" alt="Discord"></a>
<a href="LICENSE"><img src="https://img.shields.io/badge/license-BSL%20%2B%20Apache-orange" alt="License: BSL + Apache"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11%2B-blue" alt="Python 3.11+"/></a>
<a href="https://fuzzforge.ai"><img src="https://img.shields.io/badge/Website-fuzzforge.ai-blue" alt="Website"/></a>
<img src="https://img.shields.io/badge/version-0.7.3-green" alt="Version">
<img src="https://img.shields.io/badge/version-0.6.0-green" alt="Version">
<a href="https://github.com/FuzzingLabs/fuzzforge_ai/stargazers"><img src="https://img.shields.io/github/stars/FuzzingLabs/fuzzforge_ai?style=social" alt="GitHub Stars"></a>

</p>
@@ -41,29 +41,6 @@
FuzzForge is **open source**, built to empower security teams, researchers, and the community.

> 🚧 FuzzForge is under active development. Expect breaking changes.
>
> **Note:** Fuzzing workflows (`atheris_fuzzing`, `cargo_fuzzing`, `ossfuzz_campaign`) are in early development. OSS-Fuzz integration is under heavy active development. For stable workflows, use: `security_assessment`, `gitleaks_detection`, `trufflehog_detection`, or `llm_secret_detection`.

---

## Demo - Manual Workflow Setup

![FuzzForge Demo](https://github.com/FuzzingLabs/fuzzforge_landing/raw/main/fuzzforge_demo_manual.gif)

_Setting up and running security workflows through the interface_

👉 More installation options in the [Documentation](https://docs.fuzzforge.ai).

---

## ✨ Key Features

- 🤖 **AI Agents for Security** – Specialized agents for AppSec, reversing, and fuzzing
- 🛠 **Workflow Automation** – Define & execute AppSec workflows as code
- 📈 **Vulnerability Research at Scale** – Rediscover 1-days & find 0-days with automation
- 🔗 **Fuzzer Integration** – Atheris (Python), cargo-fuzz (Rust), OSS-Fuzz campaigns
- 🌐 **Community Marketplace** – Share workflows, corpora, PoCs, and modules
- 🔒 **Enterprise Ready** – Team/Corp cloud tiers for scaling offensive security

---

@@ -77,20 +54,14 @@ If you find FuzzForge useful, please star the repo to support development 🚀

---

## 🔍 Secret Detection Benchmarks
## ✨ Key Features

FuzzForge includes three secret detection workflows benchmarked on a controlled dataset of **32 documented secrets** (12 Easy, 10 Medium, 10 Hard):

| Tool | Recall | Secrets Found | Speed |
|------|--------|---------------|-------|
| **LLM (gpt-5-mini)** | **84.4%** | 41 | 618s |
| **LLM (gpt-4o-mini)** | 56.2% | 30 | 297s |
| **Gitleaks** | 37.5% | 12 | 5s |
| **TruffleHog** | 0.0% | 1 | 5s |

📊 [Full benchmark results and analysis](backend/benchmarks/by_category/secret_detection/results/comparison_report.md)

The LLM-based detector excels at finding obfuscated and hidden secrets through semantic analysis, while pattern-based tools (Gitleaks) offer speed for standard secret formats. (Recall is measured against the 32 ground-truth secrets, so 84.4% ≈ 27/32; "Secrets Found" counts total reported findings, which can exceed the ground-truth count.)
- 🤖 **AI Agents for Security** – Specialized agents for AppSec, reversing, and fuzzing
|
||||
- 🛠 **Workflow Automation** – Define & execute AppSec workflows as code
|
||||
- 📈 **Vulnerability Research at Scale** – Rediscover 1-days & find 0-days with automation
|
||||
- 🔗 **Fuzzer Integration** – AFL, Honggfuzz, AFLnet, StateAFL & more
|
||||
- 🌐 **Community Marketplace** – Share workflows, corpora, PoCs, and modules
|
||||
- 🔒 **Enterprise Ready** – Team/Corp cloud tiers for scaling offensive security
|
||||
|
||||
---
|
||||
|
||||
@@ -110,22 +81,38 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
**Docker**
|
||||
For containerized workflows, see the [Docker Installation Guide](https://docs.docker.com/get-docker/).
|
||||
|
||||
#### Configure AI Agent API Keys (Optional)
|
||||
#### Configure Docker Daemon
|
||||
|
||||
For AI-powered workflows, configure your LLM API keys:
|
||||
Before running `docker compose up`, configure Docker to allow insecure registries (required for the local registry).
|
||||
|
||||
```bash
|
||||
cp volumes/env/.env.template volumes/env/.env
|
||||
# Edit volumes/env/.env and add your API keys (OpenAI, Anthropic, Google, etc.)
|
||||
# Add your key to LITELLM_GEMINI_API_KEY
|
||||
Add the following to your Docker daemon configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"insecure-registries": [
|
||||
"localhost:5000",
|
||||
"host.docker.internal:5001",
|
||||
"registry:5000"
|
||||
]
|
||||
}
|
||||
```
|
||||
> Don't change the OPENAI_API_KEY default value, as it is used for the LLM proxy.

This is required for:
- `llm_secret_detection` workflow
- AI agent features (`ff ai agent`)
**macOS (Docker Desktop):**
1. Open Docker Desktop
2. Go to Settings → Docker Engine
3. Add the `insecure-registries` configuration to the JSON
4. Click "Apply & Restart"

Basic security workflows (gitleaks, trufflehog, security_assessment) work without this configuration.
**Linux:**
1. Edit `/etc/docker/daemon.json` (create if it doesn't exist):
```bash
sudo nano /etc/docker/daemon.json
```
2. Add the configuration above
3. Restart Docker:
```bash
sudo systemctl restart docker
```
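
To confirm the daemon picked up the change, you can inspect the daemon info (a generic Docker check, not FuzzForge-specific; output layout varies by Docker version):

```bash
# The registries configured above should be listed under "Insecure Registries:"
docker info | grep -A 4 "Insecure Registries"
```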

### CLI Installation

@@ -144,55 +131,40 @@ uv tool install --python python3.12 .

## ⚡ Quickstart

Run your first workflow with **Temporal orchestration** and **automatic file upload**:
Run your first workflow:

```bash
# 1. Clone the repo
git clone https://github.com/fuzzinglabs/fuzzforge_ai.git
cd fuzzforge_ai

# 2. Copy the default LLM env config
cp volumes/env/.env.template volumes/env/.env

# 3. Start FuzzForge with Temporal
# 2. Build & run with Docker
# Set registry host for your OS (local registry is mandatory)
# macOS/Windows (Docker Desktop):
export REGISTRY_HOST=host.docker.internal
# Linux (default):
# export REGISTRY_HOST=localhost
docker compose up -d

# 4. Start the Python worker (needed for security_assessment workflow)
docker compose up -d worker-python
```

> The first launch can take 2-3 minutes for services to initialize ☕
>
> Workers don't auto-start by default (saves RAM). Start the worker you need before running workflows.

**Workflow-to-Worker Quick Reference:**

| Workflow | Worker Required | Startup Command |
|----------|----------------|-----------------|
| `security_assessment`, `python_sast`, `llm_analysis`, `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` |
| `android_static_analysis` | worker-android | `docker compose up -d worker-android` |
| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` |
| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` |
| `llm_secret_detection`, `trufflehog_detection`, `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` |
> The first launch can take 5-10 minutes due to Docker image building - a good time for a coffee break ☕

```bash
# 5. Run your first workflow (files are automatically uploaded)
cd test_projects/vulnerable_app/
fuzzforge init # Initialize FuzzForge project
ff workflow run security_assessment . # Start workflow - CLI uploads files automatically!

# The CLI will:
# - Detect the local directory
# - Create a compressed tarball
# - Upload to backend (via MinIO)
# - Start the workflow on vertical worker
# 3. Run your first workflow
cd test_projects/vulnerable_app/ # Go into the test directory
fuzzforge init # Init a fuzzforge project
ff workflow run security_assessment . # Start a workflow (you can also use ff command)
```

**What's running:**
- **Temporal**: Workflow orchestration (UI at http://localhost:8080)
- **MinIO**: File storage for targets (Console at http://localhost:9001)
- **Vertical Workers**: Pre-built workers with security toolchains
- **Backend API**: FuzzForge REST API (http://localhost:8000)
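
Once the stack is up, a quick sanity check is to curl the endpoints listed above (plain `curl` against the documented URLs; the exact status codes are not guaranteed here, only that the services answer):

```bash
curl -sI http://localhost:8080 | head -n 1   # Temporal UI
curl -sI http://localhost:9001 | head -n 1   # MinIO console
curl -sI http://localhost:8000 | head -n 1   # FuzzForge backend API
```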

### Manual Workflow Setup



_Setting up and running security workflows through the interface_

👉 More installation options in the [Documentation](https://docs.fuzzforge.ai).

---

## AI-Powered Workflow Execution

@@ -204,7 +176,7 @@ _AI agents automatically analyzing code and providing security insights_

- 🌐 [Website](https://fuzzforge.ai)
- 📖 [Documentation](https://docs.fuzzforge.ai)
- 💬 [Community Discord](https://discord.gg/8XEX33UUwZ)
- 💬 [Community Discord](https://discord.com/invite/acqv9FVG)
- 🎓 [FuzzingLabs Academy](https://academy.fuzzinglabs.com/?coupon=GITHUB_FUZZFORGE)

---

@@ -233,7 +205,7 @@ Planned features and improvements:

- ☁️ Multi-tenant SaaS platform with team collaboration
- 📊 Advanced reporting & analytics

👉 Follow updates in the [GitHub issues](../../issues) and [Discord](https://discord.gg/8XEX33UUwZ)
👉 Follow updates in the [GitHub issues](../../issues) and [Discord](https://discord.com/invite/acqv9FVG).

---

@@ -1,6 +1,6 @@

# FuzzForge AI Module

FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge security platform through natural language. It orchestrates local tooling, registered Agent-to-Agent (A2A) peers, and the Temporal-powered backend while keeping long-running context in memory and project knowledge graphs.
FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge security platform through natural language. It orchestrates local tooling, registered Agent-to-Agent (A2A) peers, and the Prefect-powered backend while keeping long-running context in memory and project knowledge graphs.

## Quick Start

@@ -32,7 +32,7 @@ FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge securi

```bash
fuzzforge ai agent
```
Keep the backend running (Temporal API at `FUZZFORGE_MCP_URL`) so workflow commands succeed.
Keep the backend running (Prefect API at `FUZZFORGE_MCP_URL`) so workflow commands succeed.

## Everyday Workflow

@@ -61,7 +61,7 @@ Inside `fuzzforge ai agent` you can mix slash commands and free-form prompts:

/sendfile SecurityAgent src/report.md "Please review"
You> route_to SecurityAnalyzer: scan ./backend for secrets
You> run fuzzforge workflow static_analysis_scan on ./test_projects/demo
You> search project knowledge for "temporal status" using INSIGHTS
You> search project knowledge for "prefect status" using INSIGHTS
```

Artifacts created during the conversation are served from `.fuzzforge/artifacts/` and exposed through the A2A HTTP API.
@@ -84,7 +84,7 @@ Use these to validate the setup once the agent shell is running:

- `run fuzzforge workflow static_analysis_scan on ./backend with target_branch=main`
- `show findings for that run once it finishes`
- `refresh the project knowledge graph for ./backend`
- `search project knowledge for "temporal readiness" using INSIGHTS`
- `search project knowledge for "prefect readiness" using INSIGHTS`
- `/recall terraform secrets`
- `/memory status`
- `ROUTE_TO SecurityAnalyzer: audit infrastructure_vulnerable`

@@ -1,9 +0,0 @@

__pycache__
*.pyc
*.pyo
*.pytest_cache
*.coverage
coverage.xml
build/
dist/
.env

@@ -1,82 +0,0 @@

# Architecture Overview

This package is a minimal ADK agent that keeps runtime behaviour and A2A access in separate layers so it can double as boilerplate.

## Directory Layout

```text
agent_with_adk_format/
├── __init__.py               # Exposes root_agent for ADK runners
├── a2a_hot_swap.py           # JSON-RPC helper for model/prompt swaps
├── README.md, QUICKSTART.md  # Operational docs
├── ARCHITECTURE.md           # This document
├── .env                      # Active environment (gitignored)
├── .env.example              # Environment template
└── litellm_agent/
    ├── agent.py      # Root Agent definition (LiteLLM shell)
    ├── callbacks.py  # before_agent / before_model hooks
    ├── config.py     # Defaults, state keys, control prefix
    ├── control.py    # HOTSWAP command parsing/serialization
    ├── state.py      # Session state wrapper + LiteLLM factory
    ├── tools.py      # set_model / set_prompt / get_config
    ├── prompts.py    # Base instruction text
    └── agent.json    # A2A agent card (served under /.well-known)
```

```mermaid
flowchart TD
    subgraph ADK Runner
        A["adk api_server / adk web / adk run"]
        B["agent_with_adk_format/__init__.py"]
        C["litellm_agent/agent.py (root_agent)"]
        D["HotSwapState (state.py)"]
        E["LiteLlm(model, provider)"]
    end

    subgraph Session State
        S1[app:litellm_agent/model]
        S2[app:litellm_agent/provider]
        S3[app:litellm_agent/prompt]
    end

    A --> B --> C
    C --> D
    D -->|instantiate| E
    D --> S1
    D --> S2
    D --> S3
    E --> C
```

## Runtime Flow (ADK Runners)

1. **Startup**: `adk api_server`/`adk web` imports `agent_with_adk_format`, which exposes `root_agent` from `litellm_agent/agent.py`. `.env` at package root is loaded before the runner constructs the agent.
2. **Session State**: `callbacks.py` and `tools.py` read/write through `state.py`. We store `model`, `provider`, and `prompt` keys (prefixed `app:litellm_agent/…`) which persist across turns; see the sketch after this list.
3. **Instruction Generation**: `provide_instruction` composes the base persona from `prompts.py` plus any stored prompt override. The current model/provider is appended for observability.
4. **Model Hot-Swap**: When a control message is detected (`[HOTSWAP:MODEL:…]`) the callback parses it via `control.py`, updates the session state, and calls `state.apply_state_to_agent` to instantiate a new `LiteLlm(model=…, custom_llm_provider=…)`. ADK runners reuse that instance for subsequent turns.
5. **Prompt Hot-Swap**: Similar path (`set_prompt` tool/callback) updates state; the dynamic instruction immediately reflects the change.
6. **Config Reporting**: Both the callback and the tool surface the summary string produced by `HotSwapState.describe()`, ensuring CLI, A2A, and UI all show the same data.
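
A minimal sketch of the session-state round trip from step 2, using a plain dict in place of the ADK state mapping (the dict is the only assumption; in a runner the mapping comes from `callback_context.state`):

```python
from litellm_agent.state import HotSwapState

session_state = {}  # stand-in for the ADK session state mapping

# Read current values (falls back to DEFAULT_MODEL/DEFAULT_PROVIDER when keys are absent)
state = HotSwapState.from_mapping(session_state)

# Mutate and write back under the app:litellm_agent/... prefixed keys
state.model = "gpt-4o"
state.provider = "openai"
state.persist(session_state)

print(session_state["app:litellm_agent/model"])  # gpt-4o
```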

## A2A Integration

- `agent.json` defines the agent card and enables ADK to register `/a2a/litellm_agent` routes when launched with `--a2a`.
- `a2a_hot_swap.py` uses `a2a.client.A2AClient` to programmatically send control messages and user text via JSON-RPC. It supports streaming when available and falls back to blocking requests otherwise.

```mermaid
sequenceDiagram
    participant Client as a2a_hot_swap.py
    participant Server as ADK API Server
    participant Agent as root_agent

    Client->>Server: POST /a2a/litellm_agent (message/stream or message/send)
    Server->>Agent: Invoke callbacks/tools
    Agent->>Server: Status / artifacts / final message
    Server->>Client: Streamed Task events
    Client->>Client: Extract text & print summary
```

## Extending the Boilerplate

- Add tools under `litellm_agent/tools.py` and register them in `agent.py` to expose new capabilities.
- Use `state.py` to track additional configuration or session data (store under your own prefix to avoid collisions).
- When layering business logic, prefer expanding callbacks or adding higher-level agents while leaving the hot-swap mechanism untouched for reuse.

@@ -1,71 +0,0 @@

# Docker & Kubernetes Deployment

## Local Docker

Build from the repository root:

```bash
docker build -t litellm-hot-swap:latest agent_with_adk_format
```

Run the container (port 8000, inject provider keys via env file or flags):

```bash
docker run \
  -p 8000:8000 \
  --env-file agent_with_adk_format/.env \
  litellm-hot-swap:latest
```

The container serves Uvicorn on `http://localhost:8000`. Update `.env` (or pass `-e KEY=value`) before launching if you plan to hot-swap providers.

## Kubernetes (example manifest)

Use the same image, optionally pushed to a registry (`docker tag` + `docker push`). A simple Deployment/Service pair:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-hot-swap
spec:
  replicas: 1
  selector:
    matchLabels:
      app: litellm-hot-swap
  template:
    metadata:
      labels:
        app: litellm-hot-swap
    spec:
      containers:
        - name: server
          image: <REGISTRY_URI>/litellm-hot-swap:latest
          ports:
            - containerPort: 8000
          env:
            - name: PORT
              value: "8000"
            - name: LITELLM_MODEL
              value: gemini/gemini-2.0-flash-001
            # Add provider keys as needed
            # - name: OPENAI_API_KEY
            #   valueFrom:
            #     secretKeyRef:
            #       name: litellm-secrets
            #       key: OPENAI_API_KEY
---
apiVersion: v1
kind: Service
metadata:
  name: litellm-hot-swap
spec:
  type: LoadBalancer
  selector:
    app: litellm-hot-swap
  ports:
    - port: 80
      targetPort: 8000
```

Apply with `kubectl apply -f deployment.yaml`. Provide secrets via `env` or Kubernetes Secrets.

@@ -1,24 +0,0 @@

# syntax=docker/dockerfile:1

FROM python:3.11-slim AS base

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=8000

WORKDIR /app

COPY requirements.txt ./requirements.txt
RUN pip install --upgrade pip && pip install -r requirements.txt

COPY . /app/agent_with_adk_format
WORKDIR /app/agent_with_adk_format
ENV PYTHONPATH=/app

# Copy and set up entrypoint
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh

ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

@@ -1,61 +0,0 @@

# Quick Start Guide

## Launch the Agent

From the repository root you can expose the agent through any ADK entry point:

```bash
# A2A / HTTP server
adk api_server --a2a --port 8000 agent_with_adk_format

# Browser UI
adk web agent_with_adk_format

# Interactive terminal
adk run agent_with_adk_format
```

The A2A server exposes the JSON-RPC endpoint at `http://localhost:8000/a2a/litellm_agent`.

## Hot-Swap from the Command Line

Use the bundled helper to change model and prompt via A2A without touching the UI:

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --config \
  --context demo-session
```

The script sends the control messages for you and prints the server’s responses. The `--context` flag lets you reuse the same conversation across multiple invocations.

### Follow-up Messages

Once the swaps are applied you can send a user message on the same session:

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --context demo-session \
  --message "Summarise the current configuration in five words."
```

### Clearing the Prompt

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --context demo-session \
  --prompt "" \
  --config
```

## Control Messages (for reference)

Behind the scenes the helper sends plain text messages understood by the callbacks:

- `[HOTSWAP:MODEL:provider/model]`
- `[HOTSWAP:PROMPT:text]`
- `[HOTSWAP:GET_CONFIG]`

You can craft the same messages from any A2A client if you prefer.
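
For example, the same strings can be built with the package's own helper rather than typed by hand (shown with `litellm_agent.control`; any A2A client can then deliver them as ordinary user text):

```python
from litellm_agent.control import HotSwapCommand, build_control_message

print(build_control_message(HotSwapCommand.MODEL, "openai/gpt-4o"))      # [HOTSWAP:MODEL:openai/gpt-4o]
print(build_control_message(HotSwapCommand.PROMPT, "You are concise."))  # [HOTSWAP:PROMPT:You are concise.]
print(build_control_message(HotSwapCommand.GET_CONFIG))                  # [HOTSWAP:GET_CONFIG]
```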

@@ -1,365 +0,0 @@

# LiteLLM Agent with Hot-Swap Support

A flexible AI agent powered by LiteLLM that supports runtime hot-swapping of models and system prompts. Compatible with ADK and A2A protocols.

## Features

- 🔄 **Hot-Swap Models**: Change LLM models on-the-fly without restarting
- 📝 **Dynamic Prompts**: Update system prompts during conversation
- 🌐 **Multi-Provider Support**: Works with OpenAI, Anthropic, Google, OpenRouter, and more
- 🔌 **A2A Compatible**: Can be served as an A2A agent
- 🛠️ **ADK Integration**: Run with `adk web`, `adk run`, or `adk api_server`

## Architecture

```
task_agent/
├── __init__.py       # Exposes root_agent for ADK
├── a2a_hot_swap.py   # JSON-RPC helper for hot-swapping
├── README.md         # This guide
├── QUICKSTART.md     # Quick-start walkthrough
├── .env              # Active environment (gitignored)
├── .env.example      # Environment template
└── litellm_agent/
    ├── __init__.py
    ├── agent.py      # Main agent implementation
    ├── agent.json    # A2A agent card
    ├── callbacks.py  # ADK callbacks
    ├── config.py     # Defaults and state keys
    ├── control.py    # HOTSWAP message helpers
    ├── prompts.py    # Base instruction
    ├── state.py      # Session state utilities
    └── tools.py      # set_model / set_prompt / get_config
```

## Setup

### 1. Environment Configuration

Copying the example file is optional: the repository already ships with a root-level `.env` seeded with defaults. Adjust the values at the package root:
```bash
cd task_agent
# Optionally refresh from the template
# cp .env.example .env
```

Edit `.env` (or `.env.example`) and add your proxy + API keys. The agent must be restarted after changes so the values are picked up:
```bash
# Route every request through the proxy container (use http://localhost:10999 from the host)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000

# Default model + provider the agent boots with
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai

# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default

# Upstream keys stay inside the proxy. Store real secrets under the LiteLLM
# aliases and the bootstrapper mirrors them into .env.litellm for the proxy container.
LITELLM_OPENAI_API_KEY=your_real_openai_api_key
LITELLM_ANTHROPIC_API_KEY=your_real_anthropic_key
LITELLM_GEMINI_API_KEY=your_real_gemini_key
LITELLM_MISTRAL_API_KEY=your_real_mistral_key
LITELLM_OPENROUTER_API_KEY=your_real_openrouter_key
```

> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`).

The bootstrap container provisions LiteLLM, copies provider secrets into
`volumes/env/.env.litellm`, and rewrites `volumes/env/.env` with the virtual key.
Populate the `LITELLM_*_API_KEY` values before the first launch so the proxy can
reach your upstream providers as soon as the bootstrap script runs.

### 2. Install Dependencies

```bash
pip install "google-adk" "a2a-sdk[all]" "python-dotenv" "litellm"
```

### 3. Run in Docker

Build the container (this image can be pushed to any registry or run locally):

```bash
docker build -t litellm-hot-swap:latest task_agent
```

Provide environment configuration at runtime (either pass variables individually or mount a file):

```bash
docker run \
  -p 8000:8000 \
  --env-file task_agent/.env \
  litellm-hot-swap:latest
```

The container starts Uvicorn with the ADK app (`main.py`) listening on port 8000.

## Running the Agent

### Option 1: ADK Web UI (Recommended for Testing)

Start the web interface:
```bash
adk web task_agent
```

> **Tip:** before launching `adk web`/`adk run`/`adk api_server`, ensure the root-level `.env` contains valid API keys for any provider you plan to hot-swap to (e.g. set `OPENAI_API_KEY` before switching to `openai/gpt-4o`).

Open http://localhost:8000 in your browser and interact with the agent.

### Option 2: ADK Terminal

Run in terminal mode:
```bash
adk run task_agent
```

### Option 3: A2A API Server

Start as an A2A-compatible API server:
```bash
adk api_server --a2a --port 8000 task_agent
```

The agent will be available at: `http://localhost:8000/a2a/litellm_agent`

### Command-line helper

Use the bundled script to drive hot-swaps and user messages over A2A:

```bash
python task_agent/a2a_hot_swap.py \
  --url http://127.0.0.1:8000/a2a/litellm_agent \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --config \
  --context demo-session
```

To send a follow-up prompt in the same session (with a larger timeout for long answers):

```bash
python task_agent/a2a_hot_swap.py \
  --url http://127.0.0.1:8000/a2a/litellm_agent \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --message "Give me a fuzzing harness." \
  --context demo-session \
  --timeout 120
```

> Ensure the corresponding provider keys are present in `.env` (or passed via environment variables) before issuing model swaps.

## Hot-Swap Tools

The agent provides three special tools:

### 1. `set_model` - Change the LLM Model

Change the model during conversation:

```
User: Use the set_model tool to change to gpt-4o with openai provider
Agent: ✅ Model configured to: openai/gpt-4o
       This change is active now!
```

**Parameters:**
- `model`: Model name (e.g., "gpt-4o", "claude-3-sonnet-20240229")
- `custom_llm_provider`: Optional provider prefix (e.g., "openai", "anthropic", "openrouter")

**Examples:**
- OpenAI: `set_model(model="gpt-4o", custom_llm_provider="openai")`
- Anthropic: `set_model(model="claude-3-sonnet-20240229", custom_llm_provider="anthropic")`
- Google: `set_model(model="gemini-2.0-flash-001", custom_llm_provider="gemini")`

### 2. `set_prompt` - Change System Prompt

Update the system instructions:

```
User: Use set_prompt to change my behavior to "You are a helpful coding assistant"
Agent: ✅ System prompt updated:
       You are a helpful coding assistant

       This change is active now!
```

### 3. `get_config` - View Configuration

Check current model and prompt:

```
User: Use get_config to show me your configuration
Agent: 📊 Current Configuration:
       ━━━━━━━━━━━━━━━━━━━━━━
       Model: openai/gpt-4o
       System Prompt: You are a helpful coding assistant
       ━━━━━━━━━━━━━━━━━━━━━━
```

## Testing

### Basic A2A Client Test

```bash
python agent/test_a2a_client.py
```

### Hot-Swap Functionality Test

```bash
python agent/test_hotswap.py
```

This will:
1. Check initial configuration
2. Query with default model
3. Hot-swap to GPT-4o
4. Verify model changed
5. Change system prompt
6. Test new prompt behavior
7. Hot-swap to Claude
8. Verify final configuration

### Command-Line Hot-Swap Helper

You can trigger model and prompt changes directly against the A2A endpoint without the interactive CLI:

```bash
# Start the agent first (in another terminal):
adk api_server --a2a --port 8000 task_agent

# Apply swaps via pure A2A calls
python task_agent/a2a_hot_swap.py --model openai gpt-4o --prompt "You are concise." --config
python task_agent/a2a_hot_swap.py --model anthropic claude-3-sonnet-20240229 --context shared-session --config
python task_agent/a2a_hot_swap.py --prompt "" --context shared-session --config  # Clear the prompt and show current state
```

`--model` accepts either a single `provider/model` string or a separate provider and model argument pair. Add `--context` if you want to reuse the same conversation across invocations. Use `--config` to dump the agent's configuration after the changes are applied.

## Supported Models

### OpenAI
- `openai/gpt-4o`
- `openai/gpt-4-turbo`
- `openai/gpt-3.5-turbo`

### Anthropic
- `anthropic/claude-3-opus-20240229`
- `anthropic/claude-3-sonnet-20240229`
- `anthropic/claude-3-haiku-20240307`

### Google
- `gemini/gemini-2.0-flash-001`
- `gemini/gemini-2.5-pro-exp-03-25`
- `vertex_ai/gemini-2.0-flash-001`

### OpenRouter
- `openrouter/anthropic/claude-3-opus`
- `openrouter/openai/gpt-4`
- Any model from OpenRouter catalog

## How It Works

### Session State
- Model and prompt settings are stored in session state
- Each session maintains its own configuration
- Settings persist across messages in the same session

### Hot-Swap Mechanism
1. Tools update session state with new model/prompt
2. `before_agent_callback` checks for changes
3. If model changed, directly updates: `agent.model = LiteLlm(model=new_model)`
4. Dynamic instruction function reads custom prompt from session state
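
Condensed into code, the swap path is roughly the following (a sketch built from the package's `HotSwapState` and `apply_state_to_agent`; `invocation_context` is supplied by ADK at runtime and is only named here):

```python
from litellm_agent.state import HotSwapState, apply_state_to_agent

def hot_swap(invocation_context, session_state: dict) -> None:
    # Steps 1-2: read the (possibly tool-updated) state for this session
    state = HotSwapState.from_mapping(session_state)

    # Step 3: rebuild the LiteLlm instance and attach it to the running agent
    # (apply_state_to_agent sets invocation_context.agent.model = state.instantiate_llm())
    apply_state_to_agent(invocation_context, state)

    # Step 4: the dynamic instruction provider reads state.prompt on the next turn
```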

### A2A Compatibility
- Agent card at `agent.json` defines A2A metadata
- Served at `/a2a/litellm_agent` endpoint
- Compatible with A2A client protocol

## Example Usage

### Interactive Session

```python
from a2a.client import A2AClient
import asyncio

async def chat():
    client = A2AClient("http://localhost:8000/a2a/litellm_agent")
    context_id = "my-session-123"

    # Start with default model
    async for msg in client.send_message("Hello!", context_id=context_id):
        print(msg)

    # Switch to GPT-4
    async for msg in client.send_message(
        "Use set_model with model gpt-4o and provider openai",
        context_id=context_id
    ):
        print(msg)

    # Continue with new model
    async for msg in client.send_message(
        "Help me write a function",
        context_id=context_id
    ):
        print(msg)

asyncio.run(chat())
```

## Troubleshooting

### Model Not Found
- Ensure API key for the provider is set in `.env`
- Check model name is correct for the provider
- Verify LiteLLM supports the model (https://docs.litellm.ai/docs/providers)

### Connection Refused
- Ensure the agent is running (`adk api_server --a2a task_agent`)
- Check the port matches (default: 8000)
- Verify no firewall blocking localhost

### Hot-Swap Not Working
- Check that you're using the same `context_id` across messages
- Ensure the tool is being called (not just asked to switch)
- Look for `🔄 Hot-swapped model to:` in server logs

## Development

### Adding New Tools

```python
async def my_tool(tool_ctx: ToolContext, param: str) -> str:
    """Your tool description."""
    # Access session state
    tool_ctx.state["my_key"] = "my_value"
    return "Tool result"

# Add to agent
root_agent = LlmAgent(
    # ...
    tools=[set_model, set_prompt, get_config, my_tool],
)
```

### Modifying Callbacks

```python
async def after_model_callback(
    callback_context: CallbackContext,
    llm_response: LlmResponse
) -> Optional[LlmResponse]:
    """Modify response after model generates it."""
    # Your logic here
    return llm_response
```

## License

Apache 2.0

@@ -1,5 +0,0 @@

"""Package entry point for the ADK-formatted hot swap agent."""

from .litellm_agent.agent import root_agent

__all__ = ["root_agent"]

@@ -1,224 +0,0 @@

#!/usr/bin/env python3
"""Minimal A2A client utility for hot-swapping LiteLLM model/prompt."""

from __future__ import annotations

import argparse
import asyncio
from typing import Optional
from uuid import uuid4

import httpx
from a2a.client import A2AClient
from a2a.client.errors import A2AClientHTTPError
from a2a.types import (
    JSONRPCErrorResponse,
    Message,
    MessageSendConfiguration,
    MessageSendParams,
    Part,
    Role,
    SendMessageRequest,
    SendStreamingMessageRequest,
    Task,
    TaskArtifactUpdateEvent,
    TaskStatusUpdateEvent,
    TextPart,
)

from litellm_agent.control import (
    HotSwapCommand,
    build_control_message,
    parse_model_spec,
    serialize_model_spec,
)

DEFAULT_URL = "http://localhost:8000/a2a/litellm_agent"


async def _collect_text(client: A2AClient, message: str, context_id: str) -> str:
    """Send a message and collect streamed agent text into a single string."""

    params = MessageSendParams(
        configuration=MessageSendConfiguration(blocking=True),
        message=Message(
            context_id=context_id,
            message_id=str(uuid4()),
            role=Role.user,
            parts=[Part(root=TextPart(text=message))],
        ),
    )

    stream_request = SendStreamingMessageRequest(id=str(uuid4()), params=params)
    buffer: list[str] = []
    try:
        async for response in client.send_message_streaming(stream_request):
            root = response.root
            if isinstance(root, JSONRPCErrorResponse):
                raise RuntimeError(f"A2A error: {root.error}")

            payload = root.result
            buffer.extend(_extract_text(payload))
    except A2AClientHTTPError as exc:
        if "text/event-stream" not in str(exc):
            raise

        send_request = SendMessageRequest(id=str(uuid4()), params=params)
        response = await client.send_message(send_request)
        root = response.root
        if isinstance(root, JSONRPCErrorResponse):
            raise RuntimeError(f"A2A error: {root.error}")
        payload = root.result
        buffer.extend(_extract_text(payload))

    if buffer:
        buffer = list(dict.fromkeys(buffer))
    return "\n".join(buffer).strip()


def _extract_text(
    result: Message | Task | TaskStatusUpdateEvent | TaskArtifactUpdateEvent,
) -> list[str]:
    texts: list[str] = []
    if isinstance(result, Message):
        if result.role is Role.agent:
            for part in result.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    elif isinstance(result, Task) and result.history:
        for msg in result.history:
            if msg.role is Role.agent:
                for part in msg.parts:
                    root_part = part.root
                    text = getattr(root_part, "text", None)
                    if text:
                        texts.append(text)
    elif isinstance(result, TaskStatusUpdateEvent):
        message = result.status.message
        if message:
            texts.extend(_extract_text(message))
    elif isinstance(result, TaskArtifactUpdateEvent):
        artifact = result.artifact
        if artifact and artifact.parts:
            for part in artifact.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    return texts


def _split_model_args(model_args: Optional[list[str]]) -> tuple[Optional[str], Optional[str]]:
    if not model_args:
        return None, None

    if len(model_args) == 1:
        return model_args[0], None

    provider = model_args[0]
    model = " ".join(model_args[1:])
    return model, provider


async def hot_swap(
    url: str,
    *,
    model_args: Optional[list[str]],
    provider: Optional[str],
    prompt: Optional[str],
    message: Optional[str],
    show_config: bool,
    context_id: Optional[str],
    timeout: float,
) -> None:
    """Execute the requested hot-swap operations against the A2A endpoint."""

    timeout_config = httpx.Timeout(timeout)
    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        session_id = context_id or str(uuid4())

        model, derived_provider = _split_model_args(model_args)

        if model:
            spec = parse_model_spec(model, provider=provider or derived_provider)
            payload = serialize_model_spec(spec)
            control_msg = build_control_message(HotSwapCommand.MODEL, payload)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Model response: {result or '(no response)'}")

        if prompt is not None:
            control_msg = build_control_message(HotSwapCommand.PROMPT, prompt)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Prompt response: {result or '(no response)'}")

        if show_config:
            control_msg = build_control_message(HotSwapCommand.GET_CONFIG)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Config:\n{result or '(no response)'}")

        if message:
            result = await _collect_text(client, message, session_id)
            print(f"Message response: {result or '(no response)'}")

        print(f"Context ID: {session_id}")


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--url",
        default=DEFAULT_URL,
        help=f"A2A endpoint for the agent (default: {DEFAULT_URL})",
    )
    parser.add_argument(
        "--model",
        nargs="+",
        help="LiteLLM model spec: either 'provider/model' or '<provider> <model>'.",
    )
    parser.add_argument(
        "--provider",
        help="Optional LiteLLM provider when --model lacks a prefix.")
    parser.add_argument(
        "--prompt",
        help="Set the system prompt (omit to leave unchanged; empty string clears it).",
    )
    parser.add_argument(
        "--message",
        help="Send an additional user message after the swaps complete.")
    parser.add_argument(
        "--config",
        action="store_true",
        help="Print the agent configuration after performing swaps.")
    parser.add_argument(
        "--context",
        help="Optional context/session identifier to reuse across calls.")
    parser.add_argument(
        "--timeout",
        type=float,
        default=60.0,
        help="Request timeout (seconds) for A2A calls (default: 60).",
    )
    return parser.parse_args()


def main() -> None:
    args = parse_args()
    asyncio.run(
        hot_swap(
            args.url,
            model_args=args.model,
            provider=args.provider,
            prompt=args.prompt,
            message=args.message,
            show_config=args.config,
            context_id=args.context,
            timeout=args.timeout,
        )
    )


if __name__ == "__main__":
    main()
@@ -1,24 +0,0 @@

version: '3.8'

services:
  task-agent:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: fuzzforge-task-agent
    ports:
      - "10900:8000"
    env_file:
      - ../../../volumes/env/.env
    environment:
      - PORT=8000
      - PYTHONUNBUFFERED=1
    volumes:
      # Mount volumes/env for runtime config access
      - ../../../volumes/env:/app/config:ro
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -1,31 +0,0 @@

#!/bin/bash
set -e

# Wait for .env file to have keys (max 30 seconds)
echo "[task-agent] Waiting for virtual keys to be provisioned..."
for i in $(seq 1 30); do
    if [ -f /app/config/.env ]; then
        # Check if TASK_AGENT_API_KEY has a value (not empty)
        KEY=$(grep -E '^TASK_AGENT_API_KEY=' /app/config/.env | cut -d'=' -f2)
        if [ -n "$KEY" ] && [ "$KEY" != "" ]; then
            echo "[task-agent] Virtual keys found, loading environment..."
            # Export keys from .env file
            export TASK_AGENT_API_KEY="$KEY"
            export OPENAI_API_KEY=$(grep -E '^OPENAI_API_KEY=' /app/config/.env | cut -d'=' -f2)
            export FF_LLM_PROXY_BASE_URL=$(grep -E '^FF_LLM_PROXY_BASE_URL=' /app/config/.env | cut -d'=' -f2)
            echo "[task-agent] Loaded TASK_AGENT_API_KEY: ${TASK_AGENT_API_KEY:0:15}..."
            echo "[task-agent] Loaded FF_LLM_PROXY_BASE_URL: $FF_LLM_PROXY_BASE_URL"
            break
        fi
    fi
    echo "[task-agent] Keys not ready yet, waiting... ($i/30)"
    sleep 1
done

if [ -z "$TASK_AGENT_API_KEY" ]; then
    echo "[task-agent] ERROR: Virtual keys were not provisioned within 30 seconds!"
    exit 1
fi

echo "[task-agent] Starting uvicorn..."
exec "$@"
@@ -1,55 +0,0 @@

"""LiteLLM hot-swap agent package exports."""

from .agent import root_agent
from .callbacks import (
    before_agent_callback,
    before_model_callback,
    provide_instruction,
)
from .config import (
    AGENT_DESCRIPTION,
    AGENT_NAME,
    CONTROL_PREFIX,
    DEFAULT_MODEL,
    DEFAULT_PROVIDER,
    STATE_MODEL_KEY,
    STATE_PROVIDER_KEY,
    STATE_PROMPT_KEY,
)
from .control import (
    HotSwapCommand,
    ModelSpec,
    build_control_message,
    parse_control_message,
    parse_model_spec,
    serialize_model_spec,
)
from .state import HotSwapState, apply_state_to_agent
from .tools import HOTSWAP_TOOLS, get_config, set_model, set_prompt

__all__ = [
    "root_agent",
    "before_agent_callback",
    "before_model_callback",
    "provide_instruction",
    "AGENT_DESCRIPTION",
    "AGENT_NAME",
    "CONTROL_PREFIX",
    "DEFAULT_MODEL",
    "DEFAULT_PROVIDER",
    "STATE_MODEL_KEY",
    "STATE_PROVIDER_KEY",
    "STATE_PROMPT_KEY",
    "HotSwapCommand",
    "ModelSpec",
    "HotSwapState",
    "apply_state_to_agent",
    "build_control_message",
    "parse_control_message",
    "parse_model_spec",
    "serialize_model_spec",
    "HOTSWAP_TOOLS",
    "get_config",
    "set_model",
    "set_prompt",
]
@@ -1,24 +0,0 @@

{
  "name": "litellm_agent",
  "description": "A flexible AI agent powered by LiteLLM with hot-swappable models from OpenRouter and other providers",
  "url": "http://localhost:8000",
  "version": "1.0.0",
  "defaultInputModes": ["text/plain"],
  "defaultOutputModes": ["text/plain"],
  "capabilities": {
    "streaming": true
  },
  "skills": [
    {
      "id": "litellm-general-purpose",
      "name": "General Purpose AI Assistant",
      "description": "A flexible AI assistant that can help with various tasks using any LiteLLM-supported model. Supports runtime model and prompt hot-swapping.",
      "tags": ["ai", "assistant", "litellm", "flexible", "hot-swap"],
      "examples": [
        "Help me write a Python function",
        "Explain quantum computing",
        "Switch to Claude model and help me code"
      ]
    }
  ]
}
@@ -1,29 +0,0 @@

"""Root agent definition for the LiteLLM hot-swap shell."""

from __future__ import annotations

from google.adk.agents import Agent

from .callbacks import (
    before_agent_callback,
    before_model_callback,
    provide_instruction,
)
from .config import AGENT_DESCRIPTION, AGENT_NAME, DEFAULT_MODEL, DEFAULT_PROVIDER
from .state import HotSwapState
from .tools import HOTSWAP_TOOLS

_initial_state = HotSwapState(model=DEFAULT_MODEL, provider=DEFAULT_PROVIDER)

root_agent = Agent(
    name=AGENT_NAME,
    model=_initial_state.instantiate_llm(),
    description=AGENT_DESCRIPTION,
    instruction=provide_instruction,
    tools=HOTSWAP_TOOLS,
    before_agent_callback=before_agent_callback,
    before_model_callback=before_model_callback,
)


__all__ = ["root_agent"]
@@ -1,137 +0,0 @@

"""Callbacks and instruction providers for the LiteLLM hot-swap agent."""

from __future__ import annotations

import logging
from typing import Optional

from google.adk.agents.callback_context import CallbackContext
from google.adk.agents.readonly_context import ReadonlyContext
from google.adk.models.llm_request import LlmRequest
from google.genai import types

from .config import CONTROL_PREFIX, DEFAULT_MODEL
from .control import HotSwapCommand, parse_control_message, parse_model_spec
from .prompts import BASE_INSTRUCTION
from .state import HotSwapState, apply_state_to_agent

_LOGGER = logging.getLogger(__name__)


def provide_instruction(ctx: ReadonlyContext | None = None) -> str:
    """Compose the system instruction using the stored state."""

    state_mapping = getattr(ctx, "state", None)
    state = HotSwapState.from_mapping(state_mapping)
    prompt = state.prompt or BASE_INSTRUCTION
    return f"{prompt}\n\nActive model: {state.display_model}"


def _ensure_state(callback_context: CallbackContext) -> HotSwapState:
    state = HotSwapState.from_mapping(callback_context.state)
    state.persist(callback_context.state)
    return state


def _session_id(callback_context: CallbackContext) -> str:
    session = getattr(callback_context, "session", None)
    if session is None:
        session = getattr(callback_context._invocation_context, "session", None)
    return getattr(session, "id", "unknown-session")


async def before_model_callback(
    callback_context: CallbackContext,
    llm_request: LlmRequest,
) -> Optional[types.Content]:
    """Ensure outgoing requests use the active model from session state."""

    state = _ensure_state(callback_context)
    try:
        apply_state_to_agent(callback_context._invocation_context, state)
    except Exception:  # pragma: no cover - defensive logging
        _LOGGER.exception(
            "Failed to apply LiteLLM model '%s' (provider=%s) for session %s",
            state.model,
            state.provider,
            callback_context.session.id,
        )
    llm_request.model = state.model or DEFAULT_MODEL
    return None


async def before_agent_callback(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """Intercept hot-swap control messages and update session state."""

    user_content = callback_context.user_content
    if not user_content or not user_content.parts:
        return None

    first_part = user_content.parts[0]
    message_text = (first_part.text or "").strip()
    if not message_text.startswith(CONTROL_PREFIX):
        return None

    parsed = parse_control_message(message_text)
    if not parsed:
        return None

    command, payload = parsed
    state = _ensure_state(callback_context)

    if command is HotSwapCommand.MODEL:
        if not payload:
            return _render("❌ Missing model specification for hot-swap.")
        try:
            spec = parse_model_spec(payload)
        except ValueError as exc:
            return _render(f"❌ Invalid model specification: {exc}")

        state.model = spec.model
        state.provider = spec.provider
        state.persist(callback_context.state)
        try:
            apply_state_to_agent(callback_context._invocation_context, state)
        except Exception:  # pragma: no cover - defensive logging
            _LOGGER.exception(
                "Failed to apply LiteLLM model '%s' (provider=%s) for session %s",
                state.model,
                state.provider,
                _session_id(callback_context),
            )
        _LOGGER.info(
            "Hot-swapped model to %s (provider=%s, session=%s)",
            state.model,
            state.provider,
            _session_id(callback_context),
        )
        label = state.display_model
        return _render(f"✅ Model switched to: {label}")

    if command is HotSwapCommand.PROMPT:
        prompt_value = (payload or "").strip()
        state.prompt = prompt_value or None
        state.persist(callback_context.state)
        if state.prompt:
            _LOGGER.info(
                "Updated prompt for session %s", _session_id(callback_context)
            )
            return _render(
                "✅ System prompt updated. This change takes effect immediately."
            )
        return _render("✅ System prompt cleared. Reverting to default instruction.")

    if command is HotSwapCommand.GET_CONFIG:
        return _render(state.describe())

    expected = ", ".join(HotSwapCommand.choices())
    return _render(
        "⚠️ Unsupported hot-swap command. Available verbs: "
        f"{expected}."
    )


def _render(message: str) -> types.ModelContent:
    return types.ModelContent(parts=[types.Part(text=message)])
@@ -1,35 +0,0 @@

"""Configuration constants for the LiteLLM hot-swap agent."""

from __future__ import annotations

import os


def _normalize_proxy_base_url(raw_value: str | None) -> str | None:
    if not raw_value:
        return None
    cleaned = raw_value.strip()
    if not cleaned:
        return None
    # Avoid double slashes in downstream requests
    return cleaned.rstrip("/")


AGENT_NAME = "litellm_agent"
AGENT_DESCRIPTION = (
    "A LiteLLM-backed shell that exposes hot-swappable model and prompt controls."
)

DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None
PROXY_BASE_URL = _normalize_proxy_base_url(
    os.getenv("FF_LLM_PROXY_BASE_URL")
    or os.getenv("LITELLM_API_BASE")
    or os.getenv("LITELLM_BASE_URL")
)

STATE_PREFIX = "app:litellm_agent/"
STATE_MODEL_KEY = f"{STATE_PREFIX}model"
STATE_PROVIDER_KEY = f"{STATE_PREFIX}provider"
STATE_PROMPT_KEY = f"{STATE_PREFIX}prompt"

CONTROL_PREFIX = "[HOTSWAP"
@@ -1,99 +0,0 @@

"""Control message helpers for hot-swapping model and prompt."""

from __future__ import annotations

import re
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple

from .config import DEFAULT_PROVIDER


class HotSwapCommand(str, Enum):
    """Supported control verbs embedded in user messages."""

    MODEL = "MODEL"
    PROMPT = "PROMPT"
    GET_CONFIG = "GET_CONFIG"

    @classmethod
    def choices(cls) -> tuple[str, ...]:
        return tuple(item.value for item in cls)


@dataclass(frozen=True)
class ModelSpec:
    """Represents a LiteLLM model and optional provider."""

    model: str
    provider: Optional[str] = None


_COMMAND_PATTERN = re.compile(
    r"^\[HOTSWAP:(?P<verb>[A-Z_]+)(?::(?P<payload>.*))?\]$",
)


def parse_control_message(text: str) -> Optional[Tuple[HotSwapCommand, Optional[str]]]:
    """Return hot-swap command tuple when the string matches the control format."""

    match = _COMMAND_PATTERN.match(text.strip())
    if not match:
        return None

    verb = match.group("verb")
    if verb not in HotSwapCommand.choices():
        return None

    payload = match.group("payload")
    return HotSwapCommand(verb), payload if payload else None


def build_control_message(command: HotSwapCommand, payload: Optional[str] = None) -> str:
    """Serialise a control command for downstream clients."""

    if command not in HotSwapCommand:
        raise ValueError(f"Unsupported hot-swap command: {command}")
    if payload is None or payload == "":
        return f"[HOTSWAP:{command.value}]"
    return f"[HOTSWAP:{command.value}:{payload}]"


def parse_model_spec(model: str, provider: Optional[str] = None) -> ModelSpec:
    """Parse model/provider inputs into a structured ModelSpec."""

    candidate = (model or "").strip()
    if not candidate:
        raise ValueError("Model name cannot be empty")

    if provider:
        provider_clean = provider.strip()
        if not provider_clean:
            raise ValueError("Provider cannot be empty when supplied")
        if "/" in candidate:
            raise ValueError(
                "Provide either provider/model or use provider argument, not both",
            )
        return ModelSpec(model=candidate, provider=provider_clean)

    if "/" in candidate:
        provider_part, model_part = candidate.split("/", 1)
        provider_part = provider_part.strip()
        model_part = model_part.strip()
        if not provider_part or not model_part:
            raise ValueError("Model spec must include provider and model when using '/' format")
        return ModelSpec(model=model_part, provider=provider_part)

    if DEFAULT_PROVIDER:
        return ModelSpec(model=candidate, provider=DEFAULT_PROVIDER.strip())

    return ModelSpec(model=candidate, provider=None)


def serialize_model_spec(spec: ModelSpec) -> str:
    """Render a ModelSpec to provider/model string for control messages."""

    if spec.provider:
        return f"{spec.provider}/{spec.model}"
    return spec.model
@@ -1,9 +0,0 @@

"""System prompt templates for the LiteLLM agent."""

BASE_INSTRUCTION = (
    "You are a focused orchestration layer that relays between the user and a"
    " LiteLLM managed model."
    "\n- Keep answers concise and actionable."
    "\n- Prefer plain language; reveal intermediate reasoning only when helpful."
    "\n- Surface any tool results clearly with short explanations."
)
@@ -1,254 +0,0 @@
"""Session state utilities for the LiteLLM hot-swap agent."""

from __future__ import annotations

from dataclasses import dataclass
import os
from typing import Any, Mapping, MutableMapping, Optional

import httpx

from .config import (
    DEFAULT_MODEL,
    DEFAULT_PROVIDER,
    PROXY_BASE_URL,
    STATE_MODEL_KEY,
    STATE_PROMPT_KEY,
    STATE_PROVIDER_KEY,
)


@dataclass(slots=True)
class HotSwapState:
    """Lightweight view of the hot-swap session state."""

    model: str = DEFAULT_MODEL
    provider: Optional[str] = None
    prompt: Optional[str] = None

    @classmethod
    def from_mapping(cls, mapping: Optional[Mapping[str, Any]]) -> "HotSwapState":
        if not mapping:
            return cls()

        raw_model = mapping.get(STATE_MODEL_KEY, DEFAULT_MODEL)
        raw_provider = mapping.get(STATE_PROVIDER_KEY)
        raw_prompt = mapping.get(STATE_PROMPT_KEY)

        model = raw_model.strip() if isinstance(raw_model, str) else DEFAULT_MODEL
        provider = raw_provider.strip() if isinstance(raw_provider, str) else None
        if not provider and DEFAULT_PROVIDER:
            provider = DEFAULT_PROVIDER.strip() or None
        prompt = raw_prompt.strip() if isinstance(raw_prompt, str) else None
        return cls(
            model=model or DEFAULT_MODEL,
            provider=provider or None,
            prompt=prompt or None,
        )

    def persist(self, store: MutableMapping[str, object]) -> None:
        store[STATE_MODEL_KEY] = self.model
        if self.provider:
            store[STATE_PROVIDER_KEY] = self.provider
        else:
            store[STATE_PROVIDER_KEY] = None
        store[STATE_PROMPT_KEY] = self.prompt

    def describe(self) -> str:
        prompt_value = self.prompt if self.prompt else "(default prompt)"
        provider_value = self.provider if self.provider else "(default provider)"
        return (
            "📊 Current Configuration\n"
            "━━━━━━━━━━━━━━━━━━━━━━\n"
            f"Model: {self.model}\n"
            f"Provider: {provider_value}\n"
            f"System Prompt: {prompt_value}\n"
            "━━━━━━━━━━━━━━━━━━━━━━"
        )

    def instantiate_llm(self):
        """Create a LiteLlm instance for the current state."""

        from google.adk.models.lite_llm import LiteLlm  # Lazy import to avoid cycle
        from google.adk.models.lite_llm import LiteLLMClient
        from litellm.types.utils import Choices, Message, ModelResponse, Usage

        kwargs = {"model": self.model}
        if self.provider:
            kwargs["custom_llm_provider"] = self.provider
        if PROXY_BASE_URL:
            provider = (self.provider or DEFAULT_PROVIDER or "").lower()
            if provider and provider != "openai":
                kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}"
            else:
                kwargs["api_base"] = PROXY_BASE_URL
            kwargs.setdefault("api_key", os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY"))

        provider = (self.provider or DEFAULT_PROVIDER or "").lower()
        model_suffix = self.model.split("/", 1)[-1]
        use_responses = provider == "openai" and (
            model_suffix.startswith("gpt-5") or model_suffix.startswith("o1")
        )
        if use_responses:
            kwargs.setdefault("use_responses_api", True)

        llm = LiteLlm(**kwargs)

        if use_responses and PROXY_BASE_URL:

            class _ResponsesAwareClient(LiteLLMClient):
                def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str):
                    self._base_client = base_client
                    self._api_base = api_base.rstrip("/")
                    self._api_key = api_key

                async def acompletion(self, model, messages, tools, **kwargs):  # type: ignore[override]
                    use_responses_api = kwargs.pop("use_responses_api", False)
                    if not use_responses_api:
                        return await self._base_client.acompletion(
                            model=model,
                            messages=messages,
                            tools=tools,
                            **kwargs,
                        )

                    resolved_model = model
                    if "/" not in resolved_model:
                        resolved_model = f"openai/{resolved_model}"

                    payload = {
                        "model": resolved_model,
                        "input": _messages_to_responses_input(messages),
                    }

                    timeout = kwargs.get("timeout", 60)
                    headers = {
                        "Authorization": f"Bearer {self._api_key}",
                        "Content-Type": "application/json",
                    }

                    async with httpx.AsyncClient(timeout=timeout) as client:
                        response = await client.post(
                            f"{self._api_base}/v1/responses",
                            json=payload,
                            headers=headers,
                        )
                        try:
                            response.raise_for_status()
                        except httpx.HTTPStatusError as exc:
                            text = exc.response.text
                            raise RuntimeError(
                                f"LiteLLM responses request failed: {text}"
                            ) from exc
                        data = response.json()

                    text_output = _extract_output_text(data)
                    usage = data.get("usage", {})

                    return ModelResponse(
                        id=data.get("id"),
                        model=model,
                        choices=[
                            Choices(
                                finish_reason="stop",
                                index=0,
                                message=Message(role="assistant", content=text_output),
                                provider_specific_fields={"bifrost_response": data},
                            )
                        ],
                        usage=Usage(
                            prompt_tokens=usage.get("input_tokens"),
                            completion_tokens=usage.get("output_tokens"),
                            reasoning_tokens=usage.get("output_tokens_details", {}).get(
                                "reasoning_tokens"
                            ),
                            total_tokens=usage.get("total_tokens"),
                        ),
                    )

            llm.llm_client = _ResponsesAwareClient(
                llm.llm_client,
                PROXY_BASE_URL,
                os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY", ""),
            )

        return llm

    @property
    def display_model(self) -> str:
        if self.provider:
            return f"{self.provider}/{self.model}"
        return self.model


def apply_state_to_agent(invocation_context, state: HotSwapState) -> None:
    """Update the provided agent with a LiteLLM instance matching state."""

    agent = invocation_context.agent
    agent.model = state.instantiate_llm()


def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    inputs: list[dict[str, Any]] = []
    for message in messages:
        role = message.get("role", "user")
        content = message.get("content", "")
        text_segments: list[str] = []

        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict):
                    text = item.get("text") or item.get("content")
                    if text:
                        text_segments.append(str(text))
                elif isinstance(item, str):
                    text_segments.append(item)
        elif isinstance(content, str):
            text_segments.append(content)

        text = "\n".join(segment.strip() for segment in text_segments if segment)
        if not text:
            continue

        entry_type = "input_text"
        if role == "assistant":
            entry_type = "output_text"

        inputs.append(
            {
                "role": role,
                "content": [
                    {
                        "type": entry_type,
                        "text": text,
                    }
                ],
            }
        )

    if not inputs:
        inputs.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": "",
                    }
                ],
            }
        )
    return inputs


def _extract_output_text(response_json: dict[str, Any]) -> str:
    outputs = response_json.get("output", [])
    collected: list[str] = []
    for item in outputs:
        if isinstance(item, dict) and item.get("type") == "message":
            for part in item.get("content", []):
                if isinstance(part, dict) and part.get("type") == "output_text":
                    text = part.get("text", "")
                    if text:
                        collected.append(str(text))
    return "\n\n".join(collected).strip()
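For reference, a minimal sketch of the conversion the removed `_messages_to_responses_input` helper performed (the message values here are invented for illustration):

```python
# Hypothetical chat history in OpenAI chat-completions shape.
messages = [
    {"role": "user", "content": "Summarize the scan results."},
    {"role": "assistant", "content": [{"type": "text", "text": "Two findings."}]},
]

# The helper flattened each message into a Responses-API input entry,
# tagging user text as input_text and assistant text as output_text:
expected = [
    {"role": "user", "content": [{"type": "input_text", "text": "Summarize the scan results."}]},
    {"role": "assistant", "content": [{"type": "output_text", "text": "Two findings."}]},
]
```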
@@ -1,64 +0,0 @@
"""Tool definitions exposed to the LiteLLM agent."""

from __future__ import annotations

from typing import Optional

from google.adk.tools import FunctionTool, ToolContext

from .control import parse_model_spec
from .state import HotSwapState, apply_state_to_agent


async def set_model(
    model: str,
    *,
    provider: Optional[str] = None,
    tool_context: ToolContext,
) -> str:
    """Hot-swap the active LiteLLM model for this session."""

    spec = parse_model_spec(model, provider=provider)
    state = HotSwapState.from_mapping(tool_context.state)
    state.model = spec.model
    state.provider = spec.provider
    state.persist(tool_context.state)
    try:
        apply_state_to_agent(tool_context._invocation_context, state)
    except Exception as exc:  # pragma: no cover - defensive reporting
        return f"❌ Failed to apply model '{state.display_model}': {exc}"
    return f"✅ Model switched to: {state.display_model}"


async def set_prompt(prompt: str, *, tool_context: ToolContext) -> str:
    """Update or clear the system prompt used for this session."""

    state = HotSwapState.from_mapping(tool_context.state)
    prompt_value = prompt.strip()
    state.prompt = prompt_value or None
    state.persist(tool_context.state)
    if state.prompt:
        return "✅ System prompt updated. This change takes effect immediately."
    return "✅ System prompt cleared. Reverting to default instruction."


async def get_config(*, tool_context: ToolContext) -> str:
    """Return a summary of the current model and prompt configuration."""

    state = HotSwapState.from_mapping(tool_context.state)
    return state.describe()


HOTSWAP_TOOLS = [
    FunctionTool(set_model),
    FunctionTool(set_prompt),
    FunctionTool(get_config),
]


__all__ = [
    "set_model",
    "set_prompt",
    "get_config",
    "HOTSWAP_TOOLS",
]
@@ -1,13 +0,0 @@
"""ASGI entrypoint for containerized deployments."""

from pathlib import Path

from google.adk.cli.fast_api import get_fast_api_app

AGENT_DIR = Path(__file__).resolve().parent

app = get_fast_api_app(
    agents_dir=str(AGENT_DIR),
    web=False,
    a2a=True,
)
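A note on the removed entrypoint above: `get_fast_api_app` returns a standard ASGI app, so it can be served with uvicorn directly (the module path below is an assumption for illustration, not taken from this diff):

```bash
# Hypothetical module path; point uvicorn at wherever this entrypoint file lives.
uvicorn main:app --host 0.0.0.0 --port 8000
```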
@@ -1,4 +0,0 @@
google-adk
a2a-sdk[all]
litellm
python-dotenv
@@ -1,5 +0,0 @@
# LLM Proxy Integrations

This directory contains third-party source trees that were vendored only for reference while integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project.

See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents.
@@ -1,6 +1,6 @@
[project]
name = "fuzzforge-ai"
version = "0.7.3"
version = "0.6.0"
description = "FuzzForge AI orchestration module"
readme = "README.md"
requires-python = ">=3.11"

@@ -3,11 +6,6 @@ FuzzForge AI Module - Agent-to-Agent orchestration system

This module integrates the fuzzforge_ai components into FuzzForge,
providing intelligent AI agent capabilities for security analysis.

Usage:
    from fuzzforge_ai.a2a_wrapper import send_agent_task
    from fuzzforge_ai.agent import FuzzForgeAgent
    from fuzzforge_ai.config_manager import ConfigManager
"""
# Copyright (c) 2025 FuzzingLabs
#
@@ -21,4 +16,9 @@ Usage:
# Additional attribution and requirements are provided in the NOTICE file.


__version__ = "0.7.3"
__version__ = "0.6.0"

from .agent import FuzzForgeAgent
from .config_manager import ConfigManager

__all__ = ['FuzzForgeAgent', 'ConfigManager']
@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
"""
FuzzForge A2A Server
Run this to expose FuzzForge as an A2A-compatible agent
@@ -79,7 +78,7 @@ def create_a2a_app():
    print("\033[0m")  # Reset color

    # Create A2A app
    print("🚀 Starting FuzzForge A2A Server")
    print(f"🚀 Starting FuzzForge A2A Server")
    print(f" Model: {fuzzforge.model}")
    if fuzzforge.cognee_url:
        print(f" Memory: Cognee at {fuzzforge.cognee_url}")
@@ -87,7 +86,7 @@ def create_a2a_app():

    app = create_custom_a2a_app(fuzzforge.adk_agent, port=port, executor=fuzzforge.executor)

    print("\n✅ FuzzForge A2A Server ready!")
    print(f"\n✅ FuzzForge A2A Server ready!")
    print(f" Agent card: http://localhost:{port}/.well-known/agent-card.json")
    print(f" A2A endpoint: http://localhost:{port}/")
    print(f"\n📡 Other agents can register FuzzForge at: http://localhost:{port}")
@@ -102,7 +101,7 @@ def main():
    app = create_a2a_app()
    port = int(os.getenv('FUZZFORGE_PORT', 10100))

    print("\n🎯 Starting server with uvicorn...")
    print(f"\n🎯 Starting server with uvicorn...")
    uvicorn.run(app, host="127.0.0.1", port=port)



@@ -18,6 +18,7 @@ from typing import Optional, Union

from starlette.applications import Starlette
from starlette.responses import Response, FileResponse
from starlette.routing import Route

from google.adk.a2a.executor.a2a_agent_executor import A2aAgentExecutor
from google.adk.a2a.utils.agent_card_builder import AgentCardBuilder

@@ -1,288 +0,0 @@
"""
A2A Wrapper Module for FuzzForge
Programmatic interface to send tasks to A2A agents with custom model/prompt/context
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

from __future__ import annotations

from typing import Optional, Any
from uuid import uuid4

import httpx
from a2a.client import A2AClient
from a2a.client.errors import A2AClientHTTPError
from a2a.types import (
    JSONRPCErrorResponse,
    Message,
    MessageSendConfiguration,
    MessageSendParams,
    Part,
    Role,
    SendMessageRequest,
    SendStreamingMessageRequest,
    Task,
    TaskArtifactUpdateEvent,
    TaskStatusUpdateEvent,
    TextPart,
)


class A2ATaskResult:
    """Result from an A2A agent task"""

    def __init__(self, text: str, context_id: str, raw_response: Any = None):
        self.text = text
        self.context_id = context_id
        self.raw_response = raw_response

    def __str__(self) -> str:
        return self.text

    def __repr__(self) -> str:
        return f"A2ATaskResult(text={self.text[:50]}..., context_id={self.context_id})"


def _build_control_message(command: str, payload: Optional[str] = None) -> str:
    """Build a control message for hot-swapping agent configuration"""
    if payload is None or payload == "":
        return f"[HOTSWAP:{command}]"
    return f"[HOTSWAP:{command}:{payload}]"


def _extract_text(
    result: Message | Task | TaskStatusUpdateEvent | TaskArtifactUpdateEvent,
) -> list[str]:
    """Extract text content from A2A response objects"""
    texts: list[str] = []
    if isinstance(result, Message):
        if result.role is Role.agent:
            for part in result.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    elif isinstance(result, Task) and result.history:
        for msg in result.history:
            if msg.role is Role.agent:
                for part in msg.parts:
                    root_part = part.root
                    text = getattr(root_part, "text", None)
                    if text:
                        texts.append(text)
    elif isinstance(result, TaskStatusUpdateEvent):
        message = result.status.message
        if message:
            texts.extend(_extract_text(message))
    elif isinstance(result, TaskArtifactUpdateEvent):
        artifact = result.artifact
        if artifact and artifact.parts:
            for part in artifact.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    return texts


async def _send_message(
    client: A2AClient,
    message: str,
    context_id: str,
) -> str:
    """Send a message to the A2A agent and collect the response"""

    params = MessageSendParams(
        configuration=MessageSendConfiguration(blocking=True),
        message=Message(
            context_id=context_id,
            message_id=str(uuid4()),
            role=Role.user,
            parts=[Part(root=TextPart(text=message))],
        ),
    )

    stream_request = SendStreamingMessageRequest(id=str(uuid4()), params=params)
    buffer: list[str] = []

    try:
        async for response in client.send_message_streaming(stream_request):
            root = response.root
            if isinstance(root, JSONRPCErrorResponse):
                raise RuntimeError(f"A2A error: {root.error}")

            payload = root.result
            buffer.extend(_extract_text(payload))
    except A2AClientHTTPError as exc:
        if "text/event-stream" not in str(exc):
            raise

        # Fallback to non-streaming
        send_request = SendMessageRequest(id=str(uuid4()), params=params)
        response = await client.send_message(send_request)
        root = response.root
        if isinstance(root, JSONRPCErrorResponse):
            raise RuntimeError(f"A2A error: {root.error}")
        payload = root.result
        buffer.extend(_extract_text(payload))

    if buffer:
        buffer = list(dict.fromkeys(buffer))  # Remove duplicates
    return "\n".join(buffer).strip()


async def send_agent_task(
    url: str,
    message: str,
    *,
    model: Optional[str] = None,
    provider: Optional[str] = None,
    prompt: Optional[str] = None,
    context: Optional[str] = None,
    timeout: float = 120.0,
) -> A2ATaskResult:
    """
    Send a task to an A2A agent with optional model/prompt configuration.

    Args:
        url: A2A endpoint URL (e.g., "http://127.0.0.1:8000/a2a/litellm_agent")
        message: The task message to send to the agent
        model: Optional model name (e.g., "gpt-4o", "gemini-2.0-flash")
        provider: Optional provider name (e.g., "openai", "gemini")
        prompt: Optional system prompt to set before sending the message
        context: Optional context/session ID (generated if not provided)
        timeout: Request timeout in seconds (default: 120)

    Returns:
        A2ATaskResult with the agent's response text and context ID

    Example:
        >>> result = await send_agent_task(
        ...     url="http://127.0.0.1:8000/a2a/litellm_agent",
        ...     model="gpt-4o",
        ...     provider="openai",
        ...     prompt="You are concise.",
        ...     message="Give me a fuzzing harness.",
        ...     context="fuzzing",
        ...     timeout=120
        ... )
        >>> print(result.text)
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)

        # Set model if provided
        if model:
            model_spec = f"{provider}/{model}" if provider else model
            control_msg = _build_control_message("MODEL", model_spec)
            await _send_message(client, control_msg, context_id)

        # Set prompt if provided
        if prompt is not None:
            control_msg = _build_control_message("PROMPT", prompt)
            await _send_message(client, control_msg, context_id)

        # Send the actual task message
        response_text = await _send_message(client, message, context_id)

    return A2ATaskResult(
        text=response_text,
        context_id=context_id,
    )


async def get_agent_config(
    url: str,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Get the current configuration of an A2A agent.

    Args:
        url: A2A endpoint URL
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Configuration string from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        control_msg = _build_control_message("GET_CONFIG")
        config_text = await _send_message(client, control_msg, context_id)
    return config_text


async def hot_swap_model(
    url: str,
    model: str,
    provider: Optional[str] = None,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Hot-swap the model of an A2A agent without sending a task.

    Args:
        url: A2A endpoint URL
        model: Model name to switch to
        provider: Optional provider name
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Response from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        model_spec = f"{provider}/{model}" if provider else model
        control_msg = _build_control_message("MODEL", model_spec)
        response = await _send_message(client, control_msg, context_id)
    return response


async def hot_swap_prompt(
    url: str,
    prompt: str,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Hot-swap the system prompt of an A2A agent.

    Args:
        url: A2A endpoint URL
        prompt: System prompt to set
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Response from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        control_msg = _build_control_message("PROMPT", prompt)
        response = await _send_message(client, control_msg, context_id)
    return response
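The removed wrapper's control protocol is plain text, which makes it easy to verify by hand; a small sketch of what `_build_control_message` produced:

```python
# With a payload, the command and payload are colon-joined inside the marker:
assert _build_control_message("MODEL", "openai/gpt-4o") == "[HOTSWAP:MODEL:openai/gpt-4o]"
# Without a payload, only the command is embedded:
assert _build_control_message("GET_CONFIG") == "[HOTSWAP:GET_CONFIG]"
```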
@@ -60,7 +60,7 @@ class FuzzForgeAgent:
            debug=os.getenv('FUZZFORGE_DEBUG', '0') == '1',
            memory_service=self.memory_service,
            session_persistence=os.getenv('SESSION_PERSISTENCE', 'inmemory'),
            fuzzforge_mcp_url=None,  # Disabled
            fuzzforge_mcp_url=os.getenv('FUZZFORGE_MCP_URL'),
        )

        # Create Hybrid Memory Manager (ADK + Cognee direct integration)

@@ -15,7 +15,7 @@ Defines what FuzzForge can do and how others can discover it


from dataclasses import dataclass
from typing import List, Dict, Any
from typing import List, Optional, Dict, Any

@dataclass
class AgentSkill:
@@ -172,6 +172,7 @@ def get_fuzzforge_agent_card(url: str = "http://localhost:10100") -> AgentCard:
        orchestration_skill,
        memory_skill,
        conversation_skill,
        workflow_automation_skill,
        agent_management_skill
    ],
    capabilities=fuzzforge_capabilities,

@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
"""FuzzForge Agent Executor - orchestrates workflows and delegation."""
# Copyright (c) 2025 FuzzingLabs
#
@@ -13,6 +12,7 @@


import asyncio
import base64
import time
import uuid
import json
@@ -174,7 +174,7 @@ class FuzzForgeExecutor:
            else:
                # Run now if no loop is running
                loop.run_until_complete(self._register_agent_async(url, name))
        except Exception:
        except:
            # Ignore auto-registration failures
            pass
    except Exception as e:
@@ -392,7 +392,7 @@ class FuzzForgeExecutor:
            user_email = f"project_{config.get_project_context()['project_id']}@fuzzforge.example"
            user = await get_user(user_email)
            cognee.set_user(user)
        except Exception:
        except Exception as e:
            pass  # User context not critical

        # Use cognee search directly for maximum flexibility
@@ -452,11 +452,11 @@ class FuzzForgeExecutor:
        try:
            user = await get_user(user_email)
            logger.info(f"Using existing user: {user_email}")
        except Exception:
        except:
            try:
                user = await create_user(user_email, user_tenant)
                logger.info(f"Created new user: {user_email}")
            except Exception:
            except:
                user = None

        if user:
@@ -583,6 +583,7 @@ class FuzzForgeExecutor:
            pattern: Glob pattern (e.g. '*.py', '**/*.js', '')
        """
        try:
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -647,6 +648,7 @@ class FuzzForgeExecutor:
            max_lines: Maximum lines to read (0 for all, default 200 for large files)
        """
        try:
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -709,6 +711,7 @@ class FuzzForgeExecutor:
        """
        try:
            import re
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -754,7 +757,7 @@ class FuzzForgeExecutor:
                result = f"Found '{search_pattern}' in {len(matches)} locations (searched {files_searched} files):\n"
                result += "\n".join(matches[:50])
                if len(matches) >= 50:
                    result += "\n... (showing first 50 matches)"
                    result += f"\n... (showing first 50 matches)"
                return result
            else:
                return f"No matches found for '{search_pattern}' in {files_searched} files matching '{file_pattern}'"
@@ -831,15 +834,26 @@ class FuzzForgeExecutor:
        async def submit_security_scan_mcp(
            workflow_name: str,
            target_path: str = "",
            volume_mode: str = "",
            parameters: Dict[str, Any] | None = None,
            tool_context: ToolContext | None = None,
        ) -> Any:
            # Resolve the target path to an absolute path for validation
            # Normalise volume mode to supported values
            normalised_mode = (volume_mode or "ro").strip().lower().replace("-", "_")
            if normalised_mode in {"read_only", "readonly", "ro"}:
                normalised_mode = "ro"
            elif normalised_mode in {"read_write", "readwrite", "rw"}:
                normalised_mode = "rw"
            else:
                # Fall back to Prefect defaults if we can't recognise the input
                normalised_mode = "ro"

            # Resolve the target path to an absolute path for Prefect's validation
            resolved_path = target_path or "."
            try:
                resolved_path = str(Path(resolved_path).expanduser().resolve())
            except Exception:
                # If resolution fails, use the raw value
                # If resolution fails, Prefect will surface the validation error – use the raw value
                resolved_path = target_path

            # Ensure configuration objects default to dictionaries instead of None
@@ -872,6 +886,7 @@ class FuzzForgeExecutor:
            payload = {
                "workflow_name": workflow_name,
                "target_path": resolved_path,
                "volume_mode": normalised_mode,
                "parameters": cleaned_parameters,
            }
            result = await _call_fuzzforge_mcp("submit_security_scan_mcp", payload)
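The volume-mode normalisation added in the hunk above is deliberately forgiving; a standalone sketch of the mapping it implements:

```python
# "-" is rewritten to "_", known spellings collapse to ro/rw, and anything
# unrecognised (or empty) falls back to read-only.
for raw, expected in [("READ-ONLY", "ro"), ("readwrite", "rw"), ("", "ro"), ("banana", "ro")]:
    mode = (raw or "ro").strip().lower().replace("-", "_")
    if mode in {"read_only", "readonly", "ro"}:
        mode = "ro"
    elif mode in {"read_write", "readwrite", "rw"}:
        mode = "rw"
    else:
        mode = "ro"
    assert mode == expected
```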
@@ -1049,19 +1064,10 @@ class FuzzForgeExecutor:
            FunctionTool(get_task_list)
        ])


        # Create the agent with LiteLLM configuration
        llm_kwargs = {}
        api_key = os.getenv('OPENAI_API_KEY') or os.getenv('LLM_API_KEY')
        api_base = os.getenv('LLM_ENDPOINT') or os.getenv('LLM_API_BASE') or os.getenv('OPENAI_API_BASE')

        if api_key:
            llm_kwargs['api_key'] = api_key
        if api_base:
            llm_kwargs['api_base'] = api_base


        # Create the agent
        self.agent = LlmAgent(
            model=LiteLlm(model=self.model, **llm_kwargs),
            model=LiteLlm(model=self.model),
            name="fuzzforge_executor",
            description="Intelligent A2A orchestrator with memory",
            instruction=self._build_instruction(),
@@ -1082,7 +1088,7 @@ class FuzzForgeExecutor:

    def _build_instruction(self) -> str:
        """Build the agent's instruction prompt"""
        instruction = """You are FuzzForge, an intelligent A2A orchestrator with dual memory systems.
        instruction = f"""You are FuzzForge, an intelligent A2A orchestrator with dual memory systems.

## Your Core Responsibilities:

@@ -1702,7 +1708,7 @@ Be concise and intelligent in your responses."""
        if self.agentops_trace:
            try:
                agentops.end_trace()
            except Exception:
            except:
                pass

        # Cancel background monitors

@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
#!/usr/bin/env python3
# Copyright (c) 2025 FuzzingLabs
#
@@ -27,6 +26,7 @@ import random
from datetime import datetime
from contextlib import contextmanager
from pathlib import Path
from typing import Any

from dotenv import load_dotenv

@@ -90,12 +90,18 @@ except ImportError:
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.prompt import Prompt
from rich import box

from google.adk.events.event import Event
from google.adk.events.event_actions import EventActions
from google.genai import types as gen_types

from .agent import FuzzForgeAgent
from .agent_card import get_fuzzforge_agent_card
from .config_manager import ConfigManager
from .config_bridge import ProjectConfigManager
from .remote_agent import RemoteAgentConnection

console = Console()

@@ -237,7 +243,7 @@ class FuzzForgeCLI:
            )
        )
        if self.agent.executor.agentops_trace:
            console.print("Tracking: [medium_purple1]AgentOps active[/medium_purple1]")
            console.print(f"Tracking: [medium_purple1]AgentOps active[/medium_purple1]")

        # Show skills
        console.print("\nSkills:")
@@ -314,7 +320,7 @@ class FuzzForgeCLI:
                url=args.strip(),
                description=description
            )
            console.print(" [dim]Saved to config for auto-registration[/dim]")
            console.print(f" [dim]Saved to config for auto-registration[/dim]")
        else:
            console.print(f"[red]Failed: {result['error']}[/red]")

@@ -340,9 +346,9 @@ class FuzzForgeCLI:
        # Remove from config
        if self.config_manager.remove_registered_agent(name=agent_to_remove['name'], url=agent_to_remove['url']):
            console.print(f"✅ Unregistered: [bold]{agent_to_remove['name']}[/bold]")
            console.print(" [dim]Removed from config (won't auto-register next time)[/dim]")
            console.print(f" [dim]Removed from config (won't auto-register next time)[/dim]")
        else:
            console.print("[yellow]Agent unregistered from session but not found in config[/yellow]")
            console.print(f"[yellow]Agent unregistered from session but not found in config[/yellow]")

    async def cmd_list(self, args: str = "") -> None:
        """List registered agents"""
@@ -429,7 +435,7 @@ class FuzzForgeCLI:
                        text = data['parts'][0].get('text', '')[:150]
                        role = data.get('role', 'unknown')
                        console.print(f"{i}. [{role}]: {text}...")
                    except Exception:
                    except:
                        console.print(f"{i}. {content[:150]}...")
            else:
                console.print("[yellow]No matches found in SQLite either[/yellow]")
@@ -693,7 +699,7 @@ class FuzzForgeCLI:
        )

        console.print(table)
        console.print("\n[dim]Use /artifacts <id> to view artifact content[/dim]")
        console.print(f"\n[dim]Use /artifacts <id> to view artifact content[/dim]")

    async def cmd_tasks(self, args: str = "") -> None:
        """List tasks or show details for a specific task."""

@@ -16,7 +16,9 @@ Can be reused by external agents and other components


import os
from typing import Dict, Any, Optional
import asyncio
import json
from typing import Dict, List, Any, Optional, Union
from pathlib import Path


@@ -187,69 +189,33 @@ class CogneeProjectIntegration:
        except Exception as e:
            return {"error": f"Failed to list data: {e}"}

    async def cognify_text(self, text: str, dataset: str = None) -> Dict[str, Any]:
        """
        Cognify text content into knowledge graph

        Args:
            text: Text to cognify
            dataset: Dataset name (defaults to project_name_codebase)

        Returns:
            Dict containing cognify results
        """
        if not self._initialized:
            await self.initialize()

        if not self._initialized:
            return {"error": "Cognee not initialized"}

        if not dataset:
            dataset = f"{self.project_context['project_name']}_codebase"

        try:
            # Add text to dataset
            await self._cognee.add([text], dataset_name=dataset)

            # Process (cognify) the dataset
            await self._cognee.cognify([dataset])

            return {
                "text_length": len(text),
                "dataset": dataset,
                "project": self.project_context["project_name"],
                "status": "success"
            }
        except Exception as e:
            return {"error": f"Cognify failed: {e}"}

    async def ingest_text_to_dataset(self, text: str, dataset: str = None) -> Dict[str, Any]:
        """
        Ingest text content into a specific dataset


        Args:
            text: Text to ingest
            dataset: Dataset name (defaults to project_name_codebase)


        Returns:
            Dict containing ingest results
        """
        if not self._initialized:
            await self.initialize()


        if not self._initialized:
            return {"error": "Cognee not initialized"}


        if not dataset:
            dataset = f"{self.project_context['project_name']}_codebase"


        try:
            # Add text to dataset
            await self._cognee.add([text], dataset_name=dataset)


            # Process (cognify) the dataset
            await self._cognee.cognify([dataset])


            return {
                "text_length": len(text),
                "dataset": dataset,

@@ -15,9 +15,11 @@ Provides integrated Cognee functionality for codebase analysis and knowledge gra


import os
import asyncio
import logging
from pathlib import Path
from typing import Dict, List, Any
from typing import Dict, List, Any, Optional
from datetime import datetime

logger = logging.getLogger(__name__)

@@ -56,7 +58,7 @@ class CogneeService:
            # Configure LLM with API key BEFORE any other cognee operations
            provider = os.getenv("LLM_PROVIDER", "openai")
            model = os.getenv("LLM_MODEL") or os.getenv("LITELLM_MODEL", "gpt-4o-mini")
            api_key = os.getenv("COGNEE_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
            api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
            endpoint = os.getenv("LLM_ENDPOINT")
            api_version = os.getenv("LLM_API_VERSION")
            max_tokens = os.getenv("LLM_MAX_TOKENS")
@@ -78,62 +80,48 @@ class CogneeService:
                os.environ.setdefault("OPENAI_API_KEY", api_key)
            if endpoint:
                os.environ["LLM_ENDPOINT"] = endpoint
                os.environ.setdefault("LLM_API_BASE", endpoint)
                os.environ.setdefault("OPENAI_API_BASE", endpoint)
                os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint)
                if api_key:
                    os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key)
            if api_version:
                os.environ["LLM_API_VERSION"] = api_version
            if max_tokens:
                os.environ["LLM_MAX_TOKENS"] = str(max_tokens)

            # Configure Cognee's runtime using its configuration helpers when available
            embedding_model = os.getenv("LLM_EMBEDDING_MODEL")
            embedding_endpoint = os.getenv("LLM_EMBEDDING_ENDPOINT")
            if embedding_endpoint:
                os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint)

            if hasattr(cognee.config, "set_llm_provider"):
                cognee.config.set_llm_provider(provider)
            if hasattr(cognee.config, "set_llm_model"):
                cognee.config.set_llm_model(model)
            if api_key and hasattr(cognee.config, "set_llm_api_key"):
                cognee.config.set_llm_api_key(api_key)
            if endpoint and hasattr(cognee.config, "set_llm_endpoint"):
                cognee.config.set_llm_endpoint(endpoint)
            if embedding_model and hasattr(cognee.config, "set_llm_embedding_model"):
                cognee.config.set_llm_embedding_model(embedding_model)
            if embedding_endpoint and hasattr(cognee.config, "set_llm_embedding_endpoint"):
                cognee.config.set_llm_embedding_endpoint(embedding_endpoint)
            if hasattr(cognee.config, "set_llm_model"):
                cognee.config.set_llm_model(model)
            if api_key and hasattr(cognee.config, "set_llm_api_key"):
                cognee.config.set_llm_api_key(api_key)
            if endpoint and hasattr(cognee.config, "set_llm_endpoint"):
                cognee.config.set_llm_endpoint(endpoint)
            if api_version and hasattr(cognee.config, "set_llm_api_version"):
                cognee.config.set_llm_api_version(api_version)
            if max_tokens and hasattr(cognee.config, "set_llm_max_tokens"):
                cognee.config.set_llm_max_tokens(int(max_tokens))


            # Configure graph database
            cognee.config.set_graph_db_config({
                "graph_database_provider": self.cognee_config.get("graph_database_provider", "kuzu"),
            })


            # Set data directories
            data_dir = self.cognee_config.get("data_directory")
            system_dir = self.cognee_config.get("system_directory")


            if data_dir:
                logger.debug("Setting cognee data root", extra={"path": data_dir})
                cognee.config.data_root_directory(data_dir)
            if system_dir:
                logger.debug("Setting cognee system root", extra={"path": system_dir})
                cognee.config.system_root_directory(system_dir)


            # Setup multi-tenant user context
            await self._setup_user_context()


            self._initialized = True
            logger.info(f"Cognee initialized for project {self.project_context['project_name']} "
                        f"with Kuzu at {system_dir}")


        except ImportError:
            logger.error("Cognee not installed. Install with: pip install cognee")
            raise
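The initialization above is driven entirely by environment variables; a sketch of a typical set (only the variable names come from this code, the values are placeholders):

```bash
export LLM_PROVIDER=openai
export LLM_MODEL=gpt-4o-mini
export LLM_API_KEY=sk-placeholder          # or OPENAI_API_KEY
export LLM_ENDPOINT=http://localhost:4000  # optional proxy endpoint
export LLM_EMBEDDING_MODEL=text-embedding-3-small  # optional
```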
@@ -170,7 +158,7 @@ class CogneeService:
                self._user = await get_user(fallback_email)
                logger.info(f"Using existing user: {fallback_email}")
                return
            except Exception:
            except:
                # User doesn't exist, try to create fallback
                pass


@@ -13,7 +13,7 @@

try:
    from fuzzforge_cli.config import ProjectConfigManager as _ProjectConfigManager
except ImportError:  # pragma: no cover - used when CLI not available
except ImportError as exc:  # pragma: no cover - used when CLI not available
    class _ProjectConfigManager:  # type: ignore[no-redef]
        """Fallback implementation that raises a helpful error."""

@@ -21,10 +21,10 @@ except ImportError:  # pragma: no cover - used when CLI not available
            raise ImportError(
                "ProjectConfigManager is unavailable. Install the FuzzForge CLI "
                "package or supply a compatible configuration object."
            )
            ) from exc

        def __getattr__(name):  # pragma: no cover - defensive
            raise ImportError("ProjectConfigManager unavailable")
            raise ImportError("ProjectConfigManager unavailable") from exc

ProjectConfigManager = _ProjectConfigManager


@@ -16,12 +16,15 @@ Separate from Cognee which will be used for RAG/codebase analysis


import os
from typing import Dict, Any
import json
from typing import Dict, List, Any, Optional
from datetime import datetime
import logging

# ADK Memory imports
from google.adk.memory import InMemoryMemoryService, BaseMemoryService
from google.adk.memory.base_memory_service import SearchMemoryResponse
from google.adk.memory.memory_entry import MemoryEntry

# Optional VertexAI Memory Bank
try:

@@ -37,7 +37,7 @@ class RemoteAgentConnection:
            response.raise_for_status()
            self.agent_card = response.json()
            return self.agent_card
        except Exception:
        except:
            # Try old path for compatibility
            try:
                response = await self.client.get(f"{self.url}/.well-known/agent.json")

@@ -17,21 +17,25 @@ RUN apt-get update && apt-get install -y \

# Docker client configuration removed - localhost:5001 doesn't require insecure registry config

# Install uv for faster package management
RUN pip install uv

# Copy project files
COPY pyproject.toml ./
COPY uv.lock ./

# Install dependencies with pip
RUN pip install --no-cache-dir -e .
# Install dependencies
RUN uv sync --no-dev

# Copy source code
COPY . .

# Expose ports (API on 8000, MCP on 8010)
EXPOSE 8000 8010
# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start the application
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["uv", "run", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -1,6 +1,6 @@
# FuzzForge Backend

A stateless API server for security testing workflow orchestration using Temporal. This system dynamically discovers workflows, executes them in isolated worker environments, and returns findings in SARIF format.
A stateless API server for security testing workflow orchestration using Prefect. This system dynamically discovers workflows, executes them in isolated Docker containers with volume mounting, and returns findings in SARIF format.

## Architecture Overview

@@ -8,17 +8,17 @@ A stateless API server for security testing workflow orchestration using Tempora

1. **Workflow Discovery System**: Automatically discovers workflows at startup
2. **Module System**: Reusable components (scanner, analyzer, reporter) with a common interface
3. **Temporal Integration**: Handles workflow orchestration, execution, and monitoring with vertical workers
4. **File Upload & Storage**: HTTP multipart upload to MinIO for target files
3. **Prefect Integration**: Handles container orchestration, workflow execution, and monitoring
4. **Volume Mounting**: Secure file access with configurable permissions (ro/rw)
5. **SARIF Output**: Standardized security findings format

### Key Features

- **Stateless**: No persistent data, fully scalable
- **Generic**: No hardcoded workflows, automatic discovery
- **Isolated**: Each workflow runs in specialized vertical workers
- **Isolated**: Each workflow runs in its own Docker container
- **Extensible**: Easy to add new workflows and modules
- **Secure**: File upload with MinIO storage, automatic cleanup via lifecycle policies
- **Secure**: Read-only volume mounts by default, path validation
- **Observable**: Comprehensive logging and status tracking

## Quick Start
@@ -32,17 +32,19 @@ A stateless API server for security testing workflow orchestration using Tempora
From the project root, start all services:

```bash
docker-compose -f docker-compose.temporal.yaml up -d
docker-compose up -d
```

This will start:
- Temporal server (Web UI at http://localhost:8233, gRPC at :7233)
- MinIO (S3 storage at http://localhost:9000, Console at http://localhost:9001)
- PostgreSQL database (for Temporal state)
- Vertical workers (worker-rust, worker-android, worker-web, etc.)
- Prefect server (API at http://localhost:4200/api)
- PostgreSQL database
- Redis cache
- Docker registry (port 5001)
- Prefect worker (for running workflows)
- FuzzForge backend API (port 8000)
- FuzzForge MCP server (port 8010)

**Note**: MinIO console login: `fuzzforge` / `fuzzforge123`
**Note**: The Prefect UI at http://localhost:4200 is not currently accessible from the host due to the API being configured for inter-container communication. Use the REST API or MCP interface instead.

## API Endpoints

@@ -52,8 +54,7 @@ This will start:
- `GET /workflows/{name}/metadata` - Get workflow metadata and parameters
- `GET /workflows/{name}/parameters` - Get workflow parameter schema
- `GET /workflows/metadata/schema` - Get metadata.yaml schema
- `POST /workflows/{name}/submit` - Submit a workflow for execution (path-based, legacy)
- `POST /workflows/{name}/upload-and-submit` - **Upload local files and submit workflow** (recommended)
- `POST /workflows/{name}/submit` - Submit a workflow for execution

### Runs

@@ -67,13 +68,12 @@ Each workflow must have:

```
toolbox/workflows/{workflow_name}/
    workflow.py          # Temporal workflow definition
    metadata.yaml        # Mandatory metadata (parameters, version, vertical, etc.)
    requirements.txt     # Optional Python dependencies (installed in vertical worker)
    workflow.py          # Prefect flow definition
    metadata.yaml        # Mandatory metadata (parameters, version, etc.)
    Dockerfile           # Optional custom container definition
    requirements.txt     # Optional Python dependencies
```

**Note**: With Temporal architecture, workflows run in pre-built vertical workers (e.g., `worker-rust`, `worker-android`), not individual Docker containers. The workflow code is mounted as a volume and discovered at runtime.

### Example metadata.yaml

```yaml
@@ -82,12 +82,15 @@ version: "1.0.0"
description: "Comprehensive security analysis workflow"
author: "FuzzForge Team"
category: "comprehensive"
vertical: "rust"  # Routes to worker-rust
tags:
  - "security"
  - "analysis"
  - "comprehensive"

supported_volume_modes:
  - "ro"
  - "rw"

requirements:
  tools:
    - "file_scanner"
@@ -107,6 +110,11 @@ parameters:
    type: string
    default: "/workspace"
    description: "Path to analyze"
  volume_mode:
    type: string
    enum: ["ro", "rw"]
    default: "ro"
    description: "Volume mount mode"
  scanner_config:
    type: object
    description: "Scanner configuration"
@@ -151,6 +159,7 @@ curl -X POST "http://localhost:8000/workflows/security_assessment/submit" \
  -H "Content-Type: application/json" \
  -d '{
    "target_path": "/tmp/project",
    "volume_mode": "ro",
    "resource_limits": {
      "memory_limit": "1Gi",
      "cpu_limit": "1"
@@ -160,54 +169,6 @@

Resource precedence: User limits > Workflow requirements > System defaults

## File Upload and Target Access

### Upload Endpoint

The backend provides an upload endpoint for submitting workflows with local files:

```
POST /workflows/{workflow_name}/upload-and-submit
Content-Type: multipart/form-data

Parameters:
  file: File upload (supports .tar.gz for directories)
  parameters: JSON string of workflow parameters (optional)
  timeout: Execution timeout in seconds (optional)
```

Example using curl:

```bash
# Upload a directory (create tarball first)
tar -czf project.tar.gz /path/to/project
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@project.tar.gz" \
  -F "parameters={\"check_secrets\":true}"

# Upload a single file
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@binary.elf"
```

### Storage Flow

1. **CLI/API uploads file** via HTTP multipart
2. **Backend receives file** and streams to temporary location (max 10GB)
3. **Backend uploads to MinIO** with generated `target_id`
4. **Workflow is submitted** to Temporal with `target_id`
5. **Worker downloads target** from MinIO to local cache
6. **Workflow processes target** from cache
7. **MinIO lifecycle policy** deletes files after 7 days

### Advantages

- **No host filesystem access required** - workers can run anywhere
- **Automatic cleanup** - lifecycle policies prevent disk exhaustion
- **Caching** - repeated workflows reuse cached targets
- **Multi-host ready** - targets accessible from any worker
- **Secure** - isolated storage, no arbitrary host path access

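For programmatic clients, the same upload flow can be driven from Python; a sketch using httpx (endpoint and field names as documented above):

```python
import httpx

# Upload a tarball and submit the workflow in one request.
with open("project.tar.gz", "rb") as fh:
    response = httpx.post(
        "http://localhost:8000/workflows/security_assessment/upload-and-submit",
        files={"file": ("project.tar.gz", fh, "application/gzip")},
        data={"parameters": '{"check_secrets": true}'},
        timeout=120.0,
    )
response.raise_for_status()
print(response.json())
```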
## Module Development

Modules implement the `BaseModule` interface:
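The interface code itself is elided by the diff here (the hunk header below shows only `class MyModule(BaseModule):`); a minimal sketch under assumed method names, not the actual interface:

```python
from modules.base import BaseModule  # hypothetical import path

class MyModule(BaseModule):
    """Toy module; the real BaseModule interface may differ."""

    async def execute(self, config: dict, workspace) -> dict:
        # Walk the mounted workspace and return findings for SARIF conversion.
        findings = []
        for path in workspace.rglob(config.get("pattern", "*.py")):
            findings.append({"file": str(path), "message": "example finding"})
        return {"findings": findings}
```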
@@ -237,24 +198,12 @@ class MyModule(BaseModule):

## Submitting a Workflow

### With File Upload (Recommended)

```bash
# Automatic tarball and upload
tar -czf project.tar.gz /home/user/project
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@project.tar.gz" \
  -F "parameters={\"scanner_config\":{\"patterns\":[\"*.py\"]},\"analyzer_config\":{\"check_secrets\":true}}"
```

### Legacy Path-Based Submission

```bash
# Only works if backend and target are on same machine
curl -X POST "http://localhost:8000/workflows/security_assessment/submit" \
  -H "Content-Type: application/json" \
  -d '{
    "target_path": "/home/user/project",
    "volume_mode": "ro",
    "parameters": {
      "scanner_config": {"patterns": ["*.py"]},
      "analyzer_config": {"check_secrets": true}
@@ -286,31 +235,23 @@ Returns SARIF-formatted findings:

## Security Considerations

1. **File Upload Security**: Files uploaded to MinIO with isolated storage
2. **Read-Only Default**: Target files accessed as read-only unless explicitly set
3. **Worker Isolation**: Each workflow runs in isolated vertical workers
4. **Resource Limits**: Can set CPU/memory limits per worker
5. **Automatic Cleanup**: MinIO lifecycle policies delete old files after 7 days
1. **Volume Mounting**: Only allowed directories can be mounted
2. **Read-Only Default**: Volumes mounted as read-only unless explicitly set
3. **Container Isolation**: Each workflow runs in an isolated container
4. **Resource Limits**: Can set CPU/memory limits via Prefect
5. **Network Isolation**: Containers use bridge networking

## Development

### Adding a New Workflow

1. Create directory: `toolbox/workflows/my_workflow/`
2. Add `workflow.py` with a Temporal workflow (using `@workflow.defn`)
3. Add mandatory `metadata.yaml` with `vertical` field
4. Restart the appropriate worker: `docker-compose -f docker-compose.temporal.yaml restart worker-rust`
5. Worker will automatically discover and register the new workflow
2. Add `workflow.py` with a Prefect flow
3. Add mandatory `metadata.yaml`
4. Restart backend: `docker-compose restart fuzzforge-backend`

### Adding a New Module

1. Create module in `toolbox/modules/{category}/`
2. Implement `BaseModule` interface
3. Use in workflows via import

### Adding a New Vertical Worker

1. Create worker directory: `workers/{vertical}/`
2. Create `Dockerfile` with required tools
3. Add worker to `docker-compose.temporal.yaml`
4. Worker will automatically discover workflows with matching `vertical` in metadata
3. Use in workflows via import
@@ -1,184 +0,0 @@
# FuzzForge Benchmark Suite

Performance benchmarking infrastructure organized by module category.

## Directory Structure

```
benchmarks/
├── conftest.py              # Benchmark fixtures
├── category_configs.py      # Category-specific thresholds
├── by_category/             # Benchmarks organized by category
│   ├── fuzzer/
│   │   ├── bench_cargo_fuzz.py
│   │   └── bench_atheris.py
│   ├── scanner/
│   │   └── bench_file_scanner.py
│   ├── secret_detection/
│   │   ├── bench_gitleaks.py
│   │   └── bench_trufflehog.py
│   └── analyzer/
│       └── bench_security_analyzer.py
├── fixtures/                # Benchmark test data
│   ├── small/               # ~1K LOC
│   ├── medium/              # ~10K LOC
│   └── large/               # ~100K LOC
└── results/                 # Benchmark results (JSON)
```

## Module Categories

### Fuzzer
**Expected Metrics**: execs/sec, coverage_rate, time_to_crash, memory_usage

**Performance Thresholds**:
- Min 1000 execs/sec
- Max 10s for small projects
- Max 2GB memory

### Scanner
**Expected Metrics**: files/sec, LOC/sec, findings_count

**Performance Thresholds**:
- Min 100 files/sec
- Min 10K LOC/sec
- Max 512MB memory

### Secret Detection
**Expected Metrics**: patterns/sec, precision, recall, F1

**Performance Thresholds**:
- Min 90% precision
- Min 95% recall
- Max 5 false positives per 100 secrets

### Analyzer
**Expected Metrics**: analysis_depth, files/sec, accuracy

**Performance Thresholds**:
- Min 10 files/sec (deep analysis)
- Min 85% accuracy
- Max 2GB memory

## Running Benchmarks

### All Benchmarks
```bash
cd backend
pytest benchmarks/ --benchmark-only -v
```

### Specific Category
```bash
pytest benchmarks/by_category/fuzzer/ --benchmark-only -v
```

### With Comparison
```bash
# Run and save baseline
pytest benchmarks/ --benchmark-only --benchmark-save=baseline

# Compare against baseline
pytest benchmarks/ --benchmark-only --benchmark-compare=baseline
```

### Generate Histogram
```bash
pytest benchmarks/ --benchmark-only --benchmark-histogram=histogram
```

## Benchmark Results

Results are saved as JSON and include:
- Mean execution time
- Standard deviation
- Min/Max values
- Iterations per second
- Memory usage

Example output:
```
------------------------ benchmark: fuzzer --------------------------
Name                           Mean       StdDev     Ops/Sec
bench_cargo_fuzz[discovery]    0.0012s    0.0001s    833.33
bench_cargo_fuzz[execution]    0.1250s    0.0050s    8.00
bench_cargo_fuzz[memory]       0.0100s    0.0005s    100.00
---------------------------------------------------------------------
```

## CI/CD Integration

Benchmarks run:
- **Nightly**: Full benchmark suite, track trends
- **On PR**: When benchmarks/ or modules/ changed
- **Manual**: Via workflow_dispatch

### Regression Detection

Benchmarks automatically fail if:
- Performance degrades >10%
- Memory usage exceeds thresholds
- Throughput drops below minimum

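One way to enforce such a gate locally with pytest-benchmark (assuming a baseline saved under the name `baseline`; the 10% figure mirrors the threshold above):

```bash
# Fail the run if mean time regresses more than 10% versus the saved baseline
pytest benchmarks/ --benchmark-only \
  --benchmark-compare=baseline \
  --benchmark-compare-fail=mean:10%
```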
See `.github/workflows/benchmark.yml` for configuration.
|
||||
|
||||
## Adding New Benchmarks
|
||||
|
||||
### 1. Create benchmark file in category directory
|
||||
```python
|
||||
# benchmarks/by_category/fuzzer/bench_new_fuzzer.py
|
||||
|
||||
import pytest
|
||||
from benchmarks.category_configs import ModuleCategory, get_threshold
|
||||
|
||||
@pytest.mark.benchmark(group="fuzzer")
|
||||
def test_execution_performance(benchmark, new_fuzzer, test_workspace):
|
||||
"""Benchmark execution speed"""
|
||||
result = benchmark(new_fuzzer.execute, config, test_workspace)
|
||||
|
||||
# Validate against threshold
|
||||
threshold = get_threshold(ModuleCategory.FUZZER, "max_execution_time_small")
|
||||
assert result.execution_time < threshold
|
||||
```
|
||||
|
||||
### 2. Update category_configs.py if needed
|
||||
Add new thresholds or metrics for your module.
|
||||
|
||||
### 3. Run locally
|
||||
```bash
|
||||
pytest benchmarks/by_category/fuzzer/bench_new_fuzzer.py --benchmark-only -v
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use mocking** for external dependencies (network, disk I/O)
|
||||
2. **Fixed iterations** for consistent benchmarking
|
||||
3. **Warm-up runs** for JIT-compiled code
|
||||
4. **Category-specific metrics** aligned with module purpose
|
||||
5. **Realistic fixtures** that represent actual use cases
|
||||
6. **Memory profiling** using tracemalloc
|
||||
7. **Compare apples to apples** within the same category

## Interpreting Results

### Good Performance
- ✅ Execution time below threshold
- ✅ Memory usage within limits
- ✅ Throughput meets minimum
- ✅ <5% variance across runs

### Performance Issues
- ⚠️ Execution time 10-20% over threshold
- ❌ Execution time >20% over threshold
- ❌ Memory leaks (increasing over iterations)
- ❌ High variance (>10%) indicates instability

## Tracking Performance Over Time

Benchmark results are stored as artifacts with:
- Commit SHA
- Timestamp
- Environment details (Python version, OS)
- Full metrics

Use these to track long-term performance trends and detect gradual degradation.
@@ -1,221 +0,0 @@
"""
Benchmarks for CargoFuzzer module

Tests performance characteristics of Rust fuzzing:
- Execution throughput (execs/sec)
- Coverage rate
- Memory efficiency
- Time to first crash
"""

import pytest
import asyncio
from pathlib import Path
from unittest.mock import AsyncMock, patch
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))

from modules.fuzzer.cargo_fuzzer import CargoFuzzer
from benchmarks.category_configs import ModuleCategory, get_threshold


@pytest.fixture
def cargo_fuzzer():
    """Create CargoFuzzer instance for benchmarking"""
    return CargoFuzzer()


@pytest.fixture
def benchmark_config():
    """Benchmark-optimized configuration"""
    return {
        "target_name": None,
        "max_iterations": 10000,  # Fixed iterations for consistent benchmarking
        "timeout_seconds": 30,
        "sanitizer": "address"
    }


@pytest.fixture
def mock_rust_workspace(tmp_path):
    """Create a minimal Rust workspace for benchmarking"""
    workspace = tmp_path / "rust_project"
    workspace.mkdir()

    # Cargo.toml
    (workspace / "Cargo.toml").write_text("""[package]
name = "bench_project"
version = "0.1.0"
edition = "2021"
""")

    # src/lib.rs
    src = workspace / "src"
    src.mkdir()
    (src / "lib.rs").write_text("""
pub fn benchmark_function(data: &[u8]) -> Vec<u8> {
    data.to_vec()
}
""")

    # fuzz structure
    fuzz = workspace / "fuzz"
    fuzz.mkdir()
    (fuzz / "Cargo.toml").write_text("""[package]
name = "bench_project-fuzz"
version = "0.0.0"
edition = "2021"

[dependencies]
libfuzzer-sys = "0.4"

[dependencies.bench_project]
path = ".."

[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
""")

    targets = fuzz / "fuzz_targets"
    targets.mkdir()
    (targets / "fuzz_target_1.rs").write_text("""#![no_main]
use libfuzzer_sys::fuzz_target;
use bench_project::benchmark_function;

fuzz_target!(|data: &[u8]| {
    let _ = benchmark_function(data);
});
""")

    return workspace


class TestCargoFuzzerPerformance:
    """Benchmark CargoFuzzer performance metrics"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_target_discovery_performance(self, benchmark, cargo_fuzzer, mock_rust_workspace):
        """Benchmark fuzz target discovery speed"""
        def discover():
            return asyncio.run(cargo_fuzzer._discover_fuzz_targets(mock_rust_workspace))

        result = benchmark(discover)
        assert len(result) > 0

    @pytest.mark.benchmark(group="fuzzer")
    def test_config_validation_performance(self, benchmark, cargo_fuzzer, benchmark_config):
        """Benchmark configuration validation speed"""
        result = benchmark(cargo_fuzzer.validate_config, benchmark_config)
        assert result is True

    @pytest.mark.benchmark(group="fuzzer")
    def test_module_initialization_performance(self, benchmark):
        """Benchmark module instantiation time"""
        def init_module():
            return CargoFuzzer()

        module = benchmark(init_module)
        assert module is not None


class TestCargoFuzzerThroughput:
    """Benchmark execution throughput"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_execution_throughput(self, benchmark, cargo_fuzzer, mock_rust_workspace, benchmark_config):
        """Benchmark fuzzing execution throughput"""

        # Mock actual fuzzing to focus on orchestration overhead
        async def mock_run(workspace, target, config, callback):
            # Simulate 10K execs at 1000 execs/sec
            if callback:
                await callback({
                    "total_execs": 10000,
                    "execs_per_sec": 1000.0,
                    "crashes": 0,
                    "coverage": 50,
                    "corpus_size": 10,
                    "elapsed_time": 10
                })
            return [], {"total_executions": 10000, "execution_time": 10.0}

        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', side_effect=mock_run):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    def run_fuzzer():
                        # Run in new event loop
                        loop = asyncio.new_event_loop()
                        try:
                            return loop.run_until_complete(
                                cargo_fuzzer.execute(benchmark_config, mock_rust_workspace)
                            )
                        finally:
                            loop.close()

                    result = benchmark(run_fuzzer)
                    assert result.status == "success"

                    # Verify performance threshold
                    threshold = get_threshold(ModuleCategory.FUZZER, "max_execution_time_small")
                    assert result.execution_time < threshold, \
                        f"Execution time {result.execution_time}s exceeds threshold {threshold}s"


class TestCargoFuzzerMemory:
    """Benchmark memory efficiency"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_memory_overhead(self, benchmark, cargo_fuzzer, mock_rust_workspace, benchmark_config):
        """Benchmark memory usage during execution"""
        import tracemalloc

        def measure_memory():
            tracemalloc.start()

            # Simulate operations
            cargo_fuzzer.validate_config(benchmark_config)
            asyncio.run(cargo_fuzzer._discover_fuzz_targets(mock_rust_workspace))

            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()

            return peak / 1024 / 1024  # Convert to MB

        peak_mb = benchmark(measure_memory)

        # Check against threshold
        max_memory = get_threshold(ModuleCategory.FUZZER, "max_memory_mb")
        assert peak_mb < max_memory, \
            f"Peak memory {peak_mb:.2f}MB exceeds threshold {max_memory}MB"


class TestCargoFuzzerScalability:
    """Benchmark scalability characteristics"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_multiple_target_discovery(self, benchmark, cargo_fuzzer, tmp_path):
        """Benchmark discovery with multiple targets"""
        workspace = tmp_path / "multi_target"
        workspace.mkdir()

        # Create workspace with 10 fuzz targets
        (workspace / "Cargo.toml").write_text("[package]\nname = \"test\"\nversion = \"0.1.0\"\nedition = \"2021\"")
        src = workspace / "src"
        src.mkdir()
        (src / "lib.rs").write_text("pub fn test() {}")

        fuzz = workspace / "fuzz"
        fuzz.mkdir()
        targets = fuzz / "fuzz_targets"
        targets.mkdir()

        for i in range(10):
            (targets / f"fuzz_target_{i}.rs").write_text("// Target")

        def discover():
            return asyncio.run(cargo_fuzzer._discover_fuzz_targets(workspace))

        result = benchmark(discover)
        assert len(result) == 10

@@ -1,240 +0,0 @@
# Secret Detection Benchmarks

Comprehensive benchmarking suite comparing secret detection tools via complete workflow execution:
- **Gitleaks** - Fast pattern-based detection
- **TruffleHog** - Entropy analysis with verification
- **LLM Detector** - AI-powered semantic analysis (gpt-4o-mini, gpt-5-mini)

## Quick Start

### Run All Comparisons

```bash
cd backend
python benchmarks/by_category/secret_detection/compare_tools.py
```

This will run all workflows on `test_projects/secret_detection_benchmark/` and generate comparison reports.

### Run Benchmark Tests

```bash
# All benchmarks (Gitleaks, TruffleHog, LLM with 3 models)
pytest benchmarks/by_category/secret_detection/bench_comparison.py --benchmark-only -v

# Specific tool only
pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionComparison::test_gitleaks_workflow --benchmark-only -v

# Performance tests only
pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionPerformance --benchmark-only -v
```

## Ground Truth Dataset

**Controlled Benchmark** (`test_projects/secret_detection_benchmark/`)

**Exactly 32 documented secrets** for accurate precision/recall testing:
- **12 Easy**: Standard patterns (AWS keys, GitHub PATs, Stripe keys, SSH keys)
- **10 Medium**: Obfuscated (Base64, hex, concatenated, in comments, Unicode)
- **10 Hard**: Well hidden (ROT13, binary, XOR, reversed, template strings, regex patterns)

All secrets documented in `secret_detection_benchmark_GROUND_TRUTH.json` with exact file paths and line numbers.

See `test_projects/secret_detection_benchmark/README.md` for details.

## Metrics Measured

### Accuracy Metrics
- **Precision**: TP / (TP + FP) - How many detected secrets are real?
- **Recall**: TP / (TP + FN) - How many real secrets were found?
- **F1 Score**: Harmonic mean of precision and recall
- **False Positive Rate**: FP / Total Detected
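
A quick worked example of these formulas (illustrative numbers, not project code):

```python
# Four documented secrets; the tool finds two of them plus one false positive.
expected = {("src/app.py", 6), ("config/keys.yaml", 6), (".env", 3), (".env", 4)}
detected = {("src/app.py", 6), (".env", 3), ("README.md", 1)}

tp = len(expected & detected)  # 2
fp = len(detected - expected)  # 1
fn = len(expected - detected)  # 2

precision = tp / (tp + fp)                           # 2/3 ~ 0.67
recall = tp / (tp + fn)                              # 2/4 = 0.50
f1 = 2 * precision * recall / (precision + recall)   # ~ 0.57
fp_rate = fp / len(detected)                         # 1/3 ~ 0.33

print(f"P={precision:.2f} R={recall:.2f} F1={f1:.2f} FPR={fp_rate:.2f}")
```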

### Performance Metrics
- **Execution Time**: Total time to scan all files
- **Throughput**: Files/secrets scanned per second
- **Memory Usage**: Peak memory during execution

### Thresholds (from `category_configs.py`)
- Minimum Precision: 90%
- Minimum Recall: 95%
- Max Execution Time (small): 2.0s
- Max False Positives: 5 per 100 secrets

## Tool Comparison

### Gitleaks
**Strengths:**
- Fastest execution
- Git-aware (commit history scanning)
- Low false positive rate
- No API required
- Works offline

**Weaknesses:**
- Pattern-based only
- May miss obfuscated secrets
- Limited to known patterns

### TruffleHog
**Strengths:**
- Secret verification (validates if active)
- High detection rate with entropy analysis
- Multiple detectors (600+ secret types)
- Catches high-entropy strings

**Weaknesses:**
- Slower than Gitleaks
- Higher false positive rate
- Verification requires network calls

### LLM Detector
**Strengths:**
- Semantic understanding of context
- Catches novel/custom secret patterns
- Can reason about what "looks like" a secret
- Multiple model options (GPT-4, Claude, etc.)
- Understands code context

**Weaknesses:**
- Slowest (API latency + LLM processing)
- Most expensive (LLM API costs)
- Requires A2A agent infrastructure
- Accuracy varies by model
- May miss well-disguised secrets

## Results Directory

After running comparisons, results are saved to:
```
benchmarks/by_category/secret_detection/results/
├── comparison_report.md      # Human-readable comparison with:
│                             # - Summary table with secrets/files/avg per file/time
│                             # - Agreement analysis (secrets found by N tools)
│                             # - Tool agreement matrix (overlap between pairs)
│                             # - Per-file detailed comparison table
│                             # - File type breakdown
│                             # - Files analyzed by each tool
│                             # - Overlap analysis and performance summary
└── comparison_results.json   # Machine-readable data with findings_by_file
```

## Latest Benchmark Results

Run the benchmark to generate results:
```bash
cd backend
python benchmarks/by_category/secret_detection/compare_tools.py
```

Results are saved to `results/comparison_report.md` with:
- Summary table (secrets found, files scanned, time)
- Agreement analysis (how many tools found each secret)
- Tool agreement matrix (overlap between tools)
- Per-file detailed comparison
- File type breakdown

## CI/CD Integration

Add to your CI pipeline:

```yaml
# .github/workflows/benchmark-secrets.yml
name: Secret Detection Benchmark

on:
  schedule:
    - cron: '0 0 * * 0'  # Weekly
  workflow_dispatch:

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r backend/requirements.txt
          pip install pytest-benchmark

      - name: Run benchmarks
        env:
          GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
        run: |
          cd backend
          pytest benchmarks/by_category/secret_detection/bench_comparison.py \
            --benchmark-only \
            --benchmark-json=results.json \
            --gitguardian-api-key

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: backend/results.json
```

## Adding New Tools

To benchmark a new secret detection tool:

1. Create module in `toolbox/modules/secret_detection/`
2. Register in `__init__.py`
3. Add to `compare_tools.py` in `run_all_tools()`
4. Add test in `bench_comparison.py`
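
A rough skeleton for step 1. The class layout and method signatures below are assumptions inferred from how modules are exercised elsewhere in this suite (`validate_config`, async `execute(config, workspace)`), not the verbatim module API:

```python
# toolbox/modules/secret_detection/my_detector.py (sketch; interface assumed)
from pathlib import Path
from typing import Any, Dict, List


class MyDetector:
    """Hypothetical detector following the pattern used by this suite."""

    def validate_config(self, config: Dict[str, Any]) -> bool:
        # Reject obviously bad configs before a run.
        return isinstance(config.get("max_files", 0), int)

    async def execute(self, config: Dict[str, Any], workspace: Path) -> List[Dict[str, Any]]:
        # Placeholder scan: flag files containing a marker string.
        findings = []
        for path in workspace.rglob("*"):
            if path.is_file() and "SECRET" in path.read_text(errors="ignore"):
                findings.append({"file": str(path), "line": 1, "type": "placeholder"})
        return findings
```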

## Interpreting Results

### High Precision, Low Recall
Tool is conservative - few false positives but misses secrets.
**Use case**: Production environments where false positives are costly.

### Low Precision, High Recall
Tool is aggressive - finds most secrets but many false positives.
**Use case**: Initial scans where manual review is acceptable.

### Balanced (High F1)
Tool has a good balance of precision and recall.
**Use case**: General purpose scanning.

### Fast Execution
Suitable for CI/CD pipelines and pre-commit hooks.

### Slow but Accurate
Better for comprehensive security audits.

## Best Practices

1. **Use multiple tools**: Each has strengths/weaknesses
2. **Combine results**: Union of all findings for maximum coverage (see the sketch after this list)
3. **Filter intelligently**: Remove known false positives
4. **Verify findings**: Check if secrets are actually valid
5. **Track over time**: Monitor precision/recall trends
6. **Update regularly**: Patterns evolve, tools improve
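
For point 2, combining findings is just a set union over `(file, line)` locations — a small illustrative sketch with made-up data:

```python
# Findings as sets of (file, line) pairs, one set per tool.
gitleaks = {(".env", 3), ("config/keys.yaml", 6)}
trufflehog = {("config/database.yaml", 6)}
llm = {(".env", 3), (".env", 4)}

combined = gitleaks | trufflehog | llm   # maximum coverage
consensus = gitleaks & llm               # higher-confidence subset
print(len(combined), len(consensus))     # 4 1
```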

## Troubleshooting

### GitGuardian Tests Skipped
- Set `GITGUARDIAN_API_KEY` environment variable
- Use `--gitguardian-api-key` flag

### LLM Tests Skipped
- Ensure A2A agent is running
- Check agent URL in config
- Use `--llm-enabled` flag

### Low Recall
- Check if ground truth is up to date
- Verify tool is configured correctly
- Review missed secrets manually

### High False Positives
- Adjust tool sensitivity
- Add exclusion patterns
- Review false positive list

@@ -1,285 +0,0 @@
"""
Secret Detection Tool Comparison Benchmark

Compares Gitleaks, TruffleHog, and LLM-based detection
on the vulnerable_app ground truth dataset via workflow execution.
"""

import pytest
import json
from pathlib import Path
from typing import Dict, List, Any
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src"))

from fuzzforge_sdk import FuzzForgeClient
from benchmarks.category_configs import ModuleCategory, get_threshold


@pytest.fixture
def target_path():
    """Path to vulnerable_app"""
    path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "vulnerable_app"
    assert path.exists(), f"Target not found: {path}"
    return path


@pytest.fixture
def ground_truth(target_path):
    """Load ground truth data"""
    metadata_file = target_path / "SECRETS_GROUND_TRUTH.json"
    assert metadata_file.exists(), f"Ground truth not found: {metadata_file}"

    with open(metadata_file) as f:
        return json.load(f)


@pytest.fixture
def sdk_client():
    """FuzzForge SDK client"""
    client = FuzzForgeClient(base_url="http://localhost:8000")
    yield client
    client.close()


def calculate_metrics(sarif_results: List[Dict], ground_truth: Dict[str, Any]) -> Dict[str, float]:
    """Calculate precision, recall, and F1 score"""

    # Extract expected secrets from ground truth
    expected_secrets = set()
    for file_info in ground_truth["files"]:
        if "secrets" in file_info:
            for secret in file_info["secrets"]:
                expected_secrets.add((file_info["filename"], secret["line"]))

    # Extract detected secrets from SARIF
    detected_secrets = set()
    for result in sarif_results:
        locations = result.get("locations", [])
        for location in locations:
            physical_location = location.get("physicalLocation", {})
            artifact_location = physical_location.get("artifactLocation", {})
            region = physical_location.get("region", {})

            uri = artifact_location.get("uri", "")
            line = region.get("startLine", 0)

            if uri and line:
                file_path = Path(uri)
                filename = file_path.name
                detected_secrets.add((filename, line))
                # Also try with relative path
                if len(file_path.parts) > 1:
                    rel_path = str(Path(*file_path.parts[-2:]))
                    detected_secrets.add((rel_path, line))

    # Calculate metrics
    true_positives = len(expected_secrets & detected_secrets)
    false_positives = len(detected_secrets - expected_secrets)
    false_negatives = len(expected_secrets - detected_secrets)

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "true_positives": true_positives,
        "false_positives": false_positives,
        "false_negatives": false_negatives
    }


class TestSecretDetectionComparison:
    """Compare all secret detection tools"""

    @pytest.mark.benchmark(group="secret_detection")
    def test_gitleaks_workflow(self, benchmark, sdk_client, target_path, ground_truth):
        """Benchmark Gitleaks workflow accuracy and performance"""

        def run_gitleaks():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="gitleaks_detection",
                target_path=str(target_path),
                parameters={
                    "scan_mode": "detect",
                    "no_git": True,
                    "redact": False
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_gitleaks)

        # Extract SARIF results
        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        # Calculate metrics
        metrics = calculate_metrics(sarif_results, ground_truth)

        # Log results
        print("\n=== Gitleaks Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")

        # Assert meets thresholds
        min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision")
        min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall")

        assert metrics['precision'] >= min_precision, \
            f"Precision {metrics['precision']:.2%} below threshold {min_precision:.2%}"
        assert metrics['recall'] >= min_recall, \
            f"Recall {metrics['recall']:.2%} below threshold {min_recall:.2%}"

    @pytest.mark.benchmark(group="secret_detection")
    def test_trufflehog_workflow(self, benchmark, sdk_client, target_path, ground_truth):
        """Benchmark TruffleHog workflow accuracy and performance"""

        def run_trufflehog():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="trufflehog_detection",
                target_path=str(target_path),
                parameters={
                    "verify": False,
                    "max_depth": 10
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_trufflehog)

        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        metrics = calculate_metrics(sarif_results, ground_truth)

        print("\n=== TruffleHog Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")

        min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision")
        min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall")

        assert metrics['precision'] >= min_precision
        assert metrics['recall'] >= min_recall

    @pytest.mark.benchmark(group="secret_detection")
    @pytest.mark.parametrize("model", [
        "gpt-4o-mini",
        "gpt-4o",
        "claude-3-5-sonnet-20241022"
    ])
    def test_llm_workflow(self, benchmark, sdk_client, target_path, ground_truth, model):
        """Benchmark LLM workflow with different models"""

        def run_llm():
            provider = "openai" if "gpt" in model else "anthropic"

            run = sdk_client.submit_workflow_with_upload(
                workflow_name="llm_secret_detection",
                target_path=str(target_path),
                parameters={
                    "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent",
                    "llm_model": model,
                    "llm_provider": provider,
                    "max_files": 20,
                    "timeout": 60
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_llm)

        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        metrics = calculate_metrics(sarif_results, ground_truth)

        print(f"\n=== LLM ({model}) Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")


class TestSecretDetectionPerformance:
    """Performance benchmarks for each tool"""

    @pytest.mark.benchmark(group="secret_detection")
    def test_gitleaks_performance(self, benchmark, sdk_client, target_path):
        """Benchmark Gitleaks workflow execution speed"""

        def run():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="gitleaks_detection",
                target_path=str(target_path),
                parameters={"scan_mode": "detect", "no_git": True}
            )
            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            return result

        result = benchmark(run)

        max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small")
        # Note: Workflow execution time includes orchestration overhead
        # so we allow 2x the module threshold
        assert result.execution_time < max_time * 2

    @pytest.mark.benchmark(group="secret_detection")
    def test_trufflehog_performance(self, benchmark, sdk_client, target_path):
        """Benchmark TruffleHog workflow execution speed"""

        def run():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="trufflehog_detection",
                target_path=str(target_path),
                parameters={"verify": False}
            )
            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            return result

        result = benchmark(run)

        max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small")
        assert result.execution_time < max_time * 2

@@ -1,547 +0,0 @@
"""
Secret Detection Tools Comparison Report Generator

Generates comparison reports showing strengths/weaknesses of each tool.
Uses workflow execution via SDK to test the complete pipeline.
"""

import asyncio
import json
import time
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src"))

from fuzzforge_sdk import FuzzForgeClient


@dataclass
class ToolResult:
    """Results from running a tool"""
    tool_name: str
    execution_time: float
    findings_count: int
    findings_by_file: Dict[str, List[int]]  # file_path -> [line_numbers]
    unique_files: int
    unique_locations: int  # unique (file, line) pairs
    secret_density: float  # average secrets per file
    file_types: Dict[str, int]  # file extension -> count of files with secrets


class SecretDetectionComparison:
    """Compare secret detection tools"""

    def __init__(self, target_path: Path, api_url: str = "http://localhost:8000"):
        self.target_path = target_path
        self.client = FuzzForgeClient(base_url=api_url)

    async def run_workflow(self, workflow_name: str, tool_name: str, config: Optional[Dict[str, Any]] = None) -> Optional[ToolResult]:
        """Run a workflow and extract findings"""
        print(f"\nRunning {tool_name} workflow...")

        start_time = time.time()

        try:
            # Start workflow
            run = self.client.submit_workflow_with_upload(
                workflow_name=workflow_name,
                target_path=str(self.target_path),
                parameters=config or {}
            )

            print(f"  Started run: {run.run_id}")

            # Wait for completion (up to 30 minutes for slow LLMs)
            print("  Waiting for completion...")
            result = self.client.wait_for_completion(run.run_id, timeout=1800)

            execution_time = time.time() - start_time

            if result.status != "COMPLETED":
                print(f"❌ {tool_name} workflow failed: {result.status}")
                return None

            # Get findings from SARIF
            findings = self.client.get_run_findings(run.run_id)

            if not findings or not findings.sarif:
                print(f"⚠️ {tool_name} produced no findings")
                return None

            # Extract results from SARIF and group by file
            findings_by_file = {}
            unique_locations = set()

            for run_data in findings.sarif.get("runs", []):
                for result in run_data.get("results", []):
                    locations = result.get("locations", [])
                    for location in locations:
                        physical_location = location.get("physicalLocation", {})
                        artifact_location = physical_location.get("artifactLocation", {})
                        region = physical_location.get("region", {})

                        uri = artifact_location.get("uri", "")
                        line = region.get("startLine", 0)

                        if uri and line:
                            if uri not in findings_by_file:
                                findings_by_file[uri] = []
                            findings_by_file[uri].append(line)
                            unique_locations.add((uri, line))

            # Sort line numbers for each file
            for file_path in findings_by_file:
                findings_by_file[file_path] = sorted(set(findings_by_file[file_path]))

            # Calculate file type distribution
            file_types = {}
            for file_path in findings_by_file:
                ext = Path(file_path).suffix or Path(file_path).name  # Use full name for files like .env
                if ext.startswith('.'):
                    file_types[ext] = file_types.get(ext, 0) + 1
                else:
                    file_types['[no extension]'] = file_types.get('[no extension]', 0) + 1

            # Calculate secret density
            secret_density = len(unique_locations) / len(findings_by_file) if findings_by_file else 0

            print(f"  ✓ Found {len(unique_locations)} secrets in {len(findings_by_file)} files (avg {secret_density:.1f} per file)")

            return ToolResult(
                tool_name=tool_name,
                execution_time=execution_time,
                findings_count=len(unique_locations),
                findings_by_file=findings_by_file,
                unique_files=len(findings_by_file),
                unique_locations=len(unique_locations),
                secret_density=secret_density,
                file_types=file_types
            )

        except Exception as e:
            print(f"❌ {tool_name} error: {e}")
            return None

    async def run_all_tools(self, llm_models: Optional[List[str]] = None) -> List[ToolResult]:
        """Run all available tools"""
        results = []

        if llm_models is None:
            llm_models = ["gpt-4o-mini"]

        # Gitleaks
        result = await self.run_workflow("gitleaks_detection", "Gitleaks", {
            "scan_mode": "detect",
            "no_git": True,
            "redact": False
        })
        if result:
            results.append(result)

        # TruffleHog
        result = await self.run_workflow("trufflehog_detection", "TruffleHog", {
            "verify": False,
            "max_depth": 10
        })
        if result:
            results.append(result)

        # LLM Detector with multiple models
        for model in llm_models:
            tool_name = f"LLM ({model})"
            result = await self.run_workflow("llm_secret_detection", tool_name, {
                "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent",
                "llm_model": model,
                "llm_provider": "openai" if "gpt" in model else "anthropic",
                "max_files": 20,
                "timeout": 60,
                "file_patterns": [
                    "*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml",
                    "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat",
                    "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"
                ]
            })
            if result:
                results.append(result)

        return results

    def _calculate_agreement_matrix(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]:
        """Calculate overlap matrix showing common secrets between tool pairs"""
        matrix = {}

        for result1 in results:
            matrix[result1.tool_name] = {}
            # Convert to set of (file, line) tuples
            secrets1 = set()
            for file_path, lines in result1.findings_by_file.items():
                for line in lines:
                    secrets1.add((file_path, line))

            for result2 in results:
                secrets2 = set()
                for file_path, lines in result2.findings_by_file.items():
                    for line in lines:
                        secrets2.add((file_path, line))

                # Count common secrets
                common = len(secrets1 & secrets2)
                matrix[result1.tool_name][result2.tool_name] = common

        return matrix

    def _get_per_file_comparison(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]:
        """Get per-file breakdown of findings across all tools"""
        all_files = set()
        for result in results:
            all_files.update(result.findings_by_file.keys())

        comparison = {}
        for file_path in sorted(all_files):
            comparison[file_path] = {}
            for result in results:
                comparison[file_path][result.tool_name] = len(result.findings_by_file.get(file_path, []))

        return comparison

    def _get_agreement_stats(self, results: List[ToolResult]) -> Dict[int, int]:
        """Calculate how many secrets are found by 1, 2, 3, or all tools"""
        # Collect all unique (file, line) pairs across all tools
        all_secrets = {}  # (file, line) -> list of tools that found it

        for result in results:
            for file_path, lines in result.findings_by_file.items():
                for line in lines:
                    key = (file_path, line)
                    if key not in all_secrets:
                        all_secrets[key] = []
                    all_secrets[key].append(result.tool_name)

        # Count by number of tools
        agreement_counts = {}
        for tools in all_secrets.values():
            count = len(set(tools))  # Unique tools
            agreement_counts[count] = agreement_counts.get(count, 0) + 1

        return agreement_counts

    def generate_markdown_report(self, results: List[ToolResult]) -> str:
        """Generate markdown comparison report"""
        report = []
        report.append("# Secret Detection Tools Comparison\n")
        report.append(f"**Target**: {self.target_path.name}")
        report.append(f"**Tools**: {', '.join([r.tool_name for r in results])}\n")

        # Summary table with extended metrics
        report.append("\n## Summary\n")
        report.append("| Tool | Secrets | Files | Avg/File | Time (s) |")
        report.append("|------|---------|-------|----------|----------|")

        for result in results:
            report.append(
                f"| {result.tool_name} | "
                f"{result.findings_count} | "
                f"{result.unique_files} | "
                f"{result.secret_density:.1f} | "
                f"{result.execution_time:.2f} |"
            )

        # Agreement Analysis
        agreement_stats = self._get_agreement_stats(results)
        report.append("\n## Agreement Analysis\n")
        report.append("Secrets found by different numbers of tools:\n")
        for num_tools in sorted(agreement_stats.keys(), reverse=True):
            count = agreement_stats[num_tools]
            if num_tools == len(results):
                report.append(f"- **All {num_tools} tools agree**: {count} secrets")
            elif num_tools == 1:
                report.append(f"- **Only 1 tool found**: {count} secrets")
            else:
                report.append(f"- **{num_tools} tools agree**: {count} secrets")

        # Agreement Matrix
        agreement_matrix = self._calculate_agreement_matrix(results)
        report.append("\n## Tool Agreement Matrix\n")
        report.append("Number of common secrets found by tool pairs:\n")

        # Header row
        header = "| Tool |"
        separator = "|------|"
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            header += f" {short_name} |"
            separator += "------|"
        report.append(header)
        report.append(separator)

        # Data rows
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            row = f"| {short_name} |"
            for result2 in results:
                count = agreement_matrix[result.tool_name][result2.tool_name]
                row += f" {count} |"
            report.append(row)

        # Per-File Comparison
        per_file = self._get_per_file_comparison(results)
        report.append("\n## Per-File Detailed Comparison\n")
        report.append("Secrets found per file by each tool:\n")

        # Header
        header = "| File |"
        separator = "|------|"
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            header += f" {short_name} |"
            separator += "------|"
        header += " Total |"
        separator += "------|"
        report.append(header)
        report.append(separator)

        # Show top 15 files by total findings
        file_totals = [(f, sum(counts.values())) for f, counts in per_file.items()]
        file_totals.sort(key=lambda x: x[1], reverse=True)

        for file_path, total in file_totals[:15]:
            row = f"| `{file_path}` |"
            for result in results:
                count = per_file[file_path].get(result.tool_name, 0)
                row += f" {count} |"
            row += f" **{total}** |"
            report.append(row)

        if len(file_totals) > 15:
            report.append(f"| ... and {len(file_totals) - 15} more files | ... | ... | ... | ... | ... |")

        # File Type Breakdown
        report.append("\n## File Type Breakdown\n")
        all_extensions = set()
        for result in results:
            all_extensions.update(result.file_types.keys())

        if all_extensions:
            header = "| Type |"
            separator = "|------|"
            for result in results:
                short_name = result.tool_name.replace("LLM (", "").replace(")", "")
                header += f" {short_name} |"
                separator += "------|"
            report.append(header)
            report.append(separator)

            for ext in sorted(all_extensions):
                row = f"| `{ext}` |"
                for result in results:
                    count = result.file_types.get(ext, 0)
                    row += f" {count} files |"
                report.append(row)

        # File analysis
        report.append("\n## Files Analyzed\n")

        # Collect all unique files across all tools
        all_files = set()
        for result in results:
            all_files.update(result.findings_by_file.keys())

        report.append(f"**Total unique files with secrets**: {len(all_files)}\n")

        for result in results:
            report.append(f"\n### {result.tool_name}\n")
            report.append(f"Found secrets in **{result.unique_files} files**:\n")

            # Sort files by number of findings (descending)
            sorted_files = sorted(
                result.findings_by_file.items(),
                key=lambda x: len(x[1]),
                reverse=True
            )

            # Show top 10 files
            for file_path, lines in sorted_files[:10]:
                report.append(f"- `{file_path}`: {len(lines)} secrets (lines: {', '.join(map(str, lines[:5]))}{'...' if len(lines) > 5 else ''})")

            if len(sorted_files) > 10:
                report.append(f"- ... and {len(sorted_files) - 10} more files")

        # Overlap analysis
        if len(results) >= 2:
            report.append("\n## Overlap Analysis\n")

            # Find common files
            file_sets = [set(r.findings_by_file.keys()) for r in results]
            common_files = set.intersection(*file_sets) if file_sets else set()

            if common_files:
                report.append(f"\n**Files found by all tools** ({len(common_files)}):\n")
                for file_path in sorted(common_files)[:10]:
                    report.append(f"- `{file_path}`")
            else:
                report.append("\n**No files were found by all tools**\n")

            # Find tool-specific files
            for i, result in enumerate(results):
                unique_to_tool = set(result.findings_by_file.keys())
                for j, other_result in enumerate(results):
                    if i != j:
                        unique_to_tool -= set(other_result.findings_by_file.keys())

                if unique_to_tool:
                    report.append(f"\n**Unique to {result.tool_name}** ({len(unique_to_tool)} files):\n")
                    for file_path in sorted(unique_to_tool)[:5]:
                        report.append(f"- `{file_path}`")
                    if len(unique_to_tool) > 5:
                        report.append(f"- ... and {len(unique_to_tool) - 5} more")

        # Ground Truth Analysis (if available)
        ground_truth_path = Path(__file__).parent / "secret_detection_benchmark_GROUND_TRUTH.json"
        if ground_truth_path.exists():
            report.append("\n## Ground Truth Analysis\n")
            try:
                with open(ground_truth_path) as f:
                    gt_data = json.load(f)

                gt_total = gt_data.get("total_secrets", 32)
                report.append(f"**Expected secrets**: {gt_total} (documented in ground truth)\n")

                # Build ground truth set of (file, line) tuples
                gt_secrets = set()
                for secret in gt_data.get("secrets", []):
                    gt_secrets.add((secret["file"], secret["line"]))

                report.append("### Tool Performance vs Ground Truth\n")
                report.append("| Tool | Found | Expected | Recall | Extra Findings |")
                report.append("|------|-------|----------|--------|----------------|")

                for result in results:
                    # Build tool findings set
                    tool_secrets = set()
                    for file_path, lines in result.findings_by_file.items():
                        for line in lines:
                            tool_secrets.add((file_path, line))

                    # Calculate metrics
                    true_positives = len(gt_secrets & tool_secrets)
                    recall = (true_positives / gt_total * 100) if gt_total > 0 else 0
                    extra = len(tool_secrets - gt_secrets)

                    report.append(
                        f"| {result.tool_name} | "
                        f"{result.findings_count} | "
                        f"{gt_total} | "
                        f"{recall:.1f}% | "
                        f"{extra} |"
                    )

                # Analyze LLM extra findings
                llm_results = [r for r in results if "LLM" in r.tool_name]
                if llm_results:
                    report.append("\n### LLM Extra Findings Explanation\n")
                    report.append("LLMs may find more than the 32 documented secrets because they detect:\n")
                    report.append("- **Split secret components**: Each part of `DB_PASS_PART1 + PART2 + PART3` counted separately")
                    report.append("- **Join operations**: Lines like `''.join(AWS_SECRET_CHARS)` flagged as additional exposure")
                    report.append("- **Decoding functions**: Code that reveals secrets (e.g., `base64.b64decode()`, `codecs.decode()`)")
                    report.append("- **Comment identifiers**: Lines marking secret locations without plaintext values")
                    report.append("\nThese are *technically correct* detections of secret exposure points, not false positives.")
                    report.append("The ground truth documents 32 'primary' secrets, but the codebase has additional derivative exposures.\n")

            except Exception as e:
                report.append(f"*Could not load ground truth: {e}*\n")

        # Performance summary
        if results:
            report.append("\n## Performance Summary\n")
            most_findings = max(results, key=lambda r: r.findings_count)
            most_files = max(results, key=lambda r: r.unique_files)
            fastest = min(results, key=lambda r: r.execution_time)

            report.append(f"- **Most secrets found**: {most_findings.tool_name} ({most_findings.findings_count} secrets)")
            report.append(f"- **Most files covered**: {most_files.tool_name} ({most_files.unique_files} files)")
            report.append(f"- **Fastest**: {fastest.tool_name} ({fastest.execution_time:.2f}s)")

        return "\n".join(report)

    def save_json_report(self, results: List[ToolResult], output_path: Path):
        """Save results as JSON"""
        data = {
            "target_path": str(self.target_path),
            "results": [asdict(r) for r in results]
        }

        with open(output_path, 'w') as f:
            json.dump(data, f, indent=2)

        print(f"\n✅ JSON report saved to: {output_path}")

    def cleanup(self):
        """Cleanup SDK client"""
        self.client.close()


async def main():
    """Run comparison and generate reports"""
    # Get target path (secret_detection_benchmark)
    target_path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "secret_detection_benchmark"

    if not target_path.exists():
        print(f"❌ Target not found at: {target_path}")
        return 1

    print("=" * 80)
    print("Secret Detection Tools Comparison")
    print("=" * 80)
    print(f"Target: {target_path}")

    # LLM models to test
    llm_models = [
        "gpt-4o-mini",
        "gpt-5-mini"
    ]
    print(f"LLM models: {', '.join(llm_models)}\n")

    # Run comparison
    comparison = SecretDetectionComparison(target_path)

    try:
        results = await comparison.run_all_tools(llm_models=llm_models)

        if not results:
            print("❌ No tools ran successfully")
            return 1

        # Generate reports
        print("\n" + "=" * 80)
        markdown_report = comparison.generate_markdown_report(results)
        print(markdown_report)

        # Save reports
        output_dir = Path(__file__).parent / "results"
        output_dir.mkdir(exist_ok=True)

        markdown_path = output_dir / "comparison_report.md"
        with open(markdown_path, 'w') as f:
            f.write(markdown_report)
        print(f"\n✅ Markdown report saved to: {markdown_path}")

        json_path = output_dir / "comparison_results.json"
        comparison.save_json_report(results, json_path)

        print("\n" + "=" * 80)
        print("✅ Comparison complete!")
        print("=" * 80)

        return 0

    finally:
        comparison.cleanup()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)

@@ -1,169 +0,0 @@
# Secret Detection Tools Comparison

**Target**: secret_detection_benchmark
**Tools**: Gitleaks, TruffleHog, LLM (gpt-4o-mini), LLM (gpt-5-mini)


## Summary

| Tool | Secrets | Files | Avg/File | Time (s) |
|------|---------|-------|----------|----------|
| Gitleaks | 12 | 10 | 1.2 | 5.18 |
| TruffleHog | 1 | 1 | 1.0 | 5.06 |
| LLM (gpt-4o-mini) | 30 | 15 | 2.0 | 296.85 |
| LLM (gpt-5-mini) | 41 | 16 | 2.6 | 618.55 |

## Agreement Analysis

Secrets found by different numbers of tools:

- **3 tools agree**: 6 secrets
- **2 tools agree**: 22 secrets
- **Only 1 tool found**: 22 secrets

## Tool Agreement Matrix

Number of common secrets found by tool pairs:

| Tool | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini |
|------|------|------|------|------|
| Gitleaks | 12 | 0 | 7 | 11 |
| TruffleHog | 0 | 1 | 0 | 0 |
| gpt-4o-mini | 7 | 0 | 30 | 22 |
| gpt-5-mini | 11 | 0 | 22 | 41 |

## Per-File Detailed Comparison

Secrets found per file by each tool:

| File | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini | Total |
|------|------|------|------|------|------|
| `src/obfuscated.py` | 2 | 0 | 6 | 7 | **15** |
| `src/advanced.js` | 0 | 0 | 5 | 7 | **12** |
| `src/config.py` | 1 | 0 | 0 | 6 | **7** |
| `.env` | 1 | 0 | 2 | 2 | **5** |
| `config/keys.yaml` | 1 | 0 | 2 | 2 | **5** |
| `config/oauth.json` | 1 | 0 | 2 | 2 | **5** |
| `config/settings.py` | 2 | 0 | 0 | 3 | **5** |
| `scripts/deploy.sh` | 1 | 0 | 2 | 2 | **5** |
| `config/legacy.ini` | 0 | 0 | 2 | 2 | **4** |
| `src/Crypto.go` | 0 | 0 | 2 | 2 | **4** |
| `config/app.properties` | 1 | 0 | 1 | 1 | **3** |
| `config/database.yaml` | 0 | 1 | 1 | 1 | **3** |
| `src/Main.java` | 1 | 0 | 1 | 1 | **3** |
| `id_rsa` | 1 | 0 | 1 | 0 | **2** |
| `scripts/webhook.js` | 0 | 0 | 1 | 1 | **2** |
| ... and 2 more files | ... | ... | ... | ... | ... |

## File Type Breakdown

| Type | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini |
|------|------|------|------|------|
| `.env` | 1 files | 0 files | 1 files | 1 files |
| `.go` | 0 files | 0 files | 1 files | 1 files |
| `.ini` | 0 files | 0 files | 1 files | 1 files |
| `.java` | 1 files | 0 files | 1 files | 1 files |
| `.js` | 0 files | 0 files | 2 files | 2 files |
| `.json` | 1 files | 0 files | 1 files | 1 files |
| `.properties` | 1 files | 0 files | 1 files | 1 files |
| `.py` | 3 files | 0 files | 2 files | 4 files |
| `.sh` | 1 files | 0 files | 1 files | 1 files |
| `.sql` | 0 files | 0 files | 1 files | 1 files |
| `.yaml` | 1 files | 1 files | 2 files | 2 files |
| `[no extension]` | 1 files | 0 files | 1 files | 0 files |

## Files Analyzed

**Total unique files with secrets**: 17


### Gitleaks

Found secrets in **10 files**:

- `config/settings.py`: 2 secrets (lines: 6, 9)
- `src/obfuscated.py`: 2 secrets (lines: 7, 17)
- `.env`: 1 secrets (lines: 3)
- `config/app.properties`: 1 secrets (lines: 6)
- `config/keys.yaml`: 1 secrets (lines: 6)
- `id_rsa`: 1 secrets (lines: 1)
- `config/oauth.json`: 1 secrets (lines: 4)
- `scripts/deploy.sh`: 1 secrets (lines: 5)
- `src/Main.java`: 1 secrets (lines: 5)
- `src/config.py`: 1 secrets (lines: 7)

### TruffleHog

Found secrets in **1 files**:

- `config/database.yaml`: 1 secrets (lines: 6)

### LLM (gpt-4o-mini)

Found secrets in **15 files**:

- `src/obfuscated.py`: 6 secrets (lines: 7, 10, 13, 18, 20...)
- `src/advanced.js`: 5 secrets (lines: 4, 7, 10, 12, 17)
- `src/Crypto.go`: 2 secrets (lines: 6, 10)
- `.env`: 2 secrets (lines: 3, 4)
- `config/keys.yaml`: 2 secrets (lines: 6, 12)
- `config/oauth.json`: 2 secrets (lines: 3, 4)
- `config/legacy.ini`: 2 secrets (lines: 4, 7)
- `scripts/deploy.sh`: 2 secrets (lines: 6, 9)
- `src/app.py`: 1 secrets (lines: 7)
- `scripts/webhook.js`: 1 secrets (lines: 4)
- ... and 5 more files

### LLM (gpt-5-mini)

Found secrets in **16 files**:

- `src/obfuscated.py`: 7 secrets (lines: 7, 10, 13, 14, 17...)
- `src/advanced.js`: 7 secrets (lines: 4, 7, 9, 10, 13...)
- `src/config.py`: 6 secrets (lines: 7, 10, 13, 14, 15...)
- `config/settings.py`: 3 secrets (lines: 6, 9, 20)
- `src/Crypto.go`: 2 secrets (lines: 10, 15)
- `.env`: 2 secrets (lines: 3, 4)
- `config/keys.yaml`: 2 secrets (lines: 6, 12)
- `config/oauth.json`: 2 secrets (lines: 3, 4)
- `config/legacy.ini`: 2 secrets (lines: 3, 7)
- `scripts/deploy.sh`: 2 secrets (lines: 5, 10)
- ... and 6 more files

## Overlap Analysis


**No files were found by all tools**


## Ground Truth Analysis

**Expected secrets**: 32 (documented in ground truth)

### Tool Performance vs Ground Truth

| Tool | Found | Expected | Recall | Extra Findings |
|------|-------|----------|--------|----------------|
| Gitleaks | 12 | 32 | 37.5% | 0 |
| TruffleHog | 1 | 32 | 0.0% | 1 |
| LLM (gpt-4o-mini) | 30 | 32 | 56.2% | 12 |
| LLM (gpt-5-mini) | 41 | 32 | 84.4% | 14 |

### LLM Extra Findings Explanation

LLMs may find more than the 32 documented secrets because they detect:

- **Split secret components**: Each part of `DB_PASS_PART1 + PART2 + PART3` counted separately
- **Join operations**: Lines like `''.join(AWS_SECRET_CHARS)` flagged as additional exposure
- **Decoding functions**: Code that reveals secrets (e.g., `base64.b64decode()`, `codecs.decode()`)
- **Comment identifiers**: Lines marking secret locations without plaintext values

These are *technically correct* detections of secret exposure points, not false positives.
The ground truth documents 32 'primary' secrets, but the codebase has additional derivative exposures.


## Performance Summary

- **Most secrets found**: LLM (gpt-5-mini) (41 secrets)
- **Most files covered**: LLM (gpt-5-mini) (16 files)
- **Fastest**: TruffleHog (5.06s)

@@ -1,253 +0,0 @@
{
  "target_path": "/Users/tduhamel/Documents/FuzzingLabs/fuzzforge_ai/test_projects/secret_detection_benchmark",
  "results": [
    {
      "tool_name": "Gitleaks",
      "execution_time": 5.177123069763184,
      "findings_count": 12,
      "findings_by_file": {
        ".env": [3],
        "config/app.properties": [6],
        "config/keys.yaml": [6],
        "id_rsa": [1],
        "config/oauth.json": [4],
        "scripts/deploy.sh": [5],
        "config/settings.py": [6, 9],
        "src/Main.java": [5],
        "src/obfuscated.py": [7, 17],
        "src/config.py": [7]
      },
      "unique_files": 10,
      "unique_locations": 12,
      "secret_density": 1.2,
      "file_types": {
        ".env": 1,
        ".properties": 1,
        ".yaml": 1,
        "[no extension]": 1,
        ".json": 1,
        ".sh": 1,
        ".py": 3,
        ".java": 1
      }
    },
    {
      "tool_name": "TruffleHog",
      "execution_time": 5.061383008956909,
      "findings_count": 1,
      "findings_by_file": {
        "config/database.yaml": [6]
      },
      "unique_files": 1,
      "unique_locations": 1,
      "secret_density": 1.0,
      "file_types": {
        ".yaml": 1
      }
    },
    {
      "tool_name": "LLM (gpt-4o-mini)",
      "execution_time": 296.8492441177368,
      "findings_count": 30,
      "findings_by_file": {
        "src/obfuscated.py": [7, 10, 13, 18, 20, 23],
        "src/app.py": [7],
        "scripts/webhook.js": [4],
        "src/advanced.js": [4, 7, 10, 12, 17],
        "src/Main.java": [5],
        "src/Crypto.go": [6, 10],
        ".env": [3, 4],
        "config/keys.yaml": [6, 12],
        "config/database.yaml": [7],
        "config/oauth.json": [3, 4],
        "config/legacy.ini": [4, 7],
        "src/database.sql": [4],
        "config/app.properties": [6],
        "scripts/deploy.sh": [6, 9],
        "id_rsa": [1]
      },
      "unique_files": 15,
      "unique_locations": 30,
      "secret_density": 2.0,
      "file_types": {
        ".py": 2,
        ".js": 2,
        ".java": 1,
        ".go": 1,
        ".env": 1,
        ".yaml": 2,
        ".json": 1,
        ".ini": 1,
        ".sql": 1,
        ".properties": 1,
        ".sh": 1,
        "[no extension]": 1
      }
    },
    {
      "tool_name": "LLM (gpt-5-mini)",
      "execution_time": 618.5462851524353,
      "findings_count": 41,
      "findings_by_file": {
        "config/settings.py": [6, 9, 20],
        "src/obfuscated.py": [7, 10, 13, 14, 17, 20, 23],
        "src/app.py": [7],
        "src/config.py": [7, 10, 13, 14, 15, 16],
        "scripts/webhook.js": [4],
        "src/advanced.js": [4, 7, 9, 10, 13, 17, 19],
        "src/Main.java": [5],
        "src/Crypto.go": [10, 15],
        ".env": [3, 4],
        "config/keys.yaml": [6, 12],
        "config/database.yaml": [7],
        "config/oauth.json": [3, 4],
        "config/legacy.ini": [3, 7],
        "src/database.sql": [6],
        "config/app.properties": [6],
        "scripts/deploy.sh": [5, 10]
      },
      "unique_files": 16,
      "unique_locations": 41,
      "secret_density": 2.5625,
      "file_types": {
        ".py": 4,
        ".js": 2,
        ".java": 1,
        ".go": 1,
        ".env": 1,
        ".yaml": 2,
        ".json": 1,
        ".ini": 1,
        ".sql": 1,
        ".properties": 1,
        ".sh": 1
      }
    }
  ]
}
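The per-tool `unique_files`, `unique_locations`, and `secret_density` values above are consistent with a simple derivation: density = locations / files (Gitleaks: 12 / 10 = 1.2; gpt-5-mini: 41 / 16 = 2.5625). A minimal sketch of that computation, inferred from the numbers rather than taken from the benchmark code:

```python
# Inferred metric derivation; findings_by_file excerpted from the Gitleaks result above.
findings_by_file = {".env": [3], "config/settings.py": [6, 9]}

unique_files = len(findings_by_file)  # distinct files with at least one finding
unique_locations = sum(len(lines) for lines in findings_by_file.values())  # (file, line) hits
secret_density = unique_locations / unique_files  # average findings per covered file
```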
@@ -1,344 +0,0 @@
{
  "description": "Ground truth dataset for secret detection benchmarking - Exactly 32 secrets",
  "version": "1.1.0",
  "total_secrets": 32,
  "secrets_by_difficulty": {
    "easy": 12,
    "medium": 10,
    "hard": 10
  },
  "secrets": [
    {"id": 1, "file": ".env", "line": 3, "difficulty": "easy", "type": "aws_access_key", "value": "AKIAIOSFODNN7EXAMPLE", "severity": "critical"},
    {"id": 2, "file": ".env", "line": 4, "difficulty": "easy", "type": "aws_secret_access_key", "value": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", "severity": "critical"},
    {"id": 3, "file": "config/settings.py", "line": 6, "difficulty": "easy", "type": "github_pat", "value": "ghp_vR8jK2mN4pQ6tX9bC3wY7zA1eF5hI8kL", "severity": "critical"},
    {"id": 4, "file": "config/settings.py", "line": 9, "difficulty": "easy", "type": "stripe_api_key", "value": "sk_live_51MabcdefghijklmnopqrstuvwxyzABCDEF123456789", "severity": "critical"},
    {"id": 5, "file": "config/settings.py", "line": 17, "difficulty": "easy", "type": "database_password", "value": "ProdDB_P@ssw0rd_2024_Secure!", "severity": "critical"},
    {"id": 6, "file": "src/app.py", "line": 6, "difficulty": "easy", "type": "jwt_secret", "value": "my-super-secret-jwt-key-do-not-share-2024", "severity": "critical"},
    {"id": 7, "file": "config/database.yaml", "line": 7, "difficulty": "easy", "type": "azure_storage_key", "value": "DefaultEndpointsProtocol=https;AccountName=prodstore;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;EndpointSuffix=core.windows.net", "severity": "critical"},
    {"id": 8, "file": "scripts/webhook.js", "line": 4, "difficulty": "easy", "type": "slack_webhook", "value": "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXX", "severity": "high"},
    {"id": 9, "file": "config/app.properties", "line": 6, "difficulty": "easy", "type": "api_key", "value": "sk_test_4eC39HqLyjWDarjtT1zdp7dc", "severity": "high"},
    {"id": 10, "file": "id_rsa", "line": 1, "difficulty": "easy", "type": "ssh_private_key", "value": "-----BEGIN OPENSSH PRIVATE KEY-----", "severity": "critical"},
    {"id": 11, "file": "config/oauth.json", "line": 4, "difficulty": "easy", "type": "oauth_client_secret", "value": "GOCSPX-Ab12Cd34Ef56Gh78Ij90Kl12", "severity": "critical"},
    {"id": 12, "file": "src/Main.java", "line": 5, "difficulty": "easy", "type": "google_oauth_secret", "value": "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", "severity": "critical"},
    {"id": 13, "file": "src/config.py", "line": 7, "difficulty": "medium", "type": "aws_access_key_base64", "value": "QUtJQUlPU0ZPRE5ON0VYQU1QTEU=", "decoded": "AKIAIOSFODNN7EXAMPLE", "severity": "critical"},
    {"id": 14, "file": "src/config.py", "line": 10, "difficulty": "medium", "type": "api_token_hex", "value": "6170695f746f6b656e5f616263313233787977373839", "decoded": "api_token_abc123xyz789", "severity": "high"},
    {"id": 15, "file": "src/config.py", "line": 16, "difficulty": "medium", "type": "database_password_concatenated", "value": "MySecurePassword2024!", "note": "Built from DB_PASS_PART1 + DB_PASS_PART2 + DB_PASS_PART3", "severity": "critical"},
    {"id": 16, "file": "scripts/deploy.sh", "line": 5, "difficulty": "medium", "type": "api_key_export", "value": "sk_prod_1234567890abcdefghijklmnopqrstuvwxyz", "severity": "critical"},
    {"id": 17, "file": "scripts/deploy.sh", "line": 11, "difficulty": "medium", "type": "database_password_url_encoded", "value": "mysql://admin:MyP%40ssw0rd%21@db.example.com:3306/prod", "decoded": "mysql://admin:MyP@ssw0rd!@db.example.com:3306/prod", "note": "In comment", "severity": "critical"},
    {"id": 18, "file": "config/keys.yaml", "line": 6, "difficulty": "medium", "type": "rsa_private_key_multiline", "value": "-----BEGIN RSA PRIVATE KEY-----", "note": "Multi-line YAML literal block", "severity": "critical"},
    {"id": 19, "file": "config/keys.yaml", "line": 11, "difficulty": "medium", "type": "api_token_unicode", "value": "tøkęn_śęçrėt_ẃïth_ŭñïçődė_123456", "severity": "high"},
    {"id": 20, "file": "src/database.sql", "line": 6, "difficulty": "medium", "type": "database_connection_string", "value": "postgresql://admin:Pr0dDB_S3cr3t_P@ss@db.prod.example.com:5432/prod_db", "note": "In SQL comment", "severity": "critical"},
    {"id": 21, "file": "config/legacy.ini", "line": 3, "difficulty": "medium", "type": "database_password", "value": "L3g@cy_DB_P@ssw0rd_2023", "severity": "critical"},
    {"id": 22, "file": "config/legacy.ini", "line": 7, "difficulty": "medium", "type": "api_key_commented", "value": "backup_key_xyz789abc123def456ghi", "note": "Commented backup key", "severity": "high"},
    {"id": 23, "file": "src/obfuscated.py", "line": 7, "difficulty": "hard", "type": "stripe_key_rot13", "value": "fx_yvir_frperg_xrl_12345", "decoded": "sk_live_secret_key_12345", "severity": "critical"},
    {"id": 24, "file": "src/obfuscated.py", "line": 10, "difficulty": "hard", "type": "github_token_binary", "value": "b'\\x67\\x68\\x70\\x5f\\x4d\\x79\\x47\\x69\\x74\\x48\\x75\\x62\\x54\\x6f\\x6b\\x65\\x6e\\x31\\x32\\x33\\x34\\x35\\x36'", "decoded": "ghp_MyGitHubToken123456", "severity": "critical"},
    {"id": 25, "file": "src/obfuscated.py", "line": 13, "difficulty": "hard", "type": "aws_secret_char_array", "value": "['A','W','S','_','S','E','C','R','E','T','_','K','E','Y','_','X','Y','Z','7','8','9']", "decoded": "AWS_SECRET_KEY_XYZ789", "severity": "critical"},
    {"id": 26, "file": "src/obfuscated.py", "line": 17, "difficulty": "hard", "type": "api_token_reversed", "value": "321cba_desrever_nekot_ipa", "decoded": "api_token_reversed_abc123", "severity": "high"},
    {"id": 27, "file": "src/advanced.js", "line": 4, "difficulty": "hard", "type": "secret_template_string", "value": "sk_prod_template_key_xyz", "note": "Built from template literals", "severity": "critical"},
    {"id": 28, "file": "src/advanced.js", "line": 7, "difficulty": "hard", "type": "password_in_regex", "value": "password_regex_secret_789", "note": "Inside regex pattern", "severity": "medium"},
    {"id": 29, "file": "src/advanced.js", "line": 10, "difficulty": "hard", "type": "api_key_xor", "value": "[65,82,90,75,94,91,92,75,93,67,65,90,67,92,75,91,67,95]", "decoded": "api_xor_secret_key", "note": "XOR encrypted with key 42", "severity": "critical"},
    {"id": 30, "file": "src/advanced.js", "line": 17, "difficulty": "hard", "type": "api_key_escaped_json", "value": "sk_escaped_json_key_456", "note": "Escaped JSON within string", "severity": "high"},
    {"id": 31, "file": "src/Crypto.go", "line": 10, "difficulty": "hard", "type": "secret_in_heredoc", "value": "golang_heredoc_secret_999", "note": "In heredoc/multi-line string", "severity": "high"},
    {"id": 32, "file": "src/Crypto.go", "line": 15, "difficulty": "hard", "type": "stripe_key_typo", "value": "strippe_sk_live_corrected_key", "decoded": "stripe_sk_live_corrected_key", "note": "Intentional typo corrected programmatically", "severity": "critical"}
  ],
  "file_summary": {
    ".env": 2,
    "config/settings.py": 3,
    "src/app.py": 1,
    "config/database.yaml": 1,
    "scripts/webhook.js": 1,
    "config/app.properties": 1,
    "id_rsa": 1,
    "config/oauth.json": 1,
    "src/Main.java": 1,
    "src/config.py": 3,
    "scripts/deploy.sh": 2,
    "config/keys.yaml": 2,
    "src/database.sql": 1,
    "config/legacy.ini": 2,
    "src/obfuscated.py": 4,
    "src/advanced.js": 4,
    "src/Crypto.go": 2
  },
  "notes": {
    "easy_secrets": "Standard patterns that any decent secret scanner should detect",
    "medium_secrets": "Slightly obfuscated - base64, hex, concatenated, or in comments",
    "hard_secrets": "Well hidden - ROT13, binary, XOR, reversed, split across constructs"
  }
}
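The medium and hard entries above carry both the on-disk `value` and its `decoded` form. A minimal Python sketch (not part of the benchmark code) showing that a few of these encodings round-trip exactly as the dataset documents:

```python
import base64
import codecs

# base64-encoded AWS key (ground-truth id 13)
assert base64.b64decode("QUtJQUlPU0ZPRE5ON0VYQU1QTEU=").decode() == "AKIAIOSFODNN7EXAMPLE"

# ROT13-obfuscated Stripe key (id 23)
assert codecs.decode("fx_yvir_frperg_xrl_12345", "rot13") == "sk_live_secret_key_12345"

# reversed token (id 26)
assert "321cba_desrever_nekot_ipa"[::-1] == "api_token_reversed_abc123"
```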
@@ -1,151 +0,0 @@
"""
Category-specific benchmark configurations

Defines expected metrics and performance thresholds for each module category.
"""

from dataclasses import dataclass
from typing import List, Dict
from enum import Enum


class ModuleCategory(str, Enum):
    """Module categories for benchmarking"""
    FUZZER = "fuzzer"
    SCANNER = "scanner"
    ANALYZER = "analyzer"
    SECRET_DETECTION = "secret_detection"
    REPORTER = "reporter"


@dataclass
class CategoryBenchmarkConfig:
    """Benchmark configuration for a module category"""
    category: ModuleCategory
    expected_metrics: List[str]
    performance_thresholds: Dict[str, float]
    description: str


# Fuzzer category configuration
FUZZER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.FUZZER,
    expected_metrics=[
        "execs_per_sec",
        "coverage_rate",
        "time_to_first_crash",
        "corpus_efficiency",
        "execution_time",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "min_execs_per_sec": 1000,  # Minimum executions per second
        "max_execution_time_small": 10.0,  # Max time for small project (seconds)
        "max_execution_time_medium": 60.0,  # Max time for medium project
        "max_memory_mb": 2048,  # Maximum memory usage
        "min_coverage_rate": 1.0,  # Minimum new coverage per second
    },
    description="Fuzzing modules: coverage-guided fuzz testing"
)


# Scanner category configuration
SCANNER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.SCANNER,
    expected_metrics=[
        "files_per_sec",
        "loc_per_sec",
        "execution_time",
        "peak_memory_mb",
        "findings_count"
    ],
    performance_thresholds={
        "min_files_per_sec": 100,  # Minimum files scanned per second
        "min_loc_per_sec": 10000,  # Minimum lines of code per second
        "max_execution_time_small": 1.0,
        "max_execution_time_medium": 10.0,
        "max_memory_mb": 512,
    },
    description="File scanning modules: fast pattern-based scanning"
)


# Secret detection category configuration
SECRET_DETECTION_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.SECRET_DETECTION,
    expected_metrics=[
        "patterns_per_sec",
        "precision",
        "recall",
        "f1_score",
        "false_positive_rate",
        "execution_time",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "min_patterns_per_sec": 1000,
        "min_precision": 0.90,  # 90% precision target
        "min_recall": 0.95,  # 95% recall target
        "max_false_positives": 5,  # Max false positives per 100 secrets
        "max_execution_time_small": 2.0,
        "max_execution_time_medium": 20.0,
        "max_memory_mb": 1024,
    },
    description="Secret detection modules: high precision pattern matching"
)


# Analyzer category configuration
ANALYZER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.ANALYZER,
    expected_metrics=[
        "analysis_depth",
        "files_analyzed_per_sec",
        "execution_time",
        "peak_memory_mb",
        "findings_count",
        "accuracy"
    ],
    performance_thresholds={
        "min_files_per_sec": 10,  # Slower than scanners due to deep analysis
        "max_execution_time_small": 5.0,
        "max_execution_time_medium": 60.0,
        "max_memory_mb": 2048,
        "min_accuracy": 0.85,  # 85% accuracy target
    },
    description="Code analysis modules: deep semantic analysis"
)


# Reporter category configuration
REPORTER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.REPORTER,
    expected_metrics=[
        "report_generation_time",
        "findings_per_sec",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "max_report_time_100_findings": 1.0,  # Max 1 second for 100 findings
        "max_report_time_1000_findings": 10.0,  # Max 10 seconds for 1000 findings
        "max_memory_mb": 256,
    },
    description="Reporting modules: fast report generation"
)


# Category configurations map
CATEGORY_CONFIGS = {
    ModuleCategory.FUZZER: FUZZER_CONFIG,
    ModuleCategory.SCANNER: SCANNER_CONFIG,
    ModuleCategory.SECRET_DETECTION: SECRET_DETECTION_CONFIG,
    ModuleCategory.ANALYZER: ANALYZER_CONFIG,
    ModuleCategory.REPORTER: REPORTER_CONFIG,
}


def get_category_config(category: ModuleCategory) -> CategoryBenchmarkConfig:
    """Get benchmark configuration for a category"""
    return CATEGORY_CONFIGS[category]


def get_threshold(category: ModuleCategory, metric: str) -> float:
    """Get performance threshold for a specific metric"""
    config = get_category_config(category)
    return config.performance_thresholds.get(metric, 0.0)
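A short usage sketch for the helpers defined in this (now deleted) module; the import path is an assumption, adjust to wherever the file lived in the benchmarks package:

```python
# Hypothetical import path for the module above.
from benchmarks.category_config import ModuleCategory, get_category_config, get_threshold

config = get_category_config(ModuleCategory.SCANNER)
print(config.description)  # "File scanning modules: fast pattern-based scanning"

assert get_threshold(ModuleCategory.SCANNER, "min_files_per_sec") == 100
assert get_threshold(ModuleCategory.SCANNER, "no_such_metric") == 0.0  # .get() fallback
```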
@@ -1,60 +0,0 @@
"""
Benchmark fixtures and configuration
"""

import sys
from pathlib import Path
import pytest

# Add parent directories to path
BACKEND_ROOT = Path(__file__).resolve().parents[1]
TOOLBOX = BACKEND_ROOT / "toolbox"

if str(BACKEND_ROOT) not in sys.path:
    sys.path.insert(0, str(BACKEND_ROOT))
if str(TOOLBOX) not in sys.path:
    sys.path.insert(0, str(TOOLBOX))


# ============================================================================
# Benchmark Fixtures
# ============================================================================

@pytest.fixture(scope="session")
def benchmark_fixtures_dir():
    """Path to benchmark fixtures directory"""
    return Path(__file__).parent / "fixtures"


@pytest.fixture(scope="session")
def small_project_fixture(benchmark_fixtures_dir):
    """Small project fixture (~1K LOC)"""
    return benchmark_fixtures_dir / "small"


@pytest.fixture(scope="session")
def medium_project_fixture(benchmark_fixtures_dir):
    """Medium project fixture (~10K LOC)"""
    return benchmark_fixtures_dir / "medium"


@pytest.fixture(scope="session")
def large_project_fixture(benchmark_fixtures_dir):
    """Large project fixture (~100K LOC)"""
    return benchmark_fixtures_dir / "large"


# ============================================================================
# pytest-benchmark Configuration
# ============================================================================

def pytest_configure(config):
    """Configure pytest-benchmark"""
    config.addinivalue_line(
        "markers", "benchmark: mark test as a benchmark"
    )


def pytest_benchmark_group_stats(config, benchmarks, group_by):
    """Group benchmark results by category"""
    return group_by
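With these fixtures and the `benchmark` marker registered, the suite can be invoked through pytest-benchmark. A minimal sketch, using standard pytest/pytest-benchmark flags rather than anything repo-specific:

```python
# Programmatic equivalent of running `pytest benchmarks/ --benchmark-only -m benchmark`.
import pytest

exit_code = pytest.main(["benchmarks/", "--benchmark-only", "-m", "benchmark"])
raise SystemExit(exit_code)
```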
@@ -22,6 +22,7 @@
    "parameters": {
        "workflow_name": "string",
        "target_path": "string",
        "volume_mode": "string (ro|rw)",
        "parameters": "object"
    }
},
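For illustration only, a client-side payload matching the schema above might look like the following; the base URL and exact route are assumptions (the submit endpoint's router uses prefix `/workflows`, as shown later in this diff):

```python
import httpx  # dev dependency in this repo's pyproject

# Hypothetical submission matching the documented parameter schema.
payload = {
    "target_path": "/tmp/target_project",  # path visible to the backend
    "volume_mode": "ro",                   # "ro" or "rw"
    "parameters": {},                      # workflow-specific overrides
}
resp = httpx.post(
    "http://localhost:8000/workflows/security_assessment/submit",  # assumed route
    json=payload,
)
print(resp.json())
```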
@@ -1,14 +1,13 @@
[project]
name = "backend"
version = "0.7.3"
version = "0.6.0"
description = "FuzzForge OSS backend"
authors = []
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.116.1",
    "temporalio>=1.6.0",
    "boto3>=1.34.0",
    "prefect>=3.4.18",
    "pydantic>=2.0.0",
    "pyyaml>=6.0",
    "docker>=7.0.0",
@@ -22,20 +21,5 @@ dependencies = [
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "pytest-benchmark>=4.0.0",
    "pytest-cov>=5.0.0",
    "pytest-xdist>=3.5.0",
    "pytest-mock>=3.12.0",
    "httpx>=0.27.0",
    "ruff>=0.1.0",
]

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests", "benchmarks"]
python_files = ["test_*.py", "bench_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
    "benchmark: mark test as a benchmark",
]
@@ -14,8 +14,8 @@ API endpoints for fuzzing workflow management and real-time monitoring
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from typing import List, Dict
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from typing import List, Dict, Any
from fastapi import APIRouter, HTTPException, Depends, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse
import asyncio
import json
@@ -25,6 +25,7 @@ from src.models.findings import (
    FuzzingStats,
    CrashReport
)
from src.core.workflow_discovery import WorkflowDiscovery

logger = logging.getLogger(__name__)

@@ -125,13 +126,12 @@ async def update_fuzzing_stats(run_id: str, stats: FuzzingStats):
    # Debug: log reception for live instrumentation
    try:
        logger.info(
            "Received fuzzing stats update: run_id=%s exec=%s eps=%.2f crashes=%s corpus=%s coverage=%s elapsed=%ss",
            "Received fuzzing stats update: run_id=%s exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%ss",
            run_id,
            stats.executions,
            stats.executions_per_sec,
            stats.crashes,
            stats.corpus_size,
            stats.coverage,
            stats.elapsed_time,
        )
    except Exception:
@@ -14,6 +14,7 @@ API endpoints for workflow run management and findings retrieval
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from typing import Dict, Any
from fastapi import APIRouter, HTTPException, Depends

from src.models.findings import WorkflowFindings, WorkflowStatus
@@ -23,22 +24,22 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/runs", tags=["runs"])


def get_temporal_manager():
    """Dependency to get the Temporal manager instance"""
    from src.main import temporal_mgr
    return temporal_mgr
def get_prefect_manager():
    """Dependency to get the Prefect manager instance"""
    from src.main import prefect_mgr
    return prefect_mgr


@router.get("/{run_id}/status", response_model=WorkflowStatus)
async def get_run_status(
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowStatus:
    """
    Get the current status of a workflow run.

    Args:
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        Status information including state, timestamps, and completion flags
@@ -47,26 +48,25 @@ async def get_run_status(
        HTTPException: 404 if run not found
    """
    try:
        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)

        # Map Temporal status to response format
        workflow_status = status.get("status", "UNKNOWN")
        is_completed = workflow_status in ["COMPLETED", "FAILED", "CANCELLED"]
        is_failed = workflow_status == "FAILED"
        is_running = workflow_status == "RUNNING"

        # Extract workflow name from run_id (format: workflow_name-unique_id)
        workflow_name = run_id.rsplit('-', 1)[0] if '-' in run_id else "unknown"
        # Find workflow name from deployment
        workflow_name = "unknown"
        workflow_deployment_id = status.get("workflow", "")
        for name, deployment_id in prefect_mgr.deployments.items():
            if str(deployment_id) == str(workflow_deployment_id):
                workflow_name = name
                break

        return WorkflowStatus(
            run_id=run_id,
            run_id=status["run_id"],
            workflow=workflow_name,
            status=workflow_status,
            is_completed=is_completed,
            is_failed=is_failed,
            is_running=is_running,
            created_at=status.get("start_time"),
            updated_at=status.get("close_time") or status.get("execution_time")
            status=status["status"],
            is_completed=status["is_completed"],
            is_failed=status["is_failed"],
            is_running=status["is_running"],
            created_at=status["created_at"],
            updated_at=status["updated_at"]
        )

    except Exception as e:
@@ -80,13 +80,13 @@ async def get_run_status(
@router.get("/{run_id}/findings", response_model=WorkflowFindings)
async def get_run_findings(
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Get the findings from a completed workflow run.

    Args:
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution
@@ -96,49 +96,50 @@ async def get_run_findings(
    """
    try:
        # Get run status first
        status = await temporal_mgr.get_workflow_status(run_id)
        workflow_status = status.get("status", "UNKNOWN")
        status = await prefect_mgr.get_flow_run_status(run_id)

        if workflow_status not in ["COMPLETED", "FAILED", "CANCELLED"]:
            if workflow_status == "RUNNING":
        if not status["is_completed"]:
            if status["is_running"]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} is still running. Current status: {workflow_status}"
                    detail=f"Run {run_id} is still running. Current status: {status['status']}"
                )
            elif status["is_failed"]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} failed. Status: {status['status']}"
                )
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} not completed. Status: {workflow_status}"
                    detail=f"Run {run_id} not completed. Status: {status['status']}"
                )

        if workflow_status == "FAILED":
            raise HTTPException(
                status_code=400,
                detail=f"Run {run_id} failed. Status: {workflow_status}"
            )
        # Get the findings
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        # Get the workflow result
        result = await temporal_mgr.get_workflow_result(run_id)
        # Find workflow name
        workflow_name = "unknown"
        workflow_deployment_id = status.get("workflow", "")
        for name, deployment_id in prefect_mgr.deployments.items():
            if str(deployment_id) == str(workflow_deployment_id):
                workflow_name = name
                break

        # Extract SARIF from result (handle None for backwards compatibility)
        if isinstance(result, dict):
            sarif = result.get("sarif") or {}
        else:
            sarif = {}

        # Extract workflow name from run_id (format: workflow_name-unique_id)
        workflow_name = run_id.rsplit('-', 1)[0] if '-' in run_id else "unknown"

        # Metadata
        # Get workflow version if available
        metadata = {
            "completion_time": status.get("close_time"),
            "completion_time": status["updated_at"],
            "workflow_version": "unknown"
        }

        if workflow_name in prefect_mgr.workflows:
            workflow_info = prefect_mgr.workflows[workflow_name]
            metadata["workflow_version"] = workflow_info.metadata.get("version", "unknown")

        return WorkflowFindings(
            workflow=workflow_name,
            run_id=run_id,
            sarif=sarif,
            sarif=findings,
            metadata=metadata
        )

@@ -156,7 +157,7 @@ async def get_run_findings(
async def get_workflow_findings(
    workflow_name: str,
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Get findings for a specific workflow run.
@@ -165,7 +166,7 @@ async def get_workflow_findings(

    Args:
        workflow_name: Name of the workflow
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution
@@ -173,11 +174,11 @@ async def get_workflow_findings(
    Raises:
        HTTPException: 404 if workflow or run not found, 400 if run not completed
    """
    if workflow_name not in temporal_mgr.workflows:
    if workflow_name not in prefect_mgr.workflows:
        raise HTTPException(
            status_code=404,
            detail=f"Workflow not found: {workflow_name}"
        )

    # Delegate to the main findings endpoint
    return await get_run_findings(run_id, temporal_mgr)
    return await get_run_findings(run_id, prefect_mgr)
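Taken together, the Prefect-side endpoints above can be exercised with a small client. A hedged sketch (the base URL is an assumption, the run ID a placeholder; `httpx` is a dev dependency in this diff's pyproject):

```python
import httpx

BASE = "http://localhost:8000"  # assumed backend address
run_id = "replace-with-a-real-flow-run-id"

status = httpx.get(f"{BASE}/runs/{run_id}/status").json()
print(status["status"], status["is_running"])

# Findings are only served once the run has completed successfully.
if status["is_completed"] and not status["is_failed"]:
    findings = httpx.get(f"{BASE}/runs/{run_id}/findings").json()
    print(findings["sarif"])
```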
@@ -1,47 +0,0 @@
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

"""
System information endpoints for FuzzForge API.

Provides system configuration and filesystem paths to CLI for worker management.
"""

import os
from typing import Dict

from fastapi import APIRouter

router = APIRouter(prefix="/system", tags=["system"])


@router.get("/info")
async def get_system_info() -> Dict[str, str]:
    """
    Get system information including host filesystem paths.

    This endpoint exposes paths needed by the CLI to manage workers via docker-compose.
    The FUZZFORGE_HOST_ROOT environment variable is set by docker-compose and points
    to the FuzzForge installation directory on the host machine.

    Returns:
        Dictionary containing:
        - host_root: Absolute path to FuzzForge root on host
        - docker_compose_path: Path to docker-compose.yml on host
        - workers_dir: Path to workers directory on host
    """
    host_root = os.getenv("FUZZFORGE_HOST_ROOT", "")

    return {
        "host_root": host_root,
        "docker_compose_path": f"{host_root}/docker-compose.yml" if host_root else "",
        "workers_dir": f"{host_root}/workers" if host_root else "",
    }
@@ -15,9 +15,8 @@ API endpoints for workflow management with enhanced error handling

import logging
import traceback
import tempfile
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
from fastapi import APIRouter, HTTPException, Depends
from pathlib import Path

from src.models.findings import (
@@ -26,59 +25,13 @@ from src.models.findings import (
    WorkflowListItem,
    RunSubmissionResponse
)
from src.temporal.discovery import WorkflowDiscovery
from src.core.workflow_discovery import WorkflowDiscovery

logger = logging.getLogger(__name__)

# Configuration for file uploads
MAX_UPLOAD_SIZE = 10 * 1024 * 1024 * 1024  # 10 GB
ALLOWED_CONTENT_TYPES = [
    "application/gzip",
    "application/x-gzip",
    "application/x-tar",
    "application/x-compressed-tar",
    "application/octet-stream",  # Generic binary
]

router = APIRouter(prefix="/workflows", tags=["workflows"])


def extract_defaults_from_json_schema(metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract default parameter values from JSON Schema format.

    Converts from:
        parameters:
          properties:
            param_name:
              default: value

    To:
        {param_name: value}

    Args:
        metadata: Workflow metadata dictionary

    Returns:
        Dictionary of parameter defaults
    """
    defaults = {}

    # Check if there's a legacy default_parameters field
    if "default_parameters" in metadata:
        defaults.update(metadata["default_parameters"])

    # Extract defaults from JSON Schema parameters
    parameters = metadata.get("parameters", {})
    properties = parameters.get("properties", {})

    for param_name, param_spec in properties.items():
        if "default" in param_spec:
            defaults[param_name] = param_spec["default"]

    return defaults


def create_structured_error_response(
    error_type: str,
    message: str,
@@ -115,15 +68,15 @@ def create_structured_error_response(
    return error_response


def get_temporal_manager():
    """Dependency to get the Temporal manager instance"""
    from src.main import temporal_mgr
    return temporal_mgr
def get_prefect_manager():
    """Dependency to get the Prefect manager instance"""
    from src.main import prefect_mgr
    return prefect_mgr


@router.get("/", response_model=List[WorkflowListItem])
async def list_workflows(
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> List[WorkflowListItem]:
    """
    List all discovered workflows with their metadata.
@@ -132,7 +85,7 @@ async def list_workflows(
    author, and tags.
    """
    workflows = []
    for name, info in temporal_mgr.workflows.items():
    for name, info in prefect_mgr.workflows.items():
        workflows.append(WorkflowListItem(
            name=name,
            version=info.metadata.get("version", "0.6.0"),
@@ -158,7 +111,7 @@ async def get_metadata_schema() -> Dict[str, Any]:
@router.get("/{workflow_name}/metadata", response_model=WorkflowMetadata)
async def get_workflow_metadata(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowMetadata:
    """
    Get complete metadata for a specific workflow.
@@ -173,8 +126,8 @@ async def get_workflow_metadata(
    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -190,7 +143,7 @@ async def get_workflow_metadata(
            detail=error_response
        )

    info = temporal_mgr.workflows[workflow_name]
    info = prefect_mgr.workflows[workflow_name]
    metadata = info.metadata

    return WorkflowMetadata(
@@ -200,8 +153,10 @@ async def get_workflow_metadata(
        author=metadata.get("author"),
        tags=metadata.get("tags", []),
        parameters=metadata.get("parameters", {}),
        default_parameters=extract_defaults_from_json_schema(metadata),
        required_modules=metadata.get("required_modules", [])
        default_parameters=metadata.get("default_parameters", {}),
        required_modules=metadata.get("required_modules", []),
        supported_volume_modes=metadata.get("supported_volume_modes", ["ro", "rw"]),
        has_custom_docker=info.has_docker
    )


@@ -209,14 +164,14 @@ async def get_workflow_metadata(
async def submit_workflow(
    workflow_name: str,
    submission: WorkflowSubmission,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> RunSubmissionResponse:
    """
    Submit a workflow for execution.
    Submit a workflow for execution with volume mounting.

    Args:
        workflow_name: Name of the workflow to execute
        submission: Submission parameters including target path and parameters
        submission: Submission parameters including target path and volume mode

    Returns:
        Run submission response with run_id and initial status
@@ -224,8 +179,8 @@ async def submit_workflow(
    Raises:
        HTTPException: 404 if workflow not found, 400 for invalid parameters
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -242,36 +197,31 @@ async def submit_workflow(
        )

    try:
        # Upload target file to MinIO and get target_id
        target_path = Path(submission.target_path)
        if not target_path.exists():
            raise ValueError(f"Target path does not exist: {submission.target_path}")
        # Convert ResourceLimits to dict if provided
        resource_limits_dict = None
        if submission.resource_limits:
            resource_limits_dict = {
                "cpu_limit": submission.resource_limits.cpu_limit,
                "memory_limit": submission.resource_limits.memory_limit,
                "cpu_request": submission.resource_limits.cpu_request,
                "memory_request": submission.resource_limits.memory_request
            }

        # Upload target (using anonymous user for now)
        target_id = await temporal_mgr.upload_target(
            file_path=target_path,
            user_id="api-user",
            metadata={"workflow": workflow_name}
        )

        # Merge default parameters with user parameters
        workflow_info = temporal_mgr.workflows[workflow_name]
        metadata = workflow_info.metadata or {}
        defaults = extract_defaults_from_json_schema(metadata)
        user_params = submission.parameters or {}
        workflow_params = {**defaults, **user_params}

        # Start workflow execution
        handle = await temporal_mgr.run_workflow(
        # Submit the workflow with enhanced parameters
        flow_run = await prefect_mgr.submit_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=workflow_params
            target_path=submission.target_path,
            volume_mode=submission.volume_mode,
            parameters=submission.parameters,
            resource_limits=resource_limits_dict,
            additional_volumes=submission.additional_volumes,
            timeout=submission.timeout
        )

        run_id = handle.id
        run_id = str(flow_run.id)

        # Initialize fuzzing tracking if this looks like a fuzzing workflow
        workflow_info = temporal_mgr.workflows.get(workflow_name, {})
        workflow_info = prefect_mgr.workflows.get(workflow_name, {})
        workflow_tags = workflow_info.metadata.get("tags", []) if hasattr(workflow_info, 'metadata') else []
        if "fuzzing" in workflow_tags or "fuzz" in workflow_name.lower():
            from src.api.fuzzing import initialize_fuzzing_tracking
@@ -279,7 +229,7 @@ async def submit_workflow(

        return RunSubmissionResponse(
            run_id=run_id,
            status="RUNNING",
            status=flow_run.state.name if flow_run.state else "PENDING",
            workflow=workflow_name,
            message=f"Workflow '{workflow_name}' submitted successfully"
        )
@@ -311,13 +261,17 @@ async def submit_workflow(
        error_type = "WorkflowSubmissionError"

        # Detect specific error patterns
        if "workflow" in error_message.lower() and "not found" in error_message.lower():
            error_type = "WorkflowError"
        if "deployment" in error_message.lower():
            error_type = "DeploymentError"
            deployment_info = {
                "status": "failed",
                "error": error_message
            }
            suggestions.extend([
                "Check if Temporal server is running and accessible",
                "Verify workflow workers are running",
                "Check if workflow is registered with correct vertical",
                "Ensure Docker is running and has sufficient resources"
                "Check if Prefect server is running and accessible",
                "Verify Docker is running and has sufficient resources",
                "Check container image availability",
                "Ensure volume paths exist and are accessible"
            ])

        elif "volume" in error_message.lower() or "mount" in error_message.lower():
@@ -370,248 +324,10 @@ async def submit_workflow(
    )


@router.post("/{workflow_name}/upload-and-submit", response_model=RunSubmissionResponse)
async def upload_and_submit_workflow(
    workflow_name: str,
    file: UploadFile = File(..., description="Target file or tarball to analyze"),
    parameters: Optional[str] = Form(None, description="JSON-encoded workflow parameters"),
    timeout: Optional[int] = Form(None, description="Timeout in seconds"),
    temporal_mgr=Depends(get_temporal_manager)
) -> RunSubmissionResponse:
    """
    Upload a target file/tarball and submit workflow for execution.

    This endpoint accepts multipart/form-data uploads and is the recommended
    way to submit workflows from remote CLI clients.

    Args:
        workflow_name: Name of the workflow to execute
        file: Target file or tarball (compressed directory)
        parameters: JSON string of workflow parameters (optional)
        timeout: Execution timeout in seconds (optional)

    Returns:
        Run submission response with run_id and initial status

    Raises:
        HTTPException: 404 if workflow not found, 400 for invalid parameters,
                       413 if file too large
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows"
            ]
        )
        raise HTTPException(status_code=404, detail=error_response)

    temp_file_path = None

    try:
        # Validate file size
        file_size = 0
        chunk_size = 1024 * 1024  # 1MB chunks

        # Create temporary file
        temp_fd, temp_file_path = tempfile.mkstemp(suffix=".tar.gz")

        logger.info(f"Receiving file upload for workflow '{workflow_name}': {file.filename}")

        # Stream file to disk
        with open(temp_fd, 'wb') as temp_file:
            while True:
                chunk = await file.read(chunk_size)
                if not chunk:
                    break

                file_size += len(chunk)

                # Check size limit
                if file_size > MAX_UPLOAD_SIZE:
                    raise HTTPException(
                        status_code=413,
                        detail=create_structured_error_response(
                            error_type="FileTooLarge",
                            message=f"File size exceeds maximum allowed size of {MAX_UPLOAD_SIZE / (1024**3):.1f} GB",
                            workflow_name=workflow_name,
                            suggestions=[
                                "Reduce the size of your target directory",
                                "Exclude unnecessary files (build artifacts, dependencies, etc.)",
                                "Consider splitting into smaller analysis targets"
                            ]
                        )
                    )

                temp_file.write(chunk)

        logger.info(f"Received file: {file_size / (1024**2):.2f} MB")

        # Parse parameters
        workflow_params = {}
        if parameters:
            try:
                import json
                workflow_params = json.loads(parameters)
                if not isinstance(workflow_params, dict):
                    raise ValueError("Parameters must be a JSON object")
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=create_structured_error_response(
                        error_type="InvalidParameters",
                        message=f"Invalid parameters JSON: {e}",
                        workflow_name=workflow_name,
                        suggestions=["Ensure parameters is valid JSON object"]
                    )
                )

        # Upload to MinIO
        target_id = await temporal_mgr.upload_target(
            file_path=Path(temp_file_path),
            user_id="api-user",
            metadata={
                "workflow": workflow_name,
                "original_filename": file.filename,
                "upload_method": "multipart"
            }
        )

        logger.info(f"Uploaded to MinIO with target_id: {target_id}")

        # Merge default parameters with user parameters
        workflow_info = temporal_mgr.workflows.get(workflow_name)
        metadata = workflow_info.metadata or {}
        defaults = extract_defaults_from_json_schema(metadata)
        workflow_params = {**defaults, **workflow_params}

        # Start workflow execution
        handle = await temporal_mgr.run_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=workflow_params
        )

        run_id = handle.id

        # Initialize fuzzing tracking if needed
        workflow_info = temporal_mgr.workflows.get(workflow_name, {})
        workflow_tags = workflow_info.metadata.get("tags", []) if hasattr(workflow_info, 'metadata') else []
        if "fuzzing" in workflow_tags or "fuzz" in workflow_name.lower():
            from src.api.fuzzing import initialize_fuzzing_tracking
            initialize_fuzzing_tracking(run_id, workflow_name)

        return RunSubmissionResponse(
            run_id=run_id,
            status="RUNNING",
            workflow=workflow_name,
            message=f"Workflow '{workflow_name}' submitted successfully with uploaded target"
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to upload and submit workflow '{workflow_name}': {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")

        error_response = create_structured_error_response(
            error_type="WorkflowSubmissionError",
            message=f"Failed to process upload and submit workflow: {str(e)}",
            workflow_name=workflow_name,
            suggestions=[
                "Check if the uploaded file is a valid tarball",
                "Verify MinIO storage is accessible",
                "Check backend logs for detailed error information",
                "Ensure Temporal workers are running"
            ]
        )

        raise HTTPException(status_code=500, detail=error_response)

    finally:
        # Cleanup temporary file
        if temp_file_path and Path(temp_file_path).exists():
            try:
                Path(temp_file_path).unlink()
                logger.debug(f"Cleaned up temp file: {temp_file_path}")
            except Exception as e:
                logger.warning(f"Failed to cleanup temp file {temp_file_path}: {e}")


@router.get("/{workflow_name}/worker-info")
async def get_workflow_worker_info(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
) -> Dict[str, Any]:
    """
    Get worker information for a workflow.

    Returns details about which worker is required to execute this workflow,
    including container name, task queue, and vertical.

    Args:
        workflow_name: Name of the workflow

    Returns:
        Worker information including vertical, container name, and task queue

    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows"
            ]
        )
        raise HTTPException(
            status_code=404,
            detail=error_response
        )

    info = temporal_mgr.workflows[workflow_name]
    metadata = info.metadata

    # Extract vertical from metadata
    vertical = metadata.get("vertical")

    if not vertical:
        error_response = create_structured_error_response(
            error_type="MissingVertical",
            message=f"Workflow '{workflow_name}' does not specify a vertical in metadata",
            workflow_name=workflow_name,
            suggestions=[
                "Check workflow metadata.yaml for 'vertical' field",
                "Contact workflow author for support"
            ]
        )
        raise HTTPException(
            status_code=500,
            detail=error_response
        )

    return {
        "workflow": workflow_name,
        "vertical": vertical,
        "worker_service": f"worker-{vertical}",
        "task_queue": f"{vertical}-queue",
        "required": True
    }


@router.get("/{workflow_name}/parameters")
async def get_workflow_parameters(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> Dict[str, Any]:
    """
    Get the parameters schema for a workflow.
@@ -625,8 +341,8 @@ async def get_workflow_parameters(
    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -641,7 +357,7 @@ async def get_workflow_parameters(
        detail=error_response
    )

    info = temporal_mgr.workflows[workflow_name]
    info = prefect_mgr.workflows[workflow_name]
    metadata = info.metadata

    # Return parameters with enhanced schema information
@@ -653,8 +369,11 @@ async def get_workflow_parameters(
    else:
        param_definitions = parameters_schema

    # Extract default values from JSON Schema
    default_params = extract_defaults_from_json_schema(metadata)
    # Add default values to the schema
    default_params = metadata.get("default_parameters", {})
    for param_name, param_schema in param_definitions.items():
        if isinstance(param_schema, dict) and param_name in default_params:
            param_schema["default"] = default_params[param_name]

    return {
        "workflow": workflow_name,
770  backend/src/core/prefect_manager.py  Normal file
@@ -0,0 +1,770 @@
|
||||
"""
|
||||
Prefect Manager - Core orchestration for workflow deployment and execution
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any
|
||||
from prefect import get_client
|
||||
from prefect.docker import DockerImage
|
||||
from prefect.client.schemas import FlowRun
|
||||
|
||||
from src.core.workflow_discovery import WorkflowDiscovery, WorkflowInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_registry_url(context: str = "default") -> str:
|
||||
"""
|
||||
Get the container registry URL to use for a given operation context.
|
||||
|
||||
Goals:
|
||||
- Work reliably across Linux and macOS Docker Desktop
|
||||
- Prefer in-network service discovery when running inside containers
|
||||
- Allow full override via env vars from docker-compose
|
||||
|
||||
Env overrides:
|
||||
- FUZZFORGE_REGISTRY_PUSH_URL: used for image builds/pushes
|
||||
- FUZZFORGE_REGISTRY_PULL_URL: used for workers to pull images
|
||||
"""
|
||||
# Normalize context
|
||||
ctx = (context or "default").lower()
|
||||
|
||||
# Always honor explicit overrides first
|
||||
if ctx in ("push", "build"):
|
||||
push_url = os.getenv("FUZZFORGE_REGISTRY_PUSH_URL")
|
||||
if push_url:
|
||||
logger.debug("Using FUZZFORGE_REGISTRY_PUSH_URL: %s", push_url)
|
||||
return push_url
|
||||
# Default to host-published registry for Docker daemon operations
|
||||
return "localhost:5001"
|
||||
|
||||
if ctx == "pull":
|
||||
pull_url = os.getenv("FUZZFORGE_REGISTRY_PULL_URL")
|
||||
if pull_url:
|
||||
logger.debug("Using FUZZFORGE_REGISTRY_PULL_URL: %s", pull_url)
|
||||
return pull_url
|
||||
# Prefect worker pulls via host Docker daemon as well
|
||||
return "localhost:5001"
|
||||
|
||||
# Default/fallback
|
||||
return os.getenv("FUZZFORGE_REGISTRY_PULL_URL", os.getenv("FUZZFORGE_REGISTRY_PUSH_URL", "localhost:5001"))
|
||||
|
||||
|
||||
def _compose_project_name(default: str = "fuzzforge") -> str:
|
||||
"""Return the docker-compose project name used for network/volume naming.
|
||||
|
||||
Always returns 'fuzzforge' regardless of environment variables.
|
||||
"""
|
||||
return "fuzzforge"
|
||||
|
||||
|
||||
class PrefectManager:
|
||||
"""
|
||||
Manages Prefect deployments and flow runs for discovered workflows.
|
||||
|
||||
This class handles:
|
||||
- Workflow discovery and registration
|
||||
- Docker image building through Prefect
|
||||
- Deployment creation and management
|
||||
- Flow run submission with volume mounting
|
||||
- Findings retrieval from completed runs
|
||||
"""
|
||||
|
||||
def __init__(self, workflows_dir: Path = None):
|
||||
"""
|
||||
Initialize the Prefect manager.
|
||||
|
||||
Args:
|
||||
workflows_dir: Path to the workflows directory (default: toolbox/workflows)
|
||||
"""
|
||||
if workflows_dir is None:
|
||||
workflows_dir = Path("toolbox/workflows")
|
||||
|
||||
self.discovery = WorkflowDiscovery(workflows_dir)
|
||||
self.workflows: Dict[str, WorkflowInfo] = {}
|
||||
self.deployments: Dict[str, str] = {} # workflow_name -> deployment_id
|
||||
|
||||
# Security: Define allowed and forbidden paths for host mounting
|
||||
self.allowed_base_paths = [
|
||||
"/tmp",
|
||||
"/home",
|
||||
"/Users", # macOS users
|
||||
"/opt",
|
||||
"/var/tmp",
|
||||
"/workspace", # Common container workspace
|
||||
"/app" # Container application directory (for test projects)
|
||||
]
|
||||
|
||||
self.forbidden_paths = [
|
||||
"/etc",
|
||||
"/root",
|
||||
"/var/run",
|
||||
"/sys",
|
||||
"/proc",
|
||||
"/dev",
|
||||
"/boot",
|
||||
"/var/lib/docker", # Critical Docker data
|
||||
"/var/log", # System logs
|
||||
"/usr/bin", # System binaries
|
||||
"/usr/sbin",
|
||||
"/sbin",
|
||||
"/bin"
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _parse_memory_to_bytes(memory_str: str) -> int:
|
||||
"""
|
||||
Parse memory string (like '512Mi', '1Gi') to bytes.
|
||||
|
||||
Args:
|
||||
memory_str: Memory string with unit suffix
|
||||
|
||||
Returns:
|
||||
Memory in bytes
|
||||
|
||||
Raises:
|
||||
ValueError: If format is invalid
|
||||
"""
|
||||
if not memory_str:
|
||||
return 0
|
||||
|
||||
match = re.match(r'^(\d+(?:\.\d+)?)\s*([GMK]i?)$', memory_str.strip())
|
||||
if not match:
|
||||
raise ValueError(f"Invalid memory format: {memory_str}. Expected format like '512Mi', '1Gi'")
|
||||
|
||||
value, unit = match.groups()
|
||||
value = float(value)
|
||||
|
||||
# Convert to bytes based on unit (binary units: Ki, Mi, Gi)
|
||||
if unit in ['K', 'Ki']:
|
||||
multiplier = 1024
|
||||
elif unit in ['M', 'Mi']:
|
||||
multiplier = 1024 * 1024
|
||||
elif unit in ['G', 'Gi']:
|
||||
multiplier = 1024 * 1024 * 1024
|
||||
else:
|
||||
raise ValueError(f"Unsupported memory unit: {unit}")
|
||||
|
||||
return int(value * multiplier)

    @staticmethod
    def _parse_cpu_to_millicores(cpu_str: str) -> int:
        """
        Parse a CPU string (like '500m', '1', '2.5') to millicores.

        Args:
            cpu_str: CPU string

        Returns:
            CPU in millicores (1 core = 1000 millicores)

        Raises:
            ValueError: If the format is invalid
        """
        if not cpu_str:
            return 0

        cpu_str = cpu_str.strip()

        # Handle millicores format (e.g., '500m')
        if cpu_str.endswith('m'):
            try:
                return int(cpu_str[:-1])
            except ValueError:
                raise ValueError(f"Invalid CPU format: {cpu_str}")

        # Handle core format (e.g., '1', '2.5')
        try:
            cores = float(cpu_str)
            return int(cores * 1000)  # Convert to millicores
        except ValueError:
            raise ValueError(f"Invalid CPU format: {cpu_str}")

    def _extract_resource_requirements(self, workflow_info: WorkflowInfo) -> Dict[str, str]:
        """
        Extract resource requirements from workflow metadata.

        Args:
            workflow_info: Workflow information with metadata

        Returns:
            Dictionary with resource requirements in Docker format
        """
        metadata = workflow_info.metadata
        requirements = metadata.get("requirements", {})
        resources = requirements.get("resources", {})

        resource_config = {}

        # Extract memory requirement
        memory = resources.get("memory")
        if memory:
            try:
                # Validate the memory format and store the original string for Docker
                self._parse_memory_to_bytes(memory)
                resource_config["memory"] = memory
            except ValueError as e:
                logger.warning(f"Invalid memory requirement in {workflow_info.name}: {e}")

        # Extract CPU requirement
        cpu = resources.get("cpu")
        if cpu:
            try:
                # Validate the CPU format and store the original string for Docker
                self._parse_cpu_to_millicores(cpu)
                resource_config["cpus"] = cpu
            except ValueError as e:
                logger.warning(f"Invalid CPU requirement in {workflow_info.name}: {e}")

        # Extract timeout
        timeout = resources.get("timeout")
        if timeout and isinstance(timeout, int):
            resource_config["timeout"] = str(timeout)

        return resource_config
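    # Illustrative mapping (editor's sketch, not part of the module): metadata
    # such as
    #   requirements: {resources: {memory: "1Gi", cpu: "500m", timeout: 1800}}
    # would yield {"memory": "1Gi", "cpus": "500m", "timeout": "1800"}, i.e. the
    # validated original strings under the keys the Docker job variables expect.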

    async def initialize(self):
        """
        Initialize the manager by discovering and deploying all workflows.

        This method:
        1. Discovers all valid workflows in the workflows directory
        2. Validates their metadata
        3. Deploys each workflow to Prefect with Docker images
        """
        try:
            # Discover workflows
            self.workflows = await self.discovery.discover_workflows()

            if not self.workflows:
                logger.warning("No workflows discovered")
                return

            logger.info(f"Discovered {len(self.workflows)} workflows: {list(self.workflows.keys())}")

            # Deploy each workflow
            for name, info in self.workflows.items():
                try:
                    await self._deploy_workflow(name, info)
                except Exception as e:
                    logger.error(f"Failed to deploy workflow '{name}': {e}")

        except Exception as e:
            logger.error(f"Failed to initialize Prefect manager: {e}")
            raise

    async def _deploy_workflow(self, name: str, info: WorkflowInfo):
        """
        Deploy a single workflow to Prefect with a Docker image.

        Args:
            name: Workflow name
            info: Workflow information including metadata and paths
        """
        logger.info(f"Deploying workflow '{name}'...")

        # Get the flow function from the registry
        flow_func = self.discovery.get_flow_function(name)
        if not flow_func:
            logger.error(
                f"Failed to get flow function for '{name}' from registry. "
                f"Ensure the workflow is properly registered in toolbox/workflows/registry.py"
            )
            return

        # Use the mandatory Dockerfile with absolute paths for Docker Compose.
        # Get absolute paths for the build context and dockerfile.
        toolbox_path = info.path.parent.parent.resolve()
        dockerfile_abs_path = info.dockerfile.resolve()

        # Calculate the dockerfile path relative to the toolbox context
        try:
            dockerfile_rel_path = dockerfile_abs_path.relative_to(toolbox_path)
        except ValueError:
            # If the relative path fails, use the workflow-specific path
            dockerfile_rel_path = Path("workflows") / name / "Dockerfile"

        # Determine the deployment strategy based on Dockerfile presence
        base_image = "prefecthq/prefect:3-python3.11"
        has_custom_dockerfile = info.has_docker and info.dockerfile.exists()

        logger.info(f"=== DEPLOYMENT DEBUG for '{name}' ===")
        logger.info(f"info.has_docker: {info.has_docker}")
        logger.info(f"info.dockerfile: {info.dockerfile}")
        logger.info(f"info.dockerfile.exists(): {info.dockerfile.exists()}")
        logger.info(f"has_custom_dockerfile: {has_custom_dockerfile}")
        logger.info(f"toolbox_path: {toolbox_path}")
        logger.info(f"dockerfile_rel_path: {dockerfile_rel_path}")

        if has_custom_dockerfile:
            logger.info(f"Workflow '{name}' has custom Dockerfile - building custom image")
            # Decide whether to use the registry or keep images local to the host engine
            import os

            # Default to using the local registry; set FUZZFORGE_USE_REGISTRY=false to bypass (not recommended)
            use_registry = os.getenv("FUZZFORGE_USE_REGISTRY", "true").lower() == "true"

            if use_registry:
                registry_url = get_registry_url(context="push")
                image_spec = DockerImage(
                    name=f"{registry_url}/fuzzforge/{name}",
                    tag="latest",
                    dockerfile=str(dockerfile_rel_path),
                    context=str(toolbox_path)
                )
                deploy_image = f"{registry_url}/fuzzforge/{name}:latest"
                build_custom = True
                push_custom = True
                logger.info(f"Using registry: {registry_url} for '{name}'")
            else:
                # Single-host mode: build into the host engine cache; no push required
                image_spec = DockerImage(
                    name=f"fuzzforge/{name}",
                    tag="latest",
                    dockerfile=str(dockerfile_rel_path),
                    context=str(toolbox_path)
                )
                deploy_image = f"fuzzforge/{name}:latest"
                build_custom = True
                push_custom = False
                logger.info("Using single-host image (no registry push): %s", deploy_image)
        else:
            logger.info(f"Workflow '{name}' using base image - no custom dependencies needed")
            deploy_image = base_image
            build_custom = False
            push_custom = False

        # Pre-validate registry connectivity when pushing
        if push_custom:
            try:
                from .setup import validate_registry_connectivity
                await validate_registry_connectivity(registry_url)
                logger.info(f"Registry connectivity validated for {registry_url}")
            except Exception as e:
                logger.error(f"Registry connectivity validation failed for {registry_url}: {e}")
                raise RuntimeError(f"Cannot deploy workflow '{name}': Registry {registry_url} is not accessible. {e}")

        # Deploy the workflow
        try:
            # Ensure any previous deployment is removed so job variables are updated
            try:
                async with get_client() as client:
                    existing = await client.read_deployment_by_name(
                        f"{name}/{name}-deployment"
                    )
                    if existing:
                        logger.info(f"Removing existing deployment for '{name}' to refresh settings...")
                        await client.delete_deployment(existing.id)
            except Exception:
                # If not found or deletion fails, continue with deployment
                pass

            # Extract resource requirements from metadata
            workflow_resource_requirements = self._extract_resource_requirements(info)
            logger.info(f"Workflow '{name}' resource requirements: {workflow_resource_requirements}")

            # Build job variables with resource requirements
            job_variables = {
                "image": deploy_image,  # Use the worker-accessible registry name
                "volumes": [],          # Populated at run submission with the toolbox mount
                "env": {
                    "PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect",
                    "WORKFLOW_NAME": name
                }
            }

            # Add resource requirements to job variables if present
            if workflow_resource_requirements:
                job_variables["resources"] = workflow_resource_requirements

            # Prepare deployment parameters
            deploy_params = {
                "name": f"{name}-deployment",
                "work_pool_name": "docker-pool",
                "image": image_spec if has_custom_dockerfile else deploy_image,
                "push": push_custom,
                "build": build_custom,
                "job_variables": job_variables
            }

            deployment = await flow_func.deploy(**deploy_params)

            self.deployments[name] = str(deployment.id) if hasattr(deployment, 'id') else name
            logger.info(f"Successfully deployed workflow '{name}'")

        except Exception as e:
            # Enhanced error reporting with more context
            import traceback
            logger.error(f"Failed to deploy workflow '{name}': {e}")
            logger.error(f"Deployment traceback: {traceback.format_exc()}")

            # Try to capture Docker-specific context
            error_context = {
                "workflow_name": name,
                "has_dockerfile": has_custom_dockerfile,
                "image_name": deploy_image if 'deploy_image' in locals() else "unknown",
                "registry_url": registry_url if 'registry_url' in locals() else "unknown",
                "error_type": type(e).__name__,
                "error_message": str(e)
            }

            # Check for specific error patterns with detailed categorization
            error_msg_lower = str(e).lower()
            if "registry" in error_msg_lower and ("no such host" in error_msg_lower or "connection" in error_msg_lower):
                error_context["category"] = "registry_connectivity_error"
                error_context["solution"] = f"Cannot reach registry at {error_context['registry_url']}. Check Docker network and registry service."
            elif "docker" in error_msg_lower:
                error_context["category"] = "docker_error"
                if "build" in error_msg_lower:
                    error_context["subcategory"] = "image_build_failed"
                    error_context["solution"] = "Check Dockerfile syntax and dependencies."
                elif "pull" in error_msg_lower:
                    error_context["subcategory"] = "image_pull_failed"
                    error_context["solution"] = "Check if image exists in registry and network connectivity."
                elif "push" in error_msg_lower:
                    error_context["subcategory"] = "image_push_failed"
                    error_context["solution"] = f"Check registry connectivity and push permissions to {error_context['registry_url']}."
            elif "registry" in error_msg_lower:
                error_context["category"] = "registry_error"
                error_context["solution"] = "Check registry configuration and accessibility."
            elif "prefect" in error_msg_lower:
                error_context["category"] = "prefect_error"
                error_context["solution"] = "Check Prefect server connectivity and deployment configuration."
            else:
                error_context["category"] = "unknown_deployment_error"
                error_context["solution"] = "Check logs for more specific error details."

            logger.error(f"Deployment error context: {error_context}")

            # Raise an enhanced exception with context
            enhanced_error = Exception(f"Deployment failed for workflow '{name}': {str(e)} | Context: {error_context}")
            enhanced_error.original_error = e
            enhanced_error.context = error_context
            raise enhanced_error

    async def submit_workflow(
        self,
        workflow_name: str,
        target_path: str,
        volume_mode: str = "ro",
        parameters: Dict[str, Any] = None,
        resource_limits: Dict[str, str] = None,
        additional_volumes: list = None,
        timeout: int = None
    ) -> FlowRun:
        """
        Submit a workflow for execution with volume mounting.

        Args:
            workflow_name: Name of the workflow to execute
            target_path: Host path to mount as a volume
            volume_mode: Volume mount mode ("ro" for read-only, "rw" for read-write)
            parameters: Workflow-specific parameters
            resource_limits: CPU/memory limits for the container
            additional_volumes: List of additional volume mounts
            timeout: Timeout in seconds

        Returns:
            FlowRun object with run information

        Raises:
            ValueError: If the workflow is not found or the volume mode is not supported
        """
        if workflow_name not in self.workflows:
            raise ValueError(f"Unknown workflow: {workflow_name}")

        # Validate volume mode
        workflow_info = self.workflows[workflow_name]
        supported_modes = workflow_info.metadata.get("supported_volume_modes", ["ro", "rw"])

        if volume_mode not in supported_modes:
            raise ValueError(
                f"Workflow '{workflow_name}' doesn't support volume mode '{volume_mode}'. "
                f"Supported modes: {supported_modes}"
            )

        # Validate the target path with security checks
        self._validate_target_path(target_path)

        # Validate additional volumes if provided
        if additional_volumes:
            for volume in additional_volumes:
                self._validate_target_path(volume.host_path)

        async with get_client() as client:
            # Get the deployment, auto-redeploying once if missing
            try:
                deployment = await client.read_deployment_by_name(
                    f"{workflow_name}/{workflow_name}-deployment"
                )
            except Exception as e:
                import traceback
                logger.error(f"Failed to find deployment for workflow '{workflow_name}': {e}")
                logger.error(f"Deployment lookup traceback: {traceback.format_exc()}")

                # Attempt a one-time auto-deploy to recover from startup races
                try:
                    logger.info(f"Auto-deploying missing workflow '{workflow_name}' and retrying...")
                    await self._deploy_workflow(workflow_name, workflow_info)
                    deployment = await client.read_deployment_by_name(
                        f"{workflow_name}/{workflow_name}-deployment"
                    )
                except Exception as redeploy_exc:
                    # Enhanced error with context
                    error_context = {
                        "workflow_name": workflow_name,
                        "error_type": type(e).__name__,
                        "error_message": str(e),
                        "redeploy_error": str(redeploy_exc),
                        "available_deployments": list(self.deployments.keys()),
                    }
                    enhanced_error = ValueError(
                        f"Deployment not found and redeploy failed for workflow '{workflow_name}': {e} | Context: {error_context}"
                    )
                    enhanced_error.context = error_context
                    raise enhanced_error

            # Determine the Docker Compose network name and volume names.
            # Hardcoded to 'fuzzforge' to avoid directory name dependencies.
            compose_project = "fuzzforge"
            docker_network = "fuzzforge_default"

            # Build volume mounts.
            # Add the toolbox volume mount for workflow code access.
            backend_toolbox_path = "/app/toolbox"  # Path in the backend container

            # Hardcoded volume names
            prefect_storage_volume = "fuzzforge_prefect_storage"
            toolbox_code_volume = "fuzzforge_toolbox_code"

            volumes = [
                f"{target_path}:/workspace:{volume_mode}",
                f"{prefect_storage_volume}:/prefect-storage",     # Shared storage for results
                f"{toolbox_code_volume}:/opt/prefect/toolbox:ro"  # Mount workflow code
            ]

            # Add additional volumes if provided
            if additional_volumes:
                for volume in additional_volumes:
                    volume_spec = f"{volume.host_path}:{volume.container_path}:{volume.mode}"
                    volumes.append(volume_spec)

            # Build environment variables
            env_vars = {
                "PREFECT_API_URL": "http://prefect-server:4200/api",  # Use the internal network hostname
                "PREFECT_LOGGING_LEVEL": "INFO",
                "PREFECT_LOCAL_STORAGE_PATH": "/prefect-storage",  # Use shared storage
                "PREFECT_RESULTS_PERSIST_BY_DEFAULT": "true",  # Enable result persistence
                "PREFECT_DEFAULT_RESULT_STORAGE_BLOCK": "local-file-system/fuzzforge-results",  # Use our storage block
                "WORKSPACE_PATH": "/workspace",
                "VOLUME_MODE": volume_mode,
                "WORKFLOW_NAME": workflow_name
            }

            # Add additional volume paths to the environment for easy access
            if additional_volumes:
                for i, volume in enumerate(additional_volumes):
                    env_vars[f"ADDITIONAL_VOLUME_{i}_PATH"] = volume.container_path

            # Determine which image to use based on the workflow configuration
            workflow_info = self.workflows[workflow_name]
            has_custom_dockerfile = workflow_info.has_docker and workflow_info.dockerfile.exists()
            # Use the pull context so the worker pulls from the registry
            registry_url = get_registry_url(context="pull")
            workflow_image = f"{registry_url}/fuzzforge/{workflow_name}:latest" if has_custom_dockerfile else "prefecthq/prefect:3-python3.11"
            logger.debug(f"Worker will pull image: {workflow_image} (Registry: {registry_url})")

            # Configure job variables with volume mounting and network access
            job_variables = {
                # Use the custom image if available, otherwise the base Prefect image
                "image": workflow_image,
                "volumes": volumes,
                "networks": [docker_network],  # Connect to the Docker Compose network
                "env": {
                    **env_vars,
                    "PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect/toolbox/workflows",
                    "WORKFLOW_NAME": workflow_name
                }
            }

            # Apply resource requirements from workflow metadata and user overrides
            workflow_resource_requirements = self._extract_resource_requirements(workflow_info)
            final_resource_config = {}

            # Start with workflow requirements as the base
            if workflow_resource_requirements:
                final_resource_config.update(workflow_resource_requirements)

            # Apply user-provided resource limits (overriding workflow defaults)
            if resource_limits:
                user_resource_config = {}
                if resource_limits.get("cpu_limit"):
                    user_resource_config["cpus"] = resource_limits["cpu_limit"]
                if resource_limits.get("memory_limit"):
                    user_resource_config["memory"] = resource_limits["memory_limit"]
                # Note: cpu_request and memory_request are not directly supported by Docker
                # but could be used for Kubernetes in the future

                # User overrides take precedence
                final_resource_config.update(user_resource_config)

            # Apply the final resource configuration
            if final_resource_config:
                job_variables["resources"] = final_resource_config
                logger.info(f"Applied resource limits: {final_resource_config}")

            # Merge parameters with defaults from metadata
            default_params = workflow_info.metadata.get("default_parameters", {})
            final_params = {**default_params, **(parameters or {})}

            # Set flow parameters that match the flow signature
            final_params["target_path"] = "/workspace"  # Container path where the volume is mounted
            final_params["volume_mode"] = volume_mode

            # Create and submit the flow run.
            # Pass job_variables to ensure network, volumes, and environment are configured.
            logger.info(f"Submitting flow with job_variables: {job_variables}")
            logger.info(f"Submitting flow with parameters: {final_params}")

            # Prepare flow run creation parameters
            flow_run_params = {
                "deployment_id": deployment.id,
                "parameters": final_params,
                "job_variables": job_variables
            }

            # Note: Timeout is handled through workflow-level configuration.
            # Additional timeout configuration can be added to deployment metadata if needed.

            flow_run = await client.create_flow_run_from_deployment(**flow_run_params)

            logger.info(
                f"Submitted workflow '{workflow_name}' with run_id: {flow_run.id}, "
                f"target: {target_path}, mode: {volume_mode}"
            )

            return flow_run
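    # Illustrative call (editor's sketch; assumes an initialized PrefectManager
    # instance `manager` and a registered workflow named "security_assessment",
    # both hypothetical):
    #   run = await manager.submit_workflow(
    #       workflow_name="security_assessment",
    #       target_path="/tmp/project-under-test",
    #       volume_mode="ro",
    #       resource_limits={"cpu_limit": "2", "memory_limit": "1Gi"},
    #   )
    #   print(run.id)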

    async def get_flow_run_findings(self, run_id: str) -> Dict[str, Any]:
        """
        Retrieve findings from a completed flow run.

        Args:
            run_id: The flow run ID

        Returns:
            Dictionary containing SARIF-formatted findings

        Raises:
            ValueError: If the run is not completed or not found
        """
        async with get_client() as client:
            flow_run = await client.read_flow_run(run_id)

            if not flow_run.state.is_completed():
                raise ValueError(
                    f"Flow run {run_id} not completed. Current status: {flow_run.state.name}"
                )

            # Get the findings from the flow run result
            try:
                findings = await flow_run.state.result()
                return findings
            except Exception as e:
                logger.error(f"Failed to retrieve findings for run {run_id}: {e}")
                raise ValueError(f"Failed to retrieve findings: {e}")

    async def get_flow_run_status(self, run_id: str) -> Dict[str, Any]:
        """
        Get the current status of a flow run.

        Args:
            run_id: The flow run ID

        Returns:
            Dictionary with status information
        """
        async with get_client() as client:
            flow_run = await client.read_flow_run(run_id)

            return {
                "run_id": str(flow_run.id),
                "workflow": flow_run.deployment_id,
                "status": flow_run.state.name,
                "is_completed": flow_run.state.is_completed(),
                "is_failed": flow_run.state.is_failed(),
                "is_running": flow_run.state.is_running(),
                "created_at": flow_run.created,
                "updated_at": flow_run.updated
            }
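    # Illustrative return value (editor's sketch; all field values are examples
    # only, and "workflow" carries the run's deployment_id):
    #   {
    #       "run_id": "0f1c2d3e-...",
    #       "workflow": UUID("..."),
    #       "status": "Completed",
    #       "is_completed": True,
    #       "is_failed": False,
    #       "is_running": False,
    #       "created_at": <datetime>,
    #       "updated_at": <datetime>,
    #   }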

    def _validate_target_path(self, target_path: str) -> None:
        """
        Validate a target path for security before mounting it as a volume.

        Args:
            target_path: Host path to validate

        Raises:
            ValueError: If the path is not allowed for security reasons
        """
        target = Path(target_path)

        # The path must be absolute
        if not target.is_absolute():
            raise ValueError(f"Target path must be absolute: {target_path}")

        # Resolve the path to handle symlinks and relative components
        try:
            resolved_path = target.resolve()
        except (OSError, RuntimeError) as e:
            raise ValueError(f"Cannot resolve target path: {target_path} - {e}")

        resolved_str = str(resolved_path)

        # Check against forbidden paths first (more restrictive)
        for forbidden in self.forbidden_paths:
            if resolved_str.startswith(forbidden):
                raise ValueError(
                    f"Access denied: Path '{target_path}' resolves to forbidden directory '{forbidden}'. "
                    f"This path contains sensitive system files and cannot be mounted."
                )

        # Check whether the path starts with any allowed base path
        path_allowed = False
        for allowed in self.allowed_base_paths:
            if resolved_str.startswith(allowed):
                path_allowed = True
                break

        if not path_allowed:
            allowed_list = ", ".join(self.allowed_base_paths)
            raise ValueError(
                f"Access denied: Path '{target_path}' is not in allowed directories. "
                f"Allowed base paths: {allowed_list}"
            )

        # Additional security checks
        if resolved_str == "/":
            raise ValueError("Cannot mount root filesystem")

        # Warn if the path doesn't exist (but don't block - it might be created later)
        if not resolved_path.exists():
            logger.warning(f"Target path does not exist: {target_path}")

        logger.info(f"Path validation passed for: {target_path} -> {resolved_str}")
@@ -1,5 +1,5 @@
"""
Setup utilities for FuzzForge infrastructure
Setup utilities for Prefect infrastructure
"""

# Copyright (c) 2025 FuzzingLabs
@@ -14,21 +14,364 @@ Setup utilities for FuzzForge infrastructure
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from prefect import get_client
from prefect.client.schemas.actions import WorkPoolCreate
from prefect.client.schemas.objects import WorkPool
from .prefect_manager import get_registry_url

logger = logging.getLogger(__name__)


async def setup_docker_pool():
    """
    Create or update the Docker work pool for container execution.

    This work pool is configured to:
    - Connect to the local Docker daemon
    - Support volume mounting at runtime
    - Clean up containers after execution
    - Use bridge networking by default
    """
    import os

    async with get_client() as client:
        pool_name = "docker-pool"

        # Force-recreation flag for debugging fresh-install issues
        force_recreate = os.getenv('FORCE_RECREATE_WORK_POOL', 'false').lower() == 'true'
        debug_setup = os.getenv('DEBUG_WORK_POOL_SETUP', 'false').lower() == 'true'

        if force_recreate:
            logger.warning("FORCE_RECREATE_WORK_POOL=true - will recreate the work pool regardless of existing configuration")
        if debug_setup:
            logger.warning("DEBUG_WORK_POOL_SETUP=true - enhanced logging enabled")
            # Temporarily set the logging level to DEBUG for this function
            original_level = logger.level
            logger.setLevel(logging.DEBUG)

        try:
            # Check whether the pool already exists and supports custom images
            existing_pools = await client.read_work_pools()
            existing_pool = None
            for pool in existing_pools:
                if pool.name == pool_name:
                    existing_pool = pool
                    break

            if existing_pool and not force_recreate:
                logger.info(f"Found existing work pool '{pool_name}' - validating configuration...")

                # Check whether the existing pool has the correct configuration
                base_template = existing_pool.base_job_template or {}
                logger.debug(f"Base template keys: {list(base_template.keys())}")

                job_config = base_template.get("job_configuration", {})
                logger.debug(f"Job config keys: {list(job_config.keys())}")

                image_config = job_config.get("image", "")
                has_image_variable = "{{ image }}" in str(image_config)
                logger.debug(f"Image config: '{image_config}' -> has_image_variable: {has_image_variable}")

                # Check whether the volume defaults include the toolbox mount
                variables = base_template.get("variables", {})
                properties = variables.get("properties", {})
                volume_config = properties.get("volumes", {})
                volume_defaults = volume_config.get("default", [])
                has_toolbox_volume = any("toolbox_code" in str(vol) for vol in volume_defaults) if volume_defaults else False
                logger.debug(f"Volume defaults: {volume_defaults}")
                logger.debug(f"Has toolbox volume: {has_toolbox_volume}")

                # Check whether the environment defaults include the required settings
                env_config = properties.get("env", {})
                env_defaults = env_config.get("default", {})
                has_api_url = "PREFECT_API_URL" in env_defaults
                has_storage_path = "PREFECT_LOCAL_STORAGE_PATH" in env_defaults
                has_results_persist = "PREFECT_RESULTS_PERSIST_BY_DEFAULT" in env_defaults
                has_required_env = has_api_url and has_storage_path and has_results_persist
                logger.debug(f"Environment defaults: {env_defaults}")
                logger.debug(f"Has API URL: {has_api_url}, Has storage path: {has_storage_path}, Has results persist: {has_results_persist}")
                logger.debug(f"Has required env: {has_required_env}")

                # Log the full validation result
                logger.info(f"Work pool validation - Image: {has_image_variable}, Toolbox: {has_toolbox_volume}, Environment: {has_required_env}")

                if has_image_variable and has_toolbox_volume and has_required_env:
                    logger.info(f"Docker work pool '{pool_name}' already exists with correct configuration")
                    return
                else:
                    reasons = []
                    if not has_image_variable:
                        reasons.append("missing image template")
                    if not has_toolbox_volume:
                        reasons.append("missing toolbox volume mount")
                    if not has_required_env:
                        if not has_api_url:
                            reasons.append("missing PREFECT_API_URL")
                        if not has_storage_path:
                            reasons.append("missing PREFECT_LOCAL_STORAGE_PATH")
                        if not has_results_persist:
                            reasons.append("missing PREFECT_RESULTS_PERSIST_BY_DEFAULT")

                    logger.warning(f"Docker work pool '{pool_name}' exists but lacks: {', '.join(reasons)}. Recreating...")
                    # Delete the old pool and recreate it
                    try:
                        await client.delete_work_pool(pool_name)
                        logger.info(f"Deleted old work pool '{pool_name}'")
                    except Exception as e:
                        logger.warning(f"Failed to delete old work pool: {e}")
            elif force_recreate and existing_pool:
                logger.warning(f"Force recreation enabled - deleting existing work pool '{pool_name}'")
                try:
                    await client.delete_work_pool(pool_name)
                    logger.info("Deleted existing work pool for force recreation")
                except Exception as e:
                    logger.warning(f"Failed to delete work pool for force recreation: {e}")

            logger.info(f"Creating Docker work pool '{pool_name}' with custom image support...")

            # Create the work pool with the proper Docker configuration
            work_pool = WorkPoolCreate(
                name=pool_name,
                type="docker",
                description="Docker work pool for FuzzForge workflows with custom image support",
                base_job_template={
                    "job_configuration": {
                        "image": "{{ image }}",        # Template variable for custom images
                        "volumes": "{{ volumes }}",    # List of volume mounts
                        "env": "{{ env }}",            # Environment variables
                        "networks": "{{ networks }}",  # Docker networks
                        "stream_output": True,
                        "auto_remove": True,
                        "privileged": False,
                        "network_mode": None,  # Use networks instead
                        "labels": {},
                        "command": None  # Let the image's CMD/ENTRYPOINT run
                    },
                    "variables": {
                        "type": "object",
                        "properties": {
                            "image": {
                                "type": "string",
                                "title": "Docker Image",
                                "default": "prefecthq/prefect:3-python3.11",
                                "description": "Docker image for the flow run"
                            },
                            "volumes": {
                                "type": "array",
                                "title": "Volume Mounts",
                                "default": [
                                    "fuzzforge_prefect_storage:/prefect-storage",
                                    "fuzzforge_toolbox_code:/opt/prefect/toolbox:ro"
                                ],
                                "description": "Volume mounts in format 'host:container:mode'",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "networks": {
                                "type": "array",
                                "title": "Docker Networks",
                                "default": ["fuzzforge_default"],
                                "description": "Docker networks to connect the container to",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "env": {
                                "type": "object",
                                "title": "Environment Variables",
                                "default": {
                                    "PREFECT_API_URL": "http://prefect-server:4200/api",
                                    "PREFECT_LOCAL_STORAGE_PATH": "/prefect-storage",
                                    "PREFECT_RESULTS_PERSIST_BY_DEFAULT": "true"
                                },
                                "description": "Environment variables for the container",
                                "additionalProperties": {
                                    "type": "string"
                                }
                            }
                        }
                    }
                }
            )

            await client.create_work_pool(work_pool)
            logger.info(f"Created Docker work pool '{pool_name}'")

        except Exception as e:
            logger.error(f"Failed to setup Docker work pool: {e}")
            raise
        finally:
            # Restore the original logging level if debug mode was enabled
            if debug_setup and 'original_level' in locals():
                logger.setLevel(original_level)
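# Illustrative override (editor's sketch, not part of the module): because the
# base job template above exposes "image", "volumes", "networks" and "env" as
# variables, a flow-run submission can override them per run, e.g.:
#   job_variables = {
#       "image": "localhost:5001/fuzzforge/my_workflow:latest",  # hypothetical image
#       "volumes": ["/tmp/target:/workspace:ro"],
#       "networks": ["fuzzforge_default"],
#   }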


def get_actual_compose_project_name():
    """
    Return the hardcoded compose project name for FuzzForge.

    Always returns 'fuzzforge' as per system requirements.
    """
    logger.info("Using hardcoded compose project name: fuzzforge")
    return "fuzzforge"


async def setup_result_storage():
    """
    Setup result storage (MinIO).
    Create or update the Prefect result storage block for findings persistence.

    MinIO is used for both target upload and result storage.
    This is a placeholder for any MinIO-specific setup if needed.
    This sets up a LocalFileSystem storage block pointing to the shared
    /prefect-storage volume for result persistence.
    """
    logger.info("Result storage (MinIO) configured")
    # MinIO is configured via environment variables in docker-compose
    # No additional setup needed here
    return True
    from prefect.filesystems import LocalFileSystem

    storage_name = "fuzzforge-results"

    try:
        # Create the storage block, overwriting if it exists
        logger.info(f"Setting up storage block '{storage_name}'...")
        storage = LocalFileSystem(basepath="/prefect-storage")

        block_doc_id = await storage.save(name=storage_name, overwrite=True)
        logger.info(f"Storage block '{storage_name}' configured successfully")
        return str(block_doc_id)

    except Exception as e:
        logger.error(f"Failed to setup result storage: {e}")
        # Don't raise the exception - continue without the storage block
        logger.warning("Continuing without result storage block - findings may not persist")
        return None


async def validate_docker_connection():
    """
    Validate that Docker is accessible and running.

    Note: In containerized deployments with a Docker socket proxy,
    the backend doesn't need direct Docker access.

    Raises:
        RuntimeError: If Docker is not accessible
    """
    import os

    # Skip Docker validation if running in a container without socket access
    if os.path.exists("/.dockerenv") and not os.path.exists("/var/run/docker.sock"):
        logger.info("Running in container without Docker socket - skipping Docker validation")
        return

    try:
        import docker
        client = docker.from_env()
        client.ping()
        logger.info("Docker connection validated")
    except Exception as e:
        logger.error(f"Docker is not accessible: {e}")
        raise RuntimeError(
            "Docker is not running or not accessible. "
            "Please ensure Docker is installed and running."
        )


async def validate_registry_connectivity(registry_url: str = None):
    """
    Validate that the Docker registry is accessible.

    Args:
        registry_url: URL of the Docker registry to validate (auto-detected if None)

    Raises:
        RuntimeError: If the registry is not accessible
    """
    import os

    # Resolve a reachable test URL from within this process
    if registry_url is None:
        # If not specified, prefer the internal service name in containers, the host port on the host
        if os.path.exists('/.dockerenv'):
            registry_url = "registry:5000"
        else:
            registry_url = "localhost:5001"

    # If we're running inside a container and asked to probe localhost:PORT,
    # the probe would hit the container, not the host. Use host.docker.internal instead.
    try:
        host_part, port_part = registry_url.split(":", 1)
    except ValueError:
        host_part, port_part = registry_url, "80"

    if os.path.exists('/.dockerenv') and host_part in ("localhost", "127.0.0.1"):
        test_host = "host.docker.internal"
    else:
        test_host = host_part
    test_url = f"http://{test_host}:{port_part}/v2/"

    import aiohttp
    import asyncio

    logger.info(f"Validating registry connectivity to {registry_url}...")

    try:
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session:
            async with session.get(test_url) as response:
                if response.status == 200:
                    logger.info(f"Registry at {registry_url} is accessible (tested via {test_host})")
                    return
                else:
                    raise RuntimeError(f"Registry returned status {response.status}")
    except asyncio.TimeoutError:
        raise RuntimeError(f"Registry at {registry_url} is not responding (timeout)")
    except aiohttp.ClientError as e:
        raise RuntimeError(f"Registry at {registry_url} is not accessible: {e}")
    except Exception as e:
        raise RuntimeError(f"Failed to validate registry connectivity: {e}")


async def validate_docker_network(network_name: str):
    """
    Validate that the specified Docker network exists.

    Args:
        network_name: Name of the Docker network to validate

    Raises:
        RuntimeError: If the network doesn't exist
    """
    import os

    # Skip network validation if running in a container without the Docker socket
    if os.path.exists("/.dockerenv") and not os.path.exists("/var/run/docker.sock"):
        logger.info("Running in container without Docker socket - skipping network validation")
        return

    try:
        import docker
        client = docker.from_env()

        # List all networks
        networks = client.networks.list(names=[network_name])

        if not networks:
            # Try to find networks with similar names
            all_networks = client.networks.list()
            similar_networks = [n.name for n in all_networks if "fuzzforge" in n.name.lower()]

            error_msg = f"Docker network '{network_name}' not found."
            if similar_networks:
                error_msg += f" Available networks: {similar_networks}"
            else:
                error_msg += " Please ensure Docker Compose is running."

            raise RuntimeError(error_msg)

        logger.info(f"Docker network '{network_name}' validated")

    except Exception as e:
        if isinstance(e, RuntimeError):
            raise
        logger.error(f"Network validation failed: {e}")
        raise RuntimeError(f"Failed to validate Docker network: {e}")


async def validate_infrastructure():
@@ -39,7 +382,21 @@ async def validate_infrastructure():
    """
    logger.info("Validating infrastructure...")

    # Setup storage (MinIO)
    await setup_result_storage()
    # Validate the Docker connection
    await validate_docker_connection()

    # Validate registry connectivity for custom image building
    await validate_registry_connectivity()

    # Validate the network (hardcoded to avoid directory name dependencies)
    import os
    compose_project = "fuzzforge"
    docker_network = "fuzzforge_default"

    try:
        await validate_docker_network(docker_network)
    except RuntimeError as e:
        logger.warning(f"Network validation failed: {e}")
        logger.warning("Workflows may not be able to connect to Prefect services")

    logger.info("Infrastructure validation completed")

459
backend/src/core/workflow_discovery.py
Normal file
@@ -0,0 +1,459 @@
"""
Workflow Discovery - Registry-based discovery and loading of workflows
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import logging
import yaml
from pathlib import Path
from typing import Dict, Optional, Any, Callable
from pydantic import BaseModel, Field, ConfigDict

logger = logging.getLogger(__name__)


class WorkflowInfo(BaseModel):
    """Information about a discovered workflow"""
    name: str = Field(..., description="Workflow name")
    path: Path = Field(..., description="Path to workflow directory")
    workflow_file: Path = Field(..., description="Path to workflow.py file")
    dockerfile: Path = Field(..., description="Path to Dockerfile")
    has_docker: bool = Field(..., description="Whether workflow has custom Dockerfile")
    metadata: Dict[str, Any] = Field(..., description="Workflow metadata from YAML")
    flow_function_name: str = Field(default="main_flow", description="Name of the flow function")

    model_config = ConfigDict(arbitrary_types_allowed=True)


class WorkflowDiscovery:
    """
    Discovers workflows from the filesystem and validates them against the registry.

    This system:
    1. Scans for workflows with metadata.yaml files
    2. Cross-references them with the manual registry
    3. Provides registry-based flow functions for deployment

    Workflows must have:
    - workflow.py: Contains the Prefect flow
    - metadata.yaml: Mandatory metadata file
    - Entry in toolbox/workflows/registry.py: Manual registration
    - Dockerfile: Mandatory custom container definition
    - requirements.txt (optional): Python dependencies
    """

    def __init__(self, workflows_dir: Path):
        """
        Initialize workflow discovery.

        Args:
            workflows_dir: Path to the workflows directory
        """
        self.workflows_dir = workflows_dir
        if not self.workflows_dir.exists():
            self.workflows_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created workflows directory: {self.workflows_dir}")

        # Import the registry - this validates it on import
        try:
            from toolbox.workflows.registry import WORKFLOW_REGISTRY, list_registered_workflows
            self.registry = WORKFLOW_REGISTRY
            logger.info(f"Loaded workflow registry with {len(self.registry)} registered workflows")
        except ImportError as e:
            logger.error(f"Failed to import workflow registry: {e}")
            self.registry = {}
        except Exception as e:
            logger.error(f"Registry validation failed: {e}")
            self.registry = {}

        # Cache for discovered workflows
        self._workflow_cache: Optional[Dict[str, WorkflowInfo]] = None
        self._cache_timestamp: Optional[float] = None
        self._cache_ttl = 60.0  # Cache TTL in seconds

    async def discover_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Discover workflows by cross-referencing the filesystem with the registry.
        Uses caching to avoid frequent filesystem scans.

        Returns:
            Dictionary mapping workflow names to their information
        """
        # Check cache validity
        import time
        current_time = time.time()

        if (self._workflow_cache is not None and
                self._cache_timestamp is not None and
                (current_time - self._cache_timestamp) < self._cache_ttl):
            # Return cached results
            logger.debug(f"Returning cached workflow discovery ({len(self._workflow_cache)} workflows)")
            return self._workflow_cache

        workflows = {}
        discovered_dirs = set()
        registry_names = set(self.registry.keys())

        if not self.workflows_dir.exists():
            logger.warning(f"Workflows directory does not exist: {self.workflows_dir}")
            return workflows

        # Recursively scan all directories and subdirectories
        await self._scan_directory_recursive(self.workflows_dir, workflows, discovered_dirs)

        # Check for registry entries without corresponding directories
        missing_dirs = registry_names - discovered_dirs
        if missing_dirs:
            logger.warning(
                f"Registry contains workflows without filesystem directories: {missing_dirs}. "
                f"These workflows cannot be deployed."
            )

        logger.info(
            f"Discovery complete: {len(workflows)} workflows ready for deployment, "
            f"{len(missing_dirs)} registry entries missing directories, "
            f"{len(discovered_dirs - registry_names)} filesystem workflows not registered"
        )

        # Update the cache
        self._workflow_cache = workflows
        self._cache_timestamp = current_time

        return workflows

    async def _scan_directory_recursive(self, directory: Path, workflows: Dict[str, WorkflowInfo], discovered_dirs: set):
        """
        Recursively scan a directory for workflows.

        Args:
            directory: Directory to scan
            workflows: Dictionary to populate with discovered workflows
            discovered_dirs: Set to track discovered workflow names
        """
        for item in directory.iterdir():
            if not item.is_dir():
                continue

            if item.name.startswith('_') or item.name.startswith('.'):
                continue  # Skip hidden or private directories

            # Check whether this directory contains workflow files (workflow.py and metadata.yaml)
            workflow_file = item / "workflow.py"
            metadata_file = item / "metadata.yaml"

            if workflow_file.exists() and metadata_file.exists():
                # This is a workflow directory
                workflow_name = item.name
                discovered_dirs.add(workflow_name)

                # Only process workflows that are in the registry
                if workflow_name not in self.registry:
                    logger.warning(
                        f"Workflow '{workflow_name}' found in filesystem but not in registry. "
                        f"Add it to toolbox/workflows/registry.py to enable deployment."
                    )
                    continue

                try:
                    workflow_info = await self._load_workflow(item)
                    if workflow_info:
                        workflows[workflow_info.name] = workflow_info
                        logger.info(f"Discovered and registered workflow: {workflow_info.name}")
                except Exception as e:
                    logger.error(f"Failed to load workflow from {item}: {e}")
            else:
                # This is a category directory, recurse into it
                await self._scan_directory_recursive(item, workflows, discovered_dirs)

    async def _load_workflow(self, workflow_dir: Path) -> Optional[WorkflowInfo]:
        """
        Load and validate a single workflow.

        Args:
            workflow_dir: Path to the workflow directory

        Returns:
            WorkflowInfo if valid, None otherwise
        """
        workflow_name = workflow_dir.name

        # Check for mandatory files
        workflow_file = workflow_dir / "workflow.py"
        metadata_file = workflow_dir / "metadata.yaml"

        if not workflow_file.exists():
            logger.warning(f"Workflow {workflow_name} missing workflow.py")
            return None

        if not metadata_file.exists():
            logger.error(f"Workflow {workflow_name} missing mandatory metadata.yaml")
            return None

        # Load and validate metadata
        try:
            metadata = self._load_metadata(metadata_file)
            if not self._validate_metadata(metadata, workflow_name):
                return None
        except Exception as e:
            logger.error(f"Failed to load metadata for {workflow_name}: {e}")
            return None

        # Check for the mandatory Dockerfile
        dockerfile = workflow_dir / "Dockerfile"
        if not dockerfile.exists():
            logger.error(f"Workflow {workflow_name} missing mandatory Dockerfile")
            return None

        has_docker = True  # Always True since the Dockerfile is mandatory

        # Get the flow function name from metadata or use the default
        flow_function_name = metadata.get("flow_function", "main_flow")

        return WorkflowInfo(
            name=workflow_name,
            path=workflow_dir,
            workflow_file=workflow_file,
            dockerfile=dockerfile,
            has_docker=has_docker,
            metadata=metadata,
            flow_function_name=flow_function_name
        )

    def _load_metadata(self, metadata_file: Path) -> Dict[str, Any]:
        """
        Load metadata from a YAML file.

        Args:
            metadata_file: Path to metadata.yaml

        Returns:
            Dictionary containing metadata
        """
        with open(metadata_file, 'r') as f:
            metadata = yaml.safe_load(f)

        if metadata is None:
            raise ValueError("Empty metadata file")

        return metadata

    def _validate_metadata(self, metadata: Dict[str, Any], workflow_name: str) -> bool:
        """
        Validate that metadata contains all required fields.

        Args:
            metadata: Metadata dictionary
            workflow_name: Name of the workflow for logging

        Returns:
            True if valid, False otherwise
        """
        required_fields = ["name", "version", "description", "author", "category", "parameters", "requirements"]

        missing_fields = []
        for field in required_fields:
            if field not in metadata:
                missing_fields.append(field)

        if missing_fields:
            logger.error(
                f"Workflow {workflow_name} metadata missing required fields: {missing_fields}"
            )
            return False

        # Validate the version format (semantic versioning)
        version = metadata.get("version", "")
        if not self._is_valid_version(version):
            logger.error(f"Workflow {workflow_name} has invalid version format: {version}")
            return False

        # Validate the parameters structure
        parameters = metadata.get("parameters", {})
        if not isinstance(parameters, dict):
            logger.error(f"Workflow {workflow_name} parameters must be a dictionary")
            return False

        return True

    def _is_valid_version(self, version: str) -> bool:
        """
        Check whether a version follows semantic versioning (x.y.z).

        Args:
            version: Version string

        Returns:
            True if it is a valid semantic version
        """
        try:
            parts = version.split('.')
            if len(parts) != 3:
                return False
            for part in parts:
                int(part)  # Check that each part is a number
            return True
        except (ValueError, AttributeError):
            return False
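    # Illustrative checks (editor's sketch, not part of the module):
    #   self._is_valid_version("1.2.3")  # True
    #   self._is_valid_version("1.2")    # False (needs three parts)
    #   self._is_valid_version("1.2.x")  # False (non-numeric part)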

    def invalidate_cache(self) -> None:
        """
        Invalidate the workflow discovery cache.
        Useful when workflows are added or modified.
        """
        self._workflow_cache = None
        self._cache_timestamp = None
        logger.debug("Workflow discovery cache invalidated")

    def get_flow_function(self, workflow_name: str) -> Optional[Callable]:
        """
        Get the flow function from the registry.

        Args:
            workflow_name: Name of the workflow

        Returns:
            The flow function if found in the registry, None otherwise
        """
        if workflow_name not in self.registry:
            logger.error(
                f"Workflow '{workflow_name}' not found in registry. "
                f"Available workflows: {list(self.registry.keys())}"
            )
            return None

        try:
            from toolbox.workflows.registry import get_workflow_flow
            flow_func = get_workflow_flow(workflow_name)
            logger.debug(f"Retrieved flow function for '{workflow_name}' from registry")
            return flow_func
        except Exception as e:
            logger.error(f"Failed to get flow function for '{workflow_name}': {e}")
            return None

    def get_registry_info(self, workflow_name: str) -> Optional[Dict[str, Any]]:
        """
        Get registry information for a workflow.

        Args:
            workflow_name: Name of the workflow

        Returns:
            Registry information if found, None otherwise
        """
        if workflow_name not in self.registry:
            return None

        try:
            from toolbox.workflows.registry import get_workflow_info
            return get_workflow_info(workflow_name)
        except Exception as e:
            logger.error(f"Failed to get registry info for '{workflow_name}': {e}")
            return None

    @staticmethod
    def get_metadata_schema() -> Dict[str, Any]:
        """
        Get the JSON schema for workflow metadata.

        Returns:
            JSON schema dictionary
        """
        return {
            "type": "object",
            "required": ["name", "version", "description", "author", "category", "parameters", "requirements"],
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Workflow name"
                },
                "version": {
                    "type": "string",
                    "pattern": "^\\d+\\.\\d+\\.\\d+$",
                    "description": "Semantic version (x.y.z)"
                },
                "description": {
                    "type": "string",
                    "description": "Workflow description"
                },
                "author": {
                    "type": "string",
                    "description": "Workflow author"
                },
                "category": {
                    "type": "string",
                    "enum": ["comprehensive", "specialized", "fuzzing", "focused"],
                    "description": "Workflow category"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Workflow tags for categorization"
                },
                "requirements": {
                    "type": "object",
                    "required": ["tools", "resources"],
                    "properties": {
                        "tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Required security tools"
                        },
                        "resources": {
                            "type": "object",
                            "required": ["memory", "cpu", "timeout"],
                            "properties": {
                                "memory": {
                                    "type": "string",
                                    "pattern": "^\\d+[GMK]i$",
                                    "description": "Memory limit (e.g., 1Gi, 512Mi)"
                                },
                                "cpu": {
                                    "type": "string",
                                    "pattern": "^\\d+m?$",
                                    "description": "CPU limit (e.g., 1000m, 2)"
                                },
                                "timeout": {
                                    "type": "integer",
                                    "minimum": 60,
                                    "maximum": 7200,
                                    "description": "Workflow timeout in seconds"
                                }
                            }
                        }
                    }
                },
                "parameters": {
                    "type": "object",
                    "description": "Workflow parameters schema"
                },
                "default_parameters": {
                    "type": "object",
                    "description": "Default parameter values"
                },
                "required_modules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Required module names"
                },
                "supported_volume_modes": {
                    "type": "array",
                    "items": {"enum": ["ro", "rw"]},
                    "default": ["ro", "rw"],
                    "description": "Supported volume mount modes"
                },
                "flow_function": {
                    "type": "string",
                    "default": "main_flow",
                    "description": "Name of the flow function in workflow.py"
                }
            }
        }
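    # Illustrative minimal metadata.yaml satisfying this schema (editor's
    # sketch; values are examples only, shown as the dict yaml.safe_load
    # would produce):
    #   {
    #       "name": "example_workflow",
    #       "version": "0.1.0",
    #       "description": "Demo scan",
    #       "author": "FuzzingLabs",
    #       "category": "specialized",
    #       "parameters": {},
    #       "requirements": {
    #           "tools": ["semgrep"],
    #           "resources": {"memory": "512Mi", "cpu": "500m", "timeout": 600},
    #       },
    #   }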
|
||||
@@ -12,6 +12,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from uuid import UUID
|
||||
from contextlib import AsyncExitStack, asynccontextmanager, suppress
|
||||
from typing import Any, Dict, Optional, List
|
||||
|
||||
@@ -22,20 +23,31 @@ from starlette.routing import Mount
|
||||
|
||||
from fastmcp.server.http import create_sse_app
|
||||
|
||||
from src.temporal.manager import TemporalManager
|
||||
from src.core.setup import setup_result_storage, validate_infrastructure
|
||||
from src.api import workflows, runs, fuzzing, system
|
||||
from src.core.prefect_manager import PrefectManager
|
||||
from src.core.setup import setup_docker_pool, setup_result_storage, validate_infrastructure
|
||||
from src.core.workflow_discovery import WorkflowDiscovery
|
||||
from src.api import workflows, runs, fuzzing
|
||||
from src.services.prefect_stats_monitor import prefect_stats_monitor
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from prefect.client.orchestration import get_client
|
||||
from prefect.client.schemas.filters import (
|
||||
FlowRunFilter,
|
||||
FlowRunFilterDeploymentId,
|
||||
FlowRunFilterState,
|
||||
FlowRunFilterStateType,
|
||||
)
|
||||
from prefect.client.schemas.sorting import FlowRunSort
|
||||
from prefect.states import StateType
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
temporal_mgr = TemporalManager()
|
||||
prefect_mgr = PrefectManager()
|
||||
|
||||
|
||||
class TemporalBootstrapState:
|
||||
"""Tracks Temporal initialization progress for API and MCP consumers."""
|
||||
class PrefectBootstrapState:
|
||||
"""Tracks Prefect initialization progress for API and MCP consumers."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.ready: bool = False
|
||||
@@ -52,19 +64,19 @@ class TemporalBootstrapState:
        }


temporal_bootstrap_state = TemporalBootstrapState()
prefect_bootstrap_state = PrefectBootstrapState()

# Configure retry strategy for bootstrapping Temporal + infrastructure
# Configure retry strategy for bootstrapping Prefect + infrastructure
STARTUP_RETRY_SECONDS = max(1, int(os.getenv("FUZZFORGE_STARTUP_RETRY_SECONDS", "5")))
STARTUP_RETRY_MAX_SECONDS = max(
    STARTUP_RETRY_SECONDS,
    int(os.getenv("FUZZFORGE_STARTUP_RETRY_MAX_SECONDS", "60")),
)
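# Illustrative sketch (not part of this diff): the bootstrap loop further down
# waits min(base * 2 ** (attempt - 1), cap) seconds between attempts, so with
# the defaults above (5s base, 60s cap) the schedule is 5, 10, 20, 40, 60, 60, ...
def _example_retry_schedule(base: int = 5, cap: int = 60, attempts: int = 6) -> list:
    return [min(base * 2 ** (attempt - 1), cap) for attempt in range(1, attempts + 1)]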

temporal_bootstrap_task: Optional[asyncio.Task] = None
prefect_bootstrap_task: Optional[asyncio.Task] = None

# ---------------------------------------------------------------------------
# FastAPI application (REST API)
# FastAPI application (REST API remains unchanged)
# ---------------------------------------------------------------------------

app = FastAPI(
@@ -76,22 +88,22 @@ app = FastAPI(
app.include_router(workflows.router)
app.include_router(runs.router)
app.include_router(fuzzing.router)
app.include_router(system.router)


def get_temporal_status() -> Dict[str, Any]:
    """Return a snapshot of Temporal bootstrap state for diagnostics."""
    status = temporal_bootstrap_state.as_dict()
    status["workflows_loaded"] = len(temporal_mgr.workflows)
def get_prefect_status() -> Dict[str, Any]:
    """Return a snapshot of Prefect bootstrap state for diagnostics."""
    status = prefect_bootstrap_state.as_dict()
    status["workflows_loaded"] = len(prefect_mgr.workflows)
    status["deployments_tracked"] = len(prefect_mgr.deployments)
    status["bootstrap_task_running"] = (
        temporal_bootstrap_task is not None and not temporal_bootstrap_task.done()
        prefect_bootstrap_task is not None and not prefect_bootstrap_task.done()
    )
    return status


def _temporal_not_ready_status() -> Optional[Dict[str, Any]]:
    """Return status details if Temporal is not ready yet."""
    status = get_temporal_status()
def _prefect_not_ready_status() -> Optional[Dict[str, Any]]:
    """Return status details if Prefect is not ready yet."""
    status = get_prefect_status()
    if status.get("ready"):
        return None
    return status
@@ -99,19 +111,19 @@ def _temporal_not_ready_status() -> Optional[Dict[str, Any]]:

@app.get("/")
async def root() -> Dict[str, Any]:
    status = get_temporal_status()
    status = get_prefect_status()
    return {
        "name": "FuzzForge API",
        "version": "0.6.0",
        "status": "ready" if status.get("ready") else "initializing",
        "workflows_loaded": status.get("workflows_loaded", 0),
        "temporal": status,
        "prefect": status,
    }


@app.get("/health")
async def health() -> Dict[str, str]:
    status = get_temporal_status()
    status = get_prefect_status()
    health_status = "healthy" if status.get("ready") else "initializing"
    return {"status": health_status}
@@ -153,66 +165,78 @@ _fastapi_mcp_imported = False
mcp = FastMCP(name="FuzzForge MCP")


async def _bootstrap_temporal_with_retries() -> None:
    """Initialize Temporal infrastructure with exponential backoff retries."""
async def _bootstrap_prefect_with_retries() -> None:
    """Initialize Prefect infrastructure with exponential backoff retries."""

    attempt = 0

    while True:
        attempt += 1
        temporal_bootstrap_state.task_running = True
        temporal_bootstrap_state.status = "starting"
        temporal_bootstrap_state.ready = False
        temporal_bootstrap_state.last_error = None
        prefect_bootstrap_state.task_running = True
        prefect_bootstrap_state.status = "starting"
        prefect_bootstrap_state.ready = False
        prefect_bootstrap_state.last_error = None

        try:
            logger.info("Bootstrapping Temporal infrastructure...")
            logger.info("Bootstrapping Prefect infrastructure...")
            await validate_infrastructure()
            await setup_docker_pool()
            await setup_result_storage()
            await temporal_mgr.initialize()
            await prefect_mgr.initialize()
            await prefect_stats_monitor.start_monitoring()

            temporal_bootstrap_state.ready = True
            temporal_bootstrap_state.status = "ready"
            temporal_bootstrap_state.task_running = False
            logger.info("Temporal infrastructure ready")
            prefect_bootstrap_state.ready = True
            prefect_bootstrap_state.status = "ready"
            prefect_bootstrap_state.task_running = False
            logger.info("Prefect infrastructure ready")
            return

        except asyncio.CancelledError:
            temporal_bootstrap_state.status = "cancelled"
            temporal_bootstrap_state.task_running = False
            logger.info("Temporal bootstrap task cancelled")
            prefect_bootstrap_state.status = "cancelled"
            prefect_bootstrap_state.task_running = False
            logger.info("Prefect bootstrap task cancelled")
            raise

        except Exception as exc:  # pragma: no cover - defensive logging on infra startup
            logger.exception("Temporal bootstrap failed")
            temporal_bootstrap_state.ready = False
            temporal_bootstrap_state.status = "error"
            temporal_bootstrap_state.last_error = str(exc)
            logger.exception("Prefect bootstrap failed")
            prefect_bootstrap_state.ready = False
            prefect_bootstrap_state.status = "error"
            prefect_bootstrap_state.last_error = str(exc)

            # Ensure partial initialization does not leave stale state behind
            temporal_mgr.workflows.clear()
            prefect_mgr.workflows.clear()
            prefect_mgr.deployments.clear()
            await prefect_stats_monitor.stop_monitoring()

            wait_time = min(
                STARTUP_RETRY_SECONDS * (2 ** (attempt - 1)),
                STARTUP_RETRY_MAX_SECONDS,
            )
            logger.info("Retrying Temporal bootstrap in %s second(s)", wait_time)
            logger.info("Retrying Prefect bootstrap in %s second(s)", wait_time)

            try:
                await asyncio.sleep(wait_time)
            except asyncio.CancelledError:
                temporal_bootstrap_state.status = "cancelled"
                temporal_bootstrap_state.task_running = False
                prefect_bootstrap_state.status = "cancelled"
                prefect_bootstrap_state.task_running = False
                raise


def _lookup_workflow(workflow_name: str):
    info = temporal_mgr.workflows.get(workflow_name)
    info = prefect_mgr.workflows.get(workflow_name)
    if not info:
        return None
    metadata = info.metadata
    defaults = metadata.get("default_parameters", {})
    default_target_path = metadata.get("default_target_path") or defaults.get("target_path")
    supported_modes = metadata.get("supported_volume_modes") or ["ro", "rw"]
    if not isinstance(supported_modes, list) or not supported_modes:
        supported_modes = ["ro", "rw"]
    default_volume_mode = (
        metadata.get("default_volume_mode")
        or defaults.get("volume_mode")
        or supported_modes[0]
    )
    return {
        "name": workflow_name,
        "version": metadata.get("version", "0.6.0"),
@@ -222,23 +246,26 @@ def _lookup_workflow(workflow_name: str):
        "parameters": metadata.get("parameters", {}),
        "default_parameters": metadata.get("default_parameters", {}),
        "required_modules": metadata.get("required_modules", []),
        "default_target_path": default_target_path
        "supported_volume_modes": supported_modes,
        "default_target_path": default_target_path,
        "default_volume_mode": default_volume_mode,
        "has_custom_docker": bool(info.has_docker),
    }


@mcp.tool
async def list_workflows_mcp() -> Dict[str, Any]:
    """List all discovered workflows and their metadata summary."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "workflows": [],
            "temporal": not_ready,
            "message": "Temporal infrastructure is still initializing",
            "prefect": not_ready,
            "message": "Prefect infrastructure is still initializing",
        }

    workflows_summary = []
    for name, info in temporal_mgr.workflows.items():
    for name, info in prefect_mgr.workflows.items():
        metadata = info.metadata
        defaults = metadata.get("default_parameters", {})
        workflows_summary.append({
@@ -247,20 +274,25 @@ async def list_workflows_mcp() -> Dict[str, Any]:
            "description": metadata.get("description", ""),
            "author": metadata.get("author"),
            "tags": metadata.get("tags", []),
            "supported_volume_modes": metadata.get("supported_volume_modes", ["ro", "rw"]),
            "default_volume_mode": metadata.get("default_volume_mode")
            or defaults.get("volume_mode")
            or "ro",
            "default_target_path": metadata.get("default_target_path")
            or defaults.get("target_path")
            or defaults.get("target_path"),
            "has_custom_docker": bool(info.has_docker),
        })
    return {"workflows": workflows_summary, "temporal": get_temporal_status()}
    return {"workflows": workflows_summary, "prefect": get_prefect_status()}


@mcp.tool
async def get_workflow_metadata_mcp(workflow_name: str) -> Dict[str, Any]:
    """Fetch detailed metadata for a workflow."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    data = _lookup_workflow(workflow_name)
@@ -272,11 +304,11 @@ async def get_workflow_metadata_mcp(workflow_name: str) -> Dict[str, Any]:
@mcp.tool
async def get_workflow_parameters_mcp(workflow_name: str) -> Dict[str, Any]:
    """Return the parameter schema and defaults for a workflow."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    data = _lookup_workflow(workflow_name)
@@ -291,41 +323,72 @@ async def get_workflow_parameters_mcp(workflow_name: str) -> Dict[str, Any]:
@mcp.tool
async def get_workflow_metadata_schema_mcp() -> Dict[str, Any]:
    """Return the JSON schema describing workflow metadata files."""
    from src.temporal.discovery import WorkflowDiscovery
    return WorkflowDiscovery.get_metadata_schema()


@mcp.tool
async def submit_security_scan_mcp(
    workflow_name: str,
    target_id: str,
    target_path: str | None = None,
    volume_mode: str | None = None,
    parameters: Dict[str, Any] | None = None,
) -> Dict[str, Any] | Dict[str, str]:
    """Submit a Temporal workflow via MCP."""
    """Submit a Prefect workflow via MCP."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        workflow_info = temporal_mgr.workflows.get(workflow_name)
        workflow_info = prefect_mgr.workflows.get(workflow_name)
        if not workflow_info:
            return {"error": f"Workflow '{workflow_name}' not found"}

        metadata = workflow_info.metadata or {}
        defaults = metadata.get("default_parameters", {})

        resolved_target_path = target_path or metadata.get("default_target_path") or defaults.get("target_path")
        if not resolved_target_path:
            return {
                "error": (
                    "target_path is required and no default_target_path is defined in metadata"
                ),
                "metadata": {
                    "workflow": workflow_name,
                    "default_target_path": metadata.get("default_target_path"),
                },
            }

        requested_volume_mode = volume_mode or metadata.get("default_volume_mode") or defaults.get("volume_mode")
        if not requested_volume_mode:
            requested_volume_mode = "ro"

        normalised_volume_mode = (
            str(requested_volume_mode).strip().lower().replace("-", "_")
        )
        if normalised_volume_mode in {"read_only", "readonly", "ro"}:
            normalised_volume_mode = "ro"
        elif normalised_volume_mode in {"read_write", "readwrite", "rw"}:
            normalised_volume_mode = "rw"
        else:
            supported_modes = metadata.get("supported_volume_modes", ["ro", "rw"])
            if isinstance(supported_modes, list) and normalised_volume_mode in supported_modes:
                pass
            else:
                normalised_volume_mode = "ro"

        parameters = parameters or {}

        cleaned_parameters: Dict[str, Any] = {**defaults, **parameters}

        # Ensure *_config structures default to dicts
        # Ensure *_config structures default to dicts so Prefect validation passes.
        for key, value in list(cleaned_parameters.items()):
            if isinstance(key, str) and key.endswith("_config") and value is None:
                cleaned_parameters[key] = {}

        # Some workflows expect configuration dictionaries even when omitted
        # Some workflows expect configuration dictionaries even when omitted.
        parameter_definitions = (
            metadata.get("parameters", {}).get("properties", {})
            if isinstance(metadata.get("parameters"), dict)
@@ -340,19 +403,20 @@ async def submit_security_scan_mcp(
            elif cleaned_parameters[key] is None:
                cleaned_parameters[key] = {}

        # Start workflow
        handle = await temporal_mgr.run_workflow(
        flow_run = await prefect_mgr.submit_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=cleaned_parameters,
            target_path=resolved_target_path,
            volume_mode=normalised_volume_mode,
            parameters=cleaned_parameters,
        )

        return {
            "run_id": handle.id,
            "status": "RUNNING",
            "run_id": str(flow_run.id),
            "status": flow_run.state.name if flow_run.state else "PENDING",
            "workflow": workflow_name,
            "message": f"Workflow '{workflow_name}' submitted successfully",
            "target_id": target_id,
            "target_path": resolved_target_path,
            "volume_mode": normalised_volume_mode,
            "parameters": cleaned_parameters,
            "mcp_enabled": True,
        }
@@ -363,38 +427,43 @@ async def submit_security_scan_mcp(

@mcp.tool
async def get_comprehensive_scan_summary(run_id: str) -> Dict[str, Any] | Dict[str, str]:
    """Return a summary for the given workflow run via MCP."""
    """Return a summary for the given flow run via MCP."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        # Try to get result if completed
        total_findings = 0
        severity_summary = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}

        if status.get("status") == "COMPLETED":
            try:
                result = await temporal_mgr.get_workflow_result(run_id)
                if isinstance(result, dict):
                    summary = result.get("summary", {})
                    total_findings = summary.get("total_findings", 0)
            except Exception as e:
                logger.debug(f"Could not retrieve result for {run_id}: {e}")
        if findings and "sarif" in findings:
            sarif = findings["sarif"]
            if isinstance(sarif, dict):
                total_findings = sarif.get("total_findings", 0)

        return {
            "run_id": run_id,
            "workflow": "unknown",  # Temporal doesn't track workflow name in status
            "workflow": workflow_name,
            "status": status.get("status", "unknown"),
            "is_completed": status.get("status") == "COMPLETED",
            "is_completed": status.get("is_completed", False),
            "total_findings": total_findings,
            "severity_summary": severity_summary,
            "scan_duration": status.get("close_time", "In progress"),
            "scan_duration": status.get("updated_at", "")
            if status.get("is_completed")
            else "In progress",
            "recommendations": (
                [
                    "Review high and critical severity findings first",
@@ -413,26 +482,32 @@ async def get_comprehensive_scan_summary(run_id: str) -> Dict[str, Any] | Dict[s

@mcp.tool
async def get_run_status_mcp(run_id: str) -> Dict[str, Any]:
    """Return current status information for a Temporal run."""
    """Return current status information for a Prefect run."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)
        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        return {
            "run_id": run_id,
            "workflow": "unknown",
            "run_id": status["run_id"],
            "workflow": workflow_name,
            "status": status["status"],
            "is_completed": status["status"] in ["COMPLETED", "FAILED", "CANCELLED"],
            "is_failed": status["status"] == "FAILED",
            "is_running": status["status"] == "RUNNING",
            "created_at": status.get("start_time"),
            "updated_at": status.get("close_time") or status.get("execution_time"),
            "is_completed": status["is_completed"],
            "is_failed": status["is_failed"],
            "is_running": status["is_running"],
            "created_at": status["created_at"],
            "updated_at": status["updated_at"],
        }
    except Exception as exc:
        logger.exception("MCP run status failed")
@@ -443,30 +518,38 @@ async def get_run_status_mcp(run_id: str) -> Dict[str, Any]:
async def get_run_findings_mcp(run_id: str) -> Dict[str, Any]:
    """Return SARIF findings for a completed run."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        if status.get("status") != "COMPLETED":
        status = await prefect_mgr.get_flow_run_status(run_id)
        if not status.get("is_completed"):
            return {"error": f"Run {run_id} not completed. Status: {status.get('status')}"}

        result = await temporal_mgr.get_workflow_result(run_id)
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        metadata = {
            "completion_time": status.get("close_time"),
            "completion_time": status.get("updated_at"),
            "workflow_version": "unknown",
        }

        sarif = result.get("sarif", {}) if isinstance(result, dict) else {}
        info = prefect_mgr.workflows.get(workflow_name)
        if info:
            metadata["workflow_version"] = info.metadata.get("version", "unknown")

        return {
            "workflow": "unknown",
            "workflow": workflow_name,
            "run_id": run_id,
            "sarif": sarif,
            "sarif": findings,
            "metadata": metadata,
        }
    except Exception as exc:
@@ -478,15 +561,16 @@ async def get_run_findings_mcp(run_id: str) -> Dict[str, Any]:
async def list_recent_runs_mcp(
    limit: int = 10,
    workflow_name: str | None = None,
    states: List[str] | None = None,
) -> Dict[str, Any]:
    """List recent Temporal runs with optional workflow filter."""
    """List recent Prefect runs with optional workflow/state filters."""

    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "runs": [],
            "temporal": not_ready,
            "message": "Temporal infrastructure is still initializing",
            "prefect": not_ready,
            "message": "Prefect infrastructure is still initializing",
        }

    try:
@@ -495,49 +579,116 @@ async def list_recent_runs_mcp(
        limit_value = 10
    limit_value = max(1, min(limit_value, 100))

    try:
        # Build filter query
        filter_query = None
        if workflow_name:
            workflow_info = temporal_mgr.workflows.get(workflow_name)
            if workflow_info:
                filter_query = f'WorkflowType="{workflow_info.workflow_type}"'
        deployment_map = {
            str(deployment_id): workflow
            for workflow, deployment_id in prefect_mgr.deployments.items()
        }

        workflows = await temporal_mgr.list_workflows(filter_query, limit_value)
        deployment_filter_value = None
        if workflow_name:
            deployment_id = prefect_mgr.deployments.get(workflow_name)
            if not deployment_id:
                return {
                    "runs": [],
                    "prefect": get_prefect_status(),
                    "error": f"Workflow '{workflow_name}' has no registered deployment",
                }
            try:
                deployment_filter_value = UUID(str(deployment_id))
            except ValueError:
                return {
                    "runs": [],
                    "prefect": get_prefect_status(),
                    "error": (
                        f"Deployment id '{deployment_id}' for workflow '{workflow_name}' is invalid"
                    ),
                }

        results: List[Dict[str, Any]] = []
        for wf in workflows:
            results.append({
                "run_id": wf["workflow_id"],
                "workflow": workflow_name or "unknown",
                "state": wf["status"],
                "state_type": wf["status"],
                "is_completed": wf["status"] in ["COMPLETED", "FAILED", "CANCELLED"],
                "is_running": wf["status"] == "RUNNING",
                "is_failed": wf["status"] == "FAILED",
                "created_at": wf.get("start_time"),
                "updated_at": wf.get("close_time"),
            })
        desired_state_types: List[StateType] = []
        if states:
            for raw_state in states:
                if not raw_state:
                    continue
                normalised = raw_state.strip().upper()
                if normalised == "ALL":
                    desired_state_types = []
                    break
                try:
                    desired_state_types.append(StateType[normalised])
                except KeyError:
                    continue
        if not desired_state_types:
            desired_state_types = [
                StateType.RUNNING,
                StateType.COMPLETED,
                StateType.FAILED,
                StateType.CANCELLED,
            ]

        return {"runs": results, "temporal": get_temporal_status()}
        flow_filter = FlowRunFilter()
        if desired_state_types:
            flow_filter.state = FlowRunFilterState(
                type=FlowRunFilterStateType(any_=desired_state_types)
            )
        if deployment_filter_value:
            flow_filter.deployment_id = FlowRunFilterDeploymentId(
                any_=[deployment_filter_value]
            )

    except Exception as exc:
        logger.exception("Failed to list runs")
        return {
            "runs": [],
            "temporal": get_temporal_status(),
            "error": str(exc)
        }
        async with get_client() as client:
            flow_runs = await client.read_flow_runs(
                limit=limit_value,
                flow_run_filter=flow_filter,
                sort=FlowRunSort.START_TIME_DESC,
            )

        results: List[Dict[str, Any]] = []
        for flow_run in flow_runs:
            deployment_id = getattr(flow_run, "deployment_id", None)
            workflow = deployment_map.get(str(deployment_id), "unknown")
            state = getattr(flow_run, "state", None)
            state_name = getattr(state, "name", None) if state else None
            state_type = getattr(state, "type", None) if state else None

            results.append(
                {
                    "run_id": str(flow_run.id),
                    "workflow": workflow,
                    "deployment_id": str(deployment_id) if deployment_id else None,
                    "state": state_name or (state_type.name if state_type else None),
                    "state_type": state_type.name if state_type else None,
                    "is_completed": bool(getattr(state, "is_completed", lambda: False)()),
                    "is_running": bool(getattr(state, "is_running", lambda: False)()),
                    "is_failed": bool(getattr(state, "is_failed", lambda: False)()),
                    "created_at": getattr(flow_run, "created", None),
                    "updated_at": getattr(flow_run, "updated", None),
                    "expected_start_time": getattr(flow_run, "expected_start_time", None),
                    "start_time": getattr(flow_run, "start_time", None),
                }
            )

        # Normalise datetimes to ISO 8601 strings for serialization
        for entry in results:
            for key in ("created_at", "updated_at", "expected_start_time", "start_time"):
                value = entry.get(key)
                if value is None:
                    continue
                try:
                    entry[key] = value.isoformat()
                except AttributeError:
                    entry[key] = str(value)

        return {"runs": results, "prefect": get_prefect_status()}
@mcp.tool
async def get_fuzzing_stats_mcp(run_id: str) -> Dict[str, Any]:
    """Return fuzzing statistics for a run if available."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    stats = fuzzing.fuzzing_stats.get(run_id)
@@ -557,11 +708,11 @@ async def get_fuzzing_stats_mcp(run_id: str) -> Dict[str, Any]:
@mcp.tool
async def get_fuzzing_crash_reports_mcp(run_id: str) -> Dict[str, Any]:
    """Return crash reports collected for a fuzzing run."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    reports = fuzzing.crash_reports.get(run_id)
@@ -574,11 +725,11 @@ async def get_fuzzing_crash_reports_mcp(run_id: str) -> Dict[str, Any]:
async def get_backend_status_mcp() -> Dict[str, Any]:
    """Expose backend readiness, workflows, and registered MCP tools."""

    status = get_temporal_status()
    response: Dict[str, Any] = {"temporal": status}
    status = get_prefect_status()
    response: Dict[str, Any] = {"prefect": status}

    if status.get("ready"):
        response["workflows"] = list(temporal_mgr.workflows.keys())
        response["workflows"] = list(prefect_mgr.workflows.keys())

    try:
        tools = await mcp._tool_manager.list_tools()
@@ -624,12 +775,12 @@ def create_mcp_transport_app() -> Starlette:


# ---------------------------------------------------------------------------
# Combined lifespan: Temporal init + dedicated MCP transports
# Combined lifespan: Prefect init + dedicated MCP transports
# ---------------------------------------------------------------------------

@asynccontextmanager
async def combined_lifespan(app: FastAPI):
    global temporal_bootstrap_task, _fastapi_mcp_imported
    global prefect_bootstrap_task, _fastapi_mcp_imported

    logger.info("Starting FuzzForge backend...")

@@ -642,12 +793,12 @@ async def combined_lifespan(app: FastAPI):
    except Exception as exc:
        logger.exception("Failed to import FastAPI endpoints into MCP", exc_info=exc)

    # Kick off Temporal bootstrap in the background if needed
    if temporal_bootstrap_task is None or temporal_bootstrap_task.done():
        temporal_bootstrap_task = asyncio.create_task(_bootstrap_temporal_with_retries())
        logger.info("Temporal bootstrap task started")
    # Kick off Prefect bootstrap in the background if needed
    if prefect_bootstrap_task is None or prefect_bootstrap_task.done():
        prefect_bootstrap_task = asyncio.create_task(_bootstrap_prefect_with_retries())
        logger.info("Prefect bootstrap task started")
    else:
        logger.info("Temporal bootstrap task already running")
        logger.info("Prefect bootstrap task already running")

    # Start MCP transports on shared port (HTTP + SSE)
    mcp_app = create_mcp_transport_app()
@@ -695,17 +846,18 @@ async def combined_lifespan(app: FastAPI):
        mcp_server.force_exit = True
        await asyncio.gather(mcp_task, return_exceptions=True)

    if temporal_bootstrap_task and not temporal_bootstrap_task.done():
        temporal_bootstrap_task.cancel()
    if prefect_bootstrap_task and not prefect_bootstrap_task.done():
        prefect_bootstrap_task.cancel()
        with suppress(asyncio.CancelledError):
            await temporal_bootstrap_task
    temporal_bootstrap_state.task_running = False
    if not temporal_bootstrap_state.ready:
        temporal_bootstrap_state.status = "stopped"
    temporal_bootstrap_task = None
            await prefect_bootstrap_task
    prefect_bootstrap_state.task_running = False
    if not prefect_bootstrap_state.ready:
        prefect_bootstrap_state.status = "stopped"
        prefect_bootstrap_state.next_retry_seconds = None
    prefect_bootstrap_task = None

    # Close Temporal client
    await temporal_mgr.close()
    logger.info("Shutting down Prefect statistics monitor...")
    await prefect_stats_monitor.stop_monitoring()
    logger.info("Shutting down FuzzForge backend...")

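For context, a standalone sketch of the Prefect client query pattern used by list_recent_runs_mcp above; the imports and classes are the ones this diff already uses, while the function name and the FAILED-only filter are illustrative.

import asyncio
from prefect.client.orchestration import get_client
from prefect.client.schemas.filters import FlowRunFilter, FlowRunFilterState, FlowRunFilterStateType
from prefect.client.schemas.sorting import FlowRunSort
from prefect.states import StateType

async def recent_failed_runs(limit: int = 10):
    # Mirror the tool's filter construction, restricted to FAILED runs.
    flow_filter = FlowRunFilter()
    flow_filter.state = FlowRunFilterState(
        type=FlowRunFilterStateType(any_=[StateType.FAILED])
    )
    async with get_client() as client:
        return await client.read_flow_runs(
            limit=limit,
            flow_run_filter=flow_filter,
            sort=FlowRunSort.START_TIME_DESC,
        )

# Example: asyncio.run(recent_failed_runs())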
@@ -13,9 +13,10 @@ Models for workflow findings and submissions
#
# Additional attribution and requirements are provided in the NOTICE file.

from pydantic import BaseModel, Field
from typing import Dict, Any, Optional, List
from pydantic import BaseModel, Field, field_validator
from typing import Dict, Any, Optional, Literal, List
from datetime import datetime
from pathlib import Path


class WorkflowFindings(BaseModel):
@@ -26,13 +27,47 @@ class WorkflowFindings(BaseModel):
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")


class WorkflowSubmission(BaseModel):
    """
    Submit a workflow with configurable settings.
class ResourceLimits(BaseModel):
    """Resource limits for workflow execution"""
    cpu_limit: Optional[str] = Field(None, description="CPU limit (e.g., '2' for 2 cores, '500m' for 0.5 cores)")
    memory_limit: Optional[str] = Field(None, description="Memory limit (e.g., '1Gi', '512Mi')")
    cpu_request: Optional[str] = Field(None, description="CPU request (guaranteed)")
    memory_request: Optional[str] = Field(None, description="Memory request (guaranteed)")

    Note: This model is deprecated in favor of the /upload-and-submit endpoint
    which handles file uploads directly.
    """

class VolumeMount(BaseModel):
    """Volume mount specification"""
    host_path: str = Field(..., description="Host path to mount")
    container_path: str = Field(..., description="Container path for mount")
    mode: Literal["ro", "rw"] = Field(default="ro", description="Mount mode")

    @field_validator("host_path")
    @classmethod
    def validate_host_path(cls, v):
        """Validate that the host path is absolute (existence checked at runtime)"""
        path = Path(v)
        if not path.is_absolute():
            raise ValueError(f"Host path must be absolute: {v}")
        # Note: Path existence is validated at workflow runtime
        # We can't validate existence here as this runs inside Docker container
        return str(path)

    @field_validator("container_path")
    @classmethod
    def validate_container_path(cls, v):
        """Validate that the container path is absolute"""
        if not v.startswith('/'):
            raise ValueError(f"Container path must be absolute: {v}")
        return v


class WorkflowSubmission(BaseModel):
    """Submit a workflow with configurable settings"""
    target_path: str = Field(..., description="Absolute path to analyze")
    volume_mode: Literal["ro", "rw"] = Field(
        default="ro",
        description="Volume mount mode: read-only (ro) or read-write (rw)"
    )
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description="Workflow-specific parameters"
@@ -43,6 +78,25 @@ class WorkflowSubmission(BaseModel):
        ge=1,
        le=604800  # Max 7 days to support fuzzing campaigns
    )
    resource_limits: Optional[ResourceLimits] = Field(
        None,
        description="Resource limits for workflow container"
    )
    additional_volumes: List[VolumeMount] = Field(
        default_factory=list,
        description="Additional volume mounts (e.g., for corpus, output directories)"
    )

    @field_validator("target_path")
    @classmethod
    def validate_path(cls, v):
        """Validate that the target path is absolute (existence checked at runtime)"""
        path = Path(v)
        if not path.is_absolute():
            raise ValueError(f"Path must be absolute: {v}")
        # Note: Path existence is validated at workflow runtime when volumes are mounted
        # We can't validate existence here as this runs inside Docker container
        return str(path)
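# Illustrative sketch (not part of this diff) of how the models above compose;
# all values are hypothetical and the validators reject relative paths. The
# timeout field is omitted on the assumption that the model defines a default.
#
# submission = WorkflowSubmission(
#     target_path="/data/targets/app",
#     volume_mode="rw",
#     parameters={"max_depth": 3},
#     resource_limits=ResourceLimits(cpu_limit="2", memory_limit="1Gi"),
#     additional_volumes=[
#         VolumeMount(host_path="/data/corpus", container_path="/corpus", mode="ro"),
#     ],
# )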


class WorkflowStatus(BaseModel):
@@ -73,6 +127,14 @@ class WorkflowMetadata(BaseModel):
        default_factory=list,
        description="Required module names"
    )
    supported_volume_modes: List[Literal["ro", "rw"]] = Field(
        default=["ro", "rw"],
        description="Supported volume mount modes"
    )
    has_custom_docker: bool = Field(
        default=False,
        description="Whether workflow has custom Dockerfile"
    )


class WorkflowListItem(BaseModel):

394 backend/src/services/prefect_stats_monitor.py Normal file
@@ -0,0 +1,394 @@
"""
Generic Prefect Statistics Monitor Service

This service monitors ALL workflows for structured live data logging and
updates the appropriate statistics APIs. Works with any workflow that follows
the standard LIVE_STATS logging pattern.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.


import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional
from prefect.client.orchestration import get_client
from prefect.client.schemas.objects import FlowRun, TaskRun
from src.models.findings import FuzzingStats
from src.api.fuzzing import fuzzing_stats, initialize_fuzzing_tracking, active_connections

logger = logging.getLogger(__name__)


class PrefectStatsMonitor:
    """Monitors Prefect flows and tasks for live statistics from any workflow"""

    def __init__(self):
        self.monitoring = False
        self.monitor_task = None
        self.monitored_runs = set()
        self.last_log_ts: Dict[str, datetime] = {}
        self._client = None
        self._client_refresh_time = None
        self._client_refresh_interval = 300  # Refresh connection every 5 minutes

    async def start_monitoring(self):
        """Start the Prefect statistics monitoring service"""
        if self.monitoring:
            logger.warning("Prefect stats monitor already running")
            return

        self.monitoring = True
        self.monitor_task = asyncio.create_task(self._monitor_flows())
        logger.info("Started Prefect statistics monitor")

    async def stop_monitoring(self):
        """Stop the monitoring service"""
        self.monitoring = False
        if self.monitor_task:
            self.monitor_task.cancel()
            try:
                await self.monitor_task
            except asyncio.CancelledError:
                pass
        logger.info("Stopped Prefect statistics monitor")

    async def _get_or_refresh_client(self):
        """Get or refresh Prefect client with connection pooling."""
        now = datetime.now(timezone.utc)

        if (self._client is None or
                self._client_refresh_time is None or
                (now - self._client_refresh_time).total_seconds() > self._client_refresh_interval):

            if self._client:
                try:
                    await self._client.aclose()
                except Exception:
                    pass

            self._client = get_client()
            self._client_refresh_time = now
            await self._client.__aenter__()

        return self._client

    async def _monitor_flows(self):
        """Main monitoring loop that watches Prefect flows"""
        try:
            while self.monitoring:
                try:
                    # Use connection pooling for better performance
                    client = await self._get_or_refresh_client()

                    # Get recent flow runs (limit to reduce load)
                    flow_runs = await client.read_flow_runs(
                        limit=50,
                        sort="START_TIME_DESC",
                    )

                    # Only consider runs from the last 15 minutes
                    recent_cutoff = datetime.now(timezone.utc) - timedelta(minutes=15)
                    for flow_run in flow_runs:
                        created = getattr(flow_run, "created", None)
                        if created is None:
                            continue
                        try:
                            # Ensure timezone-aware comparison
                            if created.tzinfo is None:
                                created = created.replace(tzinfo=timezone.utc)
                            if created >= recent_cutoff:
                                await self._monitor_flow_run(client, flow_run)
                        except Exception:
                            # If comparison fails, attempt monitoring anyway
                            await self._monitor_flow_run(client, flow_run)

                    await asyncio.sleep(5)  # Check every 5 seconds

                except Exception as e:
                    logger.error(f"Error in Prefect monitoring: {e}")
                    await asyncio.sleep(10)

        except asyncio.CancelledError:
            logger.info("Prefect monitoring cancelled")
        except Exception as e:
            logger.error(f"Fatal error in Prefect monitoring: {e}")
        finally:
            # Clean up client on exit
            if self._client:
                try:
                    await self._client.__aexit__(None, None, None)
                except Exception:
                    pass
                self._client = None
    async def _monitor_flow_run(self, client, flow_run: FlowRun):
        """Monitor a specific flow run for statistics"""
        run_id = str(flow_run.id)
        workflow_name = flow_run.name or "unknown"

        try:
            # Initialize tracking if not exists - only for workflows that might have live stats
            if run_id not in fuzzing_stats:
                initialize_fuzzing_tracking(run_id, workflow_name)
                self.monitored_runs.add(run_id)

            # Skip corrupted entries (should not happen after startup cleanup, but defensive)
            elif not isinstance(fuzzing_stats[run_id], FuzzingStats):
                logger.warning(f"Skipping corrupted stats entry for {run_id}, reinitializing")
                initialize_fuzzing_tracking(run_id, workflow_name)
                self.monitored_runs.add(run_id)

            # Get task runs for this flow
            task_runs = await client.read_task_runs(
                flow_run_filter={"id": {"any_": [flow_run.id]}},
                limit=25,
            )

            # Check all tasks for live statistics logging
            for task_run in task_runs:
                await self._extract_stats_from_task(client, run_id, task_run, workflow_name)

            # Also scan flow-level logs as a fallback
            await self._extract_stats_from_flow_logs(client, run_id, flow_run, workflow_name)

        except Exception as e:
            logger.warning(f"Error monitoring flow run {run_id}: {e}")

    async def _extract_stats_from_task(self, client, run_id: str, task_run: TaskRun, workflow_name: str):
        """Extract statistics from any task that logs live stats"""
        try:
            # Get task run logs
            logs = await client.read_logs(
                log_filter={
                    "task_run_id": {"any_": [task_run.id]}
                },
                limit=100,
                sort="TIMESTAMP_ASC"
            )

            # Parse logs for LIVE_STATS entries (generic pattern for any workflow)
            latest_stats = None
            for log in logs:
                # Prefer structured extra field if present
                extra_data = getattr(log, "extra", None) or getattr(log, "extra_fields", None) or None
                if isinstance(extra_data, dict):
                    stat_type = extra_data.get("stats_type")
                    if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                        latest_stats = extra_data
                        continue

                # Fallback to parsing from message text
                if ("FUZZ_STATS" in log.message or "LIVE_STATS" in log.message):
                    stats = self._parse_stats_from_log(log.message)
                    if stats:
                        latest_stats = stats

            # Update statistics if we found any
            if latest_stats:
                # Calculate elapsed time from task start
                elapsed_time = 0
                if task_run.start_time:
                    # Ensure timezone-aware arithmetic
                    now = datetime.now(timezone.utc)
                    try:
                        elapsed_time = int((now - task_run.start_time).total_seconds())
                    except Exception:
                        # Fallback to naive UTC if types mismatch
                        elapsed_time = int((datetime.utcnow() - task_run.start_time.replace(tzinfo=None)).total_seconds())

                updated_stats = FuzzingStats(
                    run_id=run_id,
                    workflow=workflow_name,
                    executions=latest_stats.get("executions", 0),
                    executions_per_sec=latest_stats.get("executions_per_sec", 0.0),
                    crashes=latest_stats.get("crashes", 0),
                    unique_crashes=latest_stats.get("unique_crashes", 0),
                    corpus_size=latest_stats.get("corpus_size", 0),
                    elapsed_time=elapsed_time
                )

                # Update the global stats
                previous = fuzzing_stats.get(run_id)
                fuzzing_stats[run_id] = updated_stats

                # Broadcast to any active WebSocket clients for this run
                if active_connections.get(run_id):
                    # Handle both Pydantic objects and plain dicts
                    if isinstance(updated_stats, dict):
                        stats_data = updated_stats
                    elif hasattr(updated_stats, 'model_dump'):
                        stats_data = updated_stats.model_dump()
                    elif hasattr(updated_stats, 'dict'):
                        stats_data = updated_stats.dict()
                    else:
                        stats_data = updated_stats.__dict__

                    message = {
                        "type": "stats_update",
                        "data": stats_data,
                    }
                    disconnected = []
                    for ws in active_connections[run_id]:
                        try:
                            await ws.send_text(json.dumps(message))
                        except Exception:
                            disconnected.append(ws)
                    # Clean up disconnected sockets
                    for ws in disconnected:
                        try:
                            active_connections[run_id].remove(ws)
                        except ValueError:
                            pass

                logger.debug(f"Updated Prefect stats for {run_id}: {updated_stats.executions} execs")

        except Exception as e:
            logger.warning(f"Error extracting stats from task {task_run.id}: {e}")
    async def _extract_stats_from_flow_logs(self, client, run_id: str, flow_run: FlowRun, workflow_name: str):
        """Extract statistics by scanning flow-level logs for LIVE/FUZZ stats"""
        try:
            logs = await client.read_logs(
                log_filter={
                    "flow_run_id": {"any_": [flow_run.id]}
                },
                limit=200,
                sort="TIMESTAMP_ASC"
            )

            latest_stats = None
            last_seen = self.last_log_ts.get(run_id)
            max_ts = last_seen

            for log in logs:
                # Skip logs we've already processed
                ts = getattr(log, "timestamp", None)
                if last_seen and ts and ts <= last_seen:
                    continue
                if ts and (max_ts is None or ts > max_ts):
                    max_ts = ts

                # Prefer structured extra field if available
                extra_data = getattr(log, "extra", None) or getattr(log, "extra_fields", None) or None
                if isinstance(extra_data, dict):
                    stat_type = extra_data.get("stats_type")
                    if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                        latest_stats = extra_data
                        continue

                # Fallback to message parse
                if ("FUZZ_STATS" in log.message or "LIVE_STATS" in log.message):
                    stats = self._parse_stats_from_log(log.message)
                    if stats:
                        latest_stats = stats

            if max_ts:
                self.last_log_ts[run_id] = max_ts

            if latest_stats:
                # Use flow_run timestamps for elapsed time if available
                elapsed_time = 0
                start_time = getattr(flow_run, "start_time", None)
                if start_time:
                    now = datetime.now(timezone.utc)
                    try:
                        if start_time.tzinfo is None:
                            start_time = start_time.replace(tzinfo=timezone.utc)
                        elapsed_time = int((now - start_time).total_seconds())
                    except Exception:
                        elapsed_time = int((datetime.utcnow() - start_time.replace(tzinfo=None)).total_seconds())

                updated_stats = FuzzingStats(
                    run_id=run_id,
                    workflow=workflow_name,
                    executions=latest_stats.get("executions", 0),
                    executions_per_sec=latest_stats.get("executions_per_sec", 0.0),
                    crashes=latest_stats.get("crashes", 0),
                    unique_crashes=latest_stats.get("unique_crashes", 0),
                    corpus_size=latest_stats.get("corpus_size", 0),
                    elapsed_time=elapsed_time
                )

                fuzzing_stats[run_id] = updated_stats

                # Broadcast if listeners exist
                if active_connections.get(run_id):
                    # Handle both Pydantic objects and plain dicts
                    if isinstance(updated_stats, dict):
                        stats_data = updated_stats
                    elif hasattr(updated_stats, 'model_dump'):
                        stats_data = updated_stats.model_dump()
                    elif hasattr(updated_stats, 'dict'):
                        stats_data = updated_stats.dict()
                    else:
                        stats_data = updated_stats.__dict__

                    message = {
                        "type": "stats_update",
                        "data": stats_data,
                    }
                    disconnected = []
                    for ws in active_connections[run_id]:
                        try:
                            await ws.send_text(json.dumps(message))
                        except Exception:
                            disconnected.append(ws)
                    for ws in disconnected:
                        try:
                            active_connections[run_id].remove(ws)
                        except ValueError:
                            pass

        except Exception as e:
            logger.warning(f"Error extracting stats from flow logs {run_id}: {e}")

    def _parse_stats_from_log(self, log_message: str) -> Optional[Dict[str, Any]]:
        """Parse statistics from a log message"""
        try:
            import re

            # Prefer explicit JSON after marker tokens
            m = re.search(r'(?:FUZZ_STATS|LIVE_STATS)\s+(\{.*\})', log_message)
            if m:
                try:
                    return json.loads(m.group(1))
                except Exception:
                    pass

            # Fallback: Extract the extra= dict and coerce to JSON
            stats_match = re.search(r'extra=({.*?})', log_message)
            if not stats_match:
                return None

            extra_str = stats_match.group(1)
            extra_str = extra_str.replace("'", '"')
            extra_str = extra_str.replace('None', 'null')
            extra_str = extra_str.replace('True', 'true')
            extra_str = extra_str.replace('False', 'false')

            stats_data = json.loads(extra_str)

            # Support multiple stat types for different workflows
            stat_type = stats_data.get("stats_type")
            if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                return stats_data

        except Exception as e:
            logger.debug(f"Error parsing log stats: {e}")

        return None


# Global instance
prefect_stats_monitor = PrefectStatsMonitor()
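For reference, a hedged sketch of the producer side of the LIVE_STATS pattern this monitor consumes; the payload keys come from the parser above, while the reporting function and its logger setup are assumptions.

import json
import logging

logger = logging.getLogger(__name__)

def report_live_stats(executions: int, eps: float, crashes: int) -> None:
    # The monitor matches r'(?:FUZZ_STATS|LIVE_STATS)\s+(\{.*\})' and accepts
    # stats_type values such as "fuzzing_live_update" or "live_stats".
    payload = {
        "stats_type": "fuzzing_live_update",
        "executions": executions,
        "executions_per_sec": eps,
        "crashes": crashes,
        "unique_crashes": 0,
        "corpus_size": 0,
    }
    logger.info("LIVE_STATS " + json.dumps(payload))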
@@ -1,10 +0,0 @@
"""
Storage abstraction layer for FuzzForge.

Provides unified interface for storing and retrieving targets and results.
"""

from .base import StorageBackend
from .s3_cached import S3CachedStorage

__all__ = ["StorageBackend", "S3CachedStorage"]
@@ -1,153 +0,0 @@
"""
Base storage backend interface.

All storage implementations must implement this interface.
"""

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Dict, Any


class StorageBackend(ABC):
    """
    Abstract base class for storage backends.

    Implementations handle storage and retrieval of:
    - Uploaded targets (code, binaries, etc.)
    - Workflow results
    - Temporary files
    """

    @abstractmethod
    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Upload a target file to storage.

        Args:
            file_path: Local path to file to upload
            user_id: ID of user uploading the file
            metadata: Optional metadata to store with file

        Returns:
            Target ID (unique identifier for retrieval)

        Raises:
            FileNotFoundError: If file_path doesn't exist
            StorageError: If upload fails
        """
        pass

    @abstractmethod
    async def get_target(self, target_id: str) -> Path:
        """
        Get target file from storage.

        Args:
            target_id: Unique identifier from upload_target()

        Returns:
            Local path to cached file

        Raises:
            FileNotFoundError: If target doesn't exist
            StorageError: If download fails
        """
        pass

    @abstractmethod
    async def delete_target(self, target_id: str) -> None:
        """
        Delete target from storage.

        Args:
            target_id: Unique identifier to delete

        Raises:
            StorageError: If deletion fails (doesn't raise if not found)
        """
        pass

    @abstractmethod
    async def upload_results(
        self,
        workflow_id: str,
        results: Dict[str, Any],
        results_format: str = "json"
    ) -> str:
        """
        Upload workflow results to storage.

        Args:
            workflow_id: Workflow execution ID
            results: Results dictionary
            results_format: Format (json, sarif, etc.)

        Returns:
            URL to uploaded results

        Raises:
            StorageError: If upload fails
        """
        pass

    @abstractmethod
    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        """
        Get workflow results from storage.

        Args:
            workflow_id: Workflow execution ID

        Returns:
            Results dictionary

        Raises:
            FileNotFoundError: If results don't exist
            StorageError: If download fails
        """
        pass

    @abstractmethod
    async def list_targets(
        self,
        user_id: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """
        List uploaded targets.

        Args:
            user_id: Filter by user ID (None = all users)
            limit: Maximum number of results

        Returns:
            List of target metadata dictionaries

        Raises:
            StorageError: If listing fails
        """
        pass

    @abstractmethod
    async def cleanup_cache(self) -> int:
        """
        Clean up local cache (LRU eviction).

        Returns:
            Number of files removed

        Raises:
            StorageError: If cleanup fails
        """
        pass


class StorageError(Exception):
    """Base exception for storage operations."""
    pass
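A minimal in-memory sketch of the interface above (illustrative only, not part of the diff; a real backend would persist data and raise StorageError on failures):

from pathlib import Path
from typing import Any, Dict, Optional
from uuid import uuid4

class InMemoryStorage(StorageBackend):
    """Toy backend that keeps targets and results in dictionaries."""

    def __init__(self) -> None:
        self._targets: Dict[str, Path] = {}
        self._results: Dict[str, Dict[str, Any]] = {}

    async def upload_target(self, file_path: Path, user_id: str,
                            metadata: Optional[Dict[str, Any]] = None) -> str:
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        target_id = str(uuid4())
        self._targets[target_id] = file_path
        return target_id

    async def get_target(self, target_id: str) -> Path:
        if target_id not in self._targets:
            raise FileNotFoundError(f"Target {target_id} not found")
        return self._targets[target_id]

    async def delete_target(self, target_id: str) -> None:
        self._targets.pop(target_id, None)

    async def upload_results(self, workflow_id: str, results: Dict[str, Any],
                             results_format: str = "json") -> str:
        self._results[workflow_id] = results
        return f"memory://{workflow_id}"

    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        if workflow_id not in self._results:
            raise FileNotFoundError(f"Results for {workflow_id} not found")
        return self._results[workflow_id]

    async def list_targets(self, user_id: Optional[str] = None,
                           limit: int = 100) -> list:
        return [{"target_id": tid} for tid in list(self._targets)[:limit]]

    async def cleanup_cache(self) -> int:
        return 0  # Nothing cached on disk in this toy backend.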
@@ -1,423 +0,0 @@
"""
S3-compatible storage backend with local caching.

Works with MinIO (dev/prod) or AWS S3 (cloud).
"""

import json
import logging
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
from uuid import uuid4

import boto3
from botocore.exceptions import ClientError

from .base import StorageBackend, StorageError

logger = logging.getLogger(__name__)


class S3CachedStorage(StorageBackend):
    """
    S3-compatible storage with local caching.

    Features:
    - Upload targets to S3/MinIO
    - Download with local caching (LRU eviction)
    - Lifecycle management (auto-cleanup old files)
    - Metadata tracking
    """

    def __init__(
        self,
        endpoint_url: Optional[str] = None,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        bucket: str = "targets",
        region: str = "us-east-1",
        use_ssl: bool = False,
        cache_dir: Optional[Path] = None,
        cache_max_size_gb: int = 10
    ):
        """
        Initialize S3 storage backend.

        Args:
            endpoint_url: S3 endpoint (None = AWS S3, or MinIO URL)
            access_key: S3 access key (None = from env)
            secret_key: S3 secret key (None = from env)
            bucket: S3 bucket name
            region: AWS region
            use_ssl: Use HTTPS
            cache_dir: Local cache directory
            cache_max_size_gb: Maximum cache size in GB
        """
        # Use environment variables as defaults
        self.endpoint_url = endpoint_url or os.getenv('S3_ENDPOINT', 'http://minio:9000')
        self.access_key = access_key or os.getenv('S3_ACCESS_KEY', 'fuzzforge')
        self.secret_key = secret_key or os.getenv('S3_SECRET_KEY', 'fuzzforge123')
        self.bucket = bucket or os.getenv('S3_BUCKET', 'targets')
        self.region = region or os.getenv('S3_REGION', 'us-east-1')
        self.use_ssl = use_ssl or os.getenv('S3_USE_SSL', 'false').lower() == 'true'

        # Cache configuration
        self.cache_dir = cache_dir or Path(os.getenv('CACHE_DIR', '/tmp/fuzzforge-cache'))
        self.cache_max_size = cache_max_size_gb * (1024 ** 3)  # Convert to bytes

        # Ensure cache directory exists
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Initialize S3 client
        try:
            self.s3_client = boto3.client(
                's3',
                endpoint_url=self.endpoint_url,
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=self.region,
                use_ssl=self.use_ssl
            )
            logger.info(f"Initialized S3 storage: {self.endpoint_url}/{self.bucket}")
        except Exception as e:
            logger.error(f"Failed to initialize S3 client: {e}")
            raise StorageError(f"S3 initialization failed: {e}")

    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Upload target file to S3/MinIO."""
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Generate unique target ID
        target_id = str(uuid4())

        # Prepare metadata
        upload_metadata = {
            'user_id': user_id,
            'uploaded_at': datetime.now().isoformat(),
            'filename': file_path.name,
            'size': str(file_path.stat().st_size)
        }
        if metadata:
            upload_metadata.update(metadata)

        # Upload to S3
        s3_key = f'{target_id}/target'
        try:
            logger.info(f"Uploading target to s3://{self.bucket}/{s3_key}")

            self.s3_client.upload_file(
                str(file_path),
                self.bucket,
                s3_key,
                ExtraArgs={
                    'Metadata': upload_metadata
                }
            )

            file_size_mb = file_path.stat().st_size / (1024 * 1024)
            logger.info(
                f"✓ Uploaded target {target_id} "
                f"({file_path.name}, {file_size_mb:.2f} MB)"
            )

            return target_id

        except ClientError as e:
            logger.error(f"S3 upload failed: {e}", exc_info=True)
            raise StorageError(f"Failed to upload target: {e}")
        except Exception as e:
            logger.error(f"Upload failed: {e}", exc_info=True)
            raise StorageError(f"Upload error: {e}")

    async def get_target(self, target_id: str) -> Path:
        """Get target from cache or download from S3/MinIO."""
        # Check cache first
        cache_path = self.cache_dir / target_id
        cached_file = cache_path / "target"

        if cached_file.exists():
            # Update access time for LRU
            cached_file.touch()
            logger.info(f"Cache HIT: {target_id}")
            return cached_file

        # Cache miss - download from S3
        logger.info(f"Cache MISS: {target_id}, downloading from S3...")

        try:
            # Create cache directory
            cache_path.mkdir(parents=True, exist_ok=True)

            # Download from S3
            s3_key = f'{target_id}/target'
            logger.info(f"Downloading s3://{self.bucket}/{s3_key}")

            self.s3_client.download_file(
                self.bucket,
                s3_key,
                str(cached_file)
            )

            # Verify download
            if not cached_file.exists():
                raise StorageError(f"Downloaded file not found: {cached_file}")

            file_size_mb = cached_file.stat().st_size / (1024 * 1024)
            logger.info(f"✓ Downloaded target {target_id} ({file_size_mb:.2f} MB)")

            return cached_file

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ['404', 'NoSuchKey']:
                logger.error(f"Target not found: {target_id}")
                raise FileNotFoundError(f"Target {target_id} not found in storage")
            else:
                logger.error(f"S3 download failed: {e}", exc_info=True)
                raise StorageError(f"Download failed: {e}")
        except Exception as e:
            logger.error(f"Download error: {e}", exc_info=True)
            # Cleanup partial download
            if cache_path.exists():
                shutil.rmtree(cache_path, ignore_errors=True)
            raise StorageError(f"Download error: {e}")

    async def delete_target(self, target_id: str) -> None:
        """Delete target from S3/MinIO."""
        try:
            s3_key = f'{target_id}/target'
            logger.info(f"Deleting s3://{self.bucket}/{s3_key}")

            self.s3_client.delete_object(
                Bucket=self.bucket,
                Key=s3_key
            )

            # Also delete from cache if present
            cache_path = self.cache_dir / target_id
            if cache_path.exists():
                shutil.rmtree(cache_path, ignore_errors=True)
                logger.info(f"✓ Deleted target {target_id} from S3 and cache")
            else:
                logger.info(f"✓ Deleted target {target_id} from S3")

        except ClientError as e:
            logger.error(f"S3 delete failed: {e}", exc_info=True)
            # Don't raise error if object doesn't exist
            if e.response.get('Error', {}).get('Code') not in ['404', 'NoSuchKey']:
                raise StorageError(f"Delete failed: {e}")
        except Exception as e:
            logger.error(f"Delete error: {e}", exc_info=True)
            raise StorageError(f"Delete error: {e}")

    async def upload_results(
        self,
        workflow_id: str,
        results: Dict[str, Any],
        results_format: str = "json"
    ) -> str:
        """Upload workflow results to S3/MinIO."""
        try:
            # Prepare results content
            if results_format == "json":
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/json'
                file_ext = 'json'
            elif results_format == "sarif":
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/sarif+json'
                file_ext = 'sarif'
            else:
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/json'
                file_ext = 'json'

            # Upload to results bucket
            results_bucket = 'results'
            s3_key = f'{workflow_id}/results.{file_ext}'

            logger.info(f"Uploading results to s3://{results_bucket}/{s3_key}")

            self.s3_client.put_object(
                Bucket=results_bucket,
                Key=s3_key,
                Body=content,
                ContentType=content_type,
                Metadata={
                    'workflow_id': workflow_id,
                    'format': results_format,
                    'uploaded_at': datetime.now().isoformat()
                }
            )

            # Construct URL
            results_url = f"{self.endpoint_url}/{results_bucket}/{s3_key}"
            logger.info(f"✓ Uploaded results: {results_url}")

            return results_url

        except Exception as e:
            logger.error(f"Results upload failed: {e}", exc_info=True)
            raise StorageError(f"Results upload failed: {e}")

    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        """Get workflow results from S3/MinIO."""
        try:
            results_bucket = 'results'
            s3_key = f'{workflow_id}/results.json'

            logger.info(f"Downloading results from s3://{results_bucket}/{s3_key}")

            response = self.s3_client.get_object(
                Bucket=results_bucket,
                Key=s3_key
            )

            content = response['Body'].read().decode('utf-8')
            results = json.loads(content)

            logger.info(f"✓ Downloaded results for workflow {workflow_id}")
            return results

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ['404', 'NoSuchKey']:
                logger.error(f"Results not found: {workflow_id}")
                raise FileNotFoundError(f"Results for workflow {workflow_id} not found")
            else:
                logger.error(f"Results download failed: {e}", exc_info=True)
                raise StorageError(f"Results download failed: {e}")
        except Exception as e:
            logger.error(f"Results download error: {e}", exc_info=True)
            raise StorageError(f"Results download error: {e}")

    async def list_targets(
        self,
        user_id: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """List uploaded targets."""
        try:
            targets = []
            paginator = self.s3_client.get_paginator('list_objects_v2')

            for page in paginator.paginate(Bucket=self.bucket, PaginationConfig={'MaxItems': limit}):
                for obj in page.get('Contents', []):
                    # Get object metadata
                    try:
                        metadata_response = self.s3_client.head_object(
                            Bucket=self.bucket,
                            Key=obj['Key']
                        )
                        metadata = metadata_response.get('Metadata', {})

                        # Filter by user_id if specified
                        if user_id and metadata.get('user_id') != user_id:
                            continue

                        targets.append({
                            'target_id': obj['Key'].split('/')[0],
                            'key': obj['Key'],
                            'size': obj['Size'],
                            'last_modified': obj['LastModified'].isoformat(),
                            'metadata': metadata
                        })

                    except Exception as e:
                        logger.warning(f"Failed to get metadata for {obj['Key']}: {e}")
                        continue

            logger.info(f"Listed {len(targets)} targets (user_id={user_id})")
            return targets

        except Exception as e:
            logger.error(f"List targets failed: {e}", exc_info=True)
            raise StorageError(f"List targets failed: {e}")

    async def cleanup_cache(self) -> int:
        """Clean up local cache using LRU eviction."""
        try:
            cache_files = []
            total_size = 0

            # Gather all cached files with metadata
            for cache_file in self.cache_dir.rglob('*'):
                if cache_file.is_file():
                    try:
                        stat = cache_file.stat()
                        cache_files.append({
                            'path': cache_file,
                            'size': stat.st_size,
                            'atime': stat.st_atime  # Last access time
                        })
                        total_size += stat.st_size
                    except Exception as e:
                        logger.warning(f"Failed to stat {cache_file}: {e}")
                        continue

            # Check if cleanup is needed
            if total_size <= self.cache_max_size:
                logger.info(
                    f"Cache size OK: {total_size / (1024**3):.2f} GB / "
                    f"{self.cache_max_size / (1024**3):.2f} GB"
                )
                return 0

            # Sort by access time (oldest first)
            cache_files.sort(key=lambda x: x['atime'])

            # Remove files until under limit
            removed_count = 0
            for file_info in cache_files:
                if total_size <= self.cache_max_size:
                    break

                try:
                    file_info['path'].unlink()
                    total_size -= file_info['size']
                    removed_count += 1
                    logger.debug(f"Evicted from cache: {file_info['path']}")
                except Exception as e:
                    logger.warning(f"Failed to delete {file_info['path']}: {e}")
                    continue

            logger.info(
                f"✓ Cache cleanup: removed {removed_count} files, "
                f"new size: {total_size / (1024**3):.2f} GB"
            )
            return removed_count

        except Exception as e:
            logger.error(f"Cache cleanup failed: {e}", exc_info=True)
            raise StorageError(f"Cache cleanup failed: {e}")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        try:
            total_size = 0
            file_count = 0

            for cache_file in self.cache_dir.rglob('*'):
                if cache_file.is_file():
                    total_size += cache_file.stat().st_size
                    file_count += 1

            return {
                'total_size_bytes': total_size,
                'total_size_gb': total_size / (1024 ** 3),
                'file_count': file_count,
                'max_size_gb': self.cache_max_size / (1024 ** 3),
                'usage_percent': (total_size / self.cache_max_size) * 100
            }
        except Exception as e:
            logger.error(f"Failed to get cache stats: {e}")
            return {'error': str(e)}
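
# Usage sketch (illustrative only; assumes a MinIO instance reachable at
# S3_ENDPOINT and that this runs inside an event loop, e.g. asyncio.run(...)):
#
#   storage = S3CachedStorage(cache_max_size_gb=5)
#   target_id = await storage.upload_target(Path("app.bin"), user_id="alice")
#   local = await storage.get_target(target_id)   # cache miss: downloads from S3
#   local = await storage.get_target(target_id)   # cache hit: served locally
#   await storage.cleanup_cache()                 # LRU-evict if over the size limit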
@@ -1,10 +0,0 @@
"""
Temporal integration for FuzzForge.

Handles workflow execution, monitoring, and management.
"""

from .manager import TemporalManager
from .discovery import WorkflowDiscovery

__all__ = ["TemporalManager", "WorkflowDiscovery"]
@@ -1,257 +0,0 @@
"""
Workflow Discovery for Temporal

Discovers workflows from the toolbox/workflows directory
and provides metadata about available workflows.
"""

import logging
import yaml
from pathlib import Path
from typing import Dict, Any
from pydantic import BaseModel, Field, ConfigDict

logger = logging.getLogger(__name__)


class WorkflowInfo(BaseModel):
    """Information about a discovered workflow"""
    name: str = Field(..., description="Workflow name")
    path: Path = Field(..., description="Path to workflow directory")
    workflow_file: Path = Field(..., description="Path to workflow.py file")
    metadata: Dict[str, Any] = Field(..., description="Workflow metadata from YAML")
    workflow_type: str = Field(..., description="Workflow class name")
    vertical: str = Field(..., description="Vertical (worker type) for this workflow")

    model_config = ConfigDict(arbitrary_types_allowed=True)


class WorkflowDiscovery:
    """
    Discovers workflows from the filesystem.

    Scans toolbox/workflows/ for directories containing:
    - metadata.yaml (required)
    - workflow.py (required)

    Each workflow declares its vertical (rust, android, web, etc.)
    which determines which worker pool will execute it.
    """

    def __init__(self, workflows_dir: Path):
        """
        Initialize workflow discovery.

        Args:
            workflows_dir: Path to the workflows directory
        """
        self.workflows_dir = workflows_dir
        if not self.workflows_dir.exists():
            self.workflows_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created workflows directory: {self.workflows_dir}")

    async def discover_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Discover workflows by scanning the workflows directory.

        Returns:
            Dictionary mapping workflow names to their information
        """
        workflows = {}

        logger.info(f"Scanning for workflows in: {self.workflows_dir}")

        for workflow_dir in self.workflows_dir.iterdir():
            if not workflow_dir.is_dir():
                continue

            # Skip special directories
            if workflow_dir.name.startswith('.') or workflow_dir.name == '__pycache__':
                continue

            metadata_file = workflow_dir / "metadata.yaml"
            if not metadata_file.exists():
                logger.debug(f"No metadata.yaml in {workflow_dir.name}, skipping")
                continue

            workflow_file = workflow_dir / "workflow.py"
            if not workflow_file.exists():
                logger.warning(
                    f"Workflow {workflow_dir.name} has metadata but no workflow.py, skipping"
                )
                continue

            try:
                # Parse metadata
                with open(metadata_file) as f:
                    metadata = yaml.safe_load(f)

                # Validate required fields
                if 'name' not in metadata:
                    logger.warning(f"Workflow {workflow_dir.name} metadata missing 'name' field")
                    metadata['name'] = workflow_dir.name

                if 'vertical' not in metadata:
                    logger.warning(
                        f"Workflow {workflow_dir.name} metadata missing 'vertical' field"
                    )
                    continue

                # Infer workflow class name from metadata or use convention
                workflow_type = metadata.get('workflow_class')
                if not workflow_type:
                    # Convention: convert snake_case to PascalCase + Workflow
                    # e.g., rust_test -> RustTestWorkflow
                    parts = workflow_dir.name.split('_')
                    workflow_type = ''.join(part.capitalize() for part in parts) + 'Workflow'

                # Create workflow info
                info = WorkflowInfo(
                    name=metadata['name'],
                    path=workflow_dir,
                    workflow_file=workflow_file,
                    metadata=metadata,
                    workflow_type=workflow_type,
                    vertical=metadata['vertical']
                )

                workflows[info.name] = info
                logger.info(
                    f"✓ Discovered workflow: {info.name} "
                    f"(vertical: {info.vertical}, class: {info.workflow_type})"
                )

            except Exception as e:
                logger.error(
                    f"Error discovering workflow {workflow_dir.name}: {e}",
                    exc_info=True
                )
                continue

        logger.info(f"Discovered {len(workflows)} workflows")
        return workflows

    def get_workflows_by_vertical(
        self,
        workflows: Dict[str, WorkflowInfo],
        vertical: str
    ) -> Dict[str, WorkflowInfo]:
        """
        Filter workflows by vertical.

        Args:
            workflows: All discovered workflows
            vertical: Vertical name to filter by

        Returns:
            Filtered workflows dictionary
        """
        return {
            name: info
            for name, info in workflows.items()
            if info.vertical == vertical
        }

    def get_available_verticals(self, workflows: Dict[str, WorkflowInfo]) -> list[str]:
        """
        Get list of all verticals from discovered workflows.

        Args:
            workflows: All discovered workflows

        Returns:
            List of unique vertical names
        """
        return list(set(info.vertical for info in workflows.values()))

    @staticmethod
    def get_metadata_schema() -> Dict[str, Any]:
        """
        Get the JSON schema for workflow metadata.

        Returns:
            JSON schema dictionary
        """
        return {
            "type": "object",
            "required": ["name", "version", "description", "author", "vertical", "parameters"],
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Workflow name"
                },
                "version": {
                    "type": "string",
                    "pattern": "^\\d+\\.\\d+\\.\\d+$",
                    "description": "Semantic version (x.y.z)"
                },
                "vertical": {
                    "type": "string",
                    "description": "Vertical worker type (rust, android, web, etc.)"
                },
                "description": {
                    "type": "string",
                    "description": "Workflow description"
                },
                "author": {
                    "type": "string",
                    "description": "Workflow author"
                },
                "category": {
                    "type": "string",
                    "enum": ["comprehensive", "specialized", "fuzzing", "focused"],
                    "description": "Workflow category"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Workflow tags for categorization"
                },
                "requirements": {
                    "type": "object",
                    "required": ["tools", "resources"],
                    "properties": {
                        "tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Required security tools"
                        },
                        "resources": {
                            "type": "object",
                            "required": ["memory", "cpu", "timeout"],
                            "properties": {
                                "memory": {
                                    "type": "string",
                                    "pattern": "^\\d+[GMK]i$",
                                    "description": "Memory limit (e.g., 1Gi, 512Mi)"
                                },
                                "cpu": {
                                    "type": "string",
                                    "pattern": "^\\d+m?$",
                                    "description": "CPU limit (e.g., 1000m, 2)"
                                },
                                "timeout": {
                                    "type": "integer",
                                    "minimum": 60,
                                    "maximum": 7200,
                                    "description": "Workflow timeout in seconds"
                                }
                            }
                        }
                    }
                },
                "parameters": {
                    "type": "object",
                    "description": "Workflow parameters schema"
                },
                "default_parameters": {
                    "type": "object",
                    "description": "Default parameter values"
                },
                "required_modules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Required module names"
                }
            }
        }
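
# Illustrative metadata.yaml satisfying the schema above (all values are
# made-up examples, not a real workflow):
#
#   name: rust_test
#   version: 1.0.0
#   description: Fuzz a Rust crate with cargo-fuzz
#   author: FuzzingLabs
#   vertical: rust
#   requirements:
#     tools: [cargo-fuzz]
#     resources:
#       memory: 1Gi
#       cpu: 1000m
#       timeout: 3600
#   parameters:
#     properties:
#       max_iterations:
#         type: integer
#         default: 1000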
@@ -1,392 +0,0 @@
"""
Temporal Manager - Workflow execution and management

Handles:
- Workflow discovery from the toolbox
- Workflow execution (submit to Temporal)
- Status monitoring
- Results retrieval
"""

import logging
import os
from pathlib import Path
from typing import Dict, Optional, Any
from uuid import uuid4

from temporalio.client import Client, WorkflowHandle
from temporalio.common import RetryPolicy
from datetime import timedelta

from .discovery import WorkflowDiscovery, WorkflowInfo
from src.storage import S3CachedStorage

logger = logging.getLogger(__name__)


class TemporalManager:
    """
    Manages Temporal workflow execution for FuzzForge.

    This class:
    - Discovers available workflows from the toolbox
    - Submits workflow executions to Temporal
    - Monitors workflow status
    - Retrieves workflow results
    """

    def __init__(
        self,
        workflows_dir: Optional[Path] = None,
        temporal_address: Optional[str] = None,
        temporal_namespace: str = "default",
        storage: Optional[S3CachedStorage] = None
    ):
        """
        Initialize Temporal manager.

        Args:
            workflows_dir: Path to workflows directory (default: toolbox/workflows)
            temporal_address: Temporal server address (default: from env or localhost:7233)
            temporal_namespace: Temporal namespace
            storage: Storage backend for file uploads (default: S3CachedStorage)
        """
        if workflows_dir is None:
            workflows_dir = Path("toolbox/workflows")

        self.temporal_address = temporal_address or os.getenv(
            'TEMPORAL_ADDRESS',
            'localhost:7233'
        )
        self.temporal_namespace = temporal_namespace
        self.discovery = WorkflowDiscovery(workflows_dir)
        self.workflows: Dict[str, WorkflowInfo] = {}
        self.client: Optional[Client] = None

        # Initialize storage backend
        self.storage = storage or S3CachedStorage()

        logger.info(
            f"TemporalManager initialized: {self.temporal_address} "
            f"(namespace: {self.temporal_namespace})"
        )

    async def initialize(self):
        """Initialize the manager by discovering workflows and connecting to Temporal."""
        try:
            # Discover workflows
            self.workflows = await self.discovery.discover_workflows()

            if not self.workflows:
                logger.warning("No workflows discovered")
            else:
                logger.info(
                    f"Discovered {len(self.workflows)} workflows: "
                    f"{list(self.workflows.keys())}"
                )

            # Connect to Temporal
            self.client = await Client.connect(
                self.temporal_address,
                namespace=self.temporal_namespace
            )
            logger.info(f"✓ Connected to Temporal: {self.temporal_address}")

        except Exception as e:
            logger.error(f"Failed to initialize Temporal manager: {e}", exc_info=True)
            raise

    async def close(self):
        """Close Temporal client connection."""
        if self.client:
            # The Temporal Python SDK client does not require an explicit close
            pass

    async def get_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Get all discovered workflows.

        Returns:
            Dictionary mapping workflow names to their info
        """
        return self.workflows

    async def get_workflow(self, name: str) -> Optional[WorkflowInfo]:
        """
        Get workflow info by name.

        Args:
            name: Workflow name

        Returns:
            WorkflowInfo or None if not found
        """
        return self.workflows.get(name)

    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Upload target file to storage.

        Args:
            file_path: Local path to file
            user_id: User ID
            metadata: Optional metadata

        Returns:
            Target ID for use in workflow execution
        """
        target_id = await self.storage.upload_target(file_path, user_id, metadata)
        logger.info(f"Uploaded target: {target_id}")
        return target_id

    async def run_workflow(
        self,
        workflow_name: str,
        target_id: str,
        workflow_params: Optional[Dict[str, Any]] = None,
        workflow_id: Optional[str] = None
    ) -> WorkflowHandle:
        """
        Execute a workflow.

        Args:
            workflow_name: Name of workflow to execute
            target_id: Target ID (from upload_target)
            workflow_params: Additional workflow parameters
            workflow_id: Optional workflow ID (generated if not provided)

        Returns:
            WorkflowHandle for monitoring/results

        Raises:
            ValueError: If workflow not found or client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized. Call initialize() first.")

        # Get workflow info
        workflow_info = self.workflows.get(workflow_name)
        if not workflow_info:
            raise ValueError(f"Workflow not found: {workflow_name}")

        # Generate workflow ID if not provided
        if not workflow_id:
            workflow_id = f"{workflow_name}-{str(uuid4())[:8]}"

        # Prepare workflow input arguments
        workflow_params = workflow_params or {}

        # Build args list: [target_id, ...workflow_params in schema order]
        # The workflow parameters are passed as individual positional args
        workflow_args = [target_id]

        # Add parameters in schema order so they match the workflow signature,
        # applying defaults from metadata.yaml when a parameter is not provided
        if 'parameters' in workflow_info.metadata:
            param_schema = workflow_info.metadata['parameters'].get('properties', {})
            logger.debug(f"Found {len(param_schema)} parameters in schema")
            # Iterate parameters in schema order and add values
            for param_name in param_schema.keys():
                param_spec = param_schema[param_name]

                # Use provided param, or fall back to default from metadata
                if param_name in workflow_params:
                    param_value = workflow_params[param_name]
                    logger.debug(f"Using provided value for {param_name}: {param_value}")
                elif 'default' in param_spec:
                    param_value = param_spec['default']
                    logger.debug(f"Using default for {param_name}: {param_value}")
                else:
                    param_value = None
                    logger.debug(f"No value or default for {param_name}, using None")

                workflow_args.append(param_value)
        else:
            logger.debug("No 'parameters' section found in workflow metadata")

        # Determine task queue from workflow vertical
        vertical = workflow_info.metadata.get("vertical", "default")
        task_queue = f"{vertical}-queue"

        logger.info(
            f"Starting workflow: {workflow_name} "
            f"(id={workflow_id}, queue={task_queue}, target={target_id})"
        )
        logger.debug(f"workflow_args = {workflow_args}")
        logger.debug(f"workflow_params received = {workflow_params}")

        try:
            # Start workflow execution with positional arguments
            handle = await self.client.start_workflow(
                workflow=workflow_info.workflow_type,  # Workflow class name
                args=workflow_args,  # Positional arguments
                id=workflow_id,
                task_queue=task_queue,
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=1),
                    maximum_interval=timedelta(minutes=1),
                    maximum_attempts=3
                )
            )

            logger.info(f"✓ Workflow started: {workflow_id}")
            return handle

        except Exception as e:
            logger.error(f"Failed to start workflow {workflow_name}: {e}", exc_info=True)
            raise

    async def get_workflow_status(self, workflow_id: str) -> Dict[str, Any]:
        """
        Get workflow execution status.

        Args:
            workflow_id: Workflow execution ID

        Returns:
            Status dictionary with workflow state

        Raises:
            ValueError: If client not initialized or workflow not found
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            # Get workflow handle
            handle = self.client.get_workflow_handle(workflow_id)

            # Describe is non-blocking; it does not wait for the result
            description = await handle.describe()

            status = {
                "workflow_id": workflow_id,
                "status": description.status.name,
                "start_time": description.start_time.isoformat() if description.start_time else None,
                "execution_time": description.execution_time.isoformat() if description.execution_time else None,
                "close_time": description.close_time.isoformat() if description.close_time else None,
                "task_queue": description.task_queue,
            }

            logger.info(f"Workflow {workflow_id} status: {status['status']}")
            return status

        except Exception as e:
            logger.error(f"Failed to get workflow status: {e}", exc_info=True)
            raise

    async def get_workflow_result(
        self,
        workflow_id: str,
        timeout: Optional[timedelta] = None
    ) -> Any:
        """
        Get workflow execution result (blocking).

        Args:
            workflow_id: Workflow execution ID
            timeout: Maximum time to wait for result

        Returns:
            Workflow result

        Raises:
            ValueError: If client not initialized
            TimeoutError: If timeout exceeded
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            handle = self.client.get_workflow_handle(workflow_id)

            logger.info(f"Waiting for workflow result: {workflow_id}")

            # Wait for workflow to complete and get result
            if timeout:
                # Use asyncio timeout if provided
                import asyncio
                result = await asyncio.wait_for(handle.result(), timeout=timeout.total_seconds())
            else:
                result = await handle.result()

            logger.info(f"✓ Workflow {workflow_id} completed")
            return result

        except Exception as e:
            logger.error(f"Failed to get workflow result: {e}", exc_info=True)
            raise

    async def cancel_workflow(self, workflow_id: str) -> None:
        """
        Cancel a running workflow.

        Args:
            workflow_id: Workflow execution ID

        Raises:
            ValueError: If client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            handle = self.client.get_workflow_handle(workflow_id)
            await handle.cancel()

            logger.info(f"✓ Workflow cancelled: {workflow_id}")

        except Exception as e:
            logger.error(f"Failed to cancel workflow: {e}", exc_info=True)
            raise

    async def list_workflows(
        self,
        filter_query: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """
        List workflow executions.

        Args:
            filter_query: Optional Temporal list filter query
            limit: Maximum number of results

        Returns:
            List of workflow execution info

        Raises:
            ValueError: If client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            workflows = []

            # Use Temporal's list API
            async for workflow in self.client.list_workflows(filter_query):
                workflows.append({
                    "workflow_id": workflow.id,
                    "workflow_type": workflow.workflow_type,
                    "status": workflow.status.name,
                    "start_time": workflow.start_time.isoformat() if workflow.start_time else None,
                    "close_time": workflow.close_time.isoformat() if workflow.close_time else None,
                    "task_queue": workflow.task_queue,
                })

                if len(workflows) >= limit:
                    break

            logger.info(f"Listed {len(workflows)} workflows")
            return workflows

        except Exception as e:
            logger.error(f"Failed to list workflows: {e}", exc_info=True)
            raise
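
# End-to-end sketch (illustrative only; assumes a reachable Temporal server,
# a running MinIO, and a discovered workflow named "rust_test"):
#
#   manager = TemporalManager()
#   await manager.initialize()
#   target_id = await manager.upload_target(Path("crate.tar.gz"), user_id="alice")
#   handle = await manager.run_workflow(
#       "rust_test", target_id, workflow_params={"max_iterations": 5000}
#   )
#   result = await manager.get_workflow_result(handle.id)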
@@ -1,119 +0,0 @@
# FuzzForge Test Suite

Comprehensive test infrastructure for FuzzForge modules and workflows.

## Directory Structure

```
tests/
├── conftest.py              # Shared pytest fixtures
├── unit/                    # Fast, isolated unit tests
│   ├── test_modules/        # Module-specific tests
│   │   ├── test_cargo_fuzzer.py
│   │   └── test_atheris_fuzzer.py
│   ├── test_workflows/      # Workflow tests
│   └── test_api/            # API endpoint tests
├── integration/             # Integration tests (requires Docker)
└── fixtures/                # Test data and projects
    ├── test_projects/       # Vulnerable projects for testing
    └── expected_results/    # Expected output for validation
```

## Running Tests

### All Tests
```bash
cd backend
pytest tests/ -v
```

### Unit Tests Only (Fast)
```bash
pytest tests/unit/ -v
```

### Integration Tests (Requires Docker)
```bash
# Start services
docker-compose up -d

# Run integration tests
pytest tests/integration/ -v

# Cleanup
docker-compose down
```

### With Coverage
```bash
pytest tests/ --cov=toolbox/modules --cov=src --cov-report=html
```

### Parallel Execution
Requires the `pytest-xdist` plugin:
```bash
pytest tests/unit/ -n auto
```

## Available Fixtures

### Workspace Fixtures
- `temp_workspace`: Empty temporary workspace
- `python_test_workspace`: Python project with vulnerabilities
- `rust_test_workspace`: Rust project with fuzz targets

### Module Fixtures
- `atheris_fuzzer`: AtherisFuzzer instance
- `cargo_fuzzer`: CargoFuzzer instance
- `file_scanner`: FileScanner instance

### Configuration Fixtures
- `atheris_config`: Default Atheris configuration
- `cargo_fuzz_config`: Default cargo-fuzz configuration
- `gitleaks_config`: Default Gitleaks configuration
- `file_scanner_config`: Default file scanner configuration

### Mock Fixtures
- `mock_stats_callback`: Mock stats callback for fuzzing
- `mock_temporal_context`: Mock Temporal activity context

## Writing Tests

### Unit Test Example
```python
import pytest

@pytest.mark.asyncio
async def test_module_execution(cargo_fuzzer, rust_test_workspace, cargo_fuzz_config):
    """Test module execution"""
    result = await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace)

    assert result.status == "success"
    assert result.execution_time > 0
```

### Integration Test Example
```python
@pytest.mark.integration
async def test_end_to_end_workflow():
    """Test complete workflow execution"""
    # Test full workflow with real services
    pass
```

## CI/CD Integration

Tests run automatically on:
- **Push to main/develop**: Full test suite
- **Pull requests**: Full test suite + coverage
- **Nightly**: Extended integration tests

See `.github/workflows/test.yml` for configuration.

## Code Coverage

Target coverage: **80%+** for core modules

View coverage report:
```bash
pytest tests/ --cov --cov-report=html
open htmlcov/index.html
```
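
### Mock Fixture Example

A minimal sketch of driving `mock_stats_callback` directly (illustrative only; the fixture simply records every stats dict it receives):

```python
import pytest

@pytest.mark.asyncio
async def test_stats_capture(mock_stats_callback):
    await mock_stats_callback({"total_execs": 1, "crashes": 0})
    assert mock_stats_callback.stats_received[0]["total_execs"] == 1
```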
@@ -11,220 +11,9 @@

import sys
from pathlib import Path
from typing import Dict, Any
import pytest

# Ensure project root is on sys.path so `src` is importable
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Add toolbox to path for module imports
TOOLBOX = ROOT / "toolbox"
if str(TOOLBOX) not in sys.path:
    sys.path.insert(0, str(TOOLBOX))


# ============================================================================
# Workspace Fixtures
# ============================================================================

@pytest.fixture
def temp_workspace(tmp_path):
    """Create a temporary workspace directory for testing"""
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    return workspace


@pytest.fixture
def python_test_workspace(temp_workspace):
    """Create a Python test workspace with sample files"""
    # Create a simple Python project structure
    (temp_workspace / "main.py").write_text("""
def process_data(data):
    # Intentional bug: no bounds checking
    return data[0:100]

def divide(a, b):
    # Division by zero vulnerability
    return a / b
""")

    (temp_workspace / "config.py").write_text("""
# Hardcoded secrets for testing
API_KEY = "sk_test_1234567890abcdef"
DATABASE_URL = "postgresql://admin:password123@localhost/db"
AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
""")

    return temp_workspace


@pytest.fixture
def rust_test_workspace(temp_workspace):
    """Create a Rust test workspace with fuzz targets"""
    # Create Cargo.toml
    (temp_workspace / "Cargo.toml").write_text("""[package]
name = "test_project"
version = "0.1.0"
edition = "2021"

[dependencies]
""")

    # Create src/lib.rs
    src_dir = temp_workspace / "src"
    src_dir.mkdir()
    (src_dir / "lib.rs").write_text("""
pub fn process_buffer(data: &[u8]) -> Vec<u8> {
    if data.len() < 4 {
        return Vec::new();
    }

    // Vulnerability: size is attacker-controlled and may exceed data.len()
    let size = data[0] as usize;
    let mut result = Vec::new();
    for i in 0..size {
        result.push(data[i]);
    }
    result
}
""")

    # Create fuzz directory structure
    fuzz_dir = temp_workspace / "fuzz"
    fuzz_dir.mkdir()

    (fuzz_dir / "Cargo.toml").write_text("""[package]
name = "test_project-fuzz"
version = "0.0.0"
edition = "2021"

[dependencies]
libfuzzer-sys = "0.4"

[dependencies.test_project]
path = ".."

[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
""")

    fuzz_targets_dir = fuzz_dir / "fuzz_targets"
    fuzz_targets_dir.mkdir()

    (fuzz_targets_dir / "fuzz_target_1.rs").write_text("""#![no_main]
use libfuzzer_sys::fuzz_target;
use test_project::process_buffer;

fuzz_target!(|data: &[u8]| {
    let _ = process_buffer(data);
});
""")

    return temp_workspace


# ============================================================================
# Module Configuration Fixtures
# ============================================================================

@pytest.fixture
def atheris_config():
    """Default Atheris fuzzer configuration"""
    return {
        "target_file": "auto-discover",
        "max_iterations": 1000,
        "timeout_seconds": 10,
        "corpus_dir": None
    }


@pytest.fixture
def cargo_fuzz_config():
    """Default cargo-fuzz configuration"""
    return {
        "target_name": None,
        "max_iterations": 1000,
        "timeout_seconds": 10,
        "sanitizer": "address"
    }


@pytest.fixture
def gitleaks_config():
    """Default Gitleaks configuration"""
    return {
        "config_path": None,
        "scan_uncommitted": True
    }


@pytest.fixture
def file_scanner_config():
    """Default file scanner configuration"""
    return {
        "scan_patterns": ["*.py", "*.rs", "*.js"],
        "exclude_patterns": ["*.test.*", "*.spec.*"],
        "max_file_size": 1048576  # 1MB
    }


# ============================================================================
# Module Instance Fixtures
# ============================================================================

@pytest.fixture
def atheris_fuzzer():
    """Create an AtherisFuzzer instance"""
    from modules.fuzzer.atheris_fuzzer import AtherisFuzzer
    return AtherisFuzzer()


@pytest.fixture
def cargo_fuzzer():
    """Create a CargoFuzzer instance"""
    from modules.fuzzer.cargo_fuzzer import CargoFuzzer
    return CargoFuzzer()


@pytest.fixture
def file_scanner():
    """Create a FileScanner instance"""
    from modules.scanner.file_scanner import FileScanner
    return FileScanner()


# ============================================================================
# Mock Fixtures
# ============================================================================

@pytest.fixture
def mock_stats_callback():
    """Mock stats callback for fuzzing"""
    stats_received = []

    async def callback(stats: Dict[str, Any]):
        stats_received.append(stats)

    callback.stats_received = stats_received
    return callback


@pytest.fixture
def mock_temporal_context():
    """Mock Temporal activity context"""
    class MockActivityInfo:
        def __init__(self):
            self.workflow_id = "test-workflow-123"
            self.activity_id = "test-activity-1"
            self.attempt = 1

    class MockContext:
        def __init__(self):
            self.info = MockActivityInfo()

    return MockContext()
0 backend/tests/fixtures/__init__.py vendored
82 backend/tests/test_prefect_stats_monitor.py Normal file
@@ -0,0 +1,82 @@
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
from datetime import datetime, timezone, timedelta

from src.services.prefect_stats_monitor import PrefectStatsMonitor
from src.api import fuzzing


class FakeLog:
    def __init__(self, message: str):
        self.message = message


class FakeClient:
    def __init__(self, logs):
        self._logs = logs

    async def read_logs(self, log_filter=None, limit=100, sort="TIMESTAMP_ASC"):
        return self._logs


class FakeTaskRun:
    def __init__(self):
        self.id = "task-1"
        self.start_time = datetime.now(timezone.utc) - timedelta(seconds=5)


def test_parse_stats_from_log_fuzzing():
    mon = PrefectStatsMonitor()
    msg = (
        "INFO LIVE_STATS extra={'stats_type': 'fuzzing_live_update', "
        "'executions': 42, 'executions_per_sec': 3.14, 'crashes': 1, 'unique_crashes': 1, 'corpus_size': 9}"
    )
    stats = mon._parse_stats_from_log(msg)
    assert stats is not None
    assert stats["stats_type"] == "fuzzing_live_update"
    assert stats["executions"] == 42


def test_extract_stats_updates_and_broadcasts():
    mon = PrefectStatsMonitor()
    run_id = "run-123"
    workflow = "wf"
    fuzzing.initialize_fuzzing_tracking(run_id, workflow)

    # Prepare a fake websocket to capture messages
    sent = []

    class FakeWS:
        async def send_text(self, text: str):
            sent.append(text)

    fuzzing.active_connections[run_id] = [FakeWS()]

    # Craft a log line the parser understands
    msg = (
        "INFO LIVE_STATS extra={'stats_type': 'fuzzing_live_update', "
        "'executions': 10, 'executions_per_sec': 1.5, 'crashes': 0, 'unique_crashes': 0, 'corpus_size': 2}"
    )
    fake_client = FakeClient([FakeLog(msg)])
    task_run = FakeTaskRun()

    asyncio.run(mon._extract_stats_from_task(fake_client, run_id, task_run, workflow))

    # Verify stats updated
    stats = fuzzing.fuzzing_stats[run_id]
    assert stats.executions == 10
    assert stats.executions_per_sec == 1.5

    # Verify a message was sent to WebSocket
    assert sent, "Expected a stats_update message to be sent"
@@ -1,177 +0,0 @@
"""
Unit tests for AtherisFuzzer module
"""

import pytest
from unittest.mock import AsyncMock, patch


@pytest.mark.asyncio
class TestAtherisFuzzerMetadata:
    """Test AtherisFuzzer metadata"""

    async def test_metadata_structure(self, atheris_fuzzer):
        """Test that module metadata is properly defined"""
        metadata = atheris_fuzzer.get_metadata()

        assert metadata.name == "atheris_fuzzer"
        assert metadata.category == "fuzzer"
        assert "fuzzing" in metadata.tags
        assert "python" in metadata.tags


@pytest.mark.asyncio
class TestAtherisFuzzerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, atheris_fuzzer, atheris_config):
        """Test validation of valid configuration"""
        assert atheris_fuzzer.validate_config(atheris_config) is True

    async def test_invalid_max_iterations(self, atheris_fuzzer):
        """Test validation fails with invalid max_iterations"""
        config = {
            "target_file": "fuzz_target.py",
            "max_iterations": -1,
            "timeout_seconds": 10
        }
        with pytest.raises(ValueError, match="max_iterations"):
            atheris_fuzzer.validate_config(config)

    async def test_invalid_timeout(self, atheris_fuzzer):
        """Test validation fails with invalid timeout"""
        config = {
            "target_file": "fuzz_target.py",
            "max_iterations": 1000,
            "timeout_seconds": 0
        }
        with pytest.raises(ValueError, match="timeout_seconds"):
            atheris_fuzzer.validate_config(config)


@pytest.mark.asyncio
class TestAtherisFuzzerDiscovery:
    """Test fuzz target discovery"""

    async def test_auto_discover(self, atheris_fuzzer, python_test_workspace):
        """Test auto-discovery of Python fuzz targets"""
        # Create a fuzz target file
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Pass None for auto-discovery
        target = atheris_fuzzer._discover_target(python_test_workspace, None)

        assert target is not None
        assert "fuzz_target.py" in str(target)


@pytest.mark.asyncio
class TestAtherisFuzzerExecution:
    """Test fuzzer execution logic"""

    async def test_execution_creates_result(self, atheris_fuzzer, python_test_workspace, atheris_config):
        """Test that execution returns a ModuleResult"""
        # Create a simple fuzz target
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    if len(data) > 0:
        pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Use a very short timeout for testing
        test_config = {
            "target_file": "fuzz_target.py",
            "max_iterations": 10,
            "timeout_seconds": 1
        }

        # Mock the fuzzing subprocess to avoid actual execution
        with patch.object(atheris_fuzzer, '_run_fuzzing', new_callable=AsyncMock, return_value=([], {"total_executions": 10})):
            result = await atheris_fuzzer.execute(test_config, python_test_workspace)

        assert result.module == "atheris_fuzzer"
        assert result.status in ["success", "partial", "failed"]
        assert isinstance(result.execution_time, float)


@pytest.mark.asyncio
class TestAtherisFuzzerStatsCallback:
    """Test stats callback functionality"""

    async def test_stats_callback_invoked(self, atheris_fuzzer, python_test_workspace, atheris_config, mock_stats_callback):
        """Test that stats callback is invoked during fuzzing"""
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Mock fuzzing to simulate stats
        async def mock_run_fuzzing(test_one_input, target_path, workspace, max_iterations, timeout_seconds, stats_callback):
            if stats_callback:
                await stats_callback({
                    "total_execs": 100,
                    "execs_per_sec": 10.0,
                    "crashes": 0,
                    "coverage": 5,
                    "corpus_size": 2,
                    "elapsed_time": 10
                })
            return

        with patch.object(atheris_fuzzer, '_run_fuzzing', side_effect=mock_run_fuzzing):
            with patch.object(atheris_fuzzer, '_load_target_module', return_value=lambda x: None):
                # Put stats_callback in config dict, not as kwarg
                atheris_config["target_file"] = "fuzz_target.py"
                atheris_config["stats_callback"] = mock_stats_callback
                await atheris_fuzzer.execute(atheris_config, python_test_workspace)

        # Verify callback was invoked
        assert len(mock_stats_callback.stats_received) > 0


@pytest.mark.asyncio
class TestAtherisFuzzerFindingGeneration:
    """Test finding generation from crashes"""

    async def test_create_crash_finding(self, atheris_fuzzer):
        """Test crash finding creation"""
        finding = atheris_fuzzer.create_finding(
            title="Crash: Exception in TestOneInput",
            description="IndexError: list index out of range",
            severity="high",
            category="crash",
            file_path="fuzz_target.py",
            metadata={
                "crash_type": "IndexError",
                "stack_trace": "Traceback..."
            }
        )

        assert finding.title == "Crash: Exception in TestOneInput"
        assert finding.severity == "high"
        assert finding.category == "crash"
        assert "IndexError" in finding.metadata["crash_type"]
@@ -1,177 +0,0 @@
|
||||
"""
|
||||
Unit tests for CargoFuzzer module
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestCargoFuzzerMetadata:
|
||||
"""Test CargoFuzzer metadata"""
|
||||
|
||||
async def test_metadata_structure(self, cargo_fuzzer):
|
||||
"""Test that module metadata is properly defined"""
|
||||
metadata = cargo_fuzzer.get_metadata()
|
||||
|
||||
assert metadata.name == "cargo_fuzz"
|
||||
assert metadata.version == "0.11.2"
|
||||
assert metadata.category == "fuzzer"
|
||||
assert "fuzzing" in metadata.tags
|
||||
assert "rust" in metadata.tags
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestCargoFuzzerConfigValidation:
|
||||
"""Test configuration validation"""
|
||||
|
||||
async def test_valid_config(self, cargo_fuzzer, cargo_fuzz_config):
|
||||
"""Test validation of valid configuration"""
|
||||
assert cargo_fuzzer.validate_config(cargo_fuzz_config) is True
|
||||
|
||||
async def test_invalid_max_iterations(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid max_iterations"""
|
||||
config = {
|
||||
"max_iterations": -1,
|
||||
"timeout_seconds": 10,
|
||||
"sanitizer": "address"
|
||||
}
|
||||
with pytest.raises(ValueError, match="max_iterations"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
async def test_invalid_timeout(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid timeout"""
|
||||
config = {
|
||||
"max_iterations": 1000,
|
||||
"timeout_seconds": 0,
|
||||
"sanitizer": "address"
|
||||
}
|
||||
with pytest.raises(ValueError, match="timeout_seconds"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
async def test_invalid_sanitizer(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid sanitizer"""
|
||||
config = {
|
||||
"max_iterations": 1000,
|
||||
"timeout_seconds": 10,
|
||||
"sanitizer": "invalid_sanitizer"
|
||||
}
|
||||
with pytest.raises(ValueError, match="sanitizer"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestCargoFuzzerWorkspaceValidation:
    """Test workspace validation"""

    async def test_valid_workspace(self, cargo_fuzzer, rust_test_workspace):
        """Test validation of valid workspace"""
        assert cargo_fuzzer.validate_workspace(rust_test_workspace) is True

    async def test_nonexistent_workspace(self, cargo_fuzzer, tmp_path):
        """Test validation fails with nonexistent workspace"""
        nonexistent = tmp_path / "does_not_exist"
        with pytest.raises(ValueError, match="does not exist"):
            cargo_fuzzer.validate_workspace(nonexistent)

    async def test_workspace_is_file(self, cargo_fuzzer, tmp_path):
        """Test validation fails when workspace is a file"""
        file_path = tmp_path / "file.txt"
        file_path.write_text("test")
        with pytest.raises(ValueError, match="not a directory"):
            cargo_fuzzer.validate_workspace(file_path)


@pytest.mark.asyncio
class TestCargoFuzzerDiscovery:
    """Test fuzz target discovery"""

    async def test_discover_targets(self, cargo_fuzzer, rust_test_workspace):
        """Test discovery of fuzz targets"""
        targets = await cargo_fuzzer._discover_fuzz_targets(rust_test_workspace)

        assert len(targets) == 1
        assert "fuzz_target_1" in targets

    async def test_no_fuzz_directory(self, cargo_fuzzer, temp_workspace):
        """Test discovery with no fuzz directory"""
        targets = await cargo_fuzzer._discover_fuzz_targets(temp_workspace)

        assert targets == []


@pytest.mark.asyncio
class TestCargoFuzzerExecution:
    """Test fuzzer execution logic"""

    async def test_execution_creates_result(self, cargo_fuzzer, rust_test_workspace, cargo_fuzz_config):
        """Test that execution returns a ModuleResult"""
        # Mock the build and run methods to avoid actual fuzzing
        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', new_callable=AsyncMock, return_value=([], {"total_executions": 0, "crashes_found": 0})):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    result = await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace)

        assert result.module == "cargo_fuzz"
        assert result.status == "success"
        assert isinstance(result.execution_time, float)
        assert result.execution_time >= 0

    async def test_execution_with_no_targets(self, cargo_fuzzer, temp_workspace, cargo_fuzz_config):
        """Test execution fails gracefully with no fuzz targets"""
        result = await cargo_fuzzer.execute(cargo_fuzz_config, temp_workspace)

        assert result.status == "failed"
        assert "No fuzz targets found" in result.error


@pytest.mark.asyncio
class TestCargoFuzzerStatsCallback:
    """Test stats callback functionality"""

    async def test_stats_callback_invoked(self, cargo_fuzzer, rust_test_workspace, cargo_fuzz_config, mock_stats_callback):
        """Test that stats callback is invoked during fuzzing"""
        # Mock build/run to simulate stats generation
        async def mock_run_fuzzing(workspace, target, config, callback):
            # Simulate stats callback
            if callback:
                await callback({
                    "total_execs": 1000,
                    "execs_per_sec": 100.0,
                    "crashes": 0,
                    "coverage": 10,
                    "corpus_size": 5,
                    "elapsed_time": 10
                })
            return [], {"total_executions": 1000}

        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', side_effect=mock_run_fuzzing):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace, stats_callback=mock_stats_callback)

        # Verify callback was invoked
        assert len(mock_stats_callback.stats_received) > 0
        assert mock_stats_callback.stats_received[0]["total_execs"] == 1000


@pytest.mark.asyncio
class TestCargoFuzzerFindingGeneration:
    """Test finding generation from crashes"""

    async def test_create_finding_from_crash(self, cargo_fuzzer):
        """Test finding creation"""
        finding = cargo_fuzzer.create_finding(
            title="Crash: Segmentation Fault",
            description="Test crash",
            severity="critical",
            category="crash",
            file_path="fuzz/fuzz_targets/fuzz_target_1.rs",
            metadata={"crash_type": "SIGSEGV"}
        )

        assert finding.title == "Crash: Segmentation Fault"
        assert finding.severity == "critical"
        assert finding.category == "crash"
        assert finding.file_path == "fuzz/fuzz_targets/fuzz_target_1.rs"
        assert finding.metadata["crash_type"] == "SIGSEGV"
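
# The tests above assume a `mock_stats_callback` fixture defined in conftest.py
# (not shown in this diff). A minimal sketch of what would satisfy the contract
# the tests rely on, namely an awaitable object that records every stats payload
# it receives; the class name here is illustrative, not taken from the repository:

class RecordingStatsCallback:
    """Awaitable callback that records each stats dict it receives."""

    def __init__(self):
        self.stats_received = []

    async def __call__(self, stats: dict) -> None:
        self.stats_received.append(stats)


@pytest.fixture
def mock_stats_callback():
    return RecordingStatsCallback()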
@@ -1,349 +0,0 @@
"""
Unit tests for FileScanner module
"""

import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))


@pytest.mark.asyncio
class TestFileScannerMetadata:
    """Test FileScanner metadata"""

    async def test_metadata_structure(self, file_scanner):
        """Test that metadata has correct structure"""
        metadata = file_scanner.get_metadata()

        assert metadata.name == "file_scanner"
        assert metadata.version == "1.0.0"
        assert metadata.category == "scanner"
        assert "files" in metadata.tags
        assert "enumeration" in metadata.tags
        assert metadata.requires_workspace is True


@pytest.mark.asyncio
class TestFileScannerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, file_scanner):
        """Test that valid config passes validation"""
        config = {
            "patterns": ["*.py", "*.js"],
            "max_file_size": 1048576,
            "check_sensitive": True,
            "calculate_hashes": False
        }
        assert file_scanner.validate_config(config) is True

    async def test_default_config(self, file_scanner):
        """Test that empty config uses defaults"""
        config = {}
        assert file_scanner.validate_config(config) is True

    async def test_invalid_patterns_type(self, file_scanner):
        """Test that non-list patterns raises error"""
        config = {"patterns": "*.py"}
        with pytest.raises(ValueError, match="patterns must be a list"):
            file_scanner.validate_config(config)

    async def test_invalid_max_file_size(self, file_scanner):
        """Test that invalid max_file_size raises error"""
        config = {"max_file_size": -1}
        with pytest.raises(ValueError, match="max_file_size must be a positive integer"):
            file_scanner.validate_config(config)

    async def test_invalid_max_file_size_type(self, file_scanner):
        """Test that non-integer max_file_size raises error"""
        config = {"max_file_size": "large"}
        with pytest.raises(ValueError, match="max_file_size must be a positive integer"):
            file_scanner.validate_config(config)


@pytest.mark.asyncio
class TestFileScannerExecution:
    """Test scanner execution"""

    async def test_scan_python_files(self, file_scanner, python_test_workspace):
        """Test scanning Python files"""
        config = {
            "patterns": ["*.py"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.module == "file_scanner"
        assert result.status == "success"
        assert len(result.findings) > 0

        # Check that Python files were found
        python_files = [f for f in result.findings if f.file_path.endswith('.py')]
        assert len(python_files) > 0

    async def test_scan_all_files(self, file_scanner, python_test_workspace):
        """Test scanning all files with wildcard"""
        config = {
            "patterns": ["*"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert len(result.findings) > 0
        assert result.summary["total_files"] > 0

    async def test_scan_with_multiple_patterns(self, file_scanner, python_test_workspace):
        """Test scanning with multiple patterns"""
        config = {
            "patterns": ["*.py", "*.txt"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert len(result.findings) > 0

    async def test_empty_workspace(self, file_scanner, temp_workspace):
        """Test scanning empty workspace"""
        config = {
            "patterns": ["*.py"],
            "check_sensitive": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert len(result.findings) == 0
        assert result.summary["total_files"] == 0


@pytest.mark.asyncio
class TestFileScannerSensitiveDetection:
    """Test sensitive file detection"""

    async def test_detect_env_file(self, file_scanner, temp_workspace):
        """Test detection of .env file"""
        # Create .env file
        (temp_workspace / ".env").write_text("API_KEY=secret123")

        config = {
            "patterns": ["*"],
            "check_sensitive": True,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"

        # Check for sensitive file finding
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) > 0
        assert any(".env" in f.title for f in sensitive_findings)

    async def test_detect_private_key(self, file_scanner, temp_workspace):
        """Test detection of private key file"""
        # Create private key file
        (temp_workspace / "id_rsa").write_text("-----BEGIN RSA PRIVATE KEY-----")

        config = {
            "patterns": ["*"],
            "check_sensitive": True
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) > 0

    async def test_no_sensitive_detection_when_disabled(self, file_scanner, temp_workspace):
        """Test that sensitive detection can be disabled"""
        (temp_workspace / ".env").write_text("API_KEY=secret123")

        config = {
            "patterns": ["*"],
            "check_sensitive": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) == 0


@pytest.mark.asyncio
class TestFileScannerHashing:
    """Test file hashing functionality"""

    async def test_hash_calculation(self, file_scanner, temp_workspace):
        """Test SHA256 hash calculation"""
        # Create test file
        test_file = temp_workspace / "test.txt"
        test_file.write_text("Hello World")

        config = {
            "patterns": ["*.txt"],
            "calculate_hashes": True
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"

        # Find the test.txt finding
        txt_findings = [f for f in result.findings if "test.txt" in f.file_path]
        assert len(txt_findings) > 0

        # Check that hash was calculated
        finding = txt_findings[0]
        assert finding.metadata.get("file_hash") is not None
        assert len(finding.metadata["file_hash"]) == 64  # SHA256 hex length

    async def test_no_hash_when_disabled(self, file_scanner, temp_workspace):
        """Test that hashing can be disabled"""
        test_file = temp_workspace / "test.txt"
        test_file.write_text("Hello World")

        config = {
            "patterns": ["*.txt"],
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        txt_findings = [f for f in result.findings if "test.txt" in f.file_path]

        if len(txt_findings) > 0:
            finding = txt_findings[0]
            assert finding.metadata.get("file_hash") is None


@pytest.mark.asyncio
class TestFileScannerFileTypes:
    """Test file type detection"""

    async def test_detect_python_type(self, file_scanner, temp_workspace):
        """Test detection of Python file type"""
        (temp_workspace / "script.py").write_text("print('hello')")

        config = {"patterns": ["*.py"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        py_findings = [f for f in result.findings if "script.py" in f.file_path]
        assert len(py_findings) > 0
        assert "python" in py_findings[0].metadata["file_type"]

    async def test_detect_javascript_type(self, file_scanner, temp_workspace):
        """Test detection of JavaScript file type"""
        (temp_workspace / "app.js").write_text("console.log('hello')")

        config = {"patterns": ["*.js"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        js_findings = [f for f in result.findings if "app.js" in f.file_path]
        assert len(js_findings) > 0
        assert "javascript" in js_findings[0].metadata["file_type"]

    async def test_file_type_summary(self, file_scanner, temp_workspace):
        """Test that file type summary is generated"""
        (temp_workspace / "script.py").write_text("print('hello')")
        (temp_workspace / "app.js").write_text("console.log('hello')")
        (temp_workspace / "readme.txt").write_text("Documentation")

        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert "file_types" in result.summary
        assert len(result.summary["file_types"]) > 0


@pytest.mark.asyncio
class TestFileScannerSizeLimits:
    """Test file size handling"""

    async def test_skip_large_files(self, file_scanner, temp_workspace):
        """Test that large files are skipped"""
        # Create a "large" file
        large_file = temp_workspace / "large.txt"
        large_file.write_text("x" * 1000)

        config = {
            "patterns": ["*.txt"],
            "max_file_size": 500  # Set limit smaller than file
        }

        result = await file_scanner.execute(config, temp_workspace)

        # Should succeed but skip the large file
        assert result.status == "success"

        # The file should still be counted but not have a detailed finding
        assert result.summary["total_files"] > 0

    async def test_process_small_files(self, file_scanner, temp_workspace):
        """Test that small files are processed"""
        small_file = temp_workspace / "small.txt"
        small_file.write_text("small content")

        config = {
            "patterns": ["*.txt"],
            "max_file_size": 1048576  # 1MB
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        txt_findings = [f for f in result.findings if "small.txt" in f.file_path]
        assert len(txt_findings) > 0


@pytest.mark.asyncio
class TestFileScannerSummary:
    """Test result summary generation"""

    async def test_summary_structure(self, file_scanner, python_test_workspace):
        """Test that summary has correct structure"""
        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert "total_files" in result.summary
        assert "total_size_bytes" in result.summary
        assert "file_types" in result.summary
        assert "patterns_scanned" in result.summary

        assert isinstance(result.summary["total_files"], int)
        assert isinstance(result.summary["total_size_bytes"], int)
        assert isinstance(result.summary["file_types"], dict)
        assert isinstance(result.summary["patterns_scanned"], list)

    async def test_summary_counts(self, file_scanner, temp_workspace):
        """Test that summary counts are accurate"""
        # Create known files
        (temp_workspace / "file1.py").write_text("content1")
        (temp_workspace / "file2.py").write_text("content2")
        (temp_workspace / "file3.txt").write_text("content3")

        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert result.summary["total_files"] == 3
        assert result.summary["total_size_bytes"] > 0
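
# These tests depend on `file_scanner`, `temp_workspace`, and `python_test_workspace`
# fixtures defined in conftest.py (not shown in this diff). A minimal sketch of
# plausible definitions; the FileScanner import path is an assumption, not taken
# from the repository:

@pytest.fixture
def file_scanner():
    from modules.scanner.file_scanner import FileScanner  # assumed import path
    return FileScanner()


@pytest.fixture
def temp_workspace(tmp_path):
    # An empty directory standing in for an uploaded target workspace
    return tmp_path


@pytest.fixture
def python_test_workspace(tmp_path):
    # A workspace seeded with Python and text files for the pattern tests
    (tmp_path / "main.py").write_text("print('hello')")
    (tmp_path / "notes.txt").write_text("plain text")
    return tmp_path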
@@ -1,493 +0,0 @@
"""
Unit tests for SecurityAnalyzer module
"""

import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))

from modules.analyzer.security_analyzer import SecurityAnalyzer


@pytest.fixture
def security_analyzer():
    """Create SecurityAnalyzer instance"""
    return SecurityAnalyzer()


@pytest.mark.asyncio
class TestSecurityAnalyzerMetadata:
    """Test SecurityAnalyzer metadata"""

    async def test_metadata_structure(self, security_analyzer):
        """Test that metadata has correct structure"""
        metadata = security_analyzer.get_metadata()

        assert metadata.name == "security_analyzer"
        assert metadata.version == "1.0.0"
        assert metadata.category == "analyzer"
        assert "security" in metadata.tags
        assert "vulnerabilities" in metadata.tags
        assert metadata.requires_workspace is True


@pytest.mark.asyncio
class TestSecurityAnalyzerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, security_analyzer):
        """Test that valid config passes validation"""
        config = {
            "file_extensions": [".py", ".js"],
            "check_secrets": True,
            "check_sql": True,
            "check_dangerous_functions": True
        }
        assert security_analyzer.validate_config(config) is True

    async def test_default_config(self, security_analyzer):
        """Test that empty config uses defaults"""
        config = {}
        assert security_analyzer.validate_config(config) is True

    async def test_invalid_extensions_type(self, security_analyzer):
        """Test that non-list extensions raises error"""
        config = {"file_extensions": ".py"}
        with pytest.raises(ValueError, match="file_extensions must be a list"):
            security_analyzer.validate_config(config)


@pytest.mark.asyncio
class TestSecurityAnalyzerSecretDetection:
    """Test hardcoded secret detection"""

    async def test_detect_api_key(self, security_analyzer, temp_workspace):
        """Test detection of hardcoded API key"""
        code_file = temp_workspace / "config.py"
        code_file.write_text("""
# Configuration file
api_key = "apikey_live_abcdefghijklmnopqrstuvwxyzabcdefghijk"
database_url = "postgresql://localhost/db"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": False,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) > 0
        assert any("API Key" in f.title for f in secret_findings)

    async def test_detect_password(self, security_analyzer, temp_workspace):
        """Test detection of hardcoded password"""
        code_file = temp_workspace / "auth.py"
        code_file.write_text("""
def connect():
    password = "mySecretP@ssw0rd"
    return connect_db(password)
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": False,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) > 0

    async def test_detect_aws_credentials(self, security_analyzer, temp_workspace):
        """Test detection of AWS credentials"""
        code_file = temp_workspace / "aws_config.py"
        code_file.write_text("""
aws_access_key = "AKIAIOSFODNN7REALKEY"
aws_secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYREALKEY"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        aws_findings = [f for f in result.findings if "AWS" in f.title]
        assert len(aws_findings) >= 2  # Both access key and secret key

    async def test_no_secret_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that secret detection can be disabled"""
        code_file = temp_workspace / "config.py"
        code_file.write_text('api_key = "sk_live_1234567890abcdef"')

        config = {
            "file_extensions": [".py"],
            "check_secrets": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerSQLInjection:
    """Test SQL injection detection"""

    async def test_detect_string_concatenation(self, security_analyzer, temp_workspace):
        """Test detection of SQL string concatenation"""
        code_file = temp_workspace / "db.py"
        code_file.write_text("""
def get_user(user_id):
    query = "SELECT * FROM users WHERE id = " + user_id
    return execute(query)
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": False,
            "check_sql": True,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_detect_f_string_sql(self, security_analyzer, temp_workspace):
        """Test detection of f-string in SQL"""
        code_file = temp_workspace / "db.py"
        code_file.write_text("""
def get_user(name):
    query = f"SELECT * FROM users WHERE name = '{name}'"
    return execute(query)
""")

        config = {
            "file_extensions": [".py"],
            "check_sql": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_detect_dynamic_query_building(self, security_analyzer, temp_workspace):
        """Test detection of dynamic query building"""
        code_file = temp_workspace / "queries.py"
        code_file.write_text("""
def search(keyword):
    query = "SELECT * FROM products WHERE name LIKE " + keyword
    execute(query + " ORDER BY price")
""")

        config = {
            "file_extensions": [".py"],
            "check_sql": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_no_sql_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that SQL detection can be disabled"""
        code_file = temp_workspace / "db.py"
        code_file.write_text('query = "SELECT * FROM users WHERE id = " + user_id')

        config = {
            "file_extensions": [".py"],
            "check_sql": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerDangerousFunctions:
    """Test dangerous function detection"""

    async def test_detect_eval(self, security_analyzer, temp_workspace):
        """Test detection of eval() usage"""
        code_file = temp_workspace / "dangerous.py"
        code_file.write_text("""
def process_input(user_input):
    result = eval(user_input)
    return result
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": False,
            "check_sql": False,
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0
        assert any("eval" in f.title.lower() for f in dangerous_findings)

    async def test_detect_exec(self, security_analyzer, temp_workspace):
        """Test detection of exec() usage"""
        code_file = temp_workspace / "runner.py"
        code_file.write_text("""
def run_code(code):
    exec(code)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_os_system(self, security_analyzer, temp_workspace):
        """Test detection of os.system() usage"""
        code_file = temp_workspace / "commands.py"
        code_file.write_text("""
import os

def run_command(cmd):
    os.system(cmd)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0
        assert any("os.system" in f.title for f in dangerous_findings)

    async def test_detect_pickle_loads(self, security_analyzer, temp_workspace):
        """Test detection of pickle.loads() usage"""
        code_file = temp_workspace / "serializer.py"
        code_file.write_text("""
import pickle

def deserialize(data):
    return pickle.loads(data)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_javascript_eval(self, security_analyzer, temp_workspace):
        """Test detection of eval() in JavaScript"""
        code_file = temp_workspace / "app.js"
        code_file.write_text("""
function processInput(userInput) {
    return eval(userInput);
}
""")

        config = {
            "file_extensions": [".js"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_innerHTML(self, security_analyzer, temp_workspace):
        """Test detection of innerHTML (XSS risk)"""
        code_file = temp_workspace / "dom.js"
        code_file.write_text("""
function updateContent(html) {
    document.getElementById("content").innerHTML = html;
}
""")

        config = {
            "file_extensions": [".js"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_no_dangerous_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that dangerous function detection can be disabled"""
        code_file = temp_workspace / "code.py"
        code_file.write_text('result = eval(user_input)')

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerMultipleIssues:
    """Test detection of multiple issues in same file"""

    async def test_detect_multiple_vulnerabilities(self, security_analyzer, temp_workspace):
        """Test detection of multiple vulnerability types"""
        code_file = temp_workspace / "vulnerable.py"
        code_file.write_text("""
import os

# Hardcoded credentials
api_key = "apikey_live_abcdefghijklmnopqrstuvwxyzabcdef"
password = "MySecureP@ssw0rd"

def process_query(user_input):
    # SQL injection
    query = "SELECT * FROM users WHERE name = " + user_input

    # Dangerous function
    result = eval(user_input)

    # Command injection
    os.system(user_input)

    return result
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": True,
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"

        # Should find multiple types of issues
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]

        assert len(secret_findings) > 0
        assert len(sql_findings) > 0
        assert len(dangerous_findings) > 0


@pytest.mark.asyncio
class TestSecurityAnalyzerSummary:
    """Test result summary generation"""

    async def test_summary_structure(self, security_analyzer, temp_workspace):
        """Test that summary has correct structure"""
        (temp_workspace / "test.py").write_text("print('hello')")

        config = {"file_extensions": [".py"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        assert "files_analyzed" in result.summary
        assert "total_findings" in result.summary
        assert "extensions_scanned" in result.summary

        assert isinstance(result.summary["files_analyzed"], int)
        assert isinstance(result.summary["total_findings"], int)
        assert isinstance(result.summary["extensions_scanned"], list)

    async def test_empty_workspace(self, security_analyzer, temp_workspace):
        """Test analyzing empty workspace"""
        config = {"file_extensions": [".py"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "partial"  # No files found
        assert result.summary["files_analyzed"] == 0

    async def test_analyze_multiple_file_types(self, security_analyzer, temp_workspace):
        """Test analyzing multiple file types"""
        (temp_workspace / "app.py").write_text("print('hello')")
        (temp_workspace / "script.js").write_text("console.log('hello')")
        (temp_workspace / "index.php").write_text("<?php echo 'hello'; ?>")

        config = {"file_extensions": [".py", ".js", ".php"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        assert result.summary["files_analyzed"] == 3


@pytest.mark.asyncio
class TestSecurityAnalyzerFalsePositives:
    """Test false positive filtering"""

    async def test_skip_test_secrets(self, security_analyzer, temp_workspace):
        """Test that test/example secrets are filtered"""
        code_file = temp_workspace / "test_config.py"
        code_file.write_text("""
# Test configuration - should be filtered
api_key = "test_key_example"
password = "dummy_password_123"
token = "sample_token_placeholder"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        # These should be filtered as false positives
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        # Should have fewer or no findings due to false positive filtering
        assert len(secret_findings) == 0 or all(
            not any(fp in f.description.lower() for fp in ['test', 'example', 'dummy', 'sample'])
            for f in secret_findings
        )
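
# SecurityAnalyzer can also be exercised outside pytest. A small driver sketch,
# reusing the import above; the workspace path below is a placeholder:

import asyncio


async def _demo(workspace: Path) -> None:
    analyzer = SecurityAnalyzer()
    config = {
        "file_extensions": [".py"],
        "check_secrets": True,
        "check_sql": True,
        "check_dangerous_functions": True,
    }
    result = await analyzer.execute(config, workspace)
    for finding in result.findings:
        print(finding.severity, finding.category, finding.title)


# asyncio.run(_demo(Path("/path/to/workspace")))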
@@ -1,369 +0,0 @@
"""
FuzzForge Common Storage Activities

Activities for interacting with MinIO storage:
- get_target_activity: Download target from MinIO to local cache
- cleanup_cache_activity: Remove target from local cache
- upload_results_activity: Upload workflow results to MinIO
"""

import json
import logging
import os
import shutil
import tarfile
from pathlib import Path
from typing import Optional

import boto3
from botocore.exceptions import ClientError
from temporalio import activity

# Configure logging
logger = logging.getLogger(__name__)

# Initialize S3 client (MinIO)
s3_client = boto3.client(
    's3',
    endpoint_url=os.getenv('S3_ENDPOINT', 'http://minio:9000'),
    aws_access_key_id=os.getenv('S3_ACCESS_KEY', 'fuzzforge'),
    aws_secret_access_key=os.getenv('S3_SECRET_KEY', 'fuzzforge123'),
    region_name=os.getenv('S3_REGION', 'us-east-1'),
    use_ssl=os.getenv('S3_USE_SSL', 'false').lower() == 'true'
)

# Configuration
S3_BUCKET = os.getenv('S3_BUCKET', 'targets')
CACHE_DIR = Path(os.getenv('CACHE_DIR', '/cache'))
CACHE_MAX_SIZE_GB = int(os.getenv('CACHE_MAX_SIZE', '10').rstrip('GB'))


@activity.defn(name="get_target")
async def get_target_activity(
    target_id: str,
    run_id: Optional[str] = None,
    workspace_isolation: str = "isolated"
) -> str:
    """
    Download target from MinIO to local cache.

    Args:
        target_id: UUID of the uploaded target
        run_id: Workflow run ID for isolation (required for "isolated" and
            "copy-on-write" modes)
        workspace_isolation: Isolation mode - "isolated" (default), "shared", or "copy-on-write"

    Returns:
        Local path to the cached target workspace

    Raises:
        FileNotFoundError: If target doesn't exist in MinIO
        ValueError: If run_id not provided for isolated or copy-on-write mode
        Exception: For other download errors
    """
    logger.info(
        f"Activity: get_target (target_id={target_id}, run_id={run_id}, "
        f"isolation={workspace_isolation})"
    )

    # Validate isolation mode
    valid_modes = ["isolated", "shared", "copy-on-write"]
    if workspace_isolation not in valid_modes:
        raise ValueError(
            f"Invalid workspace_isolation mode: {workspace_isolation}. "
            f"Must be one of: {valid_modes}"
        )

    # Require run_id for isolated and copy-on-write modes
    if workspace_isolation in ["isolated", "copy-on-write"] and not run_id:
        raise ValueError(
            f"run_id is required for workspace_isolation='{workspace_isolation}'"
        )

    # Define cache paths based on isolation mode
    if workspace_isolation == "isolated":
        # Each run gets its own isolated workspace
        cache_path = CACHE_DIR / target_id / run_id
        cached_file = cache_path / "target"
    elif workspace_isolation == "shared":
        # All runs share the same workspace (legacy behavior)
        cache_path = CACHE_DIR / target_id
        cached_file = cache_path / "target"
    else:  # copy-on-write
        # Shared download, run-specific copy
        shared_cache_path = CACHE_DIR / target_id / "shared"
        cache_path = CACHE_DIR / target_id / run_id
        cached_file = shared_cache_path / "target"

    # Handle copy-on-write mode
    if workspace_isolation == "copy-on-write":
        # Check if shared cache exists
        if cached_file.exists():
            logger.info(f"Copy-on-write: Shared cache HIT for {target_id}")

            # Copy shared workspace to run-specific path
            shared_workspace = shared_cache_path / "workspace"
            run_workspace = cache_path / "workspace"

            if shared_workspace.exists():
                logger.info(f"Copying workspace to isolated run path: {run_workspace}")
                cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copytree(shared_workspace, run_workspace)
                return str(run_workspace)
            else:
                # Shared file exists but was not extracted (non-tarball)
                run_file = cache_path / "target"
                cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copy2(cached_file, run_file)
                return str(run_file)
        # If the shared cache doesn't exist, fall through to download

    # Check if target is already cached (isolated or shared mode)
    elif cached_file.exists():
        # Update access time for LRU
        cached_file.touch()
        logger.info(f"Cache HIT: {target_id} (mode: {workspace_isolation})")

        # Check if workspace directory exists (extracted tarball)
        workspace_dir = cache_path / "workspace"
        if workspace_dir.exists() and workspace_dir.is_dir():
            logger.info(f"Returning cached workspace: {workspace_dir}")
            return str(workspace_dir)
        else:
            # Return cached file (not a tarball)
            return str(cached_file)

    # Cache miss - download from MinIO
    logger.info(
        f"Cache MISS: {target_id} (mode: {workspace_isolation}), "
        f"downloading from MinIO..."
    )

    try:
        # Create cache directory
        cache_path.mkdir(parents=True, exist_ok=True)

        # Download from S3/MinIO
        s3_key = f'{target_id}/target'
        logger.info(f"Downloading s3://{S3_BUCKET}/{s3_key} -> {cached_file}")

        s3_client.download_file(
            Bucket=S3_BUCKET,
            Key=s3_key,
            Filename=str(cached_file)
        )

        # Verify the file was downloaded
        if not cached_file.exists():
            raise FileNotFoundError(f"Downloaded file not found: {cached_file}")

        file_size = cached_file.stat().st_size
        logger.info(
            f"✓ Downloaded target {target_id} "
            f"({file_size / 1024 / 1024:.2f} MB)"
        )

        # Extract tarball if it's an archive
        workspace_dir = cache_path / "workspace"

        if tarfile.is_tarfile(str(cached_file)):
            logger.info(f"Extracting tarball to {workspace_dir}...")
            workspace_dir.mkdir(parents=True, exist_ok=True)

            # Archives come from our own MinIO bucket and are treated as trusted
            with tarfile.open(str(cached_file), 'r:*') as tar:
                tar.extractall(path=workspace_dir)

            logger.info(f"✓ Extracted tarball to {workspace_dir}")

            # For copy-on-write mode, copy to the run-specific path
            if workspace_isolation == "copy-on-write":
                run_cache_path = CACHE_DIR / target_id / run_id
                run_workspace = run_cache_path / "workspace"
                logger.info(f"Copy-on-write: Copying to {run_workspace}")
                run_cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copytree(workspace_dir, run_workspace)
                return str(run_workspace)

            return str(workspace_dir)
        else:
            # Not a tarball
            if workspace_isolation == "copy-on-write":
                # Copy the file to the run-specific path
                run_cache_path = CACHE_DIR / target_id / run_id
                run_file = run_cache_path / "target"
                logger.info(f"Copy-on-write: Copying file to {run_file}")
                run_cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copy2(cached_file, run_file)
                return str(run_file)

            return str(cached_file)

    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == '404' or error_code == 'NoSuchKey':
            logger.error(f"Target not found in MinIO: {target_id}")
            raise FileNotFoundError(f"Target {target_id} not found in storage")
        else:
            logger.error(f"S3/MinIO error downloading target: {e}", exc_info=True)
            raise

    except Exception as e:
        logger.error(f"Failed to download target {target_id}: {e}", exc_info=True)
        # Clean up the partial download
        if cache_path.exists():
            shutil.rmtree(cache_path, ignore_errors=True)
        raise


@activity.defn(name="cleanup_cache")
async def cleanup_cache_activity(
    target_path: str,
    workspace_isolation: str = "isolated"
) -> None:
    """
    Remove target from local cache after workflow completes.

    Args:
        target_path: Path to the cached target workspace (from get_target_activity)
        workspace_isolation: Isolation mode used - determines cleanup scope

    Notes:
        - "isolated" mode: Removes the entire run-specific directory
        - "copy-on-write" mode: Removes run-specific directory, keeps shared cache
        - "shared" mode: Does NOT remove cache (shared across runs)
    """
    logger.info(
        f"Activity: cleanup_cache (path={target_path}, "
        f"isolation={workspace_isolation})"
    )

    try:
        target = Path(target_path)

        # For shared mode, don't clean up (cache is shared across runs)
        if workspace_isolation == "shared":
            logger.info(
                f"Skipping cleanup for shared workspace (mode={workspace_isolation})"
            )
            return

        # For isolated and copy-on-write modes, clean up the run-specific
        # directory: /cache/{target_id}/{run_id}/. The target path is either
        # .../workspace (extracted tarball) or a raw target file; in both
        # cases the run directory is one level up.
        run_dir = target.parent

        # Validate it's in the cache and looks like a run-specific path
        if run_dir.exists() and run_dir.is_relative_to(CACHE_DIR):
            # Check that the parent is the target_id directory (validate structure)
            target_id_dir = run_dir.parent
            if target_id_dir.is_relative_to(CACHE_DIR):
                shutil.rmtree(run_dir)
                logger.info(
                    f"✓ Cleaned up run-specific directory: {run_dir} "
                    f"(mode={workspace_isolation})"
                )
            else:
                logger.warning(
                    f"Unexpected cache structure, skipping cleanup: {run_dir}"
                )
        else:
            logger.warning(
                f"Cache path not in CACHE_DIR or doesn't exist: {run_dir}"
            )

    except Exception as e:
        # Don't fail the workflow if cleanup fails
        logger.error(
            f"Failed to cleanup cache {target_path}: {e}",
            exc_info=True
        )


@activity.defn(name="upload_results")
async def upload_results_activity(
    workflow_id: str,
    results: dict,
    results_format: str = "json"
) -> str:
    """
    Upload workflow results to MinIO.

    Args:
        workflow_id: Workflow execution ID
        results: Results dictionary to upload
        results_format: Format for results (json, sarif, etc.)

    Returns:
        S3 URL to the uploaded results
    """
    logger.info(
        f"Activity: upload_results "
        f"(workflow_id={workflow_id}, format={results_format})"
    )

    try:
        # Prepare results content; both supported formats serialize as JSON
        # and differ only in content type and file extension
        if results_format == "sarif":
            content_type = 'application/sarif+json'
            file_ext = 'sarif'
        else:
            # "json" and any unrecognized format default to plain JSON
            content_type = 'application/json'
            file_ext = 'json'
        content = json.dumps(results, indent=2).encode('utf-8')

        # Upload to MinIO
        s3_key = f'{workflow_id}/results.{file_ext}'
        logger.info(f"Uploading results to s3://results/{s3_key}")

        s3_client.put_object(
            Bucket='results',
            Key=s3_key,
            Body=content,
            ContentType=content_type,
            Metadata={
                'workflow_id': workflow_id,
                'format': results_format
            }
        )

        # Construct the S3 URL
        s3_endpoint = os.getenv('S3_ENDPOINT', 'http://minio:9000')
        s3_url = f"{s3_endpoint}/results/{s3_key}"

        logger.info(f"✓ Uploaded results: {s3_url}")
        return s3_url

    except Exception as e:
        logger.error(
            f"Failed to upload results for workflow {workflow_id}: {e}",
            exc_info=True
        )
        raise


def _check_cache_size():
    """Check total cache size and log a warning if it exceeds the limit"""
    try:
        total_size = 0
        for item in CACHE_DIR.rglob('*'):
            if item.is_file():
                total_size += item.stat().st_size

        total_size_gb = total_size / (1024 ** 3)
        if total_size_gb > CACHE_MAX_SIZE_GB:
            logger.warning(
                f"Cache size ({total_size_gb:.2f} GB) exceeds "
                f"limit ({CACHE_MAX_SIZE_GB} GB). Consider cleanup."
            )

    except Exception as e:
        logger.error(f"Failed to check cache size: {e}")
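
# A sketch of how a Temporal workflow would typically invoke these activities
# by their registered names. The workflow class, timeouts, and results payload
# below are illustrative assumptions, not taken from the repository:

from datetime import timedelta

from temporalio import workflow


@workflow.defn
class ExampleScanWorkflow:
    @workflow.run
    async def run(self, target_id: str) -> str:
        # Download the target into an isolated, run-specific workspace
        workspace = await workflow.execute_activity(
            "get_target",
            args=[target_id, workflow.info().run_id, "isolated"],
            start_to_close_timeout=timedelta(minutes=10),
        )
        try:
            # ... run analysis modules against `workspace` here ...
            results_url = await workflow.execute_activity(
                "upload_results",
                args=[workflow.info().workflow_id, {"status": "ok"}, "json"],
                start_to_close_timeout=timedelta(minutes=5),
            )
        finally:
            # Always release the run-specific cache, even on failure
            await workflow.execute_activity(
                "cleanup_cache",
                args=[workspace, "isolated"],
                start_to_close_timeout=timedelta(minutes=2),
            )
        return results_url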
@@ -10,7 +10,5 @@
 # Additional attribution and requirements are provided in the NOTICE file.
 
 from .security_analyzer import SecurityAnalyzer
-from .bandit_analyzer import BanditAnalyzer
-from .mypy_analyzer import MypyAnalyzer
 
-__all__ = ["SecurityAnalyzer", "BanditAnalyzer", "MypyAnalyzer"]
+__all__ = ["SecurityAnalyzer"]
@@ -1,328 +0,0 @@
"""
Bandit Analyzer Module - Analyzes Python code for security issues using Bandit
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
import json
import logging
import time
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding

logger = logging.getLogger(__name__)


class BanditAnalyzer(BaseModule):
    """
    Analyzes Python code for security issues using Bandit.

    This module:
    - Runs the Bandit security linter on Python files
    - Detects common security issues (SQL injection, hardcoded secrets, etc.)
    - Reports findings with severity levels
    """

    # Severity mapping from Bandit levels to our standard
    SEVERITY_MAP = {
        "LOW": "low",
        "MEDIUM": "medium",
        "HIGH": "high"
    }

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="bandit_analyzer",
            version="1.0.0",
            description="Analyzes Python code for security issues using Bandit",
            author="FuzzForge Team",
            category="analyzer",
            tags=["python", "security", "bandit", "sast"],
            input_schema={
                "severity_level": {
                    "type": "string",
                    "enum": ["low", "medium", "high"],
                    "description": "Minimum severity level to report",
                    "default": "low"
                },
                "confidence_level": {
                    "type": "string",
                    "enum": ["low", "medium", "high"],
                    "description": "Minimum confidence level to report",
                    "default": "medium"
                },
                "exclude_tests": {
                    "type": "boolean",
                    "description": "Exclude test files from analysis",
                    "default": True
                },
                "skip_ids": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of Bandit test IDs to skip",
                    "default": []
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "List of security issues found by Bandit"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        severity = config.get("severity_level", "low")
        if severity not in ["low", "medium", "high"]:
            raise ValueError("severity_level must be one of: low, medium, high")

        confidence = config.get("confidence_level", "medium")
        if confidence not in ["low", "medium", "high"]:
            raise ValueError("confidence_level must be one of: low, medium, high")

        skip_ids = config.get("skip_ids", [])
        if not isinstance(skip_ids, list):
            raise ValueError("skip_ids must be a list")

        return True

    async def _run_bandit(
        self,
        workspace: Path,
        severity_level: str,
        confidence_level: str,
        exclude_tests: bool,
        skip_ids: List[str]
    ) -> Dict[str, Any]:
        """
        Run Bandit on the workspace.

        Args:
            workspace: Path to workspace
            severity_level: Minimum severity to report
            confidence_level: Minimum confidence to report
            exclude_tests: Whether to exclude test files
            skip_ids: List of test IDs to skip

        Returns:
            Bandit JSON output as dict
        """
        try:
            # Build the bandit command. No -l/-ll severity flag is passed, so
            # Bandit reports all findings; filtering against severity_level and
            # confidence_level happens later in _should_include_finding.
            cmd = [
                "bandit",
                "-r", str(workspace),
                "-f", "json",
            ]

            # Add exclude patterns for test files
            if exclude_tests:
                cmd.extend(["-x", "*/test_*.py,*/tests/*,*_test.py"])

            # Add skip IDs if specified
            if skip_ids:
                cmd.extend(["-s", ",".join(skip_ids)])

            logger.info(f"Running Bandit on: {workspace}")
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            # Bandit returns 1 when issues are found, which is expected
            if process.returncode not in [0, 1]:
                logger.error(f"Bandit failed: {stderr.decode()}")
                return {"results": []}

            # Parse JSON output
            result = json.loads(stdout.decode())
            return result

        except Exception as e:
            logger.error(f"Error running Bandit: {e}")
            return {"results": []}

    def _should_include_finding(
        self,
        issue: Dict[str, Any],
        min_severity: str,
        min_confidence: str
    ) -> bool:
        """
        Determine if a Bandit issue should be included based on severity/confidence.

        Args:
            issue: Bandit issue dict
            min_severity: Minimum severity threshold
            min_confidence: Minimum confidence threshold

        Returns:
            True if issue should be included
        """
        severity_order = ["low", "medium", "high"]
        issue_severity = issue.get("issue_severity", "LOW").lower()
        issue_confidence = issue.get("issue_confidence", "LOW").lower()

        severity_meets_threshold = severity_order.index(issue_severity) >= severity_order.index(min_severity)
        confidence_meets_threshold = severity_order.index(issue_confidence) >= severity_order.index(min_confidence)

        return severity_meets_threshold and confidence_meets_threshold

    def _convert_to_findings(
        self,
        bandit_result: Dict[str, Any],
        workspace: Path,
        min_severity: str,
        min_confidence: str
    ) -> List[ModuleFinding]:
        """
        Convert Bandit results to ModuleFindings.

        Args:
            bandit_result: Bandit JSON output
            workspace: Workspace path for relative paths
            min_severity: Minimum severity to include
            min_confidence: Minimum confidence to include

        Returns:
            List of ModuleFindings
        """
        findings = []

        for issue in bandit_result.get("results", []):
            # Filter by severity and confidence
            if not self._should_include_finding(issue, min_severity, min_confidence):
                continue

            # Extract issue details
            test_id = issue.get("test_id", "B000")
            test_name = issue.get("test_name", "unknown")
            issue_text = issue.get("issue_text", "No description")
            severity = self.SEVERITY_MAP.get(issue.get("issue_severity", "LOW"), "low")

            # File location
            filename = issue.get("filename", "")
            line_number = issue.get("line_number", 0)
            code = issue.get("code", "")

            # Try to get a workspace-relative path
            try:
                file_path = Path(filename)
                rel_path = file_path.relative_to(workspace)
            except (ValueError, TypeError):
                rel_path = Path(filename).name

            # Create finding
            finding = self.create_finding(
                title=f"{test_name} ({test_id})",
                description=issue_text,
                severity=severity,
                category="security-issue",
                file_path=str(rel_path),
                line_start=line_number,
                line_end=line_number,
                code_snippet=code.strip() if code else None,
                recommendation=f"Review and fix the security issue identified by Bandit test {test_id}",
                metadata={
                    "test_id": test_id,
                    "test_name": test_name,
                    "confidence": issue.get("issue_confidence", "LOW").lower(),
                    "cwe": issue.get("issue_cwe", {}).get("id") if issue.get("issue_cwe") else None,
                    "more_info": issue.get("more_info", "")
                }
            )
            findings.append(finding)

        return findings

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the Bandit analyzer module.

        Args:
            config: Module configuration
            workspace: Path to workspace

        Returns:
            ModuleResult with security findings
        """
        start_time = time.time()
        metadata = self.get_metadata()

        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)

        # Get configuration
        severity_level = config.get("severity_level", "low")
        confidence_level = config.get("confidence_level", "medium")
        exclude_tests = config.get("exclude_tests", True)
        skip_ids = config.get("skip_ids", [])

        # Run Bandit
        logger.info("Starting Bandit analysis...")
        bandit_result = await self._run_bandit(
            workspace,
            severity_level,
            confidence_level,
            exclude_tests,
            skip_ids
        )

        # Convert to findings
        findings = self._convert_to_findings(
            bandit_result,
            workspace,
            severity_level,
            confidence_level
        )

        # Calculate summary
        severity_counts = {}
        for finding in findings:
            sev = finding.severity
            severity_counts[sev] = severity_counts.get(sev, 0) + 1

        execution_time = time.time() - start_time

        return ModuleResult(
            module=metadata.name,
            version=metadata.version,
            status="success",
            execution_time=execution_time,
            findings=findings,
            summary={
                "total_issues": len(findings),
                "by_severity": severity_counts,
                "files_analyzed": len(set(f.file_path for f in findings if f.file_path))
            },
            metadata={
                # Bandit's JSON output carries a generation timestamp, not a
                # version string, so record it under its own name
                "generated_at": bandit_result.get("generated_at", "unknown"),
                "metrics": bandit_result.get("metrics", {})
            }
        )
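
# A standalone driver sketch for BanditAnalyzer, assuming the `bandit` binary
# is on PATH; the workspace path and thresholds below are illustrative:

async def _bandit_demo(workspace: Path) -> None:
    analyzer = BanditAnalyzer()
    config = {
        "severity_level": "medium",
        "confidence_level": "medium",
        "exclude_tests": True,
        "skip_ids": [],
    }
    result = await analyzer.execute(config, workspace)
    print(
        f"{result.summary['total_issues']} issues in "
        f"{result.execution_time:.2f}s: {result.summary['by_severity']}"
    )


# asyncio.run(_bandit_demo(Path("/path/to/python/project")))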
@@ -1,349 +0,0 @@
"""
LLM Analyzer Module - Uses AI to analyze code for security issues
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult

logger = logging.getLogger(__name__)


class LLMAnalyzer(BaseModule):
    """
    Uses an LLM to analyze code for potential security issues.

    This module:
    - Sends code to an LLM agent via the A2A protocol
    - Asks the LLM to identify security vulnerabilities
    - Collects findings and returns them in a structured format
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="llm_analyzer",
            version="1.0.0",
            description="Uses AI to analyze code for security issues",
            author="FuzzForge Team",
            category="analyzer",
            tags=["llm", "ai", "security", "analysis"],
            input_schema={
                "agent_url": {
                    "type": "string",
                    "description": "A2A agent endpoint URL",
                    "default": "http://fuzzforge-task-agent:8000/a2a/litellm_agent"
                },
                "llm_model": {
                    "type": "string",
                    "description": "LLM model to use",
                    "default": "gpt-4o-mini"
                },
                "llm_provider": {
                    "type": "string",
                    "description": "LLM provider (openai, anthropic, etc.)",
                    "default": "openai"
                },
                "file_patterns": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "File patterns to analyze",
                    "default": ["*.py", "*.js", "*.ts", "*.java", "*.go"]
                },
                "max_files": {
                    "type": "integer",
                    "description": "Maximum number of files to analyze",
                    "default": 5
                },
                "max_file_size": {
                    "type": "integer",
                    "description": "Maximum file size in bytes",
                    "default": 50000  # 50KB
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout per file in seconds",
                    "default": 60
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "Security issues identified by LLM"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        # Lazy import to avoid Temporal sandbox restrictions
        try:
            from fuzzforge_ai.a2a_wrapper import send_agent_task  # noqa: F401
        except ImportError:
            raise RuntimeError(
                "A2A wrapper not available. Ensure fuzzforge_ai module is accessible."
            )

        agent_url = config.get("agent_url")
        if not agent_url or not isinstance(agent_url, str):
            raise ValueError("agent_url must be a valid URL string")

        max_files = config.get("max_files", 5)
        if not isinstance(max_files, int) or max_files <= 0:
            raise ValueError("max_files must be a positive integer")

        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the LLM analysis module.

        Args:
            config: Module configuration
            workspace: Path to the workspace containing code to analyze

        Returns:
            ModuleResult with findings from LLM analysis
        """
        # Start execution timer
        self.start_timer()

        logger.info(f"Starting LLM analysis in workspace: {workspace}")

        # Extract configuration
        agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent")
        llm_model = config.get("llm_model", "gpt-4o-mini")
        llm_provider = config.get("llm_provider", "openai")
        file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go"])
        max_files = config.get("max_files", 5)
        max_file_size = config.get("max_file_size", 50000)
        timeout = config.get("timeout", 60)

        # Find files to analyze
        files_to_analyze = []
        for pattern in file_patterns:
            for file_path in workspace.rglob(pattern):
                if file_path.is_file():
                    try:
                        # Check file size
                        if file_path.stat().st_size > max_file_size:
                            logger.debug(f"Skipping {file_path} (too large)")
                            continue

                        files_to_analyze.append(file_path)

                        if len(files_to_analyze) >= max_files:
                            break
                    except Exception as e:
                        logger.warning(f"Error checking file {file_path}: {e}")
                        continue

            if len(files_to_analyze) >= max_files:
                break

        logger.info(f"Found {len(files_to_analyze)} files to analyze")

        # Analyze each file
        all_findings = []
        for file_path in files_to_analyze:
            logger.info(f"Analyzing: {file_path.relative_to(workspace)}")

            try:
                findings = await self._analyze_file(
                    file_path=file_path,
                    workspace=workspace,
                    agent_url=agent_url,
                    llm_model=llm_model,
                    llm_provider=llm_provider,
                    timeout=timeout
                )
                all_findings.extend(findings)

            except Exception as e:
                logger.error(f"Error analyzing {file_path}: {e}")
                # Continue with the next file
                continue

        logger.info(f"LLM analysis complete. Found {len(all_findings)} issues.")

        # Create result using base module helper
        return self.create_result(
            findings=all_findings,
            status="success",
            summary={
                "files_analyzed": len(files_to_analyze),
                "total_findings": len(all_findings),
                "agent_url": agent_url,
                "model": f"{llm_provider}/{llm_model}"
            }
        )

    async def _analyze_file(
        self,
        file_path: Path,
        workspace: Path,
        agent_url: str,
        llm_model: str,
        llm_provider: str,
        timeout: int
    ) -> List:
        """Analyze a single file with the LLM"""

        # Read file content
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                code_content = f.read()
        except Exception as e:
            logger.error(f"Failed to read {file_path}: {e}")
            return []

        # Determine language from extension
        extension = file_path.suffix.lower()
        language_map = {
            ".py": "python",
            ".js": "javascript",
            ".ts": "typescript",
            ".java": "java",
            ".go": "go",
            ".rs": "rust",
            ".c": "c",
            ".cpp": "cpp",
        }
        language = language_map.get(extension, "code")

        # Build prompt for LLM
        system_prompt = (
            "You are a security code analyzer. Analyze the provided code and identify "
            "potential security vulnerabilities, bugs, and code quality issues. "
            "For each issue found, respond in this exact format:\n"
            "ISSUE: [short title]\n"
            "SEVERITY: [error/warning/note]\n"
            "LINE: [line number or 'unknown']\n"
            "DESCRIPTION: [detailed explanation]\n\n"
            "If no issues are found, respond with 'NO_ISSUES_FOUND'."
        )

        user_message = (
            f"Analyze this {language} code for security vulnerabilities:\n\n"
            f"File: {file_path.relative_to(workspace)}\n\n"
            f"```{language}\n{code_content}\n```"
        )

        # Call LLM via A2A wrapper (lazy import to avoid Temporal sandbox restrictions)
        try:
            from fuzzforge_ai.a2a_wrapper import send_agent_task

            result = await send_agent_task(
                url=agent_url,
                model=llm_model,
                provider=llm_provider,
                prompt=system_prompt,
                message=user_message,
                context=f"llm_analysis_{file_path.stem}",
                timeout=float(timeout)
            )

            llm_response = result.text

        except Exception as e:
            logger.error(f"A2A call failed for {file_path}: {e}")
            return []

        # Parse LLM response into findings
        findings = self._parse_llm_response(
            llm_response=llm_response,
            file_path=file_path,
            workspace=workspace
        )

        return findings

    def _parse_llm_response(
        self,
        llm_response: str,
        file_path: Path,
        workspace: Path
    ) -> List:
        """Parse LLM response into structured findings"""

        if "NO_ISSUES_FOUND" in llm_response:
            return []

        findings = []
        relative_path = str(file_path.relative_to(workspace))

        # Simple parser for the expected format
        lines = llm_response.split('\n')
        current_issue = {}

        for line in lines:
            line = line.strip()

            if line.startswith("ISSUE:"):
                # Save the previous issue if one exists
                if current_issue:
                    findings.append(self._create_module_finding(current_issue, relative_path))
                current_issue = {"title": line.replace("ISSUE:", "").strip()}

            elif line.startswith("SEVERITY:"):
                current_issue["severity"] = line.replace("SEVERITY:", "").strip().lower()

            elif line.startswith("LINE:"):
                line_num = line.replace("LINE:", "").strip()
                try:
                    current_issue["line"] = int(line_num)
                except ValueError:
                    current_issue["line"] = None

            elif line.startswith("DESCRIPTION:"):
                current_issue["description"] = line.replace("DESCRIPTION:", "").strip()

        # Save the last issue
        if current_issue:
            findings.append(self._create_module_finding(current_issue, relative_path))

        return findings

    def _create_module_finding(self, issue: Dict[str, Any], file_path: str):
        """Create a ModuleFinding from a parsed issue"""

        severity_map = {
            "error": "critical",
            "warning": "medium",
            "note": "low",
            "info": "low"
        }

        # Use the base class helper to create a proper ModuleFinding
        return self.create_finding(
            title=issue.get("title", "Security issue detected"),
            description=issue.get("description", ""),
            severity=severity_map.get(issue.get("severity", "warning"), "medium"),
            category="security",
            file_path=file_path,
            line_start=issue.get("line"),
            metadata={
                "tool": "llm-analyzer",
                "type": "llm-security-analysis"
            }
        )
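
# Hedged example (added for illustration; not in the original file): a response
# in the exact ISSUE/SEVERITY/LINE/DESCRIPTION format the system prompt above
# requests. The issue content is hypothetical.
_example_llm_response = (
    "ISSUE: Hardcoded credentials\n"
    "SEVERITY: error\n"
    "LINE: 12\n"
    "DESCRIPTION: A password literal is embedded in the source.\n"
    "\n"
    "ISSUE: Unvalidated input in subprocess call\n"
    "SEVERITY: warning\n"
    "LINE: unknown\n"
    "DESCRIPTION: User-controlled input reaches a subprocess call unsanitized.\n"
)
# _parse_llm_response would yield two findings from this text: "error" maps to
# severity "critical" and "warning" to "medium" via severity_map, and the
# non-numeric LINE value "unknown" becomes line_start=None.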
@@ -1,269 +0,0 @@
"""
Mypy Analyzer Module - Analyzes Python code for type safety issues using Mypy
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
import logging
import re
import time
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding

logger = logging.getLogger(__name__)


class MypyAnalyzer(BaseModule):
    """
    Analyzes Python code for type safety issues using Mypy.

    This module:
    - Runs the Mypy type checker on Python files
    - Detects type errors and inconsistencies
    - Reports findings with configurable strictness
    """

    # Map Mypy message levels to severity
    ERROR_SEVERITY_MAP = {
        "error": "medium",
        "note": "info"
    }

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="mypy_analyzer",
            version="1.0.0",
            description="Analyzes Python code for type safety issues using Mypy",
            author="FuzzForge Team",
            category="analyzer",
            tags=["python", "type-checking", "mypy", "sast"],
            input_schema={
                "strict_mode": {
                    "type": "boolean",
                    "description": "Enable strict type checking",
                    "default": False
                },
                "ignore_missing_imports": {
                    "type": "boolean",
                    "description": "Ignore errors about missing imports",
                    "default": True
                },
                "follow_imports": {
                    "type": "string",
                    "enum": ["normal", "silent", "skip", "error"],
                    "description": "How to handle imports",
                    "default": "silent"
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "List of type errors found by Mypy"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        follow_imports = config.get("follow_imports", "silent")
        if follow_imports not in ["normal", "silent", "skip", "error"]:
            raise ValueError("follow_imports must be one of: normal, silent, skip, error")

        return True

    async def _run_mypy(
        self,
        workspace: Path,
        strict_mode: bool,
        ignore_missing_imports: bool,
        follow_imports: str
    ) -> str:
        """
        Run Mypy on the workspace.

        Args:
            workspace: Path to workspace
            strict_mode: Enable strict checking
            ignore_missing_imports: Ignore missing import errors
            follow_imports: How to handle imports

        Returns:
            Mypy output as a string
        """
        try:
            # Build the mypy command
            cmd = [
                "mypy",
                str(workspace),
                "--show-column-numbers",
                "--no-error-summary",
                f"--follow-imports={follow_imports}"
            ]

            if strict_mode:
                cmd.append("--strict")

            if ignore_missing_imports:
                cmd.append("--ignore-missing-imports")

            logger.info(f"Running Mypy on: {workspace}")
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            # Mypy returns non-zero if errors are found, which is expected
            output = stdout.decode()
            return output

        except Exception as e:
            logger.error(f"Error running Mypy: {e}")
            return ""

    def _parse_mypy_output(self, output: str, workspace: Path) -> List[ModuleFinding]:
        """
        Parse Mypy output and convert it to findings.

        Mypy output format:
            file.py:10:5: error: Incompatible return value type [return-value]
            file.py:15: note: See https://...

        Args:
            output: Mypy stdout
            workspace: Workspace path for relative paths

        Returns:
            List of ModuleFindings
        """
        findings = []

        # Regex to parse mypy output lines
        # Format: filename:line:column: level: message [error-code]
        pattern = r'^(.+?):(\d+)(?::(\d+))?: (error|note): (.+?)(?:\s+\[([^\]]+)\])?$'

        for line in output.splitlines():
            match = re.match(pattern, line.strip())
            if not match:
                continue

            filename, line_num, column, level, message, error_code = match.groups()

            # Convert to a relative path
            try:
                file_path = Path(filename)
                rel_path = file_path.relative_to(workspace)
            except (ValueError, TypeError):
                rel_path = Path(filename).name

            # Skip notes that carry no error code (follow-up hints for a previous error)
            if level == "note" and not error_code:
                continue

            # Map severity
            severity = self.ERROR_SEVERITY_MAP.get(level, "medium")

            # Create the finding
            title = f"Type error: {error_code or 'type-issue'}"
            description = message

            finding = self.create_finding(
                title=title,
                description=description,
                severity=severity,
                category="type-error",
                file_path=str(rel_path),
                line_start=int(line_num),
                line_end=int(line_num),
                recommendation="Review and fix the type inconsistency or add appropriate type annotations",
                metadata={
                    "error_code": error_code or "unknown",
                    "column": int(column) if column else None,
                    "level": level
                }
            )
            findings.append(finding)

        return findings

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the Mypy analyzer module.

        Args:
            config: Module configuration
            workspace: Path to workspace

        Returns:
            ModuleResult with type checking findings
        """
        start_time = time.time()
        metadata = self.get_metadata()

        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)

        # Get configuration
        strict_mode = config.get("strict_mode", False)
        ignore_missing_imports = config.get("ignore_missing_imports", True)
        follow_imports = config.get("follow_imports", "silent")

        # Run Mypy
        logger.info("Starting Mypy analysis...")
        mypy_output = await self._run_mypy(
            workspace,
            strict_mode,
            ignore_missing_imports,
            follow_imports
        )

        # Parse the output into findings
        findings = self._parse_mypy_output(mypy_output, workspace)

        # Calculate summary
        error_code_counts = {}
        for finding in findings:
            code = finding.metadata.get("error_code", "unknown")
            error_code_counts[code] = error_code_counts.get(code, 0) + 1

        execution_time = time.time() - start_time

        return ModuleResult(
            module=metadata.name,
            version=metadata.version,
            status="success",
            execution_time=execution_time,
            findings=findings,
            summary={
                "total_errors": len(findings),
                "by_error_code": error_code_counts,
                "files_with_errors": len(set(f.file_path for f in findings if f.file_path))
            },
            metadata={
                "strict_mode": strict_mode,
                "ignore_missing_imports": ignore_missing_imports
            }
        )
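
# Hedged, self-contained check (added for illustration; not in the original
# file): the parsing regex from _parse_mypy_output applied to a representative
# Mypy line, showing which groups it captures.
import re

_pattern = r'^(.+?):(\d+)(?::(\d+))?: (error|note): (.+?)(?:\s+\[([^\]]+)\])?$'
_line = "app/models.py:10:5: error: Incompatible return value type [return-value]"
_match = re.match(_pattern, _line)
assert _match is not None
# Groups: file, line, column, level, message, error code
assert _match.groups() == (
    "app/models.py", "10", "5", "error",
    "Incompatible return value type", "return-value",
)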
@@ -16,7 +16,7 @@ Security Analyzer Module - Analyzes code for security vulnerabilities
import logging
import re
from pathlib import Path
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
@@ -1,31 +1,25 @@
"""
Android Security Analysis Modules
Android Security Modules

Modules for Android application security testing:
- JadxDecompiler: APK decompilation using Jadx
- MobSFScanner: Mobile security analysis using MobSF
- OpenGrepAndroid: Static analysis using OpenGrep/Semgrep with Android-specific rules
This package contains modules for Android static code analysis and security testing.

Available modules:
- MobSF: Mobile Security Framework
- Jadx: Dex to Java decompiler
- OpenGrep: Open-source pattern-based static analysis tool
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule

from .jadx_decompiler import JadxDecompiler
from .opengrep_android import OpenGrepAndroid
# Module registry for automatic discovery
ANDROID_MODULES: List[Type[BaseModule]] = []

# MobSF is optional (not available on the ARM64 platform)
try:
    from .mobsf_scanner import MobSFScanner
    __all__ = ["JadxDecompiler", "MobSFScanner", "OpenGrepAndroid"]
except ImportError:
    # MobSF dependencies not available (e.g., ARM64 platform)
    MobSFScanner = None
    __all__ = ["JadxDecompiler", "OpenGrepAndroid"]
def register_module(module_class: Type[BaseModule]):
    """Register an Android security module"""
    ANDROID_MODULES.append(module_class)
    return module_class

def get_available_modules() -> List[Type[BaseModule]]:
    """Get all available Android modules"""
    return ANDROID_MODULES.copy()
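
# Hedged, self-contained sketch (added for illustration): the registry pattern
# used by register_module above, with illustrative stand-in names rather than
# FuzzForge APIs.
from typing import List, Type

_REGISTRY: List[Type] = []

def _register(cls: Type) -> Type:
    """Mimic register_module: record the class and return it unchanged."""
    _REGISTRY.append(cls)
    return cls

@_register
class _ExampleScanner:
    pass

assert _REGISTRY == [_ExampleScanner]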
Some files were not shown because too many files have changed in this diff.