Mirror of https://github.com/FuzzingLabs/fuzzforge_ai.git (synced 2026-02-13 11:52:47 +00:00)

Compare commits: dev...feat/andro (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 5da3f1e071 |  |

@@ -26,7 +26,7 @@ What type of security workflow is this?
## Files
Please attach or provide links to your workflow files:

- [ ] `workflow.py` - Main Temporal flow implementation
- [ ] `workflow.py` - Main Prefect flow implementation
- [ ] `Dockerfile` - Container definition
- [ ] `metadata.yaml` - Workflow metadata
- [ ] Test files or examples

79 .github/pull_request_template.md (vendored)
@@ -1,79 +0,0 @@
## Description

<!-- Provide a brief description of the changes in this PR -->

## Type of Change

<!-- Mark the appropriate option with an 'x' -->

- [ ] 🐛 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (non-breaking change which adds functionality)
- [ ] 💥 Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] 📝 Documentation update
- [ ] 🔧 Configuration change
- [ ] ♻️ Refactoring (no functional changes)
- [ ] 🎨 Style/formatting changes
- [ ] ✅ Test additions or updates

## Related Issues

<!-- Link to related issues using #issue_number -->
<!-- Example: Closes #123, Relates to #456 -->

## Changes Made

<!-- List the specific changes made in this PR -->

-
-
-

## Testing

<!-- Describe the tests you ran to verify your changes -->

### Tested Locally

- [ ] All tests pass (`pytest`, `uv build`, etc.)
- [ ] Linting passes (`ruff check`)
- [ ] Code builds successfully

### Worker Changes (if applicable)

- [ ] Docker images build successfully (`docker compose build`)
- [ ] Worker containers start correctly
- [ ] Tested with actual workflow execution

### Documentation

- [ ] Documentation updated (if needed)
- [ ] README updated (if needed)
- [ ] CHANGELOG.md updated (if user-facing changes)

## Pre-Merge Checklist

<!-- Ensure all items are completed before requesting review -->

- [ ] My code follows the project's coding standards
- [ ] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published

### Worker-Specific Checks (if workers/ modified)

- [ ] All worker files properly tracked by git (not gitignored)
- [ ] Worker validation script passes (`.github/scripts/validate-workers.sh`)
- [ ] Docker images build without errors
- [ ] Worker configuration updated in `docker-compose.yml` (if needed)

## Screenshots (if applicable)

<!-- Add screenshots to help explain your changes -->

## Additional Notes

<!-- Any additional information that reviewers should know -->

127 .github/scripts/validate-workers.sh (vendored)
@@ -1,127 +0,0 @@
#!/bin/bash
# Worker Validation Script
# Ensures all workers defined in docker-compose.yml exist in the repository
# and are properly tracked by git.

set -e

echo "🔍 Validating worker completeness..."

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

ERRORS=0
WARNINGS=0

# Extract worker service names from docker-compose.yml
echo ""
echo "📋 Checking workers defined in docker-compose.yml..."
WORKERS=$(grep -E "^\s+worker-" docker-compose.yml | grep -v "#" | cut -d: -f1 | tr -d ' ' | sort -u)

if [ -z "$WORKERS" ]; then
    echo -e "${RED}❌ No workers found in docker-compose.yml${NC}"
    exit 1
fi

echo "Found workers:"
for worker in $WORKERS; do
    echo "  - $worker"
done

# Check each worker
echo ""
echo "🔎 Validating worker files..."
for worker in $WORKERS; do
    WORKER_DIR="workers/${worker#worker-}"

    echo ""
    echo "Checking $worker ($WORKER_DIR)..."

    # Check if directory exists
    if [ ! -d "$WORKER_DIR" ]; then
        echo -e "${RED}  ❌ Directory not found: $WORKER_DIR${NC}"
        ERRORS=$((ERRORS + 1))
        continue
    fi

    # Check Dockerfile (single file or multi-platform pattern)
    if [ -f "$WORKER_DIR/Dockerfile" ]; then
        # Single Dockerfile
        if ! git ls-files --error-unmatch "$WORKER_DIR/Dockerfile" &> /dev/null; then
            echo -e "${RED}  ❌ File not tracked by git: $WORKER_DIR/Dockerfile${NC}"
            echo -e "${YELLOW}     Check .gitignore patterns!${NC}"
            ERRORS=$((ERRORS + 1))
        else
            echo -e "${GREEN}  ✓ Dockerfile (tracked)${NC}"
        fi
    elif compgen -G "$WORKER_DIR/Dockerfile.*" > /dev/null; then
        # Multi-platform Dockerfiles (e.g., Dockerfile.amd64, Dockerfile.arm64)
        PLATFORM_DOCKERFILES=$(ls "$WORKER_DIR"/Dockerfile.* 2>/dev/null)
        DOCKERFILE_FOUND=false
        for dockerfile in $PLATFORM_DOCKERFILES; do
            if git ls-files --error-unmatch "$dockerfile" &> /dev/null; then
                echo -e "${GREEN}  ✓ $(basename "$dockerfile") (tracked)${NC}"
                DOCKERFILE_FOUND=true
            else
                echo -e "${RED}  ❌ File not tracked by git: $dockerfile${NC}"
                ERRORS=$((ERRORS + 1))
            fi
        done
        if [ "$DOCKERFILE_FOUND" = false ]; then
            echo -e "${RED}  ❌ No platform-specific Dockerfiles found${NC}"
            ERRORS=$((ERRORS + 1))
        fi
    else
        echo -e "${RED}  ❌ Missing Dockerfile or Dockerfile.* files${NC}"
        ERRORS=$((ERRORS + 1))
    fi

    # Check other required files
    REQUIRED_FILES=("requirements.txt" "worker.py")
    for file in "${REQUIRED_FILES[@]}"; do
        FILE_PATH="$WORKER_DIR/$file"

        if [ ! -f "$FILE_PATH" ]; then
            echo -e "${RED}  ❌ Missing file: $FILE_PATH${NC}"
            ERRORS=$((ERRORS + 1))
        else
            # Check if file is tracked by git
            if ! git ls-files --error-unmatch "$FILE_PATH" &> /dev/null; then
                echo -e "${RED}  ❌ File not tracked by git: $FILE_PATH${NC}"
                echo -e "${YELLOW}     Check .gitignore patterns!${NC}"
                ERRORS=$((ERRORS + 1))
            else
                echo -e "${GREEN}  ✓ $file (tracked)${NC}"
            fi
        fi
    done
done

# Check for any ignored worker files
echo ""
echo "🚫 Checking for gitignored worker files..."
IGNORED_FILES=$(git check-ignore workers/*/* 2>/dev/null || true)
if [ -n "$IGNORED_FILES" ]; then
    echo -e "${YELLOW}⚠️ Warning: Some worker files are being ignored:${NC}"
    echo "$IGNORED_FILES" | while read -r file; do
        echo -e "${YELLOW}  - $file${NC}"
    done
    WARNINGS=$((WARNINGS + 1))
fi

# Summary
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if [ $ERRORS -eq 0 ] && [ $WARNINGS -eq 0 ]; then
    echo -e "${GREEN}✅ All workers validated successfully!${NC}"
    exit 0
elif [ $ERRORS -eq 0 ]; then
    echo -e "${YELLOW}⚠️ Validation passed with $WARNINGS warning(s)${NC}"
    exit 0
else
    echo -e "${RED}❌ Validation failed with $ERRORS error(s) and $WARNINGS warning(s)${NC}"
    exit 1
fi

165 .github/workflows/benchmark.yml (vendored)
@@ -1,165 +0,0 @@
name: Benchmarks

on:
  # Disabled automatic runs - benchmarks not ready for CI/CD yet
  # schedule:
  #   - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger for testing
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # pull_request:
  #   paths:
  #     - 'backend/benchmarks/**'
  #     - 'backend/toolbox/modules/**'
  #     - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-benchmark pytest-benchmark[histogram]
          pip install -e ../sdk  # Install SDK for benchmarks

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram.svg

      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
          path: ./baseline
          search_artifacts: true

      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)

          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []

          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100

                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"

152 .github/workflows/examples/security-scan.yml (vendored)
@@ -1,152 +0,0 @@
# FuzzForge CI/CD Example - Security Scanning
#
# This workflow demonstrates how to integrate FuzzForge into your CI/CD pipeline
# for automated security testing on pull requests and pushes.
#
# Features:
# - Runs entirely in GitHub Actions (no external infrastructure needed)
# - Auto-starts FuzzForge services on-demand
# - Fails builds on error-level SARIF findings
# - Uploads SARIF results to GitHub Security tab
# - Exports findings as artifacts
#
# Prerequisites:
# - Ubuntu runner with Docker support
# - At least 4GB RAM available
# - ~90 seconds startup time

name: Security Scan Example

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main]

jobs:
  security-scan:
    name: Security Assessment
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start FuzzForge
        run: |
          bash scripts/ci-start.sh

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install FuzzForge CLI
        run: |
          pip install ./cli

      - name: Initialize FuzzForge
        run: |
          ff init --api-url http://localhost:8000 --name "GitHub Actions Security Scan"

      - name: Run Security Assessment
        run: |
          ff workflow run security_assessment . \
            --wait \
            --fail-on error \
            --export-sarif results.sarif

      - name: Upload SARIF to GitHub Security
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: results.sarif

      - name: Upload findings as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: security-findings
          path: results.sarif
          retention-days: 30

      - name: Stop FuzzForge
        if: always()
        run: |
          bash scripts/ci-stop.sh

  secret-scan:
    name: Secret Detection
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      - name: Start FuzzForge
        run: bash scripts/ci-start.sh

      - name: Install CLI
        run: |
          pip install ./cli

      - name: Initialize & Scan
        run: |
          ff init --api-url http://localhost:8000 --name "Secret Detection"
          ff workflow run secret_detection . \
            --wait \
            --fail-on all \
            --export-sarif secrets.sarif

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: secret-scan-results
          path: secrets.sarif
          retention-days: 30

      - name: Cleanup
        if: always()
        run: bash scripts/ci-stop.sh

  # Example: Nightly fuzzing campaign (long-running)
  nightly-fuzzing:
    name: Nightly Fuzzing
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # Only run on schedule
    if: github.event_name == 'schedule'
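    # NOTE: 'on:' above defines no schedule trigger, so this job never fires as
    # committed; a cron entry under 'on: schedule:' would be needed for nightly runs.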

    steps:
      - uses: actions/checkout@v4

      - name: Start FuzzForge
        run: bash scripts/ci-start.sh

      - name: Install CLI
        run: pip install ./cli

      - name: Run Fuzzing Campaign
        run: |
          ff init --api-url http://localhost:8000
          ff workflow run atheris_fuzzing . \
            max_iterations=100000000 \
            timeout_seconds=7200 \
            --wait \
            --export-sarif fuzzing-results.sarif
        # Don't fail on fuzzing findings, just report
        continue-on-error: true

      - name: Upload fuzzing results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fuzzing-results
          path: fuzzing-results.sarif
          retention-days: 90

      - name: Cleanup
        if: always()
        run: bash scripts/ci-stop.sh

248 .github/workflows/test.yml (vendored)
@@ -1,248 +0,0 @@
name: Tests

on:
  push:
    branches: [ main, master, dev, develop, feature/** ]
  pull_request:
    branches: [ main, master, dev, develop ]

jobs:
  validate-workers:
    name: Validate Workers
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Run worker validation
        run: |
          chmod +x .github/scripts/validate-workers.sh
          .github/scripts/validate-workers.sh

  build-workers:
    name: Build Worker Docker Images
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for proper diff

      - name: Check which workers were modified
        id: check-workers
        run: |
          if [ "${{ github.event_name }}" == "pull_request" ]; then
            # For PRs, check changed files
            CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD)
            echo "Changed files:"
            echo "$CHANGED_FILES"
          else
            # For direct pushes, check last commit
            CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD)
          fi

          # Check if docker-compose.yml changed (build all workers)
          if echo "$CHANGED_FILES" | grep -q "^docker-compose.yml"; then
            echo "workers_to_build=worker-python worker-secrets worker-rust worker-android worker-ossfuzz" >> $GITHUB_OUTPUT
            echo "workers_modified=true" >> $GITHUB_OUTPUT
            echo "✅ docker-compose.yml modified - building all workers"
            exit 0
          fi

          # Detect which specific workers changed
          WORKERS_TO_BUILD=""

          if echo "$CHANGED_FILES" | grep -q "^workers/python/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-python"
            echo "✅ Python worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/secrets/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-secrets"
            echo "✅ Secrets worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/rust/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-rust"
            echo "✅ Rust worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/android/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-android"
            echo "✅ Android worker modified"
          fi

          if echo "$CHANGED_FILES" | grep -q "^workers/ossfuzz/"; then
            WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-ossfuzz"
            echo "✅ OSS-Fuzz worker modified"
          fi

          if [ -z "$WORKERS_TO_BUILD" ]; then
            echo "workers_modified=false" >> $GITHUB_OUTPUT
            echo "⏭️ No worker changes detected - skipping build"
          else
            echo "workers_to_build=$WORKERS_TO_BUILD" >> $GITHUB_OUTPUT
            echo "workers_modified=true" >> $GITHUB_OUTPUT
            echo "Building workers:$WORKERS_TO_BUILD"
          fi

      - name: Set up Docker Buildx
        if: steps.check-workers.outputs.workers_modified == 'true'
        uses: docker/setup-buildx-action@v3

      - name: Build worker images
        if: steps.check-workers.outputs.workers_modified == 'true'
        run: |
          WORKERS="${{ steps.check-workers.outputs.workers_to_build }}"
          echo "Building worker Docker images: $WORKERS"
          docker compose build $WORKERS --no-cache
        continue-on-error: false

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ruff mypy

      - name: Run ruff
        run: ruff check backend/src backend/toolbox backend/tests backend/benchmarks --output-format=github

      - name: Run mypy (continue on error)
        run: mypy backend/src backend/toolbox || true
        continue-on-error: true

  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.11', '3.12']

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-cov pytest-xdist

      - name: Run unit tests
        working-directory: ./backend
        run: |
          pytest tests/unit/ \
            -v \
            --cov=toolbox/modules \
            --cov=src \
            --cov-report=xml \
            --cov-report=term \
            --cov-report=html \
            -n auto

      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.11'
        uses: codecov/codecov-action@v4
        with:
          file: ./backend/coverage.xml
          flags: unittests
          name: codecov-backend

      - name: Upload coverage HTML
        if: matrix.python-version == '3.11'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: ./backend/htmlcov/

  # integration-tests:
  #   name: Integration Tests
  #   runs-on: ubuntu-latest
  #   needs: unit-tests
  #
  #   services:
  #     postgres:
  #       image: postgres:15
  #       env:
  #         POSTGRES_USER: postgres
  #         POSTGRES_PASSWORD: postgres
  #         POSTGRES_DB: fuzzforge_test
  #       options: >-
  #         --health-cmd pg_isready
  #         --health-interval 10s
  #         --health-timeout 5s
  #         --health-retries 5
  #       ports:
  #         - 5432:5432
  #
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Set up Python
  #       uses: actions/setup-python@v5
  #       with:
  #         python-version: '3.11'
  #
  #     - name: Set up Docker Buildx
  #       uses: docker/setup-buildx-action@v3
  #
  #     - name: Install Python dependencies
  #       working-directory: ./backend
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install -e ".[dev]"
  #         pip install pytest pytest-asyncio
  #
  #     - name: Start services (Temporal, MinIO)
  #       run: |
  #         docker-compose -f docker-compose.yml up -d temporal minio
  #         sleep 30
  #
  #     - name: Run integration tests
  #       working-directory: ./backend
  #       run: |
  #         pytest tests/integration/ -v --tb=short
  #       env:
  #         DATABASE_URL: postgresql://postgres:postgres@localhost:5432/fuzzforge_test
  #         TEMPORAL_ADDRESS: localhost:7233
  #         MINIO_ENDPOINT: localhost:9000
  #
  #     - name: Shutdown services
  #       if: always()
  #       run: docker-compose down

  test-summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs: [validate-workers, lint, unit-tests]
    if: always()
    steps:
      - name: Check test results
        run: |
          if [ "${{ needs.validate-workers.result }}" != "success" ]; then
            echo "Worker validation failed"
            exit 1
          fi
          if [ "${{ needs.unit-tests.result }}" != "success" ]; then
            echo "Unit tests failed"
            exit 1
          fi
          echo "All tests passed!"

26 .gitignore (vendored)
@@ -185,13 +185,6 @@ logs/
# FuzzForge project directories (user projects should manage their own .gitignore)
.fuzzforge/

# Docker volume configs (keep .env.example but ignore actual .env)
volumes/env/.env

# Vendored proxy sources (kept locally for reference)
ai/proxy/bifrost/
ai/proxy/litellm/

# Test project databases and configurations
test_projects/*/.fuzzforge/
test_projects/*/findings.db*
@@ -208,7 +201,6 @@ dev_config.yaml
reports/
output/
findings/
*.sarif
*.sarif.json
*.html.report
security_report.*
@@ -237,17 +229,6 @@ yarn-error.log*
*.key
*.p12
*.pfx

# Exception: Secret detection benchmark test files (not real secrets)
!test_projects/secret_detection_benchmark/
!test_projects/secret_detection_benchmark/**
!**/secret_detection_benchmark_GROUND_TRUTH.json
!**/secret_detection/results/

# Exception: Allow workers/secrets/ directory (secrets detection worker)
!workers/secrets/
!workers/secrets/**

secret*
secrets/
credentials*
@@ -307,9 +288,4 @@ test_projects/*/wallet.json
test_projects/*/.npmrc
test_projects/*/.git-credentials
test_projects/*/credentials.*
test_projects/*/api_keys.*
test_projects/*/ci-*.sh

# -------------------- Internal Documentation --------------------
# Weekly summaries and temporary project documentation
WEEK_SUMMARY*.md
test_projects/*/api_keys.*

@@ -1,121 +0,0 @@
# FuzzForge CI/CD Example - GitLab CI
#
# This file demonstrates how to integrate FuzzForge into your GitLab CI/CD pipeline.
# Copy this to `.gitlab-ci.yml` in your project root to enable security scanning.
#
# Features:
# - Runs entirely in GitLab runners (no external infrastructure)
# - Auto-starts FuzzForge services on-demand
# - Fails pipelines on critical/high severity findings
# - Uploads SARIF reports to GitLab Security Dashboard
# - Exports findings as artifacts
#
# Prerequisites:
# - GitLab Runner with Docker support (docker:dind)
# - At least 4GB RAM available
# - ~90 seconds startup time

stages:
  - security

variables:
  FUZZFORGE_API_URL: "http://localhost:8000"
  DOCKER_DRIVER: overlay2
  DOCKER_TLS_CERTDIR: ""

# Base template for all FuzzForge jobs
.fuzzforge_template:
  image: docker:24
  services:
    - docker:24-dind
  before_script:
    # Install dependencies
    - apk add --no-cache bash curl python3 py3-pip git
    # Start FuzzForge
    - bash scripts/ci-start.sh
    # Install CLI
    - pip3 install ./cli --break-system-packages
    # Initialize project
    - ff init --api-url $FUZZFORGE_API_URL --name "GitLab CI Security Scan"
  after_script:
    # Cleanup
    - bash scripts/ci-stop.sh || true

# Security Assessment - Comprehensive code analysis
security:scan:
  extends: .fuzzforge_template
  stage: security
  timeout: 30 minutes
  script:
    - ff workflow run security_assessment . --wait --fail-on error --export-sarif results.sarif
  artifacts:
    when: always
    reports:
      sast: results.sarif
    paths:
      - results.sarif
    expire_in: 30 days
  only:
    - merge_requests
    - main
    - develop

# Secret Detection - Scan for exposed credentials
security:secrets:
  extends: .fuzzforge_template
  stage: security
  timeout: 15 minutes
  script:
    - ff workflow run secret_detection . --wait --fail-on all --export-sarif secrets.sarif
  artifacts:
    when: always
    paths:
      - secrets.sarif
    expire_in: 30 days
  only:
    - merge_requests
    - main

# Nightly Fuzzing - Long-running fuzzing campaign (scheduled only)
security:fuzzing:
  extends: .fuzzforge_template
  stage: security
  timeout: 2 hours
  script:
    - |
      ff workflow run atheris_fuzzing . \
        max_iterations=100000000 \
        timeout_seconds=7200 \
        --wait \
        --export-sarif fuzzing-results.sarif
  artifacts:
    when: always
    paths:
      - fuzzing-results.sarif
    expire_in: 90 days
  allow_failure: true  # Don't fail pipeline on fuzzing findings
  only:
    - schedules

# OSS-Fuzz Campaign (for supported projects)
security:ossfuzz:
  extends: .fuzzforge_template
  stage: security
  timeout: 1 hour
  script:
    - |
      ff workflow run ossfuzz_campaign . \
        project_name=your-project-name \
        campaign_duration_hours=0.5 \
        --wait \
        --export-sarif ossfuzz-results.sarif
  artifacts:
    when: always
    paths:
      - ossfuzz-results.sarif
    expire_in: 90 days
  allow_failure: true
  only:
    - schedules
  # Uncomment and set your project name
  # when: manual

1020 ARCHITECTURE.md (file diff suppressed because it is too large)

200 CHANGELOG.md
@@ -1,200 +0,0 @@
# Changelog

All notable changes to FuzzForge will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### 📝 Documentation
- Added comprehensive worker startup documentation across all guides
- Added workflow-to-worker mapping tables in README, troubleshooting guide, getting started guide, and docker setup guide
- Fixed broken documentation links in CLI reference
- Added WEEK_SUMMARY*.md pattern to .gitignore

---

## [0.7.3] - 2025-10-30

### 🎯 Major Features

#### Android Static Analysis Workflow
- **Added comprehensive Android security testing workflow** (`android_static_analysis`):
  - Jadx decompiler for APK → Java source code decompilation
  - OpenGrep/Semgrep static analysis with custom Android security rules
  - MobSF integration for comprehensive mobile security scanning
  - SARIF report generation with unified findings format
- Test results: Successfully decompiled 4,145 Java files, found 8 security vulnerabilities
- Full workflow completes in ~1.5 minutes

#### Platform-Aware Worker Architecture
- **ARM64 (Apple Silicon) support**:
  - Automatic platform detection (ARM64 vs x86_64) in CLI using `platform.machine()`
  - Worker metadata convention (`metadata.yaml`) for platform-specific capabilities
  - Multi-Dockerfile support: `Dockerfile.amd64` (full toolchain) and `Dockerfile.arm64` (optimized)
  - Conditional module imports for graceful degradation (MobSF skips on ARM64)
  - Backend path resolution via `FUZZFORGE_HOST_ROOT` for CLI worker management
- **Worker selection logic**:
  - CLI automatically selects the appropriate Dockerfile based on the detected platform (see the sketch below)
  - Multi-strategy path resolution (API → .fuzzforge marker → environment variable)
  - Platform-specific tool availability documented in metadata
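
A minimal sketch of that selection logic (illustrative only; the helper name and fallback behavior are assumptions, not the actual CLI code):

```python
import os
import platform

def pick_dockerfile(worker_dir: str) -> str:
    """Pick a platform-specific Dockerfile, falling back to the generic one."""
    machine = platform.machine().lower()
    # Apple Silicon reports 'arm64' (macOS) or 'aarch64' (Linux)
    suffix = "arm64" if machine in ("arm64", "aarch64") else "amd64"
    candidate = os.path.join(worker_dir, f"Dockerfile.{suffix}")
    return candidate if os.path.exists(candidate) else os.path.join(worker_dir, "Dockerfile")
```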

#### Python SAST Workflow
- **Added Python Static Application Security Testing workflow** (`python_sast`):
  - Bandit for Python security linting (SAST)
  - MyPy for static type checking
  - Safety for dependency vulnerability scanning
  - Integrated SARIF reporter for unified findings format
  - Auto-start Python worker on-demand

### ✨ Enhancements

#### CI/CD Improvements
- Added automated worker validation in CI pipeline
- Docker build checks for all workers before merge
- Worker file change detection for selective builds
- Optimized Docker layer caching for faster builds
- Dev branch testing workflow triggers

#### CLI Improvements
- Fixed live monitoring bug in `ff monitor live` command
- Enhanced `ff findings` command with better table formatting
- Improved `ff monitor` with clearer status displays
- Auto-start workers on-demand when workflows require them
- Better error messages with actionable manual start commands

#### Worker Management
- Standardized worker service names (`worker-python`, `worker-android`, etc.)
- Added missing `worker-secrets` to repository
- Improved worker naming consistency across codebase

#### LiteLLM Integration
- Centralized LLM provider management with proxy
- Governance and request/response routing
- OTEL collector integration for observability
- Environment-based configurable timeouts
- Optional `.env.litellm` configuration

### 🐛 Bug Fixes

- Fixed MobSF API key generation from secret file (SHA256 hash)
- Corrected Temporal activity names (decompile_with_jadx, scan_with_opengrep, scan_with_mobsf)
- Resolved linter errors across codebase
- Fixed unused import issues to pass CI checks
- Removed deprecated workflow parameters
- Docker Compose version compatibility fixes

### 🔧 Technical Changes

- Conditional import pattern for optional dependencies (MobSF on ARM64)
- Multi-platform Dockerfile architecture
- Worker metadata convention for capability declaration
- Improved CI worker build optimization
- Enhanced storage activity error handling

### 📝 Test Projects

- Added `test_projects/android_test/` with BeetleBug.apk and shopnest.apk
- Android workflow validation with real APK samples
- ARM64 platform testing and validation

---

## [0.7.2] - 2025-10-22

### 🐛 Bug Fixes
- Fixed worker naming inconsistencies across codebase
- Improved monitor command consolidation and usability
- Enhanced findings CLI with better formatting and display
- Added missing secrets worker to repository

### 📝 Documentation
- Added benchmark results files to git for secret detection workflows

**Note:** v0.7.1 was re-tagged as v0.7.2 (both point to the same commit)

---

## [0.7.0] - 2025-10-16

### 🎯 Major Features

#### Secret Detection Workflows
- **Added three secret detection workflows**:
  - `gitleaks_detection` - Pattern-based secret scanning
  - `trufflehog_detection` - Entropy-based secret detection with verification
  - `llm_secret_detection` - AI-powered semantic secret detection using LLMs
- **Comprehensive benchmarking infrastructure** (see the sketch below):
  - 32-secret ground truth dataset for precision/recall testing
  - Difficulty levels: 12 Easy, 10 Medium, 10 Hard secrets
  - SARIF-formatted output for all workflows
  - Achieved 100% recall with LLM-based detection on benchmark dataset
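
A minimal sketch of how recall and precision can be computed from such a dataset (the findings file name and both JSON shapes are assumptions for illustration; only the ground-truth file name appears in the repository):

```python
import json

# Hypothetical schemas: a list of {"file", "secret"} records on both sides
with open("secret_detection_benchmark_GROUND_TRUTH.json") as f:
    truth = {(s["file"], s["secret"]) for s in json.load(f)["secrets"]}

with open("findings.json") as f:  # exported workflow findings (hypothetical name)
    found = {(s["file"], s["secret"]) for s in json.load(f)["findings"]}

true_positives = truth & found
recall = len(true_positives) / len(truth)        # e.g. 27/32 ≈ 84.4%
precision = len(true_positives) / len(found) if found else 0.0
print(f"recall={recall:.1%} precision={precision:.1%}")
```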

#### AI Module & Agent Integration
- Added A2A (Agent-to-Agent) wrapper for multi-agent orchestration
- Task agent implementation with Google ADK
- LLM analysis workflow for code security analysis
- Reactivated AI agent command (`ff ai agent`)

#### Temporal Migration Complete
- Fully migrated from Prefect to Temporal for workflow orchestration
- MinIO storage for unified file handling (replaces volume mounts)
- Vertical workers with pre-built security toolchains
- Improved worker lifecycle management

#### CI/CD Integration
- Ephemeral deployment model for testing
- Automated workflow validation in CI pipeline

### ✨ Enhancements

#### Documentation
- Updated README for Temporal + MinIO architecture
- Added `.env` configuration guide for AI agent API keys
- Fixed worker startup instructions with correct service names
- Updated docker compose commands to modern syntax

#### Worker Management
- Added `worker_service` field to API responses for correct service naming
- Improved error messages with actionable manual start commands
- Fixed default parameters for gitleaks (now uses `no_git=True` by default)

### 🐛 Bug Fixes

- Fixed default parameters from metadata.yaml not being applied to workflows when no parameters provided
- Fixed gitleaks workflow failing on uploaded directories without Git history
- Fixed worker startup command suggestions (now uses `docker compose up -d` with service names)
- Fixed missing `cognify_text` method in CogneeProjectIntegration

### 🔧 Technical Changes

- Updated all package versions to 0.7.0
- Improved SARIF output formatting for secret detection workflows
- Enhanced benchmark validation with ground truth JSON
- Better integration between CLI and backend for worker management

### 📝 Test Projects

- Added `secret_detection_benchmark` with 32 documented secrets
- Ground truth JSON for automated precision/recall calculations
- Updated `vulnerable_app` for comprehensive security testing

---

## [0.6.0] - Undocumented

### Features
- Initial Temporal migration
- Fuzzing workflows (Atheris, Cargo, OSS-Fuzz)
- Security assessment workflow
- Basic CLI commands

**Note:** No git tag exists for v0.6.0. Release date undocumented.

---

[0.7.3]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.2...v0.7.3
[0.7.2]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.0...v0.7.2
[0.7.0]: https://github.com/FuzzingLabs/fuzzforge_ai/releases/tag/v0.7.0
[0.6.0]: https://github.com/FuzzingLabs/fuzzforge_ai/tree/v0.6.0

@@ -84,10 +84,9 @@ docs(readme): update installation instructions
```
backend/toolbox/workflows/your_workflow/
├── __init__.py
├── workflow.py        # Main Temporal workflow
├── activities.py      # Workflow activities (optional)
├── metadata.yaml      # Workflow metadata (includes vertical field)
└── requirements.txt   # Additional dependencies (optional)
├── workflow.py        # Main Prefect flow
├── metadata.yaml      # Workflow metadata
└── Dockerfile         # Container definition
```

2. **Register Your Workflow**

@@ -1,421 +0,0 @@
# FuzzForge Temporal Architecture - Quick Start Guide

This guide walks you through starting and testing the new Temporal-based architecture.

## Prerequisites

- Docker and Docker Compose installed
- At least 2GB free RAM (core services only, workers start on-demand)
- Ports available: 7233, 8233, 9000, 9001, 8000

## Step 1: Start Core Services

```bash
# From project root
cd /path/to/fuzzforge_ai

# Start core services (Temporal, MinIO, Backend)
docker-compose up -d

# Workers are pre-built but don't auto-start (saves ~6-7GB RAM)
# They'll start automatically when workflows need them

# Check status
docker-compose ps
```

**Expected output:**
```
NAME                            STATUS    PORTS
fuzzforge-minio                 healthy   0.0.0.0:9000-9001->9000-9001/tcp
fuzzforge-temporal              healthy   0.0.0.0:7233->7233/tcp
fuzzforge-temporal-postgresql   healthy   5432/tcp
fuzzforge-backend               healthy   0.0.0.0:8000->8000/tcp
fuzzforge-minio-setup           exited (0)
# Workers NOT running (will start on-demand)
```

**First startup takes ~30-60 seconds** for health checks to pass.

## Step 2: Verify Worker Discovery

Check worker logs to ensure workflows are discovered:

```bash
docker logs fuzzforge-worker-rust
```

**Expected output:**
```
============================================================
FuzzForge Vertical Worker: rust
============================================================
Temporal Address: temporal:7233
Task Queue: rust-queue
Max Concurrent Activities: 5
============================================================
Discovering workflows for vertical: rust
Importing workflow module: toolbox.workflows.rust_test.workflow
✓ Discovered workflow: RustTestWorkflow from rust_test (vertical: rust)
Discovered 1 workflows for vertical 'rust'
Connecting to Temporal at temporal:7233...
✓ Connected to Temporal successfully
Creating worker on task queue: rust-queue
✓ Worker created successfully
============================================================
🚀 Worker started for vertical 'rust'
📦 Registered 1 workflows
⚙️ Registered 3 activities
📨 Listening on task queue: rust-queue
============================================================
Worker is ready to process tasks...
```

## Step 2.5: Worker Lifecycle Management (New in v0.7.0)

Workers start on-demand when workflows need them:

```bash
# Check worker status (should show Exited or not running)
docker ps -a --filter "name=fuzzforge-worker"

# Run a workflow - worker starts automatically
ff workflow run ossfuzz_campaign . project_name=zlib

# Worker is now running
docker ps --filter "name=fuzzforge-worker-ossfuzz"
```

**Configuration** (`.fuzzforge/config.yaml`):
```yaml
workers:
  auto_start_workers: true     # Default: auto-start
  auto_stop_workers: false     # Default: keep running
  worker_startup_timeout: 60   # Startup timeout in seconds
```

**CLI Control**:
```bash
# Disable auto-start
ff workflow run ossfuzz_campaign . --no-auto-start

# Enable auto-stop after completion
ff workflow run ossfuzz_campaign . --wait --auto-stop
```

## Step 3: Access Web UIs

### Temporal Web UI
- URL: http://localhost:8233
- View workflows, executions, and task queues

### MinIO Console
- URL: http://localhost:9001
- Login: `fuzzforge` / `fuzzforge123`
- View uploaded targets and results

## Step 4: Test Workflow Execution

### Option A: Using Temporal CLI (tctl)

```bash
# Install tctl (if not already installed)
brew install temporal  # macOS
# or download from https://github.com/temporalio/tctl/releases

# Execute test workflow
tctl workflow run \
  --address localhost:7233 \
  --taskqueue rust-queue \
  --workflow_type RustTestWorkflow \
  --input '{"target_id": "test-123", "test_message": "Hello Temporal!"}'
```

### Option B: Using Python Client

Create `test_workflow.py`:

```python
import asyncio
from temporalio.client import Client

async def main():
    # Connect to Temporal
    client = await Client.connect("localhost:7233")

    # Start workflow
    result = await client.execute_workflow(
        "RustTestWorkflow",
        {"target_id": "test-123", "test_message": "Hello Temporal!"},
        id="test-workflow-1",
        task_queue="rust-queue"
    )

    print("Workflow result:", result)

if __name__ == "__main__":
    asyncio.run(main())
```

```bash
python test_workflow.py
```

### Option C: Upload Target and Run (Full Flow)

```python
# upload_and_run.py
import asyncio
import boto3
from pathlib import Path
from temporalio.client import Client

async def main():
    # 1. Upload target to MinIO
    s3 = boto3.client(
        's3',
        endpoint_url='http://localhost:9000',
        aws_access_key_id='fuzzforge',
        aws_secret_access_key='fuzzforge123',
        region_name='us-east-1'
    )

    # Create a test file
    test_file = Path('/tmp/test_target.txt')
    test_file.write_text('This is a test target file')

    # Upload to MinIO
    target_id = 'my-test-target-001'
    s3.upload_file(
        str(test_file),
        'targets',
        f'{target_id}/target'
    )
    print(f"✓ Uploaded target: {target_id}")

    # 2. Run workflow
    client = await Client.connect("localhost:7233")

    result = await client.execute_workflow(
        "RustTestWorkflow",
        {"target_id": target_id, "test_message": "Full flow test!"},
        id=f"workflow-{target_id}",
        task_queue="rust-queue"
    )

    print("✓ Workflow completed!")
    print("Results:", result)

if __name__ == "__main__":
    asyncio.run(main())
```

```bash
# Install dependencies
pip install temporalio boto3

# Run test
python upload_and_run.py
```

## Step 5: Monitor Execution

### View in Temporal UI

1. Open http://localhost:8233
2. Click on "Workflows"
3. Find your workflow by ID
4. Click to see:
   - Execution history
   - Activity results
   - Error stack traces (if any)

### View Logs

```bash
# Worker logs (shows activity execution)
docker logs -f fuzzforge-worker-rust

# Temporal server logs
docker logs -f fuzzforge-temporal
```

### Check MinIO Storage

1. Open http://localhost:9001
2. Login: `fuzzforge` / `fuzzforge123`
3. Browse buckets:
   - `targets/` - Uploaded target files
   - `results/` - Workflow results (if uploaded)
   - `cache/` - Worker cache (temporary)

## Troubleshooting

### Services Not Starting

```bash
# Check logs for all services
docker-compose -f docker-compose.temporal.yaml logs

# Check specific service
docker-compose -f docker-compose.temporal.yaml logs temporal
docker-compose -f docker-compose.temporal.yaml logs minio
docker-compose -f docker-compose.temporal.yaml logs worker-rust
```

### Worker Not Discovering Workflows

**Issue**: Worker logs show "No workflows found for vertical: rust"

**Solution**:
1. Check toolbox mount: `docker exec fuzzforge-worker-rust ls /app/toolbox/workflows`
2. Verify metadata.yaml exists and has `vertical: rust` (minimal example below)
3. Check workflow.py has `@workflow.defn` decorator
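
A minimal pair of files that would satisfy checks 2 and 3 might look like this (the `name` field and the stub body are illustrative assumptions; only `vertical` and the decorator are what the checks above require):

```yaml
# backend/toolbox/workflows/rust_test/metadata.yaml
name: rust_test
vertical: rust   # must match the worker's vertical for discovery
```

```python
# backend/toolbox/workflows/rust_test/workflow.py
from temporalio import workflow

@workflow.defn
class RustTestWorkflow:
    @workflow.run
    async def run(self, params: dict) -> dict:
        # Stub only: a real workflow would call activities here
        return {"status": "ok", "received": params}
```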

### Cannot Connect to Temporal

**Issue**: `Failed to connect to Temporal`

**Solution**:
```bash
# Wait for Temporal to be healthy
docker-compose -f docker-compose.temporal.yaml ps

# Check Temporal health manually
curl http://localhost:8233

# Restart Temporal if needed
docker-compose -f docker-compose.temporal.yaml restart temporal
```

### MinIO Connection Failed

**Issue**: `Failed to download target`

**Solution**:
```bash
# Check MinIO is running
docker ps | grep minio

# Check buckets exist
docker exec fuzzforge-minio mc ls fuzzforge/

# Verify target was uploaded
docker exec fuzzforge-minio mc ls fuzzforge/targets/
```

### Workflow Hangs

**Issue**: Workflow starts but never completes

**Check**:
1. Worker logs for errors: `docker logs fuzzforge-worker-rust`
2. Activity timeouts in workflow code
3. Target file actually exists in MinIO

## Scaling

### Add More Workers

```bash
# Scale rust workers horizontally
docker-compose -f docker-compose.temporal.yaml up -d --scale worker-rust=3

# Verify all workers are running
docker ps | grep worker-rust
```

### Increase Concurrent Activities

Edit `docker-compose.temporal.yaml`:

```yaml
worker-rust:
  environment:
    MAX_CONCURRENT_ACTIVITIES: 10  # Increase from 5
```

```bash
# Apply changes
docker-compose -f docker-compose.temporal.yaml up -d worker-rust
```

## Cleanup

```bash
# Stop all services
docker-compose -f docker-compose.temporal.yaml down

# Remove volumes (WARNING: deletes all data)
docker-compose -f docker-compose.temporal.yaml down -v

# Remove everything including images
docker-compose -f docker-compose.temporal.yaml down -v --rmi all
```

## Next Steps

1. **Add More Workflows**: Create workflows in `backend/toolbox/workflows/`
2. **Add More Verticals**: Create new worker types (android, web, etc.) - see `workers/README.md`
3. **Integrate with Backend**: Update FastAPI backend to use Temporal client
4. **Update CLI**: Modify `ff` CLI to work with Temporal workflows

## Useful Commands

```bash
# View all logs
docker-compose -f docker-compose.temporal.yaml logs -f

# View specific service logs
docker-compose -f docker-compose.temporal.yaml logs -f worker-rust

# Restart a service
docker-compose -f docker-compose.temporal.yaml restart worker-rust

# Check service status
docker-compose -f docker-compose.temporal.yaml ps

# Execute command in worker
docker exec -it fuzzforge-worker-rust bash

# View worker Python environment
docker exec fuzzforge-worker-rust pip list

# Check workflow discovery manually
docker exec fuzzforge-worker-rust python -c "
from pathlib import Path
import yaml
for w in Path('/app/toolbox/workflows').iterdir():
    if w.is_dir():
        meta = w / 'metadata.yaml'
        if meta.exists():
            print(f'{w.name}: {yaml.safe_load(meta.read_text()).get(\"vertical\")}')"
```

## Architecture Overview

```
┌─────────────┐     ┌──────────────┐     ┌──────────────┐
│  Temporal   │────▶│  Task Queue  │────▶│ Worker-Rust  │
│   Server    │     │  rust-queue  │     │ (Long-lived) │
└─────────────┘     └──────────────┘     └──────┬───────┘
       │                                        │
       ▼                                        ▼
┌─────────────┐                         ┌──────────────┐
│  Postgres   │                         │    MinIO     │
│  (State)    │                         │  (Storage)   │
└─────────────┘                         └──────────────┘
                                               │
                                        ┌──────┴──────┐
                                        │             │
                                   ┌────▼────┐   ┌────▼─────┐
                                   │ Targets │   │ Results  │
                                   └─────────┘   └──────────┘
```

## Support

- **Documentation**: See `ARCHITECTURE.md` for detailed design
- **Worker Guide**: See `workers/README.md` for adding verticals
- **Issues**: Open GitHub issue with logs and steps to reproduce

146 README.md
@@ -6,11 +6,11 @@
<p align="center"><strong>AI-powered workflow automation and AI Agents for AppSec, Fuzzing & Offensive Security</strong></p>

<p align="center">
<a href="https://discord.gg/8XEX33UUwZ/"><img src="https://img.shields.io/discord/1420767905255133267?logo=discord&label=Discord" alt="Discord"></a>
<a href="https://discord.com/invite/acqv9FVG"><img src="https://img.shields.io/discord/1420767905255133267?logo=discord&label=Discord" alt="Discord"></a>
<a href="LICENSE"><img src="https://img.shields.io/badge/license-BSL%20%2B%20Apache-orange" alt="License: BSL + Apache"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11%2B-blue" alt="Python 3.11+"/></a>
<a href="https://fuzzforge.ai"><img src="https://img.shields.io/badge/Website-fuzzforge.ai-blue" alt="Website"/></a>
<img src="https://img.shields.io/badge/version-0.7.3-green" alt="Version">
<img src="https://img.shields.io/badge/version-0.6.0-green" alt="Version">
<a href="https://github.com/FuzzingLabs/fuzzforge_ai/stargazers"><img src="https://img.shields.io/github/stars/FuzzingLabs/fuzzforge_ai?style=social" alt="GitHub Stars"></a>

</p>
@@ -41,29 +41,6 @@
FuzzForge is **open source**, built to empower security teams, researchers, and the community.

> 🚧 FuzzForge is under active development. Expect breaking changes.
>
> **Note:** Fuzzing workflows (`atheris_fuzzing`, `cargo_fuzzing`, `ossfuzz_campaign`) are in early development. OSS-Fuzz integration is under heavy active development. For stable workflows, use: `security_assessment`, `gitleaks_detection`, `trufflehog_detection`, or `llm_secret_detection`.

---

## Demo - Manual Workflow Setup

![FuzzForge Demo](https://github.com/FuzzingLabs/fuzzforge_landing/raw/main/fuzzforge_demo_manual.gif)

_Setting up and running security workflows through the interface_

👉 More installation options in the [Documentation](https://docs.fuzzforge.ai).

---

## ✨ Key Features

- 🤖 **AI Agents for Security** – Specialized agents for AppSec, reversing, and fuzzing
- 🛠 **Workflow Automation** – Define & execute AppSec workflows as code
- 📈 **Vulnerability Research at Scale** – Rediscover 1-days & find 0-days with automation
- 🔗 **Fuzzer Integration** – Atheris (Python), cargo-fuzz (Rust), OSS-Fuzz campaigns
- 🌐 **Community Marketplace** – Share workflows, corpora, PoCs, and modules
- 🔒 **Enterprise Ready** – Team/Corp cloud tiers for scaling offensive security

---

@@ -77,20 +54,14 @@ If you find FuzzForge useful, please star the repo to support development 🚀

---

## 🔍 Secret Detection Benchmarks
## ✨ Key Features

FuzzForge includes three secret detection workflows benchmarked on a controlled dataset of **32 documented secrets** (12 Easy, 10 Medium, 10 Hard):

| Tool | Recall | Secrets Found | Speed |
|------|--------|---------------|-------|
| **LLM (gpt-5-mini)** | **84.4%** | 41 | 618s |
| **LLM (gpt-4o-mini)** | 56.2% | 30 | 297s |
| **Gitleaks** | 37.5% | 12 | 5s |
| **TruffleHog** | 0.0% | 1 | 5s |

📊 [Full benchmark results and analysis](backend/benchmarks/by_category/secret_detection/results/comparison_report.md)

The LLM-based detector excels at finding obfuscated and hidden secrets through semantic analysis, while pattern-based tools (Gitleaks) offer speed for standard secret formats. (Recall is measured against the 32 ground-truth secrets, so 84.4% ≈ 27/32; "Secrets Found" counts total reported findings, which can exceed the ground-truth count.)
- 🤖 **AI Agents for Security** – Specialized agents for AppSec, reversing, and fuzzing
|
||||
- 🛠 **Workflow Automation** – Define & execute AppSec workflows as code
|
||||
- 📈 **Vulnerability Research at Scale** – Rediscover 1-days & find 0-days with automation
|
||||
- 🔗 **Fuzzer Integration** – AFL, Honggfuzz, AFLnet, StateAFL & more
|
||||
- 🌐 **Community Marketplace** – Share workflows, corpora, PoCs, and modules
|
||||
- 🔒 **Enterprise Ready** – Team/Corp cloud tiers for scaling offensive security
|
||||
|
||||
---
|
||||
|
||||
@@ -110,22 +81,38 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
**Docker**
|
||||
For containerized workflows, see the [Docker Installation Guide](https://docs.docker.com/get-docker/).
|
||||
|
||||
#### Configure AI Agent API Keys (Optional)
|
||||
#### Configure Docker Daemon
|
||||
|
||||
For AI-powered workflows, configure your LLM API keys:
|
||||
Before running `docker compose up`, configure Docker to allow insecure registries (required for the local registry).
|
||||
|
||||
```bash
|
||||
cp volumes/env/.env.template volumes/env/.env
|
||||
# Edit volumes/env/.env and add your API keys (OpenAI, Anthropic, Google, etc.)
|
||||
# Add your key to LITELLM_GEMINI_API_KEY
|
||||
Add the following to your Docker daemon configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"insecure-registries": [
|
||||
"localhost:5000",
|
||||
"host.docker.internal:5001",
|
||||
"registry:5000"
|
||||
]
|
||||
}
|
||||
```
|
||||
> Don't change the OPENAI_API_KEY default value, as it is used for the LLM proxy.

This is required for:
- `llm_secret_detection` workflow
- AI agent features (`ff ai agent`)
**macOS (Docker Desktop):**
1. Open Docker Desktop
2. Go to Settings → Docker Engine
3. Add the `insecure-registries` configuration to the JSON
4. Click "Apply & Restart"

Basic security workflows (gitleaks, trufflehog, security_assessment) work without this configuration.
**Linux:**
1. Edit `/etc/docker/daemon.json` (create if it doesn't exist):
```bash
sudo nano /etc/docker/daemon.json
```
2. Add the configuration above
3. Restart Docker:
```bash
sudo systemctl restart docker
```
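
To confirm the daemon picked up the change, you can inspect the daemon info (a generic Docker check, not FuzzForge-specific; output layout varies by Docker version):

```bash
# The registries configured above should be listed under "Insecure Registries:"
docker info | grep -A 4 "Insecure Registries"
```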

### CLI Installation

@@ -144,55 +131,40 @@ uv tool install --python python3.12 .

## ⚡ Quickstart

Run your first workflow with **Temporal orchestration** and **automatic file upload**:
Run your first workflow:

```bash
# 1. Clone the repo
git clone https://github.com/fuzzinglabs/fuzzforge_ai.git
cd fuzzforge_ai

# 2. Copy the default LLM env config
cp volumes/env/.env.template volumes/env/.env

# 3. Start FuzzForge with Temporal
# 2. Build & run with Docker
# Set registry host for your OS (local registry is mandatory)
# macOS/Windows (Docker Desktop):
export REGISTRY_HOST=host.docker.internal
# Linux (default):
# export REGISTRY_HOST=localhost
docker compose up -d

# 4. Start the Python worker (needed for security_assessment workflow)
docker compose up -d worker-python
```

> The first launch can take 2-3 minutes for services to initialize ☕
>
> Workers don't auto-start by default (saves RAM). Start the worker you need before running workflows.

**Workflow-to-Worker Quick Reference:**

| Workflow | Worker Required | Startup Command |
|----------|----------------|-----------------|
| `security_assessment`, `python_sast`, `llm_analysis`, `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` |
| `android_static_analysis` | worker-android | `docker compose up -d worker-android` |
| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` |
| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` |
| `llm_secret_detection`, `trufflehog_detection`, `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` |
> The first launch can take 5-10 minutes due to Docker image building - a good time for a coffee break ☕

```bash
# 5. Run your first workflow (files are automatically uploaded)
cd test_projects/vulnerable_app/
fuzzforge init # Initialize FuzzForge project
ff workflow run security_assessment . # Start workflow - CLI uploads files automatically!

# The CLI will:
# - Detect the local directory
# - Create a compressed tarball
# - Upload to backend (via MinIO)
# - Start the workflow on vertical worker
# 3. Run your first workflow
cd test_projects/vulnerable_app/ # Go into the test directory
fuzzforge init # Init a fuzzforge project
ff workflow run security_assessment . # Start a workflow (you can also use ff command)
```

**What's running:**
- **Temporal**: Workflow orchestration (UI at http://localhost:8080)
- **MinIO**: File storage for targets (Console at http://localhost:9001)
- **Vertical Workers**: Pre-built workers with security toolchains
- **Backend API**: FuzzForge REST API (http://localhost:8000)
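
Once the stack is up, a quick sanity check is to curl the endpoints listed above (plain `curl` against the documented URLs; the exact status codes are not guaranteed here, only that the services answer):

```bash
curl -sI http://localhost:8080 | head -n 1   # Temporal UI
curl -sI http://localhost:9001 | head -n 1   # MinIO console
curl -sI http://localhost:8000 | head -n 1   # FuzzForge backend API
```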

### Manual Workflow Setup



_Setting up and running security workflows through the interface_

👉 More installation options in the [Documentation](https://docs.fuzzforge.ai).

---

## AI-Powered Workflow Execution

@@ -204,7 +176,7 @@ _AI agents automatically analyzing code and providing security insights_

- 🌐 [Website](https://fuzzforge.ai)
- 📖 [Documentation](https://docs.fuzzforge.ai)
- 💬 [Community Discord](https://discord.gg/8XEX33UUwZ)
- 💬 [Community Discord](https://discord.com/invite/acqv9FVG)
- 🎓 [FuzzingLabs Academy](https://academy.fuzzinglabs.com/?coupon=GITHUB_FUZZFORGE)

---

@@ -233,7 +205,7 @@ Planned features and improvements:

- ☁️ Multi-tenant SaaS platform with team collaboration
- 📊 Advanced reporting & analytics

👉 Follow updates in the [GitHub issues](../../issues) and [Discord](https://discord.gg/8XEX33UUwZ)
👉 Follow updates in the [GitHub issues](../../issues) and [Discord](https://discord.com/invite/acqv9FVG).

---

@@ -1,6 +1,6 @@

# FuzzForge AI Module

FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge security platform through natural language. It orchestrates local tooling, registered Agent-to-Agent (A2A) peers, and the Temporal-powered backend while keeping long-running context in memory and project knowledge graphs.
FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge security platform through natural language. It orchestrates local tooling, registered Agent-to-Agent (A2A) peers, and the Prefect-powered backend while keeping long-running context in memory and project knowledge graphs.

## Quick Start

@@ -32,7 +32,7 @@ FuzzForge AI is the multi-agent layer that lets you operate the FuzzForge securi

```bash
fuzzforge ai agent
```
Keep the backend running (Temporal API at `FUZZFORGE_MCP_URL`) so workflow commands succeed.
Keep the backend running (Prefect API at `FUZZFORGE_MCP_URL`) so workflow commands succeed.

## Everyday Workflow

@@ -61,7 +61,7 @@ Inside `fuzzforge ai agent` you can mix slash commands and free-form prompts:

/sendfile SecurityAgent src/report.md "Please review"
You> route_to SecurityAnalyzer: scan ./backend for secrets
You> run fuzzforge workflow static_analysis_scan on ./test_projects/demo
You> search project knowledge for "temporal status" using INSIGHTS
You> search project knowledge for "prefect status" using INSIGHTS
```

Artifacts created during the conversation are served from `.fuzzforge/artifacts/` and exposed through the A2A HTTP API.
@@ -84,7 +84,7 @@ Use these to validate the setup once the agent shell is running:

- `run fuzzforge workflow static_analysis_scan on ./backend with target_branch=main`
- `show findings for that run once it finishes`
- `refresh the project knowledge graph for ./backend`
- `search project knowledge for "temporal readiness" using INSIGHTS`
- `search project knowledge for "prefect readiness" using INSIGHTS`
- `/recall terraform secrets`
- `/memory status`
- `ROUTE_TO SecurityAnalyzer: audit infrastructure_vulnerable`

@@ -1,9 +0,0 @@

__pycache__
*.pyc
*.pyo
*.pytest_cache
*.coverage
coverage.xml
build/
dist/
.env

@@ -1,82 +0,0 @@

# Architecture Overview

This package is a minimal ADK agent that keeps runtime behaviour and A2A access in separate layers so it can double as boilerplate.

## Directory Layout

```text
agent_with_adk_format/
├── __init__.py               # Exposes root_agent for ADK runners
├── a2a_hot_swap.py           # JSON-RPC helper for model/prompt swaps
├── README.md, QUICKSTART.md  # Operational docs
├── ARCHITECTURE.md           # This document
├── .env                      # Active environment (gitignored)
├── .env.example              # Environment template
└── litellm_agent/
    ├── agent.py      # Root Agent definition (LiteLLM shell)
    ├── callbacks.py  # before_agent / before_model hooks
    ├── config.py     # Defaults, state keys, control prefix
    ├── control.py    # HOTSWAP command parsing/serialization
    ├── state.py      # Session state wrapper + LiteLLM factory
    ├── tools.py      # set_model / set_prompt / get_config
    ├── prompts.py    # Base instruction text
    └── agent.json    # A2A agent card (served under /.well-known)
```

```mermaid
flowchart TD
    subgraph ADK Runner
        A["adk api_server / adk web / adk run"]
        B["agent_with_adk_format/__init__.py"]
        C["litellm_agent/agent.py (root_agent)"]
        D["HotSwapState (state.py)"]
        E["LiteLlm(model, provider)"]
    end

    subgraph Session State
        S1[app:litellm_agent/model]
        S2[app:litellm_agent/provider]
        S3[app:litellm_agent/prompt]
    end

    A --> B --> C
    C --> D
    D -->|instantiate| E
    D --> S1
    D --> S2
    D --> S3
    E --> C
```

## Runtime Flow (ADK Runners)

1. **Startup**: `adk api_server`/`adk web` imports `agent_with_adk_format`, which exposes `root_agent` from `litellm_agent/agent.py`. `.env` at package root is loaded before the runner constructs the agent.
2. **Session State**: `callbacks.py` and `tools.py` read/write through `state.py`. We store `model`, `provider`, and `prompt` keys (prefixed `app:litellm_agent/…`) which persist across turns; see the sketch after this list.
3. **Instruction Generation**: `provide_instruction` composes the base persona from `prompts.py` plus any stored prompt override. The current model/provider is appended for observability.
4. **Model Hot-Swap**: When a control message is detected (`[HOTSWAP:MODEL:…]`) the callback parses it via `control.py`, updates the session state, and calls `state.apply_state_to_agent` to instantiate a new `LiteLlm(model=…, custom_llm_provider=…)`. ADK runners reuse that instance for subsequent turns.
5. **Prompt Hot-Swap**: Similar path (`set_prompt` tool/callback) updates state; the dynamic instruction immediately reflects the change.
6. **Config Reporting**: Both the callback and the tool surface the summary string produced by `HotSwapState.describe()`, ensuring CLI, A2A, and UI all show the same data.
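
A minimal sketch of the session-state round trip from step 2, using a plain dict in place of the ADK state mapping (the dict is the only assumption; in a runner the mapping comes from `callback_context.state`):

```python
from litellm_agent.state import HotSwapState

session_state = {}  # stand-in for the ADK session state mapping

# Read current values (falls back to DEFAULT_MODEL/DEFAULT_PROVIDER when keys are absent)
state = HotSwapState.from_mapping(session_state)

# Mutate and write back under the app:litellm_agent/... prefixed keys
state.model = "gpt-4o"
state.provider = "openai"
state.persist(session_state)

print(session_state["app:litellm_agent/model"])  # gpt-4o
```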

## A2A Integration

- `agent.json` defines the agent card and enables ADK to register `/a2a/litellm_agent` routes when launched with `--a2a`.
- `a2a_hot_swap.py` uses `a2a.client.A2AClient` to programmatically send control messages and user text via JSON-RPC. It supports streaming when available and falls back to blocking requests otherwise.

```mermaid
sequenceDiagram
    participant Client as a2a_hot_swap.py
    participant Server as ADK API Server
    participant Agent as root_agent

    Client->>Server: POST /a2a/litellm_agent (message/stream or message/send)
    Server->>Agent: Invoke callbacks/tools
    Agent->>Server: Status / artifacts / final message
    Server->>Client: Streamed Task events
    Client->>Client: Extract text & print summary
```

## Extending the Boilerplate

- Add tools under `litellm_agent/tools.py` and register them in `agent.py` to expose new capabilities.
- Use `state.py` to track additional configuration or session data (store under your own prefix to avoid collisions).
- When layering business logic, prefer expanding callbacks or adding higher-level agents while leaving the hot-swap mechanism untouched for reuse.

@@ -1,71 +0,0 @@

# Docker & Kubernetes Deployment

## Local Docker

Build from the repository root:

```bash
docker build -t litellm-hot-swap:latest agent_with_adk_format
```

Run the container (port 8000, inject provider keys via env file or flags):

```bash
docker run \
  -p 8000:8000 \
  --env-file agent_with_adk_format/.env \
  litellm-hot-swap:latest
```

The container serves Uvicorn on `http://localhost:8000`. Update `.env` (or pass `-e KEY=value`) before launching if you plan to hot-swap providers.

## Kubernetes (example manifest)

Use the same image, optionally pushed to a registry (`docker tag` + `docker push`). A simple Deployment/Service pair:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-hot-swap
spec:
  replicas: 1
  selector:
    matchLabels:
      app: litellm-hot-swap
  template:
    metadata:
      labels:
        app: litellm-hot-swap
    spec:
      containers:
        - name: server
          image: <REGISTRY_URI>/litellm-hot-swap:latest
          ports:
            - containerPort: 8000
          env:
            - name: PORT
              value: "8000"
            - name: LITELLM_MODEL
              value: gemini/gemini-2.0-flash-001
            # Add provider keys as needed
            # - name: OPENAI_API_KEY
            #   valueFrom:
            #     secretKeyRef:
            #       name: litellm-secrets
            #       key: OPENAI_API_KEY
---
apiVersion: v1
kind: Service
metadata:
  name: litellm-hot-swap
spec:
  type: LoadBalancer
  selector:
    app: litellm-hot-swap
  ports:
    - port: 80
      targetPort: 8000
```

Apply with `kubectl apply -f deployment.yaml`. Provide secrets via `env` or Kubernetes Secrets.

@@ -1,24 +0,0 @@

# syntax=docker/dockerfile:1

FROM python:3.11-slim AS base

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=8000

WORKDIR /app

COPY requirements.txt ./requirements.txt
RUN pip install --upgrade pip && pip install -r requirements.txt

COPY . /app/agent_with_adk_format
WORKDIR /app/agent_with_adk_format
ENV PYTHONPATH=/app

# Copy and set up entrypoint
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh

ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

@@ -1,61 +0,0 @@

# Quick Start Guide

## Launch the Agent

From the repository root you can expose the agent through any ADK entry point:

```bash
# A2A / HTTP server
adk api_server --a2a --port 8000 agent_with_adk_format

# Browser UI
adk web agent_with_adk_format

# Interactive terminal
adk run agent_with_adk_format
```

The A2A server exposes the JSON-RPC endpoint at `http://localhost:8000/a2a/litellm_agent`.

## Hot-Swap from the Command Line

Use the bundled helper to change model and prompt via A2A without touching the UI:

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --config \
  --context demo-session
```

The script sends the control messages for you and prints the server’s responses. The `--context` flag lets you reuse the same conversation across multiple invocations.

### Follow-up Messages

Once the swaps are applied you can send a user message on the same session:

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --context demo-session \
  --message "Summarise the current configuration in five words."
```

### Clearing the Prompt

```bash
python agent_with_adk_format/a2a_hot_swap.py \
  --context demo-session \
  --prompt "" \
  --config
```

## Control Messages (for reference)

Behind the scenes the helper sends plain text messages understood by the callbacks:

- `[HOTSWAP:MODEL:provider/model]`
- `[HOTSWAP:PROMPT:text]`
- `[HOTSWAP:GET_CONFIG]`

You can craft the same messages from any A2A client if you prefer.
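
For example, the same strings can be built with the package's own helper rather than typed by hand (shown with `litellm_agent.control`; any A2A client can then deliver them as ordinary user text):

```python
from litellm_agent.control import HotSwapCommand, build_control_message

print(build_control_message(HotSwapCommand.MODEL, "openai/gpt-4o"))      # [HOTSWAP:MODEL:openai/gpt-4o]
print(build_control_message(HotSwapCommand.PROMPT, "You are concise."))  # [HOTSWAP:PROMPT:You are concise.]
print(build_control_message(HotSwapCommand.GET_CONFIG))                  # [HOTSWAP:GET_CONFIG]
```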

@@ -1,365 +0,0 @@

# LiteLLM Agent with Hot-Swap Support

A flexible AI agent powered by LiteLLM that supports runtime hot-swapping of models and system prompts. Compatible with ADK and A2A protocols.

## Features

- 🔄 **Hot-Swap Models**: Change LLM models on-the-fly without restarting
- 📝 **Dynamic Prompts**: Update system prompts during conversation
- 🌐 **Multi-Provider Support**: Works with OpenAI, Anthropic, Google, OpenRouter, and more
- 🔌 **A2A Compatible**: Can be served as an A2A agent
- 🛠️ **ADK Integration**: Run with `adk web`, `adk run`, or `adk api_server`

## Architecture

```
task_agent/
├── __init__.py       # Exposes root_agent for ADK
├── a2a_hot_swap.py   # JSON-RPC helper for hot-swapping
├── README.md         # This guide
├── QUICKSTART.md     # Quick-start walkthrough
├── .env              # Active environment (gitignored)
├── .env.example      # Environment template
└── litellm_agent/
    ├── __init__.py
    ├── agent.py      # Main agent implementation
    ├── agent.json    # A2A agent card
    ├── callbacks.py  # ADK callbacks
    ├── config.py     # Defaults and state keys
    ├── control.py    # HOTSWAP message helpers
    ├── prompts.py    # Base instruction
    ├── state.py      # Session state utilities
    └── tools.py      # set_model / set_prompt / get_config
```

## Setup

### 1. Environment Configuration

Copying the example file is optional: the repository already ships with a root-level `.env` seeded with defaults. Adjust the values at the package root:
```bash
cd task_agent
# Optionally refresh from the template
# cp .env.example .env
```

Edit `.env` (or `.env.example`) and add your proxy + API keys. The agent must be restarted after changes so the values are picked up:
```bash
# Route every request through the proxy container (use http://localhost:10999 from the host)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000

# Default model + provider the agent boots with
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai

# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default

# Upstream keys stay inside the proxy. Store real secrets under the LiteLLM
# aliases and the bootstrapper mirrors them into .env.litellm for the proxy container.
LITELLM_OPENAI_API_KEY=your_real_openai_api_key
LITELLM_ANTHROPIC_API_KEY=your_real_anthropic_key
LITELLM_GEMINI_API_KEY=your_real_gemini_key
LITELLM_MISTRAL_API_KEY=your_real_mistral_key
LITELLM_OPENROUTER_API_KEY=your_real_openrouter_key
```

> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`).

The bootstrap container provisions LiteLLM, copies provider secrets into
`volumes/env/.env.litellm`, and rewrites `volumes/env/.env` with the virtual key.
Populate the `LITELLM_*_API_KEY` values before the first launch so the proxy can
reach your upstream providers as soon as the bootstrap script runs.

### 2. Install Dependencies

```bash
pip install "google-adk" "a2a-sdk[all]" "python-dotenv" "litellm"
```

### 3. Run in Docker

Build the container (this image can be pushed to any registry or run locally):

```bash
docker build -t litellm-hot-swap:latest task_agent
```

Provide environment configuration at runtime (either pass variables individually or mount a file):

```bash
docker run \
  -p 8000:8000 \
  --env-file task_agent/.env \
  litellm-hot-swap:latest
```

The container starts Uvicorn with the ADK app (`main.py`) listening on port 8000.

## Running the Agent

### Option 1: ADK Web UI (Recommended for Testing)

Start the web interface:
```bash
adk web task_agent
```

> **Tip:** before launching `adk web`/`adk run`/`adk api_server`, ensure the root-level `.env` contains valid API keys for any provider you plan to hot-swap to (e.g. set `OPENAI_API_KEY` before switching to `openai/gpt-4o`).

Open http://localhost:8000 in your browser and interact with the agent.

### Option 2: ADK Terminal

Run in terminal mode:
```bash
adk run task_agent
```

### Option 3: A2A API Server

Start as an A2A-compatible API server:
```bash
adk api_server --a2a --port 8000 task_agent
```

The agent will be available at: `http://localhost:8000/a2a/litellm_agent`

### Command-line helper

Use the bundled script to drive hot-swaps and user messages over A2A:

```bash
python task_agent/a2a_hot_swap.py \
  --url http://127.0.0.1:8000/a2a/litellm_agent \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --config \
  --context demo-session
```

To send a follow-up prompt in the same session (with a larger timeout for long answers):

```bash
python task_agent/a2a_hot_swap.py \
  --url http://127.0.0.1:8000/a2a/litellm_agent \
  --model openai gpt-4o \
  --prompt "You are concise." \
  --message "Give me a fuzzing harness." \
  --context demo-session \
  --timeout 120
```

> Ensure the corresponding provider keys are present in `.env` (or passed via environment variables) before issuing model swaps.

## Hot-Swap Tools

The agent provides three special tools:

### 1. `set_model` - Change the LLM Model

Change the model during conversation:

```
User: Use the set_model tool to change to gpt-4o with openai provider
Agent: ✅ Model configured to: openai/gpt-4o
       This change is active now!
```

**Parameters:**
- `model`: Model name (e.g., "gpt-4o", "claude-3-sonnet-20240229")
- `custom_llm_provider`: Optional provider prefix (e.g., "openai", "anthropic", "openrouter")

**Examples:**
- OpenAI: `set_model(model="gpt-4o", custom_llm_provider="openai")`
- Anthropic: `set_model(model="claude-3-sonnet-20240229", custom_llm_provider="anthropic")`
- Google: `set_model(model="gemini-2.0-flash-001", custom_llm_provider="gemini")`

### 2. `set_prompt` - Change System Prompt

Update the system instructions:

```
User: Use set_prompt to change my behavior to "You are a helpful coding assistant"
Agent: ✅ System prompt updated:
       You are a helpful coding assistant

       This change is active now!
```

### 3. `get_config` - View Configuration

Check current model and prompt:

```
User: Use get_config to show me your configuration
Agent: 📊 Current Configuration:
       ━━━━━━━━━━━━━━━━━━━━━━
       Model: openai/gpt-4o
       System Prompt: You are a helpful coding assistant
       ━━━━━━━━━━━━━━━━━━━━━━
```

## Testing

### Basic A2A Client Test

```bash
python agent/test_a2a_client.py
```

### Hot-Swap Functionality Test

```bash
python agent/test_hotswap.py
```

This will:
1. Check initial configuration
2. Query with default model
3. Hot-swap to GPT-4o
4. Verify model changed
5. Change system prompt
6. Test new prompt behavior
7. Hot-swap to Claude
8. Verify final configuration

### Command-Line Hot-Swap Helper

You can trigger model and prompt changes directly against the A2A endpoint without the interactive CLI:

```bash
# Start the agent first (in another terminal):
adk api_server --a2a --port 8000 task_agent

# Apply swaps via pure A2A calls
python task_agent/a2a_hot_swap.py --model openai gpt-4o --prompt "You are concise." --config
python task_agent/a2a_hot_swap.py --model anthropic claude-3-sonnet-20240229 --context shared-session --config
python task_agent/a2a_hot_swap.py --prompt "" --context shared-session --config  # Clear the prompt and show current state
```

`--model` accepts either a single `provider/model` string or a separate provider and model argument pair. Add `--context` if you want to reuse the same conversation across invocations. Use `--config` to dump the agent's configuration after the changes are applied.

## Supported Models

### OpenAI
- `openai/gpt-4o`
- `openai/gpt-4-turbo`
- `openai/gpt-3.5-turbo`

### Anthropic
- `anthropic/claude-3-opus-20240229`
- `anthropic/claude-3-sonnet-20240229`
- `anthropic/claude-3-haiku-20240307`

### Google
- `gemini/gemini-2.0-flash-001`
- `gemini/gemini-2.5-pro-exp-03-25`
- `vertex_ai/gemini-2.0-flash-001`

### OpenRouter
- `openrouter/anthropic/claude-3-opus`
- `openrouter/openai/gpt-4`
- Any model from OpenRouter catalog

## How It Works

### Session State
- Model and prompt settings are stored in session state
- Each session maintains its own configuration
- Settings persist across messages in the same session

### Hot-Swap Mechanism
1. Tools update session state with new model/prompt
2. `before_agent_callback` checks for changes
3. If model changed, directly updates: `agent.model = LiteLlm(model=new_model)`
4. Dynamic instruction function reads custom prompt from session state
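
Condensed into code, the swap path is roughly the following (a sketch built from the package's `HotSwapState` and `apply_state_to_agent`; `invocation_context` is supplied by ADK at runtime and is only named here):

```python
from litellm_agent.state import HotSwapState, apply_state_to_agent

def hot_swap(invocation_context, session_state: dict) -> None:
    # Steps 1-2: read the (possibly tool-updated) state for this session
    state = HotSwapState.from_mapping(session_state)

    # Step 3: rebuild the LiteLlm instance and attach it to the running agent
    # (apply_state_to_agent sets invocation_context.agent.model = state.instantiate_llm())
    apply_state_to_agent(invocation_context, state)

    # Step 4: the dynamic instruction provider reads state.prompt on the next turn
```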

### A2A Compatibility
- Agent card at `agent.json` defines A2A metadata
- Served at `/a2a/litellm_agent` endpoint
- Compatible with A2A client protocol

## Example Usage

### Interactive Session

```python
from a2a.client import A2AClient
import asyncio

async def chat():
    client = A2AClient("http://localhost:8000/a2a/litellm_agent")
    context_id = "my-session-123"

    # Start with default model
    async for msg in client.send_message("Hello!", context_id=context_id):
        print(msg)

    # Switch to GPT-4
    async for msg in client.send_message(
        "Use set_model with model gpt-4o and provider openai",
        context_id=context_id
    ):
        print(msg)

    # Continue with new model
    async for msg in client.send_message(
        "Help me write a function",
        context_id=context_id
    ):
        print(msg)

asyncio.run(chat())
```

## Troubleshooting

### Model Not Found
- Ensure API key for the provider is set in `.env`
- Check model name is correct for the provider
- Verify LiteLLM supports the model (https://docs.litellm.ai/docs/providers)

### Connection Refused
- Ensure the agent is running (`adk api_server --a2a task_agent`)
- Check the port matches (default: 8000)
- Verify no firewall blocking localhost

### Hot-Swap Not Working
- Check that you're using the same `context_id` across messages
- Ensure the tool is being called (not just asked to switch)
- Look for `🔄 Hot-swapped model to:` in server logs

## Development

### Adding New Tools

```python
async def my_tool(tool_ctx: ToolContext, param: str) -> str:
    """Your tool description."""
    # Access session state
    tool_ctx.state["my_key"] = "my_value"
    return "Tool result"

# Add to agent
root_agent = LlmAgent(
    # ...
    tools=[set_model, set_prompt, get_config, my_tool],
)
```

### Modifying Callbacks

```python
async def after_model_callback(
    callback_context: CallbackContext,
    llm_response: LlmResponse
) -> Optional[LlmResponse]:
    """Modify response after model generates it."""
    # Your logic here
    return llm_response
```

## License

Apache 2.0

@@ -1,5 +0,0 @@

"""Package entry point for the ADK-formatted hot swap agent."""

from .litellm_agent.agent import root_agent

__all__ = ["root_agent"]

@@ -1,224 +0,0 @@

#!/usr/bin/env python3
"""Minimal A2A client utility for hot-swapping LiteLLM model/prompt."""

from __future__ import annotations

import argparse
import asyncio
from typing import Optional
from uuid import uuid4

import httpx
from a2a.client import A2AClient
from a2a.client.errors import A2AClientHTTPError
from a2a.types import (
    JSONRPCErrorResponse,
    Message,
    MessageSendConfiguration,
    MessageSendParams,
    Part,
    Role,
    SendMessageRequest,
    SendStreamingMessageRequest,
    Task,
    TaskArtifactUpdateEvent,
    TaskStatusUpdateEvent,
    TextPart,
)

from litellm_agent.control import (
    HotSwapCommand,
    build_control_message,
    parse_model_spec,
    serialize_model_spec,
)

DEFAULT_URL = "http://localhost:8000/a2a/litellm_agent"


async def _collect_text(client: A2AClient, message: str, context_id: str) -> str:
    """Send a message and collect streamed agent text into a single string."""

    params = MessageSendParams(
        configuration=MessageSendConfiguration(blocking=True),
        message=Message(
            context_id=context_id,
            message_id=str(uuid4()),
            role=Role.user,
            parts=[Part(root=TextPart(text=message))],
        ),
    )

    stream_request = SendStreamingMessageRequest(id=str(uuid4()), params=params)
    buffer: list[str] = []
    try:
        async for response in client.send_message_streaming(stream_request):
            root = response.root
            if isinstance(root, JSONRPCErrorResponse):
                raise RuntimeError(f"A2A error: {root.error}")

            payload = root.result
            buffer.extend(_extract_text(payload))
    except A2AClientHTTPError as exc:
        if "text/event-stream" not in str(exc):
            raise

        send_request = SendMessageRequest(id=str(uuid4()), params=params)
        response = await client.send_message(send_request)
        root = response.root
        if isinstance(root, JSONRPCErrorResponse):
            raise RuntimeError(f"A2A error: {root.error}")
        payload = root.result
        buffer.extend(_extract_text(payload))

    if buffer:
        buffer = list(dict.fromkeys(buffer))
    return "\n".join(buffer).strip()


def _extract_text(
    result: Message | Task | TaskStatusUpdateEvent | TaskArtifactUpdateEvent,
) -> list[str]:
    texts: list[str] = []
    if isinstance(result, Message):
        if result.role is Role.agent:
            for part in result.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    elif isinstance(result, Task) and result.history:
        for msg in result.history:
            if msg.role is Role.agent:
                for part in msg.parts:
                    root_part = part.root
                    text = getattr(root_part, "text", None)
                    if text:
                        texts.append(text)
    elif isinstance(result, TaskStatusUpdateEvent):
        message = result.status.message
        if message:
            texts.extend(_extract_text(message))
    elif isinstance(result, TaskArtifactUpdateEvent):
        artifact = result.artifact
        if artifact and artifact.parts:
            for part in artifact.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    return texts


def _split_model_args(model_args: Optional[list[str]]) -> tuple[Optional[str], Optional[str]]:
    if not model_args:
        return None, None

    if len(model_args) == 1:
        return model_args[0], None

    provider = model_args[0]
    model = " ".join(model_args[1:])
    return model, provider


async def hot_swap(
    url: str,
    *,
    model_args: Optional[list[str]],
    provider: Optional[str],
    prompt: Optional[str],
    message: Optional[str],
    show_config: bool,
    context_id: Optional[str],
    timeout: float,
) -> None:
    """Execute the requested hot-swap operations against the A2A endpoint."""

    timeout_config = httpx.Timeout(timeout)
    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        session_id = context_id or str(uuid4())

        model, derived_provider = _split_model_args(model_args)

        if model:
            spec = parse_model_spec(model, provider=provider or derived_provider)
            payload = serialize_model_spec(spec)
            control_msg = build_control_message(HotSwapCommand.MODEL, payload)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Model response: {result or '(no response)'}")

        if prompt is not None:
            control_msg = build_control_message(HotSwapCommand.PROMPT, prompt)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Prompt response: {result or '(no response)'}")

        if show_config:
            control_msg = build_control_message(HotSwapCommand.GET_CONFIG)
            result = await _collect_text(client, control_msg, session_id)
            print(f"Config:\n{result or '(no response)'}")

        if message:
            result = await _collect_text(client, message, session_id)
            print(f"Message response: {result or '(no response)'}")

        print(f"Context ID: {session_id}")


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--url",
        default=DEFAULT_URL,
        help=f"A2A endpoint for the agent (default: {DEFAULT_URL})",
    )
    parser.add_argument(
        "--model",
        nargs="+",
        help="LiteLLM model spec: either 'provider/model' or '<provider> <model>'.",
    )
    parser.add_argument(
        "--provider",
        help="Optional LiteLLM provider when --model lacks a prefix.")
    parser.add_argument(
        "--prompt",
        help="Set the system prompt (omit to leave unchanged; empty string clears it).",
    )
    parser.add_argument(
        "--message",
        help="Send an additional user message after the swaps complete.")
    parser.add_argument(
        "--config",
        action="store_true",
        help="Print the agent configuration after performing swaps.")
    parser.add_argument(
        "--context",
        help="Optional context/session identifier to reuse across calls.")
    parser.add_argument(
        "--timeout",
        type=float,
        default=60.0,
        help="Request timeout (seconds) for A2A calls (default: 60).",
    )
    return parser.parse_args()


def main() -> None:
    args = parse_args()
    asyncio.run(
        hot_swap(
            args.url,
            model_args=args.model,
            provider=args.provider,
            prompt=args.prompt,
            message=args.message,
            show_config=args.config,
            context_id=args.context,
            timeout=args.timeout,
        )
    )


if __name__ == "__main__":
    main()
@@ -1,24 +0,0 @@

version: '3.8'

services:
  task-agent:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: fuzzforge-task-agent
    ports:
      - "10900:8000"
    env_file:
      - ../../../volumes/env/.env
    environment:
      - PORT=8000
      - PYTHONUNBUFFERED=1
    volumes:
      # Mount volumes/env for runtime config access
      - ../../../volumes/env:/app/config:ro
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -1,31 +0,0 @@

#!/bin/bash
set -e

# Wait for .env file to have keys (max 30 seconds)
echo "[task-agent] Waiting for virtual keys to be provisioned..."
for i in $(seq 1 30); do
    if [ -f /app/config/.env ]; then
        # Check if TASK_AGENT_API_KEY has a value (not empty)
        KEY=$(grep -E '^TASK_AGENT_API_KEY=' /app/config/.env | cut -d'=' -f2)
        if [ -n "$KEY" ] && [ "$KEY" != "" ]; then
            echo "[task-agent] Virtual keys found, loading environment..."
            # Export keys from .env file
            export TASK_AGENT_API_KEY="$KEY"
            export OPENAI_API_KEY=$(grep -E '^OPENAI_API_KEY=' /app/config/.env | cut -d'=' -f2)
            export FF_LLM_PROXY_BASE_URL=$(grep -E '^FF_LLM_PROXY_BASE_URL=' /app/config/.env | cut -d'=' -f2)
            echo "[task-agent] Loaded TASK_AGENT_API_KEY: ${TASK_AGENT_API_KEY:0:15}..."
            echo "[task-agent] Loaded FF_LLM_PROXY_BASE_URL: $FF_LLM_PROXY_BASE_URL"
            break
        fi
    fi
    echo "[task-agent] Keys not ready yet, waiting... ($i/30)"
    sleep 1
done

if [ -z "$TASK_AGENT_API_KEY" ]; then
    echo "[task-agent] ERROR: Virtual keys were not provisioned within 30 seconds!"
    exit 1
fi

echo "[task-agent] Starting uvicorn..."
exec "$@"
@@ -1,55 +0,0 @@

"""LiteLLM hot-swap agent package exports."""

from .agent import root_agent
from .callbacks import (
    before_agent_callback,
    before_model_callback,
    provide_instruction,
)
from .config import (
    AGENT_DESCRIPTION,
    AGENT_NAME,
    CONTROL_PREFIX,
    DEFAULT_MODEL,
    DEFAULT_PROVIDER,
    STATE_MODEL_KEY,
    STATE_PROVIDER_KEY,
    STATE_PROMPT_KEY,
)
from .control import (
    HotSwapCommand,
    ModelSpec,
    build_control_message,
    parse_control_message,
    parse_model_spec,
    serialize_model_spec,
)
from .state import HotSwapState, apply_state_to_agent
from .tools import HOTSWAP_TOOLS, get_config, set_model, set_prompt

__all__ = [
    "root_agent",
    "before_agent_callback",
    "before_model_callback",
    "provide_instruction",
    "AGENT_DESCRIPTION",
    "AGENT_NAME",
    "CONTROL_PREFIX",
    "DEFAULT_MODEL",
    "DEFAULT_PROVIDER",
    "STATE_MODEL_KEY",
    "STATE_PROVIDER_KEY",
    "STATE_PROMPT_KEY",
    "HotSwapCommand",
    "ModelSpec",
    "HotSwapState",
    "apply_state_to_agent",
    "build_control_message",
    "parse_control_message",
    "parse_model_spec",
    "serialize_model_spec",
    "HOTSWAP_TOOLS",
    "get_config",
    "set_model",
    "set_prompt",
]
@@ -1,24 +0,0 @@

{
  "name": "litellm_agent",
  "description": "A flexible AI agent powered by LiteLLM with hot-swappable models from OpenRouter and other providers",
  "url": "http://localhost:8000",
  "version": "1.0.0",
  "defaultInputModes": ["text/plain"],
  "defaultOutputModes": ["text/plain"],
  "capabilities": {
    "streaming": true
  },
  "skills": [
    {
      "id": "litellm-general-purpose",
      "name": "General Purpose AI Assistant",
      "description": "A flexible AI assistant that can help with various tasks using any LiteLLM-supported model. Supports runtime model and prompt hot-swapping.",
      "tags": ["ai", "assistant", "litellm", "flexible", "hot-swap"],
      "examples": [
        "Help me write a Python function",
        "Explain quantum computing",
        "Switch to Claude model and help me code"
      ]
    }
  ]
}
@@ -1,29 +0,0 @@

"""Root agent definition for the LiteLLM hot-swap shell."""

from __future__ import annotations

from google.adk.agents import Agent

from .callbacks import (
    before_agent_callback,
    before_model_callback,
    provide_instruction,
)
from .config import AGENT_DESCRIPTION, AGENT_NAME, DEFAULT_MODEL, DEFAULT_PROVIDER
from .state import HotSwapState
from .tools import HOTSWAP_TOOLS

_initial_state = HotSwapState(model=DEFAULT_MODEL, provider=DEFAULT_PROVIDER)

root_agent = Agent(
    name=AGENT_NAME,
    model=_initial_state.instantiate_llm(),
    description=AGENT_DESCRIPTION,
    instruction=provide_instruction,
    tools=HOTSWAP_TOOLS,
    before_agent_callback=before_agent_callback,
    before_model_callback=before_model_callback,
)


__all__ = ["root_agent"]
@@ -1,137 +0,0 @@

"""Callbacks and instruction providers for the LiteLLM hot-swap agent."""

from __future__ import annotations

import logging
from typing import Optional

from google.adk.agents.callback_context import CallbackContext
from google.adk.agents.readonly_context import ReadonlyContext
from google.adk.models.llm_request import LlmRequest
from google.genai import types

from .config import CONTROL_PREFIX, DEFAULT_MODEL
from .control import HotSwapCommand, parse_control_message, parse_model_spec
from .prompts import BASE_INSTRUCTION
from .state import HotSwapState, apply_state_to_agent

_LOGGER = logging.getLogger(__name__)


def provide_instruction(ctx: ReadonlyContext | None = None) -> str:
    """Compose the system instruction using the stored state."""

    state_mapping = getattr(ctx, "state", None)
    state = HotSwapState.from_mapping(state_mapping)
    prompt = state.prompt or BASE_INSTRUCTION
    return f"{prompt}\n\nActive model: {state.display_model}"


def _ensure_state(callback_context: CallbackContext) -> HotSwapState:
    state = HotSwapState.from_mapping(callback_context.state)
    state.persist(callback_context.state)
    return state


def _session_id(callback_context: CallbackContext) -> str:
    session = getattr(callback_context, "session", None)
    if session is None:
        session = getattr(callback_context._invocation_context, "session", None)
    return getattr(session, "id", "unknown-session")


async def before_model_callback(
    callback_context: CallbackContext,
    llm_request: LlmRequest,
) -> Optional[types.Content]:
    """Ensure outgoing requests use the active model from session state."""

    state = _ensure_state(callback_context)
    try:
        apply_state_to_agent(callback_context._invocation_context, state)
    except Exception:  # pragma: no cover - defensive logging
        _LOGGER.exception(
            "Failed to apply LiteLLM model '%s' (provider=%s) for session %s",
            state.model,
            state.provider,
            callback_context.session.id,
        )
    llm_request.model = state.model or DEFAULT_MODEL
    return None


async def before_agent_callback(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """Intercept hot-swap control messages and update session state."""

    user_content = callback_context.user_content
    if not user_content or not user_content.parts:
        return None

    first_part = user_content.parts[0]
    message_text = (first_part.text or "").strip()
    if not message_text.startswith(CONTROL_PREFIX):
        return None

    parsed = parse_control_message(message_text)
    if not parsed:
        return None

    command, payload = parsed
    state = _ensure_state(callback_context)

    if command is HotSwapCommand.MODEL:
        if not payload:
            return _render("❌ Missing model specification for hot-swap.")
        try:
            spec = parse_model_spec(payload)
        except ValueError as exc:
            return _render(f"❌ Invalid model specification: {exc}")

        state.model = spec.model
        state.provider = spec.provider
        state.persist(callback_context.state)
        try:
            apply_state_to_agent(callback_context._invocation_context, state)
        except Exception:  # pragma: no cover - defensive logging
            _LOGGER.exception(
                "Failed to apply LiteLLM model '%s' (provider=%s) for session %s",
                state.model,
                state.provider,
                _session_id(callback_context),
            )
        _LOGGER.info(
            "Hot-swapped model to %s (provider=%s, session=%s)",
            state.model,
            state.provider,
            _session_id(callback_context),
        )
        label = state.display_model
        return _render(f"✅ Model switched to: {label}")

    if command is HotSwapCommand.PROMPT:
        prompt_value = (payload or "").strip()
        state.prompt = prompt_value or None
        state.persist(callback_context.state)
        if state.prompt:
            _LOGGER.info(
                "Updated prompt for session %s", _session_id(callback_context)
            )
            return _render(
                "✅ System prompt updated. This change takes effect immediately."
            )
        return _render("✅ System prompt cleared. Reverting to default instruction.")

    if command is HotSwapCommand.GET_CONFIG:
        return _render(state.describe())

    expected = ", ".join(HotSwapCommand.choices())
    return _render(
        "⚠️ Unsupported hot-swap command. Available verbs: "
        f"{expected}."
    )


def _render(message: str) -> types.ModelContent:
    return types.ModelContent(parts=[types.Part(text=message)])
@@ -1,35 +0,0 @@

"""Configuration constants for the LiteLLM hot-swap agent."""

from __future__ import annotations

import os


def _normalize_proxy_base_url(raw_value: str | None) -> str | None:
    if not raw_value:
        return None
    cleaned = raw_value.strip()
    if not cleaned:
        return None
    # Avoid double slashes in downstream requests
    return cleaned.rstrip("/")


AGENT_NAME = "litellm_agent"
AGENT_DESCRIPTION = (
    "A LiteLLM-backed shell that exposes hot-swappable model and prompt controls."
)

DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None
PROXY_BASE_URL = _normalize_proxy_base_url(
    os.getenv("FF_LLM_PROXY_BASE_URL")
    or os.getenv("LITELLM_API_BASE")
    or os.getenv("LITELLM_BASE_URL")
)

STATE_PREFIX = "app:litellm_agent/"
STATE_MODEL_KEY = f"{STATE_PREFIX}model"
STATE_PROVIDER_KEY = f"{STATE_PREFIX}provider"
STATE_PROMPT_KEY = f"{STATE_PREFIX}prompt"

CONTROL_PREFIX = "[HOTSWAP"
@@ -1,99 +0,0 @@

"""Control message helpers for hot-swapping model and prompt."""

from __future__ import annotations

import re
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple

from .config import DEFAULT_PROVIDER


class HotSwapCommand(str, Enum):
    """Supported control verbs embedded in user messages."""

    MODEL = "MODEL"
    PROMPT = "PROMPT"
    GET_CONFIG = "GET_CONFIG"

    @classmethod
    def choices(cls) -> tuple[str, ...]:
        return tuple(item.value for item in cls)


@dataclass(frozen=True)
class ModelSpec:
    """Represents a LiteLLM model and optional provider."""

    model: str
    provider: Optional[str] = None


_COMMAND_PATTERN = re.compile(
    r"^\[HOTSWAP:(?P<verb>[A-Z_]+)(?::(?P<payload>.*))?\]$",
)


def parse_control_message(text: str) -> Optional[Tuple[HotSwapCommand, Optional[str]]]:
    """Return hot-swap command tuple when the string matches the control format."""

    match = _COMMAND_PATTERN.match(text.strip())
    if not match:
        return None

    verb = match.group("verb")
    if verb not in HotSwapCommand.choices():
        return None

    payload = match.group("payload")
    return HotSwapCommand(verb), payload if payload else None


def build_control_message(command: HotSwapCommand, payload: Optional[str] = None) -> str:
    """Serialise a control command for downstream clients."""

    if command not in HotSwapCommand:
        raise ValueError(f"Unsupported hot-swap command: {command}")
    if payload is None or payload == "":
        return f"[HOTSWAP:{command.value}]"
    return f"[HOTSWAP:{command.value}:{payload}]"


def parse_model_spec(model: str, provider: Optional[str] = None) -> ModelSpec:
    """Parse model/provider inputs into a structured ModelSpec."""

    candidate = (model or "").strip()
    if not candidate:
        raise ValueError("Model name cannot be empty")

    if provider:
        provider_clean = provider.strip()
        if not provider_clean:
            raise ValueError("Provider cannot be empty when supplied")
        if "/" in candidate:
            raise ValueError(
                "Provide either provider/model or use provider argument, not both",
            )
        return ModelSpec(model=candidate, provider=provider_clean)

    if "/" in candidate:
        provider_part, model_part = candidate.split("/", 1)
        provider_part = provider_part.strip()
        model_part = model_part.strip()
        if not provider_part or not model_part:
            raise ValueError("Model spec must include provider and model when using '/' format")
        return ModelSpec(model=model_part, provider=provider_part)

    if DEFAULT_PROVIDER:
        return ModelSpec(model=candidate, provider=DEFAULT_PROVIDER.strip())

    return ModelSpec(model=candidate, provider=None)


def serialize_model_spec(spec: ModelSpec) -> str:
    """Render a ModelSpec to provider/model string for control messages."""

    if spec.provider:
        return f"{spec.provider}/{spec.model}"
    return spec.model
@@ -1,9 +0,0 @@

"""System prompt templates for the LiteLLM agent."""

BASE_INSTRUCTION = (
    "You are a focused orchestration layer that relays between the user and a"
    " LiteLLM managed model."
    "\n- Keep answers concise and actionable."
    "\n- Prefer plain language; reveal intermediate reasoning only when helpful."
    "\n- Surface any tool results clearly with short explanations."
)
@@ -1,254 +0,0 @@
"""Session state utilities for the LiteLLM hot-swap agent."""

from __future__ import annotations

from dataclasses import dataclass
import os
from typing import Any, Mapping, MutableMapping, Optional

import httpx

from .config import (
    DEFAULT_MODEL,
    DEFAULT_PROVIDER,
    PROXY_BASE_URL,
    STATE_MODEL_KEY,
    STATE_PROMPT_KEY,
    STATE_PROVIDER_KEY,
)


@dataclass(slots=True)
class HotSwapState:
    """Lightweight view of the hot-swap session state."""

    model: str = DEFAULT_MODEL
    provider: Optional[str] = None
    prompt: Optional[str] = None

    @classmethod
    def from_mapping(cls, mapping: Optional[Mapping[str, Any]]) -> "HotSwapState":
        if not mapping:
            return cls()

        raw_model = mapping.get(STATE_MODEL_KEY, DEFAULT_MODEL)
        raw_provider = mapping.get(STATE_PROVIDER_KEY)
        raw_prompt = mapping.get(STATE_PROMPT_KEY)

        model = raw_model.strip() if isinstance(raw_model, str) else DEFAULT_MODEL
        provider = raw_provider.strip() if isinstance(raw_provider, str) else None
        if not provider and DEFAULT_PROVIDER:
            provider = DEFAULT_PROVIDER.strip() or None
        prompt = raw_prompt.strip() if isinstance(raw_prompt, str) else None
        return cls(
            model=model or DEFAULT_MODEL,
            provider=provider or None,
            prompt=prompt or None,
        )

    def persist(self, store: MutableMapping[str, object]) -> None:
        store[STATE_MODEL_KEY] = self.model
        if self.provider:
            store[STATE_PROVIDER_KEY] = self.provider
        else:
            store[STATE_PROVIDER_KEY] = None
        store[STATE_PROMPT_KEY] = self.prompt

    def describe(self) -> str:
        prompt_value = self.prompt if self.prompt else "(default prompt)"
        provider_value = self.provider if self.provider else "(default provider)"
        return (
            "📊 Current Configuration\n"
            "━━━━━━━━━━━━━━━━━━━━━━\n"
            f"Model: {self.model}\n"
            f"Provider: {provider_value}\n"
            f"System Prompt: {prompt_value}\n"
            "━━━━━━━━━━━━━━━━━━━━━━"
        )

    def instantiate_llm(self):
        """Create a LiteLlm instance for the current state."""

        from google.adk.models.lite_llm import LiteLlm  # Lazy import to avoid cycle
        from google.adk.models.lite_llm import LiteLLMClient
        from litellm.types.utils import Choices, Message, ModelResponse, Usage

        kwargs = {"model": self.model}
        if self.provider:
            kwargs["custom_llm_provider"] = self.provider
        if PROXY_BASE_URL:
            provider = (self.provider or DEFAULT_PROVIDER or "").lower()
            if provider and provider != "openai":
                kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}"
            else:
                kwargs["api_base"] = PROXY_BASE_URL
            kwargs.setdefault("api_key", os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY"))

        provider = (self.provider or DEFAULT_PROVIDER or "").lower()
        model_suffix = self.model.split("/", 1)[-1]
        use_responses = provider == "openai" and (
            model_suffix.startswith("gpt-5") or model_suffix.startswith("o1")
        )
        if use_responses:
            kwargs.setdefault("use_responses_api", True)

        llm = LiteLlm(**kwargs)

        if use_responses and PROXY_BASE_URL:

            class _ResponsesAwareClient(LiteLLMClient):
                def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str):
                    self._base_client = base_client
                    self._api_base = api_base.rstrip("/")
                    self._api_key = api_key

                async def acompletion(self, model, messages, tools, **kwargs):  # type: ignore[override]
                    use_responses_api = kwargs.pop("use_responses_api", False)
                    if not use_responses_api:
                        return await self._base_client.acompletion(
                            model=model,
                            messages=messages,
                            tools=tools,
                            **kwargs,
                        )

                    resolved_model = model
                    if "/" not in resolved_model:
                        resolved_model = f"openai/{resolved_model}"

                    payload = {
                        "model": resolved_model,
                        "input": _messages_to_responses_input(messages),
                    }

                    timeout = kwargs.get("timeout", 60)
                    headers = {
                        "Authorization": f"Bearer {self._api_key}",
                        "Content-Type": "application/json",
                    }

                    async with httpx.AsyncClient(timeout=timeout) as client:
                        response = await client.post(
                            f"{self._api_base}/v1/responses",
                            json=payload,
                            headers=headers,
                        )
                        try:
                            response.raise_for_status()
                        except httpx.HTTPStatusError as exc:
                            text = exc.response.text
                            raise RuntimeError(
                                f"LiteLLM responses request failed: {text}"
                            ) from exc
                        data = response.json()

                    text_output = _extract_output_text(data)
                    usage = data.get("usage", {})

                    return ModelResponse(
                        id=data.get("id"),
                        model=model,
                        choices=[
                            Choices(
                                finish_reason="stop",
                                index=0,
                                message=Message(role="assistant", content=text_output),
                                provider_specific_fields={"bifrost_response": data},
                            )
                        ],
                        usage=Usage(
                            prompt_tokens=usage.get("input_tokens"),
                            completion_tokens=usage.get("output_tokens"),
                            reasoning_tokens=usage.get("output_tokens_details", {}).get(
                                "reasoning_tokens"
                            ),
                            total_tokens=usage.get("total_tokens"),
                        ),
                    )

            llm.llm_client = _ResponsesAwareClient(
                llm.llm_client,
                PROXY_BASE_URL,
                os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY", ""),
            )

        return llm

    @property
    def display_model(self) -> str:
        if self.provider:
            return f"{self.provider}/{self.model}"
        return self.model


def apply_state_to_agent(invocation_context, state: HotSwapState) -> None:
    """Update the provided agent with a LiteLLM instance matching state."""

    agent = invocation_context.agent
    agent.model = state.instantiate_llm()


def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    inputs: list[dict[str, Any]] = []
    for message in messages:
        role = message.get("role", "user")
        content = message.get("content", "")
        text_segments: list[str] = []

        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict):
                    text = item.get("text") or item.get("content")
                    if text:
                        text_segments.append(str(text))
                elif isinstance(item, str):
                    text_segments.append(item)
        elif isinstance(content, str):
            text_segments.append(content)

        text = "\n".join(segment.strip() for segment in text_segments if segment)
        if not text:
            continue

        entry_type = "input_text"
        if role == "assistant":
            entry_type = "output_text"

        inputs.append(
            {
                "role": role,
                "content": [
                    {
                        "type": entry_type,
                        "text": text,
                    }
                ],
            }
        )

    if not inputs:
        inputs.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": "",
                    }
                ],
            }
        )
    return inputs


def _extract_output_text(response_json: dict[str, Any]) -> str:
    outputs = response_json.get("output", [])
    collected: list[str] = []
    for item in outputs:
        if isinstance(item, dict) and item.get("type") == "message":
            for part in item.get("content", []):
                if isinstance(part, dict) and part.get("type") == "output_text":
                    text = part.get("text", "")
                    if text:
                        collected.append(str(text))
    return "\n\n".join(collected).strip()
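For reference, a minimal sketch of the conversion the removed `_messages_to_responses_input` helper performed (the message values here are invented for illustration):

```python
# Hypothetical chat history in OpenAI chat-completions shape.
messages = [
    {"role": "user", "content": "Summarize the scan results."},
    {"role": "assistant", "content": [{"type": "text", "text": "Two findings."}]},
]

# The helper flattened each message into a Responses-API input entry,
# tagging user text as input_text and assistant text as output_text:
expected = [
    {"role": "user", "content": [{"type": "input_text", "text": "Summarize the scan results."}]},
    {"role": "assistant", "content": [{"type": "output_text", "text": "Two findings."}]},
]
```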
@@ -1,64 +0,0 @@
"""Tool definitions exposed to the LiteLLM agent."""

from __future__ import annotations

from typing import Optional

from google.adk.tools import FunctionTool, ToolContext

from .control import parse_model_spec
from .state import HotSwapState, apply_state_to_agent


async def set_model(
    model: str,
    *,
    provider: Optional[str] = None,
    tool_context: ToolContext,
) -> str:
    """Hot-swap the active LiteLLM model for this session."""

    spec = parse_model_spec(model, provider=provider)
    state = HotSwapState.from_mapping(tool_context.state)
    state.model = spec.model
    state.provider = spec.provider
    state.persist(tool_context.state)
    try:
        apply_state_to_agent(tool_context._invocation_context, state)
    except Exception as exc:  # pragma: no cover - defensive reporting
        return f"❌ Failed to apply model '{state.display_model}': {exc}"
    return f"✅ Model switched to: {state.display_model}"


async def set_prompt(prompt: str, *, tool_context: ToolContext) -> str:
    """Update or clear the system prompt used for this session."""

    state = HotSwapState.from_mapping(tool_context.state)
    prompt_value = prompt.strip()
    state.prompt = prompt_value or None
    state.persist(tool_context.state)
    if state.prompt:
        return "✅ System prompt updated. This change takes effect immediately."
    return "✅ System prompt cleared. Reverting to default instruction."


async def get_config(*, tool_context: ToolContext) -> str:
    """Return a summary of the current model and prompt configuration."""

    state = HotSwapState.from_mapping(tool_context.state)
    return state.describe()


HOTSWAP_TOOLS = [
    FunctionTool(set_model),
    FunctionTool(set_prompt),
    FunctionTool(get_config),
]


__all__ = [
    "set_model",
    "set_prompt",
    "get_config",
    "HOTSWAP_TOOLS",
]
@@ -1,13 +0,0 @@
"""ASGI entrypoint for containerized deployments."""

from pathlib import Path

from google.adk.cli.fast_api import get_fast_api_app

AGENT_DIR = Path(__file__).resolve().parent

app = get_fast_api_app(
    agents_dir=str(AGENT_DIR),
    web=False,
    a2a=True,
)
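A note on the removed entrypoint above: `get_fast_api_app` returns a standard ASGI app, so it can be served with uvicorn directly (the module path below is an assumption for illustration, not taken from this diff):

```bash
# Hypothetical module path; point uvicorn at wherever this entrypoint file lives.
uvicorn main:app --host 0.0.0.0 --port 8000
```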
@@ -1,4 +0,0 @@
google-adk
a2a-sdk[all]
litellm
python-dotenv
@@ -1,5 +0,0 @@
# LLM Proxy Integrations

This directory contains third-party source trees that were vendored only for reference while integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project.

See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents.
@@ -1,6 +1,6 @@
[project]
name = "fuzzforge-ai"
version = "0.7.3"
version = "0.6.0"
description = "FuzzForge AI orchestration module"
readme = "README.md"
requires-python = ">=3.11"

@@ -3,11 +6,6 @@ FuzzForge AI Module - Agent-to-Agent orchestration system

This module integrates the fuzzforge_ai components into FuzzForge,
providing intelligent AI agent capabilities for security analysis.

Usage:
    from fuzzforge_ai.a2a_wrapper import send_agent_task
    from fuzzforge_ai.agent import FuzzForgeAgent
    from fuzzforge_ai.config_manager import ConfigManager
"""
# Copyright (c) 2025 FuzzingLabs
#
@@ -21,4 +16,9 @@ Usage:
# Additional attribution and requirements are provided in the NOTICE file.


__version__ = "0.7.3"
__version__ = "0.6.0"

from .agent import FuzzForgeAgent
from .config_manager import ConfigManager

__all__ = ['FuzzForgeAgent', 'ConfigManager']
@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
"""
FuzzForge A2A Server
Run this to expose FuzzForge as an A2A-compatible agent
@@ -79,7 +78,7 @@ def create_a2a_app():
    print("\033[0m")  # Reset color

    # Create A2A app
    print("🚀 Starting FuzzForge A2A Server")
    print(f"🚀 Starting FuzzForge A2A Server")
    print(f" Model: {fuzzforge.model}")
    if fuzzforge.cognee_url:
        print(f" Memory: Cognee at {fuzzforge.cognee_url}")
@@ -87,7 +86,7 @@ def create_a2a_app():

    app = create_custom_a2a_app(fuzzforge.adk_agent, port=port, executor=fuzzforge.executor)

    print("\n✅ FuzzForge A2A Server ready!")
    print(f"\n✅ FuzzForge A2A Server ready!")
    print(f" Agent card: http://localhost:{port}/.well-known/agent-card.json")
    print(f" A2A endpoint: http://localhost:{port}/")
    print(f"\n📡 Other agents can register FuzzForge at: http://localhost:{port}")
@@ -102,7 +101,7 @@ def main():
    app = create_a2a_app()
    port = int(os.getenv('FUZZFORGE_PORT', 10100))

    print("\n🎯 Starting server with uvicorn...")
    print(f"\n🎯 Starting server with uvicorn...")
    uvicorn.run(app, host="127.0.0.1", port=port)



@@ -18,6 +18,7 @@ from typing import Optional, Union

from starlette.applications import Starlette
from starlette.responses import Response, FileResponse
from starlette.routing import Route

from google.adk.a2a.executor.a2a_agent_executor import A2aAgentExecutor
from google.adk.a2a.utils.agent_card_builder import AgentCardBuilder

@@ -1,288 +0,0 @@
"""
A2A Wrapper Module for FuzzForge
Programmatic interface to send tasks to A2A agents with custom model/prompt/context
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

from __future__ import annotations

from typing import Optional, Any
from uuid import uuid4

import httpx
from a2a.client import A2AClient
from a2a.client.errors import A2AClientHTTPError
from a2a.types import (
    JSONRPCErrorResponse,
    Message,
    MessageSendConfiguration,
    MessageSendParams,
    Part,
    Role,
    SendMessageRequest,
    SendStreamingMessageRequest,
    Task,
    TaskArtifactUpdateEvent,
    TaskStatusUpdateEvent,
    TextPart,
)


class A2ATaskResult:
    """Result from an A2A agent task"""

    def __init__(self, text: str, context_id: str, raw_response: Any = None):
        self.text = text
        self.context_id = context_id
        self.raw_response = raw_response

    def __str__(self) -> str:
        return self.text

    def __repr__(self) -> str:
        return f"A2ATaskResult(text={self.text[:50]}..., context_id={self.context_id})"


def _build_control_message(command: str, payload: Optional[str] = None) -> str:
    """Build a control message for hot-swapping agent configuration"""
    if payload is None or payload == "":
        return f"[HOTSWAP:{command}]"
    return f"[HOTSWAP:{command}:{payload}]"


def _extract_text(
    result: Message | Task | TaskStatusUpdateEvent | TaskArtifactUpdateEvent,
) -> list[str]:
    """Extract text content from A2A response objects"""
    texts: list[str] = []
    if isinstance(result, Message):
        if result.role is Role.agent:
            for part in result.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    elif isinstance(result, Task) and result.history:
        for msg in result.history:
            if msg.role is Role.agent:
                for part in msg.parts:
                    root_part = part.root
                    text = getattr(root_part, "text", None)
                    if text:
                        texts.append(text)
    elif isinstance(result, TaskStatusUpdateEvent):
        message = result.status.message
        if message:
            texts.extend(_extract_text(message))
    elif isinstance(result, TaskArtifactUpdateEvent):
        artifact = result.artifact
        if artifact and artifact.parts:
            for part in artifact.parts:
                root_part = part.root
                text = getattr(root_part, "text", None)
                if text:
                    texts.append(text)
    return texts


async def _send_message(
    client: A2AClient,
    message: str,
    context_id: str,
) -> str:
    """Send a message to the A2A agent and collect the response"""

    params = MessageSendParams(
        configuration=MessageSendConfiguration(blocking=True),
        message=Message(
            context_id=context_id,
            message_id=str(uuid4()),
            role=Role.user,
            parts=[Part(root=TextPart(text=message))],
        ),
    )

    stream_request = SendStreamingMessageRequest(id=str(uuid4()), params=params)
    buffer: list[str] = []

    try:
        async for response in client.send_message_streaming(stream_request):
            root = response.root
            if isinstance(root, JSONRPCErrorResponse):
                raise RuntimeError(f"A2A error: {root.error}")

            payload = root.result
            buffer.extend(_extract_text(payload))
    except A2AClientHTTPError as exc:
        if "text/event-stream" not in str(exc):
            raise

        # Fallback to non-streaming
        send_request = SendMessageRequest(id=str(uuid4()), params=params)
        response = await client.send_message(send_request)
        root = response.root
        if isinstance(root, JSONRPCErrorResponse):
            raise RuntimeError(f"A2A error: {root.error}")
        payload = root.result
        buffer.extend(_extract_text(payload))

    if buffer:
        buffer = list(dict.fromkeys(buffer))  # Remove duplicates
    return "\n".join(buffer).strip()


async def send_agent_task(
    url: str,
    message: str,
    *,
    model: Optional[str] = None,
    provider: Optional[str] = None,
    prompt: Optional[str] = None,
    context: Optional[str] = None,
    timeout: float = 120.0,
) -> A2ATaskResult:
    """
    Send a task to an A2A agent with optional model/prompt configuration.

    Args:
        url: A2A endpoint URL (e.g., "http://127.0.0.1:8000/a2a/litellm_agent")
        message: The task message to send to the agent
        model: Optional model name (e.g., "gpt-4o", "gemini-2.0-flash")
        provider: Optional provider name (e.g., "openai", "gemini")
        prompt: Optional system prompt to set before sending the message
        context: Optional context/session ID (generated if not provided)
        timeout: Request timeout in seconds (default: 120)

    Returns:
        A2ATaskResult with the agent's response text and context ID

    Example:
        >>> result = await send_agent_task(
        ...     url="http://127.0.0.1:8000/a2a/litellm_agent",
        ...     model="gpt-4o",
        ...     provider="openai",
        ...     prompt="You are concise.",
        ...     message="Give me a fuzzing harness.",
        ...     context="fuzzing",
        ...     timeout=120
        ... )
        >>> print(result.text)
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)

        # Set model if provided
        if model:
            model_spec = f"{provider}/{model}" if provider else model
            control_msg = _build_control_message("MODEL", model_spec)
            await _send_message(client, control_msg, context_id)

        # Set prompt if provided
        if prompt is not None:
            control_msg = _build_control_message("PROMPT", prompt)
            await _send_message(client, control_msg, context_id)

        # Send the actual task message
        response_text = await _send_message(client, message, context_id)

    return A2ATaskResult(
        text=response_text,
        context_id=context_id,
    )


async def get_agent_config(
    url: str,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Get the current configuration of an A2A agent.

    Args:
        url: A2A endpoint URL
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Configuration string from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        control_msg = _build_control_message("GET_CONFIG")
        config_text = await _send_message(client, control_msg, context_id)
    return config_text


async def hot_swap_model(
    url: str,
    model: str,
    provider: Optional[str] = None,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Hot-swap the model of an A2A agent without sending a task.

    Args:
        url: A2A endpoint URL
        model: Model name to switch to
        provider: Optional provider name
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Response from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        model_spec = f"{provider}/{model}" if provider else model
        control_msg = _build_control_message("MODEL", model_spec)
        response = await _send_message(client, control_msg, context_id)
    return response


async def hot_swap_prompt(
    url: str,
    prompt: str,
    context: Optional[str] = None,
    timeout: float = 60.0,
) -> str:
    """
    Hot-swap the system prompt of an A2A agent.

    Args:
        url: A2A endpoint URL
        prompt: System prompt to set
        context: Optional context/session ID
        timeout: Request timeout in seconds

    Returns:
        Response from the agent
    """
    timeout_config = httpx.Timeout(timeout)
    context_id = context or str(uuid4())

    async with httpx.AsyncClient(timeout=timeout_config) as http_client:
        client = A2AClient(url=url, httpx_client=http_client)
        control_msg = _build_control_message("PROMPT", prompt)
        response = await _send_message(client, control_msg, context_id)
    return response
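The removed wrapper's control protocol is plain text, which makes it easy to verify by hand; a small sketch of what `_build_control_message` produced:

```python
# With a payload, the command and payload are colon-joined inside the marker:
assert _build_control_message("MODEL", "openai/gpt-4o") == "[HOTSWAP:MODEL:openai/gpt-4o]"
# Without a payload, only the command is embedded:
assert _build_control_message("GET_CONFIG") == "[HOTSWAP:GET_CONFIG]"
```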
@@ -60,7 +60,7 @@ class FuzzForgeAgent:
            debug=os.getenv('FUZZFORGE_DEBUG', '0') == '1',
            memory_service=self.memory_service,
            session_persistence=os.getenv('SESSION_PERSISTENCE', 'inmemory'),
            fuzzforge_mcp_url=None,  # Disabled
            fuzzforge_mcp_url=os.getenv('FUZZFORGE_MCP_URL'),
        )

        # Create Hybrid Memory Manager (ADK + Cognee direct integration)

@@ -15,7 +15,7 @@ Defines what FuzzForge can do and how others can discover it


from dataclasses import dataclass
from typing import List, Dict, Any
from typing import List, Optional, Dict, Any

@dataclass
class AgentSkill:
@@ -172,6 +172,7 @@ def get_fuzzforge_agent_card(url: str = "http://localhost:10100") -> AgentCard:
        orchestration_skill,
        memory_skill,
        conversation_skill,
        workflow_automation_skill,
        agent_management_skill
    ],
    capabilities=fuzzforge_capabilities,

@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
"""FuzzForge Agent Executor - orchestrates workflows and delegation."""
# Copyright (c) 2025 FuzzingLabs
#
@@ -13,6 +12,7 @@


import asyncio
import base64
import time
import uuid
import json
@@ -174,7 +174,7 @@ class FuzzForgeExecutor:
            else:
                # Run now if no loop is running
                loop.run_until_complete(self._register_agent_async(url, name))
        except Exception:
        except:
            # Ignore auto-registration failures
            pass
    except Exception as e:
@@ -392,7 +392,7 @@ class FuzzForgeExecutor:
            user_email = f"project_{config.get_project_context()['project_id']}@fuzzforge.example"
            user = await get_user(user_email)
            cognee.set_user(user)
        except Exception:
        except Exception as e:
            pass  # User context not critical

        # Use cognee search directly for maximum flexibility
@@ -452,11 +452,11 @@ class FuzzForgeExecutor:
        try:
            user = await get_user(user_email)
            logger.info(f"Using existing user: {user_email}")
        except Exception:
        except:
            try:
                user = await create_user(user_email, user_tenant)
                logger.info(f"Created new user: {user_email}")
            except Exception:
            except:
                user = None

        if user:
@@ -583,6 +583,7 @@ class FuzzForgeExecutor:
            pattern: Glob pattern (e.g. '*.py', '**/*.js', '')
        """
        try:
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -647,6 +648,7 @@ class FuzzForgeExecutor:
            max_lines: Maximum lines to read (0 for all, default 200 for large files)
        """
        try:
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -709,6 +711,7 @@ class FuzzForgeExecutor:
        """
        try:
            import re
            from pathlib import Path

            # Get project root from config
            config = ProjectConfigManager()
@@ -754,7 +757,7 @@ class FuzzForgeExecutor:
                result = f"Found '{search_pattern}' in {len(matches)} locations (searched {files_searched} files):\n"
                result += "\n".join(matches[:50])
                if len(matches) >= 50:
                    result += "\n... (showing first 50 matches)"
                    result += f"\n... (showing first 50 matches)"
                return result
            else:
                return f"No matches found for '{search_pattern}' in {files_searched} files matching '{file_pattern}'"
@@ -831,15 +834,26 @@ class FuzzForgeExecutor:
        async def submit_security_scan_mcp(
            workflow_name: str,
            target_path: str = "",
            volume_mode: str = "",
            parameters: Dict[str, Any] | None = None,
            tool_context: ToolContext | None = None,
        ) -> Any:
            # Resolve the target path to an absolute path for validation
            # Normalise volume mode to supported values
            normalised_mode = (volume_mode or "ro").strip().lower().replace("-", "_")
            if normalised_mode in {"read_only", "readonly", "ro"}:
                normalised_mode = "ro"
            elif normalised_mode in {"read_write", "readwrite", "rw"}:
                normalised_mode = "rw"
            else:
                # Fall back to Prefect defaults if we can't recognise the input
                normalised_mode = "ro"

            # Resolve the target path to an absolute path for Prefect's validation
            resolved_path = target_path or "."
            try:
                resolved_path = str(Path(resolved_path).expanduser().resolve())
            except Exception:
                # If resolution fails, use the raw value
                # If resolution fails, Prefect will surface the validation error – use the raw value
                resolved_path = target_path

            # Ensure configuration objects default to dictionaries instead of None
@@ -872,6 +886,7 @@ class FuzzForgeExecutor:
            payload = {
                "workflow_name": workflow_name,
                "target_path": resolved_path,
                "volume_mode": normalised_mode,
                "parameters": cleaned_parameters,
            }
            result = await _call_fuzzforge_mcp("submit_security_scan_mcp", payload)
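The volume-mode normalisation added in the hunk above is deliberately forgiving; a standalone sketch of the mapping it implements:

```python
# "-" is rewritten to "_", known spellings collapse to ro/rw, and anything
# unrecognised (or empty) falls back to read-only.
for raw, expected in [("READ-ONLY", "ro"), ("readwrite", "rw"), ("", "ro"), ("banana", "ro")]:
    mode = (raw or "ro").strip().lower().replace("-", "_")
    if mode in {"read_only", "readonly", "ro"}:
        mode = "ro"
    elif mode in {"read_write", "readwrite", "rw"}:
        mode = "rw"
    else:
        mode = "ro"
    assert mode == expected
```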
@@ -1049,19 +1064,10 @@ class FuzzForgeExecutor:
            FunctionTool(get_task_list)
        ])


        # Create the agent with LiteLLM configuration
        llm_kwargs = {}
        api_key = os.getenv('OPENAI_API_KEY') or os.getenv('LLM_API_KEY')
        api_base = os.getenv('LLM_ENDPOINT') or os.getenv('LLM_API_BASE') or os.getenv('OPENAI_API_BASE')

        if api_key:
            llm_kwargs['api_key'] = api_key
        if api_base:
            llm_kwargs['api_base'] = api_base


        # Create the agent
        self.agent = LlmAgent(
            model=LiteLlm(model=self.model, **llm_kwargs),
            model=LiteLlm(model=self.model),
            name="fuzzforge_executor",
            description="Intelligent A2A orchestrator with memory",
            instruction=self._build_instruction(),
@@ -1082,7 +1088,7 @@ class FuzzForgeExecutor:

    def _build_instruction(self) -> str:
        """Build the agent's instruction prompt"""
        instruction = """You are FuzzForge, an intelligent A2A orchestrator with dual memory systems.
        instruction = f"""You are FuzzForge, an intelligent A2A orchestrator with dual memory systems.

## Your Core Responsibilities:

@@ -1702,7 +1708,7 @@ Be concise and intelligent in your responses."""
        if self.agentops_trace:
            try:
                agentops.end_trace()
            except Exception:
            except:
                pass

        # Cancel background monitors

@@ -1,4 +1,3 @@
# ruff: noqa: E402  # Imports delayed for environment/logging setup
#!/usr/bin/env python3
# Copyright (c) 2025 FuzzingLabs
#
@@ -27,6 +26,7 @@ import random
from datetime import datetime
from contextlib import contextmanager
from pathlib import Path
from typing import Any

from dotenv import load_dotenv

@@ -90,12 +90,18 @@ except ImportError:
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.prompt import Prompt
from rich import box

from google.adk.events.event import Event
from google.adk.events.event_actions import EventActions
from google.genai import types as gen_types

from .agent import FuzzForgeAgent
from .agent_card import get_fuzzforge_agent_card
from .config_manager import ConfigManager
from .config_bridge import ProjectConfigManager
from .remote_agent import RemoteAgentConnection

console = Console()

@@ -237,7 +243,7 @@ class FuzzForgeCLI:
            )
        )
        if self.agent.executor.agentops_trace:
            console.print("Tracking: [medium_purple1]AgentOps active[/medium_purple1]")
            console.print(f"Tracking: [medium_purple1]AgentOps active[/medium_purple1]")

        # Show skills
        console.print("\nSkills:")
@@ -314,7 +320,7 @@ class FuzzForgeCLI:
                url=args.strip(),
                description=description
            )
            console.print(" [dim]Saved to config for auto-registration[/dim]")
            console.print(f" [dim]Saved to config for auto-registration[/dim]")
        else:
            console.print(f"[red]Failed: {result['error']}[/red]")

@@ -340,9 +346,9 @@ class FuzzForgeCLI:
        # Remove from config
        if self.config_manager.remove_registered_agent(name=agent_to_remove['name'], url=agent_to_remove['url']):
            console.print(f"✅ Unregistered: [bold]{agent_to_remove['name']}[/bold]")
            console.print(" [dim]Removed from config (won't auto-register next time)[/dim]")
            console.print(f" [dim]Removed from config (won't auto-register next time)[/dim]")
        else:
            console.print("[yellow]Agent unregistered from session but not found in config[/yellow]")
            console.print(f"[yellow]Agent unregistered from session but not found in config[/yellow]")

    async def cmd_list(self, args: str = "") -> None:
        """List registered agents"""
@@ -429,7 +435,7 @@ class FuzzForgeCLI:
                        text = data['parts'][0].get('text', '')[:150]
                        role = data.get('role', 'unknown')
                        console.print(f"{i}. [{role}]: {text}...")
                    except Exception:
                    except:
                        console.print(f"{i}. {content[:150]}...")
            else:
                console.print("[yellow]No matches found in SQLite either[/yellow]")
@@ -693,7 +699,7 @@ class FuzzForgeCLI:
        )

        console.print(table)
        console.print("\n[dim]Use /artifacts <id> to view artifact content[/dim]")
        console.print(f"\n[dim]Use /artifacts <id> to view artifact content[/dim]")

    async def cmd_tasks(self, args: str = "") -> None:
        """List tasks or show details for a specific task."""

@@ -16,7 +16,9 @@ Can be reused by external agents and other components


import os
from typing import Dict, Any, Optional
import asyncio
import json
from typing import Dict, List, Any, Optional, Union
from pathlib import Path


@@ -187,69 +189,33 @@ class CogneeProjectIntegration:
        except Exception as e:
            return {"error": f"Failed to list data: {e}"}

    async def cognify_text(self, text: str, dataset: str = None) -> Dict[str, Any]:
        """
        Cognify text content into knowledge graph

        Args:
            text: Text to cognify
            dataset: Dataset name (defaults to project_name_codebase)

        Returns:
            Dict containing cognify results
        """
        if not self._initialized:
            await self.initialize()

        if not self._initialized:
            return {"error": "Cognee not initialized"}

        if not dataset:
            dataset = f"{self.project_context['project_name']}_codebase"

        try:
            # Add text to dataset
            await self._cognee.add([text], dataset_name=dataset)

            # Process (cognify) the dataset
            await self._cognee.cognify([dataset])

            return {
                "text_length": len(text),
                "dataset": dataset,
                "project": self.project_context["project_name"],
                "status": "success"
            }
        except Exception as e:
            return {"error": f"Cognify failed: {e}"}

    async def ingest_text_to_dataset(self, text: str, dataset: str = None) -> Dict[str, Any]:
        """
        Ingest text content into a specific dataset


        Args:
            text: Text to ingest
            dataset: Dataset name (defaults to project_name_codebase)


        Returns:
            Dict containing ingest results
        """
        if not self._initialized:
            await self.initialize()


        if not self._initialized:
            return {"error": "Cognee not initialized"}


        if not dataset:
            dataset = f"{self.project_context['project_name']}_codebase"


        try:
            # Add text to dataset
            await self._cognee.add([text], dataset_name=dataset)


            # Process (cognify) the dataset
            await self._cognee.cognify([dataset])


            return {
                "text_length": len(text),
                "dataset": dataset,

@@ -15,9 +15,11 @@ Provides integrated Cognee functionality for codebase analysis and knowledge gra


import os
import asyncio
import logging
from pathlib import Path
from typing import Dict, List, Any
from typing import Dict, List, Any, Optional
from datetime import datetime

logger = logging.getLogger(__name__)

@@ -56,7 +58,7 @@ class CogneeService:
            # Configure LLM with API key BEFORE any other cognee operations
            provider = os.getenv("LLM_PROVIDER", "openai")
            model = os.getenv("LLM_MODEL") or os.getenv("LITELLM_MODEL", "gpt-4o-mini")
            api_key = os.getenv("COGNEE_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
            api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
            endpoint = os.getenv("LLM_ENDPOINT")
            api_version = os.getenv("LLM_API_VERSION")
            max_tokens = os.getenv("LLM_MAX_TOKENS")
@@ -78,62 +80,48 @@ class CogneeService:
                os.environ.setdefault("OPENAI_API_KEY", api_key)
            if endpoint:
                os.environ["LLM_ENDPOINT"] = endpoint
                os.environ.setdefault("LLM_API_BASE", endpoint)
                os.environ.setdefault("OPENAI_API_BASE", endpoint)
                os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint)
                if api_key:
                    os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key)
            if api_version:
                os.environ["LLM_API_VERSION"] = api_version
            if max_tokens:
                os.environ["LLM_MAX_TOKENS"] = str(max_tokens)

            # Configure Cognee's runtime using its configuration helpers when available
            embedding_model = os.getenv("LLM_EMBEDDING_MODEL")
            embedding_endpoint = os.getenv("LLM_EMBEDDING_ENDPOINT")
            if embedding_endpoint:
                os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint)

            if hasattr(cognee.config, "set_llm_provider"):
                cognee.config.set_llm_provider(provider)
            if hasattr(cognee.config, "set_llm_model"):
                cognee.config.set_llm_model(model)
            if api_key and hasattr(cognee.config, "set_llm_api_key"):
                cognee.config.set_llm_api_key(api_key)
            if endpoint and hasattr(cognee.config, "set_llm_endpoint"):
                cognee.config.set_llm_endpoint(endpoint)
            if embedding_model and hasattr(cognee.config, "set_llm_embedding_model"):
                cognee.config.set_llm_embedding_model(embedding_model)
            if embedding_endpoint and hasattr(cognee.config, "set_llm_embedding_endpoint"):
                cognee.config.set_llm_embedding_endpoint(embedding_endpoint)
            if hasattr(cognee.config, "set_llm_model"):
                cognee.config.set_llm_model(model)
            if api_key and hasattr(cognee.config, "set_llm_api_key"):
                cognee.config.set_llm_api_key(api_key)
            if endpoint and hasattr(cognee.config, "set_llm_endpoint"):
                cognee.config.set_llm_endpoint(endpoint)
            if api_version and hasattr(cognee.config, "set_llm_api_version"):
                cognee.config.set_llm_api_version(api_version)
            if max_tokens and hasattr(cognee.config, "set_llm_max_tokens"):
                cognee.config.set_llm_max_tokens(int(max_tokens))


            # Configure graph database
            cognee.config.set_graph_db_config({
                "graph_database_provider": self.cognee_config.get("graph_database_provider", "kuzu"),
            })


            # Set data directories
            data_dir = self.cognee_config.get("data_directory")
            system_dir = self.cognee_config.get("system_directory")


            if data_dir:
                logger.debug("Setting cognee data root", extra={"path": data_dir})
                cognee.config.data_root_directory(data_dir)
            if system_dir:
                logger.debug("Setting cognee system root", extra={"path": system_dir})
                cognee.config.system_root_directory(system_dir)


            # Setup multi-tenant user context
            await self._setup_user_context()


            self._initialized = True
            logger.info(f"Cognee initialized for project {self.project_context['project_name']} "
                        f"with Kuzu at {system_dir}")


        except ImportError:
            logger.error("Cognee not installed. Install with: pip install cognee")
            raise
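The initialization above is driven entirely by environment variables; a sketch of a typical set (only the variable names come from this code, the values are placeholders):

```bash
export LLM_PROVIDER=openai
export LLM_MODEL=gpt-4o-mini
export LLM_API_KEY=sk-placeholder          # or OPENAI_API_KEY
export LLM_ENDPOINT=http://localhost:4000  # optional proxy endpoint
export LLM_EMBEDDING_MODEL=text-embedding-3-small  # optional
```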
@@ -170,7 +158,7 @@ class CogneeService:
                self._user = await get_user(fallback_email)
                logger.info(f"Using existing user: {fallback_email}")
                return
            except Exception:
            except:
                # User doesn't exist, try to create fallback
                pass


@@ -13,7 +13,7 @@

try:
    from fuzzforge_cli.config import ProjectConfigManager as _ProjectConfigManager
except ImportError:  # pragma: no cover - used when CLI not available
except ImportError as exc:  # pragma: no cover - used when CLI not available
    class _ProjectConfigManager:  # type: ignore[no-redef]
        """Fallback implementation that raises a helpful error."""

@@ -21,10 +21,10 @@ except ImportError:  # pragma: no cover - used when CLI not available
            raise ImportError(
                "ProjectConfigManager is unavailable. Install the FuzzForge CLI "
                "package or supply a compatible configuration object."
            )
            ) from exc

        def __getattr__(name):  # pragma: no cover - defensive
            raise ImportError("ProjectConfigManager unavailable")
            raise ImportError("ProjectConfigManager unavailable") from exc

ProjectConfigManager = _ProjectConfigManager


@@ -16,12 +16,15 @@ Separate from Cognee which will be used for RAG/codebase analysis


import os
from typing import Dict, Any
import json
from typing import Dict, List, Any, Optional
from datetime import datetime
import logging

# ADK Memory imports
from google.adk.memory import InMemoryMemoryService, BaseMemoryService
from google.adk.memory.base_memory_service import SearchMemoryResponse
from google.adk.memory.memory_entry import MemoryEntry

# Optional VertexAI Memory Bank
try:

@@ -37,7 +37,7 @@ class RemoteAgentConnection:
            response.raise_for_status()
            self.agent_card = response.json()
            return self.agent_card
        except Exception:
        except:
            # Try old path for compatibility
            try:
                response = await self.client.get(f"{self.url}/.well-known/agent.json")

@@ -17,21 +17,25 @@ RUN apt-get update && apt-get install -y \

# Docker client configuration removed - localhost:5001 doesn't require insecure registry config

# Install uv for faster package management
RUN pip install uv

# Copy project files
COPY pyproject.toml ./
COPY uv.lock ./

# Install dependencies with pip
RUN pip install --no-cache-dir -e .
# Install dependencies
RUN uv sync --no-dev

# Copy source code
COPY . .

# Expose ports (API on 8000, MCP on 8010)
EXPOSE 8000 8010
# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start the application
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["uv", "run", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -1,6 +1,6 @@
# FuzzForge Backend

A stateless API server for security testing workflow orchestration using Temporal. This system dynamically discovers workflows, executes them in isolated worker environments, and returns findings in SARIF format.
A stateless API server for security testing workflow orchestration using Prefect. This system dynamically discovers workflows, executes them in isolated Docker containers with volume mounting, and returns findings in SARIF format.

## Architecture Overview

@@ -8,17 +8,17 @@ A stateless API server for security testing workflow orchestration using Tempora

1. **Workflow Discovery System**: Automatically discovers workflows at startup
2. **Module System**: Reusable components (scanner, analyzer, reporter) with a common interface
3. **Temporal Integration**: Handles workflow orchestration, execution, and monitoring with vertical workers
4. **File Upload & Storage**: HTTP multipart upload to MinIO for target files
3. **Prefect Integration**: Handles container orchestration, workflow execution, and monitoring
4. **Volume Mounting**: Secure file access with configurable permissions (ro/rw)
5. **SARIF Output**: Standardized security findings format

### Key Features

- **Stateless**: No persistent data, fully scalable
- **Generic**: No hardcoded workflows, automatic discovery
- **Isolated**: Each workflow runs in specialized vertical workers
- **Isolated**: Each workflow runs in its own Docker container
- **Extensible**: Easy to add new workflows and modules
- **Secure**: File upload with MinIO storage, automatic cleanup via lifecycle policies
- **Secure**: Read-only volume mounts by default, path validation
- **Observable**: Comprehensive logging and status tracking

## Quick Start
@@ -32,17 +32,19 @@ A stateless API server for security testing workflow orchestration using Tempora
From the project root, start all services:

```bash
docker-compose -f docker-compose.temporal.yaml up -d
docker-compose up -d
```

This will start:
- Temporal server (Web UI at http://localhost:8233, gRPC at :7233)
- MinIO (S3 storage at http://localhost:9000, Console at http://localhost:9001)
- PostgreSQL database (for Temporal state)
- Vertical workers (worker-rust, worker-android, worker-web, etc.)
- Prefect server (API at http://localhost:4200/api)
- PostgreSQL database
- Redis cache
- Docker registry (port 5001)
- Prefect worker (for running workflows)
- FuzzForge backend API (port 8000)
- FuzzForge MCP server (port 8010)

**Note**: MinIO console login: `fuzzforge` / `fuzzforge123`
**Note**: The Prefect UI at http://localhost:4200 is not currently accessible from the host due to the API being configured for inter-container communication. Use the REST API or MCP interface instead.

## API Endpoints

@@ -52,8 +54,7 @@ This will start:
- `GET /workflows/{name}/metadata` - Get workflow metadata and parameters
- `GET /workflows/{name}/parameters` - Get workflow parameter schema
- `GET /workflows/metadata/schema` - Get metadata.yaml schema
- `POST /workflows/{name}/submit` - Submit a workflow for execution (path-based, legacy)
- `POST /workflows/{name}/upload-and-submit` - **Upload local files and submit workflow** (recommended)
- `POST /workflows/{name}/submit` - Submit a workflow for execution

### Runs

@@ -67,13 +68,12 @@ Each workflow must have:

```
toolbox/workflows/{workflow_name}/
    workflow.py          # Temporal workflow definition
    metadata.yaml        # Mandatory metadata (parameters, version, vertical, etc.)
    requirements.txt     # Optional Python dependencies (installed in vertical worker)
    workflow.py          # Prefect flow definition
    metadata.yaml        # Mandatory metadata (parameters, version, etc.)
    Dockerfile           # Optional custom container definition
    requirements.txt     # Optional Python dependencies
```

**Note**: With Temporal architecture, workflows run in pre-built vertical workers (e.g., `worker-rust`, `worker-android`), not individual Docker containers. The workflow code is mounted as a volume and discovered at runtime.

### Example metadata.yaml

```yaml
@@ -82,12 +82,15 @@ version: "1.0.0"
description: "Comprehensive security analysis workflow"
author: "FuzzForge Team"
category: "comprehensive"
vertical: "rust"  # Routes to worker-rust
tags:
  - "security"
  - "analysis"
  - "comprehensive"

supported_volume_modes:
  - "ro"
  - "rw"

requirements:
  tools:
    - "file_scanner"
@@ -107,6 +110,11 @@ parameters:
    type: string
    default: "/workspace"
    description: "Path to analyze"
  volume_mode:
    type: string
    enum: ["ro", "rw"]
    default: "ro"
    description: "Volume mount mode"
  scanner_config:
    type: object
    description: "Scanner configuration"
@@ -151,6 +159,7 @@ curl -X POST "http://localhost:8000/workflows/security_assessment/submit" \
  -H "Content-Type: application/json" \
  -d '{
    "target_path": "/tmp/project",
    "volume_mode": "ro",
    "resource_limits": {
      "memory_limit": "1Gi",
      "cpu_limit": "1"
@@ -160,54 +169,6 @@

Resource precedence: User limits > Workflow requirements > System defaults

## File Upload and Target Access

### Upload Endpoint

The backend provides an upload endpoint for submitting workflows with local files:

```
POST /workflows/{workflow_name}/upload-and-submit
Content-Type: multipart/form-data

Parameters:
  file: File upload (supports .tar.gz for directories)
  parameters: JSON string of workflow parameters (optional)
  timeout: Execution timeout in seconds (optional)
```

Example using curl:

```bash
# Upload a directory (create tarball first)
tar -czf project.tar.gz /path/to/project
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@project.tar.gz" \
  -F "parameters={\"check_secrets\":true}"

# Upload a single file
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@binary.elf"
```

### Storage Flow

1. **CLI/API uploads file** via HTTP multipart
2. **Backend receives file** and streams to temporary location (max 10GB)
3. **Backend uploads to MinIO** with generated `target_id`
4. **Workflow is submitted** to Temporal with `target_id`
5. **Worker downloads target** from MinIO to local cache
6. **Workflow processes target** from cache
7. **MinIO lifecycle policy** deletes files after 7 days

### Advantages

- **No host filesystem access required** - workers can run anywhere
- **Automatic cleanup** - lifecycle policies prevent disk exhaustion
- **Caching** - repeated workflows reuse cached targets
- **Multi-host ready** - targets accessible from any worker
- **Secure** - isolated storage, no arbitrary host path access

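For programmatic clients, the same upload flow can be driven from Python; a sketch using httpx (endpoint and field names as documented above):

```python
import httpx

# Upload a tarball and submit the workflow in one request.
with open("project.tar.gz", "rb") as fh:
    response = httpx.post(
        "http://localhost:8000/workflows/security_assessment/upload-and-submit",
        files={"file": ("project.tar.gz", fh, "application/gzip")},
        data={"parameters": '{"check_secrets": true}'},
        timeout=120.0,
    )
response.raise_for_status()
print(response.json())
```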
## Module Development

Modules implement the `BaseModule` interface:
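The interface code itself is elided by the diff here (the hunk header below shows only `class MyModule(BaseModule):`); a minimal sketch under assumed method names, not the actual interface:

```python
from modules.base import BaseModule  # hypothetical import path

class MyModule(BaseModule):
    """Toy module; the real BaseModule interface may differ."""

    async def execute(self, config: dict, workspace) -> dict:
        # Walk the mounted workspace and return findings for SARIF conversion.
        findings = []
        for path in workspace.rglob(config.get("pattern", "*.py")):
            findings.append({"file": str(path), "message": "example finding"})
        return {"findings": findings}
```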
@@ -237,24 +198,12 @@ class MyModule(BaseModule):

## Submitting a Workflow

### With File Upload (Recommended)

```bash
# Automatic tarball and upload
tar -czf project.tar.gz /home/user/project
curl -X POST "http://localhost:8000/workflows/security_assessment/upload-and-submit" \
  -F "file=@project.tar.gz" \
  -F "parameters={\"scanner_config\":{\"patterns\":[\"*.py\"]},\"analyzer_config\":{\"check_secrets\":true}}"
```

### Legacy Path-Based Submission

```bash
# Only works if backend and target are on same machine
curl -X POST "http://localhost:8000/workflows/security_assessment/submit" \
  -H "Content-Type: application/json" \
  -d '{
    "target_path": "/home/user/project",
    "volume_mode": "ro",
    "parameters": {
      "scanner_config": {"patterns": ["*.py"]},
      "analyzer_config": {"check_secrets": true}
@@ -286,31 +235,23 @@ Returns SARIF-formatted findings:

## Security Considerations

1. **File Upload Security**: Files uploaded to MinIO with isolated storage
2. **Read-Only Default**: Target files accessed as read-only unless explicitly set
3. **Worker Isolation**: Each workflow runs in isolated vertical workers
4. **Resource Limits**: Can set CPU/memory limits per worker
5. **Automatic Cleanup**: MinIO lifecycle policies delete old files after 7 days
1. **Volume Mounting**: Only allowed directories can be mounted
2. **Read-Only Default**: Volumes mounted as read-only unless explicitly set
3. **Container Isolation**: Each workflow runs in an isolated container
4. **Resource Limits**: Can set CPU/memory limits via Prefect
5. **Network Isolation**: Containers use bridge networking

## Development

### Adding a New Workflow

1. Create directory: `toolbox/workflows/my_workflow/`
2. Add `workflow.py` with a Temporal workflow (using `@workflow.defn`)
3. Add mandatory `metadata.yaml` with `vertical` field
4. Restart the appropriate worker: `docker-compose -f docker-compose.temporal.yaml restart worker-rust`
5. Worker will automatically discover and register the new workflow
2. Add `workflow.py` with a Prefect flow
3. Add mandatory `metadata.yaml`
4. Restart backend: `docker-compose restart fuzzforge-backend`

### Adding a New Module

1. Create module in `toolbox/modules/{category}/`
2. Implement `BaseModule` interface
3. Use in workflows via import

### Adding a New Vertical Worker

1. Create worker directory: `workers/{vertical}/`
2. Create `Dockerfile` with required tools
3. Add worker to `docker-compose.temporal.yaml`
4. Worker will automatically discover workflows with matching `vertical` in metadata
3. Use in workflows via import
@@ -1,184 +0,0 @@
# FuzzForge Benchmark Suite

Performance benchmarking infrastructure organized by module category.

## Directory Structure

```
benchmarks/
├── conftest.py              # Benchmark fixtures
├── category_configs.py      # Category-specific thresholds
├── by_category/             # Benchmarks organized by category
│   ├── fuzzer/
│   │   ├── bench_cargo_fuzz.py
│   │   └── bench_atheris.py
│   ├── scanner/
│   │   └── bench_file_scanner.py
│   ├── secret_detection/
│   │   ├── bench_gitleaks.py
│   │   └── bench_trufflehog.py
│   └── analyzer/
│       └── bench_security_analyzer.py
├── fixtures/                # Benchmark test data
│   ├── small/               # ~1K LOC
│   ├── medium/              # ~10K LOC
│   └── large/               # ~100K LOC
└── results/                 # Benchmark results (JSON)
```

## Module Categories

### Fuzzer
**Expected Metrics**: execs/sec, coverage_rate, time_to_crash, memory_usage

**Performance Thresholds**:
- Min 1000 execs/sec
- Max 10s for small projects
- Max 2GB memory

### Scanner
**Expected Metrics**: files/sec, LOC/sec, findings_count

**Performance Thresholds**:
- Min 100 files/sec
- Min 10K LOC/sec
- Max 512MB memory

### Secret Detection
**Expected Metrics**: patterns/sec, precision, recall, F1

**Performance Thresholds**:
- Min 90% precision
- Min 95% recall
- Max 5 false positives per 100 secrets

### Analyzer
**Expected Metrics**: analysis_depth, files/sec, accuracy

**Performance Thresholds**:
- Min 10 files/sec (deep analysis)
- Min 85% accuracy
- Max 2GB memory

## Running Benchmarks

### All Benchmarks
```bash
cd backend
pytest benchmarks/ --benchmark-only -v
```

### Specific Category
```bash
pytest benchmarks/by_category/fuzzer/ --benchmark-only -v
```

### With Comparison
```bash
# Run and save baseline
pytest benchmarks/ --benchmark-only --benchmark-save=baseline

# Compare against baseline
pytest benchmarks/ --benchmark-only --benchmark-compare=baseline
```

### Generate Histogram
```bash
pytest benchmarks/ --benchmark-only --benchmark-histogram=histogram
```

## Benchmark Results

Results are saved as JSON and include:
- Mean execution time
- Standard deviation
- Min/Max values
- Iterations per second
- Memory usage

Example output:
```
------------------------ benchmark: fuzzer --------------------------
Name                           Mean       StdDev     Ops/Sec
bench_cargo_fuzz[discovery]    0.0012s    0.0001s    833.33
bench_cargo_fuzz[execution]    0.1250s    0.0050s    8.00
bench_cargo_fuzz[memory]       0.0100s    0.0005s    100.00
---------------------------------------------------------------------
```

## CI/CD Integration

Benchmarks run:
- **Nightly**: Full benchmark suite, track trends
- **On PR**: When benchmarks/ or modules/ changed
- **Manual**: Via workflow_dispatch

### Regression Detection

Benchmarks automatically fail if:
- Performance degrades >10%
- Memory usage exceeds thresholds
- Throughput drops below minimum

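One way to enforce such a gate locally with pytest-benchmark (assuming a baseline saved under the name `baseline`; the 10% figure mirrors the threshold above):

```bash
# Fail the run if mean time regresses more than 10% versus the saved baseline
pytest benchmarks/ --benchmark-only \
  --benchmark-compare=baseline \
  --benchmark-compare-fail=mean:10%
```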
See `.github/workflows/benchmark.yml` for configuration.
|
||||
|
||||
## Adding New Benchmarks
|
||||
|
||||
### 1. Create benchmark file in category directory
|
||||
```python
|
||||
# benchmarks/by_category/fuzzer/bench_new_fuzzer.py
|
||||
|
||||
import pytest
|
||||
from benchmarks.category_configs import ModuleCategory, get_threshold
|
||||
|
||||
@pytest.mark.benchmark(group="fuzzer")
|
||||
def test_execution_performance(benchmark, new_fuzzer, test_workspace):
|
||||
"""Benchmark execution speed"""
|
||||
result = benchmark(new_fuzzer.execute, config, test_workspace)
|
||||
|
||||
# Validate against threshold
|
||||
threshold = get_threshold(ModuleCategory.FUZZER, "max_execution_time_small")
|
||||
assert result.execution_time < threshold
|
||||
```
|
||||
|
||||
### 2. Update category_configs.py if needed
|
||||
Add new thresholds or metrics for your module.
|
||||
|
||||
### 3. Run locally
|
||||
```bash
|
||||
pytest benchmarks/by_category/fuzzer/bench_new_fuzzer.py --benchmark-only -v
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use mocking** for external dependencies (network, disk I/O)
|
||||
2. **Fixed iterations** for consistent benchmarking
|
||||
3. **Warm-up runs** for JIT-compiled code
|
||||
4. **Category-specific metrics** aligned with module purpose
|
||||
5. **Realistic fixtures** that represent actual use cases
|
||||
6. **Memory profiling** using tracemalloc
|
||||
7. **Compare apples to apples** within the same category

## Interpreting Results

### Good Performance
- ✅ Execution time below threshold
- ✅ Memory usage within limits
- ✅ Throughput meets minimum
- ✅ <5% variance across runs

### Performance Issues
- ⚠️ Execution time 10-20% over threshold
- ❌ Execution time >20% over threshold
- ❌ Memory leaks (increasing over iterations)
- ❌ High variance (>10%) indicates instability

## Tracking Performance Over Time

Benchmark results are stored as artifacts with:
- Commit SHA
- Timestamp
- Environment details (Python version, OS)
- Full metrics

Use these to track long-term performance trends and detect gradual degradation.
@@ -1,221 +0,0 @@
"""
Benchmarks for CargoFuzzer module

Tests performance characteristics of Rust fuzzing:
- Execution throughput (execs/sec)
- Coverage rate
- Memory efficiency
- Time to first crash
"""

import pytest
import asyncio
from pathlib import Path
from unittest.mock import AsyncMock, patch
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))

from modules.fuzzer.cargo_fuzzer import CargoFuzzer
from benchmarks.category_configs import ModuleCategory, get_threshold


@pytest.fixture
def cargo_fuzzer():
    """Create CargoFuzzer instance for benchmarking"""
    return CargoFuzzer()


@pytest.fixture
def benchmark_config():
    """Benchmark-optimized configuration"""
    return {
        "target_name": None,
        "max_iterations": 10000,  # Fixed iterations for consistent benchmarking
        "timeout_seconds": 30,
        "sanitizer": "address"
    }


@pytest.fixture
def mock_rust_workspace(tmp_path):
    """Create a minimal Rust workspace for benchmarking"""
    workspace = tmp_path / "rust_project"
    workspace.mkdir()

    # Cargo.toml
    (workspace / "Cargo.toml").write_text("""[package]
name = "bench_project"
version = "0.1.0"
edition = "2021"
""")

    # src/lib.rs
    src = workspace / "src"
    src.mkdir()
    (src / "lib.rs").write_text("""
pub fn benchmark_function(data: &[u8]) -> Vec<u8> {
    data.to_vec()
}
""")

    # fuzz structure
    fuzz = workspace / "fuzz"
    fuzz.mkdir()
    (fuzz / "Cargo.toml").write_text("""[package]
name = "bench_project-fuzz"
version = "0.0.0"
edition = "2021"

[dependencies]
libfuzzer-sys = "0.4"

[dependencies.bench_project]
path = ".."

[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
""")

    targets = fuzz / "fuzz_targets"
    targets.mkdir()
    (targets / "fuzz_target_1.rs").write_text("""#![no_main]
use libfuzzer_sys::fuzz_target;
use bench_project::benchmark_function;

fuzz_target!(|data: &[u8]| {
    let _ = benchmark_function(data);
});
""")

    return workspace


class TestCargoFuzzerPerformance:
    """Benchmark CargoFuzzer performance metrics"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_target_discovery_performance(self, benchmark, cargo_fuzzer, mock_rust_workspace):
        """Benchmark fuzz target discovery speed"""
        def discover():
            return asyncio.run(cargo_fuzzer._discover_fuzz_targets(mock_rust_workspace))

        result = benchmark(discover)
        assert len(result) > 0

    @pytest.mark.benchmark(group="fuzzer")
    def test_config_validation_performance(self, benchmark, cargo_fuzzer, benchmark_config):
        """Benchmark configuration validation speed"""
        result = benchmark(cargo_fuzzer.validate_config, benchmark_config)
        assert result is True

    @pytest.mark.benchmark(group="fuzzer")
    def test_module_initialization_performance(self, benchmark):
        """Benchmark module instantiation time"""
        def init_module():
            return CargoFuzzer()

        module = benchmark(init_module)
        assert module is not None


class TestCargoFuzzerThroughput:
    """Benchmark execution throughput"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_execution_throughput(self, benchmark, cargo_fuzzer, mock_rust_workspace, benchmark_config):
        """Benchmark fuzzing execution throughput"""

        # Mock actual fuzzing to focus on orchestration overhead
        async def mock_run(workspace, target, config, callback):
            # Simulate 10K execs at 1000 execs/sec
            if callback:
                await callback({
                    "total_execs": 10000,
                    "execs_per_sec": 1000.0,
                    "crashes": 0,
                    "coverage": 50,
                    "corpus_size": 10,
                    "elapsed_time": 10
                })
            return [], {"total_executions": 10000, "execution_time": 10.0}

        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', side_effect=mock_run):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    def run_fuzzer():
                        # Run in new event loop
                        loop = asyncio.new_event_loop()
                        try:
                            return loop.run_until_complete(
                                cargo_fuzzer.execute(benchmark_config, mock_rust_workspace)
                            )
                        finally:
                            loop.close()

                    result = benchmark(run_fuzzer)
                    assert result.status == "success"

                    # Verify performance threshold
                    threshold = get_threshold(ModuleCategory.FUZZER, "max_execution_time_small")
                    assert result.execution_time < threshold, \
                        f"Execution time {result.execution_time}s exceeds threshold {threshold}s"


class TestCargoFuzzerMemory:
    """Benchmark memory efficiency"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_memory_overhead(self, benchmark, cargo_fuzzer, mock_rust_workspace, benchmark_config):
        """Benchmark memory usage during execution"""
        import tracemalloc

        def measure_memory():
            tracemalloc.start()

            # Simulate operations
            cargo_fuzzer.validate_config(benchmark_config)
            asyncio.run(cargo_fuzzer._discover_fuzz_targets(mock_rust_workspace))

            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()

            return peak / 1024 / 1024  # Convert to MB

        peak_mb = benchmark(measure_memory)

        # Check against threshold
        max_memory = get_threshold(ModuleCategory.FUZZER, "max_memory_mb")
        assert peak_mb < max_memory, \
            f"Peak memory {peak_mb:.2f}MB exceeds threshold {max_memory}MB"


class TestCargoFuzzerScalability:
    """Benchmark scalability characteristics"""

    @pytest.mark.benchmark(group="fuzzer")
    def test_multiple_target_discovery(self, benchmark, cargo_fuzzer, tmp_path):
        """Benchmark discovery with multiple targets"""
        workspace = tmp_path / "multi_target"
        workspace.mkdir()

        # Create workspace with 10 fuzz targets
        (workspace / "Cargo.toml").write_text("[package]\nname = \"test\"\nversion = \"0.1.0\"\nedition = \"2021\"")
        src = workspace / "src"
        src.mkdir()
        (src / "lib.rs").write_text("pub fn test() {}")

        fuzz = workspace / "fuzz"
        fuzz.mkdir()
        targets = fuzz / "fuzz_targets"
        targets.mkdir()

        for i in range(10):
            (targets / f"fuzz_target_{i}.rs").write_text("// Target")

        def discover():
            return asyncio.run(cargo_fuzzer._discover_fuzz_targets(workspace))

        result = benchmark(discover)
        assert len(result) == 10

@@ -1,240 +0,0 @@
# Secret Detection Benchmarks

Comprehensive benchmarking suite comparing secret detection tools via complete workflow execution:
- **Gitleaks** - Fast pattern-based detection
- **TruffleHog** - Entropy analysis with verification
- **LLM Detector** - AI-powered semantic analysis (gpt-4o-mini, gpt-5-mini)

## Quick Start

### Run All Comparisons

```bash
cd backend
python benchmarks/by_category/secret_detection/compare_tools.py
```

This will run all workflows on `test_projects/secret_detection_benchmark/` and generate comparison reports.

### Run Benchmark Tests

```bash
# All benchmarks (Gitleaks, TruffleHog, LLM with 3 models)
pytest benchmarks/by_category/secret_detection/bench_comparison.py --benchmark-only -v

# Specific tool only
pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionComparison::test_gitleaks_workflow --benchmark-only -v

# Performance tests only
pytest benchmarks/by_category/secret_detection/bench_comparison.py::TestSecretDetectionPerformance --benchmark-only -v
```

## Ground Truth Dataset

**Controlled Benchmark** (`test_projects/secret_detection_benchmark/`)

**Exactly 32 documented secrets** for accurate precision/recall testing:
- **12 Easy**: Standard patterns (AWS keys, GitHub PATs, Stripe keys, SSH keys)
- **10 Medium**: Obfuscated (Base64, hex, concatenated, in comments, Unicode)
- **10 Hard**: Well hidden (ROT13, binary, XOR, reversed, template strings, regex patterns)

All secrets documented in `secret_detection_benchmark_GROUND_TRUTH.json` with exact file paths and line numbers.

See `test_projects/secret_detection_benchmark/README.md` for details.

## Metrics Measured

### Accuracy Metrics
- **Precision**: TP / (TP + FP) - How many detected secrets are real?
- **Recall**: TP / (TP + FN) - How many real secrets were found?
- **F1 Score**: Harmonic mean of precision and recall
- **False Positive Rate**: FP / Total Detected
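
A quick worked example of these formulas (illustrative numbers, not project code):

```python
# Four documented secrets; the tool finds two of them plus one false positive.
expected = {("src/app.py", 6), ("config/keys.yaml", 6), (".env", 3), (".env", 4)}
detected = {("src/app.py", 6), (".env", 3), ("README.md", 1)}

tp = len(expected & detected)  # 2
fp = len(detected - expected)  # 1
fn = len(expected - detected)  # 2

precision = tp / (tp + fp)                           # 2/3 ~ 0.67
recall = tp / (tp + fn)                              # 2/4 = 0.50
f1 = 2 * precision * recall / (precision + recall)   # ~ 0.57
fp_rate = fp / len(detected)                         # 1/3 ~ 0.33

print(f"P={precision:.2f} R={recall:.2f} F1={f1:.2f} FPR={fp_rate:.2f}")
```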

### Performance Metrics
- **Execution Time**: Total time to scan all files
- **Throughput**: Files/secrets scanned per second
- **Memory Usage**: Peak memory during execution

### Thresholds (from `category_configs.py`)
- Minimum Precision: 90%
- Minimum Recall: 95%
- Max Execution Time (small): 2.0s
- Max False Positives: 5 per 100 secrets

## Tool Comparison

### Gitleaks
**Strengths:**
- Fastest execution
- Git-aware (commit history scanning)
- Low false positive rate
- No API required
- Works offline

**Weaknesses:**
- Pattern-based only
- May miss obfuscated secrets
- Limited to known patterns

### TruffleHog
**Strengths:**
- Secret verification (validates if active)
- High detection rate with entropy analysis
- Multiple detectors (600+ secret types)
- Catches high-entropy strings

**Weaknesses:**
- Slower than Gitleaks
- Higher false positive rate
- Verification requires network calls

### LLM Detector
**Strengths:**
- Semantic understanding of context
- Catches novel/custom secret patterns
- Can reason about what "looks like" a secret
- Multiple model options (GPT-4, Claude, etc.)
- Understands code context

**Weaknesses:**
- Slowest (API latency + LLM processing)
- Most expensive (LLM API costs)
- Requires A2A agent infrastructure
- Accuracy varies by model
- May miss well-disguised secrets

## Results Directory

After running comparisons, results are saved to:
```
benchmarks/by_category/secret_detection/results/
├── comparison_report.md      # Human-readable comparison with:
│                             # - Summary table with secrets/files/avg per file/time
│                             # - Agreement analysis (secrets found by N tools)
│                             # - Tool agreement matrix (overlap between pairs)
│                             # - Per-file detailed comparison table
│                             # - File type breakdown
│                             # - Files analyzed by each tool
│                             # - Overlap analysis and performance summary
└── comparison_results.json   # Machine-readable data with findings_by_file
```

## Latest Benchmark Results

Run the benchmark to generate results:
```bash
cd backend
python benchmarks/by_category/secret_detection/compare_tools.py
```

Results are saved to `results/comparison_report.md` with:
- Summary table (secrets found, files scanned, time)
- Agreement analysis (how many tools found each secret)
- Tool agreement matrix (overlap between tools)
- Per-file detailed comparison
- File type breakdown

## CI/CD Integration

Add to your CI pipeline:

```yaml
# .github/workflows/benchmark-secrets.yml
name: Secret Detection Benchmark

on:
  schedule:
    - cron: '0 0 * * 0'  # Weekly
  workflow_dispatch:

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r backend/requirements.txt
          pip install pytest-benchmark

      - name: Run benchmarks
        env:
          GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
        run: |
          cd backend
          pytest benchmarks/by_category/secret_detection/bench_comparison.py \
            --benchmark-only \
            --benchmark-json=results.json \
            --gitguardian-api-key

      - name: Upload results
        uses: actions/upload-artifact@v3
        with:
          name: benchmark-results
          path: backend/results.json
```

## Adding New Tools

To benchmark a new secret detection tool:

1. Create module in `toolbox/modules/secret_detection/`
2. Register in `__init__.py`
3. Add to `compare_tools.py` in `run_all_tools()`
4. Add test in `bench_comparison.py`
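
A rough skeleton for step 1. The class layout and method signatures below are assumptions inferred from how modules are exercised elsewhere in this suite (`validate_config`, async `execute(config, workspace)`), not the verbatim module API:

```python
# toolbox/modules/secret_detection/my_detector.py (sketch; interface assumed)
from pathlib import Path
from typing import Any, Dict, List


class MyDetector:
    """Hypothetical detector following the pattern used by this suite."""

    def validate_config(self, config: Dict[str, Any]) -> bool:
        # Reject obviously bad configs before a run.
        return isinstance(config.get("max_files", 0), int)

    async def execute(self, config: Dict[str, Any], workspace: Path) -> List[Dict[str, Any]]:
        # Placeholder scan: flag files containing a marker string.
        findings = []
        for path in workspace.rglob("*"):
            if path.is_file() and "SECRET" in path.read_text(errors="ignore"):
                findings.append({"file": str(path), "line": 1, "type": "placeholder"})
        return findings
```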

## Interpreting Results

### High Precision, Low Recall
Tool is conservative - few false positives but misses secrets.
**Use case**: Production environments where false positives are costly.

### Low Precision, High Recall
Tool is aggressive - finds most secrets but many false positives.
**Use case**: Initial scans where manual review is acceptable.

### Balanced (High F1)
Tool has a good balance of precision and recall.
**Use case**: General purpose scanning.

### Fast Execution
Suitable for CI/CD pipelines and pre-commit hooks.

### Slow but Accurate
Better for comprehensive security audits.

## Best Practices

1. **Use multiple tools**: Each has strengths/weaknesses
2. **Combine results**: Union of all findings for maximum coverage (see the sketch after this list)
3. **Filter intelligently**: Remove known false positives
4. **Verify findings**: Check if secrets are actually valid
5. **Track over time**: Monitor precision/recall trends
6. **Update regularly**: Patterns evolve, tools improve
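
For point 2, combining findings is just a set union over `(file, line)` locations — a small illustrative sketch with made-up data:

```python
# Findings as sets of (file, line) pairs, one set per tool.
gitleaks = {(".env", 3), ("config/keys.yaml", 6)}
trufflehog = {("config/database.yaml", 6)}
llm = {(".env", 3), (".env", 4)}

combined = gitleaks | trufflehog | llm   # maximum coverage
consensus = gitleaks & llm               # higher-confidence subset
print(len(combined), len(consensus))     # 4 1
```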

## Troubleshooting

### GitGuardian Tests Skipped
- Set `GITGUARDIAN_API_KEY` environment variable
- Use `--gitguardian-api-key` flag

### LLM Tests Skipped
- Ensure A2A agent is running
- Check agent URL in config
- Use `--llm-enabled` flag

### Low Recall
- Check if ground truth is up to date
- Verify tool is configured correctly
- Review missed secrets manually

### High False Positives
- Adjust tool sensitivity
- Add exclusion patterns
- Review false positive list

@@ -1,285 +0,0 @@
"""
Secret Detection Tool Comparison Benchmark

Compares Gitleaks, TruffleHog, and LLM-based detection
on the vulnerable_app ground truth dataset via workflow execution.
"""

import pytest
import json
from pathlib import Path
from typing import Dict, List, Any
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src"))

from fuzzforge_sdk import FuzzForgeClient
from benchmarks.category_configs import ModuleCategory, get_threshold


@pytest.fixture
def target_path():
    """Path to vulnerable_app"""
    path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "vulnerable_app"
    assert path.exists(), f"Target not found: {path}"
    return path


@pytest.fixture
def ground_truth(target_path):
    """Load ground truth data"""
    metadata_file = target_path / "SECRETS_GROUND_TRUTH.json"
    assert metadata_file.exists(), f"Ground truth not found: {metadata_file}"

    with open(metadata_file) as f:
        return json.load(f)


@pytest.fixture
def sdk_client():
    """FuzzForge SDK client"""
    client = FuzzForgeClient(base_url="http://localhost:8000")
    yield client
    client.close()


def calculate_metrics(sarif_results: List[Dict], ground_truth: Dict[str, Any]) -> Dict[str, float]:
    """Calculate precision, recall, and F1 score"""

    # Extract expected secrets from ground truth
    expected_secrets = set()
    for file_info in ground_truth["files"]:
        if "secrets" in file_info:
            for secret in file_info["secrets"]:
                expected_secrets.add((file_info["filename"], secret["line"]))

    # Extract detected secrets from SARIF
    detected_secrets = set()
    for result in sarif_results:
        locations = result.get("locations", [])
        for location in locations:
            physical_location = location.get("physicalLocation", {})
            artifact_location = physical_location.get("artifactLocation", {})
            region = physical_location.get("region", {})

            uri = artifact_location.get("uri", "")
            line = region.get("startLine", 0)

            if uri and line:
                file_path = Path(uri)
                filename = file_path.name
                detected_secrets.add((filename, line))
                # Also try with relative path
                if len(file_path.parts) > 1:
                    rel_path = str(Path(*file_path.parts[-2:]))
                    detected_secrets.add((rel_path, line))

    # Calculate metrics
    true_positives = len(expected_secrets & detected_secrets)
    false_positives = len(detected_secrets - expected_secrets)
    false_negatives = len(expected_secrets - detected_secrets)

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "true_positives": true_positives,
        "false_positives": false_positives,
        "false_negatives": false_negatives
    }


class TestSecretDetectionComparison:
    """Compare all secret detection tools"""

    @pytest.mark.benchmark(group="secret_detection")
    def test_gitleaks_workflow(self, benchmark, sdk_client, target_path, ground_truth):
        """Benchmark Gitleaks workflow accuracy and performance"""

        def run_gitleaks():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="gitleaks_detection",
                target_path=str(target_path),
                parameters={
                    "scan_mode": "detect",
                    "no_git": True,
                    "redact": False
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_gitleaks)

        # Extract SARIF results
        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        # Calculate metrics
        metrics = calculate_metrics(sarif_results, ground_truth)

        # Log results
        print("\n=== Gitleaks Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")

        # Assert meets thresholds
        min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision")
        min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall")

        assert metrics['precision'] >= min_precision, \
            f"Precision {metrics['precision']:.2%} below threshold {min_precision:.2%}"
        assert metrics['recall'] >= min_recall, \
            f"Recall {metrics['recall']:.2%} below threshold {min_recall:.2%}"

    @pytest.mark.benchmark(group="secret_detection")
    def test_trufflehog_workflow(self, benchmark, sdk_client, target_path, ground_truth):
        """Benchmark TruffleHog workflow accuracy and performance"""

        def run_trufflehog():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="trufflehog_detection",
                target_path=str(target_path),
                parameters={
                    "verify": False,
                    "max_depth": 10
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_trufflehog)

        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        metrics = calculate_metrics(sarif_results, ground_truth)

        print("\n=== TruffleHog Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")

        min_precision = get_threshold(ModuleCategory.SECRET_DETECTION, "min_precision")
        min_recall = get_threshold(ModuleCategory.SECRET_DETECTION, "min_recall")

        assert metrics['precision'] >= min_precision
        assert metrics['recall'] >= min_recall

    @pytest.mark.benchmark(group="secret_detection")
    @pytest.mark.parametrize("model", [
        "gpt-4o-mini",
        "gpt-4o",
        "claude-3-5-sonnet-20241022"
    ])
    def test_llm_workflow(self, benchmark, sdk_client, target_path, ground_truth, model):
        """Benchmark LLM workflow with different models"""

        def run_llm():
            provider = "openai" if "gpt" in model else "anthropic"

            run = sdk_client.submit_workflow_with_upload(
                workflow_name="llm_secret_detection",
                target_path=str(target_path),
                parameters={
                    "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent",
                    "llm_model": model,
                    "llm_provider": provider,
                    "max_files": 20,
                    "timeout": 60
                }
            )

            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            assert result.status == "completed", f"Workflow failed: {result.status}"

            findings = sdk_client.get_run_findings(run.run_id)
            assert findings and findings.sarif, "No findings returned"

            return findings

        findings = benchmark(run_llm)

        sarif_results = []
        for run_data in findings.sarif.get("runs", []):
            sarif_results.extend(run_data.get("results", []))

        metrics = calculate_metrics(sarif_results, ground_truth)

        print(f"\n=== LLM ({model}) Workflow Results ===")
        print(f"Precision: {metrics['precision']:.2%}")
        print(f"Recall: {metrics['recall']:.2%}")
        print(f"F1 Score: {metrics['f1']:.2%}")
        print(f"True Positives: {metrics['true_positives']}")
        print(f"False Positives: {metrics['false_positives']}")
        print(f"False Negatives: {metrics['false_negatives']}")
        print(f"Findings Count: {len(sarif_results)}")


class TestSecretDetectionPerformance:
    """Performance benchmarks for each tool"""

    @pytest.mark.benchmark(group="secret_detection")
    def test_gitleaks_performance(self, benchmark, sdk_client, target_path):
        """Benchmark Gitleaks workflow execution speed"""

        def run():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="gitleaks_detection",
                target_path=str(target_path),
                parameters={"scan_mode": "detect", "no_git": True}
            )
            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            return result

        result = benchmark(run)

        max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small")
        # Note: Workflow execution time includes orchestration overhead
        # so we allow 2x the module threshold
        assert result.execution_time < max_time * 2

    @pytest.mark.benchmark(group="secret_detection")
    def test_trufflehog_performance(self, benchmark, sdk_client, target_path):
        """Benchmark TruffleHog workflow execution speed"""

        def run():
            run = sdk_client.submit_workflow_with_upload(
                workflow_name="trufflehog_detection",
                target_path=str(target_path),
                parameters={"verify": False}
            )
            result = sdk_client.wait_for_completion(run.run_id, timeout=300)
            return result

        result = benchmark(run)

        max_time = get_threshold(ModuleCategory.SECRET_DETECTION, "max_execution_time_small")
        assert result.execution_time < max_time * 2

@@ -1,547 +0,0 @@
"""
Secret Detection Tools Comparison Report Generator

Generates comparison reports showing strengths/weaknesses of each tool.
Uses workflow execution via SDK to test the complete pipeline.
"""

import asyncio
import json
import time
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import sys

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "sdk" / "src"))

from fuzzforge_sdk import FuzzForgeClient


@dataclass
class ToolResult:
    """Results from running a tool"""
    tool_name: str
    execution_time: float
    findings_count: int
    findings_by_file: Dict[str, List[int]]  # file_path -> [line_numbers]
    unique_files: int
    unique_locations: int  # unique (file, line) pairs
    secret_density: float  # average secrets per file
    file_types: Dict[str, int]  # file extension -> count of files with secrets


class SecretDetectionComparison:
    """Compare secret detection tools"""

    def __init__(self, target_path: Path, api_url: str = "http://localhost:8000"):
        self.target_path = target_path
        self.client = FuzzForgeClient(base_url=api_url)

    async def run_workflow(self, workflow_name: str, tool_name: str, config: Optional[Dict[str, Any]] = None) -> Optional[ToolResult]:
        """Run a workflow and extract findings"""
        print(f"\nRunning {tool_name} workflow...")

        start_time = time.time()

        try:
            # Start workflow
            run = self.client.submit_workflow_with_upload(
                workflow_name=workflow_name,
                target_path=str(self.target_path),
                parameters=config or {}
            )

            print(f"  Started run: {run.run_id}")

            # Wait for completion (up to 30 minutes for slow LLMs)
            print("  Waiting for completion...")
            result = self.client.wait_for_completion(run.run_id, timeout=1800)

            execution_time = time.time() - start_time

            if result.status != "COMPLETED":
                print(f"❌ {tool_name} workflow failed: {result.status}")
                return None

            # Get findings from SARIF
            findings = self.client.get_run_findings(run.run_id)

            if not findings or not findings.sarif:
                print(f"⚠️ {tool_name} produced no findings")
                return None

            # Extract results from SARIF and group by file
            findings_by_file = {}
            unique_locations = set()

            for run_data in findings.sarif.get("runs", []):
                for result in run_data.get("results", []):
                    locations = result.get("locations", [])
                    for location in locations:
                        physical_location = location.get("physicalLocation", {})
                        artifact_location = physical_location.get("artifactLocation", {})
                        region = physical_location.get("region", {})

                        uri = artifact_location.get("uri", "")
                        line = region.get("startLine", 0)

                        if uri and line:
                            if uri not in findings_by_file:
                                findings_by_file[uri] = []
                            findings_by_file[uri].append(line)
                            unique_locations.add((uri, line))

            # Sort line numbers for each file
            for file_path in findings_by_file:
                findings_by_file[file_path] = sorted(set(findings_by_file[file_path]))

            # Calculate file type distribution
            file_types = {}
            for file_path in findings_by_file:
                ext = Path(file_path).suffix or Path(file_path).name  # Use full name for files like .env
                if ext.startswith('.'):
                    file_types[ext] = file_types.get(ext, 0) + 1
                else:
                    file_types['[no extension]'] = file_types.get('[no extension]', 0) + 1

            # Calculate secret density
            secret_density = len(unique_locations) / len(findings_by_file) if findings_by_file else 0

            print(f"  ✓ Found {len(unique_locations)} secrets in {len(findings_by_file)} files (avg {secret_density:.1f} per file)")

            return ToolResult(
                tool_name=tool_name,
                execution_time=execution_time,
                findings_count=len(unique_locations),
                findings_by_file=findings_by_file,
                unique_files=len(findings_by_file),
                unique_locations=len(unique_locations),
                secret_density=secret_density,
                file_types=file_types
            )

        except Exception as e:
            print(f"❌ {tool_name} error: {e}")
            return None

    async def run_all_tools(self, llm_models: Optional[List[str]] = None) -> List[ToolResult]:
        """Run all available tools"""
        results = []

        if llm_models is None:
            llm_models = ["gpt-4o-mini"]

        # Gitleaks
        result = await self.run_workflow("gitleaks_detection", "Gitleaks", {
            "scan_mode": "detect",
            "no_git": True,
            "redact": False
        })
        if result:
            results.append(result)

        # TruffleHog
        result = await self.run_workflow("trufflehog_detection", "TruffleHog", {
            "verify": False,
            "max_depth": 10
        })
        if result:
            results.append(result)

        # LLM Detector with multiple models
        for model in llm_models:
            tool_name = f"LLM ({model})"
            result = await self.run_workflow("llm_secret_detection", tool_name, {
                "agent_url": "http://fuzzforge-task-agent:8000/a2a/litellm_agent",
                "llm_model": model,
                "llm_provider": "openai" if "gpt" in model else "anthropic",
                "max_files": 20,
                "timeout": 60,
                "file_patterns": [
                    "*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml",
                    "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat",
                    "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"
                ]
            })
            if result:
                results.append(result)

        return results

    def _calculate_agreement_matrix(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]:
        """Calculate overlap matrix showing common secrets between tool pairs"""
        matrix = {}

        for result1 in results:
            matrix[result1.tool_name] = {}
            # Convert to set of (file, line) tuples
            secrets1 = set()
            for file_path, lines in result1.findings_by_file.items():
                for line in lines:
                    secrets1.add((file_path, line))

            for result2 in results:
                secrets2 = set()
                for file_path, lines in result2.findings_by_file.items():
                    for line in lines:
                        secrets2.add((file_path, line))

                # Count common secrets
                common = len(secrets1 & secrets2)
                matrix[result1.tool_name][result2.tool_name] = common

        return matrix

    def _get_per_file_comparison(self, results: List[ToolResult]) -> Dict[str, Dict[str, int]]:
        """Get per-file breakdown of findings across all tools"""
        all_files = set()
        for result in results:
            all_files.update(result.findings_by_file.keys())

        comparison = {}
        for file_path in sorted(all_files):
            comparison[file_path] = {}
            for result in results:
                comparison[file_path][result.tool_name] = len(result.findings_by_file.get(file_path, []))

        return comparison

    def _get_agreement_stats(self, results: List[ToolResult]) -> Dict[int, int]:
        """Calculate how many secrets are found by 1, 2, 3, or all tools"""
        # Collect all unique (file, line) pairs across all tools
        all_secrets = {}  # (file, line) -> list of tools that found it

        for result in results:
            for file_path, lines in result.findings_by_file.items():
                for line in lines:
                    key = (file_path, line)
                    if key not in all_secrets:
                        all_secrets[key] = []
                    all_secrets[key].append(result.tool_name)

        # Count by number of tools
        agreement_counts = {}
        for tools in all_secrets.values():
            count = len(set(tools))  # Unique tools
            agreement_counts[count] = agreement_counts.get(count, 0) + 1

        return agreement_counts

    def generate_markdown_report(self, results: List[ToolResult]) -> str:
        """Generate markdown comparison report"""
        report = []
        report.append("# Secret Detection Tools Comparison\n")
        report.append(f"**Target**: {self.target_path.name}")
        report.append(f"**Tools**: {', '.join([r.tool_name for r in results])}\n")

        # Summary table with extended metrics
        report.append("\n## Summary\n")
        report.append("| Tool | Secrets | Files | Avg/File | Time (s) |")
        report.append("|------|---------|-------|----------|----------|")

        for result in results:
            report.append(
                f"| {result.tool_name} | "
                f"{result.findings_count} | "
                f"{result.unique_files} | "
                f"{result.secret_density:.1f} | "
                f"{result.execution_time:.2f} |"
            )

        # Agreement Analysis
        agreement_stats = self._get_agreement_stats(results)
        report.append("\n## Agreement Analysis\n")
        report.append("Secrets found by different numbers of tools:\n")
        for num_tools in sorted(agreement_stats.keys(), reverse=True):
            count = agreement_stats[num_tools]
            if num_tools == len(results):
                report.append(f"- **All {num_tools} tools agree**: {count} secrets")
            elif num_tools == 1:
                report.append(f"- **Only 1 tool found**: {count} secrets")
            else:
                report.append(f"- **{num_tools} tools agree**: {count} secrets")

        # Agreement Matrix
        agreement_matrix = self._calculate_agreement_matrix(results)
        report.append("\n## Tool Agreement Matrix\n")
        report.append("Number of common secrets found by tool pairs:\n")

        # Header row
        header = "| Tool |"
        separator = "|------|"
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            header += f" {short_name} |"
            separator += "------|"
        report.append(header)
        report.append(separator)

        # Data rows
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            row = f"| {short_name} |"
            for result2 in results:
                count = agreement_matrix[result.tool_name][result2.tool_name]
                row += f" {count} |"
            report.append(row)

        # Per-File Comparison
        per_file = self._get_per_file_comparison(results)
        report.append("\n## Per-File Detailed Comparison\n")
        report.append("Secrets found per file by each tool:\n")

        # Header
        header = "| File |"
        separator = "|------|"
        for result in results:
            short_name = result.tool_name.replace("LLM (", "").replace(")", "")
            header += f" {short_name} |"
            separator += "------|"
        header += " Total |"
        separator += "------|"
        report.append(header)
        report.append(separator)

        # Show top 15 files by total findings
        file_totals = [(f, sum(counts.values())) for f, counts in per_file.items()]
        file_totals.sort(key=lambda x: x[1], reverse=True)

        for file_path, total in file_totals[:15]:
            row = f"| `{file_path}` |"
            for result in results:
                count = per_file[file_path].get(result.tool_name, 0)
                row += f" {count} |"
            row += f" **{total}** |"
            report.append(row)

        if len(file_totals) > 15:
            report.append(f"| ... and {len(file_totals) - 15} more files | ... | ... | ... | ... | ... |")

        # File Type Breakdown
        report.append("\n## File Type Breakdown\n")
        all_extensions = set()
        for result in results:
            all_extensions.update(result.file_types.keys())

        if all_extensions:
            header = "| Type |"
            separator = "|------|"
            for result in results:
                short_name = result.tool_name.replace("LLM (", "").replace(")", "")
                header += f" {short_name} |"
                separator += "------|"
            report.append(header)
            report.append(separator)

            for ext in sorted(all_extensions):
                row = f"| `{ext}` |"
                for result in results:
                    count = result.file_types.get(ext, 0)
                    row += f" {count} files |"
                report.append(row)

        # File analysis
        report.append("\n## Files Analyzed\n")

        # Collect all unique files across all tools
        all_files = set()
        for result in results:
            all_files.update(result.findings_by_file.keys())

        report.append(f"**Total unique files with secrets**: {len(all_files)}\n")

        for result in results:
            report.append(f"\n### {result.tool_name}\n")
            report.append(f"Found secrets in **{result.unique_files} files**:\n")

            # Sort files by number of findings (descending)
            sorted_files = sorted(
                result.findings_by_file.items(),
                key=lambda x: len(x[1]),
                reverse=True
            )

            # Show top 10 files
            for file_path, lines in sorted_files[:10]:
                report.append(f"- `{file_path}`: {len(lines)} secrets (lines: {', '.join(map(str, lines[:5]))}{'...' if len(lines) > 5 else ''})")

            if len(sorted_files) > 10:
                report.append(f"- ... and {len(sorted_files) - 10} more files")

        # Overlap analysis
        if len(results) >= 2:
            report.append("\n## Overlap Analysis\n")

            # Find common files
            file_sets = [set(r.findings_by_file.keys()) for r in results]
            common_files = set.intersection(*file_sets) if file_sets else set()

            if common_files:
                report.append(f"\n**Files found by all tools** ({len(common_files)}):\n")
                for file_path in sorted(common_files)[:10]:
                    report.append(f"- `{file_path}`")
            else:
                report.append("\n**No files were found by all tools**\n")

            # Find tool-specific files
            for i, result in enumerate(results):
                unique_to_tool = set(result.findings_by_file.keys())
                for j, other_result in enumerate(results):
                    if i != j:
                        unique_to_tool -= set(other_result.findings_by_file.keys())

                if unique_to_tool:
                    report.append(f"\n**Unique to {result.tool_name}** ({len(unique_to_tool)} files):\n")
                    for file_path in sorted(unique_to_tool)[:5]:
                        report.append(f"- `{file_path}`")
                    if len(unique_to_tool) > 5:
                        report.append(f"- ... and {len(unique_to_tool) - 5} more")

        # Ground Truth Analysis (if available)
        ground_truth_path = Path(__file__).parent / "secret_detection_benchmark_GROUND_TRUTH.json"
        if ground_truth_path.exists():
            report.append("\n## Ground Truth Analysis\n")
            try:
                with open(ground_truth_path) as f:
                    gt_data = json.load(f)

                gt_total = gt_data.get("total_secrets", 32)
                report.append(f"**Expected secrets**: {gt_total} (documented in ground truth)\n")

                # Build ground truth set of (file, line) tuples
                gt_secrets = set()
                for secret in gt_data.get("secrets", []):
                    gt_secrets.add((secret["file"], secret["line"]))

                report.append("### Tool Performance vs Ground Truth\n")
                report.append("| Tool | Found | Expected | Recall | Extra Findings |")
                report.append("|------|-------|----------|--------|----------------|")

                for result in results:
                    # Build tool findings set
                    tool_secrets = set()
                    for file_path, lines in result.findings_by_file.items():
                        for line in lines:
                            tool_secrets.add((file_path, line))

                    # Calculate metrics
                    true_positives = len(gt_secrets & tool_secrets)
                    recall = (true_positives / gt_total * 100) if gt_total > 0 else 0
                    extra = len(tool_secrets - gt_secrets)

                    report.append(
                        f"| {result.tool_name} | "
                        f"{result.findings_count} | "
                        f"{gt_total} | "
                        f"{recall:.1f}% | "
                        f"{extra} |"
                    )

                # Analyze LLM extra findings
                llm_results = [r for r in results if "LLM" in r.tool_name]
                if llm_results:
                    report.append("\n### LLM Extra Findings Explanation\n")
                    report.append("LLMs may find more than the 32 documented secrets because they detect:\n")
                    report.append("- **Split secret components**: Each part of `DB_PASS_PART1 + PART2 + PART3` counted separately")
                    report.append("- **Join operations**: Lines like `''.join(AWS_SECRET_CHARS)` flagged as additional exposure")
                    report.append("- **Decoding functions**: Code that reveals secrets (e.g., `base64.b64decode()`, `codecs.decode()`)")
                    report.append("- **Comment identifiers**: Lines marking secret locations without plaintext values")
                    report.append("\nThese are *technically correct* detections of secret exposure points, not false positives.")
                    report.append("The ground truth documents 32 'primary' secrets, but the codebase has additional derivative exposures.\n")

            except Exception as e:
                report.append(f"*Could not load ground truth: {e}*\n")

        # Performance summary
        if results:
            report.append("\n## Performance Summary\n")
            most_findings = max(results, key=lambda r: r.findings_count)
            most_files = max(results, key=lambda r: r.unique_files)
            fastest = min(results, key=lambda r: r.execution_time)

            report.append(f"- **Most secrets found**: {most_findings.tool_name} ({most_findings.findings_count} secrets)")
            report.append(f"- **Most files covered**: {most_files.tool_name} ({most_files.unique_files} files)")
            report.append(f"- **Fastest**: {fastest.tool_name} ({fastest.execution_time:.2f}s)")

        return "\n".join(report)

    def save_json_report(self, results: List[ToolResult], output_path: Path):
        """Save results as JSON"""
        data = {
            "target_path": str(self.target_path),
            "results": [asdict(r) for r in results]
        }

        with open(output_path, 'w') as f:
            json.dump(data, f, indent=2)

        print(f"\n✅ JSON report saved to: {output_path}")

    def cleanup(self):
        """Cleanup SDK client"""
        self.client.close()


async def main():
    """Run comparison and generate reports"""
    # Get target path (secret_detection_benchmark)
    target_path = Path(__file__).parent.parent.parent.parent.parent / "test_projects" / "secret_detection_benchmark"

    if not target_path.exists():
        print(f"❌ Target not found at: {target_path}")
        return 1

    print("=" * 80)
    print("Secret Detection Tools Comparison")
    print("=" * 80)
    print(f"Target: {target_path}")

    # LLM models to test
    llm_models = [
        "gpt-4o-mini",
        "gpt-5-mini"
    ]
    print(f"LLM models: {', '.join(llm_models)}\n")

    # Run comparison
    comparison = SecretDetectionComparison(target_path)

    try:
        results = await comparison.run_all_tools(llm_models=llm_models)

        if not results:
            print("❌ No tools ran successfully")
            return 1

        # Generate reports
        print("\n" + "=" * 80)
        markdown_report = comparison.generate_markdown_report(results)
        print(markdown_report)

        # Save reports
        output_dir = Path(__file__).parent / "results"
        output_dir.mkdir(exist_ok=True)

        markdown_path = output_dir / "comparison_report.md"
        with open(markdown_path, 'w') as f:
            f.write(markdown_report)
        print(f"\n✅ Markdown report saved to: {markdown_path}")

        json_path = output_dir / "comparison_results.json"
        comparison.save_json_report(results, json_path)

        print("\n" + "=" * 80)
        print("✅ Comparison complete!")
        print("=" * 80)

        return 0

    finally:
        comparison.cleanup()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)

@@ -1,169 +0,0 @@
# Secret Detection Tools Comparison

**Target**: secret_detection_benchmark
**Tools**: Gitleaks, TruffleHog, LLM (gpt-4o-mini), LLM (gpt-5-mini)


## Summary

| Tool | Secrets | Files | Avg/File | Time (s) |
|------|---------|-------|----------|----------|
| Gitleaks | 12 | 10 | 1.2 | 5.18 |
| TruffleHog | 1 | 1 | 1.0 | 5.06 |
| LLM (gpt-4o-mini) | 30 | 15 | 2.0 | 296.85 |
| LLM (gpt-5-mini) | 41 | 16 | 2.6 | 618.55 |

## Agreement Analysis

Secrets found by different numbers of tools:

- **3 tools agree**: 6 secrets
- **2 tools agree**: 22 secrets
- **Only 1 tool found**: 22 secrets

## Tool Agreement Matrix

Number of common secrets found by tool pairs:

| Tool | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini |
|------|------|------|------|------|
| Gitleaks | 12 | 0 | 7 | 11 |
| TruffleHog | 0 | 1 | 0 | 0 |
| gpt-4o-mini | 7 | 0 | 30 | 22 |
| gpt-5-mini | 11 | 0 | 22 | 41 |

## Per-File Detailed Comparison

Secrets found per file by each tool:

| File | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini | Total |
|------|------|------|------|------|------|
| `src/obfuscated.py` | 2 | 0 | 6 | 7 | **15** |
| `src/advanced.js` | 0 | 0 | 5 | 7 | **12** |
| `src/config.py` | 1 | 0 | 0 | 6 | **7** |
| `.env` | 1 | 0 | 2 | 2 | **5** |
| `config/keys.yaml` | 1 | 0 | 2 | 2 | **5** |
| `config/oauth.json` | 1 | 0 | 2 | 2 | **5** |
| `config/settings.py` | 2 | 0 | 0 | 3 | **5** |
| `scripts/deploy.sh` | 1 | 0 | 2 | 2 | **5** |
| `config/legacy.ini` | 0 | 0 | 2 | 2 | **4** |
| `src/Crypto.go` | 0 | 0 | 2 | 2 | **4** |
| `config/app.properties` | 1 | 0 | 1 | 1 | **3** |
| `config/database.yaml` | 0 | 1 | 1 | 1 | **3** |
| `src/Main.java` | 1 | 0 | 1 | 1 | **3** |
| `id_rsa` | 1 | 0 | 1 | 0 | **2** |
| `scripts/webhook.js` | 0 | 0 | 1 | 1 | **2** |
| ... and 2 more files | ... | ... | ... | ... | ... |

## File Type Breakdown

| Type | Gitleaks | TruffleHog | gpt-4o-mini | gpt-5-mini |
|------|------|------|------|------|
| `.env` | 1 files | 0 files | 1 files | 1 files |
| `.go` | 0 files | 0 files | 1 files | 1 files |
| `.ini` | 0 files | 0 files | 1 files | 1 files |
| `.java` | 1 files | 0 files | 1 files | 1 files |
| `.js` | 0 files | 0 files | 2 files | 2 files |
| `.json` | 1 files | 0 files | 1 files | 1 files |
| `.properties` | 1 files | 0 files | 1 files | 1 files |
| `.py` | 3 files | 0 files | 2 files | 4 files |
| `.sh` | 1 files | 0 files | 1 files | 1 files |
| `.sql` | 0 files | 0 files | 1 files | 1 files |
| `.yaml` | 1 files | 1 files | 2 files | 2 files |
| `[no extension]` | 1 files | 0 files | 1 files | 0 files |

## Files Analyzed

**Total unique files with secrets**: 17


### Gitleaks

Found secrets in **10 files**:

- `config/settings.py`: 2 secrets (lines: 6, 9)
- `src/obfuscated.py`: 2 secrets (lines: 7, 17)
- `.env`: 1 secrets (lines: 3)
- `config/app.properties`: 1 secrets (lines: 6)
- `config/keys.yaml`: 1 secrets (lines: 6)
- `id_rsa`: 1 secrets (lines: 1)
- `config/oauth.json`: 1 secrets (lines: 4)
- `scripts/deploy.sh`: 1 secrets (lines: 5)
- `src/Main.java`: 1 secrets (lines: 5)
- `src/config.py`: 1 secrets (lines: 7)

### TruffleHog

Found secrets in **1 files**:

- `config/database.yaml`: 1 secrets (lines: 6)

### LLM (gpt-4o-mini)

Found secrets in **15 files**:

- `src/obfuscated.py`: 6 secrets (lines: 7, 10, 13, 18, 20...)
- `src/advanced.js`: 5 secrets (lines: 4, 7, 10, 12, 17)
- `src/Crypto.go`: 2 secrets (lines: 6, 10)
- `.env`: 2 secrets (lines: 3, 4)
- `config/keys.yaml`: 2 secrets (lines: 6, 12)
- `config/oauth.json`: 2 secrets (lines: 3, 4)
- `config/legacy.ini`: 2 secrets (lines: 4, 7)
- `scripts/deploy.sh`: 2 secrets (lines: 6, 9)
- `src/app.py`: 1 secrets (lines: 7)
- `scripts/webhook.js`: 1 secrets (lines: 4)
- ... and 5 more files

### LLM (gpt-5-mini)

Found secrets in **16 files**:

- `src/obfuscated.py`: 7 secrets (lines: 7, 10, 13, 14, 17...)
- `src/advanced.js`: 7 secrets (lines: 4, 7, 9, 10, 13...)
- `src/config.py`: 6 secrets (lines: 7, 10, 13, 14, 15...)
- `config/settings.py`: 3 secrets (lines: 6, 9, 20)
- `src/Crypto.go`: 2 secrets (lines: 10, 15)
- `.env`: 2 secrets (lines: 3, 4)
- `config/keys.yaml`: 2 secrets (lines: 6, 12)
- `config/oauth.json`: 2 secrets (lines: 3, 4)
- `config/legacy.ini`: 2 secrets (lines: 3, 7)
- `scripts/deploy.sh`: 2 secrets (lines: 5, 10)
- ... and 6 more files

## Overlap Analysis


**No files were found by all tools**


## Ground Truth Analysis

**Expected secrets**: 32 (documented in ground truth)

### Tool Performance vs Ground Truth

| Tool | Found | Expected | Recall | Extra Findings |
|------|-------|----------|--------|----------------|
| Gitleaks | 12 | 32 | 37.5% | 0 |
| TruffleHog | 1 | 32 | 0.0% | 1 |
| LLM (gpt-4o-mini) | 30 | 32 | 56.2% | 12 |
| LLM (gpt-5-mini) | 41 | 32 | 84.4% | 14 |

### LLM Extra Findings Explanation

LLMs may find more than the 32 documented secrets because they detect:

- **Split secret components**: Each part of `DB_PASS_PART1 + PART2 + PART3` counted separately
- **Join operations**: Lines like `''.join(AWS_SECRET_CHARS)` flagged as additional exposure
- **Decoding functions**: Code that reveals secrets (e.g., `base64.b64decode()`, `codecs.decode()`)
- **Comment identifiers**: Lines marking secret locations without plaintext values

These are *technically correct* detections of secret exposure points, not false positives.
The ground truth documents 32 'primary' secrets, but the codebase has additional derivative exposures.


## Performance Summary

- **Most secrets found**: LLM (gpt-5-mini) (41 secrets)
- **Most files covered**: LLM (gpt-5-mini) (16 files)
- **Fastest**: TruffleHog (5.06s)

@@ -1,253 +0,0 @@
{
  "target_path": "/Users/tduhamel/Documents/FuzzingLabs/fuzzforge_ai/test_projects/secret_detection_benchmark",
  "results": [
    {
      "tool_name": "Gitleaks",
      "execution_time": 5.177123069763184,
      "findings_count": 12,
      "findings_by_file": {
        ".env": [3],
        "config/app.properties": [6],
        "config/keys.yaml": [6],
        "id_rsa": [1],
        "config/oauth.json": [4],
        "scripts/deploy.sh": [5],
        "config/settings.py": [6, 9],
        "src/Main.java": [5],
        "src/obfuscated.py": [7, 17],
        "src/config.py": [7]
      },
      "unique_files": 10,
      "unique_locations": 12,
      "secret_density": 1.2,
      "file_types": {
        ".env": 1,
        ".properties": 1,
        ".yaml": 1,
        "[no extension]": 1,
        ".json": 1,
        ".sh": 1,
        ".py": 3,
        ".java": 1
      }
    },
    {
      "tool_name": "TruffleHog",
      "execution_time": 5.061383008956909,
      "findings_count": 1,
      "findings_by_file": {
        "config/database.yaml": [6]
      },
      "unique_files": 1,
      "unique_locations": 1,
      "secret_density": 1.0,
      "file_types": {
        ".yaml": 1
      }
    },
    {
      "tool_name": "LLM (gpt-4o-mini)",
      "execution_time": 296.8492441177368,
      "findings_count": 30,
      "findings_by_file": {
        "src/obfuscated.py": [7, 10, 13, 18, 20, 23],
        "src/app.py": [7],
        "scripts/webhook.js": [4],
        "src/advanced.js": [4, 7, 10, 12, 17],
        "src/Main.java": [5],
        "src/Crypto.go": [6, 10],
        ".env": [3, 4],
        "config/keys.yaml": [6, 12],
        "config/database.yaml": [7],
        "config/oauth.json": [3, 4],
        "config/legacy.ini": [4, 7],
        "src/database.sql": [4],
        "config/app.properties": [6],
        "scripts/deploy.sh": [6, 9],
        "id_rsa": [1]
      },
      "unique_files": 15,
      "unique_locations": 30,
      "secret_density": 2.0,
      "file_types": {
        ".py": 2,
        ".js": 2,
        ".java": 1,
        ".go": 1,
        ".env": 1,
        ".yaml": 2,
        ".json": 1,
        ".ini": 1,
        ".sql": 1,
        ".properties": 1,
        ".sh": 1,
        "[no extension]": 1
      }
    },
    {
      "tool_name": "LLM (gpt-5-mini)",
      "execution_time": 618.5462851524353,
      "findings_count": 41,
      "findings_by_file": {
        "config/settings.py": [6, 9, 20],
        "src/obfuscated.py": [7, 10, 13, 14, 17, 20, 23],
        "src/app.py": [7],
        "src/config.py": [7, 10, 13, 14, 15, 16],
        "scripts/webhook.js": [4],
        "src/advanced.js": [4, 7, 9, 10, 13, 17, 19],
        "src/Main.java": [5],
        "src/Crypto.go": [10, 15],
        ".env": [3, 4],
        "config/keys.yaml": [6, 12],
        "config/database.yaml": [7],
        "config/oauth.json": [3, 4],
        "config/legacy.ini": [3, 7],
        "src/database.sql": [6],
        "config/app.properties": [6],
        "scripts/deploy.sh": [5, 10]
      },
      "unique_files": 16,
      "unique_locations": 41,
      "secret_density": 2.5625,
      "file_types": {
        ".py": 4,
        ".js": 2,
        ".java": 1,
        ".go": 1,
        ".env": 1,
        ".yaml": 2,
        ".json": 1,
        ".ini": 1,
        ".sql": 1,
        ".properties": 1,
        ".sh": 1
      }
    }
  ]
}
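The per-tool `unique_files`, `unique_locations`, and `secret_density` values above are consistent with a simple derivation: density = locations / files (Gitleaks: 12 / 10 = 1.2; gpt-5-mini: 41 / 16 = 2.5625). A minimal sketch of that computation, inferred from the numbers rather than taken from the benchmark code:

```python
# Inferred metric derivation; findings_by_file excerpted from the Gitleaks result above.
findings_by_file = {".env": [3], "config/settings.py": [6, 9]}

unique_files = len(findings_by_file)  # distinct files with at least one finding
unique_locations = sum(len(lines) for lines in findings_by_file.values())  # (file, line) hits
secret_density = unique_locations / unique_files  # average findings per covered file
```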
@@ -1,344 +0,0 @@
{
  "description": "Ground truth dataset for secret detection benchmarking - Exactly 32 secrets",
  "version": "1.1.0",
  "total_secrets": 32,
  "secrets_by_difficulty": {
    "easy": 12,
    "medium": 10,
    "hard": 10
  },
  "secrets": [
    {"id": 1, "file": ".env", "line": 3, "difficulty": "easy", "type": "aws_access_key", "value": "AKIAIOSFODNN7EXAMPLE", "severity": "critical"},
    {"id": 2, "file": ".env", "line": 4, "difficulty": "easy", "type": "aws_secret_access_key", "value": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", "severity": "critical"},
    {"id": 3, "file": "config/settings.py", "line": 6, "difficulty": "easy", "type": "github_pat", "value": "ghp_vR8jK2mN4pQ6tX9bC3wY7zA1eF5hI8kL", "severity": "critical"},
    {"id": 4, "file": "config/settings.py", "line": 9, "difficulty": "easy", "type": "stripe_api_key", "value": "sk_live_51MabcdefghijklmnopqrstuvwxyzABCDEF123456789", "severity": "critical"},
    {"id": 5, "file": "config/settings.py", "line": 17, "difficulty": "easy", "type": "database_password", "value": "ProdDB_P@ssw0rd_2024_Secure!", "severity": "critical"},
    {"id": 6, "file": "src/app.py", "line": 6, "difficulty": "easy", "type": "jwt_secret", "value": "my-super-secret-jwt-key-do-not-share-2024", "severity": "critical"},
    {"id": 7, "file": "config/database.yaml", "line": 7, "difficulty": "easy", "type": "azure_storage_key", "value": "DefaultEndpointsProtocol=https;AccountName=prodstore;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;EndpointSuffix=core.windows.net", "severity": "critical"},
    {"id": 8, "file": "scripts/webhook.js", "line": 4, "difficulty": "easy", "type": "slack_webhook", "value": "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXX", "severity": "high"},
    {"id": 9, "file": "config/app.properties", "line": 6, "difficulty": "easy", "type": "api_key", "value": "sk_test_4eC39HqLyjWDarjtT1zdp7dc", "severity": "high"},
    {"id": 10, "file": "id_rsa", "line": 1, "difficulty": "easy", "type": "ssh_private_key", "value": "-----BEGIN OPENSSH PRIVATE KEY-----", "severity": "critical"},
    {"id": 11, "file": "config/oauth.json", "line": 4, "difficulty": "easy", "type": "oauth_client_secret", "value": "GOCSPX-Ab12Cd34Ef56Gh78Ij90Kl12", "severity": "critical"},
    {"id": 12, "file": "src/Main.java", "line": 5, "difficulty": "easy", "type": "google_oauth_secret", "value": "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", "severity": "critical"},
    {"id": 13, "file": "src/config.py", "line": 7, "difficulty": "medium", "type": "aws_access_key_base64", "value": "QUtJQUlPU0ZPRE5ON0VYQU1QTEU=", "decoded": "AKIAIOSFODNN7EXAMPLE", "severity": "critical"},
    {"id": 14, "file": "src/config.py", "line": 10, "difficulty": "medium", "type": "api_token_hex", "value": "6170695f746f6b656e5f616263313233787977373839", "decoded": "api_token_abc123xyz789", "severity": "high"},
    {"id": 15, "file": "src/config.py", "line": 16, "difficulty": "medium", "type": "database_password_concatenated", "value": "MySecurePassword2024!", "note": "Built from DB_PASS_PART1 + DB_PASS_PART2 + DB_PASS_PART3", "severity": "critical"},
    {"id": 16, "file": "scripts/deploy.sh", "line": 5, "difficulty": "medium", "type": "api_key_export", "value": "sk_prod_1234567890abcdefghijklmnopqrstuvwxyz", "severity": "critical"},
    {"id": 17, "file": "scripts/deploy.sh", "line": 11, "difficulty": "medium", "type": "database_password_url_encoded", "value": "mysql://admin:MyP%40ssw0rd%21@db.example.com:3306/prod", "decoded": "mysql://admin:MyP@ssw0rd!@db.example.com:3306/prod", "note": "In comment", "severity": "critical"},
    {"id": 18, "file": "config/keys.yaml", "line": 6, "difficulty": "medium", "type": "rsa_private_key_multiline", "value": "-----BEGIN RSA PRIVATE KEY-----", "note": "Multi-line YAML literal block", "severity": "critical"},
    {"id": 19, "file": "config/keys.yaml", "line": 11, "difficulty": "medium", "type": "api_token_unicode", "value": "tøkęn_śęçrėt_ẃïth_ŭñïçődė_123456", "severity": "high"},
    {"id": 20, "file": "src/database.sql", "line": 6, "difficulty": "medium", "type": "database_connection_string", "value": "postgresql://admin:Pr0dDB_S3cr3t_P@ss@db.prod.example.com:5432/prod_db", "note": "In SQL comment", "severity": "critical"},
    {"id": 21, "file": "config/legacy.ini", "line": 3, "difficulty": "medium", "type": "database_password", "value": "L3g@cy_DB_P@ssw0rd_2023", "severity": "critical"},
    {"id": 22, "file": "config/legacy.ini", "line": 7, "difficulty": "medium", "type": "api_key_commented", "value": "backup_key_xyz789abc123def456ghi", "note": "Commented backup key", "severity": "high"},
    {"id": 23, "file": "src/obfuscated.py", "line": 7, "difficulty": "hard", "type": "stripe_key_rot13", "value": "fx_yvir_frperg_xrl_12345", "decoded": "sk_live_secret_key_12345", "severity": "critical"},
    {"id": 24, "file": "src/obfuscated.py", "line": 10, "difficulty": "hard", "type": "github_token_binary", "value": "b'\\x67\\x68\\x70\\x5f\\x4d\\x79\\x47\\x69\\x74\\x48\\x75\\x62\\x54\\x6f\\x6b\\x65\\x6e\\x31\\x32\\x33\\x34\\x35\\x36'", "decoded": "ghp_MyGitHubToken123456", "severity": "critical"},
    {"id": 25, "file": "src/obfuscated.py", "line": 13, "difficulty": "hard", "type": "aws_secret_char_array", "value": "['A','W','S','_','S','E','C','R','E','T','_','K','E','Y','_','X','Y','Z','7','8','9']", "decoded": "AWS_SECRET_KEY_XYZ789", "severity": "critical"},
    {"id": 26, "file": "src/obfuscated.py", "line": 17, "difficulty": "hard", "type": "api_token_reversed", "value": "321cba_desrever_nekot_ipa", "decoded": "api_token_reversed_abc123", "severity": "high"},
    {"id": 27, "file": "src/advanced.js", "line": 4, "difficulty": "hard", "type": "secret_template_string", "value": "sk_prod_template_key_xyz", "note": "Built from template literals", "severity": "critical"},
    {"id": 28, "file": "src/advanced.js", "line": 7, "difficulty": "hard", "type": "password_in_regex", "value": "password_regex_secret_789", "note": "Inside regex pattern", "severity": "medium"},
    {"id": 29, "file": "src/advanced.js", "line": 10, "difficulty": "hard", "type": "api_key_xor", "value": "[65,82,90,75,94,91,92,75,93,67,65,90,67,92,75,91,67,95]", "decoded": "api_xor_secret_key", "note": "XOR encrypted with key 42", "severity": "critical"},
    {"id": 30, "file": "src/advanced.js", "line": 17, "difficulty": "hard", "type": "api_key_escaped_json", "value": "sk_escaped_json_key_456", "note": "Escaped JSON within string", "severity": "high"},
    {"id": 31, "file": "src/Crypto.go", "line": 10, "difficulty": "hard", "type": "secret_in_heredoc", "value": "golang_heredoc_secret_999", "note": "In heredoc/multi-line string", "severity": "high"},
    {"id": 32, "file": "src/Crypto.go", "line": 15, "difficulty": "hard", "type": "stripe_key_typo", "value": "strippe_sk_live_corrected_key", "decoded": "stripe_sk_live_corrected_key", "note": "Intentional typo corrected programmatically", "severity": "critical"}
  ],
  "file_summary": {
    ".env": 2,
    "config/settings.py": 3,
    "src/app.py": 1,
    "config/database.yaml": 1,
    "scripts/webhook.js": 1,
    "config/app.properties": 1,
    "id_rsa": 1,
    "config/oauth.json": 1,
    "src/Main.java": 1,
    "src/config.py": 3,
    "scripts/deploy.sh": 2,
    "config/keys.yaml": 2,
    "src/database.sql": 1,
    "config/legacy.ini": 2,
    "src/obfuscated.py": 4,
    "src/advanced.js": 4,
    "src/Crypto.go": 2
  },
  "notes": {
    "easy_secrets": "Standard patterns that any decent secret scanner should detect",
    "medium_secrets": "Slightly obfuscated - base64, hex, concatenated, or in comments",
    "hard_secrets": "Well hidden - ROT13, binary, XOR, reversed, split across constructs"
  }
}
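The medium and hard entries above carry both the on-disk `value` and its `decoded` form. A minimal Python sketch (not part of the benchmark code) showing that a few of these encodings round-trip exactly as the dataset documents:

```python
import base64
import codecs

# base64-encoded AWS key (ground-truth id 13)
assert base64.b64decode("QUtJQUlPU0ZPRE5ON0VYQU1QTEU=").decode() == "AKIAIOSFODNN7EXAMPLE"

# ROT13-obfuscated Stripe key (id 23)
assert codecs.decode("fx_yvir_frperg_xrl_12345", "rot13") == "sk_live_secret_key_12345"

# reversed token (id 26)
assert "321cba_desrever_nekot_ipa"[::-1] == "api_token_reversed_abc123"
```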
@@ -1,151 +0,0 @@
"""
Category-specific benchmark configurations

Defines expected metrics and performance thresholds for each module category.
"""

from dataclasses import dataclass
from typing import List, Dict
from enum import Enum


class ModuleCategory(str, Enum):
    """Module categories for benchmarking"""
    FUZZER = "fuzzer"
    SCANNER = "scanner"
    ANALYZER = "analyzer"
    SECRET_DETECTION = "secret_detection"
    REPORTER = "reporter"


@dataclass
class CategoryBenchmarkConfig:
    """Benchmark configuration for a module category"""
    category: ModuleCategory
    expected_metrics: List[str]
    performance_thresholds: Dict[str, float]
    description: str


# Fuzzer category configuration
FUZZER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.FUZZER,
    expected_metrics=[
        "execs_per_sec",
        "coverage_rate",
        "time_to_first_crash",
        "corpus_efficiency",
        "execution_time",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "min_execs_per_sec": 1000,  # Minimum executions per second
        "max_execution_time_small": 10.0,  # Max time for small project (seconds)
        "max_execution_time_medium": 60.0,  # Max time for medium project
        "max_memory_mb": 2048,  # Maximum memory usage
        "min_coverage_rate": 1.0,  # Minimum new coverage per second
    },
    description="Fuzzing modules: coverage-guided fuzz testing"
)


# Scanner category configuration
SCANNER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.SCANNER,
    expected_metrics=[
        "files_per_sec",
        "loc_per_sec",
        "execution_time",
        "peak_memory_mb",
        "findings_count"
    ],
    performance_thresholds={
        "min_files_per_sec": 100,  # Minimum files scanned per second
        "min_loc_per_sec": 10000,  # Minimum lines of code per second
        "max_execution_time_small": 1.0,
        "max_execution_time_medium": 10.0,
        "max_memory_mb": 512,
    },
    description="File scanning modules: fast pattern-based scanning"
)


# Secret detection category configuration
SECRET_DETECTION_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.SECRET_DETECTION,
    expected_metrics=[
        "patterns_per_sec",
        "precision",
        "recall",
        "f1_score",
        "false_positive_rate",
        "execution_time",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "min_patterns_per_sec": 1000,
        "min_precision": 0.90,  # 90% precision target
        "min_recall": 0.95,  # 95% recall target
        "max_false_positives": 5,  # Max false positives per 100 secrets
        "max_execution_time_small": 2.0,
        "max_execution_time_medium": 20.0,
        "max_memory_mb": 1024,
    },
    description="Secret detection modules: high precision pattern matching"
)


# Analyzer category configuration
ANALYZER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.ANALYZER,
    expected_metrics=[
        "analysis_depth",
        "files_analyzed_per_sec",
        "execution_time",
        "peak_memory_mb",
        "findings_count",
        "accuracy"
    ],
    performance_thresholds={
        "min_files_per_sec": 10,  # Slower than scanners due to deep analysis
        "max_execution_time_small": 5.0,
        "max_execution_time_medium": 60.0,
        "max_memory_mb": 2048,
        "min_accuracy": 0.85,  # 85% accuracy target
    },
    description="Code analysis modules: deep semantic analysis"
)


# Reporter category configuration
REPORTER_CONFIG = CategoryBenchmarkConfig(
    category=ModuleCategory.REPORTER,
    expected_metrics=[
        "report_generation_time",
        "findings_per_sec",
        "peak_memory_mb"
    ],
    performance_thresholds={
        "max_report_time_100_findings": 1.0,  # Max 1 second for 100 findings
        "max_report_time_1000_findings": 10.0,  # Max 10 seconds for 1000 findings
        "max_memory_mb": 256,
    },
    description="Reporting modules: fast report generation"
)


# Category configurations map
CATEGORY_CONFIGS = {
    ModuleCategory.FUZZER: FUZZER_CONFIG,
    ModuleCategory.SCANNER: SCANNER_CONFIG,
    ModuleCategory.SECRET_DETECTION: SECRET_DETECTION_CONFIG,
    ModuleCategory.ANALYZER: ANALYZER_CONFIG,
    ModuleCategory.REPORTER: REPORTER_CONFIG,
}


def get_category_config(category: ModuleCategory) -> CategoryBenchmarkConfig:
    """Get benchmark configuration for a category"""
    return CATEGORY_CONFIGS[category]


def get_threshold(category: ModuleCategory, metric: str) -> float:
    """Get performance threshold for a specific metric"""
    config = get_category_config(category)
    return config.performance_thresholds.get(metric, 0.0)
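A short usage sketch for the helpers defined in this (now deleted) module; the import path is an assumption, adjust to wherever the file lived in the benchmarks package:

```python
# Hypothetical import path for the module above.
from benchmarks.category_config import ModuleCategory, get_category_config, get_threshold

config = get_category_config(ModuleCategory.SCANNER)
print(config.description)  # "File scanning modules: fast pattern-based scanning"

assert get_threshold(ModuleCategory.SCANNER, "min_files_per_sec") == 100
assert get_threshold(ModuleCategory.SCANNER, "no_such_metric") == 0.0  # .get() fallback
```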
@@ -1,60 +0,0 @@
"""
Benchmark fixtures and configuration
"""

import sys
from pathlib import Path
import pytest

# Add parent directories to path
BACKEND_ROOT = Path(__file__).resolve().parents[1]
TOOLBOX = BACKEND_ROOT / "toolbox"

if str(BACKEND_ROOT) not in sys.path:
    sys.path.insert(0, str(BACKEND_ROOT))
if str(TOOLBOX) not in sys.path:
    sys.path.insert(0, str(TOOLBOX))


# ============================================================================
# Benchmark Fixtures
# ============================================================================

@pytest.fixture(scope="session")
def benchmark_fixtures_dir():
    """Path to benchmark fixtures directory"""
    return Path(__file__).parent / "fixtures"


@pytest.fixture(scope="session")
def small_project_fixture(benchmark_fixtures_dir):
    """Small project fixture (~1K LOC)"""
    return benchmark_fixtures_dir / "small"


@pytest.fixture(scope="session")
def medium_project_fixture(benchmark_fixtures_dir):
    """Medium project fixture (~10K LOC)"""
    return benchmark_fixtures_dir / "medium"


@pytest.fixture(scope="session")
def large_project_fixture(benchmark_fixtures_dir):
    """Large project fixture (~100K LOC)"""
    return benchmark_fixtures_dir / "large"


# ============================================================================
# pytest-benchmark Configuration
# ============================================================================

def pytest_configure(config):
    """Configure pytest-benchmark"""
    config.addinivalue_line(
        "markers", "benchmark: mark test as a benchmark"
    )


def pytest_benchmark_group_stats(config, benchmarks, group_by):
    """Group benchmark results by category"""
    return group_by
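With these fixtures and the `benchmark` marker registered, the suite can be invoked through pytest-benchmark. A minimal sketch, using standard pytest/pytest-benchmark flags rather than anything repo-specific:

```python
# Programmatic equivalent of running `pytest benchmarks/ --benchmark-only -m benchmark`.
import pytest

exit_code = pytest.main(["benchmarks/", "--benchmark-only", "-m", "benchmark"])
raise SystemExit(exit_code)
```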
@@ -22,6 +22,7 @@
    "parameters": {
        "workflow_name": "string",
        "target_path": "string",
        "volume_mode": "string (ro|rw)",
        "parameters": "object"
    }
},
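For illustration only, a client-side payload matching the schema above might look like the following; the base URL and exact route are assumptions (the submit endpoint's router uses prefix `/workflows`, as shown later in this diff):

```python
import httpx  # dev dependency in this repo's pyproject

# Hypothetical submission matching the documented parameter schema.
payload = {
    "target_path": "/tmp/target_project",  # path visible to the backend
    "volume_mode": "ro",                   # "ro" or "rw"
    "parameters": {},                      # workflow-specific overrides
}
resp = httpx.post(
    "http://localhost:8000/workflows/security_assessment/submit",  # assumed route
    json=payload,
)
print(resp.json())
```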
@@ -1,14 +1,13 @@
[project]
name = "backend"
version = "0.7.3"
version = "0.6.0"
description = "FuzzForge OSS backend"
authors = []
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.116.1",
    "temporalio>=1.6.0",
    "boto3>=1.34.0",
    "prefect>=3.4.18",
    "pydantic>=2.0.0",
    "pyyaml>=6.0",
    "docker>=7.0.0",
@@ -22,20 +21,5 @@ dependencies = [
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "pytest-benchmark>=4.0.0",
    "pytest-cov>=5.0.0",
    "pytest-xdist>=3.5.0",
    "pytest-mock>=3.12.0",
    "httpx>=0.27.0",
    "ruff>=0.1.0",
]

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests", "benchmarks"]
python_files = ["test_*.py", "bench_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
    "benchmark: mark test as a benchmark",
]
@@ -14,8 +14,8 @@ API endpoints for fuzzing workflow management and real-time monitoring
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from typing import List, Dict
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from typing import List, Dict, Any
from fastapi import APIRouter, HTTPException, Depends, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse
import asyncio
import json
@@ -25,6 +25,7 @@ from src.models.findings import (
    FuzzingStats,
    CrashReport
)
from src.core.workflow_discovery import WorkflowDiscovery

logger = logging.getLogger(__name__)

@@ -125,13 +126,12 @@ async def update_fuzzing_stats(run_id: str, stats: FuzzingStats):
    # Debug: log reception for live instrumentation
    try:
        logger.info(
            "Received fuzzing stats update: run_id=%s exec=%s eps=%.2f crashes=%s corpus=%s coverage=%s elapsed=%ss",
            "Received fuzzing stats update: run_id=%s exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%ss",
            run_id,
            stats.executions,
            stats.executions_per_sec,
            stats.crashes,
            stats.corpus_size,
            stats.coverage,
            stats.elapsed_time,
        )
    except Exception:
@@ -14,6 +14,7 @@ API endpoints for workflow run management and findings retrieval
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from typing import Dict, Any
from fastapi import APIRouter, HTTPException, Depends

from src.models.findings import WorkflowFindings, WorkflowStatus
@@ -23,22 +24,22 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/runs", tags=["runs"])


def get_temporal_manager():
    """Dependency to get the Temporal manager instance"""
    from src.main import temporal_mgr
    return temporal_mgr
def get_prefect_manager():
    """Dependency to get the Prefect manager instance"""
    from src.main import prefect_mgr
    return prefect_mgr


@router.get("/{run_id}/status", response_model=WorkflowStatus)
async def get_run_status(
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowStatus:
    """
    Get the current status of a workflow run.

    Args:
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        Status information including state, timestamps, and completion flags
@@ -47,26 +48,25 @@ async def get_run_status(
        HTTPException: 404 if run not found
    """
    try:
        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)

        # Map Temporal status to response format
        workflow_status = status.get("status", "UNKNOWN")
        is_completed = workflow_status in ["COMPLETED", "FAILED", "CANCELLED"]
        is_failed = workflow_status == "FAILED"
        is_running = workflow_status == "RUNNING"

        # Extract workflow name from run_id (format: workflow_name-unique_id)
        workflow_name = run_id.rsplit('-', 1)[0] if '-' in run_id else "unknown"
        # Find workflow name from deployment
        workflow_name = "unknown"
        workflow_deployment_id = status.get("workflow", "")
        for name, deployment_id in prefect_mgr.deployments.items():
            if str(deployment_id) == str(workflow_deployment_id):
                workflow_name = name
                break

        return WorkflowStatus(
            run_id=run_id,
            run_id=status["run_id"],
            workflow=workflow_name,
            status=workflow_status,
            is_completed=is_completed,
            is_failed=is_failed,
            is_running=is_running,
            created_at=status.get("start_time"),
            updated_at=status.get("close_time") or status.get("execution_time")
            status=status["status"],
            is_completed=status["is_completed"],
            is_failed=status["is_failed"],
            is_running=status["is_running"],
            created_at=status["created_at"],
            updated_at=status["updated_at"]
        )

    except Exception as e:
@@ -80,13 +80,13 @@ async def get_run_status(
@router.get("/{run_id}/findings", response_model=WorkflowFindings)
async def get_run_findings(
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Get the findings from a completed workflow run.

    Args:
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution
@@ -96,49 +96,50 @@ async def get_run_findings(
    """
    try:
        # Get run status first
        status = await temporal_mgr.get_workflow_status(run_id)
        workflow_status = status.get("status", "UNKNOWN")
        status = await prefect_mgr.get_flow_run_status(run_id)

        if workflow_status not in ["COMPLETED", "FAILED", "CANCELLED"]:
            if workflow_status == "RUNNING":
        if not status["is_completed"]:
            if status["is_running"]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} is still running. Current status: {workflow_status}"
                    detail=f"Run {run_id} is still running. Current status: {status['status']}"
                )
            elif status["is_failed"]:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} failed. Status: {status['status']}"
                )
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Run {run_id} not completed. Status: {workflow_status}"
                    detail=f"Run {run_id} not completed. Status: {status['status']}"
                )

        if workflow_status == "FAILED":
            raise HTTPException(
                status_code=400,
                detail=f"Run {run_id} failed. Status: {workflow_status}"
            )
        # Get the findings
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        # Get the workflow result
        result = await temporal_mgr.get_workflow_result(run_id)
        # Find workflow name
        workflow_name = "unknown"
        workflow_deployment_id = status.get("workflow", "")
        for name, deployment_id in prefect_mgr.deployments.items():
            if str(deployment_id) == str(workflow_deployment_id):
                workflow_name = name
                break

        # Extract SARIF from result (handle None for backwards compatibility)
        if isinstance(result, dict):
            sarif = result.get("sarif") or {}
        else:
            sarif = {}

        # Extract workflow name from run_id (format: workflow_name-unique_id)
        workflow_name = run_id.rsplit('-', 1)[0] if '-' in run_id else "unknown"

        # Metadata
        # Get workflow version if available
        metadata = {
            "completion_time": status.get("close_time"),
            "completion_time": status["updated_at"],
            "workflow_version": "unknown"
        }

        if workflow_name in prefect_mgr.workflows:
            workflow_info = prefect_mgr.workflows[workflow_name]
            metadata["workflow_version"] = workflow_info.metadata.get("version", "unknown")

        return WorkflowFindings(
            workflow=workflow_name,
            run_id=run_id,
            sarif=sarif,
            sarif=findings,
            metadata=metadata
        )

@@ -156,7 +157,7 @@ async def get_run_findings(
async def get_workflow_findings(
    workflow_name: str,
    run_id: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Get findings for a specific workflow run.
@@ -165,7 +166,7 @@ async def get_workflow_findings(

    Args:
        workflow_name: Name of the workflow
        run_id: The workflow run ID
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution
@@ -173,11 +174,11 @@ async def get_workflow_findings(
    Raises:
        HTTPException: 404 if workflow or run not found, 400 if run not completed
    """
    if workflow_name not in temporal_mgr.workflows:
    if workflow_name not in prefect_mgr.workflows:
        raise HTTPException(
            status_code=404,
            detail=f"Workflow not found: {workflow_name}"
        )

    # Delegate to the main findings endpoint
    return await get_run_findings(run_id, temporal_mgr)
    return await get_run_findings(run_id, prefect_mgr)
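Taken together, the Prefect-side endpoints above can be exercised with a small client. A hedged sketch (the base URL is an assumption, the run ID a placeholder; `httpx` is a dev dependency in this diff's pyproject):

```python
import httpx

BASE = "http://localhost:8000"  # assumed backend address
run_id = "replace-with-a-real-flow-run-id"

status = httpx.get(f"{BASE}/runs/{run_id}/status").json()
print(status["status"], status["is_running"])

# Findings are only served once the run has completed successfully.
if status["is_completed"] and not status["is_failed"]:
    findings = httpx.get(f"{BASE}/runs/{run_id}/findings").json()
    print(findings["sarif"])
```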
@@ -1,47 +0,0 @@
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

"""
System information endpoints for FuzzForge API.

Provides system configuration and filesystem paths to CLI for worker management.
"""

import os
from typing import Dict

from fastapi import APIRouter

router = APIRouter(prefix="/system", tags=["system"])


@router.get("/info")
async def get_system_info() -> Dict[str, str]:
    """
    Get system information including host filesystem paths.

    This endpoint exposes paths needed by the CLI to manage workers via docker-compose.
    The FUZZFORGE_HOST_ROOT environment variable is set by docker-compose and points
    to the FuzzForge installation directory on the host machine.

    Returns:
        Dictionary containing:
        - host_root: Absolute path to FuzzForge root on host
        - docker_compose_path: Path to docker-compose.yml on host
        - workers_dir: Path to workers directory on host
    """
    host_root = os.getenv("FUZZFORGE_HOST_ROOT", "")

    return {
        "host_root": host_root,
        "docker_compose_path": f"{host_root}/docker-compose.yml" if host_root else "",
        "workers_dir": f"{host_root}/workers" if host_root else "",
    }
@@ -15,9 +15,8 @@ API endpoints for workflow management with enhanced error handling

import logging
import traceback
import tempfile
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
from fastapi import APIRouter, HTTPException, Depends
from pathlib import Path

from src.models.findings import (
@@ -26,59 +25,13 @@ from src.models.findings import (
    WorkflowListItem,
    RunSubmissionResponse
)
from src.temporal.discovery import WorkflowDiscovery
from src.core.workflow_discovery import WorkflowDiscovery

logger = logging.getLogger(__name__)

# Configuration for file uploads
MAX_UPLOAD_SIZE = 10 * 1024 * 1024 * 1024  # 10 GB
ALLOWED_CONTENT_TYPES = [
    "application/gzip",
    "application/x-gzip",
    "application/x-tar",
    "application/x-compressed-tar",
    "application/octet-stream",  # Generic binary
]

router = APIRouter(prefix="/workflows", tags=["workflows"])


def extract_defaults_from_json_schema(metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract default parameter values from JSON Schema format.

    Converts from:
        parameters:
          properties:
            param_name:
              default: value

    To:
        {param_name: value}

    Args:
        metadata: Workflow metadata dictionary

    Returns:
        Dictionary of parameter defaults
    """
    defaults = {}

    # Check if there's a legacy default_parameters field
    if "default_parameters" in metadata:
        defaults.update(metadata["default_parameters"])

    # Extract defaults from JSON Schema parameters
    parameters = metadata.get("parameters", {})
    properties = parameters.get("properties", {})

    for param_name, param_spec in properties.items():
        if "default" in param_spec:
            defaults[param_name] = param_spec["default"]

    return defaults


def create_structured_error_response(
    error_type: str,
    message: str,
@@ -115,15 +68,15 @@ def create_structured_error_response(
    return error_response


def get_temporal_manager():
    """Dependency to get the Temporal manager instance"""
    from src.main import temporal_mgr
    return temporal_mgr
def get_prefect_manager():
    """Dependency to get the Prefect manager instance"""
    from src.main import prefect_mgr
    return prefect_mgr


@router.get("/", response_model=List[WorkflowListItem])
async def list_workflows(
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> List[WorkflowListItem]:
    """
    List all discovered workflows with their metadata.
@@ -132,7 +85,7 @@ async def list_workflows(
    author, and tags.
    """
    workflows = []
    for name, info in temporal_mgr.workflows.items():
    for name, info in prefect_mgr.workflows.items():
        workflows.append(WorkflowListItem(
            name=name,
            version=info.metadata.get("version", "0.6.0"),
@@ -158,7 +111,7 @@ async def get_metadata_schema() -> Dict[str, Any]:
@router.get("/{workflow_name}/metadata", response_model=WorkflowMetadata)
async def get_workflow_metadata(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowMetadata:
    """
    Get complete metadata for a specific workflow.
@@ -173,8 +126,8 @@ async def get_workflow_metadata(
    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -190,7 +143,7 @@ async def get_workflow_metadata(
            detail=error_response
        )

    info = temporal_mgr.workflows[workflow_name]
    info = prefect_mgr.workflows[workflow_name]
    metadata = info.metadata

    return WorkflowMetadata(
@@ -200,8 +153,10 @@ async def get_workflow_metadata(
        author=metadata.get("author"),
        tags=metadata.get("tags", []),
        parameters=metadata.get("parameters", {}),
        default_parameters=extract_defaults_from_json_schema(metadata),
        required_modules=metadata.get("required_modules", [])
        default_parameters=metadata.get("default_parameters", {}),
        required_modules=metadata.get("required_modules", []),
        supported_volume_modes=metadata.get("supported_volume_modes", ["ro", "rw"]),
        has_custom_docker=info.has_docker
    )


@@ -209,14 +164,14 @@ async def get_workflow_metadata(
async def submit_workflow(
    workflow_name: str,
    submission: WorkflowSubmission,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> RunSubmissionResponse:
    """
    Submit a workflow for execution.
    Submit a workflow for execution with volume mounting.

    Args:
        workflow_name: Name of the workflow to execute
        submission: Submission parameters including target path and parameters
        submission: Submission parameters including target path and volume mode

    Returns:
        Run submission response with run_id and initial status
@@ -224,8 +179,8 @@ async def submit_workflow(
    Raises:
        HTTPException: 404 if workflow not found, 400 for invalid parameters
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -242,36 +197,31 @@ async def submit_workflow(
        )

    try:
        # Upload target file to MinIO and get target_id
        target_path = Path(submission.target_path)
        if not target_path.exists():
            raise ValueError(f"Target path does not exist: {submission.target_path}")
        # Convert ResourceLimits to dict if provided
        resource_limits_dict = None
        if submission.resource_limits:
            resource_limits_dict = {
                "cpu_limit": submission.resource_limits.cpu_limit,
                "memory_limit": submission.resource_limits.memory_limit,
                "cpu_request": submission.resource_limits.cpu_request,
                "memory_request": submission.resource_limits.memory_request
            }

        # Upload target (using anonymous user for now)
        target_id = await temporal_mgr.upload_target(
            file_path=target_path,
            user_id="api-user",
            metadata={"workflow": workflow_name}
        )

        # Merge default parameters with user parameters
        workflow_info = temporal_mgr.workflows[workflow_name]
        metadata = workflow_info.metadata or {}
        defaults = extract_defaults_from_json_schema(metadata)
        user_params = submission.parameters or {}
        workflow_params = {**defaults, **user_params}

        # Start workflow execution
        handle = await temporal_mgr.run_workflow(
        # Submit the workflow with enhanced parameters
        flow_run = await prefect_mgr.submit_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=workflow_params
            target_path=submission.target_path,
            volume_mode=submission.volume_mode,
            parameters=submission.parameters,
            resource_limits=resource_limits_dict,
            additional_volumes=submission.additional_volumes,
            timeout=submission.timeout
        )

        run_id = handle.id
        run_id = str(flow_run.id)

        # Initialize fuzzing tracking if this looks like a fuzzing workflow
        workflow_info = temporal_mgr.workflows.get(workflow_name, {})
        workflow_info = prefect_mgr.workflows.get(workflow_name, {})
        workflow_tags = workflow_info.metadata.get("tags", []) if hasattr(workflow_info, 'metadata') else []
        if "fuzzing" in workflow_tags or "fuzz" in workflow_name.lower():
            from src.api.fuzzing import initialize_fuzzing_tracking
@@ -279,7 +229,7 @@ async def submit_workflow(

        return RunSubmissionResponse(
            run_id=run_id,
            status="RUNNING",
            status=flow_run.state.name if flow_run.state else "PENDING",
            workflow=workflow_name,
            message=f"Workflow '{workflow_name}' submitted successfully"
        )
@@ -311,13 +261,17 @@ async def submit_workflow(
        error_type = "WorkflowSubmissionError"

        # Detect specific error patterns
        if "workflow" in error_message.lower() and "not found" in error_message.lower():
            error_type = "WorkflowError"
        if "deployment" in error_message.lower():
            error_type = "DeploymentError"
            deployment_info = {
                "status": "failed",
                "error": error_message
            }
            suggestions.extend([
                "Check if Temporal server is running and accessible",
                "Verify workflow workers are running",
                "Check if workflow is registered with correct vertical",
                "Ensure Docker is running and has sufficient resources"
                "Check if Prefect server is running and accessible",
                "Verify Docker is running and has sufficient resources",
                "Check container image availability",
                "Ensure volume paths exist and are accessible"
            ])

        elif "volume" in error_message.lower() or "mount" in error_message.lower():
@@ -370,248 +324,10 @@ async def submit_workflow(
    )


@router.post("/{workflow_name}/upload-and-submit", response_model=RunSubmissionResponse)
async def upload_and_submit_workflow(
    workflow_name: str,
    file: UploadFile = File(..., description="Target file or tarball to analyze"),
    parameters: Optional[str] = Form(None, description="JSON-encoded workflow parameters"),
    timeout: Optional[int] = Form(None, description="Timeout in seconds"),
    temporal_mgr=Depends(get_temporal_manager)
) -> RunSubmissionResponse:
    """
    Upload a target file/tarball and submit workflow for execution.

    This endpoint accepts multipart/form-data uploads and is the recommended
    way to submit workflows from remote CLI clients.

    Args:
        workflow_name: Name of the workflow to execute
        file: Target file or tarball (compressed directory)
        parameters: JSON string of workflow parameters (optional)
        timeout: Execution timeout in seconds (optional)

    Returns:
        Run submission response with run_id and initial status

    Raises:
        HTTPException: 404 if workflow not found, 400 for invalid parameters,
                       413 if file too large
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows"
            ]
        )
        raise HTTPException(status_code=404, detail=error_response)

    temp_file_path = None

    try:
        # Validate file size
        file_size = 0
        chunk_size = 1024 * 1024  # 1MB chunks

        # Create temporary file
        temp_fd, temp_file_path = tempfile.mkstemp(suffix=".tar.gz")

        logger.info(f"Receiving file upload for workflow '{workflow_name}': {file.filename}")

        # Stream file to disk
        with open(temp_fd, 'wb') as temp_file:
            while True:
                chunk = await file.read(chunk_size)
                if not chunk:
                    break

                file_size += len(chunk)

                # Check size limit
                if file_size > MAX_UPLOAD_SIZE:
                    raise HTTPException(
                        status_code=413,
                        detail=create_structured_error_response(
                            error_type="FileTooLarge",
                            message=f"File size exceeds maximum allowed size of {MAX_UPLOAD_SIZE / (1024**3):.1f} GB",
                            workflow_name=workflow_name,
                            suggestions=[
                                "Reduce the size of your target directory",
                                "Exclude unnecessary files (build artifacts, dependencies, etc.)",
                                "Consider splitting into smaller analysis targets"
                            ]
                        )
                    )

                temp_file.write(chunk)

        logger.info(f"Received file: {file_size / (1024**2):.2f} MB")

        # Parse parameters
        workflow_params = {}
        if parameters:
            try:
                import json
                workflow_params = json.loads(parameters)
                if not isinstance(workflow_params, dict):
                    raise ValueError("Parameters must be a JSON object")
            except (json.JSONDecodeError, ValueError) as e:
                raise HTTPException(
                    status_code=400,
                    detail=create_structured_error_response(
                        error_type="InvalidParameters",
                        message=f"Invalid parameters JSON: {e}",
                        workflow_name=workflow_name,
                        suggestions=["Ensure parameters is valid JSON object"]
                    )
                )

        # Upload to MinIO
        target_id = await temporal_mgr.upload_target(
            file_path=Path(temp_file_path),
            user_id="api-user",
            metadata={
                "workflow": workflow_name,
                "original_filename": file.filename,
                "upload_method": "multipart"
            }
        )

        logger.info(f"Uploaded to MinIO with target_id: {target_id}")

        # Merge default parameters with user parameters
        workflow_info = temporal_mgr.workflows.get(workflow_name)
        metadata = workflow_info.metadata or {}
        defaults = extract_defaults_from_json_schema(metadata)
        workflow_params = {**defaults, **workflow_params}

        # Start workflow execution
        handle = await temporal_mgr.run_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=workflow_params
        )

        run_id = handle.id

        # Initialize fuzzing tracking if needed
        workflow_info = temporal_mgr.workflows.get(workflow_name, {})
        workflow_tags = workflow_info.metadata.get("tags", []) if hasattr(workflow_info, 'metadata') else []
        if "fuzzing" in workflow_tags or "fuzz" in workflow_name.lower():
            from src.api.fuzzing import initialize_fuzzing_tracking
            initialize_fuzzing_tracking(run_id, workflow_name)

        return RunSubmissionResponse(
            run_id=run_id,
            status="RUNNING",
            workflow=workflow_name,
            message=f"Workflow '{workflow_name}' submitted successfully with uploaded target"
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to upload and submit workflow '{workflow_name}': {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")

        error_response = create_structured_error_response(
            error_type="WorkflowSubmissionError",
            message=f"Failed to process upload and submit workflow: {str(e)}",
            workflow_name=workflow_name,
            suggestions=[
                "Check if the uploaded file is a valid tarball",
                "Verify MinIO storage is accessible",
                "Check backend logs for detailed error information",
                "Ensure Temporal workers are running"
            ]
        )

        raise HTTPException(status_code=500, detail=error_response)

    finally:
        # Cleanup temporary file
        if temp_file_path and Path(temp_file_path).exists():
            try:
                Path(temp_file_path).unlink()
                logger.debug(f"Cleaned up temp file: {temp_file_path}")
            except Exception as e:
                logger.warning(f"Failed to cleanup temp file {temp_file_path}: {e}")


@router.get("/{workflow_name}/worker-info")
async def get_workflow_worker_info(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
) -> Dict[str, Any]:
    """
    Get worker information for a workflow.

    Returns details about which worker is required to execute this workflow,
    including container name, task queue, and vertical.

    Args:
        workflow_name: Name of the workflow

    Returns:
        Worker information including vertical, container name, and task queue

    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows"
            ]
        )
        raise HTTPException(
            status_code=404,
            detail=error_response
        )

    info = temporal_mgr.workflows[workflow_name]
    metadata = info.metadata

    # Extract vertical from metadata
    vertical = metadata.get("vertical")

    if not vertical:
        error_response = create_structured_error_response(
            error_type="MissingVertical",
            message=f"Workflow '{workflow_name}' does not specify a vertical in metadata",
            workflow_name=workflow_name,
            suggestions=[
                "Check workflow metadata.yaml for 'vertical' field",
                "Contact workflow author for support"
            ]
        )
        raise HTTPException(
            status_code=500,
            detail=error_response
        )

    return {
        "workflow": workflow_name,
        "vertical": vertical,
        "worker_service": f"worker-{vertical}",
        "task_queue": f"{vertical}-queue",
        "required": True
    }


@router.get("/{workflow_name}/parameters")
async def get_workflow_parameters(
    workflow_name: str,
    temporal_mgr=Depends(get_temporal_manager)
    prefect_mgr=Depends(get_prefect_manager)
) -> Dict[str, Any]:
    """
    Get the parameters schema for a workflow.
@@ -625,8 +341,8 @@ async def get_workflow_parameters(
    Raises:
        HTTPException: 404 if workflow not found
    """
    if workflow_name not in temporal_mgr.workflows:
        available_workflows = list(temporal_mgr.workflows.keys())
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
@@ -641,7 +357,7 @@ async def get_workflow_parameters(
        detail=error_response
    )

    info = temporal_mgr.workflows[workflow_name]
    info = prefect_mgr.workflows[workflow_name]
    metadata = info.metadata

    # Return parameters with enhanced schema information
@@ -653,8 +369,11 @@ async def get_workflow_parameters(
    else:
        param_definitions = parameters_schema

    # Extract default values from JSON Schema
    default_params = extract_defaults_from_json_schema(metadata)
    # Add default values to the schema
    default_params = metadata.get("default_parameters", {})
    for param_name, param_schema in param_definitions.items():
        if isinstance(param_schema, dict) and param_name in default_params:
            param_schema["default"] = default_params[param_name]

    return {
        "workflow": workflow_name,
770  backend/src/core/prefect_manager.py  Normal file
@@ -0,0 +1,770 @@
|
||||
"""
|
||||
Prefect Manager - Core orchestration for workflow deployment and execution
|
||||
"""
|
||||
|
||||
# Copyright (c) 2025 FuzzingLabs
|
||||
#
|
||||
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
|
||||
# at the root of this repository for details.
|
||||
#
|
||||
# After the Change Date (four years from publication), this version of the
|
||||
# Licensed Work will be made available under the Apache License, Version 2.0.
|
||||
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Additional attribution and requirements are provided in the NOTICE file.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any
|
||||
from prefect import get_client
|
||||
from prefect.docker import DockerImage
|
||||
from prefect.client.schemas import FlowRun
|
||||
|
||||
from src.core.workflow_discovery import WorkflowDiscovery, WorkflowInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_registry_url(context: str = "default") -> str:
|
||||
"""
|
||||
Get the container registry URL to use for a given operation context.
|
||||
|
||||
Goals:
|
||||
- Work reliably across Linux and macOS Docker Desktop
|
||||
- Prefer in-network service discovery when running inside containers
|
||||
- Allow full override via env vars from docker-compose
|
||||
|
||||
Env overrides:
|
||||
- FUZZFORGE_REGISTRY_PUSH_URL: used for image builds/pushes
|
||||
- FUZZFORGE_REGISTRY_PULL_URL: used for workers to pull images
|
||||
"""
|
||||
# Normalize context
|
||||
ctx = (context or "default").lower()
|
||||
|
||||
# Always honor explicit overrides first
|
||||
if ctx in ("push", "build"):
|
||||
push_url = os.getenv("FUZZFORGE_REGISTRY_PUSH_URL")
|
||||
if push_url:
|
||||
logger.debug("Using FUZZFORGE_REGISTRY_PUSH_URL: %s", push_url)
|
||||
return push_url
|
||||
# Default to host-published registry for Docker daemon operations
|
||||
return "localhost:5001"
|
||||
|
||||
if ctx == "pull":
|
||||
pull_url = os.getenv("FUZZFORGE_REGISTRY_PULL_URL")
|
||||
if pull_url:
|
||||
logger.debug("Using FUZZFORGE_REGISTRY_PULL_URL: %s", pull_url)
|
||||
return pull_url
|
||||
# Prefect worker pulls via host Docker daemon as well
|
||||
return "localhost:5001"
|
||||
|
||||
# Default/fallback
|
||||
return os.getenv("FUZZFORGE_REGISTRY_PULL_URL", os.getenv("FUZZFORGE_REGISTRY_PUSH_URL", "localhost:5001"))
|
||||
|
||||
|
||||
def _compose_project_name(default: str = "fuzzforge") -> str:
|
||||
"""Return the docker-compose project name used for network/volume naming.
|
||||
|
||||
Always returns 'fuzzforge' regardless of environment variables.
|
||||
"""
|
||||
return "fuzzforge"
|
||||
|
||||
|
||||
class PrefectManager:
|
||||
"""
|
||||
Manages Prefect deployments and flow runs for discovered workflows.
|
||||
|
||||
This class handles:
|
||||
- Workflow discovery and registration
|
||||
- Docker image building through Prefect
|
||||
- Deployment creation and management
|
||||
- Flow run submission with volume mounting
|
||||
- Findings retrieval from completed runs
|
||||
"""
|
||||
|
||||
def __init__(self, workflows_dir: Path = None):
|
||||
"""
|
||||
Initialize the Prefect manager.
|
||||
|
||||
Args:
|
||||
workflows_dir: Path to the workflows directory (default: toolbox/workflows)
|
||||
"""
|
||||
if workflows_dir is None:
|
||||
workflows_dir = Path("toolbox/workflows")
|
||||
|
||||
self.discovery = WorkflowDiscovery(workflows_dir)
|
||||
self.workflows: Dict[str, WorkflowInfo] = {}
|
||||
self.deployments: Dict[str, str] = {} # workflow_name -> deployment_id
|
||||
|
||||
# Security: Define allowed and forbidden paths for host mounting
|
||||
self.allowed_base_paths = [
|
||||
"/tmp",
|
||||
"/home",
|
||||
"/Users", # macOS users
|
||||
"/opt",
|
||||
"/var/tmp",
|
||||
"/workspace", # Common container workspace
|
||||
"/app" # Container application directory (for test projects)
|
||||
]
|
||||
|
||||
self.forbidden_paths = [
|
||||
"/etc",
|
||||
"/root",
|
||||
"/var/run",
|
||||
"/sys",
|
||||
"/proc",
|
||||
"/dev",
|
||||
"/boot",
|
||||
"/var/lib/docker", # Critical Docker data
|
||||
"/var/log", # System logs
|
||||
"/usr/bin", # System binaries
|
||||
"/usr/sbin",
|
||||
"/sbin",
|
||||
"/bin"
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _parse_memory_to_bytes(memory_str: str) -> int:
|
||||
"""
|
||||
Parse memory string (like '512Mi', '1Gi') to bytes.
|
||||
|
||||
Args:
|
||||
memory_str: Memory string with unit suffix
|
||||
|
||||
Returns:
|
||||
Memory in bytes
|
||||
|
||||
Raises:
|
||||
ValueError: If format is invalid
|
||||
"""
|
||||
if not memory_str:
|
||||
return 0
|
||||
|
||||
match = re.match(r'^(\d+(?:\.\d+)?)\s*([GMK]i?)$', memory_str.strip())
|
||||
if not match:
|
||||
raise ValueError(f"Invalid memory format: {memory_str}. Expected format like '512Mi', '1Gi'")
|
||||
|
||||
value, unit = match.groups()
|
||||
value = float(value)
|
||||
|
||||
# Convert to bytes based on unit (binary units: Ki, Mi, Gi)
|
||||
if unit in ['K', 'Ki']:
|
||||
multiplier = 1024
|
||||
elif unit in ['M', 'Mi']:
|
||||
multiplier = 1024 * 1024
|
||||
elif unit in ['G', 'Gi']:
|
||||
multiplier = 1024 * 1024 * 1024
|
||||
else:
|
||||
raise ValueError(f"Unsupported memory unit: {unit}")
|
||||
|
||||
return int(value * multiplier)

    @staticmethod
    def _parse_cpu_to_millicores(cpu_str: str) -> int:
        """
        Parse a CPU string (like '500m', '1', '2.5') to millicores.

        Args:
            cpu_str: CPU string

        Returns:
            CPU in millicores (1 core = 1000 millicores)

        Raises:
            ValueError: If the format is invalid
        """
        if not cpu_str:
            return 0

        cpu_str = cpu_str.strip()

        # Handle millicores format (e.g., '500m')
        if cpu_str.endswith('m'):
            try:
                return int(cpu_str[:-1])
            except ValueError:
                raise ValueError(f"Invalid CPU format: {cpu_str}")

        # Handle core format (e.g., '1', '2.5')
        try:
            cores = float(cpu_str)
            return int(cores * 1000)  # Convert to millicores
        except ValueError:
            raise ValueError(f"Invalid CPU format: {cpu_str}")

    def _extract_resource_requirements(self, workflow_info: WorkflowInfo) -> Dict[str, str]:
        """
        Extract resource requirements from workflow metadata.

        Args:
            workflow_info: Workflow information with metadata

        Returns:
            Dictionary with resource requirements in Docker format
        """
        metadata = workflow_info.metadata
        requirements = metadata.get("requirements", {})
        resources = requirements.get("resources", {})

        resource_config = {}

        # Extract memory requirement
        memory = resources.get("memory")
        if memory:
            try:
                # Validate the memory format and store the original string for Docker
                self._parse_memory_to_bytes(memory)
                resource_config["memory"] = memory
            except ValueError as e:
                logger.warning(f"Invalid memory requirement in {workflow_info.name}: {e}")

        # Extract CPU requirement
        cpu = resources.get("cpu")
        if cpu:
            try:
                # Validate the CPU format and store the original string for Docker
                self._parse_cpu_to_millicores(cpu)
                resource_config["cpus"] = cpu
            except ValueError as e:
                logger.warning(f"Invalid CPU requirement in {workflow_info.name}: {e}")

        # Extract timeout
        timeout = resources.get("timeout")
        if timeout and isinstance(timeout, int):
            resource_config["timeout"] = str(timeout)

        return resource_config
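    # Illustrative mapping (editor's sketch, not part of the module): metadata
    # such as
    #   requirements: {resources: {memory: "1Gi", cpu: "500m", timeout: 1800}}
    # would yield {"memory": "1Gi", "cpus": "500m", "timeout": "1800"}, i.e. the
    # validated original strings under the keys the Docker job variables expect.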

    async def initialize(self):
        """
        Initialize the manager by discovering and deploying all workflows.

        This method:
        1. Discovers all valid workflows in the workflows directory
        2. Validates their metadata
        3. Deploys each workflow to Prefect with Docker images
        """
        try:
            # Discover workflows
            self.workflows = await self.discovery.discover_workflows()

            if not self.workflows:
                logger.warning("No workflows discovered")
                return

            logger.info(f"Discovered {len(self.workflows)} workflows: {list(self.workflows.keys())}")

            # Deploy each workflow
            for name, info in self.workflows.items():
                try:
                    await self._deploy_workflow(name, info)
                except Exception as e:
                    logger.error(f"Failed to deploy workflow '{name}': {e}")

        except Exception as e:
            logger.error(f"Failed to initialize Prefect manager: {e}")
            raise

    async def _deploy_workflow(self, name: str, info: WorkflowInfo):
        """
        Deploy a single workflow to Prefect with a Docker image.

        Args:
            name: Workflow name
            info: Workflow information including metadata and paths
        """
        logger.info(f"Deploying workflow '{name}'...")

        # Get the flow function from the registry
        flow_func = self.discovery.get_flow_function(name)
        if not flow_func:
            logger.error(
                f"Failed to get flow function for '{name}' from registry. "
                f"Ensure the workflow is properly registered in toolbox/workflows/registry.py"
            )
            return

        # Use the mandatory Dockerfile with absolute paths for Docker Compose.
        # Get absolute paths for the build context and dockerfile.
        toolbox_path = info.path.parent.parent.resolve()
        dockerfile_abs_path = info.dockerfile.resolve()

        # Calculate the dockerfile path relative to the toolbox context
        try:
            dockerfile_rel_path = dockerfile_abs_path.relative_to(toolbox_path)
        except ValueError:
            # If the relative path fails, use the workflow-specific path
            dockerfile_rel_path = Path("workflows") / name / "Dockerfile"

        # Determine the deployment strategy based on Dockerfile presence
        base_image = "prefecthq/prefect:3-python3.11"
        has_custom_dockerfile = info.has_docker and info.dockerfile.exists()

        logger.info(f"=== DEPLOYMENT DEBUG for '{name}' ===")
        logger.info(f"info.has_docker: {info.has_docker}")
        logger.info(f"info.dockerfile: {info.dockerfile}")
        logger.info(f"info.dockerfile.exists(): {info.dockerfile.exists()}")
        logger.info(f"has_custom_dockerfile: {has_custom_dockerfile}")
        logger.info(f"toolbox_path: {toolbox_path}")
        logger.info(f"dockerfile_rel_path: {dockerfile_rel_path}")

        if has_custom_dockerfile:
            logger.info(f"Workflow '{name}' has custom Dockerfile - building custom image")
            # Decide whether to use the registry or keep images local to the host engine
            import os

            # Default to using the local registry; set FUZZFORGE_USE_REGISTRY=false to bypass (not recommended)
            use_registry = os.getenv("FUZZFORGE_USE_REGISTRY", "true").lower() == "true"

            if use_registry:
                registry_url = get_registry_url(context="push")
                image_spec = DockerImage(
                    name=f"{registry_url}/fuzzforge/{name}",
                    tag="latest",
                    dockerfile=str(dockerfile_rel_path),
                    context=str(toolbox_path)
                )
                deploy_image = f"{registry_url}/fuzzforge/{name}:latest"
                build_custom = True
                push_custom = True
                logger.info(f"Using registry: {registry_url} for '{name}'")
            else:
                # Single-host mode: build into the host engine cache; no push required
                image_spec = DockerImage(
                    name=f"fuzzforge/{name}",
                    tag="latest",
                    dockerfile=str(dockerfile_rel_path),
                    context=str(toolbox_path)
                )
                deploy_image = f"fuzzforge/{name}:latest"
                build_custom = True
                push_custom = False
                logger.info("Using single-host image (no registry push): %s", deploy_image)
        else:
            logger.info(f"Workflow '{name}' using base image - no custom dependencies needed")
            deploy_image = base_image
            build_custom = False
            push_custom = False

        # Pre-validate registry connectivity when pushing
        if push_custom:
            try:
                from .setup import validate_registry_connectivity
                await validate_registry_connectivity(registry_url)
                logger.info(f"Registry connectivity validated for {registry_url}")
            except Exception as e:
                logger.error(f"Registry connectivity validation failed for {registry_url}: {e}")
                raise RuntimeError(f"Cannot deploy workflow '{name}': Registry {registry_url} is not accessible. {e}")

        # Deploy the workflow
        try:
            # Ensure any previous deployment is removed so job variables are updated
            try:
                async with get_client() as client:
                    existing = await client.read_deployment_by_name(
                        f"{name}/{name}-deployment"
                    )
                    if existing:
                        logger.info(f"Removing existing deployment for '{name}' to refresh settings...")
                        await client.delete_deployment(existing.id)
            except Exception:
                # If not found or deletion fails, continue with deployment
                pass

            # Extract resource requirements from metadata
            workflow_resource_requirements = self._extract_resource_requirements(info)
            logger.info(f"Workflow '{name}' resource requirements: {workflow_resource_requirements}")

            # Build job variables with resource requirements
            job_variables = {
                "image": deploy_image,  # Use the worker-accessible registry name
                "volumes": [],          # Populated at run submission with the toolbox mount
                "env": {
                    "PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect",
                    "WORKFLOW_NAME": name
                }
            }

            # Add resource requirements to job variables if present
            if workflow_resource_requirements:
                job_variables["resources"] = workflow_resource_requirements

            # Prepare deployment parameters
            deploy_params = {
                "name": f"{name}-deployment",
                "work_pool_name": "docker-pool",
                "image": image_spec if has_custom_dockerfile else deploy_image,
                "push": push_custom,
                "build": build_custom,
                "job_variables": job_variables
            }

            deployment = await flow_func.deploy(**deploy_params)

            self.deployments[name] = str(deployment.id) if hasattr(deployment, 'id') else name
            logger.info(f"Successfully deployed workflow '{name}'")

        except Exception as e:
            # Enhanced error reporting with more context
            import traceback
            logger.error(f"Failed to deploy workflow '{name}': {e}")
            logger.error(f"Deployment traceback: {traceback.format_exc()}")

            # Try to capture Docker-specific context
            error_context = {
                "workflow_name": name,
                "has_dockerfile": has_custom_dockerfile,
                "image_name": deploy_image if 'deploy_image' in locals() else "unknown",
                "registry_url": registry_url if 'registry_url' in locals() else "unknown",
                "error_type": type(e).__name__,
                "error_message": str(e)
            }

            # Check for specific error patterns with detailed categorization
            error_msg_lower = str(e).lower()
            if "registry" in error_msg_lower and ("no such host" in error_msg_lower or "connection" in error_msg_lower):
                error_context["category"] = "registry_connectivity_error"
                error_context["solution"] = f"Cannot reach registry at {error_context['registry_url']}. Check Docker network and registry service."
            elif "docker" in error_msg_lower:
                error_context["category"] = "docker_error"
                if "build" in error_msg_lower:
                    error_context["subcategory"] = "image_build_failed"
                    error_context["solution"] = "Check Dockerfile syntax and dependencies."
                elif "pull" in error_msg_lower:
                    error_context["subcategory"] = "image_pull_failed"
                    error_context["solution"] = "Check if image exists in registry and network connectivity."
                elif "push" in error_msg_lower:
                    error_context["subcategory"] = "image_push_failed"
                    error_context["solution"] = f"Check registry connectivity and push permissions to {error_context['registry_url']}."
            elif "registry" in error_msg_lower:
                error_context["category"] = "registry_error"
                error_context["solution"] = "Check registry configuration and accessibility."
            elif "prefect" in error_msg_lower:
                error_context["category"] = "prefect_error"
                error_context["solution"] = "Check Prefect server connectivity and deployment configuration."
            else:
                error_context["category"] = "unknown_deployment_error"
                error_context["solution"] = "Check logs for more specific error details."

            logger.error(f"Deployment error context: {error_context}")

            # Raise an enhanced exception with context
            enhanced_error = Exception(f"Deployment failed for workflow '{name}': {str(e)} | Context: {error_context}")
            enhanced_error.original_error = e
            enhanced_error.context = error_context
            raise enhanced_error

    async def submit_workflow(
        self,
        workflow_name: str,
        target_path: str,
        volume_mode: str = "ro",
        parameters: Dict[str, Any] = None,
        resource_limits: Dict[str, str] = None,
        additional_volumes: list = None,
        timeout: int = None
    ) -> FlowRun:
        """
        Submit a workflow for execution with volume mounting.

        Args:
            workflow_name: Name of the workflow to execute
            target_path: Host path to mount as a volume
            volume_mode: Volume mount mode ("ro" for read-only, "rw" for read-write)
            parameters: Workflow-specific parameters
            resource_limits: CPU/memory limits for the container
            additional_volumes: List of additional volume mounts
            timeout: Timeout in seconds

        Returns:
            FlowRun object with run information

        Raises:
            ValueError: If the workflow is not found or the volume mode is not supported
        """
        if workflow_name not in self.workflows:
            raise ValueError(f"Unknown workflow: {workflow_name}")

        # Validate volume mode
        workflow_info = self.workflows[workflow_name]
        supported_modes = workflow_info.metadata.get("supported_volume_modes", ["ro", "rw"])

        if volume_mode not in supported_modes:
            raise ValueError(
                f"Workflow '{workflow_name}' doesn't support volume mode '{volume_mode}'. "
                f"Supported modes: {supported_modes}"
            )

        # Validate the target path with security checks
        self._validate_target_path(target_path)

        # Validate additional volumes if provided
        if additional_volumes:
            for volume in additional_volumes:
                self._validate_target_path(volume.host_path)

        async with get_client() as client:
            # Get the deployment, auto-redeploying once if missing
            try:
                deployment = await client.read_deployment_by_name(
                    f"{workflow_name}/{workflow_name}-deployment"
                )
            except Exception as e:
                import traceback
                logger.error(f"Failed to find deployment for workflow '{workflow_name}': {e}")
                logger.error(f"Deployment lookup traceback: {traceback.format_exc()}")

                # Attempt a one-time auto-deploy to recover from startup races
                try:
                    logger.info(f"Auto-deploying missing workflow '{workflow_name}' and retrying...")
                    await self._deploy_workflow(workflow_name, workflow_info)
                    deployment = await client.read_deployment_by_name(
                        f"{workflow_name}/{workflow_name}-deployment"
                    )
                except Exception as redeploy_exc:
                    # Enhanced error with context
                    error_context = {
                        "workflow_name": workflow_name,
                        "error_type": type(e).__name__,
                        "error_message": str(e),
                        "redeploy_error": str(redeploy_exc),
                        "available_deployments": list(self.deployments.keys()),
                    }
                    enhanced_error = ValueError(
                        f"Deployment not found and redeploy failed for workflow '{workflow_name}': {e} | Context: {error_context}"
                    )
                    enhanced_error.context = error_context
                    raise enhanced_error

            # Determine the Docker Compose network name and volume names.
            # Hardcoded to 'fuzzforge' to avoid directory name dependencies.
            compose_project = "fuzzforge"
            docker_network = "fuzzforge_default"

            # Build volume mounts.
            # Add the toolbox volume mount for workflow code access.
            backend_toolbox_path = "/app/toolbox"  # Path in the backend container

            # Hardcoded volume names
            prefect_storage_volume = "fuzzforge_prefect_storage"
            toolbox_code_volume = "fuzzforge_toolbox_code"

            volumes = [
                f"{target_path}:/workspace:{volume_mode}",
                f"{prefect_storage_volume}:/prefect-storage",     # Shared storage for results
                f"{toolbox_code_volume}:/opt/prefect/toolbox:ro"  # Mount workflow code
            ]

            # Add additional volumes if provided
            if additional_volumes:
                for volume in additional_volumes:
                    volume_spec = f"{volume.host_path}:{volume.container_path}:{volume.mode}"
                    volumes.append(volume_spec)

            # Build environment variables
            env_vars = {
                "PREFECT_API_URL": "http://prefect-server:4200/api",  # Use the internal network hostname
                "PREFECT_LOGGING_LEVEL": "INFO",
                "PREFECT_LOCAL_STORAGE_PATH": "/prefect-storage",  # Use shared storage
                "PREFECT_RESULTS_PERSIST_BY_DEFAULT": "true",  # Enable result persistence
                "PREFECT_DEFAULT_RESULT_STORAGE_BLOCK": "local-file-system/fuzzforge-results",  # Use our storage block
                "WORKSPACE_PATH": "/workspace",
                "VOLUME_MODE": volume_mode,
                "WORKFLOW_NAME": workflow_name
            }

            # Add additional volume paths to the environment for easy access
            if additional_volumes:
                for i, volume in enumerate(additional_volumes):
                    env_vars[f"ADDITIONAL_VOLUME_{i}_PATH"] = volume.container_path

            # Determine which image to use based on the workflow configuration
            workflow_info = self.workflows[workflow_name]
            has_custom_dockerfile = workflow_info.has_docker and workflow_info.dockerfile.exists()
            # Use the pull context so the worker pulls from the registry
            registry_url = get_registry_url(context="pull")
            workflow_image = f"{registry_url}/fuzzforge/{workflow_name}:latest" if has_custom_dockerfile else "prefecthq/prefect:3-python3.11"
            logger.debug(f"Worker will pull image: {workflow_image} (Registry: {registry_url})")

            # Configure job variables with volume mounting and network access
            job_variables = {
                # Use the custom image if available, otherwise the base Prefect image
                "image": workflow_image,
                "volumes": volumes,
                "networks": [docker_network],  # Connect to the Docker Compose network
                "env": {
                    **env_vars,
                    "PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect/toolbox/workflows",
                    "WORKFLOW_NAME": workflow_name
                }
            }

            # Apply resource requirements from workflow metadata and user overrides
            workflow_resource_requirements = self._extract_resource_requirements(workflow_info)
            final_resource_config = {}

            # Start with workflow requirements as the base
            if workflow_resource_requirements:
                final_resource_config.update(workflow_resource_requirements)

            # Apply user-provided resource limits (overriding workflow defaults)
            if resource_limits:
                user_resource_config = {}
                if resource_limits.get("cpu_limit"):
                    user_resource_config["cpus"] = resource_limits["cpu_limit"]
                if resource_limits.get("memory_limit"):
                    user_resource_config["memory"] = resource_limits["memory_limit"]
                # Note: cpu_request and memory_request are not directly supported by Docker
                # but could be used for Kubernetes in the future

                # User overrides take precedence
                final_resource_config.update(user_resource_config)

            # Apply the final resource configuration
            if final_resource_config:
                job_variables["resources"] = final_resource_config
                logger.info(f"Applied resource limits: {final_resource_config}")

            # Merge parameters with defaults from metadata
            default_params = workflow_info.metadata.get("default_parameters", {})
            final_params = {**default_params, **(parameters or {})}

            # Set flow parameters that match the flow signature
            final_params["target_path"] = "/workspace"  # Container path where the volume is mounted
            final_params["volume_mode"] = volume_mode

            # Create and submit the flow run.
            # Pass job_variables to ensure network, volumes, and environment are configured.
            logger.info(f"Submitting flow with job_variables: {job_variables}")
            logger.info(f"Submitting flow with parameters: {final_params}")

            # Prepare flow run creation parameters
            flow_run_params = {
                "deployment_id": deployment.id,
                "parameters": final_params,
                "job_variables": job_variables
            }

            # Note: Timeout is handled through workflow-level configuration.
            # Additional timeout configuration can be added to deployment metadata if needed.

            flow_run = await client.create_flow_run_from_deployment(**flow_run_params)

            logger.info(
                f"Submitted workflow '{workflow_name}' with run_id: {flow_run.id}, "
                f"target: {target_path}, mode: {volume_mode}"
            )

            return flow_run
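    # Illustrative call (editor's sketch; assumes an initialized PrefectManager
    # instance `manager` and a registered workflow named "security_assessment",
    # both hypothetical):
    #   run = await manager.submit_workflow(
    #       workflow_name="security_assessment",
    #       target_path="/tmp/project-under-test",
    #       volume_mode="ro",
    #       resource_limits={"cpu_limit": "2", "memory_limit": "1Gi"},
    #   )
    #   print(run.id)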

    async def get_flow_run_findings(self, run_id: str) -> Dict[str, Any]:
        """
        Retrieve findings from a completed flow run.

        Args:
            run_id: The flow run ID

        Returns:
            Dictionary containing SARIF-formatted findings

        Raises:
            ValueError: If the run is not completed or not found
        """
        async with get_client() as client:
            flow_run = await client.read_flow_run(run_id)

            if not flow_run.state.is_completed():
                raise ValueError(
                    f"Flow run {run_id} not completed. Current status: {flow_run.state.name}"
                )

            # Get the findings from the flow run result
            try:
                findings = await flow_run.state.result()
                return findings
            except Exception as e:
                logger.error(f"Failed to retrieve findings for run {run_id}: {e}")
                raise ValueError(f"Failed to retrieve findings: {e}")

    async def get_flow_run_status(self, run_id: str) -> Dict[str, Any]:
        """
        Get the current status of a flow run.

        Args:
            run_id: The flow run ID

        Returns:
            Dictionary with status information
        """
        async with get_client() as client:
            flow_run = await client.read_flow_run(run_id)

            return {
                "run_id": str(flow_run.id),
                "workflow": flow_run.deployment_id,
                "status": flow_run.state.name,
                "is_completed": flow_run.state.is_completed(),
                "is_failed": flow_run.state.is_failed(),
                "is_running": flow_run.state.is_running(),
                "created_at": flow_run.created,
                "updated_at": flow_run.updated
            }
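    # Illustrative return value (editor's sketch; all field values are examples
    # only, and "workflow" carries the run's deployment_id):
    #   {
    #       "run_id": "0f1c2d3e-...",
    #       "workflow": UUID("..."),
    #       "status": "Completed",
    #       "is_completed": True,
    #       "is_failed": False,
    #       "is_running": False,
    #       "created_at": <datetime>,
    #       "updated_at": <datetime>,
    #   }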

    def _validate_target_path(self, target_path: str) -> None:
        """
        Validate a target path for security before mounting it as a volume.

        Args:
            target_path: Host path to validate

        Raises:
            ValueError: If the path is not allowed for security reasons
        """
        target = Path(target_path)

        # The path must be absolute
        if not target.is_absolute():
            raise ValueError(f"Target path must be absolute: {target_path}")

        # Resolve the path to handle symlinks and relative components
        try:
            resolved_path = target.resolve()
        except (OSError, RuntimeError) as e:
            raise ValueError(f"Cannot resolve target path: {target_path} - {e}")

        resolved_str = str(resolved_path)

        # Check against forbidden paths first (more restrictive)
        for forbidden in self.forbidden_paths:
            if resolved_str.startswith(forbidden):
                raise ValueError(
                    f"Access denied: Path '{target_path}' resolves to forbidden directory '{forbidden}'. "
                    f"This path contains sensitive system files and cannot be mounted."
                )

        # Check whether the path starts with any allowed base path
        path_allowed = False
        for allowed in self.allowed_base_paths:
            if resolved_str.startswith(allowed):
                path_allowed = True
                break

        if not path_allowed:
            allowed_list = ", ".join(self.allowed_base_paths)
            raise ValueError(
                f"Access denied: Path '{target_path}' is not in allowed directories. "
                f"Allowed base paths: {allowed_list}"
            )

        # Additional security checks
        if resolved_str == "/":
            raise ValueError("Cannot mount root filesystem")

        # Warn if the path doesn't exist (but don't block - it might be created later)
        if not resolved_path.exists():
            logger.warning(f"Target path does not exist: {target_path}")

        logger.info(f"Path validation passed for: {target_path} -> {resolved_str}")
@@ -1,5 +1,5 @@
"""
Setup utilities for FuzzForge infrastructure
Setup utilities for Prefect infrastructure
"""

# Copyright (c) 2025 FuzzingLabs
@@ -14,21 +14,364 @@ Setup utilities for FuzzForge infrastructure
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from prefect import get_client
from prefect.client.schemas.actions import WorkPoolCreate
from prefect.client.schemas.objects import WorkPool
from .prefect_manager import get_registry_url

logger = logging.getLogger(__name__)


async def setup_docker_pool():
    """
    Create or update the Docker work pool for container execution.

    This work pool is configured to:
    - Connect to the local Docker daemon
    - Support volume mounting at runtime
    - Clean up containers after execution
    - Use bridge networking by default
    """
    import os

    async with get_client() as client:
        pool_name = "docker-pool"

        # Force-recreation flag for debugging fresh-install issues
        force_recreate = os.getenv('FORCE_RECREATE_WORK_POOL', 'false').lower() == 'true'
        debug_setup = os.getenv('DEBUG_WORK_POOL_SETUP', 'false').lower() == 'true'

        if force_recreate:
            logger.warning("FORCE_RECREATE_WORK_POOL=true - will recreate the work pool regardless of existing configuration")
        if debug_setup:
            logger.warning("DEBUG_WORK_POOL_SETUP=true - enhanced logging enabled")
            # Temporarily set the logging level to DEBUG for this function
            original_level = logger.level
            logger.setLevel(logging.DEBUG)

        try:
            # Check whether the pool already exists and supports custom images
            existing_pools = await client.read_work_pools()
            existing_pool = None
            for pool in existing_pools:
                if pool.name == pool_name:
                    existing_pool = pool
                    break

            if existing_pool and not force_recreate:
                logger.info(f"Found existing work pool '{pool_name}' - validating configuration...")

                # Check whether the existing pool has the correct configuration
                base_template = existing_pool.base_job_template or {}
                logger.debug(f"Base template keys: {list(base_template.keys())}")

                job_config = base_template.get("job_configuration", {})
                logger.debug(f"Job config keys: {list(job_config.keys())}")

                image_config = job_config.get("image", "")
                has_image_variable = "{{ image }}" in str(image_config)
                logger.debug(f"Image config: '{image_config}' -> has_image_variable: {has_image_variable}")

                # Check whether the volume defaults include the toolbox mount
                variables = base_template.get("variables", {})
                properties = variables.get("properties", {})
                volume_config = properties.get("volumes", {})
                volume_defaults = volume_config.get("default", [])
                has_toolbox_volume = any("toolbox_code" in str(vol) for vol in volume_defaults) if volume_defaults else False
                logger.debug(f"Volume defaults: {volume_defaults}")
                logger.debug(f"Has toolbox volume: {has_toolbox_volume}")

                # Check whether the environment defaults include the required settings
                env_config = properties.get("env", {})
                env_defaults = env_config.get("default", {})
                has_api_url = "PREFECT_API_URL" in env_defaults
                has_storage_path = "PREFECT_LOCAL_STORAGE_PATH" in env_defaults
                has_results_persist = "PREFECT_RESULTS_PERSIST_BY_DEFAULT" in env_defaults
                has_required_env = has_api_url and has_storage_path and has_results_persist
                logger.debug(f"Environment defaults: {env_defaults}")
                logger.debug(f"Has API URL: {has_api_url}, Has storage path: {has_storage_path}, Has results persist: {has_results_persist}")
                logger.debug(f"Has required env: {has_required_env}")

                # Log the full validation result
                logger.info(f"Work pool validation - Image: {has_image_variable}, Toolbox: {has_toolbox_volume}, Environment: {has_required_env}")

                if has_image_variable and has_toolbox_volume and has_required_env:
                    logger.info(f"Docker work pool '{pool_name}' already exists with correct configuration")
                    return
                else:
                    reasons = []
                    if not has_image_variable:
                        reasons.append("missing image template")
                    if not has_toolbox_volume:
                        reasons.append("missing toolbox volume mount")
                    if not has_required_env:
                        if not has_api_url:
                            reasons.append("missing PREFECT_API_URL")
                        if not has_storage_path:
                            reasons.append("missing PREFECT_LOCAL_STORAGE_PATH")
                        if not has_results_persist:
                            reasons.append("missing PREFECT_RESULTS_PERSIST_BY_DEFAULT")

                    logger.warning(f"Docker work pool '{pool_name}' exists but lacks: {', '.join(reasons)}. Recreating...")
                    # Delete the old pool and recreate it
                    try:
                        await client.delete_work_pool(pool_name)
                        logger.info(f"Deleted old work pool '{pool_name}'")
                    except Exception as e:
                        logger.warning(f"Failed to delete old work pool: {e}")
            elif force_recreate and existing_pool:
                logger.warning(f"Force recreation enabled - deleting existing work pool '{pool_name}'")
                try:
                    await client.delete_work_pool(pool_name)
                    logger.info("Deleted existing work pool for force recreation")
                except Exception as e:
                    logger.warning(f"Failed to delete work pool for force recreation: {e}")

            logger.info(f"Creating Docker work pool '{pool_name}' with custom image support...")

            # Create the work pool with the proper Docker configuration
            work_pool = WorkPoolCreate(
                name=pool_name,
                type="docker",
                description="Docker work pool for FuzzForge workflows with custom image support",
                base_job_template={
                    "job_configuration": {
                        "image": "{{ image }}",        # Template variable for custom images
                        "volumes": "{{ volumes }}",    # List of volume mounts
                        "env": "{{ env }}",            # Environment variables
                        "networks": "{{ networks }}",  # Docker networks
                        "stream_output": True,
                        "auto_remove": True,
                        "privileged": False,
                        "network_mode": None,  # Use networks instead
                        "labels": {},
                        "command": None  # Let the image's CMD/ENTRYPOINT run
                    },
                    "variables": {
                        "type": "object",
                        "properties": {
                            "image": {
                                "type": "string",
                                "title": "Docker Image",
                                "default": "prefecthq/prefect:3-python3.11",
                                "description": "Docker image for the flow run"
                            },
                            "volumes": {
                                "type": "array",
                                "title": "Volume Mounts",
                                "default": [
                                    "fuzzforge_prefect_storage:/prefect-storage",
                                    "fuzzforge_toolbox_code:/opt/prefect/toolbox:ro"
                                ],
                                "description": "Volume mounts in format 'host:container:mode'",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "networks": {
                                "type": "array",
                                "title": "Docker Networks",
                                "default": ["fuzzforge_default"],
                                "description": "Docker networks to connect the container to",
                                "items": {
                                    "type": "string"
                                }
                            },
                            "env": {
                                "type": "object",
                                "title": "Environment Variables",
                                "default": {
                                    "PREFECT_API_URL": "http://prefect-server:4200/api",
                                    "PREFECT_LOCAL_STORAGE_PATH": "/prefect-storage",
                                    "PREFECT_RESULTS_PERSIST_BY_DEFAULT": "true"
                                },
                                "description": "Environment variables for the container",
                                "additionalProperties": {
                                    "type": "string"
                                }
                            }
                        }
                    }
                }
            )

            await client.create_work_pool(work_pool)
            logger.info(f"Created Docker work pool '{pool_name}'")

        except Exception as e:
            logger.error(f"Failed to setup Docker work pool: {e}")
            raise
        finally:
            # Restore the original logging level if debug mode was enabled
            if debug_setup and 'original_level' in locals():
                logger.setLevel(original_level)
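# Illustrative override (editor's sketch, not part of the module): because the
# base job template above exposes "image", "volumes", "networks" and "env" as
# variables, a flow-run submission can override them per run, e.g.:
#   job_variables = {
#       "image": "localhost:5001/fuzzforge/my_workflow:latest",  # hypothetical image
#       "volumes": ["/tmp/target:/workspace:ro"],
#       "networks": ["fuzzforge_default"],
#   }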


def get_actual_compose_project_name():
    """
    Return the hardcoded compose project name for FuzzForge.

    Always returns 'fuzzforge' as per system requirements.
    """
    logger.info("Using hardcoded compose project name: fuzzforge")
    return "fuzzforge"


async def setup_result_storage():
    """
    Setup result storage (MinIO).
    Create or update the Prefect result storage block for findings persistence.

    MinIO is used for both target upload and result storage.
    This is a placeholder for any MinIO-specific setup if needed.
    This sets up a LocalFileSystem storage block pointing to the shared
    /prefect-storage volume for result persistence.
    """
    logger.info("Result storage (MinIO) configured")
    # MinIO is configured via environment variables in docker-compose
    # No additional setup needed here
    return True
    from prefect.filesystems import LocalFileSystem

    storage_name = "fuzzforge-results"

    try:
        # Create the storage block, overwriting if it exists
        logger.info(f"Setting up storage block '{storage_name}'...")
        storage = LocalFileSystem(basepath="/prefect-storage")

        block_doc_id = await storage.save(name=storage_name, overwrite=True)
        logger.info(f"Storage block '{storage_name}' configured successfully")
        return str(block_doc_id)

    except Exception as e:
        logger.error(f"Failed to setup result storage: {e}")
        # Don't raise the exception - continue without the storage block
        logger.warning("Continuing without result storage block - findings may not persist")
        return None


async def validate_docker_connection():
    """
    Validate that Docker is accessible and running.

    Note: In containerized deployments with a Docker socket proxy,
    the backend doesn't need direct Docker access.

    Raises:
        RuntimeError: If Docker is not accessible
    """
    import os

    # Skip Docker validation if running in a container without socket access
    if os.path.exists("/.dockerenv") and not os.path.exists("/var/run/docker.sock"):
        logger.info("Running in container without Docker socket - skipping Docker validation")
        return

    try:
        import docker
        client = docker.from_env()
        client.ping()
        logger.info("Docker connection validated")
    except Exception as e:
        logger.error(f"Docker is not accessible: {e}")
        raise RuntimeError(
            "Docker is not running or not accessible. "
            "Please ensure Docker is installed and running."
        )


async def validate_registry_connectivity(registry_url: str = None):
    """
    Validate that the Docker registry is accessible.

    Args:
        registry_url: URL of the Docker registry to validate (auto-detected if None)

    Raises:
        RuntimeError: If the registry is not accessible
    """
    import os

    # Resolve a reachable test URL from within this process
    if registry_url is None:
        # If not specified, prefer the internal service name in containers, the host port on the host
        if os.path.exists('/.dockerenv'):
            registry_url = "registry:5000"
        else:
            registry_url = "localhost:5001"

    # If we're running inside a container and asked to probe localhost:PORT,
    # the probe would hit the container, not the host. Use host.docker.internal instead.
    try:
        host_part, port_part = registry_url.split(":", 1)
    except ValueError:
        host_part, port_part = registry_url, "80"

    if os.path.exists('/.dockerenv') and host_part in ("localhost", "127.0.0.1"):
        test_host = "host.docker.internal"
    else:
        test_host = host_part
    test_url = f"http://{test_host}:{port_part}/v2/"

    import aiohttp
    import asyncio

    logger.info(f"Validating registry connectivity to {registry_url}...")

    try:
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session:
            async with session.get(test_url) as response:
                if response.status == 200:
                    logger.info(f"Registry at {registry_url} is accessible (tested via {test_host})")
                    return
                else:
                    raise RuntimeError(f"Registry returned status {response.status}")
    except asyncio.TimeoutError:
        raise RuntimeError(f"Registry at {registry_url} is not responding (timeout)")
    except aiohttp.ClientError as e:
        raise RuntimeError(f"Registry at {registry_url} is not accessible: {e}")
    except Exception as e:
        raise RuntimeError(f"Failed to validate registry connectivity: {e}")


async def validate_docker_network(network_name: str):
    """
    Validate that the specified Docker network exists.

    Args:
        network_name: Name of the Docker network to validate

    Raises:
        RuntimeError: If the network doesn't exist
    """
    import os

    # Skip network validation if running in a container without the Docker socket
    if os.path.exists("/.dockerenv") and not os.path.exists("/var/run/docker.sock"):
        logger.info("Running in container without Docker socket - skipping network validation")
        return

    try:
        import docker
        client = docker.from_env()

        # List all networks
        networks = client.networks.list(names=[network_name])

        if not networks:
            # Try to find networks with similar names
            all_networks = client.networks.list()
            similar_networks = [n.name for n in all_networks if "fuzzforge" in n.name.lower()]

            error_msg = f"Docker network '{network_name}' not found."
            if similar_networks:
                error_msg += f" Available networks: {similar_networks}"
            else:
                error_msg += " Please ensure Docker Compose is running."

            raise RuntimeError(error_msg)

        logger.info(f"Docker network '{network_name}' validated")

    except Exception as e:
        if isinstance(e, RuntimeError):
            raise
        logger.error(f"Network validation failed: {e}")
        raise RuntimeError(f"Failed to validate Docker network: {e}")


async def validate_infrastructure():
@@ -39,7 +382,21 @@ async def validate_infrastructure():
    """
    logger.info("Validating infrastructure...")

    # Setup storage (MinIO)
    await setup_result_storage()
    # Validate the Docker connection
    await validate_docker_connection()

    # Validate registry connectivity for custom image building
    await validate_registry_connectivity()

    # Validate the network (hardcoded to avoid directory name dependencies)
    import os
    compose_project = "fuzzforge"
    docker_network = "fuzzforge_default"

    try:
        await validate_docker_network(docker_network)
    except RuntimeError as e:
        logger.warning(f"Network validation failed: {e}")
        logger.warning("Workflows may not be able to connect to Prefect services")

    logger.info("Infrastructure validation completed")

459
backend/src/core/workflow_discovery.py
Normal file
@@ -0,0 +1,459 @@
"""
Workflow Discovery - Registry-based discovery and loading of workflows
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import logging
import yaml
from pathlib import Path
from typing import Dict, Optional, Any, Callable
from pydantic import BaseModel, Field, ConfigDict

logger = logging.getLogger(__name__)


class WorkflowInfo(BaseModel):
    """Information about a discovered workflow"""
    name: str = Field(..., description="Workflow name")
    path: Path = Field(..., description="Path to workflow directory")
    workflow_file: Path = Field(..., description="Path to workflow.py file")
    dockerfile: Path = Field(..., description="Path to Dockerfile")
    has_docker: bool = Field(..., description="Whether workflow has custom Dockerfile")
    metadata: Dict[str, Any] = Field(..., description="Workflow metadata from YAML")
    flow_function_name: str = Field(default="main_flow", description="Name of the flow function")

    model_config = ConfigDict(arbitrary_types_allowed=True)


class WorkflowDiscovery:
    """
    Discovers workflows from the filesystem and validates them against the registry.

    This system:
    1. Scans for workflows with metadata.yaml files
    2. Cross-references them with the manual registry
    3. Provides registry-based flow functions for deployment

    Workflows must have:
    - workflow.py: Contains the Prefect flow
    - metadata.yaml: Mandatory metadata file
    - Entry in toolbox/workflows/registry.py: Manual registration
    - Dockerfile: Mandatory custom container definition
    - requirements.txt (optional): Python dependencies
    """

    def __init__(self, workflows_dir: Path):
        """
        Initialize workflow discovery.

        Args:
            workflows_dir: Path to the workflows directory
        """
        self.workflows_dir = workflows_dir
        if not self.workflows_dir.exists():
            self.workflows_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created workflows directory: {self.workflows_dir}")

        # Import the registry - this validates it on import
        try:
            from toolbox.workflows.registry import WORKFLOW_REGISTRY, list_registered_workflows
            self.registry = WORKFLOW_REGISTRY
            logger.info(f"Loaded workflow registry with {len(self.registry)} registered workflows")
        except ImportError as e:
            logger.error(f"Failed to import workflow registry: {e}")
            self.registry = {}
        except Exception as e:
            logger.error(f"Registry validation failed: {e}")
            self.registry = {}

        # Cache for discovered workflows
        self._workflow_cache: Optional[Dict[str, WorkflowInfo]] = None
        self._cache_timestamp: Optional[float] = None
        self._cache_ttl = 60.0  # Cache TTL in seconds

    async def discover_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Discover workflows by cross-referencing the filesystem with the registry.
        Uses caching to avoid frequent filesystem scans.

        Returns:
            Dictionary mapping workflow names to their information
        """
        # Check cache validity
        import time
        current_time = time.time()

        if (self._workflow_cache is not None and
                self._cache_timestamp is not None and
                (current_time - self._cache_timestamp) < self._cache_ttl):
            # Return cached results
            logger.debug(f"Returning cached workflow discovery ({len(self._workflow_cache)} workflows)")
            return self._workflow_cache

        workflows = {}
        discovered_dirs = set()
        registry_names = set(self.registry.keys())

        if not self.workflows_dir.exists():
            logger.warning(f"Workflows directory does not exist: {self.workflows_dir}")
            return workflows

        # Recursively scan all directories and subdirectories
        await self._scan_directory_recursive(self.workflows_dir, workflows, discovered_dirs)

        # Check for registry entries without corresponding directories
        missing_dirs = registry_names - discovered_dirs
        if missing_dirs:
            logger.warning(
                f"Registry contains workflows without filesystem directories: {missing_dirs}. "
                f"These workflows cannot be deployed."
            )

        logger.info(
            f"Discovery complete: {len(workflows)} workflows ready for deployment, "
            f"{len(missing_dirs)} registry entries missing directories, "
            f"{len(discovered_dirs - registry_names)} filesystem workflows not registered"
        )

        # Update the cache
        self._workflow_cache = workflows
        self._cache_timestamp = current_time

        return workflows

    async def _scan_directory_recursive(self, directory: Path, workflows: Dict[str, WorkflowInfo], discovered_dirs: set):
        """
        Recursively scan a directory for workflows.

        Args:
            directory: Directory to scan
            workflows: Dictionary to populate with discovered workflows
            discovered_dirs: Set to track discovered workflow names
        """
        for item in directory.iterdir():
            if not item.is_dir():
                continue

            if item.name.startswith('_') or item.name.startswith('.'):
                continue  # Skip hidden or private directories

            # Check whether this directory contains workflow files (workflow.py and metadata.yaml)
            workflow_file = item / "workflow.py"
            metadata_file = item / "metadata.yaml"

            if workflow_file.exists() and metadata_file.exists():
                # This is a workflow directory
                workflow_name = item.name
                discovered_dirs.add(workflow_name)

                # Only process workflows that are in the registry
                if workflow_name not in self.registry:
                    logger.warning(
                        f"Workflow '{workflow_name}' found in filesystem but not in registry. "
                        f"Add it to toolbox/workflows/registry.py to enable deployment."
                    )
                    continue

                try:
                    workflow_info = await self._load_workflow(item)
                    if workflow_info:
                        workflows[workflow_info.name] = workflow_info
                        logger.info(f"Discovered and registered workflow: {workflow_info.name}")
                except Exception as e:
                    logger.error(f"Failed to load workflow from {item}: {e}")
            else:
                # This is a category directory, recurse into it
                await self._scan_directory_recursive(item, workflows, discovered_dirs)

    async def _load_workflow(self, workflow_dir: Path) -> Optional[WorkflowInfo]:
        """
        Load and validate a single workflow.

        Args:
            workflow_dir: Path to the workflow directory

        Returns:
            WorkflowInfo if valid, None otherwise
        """
        workflow_name = workflow_dir.name

        # Check for mandatory files
        workflow_file = workflow_dir / "workflow.py"
        metadata_file = workflow_dir / "metadata.yaml"

        if not workflow_file.exists():
            logger.warning(f"Workflow {workflow_name} missing workflow.py")
            return None

        if not metadata_file.exists():
            logger.error(f"Workflow {workflow_name} missing mandatory metadata.yaml")
            return None

        # Load and validate metadata
        try:
            metadata = self._load_metadata(metadata_file)
            if not self._validate_metadata(metadata, workflow_name):
                return None
        except Exception as e:
            logger.error(f"Failed to load metadata for {workflow_name}: {e}")
            return None

        # Check for the mandatory Dockerfile
        dockerfile = workflow_dir / "Dockerfile"
        if not dockerfile.exists():
            logger.error(f"Workflow {workflow_name} missing mandatory Dockerfile")
            return None

        has_docker = True  # Always True since the Dockerfile is mandatory

        # Get the flow function name from metadata or use the default
        flow_function_name = metadata.get("flow_function", "main_flow")

        return WorkflowInfo(
            name=workflow_name,
            path=workflow_dir,
            workflow_file=workflow_file,
            dockerfile=dockerfile,
            has_docker=has_docker,
            metadata=metadata,
            flow_function_name=flow_function_name
        )

    def _load_metadata(self, metadata_file: Path) -> Dict[str, Any]:
        """
        Load metadata from a YAML file.

        Args:
            metadata_file: Path to metadata.yaml

        Returns:
            Dictionary containing metadata
        """
        with open(metadata_file, 'r') as f:
            metadata = yaml.safe_load(f)

        if metadata is None:
            raise ValueError("Empty metadata file")

        return metadata

    def _validate_metadata(self, metadata: Dict[str, Any], workflow_name: str) -> bool:
        """
        Validate that metadata contains all required fields.

        Args:
            metadata: Metadata dictionary
            workflow_name: Name of the workflow for logging

        Returns:
            True if valid, False otherwise
        """
        required_fields = ["name", "version", "description", "author", "category", "parameters", "requirements"]

        missing_fields = []
        for field in required_fields:
            if field not in metadata:
                missing_fields.append(field)

        if missing_fields:
            logger.error(
                f"Workflow {workflow_name} metadata missing required fields: {missing_fields}"
            )
            return False

        # Validate the version format (semantic versioning)
        version = metadata.get("version", "")
        if not self._is_valid_version(version):
            logger.error(f"Workflow {workflow_name} has invalid version format: {version}")
            return False

        # Validate the parameters structure
        parameters = metadata.get("parameters", {})
        if not isinstance(parameters, dict):
            logger.error(f"Workflow {workflow_name} parameters must be a dictionary")
            return False

        return True

    def _is_valid_version(self, version: str) -> bool:
        """
        Check whether a version follows semantic versioning (x.y.z).

        Args:
            version: Version string

        Returns:
            True if it is a valid semantic version
        """
        try:
            parts = version.split('.')
            if len(parts) != 3:
                return False
            for part in parts:
                int(part)  # Check that each part is a number
            return True
        except (ValueError, AttributeError):
            return False
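    # Illustrative checks (editor's sketch, not part of the module):
    #   self._is_valid_version("1.2.3")  # True
    #   self._is_valid_version("1.2")    # False (needs three parts)
    #   self._is_valid_version("1.2.x")  # False (non-numeric part)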

    def invalidate_cache(self) -> None:
        """
        Invalidate the workflow discovery cache.
        Useful when workflows are added or modified.
        """
        self._workflow_cache = None
        self._cache_timestamp = None
        logger.debug("Workflow discovery cache invalidated")

    def get_flow_function(self, workflow_name: str) -> Optional[Callable]:
        """
        Get the flow function from the registry.

        Args:
            workflow_name: Name of the workflow

        Returns:
            The flow function if found in the registry, None otherwise
        """
        if workflow_name not in self.registry:
            logger.error(
                f"Workflow '{workflow_name}' not found in registry. "
                f"Available workflows: {list(self.registry.keys())}"
            )
            return None

        try:
            from toolbox.workflows.registry import get_workflow_flow
            flow_func = get_workflow_flow(workflow_name)
            logger.debug(f"Retrieved flow function for '{workflow_name}' from registry")
            return flow_func
        except Exception as e:
            logger.error(f"Failed to get flow function for '{workflow_name}': {e}")
            return None

    def get_registry_info(self, workflow_name: str) -> Optional[Dict[str, Any]]:
        """
        Get registry information for a workflow.

        Args:
            workflow_name: Name of the workflow

        Returns:
            Registry information if found, None otherwise
        """
        if workflow_name not in self.registry:
            return None

        try:
            from toolbox.workflows.registry import get_workflow_info
            return get_workflow_info(workflow_name)
        except Exception as e:
            logger.error(f"Failed to get registry info for '{workflow_name}': {e}")
            return None

    @staticmethod
    def get_metadata_schema() -> Dict[str, Any]:
        """
        Get the JSON schema for workflow metadata.

        Returns:
            JSON schema dictionary
        """
        return {
            "type": "object",
            "required": ["name", "version", "description", "author", "category", "parameters", "requirements"],
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Workflow name"
                },
                "version": {
                    "type": "string",
                    "pattern": "^\\d+\\.\\d+\\.\\d+$",
                    "description": "Semantic version (x.y.z)"
                },
                "description": {
                    "type": "string",
                    "description": "Workflow description"
                },
                "author": {
                    "type": "string",
                    "description": "Workflow author"
                },
                "category": {
                    "type": "string",
                    "enum": ["comprehensive", "specialized", "fuzzing", "focused"],
                    "description": "Workflow category"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Workflow tags for categorization"
                },
                "requirements": {
                    "type": "object",
                    "required": ["tools", "resources"],
                    "properties": {
                        "tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Required security tools"
                        },
                        "resources": {
                            "type": "object",
                            "required": ["memory", "cpu", "timeout"],
                            "properties": {
                                "memory": {
                                    "type": "string",
                                    "pattern": "^\\d+[GMK]i$",
                                    "description": "Memory limit (e.g., 1Gi, 512Mi)"
                                },
                                "cpu": {
                                    "type": "string",
                                    "pattern": "^\\d+m?$",
                                    "description": "CPU limit (e.g., 1000m, 2)"
                                },
                                "timeout": {
                                    "type": "integer",
                                    "minimum": 60,
                                    "maximum": 7200,
                                    "description": "Workflow timeout in seconds"
                                }
                            }
                        }
                    }
                },
                "parameters": {
                    "type": "object",
                    "description": "Workflow parameters schema"
                },
                "default_parameters": {
                    "type": "object",
                    "description": "Default parameter values"
                },
                "required_modules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Required module names"
                },
                "supported_volume_modes": {
                    "type": "array",
                    "items": {"enum": ["ro", "rw"]},
                    "default": ["ro", "rw"],
                    "description": "Supported volume mount modes"
                },
                "flow_function": {
                    "type": "string",
                    "default": "main_flow",
                    "description": "Name of the flow function in workflow.py"
                }
            }
        }
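    # Illustrative minimal metadata.yaml satisfying this schema (editor's
    # sketch; values are examples only, shown as the dict yaml.safe_load
    # would produce):
    #   {
    #       "name": "example_workflow",
    #       "version": "0.1.0",
    #       "description": "Demo scan",
    #       "author": "FuzzingLabs",
    #       "category": "specialized",
    #       "parameters": {},
    #       "requirements": {
    #           "tools": ["semgrep"],
    #           "resources": {"memory": "512Mi", "cpu": "500m", "timeout": 600},
    #       },
    #   }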
|
||||
@@ -12,6 +12,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from uuid import UUID
|
||||
from contextlib import AsyncExitStack, asynccontextmanager, suppress
|
||||
from typing import Any, Dict, Optional, List
|
||||
|
||||
@@ -22,20 +23,31 @@ from starlette.routing import Mount
|
||||
|
||||
from fastmcp.server.http import create_sse_app
|
||||
|
||||
from src.temporal.manager import TemporalManager
|
||||
from src.core.setup import setup_result_storage, validate_infrastructure
|
||||
from src.api import workflows, runs, fuzzing, system
|
||||
from src.core.prefect_manager import PrefectManager
|
||||
from src.core.setup import setup_docker_pool, setup_result_storage, validate_infrastructure
|
||||
from src.core.workflow_discovery import WorkflowDiscovery
|
||||
from src.api import workflows, runs, fuzzing
|
||||
from src.services.prefect_stats_monitor import prefect_stats_monitor
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from prefect.client.orchestration import get_client
|
||||
from prefect.client.schemas.filters import (
|
||||
FlowRunFilter,
|
||||
FlowRunFilterDeploymentId,
|
||||
FlowRunFilterState,
|
||||
FlowRunFilterStateType,
|
||||
)
|
||||
from prefect.client.schemas.sorting import FlowRunSort
|
||||
from prefect.states import StateType
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
temporal_mgr = TemporalManager()
|
||||
prefect_mgr = PrefectManager()
|
||||
|
||||
|
||||
class TemporalBootstrapState:
|
||||
"""Tracks Temporal initialization progress for API and MCP consumers."""
|
||||
class PrefectBootstrapState:
|
||||
"""Tracks Prefect initialization progress for API and MCP consumers."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.ready: bool = False
|
||||
@@ -52,19 +64,19 @@ class TemporalBootstrapState:
        }


temporal_bootstrap_state = TemporalBootstrapState()
prefect_bootstrap_state = PrefectBootstrapState()

# Configure retry strategy for bootstrapping Temporal + infrastructure
# Configure retry strategy for bootstrapping Prefect + infrastructure
STARTUP_RETRY_SECONDS = max(1, int(os.getenv("FUZZFORGE_STARTUP_RETRY_SECONDS", "5")))
STARTUP_RETRY_MAX_SECONDS = max(
    STARTUP_RETRY_SECONDS,
    int(os.getenv("FUZZFORGE_STARTUP_RETRY_MAX_SECONDS", "60")),
)
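# Illustrative sketch (not part of this diff): the bootstrap loop further down
# waits min(base * 2 ** (attempt - 1), cap) seconds between attempts, so with
# the defaults above (5s base, 60s cap) the schedule is 5, 10, 20, 40, 60, 60, ...
def _example_retry_schedule(base: int = 5, cap: int = 60, attempts: int = 6) -> list:
    return [min(base * 2 ** (attempt - 1), cap) for attempt in range(1, attempts + 1)]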

temporal_bootstrap_task: Optional[asyncio.Task] = None
prefect_bootstrap_task: Optional[asyncio.Task] = None

# ---------------------------------------------------------------------------
# FastAPI application (REST API)
# FastAPI application (REST API remains unchanged)
# ---------------------------------------------------------------------------

app = FastAPI(
@@ -76,22 +88,22 @@ app = FastAPI(
app.include_router(workflows.router)
app.include_router(runs.router)
app.include_router(fuzzing.router)
app.include_router(system.router)


def get_temporal_status() -> Dict[str, Any]:
    """Return a snapshot of Temporal bootstrap state for diagnostics."""
    status = temporal_bootstrap_state.as_dict()
    status["workflows_loaded"] = len(temporal_mgr.workflows)
def get_prefect_status() -> Dict[str, Any]:
    """Return a snapshot of Prefect bootstrap state for diagnostics."""
    status = prefect_bootstrap_state.as_dict()
    status["workflows_loaded"] = len(prefect_mgr.workflows)
    status["deployments_tracked"] = len(prefect_mgr.deployments)
    status["bootstrap_task_running"] = (
        temporal_bootstrap_task is not None and not temporal_bootstrap_task.done()
        prefect_bootstrap_task is not None and not prefect_bootstrap_task.done()
    )
    return status


def _temporal_not_ready_status() -> Optional[Dict[str, Any]]:
    """Return status details if Temporal is not ready yet."""
    status = get_temporal_status()
def _prefect_not_ready_status() -> Optional[Dict[str, Any]]:
    """Return status details if Prefect is not ready yet."""
    status = get_prefect_status()
    if status.get("ready"):
        return None
    return status
@@ -99,19 +111,19 @@ def _temporal_not_ready_status() -> Optional[Dict[str, Any]]:

@app.get("/")
async def root() -> Dict[str, Any]:
    status = get_temporal_status()
    status = get_prefect_status()
    return {
        "name": "FuzzForge API",
        "version": "0.6.0",
        "status": "ready" if status.get("ready") else "initializing",
        "workflows_loaded": status.get("workflows_loaded", 0),
        "temporal": status,
        "prefect": status,
    }


@app.get("/health")
async def health() -> Dict[str, str]:
    status = get_temporal_status()
    status = get_prefect_status()
    health_status = "healthy" if status.get("ready") else "initializing"
    return {"status": health_status}
@@ -153,66 +165,78 @@ _fastapi_mcp_imported = False
mcp = FastMCP(name="FuzzForge MCP")


async def _bootstrap_temporal_with_retries() -> None:
    """Initialize Temporal infrastructure with exponential backoff retries."""
async def _bootstrap_prefect_with_retries() -> None:
    """Initialize Prefect infrastructure with exponential backoff retries."""

    attempt = 0

    while True:
        attempt += 1
        temporal_bootstrap_state.task_running = True
        temporal_bootstrap_state.status = "starting"
        temporal_bootstrap_state.ready = False
        temporal_bootstrap_state.last_error = None
        prefect_bootstrap_state.task_running = True
        prefect_bootstrap_state.status = "starting"
        prefect_bootstrap_state.ready = False
        prefect_bootstrap_state.last_error = None

        try:
            logger.info("Bootstrapping Temporal infrastructure...")
            logger.info("Bootstrapping Prefect infrastructure...")
            await validate_infrastructure()
            await setup_docker_pool()
            await setup_result_storage()
            await temporal_mgr.initialize()
            await prefect_mgr.initialize()
            await prefect_stats_monitor.start_monitoring()

            temporal_bootstrap_state.ready = True
            temporal_bootstrap_state.status = "ready"
            temporal_bootstrap_state.task_running = False
            logger.info("Temporal infrastructure ready")
            prefect_bootstrap_state.ready = True
            prefect_bootstrap_state.status = "ready"
            prefect_bootstrap_state.task_running = False
            logger.info("Prefect infrastructure ready")
            return

        except asyncio.CancelledError:
            temporal_bootstrap_state.status = "cancelled"
            temporal_bootstrap_state.task_running = False
            logger.info("Temporal bootstrap task cancelled")
            prefect_bootstrap_state.status = "cancelled"
            prefect_bootstrap_state.task_running = False
            logger.info("Prefect bootstrap task cancelled")
            raise

        except Exception as exc:  # pragma: no cover - defensive logging on infra startup
            logger.exception("Temporal bootstrap failed")
            temporal_bootstrap_state.ready = False
            temporal_bootstrap_state.status = "error"
            temporal_bootstrap_state.last_error = str(exc)
            logger.exception("Prefect bootstrap failed")
            prefect_bootstrap_state.ready = False
            prefect_bootstrap_state.status = "error"
            prefect_bootstrap_state.last_error = str(exc)

            # Ensure partial initialization does not leave stale state behind
            temporal_mgr.workflows.clear()
            prefect_mgr.workflows.clear()
            prefect_mgr.deployments.clear()
            await prefect_stats_monitor.stop_monitoring()

            wait_time = min(
                STARTUP_RETRY_SECONDS * (2 ** (attempt - 1)),
                STARTUP_RETRY_MAX_SECONDS,
            )
            logger.info("Retrying Temporal bootstrap in %s second(s)", wait_time)
            logger.info("Retrying Prefect bootstrap in %s second(s)", wait_time)

            try:
                await asyncio.sleep(wait_time)
            except asyncio.CancelledError:
                temporal_bootstrap_state.status = "cancelled"
                temporal_bootstrap_state.task_running = False
                prefect_bootstrap_state.status = "cancelled"
                prefect_bootstrap_state.task_running = False
                raise


def _lookup_workflow(workflow_name: str):
    info = temporal_mgr.workflows.get(workflow_name)
    info = prefect_mgr.workflows.get(workflow_name)
    if not info:
        return None
    metadata = info.metadata
    defaults = metadata.get("default_parameters", {})
    default_target_path = metadata.get("default_target_path") or defaults.get("target_path")
    supported_modes = metadata.get("supported_volume_modes") or ["ro", "rw"]
    if not isinstance(supported_modes, list) or not supported_modes:
        supported_modes = ["ro", "rw"]
    default_volume_mode = (
        metadata.get("default_volume_mode")
        or defaults.get("volume_mode")
        or supported_modes[0]
    )
    return {
        "name": workflow_name,
        "version": metadata.get("version", "0.6.0"),
@@ -222,23 +246,26 @@ def _lookup_workflow(workflow_name: str):
        "parameters": metadata.get("parameters", {}),
        "default_parameters": metadata.get("default_parameters", {}),
        "required_modules": metadata.get("required_modules", []),
        "default_target_path": default_target_path
        "supported_volume_modes": supported_modes,
        "default_target_path": default_target_path,
        "default_volume_mode": default_volume_mode,
        "has_custom_docker": bool(info.has_docker),
    }


@mcp.tool
async def list_workflows_mcp() -> Dict[str, Any]:
    """List all discovered workflows and their metadata summary."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "workflows": [],
            "temporal": not_ready,
            "message": "Temporal infrastructure is still initializing",
            "prefect": not_ready,
            "message": "Prefect infrastructure is still initializing",
        }

    workflows_summary = []
    for name, info in temporal_mgr.workflows.items():
    for name, info in prefect_mgr.workflows.items():
        metadata = info.metadata
        defaults = metadata.get("default_parameters", {})
        workflows_summary.append({
@@ -247,20 +274,25 @@ async def list_workflows_mcp() -> Dict[str, Any]:
            "description": metadata.get("description", ""),
            "author": metadata.get("author"),
            "tags": metadata.get("tags", []),
            "supported_volume_modes": metadata.get("supported_volume_modes", ["ro", "rw"]),
            "default_volume_mode": metadata.get("default_volume_mode")
            or defaults.get("volume_mode")
            or "ro",
            "default_target_path": metadata.get("default_target_path")
            or defaults.get("target_path")
            or defaults.get("target_path"),
            "has_custom_docker": bool(info.has_docker),
        })
    return {"workflows": workflows_summary, "temporal": get_temporal_status()}
    return {"workflows": workflows_summary, "prefect": get_prefect_status()}


@mcp.tool
async def get_workflow_metadata_mcp(workflow_name: str) -> Dict[str, Any]:
    """Fetch detailed metadata for a workflow."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    data = _lookup_workflow(workflow_name)
@@ -272,11 +304,11 @@ async def get_workflow_metadata_mcp(workflow_name: str) -> Dict[str, Any]:
@mcp.tool
async def get_workflow_parameters_mcp(workflow_name: str) -> Dict[str, Any]:
    """Return the parameter schema and defaults for a workflow."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    data = _lookup_workflow(workflow_name)
@@ -291,41 +323,72 @@ async def get_workflow_parameters_mcp(workflow_name: str) -> Dict[str, Any]:
@mcp.tool
async def get_workflow_metadata_schema_mcp() -> Dict[str, Any]:
    """Return the JSON schema describing workflow metadata files."""
    from src.temporal.discovery import WorkflowDiscovery
    return WorkflowDiscovery.get_metadata_schema()


@mcp.tool
async def submit_security_scan_mcp(
    workflow_name: str,
    target_id: str,
    target_path: str | None = None,
    volume_mode: str | None = None,
    parameters: Dict[str, Any] | None = None,
) -> Dict[str, Any] | Dict[str, str]:
    """Submit a Temporal workflow via MCP."""
    """Submit a Prefect workflow via MCP."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        workflow_info = temporal_mgr.workflows.get(workflow_name)
        workflow_info = prefect_mgr.workflows.get(workflow_name)
        if not workflow_info:
            return {"error": f"Workflow '{workflow_name}' not found"}

        metadata = workflow_info.metadata or {}
        defaults = metadata.get("default_parameters", {})

        resolved_target_path = target_path or metadata.get("default_target_path") or defaults.get("target_path")
        if not resolved_target_path:
            return {
                "error": (
                    "target_path is required and no default_target_path is defined in metadata"
                ),
                "metadata": {
                    "workflow": workflow_name,
                    "default_target_path": metadata.get("default_target_path"),
                },
            }

        requested_volume_mode = volume_mode or metadata.get("default_volume_mode") or defaults.get("volume_mode")
        if not requested_volume_mode:
            requested_volume_mode = "ro"

        normalised_volume_mode = (
            str(requested_volume_mode).strip().lower().replace("-", "_")
        )
        if normalised_volume_mode in {"read_only", "readonly", "ro"}:
            normalised_volume_mode = "ro"
        elif normalised_volume_mode in {"read_write", "readwrite", "rw"}:
            normalised_volume_mode = "rw"
        else:
            supported_modes = metadata.get("supported_volume_modes", ["ro", "rw"])
            if isinstance(supported_modes, list) and normalised_volume_mode in supported_modes:
                pass
            else:
                normalised_volume_mode = "ro"

        parameters = parameters or {}

        cleaned_parameters: Dict[str, Any] = {**defaults, **parameters}

        # Ensure *_config structures default to dicts
        # Ensure *_config structures default to dicts so Prefect validation passes.
        for key, value in list(cleaned_parameters.items()):
            if isinstance(key, str) and key.endswith("_config") and value is None:
                cleaned_parameters[key] = {}

        # Some workflows expect configuration dictionaries even when omitted
        # Some workflows expect configuration dictionaries even when omitted.
        parameter_definitions = (
            metadata.get("parameters", {}).get("properties", {})
            if isinstance(metadata.get("parameters"), dict)
@@ -340,19 +403,20 @@ async def submit_security_scan_mcp(
            elif cleaned_parameters[key] is None:
                cleaned_parameters[key] = {}

        # Start workflow
        handle = await temporal_mgr.run_workflow(
        flow_run = await prefect_mgr.submit_workflow(
            workflow_name=workflow_name,
            target_id=target_id,
            workflow_params=cleaned_parameters,
            target_path=resolved_target_path,
            volume_mode=normalised_volume_mode,
            parameters=cleaned_parameters,
        )

        return {
            "run_id": handle.id,
            "status": "RUNNING",
            "run_id": str(flow_run.id),
            "status": flow_run.state.name if flow_run.state else "PENDING",
            "workflow": workflow_name,
            "message": f"Workflow '{workflow_name}' submitted successfully",
            "target_id": target_id,
            "target_path": resolved_target_path,
            "volume_mode": normalised_volume_mode,
            "parameters": cleaned_parameters,
            "mcp_enabled": True,
        }
@@ -363,38 +427,43 @@ async def submit_security_scan_mcp(

@mcp.tool
async def get_comprehensive_scan_summary(run_id: str) -> Dict[str, Any] | Dict[str, str]:
    """Return a summary for the given workflow run via MCP."""
    """Return a summary for the given flow run via MCP."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        # Try to get result if completed
        total_findings = 0
        severity_summary = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}

        if status.get("status") == "COMPLETED":
            try:
                result = await temporal_mgr.get_workflow_result(run_id)
                if isinstance(result, dict):
                    summary = result.get("summary", {})
                    total_findings = summary.get("total_findings", 0)
            except Exception as e:
                logger.debug(f"Could not retrieve result for {run_id}: {e}")
        if findings and "sarif" in findings:
            sarif = findings["sarif"]
            if isinstance(sarif, dict):
                total_findings = sarif.get("total_findings", 0)

        return {
            "run_id": run_id,
            "workflow": "unknown",  # Temporal doesn't track workflow name in status
            "workflow": workflow_name,
            "status": status.get("status", "unknown"),
            "is_completed": status.get("status") == "COMPLETED",
            "is_completed": status.get("is_completed", False),
            "total_findings": total_findings,
            "severity_summary": severity_summary,
            "scan_duration": status.get("close_time", "In progress"),
            "scan_duration": status.get("updated_at", "")
            if status.get("is_completed")
            else "In progress",
            "recommendations": (
                [
                    "Review high and critical severity findings first",
@@ -413,26 +482,32 @@ async def get_comprehensive_scan_summary(run_id: str) -> Dict[str, Any] | Dict[s

@mcp.tool
async def get_run_status_mcp(run_id: str) -> Dict[str, Any]:
    """Return current status information for a Temporal run."""
    """Return current status information for a Prefect run."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        status = await prefect_mgr.get_flow_run_status(run_id)
        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        return {
            "run_id": run_id,
            "workflow": "unknown",
            "run_id": status["run_id"],
            "workflow": workflow_name,
            "status": status["status"],
            "is_completed": status["status"] in ["COMPLETED", "FAILED", "CANCELLED"],
            "is_failed": status["status"] == "FAILED",
            "is_running": status["status"] == "RUNNING",
            "created_at": status.get("start_time"),
            "updated_at": status.get("close_time") or status.get("execution_time"),
            "is_completed": status["is_completed"],
            "is_failed": status["is_failed"],
            "is_running": status["is_running"],
            "created_at": status["created_at"],
            "updated_at": status["updated_at"],
        }
    except Exception as exc:
        logger.exception("MCP run status failed")
@@ -443,30 +518,38 @@ async def get_run_status_mcp(run_id: str) -> Dict[str, Any]:
async def get_run_findings_mcp(run_id: str) -> Dict[str, Any]:
    """Return SARIF findings for a completed run."""
    try:
        not_ready = _temporal_not_ready_status()
        not_ready = _prefect_not_ready_status()
        if not_ready:
            return {
                "error": "Temporal infrastructure not ready",
                "temporal": not_ready,
                "error": "Prefect infrastructure not ready",
                "prefect": not_ready,
            }

        status = await temporal_mgr.get_workflow_status(run_id)
        if status.get("status") != "COMPLETED":
        status = await prefect_mgr.get_flow_run_status(run_id)
        if not status.get("is_completed"):
            return {"error": f"Run {run_id} not completed. Status: {status.get('status')}"}

        result = await temporal_mgr.get_workflow_result(run_id)
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        workflow_name = "unknown"
        deployment_id = status.get("workflow", "")
        for name, deployment in prefect_mgr.deployments.items():
            if str(deployment) == str(deployment_id):
                workflow_name = name
                break

        metadata = {
            "completion_time": status.get("close_time"),
            "completion_time": status.get("updated_at"),
            "workflow_version": "unknown",
        }

        sarif = result.get("sarif", {}) if isinstance(result, dict) else {}
        info = prefect_mgr.workflows.get(workflow_name)
        if info:
            metadata["workflow_version"] = info.metadata.get("version", "unknown")

        return {
            "workflow": "unknown",
            "workflow": workflow_name,
            "run_id": run_id,
            "sarif": sarif,
            "sarif": findings,
            "metadata": metadata,
        }
    except Exception as exc:
@@ -478,15 +561,16 @@ async def get_run_findings_mcp(run_id: str) -> Dict[str, Any]:
async def list_recent_runs_mcp(
    limit: int = 10,
    workflow_name: str | None = None,
    states: List[str] | None = None,
) -> Dict[str, Any]:
    """List recent Temporal runs with optional workflow filter."""
    """List recent Prefect runs with optional workflow/state filters."""

    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "runs": [],
            "temporal": not_ready,
            "message": "Temporal infrastructure is still initializing",
            "prefect": not_ready,
            "message": "Prefect infrastructure is still initializing",
        }

    try:
@@ -495,49 +579,116 @@ async def list_recent_runs_mcp(
        limit_value = 10
    limit_value = max(1, min(limit_value, 100))

    try:
        # Build filter query
        filter_query = None
        if workflow_name:
            workflow_info = temporal_mgr.workflows.get(workflow_name)
            if workflow_info:
                filter_query = f'WorkflowType="{workflow_info.workflow_type}"'
        deployment_map = {
            str(deployment_id): workflow
            for workflow, deployment_id in prefect_mgr.deployments.items()
        }

        workflows = await temporal_mgr.list_workflows(filter_query, limit_value)
        deployment_filter_value = None
        if workflow_name:
            deployment_id = prefect_mgr.deployments.get(workflow_name)
            if not deployment_id:
                return {
                    "runs": [],
                    "prefect": get_prefect_status(),
                    "error": f"Workflow '{workflow_name}' has no registered deployment",
                }
            try:
                deployment_filter_value = UUID(str(deployment_id))
            except ValueError:
                return {
                    "runs": [],
                    "prefect": get_prefect_status(),
                    "error": (
                        f"Deployment id '{deployment_id}' for workflow '{workflow_name}' is invalid"
                    ),
                }

        results: List[Dict[str, Any]] = []
        for wf in workflows:
            results.append({
                "run_id": wf["workflow_id"],
                "workflow": workflow_name or "unknown",
                "state": wf["status"],
                "state_type": wf["status"],
                "is_completed": wf["status"] in ["COMPLETED", "FAILED", "CANCELLED"],
                "is_running": wf["status"] == "RUNNING",
                "is_failed": wf["status"] == "FAILED",
                "created_at": wf.get("start_time"),
                "updated_at": wf.get("close_time"),
            })
        desired_state_types: List[StateType] = []
        if states:
            for raw_state in states:
                if not raw_state:
                    continue
                normalised = raw_state.strip().upper()
                if normalised == "ALL":
                    desired_state_types = []
                    break
                try:
                    desired_state_types.append(StateType[normalised])
                except KeyError:
                    continue
        if not desired_state_types:
            desired_state_types = [
                StateType.RUNNING,
                StateType.COMPLETED,
                StateType.FAILED,
                StateType.CANCELLED,
            ]

        return {"runs": results, "temporal": get_temporal_status()}
        flow_filter = FlowRunFilter()
        if desired_state_types:
            flow_filter.state = FlowRunFilterState(
                type=FlowRunFilterStateType(any_=desired_state_types)
            )
        if deployment_filter_value:
            flow_filter.deployment_id = FlowRunFilterDeploymentId(
                any_=[deployment_filter_value]
            )

    except Exception as exc:
        logger.exception("Failed to list runs")
        return {
            "runs": [],
            "temporal": get_temporal_status(),
            "error": str(exc)
        }
        async with get_client() as client:
            flow_runs = await client.read_flow_runs(
                limit=limit_value,
                flow_run_filter=flow_filter,
                sort=FlowRunSort.START_TIME_DESC,
            )

        results: List[Dict[str, Any]] = []
        for flow_run in flow_runs:
            deployment_id = getattr(flow_run, "deployment_id", None)
            workflow = deployment_map.get(str(deployment_id), "unknown")
            state = getattr(flow_run, "state", None)
            state_name = getattr(state, "name", None) if state else None
            state_type = getattr(state, "type", None) if state else None

            results.append(
                {
                    "run_id": str(flow_run.id),
                    "workflow": workflow,
                    "deployment_id": str(deployment_id) if deployment_id else None,
                    "state": state_name or (state_type.name if state_type else None),
                    "state_type": state_type.name if state_type else None,
                    "is_completed": bool(getattr(state, "is_completed", lambda: False)()),
                    "is_running": bool(getattr(state, "is_running", lambda: False)()),
                    "is_failed": bool(getattr(state, "is_failed", lambda: False)()),
                    "created_at": getattr(flow_run, "created", None),
                    "updated_at": getattr(flow_run, "updated", None),
                    "expected_start_time": getattr(flow_run, "expected_start_time", None),
                    "start_time": getattr(flow_run, "start_time", None),
                }
            )

        # Normalise datetimes to ISO 8601 strings for serialization
        for entry in results:
            for key in ("created_at", "updated_at", "expected_start_time", "start_time"):
                value = entry.get(key)
                if value is None:
                    continue
                try:
                    entry[key] = value.isoformat()
                except AttributeError:
                    entry[key] = str(value)

        return {"runs": results, "prefect": get_prefect_status()}
@mcp.tool
async def get_fuzzing_stats_mcp(run_id: str) -> Dict[str, Any]:
    """Return fuzzing statistics for a run if available."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    stats = fuzzing.fuzzing_stats.get(run_id)
@@ -557,11 +708,11 @@ async def get_fuzzing_stats_mcp(run_id: str) -> Dict[str, Any]:
@mcp.tool
async def get_fuzzing_crash_reports_mcp(run_id: str) -> Dict[str, Any]:
    """Return crash reports collected for a fuzzing run."""
    not_ready = _temporal_not_ready_status()
    not_ready = _prefect_not_ready_status()
    if not_ready:
        return {
            "error": "Temporal infrastructure not ready",
            "temporal": not_ready,
            "error": "Prefect infrastructure not ready",
            "prefect": not_ready,
        }

    reports = fuzzing.crash_reports.get(run_id)
@@ -574,11 +725,11 @@ async def get_fuzzing_crash_reports_mcp(run_id: str) -> Dict[str, Any]:
async def get_backend_status_mcp() -> Dict[str, Any]:
    """Expose backend readiness, workflows, and registered MCP tools."""

    status = get_temporal_status()
    response: Dict[str, Any] = {"temporal": status}
    status = get_prefect_status()
    response: Dict[str, Any] = {"prefect": status}

    if status.get("ready"):
        response["workflows"] = list(temporal_mgr.workflows.keys())
        response["workflows"] = list(prefect_mgr.workflows.keys())

    try:
        tools = await mcp._tool_manager.list_tools()
@@ -624,12 +775,12 @@ def create_mcp_transport_app() -> Starlette:


# ---------------------------------------------------------------------------
# Combined lifespan: Temporal init + dedicated MCP transports
# Combined lifespan: Prefect init + dedicated MCP transports
# ---------------------------------------------------------------------------

@asynccontextmanager
async def combined_lifespan(app: FastAPI):
    global temporal_bootstrap_task, _fastapi_mcp_imported
    global prefect_bootstrap_task, _fastapi_mcp_imported

    logger.info("Starting FuzzForge backend...")

@@ -642,12 +793,12 @@ async def combined_lifespan(app: FastAPI):
    except Exception as exc:
        logger.exception("Failed to import FastAPI endpoints into MCP", exc_info=exc)

    # Kick off Temporal bootstrap in the background if needed
    if temporal_bootstrap_task is None or temporal_bootstrap_task.done():
        temporal_bootstrap_task = asyncio.create_task(_bootstrap_temporal_with_retries())
        logger.info("Temporal bootstrap task started")
    # Kick off Prefect bootstrap in the background if needed
    if prefect_bootstrap_task is None or prefect_bootstrap_task.done():
        prefect_bootstrap_task = asyncio.create_task(_bootstrap_prefect_with_retries())
        logger.info("Prefect bootstrap task started")
    else:
        logger.info("Temporal bootstrap task already running")
        logger.info("Prefect bootstrap task already running")

    # Start MCP transports on shared port (HTTP + SSE)
    mcp_app = create_mcp_transport_app()
@@ -695,17 +846,18 @@ async def combined_lifespan(app: FastAPI):
        mcp_server.force_exit = True
        await asyncio.gather(mcp_task, return_exceptions=True)

    if temporal_bootstrap_task and not temporal_bootstrap_task.done():
        temporal_bootstrap_task.cancel()
    if prefect_bootstrap_task and not prefect_bootstrap_task.done():
        prefect_bootstrap_task.cancel()
        with suppress(asyncio.CancelledError):
            await temporal_bootstrap_task
    temporal_bootstrap_state.task_running = False
    if not temporal_bootstrap_state.ready:
        temporal_bootstrap_state.status = "stopped"
    temporal_bootstrap_task = None
            await prefect_bootstrap_task
    prefect_bootstrap_state.task_running = False
    if not prefect_bootstrap_state.ready:
        prefect_bootstrap_state.status = "stopped"
        prefect_bootstrap_state.next_retry_seconds = None
    prefect_bootstrap_task = None

    # Close Temporal client
    await temporal_mgr.close()
    logger.info("Shutting down Prefect statistics monitor...")
    await prefect_stats_monitor.stop_monitoring()
    logger.info("Shutting down FuzzForge backend...")

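For context, a standalone sketch of the Prefect client query pattern used by list_recent_runs_mcp above; the imports and classes are the ones this diff already uses, while the function name and the FAILED-only filter are illustrative.

import asyncio
from prefect.client.orchestration import get_client
from prefect.client.schemas.filters import FlowRunFilter, FlowRunFilterState, FlowRunFilterStateType
from prefect.client.schemas.sorting import FlowRunSort
from prefect.states import StateType

async def recent_failed_runs(limit: int = 10):
    # Mirror the tool's filter construction, restricted to FAILED runs.
    flow_filter = FlowRunFilter()
    flow_filter.state = FlowRunFilterState(
        type=FlowRunFilterStateType(any_=[StateType.FAILED])
    )
    async with get_client() as client:
        return await client.read_flow_runs(
            limit=limit,
            flow_run_filter=flow_filter,
            sort=FlowRunSort.START_TIME_DESC,
        )

# Example: asyncio.run(recent_failed_runs())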
@@ -13,9 +13,10 @@ Models for workflow findings and submissions
#
# Additional attribution and requirements are provided in the NOTICE file.

from pydantic import BaseModel, Field
from typing import Dict, Any, Optional, List
from pydantic import BaseModel, Field, field_validator
from typing import Dict, Any, Optional, Literal, List
from datetime import datetime
from pathlib import Path


class WorkflowFindings(BaseModel):
@@ -26,13 +27,47 @@ class WorkflowFindings(BaseModel):
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")


class WorkflowSubmission(BaseModel):
    """
    Submit a workflow with configurable settings.
class ResourceLimits(BaseModel):
    """Resource limits for workflow execution"""
    cpu_limit: Optional[str] = Field(None, description="CPU limit (e.g., '2' for 2 cores, '500m' for 0.5 cores)")
    memory_limit: Optional[str] = Field(None, description="Memory limit (e.g., '1Gi', '512Mi')")
    cpu_request: Optional[str] = Field(None, description="CPU request (guaranteed)")
    memory_request: Optional[str] = Field(None, description="Memory request (guaranteed)")

    Note: This model is deprecated in favor of the /upload-and-submit endpoint
    which handles file uploads directly.
    """

class VolumeMount(BaseModel):
    """Volume mount specification"""
    host_path: str = Field(..., description="Host path to mount")
    container_path: str = Field(..., description="Container path for mount")
    mode: Literal["ro", "rw"] = Field(default="ro", description="Mount mode")

    @field_validator("host_path")
    @classmethod
    def validate_host_path(cls, v):
        """Validate that the host path is absolute (existence checked at runtime)"""
        path = Path(v)
        if not path.is_absolute():
            raise ValueError(f"Host path must be absolute: {v}")
        # Note: Path existence is validated at workflow runtime
        # We can't validate existence here as this runs inside Docker container
        return str(path)

    @field_validator("container_path")
    @classmethod
    def validate_container_path(cls, v):
        """Validate that the container path is absolute"""
        if not v.startswith('/'):
            raise ValueError(f"Container path must be absolute: {v}")
        return v


class WorkflowSubmission(BaseModel):
    """Submit a workflow with configurable settings"""
    target_path: str = Field(..., description="Absolute path to analyze")
    volume_mode: Literal["ro", "rw"] = Field(
        default="ro",
        description="Volume mount mode: read-only (ro) or read-write (rw)"
    )
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description="Workflow-specific parameters"
@@ -43,6 +78,25 @@ class WorkflowSubmission(BaseModel):
        ge=1,
        le=604800  # Max 7 days to support fuzzing campaigns
    )
    resource_limits: Optional[ResourceLimits] = Field(
        None,
        description="Resource limits for workflow container"
    )
    additional_volumes: List[VolumeMount] = Field(
        default_factory=list,
        description="Additional volume mounts (e.g., for corpus, output directories)"
    )

    @field_validator("target_path")
    @classmethod
    def validate_path(cls, v):
        """Validate that the target path is absolute (existence checked at runtime)"""
        path = Path(v)
        if not path.is_absolute():
            raise ValueError(f"Path must be absolute: {v}")
        # Note: Path existence is validated at workflow runtime when volumes are mounted
        # We can't validate existence here as this runs inside Docker container
        return str(path)
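# Illustrative sketch (not part of this diff) of how the models above compose;
# all values are hypothetical and the validators reject relative paths. The
# timeout field is omitted on the assumption that the model defines a default.
#
# submission = WorkflowSubmission(
#     target_path="/data/targets/app",
#     volume_mode="rw",
#     parameters={"max_depth": 3},
#     resource_limits=ResourceLimits(cpu_limit="2", memory_limit="1Gi"),
#     additional_volumes=[
#         VolumeMount(host_path="/data/corpus", container_path="/corpus", mode="ro"),
#     ],
# )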


class WorkflowStatus(BaseModel):
@@ -73,6 +127,14 @@ class WorkflowMetadata(BaseModel):
        default_factory=list,
        description="Required module names"
    )
    supported_volume_modes: List[Literal["ro", "rw"]] = Field(
        default=["ro", "rw"],
        description="Supported volume mount modes"
    )
    has_custom_docker: bool = Field(
        default=False,
        description="Whether workflow has custom Dockerfile"
    )


class WorkflowListItem(BaseModel):

394 backend/src/services/prefect_stats_monitor.py Normal file
@@ -0,0 +1,394 @@
"""
Generic Prefect Statistics Monitor Service

This service monitors ALL workflows for structured live data logging and
updates the appropriate statistics APIs. Works with any workflow that follows
the standard LIVE_STATS logging pattern.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.


import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional
from prefect.client.orchestration import get_client
from prefect.client.schemas.objects import FlowRun, TaskRun
from src.models.findings import FuzzingStats
from src.api.fuzzing import fuzzing_stats, initialize_fuzzing_tracking, active_connections

logger = logging.getLogger(__name__)


class PrefectStatsMonitor:
    """Monitors Prefect flows and tasks for live statistics from any workflow"""

    def __init__(self):
        self.monitoring = False
        self.monitor_task = None
        self.monitored_runs = set()
        self.last_log_ts: Dict[str, datetime] = {}
        self._client = None
        self._client_refresh_time = None
        self._client_refresh_interval = 300  # Refresh connection every 5 minutes

    async def start_monitoring(self):
        """Start the Prefect statistics monitoring service"""
        if self.monitoring:
            logger.warning("Prefect stats monitor already running")
            return

        self.monitoring = True
        self.monitor_task = asyncio.create_task(self._monitor_flows())
        logger.info("Started Prefect statistics monitor")

    async def stop_monitoring(self):
        """Stop the monitoring service"""
        self.monitoring = False
        if self.monitor_task:
            self.monitor_task.cancel()
            try:
                await self.monitor_task
            except asyncio.CancelledError:
                pass
        logger.info("Stopped Prefect statistics monitor")

    async def _get_or_refresh_client(self):
        """Get or refresh Prefect client with connection pooling."""
        now = datetime.now(timezone.utc)

        if (self._client is None or
                self._client_refresh_time is None or
                (now - self._client_refresh_time).total_seconds() > self._client_refresh_interval):

            if self._client:
                try:
                    await self._client.aclose()
                except Exception:
                    pass

            self._client = get_client()
            self._client_refresh_time = now
            await self._client.__aenter__()

        return self._client

    async def _monitor_flows(self):
        """Main monitoring loop that watches Prefect flows"""
        try:
            while self.monitoring:
                try:
                    # Use connection pooling for better performance
                    client = await self._get_or_refresh_client()

                    # Get recent flow runs (limit to reduce load)
                    flow_runs = await client.read_flow_runs(
                        limit=50,
                        sort="START_TIME_DESC",
                    )

                    # Only consider runs from the last 15 minutes
                    recent_cutoff = datetime.now(timezone.utc) - timedelta(minutes=15)
                    for flow_run in flow_runs:
                        created = getattr(flow_run, "created", None)
                        if created is None:
                            continue
                        try:
                            # Ensure timezone-aware comparison
                            if created.tzinfo is None:
                                created = created.replace(tzinfo=timezone.utc)
                            if created >= recent_cutoff:
                                await self._monitor_flow_run(client, flow_run)
                        except Exception:
                            # If comparison fails, attempt monitoring anyway
                            await self._monitor_flow_run(client, flow_run)

                    await asyncio.sleep(5)  # Check every 5 seconds

                except Exception as e:
                    logger.error(f"Error in Prefect monitoring: {e}")
                    await asyncio.sleep(10)

        except asyncio.CancelledError:
            logger.info("Prefect monitoring cancelled")
        except Exception as e:
            logger.error(f"Fatal error in Prefect monitoring: {e}")
        finally:
            # Clean up client on exit
            if self._client:
                try:
                    await self._client.__aexit__(None, None, None)
                except Exception:
                    pass
                self._client = None
    async def _monitor_flow_run(self, client, flow_run: FlowRun):
        """Monitor a specific flow run for statistics"""
        run_id = str(flow_run.id)
        workflow_name = flow_run.name or "unknown"

        try:
            # Initialize tracking if not exists - only for workflows that might have live stats
            if run_id not in fuzzing_stats:
                initialize_fuzzing_tracking(run_id, workflow_name)
                self.monitored_runs.add(run_id)

            # Skip corrupted entries (should not happen after startup cleanup, but defensive)
            elif not isinstance(fuzzing_stats[run_id], FuzzingStats):
                logger.warning(f"Skipping corrupted stats entry for {run_id}, reinitializing")
                initialize_fuzzing_tracking(run_id, workflow_name)
                self.monitored_runs.add(run_id)

            # Get task runs for this flow
            task_runs = await client.read_task_runs(
                flow_run_filter={"id": {"any_": [flow_run.id]}},
                limit=25,
            )

            # Check all tasks for live statistics logging
            for task_run in task_runs:
                await self._extract_stats_from_task(client, run_id, task_run, workflow_name)

            # Also scan flow-level logs as a fallback
            await self._extract_stats_from_flow_logs(client, run_id, flow_run, workflow_name)

        except Exception as e:
            logger.warning(f"Error monitoring flow run {run_id}: {e}")

    async def _extract_stats_from_task(self, client, run_id: str, task_run: TaskRun, workflow_name: str):
        """Extract statistics from any task that logs live stats"""
        try:
            # Get task run logs
            logs = await client.read_logs(
                log_filter={
                    "task_run_id": {"any_": [task_run.id]}
                },
                limit=100,
                sort="TIMESTAMP_ASC"
            )

            # Parse logs for LIVE_STATS entries (generic pattern for any workflow)
            latest_stats = None
            for log in logs:
                # Prefer structured extra field if present
                extra_data = getattr(log, "extra", None) or getattr(log, "extra_fields", None) or None
                if isinstance(extra_data, dict):
                    stat_type = extra_data.get("stats_type")
                    if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                        latest_stats = extra_data
                        continue

                # Fallback to parsing from message text
                if ("FUZZ_STATS" in log.message or "LIVE_STATS" in log.message):
                    stats = self._parse_stats_from_log(log.message)
                    if stats:
                        latest_stats = stats

            # Update statistics if we found any
            if latest_stats:
                # Calculate elapsed time from task start
                elapsed_time = 0
                if task_run.start_time:
                    # Ensure timezone-aware arithmetic
                    now = datetime.now(timezone.utc)
                    try:
                        elapsed_time = int((now - task_run.start_time).total_seconds())
                    except Exception:
                        # Fallback to naive UTC if types mismatch
                        elapsed_time = int((datetime.utcnow() - task_run.start_time.replace(tzinfo=None)).total_seconds())

                updated_stats = FuzzingStats(
                    run_id=run_id,
                    workflow=workflow_name,
                    executions=latest_stats.get("executions", 0),
                    executions_per_sec=latest_stats.get("executions_per_sec", 0.0),
                    crashes=latest_stats.get("crashes", 0),
                    unique_crashes=latest_stats.get("unique_crashes", 0),
                    corpus_size=latest_stats.get("corpus_size", 0),
                    elapsed_time=elapsed_time
                )

                # Update the global stats
                previous = fuzzing_stats.get(run_id)
                fuzzing_stats[run_id] = updated_stats

                # Broadcast to any active WebSocket clients for this run
                if active_connections.get(run_id):
                    # Handle both Pydantic objects and plain dicts
                    if isinstance(updated_stats, dict):
                        stats_data = updated_stats
                    elif hasattr(updated_stats, 'model_dump'):
                        stats_data = updated_stats.model_dump()
                    elif hasattr(updated_stats, 'dict'):
                        stats_data = updated_stats.dict()
                    else:
                        stats_data = updated_stats.__dict__

                    message = {
                        "type": "stats_update",
                        "data": stats_data,
                    }
                    disconnected = []
                    for ws in active_connections[run_id]:
                        try:
                            await ws.send_text(json.dumps(message))
                        except Exception:
                            disconnected.append(ws)
                    # Clean up disconnected sockets
                    for ws in disconnected:
                        try:
                            active_connections[run_id].remove(ws)
                        except ValueError:
                            pass

                logger.debug(f"Updated Prefect stats for {run_id}: {updated_stats.executions} execs")

        except Exception as e:
            logger.warning(f"Error extracting stats from task {task_run.id}: {e}")
    async def _extract_stats_from_flow_logs(self, client, run_id: str, flow_run: FlowRun, workflow_name: str):
        """Extract statistics by scanning flow-level logs for LIVE/FUZZ stats"""
        try:
            logs = await client.read_logs(
                log_filter={
                    "flow_run_id": {"any_": [flow_run.id]}
                },
                limit=200,
                sort="TIMESTAMP_ASC"
            )

            latest_stats = None
            last_seen = self.last_log_ts.get(run_id)
            max_ts = last_seen

            for log in logs:
                # Skip logs we've already processed
                ts = getattr(log, "timestamp", None)
                if last_seen and ts and ts <= last_seen:
                    continue
                if ts and (max_ts is None or ts > max_ts):
                    max_ts = ts

                # Prefer structured extra field if available
                extra_data = getattr(log, "extra", None) or getattr(log, "extra_fields", None) or None
                if isinstance(extra_data, dict):
                    stat_type = extra_data.get("stats_type")
                    if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                        latest_stats = extra_data
                        continue

                # Fallback to message parse
                if ("FUZZ_STATS" in log.message or "LIVE_STATS" in log.message):
                    stats = self._parse_stats_from_log(log.message)
                    if stats:
                        latest_stats = stats

            if max_ts:
                self.last_log_ts[run_id] = max_ts

            if latest_stats:
                # Use flow_run timestamps for elapsed time if available
                elapsed_time = 0
                start_time = getattr(flow_run, "start_time", None)
                if start_time:
                    now = datetime.now(timezone.utc)
                    try:
                        if start_time.tzinfo is None:
                            start_time = start_time.replace(tzinfo=timezone.utc)
                        elapsed_time = int((now - start_time).total_seconds())
                    except Exception:
                        elapsed_time = int((datetime.utcnow() - start_time.replace(tzinfo=None)).total_seconds())

                updated_stats = FuzzingStats(
                    run_id=run_id,
                    workflow=workflow_name,
                    executions=latest_stats.get("executions", 0),
                    executions_per_sec=latest_stats.get("executions_per_sec", 0.0),
                    crashes=latest_stats.get("crashes", 0),
                    unique_crashes=latest_stats.get("unique_crashes", 0),
                    corpus_size=latest_stats.get("corpus_size", 0),
                    elapsed_time=elapsed_time
                )

                fuzzing_stats[run_id] = updated_stats

                # Broadcast if listeners exist
                if active_connections.get(run_id):
                    # Handle both Pydantic objects and plain dicts
                    if isinstance(updated_stats, dict):
                        stats_data = updated_stats
                    elif hasattr(updated_stats, 'model_dump'):
                        stats_data = updated_stats.model_dump()
                    elif hasattr(updated_stats, 'dict'):
                        stats_data = updated_stats.dict()
                    else:
                        stats_data = updated_stats.__dict__

                    message = {
                        "type": "stats_update",
                        "data": stats_data,
                    }
                    disconnected = []
                    for ws in active_connections[run_id]:
                        try:
                            await ws.send_text(json.dumps(message))
                        except Exception:
                            disconnected.append(ws)
                    for ws in disconnected:
                        try:
                            active_connections[run_id].remove(ws)
                        except ValueError:
                            pass

        except Exception as e:
            logger.warning(f"Error extracting stats from flow logs {run_id}: {e}")

    def _parse_stats_from_log(self, log_message: str) -> Optional[Dict[str, Any]]:
        """Parse statistics from a log message"""
        try:
            import re

            # Prefer explicit JSON after marker tokens
            m = re.search(r'(?:FUZZ_STATS|LIVE_STATS)\s+(\{.*\})', log_message)
            if m:
                try:
                    return json.loads(m.group(1))
                except Exception:
                    pass

            # Fallback: Extract the extra= dict and coerce to JSON
            stats_match = re.search(r'extra=({.*?})', log_message)
            if not stats_match:
                return None

            extra_str = stats_match.group(1)
            extra_str = extra_str.replace("'", '"')
            extra_str = extra_str.replace('None', 'null')
            extra_str = extra_str.replace('True', 'true')
            extra_str = extra_str.replace('False', 'false')

            stats_data = json.loads(extra_str)

            # Support multiple stat types for different workflows
            stat_type = stats_data.get("stats_type")
            if stat_type in ["fuzzing_live_update", "scan_progress", "analysis_update", "live_stats"]:
                return stats_data

        except Exception as e:
            logger.debug(f"Error parsing log stats: {e}")

        return None


# Global instance
prefect_stats_monitor = PrefectStatsMonitor()
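For reference, a hedged sketch of the producer side of the LIVE_STATS pattern this monitor consumes; the payload keys come from the parser above, while the reporting function and its logger setup are assumptions.

import json
import logging

logger = logging.getLogger(__name__)

def report_live_stats(executions: int, eps: float, crashes: int) -> None:
    # The monitor matches r'(?:FUZZ_STATS|LIVE_STATS)\s+(\{.*\})' and accepts
    # stats_type values such as "fuzzing_live_update" or "live_stats".
    payload = {
        "stats_type": "fuzzing_live_update",
        "executions": executions,
        "executions_per_sec": eps,
        "crashes": crashes,
        "unique_crashes": 0,
        "corpus_size": 0,
    }
    logger.info("LIVE_STATS " + json.dumps(payload))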
@@ -1,10 +0,0 @@
"""
Storage abstraction layer for FuzzForge.

Provides unified interface for storing and retrieving targets and results.
"""

from .base import StorageBackend
from .s3_cached import S3CachedStorage

__all__ = ["StorageBackend", "S3CachedStorage"]
@@ -1,153 +0,0 @@
"""
Base storage backend interface.

All storage implementations must implement this interface.
"""

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Dict, Any


class StorageBackend(ABC):
    """
    Abstract base class for storage backends.

    Implementations handle storage and retrieval of:
    - Uploaded targets (code, binaries, etc.)
    - Workflow results
    - Temporary files
    """

    @abstractmethod
    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Upload a target file to storage.

        Args:
            file_path: Local path to file to upload
            user_id: ID of user uploading the file
            metadata: Optional metadata to store with file

        Returns:
            Target ID (unique identifier for retrieval)

        Raises:
            FileNotFoundError: If file_path doesn't exist
            StorageError: If upload fails
        """
        pass

    @abstractmethod
    async def get_target(self, target_id: str) -> Path:
        """
        Get target file from storage.

        Args:
            target_id: Unique identifier from upload_target()

        Returns:
            Local path to cached file

        Raises:
            FileNotFoundError: If target doesn't exist
            StorageError: If download fails
        """
        pass

    @abstractmethod
    async def delete_target(self, target_id: str) -> None:
        """
        Delete target from storage.

        Args:
            target_id: Unique identifier to delete

        Raises:
            StorageError: If deletion fails (doesn't raise if not found)
        """
        pass

    @abstractmethod
    async def upload_results(
        self,
        workflow_id: str,
        results: Dict[str, Any],
        results_format: str = "json"
    ) -> str:
        """
        Upload workflow results to storage.

        Args:
            workflow_id: Workflow execution ID
            results: Results dictionary
            results_format: Format (json, sarif, etc.)

        Returns:
            URL to uploaded results

        Raises:
            StorageError: If upload fails
        """
        pass

    @abstractmethod
    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        """
        Get workflow results from storage.

        Args:
            workflow_id: Workflow execution ID

        Returns:
            Results dictionary

        Raises:
            FileNotFoundError: If results don't exist
            StorageError: If download fails
        """
        pass

    @abstractmethod
    async def list_targets(
        self,
        user_id: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """
        List uploaded targets.

        Args:
            user_id: Filter by user ID (None = all users)
            limit: Maximum number of results

        Returns:
            List of target metadata dictionaries

        Raises:
            StorageError: If listing fails
        """
        pass

    @abstractmethod
    async def cleanup_cache(self) -> int:
        """
        Clean up local cache (LRU eviction).

        Returns:
            Number of files removed

        Raises:
            StorageError: If cleanup fails
        """
        pass


class StorageError(Exception):
    """Base exception for storage operations."""
    pass
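A minimal in-memory sketch of the interface above (illustrative only, not part of the diff; a real backend would persist data and raise StorageError on failures):

from pathlib import Path
from typing import Any, Dict, Optional
from uuid import uuid4

class InMemoryStorage(StorageBackend):
    """Toy backend that keeps targets and results in dictionaries."""

    def __init__(self) -> None:
        self._targets: Dict[str, Path] = {}
        self._results: Dict[str, Dict[str, Any]] = {}

    async def upload_target(self, file_path: Path, user_id: str,
                            metadata: Optional[Dict[str, Any]] = None) -> str:
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        target_id = str(uuid4())
        self._targets[target_id] = file_path
        return target_id

    async def get_target(self, target_id: str) -> Path:
        if target_id not in self._targets:
            raise FileNotFoundError(f"Target {target_id} not found")
        return self._targets[target_id]

    async def delete_target(self, target_id: str) -> None:
        self._targets.pop(target_id, None)

    async def upload_results(self, workflow_id: str, results: Dict[str, Any],
                             results_format: str = "json") -> str:
        self._results[workflow_id] = results
        return f"memory://{workflow_id}"

    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        if workflow_id not in self._results:
            raise FileNotFoundError(f"Results for {workflow_id} not found")
        return self._results[workflow_id]

    async def list_targets(self, user_id: Optional[str] = None,
                           limit: int = 100) -> list:
        return [{"target_id": tid} for tid in list(self._targets)[:limit]]

    async def cleanup_cache(self) -> int:
        return 0  # Nothing cached on disk in this toy backend.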
@@ -1,423 +0,0 @@
"""
S3-compatible storage backend with local caching.

Works with MinIO (dev/prod) or AWS S3 (cloud).
"""

import json
import logging
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any
from uuid import uuid4

import boto3
from botocore.exceptions import ClientError

from .base import StorageBackend, StorageError

logger = logging.getLogger(__name__)


class S3CachedStorage(StorageBackend):
    """
    S3-compatible storage with local caching.

    Features:
    - Upload targets to S3/MinIO
    - Download with local caching (LRU eviction)
    - Lifecycle management (auto-cleanup old files)
    - Metadata tracking
    """

    def __init__(
        self,
        endpoint_url: Optional[str] = None,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        bucket: str = "targets",
        region: str = "us-east-1",
        use_ssl: bool = False,
        cache_dir: Optional[Path] = None,
        cache_max_size_gb: int = 10
    ):
        """
        Initialize S3 storage backend.

        Args:
            endpoint_url: S3 endpoint (None = AWS S3, or MinIO URL)
            access_key: S3 access key (None = from env)
            secret_key: S3 secret key (None = from env)
            bucket: S3 bucket name
            region: AWS region
            use_ssl: Use HTTPS
            cache_dir: Local cache directory
            cache_max_size_gb: Maximum cache size in GB
        """
        # Use environment variables as defaults
        self.endpoint_url = endpoint_url or os.getenv('S3_ENDPOINT', 'http://minio:9000')
        self.access_key = access_key or os.getenv('S3_ACCESS_KEY', 'fuzzforge')
        self.secret_key = secret_key or os.getenv('S3_SECRET_KEY', 'fuzzforge123')
        self.bucket = bucket or os.getenv('S3_BUCKET', 'targets')
        self.region = region or os.getenv('S3_REGION', 'us-east-1')
        self.use_ssl = use_ssl or os.getenv('S3_USE_SSL', 'false').lower() == 'true'

        # Cache configuration
        self.cache_dir = cache_dir or Path(os.getenv('CACHE_DIR', '/tmp/fuzzforge-cache'))
        self.cache_max_size = cache_max_size_gb * (1024 ** 3)  # Convert to bytes

        # Ensure cache directory exists
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Initialize S3 client
        try:
            self.s3_client = boto3.client(
                's3',
                endpoint_url=self.endpoint_url,
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=self.region,
                use_ssl=self.use_ssl
            )
            logger.info(f"Initialized S3 storage: {self.endpoint_url}/{self.bucket}")
        except Exception as e:
            logger.error(f"Failed to initialize S3 client: {e}")
            raise StorageError(f"S3 initialization failed: {e}")

    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Upload target file to S3/MinIO."""
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Generate unique target ID
        target_id = str(uuid4())

        # Prepare metadata
        upload_metadata = {
            'user_id': user_id,
            'uploaded_at': datetime.now().isoformat(),
            'filename': file_path.name,
            'size': str(file_path.stat().st_size)
        }
        if metadata:
            upload_metadata.update(metadata)

        # Upload to S3
        s3_key = f'{target_id}/target'
        try:
            logger.info(f"Uploading target to s3://{self.bucket}/{s3_key}")

            self.s3_client.upload_file(
                str(file_path),
                self.bucket,
                s3_key,
                ExtraArgs={
                    'Metadata': upload_metadata
                }
            )

            file_size_mb = file_path.stat().st_size / (1024 * 1024)
            logger.info(
                f"✓ Uploaded target {target_id} "
                f"({file_path.name}, {file_size_mb:.2f} MB)"
            )

            return target_id

        except ClientError as e:
            logger.error(f"S3 upload failed: {e}", exc_info=True)
            raise StorageError(f"Failed to upload target: {e}")
        except Exception as e:
            logger.error(f"Upload failed: {e}", exc_info=True)
            raise StorageError(f"Upload error: {e}")

    async def get_target(self, target_id: str) -> Path:
        """Get target from cache or download from S3/MinIO."""
        # Check cache first
        cache_path = self.cache_dir / target_id
        cached_file = cache_path / "target"

        if cached_file.exists():
            # Update access time for LRU
            cached_file.touch()
            logger.info(f"Cache HIT: {target_id}")
            return cached_file

        # Cache miss - download from S3
        logger.info(f"Cache MISS: {target_id}, downloading from S3...")

        try:
            # Create cache directory
            cache_path.mkdir(parents=True, exist_ok=True)

            # Download from S3
            s3_key = f'{target_id}/target'
            logger.info(f"Downloading s3://{self.bucket}/{s3_key}")

            self.s3_client.download_file(
                self.bucket,
                s3_key,
                str(cached_file)
            )

            # Verify download
            if not cached_file.exists():
                raise StorageError(f"Downloaded file not found: {cached_file}")

            file_size_mb = cached_file.stat().st_size / (1024 * 1024)
            logger.info(f"✓ Downloaded target {target_id} ({file_size_mb:.2f} MB)")

            return cached_file

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ['404', 'NoSuchKey']:
                logger.error(f"Target not found: {target_id}")
                raise FileNotFoundError(f"Target {target_id} not found in storage")
            else:
                logger.error(f"S3 download failed: {e}", exc_info=True)
                raise StorageError(f"Download failed: {e}")
        except Exception as e:
            logger.error(f"Download error: {e}", exc_info=True)
            # Cleanup partial download
            if cache_path.exists():
                shutil.rmtree(cache_path, ignore_errors=True)
            raise StorageError(f"Download error: {e}")

    async def delete_target(self, target_id: str) -> None:
        """Delete target from S3/MinIO."""
        try:
            s3_key = f'{target_id}/target'
            logger.info(f"Deleting s3://{self.bucket}/{s3_key}")

            self.s3_client.delete_object(
                Bucket=self.bucket,
                Key=s3_key
            )

            # Also delete from cache if present
            cache_path = self.cache_dir / target_id
            if cache_path.exists():
                shutil.rmtree(cache_path, ignore_errors=True)
                logger.info(f"✓ Deleted target {target_id} from S3 and cache")
            else:
                logger.info(f"✓ Deleted target {target_id} from S3")

        except ClientError as e:
            logger.error(f"S3 delete failed: {e}", exc_info=True)
            # Don't raise error if object doesn't exist
            if e.response.get('Error', {}).get('Code') not in ['404', 'NoSuchKey']:
                raise StorageError(f"Delete failed: {e}")
        except Exception as e:
            logger.error(f"Delete error: {e}", exc_info=True)
            raise StorageError(f"Delete error: {e}")

    async def upload_results(
        self,
        workflow_id: str,
        results: Dict[str, Any],
        results_format: str = "json"
    ) -> str:
        """Upload workflow results to S3/MinIO."""
        try:
            # Prepare results content
            if results_format == "json":
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/json'
                file_ext = 'json'
            elif results_format == "sarif":
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/sarif+json'
                file_ext = 'sarif'
            else:
                content = json.dumps(results, indent=2).encode('utf-8')
                content_type = 'application/json'
                file_ext = 'json'

            # Upload to results bucket
            results_bucket = 'results'
            s3_key = f'{workflow_id}/results.{file_ext}'

            logger.info(f"Uploading results to s3://{results_bucket}/{s3_key}")

            self.s3_client.put_object(
                Bucket=results_bucket,
                Key=s3_key,
                Body=content,
                ContentType=content_type,
                Metadata={
                    'workflow_id': workflow_id,
                    'format': results_format,
                    'uploaded_at': datetime.now().isoformat()
                }
            )

            # Construct URL
            results_url = f"{self.endpoint_url}/{results_bucket}/{s3_key}"
            logger.info(f"✓ Uploaded results: {results_url}")

            return results_url

        except Exception as e:
            logger.error(f"Results upload failed: {e}", exc_info=True)
            raise StorageError(f"Results upload failed: {e}")

    async def get_results(self, workflow_id: str) -> Dict[str, Any]:
        """Get workflow results from S3/MinIO."""
        try:
            results_bucket = 'results'
            s3_key = f'{workflow_id}/results.json'

            logger.info(f"Downloading results from s3://{results_bucket}/{s3_key}")

            response = self.s3_client.get_object(
                Bucket=results_bucket,
                Key=s3_key
            )

            content = response['Body'].read().decode('utf-8')
            results = json.loads(content)

            logger.info(f"✓ Downloaded results for workflow {workflow_id}")
            return results

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code')
            if error_code in ['404', 'NoSuchKey']:
                logger.error(f"Results not found: {workflow_id}")
                raise FileNotFoundError(f"Results for workflow {workflow_id} not found")
            else:
                logger.error(f"Results download failed: {e}", exc_info=True)
                raise StorageError(f"Results download failed: {e}")
        except Exception as e:
            logger.error(f"Results download error: {e}", exc_info=True)
            raise StorageError(f"Results download error: {e}")

    async def list_targets(
        self,
        user_id: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """List uploaded targets."""
        try:
            targets = []
            paginator = self.s3_client.get_paginator('list_objects_v2')

            for page in paginator.paginate(Bucket=self.bucket, PaginationConfig={'MaxItems': limit}):
                for obj in page.get('Contents', []):
                    # Get object metadata
                    try:
                        metadata_response = self.s3_client.head_object(
                            Bucket=self.bucket,
                            Key=obj['Key']
                        )
                        metadata = metadata_response.get('Metadata', {})

                        # Filter by user_id if specified
                        if user_id and metadata.get('user_id') != user_id:
                            continue

                        targets.append({
                            'target_id': obj['Key'].split('/')[0],
                            'key': obj['Key'],
                            'size': obj['Size'],
                            'last_modified': obj['LastModified'].isoformat(),
                            'metadata': metadata
                        })

                    except Exception as e:
                        logger.warning(f"Failed to get metadata for {obj['Key']}: {e}")
                        continue

            logger.info(f"Listed {len(targets)} targets (user_id={user_id})")
            return targets

        except Exception as e:
            logger.error(f"List targets failed: {e}", exc_info=True)
            raise StorageError(f"List targets failed: {e}")

    async def cleanup_cache(self) -> int:
        """Clean up local cache using LRU eviction."""
        try:
            cache_files = []
            total_size = 0

            # Gather all cached files with metadata
            for cache_file in self.cache_dir.rglob('*'):
                if cache_file.is_file():
                    try:
                        stat = cache_file.stat()
                        cache_files.append({
                            'path': cache_file,
                            'size': stat.st_size,
                            'atime': stat.st_atime  # Last access time
                        })
                        total_size += stat.st_size
                    except Exception as e:
                        logger.warning(f"Failed to stat {cache_file}: {e}")
                        continue

            # Check if cleanup is needed
            if total_size <= self.cache_max_size:
                logger.info(
                    f"Cache size OK: {total_size / (1024**3):.2f} GB / "
                    f"{self.cache_max_size / (1024**3):.2f} GB"
                )
                return 0

            # Sort by access time (oldest first)
            cache_files.sort(key=lambda x: x['atime'])

            # Remove files until under limit
            removed_count = 0
            for file_info in cache_files:
                if total_size <= self.cache_max_size:
                    break

                try:
                    file_info['path'].unlink()
                    total_size -= file_info['size']
                    removed_count += 1
                    logger.debug(f"Evicted from cache: {file_info['path']}")
                except Exception as e:
                    logger.warning(f"Failed to delete {file_info['path']}: {e}")
                    continue

            logger.info(
                f"✓ Cache cleanup: removed {removed_count} files, "
                f"new size: {total_size / (1024**3):.2f} GB"
            )
            return removed_count

        except Exception as e:
            logger.error(f"Cache cleanup failed: {e}", exc_info=True)
            raise StorageError(f"Cache cleanup failed: {e}")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        try:
            total_size = 0
            file_count = 0

            for cache_file in self.cache_dir.rglob('*'):
                if cache_file.is_file():
                    total_size += cache_file.stat().st_size
                    file_count += 1

            return {
                'total_size_bytes': total_size,
                'total_size_gb': total_size / (1024 ** 3),
                'file_count': file_count,
                'max_size_gb': self.cache_max_size / (1024 ** 3),
                'usage_percent': (total_size / self.cache_max_size) * 100
            }
        except Exception as e:
            logger.error(f"Failed to get cache stats: {e}")
            return {'error': str(e)}
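
# Usage sketch (illustrative only; assumes a MinIO instance reachable at
# S3_ENDPOINT and that this runs inside an event loop, e.g. asyncio.run(...)):
#
#   storage = S3CachedStorage(cache_max_size_gb=5)
#   target_id = await storage.upload_target(Path("app.bin"), user_id="alice")
#   local = await storage.get_target(target_id)   # cache miss: downloads from S3
#   local = await storage.get_target(target_id)   # cache hit: served locally
#   await storage.cleanup_cache()                 # LRU-evict if over the size limit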
@@ -1,10 +0,0 @@
"""
Temporal integration for FuzzForge.

Handles workflow execution, monitoring, and management.
"""

from .manager import TemporalManager
from .discovery import WorkflowDiscovery

__all__ = ["TemporalManager", "WorkflowDiscovery"]
@@ -1,257 +0,0 @@
"""
Workflow Discovery for Temporal

Discovers workflows from the toolbox/workflows directory
and provides metadata about available workflows.
"""

import logging
import yaml
from pathlib import Path
from typing import Dict, Any
from pydantic import BaseModel, Field, ConfigDict

logger = logging.getLogger(__name__)


class WorkflowInfo(BaseModel):
    """Information about a discovered workflow"""
    name: str = Field(..., description="Workflow name")
    path: Path = Field(..., description="Path to workflow directory")
    workflow_file: Path = Field(..., description="Path to workflow.py file")
    metadata: Dict[str, Any] = Field(..., description="Workflow metadata from YAML")
    workflow_type: str = Field(..., description="Workflow class name")
    vertical: str = Field(..., description="Vertical (worker type) for this workflow")

    model_config = ConfigDict(arbitrary_types_allowed=True)


class WorkflowDiscovery:
    """
    Discovers workflows from the filesystem.

    Scans toolbox/workflows/ for directories containing:
    - metadata.yaml (required)
    - workflow.py (required)

    Each workflow declares its vertical (rust, android, web, etc.)
    which determines which worker pool will execute it.
    """

    def __init__(self, workflows_dir: Path):
        """
        Initialize workflow discovery.

        Args:
            workflows_dir: Path to the workflows directory
        """
        self.workflows_dir = workflows_dir
        if not self.workflows_dir.exists():
            self.workflows_dir.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created workflows directory: {self.workflows_dir}")

    async def discover_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Discover workflows by scanning the workflows directory.

        Returns:
            Dictionary mapping workflow names to their information
        """
        workflows = {}

        logger.info(f"Scanning for workflows in: {self.workflows_dir}")

        for workflow_dir in self.workflows_dir.iterdir():
            if not workflow_dir.is_dir():
                continue

            # Skip special directories
            if workflow_dir.name.startswith('.') or workflow_dir.name == '__pycache__':
                continue

            metadata_file = workflow_dir / "metadata.yaml"
            if not metadata_file.exists():
                logger.debug(f"No metadata.yaml in {workflow_dir.name}, skipping")
                continue

            workflow_file = workflow_dir / "workflow.py"
            if not workflow_file.exists():
                logger.warning(
                    f"Workflow {workflow_dir.name} has metadata but no workflow.py, skipping"
                )
                continue

            try:
                # Parse metadata
                with open(metadata_file) as f:
                    metadata = yaml.safe_load(f)

                # Validate required fields
                if 'name' not in metadata:
                    logger.warning(f"Workflow {workflow_dir.name} metadata missing 'name' field")
                    metadata['name'] = workflow_dir.name

                if 'vertical' not in metadata:
                    logger.warning(
                        f"Workflow {workflow_dir.name} metadata missing 'vertical' field"
                    )
                    continue

                # Infer workflow class name from metadata or use convention
                workflow_type = metadata.get('workflow_class')
                if not workflow_type:
                    # Convention: convert snake_case to PascalCase + Workflow
                    # e.g., rust_test -> RustTestWorkflow
                    parts = workflow_dir.name.split('_')
                    workflow_type = ''.join(part.capitalize() for part in parts) + 'Workflow'

                # Create workflow info
                info = WorkflowInfo(
                    name=metadata['name'],
                    path=workflow_dir,
                    workflow_file=workflow_file,
                    metadata=metadata,
                    workflow_type=workflow_type,
                    vertical=metadata['vertical']
                )

                workflows[info.name] = info
                logger.info(
                    f"✓ Discovered workflow: {info.name} "
                    f"(vertical: {info.vertical}, class: {info.workflow_type})"
                )

            except Exception as e:
                logger.error(
                    f"Error discovering workflow {workflow_dir.name}: {e}",
                    exc_info=True
                )
                continue

        logger.info(f"Discovered {len(workflows)} workflows")
        return workflows

    def get_workflows_by_vertical(
        self,
        workflows: Dict[str, WorkflowInfo],
        vertical: str
    ) -> Dict[str, WorkflowInfo]:
        """
        Filter workflows by vertical.

        Args:
            workflows: All discovered workflows
            vertical: Vertical name to filter by

        Returns:
            Filtered workflows dictionary
        """
        return {
            name: info
            for name, info in workflows.items()
            if info.vertical == vertical
        }

    def get_available_verticals(self, workflows: Dict[str, WorkflowInfo]) -> list[str]:
        """
        Get list of all verticals from discovered workflows.

        Args:
            workflows: All discovered workflows

        Returns:
            List of unique vertical names
        """
        return list(set(info.vertical for info in workflows.values()))

    @staticmethod
    def get_metadata_schema() -> Dict[str, Any]:
        """
        Get the JSON schema for workflow metadata.

        Returns:
            JSON schema dictionary
        """
        return {
            "type": "object",
            "required": ["name", "version", "description", "author", "vertical", "parameters"],
            "properties": {
                "name": {
                    "type": "string",
                    "description": "Workflow name"
                },
                "version": {
                    "type": "string",
                    "pattern": "^\\d+\\.\\d+\\.\\d+$",
                    "description": "Semantic version (x.y.z)"
                },
                "vertical": {
                    "type": "string",
                    "description": "Vertical worker type (rust, android, web, etc.)"
                },
                "description": {
                    "type": "string",
                    "description": "Workflow description"
                },
                "author": {
                    "type": "string",
                    "description": "Workflow author"
                },
                "category": {
                    "type": "string",
                    "enum": ["comprehensive", "specialized", "fuzzing", "focused"],
                    "description": "Workflow category"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Workflow tags for categorization"
                },
                "requirements": {
                    "type": "object",
                    "required": ["tools", "resources"],
                    "properties": {
                        "tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Required security tools"
                        },
                        "resources": {
                            "type": "object",
                            "required": ["memory", "cpu", "timeout"],
                            "properties": {
                                "memory": {
                                    "type": "string",
                                    "pattern": "^\\d+[GMK]i$",
                                    "description": "Memory limit (e.g., 1Gi, 512Mi)"
                                },
                                "cpu": {
                                    "type": "string",
                                    "pattern": "^\\d+m?$",
                                    "description": "CPU limit (e.g., 1000m, 2)"
                                },
                                "timeout": {
                                    "type": "integer",
                                    "minimum": 60,
                                    "maximum": 7200,
                                    "description": "Workflow timeout in seconds"
                                }
                            }
                        }
                    }
                },
                "parameters": {
                    "type": "object",
                    "description": "Workflow parameters schema"
                },
                "default_parameters": {
                    "type": "object",
                    "description": "Default parameter values"
                },
                "required_modules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Required module names"
                }
            }
        }
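
# Illustrative metadata.yaml satisfying the schema above (all values are
# made-up examples, not a real workflow):
#
#   name: rust_test
#   version: 1.0.0
#   description: Fuzz a Rust crate with cargo-fuzz
#   author: FuzzingLabs
#   vertical: rust
#   requirements:
#     tools: [cargo-fuzz]
#     resources:
#       memory: 1Gi
#       cpu: 1000m
#       timeout: 3600
#   parameters:
#     properties:
#       max_iterations:
#         type: integer
#         default: 1000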
@@ -1,392 +0,0 @@
"""
Temporal Manager - Workflow execution and management

Handles:
- Workflow discovery from the toolbox
- Workflow execution (submit to Temporal)
- Status monitoring
- Results retrieval
"""

import logging
import os
from pathlib import Path
from typing import Dict, Optional, Any
from uuid import uuid4

from temporalio.client import Client, WorkflowHandle
from temporalio.common import RetryPolicy
from datetime import timedelta

from .discovery import WorkflowDiscovery, WorkflowInfo
from src.storage import S3CachedStorage

logger = logging.getLogger(__name__)


class TemporalManager:
    """
    Manages Temporal workflow execution for FuzzForge.

    This class:
    - Discovers available workflows from the toolbox
    - Submits workflow executions to Temporal
    - Monitors workflow status
    - Retrieves workflow results
    """

    def __init__(
        self,
        workflows_dir: Optional[Path] = None,
        temporal_address: Optional[str] = None,
        temporal_namespace: str = "default",
        storage: Optional[S3CachedStorage] = None
    ):
        """
        Initialize Temporal manager.

        Args:
            workflows_dir: Path to workflows directory (default: toolbox/workflows)
            temporal_address: Temporal server address (default: from env or localhost:7233)
            temporal_namespace: Temporal namespace
            storage: Storage backend for file uploads (default: S3CachedStorage)
        """
        if workflows_dir is None:
            workflows_dir = Path("toolbox/workflows")

        self.temporal_address = temporal_address or os.getenv(
            'TEMPORAL_ADDRESS',
            'localhost:7233'
        )
        self.temporal_namespace = temporal_namespace
        self.discovery = WorkflowDiscovery(workflows_dir)
        self.workflows: Dict[str, WorkflowInfo] = {}
        self.client: Optional[Client] = None

        # Initialize storage backend
        self.storage = storage or S3CachedStorage()

        logger.info(
            f"TemporalManager initialized: {self.temporal_address} "
            f"(namespace: {self.temporal_namespace})"
        )

    async def initialize(self):
        """Initialize the manager by discovering workflows and connecting to Temporal."""
        try:
            # Discover workflows
            self.workflows = await self.discovery.discover_workflows()

            if not self.workflows:
                logger.warning("No workflows discovered")
            else:
                logger.info(
                    f"Discovered {len(self.workflows)} workflows: "
                    f"{list(self.workflows.keys())}"
                )

            # Connect to Temporal
            self.client = await Client.connect(
                self.temporal_address,
                namespace=self.temporal_namespace
            )
            logger.info(f"✓ Connected to Temporal: {self.temporal_address}")

        except Exception as e:
            logger.error(f"Failed to initialize Temporal manager: {e}", exc_info=True)
            raise

    async def close(self):
        """Close Temporal client connection."""
        if self.client:
            # The Temporal Python SDK client does not require an explicit close
            pass

    async def get_workflows(self) -> Dict[str, WorkflowInfo]:
        """
        Get all discovered workflows.

        Returns:
            Dictionary mapping workflow names to their info
        """
        return self.workflows

    async def get_workflow(self, name: str) -> Optional[WorkflowInfo]:
        """
        Get workflow info by name.

        Args:
            name: Workflow name

        Returns:
            WorkflowInfo or None if not found
        """
        return self.workflows.get(name)

    async def upload_target(
        self,
        file_path: Path,
        user_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Upload target file to storage.

        Args:
            file_path: Local path to file
            user_id: User ID
            metadata: Optional metadata

        Returns:
            Target ID for use in workflow execution
        """
        target_id = await self.storage.upload_target(file_path, user_id, metadata)
        logger.info(f"Uploaded target: {target_id}")
        return target_id

    async def run_workflow(
        self,
        workflow_name: str,
        target_id: str,
        workflow_params: Optional[Dict[str, Any]] = None,
        workflow_id: Optional[str] = None
    ) -> WorkflowHandle:
        """
        Execute a workflow.

        Args:
            workflow_name: Name of workflow to execute
            target_id: Target ID (from upload_target)
            workflow_params: Additional workflow parameters
            workflow_id: Optional workflow ID (generated if not provided)

        Returns:
            WorkflowHandle for monitoring/results

        Raises:
            ValueError: If workflow not found or client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized. Call initialize() first.")

        # Get workflow info
        workflow_info = self.workflows.get(workflow_name)
        if not workflow_info:
            raise ValueError(f"Workflow not found: {workflow_name}")

        # Generate workflow ID if not provided
        if not workflow_id:
            workflow_id = f"{workflow_name}-{str(uuid4())[:8]}"

        # Prepare workflow input arguments
        workflow_params = workflow_params or {}

        # Build args list: [target_id, ...workflow_params in schema order]
        # The workflow parameters are passed as individual positional args
        workflow_args = [target_id]

        # Add parameters in schema order so they match the workflow signature,
        # applying defaults from metadata.yaml when a parameter is not provided
        if 'parameters' in workflow_info.metadata:
            param_schema = workflow_info.metadata['parameters'].get('properties', {})
            logger.debug(f"Found {len(param_schema)} parameters in schema")
            # Iterate parameters in schema order and add values
            for param_name in param_schema.keys():
                param_spec = param_schema[param_name]

                # Use provided param, or fall back to default from metadata
                if param_name in workflow_params:
                    param_value = workflow_params[param_name]
                    logger.debug(f"Using provided value for {param_name}: {param_value}")
                elif 'default' in param_spec:
                    param_value = param_spec['default']
                    logger.debug(f"Using default for {param_name}: {param_value}")
                else:
                    param_value = None
                    logger.debug(f"No value or default for {param_name}, using None")

                workflow_args.append(param_value)
        else:
            logger.debug("No 'parameters' section found in workflow metadata")

        # Determine task queue from workflow vertical
        vertical = workflow_info.metadata.get("vertical", "default")
        task_queue = f"{vertical}-queue"

        logger.info(
            f"Starting workflow: {workflow_name} "
            f"(id={workflow_id}, queue={task_queue}, target={target_id})"
        )
        logger.debug(f"workflow_args = {workflow_args}")
        logger.debug(f"workflow_params received = {workflow_params}")

        try:
            # Start workflow execution with positional arguments
            handle = await self.client.start_workflow(
                workflow=workflow_info.workflow_type,  # Workflow class name
                args=workflow_args,  # Positional arguments
                id=workflow_id,
                task_queue=task_queue,
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(seconds=1),
                    maximum_interval=timedelta(minutes=1),
                    maximum_attempts=3
                )
            )

            logger.info(f"✓ Workflow started: {workflow_id}")
            return handle

        except Exception as e:
            logger.error(f"Failed to start workflow {workflow_name}: {e}", exc_info=True)
            raise

    async def get_workflow_status(self, workflow_id: str) -> Dict[str, Any]:
        """
        Get workflow execution status.

        Args:
            workflow_id: Workflow execution ID

        Returns:
            Status dictionary with workflow state

        Raises:
            ValueError: If client not initialized or workflow not found
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            # Get workflow handle
            handle = self.client.get_workflow_handle(workflow_id)

            # Describe is non-blocking; it does not wait for the result
            description = await handle.describe()

            status = {
                "workflow_id": workflow_id,
                "status": description.status.name,
                "start_time": description.start_time.isoformat() if description.start_time else None,
                "execution_time": description.execution_time.isoformat() if description.execution_time else None,
                "close_time": description.close_time.isoformat() if description.close_time else None,
                "task_queue": description.task_queue,
            }

            logger.info(f"Workflow {workflow_id} status: {status['status']}")
            return status

        except Exception as e:
            logger.error(f"Failed to get workflow status: {e}", exc_info=True)
            raise

    async def get_workflow_result(
        self,
        workflow_id: str,
        timeout: Optional[timedelta] = None
    ) -> Any:
        """
        Get workflow execution result (blocking).

        Args:
            workflow_id: Workflow execution ID
            timeout: Maximum time to wait for result

        Returns:
            Workflow result

        Raises:
            ValueError: If client not initialized
            TimeoutError: If timeout exceeded
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            handle = self.client.get_workflow_handle(workflow_id)

            logger.info(f"Waiting for workflow result: {workflow_id}")

            # Wait for workflow to complete and get result
            if timeout:
                # Use asyncio timeout if provided
                import asyncio
                result = await asyncio.wait_for(handle.result(), timeout=timeout.total_seconds())
            else:
                result = await handle.result()

            logger.info(f"✓ Workflow {workflow_id} completed")
            return result

        except Exception as e:
            logger.error(f"Failed to get workflow result: {e}", exc_info=True)
            raise

    async def cancel_workflow(self, workflow_id: str) -> None:
        """
        Cancel a running workflow.

        Args:
            workflow_id: Workflow execution ID

        Raises:
            ValueError: If client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            handle = self.client.get_workflow_handle(workflow_id)
            await handle.cancel()

            logger.info(f"✓ Workflow cancelled: {workflow_id}")

        except Exception as e:
            logger.error(f"Failed to cancel workflow: {e}", exc_info=True)
            raise

    async def list_workflows(
        self,
        filter_query: Optional[str] = None,
        limit: int = 100
    ) -> list[Dict[str, Any]]:
        """
        List workflow executions.

        Args:
            filter_query: Optional Temporal list filter query
            limit: Maximum number of results

        Returns:
            List of workflow execution info

        Raises:
            ValueError: If client not initialized
        """
        if not self.client:
            raise ValueError("Temporal client not initialized")

        try:
            workflows = []

            # Use Temporal's list API
            async for workflow in self.client.list_workflows(filter_query):
                workflows.append({
                    "workflow_id": workflow.id,
                    "workflow_type": workflow.workflow_type,
                    "status": workflow.status.name,
                    "start_time": workflow.start_time.isoformat() if workflow.start_time else None,
                    "close_time": workflow.close_time.isoformat() if workflow.close_time else None,
                    "task_queue": workflow.task_queue,
                })

                if len(workflows) >= limit:
                    break

            logger.info(f"Listed {len(workflows)} workflows")
            return workflows

        except Exception as e:
            logger.error(f"Failed to list workflows: {e}", exc_info=True)
            raise
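
# End-to-end sketch (illustrative only; assumes a reachable Temporal server,
# a running MinIO, and a discovered workflow named "rust_test"):
#
#   manager = TemporalManager()
#   await manager.initialize()
#   target_id = await manager.upload_target(Path("crate.tar.gz"), user_id="alice")
#   handle = await manager.run_workflow(
#       "rust_test", target_id, workflow_params={"max_iterations": 5000}
#   )
#   result = await manager.get_workflow_result(handle.id)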
@@ -1,119 +0,0 @@
# FuzzForge Test Suite

Comprehensive test infrastructure for FuzzForge modules and workflows.

## Directory Structure

```
tests/
├── conftest.py              # Shared pytest fixtures
├── unit/                    # Fast, isolated unit tests
│   ├── test_modules/        # Module-specific tests
│   │   ├── test_cargo_fuzzer.py
│   │   └── test_atheris_fuzzer.py
│   ├── test_workflows/      # Workflow tests
│   └── test_api/            # API endpoint tests
├── integration/             # Integration tests (requires Docker)
└── fixtures/                # Test data and projects
    ├── test_projects/       # Vulnerable projects for testing
    └── expected_results/    # Expected output for validation
```

## Running Tests

### All Tests
```bash
cd backend
pytest tests/ -v
```

### Unit Tests Only (Fast)
```bash
pytest tests/unit/ -v
```

### Integration Tests (Requires Docker)
```bash
# Start services
docker-compose up -d

# Run integration tests
pytest tests/integration/ -v

# Cleanup
docker-compose down
```

### With Coverage
```bash
pytest tests/ --cov=toolbox/modules --cov=src --cov-report=html
```

### Parallel Execution
Requires the `pytest-xdist` plugin:
```bash
pytest tests/unit/ -n auto
```

## Available Fixtures

### Workspace Fixtures
- `temp_workspace`: Empty temporary workspace
- `python_test_workspace`: Python project with vulnerabilities
- `rust_test_workspace`: Rust project with fuzz targets

### Module Fixtures
- `atheris_fuzzer`: AtherisFuzzer instance
- `cargo_fuzzer`: CargoFuzzer instance
- `file_scanner`: FileScanner instance

### Configuration Fixtures
- `atheris_config`: Default Atheris configuration
- `cargo_fuzz_config`: Default cargo-fuzz configuration
- `gitleaks_config`: Default Gitleaks configuration
- `file_scanner_config`: Default file scanner configuration

### Mock Fixtures
- `mock_stats_callback`: Mock stats callback for fuzzing
- `mock_temporal_context`: Mock Temporal activity context

## Writing Tests

### Unit Test Example
```python
import pytest

@pytest.mark.asyncio
async def test_module_execution(cargo_fuzzer, rust_test_workspace, cargo_fuzz_config):
    """Test module execution"""
    result = await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace)

    assert result.status == "success"
    assert result.execution_time > 0
```

### Integration Test Example
```python
@pytest.mark.integration
async def test_end_to_end_workflow():
    """Test complete workflow execution"""
    # Test full workflow with real services
    pass
```

## CI/CD Integration

Tests run automatically on:
- **Push to main/develop**: Full test suite
- **Pull requests**: Full test suite + coverage
- **Nightly**: Extended integration tests

See `.github/workflows/test.yml` for configuration.

## Code Coverage

Target coverage: **80%+** for core modules

View coverage report:
```bash
pytest tests/ --cov --cov-report=html
open htmlcov/index.html
```
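
### Mock Fixture Example

A minimal sketch of driving `mock_stats_callback` directly (illustrative only; the fixture simply records every stats dict it receives):

```python
import pytest

@pytest.mark.asyncio
async def test_stats_capture(mock_stats_callback):
    await mock_stats_callback({"total_execs": 1, "crashes": 0})
    assert mock_stats_callback.stats_received[0]["total_execs"] == 1
```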
@@ -11,220 +11,9 @@

import sys
from pathlib import Path
from typing import Dict, Any
import pytest

# Ensure project root is on sys.path so `src` is importable
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Add toolbox to path for module imports
TOOLBOX = ROOT / "toolbox"
if str(TOOLBOX) not in sys.path:
    sys.path.insert(0, str(TOOLBOX))


# ============================================================================
# Workspace Fixtures
# ============================================================================

@pytest.fixture
def temp_workspace(tmp_path):
    """Create a temporary workspace directory for testing"""
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    return workspace


@pytest.fixture
def python_test_workspace(temp_workspace):
    """Create a Python test workspace with sample files"""
    # Create a simple Python project structure
    (temp_workspace / "main.py").write_text("""
def process_data(data):
    # Intentional bug: no bounds checking
    return data[0:100]

def divide(a, b):
    # Division by zero vulnerability
    return a / b
""")

    (temp_workspace / "config.py").write_text("""
# Hardcoded secrets for testing
API_KEY = "sk_test_1234567890abcdef"
DATABASE_URL = "postgresql://admin:password123@localhost/db"
AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
""")

    return temp_workspace


@pytest.fixture
def rust_test_workspace(temp_workspace):
    """Create a Rust test workspace with fuzz targets"""
    # Create Cargo.toml
    (temp_workspace / "Cargo.toml").write_text("""[package]
name = "test_project"
version = "0.1.0"
edition = "2021"

[dependencies]
""")

    # Create src/lib.rs
    src_dir = temp_workspace / "src"
    src_dir.mkdir()
    (src_dir / "lib.rs").write_text("""
pub fn process_buffer(data: &[u8]) -> Vec<u8> {
    if data.len() < 4 {
        return Vec::new();
    }

    // Vulnerability: size is attacker-controlled and may exceed data.len()
    let size = data[0] as usize;
    let mut result = Vec::new();
    for i in 0..size {
        result.push(data[i]);
    }
    result
}
""")

    # Create fuzz directory structure
    fuzz_dir = temp_workspace / "fuzz"
    fuzz_dir.mkdir()

    (fuzz_dir / "Cargo.toml").write_text("""[package]
name = "test_project-fuzz"
version = "0.0.0"
edition = "2021"

[dependencies]
libfuzzer-sys = "0.4"

[dependencies.test_project]
path = ".."

[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
""")

    fuzz_targets_dir = fuzz_dir / "fuzz_targets"
    fuzz_targets_dir.mkdir()

    (fuzz_targets_dir / "fuzz_target_1.rs").write_text("""#![no_main]
use libfuzzer_sys::fuzz_target;
use test_project::process_buffer;

fuzz_target!(|data: &[u8]| {
    let _ = process_buffer(data);
});
""")

    return temp_workspace


# ============================================================================
# Module Configuration Fixtures
# ============================================================================

@pytest.fixture
def atheris_config():
    """Default Atheris fuzzer configuration"""
    return {
        "target_file": "auto-discover",
        "max_iterations": 1000,
        "timeout_seconds": 10,
        "corpus_dir": None
    }


@pytest.fixture
def cargo_fuzz_config():
    """Default cargo-fuzz configuration"""
    return {
        "target_name": None,
        "max_iterations": 1000,
        "timeout_seconds": 10,
        "sanitizer": "address"
    }


@pytest.fixture
def gitleaks_config():
    """Default Gitleaks configuration"""
    return {
        "config_path": None,
        "scan_uncommitted": True
    }


@pytest.fixture
def file_scanner_config():
    """Default file scanner configuration"""
    return {
        "scan_patterns": ["*.py", "*.rs", "*.js"],
        "exclude_patterns": ["*.test.*", "*.spec.*"],
        "max_file_size": 1048576  # 1MB
    }


# ============================================================================
# Module Instance Fixtures
# ============================================================================

@pytest.fixture
def atheris_fuzzer():
    """Create an AtherisFuzzer instance"""
    from modules.fuzzer.atheris_fuzzer import AtherisFuzzer
    return AtherisFuzzer()


@pytest.fixture
def cargo_fuzzer():
    """Create a CargoFuzzer instance"""
    from modules.fuzzer.cargo_fuzzer import CargoFuzzer
    return CargoFuzzer()


@pytest.fixture
def file_scanner():
    """Create a FileScanner instance"""
    from modules.scanner.file_scanner import FileScanner
    return FileScanner()


# ============================================================================
# Mock Fixtures
# ============================================================================

@pytest.fixture
def mock_stats_callback():
    """Mock stats callback for fuzzing"""
    stats_received = []

    async def callback(stats: Dict[str, Any]):
        stats_received.append(stats)

    callback.stats_received = stats_received
    return callback


@pytest.fixture
def mock_temporal_context():
    """Mock Temporal activity context"""
    class MockActivityInfo:
        def __init__(self):
            self.workflow_id = "test-workflow-123"
            self.activity_id = "test-activity-1"
            self.attempt = 1

    class MockContext:
        def __init__(self):
            self.info = MockActivityInfo()

    return MockContext()
0 backend/tests/fixtures/__init__.py vendored
82 backend/tests/test_prefect_stats_monitor.py Normal file
@@ -0,0 +1,82 @@
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
from datetime import datetime, timezone, timedelta

from src.services.prefect_stats_monitor import PrefectStatsMonitor
from src.api import fuzzing


class FakeLog:
    def __init__(self, message: str):
        self.message = message


class FakeClient:
    def __init__(self, logs):
        self._logs = logs

    async def read_logs(self, log_filter=None, limit=100, sort="TIMESTAMP_ASC"):
        return self._logs


class FakeTaskRun:
    def __init__(self):
        self.id = "task-1"
        self.start_time = datetime.now(timezone.utc) - timedelta(seconds=5)


def test_parse_stats_from_log_fuzzing():
    mon = PrefectStatsMonitor()
    msg = (
        "INFO LIVE_STATS extra={'stats_type': 'fuzzing_live_update', "
        "'executions': 42, 'executions_per_sec': 3.14, 'crashes': 1, 'unique_crashes': 1, 'corpus_size': 9}"
    )
    stats = mon._parse_stats_from_log(msg)
    assert stats is not None
    assert stats["stats_type"] == "fuzzing_live_update"
    assert stats["executions"] == 42


def test_extract_stats_updates_and_broadcasts():
    mon = PrefectStatsMonitor()
    run_id = "run-123"
    workflow = "wf"
    fuzzing.initialize_fuzzing_tracking(run_id, workflow)

    # Prepare a fake websocket to capture messages
    sent = []

    class FakeWS:
        async def send_text(self, text: str):
            sent.append(text)

    fuzzing.active_connections[run_id] = [FakeWS()]

    # Craft a log line the parser understands
    msg = (
        "INFO LIVE_STATS extra={'stats_type': 'fuzzing_live_update', "
        "'executions': 10, 'executions_per_sec': 1.5, 'crashes': 0, 'unique_crashes': 0, 'corpus_size': 2}"
    )
    fake_client = FakeClient([FakeLog(msg)])
    task_run = FakeTaskRun()

    asyncio.run(mon._extract_stats_from_task(fake_client, run_id, task_run, workflow))

    # Verify stats updated
    stats = fuzzing.fuzzing_stats[run_id]
    assert stats.executions == 10
    assert stats.executions_per_sec == 1.5

    # Verify a message was sent to WebSocket
    assert sent, "Expected a stats_update message to be sent"
@@ -1,177 +0,0 @@
"""
Unit tests for AtherisFuzzer module
"""

import pytest
from unittest.mock import AsyncMock, patch


@pytest.mark.asyncio
class TestAtherisFuzzerMetadata:
    """Test AtherisFuzzer metadata"""

    async def test_metadata_structure(self, atheris_fuzzer):
        """Test that module metadata is properly defined"""
        metadata = atheris_fuzzer.get_metadata()

        assert metadata.name == "atheris_fuzzer"
        assert metadata.category == "fuzzer"
        assert "fuzzing" in metadata.tags
        assert "python" in metadata.tags


@pytest.mark.asyncio
class TestAtherisFuzzerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, atheris_fuzzer, atheris_config):
        """Test validation of valid configuration"""
        assert atheris_fuzzer.validate_config(atheris_config) is True

    async def test_invalid_max_iterations(self, atheris_fuzzer):
        """Test validation fails with invalid max_iterations"""
        config = {
            "target_file": "fuzz_target.py",
            "max_iterations": -1,
            "timeout_seconds": 10
        }
        with pytest.raises(ValueError, match="max_iterations"):
            atheris_fuzzer.validate_config(config)

    async def test_invalid_timeout(self, atheris_fuzzer):
        """Test validation fails with invalid timeout"""
        config = {
            "target_file": "fuzz_target.py",
            "max_iterations": 1000,
            "timeout_seconds": 0
        }
        with pytest.raises(ValueError, match="timeout_seconds"):
            atheris_fuzzer.validate_config(config)


@pytest.mark.asyncio
class TestAtherisFuzzerDiscovery:
    """Test fuzz target discovery"""

    async def test_auto_discover(self, atheris_fuzzer, python_test_workspace):
        """Test auto-discovery of Python fuzz targets"""
        # Create a fuzz target file
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Pass None for auto-discovery
        target = atheris_fuzzer._discover_target(python_test_workspace, None)

        assert target is not None
        assert "fuzz_target.py" in str(target)


@pytest.mark.asyncio
class TestAtherisFuzzerExecution:
    """Test fuzzer execution logic"""

    async def test_execution_creates_result(self, atheris_fuzzer, python_test_workspace, atheris_config):
        """Test that execution returns a ModuleResult"""
        # Create a simple fuzz target
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    if len(data) > 0:
        pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Use a very short timeout for testing
        test_config = {
            "target_file": "fuzz_target.py",
            "max_iterations": 10,
            "timeout_seconds": 1
        }

        # Mock the fuzzing subprocess to avoid actual execution
        with patch.object(atheris_fuzzer, '_run_fuzzing', new_callable=AsyncMock, return_value=([], {"total_executions": 10})):
            result = await atheris_fuzzer.execute(test_config, python_test_workspace)

        assert result.module == "atheris_fuzzer"
        assert result.status in ["success", "partial", "failed"]
        assert isinstance(result.execution_time, float)


@pytest.mark.asyncio
class TestAtherisFuzzerStatsCallback:
    """Test stats callback functionality"""

    async def test_stats_callback_invoked(self, atheris_fuzzer, python_test_workspace, atheris_config, mock_stats_callback):
        """Test that stats callback is invoked during fuzzing"""
        (python_test_workspace / "fuzz_target.py").write_text("""
import atheris
import sys

def TestOneInput(data):
    pass

if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
""")

        # Mock fuzzing to simulate stats
        async def mock_run_fuzzing(test_one_input, target_path, workspace, max_iterations, timeout_seconds, stats_callback):
            if stats_callback:
                await stats_callback({
                    "total_execs": 100,
                    "execs_per_sec": 10.0,
                    "crashes": 0,
                    "coverage": 5,
                    "corpus_size": 2,
                    "elapsed_time": 10
                })
            return

        with patch.object(atheris_fuzzer, '_run_fuzzing', side_effect=mock_run_fuzzing):
            with patch.object(atheris_fuzzer, '_load_target_module', return_value=lambda x: None):
                # Put stats_callback in config dict, not as kwarg
                atheris_config["target_file"] = "fuzz_target.py"
                atheris_config["stats_callback"] = mock_stats_callback
                await atheris_fuzzer.execute(atheris_config, python_test_workspace)

        # Verify callback was invoked
        assert len(mock_stats_callback.stats_received) > 0


@pytest.mark.asyncio
class TestAtherisFuzzerFindingGeneration:
    """Test finding generation from crashes"""

    async def test_create_crash_finding(self, atheris_fuzzer):
        """Test crash finding creation"""
        finding = atheris_fuzzer.create_finding(
            title="Crash: Exception in TestOneInput",
            description="IndexError: list index out of range",
            severity="high",
            category="crash",
            file_path="fuzz_target.py",
            metadata={
                "crash_type": "IndexError",
                "stack_trace": "Traceback..."
            }
        )

        assert finding.title == "Crash: Exception in TestOneInput"
        assert finding.severity == "high"
        assert finding.category == "crash"
        assert "IndexError" in finding.metadata["crash_type"]
@@ -1,177 +0,0 @@
|
||||
"""
|
||||
Unit tests for CargoFuzzer module
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestCargoFuzzerMetadata:
|
||||
"""Test CargoFuzzer metadata"""
|
||||
|
||||
async def test_metadata_structure(self, cargo_fuzzer):
|
||||
"""Test that module metadata is properly defined"""
|
||||
metadata = cargo_fuzzer.get_metadata()
|
||||
|
||||
assert metadata.name == "cargo_fuzz"
|
||||
assert metadata.version == "0.11.2"
|
||||
assert metadata.category == "fuzzer"
|
||||
assert "fuzzing" in metadata.tags
|
||||
assert "rust" in metadata.tags
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestCargoFuzzerConfigValidation:
|
||||
"""Test configuration validation"""
|
||||
|
||||
async def test_valid_config(self, cargo_fuzzer, cargo_fuzz_config):
|
||||
"""Test validation of valid configuration"""
|
||||
assert cargo_fuzzer.validate_config(cargo_fuzz_config) is True
|
||||
|
||||
async def test_invalid_max_iterations(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid max_iterations"""
|
||||
config = {
|
||||
"max_iterations": -1,
|
||||
"timeout_seconds": 10,
|
||||
"sanitizer": "address"
|
||||
}
|
||||
with pytest.raises(ValueError, match="max_iterations"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
async def test_invalid_timeout(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid timeout"""
|
||||
config = {
|
||||
"max_iterations": 1000,
|
||||
"timeout_seconds": 0,
|
||||
"sanitizer": "address"
|
||||
}
|
||||
with pytest.raises(ValueError, match="timeout_seconds"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
async def test_invalid_sanitizer(self, cargo_fuzzer):
|
||||
"""Test validation fails with invalid sanitizer"""
|
||||
config = {
|
||||
"max_iterations": 1000,
|
||||
"timeout_seconds": 10,
|
||||
"sanitizer": "invalid_sanitizer"
|
||||
}
|
||||
with pytest.raises(ValueError, match="sanitizer"):
|
||||
cargo_fuzzer.validate_config(config)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestCargoFuzzerWorkspaceValidation:
    """Test workspace validation"""

    async def test_valid_workspace(self, cargo_fuzzer, rust_test_workspace):
        """Test validation of valid workspace"""
        assert cargo_fuzzer.validate_workspace(rust_test_workspace) is True

    async def test_nonexistent_workspace(self, cargo_fuzzer, tmp_path):
        """Test validation fails with nonexistent workspace"""
        nonexistent = tmp_path / "does_not_exist"
        with pytest.raises(ValueError, match="does not exist"):
            cargo_fuzzer.validate_workspace(nonexistent)

    async def test_workspace_is_file(self, cargo_fuzzer, tmp_path):
        """Test validation fails when workspace is a file"""
        file_path = tmp_path / "file.txt"
        file_path.write_text("test")
        with pytest.raises(ValueError, match="not a directory"):
            cargo_fuzzer.validate_workspace(file_path)


@pytest.mark.asyncio
class TestCargoFuzzerDiscovery:
    """Test fuzz target discovery"""

    async def test_discover_targets(self, cargo_fuzzer, rust_test_workspace):
        """Test discovery of fuzz targets"""
        targets = await cargo_fuzzer._discover_fuzz_targets(rust_test_workspace)

        assert len(targets) == 1
        assert "fuzz_target_1" in targets

    async def test_no_fuzz_directory(self, cargo_fuzzer, temp_workspace):
        """Test discovery with no fuzz directory"""
        targets = await cargo_fuzzer._discover_fuzz_targets(temp_workspace)

        assert targets == []


@pytest.mark.asyncio
class TestCargoFuzzerExecution:
    """Test fuzzer execution logic"""

    async def test_execution_creates_result(self, cargo_fuzzer, rust_test_workspace, cargo_fuzz_config):
        """Test that execution returns a ModuleResult"""
        # Mock the build and run methods to avoid actual fuzzing
        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', new_callable=AsyncMock, return_value=([], {"total_executions": 0, "crashes_found": 0})):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    result = await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace)

        assert result.module == "cargo_fuzz"
        assert result.status == "success"
        assert isinstance(result.execution_time, float)
        assert result.execution_time >= 0

    async def test_execution_with_no_targets(self, cargo_fuzzer, temp_workspace, cargo_fuzz_config):
        """Test execution fails gracefully with no fuzz targets"""
        result = await cargo_fuzzer.execute(cargo_fuzz_config, temp_workspace)

        assert result.status == "failed"
        assert "No fuzz targets found" in result.error


@pytest.mark.asyncio
class TestCargoFuzzerStatsCallback:
    """Test stats callback functionality"""

    async def test_stats_callback_invoked(self, cargo_fuzzer, rust_test_workspace, cargo_fuzz_config, mock_stats_callback):
        """Test that stats callback is invoked during fuzzing"""
        # Mock build/run to simulate stats generation
        async def mock_run_fuzzing(workspace, target, config, callback):
            # Simulate stats callback
            if callback:
                await callback({
                    "total_execs": 1000,
                    "execs_per_sec": 100.0,
                    "crashes": 0,
                    "coverage": 10,
                    "corpus_size": 5,
                    "elapsed_time": 10
                })
            return [], {"total_executions": 1000}

        with patch.object(cargo_fuzzer, '_build_fuzz_target', new_callable=AsyncMock, return_value=True):
            with patch.object(cargo_fuzzer, '_run_fuzzing', side_effect=mock_run_fuzzing):
                with patch.object(cargo_fuzzer, '_parse_crash_artifacts', new_callable=AsyncMock, return_value=[]):
                    await cargo_fuzzer.execute(cargo_fuzz_config, rust_test_workspace, stats_callback=mock_stats_callback)

        # Verify callback was invoked
        assert len(mock_stats_callback.stats_received) > 0
        assert mock_stats_callback.stats_received[0]["total_execs"] == 1000


@pytest.mark.asyncio
class TestCargoFuzzerFindingGeneration:
    """Test finding generation from crashes"""

    async def test_create_finding_from_crash(self, cargo_fuzzer):
        """Test finding creation"""
        finding = cargo_fuzzer.create_finding(
            title="Crash: Segmentation Fault",
            description="Test crash",
            severity="critical",
            category="crash",
            file_path="fuzz/fuzz_targets/fuzz_target_1.rs",
            metadata={"crash_type": "SIGSEGV"}
        )

        assert finding.title == "Crash: Segmentation Fault"
        assert finding.severity == "critical"
        assert finding.category == "crash"
        assert finding.file_path == "fuzz/fuzz_targets/fuzz_target_1.rs"
        assert finding.metadata["crash_type"] == "SIGSEGV"
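
# The tests above assume a `mock_stats_callback` fixture defined in conftest.py
# (not shown in this diff). A minimal sketch of what would satisfy the contract
# the tests rely on, namely an awaitable object that records every stats payload
# it receives; the class name here is illustrative, not taken from the repository:

class RecordingStatsCallback:
    """Awaitable callback that records each stats dict it receives."""

    def __init__(self):
        self.stats_received = []

    async def __call__(self, stats: dict) -> None:
        self.stats_received.append(stats)


@pytest.fixture
def mock_stats_callback():
    return RecordingStatsCallback()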
@@ -1,349 +0,0 @@
"""
Unit tests for FileScanner module
"""

import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))


@pytest.mark.asyncio
class TestFileScannerMetadata:
    """Test FileScanner metadata"""

    async def test_metadata_structure(self, file_scanner):
        """Test that metadata has correct structure"""
        metadata = file_scanner.get_metadata()

        assert metadata.name == "file_scanner"
        assert metadata.version == "1.0.0"
        assert metadata.category == "scanner"
        assert "files" in metadata.tags
        assert "enumeration" in metadata.tags
        assert metadata.requires_workspace is True


@pytest.mark.asyncio
class TestFileScannerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, file_scanner):
        """Test that valid config passes validation"""
        config = {
            "patterns": ["*.py", "*.js"],
            "max_file_size": 1048576,
            "check_sensitive": True,
            "calculate_hashes": False
        }
        assert file_scanner.validate_config(config) is True

    async def test_default_config(self, file_scanner):
        """Test that empty config uses defaults"""
        config = {}
        assert file_scanner.validate_config(config) is True

    async def test_invalid_patterns_type(self, file_scanner):
        """Test that non-list patterns raises error"""
        config = {"patterns": "*.py"}
        with pytest.raises(ValueError, match="patterns must be a list"):
            file_scanner.validate_config(config)

    async def test_invalid_max_file_size(self, file_scanner):
        """Test that invalid max_file_size raises error"""
        config = {"max_file_size": -1}
        with pytest.raises(ValueError, match="max_file_size must be a positive integer"):
            file_scanner.validate_config(config)

    async def test_invalid_max_file_size_type(self, file_scanner):
        """Test that non-integer max_file_size raises error"""
        config = {"max_file_size": "large"}
        with pytest.raises(ValueError, match="max_file_size must be a positive integer"):
            file_scanner.validate_config(config)


@pytest.mark.asyncio
class TestFileScannerExecution:
    """Test scanner execution"""

    async def test_scan_python_files(self, file_scanner, python_test_workspace):
        """Test scanning Python files"""
        config = {
            "patterns": ["*.py"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.module == "file_scanner"
        assert result.status == "success"
        assert len(result.findings) > 0

        # Check that Python files were found
        python_files = [f for f in result.findings if f.file_path.endswith('.py')]
        assert len(python_files) > 0

    async def test_scan_all_files(self, file_scanner, python_test_workspace):
        """Test scanning all files with wildcard"""
        config = {
            "patterns": ["*"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert len(result.findings) > 0
        assert result.summary["total_files"] > 0

    async def test_scan_with_multiple_patterns(self, file_scanner, python_test_workspace):
        """Test scanning with multiple patterns"""
        config = {
            "patterns": ["*.py", "*.txt"],
            "check_sensitive": False,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert len(result.findings) > 0

    async def test_empty_workspace(self, file_scanner, temp_workspace):
        """Test scanning empty workspace"""
        config = {
            "patterns": ["*.py"],
            "check_sensitive": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert len(result.findings) == 0
        assert result.summary["total_files"] == 0


@pytest.mark.asyncio
class TestFileScannerSensitiveDetection:
    """Test sensitive file detection"""

    async def test_detect_env_file(self, file_scanner, temp_workspace):
        """Test detection of .env file"""
        # Create .env file
        (temp_workspace / ".env").write_text("API_KEY=secret123")

        config = {
            "patterns": ["*"],
            "check_sensitive": True,
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"

        # Check for sensitive file finding
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) > 0
        assert any(".env" in f.title for f in sensitive_findings)

    async def test_detect_private_key(self, file_scanner, temp_workspace):
        """Test detection of private key file"""
        # Create private key file
        (temp_workspace / "id_rsa").write_text("-----BEGIN RSA PRIVATE KEY-----")

        config = {
            "patterns": ["*"],
            "check_sensitive": True
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) > 0

    async def test_no_sensitive_detection_when_disabled(self, file_scanner, temp_workspace):
        """Test that sensitive detection can be disabled"""
        (temp_workspace / ".env").write_text("API_KEY=secret123")

        config = {
            "patterns": ["*"],
            "check_sensitive": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        sensitive_findings = [f for f in result.findings if f.category == "sensitive_file"]
        assert len(sensitive_findings) == 0


@pytest.mark.asyncio
class TestFileScannerHashing:
    """Test file hashing functionality"""

    async def test_hash_calculation(self, file_scanner, temp_workspace):
        """Test SHA256 hash calculation"""
        # Create test file
        test_file = temp_workspace / "test.txt"
        test_file.write_text("Hello World")

        config = {
            "patterns": ["*.txt"],
            "calculate_hashes": True
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"

        # Find the test.txt finding
        txt_findings = [f for f in result.findings if "test.txt" in f.file_path]
        assert len(txt_findings) > 0

        # Check that hash was calculated
        finding = txt_findings[0]
        assert finding.metadata.get("file_hash") is not None
        assert len(finding.metadata["file_hash"]) == 64  # SHA256 hex length

    async def test_no_hash_when_disabled(self, file_scanner, temp_workspace):
        """Test that hashing can be disabled"""
        test_file = temp_workspace / "test.txt"
        test_file.write_text("Hello World")

        config = {
            "patterns": ["*.txt"],
            "calculate_hashes": False
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        txt_findings = [f for f in result.findings if "test.txt" in f.file_path]

        if len(txt_findings) > 0:
            finding = txt_findings[0]
            assert finding.metadata.get("file_hash") is None


@pytest.mark.asyncio
class TestFileScannerFileTypes:
    """Test file type detection"""

    async def test_detect_python_type(self, file_scanner, temp_workspace):
        """Test detection of Python file type"""
        (temp_workspace / "script.py").write_text("print('hello')")

        config = {"patterns": ["*.py"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        py_findings = [f for f in result.findings if "script.py" in f.file_path]
        assert len(py_findings) > 0
        assert "python" in py_findings[0].metadata["file_type"]

    async def test_detect_javascript_type(self, file_scanner, temp_workspace):
        """Test detection of JavaScript file type"""
        (temp_workspace / "app.js").write_text("console.log('hello')")

        config = {"patterns": ["*.js"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        js_findings = [f for f in result.findings if "app.js" in f.file_path]
        assert len(js_findings) > 0
        assert "javascript" in js_findings[0].metadata["file_type"]

    async def test_file_type_summary(self, file_scanner, temp_workspace):
        """Test that file type summary is generated"""
        (temp_workspace / "script.py").write_text("print('hello')")
        (temp_workspace / "app.js").write_text("console.log('hello')")
        (temp_workspace / "readme.txt").write_text("Documentation")

        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert "file_types" in result.summary
        assert len(result.summary["file_types"]) > 0


@pytest.mark.asyncio
class TestFileScannerSizeLimits:
    """Test file size handling"""

    async def test_skip_large_files(self, file_scanner, temp_workspace):
        """Test that large files are skipped"""
        # Create a "large" file
        large_file = temp_workspace / "large.txt"
        large_file.write_text("x" * 1000)

        config = {
            "patterns": ["*.txt"],
            "max_file_size": 500  # Set limit smaller than file
        }

        result = await file_scanner.execute(config, temp_workspace)

        # Should succeed but skip the large file
        assert result.status == "success"

        # The file should still be counted but not have a detailed finding
        assert result.summary["total_files"] > 0

    async def test_process_small_files(self, file_scanner, temp_workspace):
        """Test that small files are processed"""
        small_file = temp_workspace / "small.txt"
        small_file.write_text("small content")

        config = {
            "patterns": ["*.txt"],
            "max_file_size": 1048576  # 1MB
        }

        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        txt_findings = [f for f in result.findings if "small.txt" in f.file_path]
        assert len(txt_findings) > 0


@pytest.mark.asyncio
class TestFileScannerSummary:
    """Test result summary generation"""

    async def test_summary_structure(self, file_scanner, python_test_workspace):
        """Test that summary has correct structure"""
        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, python_test_workspace)

        assert result.status == "success"
        assert "total_files" in result.summary
        assert "total_size_bytes" in result.summary
        assert "file_types" in result.summary
        assert "patterns_scanned" in result.summary

        assert isinstance(result.summary["total_files"], int)
        assert isinstance(result.summary["total_size_bytes"], int)
        assert isinstance(result.summary["file_types"], dict)
        assert isinstance(result.summary["patterns_scanned"], list)

    async def test_summary_counts(self, file_scanner, temp_workspace):
        """Test that summary counts are accurate"""
        # Create known files
        (temp_workspace / "file1.py").write_text("content1")
        (temp_workspace / "file2.py").write_text("content2")
        (temp_workspace / "file3.txt").write_text("content3")

        config = {"patterns": ["*"]}
        result = await file_scanner.execute(config, temp_workspace)

        assert result.status == "success"
        assert result.summary["total_files"] == 3
        assert result.summary["total_size_bytes"] > 0
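
# These tests depend on `file_scanner`, `temp_workspace`, and `python_test_workspace`
# fixtures defined in conftest.py (not shown in this diff). A minimal sketch of
# plausible definitions; the FileScanner import path is an assumption, not taken
# from the repository:

@pytest.fixture
def file_scanner():
    from modules.scanner.file_scanner import FileScanner  # assumed import path
    return FileScanner()


@pytest.fixture
def temp_workspace(tmp_path):
    # An empty directory standing in for an uploaded target workspace
    return tmp_path


@pytest.fixture
def python_test_workspace(tmp_path):
    # A workspace seeded with Python and text files for the pattern tests
    (tmp_path / "main.py").write_text("print('hello')")
    (tmp_path / "notes.txt").write_text("plain text")
    return tmp_path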
@@ -1,493 +0,0 @@
"""
Unit tests for SecurityAnalyzer module
"""

import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "toolbox"))

from modules.analyzer.security_analyzer import SecurityAnalyzer


@pytest.fixture
def security_analyzer():
    """Create SecurityAnalyzer instance"""
    return SecurityAnalyzer()


@pytest.mark.asyncio
class TestSecurityAnalyzerMetadata:
    """Test SecurityAnalyzer metadata"""

    async def test_metadata_structure(self, security_analyzer):
        """Test that metadata has correct structure"""
        metadata = security_analyzer.get_metadata()

        assert metadata.name == "security_analyzer"
        assert metadata.version == "1.0.0"
        assert metadata.category == "analyzer"
        assert "security" in metadata.tags
        assert "vulnerabilities" in metadata.tags
        assert metadata.requires_workspace is True


@pytest.mark.asyncio
class TestSecurityAnalyzerConfigValidation:
    """Test configuration validation"""

    async def test_valid_config(self, security_analyzer):
        """Test that valid config passes validation"""
        config = {
            "file_extensions": [".py", ".js"],
            "check_secrets": True,
            "check_sql": True,
            "check_dangerous_functions": True
        }
        assert security_analyzer.validate_config(config) is True

    async def test_default_config(self, security_analyzer):
        """Test that empty config uses defaults"""
        config = {}
        assert security_analyzer.validate_config(config) is True

    async def test_invalid_extensions_type(self, security_analyzer):
        """Test that non-list extensions raises error"""
        config = {"file_extensions": ".py"}
        with pytest.raises(ValueError, match="file_extensions must be a list"):
            security_analyzer.validate_config(config)


@pytest.mark.asyncio
class TestSecurityAnalyzerSecretDetection:
    """Test hardcoded secret detection"""

    async def test_detect_api_key(self, security_analyzer, temp_workspace):
        """Test detection of hardcoded API key"""
        code_file = temp_workspace / "config.py"
        code_file.write_text("""
# Configuration file
api_key = "apikey_live_abcdefghijklmnopqrstuvwxyzabcdefghijk"
database_url = "postgresql://localhost/db"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": False,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) > 0
        assert any("API Key" in f.title for f in secret_findings)

    async def test_detect_password(self, security_analyzer, temp_workspace):
        """Test detection of hardcoded password"""
        code_file = temp_workspace / "auth.py"
        code_file.write_text("""
def connect():
    password = "mySecretP@ssw0rd"
    return connect_db(password)
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": False,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) > 0

    async def test_detect_aws_credentials(self, security_analyzer, temp_workspace):
        """Test detection of AWS credentials"""
        code_file = temp_workspace / "aws_config.py"
        code_file.write_text("""
aws_access_key = "AKIAIOSFODNN7REALKEY"
aws_secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYREALKEY"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        aws_findings = [f for f in result.findings if "AWS" in f.title]
        assert len(aws_findings) >= 2  # Both access key and secret key

    async def test_no_secret_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that secret detection can be disabled"""
        code_file = temp_workspace / "config.py"
        code_file.write_text('api_key = "sk_live_1234567890abcdef"')

        config = {
            "file_extensions": [".py"],
            "check_secrets": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        assert len(secret_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerSQLInjection:
    """Test SQL injection detection"""

    async def test_detect_string_concatenation(self, security_analyzer, temp_workspace):
        """Test detection of SQL string concatenation"""
        code_file = temp_workspace / "db.py"
        code_file.write_text("""
def get_user(user_id):
    query = "SELECT * FROM users WHERE id = " + user_id
    return execute(query)
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": False,
            "check_sql": True,
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_detect_f_string_sql(self, security_analyzer, temp_workspace):
        """Test detection of f-string in SQL"""
        code_file = temp_workspace / "db.py"
        code_file.write_text("""
def get_user(name):
    query = f"SELECT * FROM users WHERE name = '{name}'"
    return execute(query)
""")

        config = {
            "file_extensions": [".py"],
            "check_sql": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_detect_dynamic_query_building(self, security_analyzer, temp_workspace):
        """Test detection of dynamic query building"""
        code_file = temp_workspace / "queries.py"
        code_file.write_text("""
def search(keyword):
    query = "SELECT * FROM products WHERE name LIKE " + keyword
    execute(query + " ORDER BY price")
""")

        config = {
            "file_extensions": [".py"],
            "check_sql": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) > 0

    async def test_no_sql_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that SQL detection can be disabled"""
        code_file = temp_workspace / "db.py"
        code_file.write_text('query = "SELECT * FROM users WHERE id = " + user_id')

        config = {
            "file_extensions": [".py"],
            "check_sql": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        assert len(sql_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerDangerousFunctions:
    """Test dangerous function detection"""

    async def test_detect_eval(self, security_analyzer, temp_workspace):
        """Test detection of eval() usage"""
        code_file = temp_workspace / "dangerous.py"
        code_file.write_text("""
def process_input(user_input):
    result = eval(user_input)
    return result
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": False,
            "check_sql": False,
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0
        assert any("eval" in f.title.lower() for f in dangerous_findings)

    async def test_detect_exec(self, security_analyzer, temp_workspace):
        """Test detection of exec() usage"""
        code_file = temp_workspace / "runner.py"
        code_file.write_text("""
def run_code(code):
    exec(code)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_os_system(self, security_analyzer, temp_workspace):
        """Test detection of os.system() usage"""
        code_file = temp_workspace / "commands.py"
        code_file.write_text("""
import os

def run_command(cmd):
    os.system(cmd)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0
        assert any("os.system" in f.title for f in dangerous_findings)

    async def test_detect_pickle_loads(self, security_analyzer, temp_workspace):
        """Test detection of pickle.loads() usage"""
        code_file = temp_workspace / "serializer.py"
        code_file.write_text("""
import pickle

def deserialize(data):
    return pickle.loads(data)
""")

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_javascript_eval(self, security_analyzer, temp_workspace):
        """Test detection of eval() in JavaScript"""
        code_file = temp_workspace / "app.js"
        code_file.write_text("""
function processInput(userInput) {
    return eval(userInput);
}
""")

        config = {
            "file_extensions": [".js"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_detect_innerHTML(self, security_analyzer, temp_workspace):
        """Test detection of innerHTML (XSS risk)"""
        code_file = temp_workspace / "dom.js"
        code_file.write_text("""
function updateContent(html) {
    document.getElementById("content").innerHTML = html;
}
""")

        config = {
            "file_extensions": [".js"],
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) > 0

    async def test_no_dangerous_detection_when_disabled(self, security_analyzer, temp_workspace):
        """Test that dangerous function detection can be disabled"""
        code_file = temp_workspace / "code.py"
        code_file.write_text('result = eval(user_input)')

        config = {
            "file_extensions": [".py"],
            "check_dangerous_functions": False
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]
        assert len(dangerous_findings) == 0


@pytest.mark.asyncio
class TestSecurityAnalyzerMultipleIssues:
    """Test detection of multiple issues in same file"""

    async def test_detect_multiple_vulnerabilities(self, security_analyzer, temp_workspace):
        """Test detection of multiple vulnerability types"""
        code_file = temp_workspace / "vulnerable.py"
        code_file.write_text("""
import os

# Hardcoded credentials
api_key = "apikey_live_abcdefghijklmnopqrstuvwxyzabcdef"
password = "MySecureP@ssw0rd"

def process_query(user_input):
    # SQL injection
    query = "SELECT * FROM users WHERE name = " + user_input

    # Dangerous function
    result = eval(user_input)

    # Command injection
    os.system(user_input)

    return result
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True,
            "check_sql": True,
            "check_dangerous_functions": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"

        # Should find multiple types of issues
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        sql_findings = [f for f in result.findings if f.category == "sql_injection"]
        dangerous_findings = [f for f in result.findings if f.category == "dangerous_function"]

        assert len(secret_findings) > 0
        assert len(sql_findings) > 0
        assert len(dangerous_findings) > 0


@pytest.mark.asyncio
class TestSecurityAnalyzerSummary:
    """Test result summary generation"""

    async def test_summary_structure(self, security_analyzer, temp_workspace):
        """Test that summary has correct structure"""
        (temp_workspace / "test.py").write_text("print('hello')")

        config = {"file_extensions": [".py"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        assert "files_analyzed" in result.summary
        assert "total_findings" in result.summary
        assert "extensions_scanned" in result.summary

        assert isinstance(result.summary["files_analyzed"], int)
        assert isinstance(result.summary["total_findings"], int)
        assert isinstance(result.summary["extensions_scanned"], list)

    async def test_empty_workspace(self, security_analyzer, temp_workspace):
        """Test analyzing empty workspace"""
        config = {"file_extensions": [".py"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "partial"  # No files found
        assert result.summary["files_analyzed"] == 0

    async def test_analyze_multiple_file_types(self, security_analyzer, temp_workspace):
        """Test analyzing multiple file types"""
        (temp_workspace / "app.py").write_text("print('hello')")
        (temp_workspace / "script.js").write_text("console.log('hello')")
        (temp_workspace / "index.php").write_text("<?php echo 'hello'; ?>")

        config = {"file_extensions": [".py", ".js", ".php"]}
        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        assert result.summary["files_analyzed"] == 3


@pytest.mark.asyncio
class TestSecurityAnalyzerFalsePositives:
    """Test false positive filtering"""

    async def test_skip_test_secrets(self, security_analyzer, temp_workspace):
        """Test that test/example secrets are filtered"""
        code_file = temp_workspace / "test_config.py"
        code_file.write_text("""
# Test configuration - should be filtered
api_key = "test_key_example"
password = "dummy_password_123"
token = "sample_token_placeholder"
""")

        config = {
            "file_extensions": [".py"],
            "check_secrets": True
        }

        result = await security_analyzer.execute(config, temp_workspace)

        assert result.status == "success"
        # These should be filtered as false positives
        secret_findings = [f for f in result.findings if f.category == "hardcoded_secret"]
        # Should have fewer or no findings due to false positive filtering
        assert len(secret_findings) == 0 or all(
            not any(fp in f.description.lower() for fp in ['test', 'example', 'dummy', 'sample'])
            for f in secret_findings
        )
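
# SecurityAnalyzer can also be exercised outside pytest. A small driver sketch,
# reusing the import above; the workspace path below is a placeholder:

import asyncio


async def _demo(workspace: Path) -> None:
    analyzer = SecurityAnalyzer()
    config = {
        "file_extensions": [".py"],
        "check_secrets": True,
        "check_sql": True,
        "check_dangerous_functions": True,
    }
    result = await analyzer.execute(config, workspace)
    for finding in result.findings:
        print(finding.severity, finding.category, finding.title)


# asyncio.run(_demo(Path("/path/to/workspace")))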
@@ -1,369 +0,0 @@
"""
FuzzForge Common Storage Activities

Activities for interacting with MinIO storage:
- get_target_activity: Download target from MinIO to local cache
- cleanup_cache_activity: Remove target from local cache
- upload_results_activity: Upload workflow results to MinIO
"""

import json
import logging
import os
import shutil
import tarfile
from pathlib import Path
from typing import Optional

import boto3
from botocore.exceptions import ClientError
from temporalio import activity

# Configure logging
logger = logging.getLogger(__name__)

# Initialize S3 client (MinIO)
s3_client = boto3.client(
    's3',
    endpoint_url=os.getenv('S3_ENDPOINT', 'http://minio:9000'),
    aws_access_key_id=os.getenv('S3_ACCESS_KEY', 'fuzzforge'),
    aws_secret_access_key=os.getenv('S3_SECRET_KEY', 'fuzzforge123'),
    region_name=os.getenv('S3_REGION', 'us-east-1'),
    use_ssl=os.getenv('S3_USE_SSL', 'false').lower() == 'true'
)

# Configuration
S3_BUCKET = os.getenv('S3_BUCKET', 'targets')
CACHE_DIR = Path(os.getenv('CACHE_DIR', '/cache'))
CACHE_MAX_SIZE_GB = int(os.getenv('CACHE_MAX_SIZE', '10').rstrip('GB'))


@activity.defn(name="get_target")
async def get_target_activity(
    target_id: str,
    run_id: Optional[str] = None,
    workspace_isolation: str = "isolated"
) -> str:
    """
    Download target from MinIO to local cache.

    Args:
        target_id: UUID of the uploaded target
        run_id: Workflow run ID for isolation (required for "isolated" and
            "copy-on-write" modes)
        workspace_isolation: Isolation mode - "isolated" (default), "shared", or "copy-on-write"

    Returns:
        Local path to the cached target workspace

    Raises:
        FileNotFoundError: If target doesn't exist in MinIO
        ValueError: If run_id not provided for isolated or copy-on-write mode
        Exception: For other download errors
    """
    logger.info(
        f"Activity: get_target (target_id={target_id}, run_id={run_id}, "
        f"isolation={workspace_isolation})"
    )

    # Validate isolation mode
    valid_modes = ["isolated", "shared", "copy-on-write"]
    if workspace_isolation not in valid_modes:
        raise ValueError(
            f"Invalid workspace_isolation mode: {workspace_isolation}. "
            f"Must be one of: {valid_modes}"
        )

    # Require run_id for isolated and copy-on-write modes
    if workspace_isolation in ["isolated", "copy-on-write"] and not run_id:
        raise ValueError(
            f"run_id is required for workspace_isolation='{workspace_isolation}'"
        )

    # Define cache paths based on isolation mode
    if workspace_isolation == "isolated":
        # Each run gets its own isolated workspace
        cache_path = CACHE_DIR / target_id / run_id
        cached_file = cache_path / "target"
    elif workspace_isolation == "shared":
        # All runs share the same workspace (legacy behavior)
        cache_path = CACHE_DIR / target_id
        cached_file = cache_path / "target"
    else:  # copy-on-write
        # Shared download, run-specific copy
        shared_cache_path = CACHE_DIR / target_id / "shared"
        cache_path = CACHE_DIR / target_id / run_id
        cached_file = shared_cache_path / "target"

    # Handle copy-on-write mode
    if workspace_isolation == "copy-on-write":
        # Check if shared cache exists
        if cached_file.exists():
            logger.info(f"Copy-on-write: Shared cache HIT for {target_id}")

            # Copy shared workspace to run-specific path
            shared_workspace = shared_cache_path / "workspace"
            run_workspace = cache_path / "workspace"

            if shared_workspace.exists():
                logger.info(f"Copying workspace to isolated run path: {run_workspace}")
                cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copytree(shared_workspace, run_workspace)
                return str(run_workspace)
            else:
                # Shared file exists but was not extracted (non-tarball)
                run_file = cache_path / "target"
                cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copy2(cached_file, run_file)
                return str(run_file)
        # If the shared cache doesn't exist, fall through to download

    # Check if target is already cached (isolated or shared mode)
    elif cached_file.exists():
        # Update access time for LRU
        cached_file.touch()
        logger.info(f"Cache HIT: {target_id} (mode: {workspace_isolation})")

        # Check if workspace directory exists (extracted tarball)
        workspace_dir = cache_path / "workspace"
        if workspace_dir.exists() and workspace_dir.is_dir():
            logger.info(f"Returning cached workspace: {workspace_dir}")
            return str(workspace_dir)
        else:
            # Return cached file (not a tarball)
            return str(cached_file)

    # Cache miss - download from MinIO
    logger.info(
        f"Cache MISS: {target_id} (mode: {workspace_isolation}), "
        f"downloading from MinIO..."
    )

    try:
        # Create cache directory
        cache_path.mkdir(parents=True, exist_ok=True)

        # Download from S3/MinIO
        s3_key = f'{target_id}/target'
        logger.info(f"Downloading s3://{S3_BUCKET}/{s3_key} -> {cached_file}")

        s3_client.download_file(
            Bucket=S3_BUCKET,
            Key=s3_key,
            Filename=str(cached_file)
        )

        # Verify the file was downloaded
        if not cached_file.exists():
            raise FileNotFoundError(f"Downloaded file not found: {cached_file}")

        file_size = cached_file.stat().st_size
        logger.info(
            f"✓ Downloaded target {target_id} "
            f"({file_size / 1024 / 1024:.2f} MB)"
        )

        # Extract tarball if it's an archive
        workspace_dir = cache_path / "workspace"

        if tarfile.is_tarfile(str(cached_file)):
            logger.info(f"Extracting tarball to {workspace_dir}...")
            workspace_dir.mkdir(parents=True, exist_ok=True)

            # Archives come from our own MinIO bucket and are treated as trusted
            with tarfile.open(str(cached_file), 'r:*') as tar:
                tar.extractall(path=workspace_dir)

            logger.info(f"✓ Extracted tarball to {workspace_dir}")

            # For copy-on-write mode, copy to the run-specific path
            if workspace_isolation == "copy-on-write":
                run_cache_path = CACHE_DIR / target_id / run_id
                run_workspace = run_cache_path / "workspace"
                logger.info(f"Copy-on-write: Copying to {run_workspace}")
                run_cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copytree(workspace_dir, run_workspace)
                return str(run_workspace)

            return str(workspace_dir)
        else:
            # Not a tarball
            if workspace_isolation == "copy-on-write":
                # Copy the file to the run-specific path
                run_cache_path = CACHE_DIR / target_id / run_id
                run_file = run_cache_path / "target"
                logger.info(f"Copy-on-write: Copying file to {run_file}")
                run_cache_path.mkdir(parents=True, exist_ok=True)
                shutil.copy2(cached_file, run_file)
                return str(run_file)

            return str(cached_file)

    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == '404' or error_code == 'NoSuchKey':
            logger.error(f"Target not found in MinIO: {target_id}")
            raise FileNotFoundError(f"Target {target_id} not found in storage")
        else:
            logger.error(f"S3/MinIO error downloading target: {e}", exc_info=True)
            raise

    except Exception as e:
        logger.error(f"Failed to download target {target_id}: {e}", exc_info=True)
        # Clean up the partial download
        if cache_path.exists():
            shutil.rmtree(cache_path, ignore_errors=True)
        raise


@activity.defn(name="cleanup_cache")
async def cleanup_cache_activity(
    target_path: str,
    workspace_isolation: str = "isolated"
) -> None:
    """
    Remove target from local cache after workflow completes.

    Args:
        target_path: Path to the cached target workspace (from get_target_activity)
        workspace_isolation: Isolation mode used - determines cleanup scope

    Notes:
        - "isolated" mode: Removes the entire run-specific directory
        - "copy-on-write" mode: Removes run-specific directory, keeps shared cache
        - "shared" mode: Does NOT remove cache (shared across runs)
    """
    logger.info(
        f"Activity: cleanup_cache (path={target_path}, "
        f"isolation={workspace_isolation})"
    )

    try:
        target = Path(target_path)

        # For shared mode, don't clean up (cache is shared across runs)
        if workspace_isolation == "shared":
            logger.info(
                f"Skipping cleanup for shared workspace (mode={workspace_isolation})"
            )
            return

        # For isolated and copy-on-write modes, clean up the run-specific
        # directory: /cache/{target_id}/{run_id}/. The target path is either
        # .../workspace (extracted tarball) or a raw target file; in both
        # cases the run directory is one level up.
        run_dir = target.parent

        # Validate it's in the cache and looks like a run-specific path
        if run_dir.exists() and run_dir.is_relative_to(CACHE_DIR):
            # Check that the parent is the target_id directory (validate structure)
            target_id_dir = run_dir.parent
            if target_id_dir.is_relative_to(CACHE_DIR):
                shutil.rmtree(run_dir)
                logger.info(
                    f"✓ Cleaned up run-specific directory: {run_dir} "
                    f"(mode={workspace_isolation})"
                )
            else:
                logger.warning(
                    f"Unexpected cache structure, skipping cleanup: {run_dir}"
                )
        else:
            logger.warning(
                f"Cache path not in CACHE_DIR or doesn't exist: {run_dir}"
            )

    except Exception as e:
        # Don't fail the workflow if cleanup fails
        logger.error(
            f"Failed to cleanup cache {target_path}: {e}",
            exc_info=True
        )


@activity.defn(name="upload_results")
async def upload_results_activity(
    workflow_id: str,
    results: dict,
    results_format: str = "json"
) -> str:
    """
    Upload workflow results to MinIO.

    Args:
        workflow_id: Workflow execution ID
        results: Results dictionary to upload
        results_format: Format for results (json, sarif, etc.)

    Returns:
        S3 URL to the uploaded results
    """
    logger.info(
        f"Activity: upload_results "
        f"(workflow_id={workflow_id}, format={results_format})"
    )

    try:
        # Prepare results content; both supported formats serialize as JSON
        # and differ only in content type and file extension
        if results_format == "sarif":
            content_type = 'application/sarif+json'
            file_ext = 'sarif'
        else:
            # "json" and any unrecognized format default to plain JSON
            content_type = 'application/json'
            file_ext = 'json'
        content = json.dumps(results, indent=2).encode('utf-8')

        # Upload to MinIO
        s3_key = f'{workflow_id}/results.{file_ext}'
        logger.info(f"Uploading results to s3://results/{s3_key}")

        s3_client.put_object(
            Bucket='results',
            Key=s3_key,
            Body=content,
            ContentType=content_type,
            Metadata={
                'workflow_id': workflow_id,
                'format': results_format
            }
        )

        # Construct the S3 URL
        s3_endpoint = os.getenv('S3_ENDPOINT', 'http://minio:9000')
        s3_url = f"{s3_endpoint}/results/{s3_key}"

        logger.info(f"✓ Uploaded results: {s3_url}")
        return s3_url

    except Exception as e:
        logger.error(
            f"Failed to upload results for workflow {workflow_id}: {e}",
            exc_info=True
        )
        raise


def _check_cache_size():
    """Check total cache size and log a warning if it exceeds the limit"""
    try:
        total_size = 0
        for item in CACHE_DIR.rglob('*'):
            if item.is_file():
                total_size += item.stat().st_size

        total_size_gb = total_size / (1024 ** 3)
        if total_size_gb > CACHE_MAX_SIZE_GB:
            logger.warning(
                f"Cache size ({total_size_gb:.2f} GB) exceeds "
                f"limit ({CACHE_MAX_SIZE_GB} GB). Consider cleanup."
            )

    except Exception as e:
        logger.error(f"Failed to check cache size: {e}")
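
# A sketch of how a Temporal workflow would typically invoke these activities
# by their registered names. The workflow class, timeouts, and results payload
# below are illustrative assumptions, not taken from the repository:

from datetime import timedelta

from temporalio import workflow


@workflow.defn
class ExampleScanWorkflow:
    @workflow.run
    async def run(self, target_id: str) -> str:
        # Download the target into an isolated, run-specific workspace
        workspace = await workflow.execute_activity(
            "get_target",
            args=[target_id, workflow.info().run_id, "isolated"],
            start_to_close_timeout=timedelta(minutes=10),
        )
        try:
            # ... run analysis modules against `workspace` here ...
            results_url = await workflow.execute_activity(
                "upload_results",
                args=[workflow.info().workflow_id, {"status": "ok"}, "json"],
                start_to_close_timeout=timedelta(minutes=5),
            )
        finally:
            # Always release the run-specific cache, even on failure
            await workflow.execute_activity(
                "cleanup_cache",
                args=[workspace, "isolated"],
                start_to_close_timeout=timedelta(minutes=2),
            )
        return results_url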
@@ -10,7 +10,5 @@
 # Additional attribution and requirements are provided in the NOTICE file.
 
 from .security_analyzer import SecurityAnalyzer
-from .bandit_analyzer import BanditAnalyzer
-from .mypy_analyzer import MypyAnalyzer
 
-__all__ = ["SecurityAnalyzer", "BanditAnalyzer", "MypyAnalyzer"]
+__all__ = ["SecurityAnalyzer"]
@@ -1,328 +0,0 @@
"""
Bandit Analyzer Module - Analyzes Python code for security issues using Bandit
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
import json
import logging
import time
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding

logger = logging.getLogger(__name__)


class BanditAnalyzer(BaseModule):
    """
    Analyzes Python code for security issues using Bandit.

    This module:
    - Runs the Bandit security linter on Python files
    - Detects common security issues (SQL injection, hardcoded secrets, etc.)
    - Reports findings with severity levels
    """

    # Severity mapping from Bandit levels to our standard
    SEVERITY_MAP = {
        "LOW": "low",
        "MEDIUM": "medium",
        "HIGH": "high"
    }

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="bandit_analyzer",
            version="1.0.0",
            description="Analyzes Python code for security issues using Bandit",
            author="FuzzForge Team",
            category="analyzer",
            tags=["python", "security", "bandit", "sast"],
            input_schema={
                "severity_level": {
                    "type": "string",
                    "enum": ["low", "medium", "high"],
                    "description": "Minimum severity level to report",
                    "default": "low"
                },
                "confidence_level": {
                    "type": "string",
                    "enum": ["low", "medium", "high"],
                    "description": "Minimum confidence level to report",
                    "default": "medium"
                },
                "exclude_tests": {
                    "type": "boolean",
                    "description": "Exclude test files from analysis",
                    "default": True
                },
                "skip_ids": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of Bandit test IDs to skip",
                    "default": []
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "List of security issues found by Bandit"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        severity = config.get("severity_level", "low")
        if severity not in ["low", "medium", "high"]:
            raise ValueError("severity_level must be one of: low, medium, high")

        confidence = config.get("confidence_level", "medium")
        if confidence not in ["low", "medium", "high"]:
            raise ValueError("confidence_level must be one of: low, medium, high")

        skip_ids = config.get("skip_ids", [])
        if not isinstance(skip_ids, list):
            raise ValueError("skip_ids must be a list")

        return True

    async def _run_bandit(
        self,
        workspace: Path,
        severity_level: str,
        confidence_level: str,
        exclude_tests: bool,
        skip_ids: List[str]
    ) -> Dict[str, Any]:
        """
        Run Bandit on the workspace.

        Args:
            workspace: Path to workspace
            severity_level: Minimum severity to report
            confidence_level: Minimum confidence to report
            exclude_tests: Whether to exclude test files
            skip_ids: List of test IDs to skip

        Returns:
            Bandit JSON output as dict
        """
        try:
            # Build the bandit command. No -l/-ll severity flag is passed, so
            # Bandit reports all findings; filtering against severity_level and
            # confidence_level happens later in _should_include_finding.
            cmd = [
                "bandit",
                "-r", str(workspace),
                "-f", "json",
            ]

            # Add exclude patterns for test files
            if exclude_tests:
                cmd.extend(["-x", "*/test_*.py,*/tests/*,*_test.py"])

            # Add skip IDs if specified
            if skip_ids:
                cmd.extend(["-s", ",".join(skip_ids)])

            logger.info(f"Running Bandit on: {workspace}")
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            # Bandit returns 1 when issues are found, which is expected
            if process.returncode not in [0, 1]:
                logger.error(f"Bandit failed: {stderr.decode()}")
                return {"results": []}

            # Parse JSON output
            result = json.loads(stdout.decode())
            return result

        except Exception as e:
            logger.error(f"Error running Bandit: {e}")
            return {"results": []}

    def _should_include_finding(
        self,
        issue: Dict[str, Any],
        min_severity: str,
        min_confidence: str
    ) -> bool:
        """
        Determine if a Bandit issue should be included based on severity/confidence.

        Args:
            issue: Bandit issue dict
            min_severity: Minimum severity threshold
            min_confidence: Minimum confidence threshold

        Returns:
            True if issue should be included
        """
        severity_order = ["low", "medium", "high"]
        issue_severity = issue.get("issue_severity", "LOW").lower()
        issue_confidence = issue.get("issue_confidence", "LOW").lower()

        severity_meets_threshold = severity_order.index(issue_severity) >= severity_order.index(min_severity)
        confidence_meets_threshold = severity_order.index(issue_confidence) >= severity_order.index(min_confidence)

        return severity_meets_threshold and confidence_meets_threshold

    def _convert_to_findings(
        self,
        bandit_result: Dict[str, Any],
        workspace: Path,
        min_severity: str,
        min_confidence: str
    ) -> List[ModuleFinding]:
        """
        Convert Bandit results to ModuleFindings.

        Args:
            bandit_result: Bandit JSON output
            workspace: Workspace path for relative paths
            min_severity: Minimum severity to include
            min_confidence: Minimum confidence to include

        Returns:
            List of ModuleFindings
        """
        findings = []

        for issue in bandit_result.get("results", []):
            # Filter by severity and confidence
            if not self._should_include_finding(issue, min_severity, min_confidence):
                continue

            # Extract issue details
            test_id = issue.get("test_id", "B000")
            test_name = issue.get("test_name", "unknown")
            issue_text = issue.get("issue_text", "No description")
            severity = self.SEVERITY_MAP.get(issue.get("issue_severity", "LOW"), "low")

            # File location
            filename = issue.get("filename", "")
            line_number = issue.get("line_number", 0)
            code = issue.get("code", "")

            # Try to get a workspace-relative path
            try:
                file_path = Path(filename)
                rel_path = file_path.relative_to(workspace)
            except (ValueError, TypeError):
                rel_path = Path(filename).name

            # Create finding
            finding = self.create_finding(
                title=f"{test_name} ({test_id})",
                description=issue_text,
                severity=severity,
                category="security-issue",
                file_path=str(rel_path),
                line_start=line_number,
                line_end=line_number,
                code_snippet=code.strip() if code else None,
                recommendation=f"Review and fix the security issue identified by Bandit test {test_id}",
                metadata={
                    "test_id": test_id,
                    "test_name": test_name,
                    "confidence": issue.get("issue_confidence", "LOW").lower(),
                    "cwe": issue.get("issue_cwe", {}).get("id") if issue.get("issue_cwe") else None,
                    "more_info": issue.get("more_info", "")
                }
            )
            findings.append(finding)

        return findings

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the Bandit analyzer module.

        Args:
            config: Module configuration
            workspace: Path to workspace

        Returns:
            ModuleResult with security findings
        """
        start_time = time.time()
        metadata = self.get_metadata()

        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)

        # Get configuration
        severity_level = config.get("severity_level", "low")
        confidence_level = config.get("confidence_level", "medium")
        exclude_tests = config.get("exclude_tests", True)
        skip_ids = config.get("skip_ids", [])

        # Run Bandit
        logger.info("Starting Bandit analysis...")
        bandit_result = await self._run_bandit(
            workspace,
            severity_level,
            confidence_level,
            exclude_tests,
            skip_ids
        )

        # Convert to findings
        findings = self._convert_to_findings(
            bandit_result,
            workspace,
            severity_level,
            confidence_level
        )

        # Calculate summary
        severity_counts = {}
        for finding in findings:
            sev = finding.severity
            severity_counts[sev] = severity_counts.get(sev, 0) + 1

        execution_time = time.time() - start_time

        return ModuleResult(
            module=metadata.name,
            version=metadata.version,
            status="success",
            execution_time=execution_time,
            findings=findings,
            summary={
                "total_issues": len(findings),
                "by_severity": severity_counts,
                "files_analyzed": len(set(f.file_path for f in findings if f.file_path))
            },
            metadata={
                # Bandit's JSON output carries a generation timestamp, not a
                # version string, so record it under its own name
                "generated_at": bandit_result.get("generated_at", "unknown"),
                "metrics": bandit_result.get("metrics", {})
            }
        )
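
# A standalone driver sketch for BanditAnalyzer, assuming the `bandit` binary
# is on PATH; the workspace path and thresholds below are illustrative:

async def _bandit_demo(workspace: Path) -> None:
    analyzer = BanditAnalyzer()
    config = {
        "severity_level": "medium",
        "confidence_level": "medium",
        "exclude_tests": True,
        "skip_ids": [],
    }
    result = await analyzer.execute(config, workspace)
    print(
        f"{result.summary['total_issues']} issues in "
        f"{result.execution_time:.2f}s: {result.summary['by_severity']}"
    )


# asyncio.run(_bandit_demo(Path("/path/to/python/project")))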
@@ -1,349 +0,0 @@
"""
LLM Analyzer Module - Uses AI to analyze code for security issues
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import logging
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult

logger = logging.getLogger(__name__)


class LLMAnalyzer(BaseModule):
    """
    Uses an LLM to analyze code for potential security issues.

    This module:
    - Sends code to an LLM agent via the A2A protocol
    - Asks the LLM to identify security vulnerabilities
    - Collects findings and returns them in a structured format
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="llm_analyzer",
            version="1.0.0",
            description="Uses AI to analyze code for security issues",
            author="FuzzForge Team",
            category="analyzer",
            tags=["llm", "ai", "security", "analysis"],
            input_schema={
                "agent_url": {
                    "type": "string",
                    "description": "A2A agent endpoint URL",
                    "default": "http://fuzzforge-task-agent:8000/a2a/litellm_agent"
                },
                "llm_model": {
                    "type": "string",
                    "description": "LLM model to use",
                    "default": "gpt-4o-mini"
                },
                "llm_provider": {
                    "type": "string",
                    "description": "LLM provider (openai, anthropic, etc.)",
                    "default": "openai"
                },
                "file_patterns": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "File patterns to analyze",
                    "default": ["*.py", "*.js", "*.ts", "*.java", "*.go"]
                },
                "max_files": {
                    "type": "integer",
                    "description": "Maximum number of files to analyze",
                    "default": 5
                },
                "max_file_size": {
                    "type": "integer",
                    "description": "Maximum file size in bytes",
                    "default": 50000  # 50KB
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout per file in seconds",
                    "default": 60
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "Security issues identified by LLM"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        # Lazy import to avoid Temporal sandbox restrictions
        try:
            from fuzzforge_ai.a2a_wrapper import send_agent_task  # noqa: F401
        except ImportError:
            raise RuntimeError(
                "A2A wrapper not available. Ensure fuzzforge_ai module is accessible."
            )

        agent_url = config.get("agent_url")
        if not agent_url or not isinstance(agent_url, str):
            raise ValueError("agent_url must be a valid URL string")

        max_files = config.get("max_files", 5)
        if not isinstance(max_files, int) or max_files <= 0:
            raise ValueError("max_files must be a positive integer")

        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the LLM analysis module.

        Args:
            config: Module configuration
            workspace: Path to the workspace containing code to analyze

        Returns:
            ModuleResult with findings from LLM analysis
        """
        # Start execution timer
        self.start_timer()

        logger.info(f"Starting LLM analysis in workspace: {workspace}")

        # Extract configuration
        agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent")
        llm_model = config.get("llm_model", "gpt-4o-mini")
        llm_provider = config.get("llm_provider", "openai")
        file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go"])
        max_files = config.get("max_files", 5)
        max_file_size = config.get("max_file_size", 50000)
        timeout = config.get("timeout", 60)

        # Find files to analyze
        files_to_analyze = []
        for pattern in file_patterns:
            for file_path in workspace.rglob(pattern):
                if file_path.is_file():
                    try:
                        # Check file size
                        if file_path.stat().st_size > max_file_size:
                            logger.debug(f"Skipping {file_path} (too large)")
                            continue

                        files_to_analyze.append(file_path)

                        if len(files_to_analyze) >= max_files:
                            break
                    except Exception as e:
                        logger.warning(f"Error checking file {file_path}: {e}")
                        continue

            if len(files_to_analyze) >= max_files:
                break

        logger.info(f"Found {len(files_to_analyze)} files to analyze")

        # Analyze each file
        all_findings = []
        for file_path in files_to_analyze:
            logger.info(f"Analyzing: {file_path.relative_to(workspace)}")

            try:
                findings = await self._analyze_file(
                    file_path=file_path,
                    workspace=workspace,
                    agent_url=agent_url,
                    llm_model=llm_model,
                    llm_provider=llm_provider,
                    timeout=timeout
                )
                all_findings.extend(findings)

            except Exception as e:
                logger.error(f"Error analyzing {file_path}: {e}")
                # Continue with the next file
                continue

        logger.info(f"LLM analysis complete. Found {len(all_findings)} issues.")

        # Create result using base module helper
        return self.create_result(
            findings=all_findings,
            status="success",
            summary={
                "files_analyzed": len(files_to_analyze),
                "total_findings": len(all_findings),
                "agent_url": agent_url,
                "model": f"{llm_provider}/{llm_model}"
            }
        )

    async def _analyze_file(
        self,
        file_path: Path,
        workspace: Path,
        agent_url: str,
        llm_model: str,
        llm_provider: str,
        timeout: int
    ) -> List:
        """Analyze a single file with the LLM"""

        # Read file content
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                code_content = f.read()
        except Exception as e:
            logger.error(f"Failed to read {file_path}: {e}")
            return []

        # Determine language from extension
        extension = file_path.suffix.lower()
        language_map = {
            ".py": "python",
            ".js": "javascript",
            ".ts": "typescript",
            ".java": "java",
            ".go": "go",
            ".rs": "rust",
            ".c": "c",
            ".cpp": "cpp",
        }
        language = language_map.get(extension, "code")

        # Build prompt for LLM
        system_prompt = (
            "You are a security code analyzer. Analyze the provided code and identify "
            "potential security vulnerabilities, bugs, and code quality issues. "
            "For each issue found, respond in this exact format:\n"
            "ISSUE: [short title]\n"
            "SEVERITY: [error/warning/note]\n"
            "LINE: [line number or 'unknown']\n"
            "DESCRIPTION: [detailed explanation]\n\n"
            "If no issues are found, respond with 'NO_ISSUES_FOUND'."
        )

        user_message = (
            f"Analyze this {language} code for security vulnerabilities:\n\n"
            f"File: {file_path.relative_to(workspace)}\n\n"
            f"```{language}\n{code_content}\n```"
        )

        # Call LLM via A2A wrapper (lazy import to avoid Temporal sandbox restrictions)
        try:
            from fuzzforge_ai.a2a_wrapper import send_agent_task

            result = await send_agent_task(
                url=agent_url,
                model=llm_model,
                provider=llm_provider,
                prompt=system_prompt,
                message=user_message,
                context=f"llm_analysis_{file_path.stem}",
                timeout=float(timeout)
            )

            llm_response = result.text

        except Exception as e:
            logger.error(f"A2A call failed for {file_path}: {e}")
            return []

        # Parse LLM response into findings
        findings = self._parse_llm_response(
            llm_response=llm_response,
            file_path=file_path,
            workspace=workspace
        )

        return findings

    def _parse_llm_response(
        self,
        llm_response: str,
        file_path: Path,
        workspace: Path
    ) -> List:
        """Parse LLM response into structured findings"""

        if "NO_ISSUES_FOUND" in llm_response:
            return []

        findings = []
        relative_path = str(file_path.relative_to(workspace))

        # Simple parser for the expected format
        lines = llm_response.split('\n')
        current_issue = {}

        for line in lines:
            line = line.strip()

            if line.startswith("ISSUE:"):
                # Save the previous issue if one exists
                if current_issue:
                    findings.append(self._create_module_finding(current_issue, relative_path))
                current_issue = {"title": line.replace("ISSUE:", "").strip()}

            elif line.startswith("SEVERITY:"):
                current_issue["severity"] = line.replace("SEVERITY:", "").strip().lower()

            elif line.startswith("LINE:"):
                line_num = line.replace("LINE:", "").strip()
                try:
                    current_issue["line"] = int(line_num)
                except ValueError:
                    current_issue["line"] = None

            elif line.startswith("DESCRIPTION:"):
                current_issue["description"] = line.replace("DESCRIPTION:", "").strip()

        # Save the last issue
        if current_issue:
            findings.append(self._create_module_finding(current_issue, relative_path))

        return findings

    def _create_module_finding(self, issue: Dict[str, Any], file_path: str):
        """Create a ModuleFinding from a parsed issue"""

        severity_map = {
            "error": "critical",
            "warning": "medium",
            "note": "low",
            "info": "low"
        }

        # Use the base class helper to create a proper ModuleFinding
        return self.create_finding(
            title=issue.get("title", "Security issue detected"),
            description=issue.get("description", ""),
            severity=severity_map.get(issue.get("severity", "warning"), "medium"),
            category="security",
            file_path=file_path,
            line_start=issue.get("line"),
            metadata={
                "tool": "llm-analyzer",
                "type": "llm-security-analysis"
            }
        )
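
# Hedged example (added for illustration; not in the original file): a response
# in the exact ISSUE/SEVERITY/LINE/DESCRIPTION format the system prompt above
# requests. The issue content is hypothetical.
_example_llm_response = (
    "ISSUE: Hardcoded credentials\n"
    "SEVERITY: error\n"
    "LINE: 12\n"
    "DESCRIPTION: A password literal is embedded in the source.\n"
    "\n"
    "ISSUE: Unvalidated input in subprocess call\n"
    "SEVERITY: warning\n"
    "LINE: unknown\n"
    "DESCRIPTION: User-controlled input reaches a subprocess call unsanitized.\n"
)
# _parse_llm_response would yield two findings from this text: "error" maps to
# severity "critical" and "warning" to "medium" via severity_map, and the
# non-numeric LINE value "unknown" becomes line_start=None.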
@@ -1,269 +0,0 @@
"""
Mypy Analyzer Module - Analyzes Python code for type safety issues using Mypy
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.

import asyncio
import logging
import re
import time
from pathlib import Path
from typing import Dict, Any, List

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
except ImportError:
    try:
        from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
    except ImportError:
        from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding

logger = logging.getLogger(__name__)


class MypyAnalyzer(BaseModule):
    """
    Analyzes Python code for type safety issues using Mypy.

    This module:
    - Runs the Mypy type checker on Python files
    - Detects type errors and inconsistencies
    - Reports findings with configurable strictness
    """

    # Map Mypy message levels to severity
    ERROR_SEVERITY_MAP = {
        "error": "medium",
        "note": "info"
    }

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="mypy_analyzer",
            version="1.0.0",
            description="Analyzes Python code for type safety issues using Mypy",
            author="FuzzForge Team",
            category="analyzer",
            tags=["python", "type-checking", "mypy", "sast"],
            input_schema={
                "strict_mode": {
                    "type": "boolean",
                    "description": "Enable strict type checking",
                    "default": False
                },
                "ignore_missing_imports": {
                    "type": "boolean",
                    "description": "Ignore errors about missing imports",
                    "default": True
                },
                "follow_imports": {
                    "type": "string",
                    "enum": ["normal", "silent", "skip", "error"],
                    "description": "How to handle imports",
                    "default": "silent"
                }
            },
            output_schema={
                "findings": {
                    "type": "array",
                    "description": "List of type errors found by Mypy"
                }
            },
            requires_workspace=True
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate module configuration"""
        follow_imports = config.get("follow_imports", "silent")
        if follow_imports not in ["normal", "silent", "skip", "error"]:
            raise ValueError("follow_imports must be one of: normal, silent, skip, error")

        return True

    async def _run_mypy(
        self,
        workspace: Path,
        strict_mode: bool,
        ignore_missing_imports: bool,
        follow_imports: str
    ) -> str:
        """
        Run Mypy on the workspace.

        Args:
            workspace: Path to workspace
            strict_mode: Enable strict checking
            ignore_missing_imports: Ignore missing import errors
            follow_imports: How to handle imports

        Returns:
            Mypy output as a string
        """
        try:
            # Build the mypy command
            cmd = [
                "mypy",
                str(workspace),
                "--show-column-numbers",
                "--no-error-summary",
                f"--follow-imports={follow_imports}"
            ]

            if strict_mode:
                cmd.append("--strict")

            if ignore_missing_imports:
                cmd.append("--ignore-missing-imports")

            logger.info(f"Running Mypy on: {workspace}")
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            # Mypy returns non-zero if errors are found, which is expected
            output = stdout.decode()
            return output

        except Exception as e:
            logger.error(f"Error running Mypy: {e}")
            return ""

    def _parse_mypy_output(self, output: str, workspace: Path) -> List[ModuleFinding]:
        """
        Parse Mypy output and convert it to findings.

        Mypy output format:
            file.py:10:5: error: Incompatible return value type [return-value]
            file.py:15: note: See https://...

        Args:
            output: Mypy stdout
            workspace: Workspace path for relative paths

        Returns:
            List of ModuleFindings
        """
        findings = []

        # Regex to parse mypy output lines
        # Format: filename:line:column: level: message [error-code]
        pattern = r'^(.+?):(\d+)(?::(\d+))?: (error|note): (.+?)(?:\s+\[([^\]]+)\])?$'

        for line in output.splitlines():
            match = re.match(pattern, line.strip())
            if not match:
                continue

            filename, line_num, column, level, message, error_code = match.groups()

            # Convert to a relative path
            try:
                file_path = Path(filename)
                rel_path = file_path.relative_to(workspace)
            except (ValueError, TypeError):
                rel_path = Path(filename).name

            # Skip notes that carry no error code (follow-up hints for a previous error)
            if level == "note" and not error_code:
                continue

            # Map severity
            severity = self.ERROR_SEVERITY_MAP.get(level, "medium")

            # Create the finding
            title = f"Type error: {error_code or 'type-issue'}"
            description = message

            finding = self.create_finding(
                title=title,
                description=description,
                severity=severity,
                category="type-error",
                file_path=str(rel_path),
                line_start=int(line_num),
                line_end=int(line_num),
                recommendation="Review and fix the type inconsistency or add appropriate type annotations",
                metadata={
                    "error_code": error_code or "unknown",
                    "column": int(column) if column else None,
                    "level": level
                }
            )
            findings.append(finding)

        return findings

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """
        Execute the Mypy analyzer module.

        Args:
            config: Module configuration
            workspace: Path to workspace

        Returns:
            ModuleResult with type checking findings
        """
        start_time = time.time()
        metadata = self.get_metadata()

        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)

        # Get configuration
        strict_mode = config.get("strict_mode", False)
        ignore_missing_imports = config.get("ignore_missing_imports", True)
        follow_imports = config.get("follow_imports", "silent")

        # Run Mypy
        logger.info("Starting Mypy analysis...")
        mypy_output = await self._run_mypy(
            workspace,
            strict_mode,
            ignore_missing_imports,
            follow_imports
        )

        # Parse the output into findings
        findings = self._parse_mypy_output(mypy_output, workspace)

        # Calculate summary
        error_code_counts = {}
        for finding in findings:
            code = finding.metadata.get("error_code", "unknown")
            error_code_counts[code] = error_code_counts.get(code, 0) + 1

        execution_time = time.time() - start_time

        return ModuleResult(
            module=metadata.name,
            version=metadata.version,
            status="success",
            execution_time=execution_time,
            findings=findings,
            summary={
                "total_errors": len(findings),
                "by_error_code": error_code_counts,
                "files_with_errors": len(set(f.file_path for f in findings if f.file_path))
            },
            metadata={
                "strict_mode": strict_mode,
                "ignore_missing_imports": ignore_missing_imports
            }
        )
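
# Hedged, self-contained check (added for illustration; not in the original
# file): the parsing regex from _parse_mypy_output applied to a representative
# Mypy line, showing which groups it captures.
import re

_pattern = r'^(.+?):(\d+)(?::(\d+))?: (error|note): (.+?)(?:\s+\[([^\]]+)\])?$'
_line = "app/models.py:10:5: error: Incompatible return value type [return-value]"
_match = re.match(_pattern, _line)
assert _match is not None
# Groups: file, line, column, level, message, error code
assert _match.groups() == (
    "app/models.py", "10", "5", "error",
    "Incompatible return value type", "return-value",
)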
@@ -16,7 +16,7 @@ Security Analyzer Module - Analyzes code for security vulnerabilities
import logging
import re
from pathlib import Path
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional

try:
    from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
@@ -1,31 +1,25 @@
"""
Android Security Analysis Modules
Android Security Modules

Modules for Android application security testing:
- JadxDecompiler: APK decompilation using Jadx
- MobSFScanner: Mobile security analysis using MobSF
- OpenGrepAndroid: Static analysis using OpenGrep/Semgrep with Android-specific rules
This package contains modules for Android static code analysis and security testing.

Available modules:
- MobSF: Mobile Security Framework
- Jadx: Dex to Java decompiler
- OpenGrep: Open-source pattern-based static analysis tool
"""

# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule

from .jadx_decompiler import JadxDecompiler
from .opengrep_android import OpenGrepAndroid
# Module registry for automatic discovery
ANDROID_MODULES: List[Type[BaseModule]] = []

# MobSF is optional (not available on the ARM64 platform)
try:
    from .mobsf_scanner import MobSFScanner
    __all__ = ["JadxDecompiler", "MobSFScanner", "OpenGrepAndroid"]
except ImportError:
    # MobSF dependencies not available (e.g., ARM64 platform)
    MobSFScanner = None
    __all__ = ["JadxDecompiler", "OpenGrepAndroid"]
def register_module(module_class: Type[BaseModule]):
    """Register an Android security module"""
    ANDROID_MODULES.append(module_class)
    return module_class

def get_available_modules() -> List[Type[BaseModule]]:
    """Get all available Android modules"""
    return ANDROID_MODULES.copy()
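
# Hedged, self-contained sketch (added for illustration): the registry pattern
# used by register_module above, with illustrative stand-in names rather than
# FuzzForge APIs.
from typing import List, Type

_REGISTRY: List[Type] = []

def _register(cls: Type) -> Type:
    """Mimic register_module: record the class and return it unchanged."""
    _REGISTRY.append(cls)
    return cls

@_register
class _ExampleScanner:
    pass

assert _REGISTRY == [_ExampleScanner]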
Some files were not shown because too many files have changed in this diff.