diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..04ece70 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,79 @@ +## Description + + + +## Type of Change + + + +- [ ] ๐Ÿ› Bug fix (non-breaking change which fixes an issue) +- [ ] โœจ New feature (non-breaking change which adds functionality) +- [ ] ๐Ÿ’ฅ Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] ๐Ÿ“ Documentation update +- [ ] ๐Ÿ”ง Configuration change +- [ ] โ™ป๏ธ Refactoring (no functional changes) +- [ ] ๐ŸŽจ Style/formatting changes +- [ ] โœ… Test additions or updates + +## Related Issues + + + + +## Changes Made + + + +- +- +- + +## Testing + + + +### Tested Locally + +- [ ] All tests pass (`pytest`, `uv build`, etc.) +- [ ] Linting passes (`ruff check`) +- [ ] Code builds successfully + +### Worker Changes (if applicable) + +- [ ] Docker images build successfully (`docker compose build`) +- [ ] Worker containers start correctly +- [ ] Tested with actual workflow execution + +### Documentation + +- [ ] Documentation updated (if needed) +- [ ] README updated (if needed) +- [ ] CHANGELOG.md updated (if user-facing changes) + +## Pre-Merge Checklist + + + +- [ ] My code follows the project's coding standards +- [ ] I have performed a self-review of my code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] Any dependent changes have been merged and published + +### Worker-Specific Checks (if workers/ modified) + +- [ ] All worker files properly tracked by git (not gitignored) +- [ ] Worker validation script passes (`.github/scripts/validate-workers.sh`) +- [ ] Docker images build without errors +- [ ] Worker configuration updated in `docker-compose.yml` (if needed) + +## Screenshots (if applicable) + + + +## Additional Notes + + diff --git a/.github/scripts/validate-workers.sh b/.github/scripts/validate-workers.sh new file mode 100755 index 0000000..6b2c5f6 --- /dev/null +++ b/.github/scripts/validate-workers.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# Worker Validation Script +# Ensures all workers defined in docker-compose.yml exist in the repository +# and are properly tracked by git. + +set -e + +echo "๐Ÿ” Validating worker completeness..." + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +ERRORS=0 +WARNINGS=0 + +# Extract worker service names from docker-compose.yml +echo "" +echo "๐Ÿ“‹ Checking workers defined in docker-compose.yml..." +WORKERS=$(grep -E "^\s+worker-" docker-compose.yml | grep -v "#" | cut -d: -f1 | tr -d ' ' | sort -u) + +if [ -z "$WORKERS" ]; then + echo -e "${RED}โŒ No workers found in docker-compose.yml${NC}" + exit 1 +fi + +echo "Found workers:" +for worker in $WORKERS; do + echo " - $worker" +done + +# Check each worker +echo "" +echo "๐Ÿ”Ž Validating worker files..." +for worker in $WORKERS; do + WORKER_DIR="workers/${worker#worker-}" + + echo "" + echo "Checking $worker ($WORKER_DIR)..." + + # Check if directory exists + if [ ! 
-d "$WORKER_DIR" ]; then + echo -e "${RED} โŒ Directory not found: $WORKER_DIR${NC}" + ERRORS=$((ERRORS + 1)) + continue + fi + + # Check Dockerfile (single file or multi-platform pattern) + if [ -f "$WORKER_DIR/Dockerfile" ]; then + # Single Dockerfile + if ! git ls-files --error-unmatch "$WORKER_DIR/Dockerfile" &> /dev/null; then + echo -e "${RED} โŒ File not tracked by git: $WORKER_DIR/Dockerfile${NC}" + echo -e "${YELLOW} Check .gitignore patterns!${NC}" + ERRORS=$((ERRORS + 1)) + else + echo -e "${GREEN} โœ“ Dockerfile (tracked)${NC}" + fi + elif compgen -G "$WORKER_DIR/Dockerfile.*" > /dev/null; then + # Multi-platform Dockerfiles (e.g., Dockerfile.amd64, Dockerfile.arm64) + PLATFORM_DOCKERFILES=$(ls "$WORKER_DIR"/Dockerfile.* 2>/dev/null) + DOCKERFILE_FOUND=false + for dockerfile in $PLATFORM_DOCKERFILES; do + if git ls-files --error-unmatch "$dockerfile" &> /dev/null; then + echo -e "${GREEN} โœ“ $(basename "$dockerfile") (tracked)${NC}" + DOCKERFILE_FOUND=true + else + echo -e "${RED} โŒ File not tracked by git: $dockerfile${NC}" + ERRORS=$((ERRORS + 1)) + fi + done + if [ "$DOCKERFILE_FOUND" = false ]; then + echo -e "${RED} โŒ No platform-specific Dockerfiles found${NC}" + ERRORS=$((ERRORS + 1)) + fi + else + echo -e "${RED} โŒ Missing Dockerfile or Dockerfile.* files${NC}" + ERRORS=$((ERRORS + 1)) + fi + + # Check other required files + REQUIRED_FILES=("requirements.txt" "worker.py") + for file in "${REQUIRED_FILES[@]}"; do + FILE_PATH="$WORKER_DIR/$file" + + if [ ! -f "$FILE_PATH" ]; then + echo -e "${RED} โŒ Missing file: $FILE_PATH${NC}" + ERRORS=$((ERRORS + 1)) + else + # Check if file is tracked by git + if ! git ls-files --error-unmatch "$FILE_PATH" &> /dev/null; then + echo -e "${RED} โŒ File not tracked by git: $FILE_PATH${NC}" + echo -e "${YELLOW} Check .gitignore patterns!${NC}" + ERRORS=$((ERRORS + 1)) + else + echo -e "${GREEN} โœ“ $file (tracked)${NC}" + fi + fi + done +done + +# Check for any ignored worker files +echo "" +echo "๐Ÿšซ Checking for gitignored worker files..." 
+IGNORED_FILES=$(git check-ignore workers/*/* 2>/dev/null || true) +if [ -n "$IGNORED_FILES" ]; then + echo -e "${YELLOW}โš ๏ธ Warning: Some worker files are being ignored:${NC}" + echo "$IGNORED_FILES" | while read -r file; do + echo -e "${YELLOW} - $file${NC}" + done + WARNINGS=$((WARNINGS + 1)) +fi + +# Summary +echo "" +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +if [ $ERRORS -eq 0 ] && [ $WARNINGS -eq 0 ]; then + echo -e "${GREEN}โœ… All workers validated successfully!${NC}" + exit 0 +elif [ $ERRORS -eq 0 ]; then + echo -e "${YELLOW}โš ๏ธ Validation passed with $WARNINGS warning(s)${NC}" + exit 0 +else + echo -e "${RED}โŒ Validation failed with $ERRORS error(s) and $WARNINGS warning(s)${NC}" + exit 1 +fi diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 03581ef..9f79b46 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,11 +2,100 @@ name: Tests on: push: - branches: [ main, master, develop, feature/** ] + branches: [ main, master, dev, develop, feature/** ] pull_request: - branches: [ main, master, develop ] + branches: [ main, master, dev, develop ] jobs: + validate-workers: + name: Validate Workers + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run worker validation + run: | + chmod +x .github/scripts/validate-workers.sh + .github/scripts/validate-workers.sh + + build-workers: + name: Build Worker Docker Images + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for proper diff + + - name: Check which workers were modified + id: check-workers + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, check changed files + CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD) + echo "Changed files:" + echo "$CHANGED_FILES" + else + # For direct pushes, check last commit + CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD) + fi + + # Check if docker-compose.yml changed (build all workers) + if echo "$CHANGED_FILES" | grep -q "^docker-compose.yml"; then + echo "workers_to_build=worker-python worker-secrets worker-rust worker-android worker-ossfuzz" >> $GITHUB_OUTPUT + echo "workers_modified=true" >> $GITHUB_OUTPUT + echo "โœ… docker-compose.yml modified - building all workers" + exit 0 + fi + + # Detect which specific workers changed + WORKERS_TO_BUILD="" + + if echo "$CHANGED_FILES" | grep -q "^workers/python/"; then + WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-python" + echo "โœ… Python worker modified" + fi + + if echo "$CHANGED_FILES" | grep -q "^workers/secrets/"; then + WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-secrets" + echo "โœ… Secrets worker modified" + fi + + if echo "$CHANGED_FILES" | grep -q "^workers/rust/"; then + WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-rust" + echo "โœ… Rust worker modified" + fi + + if echo "$CHANGED_FILES" | grep -q "^workers/android/"; then + WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-android" + echo "โœ… Android worker modified" + fi + + if echo "$CHANGED_FILES" | grep -q "^workers/ossfuzz/"; then + WORKERS_TO_BUILD="$WORKERS_TO_BUILD worker-ossfuzz" + echo "โœ… OSS-Fuzz worker modified" + fi + + if [ -z "$WORKERS_TO_BUILD" ]; then + echo "workers_modified=false" >> $GITHUB_OUTPUT + echo "โญ๏ธ No worker changes detected - skipping build" + else + echo "workers_to_build=$WORKERS_TO_BUILD" >> $GITHUB_OUTPUT + echo "workers_modified=true" >> $GITHUB_OUTPUT + echo "Building 
workers:$WORKERS_TO_BUILD" + fi + + - name: Set up Docker Buildx + if: steps.check-workers.outputs.workers_modified == 'true' + uses: docker/setup-buildx-action@v3 + + - name: Build worker images + if: steps.check-workers.outputs.workers_modified == 'true' + run: | + WORKERS="${{ steps.check-workers.outputs.workers_to_build }}" + echo "Building worker Docker images: $WORKERS" + docker compose build $WORKERS --no-cache + continue-on-error: false + lint: name: Lint runs-on: ubuntu-latest @@ -143,11 +232,15 @@ jobs: test-summary: name: Test Summary runs-on: ubuntu-latest - needs: [lint, unit-tests] + needs: [validate-workers, lint, unit-tests] if: always() steps: - name: Check test results run: | + if [ "${{ needs.validate-workers.result }}" != "success" ]; then + echo "Worker validation failed" + exit 1 + fi if [ "${{ needs.unit-tests.result }}" != "success" ]; then echo "Unit tests failed" exit 1 diff --git a/.gitignore b/.gitignore index da918ac..a8d6e44 100644 --- a/.gitignore +++ b/.gitignore @@ -188,6 +188,10 @@ logs/ # Docker volume configs (keep .env.example but ignore actual .env) volumes/env/.env +# Vendored proxy sources (kept locally for reference) +ai/proxy/bifrost/ +ai/proxy/litellm/ + # Test project databases and configurations test_projects/*/.fuzzforge/ test_projects/*/findings.db* @@ -304,4 +308,8 @@ test_projects/*/.npmrc test_projects/*/.git-credentials test_projects/*/credentials.* test_projects/*/api_keys.* -test_projects/*/ci-*.sh \ No newline at end of file +test_projects/*/ci-*.sh + +# -------------------- Internal Documentation -------------------- +# Weekly summaries and temporary project documentation +WEEK_SUMMARY*.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 649d8fb..c852469 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,118 @@ All notable changes to FuzzForge will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [0.7.0] - 2025-01-16 +## [Unreleased] + +### ๐Ÿ“ Documentation +- Added comprehensive worker startup documentation across all guides +- Added workflow-to-worker mapping tables in README, troubleshooting guide, getting started guide, and docker setup guide +- Fixed broken documentation links in CLI reference +- Added WEEK_SUMMARY*.md pattern to .gitignore + +--- + +## [0.7.3] - 2025-10-30 + +### ๐ŸŽฏ Major Features + +#### Android Static Analysis Workflow +- **Added comprehensive Android security testing workflow** (`android_static_analysis`): + - Jadx decompiler for APK โ†’ Java source code decompilation + - OpenGrep/Semgrep static analysis with custom Android security rules + - MobSF integration for comprehensive mobile security scanning + - SARIF report generation with unified findings format + - Test results: Successfully decompiled 4,145 Java files, found 8 security vulnerabilities + - Full workflow completes in ~1.5 minutes + +#### Platform-Aware Worker Architecture +- **ARM64 (Apple Silicon) support**: + - Automatic platform detection (ARM64 vs x86_64) in CLI using `platform.machine()` + - Worker metadata convention (`metadata.yaml`) for platform-specific capabilities + - Multi-Dockerfile support: `Dockerfile.amd64` (full toolchain) and `Dockerfile.arm64` (optimized) + - Conditional module imports for graceful degradation (MobSF skips on ARM64) + - Backend path resolution via `FUZZFORGE_HOST_ROOT` for CLI worker management +- **Worker selection logic**: + - CLI automatically selects appropriate Dockerfile based on detected platform + - Multi-strategy path resolution (API โ†’ .fuzzforge marker โ†’ environment variable) + - Platform-specific tool availability documented in metadata + +#### Python SAST Workflow +- **Added Python Static Application Security Testing workflow** (`python_sast`): + - Bandit for Python security linting (SAST) + - MyPy for static type checking + - Safety for dependency vulnerability scanning + - Integrated SARIF reporter for unified findings format + - Auto-start Python worker on-demand + +### โœจ Enhancements + +#### CI/CD Improvements +- Added automated worker validation in CI pipeline +- Docker build checks for all workers before merge +- Worker file change detection for selective builds +- Optimized Docker layer caching for faster builds +- Dev branch testing workflow triggers + +#### CLI Improvements +- Fixed live monitoring bug in `ff monitor live` command +- Enhanced `ff findings` command with better table formatting +- Improved `ff monitor` with clearer status displays +- Auto-start workers on-demand when workflows require them +- Better error messages with actionable manual start commands + +#### Worker Management +- Standardized worker service names (`worker-python`, `worker-android`, etc.) 
+- Added missing `worker-secrets` to repository +- Improved worker naming consistency across codebase + +#### LiteLLM Integration +- Centralized LLM provider management with proxy +- Governance and request/response routing +- OTEL collector integration for observability +- Environment-based configurable timeouts +- Optional `.env.litellm` configuration + +### ๐Ÿ› Bug Fixes + +- Fixed MobSF API key generation from secret file (SHA256 hash) +- Corrected Temporal activity names (decompile_with_jadx, scan_with_opengrep, scan_with_mobsf) +- Resolved linter errors across codebase +- Fixed unused import issues to pass CI checks +- Removed deprecated workflow parameters +- Docker Compose version compatibility fixes + +### ๐Ÿ”ง Technical Changes + +- Conditional import pattern for optional dependencies (MobSF on ARM64) +- Multi-platform Dockerfile architecture +- Worker metadata convention for capability declaration +- Improved CI worker build optimization +- Enhanced storage activity error handling + +### ๐Ÿ“ Test Projects + +- Added `test_projects/android_test/` with BeetleBug.apk and shopnest.apk +- Android workflow validation with real APK samples +- ARM64 platform testing and validation + +--- + +## [0.7.2] - 2025-10-22 + +### ๐Ÿ› Bug Fixes +- Fixed worker naming inconsistencies across codebase +- Improved monitor command consolidation and usability +- Enhanced findings CLI with better formatting and display +- Added missing secrets worker to repository + +### ๐Ÿ“ Documentation +- Added benchmark results files to git for secret detection workflows + +**Note:** v0.7.1 was re-tagged as v0.7.2 (both point to the same commit) + +--- + +## [0.7.0] - 2025-10-16 ### ๐ŸŽฏ Major Features @@ -40,7 +151,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 #### Documentation - Updated README for Temporal + MinIO architecture -- Removed obsolete `volume_mode` references across all documentation - Added `.env` configuration guide for AI agent API keys - Fixed worker startup instructions with correct service names - Updated docker compose commands to modern syntax @@ -52,6 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### ๐Ÿ› Bug Fixes +- Fixed default parameters from metadata.yaml not being applied to workflows when no parameters provided - Fixed gitleaks workflow failing on uploaded directories without Git history - Fixed worker startup command suggestions (now uses `docker compose up -d` with service names) - Fixed missing `cognify_text` method in CogneeProjectIntegration @@ -71,7 +182,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- -## [0.6.0] - 2024-12-XX +## [0.6.0] - Undocumented ### Features - Initial Temporal migration @@ -79,7 +190,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Security assessment workflow - Basic CLI commands +**Note:** No git tag exists for v0.6.0. Release date undocumented. 
+ --- -[0.7.0]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.6.0...v0.7.0 -[0.6.0]: https://github.com/FuzzingLabs/fuzzforge_ai/releases/tag/v0.6.0 +[0.7.3]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.2...v0.7.3 +[0.7.2]: https://github.com/FuzzingLabs/fuzzforge_ai/compare/v0.7.0...v0.7.2 +[0.7.0]: https://github.com/FuzzingLabs/fuzzforge_ai/releases/tag/v0.7.0 +[0.6.0]: https://github.com/FuzzingLabs/fuzzforge_ai/tree/v0.6.0 diff --git a/README.md b/README.md index 9b8eaaf..f76dcce 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ License: BSL + Apache Python 3.11+ Website - Version + Version GitHub Stars

@@ -115,9 +115,11 @@ For containerized workflows, see the [Docker Installation Guide](https://docs.do For AI-powered workflows, configure your LLM API keys: ```bash -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env # Edit volumes/env/.env and add your API keys (OpenAI, Anthropic, Google, etc.) +# Add your key to LITELLM_GEMINI_API_KEY ``` +> Don't change the OPENAI_API_KEY default value, as it is used for the LLM proxy. This is required for: - `llm_secret_detection` workflow @@ -150,7 +152,7 @@ git clone https://github.com/fuzzinglabs/fuzzforge_ai.git cd fuzzforge_ai # 2. Copy the default LLM env config -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env # 3. Start FuzzForge with Temporal docker compose up -d @@ -163,6 +165,16 @@ docker compose up -d worker-python > > Workers don't auto-start by default (saves RAM). Start the worker you need before running workflows. +**Workflow-to-Worker Quick Reference:** + +| Workflow | Worker Required | Startup Command | +|----------|----------------|-----------------| +| `security_assessment`, `python_sast`, `llm_analysis`, `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` | +| `android_static_analysis` | worker-android | `docker compose up -d worker-android` | +| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` | +| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` | +| `llm_secret_detection`, `trufflehog_detection`, `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` | + ```bash # 5. Run your first workflow (files are automatically uploaded) cd test_projects/vulnerable_app/ diff --git a/ai/agents/task_agent/.env.example b/ai/agents/task_agent/.env.example deleted file mode 100644 index c71d59a..0000000 --- a/ai/agents/task_agent/.env.example +++ /dev/null @@ -1,10 +0,0 @@ -# Default LiteLLM configuration -LITELLM_MODEL=gemini/gemini-2.0-flash-001 -# LITELLM_PROVIDER=gemini - -# API keys (uncomment and fill as needed) -# GOOGLE_API_KEY= -# OPENAI_API_KEY= -# ANTHROPIC_API_KEY= -# OPENROUTER_API_KEY= -# MISTRAL_API_KEY= diff --git a/ai/agents/task_agent/Dockerfile b/ai/agents/task_agent/Dockerfile index eaf734b..c2b6686 100644 --- a/ai/agents/task_agent/Dockerfile +++ b/ai/agents/task_agent/Dockerfile @@ -16,4 +16,9 @@ COPY . /app/agent_with_adk_format WORKDIR /app/agent_with_adk_format ENV PYTHONPATH=/app +# Copy and set up entrypoint +COPY docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh + +ENTRYPOINT ["/docker-entrypoint.sh"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/ai/agents/task_agent/README.md b/ai/agents/task_agent/README.md index 769ce33..692e4e6 100644 --- a/ai/agents/task_agent/README.md +++ b/ai/agents/task_agent/README.md @@ -43,18 +43,34 @@ cd task_agent # cp .env.example .env ``` -Edit `.env` (or `.env.example`) and add your API keys. The agent must be restarted after changes so the values are picked up: +Edit `.env` (or `.env.example`) and add your proxy settings and API keys. 
The agent must be restarted after changes so the values are picked up: ```bash -# Set default model -LITELLM_MODEL=gemini/gemini-2.0-flash-001 +# Route every request through the proxy container (use http://localhost:10999 from the host) +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 -# Add API keys for providers you want to use -GOOGLE_API_KEY=your_google_api_key -OPENAI_API_KEY=your_openai_api_key -ANTHROPIC_API_KEY=your_anthropic_api_key -OPENROUTER_API_KEY=your_openrouter_api_key +# Default model + provider the agent boots with +LITELLM_MODEL=openai/gpt-4o-mini +LITELLM_PROVIDER=openai + +# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder) +OPENAI_API_KEY=sk-proxy-default + +# Upstream keys stay inside the proxy. Store real secrets under the LiteLLM +# aliases and the bootstrapper mirrors them into .env.litellm for the proxy container. +LITELLM_OPENAI_API_KEY=your_real_openai_api_key +LITELLM_ANTHROPIC_API_KEY=your_real_anthropic_key +LITELLM_GEMINI_API_KEY=your_real_gemini_key +LITELLM_MISTRAL_API_KEY=your_real_mistral_key +LITELLM_OPENROUTER_API_KEY=your_real_openrouter_key ``` +> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`). + +The bootstrap container provisions LiteLLM, copies provider secrets into +`volumes/env/.env.litellm`, and rewrites `volumes/env/.env` with the virtual key. +Populate the `LITELLM_*_API_KEY` values before the first launch so the proxy can +reach your upstream providers as soon as the bootstrap script runs. + ### 2. Install Dependencies ```bash diff --git a/ai/agents/task_agent/docker-entrypoint.sh b/ai/agents/task_agent/docker-entrypoint.sh new file mode 100644 index 0000000..88e3733 --- /dev/null +++ b/ai/agents/task_agent/docker-entrypoint.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -e + +# Wait for .env file to have keys (max 30 seconds) +echo "[task-agent] Waiting for virtual keys to be provisioned..." +for i in $(seq 1 30); do + if [ -f /app/config/.env ]; then + # Check if TASK_AGENT_API_KEY has a value (not empty) + KEY=$(grep -E '^TASK_AGENT_API_KEY=' /app/config/.env | cut -d'=' -f2) + if [ -n "$KEY" ] && [ "$KEY" != "" ]; then + echo "[task-agent] Virtual keys found, loading environment..." + # Export keys from .env file + export TASK_AGENT_API_KEY="$KEY" + export OPENAI_API_KEY=$(grep -E '^OPENAI_API_KEY=' /app/config/.env | cut -d'=' -f2) + export FF_LLM_PROXY_BASE_URL=$(grep -E '^FF_LLM_PROXY_BASE_URL=' /app/config/.env | cut -d'=' -f2) + echo "[task-agent] Loaded TASK_AGENT_API_KEY: ${TASK_AGENT_API_KEY:0:15}..." + echo "[task-agent] Loaded FF_LLM_PROXY_BASE_URL: $FF_LLM_PROXY_BASE_URL" + break + fi + fi + echo "[task-agent] Keys not ready yet, waiting... ($i/30)" + sleep 1 +done + +if [ -z "$TASK_AGENT_API_KEY" ]; then + echo "[task-agent] ERROR: Virtual keys were not provisioned within 30 seconds!" + exit 1 +fi + +echo "[task-agent] Starting uvicorn..." 
+exec "$@" diff --git a/ai/agents/task_agent/litellm_agent/config.py b/ai/agents/task_agent/litellm_agent/config.py index 9b404bf..54ab609 100644 --- a/ai/agents/task_agent/litellm_agent/config.py +++ b/ai/agents/task_agent/litellm_agent/config.py @@ -4,13 +4,28 @@ from __future__ import annotations import os + +def _normalize_proxy_base_url(raw_value: str | None) -> str | None: + if not raw_value: + return None + cleaned = raw_value.strip() + if not cleaned: + return None + # Avoid double slashes in downstream requests + return cleaned.rstrip("/") + AGENT_NAME = "litellm_agent" AGENT_DESCRIPTION = ( "A LiteLLM-backed shell that exposes hot-swappable model and prompt controls." ) -DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "gemini-2.0-flash-001") -DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") +DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini") +DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None +PROXY_BASE_URL = _normalize_proxy_base_url( + os.getenv("FF_LLM_PROXY_BASE_URL") + or os.getenv("LITELLM_API_BASE") + or os.getenv("LITELLM_BASE_URL") +) STATE_PREFIX = "app:litellm_agent/" STATE_MODEL_KEY = f"{STATE_PREFIX}model" diff --git a/ai/agents/task_agent/litellm_agent/state.py b/ai/agents/task_agent/litellm_agent/state.py index 460d961..54f1308 100644 --- a/ai/agents/task_agent/litellm_agent/state.py +++ b/ai/agents/task_agent/litellm_agent/state.py @@ -3,11 +3,15 @@ from __future__ import annotations from dataclasses import dataclass +import os from typing import Any, Mapping, MutableMapping, Optional +import httpx + from .config import ( DEFAULT_MODEL, DEFAULT_PROVIDER, + PROXY_BASE_URL, STATE_MODEL_KEY, STATE_PROMPT_KEY, STATE_PROVIDER_KEY, @@ -66,11 +70,109 @@ class HotSwapState: """Create a LiteLlm instance for the current state.""" from google.adk.models.lite_llm import LiteLlm # Lazy import to avoid cycle + from google.adk.models.lite_llm import LiteLLMClient + from litellm.types.utils import Choices, Message, ModelResponse, Usage kwargs = {"model": self.model} if self.provider: kwargs["custom_llm_provider"] = self.provider - return LiteLlm(**kwargs) + if PROXY_BASE_URL: + provider = (self.provider or DEFAULT_PROVIDER or "").lower() + if provider and provider != "openai": + kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}" + else: + kwargs["api_base"] = PROXY_BASE_URL + kwargs.setdefault("api_key", os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY")) + + provider = (self.provider or DEFAULT_PROVIDER or "").lower() + model_suffix = self.model.split("/", 1)[-1] + use_responses = provider == "openai" and ( + model_suffix.startswith("gpt-5") or model_suffix.startswith("o1") + ) + if use_responses: + kwargs.setdefault("use_responses_api", True) + + llm = LiteLlm(**kwargs) + + if use_responses and PROXY_BASE_URL: + + class _ResponsesAwareClient(LiteLLMClient): + def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str): + self._base_client = base_client + self._api_base = api_base.rstrip("/") + self._api_key = api_key + + async def acompletion(self, model, messages, tools, **kwargs): # type: ignore[override] + use_responses_api = kwargs.pop("use_responses_api", False) + if not use_responses_api: + return await self._base_client.acompletion( + model=model, + messages=messages, + tools=tools, + **kwargs, + ) + + resolved_model = model + if "/" not in resolved_model: + resolved_model = f"openai/{resolved_model}" + + payload = { + "model": resolved_model, + "input": _messages_to_responses_input(messages), + } + + 
timeout = kwargs.get("timeout", 60) + headers = { + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + } + + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + f"{self._api_base}/v1/responses", + json=payload, + headers=headers, + ) + try: + response.raise_for_status() + except httpx.HTTPStatusError as exc: + text = exc.response.text + raise RuntimeError( + f"LiteLLM responses request failed: {text}" + ) from exc + data = response.json() + + text_output = _extract_output_text(data) + usage = data.get("usage", {}) + + return ModelResponse( + id=data.get("id"), + model=model, + choices=[ + Choices( + finish_reason="stop", + index=0, + message=Message(role="assistant", content=text_output), + provider_specific_fields={"bifrost_response": data}, + ) + ], + usage=Usage( + prompt_tokens=usage.get("input_tokens"), + completion_tokens=usage.get("output_tokens"), + reasoning_tokens=usage.get("output_tokens_details", {}).get( + "reasoning_tokens" + ), + total_tokens=usage.get("total_tokens"), + ), + ) + + llm.llm_client = _ResponsesAwareClient( + llm.llm_client, + PROXY_BASE_URL, + os.environ.get("TASK_AGENT_API_KEY") or os.environ.get("OPENAI_API_KEY", ""), + ) + + return llm @property def display_model(self) -> str: @@ -84,3 +186,69 @@ def apply_state_to_agent(invocation_context, state: HotSwapState) -> None: agent = invocation_context.agent agent.model = state.instantiate_llm() + + +def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + inputs: list[dict[str, Any]] = [] + for message in messages: + role = message.get("role", "user") + content = message.get("content", "") + text_segments: list[str] = [] + + if isinstance(content, list): + for item in content: + if isinstance(item, dict): + text = item.get("text") or item.get("content") + if text: + text_segments.append(str(text)) + elif isinstance(item, str): + text_segments.append(item) + elif isinstance(content, str): + text_segments.append(content) + + text = "\n".join(segment.strip() for segment in text_segments if segment) + if not text: + continue + + entry_type = "input_text" + if role == "assistant": + entry_type = "output_text" + + inputs.append( + { + "role": role, + "content": [ + { + "type": entry_type, + "text": text, + } + ], + } + ) + + if not inputs: + inputs.append( + { + "role": "user", + "content": [ + { + "type": "input_text", + "text": "", + } + ], + } + ) + return inputs + + +def _extract_output_text(response_json: dict[str, Any]) -> str: + outputs = response_json.get("output", []) + collected: list[str] = [] + for item in outputs: + if isinstance(item, dict) and item.get("type") == "message": + for part in item.get("content", []): + if isinstance(part, dict) and part.get("type") == "output_text": + text = part.get("text", "") + if text: + collected.append(str(text)) + return "\n\n".join(collected).strip() diff --git a/ai/proxy/README.md b/ai/proxy/README.md new file mode 100644 index 0000000..fc941eb --- /dev/null +++ b/ai/proxy/README.md @@ -0,0 +1,5 @@ +# LLM Proxy Integrations + +This directory contains vendor source trees that were vendored only for reference when integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project. + +See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents. 
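+
+As a quick smoke test, a minimal sketch of calling the gateway directly is shown below. It assumes the proxy is reachable on the default host port `10999`, that the bootstrap has issued a virtual key (the `sk-proxy-default` placeholder is used here), and that the gateway exposes the standard OpenAI-compatible `/v1/chat/completions` route; adjust all three to match your deployment.
+
+```python
+import httpx
+
+# Assumed values: default host port and the bootstrap's placeholder virtual key.
+PROXY_BASE_URL = "http://localhost:10999"
+VIRTUAL_KEY = "sk-proxy-default"
+
+resp = httpx.post(
+    f"{PROXY_BASE_URL}/v1/chat/completions",
+    headers={"Authorization": f"Bearer {VIRTUAL_KEY}"},
+    json={
+        "model": "openai/gpt-4o-mini",
+        "messages": [{"role": "user", "content": "ping"}],
+    },
+    timeout=30,
+)
+resp.raise_for_status()
+print(resp.json()["choices"][0]["message"]["content"])
+```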
diff --git a/ai/pyproject.toml b/ai/pyproject.toml index d5c0e77..120b9cc 100644 --- a/ai/pyproject.toml +++ b/ai/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fuzzforge-ai" -version = "0.7.0" +version = "0.7.3" description = "FuzzForge AI orchestration module" readme = "README.md" requires-python = ">=3.11" diff --git a/ai/src/fuzzforge_ai/__init__.py b/ai/src/fuzzforge_ai/__init__.py index cca81fc..eefecd9 100644 --- a/ai/src/fuzzforge_ai/__init__.py +++ b/ai/src/fuzzforge_ai/__init__.py @@ -21,4 +21,4 @@ Usage: # Additional attribution and requirements are provided in the NOTICE file. -__version__ = "0.6.0" \ No newline at end of file +__version__ = "0.7.3" \ No newline at end of file diff --git a/ai/src/fuzzforge_ai/agent_executor.py b/ai/src/fuzzforge_ai/agent_executor.py index fd1f1d9..41613c0 100644 --- a/ai/src/fuzzforge_ai/agent_executor.py +++ b/ai/src/fuzzforge_ai/agent_executor.py @@ -831,20 +831,9 @@ class FuzzForgeExecutor: async def submit_security_scan_mcp( workflow_name: str, target_path: str = "", - volume_mode: str = "", parameters: Dict[str, Any] | None = None, tool_context: ToolContext | None = None, ) -> Any: - # Normalise volume mode to supported values - normalised_mode = (volume_mode or "ro").strip().lower().replace("-", "_") - if normalised_mode in {"read_only", "readonly", "ro"}: - normalised_mode = "ro" - elif normalised_mode in {"read_write", "readwrite", "rw"}: - normalised_mode = "rw" - else: - # Fall back to read-only if we can't recognise the input - normalised_mode = "ro" - # Resolve the target path to an absolute path for validation resolved_path = target_path or "." try: @@ -883,7 +872,6 @@ class FuzzForgeExecutor: payload = { "workflow_name": workflow_name, "target_path": resolved_path, - "volume_mode": normalised_mode, "parameters": cleaned_parameters, } result = await _call_fuzzforge_mcp("submit_security_scan_mcp", payload) @@ -1061,10 +1049,19 @@ class FuzzForgeExecutor: FunctionTool(get_task_list) ]) - - # Create the agent + + # Create the agent with LiteLLM configuration + llm_kwargs = {} + api_key = os.getenv('OPENAI_API_KEY') or os.getenv('LLM_API_KEY') + api_base = os.getenv('LLM_ENDPOINT') or os.getenv('LLM_API_BASE') or os.getenv('OPENAI_API_BASE') + + if api_key: + llm_kwargs['api_key'] = api_key + if api_base: + llm_kwargs['api_base'] = api_base + self.agent = LlmAgent( - model=LiteLlm(model=self.model), + model=LiteLlm(model=self.model, **llm_kwargs), name="fuzzforge_executor", description="Intelligent A2A orchestrator with memory", instruction=self._build_instruction(), diff --git a/ai/src/fuzzforge_ai/cognee_service.py b/ai/src/fuzzforge_ai/cognee_service.py index 968e956..ba14a30 100644 --- a/ai/src/fuzzforge_ai/cognee_service.py +++ b/ai/src/fuzzforge_ai/cognee_service.py @@ -56,7 +56,7 @@ class CogneeService: # Configure LLM with API key BEFORE any other cognee operations provider = os.getenv("LLM_PROVIDER", "openai") model = os.getenv("LLM_MODEL") or os.getenv("LITELLM_MODEL", "gpt-4o-mini") - api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") + api_key = os.getenv("COGNEE_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") endpoint = os.getenv("LLM_ENDPOINT") api_version = os.getenv("LLM_API_VERSION") max_tokens = os.getenv("LLM_MAX_TOKENS") @@ -78,48 +78,62 @@ class CogneeService: os.environ.setdefault("OPENAI_API_KEY", api_key) if endpoint: os.environ["LLM_ENDPOINT"] = endpoint + os.environ.setdefault("LLM_API_BASE", endpoint) + os.environ.setdefault("OPENAI_API_BASE", endpoint) + 
os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint) + if api_key: + os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key) if api_version: os.environ["LLM_API_VERSION"] = api_version if max_tokens: os.environ["LLM_MAX_TOKENS"] = str(max_tokens) # Configure Cognee's runtime using its configuration helpers when available + embedding_model = os.getenv("LLM_EMBEDDING_MODEL") + embedding_endpoint = os.getenv("LLM_EMBEDDING_ENDPOINT") + if embedding_endpoint: + os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint) + if hasattr(cognee.config, "set_llm_provider"): cognee.config.set_llm_provider(provider) - if hasattr(cognee.config, "set_llm_model"): - cognee.config.set_llm_model(model) - if api_key and hasattr(cognee.config, "set_llm_api_key"): - cognee.config.set_llm_api_key(api_key) - if endpoint and hasattr(cognee.config, "set_llm_endpoint"): - cognee.config.set_llm_endpoint(endpoint) + if hasattr(cognee.config, "set_llm_model"): + cognee.config.set_llm_model(model) + if api_key and hasattr(cognee.config, "set_llm_api_key"): + cognee.config.set_llm_api_key(api_key) + if endpoint and hasattr(cognee.config, "set_llm_endpoint"): + cognee.config.set_llm_endpoint(endpoint) + if embedding_model and hasattr(cognee.config, "set_llm_embedding_model"): + cognee.config.set_llm_embedding_model(embedding_model) + if embedding_endpoint and hasattr(cognee.config, "set_llm_embedding_endpoint"): + cognee.config.set_llm_embedding_endpoint(embedding_endpoint) if api_version and hasattr(cognee.config, "set_llm_api_version"): cognee.config.set_llm_api_version(api_version) if max_tokens and hasattr(cognee.config, "set_llm_max_tokens"): cognee.config.set_llm_max_tokens(int(max_tokens)) - + # Configure graph database cognee.config.set_graph_db_config({ "graph_database_provider": self.cognee_config.get("graph_database_provider", "kuzu"), }) - + # Set data directories data_dir = self.cognee_config.get("data_directory") system_dir = self.cognee_config.get("system_directory") - + if data_dir: logger.debug("Setting cognee data root", extra={"path": data_dir}) cognee.config.data_root_directory(data_dir) if system_dir: logger.debug("Setting cognee system root", extra={"path": system_dir}) cognee.config.system_root_directory(system_dir) - + # Setup multi-tenant user context await self._setup_user_context() - + self._initialized = True logger.info(f"Cognee initialized for project {self.project_context['project_name']} " f"with Kuzu at {system_dir}") - + except ImportError: logger.error("Cognee not installed. Install with: pip install cognee") raise diff --git a/backend/mcp-config.json b/backend/mcp-config.json index 1b6e783..4f06ce4 100644 --- a/backend/mcp-config.json +++ b/backend/mcp-config.json @@ -22,7 +22,6 @@ "parameters": { "workflow_name": "string", "target_path": "string", - "volume_mode": "string (ro|rw)", "parameters": "object" } }, diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 03a7307..595d473 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "backend" -version = "0.7.0" +version = "0.7.3" description = "FuzzForge OSS backend" authors = [] readme = "README.md" diff --git a/backend/src/api/system.py b/backend/src/api/system.py new file mode 100644 index 0000000..a4ee1a6 --- /dev/null +++ b/backend/src/api/system.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. 
+# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +""" +System information endpoints for FuzzForge API. + +Provides system configuration and filesystem paths to CLI for worker management. +""" + +import os +from typing import Dict + +from fastapi import APIRouter + +router = APIRouter(prefix="/system", tags=["system"]) + + +@router.get("/info") +async def get_system_info() -> Dict[str, str]: + """ + Get system information including host filesystem paths. + + This endpoint exposes paths needed by the CLI to manage workers via docker-compose. + The FUZZFORGE_HOST_ROOT environment variable is set by docker-compose and points + to the FuzzForge installation directory on the host machine. + + Returns: + Dictionary containing: + - host_root: Absolute path to FuzzForge root on host + - docker_compose_path: Path to docker-compose.yml on host + - workers_dir: Path to workers directory on host + """ + host_root = os.getenv("FUZZFORGE_HOST_ROOT", "") + + return { + "host_root": host_root, + "docker_compose_path": f"{host_root}/docker-compose.yml" if host_root else "", + "workers_dir": f"{host_root}/workers" if host_root else "", + } diff --git a/backend/src/api/workflows.py b/backend/src/api/workflows.py index 3ffda9d..a4d1b7c 100644 --- a/backend/src/api/workflows.py +++ b/backend/src/api/workflows.py @@ -43,6 +43,42 @@ ALLOWED_CONTENT_TYPES = [ router = APIRouter(prefix="/workflows", tags=["workflows"]) +def extract_defaults_from_json_schema(metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Extract default parameter values from JSON Schema format. 
+ + Converts from: + parameters: + properties: + param_name: + default: value + + To: + {param_name: value} + + Args: + metadata: Workflow metadata dictionary + + Returns: + Dictionary of parameter defaults + """ + defaults = {} + + # Check if there's a legacy default_parameters field + if "default_parameters" in metadata: + defaults.update(metadata["default_parameters"]) + + # Extract defaults from JSON Schema parameters + parameters = metadata.get("parameters", {}) + properties = parameters.get("properties", {}) + + for param_name, param_spec in properties.items(): + if "default" in param_spec: + defaults[param_name] = param_spec["default"] + + return defaults + + def create_structured_error_response( error_type: str, message: str, @@ -164,7 +200,7 @@ async def get_workflow_metadata( author=metadata.get("author"), tags=metadata.get("tags", []), parameters=metadata.get("parameters", {}), - default_parameters=metadata.get("default_parameters", {}), + default_parameters=extract_defaults_from_json_schema(metadata), required_modules=metadata.get("required_modules", []) ) @@ -221,7 +257,7 @@ async def submit_workflow( # Merge default parameters with user parameters workflow_info = temporal_mgr.workflows[workflow_name] metadata = workflow_info.metadata or {} - defaults = metadata.get("default_parameters", {}) + defaults = extract_defaults_from_json_schema(metadata) user_params = submission.parameters or {} workflow_params = {**defaults, **user_params} @@ -450,7 +486,7 @@ async def upload_and_submit_workflow( # Merge default parameters with user parameters workflow_info = temporal_mgr.workflows.get(workflow_name) metadata = workflow_info.metadata or {} - defaults = metadata.get("default_parameters", {}) + defaults = extract_defaults_from_json_schema(metadata) workflow_params = {**defaults, **workflow_params} # Start workflow execution @@ -617,11 +653,8 @@ async def get_workflow_parameters( else: param_definitions = parameters_schema - # Add default values to the schema - default_params = metadata.get("default_parameters", {}) - for param_name, param_schema in param_definitions.items(): - if isinstance(param_schema, dict) and param_name in default_params: - param_schema["default"] = default_params[param_name] + # Extract default values from JSON Schema + default_params = extract_defaults_from_json_schema(metadata) return { "workflow": workflow_name, diff --git a/backend/src/main.py b/backend/src/main.py index 9866c43..c219742 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -24,7 +24,7 @@ from fastmcp.server.http import create_sse_app from src.temporal.manager import TemporalManager from src.core.setup import setup_result_storage, validate_infrastructure -from src.api import workflows, runs, fuzzing +from src.api import workflows, runs, fuzzing, system from fastmcp import FastMCP @@ -76,6 +76,7 @@ app = FastAPI( app.include_router(workflows.router) app.include_router(runs.router) app.include_router(fuzzing.router) +app.include_router(system.router) def get_temporal_status() -> Dict[str, Any]: @@ -212,14 +213,6 @@ def _lookup_workflow(workflow_name: str): metadata = info.metadata defaults = metadata.get("default_parameters", {}) default_target_path = metadata.get("default_target_path") or defaults.get("target_path") - supported_modes = metadata.get("supported_volume_modes") or ["ro", "rw"] - if not isinstance(supported_modes, list) or not supported_modes: - supported_modes = ["ro", "rw"] - default_volume_mode = ( - metadata.get("default_volume_mode") - or defaults.get("volume_mode") - 
or supported_modes[0] - ) return { "name": workflow_name, "version": metadata.get("version", "0.6.0"), @@ -229,9 +222,7 @@ def _lookup_workflow(workflow_name: str): "parameters": metadata.get("parameters", {}), "default_parameters": metadata.get("default_parameters", {}), "required_modules": metadata.get("required_modules", []), - "supported_volume_modes": supported_modes, - "default_target_path": default_target_path, - "default_volume_mode": default_volume_mode + "default_target_path": default_target_path } @@ -256,10 +247,6 @@ async def list_workflows_mcp() -> Dict[str, Any]: "description": metadata.get("description", ""), "author": metadata.get("author"), "tags": metadata.get("tags", []), - "supported_volume_modes": metadata.get("supported_volume_modes", ["ro", "rw"]), - "default_volume_mode": metadata.get("default_volume_mode") - or defaults.get("volume_mode") - or "ro", "default_target_path": metadata.get("default_target_path") or defaults.get("target_path") }) diff --git a/backend/src/models/findings.py b/backend/src/models/findings.py index ddc756a..b71a9b6 100644 --- a/backend/src/models/findings.py +++ b/backend/src/models/findings.py @@ -14,7 +14,7 @@ Models for workflow findings and submissions # Additional attribution and requirements are provided in the NOTICE file. from pydantic import BaseModel, Field -from typing import Dict, Any, Optional, Literal, List +from typing import Dict, Any, Optional, List from datetime import datetime @@ -73,10 +73,6 @@ class WorkflowMetadata(BaseModel): default_factory=list, description="Required module names" ) - supported_volume_modes: List[Literal["ro", "rw"]] = Field( - default=["ro", "rw"], - description="Supported volume mount modes" - ) class WorkflowListItem(BaseModel): diff --git a/backend/src/temporal/manager.py b/backend/src/temporal/manager.py index 9a44e8b..96d9a84 100644 --- a/backend/src/temporal/manager.py +++ b/backend/src/temporal/manager.py @@ -187,12 +187,28 @@ class TemporalManager: # Add parameters in order based on metadata schema # This ensures parameters match the workflow signature order - if workflow_params and 'parameters' in workflow_info.metadata: + # Apply defaults from metadata.yaml if parameter not provided + if 'parameters' in workflow_info.metadata: param_schema = workflow_info.metadata['parameters'].get('properties', {}) + logger.debug(f"Found {len(param_schema)} parameters in schema") # Iterate parameters in schema order and add values for param_name in param_schema.keys(): - param_value = workflow_params.get(param_name) + param_spec = param_schema[param_name] + + # Use provided param, or fall back to default from metadata + if workflow_params and param_name in workflow_params: + param_value = workflow_params[param_name] + logger.debug(f"Using provided value for {param_name}: {param_value}") + elif 'default' in param_spec: + param_value = param_spec['default'] + logger.debug(f"Using default for {param_name}: {param_value}") + else: + param_value = None + logger.debug(f"No value or default for {param_name}, using None") + workflow_args.append(param_value) + else: + logger.debug("No 'parameters' section found in workflow metadata") # Determine task queue from workflow vertical vertical = workflow_info.metadata.get("vertical", "default") diff --git a/backend/toolbox/modules/analyzer/__init__.py b/backend/toolbox/modules/analyzer/__init__.py index 527dab7..8bffdab 100644 --- a/backend/toolbox/modules/analyzer/__init__.py +++ b/backend/toolbox/modules/analyzer/__init__.py @@ -10,5 +10,7 @@ # Additional attribution 
and requirements are provided in the NOTICE file. from .security_analyzer import SecurityAnalyzer +from .bandit_analyzer import BanditAnalyzer +from .mypy_analyzer import MypyAnalyzer -__all__ = ["SecurityAnalyzer"] \ No newline at end of file +__all__ = ["SecurityAnalyzer", "BanditAnalyzer", "MypyAnalyzer"] \ No newline at end of file diff --git a/backend/toolbox/modules/analyzer/bandit_analyzer.py b/backend/toolbox/modules/analyzer/bandit_analyzer.py new file mode 100644 index 0000000..ecf81a8 --- /dev/null +++ b/backend/toolbox/modules/analyzer/bandit_analyzer.py @@ -0,0 +1,328 @@ +""" +Bandit Analyzer Module - Analyzes Python code for security issues using Bandit +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +import asyncio +import json +import logging +import time +from pathlib import Path +from typing import Dict, Any, List + +try: + from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding +except ImportError: + try: + from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + except ImportError: + from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + +logger = logging.getLogger(__name__) + + +class BanditAnalyzer(BaseModule): + """ + Analyzes Python code for security issues using Bandit. + + This module: + - Runs Bandit security linter on Python files + - Detects common security issues (SQL injection, hardcoded secrets, etc.) 
+ - Reports findings with severity levels + """ + + # Severity mapping from Bandit levels to our standard + SEVERITY_MAP = { + "LOW": "low", + "MEDIUM": "medium", + "HIGH": "high" + } + + def get_metadata(self) -> ModuleMetadata: + """Get module metadata""" + return ModuleMetadata( + name="bandit_analyzer", + version="1.0.0", + description="Analyzes Python code for security issues using Bandit", + author="FuzzForge Team", + category="analyzer", + tags=["python", "security", "bandit", "sast"], + input_schema={ + "severity_level": { + "type": "string", + "enum": ["low", "medium", "high"], + "description": "Minimum severity level to report", + "default": "low" + }, + "confidence_level": { + "type": "string", + "enum": ["low", "medium", "high"], + "description": "Minimum confidence level to report", + "default": "medium" + }, + "exclude_tests": { + "type": "boolean", + "description": "Exclude test files from analysis", + "default": True + }, + "skip_ids": { + "type": "array", + "items": {"type": "string"}, + "description": "List of Bandit test IDs to skip", + "default": [] + } + }, + output_schema={ + "findings": { + "type": "array", + "description": "List of security issues found by Bandit" + } + }, + requires_workspace=True + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + severity = config.get("severity_level", "low") + if severity not in ["low", "medium", "high"]: + raise ValueError("severity_level must be one of: low, medium, high") + + confidence = config.get("confidence_level", "medium") + if confidence not in ["low", "medium", "high"]: + raise ValueError("confidence_level must be one of: low, medium, high") + + skip_ids = config.get("skip_ids", []) + if not isinstance(skip_ids, list): + raise ValueError("skip_ids must be a list") + + return True + + async def _run_bandit( + self, + workspace: Path, + severity_level: str, + confidence_level: str, + exclude_tests: bool, + skip_ids: List[str] + ) -> Dict[str, Any]: + """ + Run Bandit on the workspace. + + Args: + workspace: Path to workspace + severity_level: Minimum severity to report + confidence_level: Minimum confidence to report + exclude_tests: Whether to exclude test files + skip_ids: List of test IDs to skip + + Returns: + Bandit JSON output as dict + """ + try: + # Build bandit command + cmd = [ + "bandit", + "-r", str(workspace), + "-f", "json", + "-ll", # Report all findings (we'll filter later) + ] + + # Add exclude patterns for test files + if exclude_tests: + cmd.extend(["-x", "*/test_*.py,*/tests/*,*_test.py"]) + + # Add skip IDs if specified + if skip_ids: + cmd.extend(["-s", ",".join(skip_ids)]) + + logger.info(f"Running Bandit on: {workspace}") + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + + stdout, stderr = await process.communicate() + + # Bandit returns non-zero if issues found, which is expected + if process.returncode not in [0, 1]: + logger.error(f"Bandit failed: {stderr.decode()}") + return {"results": []} + + # Parse JSON output + result = json.loads(stdout.decode()) + return result + + except Exception as e: + logger.error(f"Error running Bandit: {e}") + return {"results": []} + + def _should_include_finding( + self, + issue: Dict[str, Any], + min_severity: str, + min_confidence: str + ) -> bool: + """ + Determine if a Bandit issue should be included based on severity/confidence. 
+ + Args: + issue: Bandit issue dict + min_severity: Minimum severity threshold + min_confidence: Minimum confidence threshold + + Returns: + True if issue should be included + """ + severity_order = ["low", "medium", "high"] + issue_severity = issue.get("issue_severity", "LOW").lower() + issue_confidence = issue.get("issue_confidence", "LOW").lower() + + severity_meets_threshold = severity_order.index(issue_severity) >= severity_order.index(min_severity) + confidence_meets_threshold = severity_order.index(issue_confidence) >= severity_order.index(min_confidence) + + return severity_meets_threshold and confidence_meets_threshold + + def _convert_to_findings( + self, + bandit_result: Dict[str, Any], + workspace: Path, + min_severity: str, + min_confidence: str + ) -> List[ModuleFinding]: + """ + Convert Bandit results to ModuleFindings. + + Args: + bandit_result: Bandit JSON output + workspace: Workspace path for relative paths + min_severity: Minimum severity to include + min_confidence: Minimum confidence to include + + Returns: + List of ModuleFindings + """ + findings = [] + + for issue in bandit_result.get("results", []): + # Filter by severity and confidence + if not self._should_include_finding(issue, min_severity, min_confidence): + continue + + # Extract issue details + test_id = issue.get("test_id", "B000") + test_name = issue.get("test_name", "unknown") + issue_text = issue.get("issue_text", "No description") + severity = self.SEVERITY_MAP.get(issue.get("issue_severity", "LOW"), "low") + + # File location + filename = issue.get("filename", "") + line_number = issue.get("line_number", 0) + code = issue.get("code", "") + + # Try to get relative path + try: + file_path = Path(filename) + rel_path = file_path.relative_to(workspace) + except (ValueError, TypeError): + rel_path = Path(filename).name + + # Create finding + finding = self.create_finding( + title=f"{test_name} ({test_id})", + description=issue_text, + severity=severity, + category="security-issue", + file_path=str(rel_path), + line_start=line_number, + line_end=line_number, + code_snippet=code.strip() if code else None, + recommendation=f"Review and fix the security issue identified by Bandit test {test_id}", + metadata={ + "test_id": test_id, + "test_name": test_name, + "confidence": issue.get("issue_confidence", "LOW").lower(), + "cwe": issue.get("issue_cwe", {}).get("id") if issue.get("issue_cwe") else None, + "more_info": issue.get("more_info", "") + } + ) + findings.append(finding) + + return findings + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute the Bandit analyzer module. 
+ + Args: + config: Module configuration + workspace: Path to workspace + + Returns: + ModuleResult with security findings + """ + start_time = time.time() + metadata = self.get_metadata() + + # Validate inputs + self.validate_config(config) + self.validate_workspace(workspace) + + # Get configuration + severity_level = config.get("severity_level", "low") + confidence_level = config.get("confidence_level", "medium") + exclude_tests = config.get("exclude_tests", True) + skip_ids = config.get("skip_ids", []) + + # Run Bandit + logger.info("Starting Bandit analysis...") + bandit_result = await self._run_bandit( + workspace, + severity_level, + confidence_level, + exclude_tests, + skip_ids + ) + + # Convert to findings + findings = self._convert_to_findings( + bandit_result, + workspace, + severity_level, + confidence_level + ) + + # Calculate summary + severity_counts = {} + for finding in findings: + sev = finding.severity + severity_counts[sev] = severity_counts.get(sev, 0) + 1 + + execution_time = time.time() - start_time + + return ModuleResult( + module=metadata.name, + version=metadata.version, + status="success", + execution_time=execution_time, + findings=findings, + summary={ + "total_issues": len(findings), + "by_severity": severity_counts, + "files_analyzed": len(set(f.file_path for f in findings if f.file_path)) + }, + metadata={ + "bandit_version": bandit_result.get("generated_at", "unknown"), + "metrics": bandit_result.get("metrics", {}) + } + ) diff --git a/backend/toolbox/modules/analyzer/mypy_analyzer.py b/backend/toolbox/modules/analyzer/mypy_analyzer.py new file mode 100644 index 0000000..9d3e39f --- /dev/null +++ b/backend/toolbox/modules/analyzer/mypy_analyzer.py @@ -0,0 +1,269 @@ +""" +Mypy Analyzer Module - Analyzes Python code for type safety issues using Mypy +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +import asyncio +import logging +import re +import time +from pathlib import Path +from typing import Dict, Any, List + +try: + from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding +except ImportError: + try: + from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + except ImportError: + from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + +logger = logging.getLogger(__name__) + + +class MypyAnalyzer(BaseModule): + """ + Analyzes Python code for type safety issues using Mypy. 
+ + This module: + - Runs Mypy type checker on Python files + - Detects type errors and inconsistencies + - Reports findings with configurable strictness + """ + + # Map Mypy error codes to severity + ERROR_SEVERITY_MAP = { + "error": "medium", + "note": "info" + } + + def get_metadata(self) -> ModuleMetadata: + """Get module metadata""" + return ModuleMetadata( + name="mypy_analyzer", + version="1.0.0", + description="Analyzes Python code for type safety issues using Mypy", + author="FuzzForge Team", + category="analyzer", + tags=["python", "type-checking", "mypy", "sast"], + input_schema={ + "strict_mode": { + "type": "boolean", + "description": "Enable strict type checking", + "default": False + }, + "ignore_missing_imports": { + "type": "boolean", + "description": "Ignore errors about missing imports", + "default": True + }, + "follow_imports": { + "type": "string", + "enum": ["normal", "silent", "skip", "error"], + "description": "How to handle imports", + "default": "silent" + } + }, + output_schema={ + "findings": { + "type": "array", + "description": "List of type errors found by Mypy" + } + }, + requires_workspace=True + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + follow_imports = config.get("follow_imports", "silent") + if follow_imports not in ["normal", "silent", "skip", "error"]: + raise ValueError("follow_imports must be one of: normal, silent, skip, error") + + return True + + async def _run_mypy( + self, + workspace: Path, + strict_mode: bool, + ignore_missing_imports: bool, + follow_imports: str + ) -> str: + """ + Run Mypy on the workspace. + + Args: + workspace: Path to workspace + strict_mode: Enable strict checking + ignore_missing_imports: Ignore missing import errors + follow_imports: How to handle imports + + Returns: + Mypy output as string + """ + try: + # Build mypy command + cmd = [ + "mypy", + str(workspace), + "--show-column-numbers", + "--no-error-summary", + f"--follow-imports={follow_imports}" + ] + + if strict_mode: + cmd.append("--strict") + + if ignore_missing_imports: + cmd.append("--ignore-missing-imports") + + logger.info(f"Running Mypy on: {workspace}") + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + + stdout, stderr = await process.communicate() + + # Mypy returns non-zero if errors found, which is expected + output = stdout.decode() + return output + + except Exception as e: + logger.error(f"Error running Mypy: {e}") + return "" + + def _parse_mypy_output(self, output: str, workspace: Path) -> List[ModuleFinding]: + """ + Parse Mypy output and convert to findings. + + Mypy output format: + file.py:10:5: error: Incompatible return value type [return-value] + file.py:15: note: See https://... 
+ + Args: + output: Mypy stdout + workspace: Workspace path for relative paths + + Returns: + List of ModuleFindings + """ + findings = [] + + # Regex to parse mypy output lines + # Format: filename:line:column: level: message [error-code] + pattern = r'^(.+?):(\d+)(?::(\d+))?: (error|note): (.+?)(?:\s+\[([^\]]+)\])?$' + + for line in output.splitlines(): + match = re.match(pattern, line.strip()) + if not match: + continue + + filename, line_num, column, level, message, error_code = match.groups() + + # Convert to relative path + try: + file_path = Path(filename) + rel_path = file_path.relative_to(workspace) + except (ValueError, TypeError): + rel_path = Path(filename).name + + # Skip if it's just a note (unless it's a standalone note) + if level == "note" and not error_code: + continue + + # Map severity + severity = self.ERROR_SEVERITY_MAP.get(level, "medium") + + # Create finding + title = f"Type error: {error_code or 'type-issue'}" + description = message + + finding = self.create_finding( + title=title, + description=description, + severity=severity, + category="type-error", + file_path=str(rel_path), + line_start=int(line_num), + line_end=int(line_num), + recommendation="Review and fix the type inconsistency or add appropriate type annotations", + metadata={ + "error_code": error_code or "unknown", + "column": int(column) if column else None, + "level": level + } + ) + findings.append(finding) + + return findings + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute the Mypy analyzer module. + + Args: + config: Module configuration + workspace: Path to workspace + + Returns: + ModuleResult with type checking findings + """ + start_time = time.time() + metadata = self.get_metadata() + + # Validate inputs + self.validate_config(config) + self.validate_workspace(workspace) + + # Get configuration + strict_mode = config.get("strict_mode", False) + ignore_missing_imports = config.get("ignore_missing_imports", True) + follow_imports = config.get("follow_imports", "silent") + + # Run Mypy + logger.info("Starting Mypy analysis...") + mypy_output = await self._run_mypy( + workspace, + strict_mode, + ignore_missing_imports, + follow_imports + ) + + # Parse output to findings + findings = self._parse_mypy_output(mypy_output, workspace) + + # Calculate summary + error_code_counts = {} + for finding in findings: + code = finding.metadata.get("error_code", "unknown") + error_code_counts[code] = error_code_counts.get(code, 0) + 1 + + execution_time = time.time() - start_time + + return ModuleResult( + module=metadata.name, + version=metadata.version, + status="success", + execution_time=execution_time, + findings=findings, + summary={ + "total_errors": len(findings), + "by_error_code": error_code_counts, + "files_with_errors": len(set(f.file_path for f in findings if f.file_path)) + }, + metadata={ + "strict_mode": strict_mode, + "ignore_missing_imports": ignore_missing_imports + } + ) diff --git a/backend/toolbox/modules/android/__init__.py b/backend/toolbox/modules/android/__init__.py new file mode 100644 index 0000000..ef2c74c --- /dev/null +++ b/backend/toolbox/modules/android/__init__.py @@ -0,0 +1,31 @@ +""" +Android Security Analysis Modules + +Modules for Android application security testing: +- JadxDecompiler: APK decompilation using Jadx +- MobSFScanner: Mobile security analysis using MobSF +- OpenGrepAndroid: Static analysis using OpenGrep/Semgrep with Android-specific rules +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the 
Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +from .jadx_decompiler import JadxDecompiler +from .opengrep_android import OpenGrepAndroid + +# MobSF is optional (not available on ARM64 platform) +try: + from .mobsf_scanner import MobSFScanner + __all__ = ["JadxDecompiler", "MobSFScanner", "OpenGrepAndroid"] +except ImportError: + # MobSF dependencies not available (e.g., ARM64 platform) + MobSFScanner = None + __all__ = ["JadxDecompiler", "OpenGrepAndroid"] diff --git a/backend/toolbox/modules/android/custom_rules/clipboard-sensitive-data.yaml b/backend/toolbox/modules/android/custom_rules/clipboard-sensitive-data.yaml new file mode 100644 index 0000000..df7944e --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/clipboard-sensitive-data.yaml @@ -0,0 +1,15 @@ +rules: + - id: clipboard-sensitive-data + severity: WARNING + languages: [java] + message: "Sensitive data may be copied to the clipboard." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + category: security + area: clipboard + verification-level: [L1] + paths: + include: + - "**/*.java" + pattern: "$CLIPBOARD.setPrimaryClip($CLIP)" diff --git a/backend/toolbox/modules/android/custom_rules/hardcoded-secrets.yaml b/backend/toolbox/modules/android/custom_rules/hardcoded-secrets.yaml new file mode 100644 index 0000000..c353c96 --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/hardcoded-secrets.yaml @@ -0,0 +1,23 @@ +rules: + - id: hardcoded-secrets + severity: WARNING + languages: [java] + message: "Possible hardcoded secret found in variable '$NAME'." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + owasp-mobile: M2 + category: secrets + verification-level: [L1] + paths: + include: + - "**/*.java" + patterns: + - pattern-either: + - pattern: 'String $NAME = "$VAL";' + - pattern: 'final String $NAME = "$VAL";' + - pattern: 'private String $NAME = "$VAL";' + - pattern: 'public static String $NAME = "$VAL";' + - pattern: 'static final String $NAME = "$VAL";' + - pattern-regex: "$NAME =~ /(?i).*(api|key|token|secret|pass|auth|session|bearer|access|private).*/" + diff --git a/backend/toolbox/modules/android/custom_rules/insecure-data-storage.yaml b/backend/toolbox/modules/android/custom_rules/insecure-data-storage.yaml new file mode 100644 index 0000000..c22546d --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/insecure-data-storage.yaml @@ -0,0 +1,18 @@ +rules: + - id: insecure-data-storage + severity: WARNING + languages: [java] + message: "Potential insecure data storage (external storage)." 
+ metadata: + authors: + - Guerric ELOI (FuzzingLabs) + owasp-mobile: M2 + category: security + area: storage + verification-level: [L1] + paths: + include: + - "**/*.java" + pattern-either: + - pattern: "$CTX.openFileOutput($NAME, $MODE)" + - pattern: "Environment.getExternalStorageDirectory()" diff --git a/backend/toolbox/modules/android/custom_rules/insecure-deeplink.yaml b/backend/toolbox/modules/android/custom_rules/insecure-deeplink.yaml new file mode 100644 index 0000000..4be31ad --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/insecure-deeplink.yaml @@ -0,0 +1,16 @@ +rules: + - id: insecure-deeplink + severity: WARNING + languages: [xml] + message: "Potential insecure deeplink found in intent-filter." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + category: component + area: manifest + verification-level: [L1] + paths: + include: + - "**/AndroidManifest.xml" + pattern: | + diff --git a/backend/toolbox/modules/android/custom_rules/insecure-logging.yaml b/backend/toolbox/modules/android/custom_rules/insecure-logging.yaml new file mode 100644 index 0000000..f36f2a7 --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/insecure-logging.yaml @@ -0,0 +1,21 @@ +rules: + - id: insecure-logging + severity: WARNING + languages: [java] + message: "Sensitive data logged via Android Log API." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + owasp-mobile: M2 + category: logging + verification-level: [L1] + paths: + include: + - "**/*.java" + patterns: + - pattern-either: + - pattern: "Log.d($TAG, $MSG)" + - pattern: "Log.e($TAG, $MSG)" + - pattern: "System.out.println($MSG)" + - pattern-regex: "$MSG =~ /(?i).*(password|token|secret|api|auth|session).*/" + diff --git a/backend/toolbox/modules/android/custom_rules/intent-redirection.yaml b/backend/toolbox/modules/android/custom_rules/intent-redirection.yaml new file mode 100644 index 0000000..ade522a --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/intent-redirection.yaml @@ -0,0 +1,15 @@ +rules: + - id: intent-redirection + severity: WARNING + languages: [java] + message: "Potential intent redirection: using getIntent().getExtras() without validation." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + category: intent + area: intercomponent + verification-level: [L1] + paths: + include: + - "**/*.java" + pattern: "$ACT.getIntent().getExtras()" diff --git a/backend/toolbox/modules/android/custom_rules/sensitive_data_sharedPreferences.yaml b/backend/toolbox/modules/android/custom_rules/sensitive_data_sharedPreferences.yaml new file mode 100644 index 0000000..4f8f28f --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/sensitive_data_sharedPreferences.yaml @@ -0,0 +1,18 @@ +rules: + - id: sensitive-data-in-shared-preferences + severity: WARNING + languages: [java] + message: "Sensitive data may be stored in SharedPreferences. Please review the key '$KEY'." 
+ metadata: + authors: + - Guerric ELOI (FuzzingLabs) + owasp-mobile: M2 + category: security + area: storage + verification-level: [L1] + paths: + include: + - "**/*.java" + patterns: + - pattern: "$EDITOR.putString($KEY, $VAL);" + - pattern-regex: "$KEY =~ /(?i).*(username|password|pass|token|auth_token|api_key|secret|sessionid|email).*/" diff --git a/backend/toolbox/modules/android/custom_rules/sqlite-injection.yaml b/backend/toolbox/modules/android/custom_rules/sqlite-injection.yaml new file mode 100644 index 0000000..5d07e22 --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/sqlite-injection.yaml @@ -0,0 +1,21 @@ +rules: + - id: sqlite-injection + severity: ERROR + languages: [java] + message: "Possible SQL injection: concatenated input in rawQuery or execSQL." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + owasp-mobile: M7 + category: injection + area: database + verification-level: [L1] + paths: + include: + - "**/*.java" + patterns: + - pattern-either: + - pattern: "$DB.rawQuery($QUERY, ...)" + - pattern: "$DB.execSQL($QUERY)" + - pattern-regex: "$QUERY =~ /.*\".*\".*\\+.*/" + diff --git a/backend/toolbox/modules/android/custom_rules/vulnerable-activity.yaml b/backend/toolbox/modules/android/custom_rules/vulnerable-activity.yaml new file mode 100644 index 0000000..0cef4fc --- /dev/null +++ b/backend/toolbox/modules/android/custom_rules/vulnerable-activity.yaml @@ -0,0 +1,16 @@ +rules: + - id: vulnerable-activity + severity: WARNING + languages: [xml] + message: "Activity exported without permission." + metadata: + authors: + - Guerric ELOI (FuzzingLabs) + category: component + area: manifest + verification-level: [L1] + paths: + include: + - "**/AndroidManifest.xml" + pattern: | + ModuleMetadata: + return ModuleMetadata( + name="jadx_decompiler", + version="1.5.0", + description="Android APK decompilation using Jadx - converts DEX bytecode to Java source", + author="FuzzForge Team", + category="android", + tags=["android", "jadx", "decompilation", "reverse", "apk"], + input_schema={ + "type": "object", + "properties": { + "apk_path": { + "type": "string", + "description": "Path to the APK to decompile (absolute or relative to workspace)", + }, + "output_dir": { + "type": "string", + "description": "Directory (relative to workspace) where Jadx output should be written", + "default": "jadx_output", + }, + "overwrite": { + "type": "boolean", + "description": "Overwrite existing output directory if present", + "default": True, + }, + "threads": { + "type": "integer", + "description": "Number of Jadx decompilation threads", + "default": 4, + "minimum": 1, + "maximum": 32, + }, + "decompiler_args": { + "type": "array", + "items": {"type": "string"}, + "description": "Additional arguments passed directly to Jadx", + "default": [], + }, + }, + "required": ["apk_path"], + }, + output_schema={ + "type": "object", + "properties": { + "output_dir": { + "type": "string", + "description": "Path to decompiled output directory", + }, + "source_dir": { + "type": "string", + "description": "Path to decompiled Java sources", + }, + "resource_dir": { + "type": "string", + "description": "Path to extracted resources", + }, + "java_files": { + "type": "integer", + "description": "Number of Java files decompiled", + }, + }, + }, + requires_workspace=True, + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + apk_path = config.get("apk_path") + if not apk_path: + raise ValueError("'apk_path' must be provided for Jadx 
decompilation") + + threads = config.get("threads", 4) + if not isinstance(threads, int) or threads < 1 or threads > 32: + raise ValueError("threads must be between 1 and 32") + + return True + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute Jadx decompilation on an APK file. + + Args: + config: Configuration dict with apk_path, output_dir, etc. + workspace: Workspace directory path + + Returns: + ModuleResult with decompilation summary and metadata + """ + self.start_timer() + + try: + self.validate_config(config) + self.validate_workspace(workspace) + + workspace = workspace.resolve() + + # Resolve APK path + apk_path = Path(config["apk_path"]) + if not apk_path.is_absolute(): + apk_path = (workspace / apk_path).resolve() + + if not apk_path.exists(): + raise ValueError(f"APK not found: {apk_path}") + + if apk_path.is_dir(): + raise ValueError(f"APK path must be a file, not a directory: {apk_path}") + + logger.info(f"Decompiling APK: {apk_path}") + + # Resolve output directory + output_dir = Path(config.get("output_dir", "jadx_output")) + if not output_dir.is_absolute(): + output_dir = (workspace / output_dir).resolve() + + # Handle existing output directory + if output_dir.exists(): + if config.get("overwrite", True): + logger.info(f"Removing existing output directory: {output_dir}") + shutil.rmtree(output_dir) + else: + raise ValueError( + f"Output directory already exists: {output_dir}. Set overwrite=true to replace it." + ) + + output_dir.mkdir(parents=True, exist_ok=True) + + # Build Jadx command + threads = str(config.get("threads", 4)) + extra_args = config.get("decompiler_args", []) or [] + + cmd = [ + "jadx", + "--threads-count", + threads, + "--deobf", # Deobfuscate code + "--output-dir", + str(output_dir), + ] + cmd.extend(extra_args) + cmd.append(str(apk_path)) + + logger.info(f"Running Jadx: {' '.join(cmd)}") + + # Execute Jadx + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(workspace), + ) + + stdout, stderr = await process.communicate() + stdout_str = stdout.decode(errors="ignore") if stdout else "" + stderr_str = stderr.decode(errors="ignore") if stderr else "" + + if stdout_str: + logger.debug(f"Jadx stdout: {stdout_str[:200]}...") + if stderr_str: + logger.debug(f"Jadx stderr: {stderr_str[:200]}...") + + if process.returncode != 0: + error_output = stderr_str or stdout_str or "No error output" + raise RuntimeError( + f"Jadx failed with exit code {process.returncode}: {error_output[:500]}" + ) + + # Verify output structure + source_dir = output_dir / "sources" + resource_dir = output_dir / "resources" + + if not source_dir.exists(): + logger.warning( + f"Jadx sources directory not found at expected path: {source_dir}" + ) + # Use output_dir as fallback + source_dir = output_dir + + # Count decompiled Java files + java_files = 0 + if source_dir.exists(): + java_files = sum(1 for _ in source_dir.rglob("*.java")) + logger.info(f"Decompiled {java_files} Java files") + + # Log sample files for debugging + sample_files = [] + for idx, file_path in enumerate(source_dir.rglob("*.java")): + sample_files.append(str(file_path.relative_to(workspace))) + if idx >= 4: + break + if sample_files: + logger.debug(f"Sample Java files: {sample_files}") + + # Create summary + summary = { + "output_dir": str(output_dir), + "source_dir": str(source_dir if source_dir.exists() else output_dir), + "resource_dir": str( + resource_dir if resource_dir.exists() 
else output_dir + ), + "java_files": java_files, + "apk_name": apk_path.name, + "apk_size_bytes": apk_path.stat().st_size, + } + + metadata = { + "apk_path": str(apk_path), + "output_dir": str(output_dir), + "source_dir": summary["source_dir"], + "resource_dir": summary["resource_dir"], + "threads": threads, + "decompiler": "jadx", + "decompiler_version": "1.5.0", + } + + logger.info( + f"โœ“ Jadx decompilation completed: {java_files} Java files generated" + ) + + return self.create_result( + findings=[], # Jadx doesn't generate findings, only decompiles + status="success", + summary=summary, + metadata=metadata, + ) + + except Exception as exc: + logger.error(f"Jadx decompilation failed: {exc}", exc_info=True) + return self.create_result( + findings=[], + status="failed", + error=str(exc), + metadata={"decompiler": "jadx", "apk_path": config.get("apk_path")}, + ) diff --git a/backend/toolbox/modules/android/mobsf_scanner.py b/backend/toolbox/modules/android/mobsf_scanner.py new file mode 100644 index 0000000..3b16e1b --- /dev/null +++ b/backend/toolbox/modules/android/mobsf_scanner.py @@ -0,0 +1,437 @@ +""" +MobSF Scanner Module + +Mobile Security Framework (MobSF) integration for comprehensive Android app security analysis. +Performs static analysis on APK files including permissions, manifest analysis, code analysis, and behavior checks. +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. 
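+#
+# Typical standalone usage (illustrative sketch only; the file name and URL are
+# example values, and config keys follow the input_schema declared below):
+#
+#   scanner = MobSFScanner()
+#   result = await scanner.execute(
+#       {"file_path": "app.apk", "mobsf_url": "http://localhost:8877"},
+#       workspace=Path("/workspace"),
+#   )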
+ +import logging +import os +from collections import Counter +from pathlib import Path +from typing import Dict, Any, List +import aiohttp + +try: + from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult +except ImportError: + try: + from modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult + except ImportError: + from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult + +logger = logging.getLogger(__name__) + + +class MobSFScanner(BaseModule): + """Mobile Security Framework (MobSF) scanner module for Android applications""" + + SEVERITY_MAP = { + "dangerous": "critical", + "high": "high", + "warning": "medium", + "medium": "medium", + "low": "low", + "info": "low", + "secure": "low", + } + + def get_metadata(self) -> ModuleMetadata: + return ModuleMetadata( + name="mobsf_scanner", + version="3.9.7", + description="Comprehensive Android security analysis using Mobile Security Framework (MobSF)", + author="FuzzForge Team", + category="android", + tags=["mobile", "android", "mobsf", "sast", "scanner", "security"], + input_schema={ + "type": "object", + "properties": { + "mobsf_url": { + "type": "string", + "description": "MobSF server URL", + "default": "http://localhost:8877", + }, + "file_path": { + "type": "string", + "description": "Path to the APK file to scan (absolute or relative to workspace)", + }, + "api_key": { + "type": "string", + "description": "MobSF API key (if not provided, will try MOBSF_API_KEY env var)", + "default": None, + }, + "rescan": { + "type": "boolean", + "description": "Force rescan even if file was previously analyzed", + "default": False, + }, + }, + "required": ["file_path"], + }, + output_schema={ + "type": "object", + "properties": { + "findings": { + "type": "array", + "description": "Security findings from MobSF analysis" + }, + "scan_hash": {"type": "string"}, + "total_findings": {"type": "integer"}, + "severity_counts": {"type": "object"}, + } + }, + requires_workspace=True, + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + if "mobsf_url" in config and not isinstance(config["mobsf_url"], str): + raise ValueError("mobsf_url must be a string") + + file_path = config.get("file_path") + if not file_path: + raise ValueError("file_path is required for MobSF scanning") + + return True + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute MobSF security analysis on an APK file. + + Args: + config: Configuration dict with file_path, mobsf_url, api_key + workspace: Workspace directory path + + Returns: + ModuleResult with security findings from MobSF + """ + self.start_timer() + + try: + self.validate_config(config) + self.validate_workspace(workspace) + + # Get configuration + mobsf_url = config.get("mobsf_url", "http://localhost:8877") + file_path_str = config["file_path"] + rescan = config.get("rescan", False) + + # Get API key from config or environment + api_key = config.get("api_key") or os.environ.get("MOBSF_API_KEY", "") + if not api_key: + logger.warning("No MobSF API key provided. 
Some functionality may be limited.") + + # Resolve APK file path + file_path = Path(file_path_str) + if not file_path.is_absolute(): + file_path = (workspace / file_path).resolve() + + if not file_path.exists(): + raise FileNotFoundError(f"APK file not found: {file_path}") + + if not file_path.is_file(): + raise ValueError(f"APK path must be a file: {file_path}") + + logger.info(f"Starting MobSF scan of APK: {file_path}") + + # Upload and scan APK + scan_hash = await self._upload_file(mobsf_url, file_path, api_key) + logger.info(f"APK uploaded to MobSF with hash: {scan_hash}") + + # Start scan + await self._start_scan(mobsf_url, scan_hash, api_key, rescan=rescan) + logger.info(f"MobSF scan completed for hash: {scan_hash}") + + # Get JSON results + scan_results = await self._get_json_results(mobsf_url, scan_hash, api_key) + + # Parse results into findings + findings = self._parse_scan_results(scan_results, file_path) + + # Create summary + summary = self._create_summary(findings, scan_hash) + + logger.info(f"โœ“ MobSF scan completed: {len(findings)} findings") + + return self.create_result( + findings=findings, + status="success", + summary=summary, + metadata={ + "tool": "mobsf", + "tool_version": "3.9.7", + "scan_hash": scan_hash, + "apk_file": str(file_path), + "mobsf_url": mobsf_url, + } + ) + + except Exception as exc: + logger.error(f"MobSF scanner failed: {exc}", exc_info=True) + return self.create_result( + findings=[], + status="failed", + error=str(exc), + metadata={"tool": "mobsf", "file_path": config.get("file_path")} + ) + + async def _upload_file(self, mobsf_url: str, file_path: Path, api_key: str) -> str: + """ + Upload APK file to MobSF server. + + Returns: + Scan hash for the uploaded file + """ + headers = {'X-Mobsf-Api-Key': api_key} if api_key else {} + + # Create multipart form data + filename = file_path.name + + async with aiohttp.ClientSession() as session: + with open(file_path, 'rb') as f: + data = aiohttp.FormData() + data.add_field('file', + f, + filename=filename, + content_type='application/vnd.android.package-archive') + + async with session.post( + f"{mobsf_url}/api/v1/upload", + headers=headers, + data=data, + timeout=aiohttp.ClientTimeout(total=300) + ) as response: + if response.status != 200: + error_text = await response.text() + raise Exception(f"Failed to upload file to MobSF: {error_text}") + + result = await response.json() + scan_hash = result.get('hash') + if not scan_hash: + raise Exception(f"MobSF upload failed: {result}") + + return scan_hash + + async def _start_scan(self, mobsf_url: str, scan_hash: str, api_key: str, rescan: bool = False) -> Dict[str, Any]: + """ + Start MobSF scan for uploaded file. + + Returns: + Scan result dictionary + """ + headers = {'X-Mobsf-Api-Key': api_key} if api_key else {} + data = { + 'hash': scan_hash, + 're_scan': '1' if rescan else '0' + } + + async with aiohttp.ClientSession() as session: + async with session.post( + f"{mobsf_url}/api/v1/scan", + headers=headers, + data=data, + timeout=aiohttp.ClientTimeout(total=600) # 10 minutes for scan + ) as response: + if response.status != 200: + error_text = await response.text() + raise Exception(f"MobSF scan failed: {error_text}") + + result = await response.json() + return result + + async def _get_json_results(self, mobsf_url: str, scan_hash: str, api_key: str) -> Dict[str, Any]: + """ + Retrieve JSON scan results from MobSF. 
+ + Returns: + Scan results dictionary + """ + headers = {'X-Mobsf-Api-Key': api_key} if api_key else {} + data = {'hash': scan_hash} + + async with aiohttp.ClientSession() as session: + async with session.post( + f"{mobsf_url}/api/v1/report_json", + headers=headers, + data=data, + timeout=aiohttp.ClientTimeout(total=60) + ) as response: + if response.status != 200: + error_text = await response.text() + raise Exception(f"Failed to retrieve MobSF results: {error_text}") + + return await response.json() + + def _parse_scan_results(self, scan_data: Dict[str, Any], apk_path: Path) -> List[ModuleFinding]: + """Parse MobSF JSON results into standardized findings""" + findings = [] + + # Parse permissions + if 'permissions' in scan_data: + for perm_name, perm_attrs in scan_data['permissions'].items(): + if isinstance(perm_attrs, dict): + severity = self.SEVERITY_MAP.get( + perm_attrs.get('status', '').lower(), 'low' + ) + + finding = self.create_finding( + title=f"Android Permission: {perm_name}", + description=perm_attrs.get('description', 'No description'), + severity=severity, + category="android-permission", + metadata={ + 'permission': perm_name, + 'status': perm_attrs.get('status'), + 'info': perm_attrs.get('info'), + 'tool': 'mobsf', + } + ) + findings.append(finding) + + # Parse manifest analysis + if 'manifest_analysis' in scan_data: + manifest_findings = scan_data['manifest_analysis'].get('manifest_findings', []) + for item in manifest_findings: + if isinstance(item, dict): + severity = self.SEVERITY_MAP.get(item.get('severity', '').lower(), 'medium') + + finding = self.create_finding( + title=item.get('title') or item.get('name') or "Manifest Issue", + description=item.get('description', 'No description'), + severity=severity, + category="android-manifest", + metadata={ + 'rule': item.get('rule'), + 'tool': 'mobsf', + } + ) + findings.append(finding) + + # Parse code analysis + if 'code_analysis' in scan_data: + code_findings = scan_data['code_analysis'].get('findings', {}) + for finding_name, finding_data in code_findings.items(): + if isinstance(finding_data, dict): + metadata_dict = finding_data.get('metadata', {}) + severity = self.SEVERITY_MAP.get( + metadata_dict.get('severity', '').lower(), 'medium' + ) + + # MobSF returns 'files' as a dict: {filename: line_numbers} + files_dict = finding_data.get('files', {}) + + # Create a finding for each affected file + if isinstance(files_dict, dict) and files_dict: + for file_path, line_numbers in files_dict.items(): + finding = self.create_finding( + title=finding_name, + description=metadata_dict.get('description', 'No description'), + severity=severity, + category="android-code-analysis", + file_path=file_path, + line_number=line_numbers, # Can be string like "28" or "65,81" + metadata={ + 'cwe': metadata_dict.get('cwe'), + 'owasp': metadata_dict.get('owasp'), + 'masvs': metadata_dict.get('masvs'), + 'cvss': metadata_dict.get('cvss'), + 'ref': metadata_dict.get('ref'), + 'line_numbers': line_numbers, + 'tool': 'mobsf', + } + ) + findings.append(finding) + else: + # Fallback: create one finding without file info + finding = self.create_finding( + title=finding_name, + description=metadata_dict.get('description', 'No description'), + severity=severity, + category="android-code-analysis", + metadata={ + 'cwe': metadata_dict.get('cwe'), + 'owasp': metadata_dict.get('owasp'), + 'masvs': metadata_dict.get('masvs'), + 'cvss': metadata_dict.get('cvss'), + 'ref': metadata_dict.get('ref'), + 'tool': 'mobsf', + } + ) + findings.append(finding) + 
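+        # Rough shape of the code_analysis payload consumed above (keys inferred
+        # from this parser; values are made-up examples):
+        #   {"findings": {"insecure_random": {
+        #       "metadata": {"severity": "warning", "cwe": "...", "owasp": "..."},
+        #       "files": {"com/example/App.java": "28,65"}}}}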
+ # Parse behavior analysis + if 'behaviour' in scan_data: + for key, value in scan_data['behaviour'].items(): + if isinstance(value, dict): + metadata_dict = value.get('metadata', {}) + labels = metadata_dict.get('label', []) + label = labels[0] if labels else 'Unknown Behavior' + + severity = self.SEVERITY_MAP.get( + metadata_dict.get('severity', '').lower(), 'medium' + ) + + # MobSF returns 'files' as a dict: {filename: line_numbers} + files_dict = value.get('files', {}) + + # Create a finding for each affected file + if isinstance(files_dict, dict) and files_dict: + for file_path, line_numbers in files_dict.items(): + finding = self.create_finding( + title=f"Behavior: {label}", + description=metadata_dict.get('description', 'No description'), + severity=severity, + category="android-behavior", + file_path=file_path, + line_number=line_numbers, + metadata={ + 'line_numbers': line_numbers, + 'behavior_key': key, + 'tool': 'mobsf', + } + ) + findings.append(finding) + else: + # Fallback: create one finding without file info + finding = self.create_finding( + title=f"Behavior: {label}", + description=metadata_dict.get('description', 'No description'), + severity=severity, + category="android-behavior", + metadata={ + 'behavior_key': key, + 'tool': 'mobsf', + } + ) + findings.append(finding) + + logger.debug(f"Parsed {len(findings)} findings from MobSF results") + return findings + + def _create_summary(self, findings: List[ModuleFinding], scan_hash: str) -> Dict[str, Any]: + """Create analysis summary""" + severity_counter = Counter() + category_counter = Counter() + + for finding in findings: + severity_counter[finding.severity] += 1 + category_counter[finding.category] += 1 + + return { + "scan_hash": scan_hash, + "total_findings": len(findings), + "severity_counts": dict(severity_counter), + "category_counts": dict(category_counter), + } diff --git a/backend/toolbox/modules/android/opengrep_android.py b/backend/toolbox/modules/android/opengrep_android.py new file mode 100644 index 0000000..01e32c4 --- /dev/null +++ b/backend/toolbox/modules/android/opengrep_android.py @@ -0,0 +1,440 @@ +""" +OpenGrep Android Static Analysis Module + +Pattern-based static analysis for Android applications using OpenGrep/Semgrep +with Android-specific security rules. +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. 
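+#
+# Typical standalone usage (illustrative sketch; the rules path and workspace
+# location are assumed values, and config keys follow the input_schema below):
+#
+#   analyzer = OpenGrepAndroid()
+#   result = await analyzer.execute(
+#       {"custom_rules_path": "modules/android/custom_rules",
+#        "languages": ["java", "kotlin"]},
+#       workspace=Path("/workspace/jadx_output/sources"),
+#   )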
+ +import asyncio +import json +import logging +from pathlib import Path +from typing import Dict, Any, List + +try: + from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult +except ImportError: + try: + from modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult + except ImportError: + from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult + +logger = logging.getLogger(__name__) + + +class OpenGrepAndroid(BaseModule): + """OpenGrep static analysis module specialized for Android security""" + + def get_metadata(self) -> ModuleMetadata: + """Get module metadata""" + return ModuleMetadata( + name="opengrep_android", + version="1.45.0", + description="Android-focused static analysis using OpenGrep/Semgrep with custom security rules for Java/Kotlin", + author="FuzzForge Team", + category="android", + tags=["sast", "android", "opengrep", "semgrep", "java", "kotlin", "security"], + input_schema={ + "type": "object", + "properties": { + "config": { + "type": "string", + "enum": ["auto", "p/security-audit", "p/owasp-top-ten", "p/cwe-top-25"], + "default": "auto", + "description": "Rule configuration to use" + }, + "custom_rules_path": { + "type": "string", + "description": "Path to a directory containing custom OpenGrep rules (Android-specific rules recommended)", + "default": None, + }, + "languages": { + "type": "array", + "items": {"type": "string"}, + "description": "Specific languages to analyze (defaults to java, kotlin for Android)", + "default": ["java", "kotlin"], + }, + "include_patterns": { + "type": "array", + "items": {"type": "string"}, + "description": "File patterns to include", + "default": [], + }, + "exclude_patterns": { + "type": "array", + "items": {"type": "string"}, + "description": "File patterns to exclude", + "default": [], + }, + "max_target_bytes": { + "type": "integer", + "default": 1000000, + "description": "Maximum file size to analyze (bytes)" + }, + "timeout": { + "type": "integer", + "default": 300, + "description": "Analysis timeout in seconds" + }, + "severity": { + "type": "array", + "items": {"type": "string", "enum": ["ERROR", "WARNING", "INFO"]}, + "default": ["ERROR", "WARNING", "INFO"], + "description": "Minimum severity levels to report" + }, + "confidence": { + "type": "array", + "items": {"type": "string", "enum": ["HIGH", "MEDIUM", "LOW"]}, + "default": ["HIGH", "MEDIUM", "LOW"], + "description": "Minimum confidence levels to report" + } + } + }, + output_schema={ + "type": "object", + "properties": { + "findings": { + "type": "array", + "description": "Security findings from OpenGrep analysis" + }, + "total_findings": {"type": "integer"}, + "severity_counts": {"type": "object"}, + "files_analyzed": {"type": "integer"}, + } + }, + requires_workspace=True, + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate configuration""" + timeout = config.get("timeout", 300) + if not isinstance(timeout, int) or timeout < 30 or timeout > 3600: + raise ValueError("Timeout must be between 30 and 3600 seconds") + + max_bytes = config.get("max_target_bytes", 1000000) + if not isinstance(max_bytes, int) or max_bytes < 1000 or max_bytes > 10000000: + raise ValueError("max_target_bytes must be between 1000 and 10000000") + + custom_rules_path = config.get("custom_rules_path") + if custom_rules_path: + rules_path = Path(custom_rules_path) + if not rules_path.exists(): + logger.warning(f"Custom rules path does not exist: {custom_rules_path}") + + return 
True + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """Execute OpenGrep static analysis on Android code""" + self.start_timer() + + try: + # Validate inputs + self.validate_config(config) + self.validate_workspace(workspace) + + logger.info(f"Running OpenGrep Android analysis on {workspace}") + + # Build opengrep command + cmd = ["opengrep", "scan", "--json"] + + # Add configuration + custom_rules_path = config.get("custom_rules_path") + use_custom_rules = False + if custom_rules_path and Path(custom_rules_path).exists(): + cmd.extend(["--config", custom_rules_path]) + use_custom_rules = True + logger.info(f"Using custom Android rules from: {custom_rules_path}") + else: + config_type = config.get("config", "auto") + if config_type == "auto": + cmd.extend(["--config", "auto"]) + else: + cmd.extend(["--config", config_type]) + + # Add timeout + cmd.extend(["--timeout", str(config.get("timeout", 300))]) + + # Add max target bytes + cmd.extend(["--max-target-bytes", str(config.get("max_target_bytes", 1000000))]) + + # Add languages if specified (but NOT when using custom rules) + languages = config.get("languages", ["java", "kotlin"]) + if languages and not use_custom_rules: + langs = ",".join(languages) + cmd.extend(["--lang", langs]) + logger.debug(f"Analyzing languages: {langs}") + + # Add include patterns + include_patterns = config.get("include_patterns", []) + for pattern in include_patterns: + cmd.extend(["--include", pattern]) + + # Add exclude patterns + exclude_patterns = config.get("exclude_patterns", []) + for pattern in exclude_patterns: + cmd.extend(["--exclude", pattern]) + + # Add severity filter if single level requested + severity_levels = config.get("severity", ["ERROR", "WARNING", "INFO"]) + if severity_levels and len(severity_levels) == 1: + cmd.extend(["--severity", severity_levels[0]]) + + # Disable metrics collection + cmd.append("--disable-version-check") + cmd.append("--no-git-ignore") + + # Add target directory + cmd.append(str(workspace)) + + logger.debug(f"Running command: {' '.join(cmd)}") + + # Run OpenGrep + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=workspace + ) + + stdout, stderr = await process.communicate() + + # Parse results + findings = [] + if process.returncode in [0, 1]: # 0 = no findings, 1 = findings found + findings = self._parse_opengrep_output(stdout.decode(), workspace, config) + logger.info(f"OpenGrep found {len(findings)} potential security issues") + else: + error_msg = stderr.decode() + logger.error(f"OpenGrep failed: {error_msg}") + return self.create_result( + findings=[], + status="failed", + error=f"OpenGrep execution failed (exit code {process.returncode}): {error_msg[:500]}" + ) + + # Create summary + summary = self._create_summary(findings) + + return self.create_result( + findings=findings, + status="success", + summary=summary, + metadata={ + "tool": "opengrep", + "tool_version": "1.45.0", + "languages": languages, + "custom_rules": bool(custom_rules_path), + } + ) + + except Exception as e: + logger.error(f"OpenGrep Android module failed: {e}", exc_info=True) + return self.create_result( + findings=[], + status="failed", + error=str(e) + ) + + def _parse_opengrep_output(self, output: str, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]: + """Parse OpenGrep JSON output into findings""" + findings = [] + + if not output.strip(): + return findings + + try: + data = json.loads(output) + results 
= data.get("results", []) + logger.debug(f"OpenGrep returned {len(results)} raw results") + + # Get filtering criteria + allowed_severities = set(config.get("severity", ["ERROR", "WARNING", "INFO"])) + allowed_confidences = set(config.get("confidence", ["HIGH", "MEDIUM", "LOW"])) + + for result in results: + # Extract basic info + rule_id = result.get("check_id", "unknown") + message = result.get("message", "") + extra = result.get("extra", {}) + severity = extra.get("severity", "INFO").upper() + + # File location info + path_info = result.get("path", "") + start_line = result.get("start", {}).get("line", 0) + end_line = result.get("end", {}).get("line", 0) + + # Code snippet + lines = extra.get("lines", "") + + # Metadata + rule_metadata = extra.get("metadata", {}) + cwe = rule_metadata.get("cwe", []) + owasp = rule_metadata.get("owasp", []) + confidence = extra.get("confidence", rule_metadata.get("confidence", "MEDIUM")).upper() + + # Apply severity filter + if severity not in allowed_severities: + continue + + # Apply confidence filter + if confidence not in allowed_confidences: + continue + + # Make file path relative to workspace + if path_info: + try: + rel_path = Path(path_info).relative_to(workspace) + path_info = str(rel_path) + except ValueError: + pass + + # Map severity to our standard levels + finding_severity = self._map_severity(severity) + + # Create finding + finding = self.create_finding( + title=f"Android Security: {rule_id}", + description=message or f"OpenGrep rule {rule_id} triggered", + severity=finding_severity, + category=self._get_category(rule_id, extra), + file_path=path_info if path_info else None, + line_start=start_line if start_line > 0 else None, + line_end=end_line if end_line > 0 and end_line != start_line else None, + code_snippet=lines.strip() if lines else None, + recommendation=self._get_recommendation(rule_id, extra), + metadata={ + "rule_id": rule_id, + "opengrep_severity": severity, + "confidence": confidence, + "cwe": cwe, + "owasp": owasp, + "fix": extra.get("fix", ""), + "impact": extra.get("impact", ""), + "likelihood": extra.get("likelihood", ""), + "references": extra.get("references", []), + "tool": "opengrep", + } + ) + + findings.append(finding) + + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse OpenGrep output: {e}. 
Output snippet: {output[:200]}...") + except Exception as e: + logger.warning(f"Error processing OpenGrep results: {e}", exc_info=True) + + return findings + + def _map_severity(self, opengrep_severity: str) -> str: + """Map OpenGrep severity to our standard severity levels""" + severity_map = { + "ERROR": "high", + "WARNING": "medium", + "INFO": "low" + } + return severity_map.get(opengrep_severity.upper(), "medium") + + def _get_category(self, rule_id: str, extra: Dict[str, Any]) -> str: + """Determine finding category based on rule and metadata""" + rule_metadata = extra.get("metadata", {}) + cwe_list = rule_metadata.get("cwe", []) + owasp_list = rule_metadata.get("owasp", []) + + rule_lower = rule_id.lower() + + # Android-specific categories + if "injection" in rule_lower or "sql" in rule_lower: + return "injection" + elif "intent" in rule_lower: + return "android-intent" + elif "webview" in rule_lower: + return "android-webview" + elif "deeplink" in rule_lower: + return "android-deeplink" + elif "storage" in rule_lower or "sharedpreferences" in rule_lower: + return "android-storage" + elif "logging" in rule_lower or "log" in rule_lower: + return "android-logging" + elif "clipboard" in rule_lower: + return "android-clipboard" + elif "activity" in rule_lower or "service" in rule_lower or "provider" in rule_lower: + return "android-component" + elif "crypto" in rule_lower or "encrypt" in rule_lower: + return "cryptography" + elif "hardcode" in rule_lower or "secret" in rule_lower: + return "secrets" + elif "auth" in rule_lower: + return "authentication" + elif cwe_list: + return f"cwe-{cwe_list[0]}" + elif owasp_list: + return f"owasp-{owasp_list[0].replace(' ', '-').lower()}" + else: + return "android-security" + + def _get_recommendation(self, rule_id: str, extra: Dict[str, Any]) -> str: + """Generate recommendation based on rule and metadata""" + fix_suggestion = extra.get("fix", "") + if fix_suggestion: + return fix_suggestion + + rule_lower = rule_id.lower() + + # Android-specific recommendations + if "injection" in rule_lower or "sql" in rule_lower: + return "Use parameterized queries or Room database with type-safe queries to prevent SQL injection." + elif "intent" in rule_lower: + return "Validate all incoming Intent data and use explicit Intents when possible to prevent Intent manipulation attacks." + elif "webview" in rule_lower and "javascript" in rule_lower: + return "Disable JavaScript in WebView if not needed, or implement proper JavaScript interfaces with @JavascriptInterface annotation." + elif "deeplink" in rule_lower: + return "Validate all deeplink URLs and sanitize user input to prevent deeplink hijacking attacks." + elif "storage" in rule_lower or "sharedpreferences" in rule_lower: + return "Encrypt sensitive data before storing in SharedPreferences or use EncryptedSharedPreferences for Android API 23+." + elif "logging" in rule_lower: + return "Remove sensitive data from logs in production builds. Use ProGuard/R8 to strip logging statements." + elif "clipboard" in rule_lower: + return "Avoid placing sensitive data on the clipboard. If necessary, clear clipboard data when no longer needed." + elif "crypto" in rule_lower: + return "Use modern cryptographic algorithms (AES-GCM, RSA-OAEP) and Android Keystore for key management." + elif "hardcode" in rule_lower or "secret" in rule_lower: + return "Remove hardcoded secrets. Use Android Keystore, environment variables, or secure configuration management." 
+ else: + return "Review this Android security issue and apply appropriate fixes based on Android security best practices." + + def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]: + """Create analysis summary""" + severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0} + category_counts = {} + rule_counts = {} + + for finding in findings: + # Count by severity + severity_counts[finding.severity] += 1 + + # Count by category + category = finding.category + category_counts[category] = category_counts.get(category, 0) + 1 + + # Count by rule + rule_id = finding.metadata.get("rule_id", "unknown") + rule_counts[rule_id] = rule_counts.get(rule_id, 0) + 1 + + return { + "total_findings": len(findings), + "severity_counts": severity_counts, + "category_counts": category_counts, + "top_rules": dict(sorted(rule_counts.items(), key=lambda x: x[1], reverse=True)[:10]), + "files_analyzed": len(set(f.file_path for f in findings if f.file_path)) + } diff --git a/backend/toolbox/modules/scanner/__init__.py b/backend/toolbox/modules/scanner/__init__.py index ae02119..3efefe6 100644 --- a/backend/toolbox/modules/scanner/__init__.py +++ b/backend/toolbox/modules/scanner/__init__.py @@ -10,5 +10,6 @@ # Additional attribution and requirements are provided in the NOTICE file. from .file_scanner import FileScanner +from .dependency_scanner import DependencyScanner -__all__ = ["FileScanner"] \ No newline at end of file +__all__ = ["FileScanner", "DependencyScanner"] \ No newline at end of file diff --git a/backend/toolbox/modules/scanner/dependency_scanner.py b/backend/toolbox/modules/scanner/dependency_scanner.py new file mode 100644 index 0000000..4c7791c --- /dev/null +++ b/backend/toolbox/modules/scanner/dependency_scanner.py @@ -0,0 +1,302 @@ +""" +Dependency Scanner Module - Scans Python dependencies for known vulnerabilities using pip-audit +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +import asyncio +import json +import logging +import time +from pathlib import Path +from typing import Dict, Any, List + +try: + from toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding +except ImportError: + try: + from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + except ImportError: + from src.toolbox.modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding + +logger = logging.getLogger(__name__) + + +class DependencyScanner(BaseModule): + """ + Scans Python dependencies for known vulnerabilities using pip-audit. 
+ + This module: + - Discovers dependency files (requirements.txt, pyproject.toml, setup.py, Pipfile) + - Runs pip-audit to check for vulnerable dependencies + - Reports CVEs with severity and affected versions + """ + + def get_metadata(self) -> ModuleMetadata: + """Get module metadata""" + return ModuleMetadata( + name="dependency_scanner", + version="1.0.0", + description="Scans Python dependencies for known vulnerabilities", + author="FuzzForge Team", + category="scanner", + tags=["dependencies", "cve", "vulnerabilities", "pip-audit"], + input_schema={ + "dependency_files": { + "type": "array", + "items": {"type": "string"}, + "description": "List of dependency files to scan (auto-discovered if empty)", + "default": [] + }, + "ignore_vulns": { + "type": "array", + "items": {"type": "string"}, + "description": "List of vulnerability IDs to ignore", + "default": [] + } + }, + output_schema={ + "findings": { + "type": "array", + "description": "List of vulnerable dependencies with CVE information" + } + }, + requires_workspace=True + ) + + def validate_config(self, config: Dict[str, Any]) -> bool: + """Validate module configuration""" + dep_files = config.get("dependency_files", []) + if not isinstance(dep_files, list): + raise ValueError("dependency_files must be a list") + + ignore_vulns = config.get("ignore_vulns", []) + if not isinstance(ignore_vulns, list): + raise ValueError("ignore_vulns must be a list") + + return True + + def _discover_dependency_files(self, workspace: Path) -> List[Path]: + """ + Discover Python dependency files in workspace. + + Returns: + List of discovered dependency file paths + """ + dependency_patterns = [ + "requirements.txt", + "*requirements*.txt", + "pyproject.toml", + "setup.py", + "Pipfile", + "poetry.lock" + ] + + found_files = [] + for pattern in dependency_patterns: + found_files.extend(workspace.rglob(pattern)) + + # Deduplicate and return + unique_files = list(set(found_files)) + logger.info(f"Discovered {len(unique_files)} dependency files") + return unique_files + + async def _run_pip_audit(self, file_path: Path) -> Dict[str, Any]: + """ + Run pip-audit on a specific dependency file. + + Args: + file_path: Path to dependency file + + Returns: + pip-audit JSON output as dict + """ + try: + # Run pip-audit with JSON output + cmd = [ + "pip-audit", + "--requirement", str(file_path), + "--format", "json", + "--progress-spinner", "off" + ] + + logger.info(f"Running pip-audit on: {file_path.name}") + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + + stdout, stderr = await process.communicate() + + # pip-audit returns 0 if no vulns, 1 if vulns found + if process.returncode not in [0, 1]: + logger.error(f"pip-audit failed: {stderr.decode()}") + return {"dependencies": []} + + # Parse JSON output + result = json.loads(stdout.decode()) + return result + + except Exception as e: + logger.error(f"Error running pip-audit on {file_path}: {e}") + return {"dependencies": []} + + def _convert_to_findings( + self, + audit_result: Dict[str, Any], + file_path: Path, + workspace: Path, + ignore_vulns: List[str] + ) -> List[ModuleFinding]: + """ + Convert pip-audit results to ModuleFindings. 
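+
+        Shape of the pip-audit payload handled here (field names follow the
+        parsing below; values are made-up examples):
+            {"dependencies": [{"name": "examplepkg", "version": "1.0.0",
+                               "vulns": [{"id": "PYSEC-0000-0000",
+                                          "fix_versions": ["1.0.1"]}]}]}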
+ + Args: + audit_result: pip-audit JSON output + file_path: Path to scanned file + workspace: Workspace path for relative path calculation + ignore_vulns: List of vulnerability IDs to ignore + + Returns: + List of ModuleFindings + """ + findings = [] + + # pip-audit format: {"dependencies": [{package, version, vulns: []}]} + for dep in audit_result.get("dependencies", []): + package_name = dep.get("name", "unknown") + package_version = dep.get("version", "unknown") + vulnerabilities = dep.get("vulns", []) + + for vuln in vulnerabilities: + vuln_id = vuln.get("id", "UNKNOWN") + + # Skip if in ignore list + if vuln_id in ignore_vulns: + logger.debug(f"Ignoring vulnerability: {vuln_id}") + continue + + description = vuln.get("description", "No description available") + fix_versions = vuln.get("fix_versions", []) + + # Map CVSS scores to severity + # pip-audit doesn't always provide CVSS, so we default to medium + severity = "medium" + + # Try to get relative path + try: + rel_path = file_path.relative_to(workspace) + except ValueError: + rel_path = file_path + + recommendation = f"Upgrade {package_name} to a fixed version: {', '.join(fix_versions)}" if fix_versions else f"Check for updates to {package_name}" + + finding = self.create_finding( + title=f"Vulnerable dependency: {package_name} ({vuln_id})", + description=f"{description}\n\nAffected package: {package_name} {package_version}", + severity=severity, + category="vulnerable-dependency", + file_path=str(rel_path), + recommendation=recommendation, + metadata={ + "cve_id": vuln_id, + "package": package_name, + "installed_version": package_version, + "fix_versions": fix_versions, + "aliases": vuln.get("aliases", []), + "link": vuln.get("link", "") + } + ) + findings.append(finding) + + return findings + + async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult: + """ + Execute the dependency scanning module. 
+ + Args: + config: Module configuration + workspace: Path to workspace + + Returns: + ModuleResult with vulnerability findings + """ + start_time = time.time() + metadata = self.get_metadata() + + # Validate inputs + self.validate_config(config) + self.validate_workspace(workspace) + + # Get configuration + specified_files = config.get("dependency_files", []) + ignore_vulns = config.get("ignore_vulns", []) + + # Discover or use specified dependency files + if specified_files: + dep_files = [workspace / f for f in specified_files] + else: + dep_files = self._discover_dependency_files(workspace) + + if not dep_files: + logger.warning("No dependency files found in workspace") + return ModuleResult( + module=metadata.name, + version=metadata.version, + status="success", + execution_time=time.time() - start_time, + findings=[], + summary={ + "total_files": 0, + "total_vulnerabilities": 0, + "vulnerable_packages": 0 + } + ) + + # Scan each dependency file + all_findings = [] + files_scanned = 0 + + for dep_file in dep_files: + if not dep_file.exists(): + logger.warning(f"Dependency file not found: {dep_file}") + continue + + logger.info(f"Scanning dependencies in: {dep_file.name}") + audit_result = await self._run_pip_audit(dep_file) + findings = self._convert_to_findings(audit_result, dep_file, workspace, ignore_vulns) + + all_findings.extend(findings) + files_scanned += 1 + + # Calculate summary + unique_packages = len(set(f.metadata.get("package") for f in all_findings)) + + execution_time = time.time() - start_time + + return ModuleResult( + module=metadata.name, + version=metadata.version, + status="success", + execution_time=execution_time, + findings=all_findings, + summary={ + "total_files": files_scanned, + "total_vulnerabilities": len(all_findings), + "vulnerable_packages": unique_packages + }, + metadata={ + "scanned_files": [str(f.name) for f in dep_files if f.exists()] + } + ) diff --git a/backend/toolbox/modules/secret_detection/llm_secret_detector.py b/backend/toolbox/modules/secret_detection/llm_secret_detector.py index 3ba96f8..1adf341 100644 --- a/backend/toolbox/modules/secret_detection/llm_secret_detector.py +++ b/backend/toolbox/modules/secret_detection/llm_secret_detector.py @@ -107,7 +107,8 @@ class LLMSecretDetectorModule(BaseModule): ) agent_url = config.get("agent_url") - if not agent_url or not isinstance(agent_url, str): + # agent_url is optional - will have default from metadata.yaml + if agent_url is not None and not isinstance(agent_url, str): raise ValueError("agent_url must be a valid URL string") max_files = config.get("max_files", 20) @@ -131,14 +132,14 @@ class LLMSecretDetectorModule(BaseModule): logger.info(f"Starting LLM secret detection in workspace: {workspace}") - # Extract configuration - agent_url = config.get("agent_url", "http://fuzzforge-task-agent:8000/a2a/litellm_agent") - llm_model = config.get("llm_model", "gpt-4o-mini") - llm_provider = config.get("llm_provider", "openai") - file_patterns = config.get("file_patterns", ["*.py", "*.js", "*.ts", "*.java", "*.go", "*.env", "*.yaml", "*.yml", "*.json", "*.xml", "*.ini", "*.sql", "*.properties", "*.sh", "*.bat", "*.config", "*.conf", "*.toml", "*id_rsa*", "*.txt"]) - max_files = config.get("max_files", 20) - max_file_size = config.get("max_file_size", 30000) - timeout = config.get("timeout", 30) # Reduced from 45s + # Extract configuration (defaults come from metadata.yaml via API) + agent_url = config["agent_url"] + llm_model = config["llm_model"] + llm_provider = config["llm_provider"] + 
file_patterns = config["file_patterns"] + max_files = config["max_files"] + max_file_size = config["max_file_size"] + timeout = config["timeout"] # Find files to analyze # Skip files that are unlikely to contain secrets diff --git a/backend/toolbox/workflows/android_static_analysis/__init__.py b/backend/toolbox/workflows/android_static_analysis/__init__.py new file mode 100644 index 0000000..aec13c5 --- /dev/null +++ b/backend/toolbox/workflows/android_static_analysis/__init__.py @@ -0,0 +1,35 @@ +""" +Android Static Analysis Workflow + +Comprehensive Android application security testing combining: +- Jadx APK decompilation +- OpenGrep/Semgrep static analysis with Android-specific rules +- MobSF mobile security framework analysis +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +from .workflow import AndroidStaticAnalysisWorkflow +from .activities import ( + decompile_with_jadx_activity, + scan_with_opengrep_activity, + scan_with_mobsf_activity, + generate_android_sarif_activity, +) + +__all__ = [ + "AndroidStaticAnalysisWorkflow", + "decompile_with_jadx_activity", + "scan_with_opengrep_activity", + "scan_with_mobsf_activity", + "generate_android_sarif_activity", +] diff --git a/backend/toolbox/workflows/android_static_analysis/activities.py b/backend/toolbox/workflows/android_static_analysis/activities.py new file mode 100644 index 0000000..5d37729 --- /dev/null +++ b/backend/toolbox/workflows/android_static_analysis/activities.py @@ -0,0 +1,213 @@ +""" +Android Static Analysis Workflow Activities + +Activities for the Android security testing workflow: +- decompile_with_jadx_activity: Decompile APK using Jadx +- scan_with_opengrep_activity: Analyze code with OpenGrep/Semgrep +- scan_with_mobsf_activity: Scan APK with MobSF +- generate_android_sarif_activity: Generate combined SARIF report +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +import logging +import sys +from pathlib import Path + +from temporalio import activity + +# Configure logging +logger = logging.getLogger(__name__) + +# Add toolbox to path for module imports +sys.path.insert(0, '/app/toolbox') + + +@activity.defn(name="decompile_with_jadx") +async def decompile_with_jadx_activity(workspace_path: str, config: dict) -> dict: + """ + Decompile Android APK to Java source code using Jadx. 
+ + Args: + workspace_path: Path to the workspace directory + config: JadxDecompiler configuration + + Returns: + Decompilation results dictionary + """ + logger.info(f"Activity: decompile_with_jadx (workspace={workspace_path})") + + try: + from modules.android import JadxDecompiler + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + decompiler = JadxDecompiler() + result = await decompiler.execute(config, workspace) + + logger.info( + f"โœ“ Jadx decompilation completed: " + f"{result.summary.get('java_files', 0)} Java files generated" + ) + return result.dict() + + except Exception as e: + logger.error(f"Jadx decompilation failed: {e}", exc_info=True) + raise + + +@activity.defn(name="scan_with_opengrep") +async def scan_with_opengrep_activity(workspace_path: str, config: dict) -> dict: + """ + Analyze Android code for security issues using OpenGrep/Semgrep. + + Args: + workspace_path: Path to the workspace directory + config: OpenGrepAndroid configuration + + Returns: + Analysis results dictionary + """ + logger.info(f"Activity: scan_with_opengrep (workspace={workspace_path})") + + try: + from modules.android import OpenGrepAndroid + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + analyzer = OpenGrepAndroid() + result = await analyzer.execute(config, workspace) + + logger.info( + f"โœ“ OpenGrep analysis completed: " + f"{result.summary.get('total_findings', 0)} security issues found" + ) + return result.dict() + + except Exception as e: + logger.error(f"OpenGrep analysis failed: {e}", exc_info=True) + raise + + +@activity.defn(name="scan_with_mobsf") +async def scan_with_mobsf_activity(workspace_path: str, config: dict) -> dict: + """ + Analyze Android APK for security issues using MobSF. + + Args: + workspace_path: Path to the workspace directory + config: MobSFScanner configuration + + Returns: + Scan results dictionary (or skipped status if MobSF unavailable) + """ + logger.info(f"Activity: scan_with_mobsf (workspace={workspace_path})") + + # Check if MobSF is installed (graceful degradation for ARM64 platform) + mobsf_path = Path("/app/mobsf") + if not mobsf_path.exists(): + logger.warning("MobSF not installed on this platform (ARM64/Rosetta limitation)") + return { + "status": "skipped", + "findings": [], + "summary": { + "total_findings": 0, + "skip_reason": "MobSF unavailable on ARM64 platform (Rosetta 2 incompatibility)" + } + } + + try: + from modules.android import MobSFScanner + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + scanner = MobSFScanner() + result = await scanner.execute(config, workspace) + + logger.info( + f"โœ“ MobSF scan completed: " + f"{result.summary.get('total_findings', 0)} findings" + ) + return result.dict() + + except Exception as e: + logger.error(f"MobSF scan failed: {e}", exc_info=True) + raise + + +@activity.defn(name="generate_android_sarif") +async def generate_android_sarif_activity( + jadx_result: dict, + opengrep_result: dict, + mobsf_result: dict, + config: dict, + workspace_path: str +) -> dict: + """ + Generate combined SARIF report from all Android security findings. 
+ + Args: + jadx_result: Jadx decompilation results + opengrep_result: OpenGrep analysis results + mobsf_result: MobSF scan results (may be None if disabled) + config: Reporter configuration + workspace_path: Workspace path + + Returns: + SARIF report dictionary + """ + logger.info("Activity: generate_android_sarif") + + try: + from modules.reporter import SARIFReporter + + workspace = Path(workspace_path) + + # Collect all findings + all_findings = [] + all_findings.extend(opengrep_result.get("findings", [])) + + if mobsf_result: + all_findings.extend(mobsf_result.get("findings", [])) + + # Prepare reporter config + reporter_config = { + **(config or {}), + "findings": all_findings, + "tool_name": "FuzzForge Android Static Analysis", + "tool_version": "1.0.0", + "metadata": { + "jadx_version": "1.5.0", + "opengrep_version": "1.45.0", + "mobsf_version": "3.9.7", + "java_files_decompiled": jadx_result.get("summary", {}).get("java_files", 0), + } + } + + reporter = SARIFReporter() + result = await reporter.execute(reporter_config, workspace) + + sarif_report = result.dict().get("sarif", {}) + + logger.info(f"โœ“ SARIF report generated with {len(all_findings)} findings") + + return sarif_report + + except Exception as e: + logger.error(f"SARIF report generation failed: {e}", exc_info=True) + raise diff --git a/backend/toolbox/workflows/android_static_analysis/metadata.yaml b/backend/toolbox/workflows/android_static_analysis/metadata.yaml new file mode 100644 index 0000000..cd77e48 --- /dev/null +++ b/backend/toolbox/workflows/android_static_analysis/metadata.yaml @@ -0,0 +1,172 @@ +name: android_static_analysis +version: "1.0.0" +vertical: android +description: "Comprehensive Android application security testing using Jadx decompilation, OpenGrep static analysis, and MobSF mobile security framework" +author: "FuzzForge Team" +tags: + - "android" + - "mobile" + - "static-analysis" + - "security" + - "opengrep" + - "semgrep" + - "mobsf" + - "jadx" + - "apk" + - "sarif" + +# Workspace isolation mode +# Using "shared" mode for read-only APK analysis (no file modifications except decompilation output) +workspace_isolation: "shared" + +parameters: + type: object + properties: + apk_path: + type: string + description: "Path to the APK file to analyze (relative to uploaded target or absolute within workspace)" + default: "" + + decompile_apk: + type: boolean + description: "Whether to decompile APK with Jadx before OpenGrep analysis" + default: true + + jadx_config: + type: object + description: "Jadx decompiler configuration" + properties: + output_dir: + type: string + description: "Output directory for decompiled sources" + default: "jadx_output" + overwrite: + type: boolean + description: "Overwrite existing decompilation output" + default: true + threads: + type: integer + description: "Number of decompilation threads" + default: 4 + minimum: 1 + maximum: 32 + decompiler_args: + type: array + items: + type: string + description: "Additional Jadx arguments" + default: [] + + opengrep_config: + type: object + description: "OpenGrep/Semgrep static analysis configuration" + properties: + config: + type: string + enum: ["auto", "p/security-audit", "p/owasp-top-ten", "p/cwe-top-25"] + description: "Preset OpenGrep ruleset (ignored if custom_rules_path is set)" + default: "auto" + custom_rules_path: + type: string + description: "Path to custom OpenGrep rules directory (use Android-specific rules for best results)" + default: "/app/toolbox/modules/android/custom_rules" + languages: + type: array + 
items: + type: string + description: "Programming languages to analyze (defaults to java, kotlin for Android)" + default: ["java", "kotlin"] + include_patterns: + type: array + items: + type: string + description: "File patterns to include in scan" + default: [] + exclude_patterns: + type: array + items: + type: string + description: "File patterns to exclude from scan" + default: [] + max_target_bytes: + type: integer + description: "Maximum file size to analyze (bytes)" + default: 1000000 + timeout: + type: integer + description: "Analysis timeout in seconds" + default: 300 + severity: + type: array + items: + type: string + enum: ["ERROR", "WARNING", "INFO"] + description: "Severity levels to include in results" + default: ["ERROR", "WARNING", "INFO"] + confidence: + type: array + items: + type: string + enum: ["HIGH", "MEDIUM", "LOW"] + description: "Confidence levels to include in results" + default: ["HIGH", "MEDIUM", "LOW"] + + mobsf_config: + type: object + description: "MobSF scanner configuration" + properties: + enabled: + type: boolean + description: "Enable MobSF analysis (requires APK file)" + default: true + mobsf_url: + type: string + description: "MobSF server URL" + default: "http://localhost:8877" + api_key: + type: string + description: "MobSF API key (if not provided, uses MOBSF_API_KEY env var)" + default: null + rescan: + type: boolean + description: "Force rescan even if APK was previously analyzed" + default: false + + reporter_config: + type: object + description: "SARIF reporter configuration" + properties: + include_code_flows: + type: boolean + description: "Include code flow information in SARIF output" + default: false + logical_id: + type: string + description: "Custom identifier for the SARIF report" + default: null + +output_schema: + type: object + properties: + sarif: + type: object + description: "SARIF-formatted findings from all Android security tools" + summary: + type: object + description: "Android security analysis summary" + properties: + total_findings: + type: integer + decompiled_java_files: + type: integer + description: "Number of Java files decompiled by Jadx" + opengrep_findings: + type: integer + description: "Findings from OpenGrep/Semgrep analysis" + mobsf_findings: + type: integer + description: "Findings from MobSF analysis" + severity_distribution: + type: object + category_distribution: + type: object diff --git a/backend/toolbox/workflows/android_static_analysis/workflow.py b/backend/toolbox/workflows/android_static_analysis/workflow.py new file mode 100644 index 0000000..8376cd2 --- /dev/null +++ b/backend/toolbox/workflows/android_static_analysis/workflow.py @@ -0,0 +1,289 @@ +""" +Android Static Analysis Workflow - Temporal Version + +Comprehensive security testing for Android applications using Jadx, OpenGrep, and MobSF. +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. 
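# --- Illustrative configuration sketch (editor's example, not used by the code) ---
# A minimal payload for this workflow's run() arguments; the keys and default values
# mirror the metadata.yaml schema above, and the apk_path value is a hypothetical
# placeholder for an APK inside the uploaded target.
EXAMPLE_ANDROID_PARAMS = {
    "apk_path": "app-release.apk",  # assumed file name, for illustration only
    "decompile_apk": True,
    "jadx_config": {"output_dir": "jadx_output", "overwrite": True, "threads": 4},
    "opengrep_config": {"config": "auto", "languages": ["java", "kotlin"], "timeout": 300},
    "mobsf_config": {"enabled": True, "mobsf_url": "http://localhost:8877", "rescan": False},
    "reporter_config": {"include_code_flows": False},
}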
+ +from datetime import timedelta +from typing import Dict, Any, Optional +from pathlib import Path + +from temporalio import workflow +from temporalio.common import RetryPolicy + +# Import activity interfaces (will be executed by worker) +with workflow.unsafe.imports_passed_through(): + import logging + +logger = logging.getLogger(__name__) + + +@workflow.defn +class AndroidStaticAnalysisWorkflow: + """ + Android Static Application Security Testing workflow. + + This workflow: + 1. Downloads target (APK) from MinIO + 2. (Optional) Decompiles APK using Jadx + 3. Runs OpenGrep/Semgrep static analysis on decompiled code + 4. (Optional) Runs MobSF comprehensive security scan + 5. Generates a SARIF report with all findings + 6. Uploads results to MinIO + 7. Cleans up cache + """ + + @workflow.run + async def run( + self, + target_id: str, + apk_path: Optional[str] = None, + decompile_apk: bool = True, + jadx_config: Optional[Dict[str, Any]] = None, + opengrep_config: Optional[Dict[str, Any]] = None, + mobsf_config: Optional[Dict[str, Any]] = None, + reporter_config: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """ + Main workflow execution. + + Args: + target_id: UUID of the uploaded target (APK) in MinIO + apk_path: Path to APK file within target (if target is not a single APK) + decompile_apk: Whether to decompile APK with Jadx before OpenGrep + jadx_config: Configuration for Jadx decompiler + opengrep_config: Configuration for OpenGrep analyzer + mobsf_config: Configuration for MobSF scanner + reporter_config: Configuration for SARIF reporter + + Returns: + Dictionary containing SARIF report and summary + """ + workflow_id = workflow.info().workflow_id + + workflow.logger.info( + f"Starting AndroidStaticAnalysisWorkflow " + f"(workflow_id={workflow_id}, target_id={target_id})" + ) + + # Default configurations + if not jadx_config: + jadx_config = { + "output_dir": "jadx_output", + "overwrite": True, + "threads": 4, + "decompiler_args": [] + } + + if not opengrep_config: + opengrep_config = { + "config": "auto", + "custom_rules_path": "/app/toolbox/modules/android/custom_rules", + "languages": ["java", "kotlin"], + "severity": ["ERROR", "WARNING", "INFO"], + "confidence": ["HIGH", "MEDIUM", "LOW"], + "timeout": 300, + } + + if not mobsf_config: + mobsf_config = { + "enabled": True, + "mobsf_url": "http://localhost:8877", + "api_key": None, + "rescan": False, + } + + if not reporter_config: + reporter_config = { + "include_code_flows": False + } + + # Activity retry policy + retry_policy = RetryPolicy( + initial_interval=timedelta(seconds=1), + maximum_interval=timedelta(seconds=60), + maximum_attempts=3, + backoff_coefficient=2.0, + ) + + # Phase 0: Download target from MinIO + workflow.logger.info(f"Phase 0: Downloading target from MinIO (target_id={target_id})") + workspace_path = await workflow.execute_activity( + "get_target", + args=[target_id, workflow.info().workflow_id, "shared"], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=retry_policy, + ) + workflow.logger.info(f"โœ“ Target downloaded to: {workspace_path}") + + # Handle case where workspace_path is a file (single APK upload) + # vs. 
a directory containing files + workspace_path_obj = Path(workspace_path) + + # Determine actual workspace directory and APK path + if apk_path: + # User explicitly provided apk_path + actual_apk_path = apk_path + # workspace_path could be either a file or directory + # If it's a file and apk_path matches the filename, use parent as workspace + if workspace_path_obj.name == apk_path: + workspace_path = str(workspace_path_obj.parent) + workflow.logger.info(f"Adjusted workspace to parent directory: {workspace_path}") + else: + # No apk_path provided - check if workspace_path is an APK file + if workspace_path_obj.suffix.lower() == '.apk' or workspace_path_obj.name.endswith('.apk'): + # workspace_path is the APK file itself + actual_apk_path = workspace_path_obj.name + workspace_path = str(workspace_path_obj.parent) + workflow.logger.info(f"Detected single APK file: {actual_apk_path}, workspace: {workspace_path}") + else: + # workspace_path is a directory, need to find APK within it + actual_apk_path = None + workflow.logger.info("Workspace is a directory, APK detection will be handled by modules") + + # Phase 1: Jadx decompilation (if enabled and APK provided) + jadx_result = None + analysis_workspace = workspace_path + + if decompile_apk and actual_apk_path: + workflow.logger.info(f"Phase 1: Decompiling APK with Jadx (apk={actual_apk_path})") + + jadx_activity_config = { + **jadx_config, + "apk_path": actual_apk_path + } + + jadx_result = await workflow.execute_activity( + "decompile_with_jadx", + args=[workspace_path, jadx_activity_config], + start_to_close_timeout=timedelta(minutes=15), + retry_policy=retry_policy, + ) + + if jadx_result.get("status") == "success": + # Use decompiled sources as workspace for OpenGrep + source_dir = jadx_result.get("summary", {}).get("source_dir") + if source_dir: + analysis_workspace = source_dir + workflow.logger.info( + f"โœ“ Jadx decompiled {jadx_result.get('summary', {}).get('java_files', 0)} Java files" + ) + else: + workflow.logger.warning(f"Jadx decompilation failed: {jadx_result.get('error')}") + else: + workflow.logger.info("Phase 1: Jadx decompilation skipped") + + # Phase 2: OpenGrep static analysis + workflow.logger.info(f"Phase 2: OpenGrep analysis on {analysis_workspace}") + + opengrep_result = await workflow.execute_activity( + "scan_with_opengrep", + args=[analysis_workspace, opengrep_config], + start_to_close_timeout=timedelta(minutes=20), + retry_policy=retry_policy, + ) + + workflow.logger.info( + f"โœ“ OpenGrep completed: {opengrep_result.get('summary', {}).get('total_findings', 0)} findings" + ) + + # Phase 3: MobSF analysis (if enabled and APK provided) + mobsf_result = None + + if mobsf_config.get("enabled", True) and actual_apk_path: + workflow.logger.info(f"Phase 3: MobSF scan on APK: {actual_apk_path}") + + mobsf_activity_config = { + **mobsf_config, + "file_path": actual_apk_path + } + + try: + mobsf_result = await workflow.execute_activity( + "scan_with_mobsf", + args=[workspace_path, mobsf_activity_config], + start_to_close_timeout=timedelta(minutes=30), + retry_policy=RetryPolicy( + maximum_attempts=2 # MobSF can be flaky, limit retries + ), + ) + + # Handle skipped or completed status + if mobsf_result.get("status") == "skipped": + workflow.logger.warning( + f"โš ๏ธ MobSF skipped: {mobsf_result.get('summary', {}).get('skip_reason', 'Unknown reason')}" + ) + else: + workflow.logger.info( + f"โœ“ MobSF completed: {mobsf_result.get('summary', {}).get('total_findings', 0)} findings" + ) + except Exception as e: + 
workflow.logger.warning(f"MobSF scan failed (continuing without it): {e}") + mobsf_result = None + else: + workflow.logger.info("Phase 3: MobSF scan skipped (disabled or no APK)") + + # Phase 4: Generate SARIF report + workflow.logger.info("Phase 4: Generating SARIF report") + + sarif_report = await workflow.execute_activity( + "generate_android_sarif", + args=[jadx_result or {}, opengrep_result, mobsf_result, reporter_config, workspace_path], + start_to_close_timeout=timedelta(minutes=5), + retry_policy=retry_policy, + ) + + # Phase 5: Upload results to MinIO + workflow.logger.info("Phase 5: Uploading results to MinIO") + + result_url = await workflow.execute_activity( + "upload_results", + args=[workflow.info().workflow_id, sarif_report, "sarif"], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=retry_policy, + ) + + workflow.logger.info(f"โœ“ Results uploaded: {result_url}") + + # Phase 6: Cleanup cache + workflow.logger.info("Phase 6: Cleaning up cache") + + await workflow.execute_activity( + "cleanup_cache", + args=[workspace_path, "shared"], + start_to_close_timeout=timedelta(minutes=5), + retry_policy=RetryPolicy(maximum_attempts=1), # Don't retry cleanup + ) + + # Calculate summary + total_findings = len(sarif_report.get("runs", [{}])[0].get("results", [])) + + summary = { + "workflow": "android_static_analysis", + "target_id": target_id, + "total_findings": total_findings, + "decompiled_java_files": (jadx_result or {}).get("summary", {}).get("java_files", 0) if jadx_result else 0, + "opengrep_findings": opengrep_result.get("summary", {}).get("total_findings", 0), + "mobsf_findings": mobsf_result.get("summary", {}).get("total_findings", 0) if mobsf_result else 0, + "result_url": result_url, + } + + workflow.logger.info( + f"โœ… AndroidStaticAnalysisWorkflow completed successfully: {total_findings} findings" + ) + + return { + "sarif": sarif_report, + "summary": summary, + } diff --git a/backend/toolbox/workflows/atheris_fuzzing/metadata.yaml b/backend/toolbox/workflows/atheris_fuzzing/metadata.yaml index b079804..c119aad 100644 --- a/backend/toolbox/workflows/atheris_fuzzing/metadata.yaml +++ b/backend/toolbox/workflows/atheris_fuzzing/metadata.yaml @@ -16,11 +16,6 @@ tags: # - "copy-on-write": Download once, copy for each run (balances performance and isolation) workspace_isolation: "isolated" -default_parameters: - target_file: null - max_iterations: 1000000 - timeout_seconds: 1800 - parameters: type: object properties: diff --git a/backend/toolbox/workflows/cargo_fuzzing/metadata.yaml b/backend/toolbox/workflows/cargo_fuzzing/metadata.yaml index 39ff622..829a1f3 100644 --- a/backend/toolbox/workflows/cargo_fuzzing/metadata.yaml +++ b/backend/toolbox/workflows/cargo_fuzzing/metadata.yaml @@ -16,12 +16,6 @@ tags: # - "copy-on-write": Download once, copy for each run (balances performance and isolation) workspace_isolation: "isolated" -default_parameters: - target_name: null - max_iterations: 1000000 - timeout_seconds: 1800 - sanitizer: "address" - parameters: type: object properties: diff --git a/backend/toolbox/workflows/gitleaks_detection/metadata.yaml b/backend/toolbox/workflows/gitleaks_detection/metadata.yaml index d2c343c..ad4ae45 100644 --- a/backend/toolbox/workflows/gitleaks_detection/metadata.yaml +++ b/backend/toolbox/workflows/gitleaks_detection/metadata.yaml @@ -30,13 +30,5 @@ parameters: default: false description: "Scan files without Git context" -default_parameters: - scan_mode: "detect" - redact: true - no_git: false - required_modules: - "gitleaks" 
- -supported_volume_modes: - - "ro" diff --git a/backend/toolbox/workflows/llm_analysis/metadata.yaml b/backend/toolbox/workflows/llm_analysis/metadata.yaml index 0a388bf..2631b59 100644 --- a/backend/toolbox/workflows/llm_analysis/metadata.yaml +++ b/backend/toolbox/workflows/llm_analysis/metadata.yaml @@ -13,38 +13,84 @@ tags: # Workspace isolation mode workspace_isolation: "shared" -default_parameters: - agent_url: "http://fuzzforge-task-agent:8000/a2a/litellm_agent" - llm_model: "gpt-5-mini" - llm_provider: "openai" - max_files: 5 - parameters: type: object properties: agent_url: type: string description: "A2A agent endpoint URL" + default: "http://fuzzforge-task-agent:8000/a2a/litellm_agent" llm_model: type: string description: "LLM model to use (e.g., gpt-4o-mini, claude-3-5-sonnet)" + default: "gpt-5-mini" llm_provider: type: string description: "LLM provider (openai, anthropic, etc.)" + default: "openai" file_patterns: type: array items: type: string - description: "File patterns to analyze (e.g., ['*.py', '*.js'])" + default: + - "*.py" + - "*.js" + - "*.ts" + - "*.jsx" + - "*.tsx" + - "*.java" + - "*.go" + - "*.rs" + - "*.c" + - "*.cpp" + - "*.h" + - "*.hpp" + - "*.cs" + - "*.php" + - "*.rb" + - "*.swift" + - "*.kt" + - "*.scala" + - "*.env" + - "*.yaml" + - "*.yml" + - "*.json" + - "*.xml" + - "*.ini" + - "*.sql" + - "*.properties" + - "*.sh" + - "*.bat" + - "*.ps1" + - "*.config" + - "*.conf" + - "*.toml" + - "*id_rsa*" + - "*id_dsa*" + - "*id_ecdsa*" + - "*id_ed25519*" + - "*.pem" + - "*.key" + - "*.pub" + - "*.txt" + - "*.md" + - "Dockerfile" + - "docker-compose.yml" + - ".gitignore" + - ".dockerignore" + description: "File patterns to analyze for security issues and secrets" max_files: type: integer description: "Maximum number of files to analyze" + default: 10 max_file_size: type: integer description: "Maximum file size in bytes" + default: 100000 timeout: type: integer description: "Timeout per file in seconds" + default: 90 output_schema: type: object diff --git a/backend/toolbox/workflows/llm_secret_detection/metadata.yaml b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml index cf761ef..a97b859 100644 --- a/backend/toolbox/workflows/llm_secret_detection/metadata.yaml +++ b/backend/toolbox/workflows/llm_secret_detection/metadata.yaml @@ -30,14 +30,42 @@ parameters: type: integer default: 20 -default_parameters: - agent_url: "http://fuzzforge-task-agent:8000/a2a/litellm_agent" - llm_model: "gpt-5-mini" - llm_provider: "openai" - max_files: 20 + max_file_size: + type: integer + default: 30000 + description: "Maximum file size in bytes" + + timeout: + type: integer + default: 30 + description: "Timeout per file in seconds" + + file_patterns: + type: array + items: + type: string + default: + - "*.py" + - "*.js" + - "*.ts" + - "*.java" + - "*.go" + - "*.env" + - "*.yaml" + - "*.yml" + - "*.json" + - "*.xml" + - "*.ini" + - "*.sql" + - "*.properties" + - "*.sh" + - "*.bat" + - "*.config" + - "*.conf" + - "*.toml" + - "*id_rsa*" + - "*.txt" + description: "File patterns to scan for secrets" required_modules: - "llm_secret_detector" - -supported_volume_modes: - - "ro" diff --git a/backend/toolbox/workflows/llm_secret_detection/workflow.py b/backend/toolbox/workflows/llm_secret_detection/workflow.py index 4f693d0..a0c66d2 100644 --- a/backend/toolbox/workflows/llm_secret_detection/workflow.py +++ b/backend/toolbox/workflows/llm_secret_detection/workflow.py @@ -17,6 +17,7 @@ class LlmSecretDetectionWorkflow: llm_model: Optional[str] = None, llm_provider: 
Optional[str] = None, max_files: Optional[int] = None, + max_file_size: Optional[int] = None, timeout: Optional[int] = None, file_patterns: Optional[list] = None ) -> Dict[str, Any]: @@ -67,6 +68,8 @@ class LlmSecretDetectionWorkflow: config["llm_provider"] = llm_provider if max_files: config["max_files"] = max_files + if max_file_size: + config["max_file_size"] = max_file_size if timeout: config["timeout"] = timeout if file_patterns: diff --git a/backend/toolbox/workflows/ossfuzz_campaign/metadata.yaml b/backend/toolbox/workflows/ossfuzz_campaign/metadata.yaml index fbc1d51..d6766f9 100644 --- a/backend/toolbox/workflows/ossfuzz_campaign/metadata.yaml +++ b/backend/toolbox/workflows/ossfuzz_campaign/metadata.yaml @@ -16,13 +16,6 @@ tags: # OSS-Fuzz campaigns use isolated mode for safe concurrent campaigns workspace_isolation: "isolated" -default_parameters: - project_name: null - campaign_duration_hours: 1 - override_engine: null - override_sanitizer: null - max_iterations: null - parameters: type: object required: diff --git a/backend/toolbox/workflows/python_sast/__init__.py b/backend/toolbox/workflows/python_sast/__init__.py new file mode 100644 index 0000000..e436884 --- /dev/null +++ b/backend/toolbox/workflows/python_sast/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. diff --git a/backend/toolbox/workflows/python_sast/activities.py b/backend/toolbox/workflows/python_sast/activities.py new file mode 100644 index 0000000..fea884f --- /dev/null +++ b/backend/toolbox/workflows/python_sast/activities.py @@ -0,0 +1,191 @@ +""" +Python SAST Workflow Activities + +Activities specific to the Python SAST workflow: +- scan_dependencies_activity: Scan Python dependencies for CVEs using pip-audit +- analyze_with_bandit_activity: Analyze Python code for security issues using Bandit +- analyze_with_mypy_activity: Analyze Python code for type safety using Mypy +- generate_python_sast_sarif_activity: Generate SARIF report from all findings +""" + +import logging +import sys +from pathlib import Path + +from temporalio import activity + +# Configure logging +logger = logging.getLogger(__name__) + +# Add toolbox to path for module imports +sys.path.insert(0, '/app/toolbox') + + +@activity.defn(name="scan_dependencies") +async def scan_dependencies_activity(workspace_path: str, config: dict) -> dict: + """ + Scan Python dependencies for known vulnerabilities using pip-audit. 
+ + Args: + workspace_path: Path to the workspace directory + config: DependencyScanner configuration + + Returns: + Scanner results dictionary + """ + logger.info(f"Activity: scan_dependencies (workspace={workspace_path})") + + try: + from modules.scanner import DependencyScanner + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + scanner = DependencyScanner() + result = await scanner.execute(config, workspace) + + logger.info( + f"โœ“ Dependency scanning completed: " + f"{result.summary.get('total_vulnerabilities', 0)} vulnerabilities found" + ) + return result.dict() + + except Exception as e: + logger.error(f"Dependency scanning failed: {e}", exc_info=True) + raise + + +@activity.defn(name="analyze_with_bandit") +async def analyze_with_bandit_activity(workspace_path: str, config: dict) -> dict: + """ + Analyze Python code for security issues using Bandit. + + Args: + workspace_path: Path to the workspace directory + config: BanditAnalyzer configuration + + Returns: + Analysis results dictionary + """ + logger.info(f"Activity: analyze_with_bandit (workspace={workspace_path})") + + try: + from modules.analyzer import BanditAnalyzer + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + analyzer = BanditAnalyzer() + result = await analyzer.execute(config, workspace) + + logger.info( + f"โœ“ Bandit analysis completed: " + f"{result.summary.get('total_issues', 0)} security issues found" + ) + return result.dict() + + except Exception as e: + logger.error(f"Bandit analysis failed: {e}", exc_info=True) + raise + + +@activity.defn(name="analyze_with_mypy") +async def analyze_with_mypy_activity(workspace_path: str, config: dict) -> dict: + """ + Analyze Python code for type safety issues using Mypy. + + Args: + workspace_path: Path to the workspace directory + config: MypyAnalyzer configuration + + Returns: + Analysis results dictionary + """ + logger.info(f"Activity: analyze_with_mypy (workspace={workspace_path})") + + try: + from modules.analyzer import MypyAnalyzer + + workspace = Path(workspace_path) + if not workspace.exists(): + raise FileNotFoundError(f"Workspace not found: {workspace_path}") + + analyzer = MypyAnalyzer() + result = await analyzer.execute(config, workspace) + + logger.info( + f"โœ“ Mypy analysis completed: " + f"{result.summary.get('total_errors', 0)} type errors found" + ) + return result.dict() + + except Exception as e: + logger.error(f"Mypy analysis failed: {e}", exc_info=True) + raise + + +@activity.defn(name="generate_python_sast_sarif") +async def generate_python_sast_sarif_activity( + dependency_results: dict, + bandit_results: dict, + mypy_results: dict, + config: dict, + workspace_path: str +) -> dict: + """ + Generate SARIF report from all SAST analysis results. 
+ + Args: + dependency_results: Results from dependency scanner + bandit_results: Results from Bandit analyzer + mypy_results: Results from Mypy analyzer + config: Reporter configuration + workspace_path: Path to the workspace + + Returns: + SARIF report dictionary + """ + logger.info("Activity: generate_python_sast_sarif") + + try: + from modules.reporter import SARIFReporter + + workspace = Path(workspace_path) + + # Combine findings from all modules + all_findings = [] + + # Add dependency scanner findings + dependency_findings = dependency_results.get("findings", []) + all_findings.extend(dependency_findings) + + # Add Bandit findings + bandit_findings = bandit_results.get("findings", []) + all_findings.extend(bandit_findings) + + # Add Mypy findings + mypy_findings = mypy_results.get("findings", []) + all_findings.extend(mypy_findings) + + # Prepare reporter config + reporter_config = { + **config, + "findings": all_findings, + "tool_name": "FuzzForge Python SAST", + "tool_version": "1.0.0" + } + + reporter = SARIFReporter() + result = await reporter.execute(reporter_config, workspace) + + # Extract SARIF from result + sarif = result.dict().get("sarif", {}) + + logger.info(f"โœ“ SARIF report generated with {len(all_findings)} findings") + return sarif + + except Exception as e: + logger.error(f"SARIF report generation failed: {e}", exc_info=True) + raise diff --git a/backend/toolbox/workflows/python_sast/metadata.yaml b/backend/toolbox/workflows/python_sast/metadata.yaml new file mode 100644 index 0000000..c7e209c --- /dev/null +++ b/backend/toolbox/workflows/python_sast/metadata.yaml @@ -0,0 +1,110 @@ +name: python_sast +version: "1.0.0" +vertical: python +description: "Python Static Application Security Testing (SAST) workflow combining dependency scanning (pip-audit), security linting (Bandit), and type checking (Mypy)" +author: "FuzzForge Team" +tags: + - "python" + - "sast" + - "security" + - "type-checking" + - "dependencies" + - "bandit" + - "mypy" + - "pip-audit" + - "sarif" + +# Workspace isolation mode (system-level configuration) +# Using "shared" mode for read-only SAST analysis (no file modifications) +workspace_isolation: "shared" + +parameters: + type: object + properties: + dependency_config: + type: object + description: "Dependency scanner (pip-audit) configuration" + properties: + dependency_files: + type: array + items: + type: string + description: "List of dependency files to scan (auto-discovered if empty)" + default: [] + ignore_vulns: + type: array + items: + type: string + description: "List of vulnerability IDs to ignore" + default: [] + bandit_config: + type: object + description: "Bandit security analyzer configuration" + properties: + severity_level: + type: string + enum: ["low", "medium", "high"] + description: "Minimum severity level to report" + default: "low" + confidence_level: + type: string + enum: ["low", "medium", "high"] + description: "Minimum confidence level to report" + default: "medium" + exclude_tests: + type: boolean + description: "Exclude test files from analysis" + default: true + skip_ids: + type: array + items: + type: string + description: "List of Bandit test IDs to skip" + default: [] + mypy_config: + type: object + description: "Mypy type checker configuration" + properties: + strict_mode: + type: boolean + description: "Enable strict type checking" + default: false + ignore_missing_imports: + type: boolean + description: "Ignore errors about missing imports" + default: true + follow_imports: + type: string + enum: ["normal", 
"silent", "skip", "error"] + description: "How to handle imports" + default: "silent" + reporter_config: + type: object + description: "SARIF reporter configuration" + properties: + include_code_flows: + type: boolean + description: "Include code flow information" + default: false + +output_schema: + type: object + properties: + sarif: + type: object + description: "SARIF-formatted SAST findings from all tools" + summary: + type: object + description: "SAST execution summary" + properties: + total_findings: + type: integer + vulnerabilities: + type: integer + description: "CVEs found in dependencies" + security_issues: + type: integer + description: "Security issues found by Bandit" + type_errors: + type: integer + description: "Type errors found by Mypy" diff --git a/backend/toolbox/workflows/python_sast/workflow.py b/backend/toolbox/workflows/python_sast/workflow.py new file mode 100644 index 0000000..6d56a47 --- /dev/null +++ b/backend/toolbox/workflows/python_sast/workflow.py @@ -0,0 +1,265 @@ +""" +Python SAST Workflow - Temporal Version + +Static Application Security Testing for Python projects using multiple tools. +""" + +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +from datetime import timedelta +from typing import Dict, Any, Optional + +from temporalio import workflow +from temporalio.common import RetryPolicy + +# Import activity interfaces (will be executed by worker) +with workflow.unsafe.imports_passed_through(): + import logging + +logger = logging.getLogger(__name__) + + +@workflow.defn +class PythonSastWorkflow: + """ + Python Static Application Security Testing workflow. + + This workflow: + 1. Downloads target from MinIO + 2. Runs dependency scanning (pip-audit for CVEs) + 3. Runs security linting (Bandit for security issues) + 4. Runs type checking (Mypy for type safety) + 5. Generates a SARIF report with all findings + 6. Uploads results to MinIO + 7. Cleans up cache + """ + + @workflow.run + async def run( + self, + target_id: str, + dependency_config: Optional[Dict[str, Any]] = None, + bandit_config: Optional[Dict[str, Any]] = None, + mypy_config: Optional[Dict[str, Any]] = None, + reporter_config: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """ + Main workflow execution. 
+ + Args: + target_id: UUID of the uploaded target in MinIO + dependency_config: Configuration for dependency scanner + bandit_config: Configuration for Bandit analyzer + mypy_config: Configuration for Mypy analyzer + reporter_config: Configuration for SARIF reporter + + Returns: + Dictionary containing SARIF report and summary + """ + workflow_id = workflow.info().workflow_id + + workflow.logger.info( + f"Starting PythonSASTWorkflow " + f"(workflow_id={workflow_id}, target_id={target_id})" + ) + + # Default configurations + if not dependency_config: + dependency_config = { + "dependency_files": [], # Auto-discover + "ignore_vulns": [] + } + + if not bandit_config: + bandit_config = { + "severity_level": "low", + "confidence_level": "medium", + "exclude_tests": True, + "skip_ids": [] + } + + if not mypy_config: + mypy_config = { + "strict_mode": False, + "ignore_missing_imports": True, + "follow_imports": "silent" + } + + if not reporter_config: + reporter_config = { + "include_code_flows": False + } + + results = { + "workflow_id": workflow_id, + "target_id": target_id, + "status": "running", + "steps": [] + } + + try: + # Get run ID for workspace isolation (using shared mode for read-only analysis) + run_id = workflow.info().run_id + + # Step 1: Download target from MinIO + workflow.logger.info("Step 1: Downloading target from MinIO") + target_path = await workflow.execute_activity( + "get_target", + args=[target_id, run_id, "shared"], # target_id, run_id, workspace_isolation + start_to_close_timeout=timedelta(minutes=5), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=1), + maximum_interval=timedelta(seconds=30), + maximum_attempts=3 + ) + ) + results["steps"].append({ + "step": "download_target", + "status": "success", + "target_path": target_path + }) + workflow.logger.info(f"โœ“ Target downloaded to: {target_path}") + + # Step 2: Dependency scanning (pip-audit) + workflow.logger.info("Step 2: Scanning dependencies for vulnerabilities") + dependency_results = await workflow.execute_activity( + "scan_dependencies", + args=[target_path, dependency_config], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + results["steps"].append({ + "step": "dependency_scanning", + "status": "success", + "vulnerabilities": dependency_results.get("summary", {}).get("total_vulnerabilities", 0) + }) + workflow.logger.info( + f"โœ“ Dependency scanning completed: " + f"{dependency_results.get('summary', {}).get('total_vulnerabilities', 0)} vulnerabilities" + ) + + # Step 3: Security linting (Bandit) + workflow.logger.info("Step 3: Analyzing security issues with Bandit") + bandit_results = await workflow.execute_activity( + "analyze_with_bandit", + args=[target_path, bandit_config], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + results["steps"].append({ + "step": "bandit_analysis", + "status": "success", + "issues": bandit_results.get("summary", {}).get("total_issues", 0) + }) + workflow.logger.info( + f"โœ“ Bandit analysis completed: " + f"{bandit_results.get('summary', {}).get('total_issues', 0)} security issues" + ) + + # Step 4: Type checking (Mypy) + workflow.logger.info("Step 4: Type checking with Mypy") + mypy_results = await workflow.execute_activity( + "analyze_with_mypy", + args=[target_path, 
mypy_config], + start_to_close_timeout=timedelta(minutes=10), + retry_policy=RetryPolicy( + initial_interval=timedelta(seconds=2), + maximum_interval=timedelta(seconds=60), + maximum_attempts=2 + ) + ) + results["steps"].append({ + "step": "mypy_analysis", + "status": "success", + "type_errors": mypy_results.get("summary", {}).get("total_errors", 0) + }) + workflow.logger.info( + f"โœ“ Mypy analysis completed: " + f"{mypy_results.get('summary', {}).get('total_errors', 0)} type errors" + ) + + # Step 5: Generate SARIF report + workflow.logger.info("Step 5: Generating SARIF report") + sarif_report = await workflow.execute_activity( + "generate_python_sast_sarif", + args=[dependency_results, bandit_results, mypy_results, reporter_config, target_path], + start_to_close_timeout=timedelta(minutes=5) + ) + results["steps"].append({ + "step": "report_generation", + "status": "success" + }) + + # Count total findings in SARIF + total_findings = 0 + if sarif_report and "runs" in sarif_report: + total_findings = len(sarif_report["runs"][0].get("results", [])) + + workflow.logger.info(f"โœ“ SARIF report generated with {total_findings} findings") + + # Step 6: Upload results to MinIO + workflow.logger.info("Step 6: Uploading results") + try: + results_url = await workflow.execute_activity( + "upload_results", + args=[workflow_id, sarif_report, "sarif"], + start_to_close_timeout=timedelta(minutes=2) + ) + results["results_url"] = results_url + workflow.logger.info(f"โœ“ Results uploaded to: {results_url}") + except Exception as e: + workflow.logger.warning(f"Failed to upload results: {e}") + results["results_url"] = None + + # Step 7: Cleanup cache + workflow.logger.info("Step 7: Cleaning up cache") + try: + await workflow.execute_activity( + "cleanup_cache", + args=[target_path, "shared"], # target_path, workspace_isolation + start_to_close_timeout=timedelta(minutes=1) + ) + workflow.logger.info("โœ“ Cache cleaned up (skipped for shared mode)") + except Exception as e: + workflow.logger.warning(f"Cache cleanup failed: {e}") + + # Mark workflow as successful + results["status"] = "success" + results["sarif"] = sarif_report + results["summary"] = { + "total_findings": total_findings, + "vulnerabilities": dependency_results.get("summary", {}).get("total_vulnerabilities", 0), + "security_issues": bandit_results.get("summary", {}).get("total_issues", 0), + "type_errors": mypy_results.get("summary", {}).get("total_errors", 0) + } + workflow.logger.info(f"โœ“ Workflow completed successfully: {workflow_id}") + + return results + + except Exception as e: + workflow.logger.error(f"Workflow failed: {e}") + results["status"] = "error" + results["error"] = str(e) + results["steps"].append({ + "step": "error", + "status": "failed", + "error": str(e) + }) + raise diff --git a/backend/toolbox/workflows/security_assessment/metadata.yaml b/backend/toolbox/workflows/security_assessment/metadata.yaml index 572e50c..09addbd 100644 --- a/backend/toolbox/workflows/security_assessment/metadata.yaml +++ b/backend/toolbox/workflows/security_assessment/metadata.yaml @@ -18,11 +18,6 @@ tags: # Using "shared" mode for read-only security analysis (no file modifications) workspace_isolation: "shared" -default_parameters: - scanner_config: {} - analyzer_config: {} - reporter_config: {} - parameters: type: object properties: diff --git a/backend/toolbox/workflows/trufflehog_detection/metadata.yaml b/backend/toolbox/workflows/trufflehog_detection/metadata.yaml index 1a147f0..d725061 100644 --- 
a/backend/toolbox/workflows/trufflehog_detection/metadata.yaml +++ b/backend/toolbox/workflows/trufflehog_detection/metadata.yaml @@ -23,12 +23,5 @@ parameters: default: 10 description: "Maximum directory depth to scan" -default_parameters: - verify: true - max_depth: 10 - required_modules: - "trufflehog" - -supported_volume_modes: - - "ro" diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 1b8ddd9..4a71d1e 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fuzzforge-cli" -version = "0.7.0" +version = "0.7.3" description = "FuzzForge CLI - Command-line interface for FuzzForge security testing platform" readme = "README.md" authors = [ diff --git a/cli/src/fuzzforge_cli/__init__.py b/cli/src/fuzzforge_cli/__init__.py index 9d26c75..cc4a071 100644 --- a/cli/src/fuzzforge_cli/__init__.py +++ b/cli/src/fuzzforge_cli/__init__.py @@ -16,4 +16,4 @@ with local project management and persistent storage. # Additional attribution and requirements are provided in the NOTICE file. -__version__ = "0.6.0" \ No newline at end of file +__version__ = "0.7.3" \ No newline at end of file diff --git a/cli/src/fuzzforge_cli/commands/__init__.py b/cli/src/fuzzforge_cli/commands/__init__.py index 7e53182..afcf0d9 100644 --- a/cli/src/fuzzforge_cli/commands/__init__.py +++ b/cli/src/fuzzforge_cli/commands/__init__.py @@ -12,3 +12,6 @@ Command modules for FuzzForge CLI. # # Additional attribution and requirements are provided in the NOTICE file. +from . import worker + +__all__ = ["worker"] diff --git a/cli/src/fuzzforge_cli/commands/findings.py b/cli/src/fuzzforge_cli/commands/findings.py index 6335db1..7058527 100644 --- a/cli/src/fuzzforge_cli/commands/findings.py +++ b/cli/src/fuzzforge_cli/commands/findings.py @@ -253,15 +253,15 @@ def display_finding_detail(finding: Dict[str, Any], tool: Dict[str, Any], run_id content_lines.append(f"[bold]Tool:[/bold] {tool.get('name', 'Unknown')} v{tool.get('version', 'unknown')}") content_lines.append(f"[bold]Run ID:[/bold] {run_id}") content_lines.append("") - content_lines.append(f"[bold]Summary:[/bold]") + content_lines.append("[bold]Summary:[/bold]") content_lines.append(message_text) content_lines.append("") - content_lines.append(f"[bold]Description:[/bold]") + content_lines.append("[bold]Description:[/bold]") content_lines.append(message_markdown) if code_snippet: content_lines.append("") - content_lines.append(f"[bold]Code Snippet:[/bold]") + content_lines.append("[bold]Code Snippet:[/bold]") content_lines.append(f"[dim]{code_snippet}[/dim]") content = "\n".join(content_lines) @@ -270,7 +270,7 @@ def display_finding_detail(finding: Dict[str, Any], tool: Dict[str, Any], run_id console.print() console.print(Panel( content, - title=f"๐Ÿ” Finding Detail", + title="๐Ÿ” Finding Detail", border_style=severity_color, box=box.ROUNDED, padding=(1, 2) diff --git a/cli/src/fuzzforge_cli/commands/init.py b/cli/src/fuzzforge_cli/commands/init.py index 9aa4ca7..ceb3586 100644 --- a/cli/src/fuzzforge_cli/commands/init.py +++ b/cli/src/fuzzforge_cli/commands/init.py @@ -187,19 +187,40 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: console.print("๐Ÿง  Configuring AI environment...") console.print(" โ€ข Default LLM provider: openai") - console.print(" โ€ข Default LLM model: gpt-5-mini") + console.print(" โ€ข Default LLM model: litellm_proxy/gpt-5-mini") console.print(" โ€ข To customise provider/model later, edit .fuzzforge/.env") llm_provider = "openai" - llm_model = "gpt-5-mini" + llm_model = 
"litellm_proxy/gpt-5-mini" + + # Check for global virtual keys from volumes/env/.env + global_env_key = None + for parent in fuzzforge_dir.parents: + global_env = parent / "volumes" / "env" / ".env" + if global_env.exists(): + try: + for line in global_env.read_text(encoding="utf-8").splitlines(): + if line.strip().startswith("OPENAI_API_KEY=") and "=" in line: + key_value = line.split("=", 1)[1].strip() + if key_value and not key_value.startswith("your-") and key_value.startswith("sk-"): + global_env_key = key_value + console.print(f" โ€ข Found virtual key in {global_env.relative_to(parent)}") + break + except Exception: + pass + break api_key = Prompt.ask( - "OpenAI API key (leave blank to fill manually)", + "OpenAI API key (leave blank to use global virtual key)" if global_env_key else "OpenAI API key (leave blank to fill manually)", default="", show_default=False, console=console, ) + # Use global key if user didn't provide one + if not api_key and global_env_key: + api_key = global_env_key + session_db_path = fuzzforge_dir / "fuzzforge_sessions.db" session_db_rel = session_db_path.relative_to(fuzzforge_dir.parent) @@ -210,14 +231,20 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: f"LLM_PROVIDER={llm_provider}", f"LLM_MODEL={llm_model}", f"LITELLM_MODEL={llm_model}", + "LLM_ENDPOINT=http://localhost:10999", + "LLM_API_KEY=", + "LLM_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large", + "LLM_EMBEDDING_ENDPOINT=http://localhost:10999", f"OPENAI_API_KEY={api_key}", "FUZZFORGE_MCP_URL=http://localhost:8010/mcp", "", "# Cognee configuration mirrors the primary LLM by default", f"LLM_COGNEE_PROVIDER={llm_provider}", f"LLM_COGNEE_MODEL={llm_model}", - f"LLM_COGNEE_API_KEY={api_key}", - "LLM_COGNEE_ENDPOINT=", + "LLM_COGNEE_ENDPOINT=http://localhost:10999", + "LLM_COGNEE_API_KEY=", + "LLM_COGNEE_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large", + "LLM_COGNEE_EMBEDDING_ENDPOINT=http://localhost:10999", "COGNEE_MCP_URL=", "", "# Session persistence options: inmemory | sqlite", @@ -239,6 +266,8 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None: for line in env_lines: if line.startswith("OPENAI_API_KEY="): template_lines.append("OPENAI_API_KEY=") + elif line.startswith("LLM_API_KEY="): + template_lines.append("LLM_API_KEY=") elif line.startswith("LLM_COGNEE_API_KEY="): template_lines.append("LLM_COGNEE_API_KEY=") else: diff --git a/cli/src/fuzzforge_cli/commands/worker.py b/cli/src/fuzzforge_cli/commands/worker.py new file mode 100644 index 0000000..06b8b03 --- /dev/null +++ b/cli/src/fuzzforge_cli/commands/worker.py @@ -0,0 +1,225 @@ +""" +Worker management commands for FuzzForge CLI. + +Provides commands to start, stop, and list Temporal workers. +""" +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. 
+ +import subprocess +import sys +import typer +from pathlib import Path +from rich.console import Console +from rich.table import Table +from typing import Optional + +from ..worker_manager import WorkerManager + +console = Console() +app = typer.Typer( + name="worker", + help="๐Ÿ”ง Manage Temporal workers", + no_args_is_help=True, +) + + +@app.command("stop") +def stop_workers( + all: bool = typer.Option( + False, "--all", + help="Stop all workers (default behavior, flag for clarity)" + ) +): + """ + ๐Ÿ›‘ Stop all running FuzzForge workers. + + This command stops all worker containers using the proper Docker Compose + profile flag to ensure workers are actually stopped (since they're in profiles). + + Examples: + $ ff worker stop + $ ff worker stop --all + """ + try: + worker_mgr = WorkerManager() + success = worker_mgr.stop_all_workers() + + if success: + sys.exit(0) + else: + console.print("โš ๏ธ Some workers may not have stopped properly", style="yellow") + sys.exit(1) + + except Exception as e: + console.print(f"โŒ Error: {e}", style="red") + sys.exit(1) + + +@app.command("list") +def list_workers( + all: bool = typer.Option( + False, "--all", "-a", + help="Show all workers (including stopped)" + ) +): + """ + ๐Ÿ“‹ List FuzzForge workers and their status. + + By default, shows only running workers. Use --all to see all workers. + + Examples: + $ ff worker list + $ ff worker list --all + """ + try: + # Get list of running workers + result = subprocess.run( + ["docker", "ps", "--filter", "name=fuzzforge-worker-", + "--format", "{{.Names}}\t{{.Status}}\t{{.RunningFor}}"], + capture_output=True, + text=True, + check=False + ) + + running_workers = [] + if result.stdout.strip(): + for line in result.stdout.strip().splitlines(): + parts = line.split('\t') + if len(parts) >= 3: + running_workers.append({ + "name": parts[0].replace("fuzzforge-worker-", ""), + "status": "Running", + "uptime": parts[2] + }) + + # If --all, also get stopped workers + stopped_workers = [] + if all: + result_all = subprocess.run( + ["docker", "ps", "-a", "--filter", "name=fuzzforge-worker-", + "--format", "{{.Names}}\t{{.Status}}"], + capture_output=True, + text=True, + check=False + ) + + all_worker_names = set() + for line in result_all.stdout.strip().splitlines(): + parts = line.split('\t') + if len(parts) >= 2: + worker_name = parts[0].replace("fuzzforge-worker-", "") + all_worker_names.add(worker_name) + # If not running, it's stopped + if not any(w["name"] == worker_name for w in running_workers): + stopped_workers.append({ + "name": worker_name, + "status": "Stopped", + "uptime": "-" + }) + + # Display results + if not running_workers and not stopped_workers: + console.print("โ„น๏ธ No workers found", style="cyan") + console.print("\n๐Ÿ’ก Start a worker with: [cyan]docker compose up -d worker-[/cyan]") + console.print(" Or run a workflow, which auto-starts workers: [cyan]ff workflow run [/cyan]") + return + + # Create table + table = Table(title="FuzzForge Workers", show_header=True, header_style="bold cyan") + table.add_column("Worker", style="cyan", no_wrap=True) + table.add_column("Status", style="green") + table.add_column("Uptime", style="dim") + + # Add running workers + for worker in running_workers: + table.add_row( + worker["name"], + f"[green]โ—[/green] {worker['status']}", + worker["uptime"] + ) + + # Add stopped workers if --all + for worker in stopped_workers: + table.add_row( + worker["name"], + f"[red]โ—[/red] {worker['status']}", + worker["uptime"] + ) + + console.print(table) + + # 
Summary + if running_workers: + console.print(f"\nโœ… {len(running_workers)} worker(s) running") + if stopped_workers: + console.print(f"โน๏ธ {len(stopped_workers)} worker(s) stopped") + + except Exception as e: + console.print(f"โŒ Error listing workers: {e}", style="red") + sys.exit(1) + + +@app.command("start") +def start_worker( + name: str = typer.Argument( + ..., + help="Worker name (e.g., 'python', 'android', 'secrets')" + ), + build: bool = typer.Option( + False, "--build", + help="Rebuild worker image before starting" + ) +): + """ + ๐Ÿš€ Start a specific worker. + + The worker name should be the vertical name (e.g., 'python', 'android', 'rust'). + + Examples: + $ ff worker start python + $ ff worker start android --build + """ + try: + service_name = f"worker-{name}" + + console.print(f"๐Ÿš€ Starting worker: [cyan]{service_name}[/cyan]") + + # Build docker compose command + cmd = ["docker", "compose", "up", "-d"] + if build: + cmd.append("--build") + cmd.append(service_name) + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False + ) + + if result.returncode == 0: + console.print(f"โœ… Worker [cyan]{service_name}[/cyan] started successfully") + else: + console.print(f"โŒ Failed to start worker: {result.stderr}", style="red") + console.print( + f"\n๐Ÿ’ก Try manually: [yellow]docker compose up -d {service_name}[/yellow]", + style="dim" + ) + sys.exit(1) + + except Exception as e: + console.print(f"โŒ Error: {e}", style="red") + sys.exit(1) + + +if __name__ == "__main__": + app() diff --git a/cli/src/fuzzforge_cli/commands/workflow_exec.py b/cli/src/fuzzforge_cli/commands/workflow_exec.py index d1633e8..bfa7f12 100644 --- a/cli/src/fuzzforge_cli/commands/workflow_exec.py +++ b/cli/src/fuzzforge_cli/commands/workflow_exec.py @@ -39,7 +39,7 @@ from ..validation import ( ) from ..progress import step_progress from ..constants import ( - STATUS_EMOJIS, MAX_RUN_ID_DISPLAY_LENGTH, DEFAULT_VOLUME_MODE, + STATUS_EMOJIS, MAX_RUN_ID_DISPLAY_LENGTH, PROGRESS_STEP_DELAYS, MAX_RETRIES, RETRY_DELAY, POLL_INTERVAL ) from ..worker_manager import WorkerManager @@ -112,7 +112,6 @@ def execute_workflow_submission( workflow: str, target_path: str, parameters: Dict[str, Any], - volume_mode: str, timeout: Optional[int], interactive: bool ) -> Any: @@ -160,13 +159,10 @@ def execute_workflow_submission( except ValueError as e: console.print(f"โŒ Invalid {param_type}: {e}", style="red") - # Note: volume_mode is no longer used (Temporal uses MinIO storage) - # Show submission summary console.print("\n๐ŸŽฏ [bold]Executing workflow:[/bold]") console.print(f" Workflow: {workflow}") console.print(f" Target: {target_path}") - console.print(f" Volume Mode: {volume_mode}") if parameters: console.print(f" Parameters: {len(parameters)} provided") if timeout: @@ -252,8 +248,6 @@ def execute_workflow_submission( progress.next_step() # Submitting submission = WorkflowSubmission( - target_path=target_path, - volume_mode=volume_mode, parameters=parameters, timeout=timeout ) @@ -281,10 +275,6 @@ def execute_workflow( None, "--param-file", "-f", help="JSON file containing workflow parameters" ), - volume_mode: str = typer.Option( - DEFAULT_VOLUME_MODE, "--volume-mode", "-v", - help="Volume mount mode: ro (read-only) or rw (read-write)" - ), timeout: Optional[int] = typer.Option( None, "--timeout", "-t", help="Execution timeout in seconds" @@ -410,7 +400,7 @@ def execute_workflow( response = execute_workflow_submission( client, workflow, target_path, parameters, - volume_mode, timeout, 
interactive + timeout, interactive ) console.print("โœ… Workflow execution started!", style="green") @@ -453,9 +443,9 @@ def execute_workflow( console.print("Press Ctrl+C to stop monitoring (execution continues in background).\n") try: - from ..commands.monitor import live_monitor - # Import monitor command and run it - live_monitor(response.run_id, refresh=3) + from ..commands.monitor import _live_monitor + # Call helper function directly with proper parameters + _live_monitor(response.run_id, refresh=3, once=False, style="inline") except KeyboardInterrupt: console.print("\nโน๏ธ Live monitoring stopped (execution continues in background)", style="yellow") except Exception as e: diff --git a/cli/src/fuzzforge_cli/completion.py b/cli/src/fuzzforge_cli/completion.py index bd717cd..7bd7c5b 100644 --- a/cli/src/fuzzforge_cli/completion.py +++ b/cli/src/fuzzforge_cli/completion.py @@ -95,12 +95,6 @@ def complete_target_paths(incomplete: str) -> List[str]: return [] -def complete_volume_modes(incomplete: str) -> List[str]: - """Auto-complete volume mount modes.""" - modes = ["ro", "rw"] - return [mode for mode in modes if mode.startswith(incomplete)] - - def complete_export_formats(incomplete: str) -> List[str]: """Auto-complete export formats.""" formats = ["json", "csv", "html", "sarif"] @@ -139,7 +133,6 @@ def complete_config_keys(incomplete: str) -> List[str]: "api_url", "api_timeout", "default_workflow", - "default_volume_mode", "project_name", "data_retention_days", "auto_save_findings", @@ -164,11 +157,6 @@ TargetPathComplete = typer.Argument( help="Target path (tab completion available)" ) -VolumeModetComplete = typer.Option( - autocompletion=complete_volume_modes, - help="Volume mode: ro or rw (tab completion available)" -) - ExportFormatComplete = typer.Option( autocompletion=complete_export_formats, help="Export format (tab completion available)" diff --git a/cli/src/fuzzforge_cli/config.py b/cli/src/fuzzforge_cli/config.py index f21b87d..1a0ae28 100644 --- a/cli/src/fuzzforge_cli/config.py +++ b/cli/src/fuzzforge_cli/config.py @@ -28,6 +28,58 @@ try: # Optional dependency; fall back if not installed except ImportError: # pragma: no cover - optional dependency load_dotenv = None + +def _load_env_file_if_exists(path: Path, override: bool = False) -> bool: + if not path.exists(): + return False + # Always use manual parsing to handle empty values correctly + try: + for line in path.read_text(encoding="utf-8").splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#") or "=" not in stripped: + continue + key, value = stripped.split("=", 1) + key = key.strip() + value = value.strip() + if override: + # Only override if value is non-empty + if value: + os.environ[key] = value + else: + # Set if not already in environment and value is non-empty + if key not in os.environ and value: + os.environ[key] = value + return True + except Exception: # pragma: no cover - best effort fallback + return False + + +def _find_shared_env_file(project_dir: Path) -> Path | None: + for directory in [project_dir] + list(project_dir.parents): + candidate = directory / "volumes" / "env" / ".env" + if candidate.exists(): + return candidate + return None + + +def load_project_env(project_dir: Optional[Path] = None) -> Path | None: + """Load project-local env, falling back to shared volumes/env/.env.""" + + project_dir = Path(project_dir or Path.cwd()) + shared_env = _find_shared_env_file(project_dir) + loaded_shared = False + if shared_env: + loaded_shared = 
_load_env_file_if_exists(shared_env, override=False) + + project_env = project_dir / ".fuzzforge" / ".env" + if _load_env_file_if_exists(project_env, override=True): + return project_env + + if loaded_shared: + return shared_env + + return None + import yaml from pydantic import BaseModel, Field @@ -312,23 +364,7 @@ class ProjectConfigManager: if not cognee.get("enabled", True): return - # Load project-specific environment overrides from .fuzzforge/.env if available - env_file = self.project_dir / ".fuzzforge" / ".env" - if env_file.exists(): - if load_dotenv: - load_dotenv(env_file, override=False) - else: - try: - for line in env_file.read_text(encoding="utf-8").splitlines(): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - if "=" not in stripped: - continue - key, value = stripped.split("=", 1) - os.environ.setdefault(key.strip(), value.strip()) - except Exception: # pragma: no cover - best effort fallback - pass + load_project_env(self.project_dir) backend_access = "true" if cognee.get("backend_access_control", True) else "false" os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = backend_access @@ -374,6 +410,17 @@ class ProjectConfigManager: "OPENAI_API_KEY", ) endpoint = _env("LLM_COGNEE_ENDPOINT", "COGNEE_LLM_ENDPOINT", "LLM_ENDPOINT") + embedding_model = _env( + "LLM_COGNEE_EMBEDDING_MODEL", + "COGNEE_LLM_EMBEDDING_MODEL", + "LLM_EMBEDDING_MODEL", + ) + embedding_endpoint = _env( + "LLM_COGNEE_EMBEDDING_ENDPOINT", + "COGNEE_LLM_EMBEDDING_ENDPOINT", + "LLM_EMBEDDING_ENDPOINT", + "LLM_ENDPOINT", + ) api_version = _env( "LLM_COGNEE_API_VERSION", "COGNEE_LLM_API_VERSION", @@ -398,6 +445,20 @@ class ProjectConfigManager: os.environ.setdefault("OPENAI_API_KEY", api_key) if endpoint: os.environ["LLM_ENDPOINT"] = endpoint + os.environ.setdefault("LLM_API_BASE", endpoint) + os.environ.setdefault("LLM_EMBEDDING_ENDPOINT", endpoint) + os.environ.setdefault("LLM_EMBEDDING_API_BASE", endpoint) + os.environ.setdefault("OPENAI_API_BASE", endpoint) + # Set LiteLLM proxy environment variables for SDK usage + os.environ.setdefault("LITELLM_PROXY_API_BASE", endpoint) + if api_key: + # Set LiteLLM proxy API key from the virtual key + os.environ.setdefault("LITELLM_PROXY_API_KEY", api_key) + if embedding_model: + os.environ["LLM_EMBEDDING_MODEL"] = embedding_model + if embedding_endpoint: + os.environ["LLM_EMBEDDING_ENDPOINT"] = embedding_endpoint + os.environ.setdefault("LLM_EMBEDDING_API_BASE", embedding_endpoint) if api_version: os.environ["LLM_API_VERSION"] = api_version if max_tokens: diff --git a/cli/src/fuzzforge_cli/constants.py b/cli/src/fuzzforge_cli/constants.py index 231f5b7..493dfb0 100644 --- a/cli/src/fuzzforge_cli/constants.py +++ b/cli/src/fuzzforge_cli/constants.py @@ -57,10 +57,6 @@ SEVERITY_STYLES = { "info": "bold cyan" } -# Default volume modes -DEFAULT_VOLUME_MODE = "ro" -SUPPORTED_VOLUME_MODES = ["ro", "rw"] - # Default export formats DEFAULT_EXPORT_FORMAT = "sarif" SUPPORTED_EXPORT_FORMATS = ["sarif", "json", "csv"] diff --git a/cli/src/fuzzforge_cli/fuzzy.py b/cli/src/fuzzforge_cli/fuzzy.py index 48f16a5..4cec4de 100644 --- a/cli/src/fuzzforge_cli/fuzzy.py +++ b/cli/src/fuzzforge_cli/fuzzy.py @@ -52,7 +52,6 @@ class FuzzyMatcher: # Common parameter names self.parameter_names = [ "target_path", - "volume_mode", "timeout", "workflow", "param", @@ -70,7 +69,6 @@ class FuzzyMatcher: # Common values self.common_values = { - "volume_mode": ["ro", "rw"], "format": ["json", "csv", "html", "sarif"], "severity": ["critical", "high", "medium", "low", 
"info"], } diff --git a/cli/src/fuzzforge_cli/main.py b/cli/src/fuzzforge_cli/main.py index 24baa9c..f869c8c 100644 --- a/cli/src/fuzzforge_cli/main.py +++ b/cli/src/fuzzforge_cli/main.py @@ -19,6 +19,8 @@ from rich.traceback import install from typing import Optional, List import sys +from .config import load_project_env + from .commands import ( workflows, workflow_exec, @@ -27,13 +29,16 @@ from .commands import ( config as config_cmd, ai, ingest, + worker, ) -from .constants import DEFAULT_VOLUME_MODE from .fuzzy import enhanced_command_not_found_handler # Install rich traceback handler install(show_locals=True) +# Ensure environment variables are available before command execution +load_project_env() + # Create console for rich output console = Console() @@ -184,10 +189,6 @@ def run_workflow( None, "--param-file", "-f", help="JSON file containing workflow parameters" ), - volume_mode: str = typer.Option( - DEFAULT_VOLUME_MODE, "--volume-mode", "-v", - help="Volume mount mode: ro (read-only) or rw (read-write)" - ), timeout: Optional[int] = typer.Option( None, "--timeout", "-t", help="Execution timeout in seconds" @@ -234,7 +235,6 @@ def run_workflow( target_path=target, params=params, param_file=param_file, - volume_mode=volume_mode, timeout=timeout, interactive=interactive, wait=wait, @@ -335,6 +335,7 @@ app.add_typer(finding_app, name="finding", help="๐Ÿ” View and analyze findings" app.add_typer(monitor.app, name="monitor", help="๐Ÿ“Š Real-time monitoring") app.add_typer(ai.app, name="ai", help="๐Ÿค– AI integration features") app.add_typer(ingest.app, name="ingest", help="๐Ÿง  Ingest knowledge into AI") +app.add_typer(worker.app, name="worker", help="๐Ÿ”ง Manage Temporal workers") # Help and utility commands @app.command() @@ -410,7 +411,7 @@ def main(): 'init', 'status', 'config', 'clean', 'workflows', 'workflow', 'findings', 'finding', - 'monitor', 'ai', 'ingest', + 'monitor', 'ai', 'ingest', 'worker', 'version' ] diff --git a/cli/src/fuzzforge_cli/validation.py b/cli/src/fuzzforge_cli/validation.py index 1f524f6..b8fdfb7 100644 --- a/cli/src/fuzzforge_cli/validation.py +++ b/cli/src/fuzzforge_cli/validation.py @@ -17,7 +17,7 @@ import re from pathlib import Path from typing import Any, Dict, List, Optional -from .constants import SUPPORTED_VOLUME_MODES, SUPPORTED_EXPORT_FORMATS +from .constants import SUPPORTED_EXPORT_FORMATS from .exceptions import ValidationError @@ -65,15 +65,6 @@ def validate_target_path(target_path: str, must_exist: bool = True) -> Path: return path -def validate_volume_mode(volume_mode: str) -> None: - """Validate volume mode""" - if volume_mode not in SUPPORTED_VOLUME_MODES: - raise ValidationError( - "volume_mode", volume_mode, - f"one of: {', '.join(SUPPORTED_VOLUME_MODES)}" - ) - - def validate_export_format(export_format: str) -> None: """Validate export format""" if export_format not in SUPPORTED_EXPORT_FORMATS: diff --git a/cli/src/fuzzforge_cli/worker_manager.py b/cli/src/fuzzforge_cli/worker_manager.py index b6102e0..a9b3eaf 100644 --- a/cli/src/fuzzforge_cli/worker_manager.py +++ b/cli/src/fuzzforge_cli/worker_manager.py @@ -15,12 +15,17 @@ Manages on-demand startup and shutdown of Temporal workers using Docker Compose. # Additional attribution and requirements are provided in the NOTICE file. 
import logging +import os +import platform import subprocess import time from pathlib import Path from typing import Optional, Dict, Any +import requests +import yaml from rich.console import Console +from rich.status import Status logger = logging.getLogger(__name__) console = Console() @@ -57,27 +62,206 @@ class WorkerManager: def _find_compose_file(self) -> Path: """ - Auto-detect docker-compose.yml location. + Auto-detect docker-compose.yml location using multiple strategies. - Searches upward from current directory to find the compose file. + Strategies (in order): + 1. Query backend API for host path + 2. Search upward for .fuzzforge marker directory + 3. Use FUZZFORGE_ROOT environment variable + 4. Fallback to current directory + + Returns: + Path to docker-compose.yml + + Raises: + FileNotFoundError: If docker-compose.yml cannot be located """ - current = Path.cwd() + # Strategy 1: Ask backend for location + try: + backend_url = os.getenv("FUZZFORGE_API_URL", "http://localhost:8000") + response = requests.get(f"{backend_url}/system/info", timeout=2) + if response.ok: + info = response.json() + if compose_path_str := info.get("docker_compose_path"): + compose_path = Path(compose_path_str) + if compose_path.exists(): + logger.debug(f"Found docker-compose.yml via backend API: {compose_path}") + return compose_path + except Exception as e: + logger.debug(f"Backend API not reachable for path lookup: {e}") - # Try current directory and parents + # Strategy 2: Search upward for .fuzzforge marker directory + current = Path.cwd() for parent in [current] + list(current.parents): - compose_path = parent / "docker-compose.yml" + if (parent / ".fuzzforge").exists(): + compose_path = parent / "docker-compose.yml" + if compose_path.exists(): + logger.debug(f"Found docker-compose.yml via .fuzzforge marker: {compose_path}") + return compose_path + + # Strategy 3: Environment variable + if fuzzforge_root := os.getenv("FUZZFORGE_ROOT"): + compose_path = Path(fuzzforge_root) / "docker-compose.yml" if compose_path.exists(): + logger.debug(f"Found docker-compose.yml via FUZZFORGE_ROOT: {compose_path}") return compose_path - # Fallback to default location - return Path("docker-compose.yml") + # Strategy 4: Fallback to current directory + compose_path = Path("docker-compose.yml") + if compose_path.exists(): + return compose_path - def _run_docker_compose(self, *args: str) -> subprocess.CompletedProcess: + raise FileNotFoundError( + "Cannot find docker-compose.yml. Ensure backend is running, " + "run from FuzzForge directory, or set FUZZFORGE_ROOT environment variable." + ) + + def _get_workers_dir(self) -> Path: """ - Run docker-compose command. + Get the workers directory path. + + Uses same strategy as _find_compose_file(): + 1. Query backend API + 2. Derive from compose_file location + 3. 
Use FUZZFORGE_ROOT + + Returns: + Path to workers directory + """ + # Strategy 1: Ask backend + try: + backend_url = os.getenv("FUZZFORGE_API_URL", "http://localhost:8000") + response = requests.get(f"{backend_url}/system/info", timeout=2) + if response.ok: + info = response.json() + if workers_dir_str := info.get("workers_dir"): + workers_dir = Path(workers_dir_str) + if workers_dir.exists(): + return workers_dir + except Exception: + pass + + # Strategy 2: Derive from compose file location + if self.compose_file.exists(): + workers_dir = self.compose_file.parent / "workers" + if workers_dir.exists(): + return workers_dir + + # Strategy 3: Use environment variable + if fuzzforge_root := os.getenv("FUZZFORGE_ROOT"): + workers_dir = Path(fuzzforge_root) / "workers" + if workers_dir.exists(): + return workers_dir + + # Fallback + return Path("workers") + + def _detect_platform(self) -> str: + """ + Detect the current platform. + + Returns: + Platform string: "linux/amd64" or "linux/arm64" + """ + machine = platform.machine().lower() + system = platform.system().lower() + + logger.debug(f"Platform detection: machine={machine}, system={system}") + + # Normalize machine architecture + if machine in ["x86_64", "amd64", "x64"]: + detected = "linux/amd64" + elif machine in ["arm64", "aarch64", "armv8", "arm64v8"]: + detected = "linux/arm64" + else: + # Fallback to amd64 for unknown architectures + logger.warning( + f"Unknown architecture '{machine}' detected, falling back to linux/amd64. " + f"Please report this issue if you're experiencing problems." + ) + detected = "linux/amd64" + + logger.info(f"Detected platform: {detected}") + return detected + + def _read_worker_metadata(self, vertical: str) -> dict: + """ + Read worker metadata.yaml for a vertical. Args: - *args: Arguments to pass to docker-compose + vertical: Worker vertical name (e.g., "android", "python") + + Returns: + Dictionary containing metadata, or empty dict if not found + """ + try: + workers_dir = self._get_workers_dir() + metadata_file = workers_dir / vertical / "metadata.yaml" + + if not metadata_file.exists(): + logger.debug(f"No metadata.yaml found for {vertical}") + return {} + + with open(metadata_file, 'r') as f: + return yaml.safe_load(f) or {} + except Exception as e: + logger.debug(f"Failed to read metadata for {vertical}: {e}") + return {} + + def _select_dockerfile(self, vertical: str) -> str: + """ + Select the appropriate Dockerfile for the current platform. 
+ + Args: + vertical: Worker vertical name + + Returns: + Dockerfile name (e.g., "Dockerfile.amd64", "Dockerfile.arm64") + """ + detected_platform = self._detect_platform() + metadata = self._read_worker_metadata(vertical) + + if not metadata: + # No metadata: use default Dockerfile + logger.debug(f"No metadata for {vertical}, using Dockerfile") + return "Dockerfile" + + platforms = metadata.get("platforms", {}) + + if not platforms: + # Metadata exists but no platform definitions + logger.debug(f"No platform definitions in metadata for {vertical}, using Dockerfile") + return "Dockerfile" + + # Try detected platform first + if detected_platform in platforms: + dockerfile = platforms[detected_platform].get("dockerfile", "Dockerfile") + logger.info(f"โœ“ Selected {dockerfile} for {vertical} on {detected_platform}") + return dockerfile + + # Fallback to default platform + default_platform = metadata.get("default_platform", "linux/amd64") + logger.warning( + f"Platform {detected_platform} not found in metadata for {vertical}, " + f"falling back to default: {default_platform}" + ) + + if default_platform in platforms: + dockerfile = platforms[default_platform].get("dockerfile", "Dockerfile.amd64") + logger.info(f"Using default platform {default_platform}: {dockerfile}") + return dockerfile + + # Last resort: just use Dockerfile + logger.warning(f"No suitable Dockerfile found for {vertical}, using 'Dockerfile'") + return "Dockerfile" + + def _run_docker_compose(self, *args: str, env: Optional[Dict[str, str]] = None) -> subprocess.CompletedProcess: + """ + Run docker compose command with optional environment variables. + + Args: + *args: Arguments to pass to docker compose + env: Optional environment variables to set Returns: CompletedProcess with result @@ -85,14 +269,21 @@ class WorkerManager: Raises: subprocess.CalledProcessError: If command fails """ - cmd = ["docker-compose", "-f", str(self.compose_file)] + list(args) + cmd = ["docker", "compose", "-f", str(self.compose_file)] + list(args) logger.debug(f"Running: {' '.join(cmd)}") + # Merge with current environment + full_env = os.environ.copy() + if env: + full_env.update(env) + logger.debug(f"Environment overrides: {env}") + return subprocess.run( cmd, capture_output=True, text=True, - check=True + check=True, + env=full_env ) def _service_to_container_name(self, service_name: str) -> str: @@ -135,21 +326,35 @@ class WorkerManager: def start_worker(self, service_name: str) -> bool: """ - Start a worker service using docker-compose. + Start a worker service using docker-compose with platform-specific Dockerfile. 
Args: - service_name: Name of the Docker Compose service to start (e.g., "worker-python") + service_name: Name of the Docker Compose service to start (e.g., "worker-android") Returns: True if started successfully, False otherwise """ try: - console.print(f"๐Ÿš€ Starting worker: {service_name}") + # Extract vertical name from service name + vertical = service_name.replace("worker-", "") - # Use docker-compose up to create and start the service - result = self._run_docker_compose("up", "-d", service_name) + # Detect platform and select appropriate Dockerfile + detected_platform = self._detect_platform() + dockerfile = self._select_dockerfile(vertical) - logger.info(f"Worker {service_name} started") + # Set environment variable for docker-compose + env_var_name = f"{vertical.upper()}_DOCKERFILE" + env = {env_var_name: dockerfile} + + console.print( + f"๐Ÿš€ Starting worker: {service_name} " + f"(platform: {detected_platform}, using {dockerfile})" + ) + + # Use docker-compose up with --build to ensure correct Dockerfile is used + result = self._run_docker_compose("up", "-d", "--build", service_name, env=env) + + logger.info(f"Worker {service_name} started with {dockerfile}") return True except subprocess.CalledProcessError as e: @@ -163,9 +368,67 @@ class WorkerManager: console.print(f"โŒ Unexpected error: {e}", style="red") return False + def _get_container_state(self, service_name: str) -> str: + """ + Get the current state of a container (running, created, restarting, etc.). + + Args: + service_name: Name of the Docker Compose service + + Returns: + Container state string (running, created, restarting, exited, etc.) or "unknown" + """ + try: + container_name = self._service_to_container_name(service_name) + result = subprocess.run( + ["docker", "inspect", "-f", "{{.State.Status}}", container_name], + capture_output=True, + text=True, + check=False + ) + if result.returncode == 0: + return result.stdout.strip() + return "unknown" + except Exception as e: + logger.debug(f"Failed to get container state: {e}") + return "unknown" + + def _get_health_status(self, container_name: str) -> str: + """ + Get container health status. + + Args: + container_name: Docker container name + + Returns: + Health status: "healthy", "unhealthy", "starting", "none", or "unknown" + """ + try: + result = subprocess.run( + ["docker", "inspect", "-f", "{{.State.Health.Status}}", container_name], + capture_output=True, + text=True, + check=False + ) + + if result.returncode != 0: + return "unknown" + + health_status = result.stdout.strip() + + if health_status == "" or health_status == "": + return "none" # No health check defined + + return health_status # healthy, unhealthy, starting + + except Exception as e: + logger.debug(f"Failed to check health: {e}") + return "unknown" + def wait_for_worker_ready(self, service_name: str, timeout: Optional[int] = None) -> bool: """ Wait for a worker to be healthy and ready to process tasks. + Shows live progress updates during startup. 
Args: service_name: Name of the Docker Compose service @@ -173,56 +436,74 @@ class WorkerManager: Returns: True if worker is ready, False if timeout reached - - Raises: - TimeoutError: If worker doesn't become ready within timeout """ timeout = timeout or self.startup_timeout start_time = time.time() container_name = self._service_to_container_name(service_name) + last_status_msg = "" - console.print("โณ Waiting for worker to be ready...") + with Status("[bold cyan]Starting worker...", console=console, spinner="dots") as status: + while time.time() - start_time < timeout: + elapsed = int(time.time() - start_time) + + # Get container state + container_state = self._get_container_state(service_name) + + # Get health status + health_status = self._get_health_status(container_name) + + # Build status message based on current state + if container_state == "created": + status_msg = f"[cyan]Worker starting... ({elapsed}s)[/cyan]" + elif container_state == "restarting": + status_msg = f"[yellow]Worker restarting... ({elapsed}s)[/yellow]" + elif container_state == "running": + if health_status == "starting": + status_msg = f"[cyan]Worker running, health check starting... ({elapsed}s)[/cyan]" + elif health_status == "unhealthy": + status_msg = f"[yellow]Worker running, health check: unhealthy ({elapsed}s)[/yellow]" + elif health_status == "healthy": + status_msg = f"[green]Worker healthy! ({elapsed}s)[/green]" + status.update(status_msg) + console.print(f"โœ… Worker ready: {service_name} (took {elapsed}s)") + logger.info(f"Worker {service_name} is healthy (took {elapsed}s)") + return True + elif health_status == "none": + # No health check defined, assume ready + status_msg = f"[green]Worker running (no health check) ({elapsed}s)[/green]" + status.update(status_msg) + console.print(f"โœ… Worker ready: {service_name} (took {elapsed}s)") + logger.info(f"Worker {service_name} is running, no health check (took {elapsed}s)") + return True + else: + status_msg = f"[cyan]Worker running ({elapsed}s)[/cyan]" + elif not container_state or container_state == "exited": + status_msg = f"[yellow]Waiting for container to start... 
({elapsed}s)[/yellow]" + else: + status_msg = f"[cyan]Worker state: {container_state} ({elapsed}s)[/cyan]" + + # Show helpful hints at certain intervals + if elapsed == 10: + status_msg += " [dim](pulling image if not cached)[/dim]" + elif elapsed == 30: + status_msg += " [dim](large images can take time)[/dim]" + elif elapsed == 60: + status_msg += " [dim](still working...)[/dim]" + + # Update status if changed + if status_msg != last_status_msg: + status.update(status_msg) + last_status_msg = status_msg + logger.debug(f"Worker {service_name} - state: {container_state}, health: {health_status}") - while time.time() - start_time < timeout: - # Check if container is running - if not self.is_worker_running(service_name): - logger.debug(f"Worker {service_name} not running yet") time.sleep(self.health_check_interval) - continue - # Check container health status - try: - result = subprocess.run( - ["docker", "inspect", "-f", "{{.State.Health.Status}}", container_name], - capture_output=True, - text=True, - check=False - ) - - health_status = result.stdout.strip() - - # If no health check is defined, assume healthy after running - if health_status == "" or health_status == "": - logger.info(f"Worker {service_name} is running (no health check)") - console.print(f"โœ… Worker ready: {service_name}") - return True - - if health_status == "healthy": - logger.info(f"Worker {service_name} is healthy") - console.print(f"โœ… Worker ready: {service_name}") - return True - - logger.debug(f"Worker {service_name} health: {health_status}") - - except Exception as e: - logger.debug(f"Failed to check health: {e}") - - time.sleep(self.health_check_interval) - - elapsed = time.time() - start_time - logger.warning(f"Worker {service_name} did not become ready within {elapsed:.1f}s") - console.print(f"โš ๏ธ Worker startup timeout after {elapsed:.1f}s", style="yellow") - return False + # Timeout reached + elapsed = int(time.time() - start_time) + logger.warning(f"Worker {service_name} did not become ready within {elapsed}s") + console.print(f"โš ๏ธ Worker startup timeout after {elapsed}s", style="yellow") + console.print(f" Last state: {container_state}, health: {health_status}", style="dim") + return False def stop_worker(self, service_name: str) -> bool: """ @@ -253,6 +534,75 @@ class WorkerManager: console.print(f"โŒ Unexpected error: {e}", style="red") return False + def stop_all_workers(self) -> bool: + """ + Stop all running FuzzForge worker containers. + + This uses `docker stop` to stop worker containers individually, + avoiding the Docker Compose profile issue and preventing accidental + shutdown of core services. 
+ + Returns: + True if all workers stopped successfully, False otherwise + """ + try: + console.print("๐Ÿ›‘ Stopping all FuzzForge workers...") + + # Get list of all running worker containers + result = subprocess.run( + ["docker", "ps", "--filter", "name=fuzzforge-worker-", "--format", "{{.Names}}"], + capture_output=True, + text=True, + check=False + ) + + running_workers = [name.strip() for name in result.stdout.splitlines() if name.strip()] + + if not running_workers: + console.print("โœ“ No workers running") + return True + + console.print(f"Found {len(running_workers)} running worker(s):") + for worker in running_workers: + console.print(f" - {worker}") + + # Stop each worker container individually using docker stop + # This is safer than docker compose down and won't affect core services + failed_workers = [] + for worker in running_workers: + try: + logger.info(f"Stopping {worker}...") + result = subprocess.run( + ["docker", "stop", worker], + capture_output=True, + text=True, + check=True, + timeout=30 + ) + console.print(f" โœ“ Stopped {worker}") + except subprocess.CalledProcessError as e: + logger.error(f"Failed to stop {worker}: {e.stderr}") + failed_workers.append(worker) + console.print(f" โœ— Failed to stop {worker}", style="red") + except subprocess.TimeoutExpired: + logger.error(f"Timeout stopping {worker}") + failed_workers.append(worker) + console.print(f" โœ— Timeout stopping {worker}", style="red") + + if failed_workers: + console.print(f"\nโš ๏ธ {len(failed_workers)} worker(s) failed to stop", style="yellow") + console.print("๐Ÿ’ก Try manually: docker stop " + " ".join(failed_workers), style="dim") + return False + + console.print("\nโœ… All workers stopped") + logger.info("All workers stopped successfully") + return True + + except Exception as e: + logger.error(f"Unexpected error stopping workers: {e}") + console.print(f"โŒ Unexpected error: {e}", style="red") + return False + def ensure_worker_running( self, worker_info: Dict[str, Any], diff --git a/docker-compose.yml b/docker-compose.yml index 271f7e6..aae0fb5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -144,6 +144,103 @@ services: networks: - fuzzforge-network + # ============================================================================ + # LLM Proxy - LiteLLM Gateway + # ============================================================================ + llm-proxy: + image: ghcr.io/berriai/litellm:main-stable + container_name: fuzzforge-llm-proxy + depends_on: + llm-proxy-db: + condition: service_healthy + otel-collector: + condition: service_started + env_file: + - ./volumes/env/.env + environment: + PORT: 4000 + DATABASE_URL: postgresql://litellm:litellm@llm-proxy-db:5432/litellm + STORE_MODEL_IN_DB: "True" + UI_USERNAME: ${UI_USERNAME:-fuzzforge} + UI_PASSWORD: ${UI_PASSWORD:-fuzzforge123} + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + ANTHROPIC_API_KEY: ${LITELLM_ANTHROPIC_API_KEY:-} + OPENAI_API_KEY: ${LITELLM_OPENAI_API_KEY:-} + command: + - "--config" + - "/etc/litellm/proxy_config.yaml" + ports: + - "10999:4000" # Web UI + OpenAI-compatible API + volumes: + - litellm_proxy_data:/var/lib/litellm + - ./volumes/litellm/proxy_config.yaml:/etc/litellm/proxy_config.yaml:ro + networks: + - fuzzforge-network + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + restart: unless-stopped + + otel-collector: + image: 
otel/opentelemetry-collector:latest + container_name: fuzzforge-otel-collector + command: ["--config=/etc/otel-collector/config.yaml"] + volumes: + - ./volumes/otel/collector-config.yaml:/etc/otel-collector/config.yaml:ro + ports: + - "4317:4317" + - "4318:4318" + networks: + - fuzzforge-network + restart: unless-stopped + + llm-proxy-db: + image: postgres:16 + container_name: fuzzforge-llm-proxy-db + environment: + POSTGRES_DB: litellm + POSTGRES_USER: litellm + POSTGRES_PASSWORD: litellm + healthcheck: + test: ["CMD-SHELL", "pg_isready -d litellm -U litellm"] + interval: 5s + timeout: 5s + retries: 12 + volumes: + - litellm_proxy_db:/var/lib/postgresql/data + networks: + - fuzzforge-network + restart: unless-stopped + + # ============================================================================ + # LLM Proxy Bootstrap - Seed providers and virtual keys + # ============================================================================ + llm-proxy-bootstrap: + image: python:3.11-slim + container_name: fuzzforge-llm-proxy-bootstrap + depends_on: + llm-proxy: + condition: service_started + env_file: + - ./volumes/env/.env + environment: + PROXY_BASE_URL: http://llm-proxy:4000 + ENV_FILE_PATH: /bootstrap/env/.env + UI_USERNAME: ${UI_USERNAME:-fuzzforge} + UI_PASSWORD: ${UI_PASSWORD:-fuzzforge123} + volumes: + - ./docker/scripts/bootstrap_llm_proxy.py:/app/bootstrap.py:ro + - ./volumes/env:/bootstrap/env + - litellm_proxy_data:/bootstrap/data + networks: + - fuzzforge-network + command: ["python", "/app/bootstrap.py"] + restart: "no" + # ============================================================================ # Vertical Worker: Rust/Native Security # ============================================================================ @@ -217,9 +314,6 @@ services: context: ./workers/python dockerfile: Dockerfile container_name: fuzzforge-worker-python - profiles: - - workers - - python depends_on: postgresql: condition: service_healthy @@ -345,7 +439,7 @@ services: worker-android: build: context: ./workers/android - dockerfile: Dockerfile + dockerfile: ${ANDROID_DOCKERFILE:-Dockerfile.amd64} container_name: fuzzforge-worker-android profiles: - workers @@ -433,6 +527,9 @@ services: PYTHONPATH: /app PYTHONUNBUFFERED: 1 + # Host filesystem paths (for CLI worker management) + FUZZFORGE_HOST_ROOT: ${PWD} + # Logging LOG_LEVEL: INFO ports: @@ -458,10 +555,11 @@ services: context: ./ai/agents/task_agent dockerfile: Dockerfile container_name: fuzzforge-task-agent + depends_on: + llm-proxy-bootstrap: + condition: service_completed_successfully ports: - "10900:8000" - env_file: - - ./volumes/env/.env environment: - PORT=8000 - PYTHONUNBUFFERED=1 @@ -558,6 +656,10 @@ volumes: name: fuzzforge_worker_ossfuzz_cache worker_ossfuzz_build: name: fuzzforge_worker_ossfuzz_build + litellm_proxy_data: + name: fuzzforge_litellm_proxy_data + litellm_proxy_db: + name: fuzzforge_litellm_proxy_db # Add more worker caches as you add verticals: # worker_web_cache: # worker_ios_cache: @@ -591,6 +693,7 @@ networks: # 4. Web UIs: # - Temporal UI: http://localhost:8233 # - MinIO Console: http://localhost:9001 (user: fuzzforge, pass: fuzzforge123) +# - LiteLLM Proxy: http://localhost:10999 # # 5. Resource Usage (Baseline): # - Temporal: ~500MB diff --git a/docker/scripts/bootstrap_llm_proxy.py b/docker/scripts/bootstrap_llm_proxy.py new file mode 100644 index 0000000..68f6745 --- /dev/null +++ b/docker/scripts/bootstrap_llm_proxy.py @@ -0,0 +1,636 @@ +"""Bootstrap the LiteLLM proxy with provider secrets and default virtual keys. 
+ +The bootstrapper runs as a one-shot container during docker-compose startup. +It performs the following actions: + + 1. Waits for the proxy health endpoint to respond. + 2. Collects upstream provider API keys from the shared .env file (plus any + legacy copies) and mirrors them into a proxy-specific env file + (volumes/env/.env.litellm) so only the proxy container can access them. + 3. Emits a default virtual key for the task agent by calling /key/generate, + persisting the generated token back into volumes/env/.env so the agent can + authenticate through the proxy instead of using raw provider secrets. + 4. Keeps the process idempotent: existing keys are reused and their allowed + model list is refreshed instead of issuing duplicates on every run. +""" + +from __future__ import annotations + +import json +import os +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, Mapping + +PROXY_BASE_URL = os.getenv("PROXY_BASE_URL", "http://llm-proxy:4000").rstrip("/") +ENV_FILE_PATH = Path(os.getenv("ENV_FILE_PATH", "/bootstrap/env/.env")) +LITELLM_ENV_FILE_PATH = Path( + os.getenv("LITELLM_ENV_FILE_PATH", "/bootstrap/env/.env.litellm") +) +LEGACY_ENV_FILE_PATH = Path( + os.getenv("LEGACY_ENV_FILE_PATH", "/bootstrap/env/.env.bifrost") +) +MAX_WAIT_SECONDS = int(os.getenv("LITELLM_PROXY_WAIT_SECONDS", "120")) + + +@dataclass(frozen=True) +class VirtualKeySpec: + """Configuration for a virtual key to be provisioned.""" + env_var: str + alias: str + user_id: str + budget_env_var: str + duration_env_var: str + default_budget: float + default_duration: str + + +# Multiple virtual keys for different services +VIRTUAL_KEYS: tuple[VirtualKeySpec, ...] = ( + VirtualKeySpec( + env_var="OPENAI_API_KEY", + alias="fuzzforge-cli", + user_id="fuzzforge-cli", + budget_env_var="CLI_BUDGET", + duration_env_var="CLI_DURATION", + default_budget=100.0, + default_duration="30d", + ), + VirtualKeySpec( + env_var="TASK_AGENT_API_KEY", + alias="fuzzforge-task-agent", + user_id="fuzzforge-task-agent", + budget_env_var="TASK_AGENT_BUDGET", + duration_env_var="TASK_AGENT_DURATION", + default_budget=25.0, + default_duration="30d", + ), + VirtualKeySpec( + env_var="COGNEE_API_KEY", + alias="fuzzforge-cognee", + user_id="fuzzforge-cognee", + budget_env_var="COGNEE_BUDGET", + duration_env_var="COGNEE_DURATION", + default_budget=50.0, + default_duration="30d", + ), +) + + +@dataclass(frozen=True) +class ProviderSpec: + name: str + litellm_env_var: str + alias_env_var: str + source_env_vars: tuple[str, ...] + + +# Support fresh LiteLLM variables while gracefully migrating legacy env +# aliases on first boot. +PROVIDERS: tuple[ProviderSpec, ...] 
= ( + ProviderSpec( + "openai", + "OPENAI_API_KEY", + "LITELLM_OPENAI_API_KEY", + ("LITELLM_OPENAI_API_KEY", "BIFROST_OPENAI_KEY"), + ), + ProviderSpec( + "anthropic", + "ANTHROPIC_API_KEY", + "LITELLM_ANTHROPIC_API_KEY", + ("LITELLM_ANTHROPIC_API_KEY", "BIFROST_ANTHROPIC_KEY"), + ), + ProviderSpec( + "gemini", + "GEMINI_API_KEY", + "LITELLM_GEMINI_API_KEY", + ("LITELLM_GEMINI_API_KEY", "BIFROST_GEMINI_KEY"), + ), + ProviderSpec( + "mistral", + "MISTRAL_API_KEY", + "LITELLM_MISTRAL_API_KEY", + ("LITELLM_MISTRAL_API_KEY", "BIFROST_MISTRAL_KEY"), + ), + ProviderSpec( + "openrouter", + "OPENROUTER_API_KEY", + "LITELLM_OPENROUTER_API_KEY", + ("LITELLM_OPENROUTER_API_KEY", "BIFROST_OPENROUTER_KEY"), + ), +) + +PROVIDER_LOOKUP: dict[str, ProviderSpec] = {spec.name: spec for spec in PROVIDERS} + + +def log(message: str) -> None: + print(f"[litellm-bootstrap] {message}", flush=True) + + +def read_lines(path: Path) -> list[str]: + if not path.exists(): + return [] + return path.read_text().splitlines() + + +def write_lines(path: Path, lines: Iterable[str]) -> None: + material = "\n".join(lines) + if material and not material.endswith("\n"): + material += "\n" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(material) + + +def read_env_file() -> list[str]: + if not ENV_FILE_PATH.exists(): + raise FileNotFoundError( + f"Expected env file at {ENV_FILE_PATH}. Copy volumes/env/.env.template first." + ) + return read_lines(ENV_FILE_PATH) + + +def write_env_file(lines: Iterable[str]) -> None: + write_lines(ENV_FILE_PATH, lines) + + +def read_litellm_env_file() -> list[str]: + return read_lines(LITELLM_ENV_FILE_PATH) + + +def write_litellm_env_file(lines: Iterable[str]) -> None: + write_lines(LITELLM_ENV_FILE_PATH, lines) + + +def read_legacy_env_file() -> Mapping[str, str]: + lines = read_lines(LEGACY_ENV_FILE_PATH) + return parse_env_lines(lines) + + +def set_env_value(lines: list[str], key: str, value: str) -> tuple[list[str], bool]: + prefix = f"{key}=" + new_line = f"{prefix}{value}" + for idx, line in enumerate(lines): + stripped = line.lstrip() + if not stripped or stripped.startswith("#"): + continue + if stripped.startswith(prefix): + if stripped == new_line: + return lines, False + indent = line[: len(line) - len(stripped)] + lines[idx] = f"{indent}{new_line}" + return lines, True + lines.append(new_line) + return lines, True + + +def parse_env_lines(lines: list[str]) -> dict[str, str]: + mapping: dict[str, str] = {} + for raw_line in lines: + stripped = raw_line.strip() + if not stripped or stripped.startswith("#") or "=" not in stripped: + continue + key, value = stripped.split("=", 1) + mapping[key] = value + return mapping + + +def wait_for_proxy() -> None: + health_paths = ("/health/liveliness", "/health", "/") + deadline = time.time() + MAX_WAIT_SECONDS + attempt = 0 + while time.time() < deadline: + attempt += 1 + for path in health_paths: + url = f"{PROXY_BASE_URL}{path}" + try: + with urllib.request.urlopen(url) as response: # noqa: S310 + if response.status < 400: + log(f"Proxy responded on {path} (attempt {attempt})") + return + except urllib.error.URLError as exc: + log(f"Proxy not ready yet ({path}): {exc}") + time.sleep(3) + raise TimeoutError(f"Timed out waiting for proxy at {PROXY_BASE_URL}") + + +def request_json( + path: str, + *, + method: str = "GET", + payload: Mapping[str, object] | None = None, + auth_token: str | None = None, +) -> tuple[int, str]: + url = f"{PROXY_BASE_URL}{path}" + data = None + headers = {"Accept": "application/json"} + if auth_token: 
+ headers["Authorization"] = f"Bearer {auth_token}" + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + request = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(request) as response: # noqa: S310 + body = response.read().decode("utf-8") + return response.status, body + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8") + return exc.code, body + + +def get_master_key(env_map: Mapping[str, str]) -> str: + candidate = os.getenv("LITELLM_MASTER_KEY") or env_map.get("LITELLM_MASTER_KEY") + if not candidate: + raise RuntimeError( + "LITELLM_MASTER_KEY is not set. Add it to volumes/env/.env before starting Docker." + ) + value = candidate.strip() + if not value: + raise RuntimeError( + "LITELLM_MASTER_KEY is blank. Provide a non-empty value in the env file." + ) + return value + + +def gather_provider_keys( + env_lines: list[str], + env_map: dict[str, str], + legacy_map: Mapping[str, str], +) -> tuple[dict[str, str], list[str], bool]: + updated_lines = list(env_lines) + discovered: dict[str, str] = {} + changed = False + + for spec in PROVIDERS: + value: str | None = None + for source_var in spec.source_env_vars: + candidate = env_map.get(source_var) or legacy_map.get(source_var) or os.getenv( + source_var + ) + if not candidate: + continue + stripped = candidate.strip() + if stripped: + value = stripped + break + if not value: + continue + + discovered[spec.litellm_env_var] = value + updated_lines, alias_changed = set_env_value( + updated_lines, spec.alias_env_var, value + ) + if alias_changed: + env_map[spec.alias_env_var] = value + changed = True + + return discovered, updated_lines, changed + + +def ensure_litellm_env(provider_values: Mapping[str, str]) -> None: + if not provider_values: + log("No provider secrets discovered; skipping LiteLLM env update") + return + lines = read_litellm_env_file() + updated_lines = list(lines) + changed = False + for env_var, value in provider_values.items(): + updated_lines, var_changed = set_env_value(updated_lines, env_var, value) + if var_changed: + changed = True + if changed or not lines: + write_litellm_env_file(updated_lines) + log(f"Wrote provider secrets to {LITELLM_ENV_FILE_PATH}") + + +def current_env_key(env_map: Mapping[str, str], env_var: str) -> str | None: + candidate = os.getenv(env_var) or env_map.get(env_var) + if not candidate: + return None + value = candidate.strip() + if not value or value.startswith("sk-proxy-"): + return None + return value + + +def collect_default_models(env_map: Mapping[str, str]) -> list[str]: + explicit = ( + os.getenv("LITELLM_DEFAULT_MODELS") + or env_map.get("LITELLM_DEFAULT_MODELS") + or "" + ) + models: list[str] = [] + if explicit: + models.extend( + model.strip() + for model in explicit.split(",") + if model.strip() + ) + if models: + return sorted(dict.fromkeys(models)) + + configured_model = ( + os.getenv("LITELLM_MODEL") or env_map.get("LITELLM_MODEL") or "" + ).strip() + configured_provider = ( + os.getenv("LITELLM_PROVIDER") or env_map.get("LITELLM_PROVIDER") or "" + ).strip() + + if configured_model: + if "/" in configured_model: + models.append(configured_model) + elif configured_provider: + models.append(f"{configured_provider}/{configured_model}") + else: + log( + "LITELLM_MODEL is set without a provider; configure LITELLM_PROVIDER or " + "use the provider/model format (e.g. openai/gpt-4o-mini)." 
+ ) + elif configured_provider: + log( + "LITELLM_PROVIDER configured without a default model. Bootstrap will issue an " + "unrestricted virtual key allowing any proxy-registered model." + ) + + return sorted(dict.fromkeys(models)) + + +def fetch_existing_key_record(master_key: str, key_value: str) -> Mapping[str, object] | None: + encoded = urllib.parse.quote_plus(key_value) + status, body = request_json(f"/key/info?key={encoded}", auth_token=master_key) + if status != 200: + log(f"Key lookup failed ({status}); treating OPENAI_API_KEY as new") + return None + try: + data = json.loads(body) + except json.JSONDecodeError: + log("Key info response was not valid JSON; ignoring") + return None + if isinstance(data, Mapping) and data.get("key"): + return data + return None + + +def fetch_key_by_alias(master_key: str, alias: str) -> str | None: + """Fetch existing key value by alias from LiteLLM proxy.""" + status, body = request_json("/key/info", auth_token=master_key) + if status != 200: + return None + try: + data = json.loads(body) + except json.JSONDecodeError: + return None + if isinstance(data, dict) and "keys" in data: + for key_info in data.get("keys", []): + if isinstance(key_info, dict) and key_info.get("key_alias") == alias: + return str(key_info.get("key", "")).strip() or None + return None + + +def generate_virtual_key( + master_key: str, + models: list[str], + spec: VirtualKeySpec, + env_map: Mapping[str, str], +) -> str: + budget_str = os.getenv(spec.budget_env_var) or env_map.get(spec.budget_env_var) or str(spec.default_budget) + try: + budget = float(budget_str) + except ValueError: + budget = spec.default_budget + + duration = os.getenv(spec.duration_env_var) or env_map.get(spec.duration_env_var) or spec.default_duration + + payload: dict[str, object] = { + "key_alias": spec.alias, + "user_id": spec.user_id, + "duration": duration, + "max_budget": budget, + "metadata": { + "provisioned_by": "bootstrap", + "service": spec.alias, + "default_models": models, + }, + "key_type": "llm_api", + } + if models: + payload["models"] = models + status, body = request_json( + "/key/generate", method="POST", payload=payload, auth_token=master_key + ) + if status == 400 and "already exists" in body.lower(): + # Key alias already exists but .env is out of sync (e.g., after docker prune) + # Delete the old key and regenerate + log(f"Key alias '{spec.alias}' already exists in database but not in .env; deleting and regenerating") + # Try to delete by alias using POST /key/delete with key_aliases array + delete_payload = {"key_aliases": [spec.alias]} + delete_status, delete_body = request_json( + "/key/delete", method="POST", payload=delete_payload, auth_token=master_key + ) + if delete_status not in {200, 201}: + log(f"Warning: Could not delete existing key alias {spec.alias} ({delete_status}): {delete_body}") + # Continue anyway and try to generate + else: + log(f"Deleted existing key alias {spec.alias}") + + # Retry generation + status, body = request_json( + "/key/generate", method="POST", payload=payload, auth_token=master_key + ) + if status not in {200, 201}: + raise RuntimeError(f"Failed to generate virtual key for {spec.alias} ({status}): {body}") + try: + data = json.loads(body) + except json.JSONDecodeError as exc: + raise RuntimeError(f"Virtual key response for {spec.alias} was not valid JSON") from exc + if isinstance(data, Mapping): + key_value = str(data.get("key") or data.get("token") or "").strip() + if key_value: + log(f"Generated new LiteLLM virtual key for {spec.alias} (budget: 
${budget}, duration: {duration})") + return key_value + raise RuntimeError(f"Virtual key response for {spec.alias} did not include a key field") + + +def update_virtual_key( + master_key: str, + key_value: str, + models: list[str], + spec: VirtualKeySpec, +) -> None: + if not models: + return + payload: dict[str, object] = { + "key": key_value, + "models": models, + } + status, body = request_json( + "/key/update", method="POST", payload=payload, auth_token=master_key + ) + if status != 200: + log(f"Virtual key update for {spec.alias} skipped ({status}): {body}") + else: + log(f"Refreshed allowed models for {spec.alias}") + + +def persist_key_to_env(new_key: str, env_var: str) -> None: + lines = read_env_file() + updated_lines, changed = set_env_value(lines, env_var, new_key) + # Always update the environment variable, even if file wasn't changed + os.environ[env_var] = new_key + if changed: + write_env_file(updated_lines) + log(f"Persisted {env_var} to {ENV_FILE_PATH}") + else: + log(f"{env_var} already up-to-date in env file") + + +def ensure_virtual_key( + master_key: str, + models: list[str], + env_map: Mapping[str, str], + spec: VirtualKeySpec, +) -> str: + allowed_models: list[str] = [] + sync_flag = os.getenv("LITELLM_SYNC_VIRTUAL_KEY_MODELS", "").strip().lower() + if models and (sync_flag in {"1", "true", "yes", "on"} or models == ["*"]): + allowed_models = models + existing_key = current_env_key(env_map, spec.env_var) + if existing_key: + record = fetch_existing_key_record(master_key, existing_key) + if record: + log(f"Reusing existing LiteLLM virtual key for {spec.alias}") + if allowed_models: + update_virtual_key(master_key, existing_key, allowed_models, spec) + return existing_key + log(f"Existing {spec.env_var} not registered with proxy; generating new key") + + new_key = generate_virtual_key(master_key, models, spec, env_map) + if allowed_models: + update_virtual_key(master_key, new_key, allowed_models, spec) + return new_key + + +def _split_model_identifier(model: str) -> tuple[str | None, str]: + if "/" in model: + provider, short_name = model.split("/", 1) + return provider.lower().strip() or None, short_name.strip() + return None, model.strip() + + +def ensure_models_registered( + master_key: str, + models: list[str], + env_map: Mapping[str, str], +) -> None: + if not models: + return + for model in models: + provider, short_name = _split_model_identifier(model) + if not provider or not short_name: + log(f"Skipping model '{model}' (no provider segment)") + continue + spec = PROVIDER_LOOKUP.get(provider) + if not spec: + log(f"No provider spec registered for '{provider}'; skipping model '{model}'") + continue + provider_secret = ( + env_map.get(spec.alias_env_var) + or env_map.get(spec.litellm_env_var) + or os.getenv(spec.alias_env_var) + or os.getenv(spec.litellm_env_var) + ) + if not provider_secret: + log( + f"Provider secret for '{provider}' not found; skipping model registration" + ) + continue + + api_key_reference = f"os.environ/{spec.alias_env_var}" + payload: dict[str, object] = { + "model_name": model, + "litellm_params": { + "model": short_name, + "custom_llm_provider": provider, + "api_key": api_key_reference, + }, + "model_info": { + "provider": provider, + "description": "Auto-registered during bootstrap", + }, + } + + status, body = request_json( + "/model/new", method="POST", payload=payload, auth_token=master_key + ) + if status in {200, 201}: + log(f"Registered LiteLLM model '{model}'") + continue + try: + data = json.loads(body) + except 
json.JSONDecodeError: + data = body + error_message = ( + data.get("error") if isinstance(data, Mapping) else str(data) + ) + if status == 409 or ( + isinstance(error_message, str) + and "already" in error_message.lower() + ): + log(f"Model '{model}' already present; skipping") + continue + log(f"Failed to register model '{model}' ({status}): {error_message}") + + +def main() -> int: + log("Bootstrapping LiteLLM proxy") + try: + wait_for_proxy() + env_lines = read_env_file() + env_map = parse_env_lines(env_lines) + legacy_map = read_legacy_env_file() + master_key = get_master_key(env_map) + + provider_values, updated_env_lines, env_changed = gather_provider_keys( + env_lines, env_map, legacy_map + ) + if env_changed: + write_env_file(updated_env_lines) + env_map = parse_env_lines(updated_env_lines) + log("Updated LiteLLM provider aliases in shared env file") + + ensure_litellm_env(provider_values) + + models = collect_default_models(env_map) + if models: + log("Default models for virtual keys: %s" % ", ".join(models)) + models_for_key = models + else: + log( + "No default models configured; provisioning virtual keys without model " + "restrictions (model-agnostic)." + ) + models_for_key = ["*"] + + # Generate virtual keys for each service + for spec in VIRTUAL_KEYS: + virtual_key = ensure_virtual_key(master_key, models_for_key, env_map, spec) + persist_key_to_env(virtual_key, spec.env_var) + + # Register models if any were specified + if models: + ensure_models_registered(master_key, models, env_map) + + log("Bootstrap complete") + return 0 + except Exception as exc: # pragma: no cover - startup failure reported to logs + log(f"Bootstrap failed: {exc}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/blog/2025-01-16-v0.7.0-temporal-workers-release.md b/docs/blog/2025-01-16-v0.7.0-temporal-workers-release.md index ef8a641..329ca7a 100644 --- a/docs/blog/2025-01-16-v0.7.0-temporal-workers-release.md +++ b/docs/blog/2025-01-16-v0.7.0-temporal-workers-release.md @@ -225,7 +225,7 @@ docker compose up -d # All workers start Set up AI workflows with API keys: ```bash -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env # Edit .env and add your API keys (OpenAI, Anthropic, etc.) 
``` diff --git a/docs/docs/how-to/docker-setup.md b/docs/docs/how-to/docker-setup.md index 39c0de9..f4c2fa0 100644 --- a/docs/docs/how-to/docker-setup.md +++ b/docs/docs/how-to/docker-setup.md @@ -110,7 +110,32 @@ fuzzforge workflow run secret_detection ./codebase ### Manual Worker Management -Start specific workers when needed: +**Quick Reference - Workflow to Worker Mapping:** + +| Workflow | Worker Service | Docker Command | +|----------|----------------|----------------| +| `security_assessment`, `python_sast`, `llm_analysis`, `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` | +| `android_static_analysis` | worker-android | `docker compose up -d worker-android` | +| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` | +| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` | +| `llm_secret_detection`, `trufflehog_detection`, `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` | + +FuzzForge CLI provides convenient commands for managing workers: + +```bash +# List all workers and their status +ff worker list +ff worker list --all # Include stopped workers + +# Start a specific worker +ff worker start python +ff worker start android --build # Rebuild before starting + +# Stop all workers +ff worker stop +``` + +You can also use Docker commands directly: ```bash # Start a single worker @@ -123,6 +148,33 @@ docker compose --profile workers up -d docker stop fuzzforge-worker-ossfuzz ``` +### Stopping Workers Properly + +The easiest way to stop workers is using the CLI: + +```bash +# Stop all running workers (recommended) +ff worker stop +``` + +This command safely stops all worker containers without affecting core services. + +Alternatively, you can use Docker commands: + +```bash +# Stop individual worker +docker stop fuzzforge-worker-python + +# Stop all workers using docker compose +# Note: This requires the --profile flag because workers are in profiles +docker compose down --profile workers +``` + +**Important:** Workers use Docker Compose profiles to prevent auto-starting. When using Docker commands directly: +- `docker compose down` (without `--profile workers`) does NOT stop workers +- Workers remain running unless explicitly stopped with the profile flag or `docker stop` +- Use `ff worker stop` for the safest option that won't affect core services + ### Resource Comparison | Command | Workers Started | RAM Usage | @@ -171,7 +223,7 @@ FuzzForge requires `volumes/env/.env` to start. This file contains API keys and ```bash # Copy the example file -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env # Edit to add your API keys (if using AI features) nano volumes/env/.env diff --git a/docs/docs/how-to/litellm-hot-swap.md b/docs/docs/how-to/litellm-hot-swap.md new file mode 100644 index 0000000..8c1d138 --- /dev/null +++ b/docs/docs/how-to/litellm-hot-swap.md @@ -0,0 +1,179 @@ +--- +title: "Hot-Swap LiteLLM Models" +description: "Register OpenAI and Anthropic models with the bundled LiteLLM proxy and switch them on the task agent without downtime." +--- + +LiteLLM sits between the task agent and upstream providers, so every model change +is just an API call. This guide walks through registering OpenAI and Anthropic +models, updating the virtual key, and exercising the A2A hot-swap flow. 
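+
+Each step in this guide can be verified by listing what the proxy currently exposes. A minimal sketch, assuming the proxy is published on the default host port `10999` and the bootstrap-issued virtual key is available as `OPENAI_API_KEY` (both covered in the prerequisites below):
+
+```python
+import os
+
+import requests
+
+# Virtual key issued by the bootstrap container (persisted in volumes/env/.env).
+virtual_key = os.environ["OPENAI_API_KEY"]
+
+resp = requests.get(
+    "http://localhost:10999/v1/models",
+    headers={"Authorization": f"Bearer {virtual_key}"},
+    timeout=30,
+)
+resp.raise_for_status()
+
+# The OpenAI-compatible listing returns {"data": [{"id": "<model name>"}, ...]}.
+for model in resp.json().get("data", []):
+    print(model["id"])
+```
+
+Run it before and after the registration step to watch the new aliases appear.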
+ +## Prerequisites + +- `docker compose up llm-proxy llm-proxy-db task-agent` +- Provider secrets in `volumes/env/.env`: + - `LITELLM_OPENAI_API_KEY` + - `LITELLM_ANTHROPIC_API_KEY` +- Master key (`LITELLM_MASTER_KEY`) and task-agent virtual key (auto-generated + during bootstrap) + +> UI access uses `UI_USERNAME` / `UI_PASSWORD` (defaults: `fuzzforge` / +> `fuzzforge123`). Change them by exporting new values before running compose. + +## Register Provider Models + +Use the admin API to register the models the proxy should expose. The snippet +below creates aliases for OpenAI `gpt-5`, `gpt-5-mini`, and Anthropic +`claude-sonnet-4-5`. + +```bash +MASTER_KEY=$(awk -F= '$1=="LITELLM_MASTER_KEY"{print $2}' volumes/env/.env) +export OPENAI_API_KEY=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env) +python - <<'PY' +import os, requests +master = os.environ['MASTER_KEY'].strip() +base = 'http://localhost:10999' +models = [ + { + "model_name": "openai/gpt-5", + "litellm_params": { + "model": "gpt-5", + "custom_llm_provider": "openai", + "api_key": "os.environ/LITELLM_OPENAI_API_KEY" + }, + "model_info": { + "provider": "openai", + "description": "OpenAI GPT-5" + } + }, + { + "model_name": "openai/gpt-5-mini", + "litellm_params": { + "model": "gpt-5-mini", + "custom_llm_provider": "openai", + "api_key": "os.environ/LITELLM_OPENAI_API_KEY" + }, + "model_info": { + "provider": "openai", + "description": "OpenAI GPT-5 mini" + } + }, + { + "model_name": "anthropic/claude-sonnet-4-5", + "litellm_params": { + "model": "claude-sonnet-4-5", + "custom_llm_provider": "anthropic", + "api_key": "os.environ/LITELLM_ANTHROPIC_API_KEY" + }, + "model_info": { + "provider": "anthropic", + "description": "Anthropic Claude Sonnet 4.5" + } + } +] +for payload in models: + resp = requests.post( + f"{base}/model/new", + headers={"Authorization": f"Bearer {master}", "Content-Type": "application/json"}, + json=payload, + timeout=60, + ) + if resp.status_code not in (200, 201, 409): + raise SystemExit(f"Failed to register {payload['model_name']}: {resp.status_code} {resp.text}") + print(payload['model_name'], '=>', resp.status_code) +PY +``` + +Each entry stores the upstream secret by reference (`os.environ/...`) so the +raw API key never leaves the container environment. 
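+
+Once the aliases are registered, you can smoke-test one of them through the proxy using the virtual key. A minimal sketch, assuming the `openai` Python SDK (>= 1.0) is installed locally and using the `openai/gpt-5` alias registered above:
+
+```python
+import os
+
+from openai import OpenAI
+
+# Point the stock OpenAI client at the proxy; the virtual key authenticates
+# against LiteLLM, so no raw provider secret is needed on this machine.
+client = OpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],
+    base_url="http://localhost:10999/v1",
+)
+
+resp = client.chat.completions.create(
+    model="openai/gpt-5",  # one of the aliases registered above
+    messages=[{"role": "user", "content": "Reply with a single word: ok"}],
+)
+print(resp.choices[0].message.content)
+```
+
+If the call fails with a model-access error, the virtual key is still restricted; the next section relaxes it.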
+
+## Relax Virtual Key Model Restrictions
+
+Let the agent key call every model on the proxy:
+
+```bash
+# Export both values so the Python heredoc below can read them via os.environ
+export MASTER_KEY=$(awk -F= '$1=="LITELLM_MASTER_KEY"{print $2}' volumes/env/.env)
+export VK=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env)
+python - <<'PY'
+import os, requests, json
+resp = requests.post(
+    'http://localhost:10999/key/update',
+    headers={
+        'Authorization': f"Bearer {os.environ['MASTER_KEY'].strip()}",
+        'Content-Type': 'application/json'
+    },
+    json={'key': os.environ['VK'].strip(), 'models': []},
+    timeout=60,
+)
+print(json.dumps(resp.json(), indent=2))
+PY
+```
+
+Restart the task agent so it sees the refreshed key:
+
+```bash
+docker compose restart task-agent
+```
+
+## Hot-Swap With The A2A Helper
+
+Switch models without restarting the service:
+
+```bash
+# Ensure the CLI reads the latest virtual key
+export OPENAI_API_KEY=$(awk -F= '$1=="OPENAI_API_KEY"{print $2}' volumes/env/.env)
+
+# OpenAI gpt-5 alias
+python ai/agents/task_agent/a2a_hot_swap.py \
+  --url http://localhost:10900/a2a/litellm_agent \
+  --model openai gpt-5 \
+  --context switch-demo
+
+# Confirm the response comes from the new model
+python ai/agents/task_agent/a2a_hot_swap.py \
+  --url http://localhost:10900/a2a/litellm_agent \
+  --message "Which model am I using?" \
+  --context switch-demo
+
+# Swap to gpt-5-mini
+python ai/agents/task_agent/a2a_hot_swap.py --url http://localhost:10900/a2a/litellm_agent --model openai gpt-5-mini --context switch-demo
+
+# Swap to Anthropic Claude Sonnet 4.5
+python ai/agents/task_agent/a2a_hot_swap.py --url http://localhost:10900/a2a/litellm_agent --model anthropic claude-sonnet-4-5 --context switch-demo
+```
+
+> Each invocation reuses the same conversation context (`switch-demo`) so you
+> can confirm the active provider by asking follow-up questions.
+
+## Resetting The Proxy (Optional)
+
+To wipe the LiteLLM state and rerun bootstrap:
+
+```bash
+docker compose down llm-proxy llm-proxy-db llm-proxy-bootstrap
+
+docker volume rm fuzzforge_litellm_proxy_data fuzzforge_litellm_proxy_db
+
+docker compose up -d llm-proxy-db llm-proxy
+```
+
+After the proxy is healthy, rerun the registration script and key update. The
+bootstrap container mirrors secrets into `.env.litellm` and reissues the task
+agent key automatically.
+
+## How The Pieces Fit Together
+
+1. **LiteLLM Proxy** exposes OpenAI-compatible routes and stores provider
+   metadata in Postgres.
+2. **Bootstrap Container** waits for `/health/liveliness`, mirrors secrets into
+   `.env.litellm`, registers any models you script, and keeps the virtual key in
+   sync with the discovered model list.
+3. **Task Agent** calls the proxy via `FF_LLM_PROXY_BASE_URL`. The hot-swap tool
+   updates the agent's runtime configuration, so switching providers is just a
+   control message.
+4. **Virtual Keys** carry quotas and allowed models. Setting the `models` array
+   to `[]` lets the key use anything registered on the proxy.
+
+Keep the master key and generated virtual keys somewhere safe—they grant full
+admin and agent access respectively. When you add a new provider (e.g., Ollama),
+just register the model via `/model/new`, update the key if needed, and repeat
+the hot-swap steps.
diff --git a/docs/docs/how-to/llm-proxy.md b/docs/docs/how-to/llm-proxy.md
new file mode 100644
index 0000000..4d6a0db
--- /dev/null
+++ b/docs/docs/how-to/llm-proxy.md
@@ -0,0 +1,194 @@
+---
+title: "Run the LLM Proxy"
+description: "Run the LiteLLM gateway that ships with FuzzForge and connect it to the task agent."
+---
+
+## Overview
+
+FuzzForge routes every LLM request through a LiteLLM proxy so that usage can be
+metered, priced, and rate-limited per user. Docker Compose starts the proxy in a
+hardened container, while a bootstrap job seeds upstream provider secrets and
+issues a virtual key for the task agent automatically.
+
+LiteLLM exposes the OpenAI-compatible APIs (`/v1/*`) plus a rich admin UI. All
+traffic stays on your network and upstream credentials never leave the proxy
+container.
+
+## Before You Start
+
+1. Copy `volumes/env/.env.template` to `volumes/env/.env` and set the basics:
+   - `LITELLM_MASTER_KEY` — admin token used to manage the proxy
+   - `LITELLM_SALT_KEY` — random string used to encrypt provider credentials
+   - Provider secrets under `LITELLM_<PROVIDER>_API_KEY` (for example
+     `LITELLM_OPENAI_API_KEY`)
+   - Leave `OPENAI_API_KEY=sk-proxy-default`; the bootstrap job replaces it with a
+     LiteLLM-issued virtual key
+2. When running tools outside Docker, change `FF_LLM_PROXY_BASE_URL` to the
+   published host port (`http://localhost:10999`). Inside Docker the default
+   value `http://llm-proxy:4000` already resolves to the container.
+
+## Start the Proxy
+
+```bash
+docker compose up llm-proxy
+```
+
+The service publishes two things:
+
+- HTTP API + admin UI on `http://localhost:10999`
+- Persistent SQLite state inside the named volume
+  `fuzzforge_litellm_proxy_data`
+
+The UI login uses the `UI_USERNAME` / `UI_PASSWORD` pair (defaults to
+`fuzzforge` / `fuzzforge123`). To change them, set the environment variables
+before you run `docker compose up`:
+
+```bash
+export UI_USERNAME=myadmin
+export UI_PASSWORD=super-secret
+docker compose up llm-proxy
+```
+
+You can also edit the values directly in `docker-compose.yml` if you prefer to
+manage them with a separate secrets manager.
+
+Proxy-wide settings now live in `volumes/litellm/proxy_config.yaml`. By
+default it enables `store_model_in_db` and `store_prompts_in_spend_logs`, which
+lets the UI display request/response payloads for new calls. Update this file
+if you need additional LiteLLM options and restart the `llm-proxy` container.
+
+LiteLLM's health endpoint lives at `/health/liveliness`. You can verify it from
+another terminal:
+
+```bash
+curl http://localhost:10999/health/liveliness
+```
+
+## What the Bootstrapper Does
+
+During startup the `llm-proxy-bootstrap` container performs three actions:
+
+1. **Wait for the proxy** — Blocks until `/health/liveliness` becomes healthy.
+2. **Mirror provider secrets** — Reads `volumes/env/.env` and writes any
+   `LITELLM_*_API_KEY` values into `volumes/env/.env.litellm`. The file is
+   created automatically on first boot; if you delete it, bootstrap will
+   recreate it and the proxy continues to read secrets from `.env`.
+3. **Issue the default virtual key** — Calls `/key/generate` with the master key
+   and persists the generated token back into `volumes/env/.env` (replacing the
+   `sk-proxy-default` placeholder). The key is scoped to
+   `LITELLM_DEFAULT_MODELS` when that variable is set; otherwise it uses the
+   model from `LITELLM_MODEL`.
+
+The sequence is idempotent. Existing provider secrets and virtual keys are
+reused on subsequent runs, and the allowed-model list is refreshed via
+`/key/update` if you change the defaults.
+
+## Managing Virtual Keys
+
+LiteLLM keys act as per-user credentials. The default key, named
+`task-agent default`, is created automatically for the task agent.
You can issue +more keys for teammates or CI jobs with the same management API: + +```bash +curl http://localhost:10999/key/generate \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "key_alias": "demo-user", + "user_id": "demo", + "models": ["openai/gpt-4o-mini"], + "duration": "30d", + "max_budget": 50, + "metadata": {"team": "sandbox"} + }' +``` + +Use `/key/update` to adjust budgets or the allowed-model list on existing keys: + +```bash +curl http://localhost:10999/key/update \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "key": "sk-...", + "models": ["openai/*", "anthropic/*"], + "max_budget": 100 + }' +``` + +The admin UI (navigate to `http://localhost:10999/ui`) provides equivalent +controls for creating keys, routing models, auditing spend, and exporting logs. + +## Wiring the Task Agent + +The task agent already expects to talk to the proxy. Confirm these values in +`volumes/env/.env` before launching the stack: + +```bash +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 # or http://localhost:10999 when outside Docker +OPENAI_API_KEY= +LITELLM_MODEL=openai/gpt-5 +LITELLM_PROVIDER=openai +``` + +Restart the agent container after changing environment variables so the process +picks up the updates. + +To validate the integration end to end, call the proxy directly: + +```bash +curl -X POST http://localhost:10999/v1/chat/completions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "openai/gpt-4o-mini", + "messages": [{"role": "user", "content": "Proxy health check"}] + }' +``` + +A JSON response indicates the proxy can reach your upstream provider using the +mirrored secrets. + +## Local Runtimes (Ollama, etc.) + +LiteLLM supports non-hosted providers as well. To route requests to a local +runtime such as Ollama: + +1. Set the appropriate provider key in the env file + (for Ollama, point LiteLLM at `OLLAMA_API_BASE` inside the container). +2. Add the passthrough model either from the UI (**Models โ†’ Add Model**) or + by calling `/model/new` with the master key. +3. Update `LITELLM_DEFAULT_MODELS` (and regenerate the virtual key if you want +the default key to include it). + +The task agent keeps using the same OpenAI-compatible surface while LiteLLM +handles the translation to your runtime. + +## Next Steps + +- Explore [LiteLLM's documentation](https://docs.litellm.ai/docs/simple_proxy) + for advanced routing, cost controls, and observability hooks. +- Configure Slack/Prometheus integrations from the UI to monitor usage. +- Rotate the master key periodically and store it in your secrets manager, as it + grants full admin access to the proxy. + +## Observability + +LiteLLM ships with OpenTelemetry hooks for traces and metrics. This repository +already includes an OTLP collector (`otel-collector` service) and mounts a +default configuration that forwards traces to standard output. To wire it up: + +1. Edit `volumes/otel/collector-config.yaml` if you want to forward to Jaeger, + Datadog, etc. The initial config uses the logging exporter so you can see + spans immediately via `docker compose logs -f otel-collector`. +2. Customize `volumes/litellm/proxy_config.yaml` if you need additional + callbacks; `general_settings.otel: true` and `litellm_settings.callbacks: + ["otel"]` are already present so no extra code changes are required. +3. 
(Optional) Override `OTEL_EXPORTER_OTLP_*` environment variables in + `docker-compose.yml` or your shell to point at a remote collector. + +After updating the configs, run `docker compose up -d otel-collector llm-proxy` +and generate a request (for example, trigger `ff workflow run llm_analysis`). +New traces will show up in the collector logs or whichever backend you +configured. See the official LiteLLM guide for advanced exporter options: +https://docs.litellm.ai/docs/observability/opentelemetry_integration. diff --git a/docs/docs/how-to/troubleshooting.md b/docs/docs/how-to/troubleshooting.md index 8784ef3..38165ae 100644 --- a/docs/docs/how-to/troubleshooting.md +++ b/docs/docs/how-to/troubleshooting.md @@ -33,7 +33,7 @@ The required `volumes/env/.env` file is missing. Docker Compose needs this file **How to fix:** ```bash # Create the environment file from the template -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env # Restart Docker Compose docker compose -f docker-compose.yml down @@ -106,6 +106,46 @@ File upload to MinIO failed or worker can't download target. ``` - Reduce the number of concurrent workflows if your system is resource-constrained. +### Workflow requires worker not running + +**What's happening?** +You see a warning message like: +``` +โš ๏ธ Could not check worker requirements: Cannot find docker-compose.yml. + Ensure backend is running, run from FuzzForge directory, or set + FUZZFORGE_ROOT environment variable. + Continuing without worker management... +``` + +Or the workflow fails to start because the required worker isn't running. + +**How to fix:** +Start the worker required for your workflow before running it: + +| Workflow | Worker Required | Startup Command | +|----------|----------------|-----------------| +| `android_static_analysis` | worker-android | `docker compose up -d worker-android` | +| `security_assessment` | worker-python | `docker compose up -d worker-python` | +| `python_sast` | worker-python | `docker compose up -d worker-python` | +| `llm_analysis` | worker-python | `docker compose up -d worker-python` | +| `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` | +| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` | +| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` | +| `llm_secret_detection` | worker-secrets | `docker compose up -d worker-secrets` | +| `trufflehog_detection` | worker-secrets | `docker compose up -d worker-secrets` | +| `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` | + +**Check worker status:** +```bash +# Check if a specific worker is running +docker compose ps worker-android + +# Check all workers +docker compose ps | grep worker +``` + +**Note:** Workers don't auto-start by default to save system resources. For more details on worker management, see the [Docker Setup guide](docker-setup.md#worker-management). + --- ## Service Connectivity Issues diff --git a/docs/docs/reference/cli-reference.md b/docs/docs/reference/cli-reference.md new file mode 100644 index 0000000..dd7b4d2 --- /dev/null +++ b/docs/docs/reference/cli-reference.md @@ -0,0 +1,616 @@ +# FuzzForge CLI Reference + +Complete reference for the FuzzForge CLI (`ff` command). Use this as your quick lookup for all commands, options, and examples. 
+ +--- + +## Global Options + +| Option | Description | +|--------|-------------| +| `--help`, `-h` | Show help message | +| `--version`, `-v` | Show version information | + +--- + +## Core Commands + +### `ff init` + +Initialize a new FuzzForge project in the current directory. + +**Usage:** +```bash +ff init [OPTIONS] +``` + +**Options:** +- `--name`, `-n` โ€” Project name (defaults to current directory name) +- `--api-url`, `-u` โ€” FuzzForge API URL (defaults to http://localhost:8000) +- `--force`, `-f` โ€” Force initialization even if project already exists + +**Examples:** +```bash +ff init # Initialize with defaults +ff init --name my-project # Set custom project name +ff init --api-url http://prod:8000 # Use custom API URL +``` + +--- + +### `ff status` + +Show project and latest execution status. + +**Usage:** +```bash +ff status +``` + +**Example Output:** +``` +๐Ÿ“Š Project Status + Project: my-security-project + API URL: http://localhost:8000 + +Latest Execution: + Run ID: security_scan-a1b2c3 + Workflow: security_assessment + Status: COMPLETED + Started: 2 hours ago +``` + +--- + +### `ff config` + +Manage project configuration. + +**Usage:** +```bash +ff config # Show all config +ff config # Get specific value +ff config # Set value +``` + +**Examples:** +```bash +ff config # Display all settings +ff config api_url # Get API URL +ff config api_url http://prod:8000 # Set API URL +``` + +--- + +### `ff clean` + +Clean old execution data and findings. + +**Usage:** +```bash +ff clean [OPTIONS] +``` + +**Options:** +- `--days`, `-d` โ€” Remove data older than this many days (default: 90) +- `--dry-run` โ€” Show what would be deleted without deleting + +**Examples:** +```bash +ff clean # Clean data older than 90 days +ff clean --days 30 # Clean data older than 30 days +ff clean --dry-run # Preview what would be deleted +``` + +--- + +## Workflow Commands + +### `ff workflows` + +Browse and list available workflows. + +**Usage:** +```bash +ff workflows [COMMAND] +``` + +**Subcommands:** +- `list` โ€” List all available workflows +- `info ` โ€” Show detailed workflow information +- `params ` โ€” Show workflow parameters + +**Examples:** +```bash +ff workflows list # List all workflows +ff workflows info python_sast # Show workflow details +ff workflows params python_sast # Show parameters +``` + +--- + +### `ff workflow` + +Execute and manage individual workflows. + +**Usage:** +```bash +ff workflow +``` + +**Subcommands:** + +#### `ff workflow run` + +Execute a security testing workflow. + +**Usage:** +```bash +ff workflow run [params...] 
[OPTIONS] +``` + +**Arguments:** +- `` โ€” Workflow name +- `` โ€” Target path to analyze +- `[params...]` โ€” Parameters as `key=value` pairs + +**Options:** +- `--param-file`, `-f` โ€” JSON file containing workflow parameters +- `--timeout`, `-t` โ€” Execution timeout in seconds +- `--interactive` / `--no-interactive`, `-i` / `-n` โ€” Interactive parameter input (default: interactive) +- `--wait`, `-w` โ€” Wait for execution to complete +- `--live`, `-l` โ€” Start live monitoring after execution +- `--auto-start` / `--no-auto-start` โ€” Automatically start required worker +- `--auto-stop` / `--no-auto-stop` โ€” Automatically stop worker after completion +- `--fail-on` โ€” Fail build if findings match SARIF level (error, warning, note, info, all, none) +- `--export-sarif` โ€” Export SARIF results to file after completion + +**Examples:** +```bash +# Basic workflow execution +ff workflow run python_sast ./project + +# With parameters +ff workflow run python_sast ./project check_secrets=true + +# CI/CD integration - fail on errors +ff workflow run python_sast ./project --wait --no-interactive \ + --fail-on error --export-sarif results.sarif + +# With parameter file +ff workflow run python_sast ./project --param-file config.json + +# Live monitoring for fuzzing +ff workflow run atheris_fuzzing ./project --live +``` + +#### `ff workflow status` + +Check status of latest or specific workflow execution. + +**Usage:** +```bash +ff workflow status [run_id] +``` + +**Examples:** +```bash +ff workflow status # Show latest execution status +ff workflow status python_sast-abc123 # Show specific execution +``` + +#### `ff workflow history` + +Show execution history. + +**Usage:** +```bash +ff workflow history [OPTIONS] +``` + +**Options:** +- `--limit`, `-l` โ€” Number of executions to show (default: 10) + +**Example:** +```bash +ff workflow history --limit 20 +``` + +#### `ff workflow retry` + +Retry a failed workflow execution. + +**Usage:** +```bash +ff workflow retry +``` + +**Example:** +```bash +ff workflow retry python_sast-abc123 +``` + +--- + +## Finding Commands + +### `ff findings` + +Browse all findings across executions. + +**Usage:** +```bash +ff findings [COMMAND] +``` + +**Subcommands:** + +#### `ff findings list` + +List findings from a specific run. + +**Usage:** +```bash +ff findings list [run_id] [OPTIONS] +``` + +**Options:** +- `--format` โ€” Output format: table, json, sarif (default: table) +- `--save` โ€” Save findings to file + +**Examples:** +```bash +ff findings list # Show latest findings +ff findings list python_sast-abc123 # Show specific run +ff findings list --format json # JSON output +ff findings list --format sarif --save # Export SARIF +``` + +#### `ff findings export` + +Export findings to various formats. + +**Usage:** +```bash +ff findings export [OPTIONS] +``` + +**Options:** +- `--format` โ€” Output format: json, sarif, csv +- `--output`, `-o` โ€” Output file path + +**Example:** +```bash +ff findings export python_sast-abc123 --format sarif --output results.sarif +``` + +#### `ff findings history` + +Show finding history across multiple runs. + +**Usage:** +```bash +ff findings history [OPTIONS] +``` + +**Options:** +- `--limit`, `-l` โ€” Number of runs to include (default: 10) + +--- + +### `ff finding` + +View and analyze individual findings. 
+ +**Usage:** +```bash +ff finding [id] # Show latest or specific finding +ff finding show --rule # Show specific finding detail +``` + +**Examples:** +```bash +ff finding # Show latest finding +ff finding python_sast-abc123 # Show specific run findings +ff finding show python_sast-abc123 --rule f2cf5e3e # Show specific finding +``` + +--- + +## Worker Management Commands + +### `ff worker` + +Manage Temporal workers for workflow execution. + +**Usage:** +```bash +ff worker +``` + +**Subcommands:** + +#### `ff worker list` + +List FuzzForge workers and their status. + +**Usage:** +```bash +ff worker list [OPTIONS] +``` + +**Options:** +- `--all`, `-a` โ€” Show all workers (including stopped) + +**Examples:** +```bash +ff worker list # Show running workers +ff worker list --all # Show all workers +``` + +**Example Output:** +``` +FuzzForge Workers +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ Worker โ”ƒ Status โ”ƒ Uptime โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ android โ”‚ โ— Running โ”‚ 5 minutes ago โ”‚ +โ”‚ python โ”‚ โ— Running โ”‚ 10 minutes ago โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +โœ… 2 worker(s) running +``` + +#### `ff worker start` + +Start a specific worker. + +**Usage:** +```bash +ff worker start [OPTIONS] +``` + +**Arguments:** +- `` โ€” Worker name (e.g., python, android, rust, secrets) + +**Options:** +- `--build` โ€” Rebuild worker image before starting + +**Examples:** +```bash +ff worker start python # Start Python worker +ff worker start android --build # Rebuild and start Android worker +``` + +**Available Workers:** +- `python` โ€” Python security analysis and fuzzing +- `android` โ€” Android APK analysis +- `rust` โ€” Rust fuzzing and analysis +- `secrets` โ€” Secret detection workflows +- `ossfuzz` โ€” OSS-Fuzz integration + +#### `ff worker stop` + +Stop all running FuzzForge workers. + +**Usage:** +```bash +ff worker stop [OPTIONS] +``` + +**Options:** +- `--all` โ€” Stop all workers (default behavior, flag for clarity) + +**Example:** +```bash +ff worker stop +``` + +**Note:** This command stops only worker containers, leaving core services (backend, temporal, minio) running. + +--- + +## Monitoring Commands + +### `ff monitor` + +Real-time monitoring for running workflows. + +**Usage:** +```bash +ff monitor [COMMAND] +``` + +**Subcommands:** +- `live ` โ€” Live monitoring for a specific execution +- `stats ` โ€” Show statistics for fuzzing workflows + +**Examples:** +```bash +ff monitor live atheris-abc123 # Monitor fuzzing campaign +ff monitor stats atheris-abc123 # Show fuzzing statistics +``` + +--- + +## AI Integration Commands + +### `ff ai` + +AI-powered analysis and assistance. + +**Usage:** +```bash +ff ai [COMMAND] +``` + +**Subcommands:** +- `analyze ` โ€” Analyze findings with AI +- `explain ` โ€” Get AI explanation of a finding +- `remediate ` โ€” Get remediation suggestions + +**Examples:** +```bash +ff ai analyze python_sast-abc123 # Analyze all findings +ff ai explain python_sast-abc123:finding1 # Explain specific finding +ff ai remediate python_sast-abc123:finding1 # Get fix suggestions +``` + +--- + +## Knowledge Ingestion Commands + +### `ff ingest` + +Ingest knowledge into the AI knowledge base. 
+ +**Usage:** +```bash +ff ingest [COMMAND] +``` + +**Subcommands:** +- `file ` โ€” Ingest a file +- `directory ` โ€” Ingest directory contents +- `workflow ` โ€” Ingest workflow documentation + +**Examples:** +```bash +ff ingest file ./docs/security.md # Ingest single file +ff ingest directory ./docs # Ingest directory +ff ingest workflow python_sast # Ingest workflow docs +``` + +--- + +## Common Workflow Examples + +### CI/CD Integration + +```bash +# Run security scan in CI, fail on errors +ff workflow run python_sast . \ + --wait \ + --no-interactive \ + --fail-on error \ + --export-sarif results.sarif +``` + +### Local Development + +```bash +# Quick security check +ff workflow run python_sast ./my-code + +# Check specific file types +ff workflow run python_sast . file_extensions='[".py",".js"]' + +# Interactive parameter configuration +ff workflow run python_sast . --interactive +``` + +### Fuzzing Workflows + +```bash +# Start fuzzing with live monitoring +ff workflow run atheris_fuzzing ./project --live + +# Long-running fuzzing campaign +ff workflow run ossfuzz_campaign ./project \ + --auto-start \ + duration=3600 \ + --live +``` + +### Worker Management + +```bash +# Check which workers are running +ff worker list + +# Start needed worker manually +ff worker start python --build + +# Stop all workers when done +ff worker stop +``` + +--- + +## Configuration Files + +### Project Config (`.fuzzforge/config.json`) + +```json +{ + "project_name": "my-security-project", + "api_url": "http://localhost:8000", + "default_workflow": "python_sast", + "auto_start_workers": true, + "auto_stop_workers": false +} +``` + +### Parameter File Example + +```json +{ + "check_secrets": true, + "file_extensions": [".py", ".js", ".go"], + "severity_threshold": "medium", + "exclude_patterns": ["**/test/**", "**/vendor/**"] +} +``` + +--- + +## Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success | +| 1 | General error | +| 2 | Findings matched `--fail-on` criteria | +| 3 | Worker startup failed | +| 4 | Workflow execution failed | + +--- + +## Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `FUZZFORGE_API_URL` | Backend API URL | http://localhost:8000 | +| `FUZZFORGE_ROOT` | FuzzForge installation directory | Auto-detected | +| `FUZZFORGE_DEBUG` | Enable debug logging | false | + +--- + +## Tips and Best Practices + +1. **Use `--no-interactive` in CI/CD** โ€” Prevents prompts that would hang automated pipelines +2. **Use `--fail-on` for quality gates** โ€” Fail builds based on finding severity +3. **Export SARIF for tool integration** โ€” Most security tools support SARIF format +4. **Let workflows auto-start workers** โ€” More efficient than manually managing workers +5. **Use `--wait` with `--export-sarif`** โ€” Ensures results are available before export +6. **Check `ff worker list` regularly** โ€” Helps manage system resources +7. 
**Use parameter files for complex configs** โ€” Easier to version control and reuse + +--- + +## Related Documentation + +- [Docker Setup](../how-to/docker-setup.md) โ€” Worker management and Docker configuration +- [Getting Started](../tutorial/getting-started.md) โ€” Complete setup guide +- [Workflow Guide](../how-to/create-workflow.md) โ€” Detailed workflow documentation +- [CI/CD Integration](../how-to/cicd-integration.md) โ€” CI/CD setup examples + +--- + +**Need Help?** + +```bash +ff --help # General help +ff workflow run --help # Command-specific help +ff worker --help # Worker management help +``` diff --git a/docs/docs/tutorial/getting-started.md b/docs/docs/tutorial/getting-started.md index 2049963..b376258 100644 --- a/docs/docs/tutorial/getting-started.md +++ b/docs/docs/tutorial/getting-started.md @@ -28,7 +28,7 @@ cd fuzzforge_ai Create the environment configuration file: ```bash -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env ``` This file is required for FuzzForge to start. You can leave it with default values if you're only using basic workflows. @@ -89,9 +89,26 @@ curl http://localhost:8000/health # Should return: {"status":"healthy"} ``` -### Start the Python Worker +### Start Workers for Your Workflows -Workers don't auto-start by default (saves RAM). Start the Python worker for your first workflow: +Workers don't auto-start by default (saves RAM). You need to start the worker required for the workflow you want to run. + +**Workflow-to-Worker Mapping:** + +| Workflow | Worker Required | Startup Command | +|----------|----------------|-----------------| +| `security_assessment` | worker-python | `docker compose up -d worker-python` | +| `python_sast` | worker-python | `docker compose up -d worker-python` | +| `llm_analysis` | worker-python | `docker compose up -d worker-python` | +| `atheris_fuzzing` | worker-python | `docker compose up -d worker-python` | +| `android_static_analysis` | worker-android | `docker compose up -d worker-android` | +| `cargo_fuzzing` | worker-rust | `docker compose up -d worker-rust` | +| `ossfuzz_campaign` | worker-ossfuzz | `docker compose up -d worker-ossfuzz` | +| `llm_secret_detection` | worker-secrets | `docker compose up -d worker-secrets` | +| `trufflehog_detection` | worker-secrets | `docker compose up -d worker-secrets` | +| `gitleaks_detection` | worker-secrets | `docker compose up -d worker-secrets` | + +**For your first workflow (security_assessment), start the Python worker:** ```bash # Start the Python worker @@ -102,7 +119,20 @@ docker compose ps worker-python # Should show: Up (healthy) ``` -**Note:** Workers use Docker Compose profiles and only start when needed. For your first workflow run, it's safer to start the worker manually. Later, the CLI can auto-start workers on demand. +**For other workflows, start the appropriate worker:** + +```bash +# Example: For Android analysis +docker compose up -d worker-android + +# Example: For Rust fuzzing +docker compose up -d worker-rust + +# Check all running workers +docker compose ps | grep worker +``` + +**Note:** Workers use Docker Compose profiles and only start when needed. For your first workflow run, it's safer to start the worker manually. Later, the CLI can auto-start workers on demand. If you see a warning about worker requirements, ensure you've started the correct worker for your workflow. 
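+
+Once the CLI is installed (see the next step), it can also manage workers for
+you. A minimal sketch, assuming the `--auto-start` and `--wait` flags described
+in the CLI reference are available in your build:
+
+```bash
+# Auto-start the matching worker (worker-python), run the workflow, and wait for results
+ff workflow run security_assessment ./my-project --auto-start --wait
+```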
## Step 4: Install the CLI (Optional but Recommended) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 3630bd1..1068406 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -100,7 +100,7 @@ const config: Config = { label: "AI", }, { - href: "https://github.com/FuzzingLabs/fuzzforge_alpha", + href: "https://github.com/FuzzingLabs/fuzzforge_ai", label: "GitHub", position: "right", }, @@ -160,7 +160,7 @@ const config: Config = { }, { label: "GitHub", - href: "https://github.com/FuzzingLabs/fuzzforge_alpha", + href: "https://github.com/FuzzingLabs/fuzzforge_ai", }, ], }, diff --git a/docs/index.md b/docs/index.md index dc0a13e..7d2cd85 100644 --- a/docs/index.md +++ b/docs/index.md @@ -89,7 +89,7 @@ Technical reference materials and specifications. Before starting FuzzForge, you **must** create the environment configuration file: ```bash -cp volumes/env/.env.example volumes/env/.env +cp volumes/env/.env.template volumes/env/.env ``` Docker Compose will fail without this file. You can leave it with default values if you're only using basic workflows (no AI features). diff --git a/pyproject.toml b/pyproject.toml index 2ee4e56..2e54464 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fuzzforge" -version = "0.6.0" +version = "0.7.3" description = "FuzzForge Platform - Complete fuzzing and security testing platform with AI capabilities" readme = "README.md" license = { text = "BSL-1.1" } diff --git a/sdk/examples/basic_workflow.py b/sdk/examples/basic_workflow.py index 74b3a49..df55d38 100644 --- a/sdk/examples/basic_workflow.py +++ b/sdk/examples/basic_workflow.py @@ -64,7 +64,6 @@ def main(): print("๐Ÿ“ Workflow metadata:") print(f" Author: {metadata.author}") print(f" Required modules: {metadata.required_modules}") - print(f" Supported volume modes: {metadata.supported_volume_modes}") print() # Prepare target path (use current directory as example) @@ -74,7 +73,6 @@ def main(): # Create workflow submission submission = create_workflow_submission( target_path=target_path, - volume_mode="ro", timeout=300, # 5 minutes ) @@ -234,7 +232,6 @@ async def async_main(): target_path = Path.cwd().absolute() submission = create_workflow_submission( target_path=target_path, - volume_mode="ro", timeout=300, ) diff --git a/sdk/examples/batch_analysis.py b/sdk/examples/batch_analysis.py index 5ac46bc..77aab78 100644 --- a/sdk/examples/batch_analysis.py +++ b/sdk/examples/batch_analysis.py @@ -135,23 +135,18 @@ class BatchAnalyzer: # Determine appropriate timeout based on workflow type if "fuzzing" in metadata.tags: timeout = 1800 # 30 minutes for fuzzing - volume_mode = "rw" elif "dynamic" in metadata.tags: timeout = 900 # 15 minutes for dynamic analysis - volume_mode = "rw" else: timeout = 300 # 5 minutes for static analysis - volume_mode = "ro" except Exception: # Fallback settings timeout = 600 - volume_mode = "ro" # Create submission submission = create_workflow_submission( target_path=project_path, - volume_mode=volume_mode, timeout=timeout ) diff --git a/sdk/examples/fuzzing_monitor.py b/sdk/examples/fuzzing_monitor.py index 096574d..07c4e06 100644 --- a/sdk/examples/fuzzing_monitor.py +++ b/sdk/examples/fuzzing_monitor.py @@ -193,7 +193,6 @@ async def main(): submission = create_workflow_submission( target_path=target_path, - volume_mode="rw", # Fuzzing may need to write files timeout=3600, # 1 hour timeout resource_limits=resource_limits, parameters={ diff --git a/sdk/examples/save_findings_demo.py 
b/sdk/examples/save_findings_demo.py index 304b17a..1d9568d 100644 --- a/sdk/examples/save_findings_demo.py +++ b/sdk/examples/save_findings_demo.py @@ -33,7 +33,6 @@ def main(): workflow_name = workflows[0].name submission = create_workflow_submission( target_path=Path.cwd().absolute(), - volume_mode="ro", timeout=300 ) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 2afc681..694807f 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fuzzforge-sdk" -version = "0.7.0" +version = "0.7.3" description = "Python SDK for FuzzForge security testing workflow orchestration platform" readme = "README.md" authors = [ diff --git a/sdk/src/fuzzforge_sdk/__init__.py b/sdk/src/fuzzforge_sdk/__init__.py index b0da889..d50f599 100644 --- a/sdk/src/fuzzforge_sdk/__init__.py +++ b/sdk/src/fuzzforge_sdk/__init__.py @@ -42,7 +42,7 @@ from .testing import ( DEFAULT_TEST_CONFIG, ) -__version__ = "0.6.0" +__version__ = "0.7.3" __all__ = [ "FuzzForgeClient", "WorkflowSubmission", diff --git a/sdk/src/fuzzforge_sdk/client.py b/sdk/src/fuzzforge_sdk/client.py index 1319389..c4f29d3 100644 --- a/sdk/src/fuzzforge_sdk/client.py +++ b/sdk/src/fuzzforge_sdk/client.py @@ -440,7 +440,6 @@ class FuzzForgeClient: workflow_name: str, target_path: Union[str, Path], parameters: Optional[Dict[str, Any]] = None, - volume_mode: str = "ro", timeout: Optional[int] = None, progress_callback: Optional[Callable[[int, int], None]] = None ) -> RunSubmissionResponse: @@ -454,7 +453,6 @@ class FuzzForgeClient: workflow_name: Name of the workflow to execute target_path: Local path to file or directory to analyze parameters: Workflow-specific parameters - volume_mode: Volume mount mode ("ro" or "rw") timeout: Timeout in seconds progress_callback: Optional callback(bytes_uploaded, total_bytes) for progress diff --git a/sdk/src/fuzzforge_sdk/testing.py b/sdk/src/fuzzforge_sdk/testing.py index 9f9297b..6d191b8 100644 --- a/sdk/src/fuzzforge_sdk/testing.py +++ b/sdk/src/fuzzforge_sdk/testing.py @@ -193,8 +193,6 @@ class WorkflowTester: # Create workflow submission submission = create_workflow_submission( - target_path=str(test_path), - volume_mode="ro", **workflow_params ) diff --git a/src/fuzzforge/__init__.py b/src/fuzzforge/__init__.py index 3846ea1..87a087f 100644 --- a/src/fuzzforge/__init__.py +++ b/src/fuzzforge/__init__.py @@ -1,3 +1,3 @@ """FuzzForge Platform - Complete security testing platform with AI capabilities.""" -__version__ = "0.6.0" \ No newline at end of file +__version__ = "0.7.3" \ No newline at end of file diff --git a/test_projects/android_test/BeetleBug.apk b/test_projects/android_test/BeetleBug.apk new file mode 100644 index 0000000..05f8f51 Binary files /dev/null and b/test_projects/android_test/BeetleBug.apk differ diff --git a/test_projects/android_test/shopnest.apk b/test_projects/android_test/shopnest.apk new file mode 100644 index 0000000..523ec80 Binary files /dev/null and b/test_projects/android_test/shopnest.apk differ diff --git a/test_projects/vulnerable_app/findings-security.json b/test_projects/vulnerable_app/findings-security.json new file mode 100644 index 0000000..0e4734e --- /dev/null +++ b/test_projects/vulnerable_app/findings-security.json @@ -0,0 +1,695 @@ +{ + "tool": { + "name": "FuzzForge Security Assessment", + "version": "1.0.0" + }, + "summary": { + "total_issues": 68, + "by_severity": { + "warning": 51, + "error": 17 + } + }, + "findings": [ + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially 
sensitive file at .env", + "location": { + "file": ".env", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at .git-credentials", + "location": { + "file": ".git-credentials", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at private_key.pem", + "location": { + "file": "private_key.pem", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at wallet.json", + "location": { + "file": "wallet.json", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at SECRETS_GROUND_TRUTH.json", + "location": { + "file": "SECRETS_GROUND_TRUTH.json", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at .npmrc", + "location": { + "file": ".npmrc", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at .fuzzforge/.env", + "location": { + "file": ".fuzzforge/.env", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at .fuzzforge/.env.template", + "location": { + "file": ".fuzzforge/.env.template", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at data/credentials.json", + "location": { + "file": "data/credentials.json", + "line": null, + "column": null + } + }, + { + "rule_id": "sensitive_file_medium", + "severity": "warning", + "message": "Found potentially sensitive file at data/api_keys.txt", + "location": { + "file": "data/api_keys.txt", + "line": null, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via F-string in SQL query", + "location": { + "file": "app.py", + "line": 31, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded API Key in src/api_handler.py", + "location": { + "file": "src/api_handler.py", + "line": 25, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_medium", + "severity": "warning", + "message": "Found potential hardcoded Authentication Token in src/api_handler.py", + "location": { + "file": "src/api_handler.py", + "line": 21, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function eval(): Arbitrary code execution", + "location": { + "file": "src/api_handler.py", + "line": 34, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function eval(): Arbitrary code execution", + "location": { + "file": "src/api_handler.py", + "line": 54, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function exec(): Arbitrary code execution", + "location": { + "file": "src/api_handler.py", + "line": 49, + "column": null + 
} + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function os.system(): Command injection risk", + "location": { + "file": "src/api_handler.py", + "line": 44, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function os.system(): Command injection risk", + "location": { + "file": "src/api_handler.py", + "line": 71, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function subprocess with shell=True: Command injection risk", + "location": { + "file": "src/api_handler.py", + "line": 39, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via String concatenation in SQL", + "location": { + "file": "src/database.py", + "line": 43, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via String formatting in SQL", + "location": { + "file": "src/database.py", + "line": 50, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via String formatting in SQL", + "location": { + "file": "src/database.py", + "line": 57, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via F-string in SQL query", + "location": { + "file": "src/database.py", + "line": 50, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via Dynamic query building", + "location": { + "file": "src/database.py", + "line": 43, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via Dynamic query building", + "location": { + "file": "src/database.py", + "line": 75, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function os.system(): Command injection risk", + "location": { + "file": "src/database.py", + "line": 69, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function pickle.load(): Deserialization vulnerability", + "location": { + "file": "src/database.py", + "line": 64, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded Private Key in scripts/backup.js", + "location": { + "file": "scripts/backup.js", + "line": 81, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_medium", + "severity": "warning", + "message": "Found potential hardcoded Potential Secret Hash in scripts/backup.js", + "location": { + "file": "scripts/backup.js", + "line": 81, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function eval(): Arbitrary code execution", + "location": { + "file": "scripts/backup.js", + "line": 23, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function new Function(): 
Arbitrary code execution", + "location": { + "file": "scripts/backup.js", + "line": 28, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function innerHTML: XSS vulnerability", + "location": { + "file": "scripts/backup.js", + "line": 33, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function innerHTML: XSS vulnerability", + "location": { + "file": "scripts/backup.js", + "line": 37, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function document.write(): XSS vulnerability", + "location": { + "file": "scripts/backup.js", + "line": 42, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded Private Key in src/Main.java", + "location": { + "file": "src/Main.java", + "line": 77, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via String concatenation in SQL", + "location": { + "file": "src/Main.java", + "line": 23, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via String concatenation in SQL", + "location": { + "file": "src/Main.java", + "line": 29, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via Dynamic query building", + "location": { + "file": "src/Main.java", + "line": 23, + "column": null + } + }, + { + "rule_id": "sql_injection_high", + "severity": "error", + "message": "Detected potential SQL injection vulnerability via Dynamic query building", + "location": { + "file": "src/Main.java", + "line": 29, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function eval(): Arbitrary code execution", + "location": { + "file": "scripts/deploy.php", + "line": 28, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function exec(): Command execution", + "location": { + "file": "scripts/deploy.php", + "line": 22, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function exec(): Command execution", + "location": { + "file": "scripts/deploy.php", + "line": 23, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function system(): Command execution", + "location": { + "file": "scripts/deploy.php", + "line": 21, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function shell_exec(): Command execution", + "location": { + "file": "scripts/deploy.php", + "line": 23, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 12, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + 
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 21, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 23, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 24, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 31, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 45, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 50, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 57, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 13, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 22, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 27, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 32, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 40, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 46, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 53, + "column": null + } + }, + { + 
"rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 54, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 61, + "column": null + } + }, + { + "rule_id": "dangerous_function_medium", + "severity": "warning", + "message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing", + "location": { + "file": "scripts/deploy.php", + "line": 62, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded API Key in src/utils.rb", + "location": { + "file": "src/utils.rb", + "line": 64, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_medium", + "severity": "warning", + "message": "Found potential hardcoded Hardcoded Password in src/utils.rb", + "location": { + "file": "src/utils.rb", + "line": 63, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded Private Key in src/app.go", + "location": { + "file": "src/app.go", + "line": 59, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_high", + "severity": "error", + "message": "Found potential hardcoded Private Key in src/app.go", + "location": { + "file": "src/app.go", + "line": 62, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_medium", + "severity": "warning", + "message": "Found potential hardcoded Potential Secret Hash in src/app.go", + "location": { + "file": "src/app.go", + "line": 59, + "column": null + } + }, + { + "rule_id": "hardcoded_secret_medium", + "severity": "warning", + "message": "Found potential hardcoded Potential Secret Hash in src/app.go", + "location": { + "file": "src/app.go", + "line": 62, + "column": null + } + } + ] +} \ No newline at end of file diff --git a/test_projects/vulnerable_app/type_errors.py b/test_projects/vulnerable_app/type_errors.py new file mode 100644 index 0000000..b9856c4 --- /dev/null +++ b/test_projects/vulnerable_app/type_errors.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 FuzzingLabs +# +# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file +# at the root of this repository for details. +# +# After the Change Date (four years from publication), this version of the +# Licensed Work will be made available under the Apache License, Version 2.0. +# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 +# +# Additional attribution and requirements are provided in the NOTICE file. + +""" +Test file with type errors for Mypy testing. 
+""" + +from typing import List, Dict + + +def add_numbers(a: int, b: int) -> int: + """Add two integers""" + # Type error: returning string instead of int + return str(a + b) + + +def process_items(items: List[str]) -> None: + """Process a list of strings""" + # Type error: iterating over None + for item in items: + print(item.upper()) + + # Type error: passing int to function expecting string list + process_items(123) + + +def get_user_data() -> Dict[str, str]: + """Get user data""" + # Type error: returning wrong type + return ["user1", "user2"] + + +def calculate_total(numbers: List[int]) -> float: + """Calculate total""" + # Type error: calling method that doesn't exist + return numbers.sum() + + +class User: + def __init__(self, name: str, age: int): + self.name = name + self.age = age + + +def create_user(name: str, age: int) -> User: + """Create a user""" + # Type error: returning dict instead of User + return {"name": name, "age": age} + + +# Missing type annotations +def unsafe_function(x, y): + return x + y diff --git a/volumes/env/.env.example b/volumes/env/.env.example deleted file mode 100644 index 4be30b9..0000000 --- a/volumes/env/.env.example +++ /dev/null @@ -1,17 +0,0 @@ -# FuzzForge Agent Configuration -# Copy this to .env and configure your API keys - -# LiteLLM Model Configuration -LITELLM_MODEL=gemini/gemini-2.0-flash-001 -# LITELLM_PROVIDER=gemini - -# API Keys (uncomment and configure as needed) -# GOOGLE_API_KEY= -# OPENAI_API_KEY= -# ANTHROPIC_API_KEY= -# OPENROUTER_API_KEY= -# MISTRAL_API_KEY= - -# Agent Configuration -# DEFAULT_TIMEOUT=120 -# DEFAULT_CONTEXT_ID=default diff --git a/volumes/env/.env.template b/volumes/env/.env.template new file mode 100644 index 0000000..09ccca6 --- /dev/null +++ b/volumes/env/.env.template @@ -0,0 +1,65 @@ +# ============================================================================= +# FuzzForge LiteLLM Proxy Configuration +# ============================================================================= +# Copy this file to .env and fill in your API keys +# Bootstrap will automatically create virtual keys for each service +# ============================================================================= + +# LiteLLM Proxy Internal Configuration +# ----------------------------------------------------------------------------- +FF_LLM_PROXY_BASE_URL=http://llm-proxy:4000 +LITELLM_MASTER_KEY=sk-master-test +LITELLM_SALT_KEY=super-secret-salt + +# Default Models (comma-separated, leave empty for model-agnostic access) +# ----------------------------------------------------------------------------- +# Examples: +# openai/gpt-5-mini,openai/text-embedding-3-large +# anthropic/claude-sonnet-4-5-20250929,openai/gpt-5-mini +# (empty = unrestricted access to all registered models) +LITELLM_DEFAULT_MODELS= + +# Upstream Provider API Keys +# ----------------------------------------------------------------------------- +# Add your real provider keys here - these are used by the proxy to call LLM providers +LITELLM_OPENAI_API_KEY=your-openai-key-here +LITELLM_ANTHROPIC_API_KEY=your-anthropic-key-here +LITELLM_GEMINI_API_KEY= +LITELLM_MISTRAL_API_KEY= +LITELLM_OPENROUTER_API_KEY= + +# Virtual Keys Budget & Duration Configuration +# ----------------------------------------------------------------------------- +# These control the budget and duration for auto-generated virtual keys +# Task Agent Key - used by task-agent service for A2A LiteLLM calls +TASK_AGENT_BUDGET=25.0 +TASK_AGENT_DURATION=30d + +# Cognee Key - used by Cognee for knowledge 
+COGNEE_BUDGET=50.0
+COGNEE_DURATION=30d
+
+# General CLI/SDK Key - used by ff CLI and fuzzforge-sdk
+CLI_BUDGET=100.0
+CLI_DURATION=30d
+
+# Virtual Keys (auto-generated by bootstrap - leave blank)
+# -----------------------------------------------------------------------------
+TASK_AGENT_API_KEY=
+COGNEE_API_KEY=
+OPENAI_API_KEY=
+
+# LiteLLM Proxy Client Configuration
+# -----------------------------------------------------------------------------
+# For CLI and SDK usage (Cognee, ff ingest, etc.)
+LITELLM_PROXY_API_BASE=http://localhost:10999
+LLM_ENDPOINT=http://localhost:10999
+LLM_PROVIDER=openai
+LLM_MODEL=litellm_proxy/gpt-5-mini
+LLM_API_BASE=http://localhost:10999
+LLM_EMBEDDING_MODEL=litellm_proxy/text-embedding-3-large
+
+# UI Access
+# -----------------------------------------------------------------------------
+UI_USERNAME=fuzzforge
+UI_PASSWORD=fuzzforge123
diff --git a/volumes/env/README.md b/volumes/env/README.md
index c53f184..7df65e8 100644
--- a/volumes/env/README.md
+++ b/volumes/env/README.md
@@ -1,22 +1,89 @@
-# FuzzForge Environment Configuration
+# FuzzForge LiteLLM Proxy Configuration
 
-This directory contains environment files that are mounted into Docker containers.
+This directory contains configuration for the LiteLLM proxy with model-agnostic virtual keys.
+
+## Quick Start (Fresh Clone)
+
+### 1. Create Your `.env` File
+
+```bash
+cp .env.template .env
+```
+
+### 2. Add Your Provider API Keys
+
+Edit `.env` and add your **real** API keys:
+
+```bash
+LITELLM_OPENAI_API_KEY=sk-proj-YOUR-OPENAI-KEY-HERE
+LITELLM_ANTHROPIC_API_KEY=sk-ant-api03-YOUR-ANTHROPIC-KEY-HERE
+```
+
+### 3. Start Services
+
+```bash
+cd ../.. # Back to repo root
+COMPOSE_PROFILES=secrets docker compose up -d
+```
+
+Bootstrap will automatically:
+- Generate 3 virtual keys with individual budgets
+- Write them to your `.env` file
+- Apply no model restrictions (model-agnostic)
 
 ## Files
 
-- `.env.example` - Template configuration file
-- `.env` - Your actual configuration (create by copying .env.example)
+- **`.env.template`** - Clean template (checked into git)
+- **`.env`** - Your real keys (git ignored, you create this)
+- **`.env.example`** - Legacy example (removed; superseded by `.env.template`)
 
-## Usage
+## Virtual Keys (Auto-Generated)
 
-1. Copy the example file:
-   ```bash
-   cp .env.example .env
-   ```
+Bootstrap creates 3 keys with budget controls:
 
-2. Edit `.env` and add your API keys
+| Key | Budget | Duration | Used By |
+|-----|--------|----------|---------|
+| `OPENAI_API_KEY` | $100 | 30 days | CLI, SDK |
+| `TASK_AGENT_API_KEY` | $25 | 30 days | Task Agent |
+| `COGNEE_API_KEY` | $50 | 30 days | Cognee |
 
-3. Restart Docker containers to apply changes:
-   ```bash
-   docker-compose restart
-   ```
+All keys are **model-agnostic** by default (no restrictions).
+
+## Using Models
+
+Registered models in `volumes/litellm/proxy_config.yaml`:
+- `gpt-5-mini` → `openai/gpt-5-mini`
+- `claude-sonnet-4-5` → `anthropic/claude-sonnet-4-5-20250929`
+- `text-embedding-3-large` → `openai/text-embedding-3-large`
+
+### Use Registered Aliases:
+
+```bash
+fuzzforge workflow run llm_secret_detection . -n llm_model=gpt-5-mini
+fuzzforge workflow run llm_secret_detection . -n llm_model=claude-sonnet-4-5
+```
+
+### Use Any Model (Direct):
+
+```bash
+# Works without registering first!
+fuzzforge workflow run llm_secret_detection . -n llm_model=openai/gpt-5-nano
+```
+
+## Proxy UI
+
+http://localhost:10999/ui
+- User: `fuzzforge` / Pass: `fuzzforge123`
+
+## Troubleshooting
+
+```bash
+# Check bootstrap logs
+docker compose logs llm-proxy-bootstrap
+
+# Verify keys generated
+grep "API_KEY=" .env | grep -v "^#" | grep -v "your-"
+
+# Restart services
+docker compose restart llm-proxy task-agent
+```
diff --git a/volumes/litellm/proxy_config.yaml b/volumes/litellm/proxy_config.yaml
new file mode 100644
index 0000000..297abfa
--- /dev/null
+++ b/volumes/litellm/proxy_config.yaml
@@ -0,0 +1,26 @@
+general_settings:
+  master_key: os.environ/LITELLM_MASTER_KEY
+  database_url: os.environ/DATABASE_URL
+  store_model_in_db: true
+  store_prompts_in_spend_logs: true
+  otel: true
+
+litellm_settings:
+  callbacks:
+    - "otel"
+
+model_list:
+  - model_name: claude-sonnet-4-5
+    litellm_params:
+      model: anthropic/claude-sonnet-4-5-20250929
+      api_key: os.environ/ANTHROPIC_API_KEY
+
+  - model_name: gpt-5-mini
+    litellm_params:
+      model: openai/gpt-5-mini
+      api_key: os.environ/LITELLM_OPENAI_API_KEY
+
+  - model_name: text-embedding-3-large
+    litellm_params:
+      model: openai/text-embedding-3-large
+      api_key: os.environ/LITELLM_OPENAI_API_KEY
diff --git a/volumes/otel/collector-config.yaml b/volumes/otel/collector-config.yaml
new file mode 100644
index 0000000..29dfa0b
--- /dev/null
+++ b/volumes/otel/collector-config.yaml
@@ -0,0 +1,25 @@
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+processors:
+  batch:
+
+exporters:
+  debug:
+    verbosity: detailed
+
+service:
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug]
+    metrics:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug]
diff --git a/workers/android/Dockerfile.amd64 b/workers/android/Dockerfile.amd64
new file mode 100644
index 0000000..3939eeb
--- /dev/null
+++ b/workers/android/Dockerfile.amd64
@@ -0,0 +1,148 @@
+# FuzzForge Vertical Worker: Android Security
+#
+# Pre-installed tools for Android security analysis:
+# - Android SDK (adb, aapt)
+# - apktool (APK decompilation)
+# - jadx (Dex to Java decompiler)
+# - Frida (dynamic instrumentation)
+# - androguard (Python APK analysis)
+# - MobSF dependencies
+#
+# Note: Uses amd64 platform for compatibility with Android 32-bit tools
+
+FROM --platform=linux/amd64 python:3.11-slim-bookworm
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    # Build essentials
+    build-essential \
+    git \
+    curl \
+    wget \
+    unzip \
+    # Java (required for Android tools)
+    openjdk-17-jdk \
+    # Android tools dependencies (32-bit libraries for emulated amd64)
+    lib32stdc++6 \
+    lib32z1 \
+    # Frida dependencies
+    libc6-dev \
+    # XML/Binary analysis
+    libxml2-dev \
+    libxslt-dev \
+    # Network tools
+    netcat-openbsd \
+    tcpdump \
+    # MobSF dependencies
+    xfonts-75dpi \
+    xfonts-base \
+    # Cleanup
+    && rm -rf /var/lib/apt/lists/*
+
+# Install wkhtmltopdf (required for MobSF PDF reports)
+RUN wget -q https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/wkhtmltox_0.12.6.1-3.bookworm_amd64.deb && \
+    apt-get update && \
+    apt-get install -y ./wkhtmltox_0.12.6.1-3.bookworm_amd64.deb && \
+    rm wkhtmltox_0.12.6.1-3.bookworm_amd64.deb && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Android SDK Command Line Tools
+ENV ANDROID_HOME=/opt/android-sdk
+ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/platform-tools:${PATH}"
+
+RUN mkdir -p ${ANDROID_HOME}/cmdline-tools && \
+    cd ${ANDROID_HOME}/cmdline-tools && \
+    wget -q https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip && \
+    unzip -q commandlinetools-linux-9477386_latest.zip && \
+    mv cmdline-tools latest && \
+    rm commandlinetools-linux-9477386_latest.zip && \
+    # Accept licenses
+    yes | ${ANDROID_HOME}/cmdline-tools/latest/bin/sdkmanager --licenses && \
+    # Install platform tools (adb, fastboot)
+    ${ANDROID_HOME}/cmdline-tools/latest/bin/sdkmanager "platform-tools" "build-tools;33.0.0"
+
+# Install apktool
+RUN wget -q https://raw.githubusercontent.com/iBotPeaches/Apktool/master/scripts/linux/apktool -O /usr/local/bin/apktool && \
+    wget -q https://bitbucket.org/iBotPeaches/apktool/downloads/apktool_2.9.3.jar -O /usr/local/bin/apktool.jar && \
+    chmod +x /usr/local/bin/apktool
+
+# Install jadx (Dex to Java decompiler)
+RUN wget -q https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip -O /tmp/jadx.zip && \
+    unzip -q /tmp/jadx.zip -d /opt/jadx && \
+    ln -s /opt/jadx/bin/jadx /usr/local/bin/jadx && \
+    ln -s /opt/jadx/bin/jadx-gui /usr/local/bin/jadx-gui && \
+    rm /tmp/jadx.zip
+
+# Install Python dependencies for Android security tools
+COPY requirements.txt /tmp/requirements.txt
+RUN pip3 install --no-cache-dir -r /tmp/requirements.txt && \
+    rm /tmp/requirements.txt
+
+# Install androguard (Python APK analysis framework)
+RUN pip3 install --no-cache-dir androguard pyaxmlparser
+
+# Install Frida
+RUN pip3 install --no-cache-dir frida-tools frida
+
+# Install OpenGrep/Semgrep (expose as opengrep command)
+RUN pip3 install --no-cache-dir semgrep==1.45.0 && \
+    ln -sf /usr/local/bin/semgrep /usr/local/bin/opengrep
+
+# Install MobSF (Mobile Security Framework)
+RUN git clone --depth 1 --branch v3.9.7 https://github.com/MobSF/Mobile-Security-Framework-MobSF.git /app/mobsf && \
+    cd /app/mobsf && \
+    ./setup.sh
+
+# Install aiohttp for async HTTP requests (used by MobSF scanner module)
+RUN pip3 install --no-cache-dir aiohttp
+
+# Create cache directory
+RUN mkdir -p /cache && chmod 755 /cache
+
+# Copy worker entrypoint (generic, works for all verticals)
+COPY worker.py /app/worker.py
+
+# Create startup script that runs MobSF in background and then starts worker
+RUN echo '#!/bin/bash\n\
+# Start MobSF server in background with sync workers (avoid Rosetta syscall issues)\n\
+echo "Starting MobSF server in background..."\n\
+cd /app/mobsf && python3 -m poetry run gunicorn -b 127.0.0.1:8877 \\\n\
+    mobsf.MobSF.wsgi:application \\\n\
+    --worker-class=sync \\\n\
+    --workers=2 \\\n\
+    --timeout=3600 \\\n\
+    --log-level=error \\\n\
+    > /tmp/mobsf.log 2>&1 &\n\
+MOBSF_PID=$!\n\
+echo "MobSF started with PID: $MOBSF_PID"\n\
+\n\
+# Wait for MobSF to initialize\n\
+sleep 10\n\
+\n\
+# Generate and store MobSF API key\n\
+if [ -f /root/.MobSF/secret ]; then\n\
+    SECRET=$(cat /root/.MobSF/secret)\n\
+    export MOBSF_API_KEY=$(echo -n "$SECRET" | sha256sum | cut -d " " -f1)\n\
+    echo "MobSF API key: $MOBSF_API_KEY"\n\
+fi\n\
+\n\
+# Start worker\n\
+echo "Starting Temporal worker..."\n\
+exec python3 /app/worker.py\n\
+' > /app/start.sh && chmod +x /app/start.sh
+
+# Add toolbox to Python path (mounted at runtime)
+ENV PYTHONPATH="/app:/app/toolbox:${PYTHONPATH}"
+ENV PYTHONUNBUFFERED=1
+ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
+ENV MOBSF_PORT=8877
+
+# Healthcheck
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=5 \
+    CMD python3 -c "import sys; sys.exit(0)"
+
+# Run startup script (starts MobSF + worker)
+CMD ["/app/start.sh"]
diff --git a/workers/android/Dockerfile b/workers/android/Dockerfile.arm64
similarity index 73%
rename from workers/android/Dockerfile
rename to workers/android/Dockerfile.arm64
index a3bb9d4..2fdcff2 100644
--- a/workers/android/Dockerfile
+++ b/workers/android/Dockerfile.arm64
@@ -1,4 +1,4 @@
-# FuzzForge Vertical Worker: Android Security
+# FuzzForge Vertical Worker: Android Security (ARM64)
 #
 # Pre-installed tools for Android security analysis:
 # - Android SDK (adb, aapt)
@@ -6,9 +6,11 @@
 # - jadx (Dex to Java decompiler)
 # - Frida (dynamic instrumentation)
 # - androguard (Python APK analysis)
-# - MobSF dependencies
+#
+# Note: MobSF is excluded due to Rosetta 2 syscall incompatibility
+# Note: Uses amd64 platform for compatibility with Android 32-bit tools
 
-FROM python:3.11-slim-bookworm
+FROM --platform=linux/amd64 python:3.11-slim-bookworm
 
 # Set working directory
 WORKDIR /app
@@ -23,7 +25,7 @@ RUN apt-get update && apt-get install -y \
     unzip \
     # Java (required for Android tools)
     openjdk-17-jdk \
-    # Android tools dependencies
+    # Android tools dependencies (32-bit libraries for emulated amd64)
     lib32stdc++6 \
     lib32z1 \
     # Frida dependencies
@@ -75,20 +77,34 @@ RUN pip3 install --no-cache-dir androguard pyaxmlparser
 # Install Frida
 RUN pip3 install --no-cache-dir frida-tools frida
 
+# Install OpenGrep/Semgrep (expose as opengrep command)
+RUN pip3 install --no-cache-dir semgrep==1.45.0 && \
+    ln -sf /usr/local/bin/semgrep /usr/local/bin/opengrep
+
+# NOTE: MobSF is NOT installed on ARM64 platform due to Rosetta 2 incompatibility
+# The workflow will gracefully skip MobSF analysis on this platform
+
 # Create cache directory
 RUN mkdir -p /cache && chmod 755 /cache
 
 # Copy worker entrypoint (generic, works for all verticals)
 COPY worker.py /app/worker.py
 
+# Create simplified startup script (no MobSF)
+RUN echo '#!/bin/bash\n\
+# ARM64 worker - MobSF disabled due to Rosetta 2 limitations\n\
+echo "Starting Temporal worker (ARM64 platform - MobSF disabled)..."\n\
+exec python3 /app/worker.py\n\
+' > /app/start.sh && chmod +x /app/start.sh
+
 # Add toolbox to Python path (mounted at runtime)
 ENV PYTHONPATH="/app:/app/toolbox:${PYTHONPATH}"
 ENV PYTHONUNBUFFERED=1
 ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
 
 # Healthcheck
-HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=5 \
     CMD python3 -c "import sys; sys.exit(0)"
 
-# Run worker
-CMD ["python3", "/app/worker.py"]
+# Run startup script
+CMD ["/app/start.sh"]
diff --git a/workers/android/metadata.yaml b/workers/android/metadata.yaml
new file mode 100644
index 0000000..e5b46b8
--- /dev/null
+++ b/workers/android/metadata.yaml
@@ -0,0 +1,42 @@
+# Android Worker Metadata
+#
+# Platform-specific configuration for Android security analysis worker.
+# This file defines which Dockerfile to use for each platform and what tools
+# are available on that platform.
+
+name: android
+version: "1.0.0"
+description: "Android application security testing worker with Jadx, OpenGrep, and MobSF"
+
+# Default platform when auto-detection fails or metadata is not platform-aware
+default_platform: linux/amd64
+
+# Platform-specific configurations
+platforms:
+  # x86_64 / Intel / AMD platform (full toolchain including MobSF)
+  linux/amd64:
+    dockerfile: Dockerfile.amd64
+    description: "Full Android toolchain with MobSF support"
+    supported_tools:
+      - jadx        # APK decompiler
+      - opengrep    # Static analysis with custom Android rules
+      - mobsf       # Mobile Security Framework
+      - frida       # Dynamic instrumentation
+      - androguard  # Python APK analysis
+
+  # ARM64 / Apple Silicon platform (MobSF excluded due to Rosetta limitations)
+  linux/arm64:
+    dockerfile: Dockerfile.arm64
+    description: "Android toolchain without MobSF (ARM64/Apple Silicon compatible)"
+    supported_tools:
+      - jadx        # APK decompiler
+      - opengrep    # Static analysis with custom Android rules
+      - frida       # Dynamic instrumentation
+      - androguard  # Python APK analysis
+    disabled_tools:
+      mobsf: "Incompatible with Rosetta 2 emulation (requires syscall 284: copy_file_range)"
+    notes: |
+      MobSF cannot run under Rosetta 2 on Apple Silicon Macs due to missing
+      syscall implementations. The workflow will gracefully skip MobSF analysis
+      on this platform while still providing comprehensive security testing via
+      Jadx decompilation and OpenGrep static analysis.
diff --git a/workers/python/requirements.txt b/workers/python/requirements.txt
index 2e30f4a..c0fff32 100644
--- a/workers/python/requirements.txt
+++ b/workers/python/requirements.txt
@@ -16,3 +16,8 @@ a2a-sdk[all]>=0.1.0
 
 # Fuzzing
 atheris>=2.3.0
+
+# SAST Tools
+bandit>=1.7.0
+pip-audit>=2.6.0
+mypy>=1.8.0
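
The `bandit`, `pip-audit`, and `mypy` additions to `workers/python/requirements.txt` land alongside the `test_projects/vulnerable_app/type_errors.py` fixture added earlier in this diff. The worker code that actually drives these tools is not part of this patch, so the snippet below is only a minimal sketch of how a type-checking step could invoke mypy programmatically against the fixture; `mypy.api.run` is mypy's documented programmatic entry point, but the helper name `run_mypy_scan` and the finding dictionary layout are illustrative assumptions, not FuzzForge's real API.

```python
# Hypothetical sketch: invoke mypy from a Python worker and normalise its
# report lines. Only mypy.api.run is a real, documented API; the helper name
# and the finding dict layout are assumptions made for illustration.
from pathlib import Path

from mypy import api


def run_mypy_scan(target: Path) -> list[dict]:
    """Run mypy on `target` and turn each reported line into a small finding dict."""
    stdout, _stderr, _exit_status = api.run([str(target), "--no-error-summary"])
    findings = []
    for line in stdout.splitlines():
        # Default mypy output: path/to/file.py:12: error: message  [error-code]
        parts = line.split(":", 3)
        if (
            len(parts) == 4
            and parts[1].strip().isdigit()
            and parts[2].strip() in {"error", "warning", "note"}
        ):
            findings.append(
                {
                    "file": parts[0],
                    "line": int(parts[1]),
                    "severity": parts[2].strip(),
                    "message": parts[3].strip(),
                }
            )
    return findings


if __name__ == "__main__":
    # The intentionally broken fixture should yield several findings.
    for finding in run_mypy_scan(Path("test_projects/vulnerable_app/type_errors.py")):
        print(finding)
```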
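`workers/android/metadata.yaml` above declares which Dockerfile and tool set each platform gets, but the build tooling that consumes it is not shown in this diff. As a rough illustration only, a helper that resolves the right Dockerfile for the current host might look like the sketch below; the function name, the arm64/amd64 mapping, and the use of PyYAML are all assumptions rather than the project's actual implementation.

```python
# Hypothetical sketch: pick the platform-specific Dockerfile declared in a
# worker's metadata.yaml. Not part of the FuzzForge codebase; assumes PyYAML
# is available and that metadata.yaml follows the layout shown in this diff.
import platform
from pathlib import Path

import yaml


def select_dockerfile(worker_dir: Path) -> str:
    """Return the Dockerfile name metadata.yaml declares for the current host."""
    meta = yaml.safe_load((worker_dir / "metadata.yaml").read_text())

    machine = platform.machine().lower()
    host = "linux/arm64" if machine in {"arm64", "aarch64"} else "linux/amd64"

    platforms = meta.get("platforms", {})
    entry = platforms.get(host) or platforms.get(meta.get("default_platform", ""))
    if entry is None:
        raise ValueError(f"no platform entry for {host} in {worker_dir}/metadata.yaml")
    return entry["dockerfile"]


if __name__ == "__main__":
    # Prints "Dockerfile.arm64" on Apple Silicon hosts, "Dockerfile.amd64" elsewhere.
    print(select_dockerfile(Path("workers/android")))
```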
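Finally, the `.env.template` and `volumes/env/README.md` changes describe virtual keys that clients use to reach the LiteLLM proxy on `http://localhost:10999`. Since the proxy exposes an OpenAI-compatible API, a standard OpenAI client should work once bootstrap has written the keys; the snippet below is a hedged sketch of that flow (it assumes bootstrap has populated `OPENAI_API_KEY` and that the `gpt-5-mini` alias from `proxy_config.yaml` is registered), not an official FuzzForge client.

```python
# Minimal sketch: call the LiteLLM proxy through its OpenAI-compatible API
# using the bootstrap-generated virtual key from volumes/env/.env.
# Assumes the stack is up (`COMPOSE_PROFILES=secrets docker compose up -d`).
import os

from openai import OpenAI

client = OpenAI(
    base_url=os.environ.get("LITELLM_PROXY_API_BASE", "http://localhost:10999"),
    api_key=os.environ["OPENAI_API_KEY"],  # virtual key written by bootstrap
)

response = client.chat.completions.create(
    model="gpt-5-mini",  # registered alias; provider-prefixed names also work
    messages=[{"role": "user", "content": "Say hello from the FuzzForge proxy."}],
)
print(response.choices[0].message.content)
```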