mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-05-21 20:16:52 +02:00
fe50d4ef72
This commit implements a complete Python fuzzing workflow using Atheris: ## Python Worker (workers/python/) - Dockerfile with Python 3.11, Atheris, and build tools - Generic worker.py for dynamic workflow discovery - requirements.txt with temporalio, boto3, atheris dependencies - Added to docker-compose.temporal.yaml with dedicated cache volume ## AtherisFuzzer Module (backend/toolbox/modules/fuzzer/) - Reusable module extending BaseModule - Auto-discovers fuzz targets (fuzz_*.py, *_fuzz.py, fuzz_target.py) - Recursive search to find targets in nested directories - Dynamically loads TestOneInput() function - Configurable max_iterations and timeout - Real-time stats callback support for live monitoring - Returns findings as ModuleFinding objects ## Atheris Fuzzing Workflow (backend/toolbox/workflows/atheris_fuzzing/) - Temporal workflow for orchestrating fuzzing - Downloads user code from MinIO - Executes AtherisFuzzer module - Uploads results to MinIO - Cleans up cache after execution - metadata.yaml with vertical: python for routing ## Test Project (test_projects/python_fuzz_waterfall/) - Demonstrates stateful waterfall vulnerability - main.py with check_secret() that leaks progress - fuzz_target.py with Atheris TestOneInput() harness - Complete README with usage instructions ## Backend Fixes - Fixed parameter merging in REST API endpoints (workflows.py) - Changed workflow parameter passing from positional args to kwargs (manager.py) - Default parameters now properly merged with user parameters ## Testing ✅ Worker discovered AtherisFuzzingWorkflow ✅ Workflow executed end-to-end successfully ✅ Fuzz target auto-discovered in nested directories ✅ Atheris ran 100,000 iterations ✅ Results uploaded and cache cleaned
378 lines
13 KiB
Python
378 lines
13 KiB
Python
"""
|
|
Atheris Fuzzer Module
|
|
|
|
Reusable module for fuzzing Python code using Atheris.
|
|
Discovers and fuzzes user-provided Python targets with TestOneInput() function.
|
|
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
import importlib.util
|
|
import logging
|
|
import sys
|
|
import time
|
|
import traceback
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List, Optional, Callable
|
|
import uuid
|
|
|
|
from modules.base import BaseModule, ModuleMetadata, ModuleResult, ModuleFinding
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AtherisFuzzer(BaseModule):
|
|
"""
|
|
Atheris fuzzing module - discovers and fuzzes Python code.
|
|
|
|
This module can be used by any workflow to fuzz Python targets.
|
|
"""
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
self.crashes = []
|
|
self.total_executions = 0
|
|
self.start_time = None
|
|
self.last_stats_time = 0
|
|
|
|
def get_metadata(self) -> ModuleMetadata:
|
|
"""Return module metadata"""
|
|
return ModuleMetadata(
|
|
name="atheris_fuzzer",
|
|
version="1.0.0",
|
|
description="Python fuzzing using Atheris - discovers and fuzzes TestOneInput() functions",
|
|
author="FuzzForge Team",
|
|
category="fuzzer",
|
|
tags=["fuzzing", "atheris", "python", "coverage"],
|
|
input_schema={
|
|
"type": "object",
|
|
"properties": {
|
|
"target_file": {
|
|
"type": "string",
|
|
"description": "Python file with TestOneInput() function (auto-discovered if not specified)"
|
|
},
|
|
"max_iterations": {
|
|
"type": "integer",
|
|
"description": "Maximum fuzzing iterations",
|
|
"default": 100000
|
|
},
|
|
"timeout_seconds": {
|
|
"type": "integer",
|
|
"description": "Fuzzing timeout in seconds",
|
|
"default": 300
|
|
},
|
|
"stats_callback": {
|
|
"description": "Optional callback for real-time statistics"
|
|
}
|
|
}
|
|
},
|
|
requires_workspace=True
|
|
)
|
|
|
|
def validate_config(self, config: Dict[str, Any]) -> bool:
|
|
"""Validate fuzzing configuration"""
|
|
max_iterations = config.get("max_iterations", 100000)
|
|
if not isinstance(max_iterations, int) or max_iterations <= 0:
|
|
raise ValueError(f"max_iterations must be positive integer, got: {max_iterations}")
|
|
|
|
timeout = config.get("timeout_seconds", 300)
|
|
if not isinstance(timeout, int) or timeout <= 0:
|
|
raise ValueError(f"timeout_seconds must be positive integer, got: {timeout}")
|
|
|
|
return True
|
|
|
|
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
|
|
"""
|
|
Execute Atheris fuzzing on user code.
|
|
|
|
Args:
|
|
config: Fuzzing configuration
|
|
workspace: Path to user's uploaded code
|
|
|
|
Returns:
|
|
ModuleResult with crash findings
|
|
"""
|
|
self.start_timer()
|
|
self.start_time = time.time()
|
|
|
|
# Validate configuration
|
|
self.validate_config(config)
|
|
self.validate_workspace(workspace)
|
|
|
|
# Extract config
|
|
target_file = config.get("target_file")
|
|
max_iterations = config.get("max_iterations", 100000)
|
|
timeout_seconds = config.get("timeout_seconds", 300)
|
|
stats_callback = config.get("stats_callback")
|
|
|
|
logger.info(
|
|
f"Starting Atheris fuzzing (max_iterations={max_iterations}, "
|
|
f"timeout={timeout_seconds}s, target={target_file or 'auto-discover'})"
|
|
)
|
|
|
|
try:
|
|
# Step 1: Discover or load target
|
|
target_path = self._discover_target(workspace, target_file)
|
|
logger.info(f"Using fuzz target: {target_path}")
|
|
|
|
# Step 2: Load target module
|
|
test_one_input = self._load_target_module(target_path)
|
|
logger.info(f"Loaded TestOneInput function from {target_path}")
|
|
|
|
# Step 3: Run fuzzing
|
|
await self._run_fuzzing(
|
|
test_one_input=test_one_input,
|
|
target_path=target_path,
|
|
max_iterations=max_iterations,
|
|
timeout_seconds=timeout_seconds,
|
|
stats_callback=stats_callback
|
|
)
|
|
|
|
# Step 4: Generate findings from crashes
|
|
findings = self._generate_findings(target_path)
|
|
|
|
logger.info(
|
|
f"Fuzzing completed: {self.total_executions} executions, "
|
|
f"{len(self.crashes)} crashes found"
|
|
)
|
|
|
|
return self.create_result(
|
|
findings=findings,
|
|
status="success",
|
|
summary={
|
|
"total_executions": self.total_executions,
|
|
"crashes_found": len(self.crashes),
|
|
"execution_time": self.get_execution_time(),
|
|
"target_file": str(target_path.relative_to(workspace))
|
|
},
|
|
metadata={
|
|
"max_iterations": max_iterations,
|
|
"timeout_seconds": timeout_seconds
|
|
}
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Fuzzing failed: {e}", exc_info=True)
|
|
return self.create_result(
|
|
findings=[],
|
|
status="failed",
|
|
error=str(e)
|
|
)
|
|
|
|
def _discover_target(self, workspace: Path, target_file: Optional[str]) -> Path:
|
|
"""
|
|
Discover fuzz target in workspace.
|
|
|
|
Args:
|
|
workspace: Path to workspace
|
|
target_file: Explicit target file or None for auto-discovery
|
|
|
|
Returns:
|
|
Path to target file
|
|
"""
|
|
if target_file:
|
|
# Use specified target
|
|
target_path = workspace / target_file
|
|
if not target_path.exists():
|
|
raise FileNotFoundError(f"Target file not found: {target_file}")
|
|
return target_path
|
|
|
|
# Auto-discover: look for fuzz_*.py or *_fuzz.py
|
|
logger.info("Auto-discovering fuzz targets...")
|
|
|
|
candidates = []
|
|
# Use rglob for recursive search (searches all subdirectories)
|
|
for pattern in ["fuzz_*.py", "*_fuzz.py", "fuzz_target.py"]:
|
|
matches = list(workspace.rglob(pattern))
|
|
candidates.extend(matches)
|
|
|
|
if not candidates:
|
|
raise FileNotFoundError(
|
|
"No fuzz targets found. Expected files matching: fuzz_*.py, *_fuzz.py, or fuzz_target.py"
|
|
)
|
|
|
|
# Use first candidate
|
|
target = candidates[0]
|
|
if len(candidates) > 1:
|
|
logger.warning(
|
|
f"Multiple fuzz targets found: {[str(c) for c in candidates]}. "
|
|
f"Using: {target.name}"
|
|
)
|
|
|
|
return target
|
|
|
|
def _load_target_module(self, target_path: Path) -> Callable:
|
|
"""
|
|
Load target module and get TestOneInput function.
|
|
|
|
Args:
|
|
target_path: Path to Python file with TestOneInput
|
|
|
|
Returns:
|
|
TestOneInput function
|
|
"""
|
|
# Add target directory to sys.path
|
|
target_dir = target_path.parent
|
|
if str(target_dir) not in sys.path:
|
|
sys.path.insert(0, str(target_dir))
|
|
|
|
# Load module dynamically
|
|
module_name = target_path.stem
|
|
spec = importlib.util.spec_from_file_location(module_name, target_path)
|
|
if spec is None or spec.loader is None:
|
|
raise ImportError(f"Cannot load module from {target_path}")
|
|
|
|
module = importlib.util.module_from_spec(spec)
|
|
spec.loader.exec_module(module)
|
|
|
|
# Get TestOneInput function
|
|
if not hasattr(module, "TestOneInput"):
|
|
raise AttributeError(
|
|
f"Module {module_name} does not have TestOneInput() function. "
|
|
"Atheris requires a TestOneInput(data: bytes) function."
|
|
)
|
|
|
|
return module.TestOneInput
|
|
|
|
async def _run_fuzzing(
|
|
self,
|
|
test_one_input: Callable,
|
|
target_path: Path,
|
|
max_iterations: int,
|
|
timeout_seconds: int,
|
|
stats_callback: Optional[Callable] = None
|
|
):
|
|
"""
|
|
Run Atheris fuzzing with real-time monitoring.
|
|
|
|
Args:
|
|
test_one_input: TestOneInput function to fuzz
|
|
target_path: Path to target file
|
|
max_iterations: Max iterations
|
|
timeout_seconds: Timeout in seconds
|
|
stats_callback: Optional callback for stats
|
|
"""
|
|
import atheris
|
|
|
|
self.crashes = []
|
|
self.total_executions = 0
|
|
corpus_size = 0
|
|
|
|
# Wrapper to track executions and crashes
|
|
def fuzz_wrapper(data):
|
|
self.total_executions += 1
|
|
|
|
try:
|
|
test_one_input(data)
|
|
except Exception as e:
|
|
# Capture crash
|
|
crash_info = {
|
|
"input": data,
|
|
"exception": e,
|
|
"exception_type": type(e).__name__,
|
|
"stack_trace": traceback.format_exc(),
|
|
"execution": self.total_executions
|
|
}
|
|
self.crashes.append(crash_info)
|
|
logger.warning(
|
|
f"Crash found (execution {self.total_executions}): "
|
|
f"{type(e).__name__}: {str(e)}"
|
|
)
|
|
# Re-raise so Atheris detects it
|
|
raise
|
|
|
|
# Configure Atheris
|
|
atheris.Setup(
|
|
[
|
|
"atheris_fuzzer",
|
|
f"-runs={max_iterations}",
|
|
f"-max_total_time={timeout_seconds}",
|
|
"-print_final_stats=1"
|
|
],
|
|
fuzz_wrapper
|
|
)
|
|
|
|
logger.info(f"Starting Atheris fuzzer (max_runs={max_iterations}, timeout={timeout_seconds}s)...")
|
|
|
|
# Run fuzzing in a separate task with monitoring
|
|
async def monitor_stats():
|
|
"""Monitor and report stats every 5 seconds"""
|
|
while True:
|
|
await asyncio.sleep(5)
|
|
|
|
if stats_callback:
|
|
elapsed = time.time() - self.start_time
|
|
execs_per_sec = self.total_executions / elapsed if elapsed > 0 else 0
|
|
|
|
await stats_callback({
|
|
"total_execs": self.total_executions,
|
|
"execs_per_sec": execs_per_sec,
|
|
"crashes": len(self.crashes),
|
|
"corpus_size": corpus_size,
|
|
"coverage": 0.0, # Atheris doesn't expose coverage easily
|
|
"elapsed_time": int(elapsed)
|
|
})
|
|
|
|
# Start monitoring task
|
|
monitor_task = None
|
|
if stats_callback:
|
|
monitor_task = asyncio.create_task(monitor_stats())
|
|
|
|
try:
|
|
# Run fuzzing (blocking)
|
|
atheris.Fuzz()
|
|
except SystemExit:
|
|
# Atheris exits when done
|
|
pass
|
|
finally:
|
|
# Stop monitoring
|
|
if monitor_task:
|
|
monitor_task.cancel()
|
|
try:
|
|
await monitor_task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
|
|
def _generate_findings(self, target_path: Path) -> List[ModuleFinding]:
|
|
"""
|
|
Generate ModuleFinding objects from crashes.
|
|
|
|
Args:
|
|
target_path: Path to target file
|
|
|
|
Returns:
|
|
List of findings
|
|
"""
|
|
findings = []
|
|
|
|
for crash in self.crashes:
|
|
# Encode crash input for storage
|
|
crash_input_b64 = base64.b64encode(crash["input"]).decode()
|
|
|
|
finding = self.create_finding(
|
|
title=f"Crash: {crash['exception_type']}",
|
|
description=(
|
|
f"Atheris found crash during fuzzing:\n"
|
|
f"Exception: {crash['exception_type']}\n"
|
|
f"Message: {str(crash['exception'])}\n"
|
|
f"Execution: {crash['execution']}"
|
|
),
|
|
severity="critical",
|
|
category="crash",
|
|
file_path=str(target_path),
|
|
metadata={
|
|
"crash_input_base64": crash_input_b64,
|
|
"crash_input_hex": crash["input"].hex(),
|
|
"exception_type": crash["exception_type"],
|
|
"stack_trace": crash["stack_trace"],
|
|
"execution_number": crash["execution"]
|
|
},
|
|
recommendation=(
|
|
"Review the crash stack trace and input to identify the vulnerability. "
|
|
"The crash input is provided in base64 and hex formats for reproduction."
|
|
)
|
|
)
|
|
findings.append(finding)
|
|
|
|
return findings
|