Files
fuzzforge_ai/backend/toolbox/modules/fuzzing/atheris.py
Tanguy Duhamel 323a434c73 Initial commit
2025-09-29 21:26:41 +02:00

774 lines
29 KiB
Python

"""
Atheris Fuzzing Module
This module uses Atheris for fuzzing Python code to find bugs and security
vulnerabilities in Python applications and libraries.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import logging
import os
import re
import subprocess
import sys
import traceback
from pathlib import Path
from typing import Any, Dict, List, Optional

from ..base import BaseModule, ModuleFinding, ModuleMetadata, ModuleResult
from . import register_module
# Module-level logger, named after this module's dotted import path.
logger = logging.getLogger(__name__)
@register_module
class AtherisModule(BaseModule):
    """Atheris Python fuzzing module.

    Generates a self-contained wrapper script around a user-supplied fuzz
    target, runs it under Atheris (coverage-guided, libFuzzer-based fuzzing
    for Python), and converts observed exceptions and crash artifacts into
    ``ModuleFinding`` objects.
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata, including input/output JSON schemas.

        Note: schema defaults use Python ``True``/``False`` — the previous
        JSON-style ``true``/``false`` raised ``NameError`` at call time.
        """
        return ModuleMetadata(
            name="atheris",
            version="2.3.0",
            description="Coverage-guided Python fuzzing engine for finding bugs in Python code",
            author="FuzzForge Team",
            category="fuzzing",
            tags=["python", "coverage-guided", "native", "sanitizers", "libfuzzer"],
            input_schema={
                "type": "object",
                "properties": {
                    "target_script": {
                        "type": "string",
                        "description": "Path to the Python script containing the fuzz target function"
                    },
                    "target_function": {
                        "type": "string",
                        "default": "TestOneInput",
                        "description": "Name of the target function to fuzz"
                    },
                    "corpus_dir": {
                        "type": "string",
                        "description": "Directory containing initial corpus files"
                    },
                    "dict_file": {
                        "type": "string",
                        "description": "Dictionary file for fuzzing keywords"
                    },
                    "max_total_time": {
                        "type": "integer",
                        "default": 600,
                        "description": "Maximum total time to run fuzzing (seconds)"
                    },
                    "max_len": {
                        "type": "integer",
                        "default": 4096,
                        "description": "Maximum length of test input"
                    },
                    "timeout": {
                        "type": "integer",
                        "default": 25,
                        "description": "Timeout for individual test cases (seconds)"
                    },
                    "runs": {
                        "type": "integer",
                        "default": -1,
                        "description": "Number of individual test runs (-1 for unlimited)"
                    },
                    "jobs": {
                        "type": "integer",
                        "default": 1,
                        "description": "Number of fuzzing jobs to run in parallel"
                    },
                    "print_final_stats": {
                        "type": "boolean",
                        "default": True,
                        "description": "Print final statistics"
                    },
                    "print_pcs": {
                        "type": "boolean",
                        "default": False,
                        "description": "Print newly covered PCs"
                    },
                    "print_coverage": {
                        "type": "boolean",
                        "default": True,
                        "description": "Print coverage information"
                    },
                    "artifact_prefix": {
                        "type": "string",
                        "default": "crash-",
                        "description": "Prefix for artifact files"
                    },
                    "seed": {
                        "type": "integer",
                        "description": "Random seed for reproducibility"
                    },
                    "python_path": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Additional Python paths to add to sys.path"
                    },
                    "enable_sanitizers": {
                        "type": "boolean",
                        "default": True,
                        "description": "Enable Python-specific sanitizers and checks"
                    },
                    "detect_leaks": {
                        "type": "boolean",
                        "default": True,
                        "description": "Detect memory leaks in native extensions"
                    },
                    "detect_stack_use_after_return": {
                        "type": "boolean",
                        "default": False,
                        "description": "Detect stack use-after-return"
                    },
                    "setup_code": {
                        "type": "string",
                        "description": "Python code to execute before fuzzing starts"
                    },
                    "enable_value_profile": {
                        "type": "boolean",
                        "default": False,
                        "description": "Enable value profiling for better mutation"
                    },
                    "shrink": {
                        "type": "boolean",
                        "default": True,
                        "description": "Try to shrink the corpus"
                    },
                    "only_ascii": {
                        "type": "boolean",
                        "default": False,
                        "description": "Only generate ASCII inputs"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "exception_type": {"type": "string"},
                                "exception_message": {"type": "string"},
                                "stack_trace": {"type": "string"},
                                "crash_input": {"type": "string"}
                            }
                        }
                    }
                }
            }
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration.

        Raises:
            ValueError: if ``target_script`` is missing or ``max_total_time``
                is not a positive number of seconds.
        """
        if not config.get("target_script"):
            raise ValueError("target_script is required for Atheris")
        if config.get("max_total_time", 600) <= 0:
            raise ValueError("max_total_time must be positive")
        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute Atheris Python fuzzing.

        Returns a "success" ModuleResult carrying the findings, or a
        "failed" result with the error message — this method never raises.
        """
        self.start_timer()
        try:
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info("Running Atheris Python fuzzing")

            # Fail fast if atheris is not importable by this interpreter.
            await self._check_atheris_installation()

            target_script = workspace / config["target_script"]
            if not target_script.exists():
                raise FileNotFoundError(f"Target script not found: {target_script}")

            findings = await self._run_atheris_fuzzing(target_script, config, workspace)
            summary = self._create_summary(findings)
            logger.info(f"Atheris found {len(findings)} issues")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"Atheris module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )

    async def _check_atheris_installation(self):
        """Check that Atheris is importable; log its version.

        Raises:
            RuntimeError: if atheris cannot be imported or the check itself
                fails (e.g. the subprocess cannot be spawned).
        """
        try:
            process = await asyncio.create_subprocess_exec(
                sys.executable, "-c", "import atheris; print(atheris.__version__)",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, _stderr = await process.communicate()
        except Exception as e:
            raise RuntimeError(f"Atheris installation check failed: {e}")
        # Raise outside the try-block so this message is not re-wrapped.
        if process.returncode != 0:
            raise RuntimeError("Atheris not installed. Install with: pip install atheris")
        version = stdout.decode().strip()
        logger.info(f"Using Atheris version: {version}")

    async def _run_atheris_fuzzing(self, target_script: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Run Atheris via a generated wrapper script and collect findings.

        Best-effort: any error is logged and whatever findings were gathered
        so far are returned.
        """
        findings: List[ModuleFinding] = []
        try:
            # Directory for libFuzzer artifacts and our crash_*.txt reports.
            output_dir = workspace / "atheris_output"
            output_dir.mkdir(exist_ok=True)

            wrapper_script = await self._create_atheris_wrapper(target_script, config, workspace, output_dir)

            cmd = [sys.executable, str(wrapper_script)]

            # An existing corpus directory is passed as a positional argument.
            corpus_dir = config.get("corpus_dir")
            if corpus_dir:
                corpus_path = workspace / corpus_dir
                if corpus_path.exists():
                    cmd.append(str(corpus_path))

            env = self._setup_atheris_environment(config)
            logger.debug(f"Running command: {' '.join(cmd)}")

            max_total_time = config.get("max_total_time", 600)
            try:
                process = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    cwd=workspace,
                    env=env
                )
                # Default to empty output so a timeout still lets us parse
                # the crash files the wrapper managed to write (the previous
                # code left stdout/stderr unbound on timeout and lost all
                # findings to an UnboundLocalError).
                stdout, stderr = b"", b""
                try:
                    # The wrapper passes -max_total_time to libFuzzer, so the
                    # process should exit on its own; allow a grace period
                    # before we forcibly stop it.
                    stdout, stderr = await asyncio.wait_for(
                        process.communicate(), timeout=max_total_time + 30
                    )
                except asyncio.TimeoutError:
                    logger.info(f"Atheris fuzzing timed out after {max_total_time} seconds")
                    process.terminate()
                    try:
                        await asyncio.wait_for(process.wait(), timeout=10)
                    except asyncio.TimeoutError:
                        process.kill()
                        await process.wait()

                # Fuzzer output may contain arbitrary bytes; never let a
                # decode error discard the run.
                findings = self._parse_atheris_output(
                    stdout.decode(errors="replace"),
                    stderr.decode(errors="replace"),
                    output_dir,
                    workspace
                )
                findings.extend(self._parse_crash_files(output_dir, workspace))
            except Exception as e:
                logger.warning(f"Error running Atheris process: {e}")
        except Exception as e:
            logger.warning(f"Error in Atheris fuzzing: {e}")
        return findings

    async def _create_atheris_wrapper(self, target_script: Path, config: Dict[str, Any], workspace: Path, output_dir: Path) -> Path:
        """Create a self-contained wrapper script for Atheris fuzzing.

        All configuration values are interpolated at generation time: the
        previous version emitted ``config.get(...)`` expressions into the
        wrapper, where no ``config`` variable exists, so the wrapper crashed
        with NameError before fuzzing started.  The libFuzzer options are
        also placed on argv *before* ``atheris.Setup`` so they take effect.
        """
        wrapper_path = workspace / "atheris_wrapper.py"

        # Build the libFuzzer option list with concrete values baked in.
        options: List[str] = [
            f"-max_total_time={config.get('max_total_time', 600)}",
            f"-max_len={config.get('max_len', 4096)}",
            f"-timeout={config.get('timeout', 25)}",
            f"-runs={config.get('runs', -1)}",
        ]
        if config.get("jobs", 1) > 1:
            options.append(f"-jobs={config.get('jobs', 1)}")
        options.append("-print_final_stats=1" if config.get("print_final_stats", True) else "-print_final_stats=0")
        if config.get("print_pcs", False):
            options.append("-print_pcs=1")
        if config.get("print_coverage", True):
            options.append("-print_coverage=1")
        artifact_prefix = config.get("artifact_prefix", "crash-")
        options.append(f"-artifact_prefix={output_dir}/{artifact_prefix}")
        seed = config.get("seed")
        if seed is not None:
            options.append(f"-seed={seed}")
        if config.get("enable_value_profile", False):
            options.append("-use_value_profile=1")
        if config.get("shrink", True):
            options.append("-shrink=1")
        if config.get("only_ascii", False):
            options.append("-only_ascii=1")
        dict_file = config.get("dict_file", "")
        if dict_file:
            dict_path = workspace / dict_file
            if dict_path.exists():
                options.append(f"-dict={dict_path}")

        # repr()-embedding (!r) yields valid Python literals and keeps
        # Windows path backslashes intact.
        wrapper_code = f'''#!/usr/bin/env python3
import sys
import os
import atheris
import traceback

# Extra import paths requested by the module configuration
python_paths = {config.get("python_path", [])!r}
for path in python_paths:
    if path not in sys.path:
        sys.path.insert(0, path)

# Make the workspace and the target script importable
sys.path.insert(0, {str(workspace)!r})
sys.path.insert(0, {str(target_script.parent)!r})

# Optional user setup code (trusted module configuration, executed verbatim)
setup_code = {config.get("setup_code", "")!r}
if setup_code:
    exec(setup_code)

# Import target script
try:
    target_module = __import__({target_script.stem!r})
    target_function = getattr(target_module, {config.get("target_function", "TestOneInput")!r})
except Exception as e:
    print(f"Failed to import target: {{e}}")
    sys.exit(1)

original_target = target_function

def wrapped_target(data):
    """Invoke the target and record any exception before re-raising."""
    try:
        return original_target(data)
    except Exception as e:
        preview = data[:1000].hex() if isinstance(data, bytes) else str(data)[:1000]
        crash_file = os.path.join({str(output_dir)!r}, "crash_" + type(e).__name__ + ".txt")
        with open(crash_file, "a") as f:
            f.write(f"Exception: {{type(e).__name__}}\\n")
            f.write(f"Message: {{str(e)}}\\n")
            f.write(f"Stack trace:\\n{{traceback.format_exc()}}\\n")
            f.write(f"Input data (first 1000 chars/bytes): {{preview}}\\n")
            f.write("-" * 80 + "\\n")
        # Re-raise to let Atheris handle it
        raise

if __name__ == "__main__":
    # Options must be on argv before Setup() so Atheris forwards them
    # to libFuzzer.
    sys.argv.extend({options!r})
    atheris.Setup(sys.argv, wrapped_target)
    atheris.Fuzz()
'''
        wrapper_path.write_text(wrapper_code)
        return wrapper_path

    def _setup_atheris_environment(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Setup environment variables for Atheris.

        Builds ASAN_OPTIONS as a colon-joined list, preserving any value
        inherited from the environment (the previous version produced a
        leading ':' and duplicated detect_leaks=1).
        """
        env = os.environ.copy()
        asan_opts: List[str] = [env["ASAN_OPTIONS"]] if env.get("ASAN_OPTIONS") else []
        if config.get("enable_sanitizers", True):
            asan_opts.append("halt_on_error=1")
        if config.get("detect_leaks", True):
            asan_opts.append("detect_leaks=1")
        if config.get("detect_stack_use_after_return", False):
            asan_opts.append("detect_stack_use_after_return=1")
        if asan_opts:
            env["ASAN_OPTIONS"] = ":".join(asan_opts)
        return env

    def _parse_atheris_output(self, stdout: str, stderr: str, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
        """Parse combined Atheris stdout/stderr for Python exceptions."""
        findings: List[ModuleFinding] = []
        try:
            full_output = stdout + "\n" + stderr
            # First pattern: plain Python tracebacks; second: the structured
            # entries written by the wrapper's crash files.
            exception_patterns = [
                r"Traceback \(most recent call last\):(.*?)(?=\n\w|\nDONE|\n=|\Z)",
                r"Exception: (\w+).*?\nMessage: (.*?)\nStack trace:\n(.*?)(?=\n-{20,}|\Z)"
            ]
            for pattern in exception_patterns:
                for match in re.findall(pattern, full_output, re.DOTALL | re.MULTILINE):
                    finding = self._create_exception_finding(match, full_output, output_dir)
                    if finding:
                        findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing Atheris output: {e}")
        return findings

    def _parse_crash_files(self, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
        """Collect findings from wrapper crash reports and libFuzzer artifacts."""
        findings: List[ModuleFinding] = []
        try:
            # crash_*.txt files are written by the generated wrapper.
            for crash_file in output_dir.glob("crash_*.txt"):
                findings.extend(self._parse_crash_file(crash_file, workspace))
            # crash-* files are raw reproducer inputs written by libFuzzer.
            for artifact_file in output_dir.glob("crash-*"):
                finding = self._create_artifact_finding(artifact_file, workspace)
                if finding:
                    findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing crash files: {e}")
        return findings

    def _parse_crash_file(self, crash_file: Path, workspace: Path) -> List[ModuleFinding]:
        """Parse one crash report file, which may hold several entries."""
        findings: List[ModuleFinding] = []
        try:
            content = crash_file.read_text()
            # Entries are separated by the 80-dash line the wrapper writes.
            for entry in content.split("-" * 80):
                if not entry.strip():
                    continue
                finding = self._parse_crash_entry(entry, crash_file, workspace)
                if finding:
                    findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing crash file {crash_file}: {e}")
        return findings

    def _parse_crash_entry(self, entry: str, crash_file: Path, workspace: Path) -> Optional[ModuleFinding]:
        """Parse a single crash entry; return None if it has no exception type."""
        try:
            exception_type = ""
            exception_message = ""
            input_data = ""
            current_section = None
            stack_lines: List[str] = []
            for line in entry.strip().split('\n'):
                if line.startswith("Exception: "):
                    exception_type = line.replace("Exception: ", "")
                elif line.startswith("Message: "):
                    exception_message = line.replace("Message: ", "")
                elif line.startswith("Stack trace:"):
                    current_section = "stack"
                elif line.startswith("Input data"):
                    current_section = "input"
                    input_data = line.split(":", 1)[1].strip() if ":" in line else ""
                elif current_section == "stack":
                    stack_lines.append(line)
            stack_trace = '\n'.join(stack_lines)

            if not exception_type:
                return None

            severity = self._get_exception_severity(exception_type)

            # Report the crash file relative to the workspace when possible.
            try:
                file_path = str(crash_file.relative_to(workspace))
            except ValueError:
                file_path = str(crash_file)

            return self.create_finding(
                title=f"Atheris Exception: {exception_type}",
                description=f"Atheris discovered a Python exception: {exception_type}{': ' + exception_message if exception_message else ''}",
                severity=severity,
                category=self._get_exception_category(exception_type),
                file_path=file_path,
                recommendation=self._get_exception_recommendation(exception_type, exception_message),
                metadata={
                    "exception_type": exception_type,
                    "exception_message": exception_message,
                    "stack_trace": stack_trace[:2000] if stack_trace else "",  # Limit size
                    "crash_input_preview": input_data[:500] if input_data else "",
                    "fuzzer": "atheris"
                }
            )
        except Exception as e:
            logger.warning(f"Error parsing crash entry: {e}")
            return None

    def _create_exception_finding(self, match, full_output: str, output_dir: Path) -> Optional[ModuleFinding]:
        """Create a finding from a regex match against fuzzer output.

        ``match`` is either a 3-tuple (type, message, trace) from the
        structured pattern, or a 1-tuple/string holding a raw traceback.
        """
        try:
            if isinstance(match, tuple) and len(match) >= 1:
                if len(match) == 3:  # structured Exception/Message/Stack format
                    exception_type, exception_message, stack_trace = match
                else:
                    stack_trace = match[0]
                    exception_type = "Unknown"
                    exception_message = ""
            else:
                stack_trace = str(match)
                exception_type = "Unknown"
                exception_message = ""

            # For raw tracebacks, recover the exception type from the last
            # "SomeError: message" line.
            if not exception_type or exception_type == "Unknown":
                for line in reversed(stack_trace.split('\n')):
                    if ':' in line and any(exc in line for exc in ['Error', 'Exception', 'Warning']):
                        exception_type = line.split(':')[0].strip()
                        exception_message = line.split(':', 1)[1].strip() if ':' in line else ""
                        break

            severity = self._get_exception_severity(exception_type)
            return self.create_finding(
                title=f"Atheris Exception: {exception_type}",
                description=f"Atheris discovered a Python exception during fuzzing: {exception_type}",
                severity=severity,
                category=self._get_exception_category(exception_type),
                file_path=None,
                recommendation=self._get_exception_recommendation(exception_type, exception_message),
                metadata={
                    "exception_type": exception_type,
                    "exception_message": exception_message,
                    "stack_trace": stack_trace[:2000] if stack_trace else "",
                    "fuzzer": "atheris"
                }
            )
        except Exception as e:
            logger.warning(f"Error creating exception finding: {e}")
            return None

    def _create_artifact_finding(self, artifact_file: Path, workspace: Path) -> Optional[ModuleFinding]:
        """Create a finding from a libFuzzer crash artifact file."""
        try:
            # Hex-dump at most the first 1000 bytes of the reproducer input.
            artifact_content = ""
            try:
                artifact_content = artifact_file.read_bytes()[:1000].hex()
            except Exception:
                pass

            try:
                file_path = str(artifact_file.relative_to(workspace))
            except ValueError:
                file_path = str(artifact_file)

            return self.create_finding(
                title="Atheris Crash Artifact",
                description=f"Atheris generated a crash artifact file: {artifact_file.name}",
                severity="medium",
                category="program_crash",
                file_path=file_path,
                recommendation="Analyze the crash artifact to reproduce and debug the issue. The artifact contains the input that caused the crash.",
                metadata={
                    "artifact_type": "crash",
                    "artifact_file": artifact_file.name,
                    "artifact_content_hex": artifact_content,
                    "fuzzer": "atheris"
                }
            )
        except Exception as e:
            logger.warning(f"Error creating artifact finding: {e}")
            return None

    def _get_exception_severity(self, exception_type: str) -> str:
        """Map an exception type name to a severity level (default 'medium')."""
        if not exception_type:
            return "medium"
        exception_lower = exception_type.lower()
        # Memory-safety issues in native extensions are critical.
        if any(term in exception_lower for term in ["segmentationfault", "accessviolation", "memoryerror"]):
            return "critical"
        # Common logic errors that often indicate exploitable input handling.
        elif any(term in exception_lower for term in ["attributeerror", "typeerror", "indexerror", "keyerror", "valueerror"]):
            return "high"
        elif any(term in exception_lower for term in ["assertionerror", "runtimeerror", "ioerror", "oserror"]):
            return "medium"
        elif any(term in exception_lower for term in ["warning", "deprecation"]):
            return "low"
        else:
            return "medium"

    def _get_exception_category(self, exception_type: str) -> str:
        """Map an exception type name to a finding category."""
        if not exception_type:
            return "python_exception"
        exception_lower = exception_type.lower()
        if any(term in exception_lower for term in ["memory", "segmentation", "access"]):
            return "memory_corruption"
        elif any(term in exception_lower for term in ["attribute", "type"]):
            return "type_error"
        elif any(term in exception_lower for term in ["index", "key", "value"]):
            return "data_error"
        elif any(term in exception_lower for term in ["io", "os", "file"]):
            return "io_error"
        elif any(term in exception_lower for term in ["assertion"]):
            return "assertion_failure"
        else:
            return "python_exception"

    def _get_exception_recommendation(self, exception_type: str, exception_message: str) -> str:
        """Generate a remediation recommendation for the exception type."""
        if not exception_type:
            return "Analyze the exception and fix the underlying code issue."
        exception_lower = exception_type.lower()
        if "attributeerror" in exception_lower:
            return "Fix AttributeError by ensuring objects have the expected attributes before accessing them. Add proper error handling and validation."
        elif "typeerror" in exception_lower:
            return "Fix TypeError by ensuring correct data types are used. Add type checking and validation for function parameters."
        elif "indexerror" in exception_lower:
            return "Fix IndexError by adding bounds checking before accessing list/array elements. Validate indices are within valid range."
        elif "keyerror" in exception_lower:
            return "Fix KeyError by checking if keys exist in dictionaries before accessing them. Use .get() method or proper key validation."
        elif "valueerror" in exception_lower:
            return "Fix ValueError by validating input values before processing. Add proper input sanitization and validation."
        elif "memoryerror" in exception_lower:
            return "Fix MemoryError by optimizing memory usage, processing data in chunks, or increasing available memory."
        elif "assertionerror" in exception_lower:
            return "Fix AssertionError by reviewing assertion conditions and ensuring they properly validate the expected state."
        else:
            return f"Fix the {exception_type} exception by analyzing the root cause and implementing appropriate error handling and validation."

    def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
        """Create an aggregate summary over the findings list."""
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts: Dict[str, int] = {}
        exception_counts: Dict[str, int] = {}
        for finding in findings:
            # .get() tolerates severities outside the pre-seeded set.
            severity_counts[finding.severity] = severity_counts.get(finding.severity, 0) + 1
            category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
            exception_type = finding.metadata.get("exception_type", "unknown")
            exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
        return {
            "total_findings": len(findings),
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "exception_counts": exception_counts,
            "unique_exceptions": len(exception_counts),
            "python_specific_issues": sum(category_counts.get(cat, 0) for cat in ["type_error", "data_error", "python_exception"])
        }