Files
fuzzforge_ai/backend/toolbox/modules/fuzzing/aflplusplus.py
Tanguy Duhamel 323a434c73 Initial commit
2025-09-29 21:26:41 +02:00

734 lines
28 KiB
Python

"""
AFL++ Fuzzing Module
This module uses AFL++ (Advanced American Fuzzy Lop) for coverage-guided
fuzzing with modern features and optimizations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
import re
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class AFLPlusPlusModule(BaseModule):
"""AFL++ advanced fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Return static metadata and the JSON schemas describing this module.

    Fix: boolean schema defaults previously used JSON-style ``false``,
    which is an undefined name in Python; they must be ``False`` (they
    still serialize to JSON ``false``).
    """
    return ModuleMetadata(
        name="aflplusplus",
        version="4.09c",
        description="Advanced American Fuzzy Lop with modern features for coverage-guided fuzzing",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["coverage-guided", "american-fuzzy-lop", "advanced", "mutation", "instrumentation"],
        input_schema={
            "type": "object",
            "properties": {
                "target_binary": {
                    "type": "string",
                    "description": "Path to the target binary (compiled with afl-gcc/afl-clang)"
                },
                "input_dir": {
                    "type": "string",
                    "description": "Directory containing seed input files"
                },
                "output_dir": {
                    "type": "string",
                    "default": "afl_output",
                    "description": "Output directory for AFL++ results"
                },
                "dictionary": {
                    "type": "string",
                    "description": "Dictionary file for fuzzing keywords"
                },
                "timeout": {
                    "type": "integer",
                    "default": 1000,
                    "description": "Timeout for each execution (ms)"
                },
                "memory_limit": {
                    "type": "integer",
                    "default": 50,
                    "description": "Memory limit for child process (MB)"
                },
                "skip_deterministic": {
                    "type": "boolean",
                    "default": False,
                    "description": "Skip deterministic mutations"
                },
                "no_arith": {
                    "type": "boolean",
                    "default": False,
                    "description": "Skip arithmetic mutations"
                },
                "shuffle_queue": {
                    "type": "boolean",
                    "default": False,
                    "description": "Shuffle queue entries"
                },
                "max_total_time": {
                    "type": "integer",
                    "default": 3600,
                    "description": "Maximum total fuzzing time (seconds)"
                },
                "power_schedule": {
                    "type": "string",
                    "enum": ["explore", "fast", "coe", "lin", "quad", "exploit", "rare"],
                    "default": "fast",
                    "description": "Power schedule algorithm"
                },
                "mutation_mode": {
                    "type": "string",
                    "enum": ["default", "old", "mopt"],
                    "default": "default",
                    "description": "Mutation mode to use"
                },
                "parallel_fuzzing": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable parallel fuzzing with multiple instances"
                },
                "fuzzer_instances": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of parallel fuzzer instances"
                },
                "master_instance": {
                    "type": "string",
                    "default": "master",
                    "description": "Name for master fuzzer instance"
                },
                "slave_prefix": {
                    "type": "string",
                    "default": "slave",
                    "description": "Prefix for slave fuzzer instances"
                },
                "hang_timeout": {
                    "type": "integer",
                    "default": 1000,
                    "description": "Timeout for detecting hangs (ms)"
                },
                "crash_mode": {
                    "type": "boolean",
                    "default": False,
                    "description": "Run in crash exploration mode"
                },
                "target_args": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Arguments to pass to target binary"
                },
                "env_vars": {
                    "type": "object",
                    "description": "Environment variables to set"
                },
                "ignore_finds": {
                    "type": "boolean",
                    "default": False,
                    "description": "Ignore existing findings and start fresh"
                },
                "force_deterministic": {
                    "type": "boolean",
                    "default": False,
                    "description": "Force deterministic mutations"
                }
            }
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "crash_id": {"type": "string"},
                            "crash_file": {"type": "string"},
                            "crash_type": {"type": "string"},
                            "signal": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Validate that the settings required by AFL++ are present.

    Raises:
        ValueError: if ``target_binary`` or ``input_dir`` is missing/empty.

    Returns:
        True when the configuration is acceptable.
    """
    # Both keys are mandatory and must be non-empty strings.
    for required_key in ("target_binary", "input_dir"):
        if not config.get(required_key):
            raise ValueError(f"{required_key} is required for AFL++")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Run an AFL++ fuzzing campaign and package the outcome.

    Validates config and workspace, checks prerequisites, resolves the
    target/seed/output paths, runs the campaign, and returns a result.
    Any exception is caught and converted into a "failed" result.
    """
    self.start_timer()
    try:
        # Fail fast on bad configuration before touching the filesystem.
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running AFL++ fuzzing campaign")
        # Best-effort environment sanity check (afl-fuzz presence, core pattern).
        await self._check_afl_prerequisites(workspace)
        target_binary, input_dir, output_dir = self._setup_afl_directories(
            config, workspace
        )
        findings = await self._run_afl_fuzzing(
            target_binary, input_dir, output_dir, config, workspace
        )
        summary = self._create_summary(findings, output_dir)
        logger.info(f"AFL++ found {len(findings)} crashes")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary,
        )
    except Exception as exc:
        logger.error(f"AFL++ module failed: {exc}")
        return self.create_result(findings=[], status="failed", error=str(exc))
async def _check_afl_prerequisites(self, workspace: Path):
    """Best-effort sanity checks for running AFL++ on this host.

    Verifies that ``afl-fuzz`` is on PATH and warns if the kernel core
    dump pattern is not the plain "core" AFL++ prefers. All failures
    (including a missing afl-fuzz) are downgraded to warnings by the
    outer handler, so this never aborts the campaign.
    """
    try:
        # Locate afl-fuzz via `which`; a non-zero exit means it's absent.
        checker = await asyncio.create_subprocess_exec(
            "which", "afl-fuzz",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        await checker.communicate()
        if checker.returncode != 0:
            raise RuntimeError("afl-fuzz not found. Please install AFL++")
        # AFL++ prefers core_pattern == "core" so crashes aren't piped away.
        try:
            with open("/proc/sys/kernel/core_pattern", "r") as pattern_file:
                core_pattern = pattern_file.read().strip()
        except Exception:
            logger.warning("Could not check core dump pattern")
        else:
            if core_pattern != "core":
                logger.warning(f"Core dump pattern is '{core_pattern}', AFL++ may not work optimally")
    except Exception as exc:
        logger.warning(f"AFL++ prerequisite check failed: {exc}")
def _setup_afl_directories(self, config: Dict[str, Any], workspace: Path):
"""Setup AFL++ directories and validate files"""
# Check target binary
target_binary = workspace / config["target_binary"]
if not target_binary.exists():
raise FileNotFoundError(f"Target binary not found: {target_binary}")
# Check input directory
input_dir = workspace / config["input_dir"]
if not input_dir.exists():
raise FileNotFoundError(f"Input directory not found: {input_dir}")
# Check if input directory has files
input_files = list(input_dir.glob("*"))
if not input_files:
raise ValueError(f"Input directory is empty: {input_dir}")
# Create output directory
output_dir = workspace / config.get("output_dir", "afl_output")
output_dir.mkdir(exist_ok=True)
return target_binary, input_dir, output_dir
async def _run_afl_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Dispatch to single- or multi-instance fuzzing based on config.

    Errors are logged as warnings and swallowed; an empty findings list
    is returned on failure.
    """
    findings = []
    # Choose the campaign runner once, then invoke it.
    runner = (
        self._run_parallel_fuzzing
        if config.get("parallel_fuzzing", False)
        else self._run_single_fuzzing
    )
    try:
        findings = await runner(target_binary, input_dir, output_dir, config, workspace)
    except Exception as exc:
        logger.warning(f"Error running AFL++ fuzzing: {exc}")
    return findings
async def _run_single_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Run one afl-fuzz instance and collect crash findings.

    Builds the afl-fuzz command line from ``config``, runs it for at most
    ``max_total_time`` seconds (then terminates it), and parses any
    crashes written under ``output_dir``. All errors are logged as
    warnings; an empty list is returned on failure.
    """
    findings = []
    try:
        # Build AFL++ command
        cmd = ["afl-fuzz"]
        # Add input and output directories
        cmd.extend(["-i", str(input_dir)])
        cmd.extend(["-o", str(output_dir)])
        # Add dictionary if specified (silently skipped if the file is missing)
        dictionary = config.get("dictionary")
        if dictionary:
            dict_path = workspace / dictionary
            if dict_path.exists():
                cmd.extend(["-x", str(dict_path)])
        # Add per-execution timeout (milliseconds)
        timeout = config.get("timeout", 1000)
        cmd.extend(["-t", str(timeout)])
        # Add memory limit (MB)
        memory_limit = config.get("memory_limit", 50)
        cmd.extend(["-m", str(memory_limit)])
        # Add power schedule (-p: explore/fast/coe/...)
        power_schedule = config.get("power_schedule", "fast")
        cmd.extend(["-p", power_schedule])
        # Add mutation options
        if config.get("skip_deterministic", False):
            cmd.append("-d")
        # NOTE(review): in AFL++, "-a" selects the input format (text/binary),
        # not "skip arithmetic mutations" — verify against afl-fuzz docs.
        if config.get("no_arith", False):
            cmd.append("-a")
        if config.get("shuffle_queue", False):
            cmd.append("-Z")
        # Add hang timeout
        # NOTE(review): afl-fuzz "-T" sets the UI banner text, not a hang
        # timeout — this likely passes a number as the banner. Confirm.
        hang_timeout = config.get("hang_timeout", 1000)
        cmd.extend(["-T", str(hang_timeout)])
        # Add crash mode (crash exploration)
        if config.get("crash_mode", False):
            cmd.append("-C")
        # Add ignore finds
        # NOTE(review): afl-fuzz "-f" expects a file argument (the location
        # the fuzzed input is written to), so a bare "-f" here would consume
        # the following token ("-D" or "--"). Confirm intended flag.
        if config.get("ignore_finds", False):
            cmd.append("-f")
        # Add force deterministic
        if config.get("force_deterministic", False):
            cmd.append("-D")
        # Add target binary and arguments after the "--" separator
        cmd.append("--")
        cmd.append(str(target_binary))
        target_args = config.get("target_args", [])
        cmd.extend(target_args)
        # Set up environment: user vars first, then AFL vars (which override)
        env = os.environ.copy()
        env_vars = config.get("env_vars", {})
        env.update(env_vars)
        # Set AFL environment variables
        env["AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"] = "1"  # Avoid interactive prompts
        env["AFL_SKIP_CPUFREQ"] = "1"  # Skip CPU frequency checks
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run AFL++ with a wall-clock budget; afl-fuzz runs until killed
        max_total_time = config.get("max_total_time", 3600)
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=env
            )
            # Wait for the budget, then terminate (SIGTERM, escalate to SIGKILL)
            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(), timeout=max_total_time
                )
            except asyncio.TimeoutError:
                logger.info(f"AFL++ fuzzing timed out after {max_total_time} seconds")
                process.terminate()
                try:
                    await asyncio.wait_for(process.wait(), timeout=10)
                except asyncio.TimeoutError:
                    process.kill()
                    await process.wait()
            # Parse results from output directory
            findings = self._parse_afl_results(output_dir, workspace)
        except Exception as e:
            logger.warning(f"Error running AFL++ process: {e}")
    except Exception as e:
        logger.warning(f"Error in single fuzzing: {e}")
    return findings
async def _run_parallel_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Run one master (-M) and N-1 slave (-S) afl-fuzz instances in parallel.

    Sleeps for ``max_total_time`` seconds, then terminates every instance
    (SIGTERM, escalating to SIGKILL) and parses crashes from the shared
    output directory. Errors are logged as warnings; an empty list is
    returned on failure.
    """
    findings = []
    try:
        # NOTE(review): schema default for fuzzer_instances is 1, but this
        # falls back to 2 — confirm which default is intended.
        fuzzer_instances = config.get("fuzzer_instances", 2)
        master_name = config.get("master_instance", "master")
        slave_prefix = config.get("slave_prefix", "slave")
        processes = []
        # Start master instance (runs deterministic stages)
        master_cmd = await self._build_afl_command(
            target_binary, input_dir, output_dir, config, workspace,
            instance_name=master_name, is_master=True
        )
        # NOTE(review): stdout/stderr PIPEs are never drained while the
        # fuzzers run; a chatty afl-fuzz could block on a full pipe buffer
        # during a long campaign — consider DEVNULL or a drain task.
        master_process = await asyncio.create_subprocess_exec(
            *master_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace,
            env=self._get_afl_env(config)
        )
        processes.append(master_process)
        # Start slave instances, named e.g. slave01, slave02, ...
        for i in range(1, fuzzer_instances):
            slave_name = f"{slave_prefix}{i:02d}"
            slave_cmd = await self._build_afl_command(
                target_binary, input_dir, output_dir, config, workspace,
                instance_name=slave_name, is_master=False
            )
            slave_process = await asyncio.create_subprocess_exec(
                *slave_cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=self._get_afl_env(config)
            )
            processes.append(slave_process)
        # Sleep for the campaign budget, then always terminate everything.
        # Note: an instance that dies early is not detected until the
        # budget expires (sleep does not watch the processes).
        max_total_time = config.get("max_total_time", 3600)
        try:
            await asyncio.sleep(max_total_time)
        finally:
            # Terminate all processes that are still running
            for process in processes:
                if process.returncode is None:
                    process.terminate()
                    try:
                        await asyncio.wait_for(process.wait(), timeout=10)
                    except asyncio.TimeoutError:
                        process.kill()
                        await process.wait()
        # Parse results from output directory (covers per-instance subdirs)
        findings = self._parse_afl_results(output_dir, workspace)
    except Exception as e:
        logger.warning(f"Error in parallel fuzzing: {e}")
    return findings
async def _build_afl_command(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path, instance_name: str, is_master: bool) -> List[str]:
"""Build AFL++ command for a fuzzer instance"""
cmd = ["afl-fuzz"]
# Add input and output directories
cmd.extend(["-i", str(input_dir)])
cmd.extend(["-o", str(output_dir)])
# Add instance name
if is_master:
cmd.extend(["-M", instance_name])
else:
cmd.extend(["-S", instance_name])
# Add other options (same as single fuzzing)
dictionary = config.get("dictionary")
if dictionary:
dict_path = workspace / dictionary
if dict_path.exists():
cmd.extend(["-x", str(dict_path)])
cmd.extend(["-t", str(config.get("timeout", 1000))])
cmd.extend(["-m", str(config.get("memory_limit", 50))])
cmd.extend(["-p", config.get("power_schedule", "fast")])
if config.get("skip_deterministic", False):
cmd.append("-d")
if config.get("no_arith", False):
cmd.append("-a")
# Add target
cmd.append("--")
cmd.append(str(target_binary))
cmd.extend(config.get("target_args", []))
return cmd
def _get_afl_env(self, config: Dict[str, Any]) -> Dict[str, str]:
"""Get environment variables for AFL++"""
env = os.environ.copy()
env.update(config.get("env_vars", {}))
env["AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"] = "1"
env["AFL_SKIP_CPUFREQ"] = "1"
return env
def _parse_afl_results(self, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
    """Collect crash findings from an AFL++ output directory.

    Handles both layouts: a top-level ``crashes/`` dir (single instance)
    and per-instance subdirectories each holding their own ``crashes/``.
    Only files named ``id:*`` are treated as crash inputs. Errors are
    logged as warnings and an empty/partial list is returned.
    """
    findings = []
    try:
        candidate_dirs = []
        top_level_crashes = output_dir / "crashes"
        if top_level_crashes.exists():
            candidate_dirs.append(top_level_crashes)
        # Per-instance layout: <output>/<instance>/crashes
        for child in output_dir.iterdir():
            if child.is_dir() and (child / "crashes").exists():
                candidate_dirs.append(child / "crashes")
        for crash_dir in candidate_dirs:
            for entry in crash_dir.iterdir():
                if entry.is_file() and entry.name.startswith("id:"):
                    finding = self._create_afl_crash_finding(entry, workspace)
                    if finding:
                        findings.append(finding)
    except Exception as exc:
        logger.warning(f"Error parsing AFL++ results: {exc}")
    return findings
def _create_afl_crash_finding(self, crash_file: Path, workspace: Path) -> ModuleFinding:
    """Create a finding from an AFL++ crash file; returns None on error.

    Fix: AFL++ crash filenames encode the signal under the key ``sig``
    (format ``id:XXXXXX,sig:XX,src:XXXXXX,...``), but this previously
    looked up ``signal``, which never exists — so severity, category and
    recommendation were always the defaults. Now reads ``sig`` (falling
    back to ``signal`` for safety).
    """
    try:
        # Parse crash filename for information (id, sig, src, op, rep)
        crash_info = self._parse_afl_filename(crash_file.name)
        signal = crash_info.get("sig", crash_info.get("signal", ""))
        # Hex preview of the crash input, bounded to keep findings small
        crash_content = ""
        try:
            crash_data = crash_file.read_bytes()[:1000]
            crash_content = crash_data.hex()[:200]
        except Exception:
            pass
        # Determine severity based on signal
        severity = self._get_crash_severity(signal)
        # Prefer a workspace-relative path; fall back to the absolute one
        try:
            file_path = str(crash_file.relative_to(workspace))
        except ValueError:
            file_path = str(crash_file)
        finding = self.create_finding(
            title=f"AFL++ Crash: {signal or 'Unknown'}",
            description=f"AFL++ discovered a crash with signal {signal or 'unknown'} in the target program",
            severity=severity,
            category=self._get_crash_category(signal),
            file_path=file_path,
            recommendation=self._get_afl_crash_recommendation(signal),
            metadata={
                "crash_id": crash_info.get("id", ""),
                "signal": signal,
                "src": crash_info.get("src", ""),
                "crash_file": crash_file.name,
                "crash_content_hex": crash_content,
                "fuzzer": "afl++"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error creating AFL++ crash finding: {e}")
        return None
def _parse_afl_filename(self, filename: str) -> Dict[str, str]:
"""Parse AFL++ crash filename for information"""
info = {}
try:
# AFL++ crash filename format: id:XXXXXX,sig:XX,src:XXXXXX,op:XXX,rep:X
parts = filename.split(',')
for part in parts:
if ':' in part:
key, value = part.split(':', 1)
info[key] = value
except Exception:
pass
return info
def _get_crash_severity(self, signal: str) -> str:
"""Determine severity based on crash signal"""
if not signal:
return "medium"
signal_lower = signal.lower()
# Critical signals indicating memory corruption
if signal in ["11", "sigsegv", "segv"]: # Segmentation fault
return "critical"
elif signal in ["6", "sigabrt", "abrt"]: # Abort
return "high"
elif signal in ["4", "sigill", "ill"]: # Illegal instruction
return "high"
elif signal in ["8", "sigfpe", "fpe"]: # Floating point exception
return "medium"
elif signal in ["9", "sigkill", "kill"]: # Kill signal
return "medium"
else:
return "medium"
def _get_crash_category(self, signal: str) -> str:
"""Determine category based on crash signal"""
if not signal:
return "program_crash"
if signal in ["11", "sigsegv", "segv"]:
return "memory_corruption"
elif signal in ["6", "sigabrt", "abrt"]:
return "assertion_failure"
elif signal in ["4", "sigill", "ill"]:
return "illegal_instruction"
elif signal in ["8", "sigfpe", "fpe"]:
return "arithmetic_error"
else:
return "program_crash"
def _get_afl_crash_recommendation(self, signal: str) -> str:
"""Generate recommendation based on crash signal"""
if signal in ["11", "sigsegv", "segv"]:
return "Segmentation fault detected. Investigate memory access patterns, check for buffer overflows, null pointer dereferences, or use-after-free bugs."
elif signal in ["6", "sigabrt", "abrt"]:
return "Program abort detected. Check for assertion failures, memory allocation errors, or explicit abort() calls in the code."
elif signal in ["4", "sigill", "ill"]:
return "Illegal instruction detected. Check for code corruption, invalid function pointers, or architecture-specific instruction issues."
elif signal in ["8", "sigfpe", "fpe"]:
return "Floating point exception detected. Check for division by zero, arithmetic overflow, or invalid floating point operations."
else:
return f"Program crash with signal {signal} detected. Analyze the crash dump and input to identify the root cause."
def _create_summary(self, findings: List[ModuleFinding], output_dir: Path) -> Dict[str, Any]:
    """Aggregate findings into severity/category/signal counts plus AFL stats.

    ``unique_crashes`` counts distinct ``crash_id`` metadata values;
    ``afl_stats`` is read best-effort from the fuzzer_stats file.
    """
    severity_counts = {level: 0 for level in ("critical", "high", "medium", "low", "info")}
    category_counts = {}
    signal_counts = {}
    unique_crash_ids = set()
    for finding in findings:
        severity_counts[finding.severity] += 1
        category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
        sig = finding.metadata.get("signal", "unknown")
        signal_counts[sig] = signal_counts.get(sig, 0) + 1
        unique_crash_ids.add(finding.metadata.get("crash_id", ""))
    return {
        "total_findings": len(findings),
        "severity_counts": severity_counts,
        "category_counts": category_counts,
        "signal_counts": signal_counts,
        "unique_crashes": len(unique_crash_ids),
        "afl_stats": self._read_afl_stats(output_dir),
    }
def _read_afl_stats(self, output_dir: Path) -> Dict[str, Any]:
"""Read AFL++ fuzzer statistics"""
stats = {}
try:
# Look for fuzzer_stats file in single or multiple instance setup
stats_files = []
# Single instance
single_stats = output_dir / "fuzzer_stats"
if single_stats.exists():
stats_files.append(single_stats)
# Multiple instances
for instance_dir in output_dir.iterdir():
if instance_dir.is_dir():
instance_stats = instance_dir / "fuzzer_stats"
if instance_stats.exists():
stats_files.append(instance_stats)
# Read first stats file found
if stats_files:
with open(stats_files[0], 'r') as f:
for line in f:
if ':' in line:
key, value = line.strip().split(':', 1)
stats[key.strip()] = value.strip()
except Exception as e:
logger.warning(f"Error reading AFL++ stats: {e}")
return stats