Files
fuzzforge_ai/backend/toolbox/modules/fuzzing/cargo_fuzz.py
Tanguy Duhamel 323a434c73 Initial commit
2025-09-29 21:26:41 +02:00

573 lines
22 KiB
Python

"""
Cargo Fuzz Module
This module uses cargo-fuzz for fuzzing Rust code with libFuzzer integration.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List, Tuple
import subprocess
import logging
import httpx
import re
from datetime import datetime, timedelta
try:
from prefect import get_run_context
except ImportError:
# Fallback for when not running in Prefect context
get_run_context = None
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class CargoFuzzModule(BaseModule):
"""Cargo Fuzz Rust fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe this module: identity, tags, and its config/result schemas."""
    # Options accepted in the module configuration.
    config_properties = {
        "project_dir": {
            "type": "string",
            "description": "Path to Rust project directory (with Cargo.toml)"
        },
        "fuzz_target": {
            "type": "string",
            "description": "Name of the fuzz target to run"
        },
        "max_total_time": {
            "type": "integer",
            "default": 600,
            "description": "Maximum total time to run fuzzing (seconds)"
        },
        "jobs": {
            "type": "integer",
            "default": 1,
            "description": "Number of worker processes"
        },
        "corpus_dir": {
            "type": "string",
            "description": "Custom corpus directory"
        },
        "artifacts_dir": {
            "type": "string",
            "description": "Custom artifacts directory"
        },
        "sanitizer": {
            "type": "string",
            "enum": ["address", "memory", "thread", "leak", "none"],
            "default": "address",
            "description": "Sanitizer to use"
        },
        "release": {
            "type": "boolean",
            "default": False,
            "description": "Use release mode"
        },
        "debug_assertions": {
            "type": "boolean",
            "default": True,
            "description": "Enable debug assertions"
        }
    }

    # Shape of a single entry in the "findings" array of the result.
    finding_item_schema = {
        "type": "object",
        "properties": {
            "crash_type": {"type": "string"},
            "artifact_path": {"type": "string"},
            "stack_trace": {"type": "string"}
        }
    }

    return ModuleMetadata(
        name="cargo_fuzz",
        version="0.11.2",
        description="Rust fuzzing integration with libFuzzer using cargo-fuzz",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["rust", "libfuzzer", "cargo", "coverage-guided", "sanitizers"],
        input_schema={
            "type": "object",
            "properties": config_properties
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": finding_item_schema
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Check that the mandatory configuration keys are present and non-empty.

    Returns:
        True when both ``project_dir`` and ``fuzz_target`` are set.

    Raises:
        ValueError: for the first required key that is missing or falsy
            (``project_dir`` is checked before ``fuzz_target``).
    """
    required = (
        ("project_dir", "project_dir is required"),
        ("fuzz_target", "fuzz_target is required"),
    )
    for key, message in required:
        if not config.get(key):
            raise ValueError(message)
    return True
async def execute(self, config: Dict[str, Any], workspace: Path, stats_callback=None) -> ModuleResult:
    """Run one cargo-fuzz campaign and package the outcome.

    Steps: validate the config and workspace, check that cargo-fuzz is
    available, prepare the project under ``workspace / config["project_dir"]``,
    run the fuzzer, then build a summary enriched with the last runtime
    statistics observed while streaming the fuzzer output.

    Args:
        config: Module configuration (see ``validate_config`` for the
            required keys).
        workspace: Root directory that ``project_dir`` is resolved against.
        stats_callback: Optional awaitable forwarded to the fuzzing run for
            live statistics updates.

    Returns:
        The value of ``self.create_result(...)`` with status ``"success"``
        (findings + summary) or, if any step raised, status ``"failed"``
        with the exception text. Exceptions are not propagated.

    NOTE(review): ``start_timer``, ``validate_workspace`` and
    ``create_result`` are not defined in this class; presumably inherited
    from BaseModule.
    """
    self.start_timer()
    try:
        # Baseline stats so the summary has values even if no output is parsed.
        self._last_stats = {
            'executions': 0,
            'executions_per_sec': 0.0,
            'crashes': 0,
            'corpus_size': 0,
            'elapsed_time': 0,
        }

        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running cargo-fuzz Rust fuzzing")

        # Check installation
        await self._check_cargo_fuzz_installation()

        # Setup project
        project_dir = workspace / config["project_dir"]
        await self._setup_cargo_fuzz_project(project_dir, config)

        # Run fuzzing
        findings = await self._run_cargo_fuzz(project_dir, config, workspace, stats_callback)

        # Create summary and enrich it with the last observed runtime stats.
        # This is a plain dict update, so it no longer hides behind a
        # swallow-everything try/except.
        summary = self._create_summary(findings)
        last_stats = self._last_stats
        summary.update({
            'executions': last_stats.get('executions', 0),
            'executions_per_sec': last_stats.get('executions_per_sec', 0.0),
            'corpus_size': last_stats.get('corpus_size', 0),
            'crashes': last_stats.get('crashes', 0),
            'elapsed_time': last_stats.get('elapsed_time', 0),
        })

        logger.info(f"cargo-fuzz found {len(findings)} issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        # Top-level boundary: report the failure in the result, but keep the
        # traceback in the log so the root cause can be diagnosed.
        logger.error(f"cargo-fuzz module failed: {e}", exc_info=True)
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_cargo_fuzz_installation(self):
"""Check if cargo-fuzz is installed"""
try:
process = await asyncio.create_subprocess_exec(
"cargo", "fuzz", "--version",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
if process.returncode != 0:
raise RuntimeError("cargo-fuzz not installed. Install with: cargo install cargo-fuzz")
except Exception as e:
raise RuntimeError(f"cargo-fuzz installation check failed: {e}")
async def _setup_cargo_fuzz_project(self, project_dir: Path, config: Dict[str, Any]):
"""Setup cargo-fuzz project"""
if not project_dir.exists():
raise FileNotFoundError(f"Project directory not found: {project_dir}")
cargo_toml = project_dir / "Cargo.toml"
if not cargo_toml.exists():
raise FileNotFoundError(f"Cargo.toml not found in {project_dir}")
# Check if fuzz directory exists, if not initialize
fuzz_dir = project_dir / "fuzz"
if not fuzz_dir.exists():
logger.info("Initializing cargo-fuzz project")
process = await asyncio.create_subprocess_exec(
"cargo", "fuzz", "init",
cwd=project_dir,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
await process.communicate()
async def _run_cargo_fuzz(self, project_dir: Path, config: Dict[str, Any], workspace: Path, stats_callback=None) -> List[ModuleFinding]:
    """Run ``cargo fuzz run`` for the configured target and collect findings.

    The command is laid out as::

        cargo fuzz run --sanitizer=<s> [--release] <target> -- -max_total_time=<t> [-jobs=<n>]

    cargo-fuzz options must come before the ``--`` separator; everything
    after it is forwarded to the fuzzer binary (libFuzzer). The sanitizer is
    always passed explicitly, including ``none``, because omitting the flag
    leaves cargo-fuzz on its own default sanitizer.

    Args:
        project_dir: Directory the command is run in.
        config: Module configuration; reads ``fuzz_target``, ``jobs``,
            ``max_total_time``, ``sanitizer``, ``release`` and
            ``debug_assertions``.
        workspace: Workspace root, passed on to the output parser.
        stats_callback: Optional awaitable passed on to the streaming
            output processor for live statistics.

    Returns:
        Findings produced by ``_parse_cargo_fuzz_output``. Errors are logged
        as warnings and result in an empty list rather than an exception.
    """
    findings: List[ModuleFinding] = []
    try:
        max_time = config.get("max_total_time", 600)
        sanitizer = config.get("sanitizer", "address")

        # cargo-fuzz options (before the `--` separator).
        cmd = ["cargo", "fuzz", "run", f"--sanitizer={sanitizer}"]
        if config.get("release", False):
            cmd.append("--release")
        cmd.append(config["fuzz_target"])

        # libFuzzer options (after a single `--` separator).
        cmd.extend(["--", f"-max_total_time={max_time}"])
        if config.get("jobs", 1) > 1:
            cmd.append(f"-jobs={config['jobs']}")

        # Set environment
        env = os.environ.copy()
        if config.get("debug_assertions", True):
            env["RUSTFLAGS"] = (env.get("RUSTFLAGS", "") + " -C debug-assertions=on").strip()

        logger.debug(f"Running command: {' '.join(cmd)}")

        # Run with streaming output processing for real-time stats
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,  # Merge stderr into stdout
                cwd=project_dir,
                env=env
            )
            # Process output in real-time
            stdout_data, stderr_data = await self._process_streaming_output(
                process, max_time, config, stats_callback
            )
            # Parse final results
            findings = self._parse_cargo_fuzz_output(
                stdout_data, stderr_data, project_dir, workspace, config
            )
        except Exception as e:
            logger.warning(f"Error running cargo-fuzz: {e}")
    except Exception as e:
        # Reached when building the command or environment fails
        # (for example a missing "fuzz_target" key).
        logger.warning(f"Error in cargo-fuzz execution: {e}")
    return findings
def _parse_cargo_fuzz_output(self, stdout: str, stderr: str, project_dir: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
"""Parse cargo-fuzz output"""
findings = []
try:
full_output = stdout + "\n" + stderr
# Look for crash artifacts
artifacts_dir = project_dir / "fuzz" / "artifacts" / config["fuzz_target"]
if artifacts_dir.exists():
for artifact in artifacts_dir.iterdir():
if artifact.is_file():
finding = self._create_artifact_finding(artifact, workspace, full_output)
if finding:
findings.append(finding)
except Exception as e:
logger.warning(f"Error parsing cargo-fuzz output: {e}")
return findings
def _create_artifact_finding(self, artifact_path: Path, workspace: Path, output: str) -> ModuleFinding:
    """Build a finding for one artifact file.

    The crash type is derived from the artifact file name ("leak" ->
    memory_leak, "timeout" -> timeout, anything else -> crash). The reported
    file path is relative to ``workspace`` when the artifact lives inside
    it, otherwise the path is used as given. Returns ``None`` (after logging
    a warning) if anything goes wrong.
    """
    try:
        lowered_name = artifact_path.name.lower()
        if "leak" in lowered_name:
            kind = "memory_leak"
        elif "timeout" in lowered_name:
            kind = "timeout"
        else:
            kind = "crash"

        # Pull the matching backtrace (if any) out of the fuzzer output.
        trace = self._extract_stack_trace_from_output(output, artifact_path.name)

        try:
            location = str(artifact_path.relative_to(workspace))
        except ValueError:
            # Artifact is not located under the workspace.
            location = str(artifact_path)

        if "crash" in kind:
            level = "high"
        else:
            level = "medium"

        return self.create_finding(
            title=f"cargo-fuzz {kind.title()}",
            description=f"cargo-fuzz discovered a {kind} in the Rust code",
            severity=level,
            category=self._get_crash_category(kind),
            file_path=location,
            recommendation=self._get_crash_recommendation(kind),
            metadata={
                "crash_type": kind,
                "artifact_path": str(artifact_path),
                "stack_trace": trace,
                "fuzzer": "cargo_fuzz"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating artifact finding: {e}")
        return None
def _extract_stack_trace_from_output(self, output: str, artifact_name: str) -> str:
"""Extract stack trace from output"""
try:
lines = output.split('\n')
stack_lines = []
in_stack = False
for line in lines:
if artifact_name in line or "stack backtrace:" in line.lower():
in_stack = True
continue
if in_stack:
if line.strip() and ("at " in line or "::" in line or line.strip().startswith("0:")):
stack_lines.append(line.strip())
elif not line.strip() and stack_lines:
break
return '\n'.join(stack_lines[:20]) # Limit stack trace size
except Exception:
return ""
def _get_crash_category(self, crash_type: str) -> str:
"""Get category for crash type"""
if "leak" in crash_type:
return "memory_leak"
elif "timeout" in crash_type:
return "performance_issues"
else:
return "memory_safety"
def _get_crash_recommendation(self, crash_type: str) -> str:
"""Get recommendation for crash type"""
if "leak" in crash_type:
return "Fix memory leak by ensuring proper cleanup of allocated resources. Review memory management patterns."
elif "timeout" in crash_type:
return "Fix timeout by optimizing performance, avoiding infinite loops, and implementing reasonable bounds."
else:
return "Fix the crash by analyzing the stack trace and addressing memory safety issues."
async def _process_streaming_output(self, process, max_time: int, config: Dict[str, Any], stats_callback=None) -> Tuple[str, str]:
    """Consume the fuzzer's output as it arrives and report live statistics.

    Output is read in chunks of up to 4096 bytes and split on both newline
    and carriage return, so in-place progress updates are seen as separate
    lines. Each non-empty line is recorded and passed to
    ``_parse_stats_from_line``. Roughly every 3 seconds, and once more at
    the end, the current statistics are stored in ``self._last_stats`` and
    sent through ``_send_stats_via_callback``.

    Args:
        process: Subprocess whose ``stdout`` is a readable asyncio stream.
        max_time: Elapsed seconds (measured from the start of this method)
            after which the process is asked to terminate.
        config: Module configuration (not read by this method).
        stats_callback: Optional awaitable used for live statistics.

    Returns:
        ``(stdout_text, "")``: the captured lines joined with newlines, and
        an empty string for stderr. Errors are logged as warnings and the
        output captured so far is still returned.
    """
    stdout_lines: List[str] = []
    stats_data = {
        'executions': 0,
        'executions_per_sec': 0.0,
        'crashes': 0,
        'corpus_size': 0,
        'elapsed_time': 0
    }

    # Get run_id from Prefect context for statistics reporting
    run_id = None
    if get_run_context:
        try:
            run_id = str(get_run_context().flow_run.id)
        except Exception:
            logger.debug("Could not get run_id from Prefect context")

    def record_line(text: str) -> None:
        # Keep non-empty lines and fold them into the running statistics.
        line = text.strip()
        if line:
            stdout_lines.append(line)
            self._parse_stats_from_line(line, stats_data)

    try:
        # The event loop clock is monotonic, so elapsed time is not affected
        # by wall-clock adjustments.
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        last_update = start_time
        terminated = False

        # Emit an initial baseline update so dashboards show activity immediately
        await self._send_stats_via_callback(stats_callback, run_id, stats_data)

        buffer = ""
        while True:
            try:
                chunk = await asyncio.wait_for(process.stdout.read(4096), timeout=1.0)
                if not chunk:
                    # End of stream: the process closed its output.
                    break
                buffer += chunk.decode('utf-8', errors='ignore')
                # Everything before the last separator is complete; the
                # remainder stays buffered until more data arrives.
                *complete, buffer = re.split(r"[\r\n]", buffer)
                for part in complete:
                    record_line(part)
            except asyncio.TimeoutError:
                # No output this second; continue to periodic update check
                pass

            # Periodic update (even if there was no output)
            now = loop.time()
            stats_data['elapsed_time'] = int(now - start_time)
            if now - last_update >= 3.0:
                self._last_stats = dict(stats_data)
                await self._send_stats_via_callback(stats_callback, run_id, stats_data)
                last_update = now

            # Check if max time exceeded
            if stats_data['elapsed_time'] >= max_time:
                logger.info("Max time reached, terminating cargo-fuzz")
                if process.returncode is None:
                    process.terminate()
                    terminated = True
                break

        # A final line without a trailing newline would otherwise be lost.
        record_line(buffer)

        if terminated:
            # Give the process a grace period to exit, then force it, so a
            # process that ignores the request cannot block this coroutine.
            try:
                await asyncio.wait_for(process.wait(), timeout=10.0)
            except asyncio.TimeoutError:
                logger.warning("cargo-fuzz did not exit after terminate; killing it")
                process.kill()
                await process.wait()
        else:
            await process.wait()

        # Send final stats update
        stats_data['elapsed_time'] = int(loop.time() - start_time)
        self._last_stats = dict(stats_data)
        await self._send_stats_via_callback(stats_callback, run_id, stats_data)
    except Exception as e:
        logger.warning(f"Error processing streaming output: {e}")

    return '\n'.join(stdout_lines), ""
def _parse_stats_from_line(self, line: str, stats_data: Dict[str, Any]):
"""Parse statistics from a cargo-fuzz output line"""
try:
# cargo-fuzz typically shows stats like:
# "#12345: DONE cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb"
# "#12345: NEW cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb L: 45/67 MS: 3 ..."
# Extract execution count (the #number)
exec_match = re.search(r'#(\d+)(?::)?', line)
if exec_match:
stats_data['executions'] = int(exec_match.group(1))
else:
# libFuzzer stats format alternative
exec_alt = re.search(r'stat::number_of_executed_units:\s*(\d+)', line)
if exec_alt:
stats_data['executions'] = int(exec_alt.group(1))
else:
exec_alt2 = re.search(r'executed units:?\s*(\d+)', line, re.IGNORECASE)
if exec_alt2:
stats_data['executions'] = int(exec_alt2.group(1))
# Extract executions per second
exec_per_sec_match = re.search(r'exec/s:\s*([0-9\.]+)', line)
if exec_per_sec_match:
stats_data['executions_per_sec'] = float(exec_per_sec_match.group(1))
else:
eps_alt = re.search(r'stat::execs_per_sec:\s*([0-9\.]+)', line)
if eps_alt:
stats_data['executions_per_sec'] = float(eps_alt.group(1))
# Extract corpus size (corp: X/YMb)
corp_match = re.search(r'corp(?:us)?:\s*(\d+)', line)
if corp_match:
stats_data['corpus_size'] = int(corp_match.group(1))
# Look for crash indicators
if any(keyword in line.lower() for keyword in ['crash', 'assert', 'panic', 'abort']):
stats_data['crashes'] += 1
except Exception as e:
logger.debug(f"Error parsing stats from line '{line}': {e}")
async def _send_stats_via_callback(self, stats_callback, run_id: str, stats_data: Dict[str, Any]):
"""Send statistics update via callback function"""
if not stats_callback or not run_id:
return
try:
# Prepare statistics payload
stats_payload = {
"run_id": run_id,
"workflow": "language_fuzzing",
"executions": stats_data['executions'],
"executions_per_sec": stats_data['executions_per_sec'],
"crashes": stats_data['crashes'],
"unique_crashes": stats_data['crashes'], # Assume all crashes are unique for now
"corpus_size": stats_data['corpus_size'],
"elapsed_time": stats_data['elapsed_time'],
"timestamp": datetime.utcnow().isoformat()
}
# Call the callback function provided by the Prefect task
await stats_callback(stats_payload)
logger.info(
"LIVE STATS SENT: exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%s",
stats_data['executions'],
stats_data['executions_per_sec'],
stats_data['crashes'],
stats_data['corpus_size'],
stats_data['elapsed_time'],
)
except Exception as e:
logger.debug(f"Error sending stats via callback: {e}")
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
for finding in findings:
severity_counts[finding.severity] += 1
category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"category_counts": category_counts
}