""" Cargo Fuzz Module This module uses cargo-fuzz for fuzzing Rust code with libFuzzer integration. """ # Copyright (c) 2025 FuzzingLabs # # Licensed under the Business Source License 1.1 (BSL). See the LICENSE file # at the root of this repository for details. # # After the Change Date (four years from publication), this version of the # Licensed Work will be made available under the Apache License, Version 2.0. # See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0 # # Additional attribution and requirements are provided in the NOTICE file. import asyncio import json import os from pathlib import Path from typing import Dict, Any, List, Tuple import subprocess import logging import httpx import re from datetime import datetime, timedelta try: from prefect import get_run_context except ImportError: # Fallback for when not running in Prefect context get_run_context = None from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult from . import register_module logger = logging.getLogger(__name__) @register_module class CargoFuzzModule(BaseModule): """Cargo Fuzz Rust fuzzing module""" def get_metadata(self) -> ModuleMetadata: """Get module metadata""" return ModuleMetadata( name="cargo_fuzz", version="0.11.2", description="Rust fuzzing integration with libFuzzer using cargo-fuzz", author="FuzzForge Team", category="fuzzing", tags=["rust", "libfuzzer", "cargo", "coverage-guided", "sanitizers"], input_schema={ "type": "object", "properties": { "project_dir": { "type": "string", "description": "Path to Rust project directory (with Cargo.toml)" }, "fuzz_target": { "type": "string", "description": "Name of the fuzz target to run" }, "max_total_time": { "type": "integer", "default": 600, "description": "Maximum total time to run fuzzing (seconds)" }, "jobs": { "type": "integer", "default": 1, "description": "Number of worker processes" }, "corpus_dir": { "type": "string", "description": "Custom corpus directory" }, "artifacts_dir": { "type": "string", "description": "Custom artifacts directory" }, "sanitizer": { "type": "string", "enum": ["address", "memory", "thread", "leak", "none"], "default": "address", "description": "Sanitizer to use" }, "release": { "type": "boolean", "default": False, "description": "Use release mode" }, "debug_assertions": { "type": "boolean", "default": True, "description": "Enable debug assertions" } } }, output_schema={ "type": "object", "properties": { "findings": { "type": "array", "items": { "type": "object", "properties": { "crash_type": {"type": "string"}, "artifact_path": {"type": "string"}, "stack_trace": {"type": "string"} } } } } } ) def validate_config(self, config: Dict[str, Any]) -> bool: """Validate configuration""" project_dir = config.get("project_dir") if not project_dir: raise ValueError("project_dir is required") fuzz_target = config.get("fuzz_target") if not fuzz_target: raise ValueError("fuzz_target is required") return True async def execute(self, config: Dict[str, Any], workspace: Path, stats_callback=None) -> ModuleResult: """Execute cargo-fuzz fuzzing""" self.start_timer() try: # Initialize last observed stats for summary propagation self._last_stats = { 'executions': 0, 'executions_per_sec': 0.0, 'crashes': 0, 'corpus_size': 0, 'elapsed_time': 0, } # Validate inputs self.validate_config(config) self.validate_workspace(workspace) logger.info("Running cargo-fuzz Rust fuzzing") # Check installation await self._check_cargo_fuzz_installation() # Setup project project_dir = workspace / config["project_dir"] await self._setup_cargo_fuzz_project(project_dir, config) # Run fuzzing findings = await self._run_cargo_fuzz(project_dir, config, workspace, stats_callback) # Create summary and enrich with last observed runtime stats summary = self._create_summary(findings) try: summary.update({ 'executions': self._last_stats.get('executions', 0), 'executions_per_sec': self._last_stats.get('executions_per_sec', 0.0), 'corpus_size': self._last_stats.get('corpus_size', 0), 'crashes': self._last_stats.get('crashes', 0), 'elapsed_time': self._last_stats.get('elapsed_time', 0), }) except Exception: pass logger.info(f"cargo-fuzz found {len(findings)} issues") return self.create_result( findings=findings, status="success", summary=summary ) except Exception as e: logger.error(f"cargo-fuzz module failed: {e}") return self.create_result( findings=[], status="failed", error=str(e) ) async def _check_cargo_fuzz_installation(self): """Check if cargo-fuzz is installed""" try: process = await asyncio.create_subprocess_exec( "cargo", "fuzz", "--version", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await process.communicate() if process.returncode != 0: raise RuntimeError("cargo-fuzz not installed. Install with: cargo install cargo-fuzz") except Exception as e: raise RuntimeError(f"cargo-fuzz installation check failed: {e}") async def _setup_cargo_fuzz_project(self, project_dir: Path, config: Dict[str, Any]): """Setup cargo-fuzz project""" if not project_dir.exists(): raise FileNotFoundError(f"Project directory not found: {project_dir}") cargo_toml = project_dir / "Cargo.toml" if not cargo_toml.exists(): raise FileNotFoundError(f"Cargo.toml not found in {project_dir}") # Check if fuzz directory exists, if not initialize fuzz_dir = project_dir / "fuzz" if not fuzz_dir.exists(): logger.info("Initializing cargo-fuzz project") process = await asyncio.create_subprocess_exec( "cargo", "fuzz", "init", cwd=project_dir, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) await process.communicate() async def _run_cargo_fuzz(self, project_dir: Path, config: Dict[str, Any], workspace: Path, stats_callback=None) -> List[ModuleFinding]: """Run cargo-fuzz with real-time statistics reporting""" findings = [] # Get run_id from Prefect context for statistics reporting run_id = None if get_run_context: try: context = get_run_context() run_id = str(context.flow_run.id) except Exception: logger.warning("Could not get run_id from Prefect context") try: # Build command cmd = ["cargo", "fuzz", "run", config["fuzz_target"]] # Add options if config.get("jobs", 1) > 1: cmd.extend(["--", f"-jobs={config['jobs']}"]) max_time = config.get("max_total_time", 600) cmd.extend(["--", f"-max_total_time={max_time}"]) # Set sanitizer sanitizer = config.get("sanitizer", "address") if sanitizer != "none": cmd.append(f"--sanitizer={sanitizer}") if config.get("release", False): cmd.append("--release") # Set environment env = os.environ.copy() if config.get("debug_assertions", True): env["RUSTFLAGS"] = env.get("RUSTFLAGS", "") + " -C debug-assertions=on" logger.debug(f"Running command: {' '.join(cmd)}") # Run with streaming output processing for real-time stats try: process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT, # Merge stderr into stdout cwd=project_dir, env=env ) # Process output in real-time stdout_data, stderr_data = await self._process_streaming_output( process, max_time, config, stats_callback ) # Parse final results findings = self._parse_cargo_fuzz_output( stdout_data, stderr_data, project_dir, workspace, config ) except Exception as e: logger.warning(f"Error running cargo-fuzz: {e}") except Exception as e: logger.warning(f"Error in cargo-fuzz execution: {e}") return findings def _parse_cargo_fuzz_output(self, stdout: str, stderr: str, project_dir: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]: """Parse cargo-fuzz output""" findings = [] try: full_output = stdout + "\n" + stderr # Look for crash artifacts artifacts_dir = project_dir / "fuzz" / "artifacts" / config["fuzz_target"] if artifacts_dir.exists(): for artifact in artifacts_dir.iterdir(): if artifact.is_file(): finding = self._create_artifact_finding(artifact, workspace, full_output) if finding: findings.append(finding) except Exception as e: logger.warning(f"Error parsing cargo-fuzz output: {e}") return findings def _create_artifact_finding(self, artifact_path: Path, workspace: Path, output: str) -> ModuleFinding: """Create finding from artifact file""" try: # Try to determine crash type from filename or content crash_type = "crash" if "leak" in artifact_path.name.lower(): crash_type = "memory_leak" elif "timeout" in artifact_path.name.lower(): crash_type = "timeout" # Extract stack trace from output stack_trace = self._extract_stack_trace_from_output(output, artifact_path.name) try: rel_path = artifact_path.relative_to(workspace) file_path = str(rel_path) except ValueError: file_path = str(artifact_path) severity = "high" if "crash" in crash_type else "medium" finding = self.create_finding( title=f"cargo-fuzz {crash_type.title()}", description=f"cargo-fuzz discovered a {crash_type} in the Rust code", severity=severity, category=self._get_crash_category(crash_type), file_path=file_path, recommendation=self._get_crash_recommendation(crash_type), metadata={ "crash_type": crash_type, "artifact_path": str(artifact_path), "stack_trace": stack_trace, "fuzzer": "cargo_fuzz" } ) return finding except Exception as e: logger.warning(f"Error creating artifact finding: {e}") return None def _extract_stack_trace_from_output(self, output: str, artifact_name: str) -> str: """Extract stack trace from output""" try: lines = output.split('\n') stack_lines = [] in_stack = False for line in lines: if artifact_name in line or "stack backtrace:" in line.lower(): in_stack = True continue if in_stack: if line.strip() and ("at " in line or "::" in line or line.strip().startswith("0:")): stack_lines.append(line.strip()) elif not line.strip() and stack_lines: break return '\n'.join(stack_lines[:20]) # Limit stack trace size except Exception: return "" def _get_crash_category(self, crash_type: str) -> str: """Get category for crash type""" if "leak" in crash_type: return "memory_leak" elif "timeout" in crash_type: return "performance_issues" else: return "memory_safety" def _get_crash_recommendation(self, crash_type: str) -> str: """Get recommendation for crash type""" if "leak" in crash_type: return "Fix memory leak by ensuring proper cleanup of allocated resources. Review memory management patterns." elif "timeout" in crash_type: return "Fix timeout by optimizing performance, avoiding infinite loops, and implementing reasonable bounds." else: return "Fix the crash by analyzing the stack trace and addressing memory safety issues." async def _process_streaming_output(self, process, max_time: int, config: Dict[str, Any], stats_callback=None) -> Tuple[str, str]: """Process cargo-fuzz output in real-time and report statistics""" stdout_lines = [] start_time = datetime.utcnow() last_update = start_time stats_data = { 'executions': 0, 'executions_per_sec': 0.0, 'crashes': 0, 'corpus_size': 0, 'elapsed_time': 0 } # Get run_id from Prefect context for statistics reporting run_id = None if get_run_context: try: context = get_run_context() run_id = str(context.flow_run.id) except Exception: logger.debug("Could not get run_id from Prefect context") try: # Emit an initial baseline update so dashboards show activity immediately try: await self._send_stats_via_callback(stats_callback, run_id, stats_data) except Exception: pass # Monitor process output in chunks to capture libFuzzer carriage-return updates buffer = "" while True: try: chunk = await asyncio.wait_for(process.stdout.read(4096), timeout=1.0) if not chunk: # Process finished break buffer += chunk.decode('utf-8', errors='ignore') # Split on both newline and carriage return if "\n" in buffer or "\r" in buffer: parts = re.split(r"[\r\n]", buffer) buffer = parts[-1] for part in parts[:-1]: line = part.strip() if not line: continue stdout_lines.append(line) self._parse_stats_from_line(line, stats_data) except asyncio.TimeoutError: # No output this second; continue to periodic update check pass # Periodic update (even if there was no output) current_time = datetime.utcnow() stats_data['elapsed_time'] = int((current_time - start_time).total_seconds()) if current_time - last_update >= timedelta(seconds=3): try: self._last_stats = dict(stats_data) except Exception: pass await self._send_stats_via_callback(stats_callback, run_id, stats_data) last_update = current_time # Check if max time exceeded if stats_data['elapsed_time'] >= max_time: logger.info("Max time reached, terminating cargo-fuzz") process.terminate() break # Wait for process to complete await process.wait() # Send final stats update try: self._last_stats = dict(stats_data) except Exception: pass await self._send_stats_via_callback(stats_callback, run_id, stats_data) except Exception as e: logger.warning(f"Error processing streaming output: {e}") stdout_data = '\n'.join(stdout_lines) return stdout_data, "" def _parse_stats_from_line(self, line: str, stats_data: Dict[str, Any]): """Parse statistics from a cargo-fuzz output line""" try: # cargo-fuzz typically shows stats like: # "#12345: DONE cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb" # "#12345: NEW cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb L: 45/67 MS: 3 ..." # Extract execution count (the #number) exec_match = re.search(r'#(\d+)(?::)?', line) if exec_match: stats_data['executions'] = int(exec_match.group(1)) else: # libFuzzer stats format alternative exec_alt = re.search(r'stat::number_of_executed_units:\s*(\d+)', line) if exec_alt: stats_data['executions'] = int(exec_alt.group(1)) else: exec_alt2 = re.search(r'executed units:?\s*(\d+)', line, re.IGNORECASE) if exec_alt2: stats_data['executions'] = int(exec_alt2.group(1)) # Extract executions per second exec_per_sec_match = re.search(r'exec/s:\s*([0-9\.]+)', line) if exec_per_sec_match: stats_data['executions_per_sec'] = float(exec_per_sec_match.group(1)) else: eps_alt = re.search(r'stat::execs_per_sec:\s*([0-9\.]+)', line) if eps_alt: stats_data['executions_per_sec'] = float(eps_alt.group(1)) # Extract corpus size (corp: X/YMb) corp_match = re.search(r'corp(?:us)?:\s*(\d+)', line) if corp_match: stats_data['corpus_size'] = int(corp_match.group(1)) # Look for crash indicators if any(keyword in line.lower() for keyword in ['crash', 'assert', 'panic', 'abort']): stats_data['crashes'] += 1 except Exception as e: logger.debug(f"Error parsing stats from line '{line}': {e}") async def _send_stats_via_callback(self, stats_callback, run_id: str, stats_data: Dict[str, Any]): """Send statistics update via callback function""" if not stats_callback or not run_id: return try: # Prepare statistics payload stats_payload = { "run_id": run_id, "workflow": "language_fuzzing", "executions": stats_data['executions'], "executions_per_sec": stats_data['executions_per_sec'], "crashes": stats_data['crashes'], "unique_crashes": stats_data['crashes'], # Assume all crashes are unique for now "corpus_size": stats_data['corpus_size'], "elapsed_time": stats_data['elapsed_time'], "timestamp": datetime.utcnow().isoformat() } # Call the callback function provided by the Prefect task await stats_callback(stats_payload) logger.info( "LIVE STATS SENT: exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%s", stats_data['executions'], stats_data['executions_per_sec'], stats_data['crashes'], stats_data['corpus_size'], stats_data['elapsed_time'], ) except Exception as e: logger.debug(f"Error sending stats via callback: {e}") def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]: """Create analysis summary""" severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0} category_counts = {} for finding in findings: severity_counts[finding.severity] += 1 category_counts[finding.category] = category_counts.get(finding.category, 0) + 1 return { "total_findings": len(findings), "severity_counts": severity_counts, "category_counts": category_counts }