Fix deployment issues

This commit is contained in:
Tanguy Duhamel
2025-09-30 15:18:53 +02:00
parent 724064dfaa
commit 833963e5dd
30 changed files with 4 additions and 14073 deletions

Binary file not shown.

View File

@@ -382,7 +382,7 @@ class PrefectManager:
"image": deploy_image, # Use the worker-accessible registry name
"volumes": [], # Populated at run submission with toolbox mount
"env": {
"PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect/toolbox/workflows",
"PYTHONPATH": "/opt/prefect/toolbox:/opt/prefect",
"WORKFLOW_NAME": name
}
}

View File

@@ -1,37 +0,0 @@
"""
AI Security Modules
This package contains modules for AI and machine learning model security testing.
Available modules:
- Garak: LLM/AI model security testing framework for prompt injection, bias, and jailbreaks
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type

from ..base import BaseModule

# Registry that collects every AI security module class in this package.
AI_SECURITY_MODULES: List[Type[BaseModule]] = []


def register_module(module_class: Type[BaseModule]):
    """Class decorator: add *module_class* to the AI security registry."""
    AI_SECURITY_MODULES.append(module_class)
    return module_class


def get_available_modules() -> List[Type[BaseModule]]:
    """Return a shallow copy of the registered AI security modules."""
    return list(AI_SECURITY_MODULES)


# Importing the module triggers its @register_module decorator.
from .garak import GarakModule

View File

@@ -1,526 +0,0 @@
"""
Garak AI Security Module
This module uses Garak for AI red-teaming and LLM vulnerability assessment,
testing for prompt injection, bias, jailbreaks, and other AI-specific security issues.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class GarakModule(BaseModule):
"""Garak AI red-teaming and LLM vulnerability assessment module"""
def get_metadata(self) -> ModuleMetadata:
    """Return static metadata describing the Garak module.

    Includes the JSON-schema for accepted configuration and the shape of
    the findings this module emits.
    """
    return ModuleMetadata(
        name="garak",
        version="0.9.0",
        description="AI red-teaming framework for testing LLM vulnerabilities including prompt injection, bias, and jailbreaks",
        author="FuzzForge Team",
        category="ai_security",
        tags=["ai", "llm", "prompt-injection", "bias", "jailbreak", "red-team"],
        input_schema={
            "type": "object",
            "properties": {
                "model_type": {
                    "type": "string",
                    "enum": ["openai", "huggingface", "anthropic", "local"],
                    "description": "Type of LLM to test"
                },
                "model_name": {
                    "type": "string",
                    "description": "Name/path of the model to test"
                },
                "api_key": {
                    "type": "string",
                    "description": "API key for cloud models (if required)"
                },
                "probes": {
                    "type": "array",
                    "items": {"type": "string"},
                    "default": ["encoding", "promptinject", "malwaregen", "dan"],
                    "description": "Probe types to run"
                },
                "generations": {
                    "type": "integer",
                    "default": 10,
                    "description": "Number of generations per probe"
                },
                "detectors": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Detectors to use for evaluation"
                },
                "config_file": {
                    "type": "string",
                    "description": "Path to Garak configuration file"
                },
                "report_prefix": {
                    "type": "string",
                    "default": "garak",
                    "description": "Prefix for report files"
                },
                "parallel_requests": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of parallel requests"
                },
                "temperature": {
                    "type": "number",
                    "default": 0.7,
                    "description": "Model temperature setting"
                },
                "max_tokens": {
                    "type": "integer",
                    "default": 150,
                    "description": "Maximum tokens per generation"
                },
                "seed": {
                    "type": "integer",
                    "description": "Random seed for reproducibility"
                },
                "verbose": {
                    "type": "boolean",
                    # Fix: bare `false` is not a Python literal (NameError);
                    # schema booleans must be Python False/True.
                    "default": False,
                    "description": "Enable verbose output"
                }
            }
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "probe_name": {"type": "string"},
                            "vulnerability_type": {"type": "string"},
                            "success_rate": {"type": "number"},
                            "prompt": {"type": "string"},
                            "response": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Ensure the required model settings are present.

    Raises ValueError naming the first missing key; returns True otherwise.
    """
    for required in ("model_type", "model_name"):
        if not config.get(required):
            raise ValueError(f"{required} is required")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Execute Garak AI security testing.

    Orchestrates the full run: validates inputs, ensures Garak is
    installed, runs the assessment, and wraps the findings in a
    ModuleResult. Exceptions are converted into a "failed" result
    rather than propagated to the caller.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Garak AI security assessment")
        # Check Garak installation
        await self._check_garak_installation()
        # Run Garak testing
        findings = await self._run_garak_assessment(config, workspace)
        # Create summary
        summary = self._create_summary(findings)
        logger.info(f"Garak found {len(findings)} AI security issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        # Report failures through the result object so the pipeline continues.
        logger.error(f"Garak module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_garak_installation(self):
    """Verify Garak is importable; attempt a pip install if it is not.

    Best-effort: all failures are logged as warnings and never raised,
    so the subsequent assessment run surfaces the real error if Garak
    is still missing.
    """
    try:
        process = await asyncio.create_subprocess_exec(
            "python", "-c", "import garak; print(garak.__version__)",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            # Try installing if not available
            logger.info("Garak not found, attempting installation...")
            install_process = await asyncio.create_subprocess_exec(
                "pip", "install", "garak",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            _, install_err = await install_process.communicate()
            # Fix: surface a failed install instead of silently continuing.
            if install_process.returncode != 0:
                logger.warning(
                    "Garak installation failed: %s",
                    install_err.decode(errors="replace")
                )
    except Exception as e:
        logger.warning(f"Garak installation check failed: {e}")
async def _run_garak_assessment(self, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Build and run the Garak CLI command, then parse its reports.

    Returns the parsed findings; errors are logged and yield an empty list.
    """
    findings = []
    try:
        # Build Garak command
        cmd = ["python", "-m", "garak"]
        # Add model configuration
        cmd.extend(["--model_type", config["model_type"]])
        cmd.extend(["--model_name", config["model_name"]])
        api_key = config.get("api_key")
        if api_key:
            # Pass the key via the environment rather than argv so it does
            # not leak into process listings or debug logs.
            os.environ["GARAK_API_KEY"] = api_key
        # Add probes
        probes = config.get("probes", ["encoding", "promptinject"])
        for probe in probes:
            cmd.extend(["--probes", probe])
        # Add generations
        generations = config.get("generations", 10)
        cmd.extend(["--generations", str(generations)])
        # Add detectors if specified
        detectors = config.get("detectors", [])
        for detector in detectors:
            cmd.extend(["--detectors", detector])
        # Add parallel requests
        parallel = config.get("parallel_requests", 1)
        if parallel > 1:
            cmd.extend(["--parallel_requests", str(parallel)])
        # Add model parameters
        temperature = config.get("temperature", 0.7)
        cmd.extend(["--temperature", str(temperature)])
        max_tokens = config.get("max_tokens", 150)
        cmd.extend(["--max_tokens", str(max_tokens)])
        # Fix: `if seed:` silently dropped a legitimate seed of 0.
        seed = config.get("seed")
        if seed is not None:
            cmd.extend(["--seed", str(seed)])
        # Add configuration file (only if it exists under the workspace)
        config_file = config.get("config_file")
        if config_file:
            config_path = workspace / config_file
            if config_path.exists():
                cmd.extend(["--config", str(config_path)])
        # Reports go under the workspace so _parse_garak_results can find them.
        output_dir = workspace / "garak_output"
        output_dir.mkdir(exist_ok=True)
        cmd.extend(["--report_prefix", str(output_dir / config.get("report_prefix", "garak"))])
        # Add verbose flag
        if config.get("verbose", False):
            cmd.append("--verbose")
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run Garak
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results
        findings = self._parse_garak_results(output_dir, workspace, stdout.decode(), stderr.decode())
    except Exception as e:
        logger.warning(f"Error running Garak assessment: {e}")
    return findings
def _parse_garak_results(self, output_dir: Path, workspace: Path, stdout: str, stderr: str) -> List[ModuleFinding]:
    """Collect findings from JSONL report files, falling back to raw output."""
    collected: List[ModuleFinding] = []
    try:
        for report in output_dir.glob("*.report.jsonl"):
            collected.extend(self._parse_report_file(report, workspace))
        # No structured reports produced: scan console output as a last resort.
        if not collected:
            collected = self._parse_stdout_output(stdout, stderr, workspace)
    except Exception as e:
        logger.warning(f"Error parsing Garak results: {e}")
    return collected
def _parse_report_file(self, report_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse a Garak JSONL report file into findings.

    Fix: a single malformed JSON line no longer aborts the whole file;
    bad lines are logged and skipped.
    """
    findings = []
    try:
        with open(report_file, 'r') as f:
            for line_no, line in enumerate(f, 1):
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError as e:
                    logger.warning(f"Skipping malformed JSON line {line_no} in {report_file}: {e}")
                    continue
                finding = self._create_garak_finding(data, workspace, report_file)
                if finding:
                    findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing report file {report_file}: {e}")
    return findings
def _parse_stdout_output(self, stdout: str, stderr: str, workspace: Path) -> List[ModuleFinding]:
    """Heuristically extract findings from raw Garak console output."""
    indicators = ("vulnerability", "injection", "jailbreak", "bias", "harmful")
    collected = []
    try:
        for line in stdout.split('\n') + stderr.split('\n'):
            lowered = line.lower()
            if not any(token in lowered for token in indicators):
                continue
            # The line looks suspicious: turn it into a basic finding.
            finding = self._create_basic_finding(line, workspace)
            if finding:
                collected.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing Garak stdout: {e}")
    return collected
def _create_garak_finding(self, data: Dict[str, Any], workspace: Path, report_file: Path) -> ModuleFinding:
    """Create a finding from one Garak JSONL report entry.

    Returns None for passed tests (no vulnerability found) and on any
    construction error.
    """
    try:
        # Extract information from Garak data structure
        probe_name = data.get("probe", "unknown")
        detector_name = data.get("detector", "unknown")
        passed = data.get("passed", True)
        # Fix: tolerate explicit nulls — data.get(key, "") returns None when
        # the key is present with a null value, and None[:500] raises.
        prompt = (data.get("prompt") or "")[:500]  # Limit length
        response = (data.get("response") or "")[:500]  # Limit length
        # Only create findings for failed tests (vulnerabilities found)
        if passed:
            return None
        # Determine vulnerability type and severity
        vulnerability_type = self._get_vulnerability_type(probe_name, detector_name)
        severity = self._get_vulnerability_severity(vulnerability_type, probe_name)
        # Prefer a workspace-relative path; fall back to the absolute one.
        try:
            rel_path = report_file.relative_to(workspace)
            file_path = str(rel_path)
        except ValueError:
            file_path = str(report_file)
        finding = self.create_finding(
            title=f"AI Security Issue: {vulnerability_type}",
            description=f"Garak detected a {vulnerability_type} vulnerability using probe '{probe_name}' and detector '{detector_name}'",
            severity=severity,
            category=self._get_ai_security_category(vulnerability_type),
            file_path=file_path,
            recommendation=self._get_ai_security_recommendation(vulnerability_type, probe_name),
            metadata={
                "probe_name": probe_name,
                "detector_name": detector_name,
                "vulnerability_type": vulnerability_type,
                "prompt_preview": prompt,
                "response_preview": response,
                "passed": passed,
                "fuzzer": "garak"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error creating Garak finding: {e}")
        return None
def _create_basic_finding(self, line: str, workspace: Path) -> ModuleFinding:
    """Build a medium-severity finding from one suspicious output line."""
    try:
        # Classify by the first matching keyword, defaulting to a generic type.
        lowered = line.lower()
        vulnerability_type = "ai_security_issue"
        for token, label in (
            ("injection", "prompt_injection"),
            ("jailbreak", "jailbreak_attempt"),
            ("bias", "bias_detection"),
        ):
            if token in lowered:
                vulnerability_type = label
                break
        return self.create_finding(
            title=f"AI Security Detection: {vulnerability_type.replace('_', ' ').title()}",
            description=f"Garak detected potential AI security issue: {line.strip()}",
            severity="medium",
            category=self._get_ai_security_category(vulnerability_type),
            file_path=None,
            recommendation=self._get_ai_security_recommendation(vulnerability_type, "general"),
            metadata={
                "vulnerability_type": vulnerability_type,
                "detection_line": line.strip(),
                "fuzzer": "garak"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating basic finding: {e}")
        return None
def _get_vulnerability_type(self, probe_name: str, detector_name: str) -> str:
    """Classify the vulnerability from the probe and detector names."""
    probe = probe_name.lower()
    detector = detector_name.lower()
    if "inject" in probe or "inject" in detector:
        return "prompt_injection"
    if "jailbreak" in probe or "dan" in probe:
        return "jailbreak_attempt"
    if "bias" in probe or "bias" in detector:
        return "bias_detection"
    if "malware" in probe or "harmful" in detector:
        return "harmful_content_generation"
    if "encoding" in probe:
        return "encoding_vulnerability"
    if "leak" in probe:
        return "data_leakage"
    return "ai_security_vulnerability"
def _get_vulnerability_severity(self, vulnerability_type: str, probe_name: str) -> str:
    """Map a vulnerability type to a severity level (probe name is unused)."""
    high_risk = {
        "prompt_injection",
        "jailbreak_attempt",
        "harmful_content_generation",
        "data_leakage",
    }
    # Everything outside the high-risk set reports as medium.
    return "high" if vulnerability_type in high_risk else "medium"
def _get_ai_security_category(self, vulnerability_type: str) -> str:
    """Map a vulnerability type onto a reporting category."""
    mapping = (
        ("injection", "prompt_injection"),
        ("jailbreak", "jailbreak_attack"),
        ("bias", "algorithmic_bias"),
        ("harmful", "harmful_content"),
        ("malware", "harmful_content"),
        ("leak", "data_leakage"),
        ("encoding", "input_manipulation"),
    )
    for token, category in mapping:
        if token in vulnerability_type:
            return category
    return "ai_security"
def _get_ai_security_recommendation(self, vulnerability_type: str, probe_name: str) -> str:
    """Return remediation guidance keyed on the vulnerability type."""
    guidance = (
        ("injection", "Implement robust input validation, prompt sanitization, and use structured prompts to prevent injection attacks. Consider implementing content filtering and output validation."),
        ("jailbreak", "Strengthen model alignment and safety measures. Implement content filtering, use constitutional AI techniques, and add safety classifiers for output validation."),
        ("bias", "Review training data for bias, implement fairness constraints, use debiasing techniques, and conduct regular bias audits across different demographic groups."),
        ("harmful", "Implement strict content policies, use safety classifiers, add human oversight for sensitive outputs, and refuse to generate harmful content."),
        ("leak", "Review data handling practices, implement data anonymization, use differential privacy techniques, and audit model responses for sensitive information disclosure."),
        ("encoding", "Normalize and validate all input encodings, implement proper character filtering, and use encoding-aware input processing."),
    )
    for token, text in guidance:
        if token in vulnerability_type:
            return text
    return f"Address the {vulnerability_type} vulnerability by implementing appropriate AI safety measures, input validation, and output monitoring."
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
    """Aggregate findings into severity/category/type/probe counts."""
    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
    category_counts = {}
    vulnerability_counts = {}
    probe_counts = {}
    for finding in findings:
        # Fix: an unexpected severity label no longer raises KeyError;
        # it is counted under its own key instead.
        severity_counts[finding.severity] = severity_counts.get(finding.severity, 0) + 1
        # Count by category
        category = finding.category
        category_counts[category] = category_counts.get(category, 0) + 1
        # Count by vulnerability type
        vuln_type = finding.metadata.get("vulnerability_type", "unknown")
        vulnerability_counts[vuln_type] = vulnerability_counts.get(vuln_type, 0) + 1
        # Count by probe
        probe = finding.metadata.get("probe_name", "unknown")
        probe_counts[probe] = probe_counts.get(probe, 0) + 1
    return {
        "total_findings": len(findings),
        "severity_counts": severity_counts,
        "category_counts": category_counts,
        "vulnerability_counts": vulnerability_counts,
        "probe_counts": probe_counts,
        "ai_security_issues": len(findings),
        "high_risk_vulnerabilities": severity_counts.get("high", 0) + severity_counts.get("critical", 0)
    }

View File

@@ -1,37 +0,0 @@
"""
CI/CD Security Modules
This package contains modules for CI/CD pipeline and workflow security testing.
Available modules:
- Zizmor: GitHub Actions workflow security analyzer
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type

from ..base import BaseModule

# Registry that collects every CI/CD security module class in this package.
CICD_SECURITY_MODULES: List[Type[BaseModule]] = []


def register_module(module_class: Type[BaseModule]):
    """Class decorator: add *module_class* to the CI/CD security registry."""
    CICD_SECURITY_MODULES.append(module_class)
    return module_class


def get_available_modules() -> List[Type[BaseModule]]:
    """Return a shallow copy of the registered CI/CD security modules."""
    return list(CICD_SECURITY_MODULES)


# Importing the module triggers its @register_module decorator.
from .zizmor import ZizmorModule

View File

@@ -1,595 +0,0 @@
"""
Zizmor CI/CD Security Module
This module uses Zizmor to analyze GitHub Actions workflows for security
vulnerabilities and misconfigurations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class ZizmorModule(BaseModule):
"""Zizmor GitHub Actions security analysis module"""
def get_metadata(self) -> ModuleMetadata:
    """Return static metadata describing the Zizmor module.

    Includes the JSON-schema for accepted configuration and the shape of
    the findings this module emits.
    """
    return ModuleMetadata(
        name="zizmor",
        version="0.2.0",
        description="GitHub Actions workflow security analyzer for detecting vulnerabilities and misconfigurations",
        author="FuzzForge Team",
        category="cicd_security",
        tags=["github-actions", "cicd", "workflow", "security", "pipeline"],
        input_schema={
            "type": "object",
            "properties": {
                "workflow_dir": {
                    "type": "string",
                    "default": ".github/workflows",
                    "description": "Directory containing GitHub Actions workflows"
                },
                "workflow_files": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific workflow files to analyze"
                },
                "format": {
                    "type": "string",
                    "enum": ["json", "sarif", "pretty"],
                    "default": "json",
                    "description": "Output format"
                },
                "verbose": {
                    "type": "boolean",
                    # Fix: bare `false`/`true` are not Python literals
                    # (NameError); schema booleans must be False/True.
                    "default": False,
                    "description": "Enable verbose output"
                },
                "offline": {
                    "type": "boolean",
                    "default": False,
                    "description": "Run in offline mode (no internet lookups)"
                },
                "no_online_audits": {
                    "type": "boolean",
                    "default": True,
                    "description": "Disable online audits for faster execution"
                },
                "pedantic": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable pedantic mode (more strict checking)"
                },
                "rules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific rules to run"
                },
                "ignore_rules": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Rules to ignore"
                },
                "min_severity": {
                    "type": "string",
                    "enum": ["unknown", "informational", "low", "medium", "high"],
                    "default": "low",
                    "description": "Minimum severity level to report"
                }
            }
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "rule_id": {"type": "string"},
                            "rule_name": {"type": "string"},
                            "severity": {"type": "string"},
                            "workflow_file": {"type": "string"},
                            "line_number": {"type": "integer"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Require a workflow directory or explicit workflow files.

    Raises ValueError when neither is provided; returns True otherwise.
    """
    has_dir = bool(config.get("workflow_dir", ".github/workflows"))
    has_files = bool(config.get("workflow_files", []))
    if not (has_dir or has_files):
        raise ValueError("Either workflow_dir or workflow_files must be specified")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Execute Zizmor GitHub Actions security analysis.

    Validates inputs, verifies the zizmor binary is available, locates
    workflow files, runs the analysis, and wraps the findings in a
    ModuleResult. Exceptions are converted into a "failed" result
    rather than propagated to the caller.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Zizmor GitHub Actions security analysis")
        # Check Zizmor installation
        await self._check_zizmor_installation()
        # Find workflow files
        workflow_files = self._find_workflow_files(workspace, config)
        if not workflow_files:
            # Nothing to scan counts as success, not an error.
            logger.info("No GitHub Actions workflow files found")
            return self.create_result(
                findings=[],
                status="success",
                summary={"total_findings": 0, "workflows_scanned": 0}
            )
        # Run Zizmor analysis
        findings = await self._run_zizmor_analysis(workflow_files, config, workspace)
        # Create summary
        summary = self._create_summary(findings, len(workflow_files))
        logger.info(f"Zizmor found {len(findings)} CI/CD security issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        # Report failures through the result object so the pipeline continues.
        logger.error(f"Zizmor module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_zizmor_installation(self):
    """Verify the zizmor binary is on PATH and runnable.

    Raises:
        RuntimeError: with an install hint when zizmor is missing, or
            wrapping the underlying error for any other failure.
    """
    try:
        process = await asyncio.create_subprocess_exec(
            "zizmor", "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
    except FileNotFoundError:
        raise RuntimeError("Zizmor not found. Install with: cargo install zizmor")
    except Exception as e:
        raise RuntimeError(f"Zizmor installation check failed: {e}")
    # Fix: check the exit code outside the try block. Previously this
    # RuntimeError was raised inside the try, re-caught by the generic
    # `except Exception` handler, and re-wrapped into a misleading
    # "installation check failed" message.
    if process.returncode != 0:
        raise RuntimeError("Zizmor not found. Install with: cargo install zizmor")
def _find_workflow_files(self, workspace: Path, config: Dict[str, Any]) -> List[Path]:
    """Locate GitHub Actions workflow files to analyze.

    Explicitly-listed files take precedence; otherwise the workflow
    directory is globbed for YAML files. Returns a de-duplicated list.
    """
    workflow_files: List[Path] = []
    # Check for specific files
    for file_path in config.get("workflow_files", []):
        full_path = workspace / file_path
        if full_path.exists():
            workflow_files.append(full_path)
    # Check workflow directory
    if not workflow_files:
        workflow_dir = workspace / config.get("workflow_dir", ".github/workflows")
        if workflow_dir.exists():
            # Find YAML files
            for pattern in ["*.yml", "*.yaml"]:
                workflow_files.extend(workflow_dir.glob(pattern))
    # Fix: de-duplicate while preserving a deterministic order —
    # list(set(...)) made the scan order nondeterministic across runs.
    return list(dict.fromkeys(workflow_files))
async def _run_zizmor_analysis(self, workflow_files: List[Path], config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Analyze each workflow file in turn and aggregate the findings."""
    aggregated: List[ModuleFinding] = []
    try:
        for path in workflow_files:
            aggregated.extend(await self._analyze_workflow_file(path, config, workspace))
    except Exception as e:
        logger.warning(f"Error running Zizmor analysis: {e}")
    return aggregated
async def _analyze_workflow_file(self, workflow_file: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Analyze a single workflow file with Zizmor.

    Builds the CLI invocation from `config`, runs zizmor in the workspace,
    and parses its stdout into findings. Errors are logged and yield an
    empty list rather than raising.
    """
    findings = []
    try:
        # Build Zizmor command
        cmd = ["zizmor"]
        # Add format
        format_type = config.get("format", "json")
        cmd.extend(["--format", format_type])
        # Add minimum severity
        min_severity = config.get("min_severity", "low")
        cmd.extend(["--min-severity", min_severity])
        # Add flags
        if config.get("verbose", False):
            cmd.append("--verbose")
        if config.get("offline", False):
            cmd.append("--offline")
        if config.get("no_online_audits", True):
            cmd.append("--no-online-audits")
        if config.get("pedantic", False):
            cmd.append("--pedantic")
        # Add specific rules
        rules = config.get("rules", [])
        for rule in rules:
            cmd.extend(["--rules", rule])
        # Add ignore rules
        ignore_rules = config.get("ignore_rules", [])
        for rule in ignore_rules:
            cmd.extend(["--ignore", rule])
        # Add workflow file
        cmd.append(str(workflow_file))
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run Zizmor
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results (even if return code is non-zero, as it may contain findings)
        if stdout.strip():
            findings = self._parse_zizmor_output(
                stdout.decode(), workflow_file, workspace, format_type
            )
        elif stderr.strip():
            # No stdout at all: treat stderr as a per-file failure, not fatal.
            logger.warning(f"Zizmor analysis failed for {workflow_file}: {stderr.decode()}")
    except Exception as e:
        logger.warning(f"Error analyzing workflow file {workflow_file}: {e}")
    return findings
def _parse_zizmor_output(self, output: str, workflow_file: Path, workspace: Path, format_type: str) -> List[ModuleFinding]:
    """Dispatch output parsing based on the requested format."""
    parsers = {
        "json": self._parse_json_output,
        "sarif": self._parse_sarif_output,
    }
    # Any other format (e.g. "pretty") falls back to plain-text parsing.
    parser = parsers.get(format_type, self._parse_text_output)
    try:
        return parser(output, workflow_file, workspace)
    except Exception as e:
        logger.warning(f"Error parsing Zizmor output: {e}")
        return []
def _parse_json_output(self, output: str, workflow_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse Zizmor JSON output (a single object or a list of objects)."""
    collected: List[ModuleFinding] = []
    if not output.strip():
        return collected
    try:
        payload = json.loads(output)
    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse Zizmor JSON output: {e}")
        return collected
    # Normalize to a list so both shapes share one code path; any other
    # top-level type yields no findings.
    if isinstance(payload, dict):
        results = [payload]
    elif isinstance(payload, list):
        results = payload
    else:
        results = []
    for item in results:
        collected.extend(self._process_zizmor_result(item, workflow_file, workspace))
    return collected
def _parse_sarif_output(self, output: str, workflow_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse Zizmor SARIF output: one finding per result across all runs."""
    collected = []
    try:
        document = json.loads(output)
        for run in document.get("runs", []):
            for result in run.get("results", []):
                finding = self._create_sarif_finding(result, workflow_file, workspace)
                if finding:
                    collected.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing SARIF output: {e}")
    return collected
def _parse_text_output(self, output: str, workflow_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse Zizmor plain-text output, one finding per non-comment line."""
    collected = []
    try:
        for raw in output.strip().split('\n'):
            # Skip blank lines and comment lines.
            if not raw.strip() or raw.startswith('#'):
                continue
            finding = self._create_text_finding(raw, workflow_file, workspace)
            if finding:
                collected.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing text output: {e}")
    return collected
def _process_zizmor_result(self, result: Dict[str, Any], workflow_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Convert one Zizmor JSON result into zero or more findings.

    A result with multiple locations yields one finding per location;
    a result without locations yields a single location-less finding.
    Errors are logged and yield an empty list.
    """
    findings = []
    try:
        # Extract rule information
        rule_id = result.get("rule", {}).get("id", "unknown")
        rule_name = result.get("rule", {}).get("desc", rule_id)
        severity = result.get("severity", "medium")
        message = result.get("message", "")
        # Extract location information
        locations = result.get("locations", [])
        if not locations:
            # Create finding without specific location
            finding = self._create_zizmor_finding(
                rule_id, rule_name, severity, message, workflow_file, workspace
            )
            if finding:
                findings.append(finding)
        else:
            # Create finding for each location
            for location in locations:
                line_number = location.get("line", 0)
                column = location.get("column", 0)
                finding = self._create_zizmor_finding(
                    rule_id, rule_name, severity, message, workflow_file, workspace,
                    line_number, column
                )
                if finding:
                    findings.append(finding)
    except Exception as e:
        logger.warning(f"Error processing Zizmor result: {e}")
    return findings
def _create_zizmor_finding(self, rule_id: str, rule_name: str, severity: str, message: str,
                           workflow_file: Path, workspace: Path, line_number: int = None, column: int = None) -> ModuleFinding:
    """Create finding from Zizmor analysis.

    Maps Zizmor's severity onto the module scale, derives a category and
    recommendation from the rule, and records rule/location metadata.
    Returns None when finding construction fails.
    """
    try:
        # Map Zizmor severity to our standard levels
        finding_severity = self._map_severity(severity)
        # Create relative path; fall back to the absolute path when the
        # workflow file lies outside the workspace.
        try:
            rel_path = workflow_file.relative_to(workspace)
            file_path = str(rel_path)
        except ValueError:
            file_path = str(workflow_file)
        # Get category and recommendation
        category = self._get_cicd_category(rule_id, rule_name)
        recommendation = self._get_cicd_recommendation(rule_id, rule_name, message)
        finding = self.create_finding(
            title=f"CI/CD Security Issue: {rule_name}",
            description=message or f"Zizmor detected a security issue: {rule_name}",
            severity=finding_severity,
            category=category,
            file_path=file_path,
            # NOTE(review): a line_number of 0 is treated as "no line" here —
            # confirm callers never pass 0 for a genuine first-line location.
            line_start=line_number if line_number else None,
            recommendation=recommendation,
            metadata={
                "rule_id": rule_id,
                "rule_name": rule_name,
                "zizmor_severity": severity,
                "workflow_file": str(workflow_file.name),
                "line_number": line_number,
                "column": column,
                "tool": "zizmor"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error creating Zizmor finding: {e}")
        return None
def _create_sarif_finding(self, result: Dict[str, Any], workflow_file: Path, workspace: Path) -> ModuleFinding:
    """Convert one SARIF result entry into a finding via the common builder."""
    try:
        rule_id = result.get("ruleId", "unknown")
        text = result.get("message", {}).get("text", "")
        level = result.get("level", "warning")
        # Take the start line of the first physical location, if any.
        line_number = None
        locations = result.get("locations", [])
        if locations:
            region = locations[0].get("physicalLocation", {}).get("region", {})
            line_number = region.get("startLine")
        return self._create_zizmor_finding(
            rule_id, rule_id, level, text, workflow_file, workspace, line_number
        )
    except Exception as e:
        logger.warning(f"Error creating SARIF finding: {e}")
        return None
def _create_text_finding(self, line: str, workflow_file: Path, workspace: Path) -> ModuleFinding:
    """Build a generic medium-severity finding from one line of text output."""
    try:
        # Prefer a workspace-relative path; fall back to the absolute one.
        try:
            file_path = str(workflow_file.relative_to(workspace))
        except ValueError:
            file_path = str(workflow_file)
        return self.create_finding(
            title="CI/CD Security Issue",
            description=line.strip(),
            severity="medium",
            category="workflow_security",
            file_path=file_path,
            recommendation="Review and address the workflow security issue identified by Zizmor.",
            metadata={
                "detection_line": line.strip(),
                "workflow_file": str(workflow_file.name),
                "tool": "zizmor"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating text finding: {e}")
        return None
def _map_severity(self, zizmor_severity: str) -> str:
"""Map Zizmor severity to our standard levels"""
severity_map = {
"high": "high",
"medium": "medium",
"low": "low",
"informational": "info",
"unknown": "low",
"error": "high",
"warning": "medium",
"note": "low"
}
return severity_map.get(zizmor_severity.lower(), "medium")
def _get_cicd_category(self, rule_id: str, rule_name: str) -> str:
"""Get category for CI/CD security issue"""
rule_lower = f"{rule_id} {rule_name}".lower()
if any(term in rule_lower for term in ["secret", "token", "credential", "password"]):
return "secret_exposure"
elif any(term in rule_lower for term in ["permission", "access", "privilege"]):
return "permission_escalation"
elif any(term in rule_lower for term in ["injection", "command", "script"]):
return "code_injection"
elif any(term in rule_lower for term in ["artifact", "cache", "upload"]):
return "artifact_security"
elif any(term in rule_lower for term in ["environment", "env", "variable"]):
return "environment_security"
elif any(term in rule_lower for term in ["network", "external", "download"]):
return "network_security"
else:
return "workflow_security"
def _get_cicd_recommendation(self, rule_id: str, rule_name: str, message: str) -> str:
"""Get recommendation for CI/CD security issue"""
rule_lower = f"{rule_id} {rule_name}".lower()
if "secret" in rule_lower or "token" in rule_lower:
return "Store secrets securely using GitHub Secrets or environment variables. Never hardcode credentials in workflow files."
elif "permission" in rule_lower:
return "Follow the principle of least privilege. Grant only necessary permissions and use specific permission scopes."
elif "injection" in rule_lower:
return "Avoid using user input directly in shell commands. Use proper escaping, validation, or structured approaches."
elif "artifact" in rule_lower:
return "Secure artifact handling by validating checksums, using signed artifacts, and restricting artifact access."
elif "environment" in rule_lower:
return "Protect environment variables and avoid exposing sensitive information in logs or outputs."
elif "network" in rule_lower:
return "Use HTTPS for external connections, validate certificates, and avoid downloading from untrusted sources."
elif message:
return f"Address the identified issue: {message}"
else:
return f"Review and fix the workflow security issue: {rule_name}"
def _create_summary(self, findings: List[ModuleFinding], workflows_count: int) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
rule_counts = {}
workflow_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by rule
rule_id = finding.metadata.get("rule_id", "unknown")
rule_counts[rule_id] = rule_counts.get(rule_id, 0) + 1
# Count by workflow
workflow = finding.metadata.get("workflow_file", "unknown")
workflow_counts[workflow] = workflow_counts.get(workflow, 0) + 1
return {
"total_findings": len(findings),
"workflows_scanned": workflows_count,
"severity_counts": severity_counts,
"category_counts": category_counts,
"top_rules": dict(sorted(rule_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"workflows_with_issues": len(workflow_counts),
"workflow_issue_counts": dict(sorted(workflow_counts.items(), key=lambda x: x[1], reverse=True)[:10])
}

View File

@@ -1,49 +0,0 @@
"""
Fuzzing Modules
This package contains modules for various fuzzing techniques and tools.
Available modules:
- LibFuzzer: LLVM's coverage-guided fuzzing engine
- AFL++: Advanced American Fuzzy Lop with modern features
- AFL-RS: Rust-based AFL implementation
- Atheris: Python fuzzing engine for finding bugs in Python code
- Cargo Fuzz: Rust fuzzing integration with libFuzzer
- Go-Fuzz: Coverage-guided fuzzing for Go packages
- OSS-Fuzz: Google's continuous fuzzing for open source
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule
# Module registry for automatic discovery
FUZZING_MODULES: List[Type[BaseModule]] = []
def register_module(module_class: Type[BaseModule]):
    """Class decorator: add *module_class* to the fuzzing registry.

    Returns the class unchanged so it can be used as a decorator.
    """
    FUZZING_MODULES.append(module_class)
    return module_class
def get_available_modules() -> List[Type[BaseModule]]:
    """Return a shallow snapshot of the registered fuzzing modules."""
    # list() copies, so callers cannot mutate the registry itself.
    return list(FUZZING_MODULES)
# Import modules to trigger registration
from .libfuzzer import LibFuzzerModule
from .aflplusplus import AFLPlusPlusModule
from .aflrs import AFLRSModule
from .atheris import AtherisModule
from .cargo_fuzz import CargoFuzzModule
from .go_fuzz import GoFuzzModule
from .oss_fuzz import OSSFuzzModule

View File

@@ -1,734 +0,0 @@
"""
AFL++ Fuzzing Module
This module uses AFL++ (Advanced American Fuzzy Lop) for coverage-guided
fuzzing with modern features and optimizations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
import re
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class AFLPlusPlusModule(BaseModule):
    """AFL++ advanced fuzzing module.

    Wraps the ``afl-fuzz`` binary: validates inputs, runs single or parallel
    fuzzing campaigns with a wall-clock budget, and converts crash files in
    the AFL output directory into findings.
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata.

        Note: the original schema used JSON-style ``false`` literals, which
        raise NameError in Python; they are Python ``False`` here.
        """
        return ModuleMetadata(
            name="aflplusplus",
            version="4.09c",
            description="Advanced American Fuzzy Lop with modern features for coverage-guided fuzzing",
            author="FuzzForge Team",
            category="fuzzing",
            tags=["coverage-guided", "american-fuzzy-lop", "advanced", "mutation", "instrumentation"],
            input_schema={
                "type": "object",
                "properties": {
                    "target_binary": {
                        "type": "string",
                        "description": "Path to the target binary (compiled with afl-gcc/afl-clang)"
                    },
                    "input_dir": {
                        "type": "string",
                        "description": "Directory containing seed input files"
                    },
                    "output_dir": {
                        "type": "string",
                        "default": "afl_output",
                        "description": "Output directory for AFL++ results"
                    },
                    "dictionary": {
                        "type": "string",
                        "description": "Dictionary file for fuzzing keywords"
                    },
                    "timeout": {
                        "type": "integer",
                        "default": 1000,
                        "description": "Timeout for each execution (ms)"
                    },
                    "memory_limit": {
                        "type": "integer",
                        "default": 50,
                        "description": "Memory limit for child process (MB)"
                    },
                    "skip_deterministic": {
                        "type": "boolean",
                        "default": False,
                        "description": "Skip deterministic mutations"
                    },
                    "no_arith": {
                        "type": "boolean",
                        "default": False,
                        "description": "Skip arithmetic mutations"
                    },
                    "shuffle_queue": {
                        "type": "boolean",
                        "default": False,
                        "description": "Shuffle queue entries"
                    },
                    "max_total_time": {
                        "type": "integer",
                        "default": 3600,
                        "description": "Maximum total fuzzing time (seconds)"
                    },
                    "power_schedule": {
                        "type": "string",
                        "enum": ["explore", "fast", "coe", "lin", "quad", "exploit", "rare"],
                        "default": "fast",
                        "description": "Power schedule algorithm"
                    },
                    "mutation_mode": {
                        "type": "string",
                        "enum": ["default", "old", "mopt"],
                        "default": "default",
                        "description": "Mutation mode to use"
                    },
                    "parallel_fuzzing": {
                        "type": "boolean",
                        "default": False,
                        "description": "Enable parallel fuzzing with multiple instances"
                    },
                    "fuzzer_instances": {
                        "type": "integer",
                        "default": 1,
                        "description": "Number of parallel fuzzer instances"
                    },
                    "master_instance": {
                        "type": "string",
                        "default": "master",
                        "description": "Name for master fuzzer instance"
                    },
                    "slave_prefix": {
                        "type": "string",
                        "default": "slave",
                        "description": "Prefix for slave fuzzer instances"
                    },
                    "hang_timeout": {
                        "type": "integer",
                        "default": 1000,
                        "description": "Timeout for detecting hangs (ms)"
                    },
                    "crash_mode": {
                        "type": "boolean",
                        "default": False,
                        "description": "Run in crash exploration mode"
                    },
                    "target_args": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Arguments to pass to target binary"
                    },
                    "env_vars": {
                        "type": "object",
                        "description": "Environment variables to set"
                    },
                    "ignore_finds": {
                        "type": "boolean",
                        "default": False,
                        "description": "Ignore existing findings and start fresh"
                    },
                    "force_deterministic": {
                        "type": "boolean",
                        "default": False,
                        "description": "Force deterministic mutations"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "crash_id": {"type": "string"},
                                "crash_file": {"type": "string"},
                                "crash_type": {"type": "string"},
                                "signal": {"type": "string"}
                            }
                        }
                    }
                }
            }
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration.

        Raises:
            ValueError: if ``target_binary`` or ``input_dir`` is missing.
        """
        target_binary = config.get("target_binary")
        if not target_binary:
            raise ValueError("target_binary is required for AFL++")
        input_dir = config.get("input_dir")
        if not input_dir:
            raise ValueError("input_dir is required for AFL++")
        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute AFL++ fuzzing.

        Validates config and workspace, checks prerequisites, runs the
        campaign, and returns a ModuleResult with crash findings. Failures
        are reported via a "failed" result rather than raised.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info("Running AFL++ fuzzing campaign")
            # Check prerequisites (best-effort; only warns on problems)
            await self._check_afl_prerequisites(workspace)
            # Setup directories and files
            target_binary, input_dir, output_dir = self._setup_afl_directories(config, workspace)
            # Run AFL++ fuzzing
            findings = await self._run_afl_fuzzing(target_binary, input_dir, output_dir, config, workspace)
            # Create summary
            summary = self._create_summary(findings, output_dir)
            logger.info(f"AFL++ found {len(findings)} crashes")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"AFL++ module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )

    async def _check_afl_prerequisites(self, workspace: Path):
        """Check AFL++ prerequisites and system setup.

        Best-effort: raises nothing to the caller — problems are logged as
        warnings so a missing nicety does not abort the campaign.
        """
        try:
            # Check if afl-fuzz exists on PATH
            process = await asyncio.create_subprocess_exec(
                "which", "afl-fuzz",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError("afl-fuzz not found. Please install AFL++")
            # AFL wants core dumps to go to files named "core"; anything else
            # (e.g. piping to apport/systemd-coredump) slows crash detection.
            try:
                with open("/proc/sys/kernel/core_pattern", "r") as f:
                    core_pattern = f.read().strip()
                if core_pattern != "core":
                    logger.warning(f"Core dump pattern is '{core_pattern}', AFL++ may not work optimally")
            except Exception:
                logger.warning("Could not check core dump pattern")
        except Exception as e:
            logger.warning(f"AFL++ prerequisite check failed: {e}")

    def _setup_afl_directories(self, config: Dict[str, Any], workspace: Path):
        """Setup AFL++ directories and validate files.

        Returns:
            (target_binary, input_dir, output_dir) as Paths.

        Raises:
            FileNotFoundError: target binary or input directory missing.
            ValueError: input directory contains no seed files.
        """
        # Check target binary
        target_binary = workspace / config["target_binary"]
        if not target_binary.exists():
            raise FileNotFoundError(f"Target binary not found: {target_binary}")
        # Check input directory
        input_dir = workspace / config["input_dir"]
        if not input_dir.exists():
            raise FileNotFoundError(f"Input directory not found: {input_dir}")
        # AFL refuses to start without at least one seed file
        input_files = list(input_dir.glob("*"))
        if not input_files:
            raise ValueError(f"Input directory is empty: {input_dir}")
        # Create output directory
        output_dir = workspace / config.get("output_dir", "afl_output")
        output_dir.mkdir(exist_ok=True)
        return target_binary, input_dir, output_dir

    async def _run_afl_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Dispatch to single-instance or parallel fuzzing based on config."""
        findings = []
        try:
            if config.get("parallel_fuzzing", False):
                findings = await self._run_parallel_fuzzing(
                    target_binary, input_dir, output_dir, config, workspace
                )
            else:
                findings = await self._run_single_fuzzing(
                    target_binary, input_dir, output_dir, config, workspace
                )
        except Exception as e:
            logger.warning(f"Error running AFL++ fuzzing: {e}")
        return findings

    async def _run_single_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Run single-instance AFL++ fuzzing.

        Builds the afl-fuzz command line, runs it for at most
        ``max_total_time`` seconds, then parses the output directory.
        """
        findings = []
        try:
            # Build AFL++ command
            cmd = ["afl-fuzz"]
            # Add input and output directories
            cmd.extend(["-i", str(input_dir)])
            cmd.extend(["-o", str(output_dir)])
            # Add dictionary if specified (silently skipped when missing)
            dictionary = config.get("dictionary")
            if dictionary:
                dict_path = workspace / dictionary
                if dict_path.exists():
                    cmd.extend(["-x", str(dict_path)])
            # Add per-execution timeout
            timeout = config.get("timeout", 1000)
            cmd.extend(["-t", str(timeout)])
            # Add memory limit
            memory_limit = config.get("memory_limit", 50)
            cmd.extend(["-m", str(memory_limit)])
            # Add power schedule
            power_schedule = config.get("power_schedule", "fast")
            cmd.extend(["-p", power_schedule])
            # Add mutation options
            if config.get("skip_deterministic", False):
                cmd.append("-d")
            # NOTE(review): afl-fuzz's "-a" selects the input format
            # (ascii/binary) in recent AFL++; confirm it matches "no_arith".
            if config.get("no_arith", False):
                cmd.append("-a")
            if config.get("shuffle_queue", False):
                cmd.append("-Z")
            # Add hang timeout
            hang_timeout = config.get("hang_timeout", 1000)
            cmd.extend(["-T", str(hang_timeout)])
            # Add crash exploration mode
            if config.get("crash_mode", False):
                cmd.append("-C")
            # NOTE(review): afl-fuzz's "-f" names the file the target reads
            # input from; confirm it matches the "ignore_finds" intent.
            if config.get("ignore_finds", False):
                cmd.append("-f")
            # Force deterministic mutations
            if config.get("force_deterministic", False):
                cmd.append("-D")
            # Add target binary and arguments after the "--" separator
            cmd.append("--")
            cmd.append(str(target_binary))
            target_args = config.get("target_args", [])
            cmd.extend(target_args)
            # Set up environment
            env = os.environ.copy()
            env_vars = config.get("env_vars", {})
            env.update(env_vars)
            # Set AFL environment variables
            env["AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"] = "1"  # Avoid interactive prompts
            env["AFL_SKIP_CPUFREQ"] = "1"  # Skip CPU frequency checks
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run AFL++ with a wall-clock budget
            max_total_time = config.get("max_total_time", 3600)
            try:
                process = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    cwd=workspace,
                    env=env
                )
                # Wait for the budget, then terminate (SIGTERM, then SIGKILL)
                try:
                    stdout, stderr = await asyncio.wait_for(
                        process.communicate(), timeout=max_total_time
                    )
                except asyncio.TimeoutError:
                    logger.info(f"AFL++ fuzzing timed out after {max_total_time} seconds")
                    process.terminate()
                    try:
                        await asyncio.wait_for(process.wait(), timeout=10)
                    except asyncio.TimeoutError:
                        process.kill()
                        await process.wait()
                # Parse results from output directory
                findings = self._parse_afl_results(output_dir, workspace)
            except Exception as e:
                logger.warning(f"Error running AFL++ process: {e}")
        except Exception as e:
            logger.warning(f"Error in single fuzzing: {e}")
        return findings

    async def _run_parallel_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Run parallel AFL++ fuzzing: one -M master plus -S slave instances."""
        findings = []
        try:
            # NOTE(review): schema default for fuzzer_instances is 1, but the
            # code default here is 2 — confirm which is intended.
            fuzzer_instances = config.get("fuzzer_instances", 2)
            master_name = config.get("master_instance", "master")
            slave_prefix = config.get("slave_prefix", "slave")
            processes = []
            # Start master instance
            master_cmd = await self._build_afl_command(
                target_binary, input_dir, output_dir, config, workspace,
                instance_name=master_name, is_master=True
            )
            master_process = await asyncio.create_subprocess_exec(
                *master_cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=self._get_afl_env(config)
            )
            processes.append(master_process)
            # Start slave instances (slave01, slave02, ...)
            for i in range(1, fuzzer_instances):
                slave_name = f"{slave_prefix}{i:02d}"
                slave_cmd = await self._build_afl_command(
                    target_binary, input_dir, output_dir, config, workspace,
                    instance_name=slave_name, is_master=False
                )
                slave_process = await asyncio.create_subprocess_exec(
                    *slave_cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    cwd=workspace,
                    env=self._get_afl_env(config)
                )
                processes.append(slave_process)
            # Let the campaign run for the budget, then terminate everything
            max_total_time = config.get("max_total_time", 3600)
            try:
                await asyncio.sleep(max_total_time)
            finally:
                # Terminate all processes (SIGTERM first, SIGKILL if stuck)
                for process in processes:
                    if process.returncode is None:
                        process.terminate()
                        try:
                            await asyncio.wait_for(process.wait(), timeout=10)
                        except asyncio.TimeoutError:
                            process.kill()
                            await process.wait()
            # Parse results from output directory
            findings = self._parse_afl_results(output_dir, workspace)
        except Exception as e:
            logger.warning(f"Error in parallel fuzzing: {e}")
        return findings

    async def _build_afl_command(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path, instance_name: str, is_master: bool) -> List[str]:
        """Build the afl-fuzz command line for one parallel fuzzer instance."""
        cmd = ["afl-fuzz"]
        # Add input and output directories
        cmd.extend(["-i", str(input_dir)])
        cmd.extend(["-o", str(output_dir)])
        # -M marks the (single) master; -S marks a secondary instance
        if is_master:
            cmd.extend(["-M", instance_name])
        else:
            cmd.extend(["-S", instance_name])
        # Add other options (same as single fuzzing)
        dictionary = config.get("dictionary")
        if dictionary:
            dict_path = workspace / dictionary
            if dict_path.exists():
                cmd.extend(["-x", str(dict_path)])
        cmd.extend(["-t", str(config.get("timeout", 1000))])
        cmd.extend(["-m", str(config.get("memory_limit", 50))])
        cmd.extend(["-p", config.get("power_schedule", "fast")])
        if config.get("skip_deterministic", False):
            cmd.append("-d")
        if config.get("no_arith", False):
            cmd.append("-a")
        # Add target
        cmd.append("--")
        cmd.append(str(target_binary))
        cmd.extend(config.get("target_args", []))
        return cmd

    def _get_afl_env(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Build the environment for AFL++ child processes."""
        env = os.environ.copy()
        env.update(config.get("env_vars", {}))
        env["AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"] = "1"
        env["AFL_SKIP_CPUFREQ"] = "1"
        return env

    def _parse_afl_results(self, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
        """Collect crash files from single- or multi-instance output layouts."""
        findings = []
        try:
            # Look for crashes directories
            crashes_dirs = []
            # Single instance: <output>/crashes
            crashes_dir = output_dir / "crashes"
            if crashes_dir.exists():
                crashes_dirs.append(crashes_dir)
            # Multiple instances: <output>/<instance>/crashes
            for instance_dir in output_dir.iterdir():
                if instance_dir.is_dir():
                    instance_crashes = instance_dir / "crashes"
                    if instance_crashes.exists():
                        crashes_dirs.append(instance_crashes)
            # Process crash files (AFL names them "id:...", skipping README)
            for crashes_dir in crashes_dirs:
                crash_files = [f for f in crashes_dir.iterdir() if f.is_file() and f.name.startswith("id:")]
                for crash_file in crash_files:
                    finding = self._create_afl_crash_finding(crash_file, workspace)
                    if finding:
                        findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing AFL++ results: {e}")
        return findings

    def _create_afl_crash_finding(self, crash_file: Path, workspace: Path) -> ModuleFinding:
        """Create a finding from one AFL++ crash file.

        Returns None when the finding cannot be constructed.
        """
        try:
            # Parse crash filename for information (signal, id, src, ...)
            filename = crash_file.name
            crash_info = self._parse_afl_filename(filename)
            # Include a truncated hex dump of the crashing input
            crash_content = ""
            try:
                crash_data = crash_file.read_bytes()[:1000]
                crash_content = crash_data.hex()[:200]  # Hex representation, limited
            except Exception:
                pass
            # Determine severity based on signal
            severity = self._get_crash_severity(crash_info.get("signal", ""))
            # Prefer a workspace-relative path
            try:
                rel_path = crash_file.relative_to(workspace)
                file_path = str(rel_path)
            except ValueError:
                file_path = str(crash_file)
            finding = self.create_finding(
                title=f"AFL++ Crash: {crash_info.get('signal', 'Unknown')}",
                description=f"AFL++ discovered a crash with signal {crash_info.get('signal', 'unknown')} in the target program",
                severity=severity,
                category=self._get_crash_category(crash_info.get("signal", "")),
                file_path=file_path,
                recommendation=self._get_afl_crash_recommendation(crash_info.get("signal", "")),
                metadata={
                    "crash_id": crash_info.get("id", ""),
                    "signal": crash_info.get("signal", ""),
                    "src": crash_info.get("src", ""),
                    "crash_file": crash_file.name,
                    "crash_content_hex": crash_content,
                    "fuzzer": "afl++"
                }
            )
            return finding
        except Exception as e:
            logger.warning(f"Error creating AFL++ crash finding: {e}")
            return None

    def _parse_afl_filename(self, filename: str) -> Dict[str, str]:
        """Parse an AFL++ crash filename into its key:value fields.

        AFL++ crash filename format: id:XXXXXX,sig:XX,src:XXXXXX,op:XXX,rep:X
        NOTE(review): the field key is "sig", but callers look up "signal" —
        confirm which key AFL++ actually emits.
        """
        info = {}
        try:
            parts = filename.split(',')
            for part in parts:
                if ':' in part:
                    key, value = part.split(':', 1)
                    info[key] = value
        except Exception:
            pass
        return info

    def _get_crash_severity(self, signal: str) -> str:
        """Map a crash signal (number or name) to a severity level."""
        if not signal:
            return "medium"
        # Critical signals indicating memory corruption
        if signal in ["11", "sigsegv", "segv"]:  # Segmentation fault
            return "critical"
        elif signal in ["6", "sigabrt", "abrt"]:  # Abort
            return "high"
        elif signal in ["4", "sigill", "ill"]:  # Illegal instruction
            return "high"
        elif signal in ["8", "sigfpe", "fpe"]:  # Floating point exception
            return "medium"
        elif signal in ["9", "sigkill", "kill"]:  # Kill signal
            return "medium"
        else:
            return "medium"

    def _get_crash_category(self, signal: str) -> str:
        """Map a crash signal to a finding category."""
        if not signal:
            return "program_crash"
        if signal in ["11", "sigsegv", "segv"]:
            return "memory_corruption"
        elif signal in ["6", "sigabrt", "abrt"]:
            return "assertion_failure"
        elif signal in ["4", "sigill", "ill"]:
            return "illegal_instruction"
        elif signal in ["8", "sigfpe", "fpe"]:
            return "arithmetic_error"
        else:
            return "program_crash"

    def _get_afl_crash_recommendation(self, signal: str) -> str:
        """Generate remediation advice based on the crash signal."""
        if signal in ["11", "sigsegv", "segv"]:
            return "Segmentation fault detected. Investigate memory access patterns, check for buffer overflows, null pointer dereferences, or use-after-free bugs."
        elif signal in ["6", "sigabrt", "abrt"]:
            return "Program abort detected. Check for assertion failures, memory allocation errors, or explicit abort() calls in the code."
        elif signal in ["4", "sigill", "ill"]:
            return "Illegal instruction detected. Check for code corruption, invalid function pointers, or architecture-specific instruction issues."
        elif signal in ["8", "sigfpe", "fpe"]:
            return "Floating point exception detected. Check for division by zero, arithmetic overflow, or invalid floating point operations."
        else:
            return f"Program crash with signal {signal} detected. Analyze the crash dump and input to identify the root cause."

    def _create_summary(self, findings: List[ModuleFinding], output_dir: Path) -> Dict[str, Any]:
        """Aggregate crash findings and AFL++ stats into a summary dict."""
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts = {}
        signal_counts = {}
        for finding in findings:
            # Count by severity
            severity_counts[finding.severity] += 1
            # Count by category
            category = finding.category
            category_counts[category] = category_counts.get(category, 0) + 1
            # Count by signal
            signal = finding.metadata.get("signal", "unknown")
            signal_counts[signal] = signal_counts.get(signal, 0) + 1
        # Try to read AFL++ statistics
        stats = self._read_afl_stats(output_dir)
        return {
            "total_findings": len(findings),
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "signal_counts": signal_counts,
            "unique_crashes": len(set(f.metadata.get("crash_id", "") for f in findings)),
            "afl_stats": stats
        }

    def _read_afl_stats(self, output_dir: Path) -> Dict[str, Any]:
        """Read AFL++ fuzzer statistics from the first fuzzer_stats file found."""
        stats = {}
        try:
            # Look for fuzzer_stats file in single or multiple instance setup
            stats_files = []
            # Single instance
            single_stats = output_dir / "fuzzer_stats"
            if single_stats.exists():
                stats_files.append(single_stats)
            # Multiple instances
            for instance_dir in output_dir.iterdir():
                if instance_dir.is_dir():
                    instance_stats = instance_dir / "fuzzer_stats"
                    if instance_stats.exists():
                        stats_files.append(instance_stats)
            # Read first stats file found (key : value per line)
            if stats_files:
                with open(stats_files[0], 'r') as f:
                    for line in f:
                        if ':' in line:
                            key, value = line.strip().split(':', 1)
                            stats[key.strip()] = value.strip()
        except Exception as e:
            logger.warning(f"Error reading AFL++ stats: {e}")
        return stats

View File

@@ -1,678 +0,0 @@
"""
AFL-RS Fuzzing Module
This module uses AFL-RS (AFL in Rust) for high-performance coverage-guided fuzzing
with modern Rust implementations and optimizations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
import re
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class AFLRSModule(BaseModule):
"""AFL-RS Rust-based fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
"""Get module metadata"""
return ModuleMetadata(
name="aflrs",
version="0.2.0",
description="High-performance AFL implementation in Rust with modern fuzzing features",
author="FuzzForge Team",
category="fuzzing",
tags=["coverage-guided", "rust", "afl", "high-performance", "modern"],
input_schema={
"type": "object",
"properties": {
"target_binary": {
"type": "string",
"description": "Path to the target binary (compiled with AFL-RS instrumentation)"
},
"input_dir": {
"type": "string",
"description": "Directory containing seed input files"
},
"output_dir": {
"type": "string",
"default": "aflrs_output",
"description": "Output directory for AFL-RS results"
},
"dictionary": {
"type": "string",
"description": "Dictionary file for token-based mutations"
},
"timeout": {
"type": "integer",
"default": 1000,
"description": "Timeout for each execution (ms)"
},
"memory_limit": {
"type": "integer",
"default": 50,
"description": "Memory limit for target process (MB)"
},
"max_total_time": {
"type": "integer",
"default": 3600,
"description": "Maximum total fuzzing time (seconds)"
},
"cpu_cores": {
"type": "integer",
"default": 1,
"description": "Number of CPU cores to use"
},
"mutation_depth": {
"type": "integer",
"default": 4,
"description": "Maximum depth for cascaded mutations"
},
"skip_deterministic": {
"type": "boolean",
"default": false,
"description": "Skip deterministic mutations"
},
"power_schedule": {
"type": "string",
"enum": ["explore", "fast", "coe", "lin", "quad", "exploit", "rare", "mmopt", "seek"],
"default": "fast",
"description": "Power scheduling algorithm"
},
"custom_mutators": {
"type": "array",
"items": {"type": "string"},
"description": "Custom mutator libraries to load"
},
"cmplog": {
"type": "boolean",
"default": true,
"description": "Enable CmpLog for comparison logging"
},
"redqueen": {
"type": "boolean",
"default": true,
"description": "Enable RedQueen input-to-state correspondence"
},
"unicorn_mode": {
"type": "boolean",
"default": false,
"description": "Enable Unicorn mode for emulation"
},
"persistent_mode": {
"type": "boolean",
"default": false,
"description": "Enable persistent mode for faster execution"
},
"target_args": {
"type": "array",
"items": {"type": "string"},
"description": "Arguments to pass to target binary"
},
"env_vars": {
"type": "object",
"description": "Environment variables to set"
},
"ignore_timeouts": {
"type": "boolean",
"default": false,
"description": "Ignore timeout signals and continue fuzzing"
},
"ignore_crashes": {
"type": "boolean",
"default": false,
"description": "Ignore crashes and continue fuzzing"
},
"sync_dir": {
"type": "string",
"description": "Directory for syncing with other AFL instances"
},
"sync_id": {
"type": "string",
"description": "Fuzzer ID for syncing"
}
}
},
output_schema={
"type": "object",
"properties": {
"findings": {
"type": "array",
"items": {
"type": "object",
"properties": {
"crash_id": {"type": "string"},
"crash_file": {"type": "string"},
"signal": {"type": "string"},
"execution_time": {"type": "integer"}
}
}
}
}
}
)
def validate_config(self, config: Dict[str, Any]) -> bool:
"""Validate configuration"""
target_binary = config.get("target_binary")
if not target_binary:
raise ValueError("target_binary is required for AFL-RS")
input_dir = config.get("input_dir")
if not input_dir:
raise ValueError("input_dir is required for AFL-RS")
cpu_cores = config.get("cpu_cores", 1)
if cpu_cores < 1:
raise ValueError("cpu_cores must be at least 1")
return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
"""Execute AFL-RS fuzzing"""
self.start_timer()
try:
# Validate inputs
self.validate_config(config)
self.validate_workspace(workspace)
logger.info("Running AFL-RS fuzzing campaign")
# Check AFL-RS installation
await self._check_aflrs_installation()
# Setup directories and files
target_binary, input_dir, output_dir = self._setup_aflrs_directories(config, workspace)
# Run AFL-RS fuzzing
findings = await self._run_aflrs_fuzzing(target_binary, input_dir, output_dir, config, workspace)
# Create summary
summary = self._create_summary(findings, output_dir)
logger.info(f"AFL-RS found {len(findings)} crashes")
return self.create_result(
findings=findings,
status="success",
summary=summary
)
except Exception as e:
logger.error(f"AFL-RS module failed: {e}")
return self.create_result(
findings=[],
status="failed",
error=str(e)
)
async def _check_aflrs_installation(self):
"""Check if AFL-RS is installed and available"""
try:
# Check if aflrs is available (assuming aflrs binary)
process = await asyncio.create_subprocess_exec(
"which", "aflrs",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
if process.returncode != 0:
# Try alternative AFL-RS command names
alt_commands = ["afl-fuzz-rs", "afl-rs", "cargo-afl"]
found = False
for cmd in alt_commands:
process = await asyncio.create_subprocess_exec(
"which", cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
if process.returncode == 0:
found = True
break
if not found:
raise RuntimeError("AFL-RS not found. Please install AFL-RS or ensure it's in PATH")
except Exception as e:
logger.warning(f"AFL-RS installation check failed: {e}")
def _setup_aflrs_directories(self, config: Dict[str, Any], workspace: Path):
"""Setup AFL-RS directories and validate files"""
# Check target binary
target_binary = workspace / config["target_binary"]
if not target_binary.exists():
raise FileNotFoundError(f"Target binary not found: {target_binary}")
# Check input directory
input_dir = workspace / config["input_dir"]
if not input_dir.exists():
raise FileNotFoundError(f"Input directory not found: {input_dir}")
# Validate input files exist
input_files = list(input_dir.glob("*"))
if not input_files:
raise ValueError(f"Input directory is empty: {input_dir}")
# Create output directory
output_dir = workspace / config.get("output_dir", "aflrs_output")
output_dir.mkdir(exist_ok=True)
return target_binary, input_dir, output_dir
async def _run_aflrs_fuzzing(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Run one AFL-RS campaign and return findings parsed from its output.

    The fuzzer runs as a subprocess for at most ``max_total_time`` seconds
    and is then terminated; crashes are read from the output directory on
    disk, so they survive the kill. Errors are logged, never raised — a
    failed campaign yields an empty findings list.
    """
    findings = []
    try:
        # Build AFL-RS command (probes PATH for the binary name)
        cmd = await self._build_aflrs_command(target_binary, input_dir, output_dir, config, workspace)
        # Set up environment with AFL_* tuning variables
        env = self._setup_aflrs_environment(config)
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run AFL-RS with a hard wall-clock budget for the whole campaign
        max_total_time = config.get("max_total_time", 3600)
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=env
            )
            # Fuzzers run until stopped: the timeout below is the expected
            # exit path, not an error condition.
            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(), timeout=max_total_time
                )
                logger.info(f"AFL-RS completed after {max_total_time} seconds")
            except asyncio.TimeoutError:
                logger.info(f"AFL-RS fuzzing timed out after {max_total_time} seconds, terminating")
                process.terminate()
                # Give the process 10s to exit gracefully before killing it
                try:
                    await asyncio.wait_for(process.wait(), timeout=10)
                except asyncio.TimeoutError:
                    process.kill()
                    await process.wait()
            # Parse results from disk (independent of captured output)
            findings = self._parse_aflrs_results(output_dir, workspace)
        except Exception as e:
            logger.warning(f"Error running AFL-RS process: {e}")
    except Exception as e:
        logger.warning(f"Error in AFL-RS fuzzing: {e}")
    return findings
async def _build_aflrs_command(self, target_binary: Path, input_dir: Path, output_dir: Path, config: Dict[str, Any], workspace: Path) -> List[str]:
    """Assemble the AFL-RS command line from the run configuration.

    Probes PATH for a usable binary name, then appends corpus, resource,
    scheduling and feature flags, and finally the target binary and its
    arguments after the conventional ``--`` separator.
    """
    # Try to determine the correct AFL-RS command
    aflrs_cmd = "aflrs"  # Default
    # Try alternative command names; the first one found on PATH wins
    alt_commands = ["aflrs", "afl-fuzz-rs", "afl-rs"]
    for cmd in alt_commands:
        try:
            process = await asyncio.create_subprocess_exec(
                "which", cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode == 0:
                aflrs_cmd = cmd
                break
        except Exception:
            continue
    cmd = [aflrs_cmd]
    # Add input and output directories
    cmd.extend(["-i", str(input_dir)])
    cmd.extend(["-o", str(output_dir)])
    # Add dictionary if specified (silently skipped when the file is absent)
    dictionary = config.get("dictionary")
    if dictionary:
        dict_path = workspace / dictionary
        if dict_path.exists():
            cmd.extend(["-x", str(dict_path)])
    # Add timeout and memory limit (AFL convention is ms / MB — TODO confirm
    # AFL-RS uses the same units)
    cmd.extend(["-t", str(config.get("timeout", 1000))])
    cmd.extend(["-m", str(config.get("memory_limit", 50))])
    # Add CPU cores (only passed when parallelism is requested)
    cpu_cores = config.get("cpu_cores", 1)
    if cpu_cores > 1:
        cmd.extend(["-j", str(cpu_cores)])
    # Add mutation depth
    mutation_depth = config.get("mutation_depth", 4)
    cmd.extend(["-d", str(mutation_depth)])
    # Add power schedule
    power_schedule = config.get("power_schedule", "fast")
    cmd.extend(["-p", power_schedule])
    # Add skip deterministic
    if config.get("skip_deterministic", False):
        cmd.append("-D")
    # Add custom mutators (one -c flag per mutator)
    custom_mutators = config.get("custom_mutators", [])
    for mutator in custom_mutators:
        cmd.extend(["-c", mutator])
    # Add advanced features (cmplog and redqueen default to on)
    if config.get("cmplog", True):
        cmd.append("-l")
    if config.get("redqueen", True):
        cmd.append("-I")
    if config.get("unicorn_mode", False):
        cmd.append("-U")
    if config.get("persistent_mode", False):
        cmd.append("-P")
    # Add ignore options
    if config.get("ignore_timeouts", False):
        cmd.append("-T")
    if config.get("ignore_crashes", False):
        cmd.append("-C")
    # Add sync options for multi-instance fuzzing
    sync_dir = config.get("sync_dir")
    if sync_dir:
        cmd.extend(["-F", sync_dir])
    sync_id = config.get("sync_id")
    if sync_id:
        cmd.extend(["-S", sync_id])
    # Add target binary and arguments after the "--" separator
    cmd.append("--")
    cmd.append(str(target_binary))
    target_args = config.get("target_args", [])
    cmd.extend(target_args)
    return cmd
def _setup_aflrs_environment(self, config: Dict[str, Any]) -> Dict[str, str]:
"""Setup environment variables for AFL-RS"""
env = os.environ.copy()
# Add user-specified environment variables
env_vars = config.get("env_vars", {})
env.update(env_vars)
# Set AFL-RS specific environment variables
env["AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"] = "1"
env["AFL_SKIP_CPUFREQ"] = "1"
# Enable advanced features if requested
if config.get("cmplog", True):
env["AFL_USE_CMPLOG"] = "1"
if config.get("redqueen", True):
env["AFL_USE_REDQUEEN"] = "1"
return env
def _parse_aflrs_results(self, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
    """Convert every crash file under ``output_dir/crashes`` into a finding."""
    findings = []
    try:
        crashes_dir = output_dir / "crashes"
        if not crashes_dir.exists():
            logger.info("No crashes directory found in AFL-RS output")
            return findings
        # Hidden entries (dotfiles) are bookkeeping, not reproducers: skip.
        for entry in crashes_dir.iterdir():
            if not entry.is_file() or entry.name.startswith("."):
                continue
            finding = self._create_aflrs_crash_finding(entry, workspace)
            if finding:
                findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing AFL-RS results: {e}")
    return findings
def _create_aflrs_crash_finding(self, crash_file: Path, workspace: Path) -> ModuleFinding:
    """Build a ModuleFinding from a single AFL-RS crash file.

    The crash filename is mined for metadata (id, signal, time); the first
    500 bytes of the reproducer are stored hex-encoded so binary input
    survives serialization. Returns None when construction fails.
    """
    try:
        # Parse crash filename
        filename = crash_file.name
        crash_info = self._parse_aflrs_filename(filename)
        # Try to read crash file (limited size)
        crash_content = ""
        crash_size = 0
        try:
            crash_data = crash_file.read_bytes()
            crash_size = len(crash_data)
            # Store first 500 bytes as hex
            crash_content = crash_data[:500].hex()
        except Exception:
            pass
        # Determine severity based on signal or crash type.
        # NOTE(review): _parse_aflrs_filename stores the signal under the
        # "sig" key on its main path, so this "signal" lookup appears to hit
        # the fallback "" and severity defaults — confirm.
        signal = crash_info.get("signal", "")
        severity = self._get_crash_severity(signal)
        # Report paths relative to the workspace when possible
        try:
            rel_path = crash_file.relative_to(workspace)
            file_path = str(rel_path)
        except ValueError:
            file_path = str(crash_file)
        finding = self.create_finding(
            title=f"AFL-RS Crash: {signal or 'Unknown Signal'}",
            description=f"AFL-RS discovered a crash in the target program{' with signal ' + signal if signal else ''}",
            severity=severity,
            category=self._get_crash_category(signal),
            file_path=file_path,
            recommendation=self._get_crash_recommendation(signal),
            metadata={
                "crash_id": crash_info.get("id", ""),
                "signal": signal,
                "execution_time": crash_info.get("time", ""),
                "crash_file": crash_file.name,
                "crash_size": crash_size,
                "crash_content_hex": crash_content,
                "fuzzer": "aflrs"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error creating AFL-RS crash finding: {e}")
        return None
def _parse_aflrs_filename(self, filename: str) -> Dict[str, str]:
"""Parse AFL-RS crash filename for information"""
info = {}
try:
# AFL-RS may use similar format to AFL++
# Example: id_000000_sig_11_src_000000_time_12345_op_havoc_rep_128
parts = filename.replace("id:", "id_").replace("sig:", "sig_").replace("src:", "src_").replace("time:", "time_").replace("op:", "op_").replace("rep:", "rep_").split("_")
i = 0
while i < len(parts) - 1:
if parts[i] in ["id", "sig", "src", "time", "op", "rep"]:
info[parts[i]] = parts[i + 1]
i += 2
else:
i += 1
except Exception:
# Fallback: try to extract signal from filename
signal_match = re.search(r'sig[_:]?(\d+)', filename)
if signal_match:
info["signal"] = signal_match.group(1)
return info
def _get_crash_severity(self, signal: str) -> str:
"""Determine crash severity based on signal"""
if not signal:
return "medium"
try:
sig_num = int(signal)
except ValueError:
return "medium"
# Map common signals to severity
if sig_num == 11: # SIGSEGV
return "critical"
elif sig_num == 6: # SIGABRT
return "high"
elif sig_num == 4: # SIGILL
return "high"
elif sig_num == 8: # SIGFPE
return "medium"
elif sig_num == 9: # SIGKILL
return "medium"
else:
return "medium"
def _get_crash_category(self, signal: str) -> str:
"""Determine crash category based on signal"""
if not signal:
return "program_crash"
try:
sig_num = int(signal)
except ValueError:
return "program_crash"
if sig_num == 11: # SIGSEGV
return "memory_corruption"
elif sig_num == 6: # SIGABRT
return "assertion_failure"
elif sig_num == 4: # SIGILL
return "illegal_instruction"
elif sig_num == 8: # SIGFPE
return "arithmetic_error"
else:
return "program_crash"
def _get_crash_recommendation(self, signal: str) -> str:
"""Generate recommendation based on crash signal"""
if not signal:
return "Analyze the crash input to reproduce and debug the issue."
try:
sig_num = int(signal)
except ValueError:
return "Analyze the crash input to reproduce and debug the issue."
if sig_num == 11: # SIGSEGV
return "Segmentation fault detected. Check for buffer overflows, null pointer dereferences, use-after-free, or invalid memory access patterns."
elif sig_num == 6: # SIGABRT
return "Program abort detected. Check for assertion failures, memory corruption detected by allocator, or explicit abort calls."
elif sig_num == 4: # SIGILL
return "Illegal instruction detected. Check for code corruption, invalid function pointers, or architecture-specific issues."
elif sig_num == 8: # SIGFPE
return "Floating point exception detected. Check for division by zero, arithmetic overflow, or invalid floating point operations."
else:
return f"Program terminated with signal {signal}. Analyze the crash input and use debugging tools to identify the root cause."
def _create_summary(self, findings: List[ModuleFinding], output_dir: Path) -> Dict[str, Any]:
    """Aggregate findings into severity/category/signal tallies plus stats.

    Returns a dict with per-severity, per-category and per-signal counts,
    the number of unique crash ids, and whatever fuzzer statistics AFL-RS
    left on disk. Unknown severity labels are tallied rather than raising
    (previously a finding with an unexpected severity crashed summarisation
    with KeyError).
    """
    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
    category_counts: Dict[str, int] = {}
    signal_counts: Dict[str, int] = {}
    for finding in findings:
        # .get guards against severity labels outside the canonical five
        severity_counts[finding.severity] = severity_counts.get(finding.severity, 0) + 1
        category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
        signal = finding.metadata.get("signal", "unknown")
        signal_counts[signal] = signal_counts.get(signal, 0) + 1
    # Try to read AFL-RS statistics from the output directory
    stats = self._read_aflrs_stats(output_dir)
    return {
        "total_findings": len(findings),
        "severity_counts": severity_counts,
        "category_counts": category_counts,
        "signal_counts": signal_counts,
        "unique_crashes": len(set(f.metadata.get("crash_id", "") for f in findings)),
        "aflrs_stats": stats,
    }
def _read_aflrs_stats(self, output_dir: Path) -> Dict[str, Any]:
"""Read AFL-RS fuzzer statistics"""
stats = {}
try:
# Look for AFL-RS stats file
stats_file = output_dir / "fuzzer_stats"
if stats_file.exists():
with open(stats_file, 'r') as f:
for line in f:
if ':' in line:
key, value = line.strip().split(':', 1)
stats[key.strip()] = value.strip()
# Also look for AFL-RS specific files
plot_data = output_dir / "plot_data"
if plot_data.exists():
stats["plot_data_available"] = True
except Exception as e:
logger.warning(f"Error reading AFL-RS stats: {e}")
return stats

View File

@@ -1,774 +0,0 @@
"""
Atheris Fuzzing Module
This module uses Atheris for fuzzing Python code to find bugs and security
vulnerabilities in Python applications and libraries.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
import sys
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
import traceback
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class AtherisModule(BaseModule):
"""Atheris Python fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Return static metadata and the input/output JSON schemas.

    Bug fix: the schema previously used the JSON literals ``true``/``false``
    for boolean defaults; those are undefined names in Python, so calling
    this method raised NameError. They are now ``True``/``False``.
    """
    return ModuleMetadata(
        name="atheris",
        version="2.3.0",
        description="Coverage-guided Python fuzzing engine for finding bugs in Python code",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["python", "coverage-guided", "native", "sanitizers", "libfuzzer"],
        input_schema={
            "type": "object",
            "properties": {
                "target_script": {
                    "type": "string",
                    "description": "Path to the Python script containing the fuzz target function"
                },
                "target_function": {
                    "type": "string",
                    "default": "TestOneInput",
                    "description": "Name of the target function to fuzz"
                },
                "corpus_dir": {
                    "type": "string",
                    "description": "Directory containing initial corpus files"
                },
                "dict_file": {
                    "type": "string",
                    "description": "Dictionary file for fuzzing keywords"
                },
                "max_total_time": {
                    "type": "integer",
                    "default": 600,
                    "description": "Maximum total time to run fuzzing (seconds)"
                },
                "max_len": {
                    "type": "integer",
                    "default": 4096,
                    "description": "Maximum length of test input"
                },
                "timeout": {
                    "type": "integer",
                    "default": 25,
                    "description": "Timeout for individual test cases (seconds)"
                },
                "runs": {
                    "type": "integer",
                    "default": -1,
                    "description": "Number of individual test runs (-1 for unlimited)"
                },
                "jobs": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of fuzzing jobs to run in parallel"
                },
                "print_final_stats": {
                    "type": "boolean",
                    "default": True,
                    "description": "Print final statistics"
                },
                "print_pcs": {
                    "type": "boolean",
                    "default": False,
                    "description": "Print newly covered PCs"
                },
                "print_coverage": {
                    "type": "boolean",
                    "default": True,
                    "description": "Print coverage information"
                },
                "artifact_prefix": {
                    "type": "string",
                    "default": "crash-",
                    "description": "Prefix for artifact files"
                },
                "seed": {
                    "type": "integer",
                    "description": "Random seed for reproducibility"
                },
                "python_path": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Additional Python paths to add to sys.path"
                },
                "enable_sanitizers": {
                    "type": "boolean",
                    "default": True,
                    "description": "Enable Python-specific sanitizers and checks"
                },
                "detect_leaks": {
                    "type": "boolean",
                    "default": True,
                    "description": "Detect memory leaks in native extensions"
                },
                "detect_stack_use_after_return": {
                    "type": "boolean",
                    "default": False,
                    "description": "Detect stack use-after-return"
                },
                "setup_code": {
                    "type": "string",
                    "description": "Python code to execute before fuzzing starts"
                },
                "enable_value_profile": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable value profiling for better mutation"
                },
                "shrink": {
                    "type": "boolean",
                    "default": True,
                    "description": "Try to shrink the corpus"
                },
                "only_ascii": {
                    "type": "boolean",
                    "default": False,
                    "description": "Only generate ASCII inputs"
                }
            }
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "exception_type": {"type": "string"},
                            "exception_message": {"type": "string"},
                            "stack_trace": {"type": "string"},
                            "crash_input": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Validate the Atheris run configuration.

    Requires a non-empty ``target_script`` and a positive ``max_total_time``
    (default 600). Raises ValueError on violation, returns True otherwise.
    """
    if not config.get("target_script"):
        raise ValueError("target_script is required for Atheris")
    if config.get("max_total_time", 600) <= 0:
        raise ValueError("max_total_time must be positive")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Execute Atheris Python fuzzing against the configured target script.

    Validates the config and workspace, verifies Atheris is importable,
    runs the campaign, and returns a ModuleResult. Any failure is captured
    in a "failed" result instead of propagating to the caller.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Atheris Python fuzzing")
        # Check Atheris installation (raises RuntimeError when missing)
        await self._check_atheris_installation()
        # Validate target script
        target_script = workspace / config["target_script"]
        if not target_script.exists():
            raise FileNotFoundError(f"Target script not found: {target_script}")
        # Run Atheris fuzzing
        findings = await self._run_atheris_fuzzing(target_script, config, workspace)
        # Create summary
        summary = self._create_summary(findings)
        logger.info(f"Atheris found {len(findings)} issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"Atheris module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_atheris_installation(self):
"""Check if Atheris is installed"""
try:
process = await asyncio.create_subprocess_exec(
sys.executable, "-c", "import atheris; print(atheris.__version__)",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
if process.returncode != 0:
raise RuntimeError("Atheris not installed. Install with: pip install atheris")
version = stdout.decode().strip()
logger.info(f"Using Atheris version: {version}")
except Exception as e:
raise RuntimeError(f"Atheris installation check failed: {e}")
async def _run_atheris_fuzzing(self, target_script: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Run one Atheris campaign and collect findings.

    A wrapper script is generated around the target, launched in a child
    interpreter for at most ``max_total_time`` seconds, then findings are
    harvested both from the captured process output and from crash files
    on disk. Errors are logged, never raised — failures yield [].
    """
    findings = []
    try:
        # Create output directory for artifacts
        output_dir = workspace / "atheris_output"
        output_dir.mkdir(exist_ok=True)
        # Create wrapper script for fuzzing
        wrapper_script = await self._create_atheris_wrapper(target_script, config, workspace, output_dir)
        # Build Atheris command: run the wrapper under this interpreter
        cmd = [sys.executable, str(wrapper_script)]
        # Add corpus directory as a positional libFuzzer argument
        corpus_dir = config.get("corpus_dir")
        if corpus_dir:
            corpus_path = workspace / corpus_dir
            if corpus_path.exists():
                cmd.append(str(corpus_path))
        # Set up environment (ASAN options etc.)
        env = self._setup_atheris_environment(config)
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run Atheris with timeout
        max_total_time = config.get("max_total_time", 600)
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=env
            )
            # A timeout is the expected exit path for a fuzzer
            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(), timeout=max_total_time
                )
            except asyncio.TimeoutError:
                logger.info(f"Atheris fuzzing timed out after {max_total_time} seconds")
                process.terminate()
                # Grace period before a hard kill
                try:
                    await asyncio.wait_for(process.wait(), timeout=10)
                except asyncio.TimeoutError:
                    process.kill()
                    await process.wait()
            # Parse results from the captured output.
            # NOTE(review): on the timeout path stdout/stderr were never
            # assigned, so the decode below raises NameError, which the
            # outer except swallows — crash files on disk then go unparsed
            # for timed-out runs. Confirm whether this is intended.
            findings = self._parse_atheris_output(
                stdout.decode(), stderr.decode(), output_dir, workspace
            )
            # Look for crash files written by the wrapper / libFuzzer
            crash_findings = self._parse_crash_files(output_dir, workspace)
            findings.extend(crash_findings)
        except Exception as e:
            logger.warning(f"Error running Atheris process: {e}")
    except Exception as e:
        logger.warning(f"Error in Atheris fuzzing: {e}")
    return findings
async def _create_atheris_wrapper(self, target_script: Path, config: Dict[str, Any], workspace: Path, output_dir: Path) -> Path:
"""Create wrapper script for Atheris fuzzing"""
wrapper_path = workspace / "atheris_wrapper.py"
wrapper_code = f'''#!/usr/bin/env python3
import sys
import os
import atheris
import traceback
# Add Python paths
python_paths = {config.get("python_path", [])}
for path in python_paths:
if path not in sys.path:
sys.path.insert(0, path)
# Add workspace to Python path
sys.path.insert(0, r"{workspace}")
# Setup code
setup_code = """{config.get("setup_code", "")}"""
if setup_code:
exec(setup_code)
# Import target script
target_module_name = "{target_script.stem}"
sys.path.insert(0, r"{target_script.parent}")
try:
target_module = __import__(target_module_name)
target_function = getattr(target_module, "{config.get("target_function", "TestOneInput")}")
except Exception as e:
print(f"Failed to import target: {{e}}")
sys.exit(1)
# Wrapper function to catch exceptions
original_target = target_function
def wrapped_target(data):
try:
return original_target(data)
except Exception as e:
# Write crash information
crash_info = {{
"exception_type": type(e).__name__,
"exception_message": str(e),
"stack_trace": traceback.format_exc(),
"input_data": data[:1000].hex() if isinstance(data, bytes) else str(data)[:1000]
}}
crash_file = r"{output_dir}" + "/crash_" + type(e).__name__ + ".txt"
with open(crash_file, "a") as f:
f.write(f"Exception: {{type(e).__name__}}\\n")
f.write(f"Message: {{str(e)}}\\n")
f.write(f"Stack trace:\\n{{traceback.format_exc()}}\\n")
f.write(f"Input data (first 1000 chars/bytes): {{crash_info['input_data']}}\\n")
f.write("-" * 80 + "\\n")
# Re-raise to let Atheris handle it
raise
if __name__ == "__main__":
# Configure Atheris
atheris.Setup(sys.argv, wrapped_target)
# Set Atheris options
options = []
options.append(f"-max_total_time={{config.get('max_total_time', 600)}}")
options.append(f"-max_len={{config.get('max_len', 4096)}}")
options.append(f"-timeout={{config.get('timeout', 25)}}")
options.append(f"-runs={{config.get('runs', -1)}}")
if {config.get('jobs', 1)} > 1:
options.append(f"-jobs={{config.get('jobs', 1)}}")
if {config.get('print_final_stats', True)}:
options.append("-print_final_stats=1")
else:
options.append("-print_final_stats=0")
if {config.get('print_pcs', False)}:
options.append("-print_pcs=1")
if {config.get('print_coverage', True)}:
options.append("-print_coverage=1")
artifact_prefix = "{config.get('artifact_prefix', 'crash-')}"
options.append(f"-artifact_prefix={{r'{output_dir}'}}/" + artifact_prefix)
seed = {config.get('seed')}
if seed is not None:
options.append(f"-seed={{seed}}")
if {config.get('enable_value_profile', False)}:
options.append("-use_value_profile=1")
if {config.get('shrink', True)}:
options.append("-shrink=1")
if {config.get('only_ascii', False)}:
options.append("-only_ascii=1")
dict_file = "{config.get('dict_file', '')}"
if dict_file:
dict_path = r"{workspace}" + "/" + dict_file
if os.path.exists(dict_path):
options.append(f"-dict={{dict_path}}")
# Add options to sys.argv
sys.argv.extend(options)
# Start fuzzing
atheris.Fuzz()
'''
with open(wrapper_path, 'w') as f:
f.write(wrapper_code)
return wrapper_path
def _setup_atheris_environment(self, config: Dict[str, Any]) -> Dict[str, str]:
"""Setup environment variables for Atheris"""
env = os.environ.copy()
# Enable sanitizers if requested
if config.get("enable_sanitizers", True):
env["ASAN_OPTIONS"] = env.get("ASAN_OPTIONS", "") + ":detect_leaks=1:halt_on_error=1"
if config.get("detect_leaks", True):
env["ASAN_OPTIONS"] = env.get("ASAN_OPTIONS", "") + ":detect_leaks=1"
if config.get("detect_stack_use_after_return", False):
env["ASAN_OPTIONS"] = env.get("ASAN_OPTIONS", "") + ":detect_stack_use_after_return=1"
return env
def _parse_atheris_output(self, stdout: str, stderr: str, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
    """Scan the combined fuzzer output for Python exception reports."""
    import re  # local import: the module does not import re at top level

    findings = []
    try:
        combined = stdout + "\n" + stderr
        # Either a raw traceback, or the structured block our wrapper writes
        patterns = (
            r"Traceback \(most recent call last\):(.*?)(?=\n\w|\nDONE|\n=|\Z)",
            r"Exception: (\w+).*?\nMessage: (.*?)\nStack trace:\n(.*?)(?=\n-{20,}|\Z)",
        )
        for pattern in patterns:
            for match in re.findall(pattern, combined, re.DOTALL | re.MULTILINE):
                finding = self._create_exception_finding(match, combined, output_dir)
                if finding:
                    findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing Atheris output: {e}")
    return findings
def _parse_crash_files(self, output_dir: Path, workspace: Path) -> List[ModuleFinding]:
    """Collect findings from wrapper crash logs and Atheris artifacts."""
    findings = []
    try:
        # Structured crash logs written by the wrapper's exception hook
        for crash_log in output_dir.glob("crash_*.txt"):
            findings.extend(self._parse_crash_file(crash_log, workspace))
        # Raw reproducer inputs dropped by libFuzzer itself
        for artifact in output_dir.glob("crash-*"):
            finding = self._create_artifact_finding(artifact, workspace)
            if finding:
                findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing crash files: {e}")
    return findings
def _parse_crash_file(self, crash_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Split one wrapper crash log into per-exception findings.

    Entries are separated by an 80-dash ruler; blank segments are skipped.
    Read/parse errors are logged and yield no findings.
    """
    findings = []
    try:
        for entry in crash_file.read_text().split("-" * 80):
            if not entry.strip():
                continue
            parsed = self._parse_crash_entry(entry, crash_file, workspace)
            if parsed:
                findings.append(parsed)
    except Exception as e:
        logger.warning(f"Error parsing crash file {crash_file}: {e}")
    return findings
def _parse_crash_entry(self, entry: str, crash_file: Path, workspace: Path) -> ModuleFinding:
    """Parse one "Exception/Message/Stack trace/Input data" record.

    ``entry`` is a single segment of a wrapper crash log (the text between
    80-dash rulers). Returns None when no exception type can be extracted
    or on parse errors.
    """
    try:
        lines = entry.strip().split('\n')
        exception_type = ""
        exception_message = ""
        stack_trace = ""
        input_data = ""
        current_section = None  # which multi-line section we are inside
        stack_lines = []
        # Line-oriented state machine over the record's labelled fields
        for line in lines:
            if line.startswith("Exception: "):
                exception_type = line.replace("Exception: ", "")
            elif line.startswith("Message: "):
                exception_message = line.replace("Message: ", "")
            elif line.startswith("Stack trace:"):
                current_section = "stack"
            elif line.startswith("Input data"):
                current_section = "input"
                input_data = line.split(":", 1)[1].strip() if ":" in line else ""
            elif current_section == "stack":
                stack_lines.append(line)
        stack_trace = '\n'.join(stack_lines)
        if not exception_type:
            return None
        # Determine severity based on exception type
        severity = self._get_exception_severity(exception_type)
        # Report paths relative to the workspace when possible
        try:
            rel_path = crash_file.relative_to(workspace)
            file_path = str(rel_path)
        except ValueError:
            file_path = str(crash_file)
        finding = self.create_finding(
            title=f"Atheris Exception: {exception_type}",
            description=f"Atheris discovered a Python exception: {exception_type}{': ' + exception_message if exception_message else ''}",
            severity=severity,
            category=self._get_exception_category(exception_type),
            file_path=file_path,
            recommendation=self._get_exception_recommendation(exception_type, exception_message),
            metadata={
                "exception_type": exception_type,
                "exception_message": exception_message,
                "stack_trace": stack_trace[:2000] if stack_trace else "",  # Limit size
                "crash_input_preview": input_data[:500] if input_data else "",
                "fuzzer": "atheris"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error parsing crash entry: {e}")
        return None
def _create_exception_finding(self, match, full_output: str, output_dir: Path) -> ModuleFinding:
    """Build a finding from a regex match from _parse_atheris_output.

    ``match`` is either a 3-tuple (type, message, stack) from the
    structured pattern, a shorter tuple, or a raw traceback string; when
    the type is not explicit it is recovered from the last
    "SomeError: message" line of the trace. Returns None on error.
    """
    try:
        if isinstance(match, tuple) and len(match) >= 1:
            # Handle different match formats
            if len(match) == 3:  # structured Exception/Message/Stack format
                exception_type, exception_message, stack_trace = match
            else:
                stack_trace = match[0]
                exception_type = "Unknown"
                exception_message = ""
        else:
            stack_trace = str(match)
            exception_type = "Unknown"
            exception_message = ""
        # Try to extract exception type from stack trace: the final line of
        # a traceback is conventionally "SomeError: message"
        if not exception_type or exception_type == "Unknown":
            lines = stack_trace.split('\n')
            for line in reversed(lines):
                if ':' in line and any(exc in line for exc in ['Error', 'Exception', 'Warning']):
                    exception_type = line.split(':')[0].strip()
                    exception_message = line.split(':', 1)[1].strip() if ':' in line else ""
                    break
        severity = self._get_exception_severity(exception_type)
        finding = self.create_finding(
            title=f"Atheris Exception: {exception_type}",
            description=f"Atheris discovered a Python exception during fuzzing: {exception_type}",
            severity=severity,
            category=self._get_exception_category(exception_type),
            file_path=None,
            recommendation=self._get_exception_recommendation(exception_type, exception_message),
            metadata={
                "exception_type": exception_type,
                "exception_message": exception_message,
                "stack_trace": stack_trace[:2000] if stack_trace else "",
                "fuzzer": "atheris"
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error creating exception finding: {e}")
        return None
def _create_artifact_finding(self, artifact_file: Path, workspace: Path) -> ModuleFinding:
    """Wrap a libFuzzer crash artifact (reproducer input) in a finding.

    The first 1000 bytes of the artifact are stored hex-encoded so binary
    input survives serialization. Returns None when construction fails.
    """
    try:
        hex_preview = ""
        try:
            hex_preview = artifact_file.read_bytes()[:1000].hex()
        except Exception:
            pass
        # Report paths relative to the workspace when possible
        try:
            file_path = str(artifact_file.relative_to(workspace))
        except ValueError:
            file_path = str(artifact_file)
        return self.create_finding(
            title="Atheris Crash Artifact",
            description=f"Atheris generated a crash artifact file: {artifact_file.name}",
            severity="medium",
            category="program_crash",
            file_path=file_path,
            recommendation="Analyze the crash artifact to reproduce and debug the issue. The artifact contains the input that caused the crash.",
            metadata={
                "artifact_type": "crash",
                "artifact_file": artifact_file.name,
                "artifact_content_hex": hex_preview,
                "fuzzer": "atheris"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating artifact finding: {e}")
        return None
def _get_exception_severity(self, exception_type: str) -> str:
"""Determine severity based on exception type"""
if not exception_type:
return "medium"
exception_lower = exception_type.lower()
# Critical security issues
if any(term in exception_lower for term in ["segmentationfault", "accessviolation", "memoryerror"]):
return "critical"
# High severity exceptions
elif any(term in exception_lower for term in ["attributeerror", "typeerror", "indexerror", "keyerror", "valueerror"]):
return "high"
# Medium severity exceptions
elif any(term in exception_lower for term in ["assertionerror", "runtimeerror", "ioerror", "oserror"]):
return "medium"
# Lower severity exceptions
elif any(term in exception_lower for term in ["warning", "deprecation"]):
return "low"
else:
return "medium"
def _get_exception_category(self, exception_type: str) -> str:
"""Determine category based on exception type"""
if not exception_type:
return "python_exception"
exception_lower = exception_type.lower()
if any(term in exception_lower for term in ["memory", "segmentation", "access"]):
return "memory_corruption"
elif any(term in exception_lower for term in ["attribute", "type"]):
return "type_error"
elif any(term in exception_lower for term in ["index", "key", "value"]):
return "data_error"
elif any(term in exception_lower for term in ["io", "os", "file"]):
return "io_error"
elif any(term in exception_lower for term in ["assertion"]):
return "assertion_failure"
else:
return "python_exception"
def _get_exception_recommendation(self, exception_type: str, exception_message: str) -> str:
"""Generate recommendation based on exception type"""
if not exception_type:
return "Analyze the exception and fix the underlying code issue."
exception_lower = exception_type.lower()
if "attributeerror" in exception_lower:
return "Fix AttributeError by ensuring objects have the expected attributes before accessing them. Add proper error handling and validation."
elif "typeerror" in exception_lower:
return "Fix TypeError by ensuring correct data types are used. Add type checking and validation for function parameters."
elif "indexerror" in exception_lower:
return "Fix IndexError by adding bounds checking before accessing list/array elements. Validate indices are within valid range."
elif "keyerror" in exception_lower:
return "Fix KeyError by checking if keys exist in dictionaries before accessing them. Use .get() method or proper key validation."
elif "valueerror" in exception_lower:
return "Fix ValueError by validating input values before processing. Add proper input sanitization and validation."
elif "memoryerror" in exception_lower:
return "Fix MemoryError by optimizing memory usage, processing data in chunks, or increasing available memory."
elif "assertionerror" in exception_lower:
return "Fix AssertionError by reviewing assertion conditions and ensuring they properly validate the expected state."
else:
return f"Fix the {exception_type} exception by analyzing the root cause and implementing appropriate error handling and validation."
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
exception_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by exception type
exception_type = finding.metadata.get("exception_type", "unknown")
exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"category_counts": category_counts,
"exception_counts": exception_counts,
"unique_exceptions": len(exception_counts),
"python_specific_issues": sum(category_counts.get(cat, 0) for cat in ["type_error", "data_error", "python_exception"])
}

View File

@@ -1,572 +0,0 @@
"""
Cargo Fuzz Module
This module uses cargo-fuzz for fuzzing Rust code with libFuzzer integration.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List, Tuple
import subprocess
import logging
import httpx
import re
from datetime import datetime, timedelta
try:
from prefect import get_run_context
except ImportError:
# Fallback for when not running in Prefect context
get_run_context = None
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class CargoFuzzModule(BaseModule):
"""Cargo Fuzz Rust fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe the cargo-fuzz module: identity, tags, and the JSON schemas
    for its configuration input and findings output."""
    return ModuleMetadata(
        name="cargo_fuzz",
        version="0.11.2",
        description="Rust fuzzing integration with libFuzzer using cargo-fuzz",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["rust", "libfuzzer", "cargo", "coverage-guided", "sanitizers"],
        # JSON schema of the user-supplied configuration dict (validated
        # upstream; validate_config() re-checks required keys).
        input_schema={
            "type": "object",
            "properties": {
                "project_dir": {
                    "type": "string",
                    "description": "Path to Rust project directory (with Cargo.toml)"
                },
                "fuzz_target": {
                    "type": "string",
                    "description": "Name of the fuzz target to run"
                },
                "max_total_time": {
                    "type": "integer",
                    "default": 600,
                    "description": "Maximum total time to run fuzzing (seconds)"
                },
                "jobs": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of worker processes"
                },
                "corpus_dir": {
                    "type": "string",
                    "description": "Custom corpus directory"
                },
                "artifacts_dir": {
                    "type": "string",
                    "description": "Custom artifacts directory"
                },
                "sanitizer": {
                    "type": "string",
                    "enum": ["address", "memory", "thread", "leak", "none"],
                    "default": "address",
                    "description": "Sanitizer to use"
                },
                "release": {
                    "type": "boolean",
                    "default": False,
                    "description": "Use release mode"
                },
                "debug_assertions": {
                    "type": "boolean",
                    "default": True,
                    "description": "Enable debug assertions"
                }
            }
        },
        # JSON schema describing the shape of each emitted finding's metadata.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "crash_type": {"type": "string"},
                            "artifact_path": {"type": "string"},
                            "stack_trace": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Check the cargo-fuzz run configuration.

    Both ``project_dir`` and ``fuzz_target`` must be present and non-empty.

    Raises:
        ValueError: if a required key is missing or falsy.

    Returns:
        True when the configuration is acceptable.
    """
    for key in ("project_dir", "fuzz_target"):
        if not config.get(key):
            raise ValueError(f"{key} is required")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path, stats_callback=None) -> ModuleResult:
    """Run a full cargo-fuzz campaign against the configured target.

    Args:
        config: Per-run options (see get_metadata() input_schema).
        workspace: Root directory containing the Rust project.
        stats_callback: Optional async callable that receives live
            statistics payloads during the run.

    Returns:
        ModuleResult with status "success" plus findings and a summary,
        or status "failed" with the error string. Never raises.
    """
    self.start_timer()
    try:
        # Initialize last observed stats for summary propagation
        self._last_stats = {
            'executions': 0,
            'executions_per_sec': 0.0,
            'crashes': 0,
            'corpus_size': 0,
            'elapsed_time': 0,
        }
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running cargo-fuzz Rust fuzzing")
        # Check installation
        await self._check_cargo_fuzz_installation()
        # Setup project
        project_dir = workspace / config["project_dir"]
        await self._setup_cargo_fuzz_project(project_dir, config)
        # Run fuzzing
        findings = await self._run_cargo_fuzz(project_dir, config, workspace, stats_callback)
        # Create summary and enrich with last observed runtime stats
        # (self._last_stats is refreshed by _process_streaming_output).
        summary = self._create_summary(findings)
        try:
            summary.update({
                'executions': self._last_stats.get('executions', 0),
                'executions_per_sec': self._last_stats.get('executions_per_sec', 0.0),
                'corpus_size': self._last_stats.get('corpus_size', 0),
                'crashes': self._last_stats.get('crashes', 0),
                'elapsed_time': self._last_stats.get('elapsed_time', 0),
            })
        except Exception:
            # Best-effort enrichment; a summary without live stats is still valid.
            pass
        logger.info(f"cargo-fuzz found {len(findings)} issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"cargo-fuzz module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_cargo_fuzz_installation(self):
    """Verify that the `cargo fuzz` subcommand is runnable.

    Raises:
        RuntimeError: when cargo-fuzz is missing or the probe fails.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "cargo", "fuzz", "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        await proc.communicate()
        # Non-zero exit means the subcommand is unavailable.
        if proc.returncode != 0:
            raise RuntimeError("cargo-fuzz not installed. Install with: cargo install cargo-fuzz")
    except Exception as e:
        # Re-wrap everything (including the RuntimeError above) uniformly.
        raise RuntimeError(f"cargo-fuzz installation check failed: {e}")
async def _setup_cargo_fuzz_project(self, project_dir: Path, config: Dict[str, Any]):
    """Validate the Rust project layout and initialize fuzzing scaffolding.

    Raises:
        FileNotFoundError: when the project directory or Cargo.toml is missing.
    """
    if not project_dir.exists():
        raise FileNotFoundError(f"Project directory not found: {project_dir}")
    if not (project_dir / "Cargo.toml").exists():
        raise FileNotFoundError(f"Cargo.toml not found in {project_dir}")
    # Only run `cargo fuzz init` when the fuzz/ directory is absent.
    if not (project_dir / "fuzz").exists():
        logger.info("Initializing cargo-fuzz project")
        proc = await asyncio.create_subprocess_exec(
            "cargo", "fuzz", "init",
            cwd=project_dir,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        await proc.communicate()
async def _run_cargo_fuzz(self, project_dir: Path, config: Dict[str, Any], workspace: Path, stats_callback=None) -> List[ModuleFinding]:
    """Run cargo-fuzz with real-time statistics reporting.

    Command layout fix: cargo-native options (``--sanitizer``, ``--release``)
    must appear BEFORE the ``--`` separator, and the separator must appear
    exactly once — everything after ``--`` is forwarded verbatim to
    libFuzzer by cargo-fuzz. The previous revision emitted ``--`` twice and
    put the cargo options after it. Also wires up the documented
    ``corpus_dir`` option as the positional corpus argument.
    """
    findings = []
    # Get run_id from Prefect context for statistics reporting
    run_id = None
    if get_run_context:
        try:
            context = get_run_context()
            run_id = str(context.flow_run.id)
        except Exception:
            logger.warning("Could not get run_id from Prefect context")
    try:
        # cargo-level options, then the target name.
        cmd = ["cargo", "fuzz", "run"]
        sanitizer = config.get("sanitizer", "address")
        if sanitizer != "none":
            cmd.append(f"--sanitizer={sanitizer}")
        if config.get("release", False):
            cmd.append("--release")
        cmd.append(config["fuzz_target"])
        # Optional seed corpus directory (positional, still cargo-side).
        corpus_dir = config.get("corpus_dir")
        if corpus_dir:
            corpus_path = workspace / corpus_dir
            if corpus_path.exists():
                cmd.append(str(corpus_path))
            else:
                logger.warning(f"Corpus directory not found: {corpus_path}")
        # Single "--" separator; everything after it goes to libFuzzer.
        max_time = config.get("max_total_time", 600)
        libfuzzer_args = [f"-max_total_time={max_time}"]
        if config.get("jobs", 1) > 1:
            libfuzzer_args.append(f"-jobs={config['jobs']}")
        cmd.append("--")
        cmd.extend(libfuzzer_args)
        # Set environment
        env = os.environ.copy()
        if config.get("debug_assertions", True):
            env["RUSTFLAGS"] = env.get("RUSTFLAGS", "") + " -C debug-assertions=on"
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run with streaming output processing for real-time stats
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,  # Merge stderr into stdout
                cwd=project_dir,
                env=env
            )
            # Process output in real-time
            stdout_data, stderr_data = await self._process_streaming_output(
                process, max_time, config, stats_callback
            )
            # Parse final results
            findings = self._parse_cargo_fuzz_output(
                stdout_data, stderr_data, project_dir, workspace, config
            )
        except Exception as e:
            logger.warning(f"Error running cargo-fuzz: {e}")
    except Exception as e:
        logger.warning(f"Error in cargo-fuzz execution: {e}")
    return findings
def _parse_cargo_fuzz_output(self, stdout: str, stderr: str, project_dir: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
    """Collect findings from the crash artifacts cargo-fuzz left on disk.

    The combined stdout/stderr text is passed along so stack traces can be
    associated with each artifact file.
    """
    findings: List[ModuleFinding] = []
    try:
        combined = stdout + "\n" + stderr
        # cargo-fuzz writes reproducer files under fuzz/artifacts/<target>/.
        artifacts_dir = project_dir / "fuzz" / "artifacts" / config["fuzz_target"]
        if artifacts_dir.exists():
            for entry in artifacts_dir.iterdir():
                if not entry.is_file():
                    continue
                finding = self._create_artifact_finding(entry, workspace, combined)
                if finding:
                    findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing cargo-fuzz output: {e}")
    return findings
def _create_artifact_finding(self, artifact_path: Path, workspace: Path, output: str) -> ModuleFinding:
    """Build a finding from one crash artifact file.

    Returns None if the finding cannot be constructed.
    """
    try:
        # Classify by filename convention; plain crashes are the default.
        lowered = artifact_path.name.lower()
        if "leak" in lowered:
            crash_type = "memory_leak"
        elif "timeout" in lowered:
            crash_type = "timeout"
        else:
            crash_type = "crash"
        # Pull the associated stack trace out of the fuzzer output.
        stack_trace = self._extract_stack_trace_from_output(output, artifact_path.name)
        try:
            file_path = str(artifact_path.relative_to(workspace))
        except ValueError:
            # Artifact lives outside the workspace; report its absolute path.
            file_path = str(artifact_path)
        return self.create_finding(
            title=f"cargo-fuzz {crash_type.title()}",
            description=f"cargo-fuzz discovered a {crash_type} in the Rust code",
            severity="high" if "crash" in crash_type else "medium",
            category=self._get_crash_category(crash_type),
            file_path=file_path,
            recommendation=self._get_crash_recommendation(crash_type),
            metadata={
                "crash_type": crash_type,
                "artifact_path": str(artifact_path),
                "stack_trace": stack_trace,
                "fuzzer": "cargo_fuzz"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating artifact finding: {e}")
        return None
def _extract_stack_trace_from_output(self, output: str, artifact_name: str) -> str:
"""Extract stack trace from output"""
try:
lines = output.split('\n')
stack_lines = []
in_stack = False
for line in lines:
if artifact_name in line or "stack backtrace:" in line.lower():
in_stack = True
continue
if in_stack:
if line.strip() and ("at " in line or "::" in line or line.strip().startswith("0:")):
stack_lines.append(line.strip())
elif not line.strip() and stack_lines:
break
return '\n'.join(stack_lines[:20]) # Limit stack trace size
except Exception:
return ""
def _get_crash_category(self, crash_type: str) -> str:
"""Get category for crash type"""
if "leak" in crash_type:
return "memory_leak"
elif "timeout" in crash_type:
return "performance_issues"
else:
return "memory_safety"
def _get_crash_recommendation(self, crash_type: str) -> str:
"""Get recommendation for crash type"""
if "leak" in crash_type:
return "Fix memory leak by ensuring proper cleanup of allocated resources. Review memory management patterns."
elif "timeout" in crash_type:
return "Fix timeout by optimizing performance, avoiding infinite loops, and implementing reasonable bounds."
else:
return "Fix the crash by analyzing the stack trace and addressing memory safety issues."
async def _process_streaming_output(self, process, max_time: int, config: Dict[str, Any], stats_callback=None) -> Tuple[str, str]:
    """Process cargo-fuzz output in real-time and report statistics.

    Reads the merged stdout stream in 4 KiB chunks (libFuzzer rewrites its
    status line with carriage returns, so the buffer is split on both \\r
    and \\n), parses stats from each complete line, pushes a snapshot to
    ``stats_callback`` roughly every 3 seconds, and terminates the process
    once ``max_time`` seconds have elapsed.

    Returns:
        Tuple of (captured output text, "") — the second element is empty
        because the caller merges stderr into stdout.
    """
    stdout_lines = []
    start_time = datetime.utcnow()
    last_update = start_time
    stats_data = {
        'executions': 0,
        'executions_per_sec': 0.0,
        'crashes': 0,
        'corpus_size': 0,
        'elapsed_time': 0
    }
    # Get run_id from Prefect context for statistics reporting
    run_id = None
    if get_run_context:
        try:
            context = get_run_context()
            run_id = str(context.flow_run.id)
        except Exception:
            logger.debug("Could not get run_id from Prefect context")
    try:
        # Emit an initial baseline update so dashboards show activity immediately
        try:
            await self._send_stats_via_callback(stats_callback, run_id, stats_data)
        except Exception:
            pass
        # Monitor process output in chunks to capture libFuzzer carriage-return updates
        buffer = ""
        while True:
            try:
                chunk = await asyncio.wait_for(process.stdout.read(4096), timeout=1.0)
                if not chunk:
                    # Process finished
                    break
                buffer += chunk.decode('utf-8', errors='ignore')
                # Split on both newline and carriage return; the trailing
                # (possibly incomplete) fragment stays in the buffer.
                if "\n" in buffer or "\r" in buffer:
                    parts = re.split(r"[\r\n]", buffer)
                    buffer = parts[-1]
                    for part in parts[:-1]:
                        line = part.strip()
                        if not line:
                            continue
                        stdout_lines.append(line)
                        self._parse_stats_from_line(line, stats_data)
            except asyncio.TimeoutError:
                # No output this second; continue to periodic update check
                pass
            # Periodic update (even if there was no output)
            current_time = datetime.utcnow()
            stats_data['elapsed_time'] = int((current_time - start_time).total_seconds())
            if current_time - last_update >= timedelta(seconds=3):
                try:
                    # Keep a copy for summary enrichment in execute().
                    self._last_stats = dict(stats_data)
                except Exception:
                    pass
                await self._send_stats_via_callback(stats_callback, run_id, stats_data)
                last_update = current_time
            # Check if max time exceeded
            if stats_data['elapsed_time'] >= max_time:
                logger.info("Max time reached, terminating cargo-fuzz")
                process.terminate()
                break
        # Wait for process to complete
        await process.wait()
        # Send final stats update
        try:
            self._last_stats = dict(stats_data)
        except Exception:
            pass
        await self._send_stats_via_callback(stats_callback, run_id, stats_data)
    except Exception as e:
        logger.warning(f"Error processing streaming output: {e}")
    stdout_data = '\n'.join(stdout_lines)
    return stdout_data, ""
def _parse_stats_from_line(self, line: str, stats_data: Dict[str, Any]):
"""Parse statistics from a cargo-fuzz output line"""
try:
# cargo-fuzz typically shows stats like:
# "#12345: DONE cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb"
# "#12345: NEW cov: 1234 ft: 5678 corp: 9/10Mb exec/s: 1500 rss: 234Mb L: 45/67 MS: 3 ..."
# Extract execution count (the #number)
exec_match = re.search(r'#(\d+)(?::)?', line)
if exec_match:
stats_data['executions'] = int(exec_match.group(1))
else:
# libFuzzer stats format alternative
exec_alt = re.search(r'stat::number_of_executed_units:\s*(\d+)', line)
if exec_alt:
stats_data['executions'] = int(exec_alt.group(1))
else:
exec_alt2 = re.search(r'executed units:?\s*(\d+)', line, re.IGNORECASE)
if exec_alt2:
stats_data['executions'] = int(exec_alt2.group(1))
# Extract executions per second
exec_per_sec_match = re.search(r'exec/s:\s*([0-9\.]+)', line)
if exec_per_sec_match:
stats_data['executions_per_sec'] = float(exec_per_sec_match.group(1))
else:
eps_alt = re.search(r'stat::execs_per_sec:\s*([0-9\.]+)', line)
if eps_alt:
stats_data['executions_per_sec'] = float(eps_alt.group(1))
# Extract corpus size (corp: X/YMb)
corp_match = re.search(r'corp(?:us)?:\s*(\d+)', line)
if corp_match:
stats_data['corpus_size'] = int(corp_match.group(1))
# Look for crash indicators
if any(keyword in line.lower() for keyword in ['crash', 'assert', 'panic', 'abort']):
stats_data['crashes'] += 1
except Exception as e:
logger.debug(f"Error parsing stats from line '{line}': {e}")
async def _send_stats_via_callback(self, stats_callback, run_id: str, stats_data: Dict[str, Any]):
    """Forward a snapshot of fuzzing statistics to the Prefect-provided callback.

    Silently returns when either the callback or run_id is missing;
    callback failures are logged at debug level and never propagated.
    """
    if not (stats_callback and run_id):
        return
    try:
        payload = {
            "run_id": run_id,
            "workflow": "language_fuzzing",
            "executions": stats_data['executions'],
            "executions_per_sec": stats_data['executions_per_sec'],
            "crashes": stats_data['crashes'],
            "unique_crashes": stats_data['crashes'],  # Assume all crashes are unique for now
            "corpus_size": stats_data['corpus_size'],
            "elapsed_time": stats_data['elapsed_time'],
            "timestamp": datetime.utcnow().isoformat()
        }
        # Hand the payload to the callback supplied by the Prefect task.
        await stats_callback(payload)
        logger.info(
            "LIVE STATS SENT: exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%s",
            stats_data['executions'],
            stats_data['executions_per_sec'],
            stats_data['crashes'],
            stats_data['corpus_size'],
            stats_data['elapsed_time'],
        )
    except Exception as e:
        logger.debug(f"Error sending stats via callback: {e}")
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
for finding in findings:
severity_counts[finding.severity] += 1
category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"category_counts": category_counts
}

View File

@@ -1,384 +0,0 @@
"""
Go-Fuzz Module
This module uses go-fuzz for coverage-guided fuzzing of Go packages.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class GoFuzzModule(BaseModule):
"""Go-Fuzz Go language fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe the go-fuzz module: identity, tags, and I/O JSON schemas.

    Fix: the boolean schema defaults previously used JSON-style lowercase
    ``false``/``true`` literals, which are NameErrors in Python; they are
    now proper ``False``/``True`` values.
    """
    return ModuleMetadata(
        name="go_fuzz",
        version="1.2.0",
        description="Coverage-guided fuzzing for Go packages using go-fuzz",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["go", "golang", "coverage-guided", "packages"],
        # JSON schema for the user-supplied configuration dict.
        input_schema={
            "type": "object",
            "properties": {
                "package_path": {
                    "type": "string",
                    "description": "Path to Go package to fuzz"
                },
                "fuzz_function": {
                    "type": "string",
                    "default": "Fuzz",
                    "description": "Name of the fuzz function"
                },
                "workdir": {
                    "type": "string",
                    "default": "go_fuzz_workdir",
                    "description": "Working directory for go-fuzz"
                },
                "procs": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of parallel processes"
                },
                "timeout": {
                    "type": "integer",
                    "default": 600,
                    "description": "Total fuzzing timeout (seconds)"
                },
                "race": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable race detector"
                },
                "minimize": {
                    "type": "boolean",
                    "default": True,
                    "description": "Minimize crashers"
                },
                "sonar": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable sonar mode"
                }
            }
        },
        # JSON schema describing the shape of emitted findings.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "crash_type": {"type": "string"},
                            "crash_file": {"type": "string"},
                            "stack_trace": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Require a non-empty ``package_path``; returns True when valid.

    Raises:
        ValueError: if package_path is absent or falsy.
    """
    if not config.get("package_path"):
        raise ValueError("package_path is required")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Run a go-fuzz campaign for the configured package.

    Args:
        config: Per-run options (see get_metadata() input_schema).
        workspace: Root directory containing the Go package.

    Returns:
        ModuleResult with status "success" plus findings and a summary,
        or status "failed" with the error string. Never raises.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running go-fuzz Go fuzzing")
        # Check installation
        await self._check_go_fuzz_installation()
        # Setup: both paths are resolved relative to the workspace.
        package_path = workspace / config["package_path"]
        workdir = workspace / config.get("workdir", "go_fuzz_workdir")
        # Build and run
        findings = await self._run_go_fuzz(package_path, workdir, config, workspace)
        # Create summary
        summary = self._create_summary(findings)
        logger.info(f"go-fuzz found {len(findings)} issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"go-fuzz module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _check_go_fuzz_installation(self):
    """Ensure the go-fuzz binary is available, installing it on demand.

    Fix: previously a missing ``go-fuzz`` binary raised FileNotFoundError
    from create_subprocess_exec, which skipped the install fallback
    entirely; the install result was also never checked. Now a missing or
    unusable binary triggers ``go install``, and an install failure is
    reported explicitly.

    Raises:
        RuntimeError: if go-fuzz is unavailable and cannot be installed.
    """
    try:
        probe = None
        try:
            probe = await asyncio.create_subprocess_exec(
                "go-fuzz", "--help",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            await probe.communicate()
        except FileNotFoundError:
            # Binary not on PATH; fall through to installation.
            probe = None
        if probe is None or probe.returncode != 0:
            # Try installing via the Go toolchain.
            installer = await asyncio.create_subprocess_exec(
                "go", "install", "github.com/dvyukov/go-fuzz/go-fuzz@latest",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            _, install_err = await installer.communicate()
            if installer.returncode != 0:
                raise RuntimeError(
                    f"go install go-fuzz failed: {install_err.decode(errors='ignore')}"
                )
    except RuntimeError:
        raise
    except Exception as e:
        raise RuntimeError(f"go-fuzz installation failed: {e}")
async def _run_go_fuzz(self, package_path: Path, workdir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Build the fuzz archive and run go-fuzz, then collect findings.

    Fix: go-fuzz-build writes ``<pkg>-fuzz.zip`` into the package
    directory, but the previous revision passed ``-bin`` as a bare
    filename while running from ``package_path.parent`` — the relative
    path resolved to the wrong directory. The archive is now referenced
    by absolute path and go-fuzz runs from the package directory.
    """
    findings = []
    try:
        # Create workdir
        workdir.mkdir(exist_ok=True)
        # Build the instrumented fuzz archive first.
        await self._build_go_fuzz(package_path, config)
        # Absolute path so the subprocess cwd cannot break resolution.
        bin_path = package_path / f"{package_path.name}-fuzz.zip"
        cmd = ["go-fuzz", "-bin", str(bin_path), "-workdir", str(workdir)]
        if config.get("procs", 1) > 1:
            cmd.extend(["-procs", str(config["procs"])])
        if config.get("race", False):
            cmd.append("-race")
        if config.get("sonar", False):
            cmd.append("-sonar")
        timeout = config.get("timeout", 600)
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=package_path
            )
            try:
                await asyncio.wait_for(process.communicate(), timeout=timeout)
            except asyncio.TimeoutError:
                # go-fuzz runs until killed; hitting the timeout is the
                # normal way a campaign ends.
                process.terminate()
                await process.wait()
            # Parse whatever the campaign left in the workdir.
            findings = self._parse_go_fuzz_results(workdir, workspace, config)
        except Exception as e:
            logger.warning(f"Error running go-fuzz: {e}")
    except Exception as e:
        logger.warning(f"Error in go-fuzz execution: {e}")
    return findings
async def _build_go_fuzz(self, package_path: Path, config: Dict[str, Any]):
    """Run go-fuzz-build for the package, honoring race/function options.

    Fix: the input schema declares a ``fuzz_function`` option but it was
    never passed to go-fuzz-build; a non-default value is now forwarded
    via ``-func``. The default "Fuzz" needs no flag, preserving previous
    behavior.

    Raises:
        RuntimeError: if go-fuzz-build exits non-zero.
    """
    cmd = ["go-fuzz-build"]
    if config.get("race", False):
        cmd.append("-race")
    fuzz_function = config.get("fuzz_function", "Fuzz")
    if fuzz_function and fuzz_function != "Fuzz":
        cmd.extend(["-func", fuzz_function])
    process = await asyncio.create_subprocess_exec(
        *cmd,
        cwd=package_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    stdout, stderr = await process.communicate()
    if process.returncode != 0:
        raise RuntimeError(f"go-fuzz-build failed: {stderr.decode()}")
def _parse_go_fuzz_results(self, workdir: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
    """Scan the go-fuzz workdir for crashers and suppressions."""
    findings: List[ModuleFinding] = []
    try:
        # Crashers: actual reproduced failures (skip dotfiles).
        crashers_dir = workdir / "crashers"
        if crashers_dir.exists():
            for entry in crashers_dir.iterdir():
                if not entry.is_file() or entry.name.startswith("."):
                    continue
                result = self._create_crash_finding(entry, workspace)
                if result:
                    findings.append(result)
        # Suppressions: issues go-fuzz chose not to report again.
        suppressions_dir = workdir / "suppressions"
        if suppressions_dir.exists():
            for entry in suppressions_dir.iterdir():
                if not entry.is_file():
                    continue
                result = self._create_suppression_finding(entry, workspace)
                if result:
                    findings.append(result)
    except Exception as e:
        logger.warning(f"Error parsing go-fuzz results: {e}")
    return findings
def _create_crash_finding(self, crash_file: Path, workspace: Path) -> ModuleFinding:
    """Build a finding from one crasher file in the go-fuzz workdir.

    Returns None if the finding cannot be constructed.
    """
    try:
        # Only .output files carry the reproduced panic text.
        crash_content = crash_file.read_text() if crash_file.name.endswith(".output") else ""
        # Classify by content; plain panic is the default.
        if "runtime error" in crash_content:
            crash_type = "runtime_error"
        elif "race" in crash_content:
            crash_type = "race_condition"
        else:
            crash_type = "panic"
        try:
            file_path = str(crash_file.relative_to(workspace))
        except ValueError:
            file_path = str(crash_file)
        return self.create_finding(
            title=f"go-fuzz {crash_type.title()}",
            description=f"go-fuzz discovered a {crash_type} in the Go code",
            severity=self._get_crash_severity(crash_type),
            category=self._get_crash_category(crash_type),
            file_path=file_path,
            recommendation=self._get_crash_recommendation(crash_type),
            metadata={
                "crash_type": crash_type,
                "crash_file": str(crash_file),
                "stack_trace": crash_content[:1000],
                "fuzzer": "go_fuzz"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating crash finding: {e}")
        return None
def _create_suppression_finding(self, supp_file: Path, workspace: Path) -> ModuleFinding:
    """Build a low-severity finding for an issue go-fuzz suppressed.

    Returns None if the finding cannot be constructed.
    """
    try:
        try:
            file_path = str(supp_file.relative_to(workspace))
        except ValueError:
            file_path = str(supp_file)
        return self.create_finding(
            title="go-fuzz Potential Issue",
            description="go-fuzz identified a potential issue that was suppressed",
            severity="low",
            category="potential_issue",
            file_path=file_path,
            recommendation="Review suppressed issue to determine if it requires attention.",
            metadata={
                "suppression_file": str(supp_file),
                "fuzzer": "go_fuzz"
            }
        )
    except Exception as e:
        logger.warning(f"Error creating suppression finding: {e}")
        return None
def _get_crash_severity(self, crash_type: str) -> str:
"""Get crash severity"""
if crash_type == "race_condition":
return "high"
elif crash_type == "runtime_error":
return "high"
else:
return "medium"
def _get_crash_category(self, crash_type: str) -> str:
"""Get crash category"""
if crash_type == "race_condition":
return "race_condition"
elif crash_type == "runtime_error":
return "runtime_error"
else:
return "program_crash"
def _get_crash_recommendation(self, crash_type: str) -> str:
"""Get crash recommendation"""
if crash_type == "race_condition":
return "Fix race condition by adding proper synchronization (mutexes, channels, etc.)"
elif crash_type == "runtime_error":
return "Fix runtime error by adding bounds checking and proper error handling"
else:
return "Analyze the crash and fix the underlying issue"
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
for finding in findings:
severity_counts[finding.severity] += 1
category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"category_counts": category_counts
}

View File

@@ -1,705 +0,0 @@
"""
LibFuzzer Fuzzing Module
This module uses LibFuzzer (LLVM's coverage-guided fuzzing engine) to find
bugs and security vulnerabilities in C/C++ code.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
import re
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class LibFuzzerModule(BaseModule):
"""LibFuzzer coverage-guided fuzzing module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe the LibFuzzer module: identity, tags, and I/O JSON schemas.

    Fix: ten boolean schema defaults previously used JSON-style lowercase
    ``true``/``false`` literals, which are NameErrors in Python; they are
    now proper ``True``/``False`` values.
    """
    return ModuleMetadata(
        name="libfuzzer",
        version="17.0.0",
        description="LLVM's coverage-guided fuzzing engine for finding bugs in C/C++ code",
        author="FuzzForge Team",
        category="fuzzing",
        tags=["coverage-guided", "c", "cpp", "llvm", "sanitizers", "memory-safety"],
        # JSON schema for the user-supplied configuration dict. Option names
        # mirror the corresponding libFuzzer command-line flags.
        input_schema={
            "type": "object",
            "properties": {
                "target_binary": {
                    "type": "string",
                    "description": "Path to the fuzz target binary (compiled with -fsanitize=fuzzer)"
                },
                "corpus_dir": {
                    "type": "string",
                    "description": "Directory containing initial corpus files"
                },
                "dict_file": {
                    "type": "string",
                    "description": "Dictionary file for fuzzing keywords"
                },
                "max_total_time": {
                    "type": "integer",
                    "default": 600,
                    "description": "Maximum total time to run fuzzing (seconds)"
                },
                "max_len": {
                    "type": "integer",
                    "default": 4096,
                    "description": "Maximum length of test input"
                },
                "timeout": {
                    "type": "integer",
                    "default": 25,
                    "description": "Timeout for individual test cases (seconds)"
                },
                "runs": {
                    "type": "integer",
                    "default": -1,
                    "description": "Number of individual test runs (-1 for unlimited)"
                },
                "jobs": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of fuzzing jobs to run in parallel"
                },
                "workers": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of workers for parallel fuzzing"
                },
                "reload": {
                    "type": "integer",
                    "default": 1,
                    "description": "Reload the main corpus periodically"
                },
                "print_final_stats": {
                    "type": "boolean",
                    "default": True,
                    "description": "Print final statistics"
                },
                "print_pcs": {
                    "type": "boolean",
                    "default": False,
                    "description": "Print newly covered PCs"
                },
                "print_funcs": {
                    "type": "boolean",
                    "default": False,
                    "description": "Print newly covered functions"
                },
                "print_coverage": {
                    "type": "boolean",
                    "default": True,
                    "description": "Print coverage information"
                },
                "shrink": {
                    "type": "boolean",
                    "default": True,
                    "description": "Try to shrink the corpus"
                },
                "reduce_inputs": {
                    "type": "boolean",
                    "default": True,
                    "description": "Try to reduce the size of inputs"
                },
                "use_value_profile": {
                    "type": "boolean",
                    "default": False,
                    "description": "Use value profile for fuzzing"
                },
                "sanitizers": {
                    "type": "array",
                    "items": {"type": "string", "enum": ["address", "memory", "undefined", "thread", "leak"]},
                    "default": ["address"],
                    "description": "Sanitizers to use during fuzzing"
                },
                "artifact_prefix": {
                    "type": "string",
                    "default": "crash-",
                    "description": "Prefix for artifact files"
                },
                "exact_artifact_path": {
                    "type": "string",
                    "description": "Exact path for artifact files"
                },
                "fork": {
                    "type": "integer",
                    "default": 0,
                    "description": "Fork mode (number of simultaneous processes)"
                },
                "ignore_crashes": {
                    "type": "boolean",
                    "default": False,
                    "description": "Ignore crashes and continue fuzzing"
                },
                "ignore_timeouts": {
                    "type": "boolean",
                    "default": False,
                    "description": "Ignore timeouts and continue fuzzing"
                },
                "ignore_ooms": {
                    "type": "boolean",
                    "default": False,
                    "description": "Ignore out-of-memory and continue fuzzing"
                }
            }
        },
        # JSON schema describing the shape of emitted findings.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "crash_type": {"type": "string"},
                            "crash_file": {"type": "string"},
                            "stack_trace": {"type": "string"},
                            "sanitizer": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Check the LibFuzzer run configuration.

    Requires a non-empty ``target_binary`` and a positive
    ``max_total_time`` (defaulting to 600 seconds).

    Raises:
        ValueError: on missing target or non-positive time budget.

    Returns:
        True when the configuration is acceptable.
    """
    if not config.get("target_binary"):
        raise ValueError("target_binary is required for LibFuzzer")
    if config.get("max_total_time", 600) <= 0:
        raise ValueError("max_total_time must be positive")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Run a LibFuzzer campaign against the configured target binary.

    Args:
        config: Per-run options (see get_metadata() input_schema).
        workspace: Root directory containing the target binary and corpus.

    Returns:
        ModuleResult with status "success" plus findings and a summary,
        or status "failed" with the error string. Never raises.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running LibFuzzer fuzzing campaign")
        # Check if target binary exists (resolved relative to workspace).
        target_binary = workspace / config["target_binary"]
        if not target_binary.exists():
            raise FileNotFoundError(f"Target binary not found: {target_binary}")
        # Run LibFuzzer
        findings = await self._run_libfuzzer(target_binary, config, workspace)
        # Create summary
        summary = self._create_summary(findings)
        logger.info(f"LibFuzzer found {len(findings)} issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"LibFuzzer module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
    async def _run_libfuzzer(self, target_binary: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Assemble and run the LibFuzzer command, then harvest findings.

        The target binary is executed directly as the fuzzer (LibFuzzer
        targets embed the engine). Positional corpus directory first, then
        `-flag=value` options built from *config*; artifacts land in
        ``workspace/libfuzzer_output``. Errors are logged, never raised.

        Args:
            target_binary: Path to the instrumented fuzz-target executable.
            config: Validated module configuration (see input schema).
            workspace: Analysis workspace used as the subprocess cwd.

        Returns:
            Findings parsed from console output plus artifact files;
            empty list on failure.
        """
        findings = []
        try:
            # Create output directory for artifacts
            output_dir = workspace / "libfuzzer_output"
            output_dir.mkdir(exist_ok=True)
            # Build LibFuzzer command — argv[0] is the fuzz target itself
            cmd = [str(target_binary)]
            # Add corpus directory (positional argument; missing dir is non-fatal)
            corpus_dir = config.get("corpus_dir")
            if corpus_dir:
                corpus_path = workspace / corpus_dir
                if corpus_path.exists():
                    cmd.append(str(corpus_path))
                else:
                    logger.warning(f"Corpus directory not found: {corpus_path}")
            # Add dictionary file (silently skipped when absent)
            dict_file = config.get("dict_file")
            if dict_file:
                dict_path = workspace / dict_file
                if dict_path.exists():
                    cmd.append(f"-dict={dict_path}")
            # Add fuzzing parameters (defaults mirror the input schema)
            cmd.append(f"-max_total_time={config.get('max_total_time', 600)}")
            cmd.append(f"-max_len={config.get('max_len', 4096)}")
            cmd.append(f"-timeout={config.get('timeout', 25)}")
            cmd.append(f"-runs={config.get('runs', -1)}")
            if config.get("jobs", 1) > 1:
                cmd.append(f"-jobs={config['jobs']}")
            if config.get("workers", 1) > 1:
                cmd.append(f"-workers={config['workers']}")
            cmd.append(f"-reload={config.get('reload', 1)}")
            # Add output options
            if config.get("print_final_stats", True):
                cmd.append("-print_final_stats=1")
            if config.get("print_pcs", False):
                cmd.append("-print_pcs=1")
            if config.get("print_funcs", False):
                cmd.append("-print_funcs=1")
            if config.get("print_coverage", True):
                cmd.append("-print_coverage=1")
            # Add corpus management options
            if config.get("shrink", True):
                cmd.append("-shrink=1")
            if config.get("reduce_inputs", True):
                cmd.append("-reduce_inputs=1")
            if config.get("use_value_profile", False):
                cmd.append("-use_value_profile=1")
            # Add artifact options — crash inputs are written under output_dir
            artifact_prefix = config.get("artifact_prefix", "crash-")
            cmd.append(f"-artifact_prefix={output_dir / artifact_prefix}")
            exact_artifact_path = config.get("exact_artifact_path")
            if exact_artifact_path:
                cmd.append(f"-exact_artifact_path={output_dir / exact_artifact_path}")
            # Add fork mode (0 disables it)
            fork = config.get("fork", 0)
            if fork > 0:
                cmd.append(f"-fork={fork}")
            # Add ignore options (keep fuzzing past crashes/timeouts/OOMs)
            if config.get("ignore_crashes", False):
                cmd.append("-ignore_crashes=1")
            if config.get("ignore_timeouts", False):
                cmd.append("-ignore_timeouts=1")
            if config.get("ignore_ooms", False):
                cmd.append("-ignore_ooms=1")
            # Set up environment for sanitizers (ASAN_OPTIONS etc.)
            env = os.environ.copy()
            sanitizers = config.get("sanitizers", ["address"])
            self._setup_sanitizer_environment(env, sanitizers)
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run LibFuzzer as a subprocess; no extra timeout here because
            # -max_total_time bounds the run from inside the fuzzer.
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace,
                env=env
            )
            stdout, stderr = await process.communicate()
            # Parse results from the combined console output
            findings = self._parse_libfuzzer_output(
                stdout.decode(), stderr.decode(), output_dir, workspace, sanitizers
            )
            # Look for crash files written via -artifact_prefix
            crash_findings = self._parse_crash_files(output_dir, workspace, sanitizers)
            findings.extend(crash_findings)
        except Exception as e:
            logger.warning(f"Error running LibFuzzer: {e}")
        return findings
def _setup_sanitizer_environment(self, env: Dict[str, str], sanitizers: List[str]):
"""Set up environment variables for sanitizers"""
if "address" in sanitizers:
env["ASAN_OPTIONS"] = env.get("ASAN_OPTIONS", "") + ":halt_on_error=0:abort_on_error=1"
if "memory" in sanitizers:
env["MSAN_OPTIONS"] = env.get("MSAN_OPTIONS", "") + ":halt_on_error=0:abort_on_error=1"
if "undefined" in sanitizers:
env["UBSAN_OPTIONS"] = env.get("UBSAN_OPTIONS", "") + ":halt_on_error=0:abort_on_error=1"
if "thread" in sanitizers:
env["TSAN_OPTIONS"] = env.get("TSAN_OPTIONS", "") + ":halt_on_error=0:abort_on_error=1"
if "leak" in sanitizers:
env["LSAN_OPTIONS"] = env.get("LSAN_OPTIONS", "") + ":halt_on_error=0:abort_on_error=1"
    def _parse_libfuzzer_output(self, stdout: str, stderr: str, output_dir: Path, workspace: Path, sanitizers: List[str]) -> List[ModuleFinding]:
        """Scan the fuzzer's console output for sanitizer errors and issues.

        Applies a fixed list of crash regexes over the concatenated
        stdout/stderr, then adds at most one timeout and one OOM finding when
        those markers appear anywhere in the output. Findings are ordered by
        pattern, not by position in the output. Parse errors are logged and
        yield whatever was collected so far.
        """
        findings = []
        try:
            # Combine stdout and stderr for analysis — sanitizers write to
            # stderr while libFuzzer status goes to stdout.
            full_output = stdout + "\n" + stderr
            # Look for crash indicators (group 1 captures the crash type)
            crash_patterns = [
                r"ERROR: AddressSanitizer: (.+)",
                r"ERROR: MemorySanitizer: (.+)",
                r"ERROR: UndefinedBehaviorSanitizer: (.+)",
                r"ERROR: ThreadSanitizer: (.+)",
                r"ERROR: LeakSanitizer: (.+)",
                r"SUMMARY: (.+Sanitizer): (.+)",
                r"==\d+==ERROR: libFuzzer: (.+)"
            ]
            for pattern in crash_patterns:
                matches = re.finditer(pattern, full_output, re.MULTILINE)
                for match in matches:
                    finding = self._create_crash_finding(
                        match, full_output, output_dir, sanitizers
                    )
                    if finding:
                        findings.append(finding)
            # Look for timeout and OOM issues (coarse substring checks; at
            # most one finding of each kind per run)
            if "TIMEOUT" in full_output:
                finding = self._create_timeout_finding(full_output, output_dir)
                if finding:
                    findings.append(finding)
            if "out-of-memory" in full_output.lower() or "oom" in full_output.lower():
                finding = self._create_oom_finding(full_output, output_dir)
                if finding:
                    findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing LibFuzzer output: {e}")
        return findings
def _parse_crash_files(self, output_dir: Path, workspace: Path, sanitizers: List[str]) -> List[ModuleFinding]:
"""Parse crash artifact files"""
findings = []
try:
# Look for crash files
crash_patterns = ["crash-*", "leak-*", "timeout-*", "oom-*"]
for pattern in crash_patterns:
crash_files = list(output_dir.glob(pattern))
for crash_file in crash_files:
finding = self._create_artifact_finding(crash_file, workspace, sanitizers)
if finding:
findings.append(finding)
except Exception as e:
logger.warning(f"Error parsing crash files: {e}")
return findings
    def _create_crash_finding(self, match, full_output: str, output_dir: Path, sanitizers: List[str]) -> ModuleFinding:
        """Build a finding from one regex match over the fuzzer output.

        *match* is an ``re.Match`` produced by _parse_libfuzzer_output; group 1
        is the crash type and the match position anchors the stack-trace scan.
        Returns None (after logging) if finding construction fails.
        """
        try:
            crash_type = match.group(1) if match.groups() else "Unknown crash"
            # Extract stack trace starting at the crash marker's position
            stack_trace = self._extract_stack_trace(full_output, match.start())
            # Determine sanitizer from the full matched line
            sanitizer = self._identify_sanitizer(match.group(0), sanitizers)
            # Determine severity based on crash type
            severity = self._get_crash_severity(crash_type, sanitizer)
            # Create finding
            finding = self.create_finding(
                title=f"LibFuzzer Crash: {crash_type}",
                description=f"LibFuzzer detected a crash with {sanitizer}: {crash_type}",
                severity=severity,
                category=self._get_crash_category(crash_type),
                file_path=None,  # LibFuzzer doesn't always provide specific files
                recommendation=self._get_crash_recommendation(crash_type, sanitizer),
                metadata={
                    "crash_type": crash_type,
                    "sanitizer": sanitizer,
                    "stack_trace": stack_trace[:2000] if stack_trace else "",  # Limit size
                    "fuzzer": "libfuzzer"
                }
            )
            return finding
        except Exception as e:
            logger.warning(f"Error creating crash finding: {e}")
            return None
def _create_timeout_finding(self, output: str, output_dir: Path) -> ModuleFinding:
"""Create finding for timeout issues"""
try:
finding = self.create_finding(
title="LibFuzzer Timeout",
description="LibFuzzer detected a timeout during fuzzing, indicating potential infinite loop or performance issue",
severity="medium",
category="performance_issues",
file_path=None,
recommendation="Review the code for potential infinite loops, excessive computation, or blocking operations that could cause timeouts.",
metadata={
"issue_type": "timeout",
"fuzzer": "libfuzzer"
}
)
return finding
except Exception as e:
logger.warning(f"Error creating timeout finding: {e}")
return None
def _create_oom_finding(self, output: str, output_dir: Path) -> ModuleFinding:
"""Create finding for out-of-memory issues"""
try:
finding = self.create_finding(
title="LibFuzzer Out-of-Memory",
description="LibFuzzer detected an out-of-memory condition during fuzzing, indicating potential memory leak or excessive allocation",
severity="medium",
category="memory_management",
file_path=None,
recommendation="Review memory allocation patterns, check for memory leaks, and consider implementing proper bounds checking.",
metadata={
"issue_type": "out_of_memory",
"fuzzer": "libfuzzer"
}
)
return finding
except Exception as e:
logger.warning(f"Error creating OOM finding: {e}")
return None
def _create_artifact_finding(self, crash_file: Path, workspace: Path, sanitizers: List[str]) -> ModuleFinding:
"""Create finding from crash artifact file"""
try:
crash_type = crash_file.name.split('-')[0] # e.g., "crash", "leak", "timeout"
# Try to read crash file content (limited)
crash_content = ""
try:
crash_content = crash_file.read_bytes()[:1000].decode('utf-8', errors='ignore')
except Exception:
pass
# Determine severity
severity = self._get_artifact_severity(crash_type)
finding = self.create_finding(
title=f"LibFuzzer Artifact: {crash_type}",
description=f"LibFuzzer generated a {crash_type} artifact file indicating a potential issue",
severity=severity,
category=self._get_crash_category(crash_type),
file_path=str(crash_file.relative_to(workspace)),
recommendation=self._get_artifact_recommendation(crash_type),
metadata={
"artifact_type": crash_type,
"artifact_file": str(crash_file.name),
"crash_content_preview": crash_content,
"fuzzer": "libfuzzer"
}
)
return finding
except Exception as e:
logger.warning(f"Error creating artifact finding: {e}")
return None
def _extract_stack_trace(self, output: str, start_pos: int) -> str:
"""Extract stack trace from output"""
try:
lines = output[start_pos:].split('\n')
stack_lines = []
for line in lines[:50]: # Limit to first 50 lines
if any(indicator in line for indicator in ["#0", "#1", "#2", "at ", "in "]):
stack_lines.append(line.strip())
elif stack_lines and not line.strip():
break
return '\n'.join(stack_lines)
except Exception:
return ""
def _identify_sanitizer(self, crash_line: str, sanitizers: List[str]) -> str:
"""Identify which sanitizer detected the issue"""
crash_lower = crash_line.lower()
if "addresssanitizer" in crash_lower:
return "AddressSanitizer"
elif "memorysanitizer" in crash_lower:
return "MemorySanitizer"
elif "undefinedbehaviorsanitizer" in crash_lower:
return "UndefinedBehaviorSanitizer"
elif "threadsanitizer" in crash_lower:
return "ThreadSanitizer"
elif "leaksanitizer" in crash_lower:
return "LeakSanitizer"
elif "libfuzzer" in crash_lower:
return "LibFuzzer"
else:
return "Unknown"
def _get_crash_severity(self, crash_type: str, sanitizer: str) -> str:
"""Determine severity based on crash type and sanitizer"""
crash_lower = crash_type.lower()
# Critical issues
if any(term in crash_lower for term in ["heap-buffer-overflow", "stack-buffer-overflow", "use-after-free", "double-free"]):
return "critical"
# High severity issues
elif any(term in crash_lower for term in ["heap-use-after-free", "stack-use-after-return", "global-buffer-overflow"]):
return "high"
# Medium severity issues
elif any(term in crash_lower for term in ["uninitialized", "leak", "race", "deadlock"]):
return "medium"
# Default to high for any crash
else:
return "high"
def _get_crash_category(self, crash_type: str) -> str:
"""Determine category based on crash type"""
crash_lower = crash_type.lower()
if any(term in crash_lower for term in ["buffer-overflow", "heap-buffer", "stack-buffer", "global-buffer"]):
return "buffer_overflow"
elif any(term in crash_lower for term in ["use-after-free", "double-free", "invalid-free"]):
return "memory_corruption"
elif any(term in crash_lower for term in ["uninitialized", "uninit"]):
return "uninitialized_memory"
elif any(term in crash_lower for term in ["leak"]):
return "memory_leak"
elif any(term in crash_lower for term in ["race", "data-race"]):
return "race_condition"
elif any(term in crash_lower for term in ["timeout"]):
return "performance_issues"
elif any(term in crash_lower for term in ["oom", "out-of-memory"]):
return "memory_management"
else:
return "memory_safety"
def _get_artifact_severity(self, artifact_type: str) -> str:
"""Determine severity for artifact types"""
if artifact_type == "crash":
return "high"
elif artifact_type == "leak":
return "medium"
elif artifact_type in ["timeout", "oom"]:
return "medium"
else:
return "low"
def _get_crash_recommendation(self, crash_type: str, sanitizer: str) -> str:
"""Generate recommendation based on crash type"""
crash_lower = crash_type.lower()
if "buffer-overflow" in crash_lower:
return "Fix buffer overflow by implementing proper bounds checking, using safe string functions, and validating array indices."
elif "use-after-free" in crash_lower:
return "Fix use-after-free by setting pointers to NULL after freeing, using smart pointers, or redesigning object lifetime management."
elif "double-free" in crash_lower:
return "Fix double-free by ensuring each allocation has exactly one corresponding free, or use RAII patterns."
elif "uninitialized" in crash_lower:
return "Initialize all variables before use and ensure proper constructor implementation."
elif "leak" in crash_lower:
return "Fix memory leak by ensuring all allocated memory is properly freed, use smart pointers, or implement proper cleanup routines."
elif "race" in crash_lower:
return "Fix data race by using proper synchronization mechanisms like mutexes, atomic operations, or lock-free data structures."
else:
return f"Address the {crash_type} issue detected by {sanitizer}. Review code for memory safety and proper resource management."
def _get_artifact_recommendation(self, artifact_type: str) -> str:
"""Generate recommendation for artifact types"""
if artifact_type == "crash":
return "Analyze the crash artifact file to reproduce the issue and identify the root cause. Fix the underlying bug that caused the crash."
elif artifact_type == "leak":
return "Investigate the memory leak by analyzing allocation patterns and ensuring proper cleanup of resources."
elif artifact_type == "timeout":
return "Optimize code performance to prevent timeouts, check for infinite loops, and implement reasonable time limits."
elif artifact_type == "oom":
return "Reduce memory usage, implement proper memory management, and add bounds checking for allocations."
else:
return f"Analyze the {artifact_type} artifact to understand and fix the underlying issue."
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
sanitizer_counts = {}
crash_type_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by sanitizer
sanitizer = finding.metadata.get("sanitizer", "unknown")
sanitizer_counts[sanitizer] = sanitizer_counts.get(sanitizer, 0) + 1
# Count by crash type
crash_type = finding.metadata.get("crash_type", finding.metadata.get("issue_type", "unknown"))
crash_type_counts[crash_type] = crash_type_counts.get(crash_type, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"category_counts": category_counts,
"sanitizer_counts": sanitizer_counts,
"crash_type_counts": crash_type_counts,
"memory_safety_issues": category_counts.get("memory_safety", 0) +
category_counts.get("buffer_overflow", 0) +
category_counts.get("memory_corruption", 0),
"performance_issues": category_counts.get("performance_issues", 0)
}

View File

@@ -1,547 +0,0 @@
"""
OSS-Fuzz Module
This module integrates with Google's OSS-Fuzz for continuous fuzzing
of open source projects.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class OSSFuzzModule(BaseModule):
    """OSS-Fuzz continuous fuzzing module.

    Wraps Google's OSS-Fuzz ``infra/helper.py`` workflow: clones the
    OSS-Fuzz repository into the workspace, generates minimal project files
    when missing, builds the project image via Docker, runs a bounded
    fuzzing session, and converts sanitizer output into findings.
    """
    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata"""
        return ModuleMetadata(
            name="oss_fuzz",
            version="1.0.0",
            description="Google's continuous fuzzing for open source projects integration",
            author="FuzzForge Team",
            category="fuzzing",
            tags=["oss-fuzz", "continuous", "google", "open-source", "docker"],
            input_schema={
                "type": "object",
                "properties": {
                    "project_name": {
                        "type": "string",
                        "description": "OSS-Fuzz project name"
                    },
                    "source_dir": {
                        "type": "string",
                        "description": "Source directory to fuzz"
                    },
                    "build_script": {
                        "type": "string",
                        "default": "build.sh",
                        "description": "Build script path"
                    },
                    "dockerfile": {
                        "type": "string",
                        "default": "Dockerfile",
                        "description": "Dockerfile path"
                    },
                    "project_yaml": {
                        "type": "string",
                        "default": "project.yaml",
                        "description": "Project configuration file"
                    },
                    "sanitizer": {
                        "type": "string",
                        "enum": ["address", "memory", "undefined", "coverage"],
                        "default": "address",
                        "description": "Sanitizer to use"
                    },
                    "architecture": {
                        "type": "string",
                        "enum": ["x86_64", "i386"],
                        "default": "x86_64",
                        "description": "Target architecture"
                    },
                    "fuzzing_engine": {
                        "type": "string",
                        "enum": ["libfuzzer", "afl", "honggfuzz"],
                        "default": "libfuzzer",
                        "description": "Fuzzing engine to use"
                    },
                    "timeout": {
                        "type": "integer",
                        "default": 3600,
                        "description": "Fuzzing timeout (seconds)"
                    },
                    "check_build": {
                        "type": "boolean",
                        # BUG FIX: was the bare name `true` (JSON syntax), which
                        # raised NameError as soon as get_metadata() executed.
                        "default": True,
                        "description": "Check if build is successful"
                    },
                    "reproduce_bugs": {
                        "type": "boolean",
                        # BUG FIX: was the bare name `false` (JSON syntax).
                        "default": False,
                        "description": "Try to reproduce existing bugs"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "bug_type": {"type": "string"},
                                "reproducer": {"type": "string"},
                                "stack_trace": {"type": "string"},
                                "sanitizer": {"type": "string"}
                            }
                        }
                    }
                }
            }
        )
    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration"""
        project_name = config.get("project_name")
        if not project_name:
            raise ValueError("project_name is required")
        source_dir = config.get("source_dir")
        if not source_dir:
            raise ValueError("source_dir is required")
        return True
    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute OSS-Fuzz integration"""
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info("Running OSS-Fuzz integration")
            # Check Docker
            await self._check_docker()
            # Clone/update OSS-Fuzz if needed
            oss_fuzz_dir = await self._setup_oss_fuzz(workspace)
            # Setup project
            await self._setup_project(oss_fuzz_dir, config, workspace)
            # Build and run
            findings = await self._run_oss_fuzz(oss_fuzz_dir, config, workspace)
            # Create summary
            summary = self._create_summary(findings)
            logger.info(f"OSS-Fuzz found {len(findings)} issues")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"OSS-Fuzz module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )
    async def _check_docker(self):
        """Check if Docker is available"""
        try:
            process = await asyncio.create_subprocess_exec(
                "docker", "--version",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError("Docker not available. OSS-Fuzz requires Docker.")
        except Exception as e:
            raise RuntimeError(f"Docker check failed: {e}")
    async def _setup_oss_fuzz(self, workspace: Path) -> Path:
        """Setup OSS-Fuzz repository"""
        oss_fuzz_dir = workspace / "oss-fuzz"
        if not oss_fuzz_dir.exists():
            logger.info("Cloning OSS-Fuzz repository")
            process = await asyncio.create_subprocess_exec(
                "git", "clone", "https://github.com/google/oss-fuzz.git",
                cwd=workspace,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()
            if process.returncode != 0:
                raise RuntimeError(f"Failed to clone OSS-Fuzz: {stderr.decode()}")
        return oss_fuzz_dir
    async def _setup_project(self, oss_fuzz_dir: Path, config: Dict[str, Any], workspace: Path):
        """Setup OSS-Fuzz project"""
        project_name = config["project_name"]
        project_dir = oss_fuzz_dir / "projects" / project_name
        # Create project directory if it doesn't exist
        project_dir.mkdir(parents=True, exist_ok=True)
        # Copy source if provided
        source_dir = workspace / config["source_dir"]
        if source_dir.exists():
            # Create symlink or copy source
            logger.info(f"Setting up source directory: {source_dir}")
        # Setup required files if they don't exist
        await self._create_project_files(project_dir, config, workspace)
    async def _create_project_files(self, project_dir: Path, config: Dict[str, Any], workspace: Path):
        """Create required OSS-Fuzz project files"""
        # Create Dockerfile if it doesn't exist
        dockerfile = project_dir / config.get("dockerfile", "Dockerfile")
        if not dockerfile.exists():
            dockerfile_content = f'''FROM gcr.io/oss-fuzz-base/base-builder
COPY . $SRC/{config["project_name"]}
WORKDIR $SRC/{config["project_name"]}
COPY {config.get("build_script", "build.sh")} $SRC/
'''
            dockerfile.write_text(dockerfile_content)
        # Create build.sh if it doesn't exist
        build_script = project_dir / config.get("build_script", "build.sh")
        if not build_script.exists():
            build_content = f'''#!/bin/bash -eu
# Build script for {config["project_name"]}
# Add your build commands here
echo "Building {config['project_name']}..."
'''
            build_script.write_text(build_content)
            build_script.chmod(0o755)
        # Create project.yaml if it doesn't exist
        project_yaml = project_dir / config.get("project_yaml", "project.yaml")
        if not project_yaml.exists():
            yaml_content = f'''homepage: "https://example.com"
language: c++
primary_contact: "security@example.com"
auto_ccs:
- "fuzzing@example.com"
sanitizers:
- {config.get("sanitizer", "address")}
architectures:
- {config.get("architecture", "x86_64")}
fuzzing_engines:
- {config.get("fuzzing_engine", "libfuzzer")}
'''
            project_yaml.write_text(yaml_content)
    async def _run_oss_fuzz(self, oss_fuzz_dir: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
        """Run OSS-Fuzz"""
        findings = []
        try:
            project_name = config["project_name"]
            sanitizer = config.get("sanitizer", "address")
            architecture = config.get("architecture", "x86_64")
            # Build project
            if config.get("check_build", True):
                await self._build_project(oss_fuzz_dir, project_name, sanitizer, architecture)
                # Check build
                await self._check_build(oss_fuzz_dir, project_name, sanitizer, architecture)
            # Run fuzzing (limited time for this integration)
            timeout = min(config.get("timeout", 300), 300)  # Max 5 minutes for demo
            findings = await self._run_fuzzing(oss_fuzz_dir, project_name, sanitizer, timeout, workspace)
            # Reproduce bugs if requested
            if config.get("reproduce_bugs", False):
                repro_findings = await self._reproduce_bugs(oss_fuzz_dir, project_name, workspace)
                findings.extend(repro_findings)
        except Exception as e:
            logger.warning(f"Error running OSS-Fuzz: {e}")
        return findings
    async def _build_project(self, oss_fuzz_dir: Path, project_name: str, sanitizer: str, architecture: str):
        """Build OSS-Fuzz project"""
        cmd = [
            "python3", "infra/helper.py", "build_image", project_name
        ]
        process = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=oss_fuzz_dir,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            logger.warning(f"Build image failed: {stderr.decode()}")
    async def _check_build(self, oss_fuzz_dir: Path, project_name: str, sanitizer: str, architecture: str):
        """Check OSS-Fuzz build"""
        cmd = [
            "python3", "infra/helper.py", "check_build", project_name
        ]
        process = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=oss_fuzz_dir,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            logger.warning(f"Build check failed: {stderr.decode()}")
    async def _run_fuzzing(self, oss_fuzz_dir: Path, project_name: str, sanitizer: str, timeout: int, workspace: Path) -> List[ModuleFinding]:
        """Run OSS-Fuzz fuzzing"""
        findings = []
        try:
            # This is a simplified version - real OSS-Fuzz runs for much longer
            cmd = [
                "python3", "infra/helper.py", "run_fuzzer", project_name,
                "--", f"-max_total_time={timeout}"
            ]
            process = await asyncio.create_subprocess_exec(
                *cmd,
                cwd=oss_fuzz_dir,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(), timeout=timeout + 60
                )
            except asyncio.TimeoutError:
                process.terminate()
                await process.wait()
            # Parse output for crashes
            full_output = stdout.decode() + stderr.decode()
            findings = self._parse_oss_fuzz_output(full_output, workspace, sanitizer)
        except Exception as e:
            logger.warning(f"Error in OSS-Fuzz execution: {e}")
        return findings
    async def _reproduce_bugs(self, oss_fuzz_dir: Path, project_name: str, workspace: Path) -> List[ModuleFinding]:
        """Reproduce existing bugs"""
        findings = []
        try:
            # Look for existing testcases or artifacts
            testcases_dir = oss_fuzz_dir / "projects" / project_name / "testcases"
            if testcases_dir.exists():
                for testcase in testcases_dir.iterdir():
                    if testcase.is_file():
                        finding = self._create_testcase_finding(testcase, workspace)
                        if finding:
                            findings.append(finding)
        except Exception as e:
            logger.warning(f"Error reproducing bugs: {e}")
        return findings
    def _parse_oss_fuzz_output(self, output: str, workspace: Path, sanitizer: str) -> List[ModuleFinding]:
        """Parse OSS-Fuzz output"""
        findings = []
        try:
            # Look for common crash indicators
            lines = output.split('\n')
            crash_info = None
            for line in lines:
                if "ERROR:" in line and any(term in line for term in ["AddressSanitizer", "MemorySanitizer", "UBSan"]):
                    crash_info = {
                        "type": self._extract_crash_type(line),
                        "sanitizer": sanitizer,
                        "line": line
                    }
                elif crash_info and line.strip().startswith("#"):
                    # Stack trace line
                    if "stack_trace" not in crash_info:
                        crash_info["stack_trace"] = []
                    crash_info["stack_trace"].append(line.strip())
            if crash_info:
                finding = self._create_oss_fuzz_finding(crash_info, workspace)
                if finding:
                    findings.append(finding)
        except Exception as e:
            logger.warning(f"Error parsing OSS-Fuzz output: {e}")
        return findings
    def _create_oss_fuzz_finding(self, crash_info: Dict[str, Any], workspace: Path) -> ModuleFinding:
        """Create finding from OSS-Fuzz crash"""
        try:
            bug_type = crash_info.get("type", "unknown")
            sanitizer = crash_info.get("sanitizer", "unknown")
            stack_trace = '\n'.join(crash_info.get("stack_trace", [])[:20])
            severity = self._get_oss_fuzz_severity(bug_type)
            finding = self.create_finding(
                title=f"OSS-Fuzz {bug_type.title()}",
                description=f"OSS-Fuzz detected a {bug_type} using {sanitizer} sanitizer",
                severity=severity,
                category=self._get_oss_fuzz_category(bug_type),
                file_path=None,
                recommendation=self._get_oss_fuzz_recommendation(bug_type, sanitizer),
                metadata={
                    "bug_type": bug_type,
                    "sanitizer": sanitizer,
                    "stack_trace": stack_trace,
                    "fuzzer": "oss_fuzz"
                }
            )
            return finding
        except Exception as e:
            logger.warning(f"Error creating OSS-Fuzz finding: {e}")
            return None
    def _create_testcase_finding(self, testcase_file: Path, workspace: Path) -> ModuleFinding:
        """Create finding from testcase file"""
        try:
            try:
                rel_path = testcase_file.relative_to(workspace)
                file_path = str(rel_path)
            except ValueError:
                file_path = str(testcase_file)
            finding = self.create_finding(
                title="OSS-Fuzz Testcase",
                description=f"OSS-Fuzz testcase found: {testcase_file.name}",
                severity="info",
                category="testcase",
                file_path=file_path,
                recommendation="Analyze testcase to understand potential issues",
                metadata={
                    "testcase_file": str(testcase_file),
                    "fuzzer": "oss_fuzz"
                }
            )
            return finding
        except Exception as e:
            logger.warning(f"Error creating testcase finding: {e}")
            return None
    def _extract_crash_type(self, line: str) -> str:
        """Extract crash type from error line"""
        if "heap-buffer-overflow" in line:
            return "heap_buffer_overflow"
        elif "stack-buffer-overflow" in line:
            return "stack_buffer_overflow"
        elif "use-after-free" in line:
            return "use_after_free"
        elif "double-free" in line:
            return "double_free"
        elif "memory leak" in line:
            return "memory_leak"
        else:
            return "unknown_crash"
    def _get_oss_fuzz_severity(self, bug_type: str) -> str:
        """Get severity for OSS-Fuzz bug type"""
        if bug_type in ["heap_buffer_overflow", "stack_buffer_overflow", "use_after_free", "double_free"]:
            return "critical"
        elif bug_type == "memory_leak":
            return "medium"
        else:
            return "high"
    def _get_oss_fuzz_category(self, bug_type: str) -> str:
        """Get category for OSS-Fuzz bug type"""
        if "overflow" in bug_type:
            return "buffer_overflow"
        elif "free" in bug_type:
            return "memory_corruption"
        elif "leak" in bug_type:
            return "memory_leak"
        else:
            return "memory_safety"
    def _get_oss_fuzz_recommendation(self, bug_type: str, sanitizer: str) -> str:
        """Get recommendation for OSS-Fuzz finding"""
        if "overflow" in bug_type:
            return "Fix buffer overflow by implementing proper bounds checking and using safe string functions."
        elif "use_after_free" in bug_type:
            return "Fix use-after-free by ensuring proper object lifetime management and setting pointers to NULL after freeing."
        elif "double_free" in bug_type:
            return "Fix double-free by ensuring each allocation has exactly one corresponding free operation."
        elif "leak" in bug_type:
            return "Fix memory leak by ensuring all allocated memory is properly freed in all code paths."
        else:
            return f"Address the {bug_type} issue detected by OSS-Fuzz with {sanitizer} sanitizer."
    def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
        """Create analysis summary"""
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts = {}
        sanitizer_counts = {}
        for finding in findings:
            # Tolerate unexpected severity labels instead of raising KeyError.
            severity_counts[finding.severity] = severity_counts.get(finding.severity, 0) + 1
            category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
            sanitizer = finding.metadata.get("sanitizer", "unknown")
            sanitizer_counts[sanitizer] = sanitizer_counts.get(sanitizer, 0) + 1
        return {
            "total_findings": len(findings),
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "sanitizer_counts": sanitizer_counts
        }

View File

@@ -1,43 +0,0 @@
"""
Infrastructure Security Modules
This package contains modules for Infrastructure as Code (IaC) security testing.
Available modules:
- Checkov: Terraform/CloudFormation/Kubernetes IaC security
- Hadolint: Dockerfile security linting and best practices
- Kubesec: Kubernetes security risk analysis
- Polaris: Kubernetes configuration validation
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule
# Module registry for automatic discovery
INFRASTRUCTURE_MODULES: List[Type[BaseModule]] = []
def register_module(module_class: Type[BaseModule]):
    """Class decorator: record an infrastructure security module in the registry.

    Returns the class unchanged so it can be applied as ``@register_module``.
    """
    registry = INFRASTRUCTURE_MODULES
    registry.append(module_class)
    return module_class
def get_available_modules() -> List[Type[BaseModule]]:
    """Return a defensive copy of the registered infrastructure security modules."""
    return list(INFRASTRUCTURE_MODULES)
# Import modules to trigger registration
from .checkov import CheckovModule
from .hadolint import HadolintModule
from .kubesec import KubesecModule
from .polaris import PolarisModule

View File

@@ -1,411 +0,0 @@
"""
Checkov Infrastructure Security Module
This module uses Checkov to scan Infrastructure as Code (IaC) files for
security misconfigurations and compliance violations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class CheckovModule(BaseModule):
    """Checkov Infrastructure as Code security scanning module.

    Shells out to the ``checkov`` CLI over the whole workspace, parses its
    JSON report, and converts each failed check into a ModuleFinding.
    """
    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata, including the JSON schemas for config input and findings output."""
        return ModuleMetadata(
            name="checkov",
            version="3.1.34",
            description="Infrastructure as Code security scanning for Terraform, CloudFormation, Kubernetes, and more",
            author="FuzzForge Team",
            category="infrastructure",
            tags=["iac", "terraform", "cloudformation", "kubernetes", "security", "compliance"],
            input_schema={
                "type": "object",
                "properties": {
                    "frameworks": {
                        "type": "array",
                        "items": {"type": "string"},
                        "default": ["terraform", "cloudformation", "kubernetes"],
                        "description": "IaC frameworks to scan"
                    },
                    "checks": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Specific checks to run"
                    },
                    "skip_checks": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Checks to skip"
                    },
                    "severity": {
                        "type": "array",
                        "items": {"type": "string", "enum": ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]},
                        "default": ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"],
                        "description": "Minimum severity levels to report"
                    },
                    "compact": {
                        "type": "boolean",
                        "default": False,
                        "description": "Use compact output format"
                    },
                    "quiet": {
                        "type": "boolean",
                        "default": False,
                        "description": "Suppress verbose output"
                    },
                    "soft_fail": {
                        "type": "boolean",
                        "default": True,
                        "description": "Return exit code 0 even when issues are found"
                    },
                    "include_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "File patterns to include"
                    },
                    "exclude_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "File patterns to exclude"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "check_id": {"type": "string"},
                                "check_name": {"type": "string"},
                                "severity": {"type": "string"},
                                "file_path": {"type": "string"},
                                "line_range": {"type": "array"}
                            }
                        }
                    }
                }
            }
        )
    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration.

        Raises ValueError if any requested framework is not in the supported
        list; returns True otherwise.
        """
        frameworks = config.get("frameworks", [])
        supported_frameworks = [
            "terraform", "cloudformation", "kubernetes", "dockerfile",
            "ansible", "helm", "serverless", "bicep", "github_actions"
        ]
        for framework in frameworks:
            if framework not in supported_frameworks:
                raise ValueError(f"Unsupported framework: {framework}. Supported: {supported_frameworks}")
        return True
    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute Checkov IaC security scanning.

        Builds the ``checkov`` command line from *config*, runs it as an async
        subprocess over *workspace*, and returns a ModuleResult with parsed
        findings. Any unexpected exception yields a "failed" result rather
        than propagating.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info(f"Running Checkov IaC scan on {workspace}")
            # Check if there are any IaC files; short-circuit with an empty
            # success result so we never invoke checkov on an empty workspace.
            iac_files = self._find_iac_files(workspace, config.get("frameworks", []))
            if not iac_files:
                logger.info("No Infrastructure as Code files found")
                return self.create_result(
                    findings=[],
                    status="success",
                    summary={"total_findings": 0, "files_scanned": 0}
                )
            # Build checkov command
            cmd = ["checkov", "-d", str(workspace)]
            # Add output format
            cmd.extend(["--output", "json"])
            # Add frameworks
            frameworks = config.get("frameworks", ["terraform", "cloudformation", "kubernetes"])
            cmd.extend(["--framework"] + frameworks)
            # Add specific checks
            if config.get("checks"):
                cmd.extend(["--check", ",".join(config["checks"])])
            # Add skip checks
            if config.get("skip_checks"):
                cmd.extend(["--skip-check", ",".join(config["skip_checks"])])
            # Add compact flag
            if config.get("compact", False):
                cmd.append("--compact")
            # Add quiet flag
            if config.get("quiet", False):
                cmd.append("--quiet")
            # Add soft fail
            if config.get("soft_fail", True):
                cmd.append("--soft-fail")
            # Add include patterns
            if config.get("include_patterns"):
                for pattern in config["include_patterns"]:
                    cmd.extend(["--include", pattern])
            # Add exclude patterns
            if config.get("exclude_patterns"):
                for pattern in config["exclude_patterns"]:
                    cmd.extend(["--exclude", pattern])
            # Disable update checks and telemetry
            # NOTE(review): verify these two flags still exist in the pinned
            # checkov 3.x CLI — confirm against its --help output.
            cmd.extend(["--no-guide", "--skip-download"])
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run Checkov
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace
            )
            stdout, stderr = await process.communicate()
            # Parse results. With soft_fail (default True) stdout is parsed
            # even on a non-zero exit code, so the failure branch below is
            # only reached when soft_fail is explicitly set to False.
            findings = []
            if process.returncode == 0 or config.get("soft_fail", True):
                findings = self._parse_checkov_output(stdout.decode(), workspace, config)
            else:
                error_msg = stderr.decode()
                logger.error(f"Checkov failed: {error_msg}")
                return self.create_result(
                    findings=[],
                    status="failed",
                    error=f"Checkov execution failed: {error_msg}"
                )
            # Create summary
            summary = self._create_summary(findings, len(iac_files))
            logger.info(f"Checkov found {len(findings)} security issues")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"Checkov module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )
    def _find_iac_files(self, workspace: Path, frameworks: List[str]) -> List[Path]:
        """Find Infrastructure as Code files in workspace.

        Recursively globs per-framework filename patterns; the same file may
        match several frameworks (e.g. ``*.yaml``), so the result is
        de-duplicated via a set before returning.
        """
        iac_patterns = {
            "terraform": ["*.tf", "*.tfvars"],
            "cloudformation": ["*.yaml", "*.yml", "*.json", "*template*"],
            "kubernetes": ["*.yaml", "*.yml"],
            "dockerfile": ["Dockerfile", "*.dockerfile"],
            "ansible": ["*.yaml", "*.yml", "playbook*"],
            "helm": ["Chart.yaml", "values.yaml", "*.yaml"],
            "bicep": ["*.bicep"],
            "github_actions": [".github/workflows/*.yaml", ".github/workflows/*.yml"]
        }
        found_files = []
        for framework in frameworks:
            patterns = iac_patterns.get(framework, [])
            for pattern in patterns:
                found_files.extend(workspace.rglob(pattern))
        return list(set(found_files))  # Remove duplicates
    def _parse_checkov_output(self, output: str, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Parse Checkov JSON output into findings.

        Only the ``results.failed_checks`` section is consumed; passed and
        skipped checks are ignored. Parse errors are logged and yield an
        empty/partial list instead of raising.
        """
        findings = []
        if not output.strip():
            return findings
        try:
            data = json.loads(output)
            # Get severity filter (default admits every severity level)
            allowed_severities = set(s.upper() for s in config.get("severity", ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]))
            # Process failed checks
            failed_checks = data.get("results", {}).get("failed_checks", [])
            for check in failed_checks:
                # Extract information
                check_id = check.get("check_id", "unknown")
                check_name = check.get("check_name", "")
                severity = check.get("severity", "MEDIUM").upper()
                file_path = check.get("file_path", "")
                file_line_range = check.get("file_line_range", [])
                resource = check.get("resource", "")
                description = check.get("description", "")
                guideline = check.get("guideline", "")
                # Apply severity filter
                if severity not in allowed_severities:
                    continue
                # Make file path relative to workspace (best-effort; paths
                # outside the workspace are kept as-is)
                if file_path:
                    try:
                        rel_path = Path(file_path).relative_to(workspace)
                        file_path = str(rel_path)
                    except ValueError:
                        pass
                # Map severity to our standard levels
                finding_severity = self._map_severity(severity)
                # Create finding
                finding = self.create_finding(
                    title=f"IaC Security Issue: {check_name}",
                    description=description or f"Checkov check {check_id} failed for resource {resource}",
                    severity=finding_severity,
                    category=self._get_category(check_id, check_name),
                    file_path=file_path if file_path else None,
                    line_start=file_line_range[0] if file_line_range and len(file_line_range) > 0 else None,
                    line_end=file_line_range[1] if file_line_range and len(file_line_range) > 1 else None,
                    recommendation=self._get_recommendation(check_id, check_name, guideline),
                    metadata={
                        "check_id": check_id,
                        "check_name": check_name,
                        "checkov_severity": severity,
                        "resource": resource,
                        "guideline": guideline,
                        "bc_category": check.get("bc_category", ""),
                        "benchmarks": check.get("benchmarks", {}),
                        "fixed_definition": check.get("fixed_definition", "")
                    }
                )
                findings.append(finding)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse Checkov output: {e}")
        except Exception as e:
            logger.warning(f"Error processing Checkov results: {e}")
        return findings
    def _map_severity(self, checkov_severity: str) -> str:
        """Map Checkov severity to our standard severity levels (defaults to "medium")."""
        severity_map = {
            "CRITICAL": "critical",
            "HIGH": "high",
            "MEDIUM": "medium",
            "LOW": "low",
            "INFO": "info"
        }
        return severity_map.get(checkov_severity.upper(), "medium")
    def _get_category(self, check_id: str, check_name: str) -> str:
        """Determine finding category from keyword matches on the check id/name."""
        check_lower = f"{check_id} {check_name}".lower()
        # First keyword group that matches wins; order encodes priority.
        if any(term in check_lower for term in ["encryption", "encrypt", "kms", "ssl", "tls"]):
            return "encryption"
        elif any(term in check_lower for term in ["access", "iam", "rbac", "permission"]):
            return "access_control"
        elif any(term in check_lower for term in ["network", "security group", "firewall", "vpc"]):
            return "network_security"
        elif any(term in check_lower for term in ["logging", "monitor", "audit"]):
            return "logging_monitoring"
        elif any(term in check_lower for term in ["storage", "s3", "bucket", "database"]):
            return "data_protection"
        elif any(term in check_lower for term in ["secret", "password", "key", "credential"]):
            return "secrets_management"
        elif any(term in check_lower for term in ["backup", "snapshot", "versioning"]):
            return "backup_recovery"
        else:
            return "infrastructure_security"
    def _get_recommendation(self, check_id: str, check_name: str, guideline: str) -> str:
        """Generate a remediation hint; Checkov's own guideline wins when present."""
        if guideline:
            return f"Follow the guideline: {guideline}"
        # Generic recommendations based on common patterns
        check_lower = f"{check_id} {check_name}".lower()
        if "encryption" in check_lower:
            return "Enable encryption for sensitive data at rest and in transit using appropriate encryption algorithms."
        elif "access" in check_lower or "iam" in check_lower:
            return "Review and tighten access controls. Follow the principle of least privilege."
        elif "network" in check_lower or "security group" in check_lower:
            return "Restrict network access to only necessary ports and IP ranges."
        elif "logging" in check_lower:
            return "Enable comprehensive logging and monitoring for security events."
        elif "backup" in check_lower:
            return "Implement proper backup and disaster recovery procedures."
        else:
            return f"Review and fix the security configuration issue identified by check {check_id}."
    def _create_summary(self, findings: List[ModuleFinding], total_files: int) -> Dict[str, Any]:
        """Create analysis summary: counts by severity, category, and top check ids."""
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts = {}
        check_counts = {}
        for finding in findings:
            # Count by severity
            severity_counts[finding.severity] += 1
            # Count by category
            category = finding.category
            category_counts[category] = category_counts.get(category, 0) + 1
            # Count by check
            check_id = finding.metadata.get("check_id", "unknown")
            check_counts[check_id] = check_counts.get(check_id, 0) + 1
        return {
            "total_findings": len(findings),
            "files_scanned": total_files,
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "top_checks": dict(sorted(check_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
            "files_with_issues": len(set(f.file_path for f in findings if f.file_path))
        }

View File

@@ -1,406 +0,0 @@
"""
Hadolint Infrastructure Security Module
This module uses Hadolint to scan Dockerfiles for security best practices
and potential vulnerabilities.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class HadolintModule(BaseModule):
    """Hadolint Dockerfile security scanning module.

    Finds Dockerfile-like files in the workspace, runs the ``hadolint`` CLI
    on each, and converts reported issues into ModuleFinding objects.
    """
    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata, including config input and findings output schemas."""
        return ModuleMetadata(
            name="hadolint",
            version="2.12.0",
            description="Dockerfile security linting and best practices validation",
            author="FuzzForge Team",
            category="infrastructure",
            tags=["dockerfile", "docker", "security", "best-practices", "linting"],
            input_schema={
                "type": "object",
                "properties": {
                    "severity": {
                        "type": "array",
                        "items": {"type": "string", "enum": ["error", "warning", "info", "style"]},
                        "default": ["error", "warning", "info", "style"],
                        "description": "Minimum severity levels to report"
                    },
                    "ignored_rules": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Hadolint rules to ignore"
                    },
                    "trusted_registries": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of trusted Docker registries"
                    },
                    "allowed_maintainers": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of allowed maintainer emails"
                    },
                    "dockerfile_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "default": ["**/Dockerfile", "**/*.dockerfile", "**/Containerfile"],
                        "description": "Patterns to find Dockerfile-like files"
                    },
                    "strict": {
                        "type": "boolean",
                        "default": False,
                        "description": "Enable strict mode (fail on any issue)"
                    },
                    "no_fail": {
                        "type": "boolean",
                        "default": True,
                        "description": "Don't fail on lint errors (useful for reporting)"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "rule": {"type": "string"},
                                "severity": {"type": "string"},
                                "message": {"type": "string"},
                                "file_path": {"type": "string"},
                                "line": {"type": "integer"}
                            }
                        }
                    }
                }
            }
        )
    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration; raises ValueError on unknown severity levels."""
        severity_levels = config.get("severity", ["error", "warning", "info", "style"])
        valid_severities = ["error", "warning", "info", "style"]
        for severity in severity_levels:
            if severity not in valid_severities:
                raise ValueError(f"Invalid severity level: {severity}. Valid: {valid_severities}")
        return True
    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute Hadolint Dockerfile security scanning.

        Scans each discovered Dockerfile sequentially; per-file failures are
        tolerated inside _scan_dockerfile, and any unexpected exception here
        produces a "failed" ModuleResult instead of propagating.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info(f"Running Hadolint Dockerfile scan on {workspace}")
            # Find all Dockerfiles
            dockerfiles = self._find_dockerfiles(workspace, config)
            if not dockerfiles:
                logger.info("No Dockerfiles found for Hadolint analysis")
                return self.create_result(
                    findings=[],
                    status="success",
                    summary={"total_findings": 0, "files_scanned": 0}
                )
            logger.info(f"Found {len(dockerfiles)} Dockerfile(s) to analyze")
            # Process each Dockerfile
            all_findings = []
            for dockerfile in dockerfiles:
                findings = await self._scan_dockerfile(dockerfile, workspace, config)
                all_findings.extend(findings)
            # Create summary
            summary = self._create_summary(all_findings, len(dockerfiles))
            logger.info(f"Hadolint found {len(all_findings)} issues across {len(dockerfiles)} Dockerfiles")
            return self.create_result(
                findings=all_findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"Hadolint module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )
    def _find_dockerfiles(self, workspace: Path, config: Dict[str, Any]) -> List[Path]:
        """Find Dockerfile-like files in workspace via the configured glob patterns."""
        patterns = config.get("dockerfile_patterns", [
            "**/Dockerfile", "**/*.dockerfile", "**/Containerfile"
        ])
        # Debug logging
        # NOTE(review): these info-level logs enumerate every file in the
        # workspace on each scan — consider demoting them to logger.debug.
        logger.info(f"Hadolint searching in workspace: {workspace}")
        logger.info(f"Workspace exists: {workspace.exists()}")
        if workspace.exists():
            all_files = list(workspace.rglob("*"))
            logger.info(f"All files in workspace: {all_files}")
        dockerfiles = []
        for pattern in patterns:
            matches = list(workspace.glob(pattern))
            logger.info(f"Pattern '{pattern}' found: {matches}")
            dockerfiles.extend(matches)
        logger.info(f"Final dockerfiles list: {dockerfiles}")
        return list(set(dockerfiles))  # Remove duplicates
    async def _scan_dockerfile(self, dockerfile: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Scan a single Dockerfile with Hadolint.

        Returns the findings for this file; failures are logged and yield an
        empty list so one bad file never aborts the whole scan.
        """
        findings = []
        try:
            # Build hadolint command
            cmd = ["hadolint", "--format", "json"]
            # Add severity levels
            # NOTE(review): assumes hadolint 2.12.0 supports the
            # --no-error/--no-warning/--no-info/--no-style flags — confirm
            # against that version's CLI; newer releases filter with
            # --failure-threshold instead.
            severity_levels = config.get("severity", ["error", "warning", "info", "style"])
            if "error" not in severity_levels:
                cmd.append("--no-error")
            if "warning" not in severity_levels:
                cmd.append("--no-warning")
            if "info" not in severity_levels:
                cmd.append("--no-info")
            if "style" not in severity_levels:
                cmd.append("--no-style")
            # Add ignored rules
            ignored_rules = config.get("ignored_rules", [])
            for rule in ignored_rules:
                cmd.extend(["--ignore", rule])
            # Add trusted registries
            trusted_registries = config.get("trusted_registries", [])
            for registry in trusted_registries:
                cmd.extend(["--trusted-registry", registry])
            # Add strict mode
            if config.get("strict", False):
                cmd.append("--strict-labels")
            # Add the dockerfile
            cmd.append(str(dockerfile))
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run hadolint
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace
            )
            stdout, stderr = await process.communicate()
            # Parse results. hadolint exits non-zero when it finds issues, so
            # with no_fail (default True) stdout is parsed regardless.
            if process.returncode == 0 or config.get("no_fail", True):
                findings = self._parse_hadolint_output(
                    stdout.decode(), dockerfile, workspace
                )
            else:
                error_msg = stderr.decode()
                logger.warning(f"Hadolint failed for {dockerfile}: {error_msg}")
                # Continue with other files even if one fails
        except Exception as e:
            logger.warning(f"Error scanning {dockerfile}: {e}")
        return findings
    def _parse_hadolint_output(self, output: str, dockerfile: Path, workspace: Path) -> List[ModuleFinding]:
        """Parse Hadolint JSON output into findings.

        Parse errors are logged and swallowed so a malformed report yields an
        empty/partial list rather than an exception.
        """
        findings = []
        if not output.strip():
            return findings
        try:
            # Hadolint outputs JSON array
            issues = json.loads(output)
            for issue in issues:
                # Extract information
                rule = issue.get("code", "unknown")
                message = issue.get("message", "")
                level = issue.get("level", "warning").lower()
                line = issue.get("line", 0)
                column = issue.get("column", 0)
                # Make file path relative to workspace (fall back to the
                # absolute path when the file lies outside the workspace)
                try:
                    rel_path = dockerfile.relative_to(workspace)
                    file_path = str(rel_path)
                except ValueError:
                    file_path = str(dockerfile)
                # Map Hadolint level to our severity
                severity = self._map_severity(level)
                # Get category based on rule
                category = self._get_category(rule, message)
                # Create finding
                finding = self.create_finding(
                    title=f"Dockerfile issue: {rule}",
                    description=message or f"Hadolint rule {rule} violation",
                    severity=severity,
                    category=category,
                    file_path=file_path,
                    line_start=line if line > 0 else None,
                    recommendation=self._get_recommendation(rule, message),
                    metadata={
                        "rule": rule,
                        "hadolint_level": level,
                        "column": column,
                        "file": str(dockerfile)
                    }
                )
                findings.append(finding)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse Hadolint output: {e}")
        except Exception as e:
            logger.warning(f"Error processing Hadolint results: {e}")
        return findings
    def _map_severity(self, hadolint_level: str) -> str:
        """Map Hadolint severity to our standard severity levels (defaults to "medium")."""
        severity_map = {
            "error": "high",
            "warning": "medium",
            "info": "low",
            "style": "info"
        }
        return severity_map.get(hadolint_level.lower(), "medium")
    def _get_category(self, rule: str, message: str) -> str:
        """Determine finding category from the rule code family and message keywords."""
        rule_lower = rule.lower()
        message_lower = message.lower()
        # Security-related categories: DL3xxx/DL4xxx rule families are
        # refined further by keywords in the message text.
        if any(term in rule_lower for term in ["dl3", "dl4"]):
            if "user" in message_lower or "root" in message_lower:
                return "privilege_escalation"
            elif "secret" in message_lower or "password" in message_lower:
                return "secrets_management"
            elif "version" in message_lower or "pin" in message_lower:
                return "dependency_management"
            elif "add" in message_lower or "copy" in message_lower:
                return "file_operations"
            else:
                return "security_best_practices"
        elif any(term in rule_lower for term in ["dl1", "dl2"]):
            return "syntax_errors"
        elif "3001" in rule or "3002" in rule:
            return "user_management"
        elif "3008" in rule or "3009" in rule:
            return "privilege_escalation"
        elif "3014" in rule or "3015" in rule:
            return "port_management"
        elif "3020" in rule or "3021" in rule:
            return "copy_operations"
        else:
            return "dockerfile_best_practices"
    def _get_recommendation(self, rule: str, message: str) -> str:
        """Generate a remediation hint: exact rule lookup first, then keyword heuristics."""
        recommendations = {
            # Security-focused recommendations
            "DL3002": "Create a non-root user and switch to it before running the application.",
            "DL3008": "Pin package versions to ensure reproducible builds and avoid supply chain attacks.",
            "DL3009": "Clean up package manager cache after installation to reduce image size and attack surface.",
            "DL3020": "Use COPY instead of ADD for local files to avoid unexpected behavior.",
            "DL3025": "Use JSON format for CMD and ENTRYPOINT to avoid shell injection vulnerabilities.",
            "DL3059": "Use multi-stage builds to reduce final image size and attack surface.",
            "DL4001": "Don't use sudo in Dockerfiles as it's unnecessary and can introduce vulnerabilities.",
            "DL4003": "Use a package manager instead of downloading and installing manually.",
            "DL4004": "Don't use SSH in Dockerfiles as it's a security risk.",
            "DL4005": "Use SHELL instruction to specify shell for RUN commands instead of hardcoding paths.",
        }
        if rule in recommendations:
            return recommendations[rule]
        # Generic recommendations based on patterns
        message_lower = message.lower()
        if "user" in message_lower and "root" in message_lower:
            return "Avoid running containers as root user. Create and use a non-privileged user."
        elif "version" in message_lower or "pin" in message_lower:
            return "Pin package versions to specific versions to ensure reproducible builds."
        elif "cache" in message_lower or "clean" in message_lower:
            return "Clean up package manager caches to reduce image size and potential security issues."
        elif "secret" in message_lower or "password" in message_lower:
            return "Don't include secrets in Dockerfiles. Use build arguments or runtime secrets instead."
        else:
            return f"Follow Dockerfile best practices to address rule {rule}."
    def _create_summary(self, findings: List[ModuleFinding], total_files: int) -> Dict[str, Any]:
        """Create analysis summary.

        No "critical" bucket is needed: _map_severity never emits it (the
        highest hadolint level, "error", maps to "high").
        """
        severity_counts = {"high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts = {}
        rule_counts = {}
        for finding in findings:
            # Count by severity
            severity_counts[finding.severity] += 1
            # Count by category
            category = finding.category
            category_counts[category] = category_counts.get(category, 0) + 1
            # Count by rule
            rule = finding.metadata.get("rule", "unknown")
            rule_counts[rule] = rule_counts.get(rule, 0) + 1
        return {
            "total_findings": len(findings),
            "files_scanned": total_files,
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "top_rules": dict(sorted(rule_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
            "files_with_issues": len(set(f.file_path for f in findings if f.file_path))
        }

View File

@@ -1,447 +0,0 @@
"""
Kubesec Infrastructure Security Module
This module uses Kubesec to scan Kubernetes manifests for security
misconfigurations and best practices violations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class KubesecModule(BaseModule):
"""Kubesec Kubernetes security scanning module"""
    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata, including config input and findings output schemas.

        NOTE(review): the schema declares a default "threshold" of 15, but the
        parsing code falls back to 0 when the key is absent — confirm which
        default is intended and align the two.
        """
        return ModuleMetadata(
            name="kubesec",
            version="2.14.0",
            description="Kubernetes security scanning for YAML/JSON manifests with security best practices validation",
            author="FuzzForge Team",
            category="infrastructure",
            tags=["kubernetes", "k8s", "security", "best-practices", "manifests"],
            input_schema={
                "type": "object",
                "properties": {
                    "scan_mode": {
                        "type": "string",
                        "enum": ["scan", "http"],
                        "default": "scan",
                        "description": "Kubesec scan mode (local scan or HTTP API)"
                    },
                    "threshold": {
                        "type": "integer",
                        "default": 15,
                        "description": "Minimum security score threshold"
                    },
                    "exit_code": {
                        "type": "integer",
                        "default": 0,
                        "description": "Exit code to return on failure"
                    },
                    "format": {
                        "type": "string",
                        "enum": ["json", "template"],
                        "default": "json",
                        "description": "Output format"
                    },
                    "kubernetes_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "default": ["**/*.yaml", "**/*.yml", "**/k8s/*.yaml", "**/kubernetes/*.yaml"],
                        "description": "Patterns to find Kubernetes manifest files"
                    },
                    "exclude_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Patterns to exclude from scanning"
                    },
                    "strict": {
                        "type": "boolean",
                        "default": False,
                        "description": "Enable strict mode (fail on any security issue)"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "score": {"type": "integer"},
                                "security_issues": {"type": "array"},
                                "file_path": {"type": "string"},
                                "manifest_kind": {"type": "string"}
                            }
                        }
                    }
                }
            }
        )
def validate_config(self, config: Dict[str, Any]) -> bool:
"""Validate configuration"""
scan_mode = config.get("scan_mode", "scan")
if scan_mode not in ["scan", "http"]:
raise ValueError(f"Invalid scan mode: {scan_mode}. Valid: ['scan', 'http']")
threshold = config.get("threshold", 0)
if not isinstance(threshold, int):
raise ValueError(f"Threshold must be an integer, got: {type(threshold)}")
return True
    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute Kubesec Kubernetes security scanning.

        Discovers manifest files, scans them sequentially via _scan_manifest,
        and returns a ModuleResult; any unexpected exception is converted
        into a "failed" result instead of propagating.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info(f"Running Kubesec Kubernetes scan on {workspace}")
            # Find all Kubernetes manifests; an empty workspace short-circuits
            # to an empty success result without invoking kubesec at all.
            k8s_files = self._find_kubernetes_files(workspace, config)
            if not k8s_files:
                logger.info("No Kubernetes manifest files found")
                return self.create_result(
                    findings=[],
                    status="success",
                    summary={"total_findings": 0, "files_scanned": 0}
                )
            logger.info(f"Found {len(k8s_files)} Kubernetes manifest file(s) to analyze")
            # Process each manifest file
            all_findings = []
            for k8s_file in k8s_files:
                findings = await self._scan_manifest(k8s_file, workspace, config)
                all_findings.extend(findings)
            # Create summary
            summary = self._create_summary(all_findings, len(k8s_files))
            logger.info(f"Kubesec found {len(all_findings)} security issues across {len(k8s_files)} manifests")
            return self.create_result(
                findings=all_findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"Kubesec module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )
def _find_kubernetes_files(self, workspace: Path, config: Dict[str, Any]) -> List[Path]:
"""Find Kubernetes manifest files in workspace"""
patterns = config.get("kubernetes_patterns", [
"**/*.yaml", "**/*.yml", "**/k8s/*.yaml", "**/kubernetes/*.yaml"
])
exclude_patterns = config.get("exclude_patterns", [])
k8s_files = []
for pattern in patterns:
files = workspace.glob(pattern)
for file in files:
# Check if file contains Kubernetes resources
if self._is_kubernetes_manifest(file):
# Check if file should be excluded
should_exclude = False
for exclude_pattern in exclude_patterns:
if file.match(exclude_pattern):
should_exclude = True
break
if not should_exclude:
k8s_files.append(file)
return list(set(k8s_files)) # Remove duplicates
def _is_kubernetes_manifest(self, file: Path) -> bool:
"""Check if a file is a Kubernetes manifest"""
try:
content = file.read_text(encoding='utf-8')
# Simple heuristic: check for common Kubernetes fields
k8s_indicators = [
"apiVersion:", "kind:", "metadata:", "spec:",
"Deployment", "Service", "Pod", "ConfigMap",
"Secret", "Ingress", "PersistentVolume"
]
return any(indicator in content for indicator in k8s_indicators)
except Exception:
return False
    async def _scan_manifest(self, manifest_file: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Scan a single Kubernetes manifest with Kubesec.

        Returns the findings for this file; a non-zero kubesec exit or any
        exception is logged and yields an empty list so one bad manifest
        never aborts the whole scan.

        NOTE(review): config["scan_mode"] ("http") is accepted by the schema
        but not implemented here — the local ``kubesec scan`` command is
        always used.
        """
        findings = []
        try:
            # Build kubesec command
            cmd = ["kubesec", "scan"]
            # Add format
            format_type = config.get("format", "json")
            if format_type == "json":
                cmd.append("-f")
                cmd.append("json")
            # Add the manifest file
            cmd.append(str(manifest_file))
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run kubesec
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace
            )
            stdout, stderr = await process.communicate()
            # Parse results
            if process.returncode == 0:
                findings = self._parse_kubesec_output(
                    stdout.decode(), manifest_file, workspace, config
                )
            else:
                error_msg = stderr.decode()
                logger.warning(f"Kubesec failed for {manifest_file}: {error_msg}")
        except Exception as e:
            logger.warning(f"Error scanning {manifest_file}: {e}")
        return findings
    def _parse_kubesec_output(self, output: str, manifest_file: Path, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Parse Kubesec JSON output into findings.

        Emits one finding per manifest whose score falls below the threshold
        (or which kubesec marks invalid), plus one finding per advisory in
        the "advise" section. Parse errors are logged and swallowed.
        """
        findings = []
        if not output.strip():
            return findings
        try:
            # Kubesec outputs JSON array (normalize a bare object to a list)
            results = json.loads(output)
            if not isinstance(results, list):
                results = [results]
            # NOTE(review): falls back to 0 here while the input schema
            # declares a default of 15 — confirm the intended default.
            threshold = config.get("threshold", 0)
            for result in results:
                score = result.get("score", 0)
                object_name = result.get("object", "Unknown")
                valid = result.get("valid", True)
                message = result.get("message", "")
                # Make file path relative to workspace (keep absolute when
                # the manifest lies outside the workspace)
                try:
                    rel_path = manifest_file.relative_to(workspace)
                    file_path = str(rel_path)
                except ValueError:
                    file_path = str(manifest_file)
                # Process scoring and advise sections
                advise = result.get("advise", [])
                scoring = result.get("scoring", {})
                # Create findings for low scores: severity scales with how
                # far below zero/five the score sits
                if score < threshold or not valid:
                    severity = "high" if score < 0 else "medium" if score < 5 else "low"
                    finding = self.create_finding(
                        title=f"Kubernetes Security Score Low: {object_name}",
                        description=message or f"Security score {score} below threshold {threshold}",
                        severity=severity,
                        category="kubernetes_security",
                        file_path=file_path,
                        recommendation=self._get_score_recommendation(score, advise),
                        metadata={
                            "score": score,
                            "threshold": threshold,
                            "object": object_name,
                            "valid": valid,
                            "advise_count": len(advise),
                            "scoring_details": scoring
                        }
                    )
                    findings.append(finding)
                # Create findings for each advisory
                for advisory in advise:
                    selector = advisory.get("selector", "")
                    reason = advisory.get("reason", "")
                    href = advisory.get("href", "")
                    # Determine severity based on advisory type
                    severity = self._get_advisory_severity(reason, selector)
                    category = self._get_advisory_category(reason, selector)
                    finding = self.create_finding(
                        title=f"Kubernetes Security Advisory: {selector}",
                        description=reason,
                        severity=severity,
                        category=category,
                        file_path=file_path,
                        recommendation=self._get_advisory_recommendation(reason, href),
                        metadata={
                            "selector": selector,
                            "href": href,
                            "object": object_name,
                            "advisory_type": "kubesec_advise"
                        }
                    )
                    findings.append(finding)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse Kubesec output: {e}")
        except Exception as e:
            logger.warning(f"Error processing Kubesec results: {e}")
        return findings
def _get_advisory_severity(self, reason: str, selector: str) -> str:
"""Determine severity based on advisory reason and selector"""
reason_lower = reason.lower()
selector_lower = selector.lower()
# High severity issues
if any(term in reason_lower for term in [
"privileged", "root", "hostnetwork", "hostpid", "hostipc",
"allowprivilegeescalation", "runasroot", "security", "capabilities"
]):
return "high"
# Medium severity issues
elif any(term in reason_lower for term in [
"resources", "limits", "requests", "readonly", "securitycontext"
]):
return "medium"
# Low severity issues
elif any(term in reason_lower for term in [
"labels", "annotations", "probe", "liveness", "readiness"
]):
return "low"
else:
return "medium"
def _get_advisory_category(self, reason: str, selector: str) -> str:
"""Determine category based on advisory"""
reason_lower = reason.lower()
if any(term in reason_lower for term in ["privilege", "root", "security", "capabilities"]):
return "privilege_escalation"
elif any(term in reason_lower for term in ["network", "host"]):
return "network_security"
elif any(term in reason_lower for term in ["resources", "limits"]):
return "resource_management"
elif any(term in reason_lower for term in ["probe", "health"]):
return "health_monitoring"
else:
return "kubernetes_best_practices"
def _get_score_recommendation(self, score: int, advise: List[Dict]) -> str:
"""Generate recommendation based on score and advisories"""
if score < 0:
return "Critical security issues detected. Address all security advisories immediately."
elif score < 5:
return "Low security score detected. Review and implement security best practices."
elif len(advise) > 0:
return f"Security score is {score}. Review {len(advise)} advisory recommendations for improvement."
else:
return "Review Kubernetes security configuration and apply security hardening measures."
def _get_advisory_recommendation(self, reason: str, href: str) -> str:
"""Generate recommendation for advisory"""
if href:
return f"{reason} For more details, see: {href}"
reason_lower = reason.lower()
# Specific recommendations based on common patterns
if "privileged" in reason_lower:
return "Remove privileged: true from security context. Run containers with minimal privileges."
elif "root" in reason_lower or "runasroot" in reason_lower:
return "Configure runAsNonRoot: true and set runAsUser to a non-root user ID."
elif "allowprivilegeescalation" in reason_lower:
return "Set allowPrivilegeEscalation: false to prevent privilege escalation."
elif "resources" in reason_lower:
return "Define resource requests and limits to prevent resource exhaustion."
elif "readonly" in reason_lower:
return "Set readOnlyRootFilesystem: true to prevent filesystem modifications."
elif "capabilities" in reason_lower:
return "Drop unnecessary capabilities and add only required ones."
elif "probe" in reason_lower:
return "Add liveness and readiness probes for better health monitoring."
else:
return f"Address the security concern: {reason}"
def _create_summary(self, findings: List[ModuleFinding], total_files: int) -> Dict[str, Any]:
    """Aggregate Kubesec findings into a summary dictionary.

    Args:
        findings: Findings produced by this module run.
        total_files: Number of manifest files that were scanned.

    Returns:
        Counts by severity/category/object, score statistics, and the
        number of distinct files that produced at least one finding.
    """
    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
    category_counts = {}
    object_counts = {}
    scores = []
    for finding in findings:
        severity_counts[finding.severity] += 1
        category_counts[finding.category] = category_counts.get(finding.category, 0) + 1
        obj_name = finding.metadata.get("object", "unknown")
        object_counts[obj_name] = object_counts.get(obj_name, 0) + 1
        obj_score = finding.metadata.get("score")
        if obj_score is not None:
            scores.append(obj_score)
    files_with_issues = {f.file_path for f in findings if f.file_path}
    return {
        "total_findings": len(findings),
        "files_scanned": total_files,
        "severity_counts": severity_counts,
        "category_counts": category_counts,
        "object_counts": object_counts,
        "average_score": sum(scores) / len(scores) if scores else 0,
        "min_score": min(scores) if scores else 0,
        "max_score": max(scores) if scores else 0,
        "files_with_issues": len(files_with_issues)
    }

View File

@@ -1,519 +0,0 @@
"""
Polaris Infrastructure Security Module
This module uses Polaris to validate Kubernetes resources against security
and best practice policies.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class PolarisModule(BaseModule):
    """Polaris Kubernetes best practices validation module.

    Wraps the ``polaris audit`` CLI: discovers Kubernetes manifests in the
    workspace, runs the audit, and converts each failed check into a
    :class:`ModuleFinding`.
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata (identity plus input/output JSON schemas)."""
        return ModuleMetadata(
            name="polaris",
            version="8.5.0",
            description="Kubernetes best practices validation and policy enforcement using Polaris",
            author="FuzzForge Team",
            category="infrastructure",
            tags=["kubernetes", "k8s", "policy", "best-practices", "validation"],
            input_schema={
                "type": "object",
                "properties": {
                    "audit_path": {
                        "type": "string",
                        "description": "Path to audit (defaults to workspace)"
                    },
                    "config_file": {
                        "type": "string",
                        "description": "Path to Polaris config file"
                    },
                    "only_show_failed_tests": {
                        "type": "boolean",
                        "default": True,
                        "description": "Show only failed validation tests"
                    },
                    # NOTE(review): schema default is "info" but the code paths
                    # below default to "warning" — confirm which is intended.
                    "severity_threshold": {
                        "type": "string",
                        "enum": ["error", "warning", "info"],
                        "default": "info",
                        "description": "Minimum severity level to report"
                    },
                    "format": {
                        "type": "string",
                        "enum": ["json", "yaml", "pretty"],
                        "default": "json",
                        "description": "Output format"
                    },
                    "kubernetes_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "default": ["**/*.yaml", "**/*.yml", "**/k8s/*.yaml", "**/kubernetes/*.yaml"],
                        "description": "Patterns to find Kubernetes manifest files"
                    },
                    "exclude_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "File patterns to exclude"
                    },
                    "disable_checks": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of check names to disable"
                    },
                    "enable_checks": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of check names to enable (if using custom config)"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "check_name": {"type": "string"},
                                "severity": {"type": "string"},
                                "category": {"type": "string"},
                                "file_path": {"type": "string"},
                                "resource_name": {"type": "string"}
                            }
                        }
                    }
                }
            }
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration.

        Raises:
            ValueError: If ``severity_threshold`` or ``format`` holds an
                unsupported value.
        """
        # NOTE(review): default here is "warning" while the input schema
        # declares "info" — verify which default should win.
        severity_threshold = config.get("severity_threshold", "warning")
        valid_severities = ["error", "warning", "info"]
        if severity_threshold not in valid_severities:
            raise ValueError(f"Invalid severity threshold: {severity_threshold}. Valid: {valid_severities}")
        format_type = config.get("format", "json")
        valid_formats = ["json", "yaml", "pretty"]
        if format_type not in valid_formats:
            raise ValueError(f"Invalid format: {format_type}. Valid: {valid_formats}")
        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute Polaris Kubernetes validation.

        Discovers manifests, runs ``polaris audit``, and returns a result
        whose status is "failed" (with the error message) if anything
        raises; per-file audit problems are only logged.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info(f"Running Polaris Kubernetes validation on {workspace}")
            # Find all Kubernetes manifests
            k8s_files = self._find_kubernetes_files(workspace, config)
            if not k8s_files:
                # No manifests is a successful, empty run — not an error.
                logger.info("No Kubernetes manifest files found")
                return self.create_result(
                    findings=[],
                    status="success",
                    summary={"total_findings": 0, "files_scanned": 0}
                )
            logger.info(f"Found {len(k8s_files)} Kubernetes manifest file(s) to validate")
            # Run Polaris audit
            findings = await self._run_polaris_audit(workspace, config, k8s_files)
            # Create summary
            summary = self._create_summary(findings, len(k8s_files))
            logger.info(f"Polaris found {len(findings)} policy violations across {len(k8s_files)} manifests")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            logger.error(f"Polaris module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )

    def _find_kubernetes_files(self, workspace: Path, config: Dict[str, Any]) -> List[Path]:
        """Find Kubernetes manifest files in workspace.

        Globs the configured patterns, keeps only files that look like
        Kubernetes manifests, and drops anything matching an exclude
        pattern. Overlapping patterns are de-duplicated via a set.
        """
        patterns = config.get("kubernetes_patterns", [
            "**/*.yaml", "**/*.yml", "**/k8s/*.yaml", "**/kubernetes/*.yaml"
        ])
        exclude_patterns = config.get("exclude_patterns", [])
        k8s_files = []
        for pattern in patterns:
            files = workspace.glob(pattern)
            for file in files:
                # Check if file contains Kubernetes resources
                if self._is_kubernetes_manifest(file):
                    # Check if file should be excluded
                    should_exclude = False
                    for exclude_pattern in exclude_patterns:
                        if file.match(exclude_pattern):
                            should_exclude = True
                            break
                    if not should_exclude:
                        k8s_files.append(file)
        return list(set(k8s_files))  # Remove duplicates

    def _is_kubernetes_manifest(self, file: Path) -> bool:
        """Check if a file is a Kubernetes manifest.

        Cheap keyword heuristic — unreadable/binary files simply return
        False instead of raising.
        """
        try:
            content = file.read_text(encoding='utf-8')
            # Simple heuristic: check for common Kubernetes fields
            k8s_indicators = [
                "apiVersion:", "kind:", "metadata:", "spec:",
                "Deployment", "Service", "Pod", "ConfigMap",
                "Secret", "Ingress", "PersistentVolume"
            ]
            return any(indicator in content for indicator in k8s_indicators)
        except Exception:
            return False

    async def _run_polaris_audit(self, workspace: Path, config: Dict[str, Any], k8s_files: List[Path]) -> List[ModuleFinding]:
        """Run Polaris audit on workspace.

        Builds the ``polaris audit`` command from config, runs it as a
        subprocess, and parses its stdout. Errors are logged and yield an
        empty findings list rather than raising.
        """
        findings = []
        try:
            # Build polaris command
            cmd = ["polaris", "audit"]
            # Add audit path
            audit_path = config.get("audit_path", str(workspace))
            cmd.extend(["--audit-path", audit_path])
            # Add config file if specified
            config_file = config.get("config_file")
            if config_file:
                cmd.extend(["--config", config_file])
            # Add format
            format_type = config.get("format", "json")
            cmd.extend(["--format", format_type])
            # Add only failed tests flag
            if config.get("only_show_failed_tests", True):
                cmd.append("--only-show-failed-tests")
            # Add severity threshold
            severity_threshold = config.get("severity_threshold", "warning")
            cmd.extend(["--severity", severity_threshold])
            # Add disable checks
            disable_checks = config.get("disable_checks", [])
            for check in disable_checks:
                cmd.extend(["--disable-check", check])
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run polaris
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace
            )
            stdout, stderr = await process.communicate()
            # Parse results
            # JSON output is parsed even on a non-zero exit, because polaris
            # can exit non-zero while still emitting a valid report.
            if process.returncode == 0 or format_type == "json":
                findings = self._parse_polaris_output(stdout.decode(), workspace, config)
            else:
                error_msg = stderr.decode()
                logger.warning(f"Polaris audit failed: {error_msg}")
        except Exception as e:
            logger.warning(f"Error running Polaris audit: {e}")
        return findings

    def _parse_polaris_output(self, output: str, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Parse Polaris JSON output into findings.

        Walks AuditResults -> Results[kind][resource] and processes both
        container-level and pod-level check results. Parse failures are
        logged and produce an empty list.
        """
        findings = []
        if not output.strip():
            return findings
        try:
            data = json.loads(output)
            # Get severity threshold for filtering
            severity_threshold = config.get("severity_threshold", "warning")
            severity_levels = {"error": 3, "warning": 2, "info": 1}
            min_severity_level = severity_levels.get(severity_threshold, 2)
            # Process audit results
            audit_results = data.get("AuditResults", [])
            for result in audit_results:
                namespace = result.get("Namespace", "default")
                results_by_kind = result.get("Results", {})
                for kind, kind_results in results_by_kind.items():
                    for resource_name, resource_data in kind_results.items():
                        # Get container results
                        container_results = resource_data.get("ContainerResults", {})
                        pod_result = resource_data.get("PodResult", {})
                        # Process container results
                        for container_name, container_data in container_results.items():
                            self._process_container_results(
                                findings, container_data, kind, resource_name,
                                container_name, namespace, workspace, min_severity_level
                            )
                        # Process pod-level results
                        if pod_result:
                            self._process_pod_results(
                                findings, pod_result, kind, resource_name,
                                namespace, workspace, min_severity_level
                            )
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse Polaris output: {e}")
        except Exception as e:
            logger.warning(f"Error processing Polaris results: {e}")
        return findings

    def _process_container_results(self, findings: List[ModuleFinding], container_data: Dict,
                                   kind: str, resource_name: str, container_name: str,
                                   namespace: str, workspace: Path, min_severity_level: int):
        """Process container-level validation results.

        Appends one finding per failed check at or above the configured
        minimum severity; mutates *findings* in place.
        """
        results = container_data.get("Results", {})
        for check_name, check_result in results.items():
            severity = check_result.get("Severity", "warning")
            success = check_result.get("Success", True)
            message = check_result.get("Message", "")
            category_name = check_result.get("Category", "")
            # Skip if check passed or severity too low
            if success:
                continue
            severity_levels = {"error": 3, "warning": 2, "info": 1}
            if severity_levels.get(severity, 1) < min_severity_level:
                continue
            # Map severity to our standard levels
            finding_severity = self._map_severity(severity)
            category = self._get_category(check_name, category_name)
            finding = self.create_finding(
                title=f"Polaris Policy Violation: {check_name}",
                description=message or f"Container {container_name} in {kind} {resource_name} failed check {check_name}",
                severity=finding_severity,
                category=category,
                file_path=None,  # Polaris doesn't provide file paths in audit mode
                recommendation=self._get_recommendation(check_name, message),
                metadata={
                    "check_name": check_name,
                    "polaris_severity": severity,
                    "polaris_category": category_name,
                    "resource_kind": kind,
                    "resource_name": resource_name,
                    "container_name": container_name,
                    "namespace": namespace,
                    "context": "container"
                }
            )
            findings.append(finding)

    def _process_pod_results(self, findings: List[ModuleFinding], pod_result: Dict,
                             kind: str, resource_name: str, namespace: str,
                             workspace: Path, min_severity_level: int):
        """Process pod-level validation results.

        Same filtering/mapping as container results, but findings carry
        context "pod" and no container name.
        """
        results = pod_result.get("Results", {})
        for check_name, check_result in results.items():
            severity = check_result.get("Severity", "warning")
            success = check_result.get("Success", True)
            message = check_result.get("Message", "")
            category_name = check_result.get("Category", "")
            # Skip if check passed or severity too low
            if success:
                continue
            severity_levels = {"error": 3, "warning": 2, "info": 1}
            if severity_levels.get(severity, 1) < min_severity_level:
                continue
            # Map severity to our standard levels
            finding_severity = self._map_severity(severity)
            category = self._get_category(check_name, category_name)
            finding = self.create_finding(
                title=f"Polaris Policy Violation: {check_name}",
                description=message or f"{kind} {resource_name} failed check {check_name}",
                severity=finding_severity,
                category=category,
                file_path=None,  # Polaris doesn't provide file paths in audit mode
                recommendation=self._get_recommendation(check_name, message),
                metadata={
                    "check_name": check_name,
                    "polaris_severity": severity,
                    "polaris_category": category_name,
                    "resource_kind": kind,
                    "resource_name": resource_name,
                    "namespace": namespace,
                    "context": "pod"
                }
            )
            findings.append(finding)

    def _map_severity(self, polaris_severity: str) -> str:
        """Map Polaris severity (error/warning/info) to our standard levels."""
        severity_map = {
            "error": "high",
            "warning": "medium",
            "info": "low"
        }
        return severity_map.get(polaris_severity.lower(), "medium")

    def _get_category(self, check_name: str, category_name: str) -> str:
        """Determine finding category based on check name and category.

        Prefers Polaris's own category label; falls back to keyword
        matching on the check name.
        """
        check_lower = check_name.lower()
        category_lower = category_name.lower()
        # Use Polaris category if available
        if "security" in category_lower:
            return "security_configuration"
        elif "efficiency" in category_lower:
            return "resource_efficiency"
        elif "reliability" in category_lower:
            return "reliability"
        # Fallback to check name analysis
        if any(term in check_lower for term in ["security", "privilege", "root", "capabilities"]):
            return "security_configuration"
        elif any(term in check_lower for term in ["resources", "limits", "requests"]):
            return "resource_management"
        elif any(term in check_lower for term in ["probe", "health", "liveness", "readiness"]):
            return "health_monitoring"
        elif any(term in check_lower for term in ["image", "tag", "pull"]):
            return "image_management"
        elif any(term in check_lower for term in ["network", "host"]):
            return "network_security"
        else:
            return "kubernetes_best_practices"

    def _get_recommendation(self, check_name: str, message: str) -> str:
        """Generate a remediation recommendation for a failed check.

        Keyword-matches the check name (checked in order, first hit wins);
        falls back to echoing the Polaris message or check name.
        """
        check_lower = check_name.lower()
        # Security-related recommendations
        if "privileged" in check_lower:
            return "Remove privileged: true from container security context to reduce security risks."
        elif "runasroot" in check_lower:
            return "Configure runAsNonRoot: true and specify a non-root user ID."
        elif "allowprivilegeescalation" in check_lower:
            return "Set allowPrivilegeEscalation: false to prevent privilege escalation attacks."
        elif "capabilities" in check_lower:
            return "Remove unnecessary capabilities and add only required ones using drop/add lists."
        elif "readonly" in check_lower:
            return "Set readOnlyRootFilesystem: true to prevent filesystem modifications."
        # Resource management recommendations
        elif "memory" in check_lower and "requests" in check_lower:
            return "Set memory requests to ensure proper resource allocation and scheduling."
        elif "memory" in check_lower and "limits" in check_lower:
            return "Set memory limits to prevent containers from using excessive memory."
        elif "cpu" in check_lower and "requests" in check_lower:
            return "Set CPU requests for proper resource allocation and quality of service."
        elif "cpu" in check_lower and "limits" in check_lower:
            return "Set CPU limits to prevent CPU starvation of other containers."
        # Health monitoring recommendations
        elif "liveness" in check_lower:
            return "Add liveness probes to detect and recover from container failures."
        elif "readiness" in check_lower:
            return "Add readiness probes to ensure containers are ready before receiving traffic."
        # Image management recommendations
        elif "tag" in check_lower:
            return "Use specific image tags instead of 'latest' for reproducible deployments."
        elif "pullpolicy" in check_lower:
            return "Set imagePullPolicy appropriately based on your deployment requirements."
        # Generic recommendation
        elif message:
            return f"Address the policy violation: {message}"
        else:
            return f"Review and fix the configuration issue identified by check: {check_name}"

    def _create_summary(self, findings: List[ModuleFinding], total_files: int) -> Dict[str, Any]:
        """Create analysis summary.

        Aggregates counts by severity, category, check, and resource kind,
        plus unique resource and namespace totals.
        """
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
        category_counts = {}
        check_counts = {}
        resource_counts = {}
        for finding in findings:
            # Count by severity
            severity_counts[finding.severity] += 1
            # Count by category
            category = finding.category
            category_counts[category] = category_counts.get(category, 0) + 1
            # Count by check
            check_name = finding.metadata.get("check_name", "unknown")
            check_counts[check_name] = check_counts.get(check_name, 0) + 1
            # Count by resource
            resource_kind = finding.metadata.get("resource_kind", "unknown")
            resource_counts[resource_kind] = resource_counts.get(resource_kind, 0) + 1
        return {
            "total_findings": len(findings),
            "files_scanned": total_files,
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "top_checks": dict(sorted(check_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
            "resource_type_counts": resource_counts,
            "unique_resources": len(set(f"{f.metadata.get('resource_kind')}:{f.metadata.get('resource_name')}" for f in findings)),
            "namespaces": len(set(f.metadata.get("namespace", "default") for f in findings))
        }

View File

@@ -1,43 +0,0 @@
"""
Penetration Testing Modules
This package contains modules for penetration testing and vulnerability assessment.
Available modules:
- Nuclei: Fast and customizable vulnerability scanner
- Nmap: Network discovery and security auditing
- Masscan: High-speed Internet-wide port scanner
- SQLMap: Automatic SQL injection detection and exploitation
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule
# Module registry for automatic discovery
PENETRATION_TESTING_MODULES: List[Type[BaseModule]] = []
def register_module(module_class: Type[BaseModule]):
    """Register a penetration testing module.

    Intended for use as a class decorator: records the class in the
    package-level registry and returns it unchanged.
    """
    PENETRATION_TESTING_MODULES.append(module_class)
    return module_class
def get_available_modules() -> List[Type[BaseModule]]:
    """Return a shallow copy of the registered penetration testing modules."""
    return list(PENETRATION_TESTING_MODULES)
# Import modules to trigger registration
from .nuclei import NucleiModule
from .nmap import NmapModule
from .masscan import MasscanModule
from .sqlmap import SQLMapModule

View File

@@ -1,607 +0,0 @@
"""
Masscan Penetration Testing Module
This module uses Masscan for high-speed Internet-wide port scanning.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class MasscanModule(BaseModule):
"""Masscan high-speed port scanner module"""
def get_metadata(self) -> ModuleMetadata:
    """Get module metadata.

    Describes the masscan module identity and declares the JSON schemas
    for its configuration input and its findings output.
    """
    return ModuleMetadata(
        name="masscan",
        version="1.3.2",
        description="High-speed Internet-wide port scanner for large-scale network discovery",
        author="FuzzForge Team",
        category="penetration_testing",
        tags=["port-scan", "network", "discovery", "high-speed", "mass-scan"],
        input_schema={
            "type": "object",
            "properties": {
                # Target selection — at least one of targets/target_file is
                # required (enforced by validate_config, not by this schema).
                "targets": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of targets (IP addresses, CIDR ranges, domains)"
                },
                "target_file": {
                    "type": "string",
                    "description": "File containing targets to scan"
                },
                "ports": {
                    "type": "string",
                    "default": "1-1000",
                    "description": "Port range or specific ports to scan"
                },
                "top_ports": {
                    "type": "integer",
                    "description": "Scan top N most common ports"
                },
                # Rate controls — masscan's defining feature.
                "rate": {
                    "type": "integer",
                    "default": 1000,
                    "description": "Packet transmission rate (packets/second)"
                },
                "max_rate": {
                    "type": "integer",
                    "description": "Maximum packet rate limit"
                },
                "connection_timeout": {
                    "type": "integer",
                    "default": 10,
                    "description": "Connection timeout in seconds"
                },
                "wait_time": {
                    "type": "integer",
                    "default": 10,
                    "description": "Time to wait for responses (seconds)"
                },
                "retries": {
                    "type": "integer",
                    "default": 0,
                    "description": "Number of retries for failed connections"
                },
                "randomize_hosts": {
                    "type": "boolean",
                    "default": True,
                    "description": "Randomize host order"
                },
                # Network plumbing options forwarded to the masscan CLI.
                "source_ip": {
                    "type": "string",
                    "description": "Source IP address to use"
                },
                "source_port": {
                    "type": "string",
                    "description": "Source port range to use"
                },
                "interface": {
                    "type": "string",
                    "description": "Network interface to use"
                },
                "router_mac": {
                    "type": "string",
                    "description": "Router MAC address"
                },
                "exclude_targets": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Targets to exclude from scanning"
                },
                "exclude_file": {
                    "type": "string",
                    "description": "File containing targets to exclude"
                },
                "ping": {
                    "type": "boolean",
                    "default": False,
                    "description": "Include ping scan"
                },
                "banners": {
                    "type": "boolean",
                    "default": False,
                    "description": "Grab banners from services"
                },
                "http_user_agent": {
                    "type": "string",
                    "description": "HTTP User-Agent string for banner grabbing"
                }
            }
        },
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "host": {"type": "string"},
                            "port": {"type": "integer"},
                            "protocol": {"type": "string"},
                            "state": {"type": "string"},
                            "banner": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Validate masscan configuration.

    Requires at least one target source and a sane packet rate.

    Raises:
        ValueError: If no targets are given or the rate is out of range.
    """
    has_targets = bool(config.get("targets", []))
    has_target_file = bool(config.get("target_file"))
    if not (has_targets or has_target_file):
        raise ValueError("Either 'targets' or 'target_file' must be specified")
    rate = config.get("rate", 1000)
    if rate <= 0 or rate > 10000000:  # Masscan limit
        raise ValueError("Rate must be between 1 and 10,000,000 packets/second")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Execute Masscan port scanning.

    Validates config/workspace, builds the target list, runs the scan,
    and summarizes the open-port findings. Any exception (including
    validation failures) yields a "failed" result rather than raising.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Masscan high-speed port scan")
        # Prepare target specification
        target_args = self._prepare_targets(config, workspace)
        if not target_args:
            # No targets is a successful, empty run.
            logger.info("No targets specified for scanning")
            return self.create_result(
                findings=[],
                status="success",
                summary={"total_findings": 0, "targets_scanned": 0}
            )
        # Run Masscan scan
        findings = await self._run_masscan_scan(target_args, config, workspace)
        # Create summary
        # When scanning from a target file the individual targets aren't
        # enumerated here, so the count falls back to 1.
        target_count = len(config.get("targets", [])) if config.get("targets") else 1
        summary = self._create_summary(findings, target_count)
        logger.info(f"Masscan found {len(findings)} open ports")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"Masscan module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
def _prepare_targets(self, config: Dict[str, Any], workspace: Path) -> List[str]:
"""Prepare target arguments for masscan"""
target_args = []
# Add targets from list
targets = config.get("targets", [])
for target in targets:
target_args.extend(["-t", target])
# Add targets from file
target_file = config.get("target_file")
if target_file:
target_path = workspace / target_file
if target_path.exists():
target_args.extend(["-iL", str(target_path)])
else:
raise FileNotFoundError(f"Target file not found: {target_file}")
return target_args
async def _run_masscan_scan(self, target_args: List[str], config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Run a Masscan scan and parse its results.

    Builds the full masscan command from *config*, writes JSON results to
    ``masscan_results.json`` inside the workspace (``-oJ``), and falls
    back to parsing stdout if that file was not produced. All errors are
    logged; an empty list is returned instead of raising.
    """
    findings = []
    try:
        # Build masscan command
        cmd = ["masscan"]
        # Add target arguments
        cmd.extend(target_args)
        # Add port specification
        if config.get("top_ports"):
            # Masscan doesn't have built-in top ports, use common ports
            top_ports = self._get_top_ports(config["top_ports"])
            cmd.extend(["-p", top_ports])
        else:
            ports = config.get("ports", "1-1000")
            cmd.extend(["-p", ports])
        # Add rate limiting
        rate = config.get("rate", 1000)
        cmd.extend(["--rate", str(rate)])
        # Add max rate if specified
        max_rate = config.get("max_rate")
        if max_rate:
            cmd.extend(["--max-rate", str(max_rate)])
        # Add connection timeout
        connection_timeout = config.get("connection_timeout", 10)
        cmd.extend(["--connection-timeout", str(connection_timeout)])
        # Add wait time
        wait_time = config.get("wait_time", 10)
        cmd.extend(["--wait", str(wait_time)])
        # Add retries (only when > 0 — masscan's default is no retries)
        retries = config.get("retries", 0)
        if retries > 0:
            cmd.extend(["--retries", str(retries)])
        # Add randomization
        if config.get("randomize_hosts", True):
            cmd.append("--randomize-hosts")
        # Add source IP
        source_ip = config.get("source_ip")
        if source_ip:
            cmd.extend(["--source-ip", source_ip])
        # Add source port
        source_port = config.get("source_port")
        if source_port:
            cmd.extend(["--source-port", source_port])
        # Add interface
        interface = config.get("interface")
        if interface:
            cmd.extend(["-e", interface])
        # Add router MAC
        router_mac = config.get("router_mac")
        if router_mac:
            cmd.extend(["--router-mac", router_mac])
        # Add exclude targets
        exclude_targets = config.get("exclude_targets", [])
        for exclude in exclude_targets:
            cmd.extend(["--exclude", exclude])
        # Add exclude file (silently skipped if missing — NOTE(review):
        # unlike target_file, a missing exclude file does not raise)
        exclude_file = config.get("exclude_file")
        if exclude_file:
            exclude_path = workspace / exclude_file
            if exclude_path.exists():
                cmd.extend(["--excludefile", str(exclude_path)])
        # Add ping scan
        if config.get("ping", False):
            cmd.append("--ping")
        # Add banner grabbing
        if config.get("banners", False):
            cmd.append("--banners")
        # Add HTTP User-Agent
        user_agent = config.get("http_user_agent")
        if user_agent:
            cmd.extend(["--http-user-agent", user_agent])
        # Set output format to JSON
        output_file = workspace / "masscan_results.json"
        cmd.extend(["-oJ", str(output_file)])
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run masscan
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results from JSON file
        if output_file.exists():
            findings = self._parse_masscan_json(output_file, workspace)
        else:
            # Try to parse stdout if no file was created
            if stdout:
                findings = self._parse_masscan_output(stdout.decode(), workspace)
            else:
                error_msg = stderr.decode()
                logger.error(f"Masscan scan failed: {error_msg}")
    except Exception as e:
        logger.warning(f"Error running Masscan scan: {e}")
    return findings
def _get_top_ports(self, count: int) -> str:
"""Get top N common ports for masscan"""
# Common ports based on Nmap's top ports list
top_ports = [
80, 23, 443, 21, 22, 25, 53, 110, 111, 995, 993, 143, 993, 995, 587, 465,
109, 88, 53, 135, 139, 445, 993, 995, 143, 25, 110, 465, 587, 993, 995,
80, 8080, 443, 8443, 8000, 8888, 8880, 2222, 9999, 3389, 5900, 5901,
1433, 3306, 5432, 1521, 50000, 1494, 554, 37, 79, 82, 5060, 50030
]
# Take first N unique ports
selected_ports = list(dict.fromkeys(top_ports))[:count]
return ",".join(map(str, selected_ports))
def _parse_masscan_json(self, json_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse masscan's JSON-lines output file into findings.

    Masscan's ``-oJ`` output is one JSON object per line (JSONL) rather
    than a single document, so each line is decoded independently and
    malformed lines are skipped.
    """
    findings = []
    try:
        raw = json_file.read_text()
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Skip partial/garbled lines instead of aborting the parse.
                continue
            finding = self._process_masscan_result(record)
            if finding:
                findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing Masscan JSON: {e}")
    return findings
def _parse_masscan_output(self, output: str, workspace: Path) -> List[ModuleFinding]:
    """Parse masscan's plain-text list output into findings.

    Expects lines of the form ``open tcp 80 1.2.3.4``; blank and
    comment (``#``) lines are skipped.
    """
    findings = []
    try:
        for line in output.split('\n'):
            if not line.strip() or line.startswith('#'):
                continue
            fields = line.split()
            if len(fields) < 4 or fields[0] != "open":
                continue
            proto, port_text, host = fields[1], fields[2], fields[3]
            # Re-shape into the same record layout as the JSON output so
            # both paths share one result processor.
            record = {
                "ip": host,
                "ports": [{"port": int(port_text), "proto": proto, "status": "open"}]
            }
            finding = self._process_masscan_result(record)
            if finding:
                findings.append(finding)
    except Exception as e:
        logger.warning(f"Error parsing Masscan output: {e}")
    return findings
def _process_masscan_result(self, result: Dict) -> ModuleFinding:
    """Convert one Masscan result record into a ModuleFinding.

    Args:
        result: Dict shaped like Masscan JSON output: ``{"ip": str,
            "ports": [{"port": int, "proto": str, "status": str,
            "service": {"banner": str}}]}``.

    Returns:
        A finding for an open port, or None (despite the annotation) when
        the record lacks an IP/ports, the port is not "open", or processing
        fails — callers must None-check.
    """
    try:
        ip_address = result.get("ip", "")
        ports_data = result.get("ports", [])
        if not ip_address or not ports_data:
            return None
        # Process first port (Masscan typically reports one port per result)
        port_data = ports_data[0]
        port_number = port_data.get("port", 0)
        protocol = port_data.get("proto", "tcp")
        status = port_data.get("status", "open")
        service = port_data.get("service", {})
        banner = service.get("banner", "") if service else ""
        # Only report open ports
        if status != "open":
            return None
        # Determine severity based on port
        severity = self._get_port_severity(port_number)
        # Get category
        category = self._get_port_category(port_number)
        # Create description; banner is truncated to 100 chars to keep it readable.
        description = f"Open port {port_number}/{protocol} on {ip_address}"
        if banner:
            description += f" (Banner: {banner[:100]})"
        # Create finding
        finding = self.create_finding(
            title=f"Open Port: {port_number}/{protocol}",
            description=description,
            severity=severity,
            category=category,
            file_path=None,  # Network scan, no file
            recommendation=self._get_port_recommendation(port_number, banner),
            metadata={
                "host": ip_address,
                "port": port_number,
                "protocol": protocol,
                "status": status,
                "banner": banner,
                "service_info": service
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error processing Masscan result: {e}")
        return None
def _get_port_severity(self, port: int) -> str:
"""Determine severity based on port number"""
# High risk ports (commonly exploited or sensitive services)
high_risk_ports = [21, 23, 135, 139, 445, 1433, 1521, 3389, 5900, 6379, 27017]
# Medium risk ports (network services that could be risky if misconfigured)
medium_risk_ports = [22, 25, 53, 110, 143, 993, 995, 3306, 5432]
# Web ports are generally lower risk but still noteworthy
web_ports = [80, 443, 8080, 8443, 8000, 8888]
if port in high_risk_ports:
return "high"
elif port in medium_risk_ports:
return "medium"
elif port in web_ports:
return "low"
elif port < 1024: # Well-known ports
return "medium"
else:
return "low"
def _get_port_category(self, port: int) -> str:
"""Determine category based on port number"""
if port in [80, 443, 8080, 8443, 8000, 8888]:
return "web_services"
elif port == 22:
return "remote_access"
elif port in [20, 21]:
return "file_transfer"
elif port in [25, 110, 143, 587, 993, 995]:
return "email_services"
elif port in [1433, 3306, 5432, 1521, 27017, 6379]:
return "database_services"
elif port == 3389:
return "remote_desktop"
elif port == 53:
return "dns_services"
elif port in [135, 139, 445]:
return "windows_services"
elif port in [23, 5900]:
return "insecure_protocols"
else:
return "network_services"
def _get_port_recommendation(self, port: int, banner: str) -> str:
"""Generate recommendation based on port and banner"""
# Port-specific recommendations
recommendations = {
21: "FTP service detected. Consider using SFTP instead for secure file transfer.",
22: "SSH service detected. Ensure strong authentication and key-based access.",
23: "Telnet service detected. Replace with SSH for secure remote access.",
25: "SMTP service detected. Ensure proper authentication and encryption.",
53: "DNS service detected. Verify it's not an open resolver.",
80: "HTTP service detected. Consider upgrading to HTTPS.",
110: "POP3 service detected. Consider using secure alternatives like IMAPS.",
135: "Windows RPC service exposed. Restrict access if not required.",
139: "NetBIOS service detected. Ensure proper access controls.",
143: "IMAP service detected. Consider using encrypted IMAPS.",
445: "SMB service detected. Ensure latest patches and access controls.",
443: "HTTPS service detected. Verify SSL/TLS configuration.",
993: "IMAPS service detected. Verify certificate configuration.",
995: "POP3S service detected. Verify certificate configuration.",
1433: "SQL Server detected. Ensure strong authentication and network restrictions.",
1521: "Oracle DB detected. Ensure proper security configuration.",
3306: "MySQL service detected. Secure with strong passwords and access controls.",
3389: "RDP service detected. Use strong passwords and consider VPN access.",
5432: "PostgreSQL detected. Ensure proper authentication and access controls.",
5900: "VNC service detected. Use strong passwords and encryption.",
6379: "Redis service detected. Configure authentication and access controls.",
8080: "HTTP proxy/web service detected. Verify if exposure is intended.",
8443: "HTTPS service on non-standard port. Verify certificate configuration."
}
recommendation = recommendations.get(port, f"Port {port} is open. Verify if this service is required and properly secured.")
# Add banner-specific advice
if banner:
banner_lower = banner.lower()
if "default" in banner_lower or "admin" in banner_lower:
recommendation += " Default credentials may be in use - change immediately."
elif any(version in banner_lower for version in ["1.0", "2.0", "old", "legacy"]):
recommendation += " Service version appears outdated - consider upgrading."
return recommendation
def _create_summary(self, findings: List[ModuleFinding], targets_count: int) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
port_counts = {}
host_counts = {}
protocol_counts = {"tcp": 0, "udp": 0}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by port
port = finding.metadata.get("port")
if port:
port_counts[port] = port_counts.get(port, 0) + 1
# Count by host
host = finding.metadata.get("host", "unknown")
host_counts[host] = host_counts.get(host, 0) + 1
# Count by protocol
protocol = finding.metadata.get("protocol", "tcp")
if protocol in protocol_counts:
protocol_counts[protocol] += 1
return {
"total_findings": len(findings),
"targets_scanned": targets_count,
"severity_counts": severity_counts,
"category_counts": category_counts,
"protocol_counts": protocol_counts,
"unique_hosts": len(host_counts),
"top_ports": dict(sorted(port_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"host_counts": dict(sorted(host_counts.items(), key=lambda x: x[1], reverse=True)[:10])
}

View File

@@ -1,710 +0,0 @@
"""
Nmap Penetration Testing Module
This module uses Nmap for network discovery, port scanning, and security auditing.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class NmapModule(BaseModule):
"""Nmap network discovery and security auditing module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe the Nmap module: identity, tags, and JSON Schemas for I/O.

    The input_schema documents every config key the module understands;
    note that validate_config() enforces only a subset of it (target
    presence, scan_type, timing_template).
    """
    return ModuleMetadata(
        name="nmap",
        version="7.94",  # tracks the expected nmap binary version
        description="Network discovery and security auditing using Nmap",
        author="FuzzForge Team",
        category="penetration_testing",
        tags=["network", "port-scan", "discovery", "security-audit", "service-detection"],
        input_schema={
            "type": "object",
            "properties": {
                "targets": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of targets (IP addresses, domains, CIDR ranges)"
                },
                "target_file": {
                    "type": "string",
                    "description": "File containing targets to scan"
                },
                "scan_type": {
                    "type": "string",
                    "enum": ["syn", "tcp", "udp", "ack", "window", "maimon"],
                    "default": "syn",
                    "description": "Type of scan to perform"
                },
                "ports": {
                    "type": "string",
                    "default": "1-1000",
                    "description": "Port range or specific ports to scan"
                },
                "top_ports": {
                    "type": "integer",
                    "description": "Scan top N most common ports"
                },
                "service_detection": {
                    "type": "boolean",
                    "default": True,
                    "description": "Enable service version detection"
                },
                "os_detection": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable OS detection (requires root)"
                },
                "script_scan": {
                    "type": "boolean",
                    "default": True,
                    "description": "Enable default NSE scripts"
                },
                "scripts": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific NSE scripts to run"
                },
                "script_categories": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "NSE script categories to run (safe, vuln, etc.)"
                },
                "timing_template": {
                    "type": "string",
                    "enum": ["paranoid", "sneaky", "polite", "normal", "aggressive", "insane"],
                    "default": "normal",
                    "description": "Timing template (0-5)"
                },
                "max_retries": {
                    "type": "integer",
                    "default": 1,
                    "description": "Maximum number of retries"
                },
                "host_timeout": {
                    "type": "integer",
                    "default": 300,
                    "description": "Host timeout in seconds"
                },
                "min_rate": {
                    "type": "integer",
                    "description": "Minimum packet rate (packets/second)"
                },
                "max_rate": {
                    "type": "integer",
                    "description": "Maximum packet rate (packets/second)"
                },
                "stealth": {
                    "type": "boolean",
                    "default": False,
                    "description": "Enable stealth scanning options"
                },
                "skip_discovery": {
                    "type": "boolean",
                    "default": False,
                    "description": "Skip host discovery (treat all as online)"
                }
            }
        },
        # NOTE(review): this schema describes a condensed per-port shape; the
        # findings actually emitted carry richer metadata (see _process_port).
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "host": {"type": "string"},
                            "port": {"type": "integer"},
                            "service": {"type": "string"},
                            "state": {"type": "string"},
                            "version": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Validate nmap scan options.

    Requires at least one target source and checks scan_type /
    timing_template against the supported values.

    Returns:
        True when the config is acceptable.

    Raises:
        ValueError: on missing targets or an unsupported enum value.
    """
    if not (config.get("targets", []) or config.get("target_file")):
        raise ValueError("Either 'targets' or 'target_file' must be specified")
    valid_scan_types = ["syn", "tcp", "udp", "ack", "window", "maimon"]
    chosen_scan = config.get("scan_type", "syn")
    if chosen_scan not in valid_scan_types:
        raise ValueError(f"Invalid scan type: {chosen_scan}. Valid: {valid_scan_types}")
    valid_timings = ["paranoid", "sneaky", "polite", "normal", "aggressive", "insane"]
    chosen_timing = config.get("timing_template", "normal")
    if chosen_timing not in valid_timings:
        raise ValueError(f"Invalid timing template: {chosen_timing}. Valid: {valid_timings}")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Run an Nmap scan described by *config*.

    Validates inputs, materialises the target list, invokes nmap, and wraps
    the findings in a ModuleResult. All failures are caught and reported as
    a "failed" result rather than raised.
    """
    self.start_timer()
    try:
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Nmap network scan")
        target_file = await self._prepare_targets(config, workspace)
        if target_file is None:
            # Nothing to scan: succeed with an empty result set.
            logger.info("No targets specified for scanning")
            return self.create_result(
                findings=[],
                status="success",
                summary={"total_findings": 0, "hosts_scanned": 0},
            )
        findings = await self._run_nmap_scan(target_file, config, workspace)
        configured_targets = config.get("targets")
        # A target_file counts as a single target source for the summary.
        target_count = len(configured_targets) if configured_targets else 1
        summary = self._create_summary(findings, target_count)
        logger.info(f"Nmap found {len(findings)} results")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary,
        )
    except Exception as e:
        logger.error(f"Nmap module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e),
        )
async def _prepare_targets(self, config: Dict[str, Any], workspace: Path) -> Path:
"""Prepare target file for scanning"""
targets = config.get("targets", [])
target_file = config.get("target_file")
if target_file:
# Use existing target file
target_path = workspace / target_file
if target_path.exists():
return target_path
else:
raise FileNotFoundError(f"Target file not found: {target_file}")
if targets:
# Create temporary target file
target_path = workspace / "nmap_targets.txt"
with open(target_path, 'w') as f:
for target in targets:
f.write(f"{target}\n")
return target_path
return None
async def _run_nmap_scan(self, target_file: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Assemble and run the nmap command line, then parse its XML output.

    All errors are logged and swallowed; an empty list is returned on
    failure. Results come from the -oX XML file, not stdout.
    """
    findings = []
    try:
        # Build nmap command
        cmd = ["nmap"]
        # Add scan type. NOTE(review): -sS/-sU and -O below typically require
        # root privileges — presumably the worker runs privileged; confirm.
        scan_type = config.get("scan_type", "syn")
        scan_type_map = {
            "syn": "-sS",
            "tcp": "-sT",
            "udp": "-sU",
            "ack": "-sA",
            "window": "-sW",
            "maimon": "-sM"
        }
        cmd.append(scan_type_map[scan_type])
        # Add port specification: top_ports takes precedence over a range.
        if config.get("top_ports"):
            cmd.extend(["--top-ports", str(config["top_ports"])])
        else:
            ports = config.get("ports", "1-1000")
            cmd.extend(["-p", ports])
        # Add service detection
        if config.get("service_detection", True):
            cmd.append("-sV")
        # Add OS detection
        if config.get("os_detection", False):
            cmd.append("-O")
        # Add script scanning (default NSE scripts)
        if config.get("script_scan", True):
            cmd.append("-sC")
        # Add specific scripts. NOTE(review): a second --script below would
        # override this one if both scripts and script_categories are set.
        scripts = config.get("scripts", [])
        if scripts:
            cmd.extend(["--script", ",".join(scripts)])
        # Add script categories
        script_categories = config.get("script_categories", [])
        if script_categories:
            cmd.extend(["--script", ",".join(script_categories)])
        # Add timing template
        timing = config.get("timing_template", "normal")
        timing_map = {
            "paranoid": "-T0",
            "sneaky": "-T1",
            "polite": "-T2",
            "normal": "-T3",
            "aggressive": "-T4",
            "insane": "-T5"
        }
        cmd.append(timing_map[timing])
        # Add retry options
        max_retries = config.get("max_retries", 1)
        cmd.extend(["--max-retries", str(max_retries)])
        # Add timeout (per host)
        host_timeout = config.get("host_timeout", 300)
        cmd.extend(["--host-timeout", f"{host_timeout}s"])
        # Add rate limiting
        if config.get("min_rate"):
            cmd.extend(["--min-rate", str(config["min_rate"])])
        if config.get("max_rate"):
            cmd.extend(["--max-rate", str(config["max_rate"])])
        # Add stealth options: -f fragments packets, plus host order shuffle.
        if config.get("stealth", False):
            cmd.extend(["-f", "--randomize-hosts"])
        # Skip host discovery if requested
        if config.get("skip_discovery", False):
            cmd.append("-Pn")
        # Add output format (XML is the authoritative result channel)
        output_file = workspace / "nmap_results.xml"
        cmd.extend(["-oX", str(output_file)])
        # Add targets from file
        cmd.extend(["-iL", str(target_file)])
        # Add verbose and reason flags
        cmd.extend(["-v", "--reason"])
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run nmap
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results from XML file; its presence, not the exit code,
        # decides success here.
        if output_file.exists():
            findings = self._parse_nmap_xml(output_file, workspace)
        else:
            error_msg = stderr.decode()
            logger.error(f"Nmap scan failed: {error_msg}")
    except Exception as e:
        logger.warning(f"Error running Nmap scan: {e}")
    return findings
def _parse_nmap_xml(self, xml_file: Path, workspace: Path) -> List[ModuleFinding]:
    """Parse an nmap -oX XML file into findings.

    Walks every up host, emitting one finding per open port (via
    _process_port) and one per significant host-level NSE script result
    (via _process_host_script). Parse errors are logged and yield an
    empty/partial list. *workspace* is unused here.
    """
    findings = []
    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        # Process each host
        for host_elem in root.findall(".//host"):
            # Skip hosts that were not confirmed up
            host_status = host_elem.find("status")
            if host_status is None or host_status.get("state") != "up":
                continue
            # Get IP address — prefer IPv4, fall back to IPv6
            address_elem = host_elem.find("address[@addrtype='ipv4']")
            if address_elem is None:
                address_elem = host_elem.find("address[@addrtype='ipv6']")
            if address_elem is None:
                continue
            ip_address = address_elem.get("addr")
            # Get hostname if available (first <hostname> entry only)
            hostname = ""
            hostnames_elem = host_elem.find("hostnames")
            if hostnames_elem is not None:
                hostname_elem = hostnames_elem.find("hostname")
                if hostname_elem is not None:
                    hostname = hostname_elem.get("name", "")
            # Get OS information (empty dict when -O was not used)
            os_info = self._extract_os_info(host_elem)
            # Process ports
            ports_elem = host_elem.find("ports")
            if ports_elem is not None:
                for port_elem in ports_elem.findall("port"):
                    finding = self._process_port(port_elem, ip_address, hostname, os_info)
                    if finding:
                        findings.append(finding)
            # Process host scripts (host-level NSE output)
            host_scripts = host_elem.find("hostscript")
            if host_scripts is not None:
                for script_elem in host_scripts.findall("script"):
                    finding = self._process_host_script(script_elem, ip_address, hostname)
                    if finding:
                        findings.append(finding)
    except ET.ParseError as e:
        logger.warning(f"Failed to parse Nmap XML: {e}")
    except Exception as e:
        logger.warning(f"Error processing Nmap results: {e}")
    return findings
def _extract_os_info(self, host_elem) -> Dict[str, Any]:
"""Extract OS information from host element"""
os_info = {}
os_elem = host_elem.find("os")
if os_elem is not None:
osmatch_elem = os_elem.find("osmatch")
if osmatch_elem is not None:
os_info["name"] = osmatch_elem.get("name", "")
os_info["accuracy"] = osmatch_elem.get("accuracy", "0")
return os_info
def _process_port(self, port_elem, ip_address: str, hostname: str, os_info: Dict) -> ModuleFinding:
    """Convert an nmap <port> element into a finding.

    Args:
        port_elem: ElementTree <port> node.
        ip_address: Address of the host this port belongs to.
        hostname: Resolved hostname ("" when unknown).
        os_info: OS fingerprint dict from _extract_os_info.

    Returns:
        A ModuleFinding for an open port, or None (despite the annotation)
        for closed/filtered ports, malformed elements, or processing errors.
    """
    try:
        port_id = port_elem.get("portid")
        protocol = port_elem.get("protocol")
        # Get state
        state_elem = port_elem.find("state")
        if state_elem is None:
            return None
        state = state_elem.get("state")
        reason = state_elem.get("reason", "")
        # Only report open ports
        if state != "open":
            return None
        # Service detection attributes (populated only when -sV ran)
        service_elem = port_elem.find("service")
        service_name = ""
        service_version = ""
        service_product = ""
        service_extra = ""
        if service_elem is not None:
            service_name = service_elem.get("name", "")
            service_version = service_elem.get("version", "")
            service_product = service_elem.get("product", "")
            service_extra = service_elem.get("extrainfo", "")
        # Determine severity and category based on port/service
        severity = self._get_port_severity(int(port_id), service_name)
        category = self._get_port_category(int(port_id), service_name)
        # Build a human-readable description. Fixed defect: the previous
        # space-joined append sequence could emit mis-spaced or unbalanced
        # parentheses (e.g. "(Apache version 2.4 )" or a stray ")" when no
        # product was detected). The parenthesised detail is now assembled
        # atomically.
        description = f"Open port {port_id}/{protocol}"
        if service_name:
            description += f" running {service_name}"
        if service_product:
            product_detail = service_product
            if service_version:
                product_detail += f" version {service_version}"
            description += f" ({product_detail})"
        # Collect per-port NSE script output for the metadata payload
        script_results = []
        for script_elem in port_elem.findall("script"):
            script_id = script_elem.get("id", "")
            script_output = script_elem.get("output", "")
            if script_output:
                script_results.append({"id": script_id, "output": script_output})
        # Create finding
        finding = self.create_finding(
            title=f"Open Port: {port_id}/{protocol}",
            description=description,
            severity=severity,
            category=category,
            file_path=None,  # Network scan, no file
            recommendation=self._get_port_recommendation(int(port_id), service_name, script_results),
            metadata={
                "host": ip_address,
                "hostname": hostname,
                "port": int(port_id),
                "protocol": protocol,
                "state": state,
                "reason": reason,
                "service_name": service_name,
                "service_version": service_version,
                "service_product": service_product,
                "service_extra": service_extra,
                "os_info": os_info,
                "script_results": script_results
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error processing port: {e}")
        return None
def _process_host_script(self, script_elem, ip_address: str, hostname: str) -> ModuleFinding:
    """Convert a host-level NSE <script> element into a finding.

    Returns None (despite the annotation) for empty output, purely
    informational scripts, or processing errors — callers must None-check.
    """
    try:
        script_id = script_elem.get("id", "")
        script_output = script_elem.get("output", "")
        if not script_output or not script_id:
            return None
        # Determine if this is a security issue
        severity = self._get_script_severity(script_id, script_output)
        if severity == "info":
            # Skip informational scripts to keep findings actionable
            return None
        category = self._get_script_category(script_id)
        finding = self.create_finding(
            title=f"Host Script Result: {script_id}",
            description=script_output.strip(),
            severity=severity,
            category=category,
            file_path=None,  # network finding, no source file
            recommendation=self._get_script_recommendation(script_id, script_output),
            metadata={
                "host": ip_address,
                "hostname": hostname,
                "script_id": script_id,
                "script_output": script_output.strip()
            }
        )
        return finding
    except Exception as e:
        logger.warning(f"Error processing host script: {e}")
        return None
def _get_port_severity(self, port: int, service: str) -> str:
"""Determine severity based on port and service"""
# High risk ports
high_risk_ports = [21, 23, 135, 139, 445, 1433, 1521, 3389, 5432, 5900, 6379]
# Medium risk ports
medium_risk_ports = [22, 25, 53, 110, 143, 993, 995]
# Web ports are generally lower risk
web_ports = [80, 443, 8080, 8443, 8000, 8888]
if port in high_risk_ports:
return "high"
elif port in medium_risk_ports:
return "medium"
elif port in web_ports:
return "low"
elif port < 1024: # Well-known ports
return "medium"
else:
return "low"
def _get_port_category(self, port: int, service: str) -> str:
"""Determine category based on port and service"""
service_lower = service.lower()
if service_lower in ["http", "https"] or port in [80, 443, 8080, 8443]:
return "web_services"
elif service_lower in ["ssh"] or port == 22:
return "remote_access"
elif service_lower in ["ftp", "ftps"] or port in [20, 21]:
return "file_transfer"
elif service_lower in ["smtp", "pop3", "imap"] or port in [25, 110, 143, 587, 993, 995]:
return "email_services"
elif service_lower in ["mysql", "postgresql", "mssql", "oracle"] or port in [1433, 3306, 5432, 1521]:
return "database_services"
elif service_lower in ["rdp"] or port == 3389:
return "remote_desktop"
elif service_lower in ["dns"] or port == 53:
return "dns_services"
elif port in [135, 139, 445]:
return "windows_services"
else:
return "network_services"
def _get_script_severity(self, script_id: str, output: str) -> str:
"""Determine severity for script results"""
script_lower = script_id.lower()
output_lower = output.lower()
# High severity indicators
if any(term in script_lower for term in ["vuln", "exploit", "backdoor"]):
return "high"
if any(term in output_lower for term in ["vulnerable", "exploit", "critical"]):
return "high"
# Medium severity indicators
if any(term in script_lower for term in ["auth", "brute", "enum"]):
return "medium"
if any(term in output_lower for term in ["anonymous", "default", "weak"]):
return "medium"
# Everything else is informational
return "info"
def _get_script_category(self, script_id: str) -> str:
"""Determine category for script results"""
script_lower = script_id.lower()
if "vuln" in script_lower:
return "vulnerability_detection"
elif "auth" in script_lower or "brute" in script_lower:
return "authentication_testing"
elif "enum" in script_lower:
return "information_gathering"
elif "ssl" in script_lower or "tls" in script_lower:
return "ssl_tls_testing"
else:
return "service_detection"
def _get_port_recommendation(self, port: int, service: str, scripts: List[Dict]) -> str:
"""Generate recommendation for open port"""
# Check for script-based issues
for script in scripts:
script_id = script.get("id", "")
if "vuln" in script_id.lower():
return "Vulnerability detected by NSE scripts. Review and patch the service."
# Port-specific recommendations
if port == 21:
return "FTP service detected. Consider using SFTP instead for secure file transfer."
elif port == 23:
return "Telnet service detected. Use SSH instead for secure remote access."
elif port == 135:
return "Windows RPC service exposed. Restrict access if not required."
elif port in [139, 445]:
return "SMB/NetBIOS services detected. Ensure proper access controls and patch levels."
elif port == 1433:
return "SQL Server detected. Ensure strong authentication and network restrictions."
elif port == 3389:
return "RDP service detected. Use strong passwords and consider VPN access."
elif port in [80, 443]:
return "Web service detected. Ensure regular security updates and proper configuration."
else:
return f"Open port {port} detected. Verify if this service is required and properly secured."
def _get_script_recommendation(self, script_id: str, output: str) -> str:
"""Generate recommendation for script results"""
if "vuln" in script_id.lower():
return "Vulnerability detected. Apply security patches and updates."
elif "auth" in script_id.lower():
return "Authentication issue detected. Review and strengthen authentication mechanisms."
elif "ssl" in script_id.lower():
return "SSL/TLS configuration issue. Update SSL configuration and certificates."
else:
return "Review the script output and address any security concerns identified."
def _create_summary(self, findings: List[ModuleFinding], hosts_count: int) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
port_counts = {}
service_counts = {}
host_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by port
port = finding.metadata.get("port")
if port:
port_counts[port] = port_counts.get(port, 0) + 1
# Count by service
service = finding.metadata.get("service_name", "unknown")
service_counts[service] = service_counts.get(service, 0) + 1
# Count by host
host = finding.metadata.get("host", "unknown")
host_counts[host] = host_counts.get(host, 0) + 1
return {
"total_findings": len(findings),
"hosts_scanned": hosts_count,
"severity_counts": severity_counts,
"category_counts": category_counts,
"unique_hosts": len(host_counts),
"top_ports": dict(sorted(port_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"top_services": dict(sorted(service_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"host_counts": dict(sorted(host_counts.items(), key=lambda x: x[1], reverse=True)[:5])
}

View File

@@ -1,501 +0,0 @@
"""
Nuclei Penetration Testing Module
This module uses Nuclei to perform fast and customizable vulnerability scanning
using community-powered templates.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class NucleiModule(BaseModule):
"""Nuclei fast vulnerability scanner module"""
def get_metadata(self) -> ModuleMetadata:
    """Describe the Nuclei module: identity, tags, and JSON Schemas for I/O.

    The input_schema documents every config key the module understands;
    validate_config() enforces only target presence and severity values.
    """
    return ModuleMetadata(
        name="nuclei",
        version="3.1.0",  # tracks the expected nuclei binary version
        description="Fast and customizable vulnerability scanner using community-powered templates",
        author="FuzzForge Team",
        category="penetration_testing",
        tags=["vulnerability", "scanner", "web", "network", "templates"],
        input_schema={
            "type": "object",
            "properties": {
                "targets": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of targets (URLs, domains, IP addresses)"
                },
                "target_file": {
                    "type": "string",
                    "description": "File containing targets to scan"
                },
                "templates": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific templates to use"
                },
                "template_directory": {
                    "type": "string",
                    "description": "Directory containing custom templates"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Template tags to include"
                },
                "exclude_tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Template tags to exclude"
                },
                "severity": {
                    "type": "array",
                    "items": {"type": "string", "enum": ["critical", "high", "medium", "low", "info"]},
                    "default": ["critical", "high", "medium"],
                    "description": "Severity levels to include"
                },
                "concurrency": {
                    "type": "integer",
                    "default": 25,
                    "description": "Number of concurrent threads"
                },
                "rate_limit": {
                    "type": "integer",
                    "default": 150,
                    "description": "Rate limit (requests per second)"
                },
                "timeout": {
                    "type": "integer",
                    "default": 10,
                    "description": "Timeout for requests (seconds)"
                },
                "retries": {
                    "type": "integer",
                    "default": 1,
                    "description": "Number of retries for failed requests"
                },
                "update_templates": {
                    "type": "boolean",
                    "default": False,
                    "description": "Update templates before scanning"
                },
                "disable_clustering": {
                    "type": "boolean",
                    "default": False,
                    "description": "Disable template clustering"
                },
                "no_interactsh": {
                    "type": "boolean",
                    "default": True,
                    "description": "Disable interactsh server for OAST testing"
                }
            }
        },
        # NOTE(review): condensed result shape; emitted findings may carry more fields.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "template_id": {"type": "string"},
                            "name": {"type": "string"},
                            "severity": {"type": "string"},
                            "host": {"type": "string"},
                            "matched_at": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Validate nuclei scan options.

    Returns:
        True when the config is acceptable.

    Raises:
        ValueError: when no target source is given or a severity value is
            not one of critical/high/medium/low/info.
    """
    if not (config.get("targets", []) or config.get("target_file")):
        raise ValueError("Either 'targets' or 'target_file' must be specified")
    valid_severities = ["critical", "high", "medium", "low", "info"]
    for level in config.get("severity", []):
        if level not in valid_severities:
            raise ValueError(f"Invalid severity: {level}. Valid: {valid_severities}")
    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Run a Nuclei scan described by *config*.

    Validates inputs, optionally refreshes templates, materialises targets,
    runs nuclei, and wraps findings in a ModuleResult. All failures are
    caught and reported as a "failed" result rather than raised.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info("Running Nuclei vulnerability scan")
        # Update templates if requested (best-effort; failures only logged)
        if config.get("update_templates", False):
            await self._update_templates(workspace)
        # Prepare target file
        target_file = await self._prepare_targets(config, workspace)
        if not target_file:
            logger.info("No targets specified for scanning")
            return self.create_result(
                findings=[],
                status="success",
                summary={"total_findings": 0, "targets_scanned": 0}
            )
        # Run Nuclei scan
        findings = await self._run_nuclei_scan(target_file, config, workspace)
        # Create summary; targets count is 0 when only target_file was given
        summary = self._create_summary(findings, len(config.get("targets", [])))
        logger.info(f"Nuclei found {len(findings)} vulnerabilities")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"Nuclei module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _update_templates(self, workspace: Path):
    """Run `nuclei -update-templates` in *workspace*.

    Best-effort: failures (non-zero exit or launch errors) are logged and
    never raised.
    """
    try:
        logger.info("Updating Nuclei templates...")
        process = await asyncio.create_subprocess_exec(
            "nuclei", "-update-templates",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace,
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            logger.warning(f"Template update failed: {stderr.decode()}")
        else:
            logger.info("Templates updated successfully")
    except Exception as e:
        logger.warning(f"Error updating templates: {e}")
async def _prepare_targets(self, config: Dict[str, Any], workspace: Path) -> Path:
"""Prepare target file for scanning"""
targets = config.get("targets", [])
target_file = config.get("target_file")
if target_file:
# Use existing target file
target_path = workspace / target_file
if target_path.exists():
return target_path
else:
raise FileNotFoundError(f"Target file not found: {target_file}")
if targets:
# Create temporary target file
target_path = workspace / "nuclei_targets.txt"
with open(target_path, 'w') as f:
for target in targets:
f.write(f"{target}\n")
return target_path
return None
async def _run_nuclei_scan(self, target_file: Path, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Assemble and run the nuclei command line, then parse its JSONL stdout.

    Errors are logged and swallowed; an empty list is returned on failure.
    """
    findings = []
    try:
        # Build nuclei command; -l reads targets from the prepared file
        cmd = ["nuclei", "-l", str(target_file)]
        # Add output format.
        # NOTE(review): recent nuclei releases renamed -json to -jsonl;
        # confirm against the pinned binary version (3.1.0).
        cmd.extend(["-json"])
        # Add templates
        templates = config.get("templates", [])
        if templates:
            cmd.extend(["-t", ",".join(templates)])
        # Add template directory.
        # NOTE(review): this reuses -t, so it may combine with the list above
        # rather than act as a separate directory option — verify intent.
        template_dir = config.get("template_directory")
        if template_dir:
            cmd.extend(["-t", template_dir])
        # Add tags
        tags = config.get("tags", [])
        if tags:
            cmd.extend(["-tags", ",".join(tags)])
        # Add exclude tags
        exclude_tags = config.get("exclude_tags", [])
        if exclude_tags:
            cmd.extend(["-exclude-tags", ",".join(exclude_tags)])
        # Add severity filter
        severity_levels = config.get("severity", ["critical", "high", "medium"])
        cmd.extend(["-severity", ",".join(severity_levels)])
        # Add concurrency
        concurrency = config.get("concurrency", 25)
        cmd.extend(["-c", str(concurrency)])
        # Add rate limit (requests/second)
        rate_limit = config.get("rate_limit", 150)
        cmd.extend(["-rl", str(rate_limit)])
        # Add per-request timeout
        timeout = config.get("timeout", 10)
        cmd.extend(["-timeout", str(timeout)])
        # Add retries
        retries = config.get("retries", 1)
        cmd.extend(["-retries", str(retries)])
        # Add other flags.
        # NOTE(review): -no-color disables coloured output, not clustering —
        # this looks like the wrong flag for disable_clustering; confirm the
        # intended nuclei option before relying on it.
        if config.get("disable_clustering", False):
            cmd.append("-no-color")
        if config.get("no_interactsh", True):
            cmd.append("-no-interactsh")
        # Silent mode keeps stdout to pure JSONL results
        cmd.append("-silent")
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run nuclei
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results; non-zero exit with output still counts (nuclei can
        # exit non-zero while having reported findings)
        if process.returncode == 0 or stdout:
            findings = self._parse_nuclei_output(stdout.decode(), workspace)
        else:
            error_msg = stderr.decode()
            logger.error(f"Nuclei scan failed: {error_msg}")
    except Exception as e:
        logger.warning(f"Error running Nuclei scan: {e}")
    return findings
def _parse_nuclei_output(self, output: str, workspace: Path) -> List["ModuleFinding"]:
    """Parse Nuclei JSONL output into findings.

    Each non-empty line of *output* is expected to be one JSON object as
    produced by ``nuclei -json``. Malformed lines are logged and skipped
    individually — previously the whole parse aborted on the first bad
    line, silently discarding every finding after it.

    Args:
        output: Raw stdout from the nuclei process.
        workspace: Scan workspace (unused here; kept for interface parity).

    Returns:
        One ModuleFinding per successfully parsed nuclei result.
    """
    findings: List["ModuleFinding"] = []
    if not output.strip():
        return findings
    for line in output.strip().split('\n'):
        if not line.strip():
            continue
        try:
            result = json.loads(line)
        except json.JSONDecodeError as e:
            # Skip only this record; keep parsing the remaining lines.
            logger.warning(f"Failed to parse Nuclei output: {e}")
            continue
        try:
            info = result.get("info", {})
            template_id = result.get("template-id", "")
            template_name = info.get("name", "")
            severity = info.get("severity", "medium")
            host = result.get("host", "")
            matched_at = result.get("matched-at", "")
            description = info.get("description", "")
            reference = info.get("reference", [])
            classification = info.get("classification", {})
            extracted_results = result.get("extracted-results", [])

            # Map severity to our standard levels and derive a category.
            finding_severity = self._map_severity(severity)
            category = self._get_category(template_id, template_name, classification)

            finding = self.create_finding(
                title=f"Nuclei Detection: {template_name}",
                description=description or f"Vulnerability detected using template {template_id}",
                severity=finding_severity,
                category=category,
                file_path=None,  # Nuclei scans network targets, not files
                recommendation=self._get_recommendation(template_id, template_name, reference),
                metadata={
                    "template_id": template_id,
                    "template_name": template_name,
                    "nuclei_severity": severity,
                    "host": host,
                    "matched_at": matched_at,
                    "classification": classification,
                    "reference": reference,
                    "extracted_results": extracted_results
                }
            )
            findings.append(finding)
        except Exception as e:
            # A failure building one finding must not drop the others.
            logger.warning(f"Error processing Nuclei results: {e}")
    return findings
def _map_severity(self, nuclei_severity: str) -> str:
"""Map Nuclei severity to our standard severity levels"""
severity_map = {
"critical": "critical",
"high": "high",
"medium": "medium",
"low": "low",
"info": "info"
}
return severity_map.get(nuclei_severity.lower(), "medium")
def _get_category(self, template_id: str, template_name: str, classification: Dict) -> str:
"""Determine finding category based on template and classification"""
template_lower = f"{template_id} {template_name}".lower()
# Use classification if available
cwe_id = classification.get("cwe-id")
if cwe_id:
# Map common CWE IDs to categories
if cwe_id in ["CWE-79", "CWE-80"]:
return "cross_site_scripting"
elif cwe_id in ["CWE-89"]:
return "sql_injection"
elif cwe_id in ["CWE-22", "CWE-23"]:
return "path_traversal"
elif cwe_id in ["CWE-352"]:
return "csrf"
elif cwe_id in ["CWE-601"]:
return "redirect"
# Analyze template content
if any(term in template_lower for term in ["xss", "cross-site"]):
return "cross_site_scripting"
elif any(term in template_lower for term in ["sql", "injection"]):
return "sql_injection"
elif any(term in template_lower for term in ["lfi", "rfi", "file", "path", "traversal"]):
return "file_inclusion"
elif any(term in template_lower for term in ["rce", "command", "execution"]):
return "remote_code_execution"
elif any(term in template_lower for term in ["auth", "login", "bypass"]):
return "authentication_bypass"
elif any(term in template_lower for term in ["disclosure", "exposure", "leak"]):
return "information_disclosure"
elif any(term in template_lower for term in ["config", "misconfiguration"]):
return "misconfiguration"
elif any(term in template_lower for term in ["cve-"]):
return "known_vulnerability"
else:
return "web_vulnerability"
def _get_recommendation(self, template_id: str, template_name: str, references: List) -> str:
"""Generate recommendation based on template"""
# Use references if available
if references:
ref_text = ", ".join(references[:3]) # Limit to first 3 references
return f"Review the vulnerability and apply appropriate fixes. References: {ref_text}"
# Generate based on template type
template_lower = f"{template_id} {template_name}".lower()
if "xss" in template_lower:
return "Implement proper input validation and output encoding to prevent XSS attacks."
elif "sql" in template_lower:
return "Use parameterized queries and input validation to prevent SQL injection."
elif "lfi" in template_lower or "rfi" in template_lower:
return "Validate and sanitize file paths. Avoid dynamic file includes with user input."
elif "rce" in template_lower:
return "Sanitize user input and avoid executing system commands with user-controlled data."
elif "auth" in template_lower:
return "Review authentication mechanisms and implement proper access controls."
elif "exposure" in template_lower or "disclosure" in template_lower:
return "Restrict access to sensitive information and implement proper authorization."
elif "cve-" in template_lower:
return "Update the affected software to the latest version to patch known vulnerabilities."
else:
return f"Review and remediate the security issue identified by template {template_id}."
def _create_summary(self, findings: List[ModuleFinding], targets_count: int) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
category_counts = {}
template_counts = {}
host_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by template
template_id = finding.metadata.get("template_id", "unknown")
template_counts[template_id] = template_counts.get(template_id, 0) + 1
# Count by host
host = finding.metadata.get("host", "unknown")
host_counts[host] = host_counts.get(host, 0) + 1
return {
"total_findings": len(findings),
"targets_scanned": targets_count,
"severity_counts": severity_counts,
"category_counts": category_counts,
"top_templates": dict(sorted(template_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"affected_hosts": len(host_counts),
"host_counts": dict(sorted(host_counts.items(), key=lambda x: x[1], reverse=True)[:10])
}

View File

@@ -1,671 +0,0 @@
"""
SQLMap Penetration Testing Module
This module uses SQLMap for automatic SQL injection detection and exploitation.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class SQLMapModule(BaseModule):
"""SQLMap automatic SQL injection detection and exploitation module"""
def get_metadata(self) -> ModuleMetadata:
    """Return the SQLMap module descriptor.

    Identifies the wrapper (name/version/category/tags) and publishes the
    JSON Schemas for its accepted configuration keys and for the findings
    it emits. The input schema mirrors the sqlmap CLI options assembled
    in _run_sqlmap_scan.
    """
    return ModuleMetadata(
        name="sqlmap",
        version="1.7.11",
        description="Automatic SQL injection detection and exploitation tool",
        author="FuzzForge Team",
        category="penetration_testing",
        tags=["sql-injection", "web", "database", "vulnerability", "exploitation"],
        input_schema={
            "type": "object",
            "properties": {
                # Target selection — at least one of the next three keys is
                # required (enforced by validate_config).
                "target_url": {
                    "type": "string",
                    "description": "Target URL to test for SQL injection"
                },
                "target_file": {
                    "type": "string",
                    "description": "File containing URLs to test"
                },
                "request_file": {
                    "type": "string",
                    "description": "Load HTTP request from file (Burp log, etc.)"
                },
                # HTTP request shaping.
                "data": {
                    "type": "string",
                    "description": "Data string to be sent through POST"
                },
                "cookie": {
                    "type": "string",
                    "description": "HTTP Cookie header value"
                },
                "user_agent": {
                    "type": "string",
                    "description": "HTTP User-Agent header value"
                },
                "referer": {
                    "type": "string",
                    "description": "HTTP Referer header value"
                },
                "headers": {
                    "type": "object",
                    "description": "Additional HTTP headers"
                },
                "method": {
                    "type": "string",
                    "enum": ["GET", "POST", "PUT", "DELETE", "PATCH"],
                    "default": "GET",
                    "description": "HTTP method to use"
                },
                "testable_parameters": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Comma-separated list of testable parameter(s)"
                },
                "skip_parameters": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Parameters to skip during testing"
                },
                # Injection tuning (maps to --dbms/--level/--risk/--technique).
                "dbms": {
                    "type": "string",
                    "enum": ["mysql", "postgresql", "oracle", "mssql", "sqlite", "access", "firebird", "sybase", "db2", "hsqldb", "h2"],
                    "description": "Force back-end DBMS to provided value"
                },
                "level": {
                    "type": "integer",
                    "enum": [1, 2, 3, 4, 5],
                    "default": 1,
                    "description": "Level of tests to perform (1-5)"
                },
                "risk": {
                    "type": "integer",
                    "enum": [1, 2, 3],
                    "default": 1,
                    "description": "Risk of tests to perform (1-3)"
                },
                "technique": {
                    "type": "array",
                    "items": {"type": "string", "enum": ["B", "E", "U", "S", "T", "Q"]},
                    "description": "SQL injection techniques to use (B=Boolean, E=Error, U=Union, S=Stacked, T=Time, Q=Inline)"
                },
                "time_sec": {
                    "type": "integer",
                    "default": 5,
                    "description": "Seconds to delay DBMS response for time-based blind SQL injection"
                },
                "union_cols": {
                    "type": "string",
                    "description": "Range of columns to test for UNION query SQL injection"
                },
                # Performance / reliability knobs.
                "threads": {
                    "type": "integer",
                    "default": 1,
                    "description": "Maximum number of concurrent HTTP requests"
                },
                "timeout": {
                    "type": "integer",
                    "default": 30,
                    "description": "Seconds to wait before timeout connection"
                },
                "retries": {
                    "type": "integer",
                    "default": 3,
                    "description": "Retries when connection timeouts"
                },
                "randomize": {
                    "type": "boolean",
                    "default": True,
                    "description": "Randomly change value of given parameter(s)"
                },
                "safe_url": {
                    "type": "string",
                    "description": "URL to visit frequently during testing"
                },
                "safe_freq": {
                    "type": "integer",
                    "description": "Test requests between visits to safe URL"
                },
                # Crawling / form discovery.
                "crawl": {
                    "type": "integer",
                    "description": "Crawl website starting from target URL (depth)"
                },
                "forms": {
                    "type": "boolean",
                    "default": False,
                    "description": "Parse and test forms on target URL"
                },
                # Behavioural switches.
                "batch": {
                    "type": "boolean",
                    "default": True,
                    "description": "Never ask for user input, use default behavior"
                },
                "cleanup": {
                    "type": "boolean",
                    "default": True,
                    "description": "Clean up files used by SQLMap"
                },
                "check_waf": {
                    "type": "boolean",
                    "default": False,
                    "description": "Check for existence of WAF/IPS protection"
                },
                "tamper": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Use tamper scripts to modify requests"
                }
            }
        },
        # Shape of the findings produced by _create_sqlmap_finding.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "url": {"type": "string"},
                            "parameter": {"type": "string"},
                            "technique": {"type": "string"},
                            "dbms": {"type": "string"},
                            "payload": {"type": "string"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Check that the scan configuration is usable.

    Requires at least one target source (target_url / target_file /
    request_file) and that level and risk fall inside sqlmap's accepted
    ranges. Raises ValueError on any violation, returns True otherwise.
    """
    has_target = any(
        config.get(key)
        for key in ("target_url", "target_file", "request_file")
    )
    if not has_target:
        raise ValueError("Either 'target_url', 'target_file', or 'request_file' must be specified")

    if config.get("level", 1) not in (1, 2, 3, 4, 5):
        raise ValueError("Level must be between 1 and 5")

    if config.get("risk", 1) not in (1, 2, 3):
        raise ValueError("Risk must be between 1 and 3")

    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> "ModuleResult":
    """Run the SQLMap injection assessment described by *config*.

    Validates the configuration and workspace, launches the scan, and
    wraps the findings in a ModuleResult. Any exception is caught and
    converted into a failed result rather than propagated.
    """
    self.start_timer()
    try:
        # Fail fast on bad input before spawning sqlmap.
        self.validate_config(config)
        self.validate_workspace(workspace)

        logger.info("Running SQLMap SQL injection scan")
        findings = await self._run_sqlmap_scan(config, workspace)
        summary = self._create_summary(findings)
        logger.info(f"SQLMap found {len(findings)} SQL injection vulnerabilities")

        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        logger.error(f"SQLMap module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
async def _run_sqlmap_scan(self, config: Dict[str, Any], workspace: Path) -> List[ModuleFinding]:
    """Assemble the sqlmap command line from *config*, run it, and parse results.

    Flag order mirrors the option groups below (target, HTTP, parameters,
    injection, performance, request, crawling, behaviour, tamper, output).
    Raises FileNotFoundError for a configured but missing target/request
    file; all other errors are logged and an empty list is returned.
    """
    findings = []
    try:
        # Build sqlmap command
        cmd = ["sqlmap"]
        # Add target specification
        target_url = config.get("target_url")
        if target_url:
            cmd.extend(["-u", target_url])
        target_file = config.get("target_file")
        if target_file:
            target_path = workspace / target_file
            if target_path.exists():
                cmd.extend(["-m", str(target_path)])
            else:
                raise FileNotFoundError(f"Target file not found: {target_file}")
        request_file = config.get("request_file")
        if request_file:
            request_path = workspace / request_file
            if request_path.exists():
                cmd.extend(["-r", str(request_path)])
            else:
                raise FileNotFoundError(f"Request file not found: {request_file}")
        # Add HTTP options
        data = config.get("data")
        if data:
            cmd.extend(["--data", data])
        cookie = config.get("cookie")
        if cookie:
            cmd.extend(["--cookie", cookie])
        user_agent = config.get("user_agent")
        if user_agent:
            cmd.extend(["--user-agent", user_agent])
        referer = config.get("referer")
        if referer:
            cmd.extend(["--referer", referer])
        headers = config.get("headers", {})
        for key, value in headers.items():
            cmd.extend(["--header", f"{key}: {value}"])
        method = config.get("method")
        if method and method != "GET":
            cmd.extend(["--method", method])
        # Add parameter options
        testable_params = config.get("testable_parameters", [])
        if testable_params:
            cmd.extend(["-p", ",".join(testable_params)])
        skip_params = config.get("skip_parameters", [])
        if skip_params:
            cmd.extend(["--skip", ",".join(skip_params)])
        # Add injection options
        dbms = config.get("dbms")
        if dbms:
            cmd.extend(["--dbms", dbms])
        level = config.get("level", 1)
        cmd.extend(["--level", str(level)])
        risk = config.get("risk", 1)
        cmd.extend(["--risk", str(risk)])
        techniques = config.get("technique", [])
        if techniques:
            # sqlmap expects the technique letters concatenated, e.g. "BEU".
            cmd.extend(["--technique", "".join(techniques)])
        time_sec = config.get("time_sec", 5)
        cmd.extend(["--time-sec", str(time_sec)])
        union_cols = config.get("union_cols")
        if union_cols:
            cmd.extend(["--union-cols", union_cols])
        # Add performance options
        threads = config.get("threads", 1)
        cmd.extend(["--threads", str(threads)])
        timeout = config.get("timeout", 30)
        cmd.extend(["--timeout", str(timeout)])
        retries = config.get("retries", 3)
        cmd.extend(["--retries", str(retries)])
        # Add request options
        if config.get("randomize", True):
            # NOTE(review): sqlmap's --randomize normally takes a parameter
            # name (--randomize=PARAM); confirm the bare flag is accepted
            # by the bundled sqlmap version.
            cmd.append("--randomize")
        safe_url = config.get("safe_url")
        if safe_url:
            cmd.extend(["--safe-url", safe_url])
        safe_freq = config.get("safe_freq")
        if safe_freq:
            cmd.extend(["--safe-freq", str(safe_freq)])
        # Add crawling options
        crawl_depth = config.get("crawl")
        if crawl_depth:
            cmd.extend(["--crawl", str(crawl_depth)])
        if config.get("forms", False):
            cmd.append("--forms")
        # Add behavioral options
        if config.get("batch", True):
            cmd.append("--batch")
        if config.get("cleanup", True):
            cmd.append("--cleanup")
        if config.get("check_waf", False):
            # NOTE(review): --check-waf was removed in newer sqlmap releases;
            # verify against the pinned sqlmap version.
            cmd.append("--check-waf")
        # Add tamper scripts
        tamper_scripts = config.get("tamper", [])
        if tamper_scripts:
            cmd.extend(["--tamper", ",".join(tamper_scripts)])
        # Set output directory
        output_dir = workspace / "sqlmap_output"
        output_dir.mkdir(exist_ok=True)
        cmd.extend(["--output-dir", str(output_dir)])
        # Add format for easier parsing
        cmd.append("--flush-session")  # Start fresh
        cmd.append("--fresh-queries")  # Ignore previous results
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run sqlmap
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results from output directory
        findings = self._parse_sqlmap_output(output_dir, stdout.decode(), workspace)
        # Log results
        if findings:
            logger.info(f"SQLMap detected {len(findings)} SQL injection vulnerabilities")
        else:
            logger.info("No SQL injection vulnerabilities found")
        # Check for errors
        stderr_text = stderr.decode()
        if stderr_text:
            logger.warning(f"SQLMap warnings/errors: {stderr_text}")
    except Exception as e:
        logger.warning(f"Error running SQLMap scan: {e}")
    return findings
def _parse_sqlmap_output(self, output_dir: Path, stdout: str, workspace: Path) -> List["ModuleFinding"]:
    """Collect findings from a finished sqlmap run.

    stdout parsing is the primary source of findings; log files under
    *output_dir* are scanned as a secondary source. Session .sqlite files
    are located but not yet mined for details.
    """
    findings: List["ModuleFinding"] = []
    try:
        # Located for future use; detailed session parsing is not implemented.
        session_files = list(output_dir.glob("**/*.sqlite"))
        log_files = list(output_dir.glob("**/*.log"))

        findings.extend(self._parse_stdout_output(stdout))
        for log_file in log_files:
            findings.extend(self._parse_log_file(log_file))
    except Exception as e:
        logger.warning(f"Error parsing SQLMap output: {e}")
    return findings
def _parse_stdout_output(self, stdout: str) -> List[ModuleFinding]:
    """Parse SQLMap stdout for SQL injection findings.

    Implements a small line-by-line state machine: it tracks the URL and
    parameter currently under test, flags when sqlmap reports a parameter
    as injectable, and emits one finding per "Payload:" line seen while
    the injectable flag is set. Parse errors abort parsing and are logged.
    """
    findings = []
    try:
        lines = stdout.split('\n')
        current_url = None
        current_parameter = None
        current_technique = None
        current_dbms = None
        injection_found = False
        for line in lines:
            line = line.strip()
            # Extract URL being tested (quoted in sqlmap's banner line).
            if "testing URL" in line or "testing connection to the target URL" in line:
                # Extract URL from line
                if "'" in line:
                    url_start = line.find("'") + 1
                    url_end = line.find("'", url_start)
                    if url_end > url_start:
                        current_url = line[url_start:url_end]
            # Extract parameter being tested.
            # NOTE(review): `and` binds tighter than `or`, so this reads
            # ("testing parameter" in line) or ("testing" in line and
            # "parameter" in line) — confirm that is the intended grouping.
            elif "testing parameter" in line or "testing" in line and "parameter" in line:
                if "'" in line:
                    param_parts = line.split("'")
                    if len(param_parts) >= 2:
                        current_parameter = param_parts[1]
            # Detect SQL injection found
            elif any(indicator in line.lower() for indicator in [
                "parameter appears to be vulnerable",
                "injectable",
                "parameter is vulnerable"
            ]):
                injection_found = True
            # Extract technique information
            elif "Type:" in line:
                current_technique = line.replace("Type:", "").strip()
            # Extract database information
            # NOTE(review): the needle contains uppercase "DBMS" but is
            # matched against line.lower(), so this branch can never fire
            # and current_dbms stays None — confirm and fix the casing.
            elif "back-end DBMS:" in line.lower():
                current_dbms = line.split(":")[-1].strip()
            # Extract payload information
            elif "Payload:" in line:
                payload = line.replace("Payload:", "").strip()
                # Create finding if we have injection
                # (nesting inferred: the payload just parsed above is what
                # gets recorded for this injection point)
                if injection_found and current_url and current_parameter:
                    finding = self._create_sqlmap_finding(
                        current_url, current_parameter, current_technique,
                        current_dbms, payload
                    )
                    if finding:
                        findings.append(finding)
                    # Reset state so the next payload needs a fresh
                    # "injectable" confirmation.
                    injection_found = False
                    current_technique = None
    except Exception as e:
        logger.warning(f"Error parsing SQLMap stdout: {e}")
    return findings
def _parse_log_file(self, log_file: Path) -> List[ModuleFinding]:
"""Parse SQLMap log file for additional findings"""
findings = []
try:
with open(log_file, 'r') as f:
content = f.read()
# Look for injection indicators in log
if "injectable" in content.lower() or "vulnerable" in content.lower():
# Could parse more detailed information from log
# For now, we'll rely on stdout parsing
pass
except Exception as e:
logger.warning(f"Error parsing log file {log_file}: {e}")
return findings
def _create_sqlmap_finding(self, url: str, parameter: str, technique: str, dbms: str, payload: str) -> "ModuleFinding":
    """Build a ModuleFinding for one confirmed injection point.

    Returns None when construction fails (logged as a warning) so the
    caller can simply skip the entry.
    """
    # Human-readable names for sqlmap's technique labels.
    readable = {
        "boolean-based blind": "Boolean-based blind SQL injection",
        "time-based blind": "Time-based blind SQL injection",
        "error-based": "Error-based SQL injection",
        "UNION query": "UNION-based SQL injection",
        "stacked queries": "Stacked queries SQL injection",
        "inline query": "Inline query SQL injection"
    }
    try:
        technique_desc = readable.get(technique, technique or "SQL injection")
        description = f"SQL injection vulnerability detected in parameter '{parameter}' using {technique_desc}"
        if dbms:
            description += f" against {dbms} database"
        return self.create_finding(
            title=f"SQL Injection: {parameter}",
            description=description,
            severity=self._get_injection_severity(technique, dbms),
            category="sql_injection",
            file_path=None,  # network-side finding; no source file involved
            recommendation=self._get_sqlinjection_recommendation(technique, dbms),
            metadata={
                "url": url,
                "parameter": parameter,
                "technique": technique,
                "dbms": dbms,
                "payload": payload[:500] if payload else "",  # Limit payload length
                "injection_type": technique_desc
            }
        )
    except Exception as e:
        logger.warning(f"Error creating SQLMap finding: {e}")
        return None
def _get_injection_severity(self, technique: str, dbms: str) -> str:
"""Determine severity based on injection technique and database"""
if not technique:
return "high" # Any SQL injection is serious
technique_lower = technique.lower()
# Critical severity for techniques that allow easy data extraction
if any(term in technique_lower for term in ["union", "error-based"]):
return "critical"
# High severity for techniques that allow some data extraction
elif any(term in technique_lower for term in ["boolean-based", "time-based"]):
return "high"
# Stacked queries are very dangerous as they allow multiple statements
elif "stacked" in technique_lower:
return "critical"
else:
return "high"
def _get_sqlinjection_recommendation(self, technique: str, dbms: str) -> str:
"""Generate recommendation for SQL injection"""
base_recommendation = "Implement parameterized queries/prepared statements and input validation to prevent SQL injection attacks."
if technique:
technique_lower = technique.lower()
if "union" in technique_lower:
base_recommendation += " The UNION-based injection allows direct data extraction - immediate remediation required."
elif "error-based" in technique_lower:
base_recommendation += " Error-based injection reveals database structure - disable error messages in production."
elif "time-based" in technique_lower:
base_recommendation += " Time-based injection allows blind data extraction - implement query timeout limits."
elif "stacked" in technique_lower:
base_recommendation += " Stacked queries injection allows multiple SQL statements - extremely dangerous, fix immediately."
if dbms:
dbms_lower = dbms.lower()
if "mysql" in dbms_lower:
base_recommendation += " For MySQL: disable LOAD_FILE and INTO OUTFILE if not needed."
elif "postgresql" in dbms_lower:
base_recommendation += " For PostgreSQL: review user privileges and disable unnecessary functions."
elif "mssql" in dbms_lower:
base_recommendation += " For SQL Server: disable xp_cmdshell and review extended stored procedures."
return base_recommendation
def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0}
technique_counts = {}
dbms_counts = {}
parameter_counts = {}
url_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by technique
technique = finding.metadata.get("technique", "unknown")
technique_counts[technique] = technique_counts.get(technique, 0) + 1
# Count by DBMS
dbms = finding.metadata.get("dbms", "unknown")
if dbms != "unknown":
dbms_counts[dbms] = dbms_counts.get(dbms, 0) + 1
# Count by parameter
parameter = finding.metadata.get("parameter", "unknown")
parameter_counts[parameter] = parameter_counts.get(parameter, 0) + 1
# Count by URL
url = finding.metadata.get("url", "unknown")
url_counts[url] = url_counts.get(url, 0) + 1
return {
"total_findings": len(findings),
"severity_counts": severity_counts,
"technique_counts": technique_counts,
"dbms_counts": dbms_counts,
"vulnerable_parameters": list(parameter_counts.keys()),
"vulnerable_urls": len(url_counts),
"most_common_techniques": dict(sorted(technique_counts.items(), key=lambda x: x[1], reverse=True)[:5]),
"affected_databases": list(dbms_counts.keys())
}

View File

@@ -1,38 +0,0 @@
"""
Static Analysis Security Testing (SAST) Modules
This package contains modules for static code analysis and security testing.
Available modules:
- CodeQL: GitHub's semantic code analysis engine
- SonarQube: Code quality and security analysis platform
- Snyk: Vulnerability scanning for dependencies and code
- OpenGrep: Open-source pattern-based static analysis tool
- Bandit: Python-specific security issue identifier
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from typing import List, Type
from ..base import BaseModule
# Module registry for automatic discovery
STATIC_ANALYSIS_MODULES: List[Type[BaseModule]] = []
def register_module(module_class: Type[BaseModule]):
    """Class decorator: add *module_class* to STATIC_ANALYSIS_MODULES.

    Returns the class unchanged so it can be applied as ``@register_module``.
    """
    STATIC_ANALYSIS_MODULES.append(module_class)
    return module_class
def get_available_modules() -> List[Type[BaseModule]]:
    """Return a shallow copy of the registered static analysis modules.

    Copying prevents callers from mutating the registry in place.
    """
    return list(STATIC_ANALYSIS_MODULES)

View File

@@ -1,418 +0,0 @@
"""
Bandit Static Analysis Module
This module uses Bandit to detect security vulnerabilities in Python code.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
logger = logging.getLogger(__name__)
@register_module
class BanditModule(BaseModule):
"""Bandit Python security analysis module"""
def get_metadata(self) -> ModuleMetadata:
    """Return the Bandit module descriptor.

    Identifies the wrapper (name/version/category/tags) and publishes the
    JSON Schemas for its accepted configuration keys and for the findings
    it emits. The input schema mirrors the bandit CLI flags assembled in
    execute.
    """
    return ModuleMetadata(
        name="bandit",
        version="1.7.5",
        description="Python-specific security issue identifier using Bandit",
        author="FuzzForge Team",
        category="static_analysis",
        tags=["python", "sast", "security", "vulnerabilities"],
        input_schema={
            "type": "object",
            "properties": {
                # Reporting thresholds (maps to --confidence-level / --severity-level).
                "confidence": {
                    "type": "string",
                    "enum": ["LOW", "MEDIUM", "HIGH"],
                    "default": "LOW",
                    "description": "Minimum confidence level for reported issues"
                },
                "severity": {
                    "type": "string",
                    "enum": ["LOW", "MEDIUM", "HIGH"],
                    "default": "LOW",
                    "description": "Minimum severity level for reported issues"
                },
                # Test selection.
                "tests": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Specific test IDs to run"
                },
                "skips": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Test IDs to skip"
                },
                "exclude_dirs": {
                    "type": "array",
                    "items": {"type": "string"},
                    "default": ["tests", "test", ".git", "__pycache__"],
                    "description": "Directories to exclude from analysis"
                },
                # NOTE(review): include_patterns is not referenced in the
                # visible portion of execute — confirm it is wired to
                # bandit's file selection somewhere.
                "include_patterns": {
                    "type": "array",
                    "items": {"type": "string"},
                    "default": ["*.py"],
                    "description": "File patterns to include"
                },
                "aggregate": {
                    "type": "string",
                    "enum": ["file", "vuln"],
                    "default": "file",
                    "description": "How to aggregate results"
                },
                "context_lines": {
                    "type": "integer",
                    "default": 3,
                    "description": "Number of context lines to show"
                }
            }
        },
        # Shape of the findings emitted from bandit's JSON report.
        output_schema={
            "type": "object",
            "properties": {
                "findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "test_id": {"type": "string"},
                            "test_name": {"type": "string"},
                            "confidence": {"type": "string"},
                            "severity": {"type": "string"},
                            "file_path": {"type": "string"},
                            "line_number": {"type": "integer"}
                        }
                    }
                }
            }
        }
    )
def validate_config(self, config: Dict[str, Any]) -> bool:
    """Check that the Bandit configuration is usable.

    Accepts confidence/severity either as a plain string or as a legacy
    single-item list (an empty list falls back to "MEDIUM", matching the
    handling in execute). context_lines must be an int in [0, 10].
    Raises ValueError on any violation, returns True otherwise.
    """
    def _as_level(value, fallback="MEDIUM"):
        # Normalise the legacy list form to a scalar.
        if isinstance(value, list):
            return value[0] if value else fallback
        return value

    confidence = _as_level(config.get("confidence", "LOW"))
    if confidence not in ["LOW", "MEDIUM", "HIGH"]:
        raise ValueError("confidence must be LOW, MEDIUM, or HIGH")

    severity = _as_level(config.get("severity", "LOW"))
    if severity not in ["LOW", "MEDIUM", "HIGH"]:
        raise ValueError("severity must be LOW, MEDIUM, or HIGH")

    context_lines = config.get("context_lines", 3)
    if not isinstance(context_lines, int) or not (0 <= context_lines <= 10):
        raise ValueError("context_lines must be between 0 and 10")

    return True
async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
    """Execute Bandit security analysis.

    Runs the ``bandit`` CLI over every ``*.py`` file under *workspace*,
    parses its JSON report into findings, and returns a ModuleResult with
    an aggregate summary. Any exception is caught and converted into a
    "failed" result rather than propagated.

    Args:
        config: Module configuration (confidence/severity thresholds,
            test selection/skips, exclusions, aggregation mode,
            context lines).
        workspace: Directory tree to scan.

    Returns:
        ModuleResult: status "success" with findings/summary, or
        "failed" with an error message when Bandit cannot be run.
    """
    self.start_timer()
    try:
        # Validate inputs
        self.validate_config(config)
        self.validate_workspace(workspace)
        logger.info(f"Running Bandit analysis on {workspace}")
        # Short-circuit: nothing to do when the workspace has no Python files.
        python_files = list(workspace.rglob("*.py"))
        if not python_files:
            logger.info("No Python files found for Bandit analysis")
            return self.create_result(
                findings=[],
                status="success",
                summary={"total_findings": 0, "files_scanned": 0}
            )
        # Build bandit command (JSON output so it can be parsed below)
        cmd = ["bandit", "-f", "json"]
        # Add confidence level
        confidence = config.get("confidence", "LOW")
        # Handle both string and list formats (callers sometimes wrap the value)
        if isinstance(confidence, list):
            confidence = confidence[0] if confidence else "MEDIUM"
        cmd.extend(["--confidence-level", self._get_confidence_levels(confidence)])
        # Add severity level
        severity = config.get("severity", "LOW")
        # Handle both string and list formats
        if isinstance(severity, list):
            severity = severity[0] if severity else "MEDIUM"
        cmd.extend(["--severity-level", self._get_severity_levels(severity)])
        # Add tests to run (comma-separated Bandit test IDs)
        if config.get("tests"):
            cmd.extend(["-t", ",".join(config["tests"])])
        # Add tests to skip
        if config.get("skips"):
            cmd.extend(["-s", ",".join(config["skips"])])
        # Add exclude directories
        exclude_dirs = config.get("exclude_dirs", ["tests", "test", ".git", "__pycache__"])
        if exclude_dirs:
            cmd.extend(["-x", ",".join(exclude_dirs)])
        # Add aggregate mode (by file or by vulnerability)
        aggregate = config.get("aggregate", "file")
        cmd.extend(["-a", aggregate])
        # Add context lines shown around each finding
        context_lines = config.get("context_lines", 3)
        cmd.extend(["-n", str(context_lines)])
        # Add recursive flag and target
        cmd.extend(["-r", str(workspace)])
        logger.debug(f"Running command: {' '.join(cmd)}")
        # Run Bandit asynchronously, capturing both output streams
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=workspace
        )
        stdout, stderr = await process.communicate()
        # Parse results
        findings = []
        # Bandit exit codes: 0 = no issues, 1 = issues found;
        # anything else is treated as a tool failure.
        if process.returncode in [0, 1]:
            findings = self._parse_bandit_output(stdout.decode(), workspace)
        else:
            error_msg = stderr.decode()
            logger.error(f"Bandit failed: {error_msg}")
            return self.create_result(
                findings=[],
                status="failed",
                error=f"Bandit execution failed: {error_msg}"
            )
        # Create summary
        summary = self._create_summary(findings, len(python_files))
        logger.info(f"Bandit found {len(findings)} security issues")
        return self.create_result(
            findings=findings,
            status="success",
            summary=summary
        )
    except Exception as e:
        # Catch-all boundary: convert any failure into a "failed" result
        # so the orchestrator always receives a ModuleResult.
        logger.error(f"Bandit module failed: {e}")
        return self.create_result(
            findings=[],
            status="failed",
            error=str(e)
        )
def _get_confidence_levels(self, min_confidence: str) -> str:
"""Get minimum confidence level for Bandit"""
return min_confidence.lower()
def _get_severity_levels(self, min_severity: str) -> str:
"""Get minimum severity level for Bandit"""
return min_severity.lower()
def _parse_bandit_output(self, output: str, workspace: Path) -> List[ModuleFinding]:
    """Parse Bandit JSON output into findings.

    Converts each entry of the report's ``results`` array into a
    ModuleFinding via ``self.create_finding``. Parsing is best-effort:
    malformed output is logged and yields an empty (or partial) list
    rather than raising.

    Args:
        output: Raw stdout from the ``bandit -f json`` invocation.
        workspace: Scan root, used to relativize reported file paths.

    Returns:
        List of findings (possibly empty).
    """
    findings = []
    if not output.strip():
        # Bandit produced no report (e.g. nothing scanned).
        return findings
    try:
        data = json.loads(output)
        results = data.get("results", [])
        for result in results:
            # Extract information (defaults guard against missing keys)
            test_id = result.get("test_id", "unknown")
            test_name = result.get("test_name", "")
            issue_confidence = result.get("issue_confidence", "MEDIUM")
            issue_severity = result.get("issue_severity", "MEDIUM")
            issue_text = result.get("issue_text", "")
            # File location
            filename = result.get("filename", "")
            line_number = result.get("line_number", 0)
            line_range = result.get("line_range", [])
            # Code context
            code = result.get("code", "")
            # Make file path relative to workspace; paths outside the
            # workspace raise ValueError and are kept as-is.
            if filename:
                try:
                    rel_path = Path(filename).relative_to(workspace)
                    filename = str(rel_path)
                except ValueError:
                    pass
            # Map Bandit severity to our levels
            finding_severity = self._map_severity(issue_severity)
            # Determine category based on test_id
            category = self._get_category(test_id, test_name)
            # Create finding; zero/empty location fields become None so
            # downstream reporting can distinguish "unknown" from line 0.
            finding = self.create_finding(
                title=f"Python security issue: {test_name}",
                description=issue_text or f"Bandit test {test_id} detected a security issue",
                severity=finding_severity,
                category=category,
                file_path=filename if filename else None,
                line_start=line_number if line_number > 0 else None,
                line_end=line_range[-1] if line_range and len(line_range) > 1 else None,
                code_snippet=code.strip() if code else None,
                recommendation=self._get_recommendation(test_id, test_name),
                metadata={
                    "test_id": test_id,
                    "test_name": test_name,
                    "bandit_confidence": issue_confidence,
                    "bandit_severity": issue_severity,
                    "line_range": line_range,
                    "more_info": result.get("more_info", "")
                }
            )
            findings.append(finding)
    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse Bandit output: {e}")
    except Exception as e:
        # Best-effort: keep whatever findings were built before the error.
        logger.warning(f"Error processing Bandit results: {e}")
    return findings
def _map_severity(self, bandit_severity: str) -> str:
"""Map Bandit severity to our standard severity levels"""
severity_map = {
"HIGH": "high",
"MEDIUM": "medium",
"LOW": "low"
}
return severity_map.get(bandit_severity.upper(), "medium")
def _get_category(self, test_id: str, test_name: str) -> str:
"""Determine finding category based on Bandit test"""
# Map common Bandit test categories
if "sql" in test_id.lower() or "injection" in test_name.lower():
return "injection"
elif "crypto" in test_id.lower() or "hash" in test_name.lower():
return "cryptography"
elif "shell" in test_id.lower() or "subprocess" in test_name.lower():
return "command_injection"
elif "hardcode" in test_id.lower() or "password" in test_name.lower():
return "hardcoded_secrets"
elif "pickle" in test_id.lower() or "deserial" in test_name.lower():
return "deserialization"
elif "request" in test_id.lower() or "http" in test_name.lower():
return "web_security"
elif "random" in test_id.lower():
return "weak_randomness"
elif "path" in test_id.lower() or "traversal" in test_name.lower():
return "path_traversal"
else:
return "python_security"
def _get_recommendation(self, test_id: str, test_name: str) -> str:
"""Generate recommendation based on Bandit test"""
recommendations = {
# SQL Injection
"B608": "Use parameterized queries instead of string formatting for SQL queries.",
"B703": "Use parameterized queries with Django ORM or raw SQL.",
# Cryptography
"B101": "Remove hardcoded passwords and use secure configuration management.",
"B105": "Remove hardcoded passwords and use environment variables or secret management.",
"B106": "Remove hardcoded passwords from function arguments.",
"B107": "Remove hardcoded passwords from default function arguments.",
"B303": "Use cryptographically secure hash functions like SHA-256 or better.",
"B324": "Use strong cryptographic algorithms instead of deprecated ones.",
"B413": "Use secure encryption algorithms and proper key management.",
# Command Injection
"B602": "Validate and sanitize input before using in subprocess calls.",
"B603": "Avoid using subprocess with shell=True. Use array form instead.",
"B605": "Avoid starting processes with shell=True.",
# Deserialization
"B301": "Avoid using pickle for untrusted data. Use JSON or safer alternatives.",
"B302": "Avoid using marshal for untrusted data.",
"B506": "Use safe YAML loading methods like yaml.safe_load().",
# Web Security
"B501": "Validate SSL certificates in requests to prevent MITM attacks.",
"B401": "Import and use telnetlib carefully, prefer SSH for remote connections.",
# Random
"B311": "Use cryptographically secure random generators like secrets module.",
# Path Traversal
"B108": "Validate file paths to prevent directory traversal attacks."
}
return recommendations.get(test_id,
f"Review the {test_name} security issue and apply appropriate security measures.")
def _create_summary(self, findings: List[ModuleFinding], total_files: int) -> Dict[str, Any]:
"""Create analysis summary"""
severity_counts = {"high": 0, "medium": 0, "low": 0}
category_counts = {}
test_counts = {}
for finding in findings:
# Count by severity
severity_counts[finding.severity] += 1
# Count by category
category = finding.category
category_counts[category] = category_counts.get(category, 0) + 1
# Count by test
test_id = finding.metadata.get("test_id", "unknown")
test_counts[test_id] = test_counts.get(test_id, 0) + 1
return {
"total_findings": len(findings),
"files_scanned": total_files,
"severity_counts": severity_counts,
"category_counts": category_counts,
"top_tests": dict(sorted(test_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
"files_with_issues": len(set(f.file_path for f in findings if f.file_path))
}

View File

@@ -1,396 +0,0 @@
"""
OpenGrep Static Analysis Module
This module uses OpenGrep (open-source version of Semgrep) for pattern-based
static analysis across multiple programming languages.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import tempfile
from pathlib import Path
from typing import Dict, Any, List
import subprocess
import logging
from ..base import BaseModule, ModuleMetadata, ModuleFinding, ModuleResult
from . import register_module
# Module-level logger, named after this module for hierarchical log configuration.
logger = logging.getLogger(__name__)
@register_module
class OpenGrepModule(BaseModule):
    """OpenGrep static analysis module.

    Wraps the Semgrep-compatible CLI to run pattern-based security rules
    over a workspace and normalizes the JSON report into module findings.

    Notes:
        - The CLI only accepts a single ``--severity`` flag, so the highest
          requested level is passed at invocation time; full per-finding
          severity and confidence filtering happens in
          ``_parse_opengrep_output``.
    """

    def get_metadata(self) -> ModuleMetadata:
        """Get module metadata, including the config and output JSON schemas."""
        return ModuleMetadata(
            name="opengrep",
            version="1.45.0",
            description="Open-source pattern-based static analysis tool for security vulnerabilities",
            author="FuzzForge Team",
            category="static_analysis",
            tags=["sast", "pattern-matching", "multi-language", "security"],
            input_schema={
                "type": "object",
                "properties": {
                    "config": {
                        "type": "string",
                        "enum": ["auto", "p/security-audit", "p/owasp-top-ten", "p/cwe-top-25"],
                        "default": "auto",
                        "description": "Rule configuration to use"
                    },
                    "languages": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Specific languages to analyze"
                    },
                    "include_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "File patterns to include"
                    },
                    "exclude_patterns": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "File patterns to exclude"
                    },
                    "max_target_bytes": {
                        "type": "integer",
                        "default": 1000000,
                        "description": "Maximum file size to analyze (bytes)"
                    },
                    "timeout": {
                        "type": "integer",
                        "default": 300,
                        "description": "Analysis timeout in seconds"
                    },
                    "severity": {
                        "type": "array",
                        "items": {"type": "string", "enum": ["ERROR", "WARNING", "INFO"]},
                        "default": ["ERROR", "WARNING", "INFO"],
                        "description": "Minimum severity levels to report"
                    },
                    "confidence": {
                        "type": "array",
                        "items": {"type": "string", "enum": ["HIGH", "MEDIUM", "LOW"]},
                        "default": ["HIGH", "MEDIUM", "LOW"],
                        "description": "Minimum confidence levels to report"
                    }
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "findings": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "rule_id": {"type": "string"},
                                "severity": {"type": "string"},
                                "confidence": {"type": "string"},
                                "file_path": {"type": "string"},
                                "line_number": {"type": "integer"}
                            }
                        }
                    }
                }
            }
        )

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Validate configuration.

        Raises:
            ValueError: If ``timeout`` or ``max_target_bytes`` is missing
                the expected type or outside its allowed range.
        """
        timeout = config.get("timeout", 300)
        if not isinstance(timeout, int) or timeout < 30 or timeout > 3600:
            raise ValueError("Timeout must be between 30 and 3600 seconds")
        max_bytes = config.get("max_target_bytes", 1000000)
        if not isinstance(max_bytes, int) or max_bytes < 1000 or max_bytes > 10000000:
            raise ValueError("max_target_bytes must be between 1000 and 10000000")
        return True

    async def execute(self, config: Dict[str, Any], workspace: Path) -> ModuleResult:
        """Execute OpenGrep static analysis.

        Builds the CLI invocation from *config*, runs it against
        *workspace*, and parses the JSON report into findings. All
        failures are converted into a "failed" ModuleResult.
        """
        self.start_timer()
        try:
            # Validate inputs
            self.validate_config(config)
            self.validate_workspace(workspace)
            logger.info(f"Running OpenGrep analysis on {workspace}")
            # NOTE(review): the binary invoked is "semgrep" — assumed to be
            # the OpenGrep-compatible entry point on the analysis image;
            # confirm against the module's container image.
            cmd = ["semgrep", "--json"]
            # Rule configuration ("auto" or an explicit rule pack).
            cmd.extend(["--config", config.get("config", "auto")])
            # Analysis limits
            cmd.extend(["--timeout", str(config.get("timeout", 300))])
            cmd.extend(["--max-target-bytes", str(config.get("max_target_bytes", 1000000))])
            # Optional language restriction
            for lang in config.get("languages") or []:
                cmd.extend(["--lang", lang])
            # Optional path filters
            for pattern in config.get("include_patterns") or []:
                cmd.extend(["--include", pattern])
            for pattern in config.get("exclude_patterns") or []:
                cmd.extend(["--exclude", pattern])
            # The CLI accepts only one severity level, so pass the highest
            # requested one; filtering against the full requested severity
            # and confidence sets happens in _parse_opengrep_output.
            severity_levels = config.get("severity", ["ERROR", "WARNING", "INFO"])
            if severity_levels:
                severity_priority = {"ERROR": 3, "WARNING": 2, "INFO": 1}
                highest_severity = max(severity_levels, key=lambda x: severity_priority.get(x, 0))
                cmd.extend(["--severity", highest_severity])
            # Disable metrics collection / respect all files in the tree
            cmd.append("--disable-version-check")
            cmd.append("--no-git-ignore")
            # Target directory
            cmd.append(str(workspace))
            logger.debug(f"Running command: {' '.join(cmd)}")
            # Run OpenGrep asynchronously, capturing both streams
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=workspace
            )
            stdout, stderr = await process.communicate()
            # Exit codes: 0 = no findings, 1 = findings found; anything
            # else is treated as a tool failure.
            findings = []
            if process.returncode in [0, 1]:
                findings = self._parse_opengrep_output(stdout.decode(), workspace, config)
            else:
                error_msg = stderr.decode()
                logger.error(f"OpenGrep failed: {error_msg}")
                return self.create_result(
                    findings=[],
                    status="failed",
                    error=f"OpenGrep execution failed: {error_msg}"
                )
            summary = self._create_summary(findings)
            logger.info(f"OpenGrep found {len(findings)} potential issues")
            return self.create_result(
                findings=findings,
                status="success",
                summary=summary
            )
        except Exception as e:
            # Boundary catch-all so the orchestrator always gets a result.
            logger.error(f"OpenGrep module failed: {e}")
            return self.create_result(
                findings=[],
                status="failed",
                error=str(e)
            )

    def _parse_opengrep_output(self, output: str, workspace: Path, config: Dict[str, Any]) -> List[ModuleFinding]:
        """Parse OpenGrep JSON output into findings.

        Applies the configured severity/confidence filters per finding and
        relativizes file paths to the workspace. Parsing is best-effort:
        malformed output is logged and yields an empty or partial list.
        """
        findings = []
        if not output.strip():
            return findings
        try:
            data = json.loads(output)
            results = data.get("results", [])
            # Full filtering criteria (the CLI only enforced the highest
            # severity level at invocation time).
            allowed_severities = set(config.get("severity", ["ERROR", "WARNING", "INFO"]))
            allowed_confidences = set(config.get("confidence", ["HIGH", "MEDIUM", "LOW"]))
            for result in results:
                # Extract basic info
                rule_id = result.get("check_id", "unknown")
                message = result.get("message", "")
                severity = result.get("extra", {}).get("severity", "INFO").upper()
                # File location info
                path_info = result.get("path", "")
                start_line = result.get("start", {}).get("line", 0)
                end_line = result.get("end", {}).get("line", 0)
                start_col = result.get("start", {}).get("col", 0)
                end_col = result.get("end", {}).get("col", 0)
                # Code snippet
                lines = result.get("extra", {}).get("lines", "")
                # Rule metadata (CWE/OWASP tags, confidence, fix hints)
                metadata = result.get("extra", {})
                cwe = metadata.get("metadata", {}).get("cwe", [])
                owasp = metadata.get("metadata", {}).get("owasp", [])
                confidence = metadata.get("metadata", {}).get("confidence", "MEDIUM").upper()
                # Apply severity filter
                if severity not in allowed_severities:
                    continue
                # Apply confidence filter
                if confidence not in allowed_confidences:
                    continue
                # Make file path relative to workspace; paths outside the
                # workspace raise ValueError and are kept as-is.
                if path_info:
                    try:
                        rel_path = Path(path_info).relative_to(workspace)
                        path_info = str(rel_path)
                    except ValueError:
                        pass
                # Map severity to our standard levels
                finding_severity = self._map_severity(severity)
                # Create finding
                finding = self.create_finding(
                    title=f"Security issue: {rule_id}",
                    description=message or f"OpenGrep rule {rule_id} triggered",
                    severity=finding_severity,
                    category=self._get_category(rule_id, metadata),
                    file_path=path_info if path_info else None,
                    line_start=start_line if start_line > 0 else None,
                    line_end=end_line if end_line > 0 and end_line != start_line else None,
                    code_snippet=lines.strip() if lines else None,
                    recommendation=self._get_recommendation(rule_id, metadata),
                    metadata={
                        "rule_id": rule_id,
                        "opengrep_severity": severity,
                        "confidence": confidence,
                        "cwe": cwe,
                        "owasp": owasp,
                        "fix": metadata.get("fix", ""),
                        "impact": metadata.get("impact", ""),
                        "likelihood": metadata.get("likelihood", ""),
                        "references": metadata.get("references", [])
                    }
                )
                findings.append(finding)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse OpenGrep output: {e}")
        except Exception as e:
            logger.warning(f"Error processing OpenGrep results: {e}")
        return findings

    def _map_severity(self, opengrep_severity: str) -> str:
        """Map OpenGrep severity to our standard severity levels
        (unknown labels default to "medium")."""
        severity_map = {
            "ERROR": "high",
            "WARNING": "medium",
            "INFO": "low"
        }
        return severity_map.get(opengrep_severity.upper(), "medium")

    def _get_category(self, rule_id: str, metadata: Dict[str, Any]) -> str:
        """Determine finding category from the rule id keywords, falling
        back to CWE/OWASP tags and finally a generic "security" bucket."""
        cwe_list = metadata.get("metadata", {}).get("cwe", [])
        owasp_list = metadata.get("metadata", {}).get("owasp", [])
        rid = rule_id.lower()
        # Plain substring checks on the rule id, first match wins.
        if "injection" in rid:
            return "injection"
        elif "xss" in rid:
            return "xss"
        elif "csrf" in rid:
            return "csrf"
        elif "auth" in rid:
            return "authentication"
        elif "crypto" in rid:
            return "cryptography"
        elif cwe_list:
            return f"cwe-{cwe_list[0]}"
        elif owasp_list:
            return f"owasp-{owasp_list[0].replace(' ', '-').lower()}"
        else:
            return "security"

    def _get_recommendation(self, rule_id: str, metadata: Dict[str, Any]) -> str:
        """Generate a recommendation: prefer the rule's own fix hint, then
        a generic suggestion keyed off the rule id."""
        fix_suggestion = metadata.get("fix", "")
        if fix_suggestion:
            return fix_suggestion
        # Generic recommendations based on rule type
        if "injection" in rule_id.lower():
            return "Use parameterized queries or prepared statements to prevent injection attacks."
        elif "xss" in rule_id.lower():
            return "Properly encode/escape user input before displaying it in web pages."
        elif "crypto" in rule_id.lower():
            return "Use cryptographically secure algorithms and proper key management."
        elif "hardcode" in rule_id.lower():
            return "Remove hardcoded secrets and use secure configuration management."
        else:
            return "Review this security issue and apply appropriate fixes based on your security requirements."

    def _create_summary(self, findings: List[ModuleFinding]) -> Dict[str, Any]:
        """Create analysis summary: counts per severity, category, and
        rule, plus the ten most frequent rules and affected file count."""
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
        category_counts = {}
        rule_counts = {}
        for finding in findings:
            # Count by severity; guard against labels outside the expected
            # scale instead of raising KeyError.
            severity_counts[finding.severity] = severity_counts.get(finding.severity, 0) + 1
            # Count by category
            category = finding.category
            category_counts[category] = category_counts.get(category, 0) + 1
            # Count by rule
            rule_id = finding.metadata.get("rule_id", "unknown")
            rule_counts[rule_id] = rule_counts.get(rule_id, 0) + 1
        return {
            "total_findings": len(findings),
            "severity_counts": severity_counts,
            "category_counts": category_counts,
            "top_rules": dict(sorted(rule_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
            "files_analyzed": len(set(f.file_path for f in findings if f.file_path))
        }

View File

@@ -152,7 +152,9 @@ services:
chmod 755 /prefect-storage &&
echo '{\"insecure-registries\": [\"registry:5000\", \"localhost:5001\", \"host.docker.internal:5001\"]}' > /tmp/docker/config.json &&
pip install 'prefect[docker]' &&
prefect worker start --pool docker-pool
echo 'Waiting for backend to create work pool...' &&
sleep 15 &&
prefect worker start --pool docker-pool --type docker
"
volumes:
- prefect_storage:/prefect-storage # Access to shared storage for results

View File

@@ -1,675 +0,0 @@
{
"tool": {
"name": "FuzzForge Security Assessment",
"version": "1.0.0"
},
"summary": {
"total_issues": 66,
"by_severity": {
"warning": 49,
"error": 17
}
},
"findings": [
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at private_key.pem",
"location": {
"file": "private_key.pem",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at wallet.json",
"location": {
"file": "wallet.json",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at .npmrc",
"location": {
"file": ".npmrc",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at .env",
"location": {
"file": ".env",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at .git-credentials",
"location": {
"file": ".git-credentials",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at data/api_keys.txt",
"location": {
"file": "data/api_keys.txt",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at data/credentials.json",
"location": {
"file": "data/credentials.json",
"line": null,
"column": null
}
},
{
"rule_id": "sensitive_file_medium",
"severity": "warning",
"message": "Found potentially sensitive file at .fuzzforge/.env",
"location": {
"file": ".fuzzforge/.env",
"line": null,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via F-string in SQL query",
"location": {
"file": "app.py",
"line": 21,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded API Key in src/api_handler.py",
"location": {
"file": "src/api_handler.py",
"line": 13,
"column": null
}
},
{
"rule_id": "hardcoded_secret_medium",
"severity": "warning",
"message": "Found potential hardcoded Authentication Token in src/api_handler.py",
"location": {
"file": "src/api_handler.py",
"line": 9,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function eval(): Arbitrary code execution",
"location": {
"file": "src/api_handler.py",
"line": 22,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function eval(): Arbitrary code execution",
"location": {
"file": "src/api_handler.py",
"line": 42,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function exec(): Arbitrary code execution",
"location": {
"file": "src/api_handler.py",
"line": 37,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function os.system(): Command injection risk",
"location": {
"file": "src/api_handler.py",
"line": 32,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function os.system(): Command injection risk",
"location": {
"file": "src/api_handler.py",
"line": 59,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function subprocess with shell=True: Command injection risk",
"location": {
"file": "src/api_handler.py",
"line": 27,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via String concatenation in SQL",
"location": {
"file": "src/database.py",
"line": 31,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via String formatting in SQL",
"location": {
"file": "src/database.py",
"line": 38,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via String formatting in SQL",
"location": {
"file": "src/database.py",
"line": 45,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via F-string in SQL query",
"location": {
"file": "src/database.py",
"line": 38,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via Dynamic query building",
"location": {
"file": "src/database.py",
"line": 31,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via Dynamic query building",
"location": {
"file": "src/database.py",
"line": 63,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function os.system(): Command injection risk",
"location": {
"file": "src/database.py",
"line": 57,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function pickle.load(): Deserialization vulnerability",
"location": {
"file": "src/database.py",
"line": 52,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded Private Key in scripts/backup.js",
"location": {
"file": "scripts/backup.js",
"line": 81,
"column": null
}
},
{
"rule_id": "hardcoded_secret_medium",
"severity": "warning",
"message": "Found potential hardcoded Potential Secret Hash in scripts/backup.js",
"location": {
"file": "scripts/backup.js",
"line": 81,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function eval(): Arbitrary code execution",
"location": {
"file": "scripts/backup.js",
"line": 23,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function new Function(): Arbitrary code execution",
"location": {
"file": "scripts/backup.js",
"line": 28,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function innerHTML: XSS vulnerability",
"location": {
"file": "scripts/backup.js",
"line": 33,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function innerHTML: XSS vulnerability",
"location": {
"file": "scripts/backup.js",
"line": 37,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function document.write(): XSS vulnerability",
"location": {
"file": "scripts/backup.js",
"line": 42,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded Private Key in src/Main.java",
"location": {
"file": "src/Main.java",
"line": 77,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via String concatenation in SQL",
"location": {
"file": "src/Main.java",
"line": 23,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via String concatenation in SQL",
"location": {
"file": "src/Main.java",
"line": 29,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via Dynamic query building",
"location": {
"file": "src/Main.java",
"line": 23,
"column": null
}
},
{
"rule_id": "sql_injection_high",
"severity": "error",
"message": "Detected potential SQL injection vulnerability via Dynamic query building",
"location": {
"file": "src/Main.java",
"line": 29,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function eval(): Arbitrary code execution",
"location": {
"file": "scripts/deploy.php",
"line": 28,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function exec(): Command execution",
"location": {
"file": "scripts/deploy.php",
"line": 22,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function exec(): Command execution",
"location": {
"file": "scripts/deploy.php",
"line": 23,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function system(): Command execution",
"location": {
"file": "scripts/deploy.php",
"line": 21,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function shell_exec(): Command execution",
"location": {
"file": "scripts/deploy.php",
"line": 23,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 12,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 21,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 23,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 24,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 31,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 45,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 50,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_GET usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 57,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 13,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 22,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 27,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 32,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 40,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 46,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 53,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 54,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 61,
"column": null
}
},
{
"rule_id": "dangerous_function_medium",
"severity": "warning",
"message": "Use of potentially dangerous function Direct $_POST usage: Input validation missing",
"location": {
"file": "scripts/deploy.php",
"line": 62,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded API Key in src/utils.rb",
"location": {
"file": "src/utils.rb",
"line": 64,
"column": null
}
},
{
"rule_id": "hardcoded_secret_medium",
"severity": "warning",
"message": "Found potential hardcoded Hardcoded Password in src/utils.rb",
"location": {
"file": "src/utils.rb",
"line": 63,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded Private Key in src/app.go",
"location": {
"file": "src/app.go",
"line": 59,
"column": null
}
},
{
"rule_id": "hardcoded_secret_high",
"severity": "error",
"message": "Found potential hardcoded Private Key in src/app.go",
"location": {
"file": "src/app.go",
"line": 62,
"column": null
}
},
{
"rule_id": "hardcoded_secret_medium",
"severity": "warning",
"message": "Found potential hardcoded Potential Secret Hash in src/app.go",
"location": {
"file": "src/app.go",
"line": 59,
"column": null
}
},
{
"rule_id": "hardcoded_secret_medium",
"severity": "warning",
"message": "Found potential hardcoded Potential Secret Hash in src/app.go",
"location": {
"file": "src/app.go",
"line": 62,
"column": null
}
}
]
}

File diff suppressed because it is too large Load Diff