Initial commit

This commit is contained in:
Tanguy Duhamel
2025-09-29 21:26:41 +02:00
parent f0fd367ed8
commit 323a434c73
208 changed files with 72069 additions and 53 deletions
+75
View File
@@ -0,0 +1,75 @@
"""
FuzzForge SDK - Python client for FuzzForge security testing platform
A comprehensive SDK for interacting with the FuzzForge API, providing
workflow management, real-time fuzzing monitoring, and SARIF findings retrieval.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from .client import FuzzForgeClient
from .models import (
WorkflowSubmission,
WorkflowMetadata,
WorkflowListItem,
WorkflowStatus,
WorkflowFindings,
ResourceLimits,
VolumeMount,
FuzzingStats,
CrashReport,
RunSubmissionResponse,
)
from .exceptions import (
FuzzForgeError,
FuzzForgeHTTPError,
WorkflowNotFoundError,
RunNotFoundError,
ValidationError,
)
from .testing import (
WorkflowTester,
TestResult,
TestSummary,
format_test_summary,
DEFAULT_TEST_CONFIG,
)
# Version string of the SDK package.
__version__ = "0.6.0"
# Public API of the package: every name imported above is re-exported here.
__all__ = [
# Client (from .client)
"FuzzForgeClient",
# Data models (from .models)
"WorkflowSubmission",
"WorkflowMetadata",
"WorkflowListItem",
"WorkflowStatus",
"WorkflowFindings",
"ResourceLimits",
"VolumeMount",
"FuzzingStats",
"CrashReport",
"RunSubmissionResponse",
# Exceptions (from .exceptions)
"FuzzForgeError",
"FuzzForgeHTTPError",
"WorkflowNotFoundError",
"RunNotFoundError",
"ValidationError",
# Testing helpers (from .testing)
"WorkflowTester",
"TestResult",
"TestSummary",
"format_test_summary",
"DEFAULT_TEST_CONFIG",
]
def main() -> None:
    """Print a short usage notice; the SDK does not implement a CLI yet."""
    notice = "FuzzForge SDK - Use as a library to interact with FuzzForge API"
    print(notice)
+536
View File
@@ -0,0 +1,536 @@
"""
Main client class for interacting with the FuzzForge API.
Provides both synchronous and asynchronous methods for all API endpoints,
including real-time monitoring capabilities for fuzzing workflows.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import asyncio
import json
import logging
from typing import Dict, Any, List, Optional, AsyncIterator, Iterator, Union
from urllib.parse import urljoin, urlparse
import warnings
import httpx
import websockets
try:
from sseclient import SSEClient # type: ignore
_HAVE_SSECLIENT = True
except Exception: # pragma: no cover
SSEClient = None # type: ignore
_HAVE_SSECLIENT = False
try:
import requests # type: ignore
_HAVE_REQUESTS = True
except Exception: # pragma: no cover
requests = None # type: ignore
_HAVE_REQUESTS = False
from .models import (
APIStatus,
WorkflowListItem,
WorkflowMetadata,
WorkflowParametersResponse,
WorkflowSubmission,
RunSubmissionResponse,
WorkflowStatus,
WorkflowFindings,
FuzzingStats,
CrashReport,
WebSocketMessage,
SSEMessage,
)
from .exceptions import (
FuzzForgeError,
FuzzForgeHTTPError,
ConnectionError,
TimeoutError,
WebSocketError,
SSEError,
from_http_error,
)
logger = logging.getLogger(__name__)
class FuzzForgeClient:
"""
Client for interacting with the FuzzForge API.
Provides methods for workflow management, run monitoring, and real-time
fuzzing statistics. Supports both synchronous and asynchronous operations.
Args:
base_url: Base URL of the FuzzForge API (e.g., "http://localhost:8000")
timeout: Default timeout for HTTP requests in seconds
verify_ssl: Whether to verify SSL certificates
"""
def __init__(
    self,
    base_url: str = "http://localhost:8000",
    timeout: float = 30.0,
    verify_ssl: bool = True,
):
    """Store the connection settings and create the HTTP clients.

    Args:
        base_url: Base URL of the API; trailing slashes are removed.
        timeout: Timeout in seconds handed to both HTTP clients.
        verify_ssl: Forwarded to both HTTP clients as ``verify``.
    """
    self.base_url = base_url.rstrip("/")
    self.timeout = timeout
    self.verify_ssl = verify_ssl

    # One blocking and one asyncio client, configured identically.
    http_options = {"timeout": timeout, "verify": verify_ssl}
    self._client = httpx.Client(**http_options)
    self._async_client = httpx.AsyncClient(**http_options)

    # The WebSocket base reuses host[:port] of the HTTP URL and maps
    # https -> wss, anything else -> ws.
    target = urlparse(self.base_url)
    if target.scheme == "https":
        ws_scheme = "wss"
    else:
        ws_scheme = "ws"
    self._ws_base_url = ws_scheme + "://" + target.netloc
def __enter__(self):
"""Enter a ``with`` block and return this client unchanged."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Leave a ``with`` block by calling ``close()``; returns None, so exceptions propagate."""
# NOTE(review): close() only closes the synchronous client; the async
# client created in __init__ is left for aclose().
self.close()
async def __aenter__(self):
"""Enter an ``async with`` block and return this client unchanged."""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Leave an ``async with`` block by awaiting ``aclose()``; returns None, so exceptions propagate."""
# NOTE(review): aclose() only closes the async client; the synchronous
# client created in __init__ is left for close().
await self.aclose()
def close(self):
"""Close the synchronous HTTP client (the async client is not touched)."""
self._client.close()
async def aclose(self):
"""Close the async HTTP client (the synchronous client is not touched)."""
await self._async_client.aclose()
def _handle_response(self, response: httpx.Response) -> Dict[str, Any]:
    """Validate an HTTP response and return its decoded JSON body.

    Args:
        response: Response returned by the synchronous HTTP client.

    Returns:
        The JSON-decoded response body.

    Raises:
        FuzzForgeError: The exception built by ``from_http_error`` when the
            status code signals an error, or a plain ``FuzzForgeError`` when
            the body cannot be decoded as JSON.
        ConnectionError: If httpx reports a request-level failure here.
    """
    try:
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        # ``from e`` keeps the httpx exception attached as __cause__.
        raise from_http_error(
            e.response.status_code, e.response.text, str(e.request.url)
        ) from e
    except httpx.RequestError as e:
        raise ConnectionError(f"Request failed: {e}") from e
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError (undecodable body)
        # are both ValueError subclasses.
        raise FuzzForgeError(f"Invalid JSON response: {e}") from e
async def _ahandle_response(self, response: httpx.Response) -> Dict[str, Any]:
    """Validate an HTTP response from the async client and return its JSON body.

    Args:
        response: Response returned by the async HTTP client.

    Returns:
        The JSON-decoded response body.

    Raises:
        FuzzForgeError: The exception built by ``from_http_error`` when the
            status code signals an error, or a plain ``FuzzForgeError`` when
            the body cannot be decoded as JSON.
        ConnectionError: If httpx reports a request-level failure here.
    """
    try:
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        # ``from e`` keeps the httpx exception attached as __cause__.
        raise from_http_error(
            e.response.status_code, e.response.text, str(e.request.url)
        ) from e
    except httpx.RequestError as e:
        raise ConnectionError(f"Request failed: {e}") from e
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError (undecodable body)
        # are both ValueError subclasses.
        raise FuzzForgeError(f"Invalid JSON response: {e}") from e
# Root API methods
def get_api_status(self) -> APIStatus:
    """Fetch the API root document and wrap it in an ``APIStatus`` model.

    Returns:
        ``APIStatus`` built from the JSON body of ``GET <base_url>``.
    """
    reply = self._client.get(self.base_url)
    return APIStatus(**self._handle_response(reply))
async def aget_api_status(self) -> APIStatus:
    """Fetch the API root document with the async client.

    Returns:
        ``APIStatus`` built from the JSON body of ``GET <base_url>``.
    """
    reply = await self._async_client.get(self.base_url)
    payload = await self._ahandle_response(reply)
    return APIStatus(**payload)
# Workflow management methods
def list_workflows(self) -> List[WorkflowListItem]:
    """Return the workflows advertised by ``GET /workflows/``.

    Returns:
        One ``WorkflowListItem`` per element of the decoded JSON payload.
    """
    endpoint = urljoin(self.base_url, "/workflows/")
    payload = self._handle_response(self._client.get(endpoint))
    return [WorkflowListItem(**entry) for entry in payload]
async def alist_workflows(self) -> List[WorkflowListItem]:
    """Return the workflows advertised by ``GET /workflows/`` (async).

    Returns:
        One ``WorkflowListItem`` per element of the decoded JSON payload.
    """
    endpoint = urljoin(self.base_url, "/workflows/")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return [WorkflowListItem(**entry) for entry in payload]
def get_workflow_metadata(self, workflow_name: str) -> WorkflowMetadata:
    """Fetch ``/workflows/<workflow_name>/metadata``.

    Args:
        workflow_name: Name interpolated into the request path as-is.

    Returns:
        ``WorkflowMetadata`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/metadata")
    payload = self._handle_response(self._client.get(endpoint))
    return WorkflowMetadata(**payload)
async def aget_workflow_metadata(self, workflow_name: str) -> WorkflowMetadata:
    """Fetch ``/workflows/<workflow_name>/metadata`` with the async client.

    Args:
        workflow_name: Name interpolated into the request path as-is.

    Returns:
        ``WorkflowMetadata`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/metadata")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return WorkflowMetadata(**payload)
def get_workflow_parameters(self, workflow_name: str) -> WorkflowParametersResponse:
    """Fetch ``/workflows/<workflow_name>/parameters``.

    Args:
        workflow_name: Name interpolated into the request path as-is.

    Returns:
        ``WorkflowParametersResponse`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/parameters")
    payload = self._handle_response(self._client.get(endpoint))
    return WorkflowParametersResponse(**payload)
async def aget_workflow_parameters(self, workflow_name: str) -> WorkflowParametersResponse:
    """Fetch ``/workflows/<workflow_name>/parameters`` with the async client.

    Args:
        workflow_name: Name interpolated into the request path as-is.

    Returns:
        ``WorkflowParametersResponse`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/parameters")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return WorkflowParametersResponse(**payload)
def get_metadata_schema(self) -> Dict[str, Any]:
    """Fetch ``/workflows/metadata/schema`` and return the decoded JSON body."""
    endpoint = urljoin(self.base_url, "/workflows/metadata/schema")
    reply = self._client.get(endpoint)
    schema = self._handle_response(reply)
    return schema
async def aget_metadata_schema(self) -> Dict[str, Any]:
    """Fetch ``/workflows/metadata/schema`` with the async client."""
    endpoint = urljoin(self.base_url, "/workflows/metadata/schema")
    reply = await self._async_client.get(endpoint)
    schema = await self._ahandle_response(reply)
    return schema
def submit_workflow(
    self,
    workflow_name: str,
    submission: WorkflowSubmission
) -> RunSubmissionResponse:
    """POST a submission to ``/workflows/<workflow_name>/submit``.

    Args:
        workflow_name: Name interpolated into the request path as-is.
        submission: Model whose ``model_dump()`` output is sent as JSON.

    Returns:
        ``RunSubmissionResponse`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/submit")
    body = submission.model_dump()
    payload = self._handle_response(self._client.post(endpoint, json=body))
    return RunSubmissionResponse(**payload)
async def asubmit_workflow(
    self,
    workflow_name: str,
    submission: WorkflowSubmission
) -> RunSubmissionResponse:
    """POST a submission to ``/workflows/<workflow_name>/submit`` (async).

    Args:
        workflow_name: Name interpolated into the request path as-is.
        submission: Model whose ``model_dump()`` output is sent as JSON.

    Returns:
        ``RunSubmissionResponse`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/workflows/{workflow_name}/submit")
    body = submission.model_dump()
    reply = await self._async_client.post(endpoint, json=body)
    payload = await self._ahandle_response(reply)
    return RunSubmissionResponse(**payload)
# Run management methods
def get_run_status(self, run_id: str) -> WorkflowStatus:
    """Fetch ``/runs/<run_id>/status``.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``WorkflowStatus`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{run_id}/status")
    payload = self._handle_response(self._client.get(endpoint))
    return WorkflowStatus(**payload)
async def aget_run_status(self, run_id: str) -> WorkflowStatus:
    """Fetch ``/runs/<run_id>/status`` with the async client.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``WorkflowStatus`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{run_id}/status")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return WorkflowStatus(**payload)
def get_run_findings(self, run_id: str) -> WorkflowFindings:
    """Fetch ``/runs/<run_id>/findings``.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``WorkflowFindings`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{run_id}/findings")
    payload = self._handle_response(self._client.get(endpoint))
    return WorkflowFindings(**payload)
async def aget_run_findings(self, run_id: str) -> WorkflowFindings:
    """Fetch ``/runs/<run_id>/findings`` with the async client.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``WorkflowFindings`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{run_id}/findings")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return WorkflowFindings(**payload)
def get_workflow_findings(self, workflow_name: str, run_id: str) -> WorkflowFindings:
    """Fetch ``/runs/<workflow_name>/findings/<run_id>`` (alternative endpoint).

    Args:
        workflow_name: Workflow name interpolated into the path as-is.
        run_id: Run identifier interpolated into the path as-is.

    Returns:
        ``WorkflowFindings`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{workflow_name}/findings/{run_id}")
    payload = self._handle_response(self._client.get(endpoint))
    return WorkflowFindings(**payload)
async def aget_workflow_findings(self, workflow_name: str, run_id: str) -> WorkflowFindings:
    """Fetch ``/runs/<workflow_name>/findings/<run_id>`` with the async client.

    Args:
        workflow_name: Workflow name interpolated into the path as-is.
        run_id: Run identifier interpolated into the path as-is.

    Returns:
        ``WorkflowFindings`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/runs/{workflow_name}/findings/{run_id}")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return WorkflowFindings(**payload)
# Fuzzing methods
def get_fuzzing_stats(self, run_id: str) -> FuzzingStats:
    """Fetch ``/fuzzing/<run_id>/stats``.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``FuzzingStats`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}/stats")
    payload = self._handle_response(self._client.get(endpoint))
    return FuzzingStats(**payload)
async def aget_fuzzing_stats(self, run_id: str) -> FuzzingStats:
    """Fetch ``/fuzzing/<run_id>/stats`` with the async client.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        ``FuzzingStats`` built from the JSON body.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}/stats")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return FuzzingStats(**payload)
def get_crash_reports(self, run_id: str) -> List[CrashReport]:
    """Fetch ``/fuzzing/<run_id>/crashes``.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        One ``CrashReport`` per element of the decoded JSON payload.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}/crashes")
    payload = self._handle_response(self._client.get(endpoint))
    return [CrashReport(**entry) for entry in payload]
async def aget_crash_reports(self, run_id: str) -> List[CrashReport]:
    """Fetch ``/fuzzing/<run_id>/crashes`` with the async client.

    Args:
        run_id: Run identifier interpolated into the request path as-is.

    Returns:
        One ``CrashReport`` per element of the decoded JSON payload.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}/crashes")
    reply = await self._async_client.get(endpoint)
    payload = await self._ahandle_response(reply)
    return [CrashReport(**entry) for entry in payload]
def cleanup_fuzzing_run(self, run_id: str) -> Dict[str, Any]:
    """Send ``DELETE /fuzzing/<run_id>`` and return the decoded JSON body.

    Args:
        run_id: Run identifier interpolated into the request path as-is.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}")
    reply = self._client.delete(endpoint)
    outcome = self._handle_response(reply)
    return outcome
async def acleanup_fuzzing_run(self, run_id: str) -> Dict[str, Any]:
    """Send ``DELETE /fuzzing/<run_id>`` with the async client.

    Args:
        run_id: Run identifier interpolated into the request path as-is.
    """
    endpoint = urljoin(self.base_url, f"/fuzzing/{run_id}")
    reply = await self._async_client.delete(endpoint)
    outcome = await self._ahandle_response(reply)
    return outcome
# Real-time monitoring methods
async def monitor_fuzzing_websocket(self, run_id: str) -> AsyncIterator[WebSocketMessage]:
    """
    Monitor fuzzing progress via WebSocket for real-time updates.

    Connects to ``<ws base>/fuzzing/<run_id>/live`` and yields one
    ``WebSocketMessage`` per JSON text frame. Iteration ends when no frame
    arrives within ``self.timeout`` seconds or when the server closes the
    connection normally.

    Args:
        run_id: The fuzzing run ID to monitor

    Yields:
        WebSocketMessage objects with real-time updates

    Raises:
        WebSocketError: If the connection fails, closes abnormally, or a
            frame cannot be turned into a ``WebSocketMessage``
    """
    url = f"{self._ws_base_url}/fuzzing/{run_id}/live"
    try:
        async with websockets.connect(
            url,
            # ``open_timeout`` bounds the opening handshake. The previous
            # ``timeout=`` keyword is only a deprecated alias of
            # ``close_timeout`` in the legacy client and is not accepted
            # by the current asyncio client.
            open_timeout=self.timeout,
            ping_interval=20,
            ping_timeout=10,
        ) as websocket:
            while True:
                try:
                    # Send a ping before each receive to keep the connection alive
                    await websocket.ping()
                    # Receive message with timeout
                    message = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=self.timeout
                    )
                except asyncio.TimeoutError:
                    logger.warning(f"WebSocket timeout for run {run_id}")
                    break

                # Application-level keepalive replies carry no data
                if message == "pong":
                    continue

                try:
                    data = json.loads(message)
                except json.JSONDecodeError as e:
                    logger.error(f"Invalid JSON from WebSocket: {e}")
                    continue
                yield WebSocketMessage(**data)
    except websockets.exceptions.ConnectionClosedOK:
        # Normal closure by the server is the end of the stream, not a failure
        return
    except websockets.exceptions.WebSocketException as e:
        raise WebSocketError(f"WebSocket connection failed: {e}") from e
    except Exception as e:
        raise WebSocketError(f"WebSocket error: {e}") from e
def monitor_fuzzing_sse(self, run_id: str) -> Iterator[SSEMessage]:
    """
    Monitor fuzzing progress via Server-Sent Events.

    Streams ``/fuzzing/<run_id>/stream``. When both optional packages
    ``requests`` and ``sseclient`` are importable they are used to parse the
    stream; otherwise the stream is read through the httpx client and the
    ``data:`` fields are parsed manually. Events whose data is not valid
    JSON are logged and skipped in both paths.

    Args:
        run_id: The fuzzing run ID to monitor

    Yields:
        SSEMessage objects with real-time updates

    Raises:
        FuzzForgeError: The exception built by ``from_http_error`` when the
            server answers with an error status
        SSEError: If the SSE connection fails for any other reason
    """
    url = urljoin(self.base_url, f"/fuzzing/{run_id}/stream")
    headers = {"Accept": "text/event-stream"}

    # The requests-based path calls SSEClient, so it needs BOTH optional
    # packages; with only ``requests`` installed SSEClient is None.
    if _HAVE_REQUESTS and _HAVE_SSECLIENT:
        try:
            with requests.Session() as sess:
                with sess.get(
                    url,
                    headers=headers,
                    stream=True,
                    timeout=None,
                ) as resp:
                    resp.raise_for_status()
                    client = SSEClient(resp)
                    for event in client.events():
                        if not event.data:
                            continue
                        try:
                            data = json.loads(event.data)
                        except json.JSONDecodeError as e:
                            logger.error(f"Invalid JSON from SSE: {e}")
                            continue
                        yield SSEMessage(**data)
        except requests.HTTPError as e:  # type: ignore[attr-defined]
            status = getattr(e.response, 'status_code', 0)
            url_txt = getattr(e.request, 'url', url)
            raise from_http_error(status, str(e), str(url_txt)) from e
        except Exception as e:  # pragma: no cover
            raise SSEError(f"SSE connection failed: {e}") from e
    else:
        # Manual SSE parse over httpx streaming
        try:
            with self._client.stream("GET", url, headers=headers, timeout=None) as resp:
                if resp.is_error:
                    # A streamed body is not loaded automatically; read it
                    # now so the handler below can use ``e.response.text``.
                    resp.read()
                resp.raise_for_status()
                data_lines = []
                for raw_line in resp.iter_lines():
                    # Coerce to str in case the backend yields bytes
                    if isinstance(raw_line, (bytes, bytearray)):
                        line = raw_line.decode("utf-8", errors="ignore")
                    else:
                        line = str(raw_line)
                    if line == "":
                        # Blank line terminates the event: dispatch what was collected.
                        # Consecutive ``data:`` fields are joined with a newline.
                        payload = "\n".join(data_lines)
                        data_lines = []
                        if not payload:
                            continue
                        try:
                            data = json.loads(payload)
                        except json.JSONDecodeError as e:
                            logger.error(f"Invalid JSON from SSE: {e}")
                            continue
                        yield SSEMessage(**data)
                        continue
                    if line.startswith(":"):
                        # Comment/heartbeat
                        continue
                    if line.startswith("data:"):
                        data_lines.append(line[5:].lstrip())
        except httpx.HTTPStatusError as e:
            raise from_http_error(
                e.response.status_code, e.response.text, str(e.request.url)
            ) from e
        except Exception as e:  # pragma: no cover
            raise SSEError(f"SSE error: {e}") from e
# Utility methods
def wait_for_completion(
    self,
    run_id: str,
    poll_interval: float = 5.0,
    timeout: Optional[float] = None
) -> WorkflowStatus:
    """
    Wait for a workflow run to complete.

    Polls ``get_run_status`` until the run reports completion or failure.

    Args:
        run_id: The run ID to monitor
        poll_interval: How often to check status (seconds)
        timeout: Maximum time to wait (seconds), None for no timeout.
            ``0`` means "poll once, then give up".

    Returns:
        Final WorkflowStatus when completed

    Raises:
        TimeoutError: If timeout is reached
        FuzzForgeHTTPError: If run fails or other API errors
    """
    import time

    # Monotonic clock: unaffected by wall-clock adjustments while waiting
    started = time.monotonic()
    while True:
        status = self.get_run_status(run_id)
        if status.is_completed:
            return status
        if status.is_failed:
            # FuzzForgeHTTPError takes no ``details`` keyword; carry the
            # extra information in the error context instead. Imported here
            # because this module does not import ErrorContext at file level.
            from .exceptions import ErrorContext

            raise FuzzForgeHTTPError(
                f"Run {run_id} failed with status: {status.status}",
                500,
                context=ErrorContext(
                    response_data={"run_id": run_id, "status": status.status}
                ),
            )
        # ``is not None`` so that an explicit timeout of 0 is honoured
        if timeout is not None and (time.monotonic() - started) >= timeout:
            raise TimeoutError(f"Timeout waiting for run {run_id} to complete")
        time.sleep(poll_interval)
async def await_for_completion(
    self,
    run_id: str,
    poll_interval: float = 5.0,
    timeout: Optional[float] = None
) -> WorkflowStatus:
    """
    Wait for a workflow run to complete (async).

    Polls ``aget_run_status`` until the run reports completion or failure.

    Args:
        run_id: The run ID to monitor
        poll_interval: How often to check status (seconds)
        timeout: Maximum time to wait (seconds), None for no timeout.
            ``0`` means "poll once, then give up".

    Returns:
        Final WorkflowStatus when completed

    Raises:
        TimeoutError: If timeout is reached
        FuzzForgeHTTPError: If run fails or other API errors
    """
    # Inside a coroutine the running loop is the one whose clock we want
    clock = asyncio.get_running_loop().time
    started = clock()
    while True:
        status = await self.aget_run_status(run_id)
        if status.is_completed:
            return status
        if status.is_failed:
            # FuzzForgeHTTPError takes no ``details`` keyword; carry the
            # extra information in the error context instead. Imported here
            # because this module does not import ErrorContext at file level.
            from .exceptions import ErrorContext

            raise FuzzForgeHTTPError(
                f"Run {run_id} failed with status: {status.status}",
                500,
                context=ErrorContext(
                    response_data={"run_id": run_id, "status": status.status}
                ),
            )
        # ``is not None`` so that an explicit timeout of 0 is honoured
        if timeout is not None and (clock() - started) >= timeout:
            raise TimeoutError(f"Timeout waiting for run {run_id} to complete")
        await asyncio.sleep(poll_interval)
+399
View File
@@ -0,0 +1,399 @@
"""
Docker log integration for enhanced error reporting.
This module provides functionality to fetch and parse Docker container logs
to provide better context for deployment and workflow execution errors.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import logging
import re
import subprocess
import json
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, timezone
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@dataclass
class ContainerLogEntry:
"""A single log entry from a container."""
# Time parsed from the Docker timestamp prefix; the parser falls back to the
# current UTC time when the line has no parsable timestamp.
timestamp: datetime
# Level guessed from keywords in the message: 'ERROR', 'WARNING', 'INFO' or 'DEBUG'.
level: str
# Log text with the timestamp prefix removed and surrounding whitespace stripped.
message: str
stream: str # 'stdout' or 'stderr'
# The unmodified input line.
raw: str
@dataclass
class ContainerDiagnostics:
"""Complete diagnostics for a container."""
# Name or ID the diagnostics were requested for; None when Docker is unavailable.
container_id: Optional[str]
# 'State.Status' from `docker inspect`, or "unknown" / "not_found" when it could not be read.
status: str
# 'State.ExitCode' from `docker inspect`, if available.
exit_code: Optional[int]
# 'State.Error' from `docker inspect`, or a message explaining why no data was collected.
error: Optional[str]
# Parsed log entries fetched for the container.
logs: List[ContainerLogEntry]
# Keys 'memory_limit', 'cpu_limit', 'cpu_period' taken from HostConfig; empty when not inspected.
resource_usage: Dict[str, Any]
# One dict per mount with keys 'source', 'destination', 'mode', 'type'.
volume_mounts: List[Dict[str, str]]
class DockerLogIntegration:
"""
Integration with Docker to fetch container logs and diagnostics.
This class provides methods to fetch container logs, parse common error
patterns, and extract meaningful diagnostic information from Docker
containers related to FuzzForge workflow execution.
"""
def __init__(self):
    """Probe for a usable Docker CLI and register the known error patterns.

    ``error_patterns`` maps an error category to the regular expressions
    (matched case-insensitively by ``analyze_error_patterns``) that
    identify it in container log messages.
    """
    self.docker_available = self._check_docker_availability()

    # Common error patterns in container logs
    patterns = {}
    patterns['permission_denied'] = [
        r'permission denied',
        r'operation not permitted',
        r'cannot access.*permission denied',
    ]
    patterns['out_of_memory'] = [
        r'out of memory',
        r'oom killed',
        r'cannot allocate memory',
    ]
    patterns['image_pull_failed'] = [
        r'failed to pull image',
        r'pull access denied',
        r'image not found',
    ]
    patterns['volume_mount_failed'] = [
        r'invalid mount config',
        r'mount denied',
        r'no such file or directory.*mount',
    ]
    patterns['network_error'] = [
        r'network is unreachable',
        r'connection refused',
        r'timeout.*connect',
    ]
    patterns['prefect_error'] = [
        r'prefect.*error',
        r'flow run failed',
        r'task.*failed',
    ]
    self.error_patterns = patterns
def _check_docker_availability(self) -> bool:
    """Check if Docker is available and accessible.

    Runs ``docker version --format json`` with a 5 second timeout.

    Returns:
        True when the command exits with status 0; False when it exits
        non-zero, times out, or cannot be started at all.
    """
    probe = ['docker', 'version', '--format', 'json']
    try:
        completed = subprocess.run(probe, capture_output=True, text=True, timeout=5)
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError (no docker binary) as well as
        # PermissionError and similar launch failures. This probe runs when
        # the module-level instance is created, so it must never raise.
        # TimeoutExpired is a SubprocessError.
        return False
    return completed.returncode == 0
def get_container_logs(self, container_name_or_id: str, tail: int = 100) -> List[ContainerLogEntry]:
    """
    Fetch logs from a Docker container.

    Runs ``docker logs --timestamps --tail <tail> <container>`` with a
    10 second timeout and parses stdout followed by stderr.

    Args:
        container_name_or_id: Container name or ID
        tail: Number of log lines to retrieve

    Returns:
        List of parsed log entries; empty when Docker is unavailable, the
        command fails or times out, or any other error occurs (each case
        is logged)
    """
    if not self.docker_available:
        logger.warning("Docker not available, cannot fetch container logs")
        return []

    try:
        command = ['docker', 'logs', '--timestamps', '--tail', str(tail), container_name_or_id]
        completed = subprocess.run(command, capture_output=True, text=True, timeout=10)
        if completed.returncode == 0:
            combined_output = completed.stdout + completed.stderr
            return self._parse_docker_logs(combined_output)
        logger.error(f"Failed to fetch logs for container {container_name_or_id}: {completed.stderr}")
    except subprocess.TimeoutExpired:
        logger.error(f"Timeout fetching logs for container {container_name_or_id}")
    except Exception as e:
        logger.error(f"Error fetching container logs: {e}")
    return []
def _parse_docker_logs(self, raw_logs: str) -> List[ContainerLogEntry]:
    """Split raw Docker output into lines and parse each non-blank one.

    Lines for which ``_parse_log_line`` returns a falsy value are dropped.
    """
    candidate_lines = (
        text for text in raw_logs.strip().split('\n') if text.strip()
    )
    parsed = (self._parse_log_line(text) for text in candidate_lines)
    return [entry for entry in parsed if entry]
def _parse_log_line(self, line: str) -> Optional[ContainerLogEntry]:
    """Parse a single log line with timestamp.

    Args:
        line: One line of ``docker logs --timestamps`` output.

    Returns:
        A ``ContainerLogEntry``. When the line carries no parsable
        timestamp prefix the current UTC time is used and the whole line
        becomes the message.
    """
    # Docker log format: 2023-10-01T12:00:00.000000000Z message
    timestamp_match = re.match(r'^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\s+(.*)', line)
    if timestamp_match:
        timestamp_str, message = timestamp_match.groups()
        # Docker prints nanoseconds (9 fractional digits), but datetime
        # stores microseconds and fromisoformat() before Python 3.11
        # rejects anything other than 3 or 6 digits. Normalise the
        # fraction to exactly 6 digits so parsing works on every version.
        seconds_part, _, fraction = timestamp_str[:-1].partition('.')
        microseconds = fraction[:6].ljust(6, '0')
        try:
            timestamp = datetime.fromisoformat(f"{seconds_part}.{microseconds}+00:00")
        except ValueError:
            timestamp = datetime.now(timezone.utc)
    else:
        timestamp = datetime.now(timezone.utc)
        message = line

    # Determine log level from message content
    level = self._extract_log_level(message)

    # Determine stream (simplified - Docker doesn't clearly separate in combined output)
    lowered = message.lower()
    looks_like_error = any(keyword in lowered for keyword in ['error', 'failed', 'exception'])
    stream = 'stderr' if looks_like_error else 'stdout'

    return ContainerLogEntry(
        timestamp=timestamp,
        level=level,
        message=message.strip(),
        stream=stream,
        raw=line
    )
def _extract_log_level(self, message: str) -> str:
    """Guess a log level from keywords contained in the message.

    The rules are checked in order and the first rule with a keyword that
    occurs anywhere in the lower-cased message wins; messages matching no
    rule are reported as 'INFO'.
    """
    lowered = message.lower()
    ordered_rules = (
        ('ERROR', ('error', 'failed', 'exception', 'fatal')),
        ('WARNING', ('warning', 'warn')),
        ('INFO', ('info', 'information')),
        ('DEBUG', ('debug',)),
    )
    for level, keywords in ordered_rules:
        for keyword in keywords:
            if keyword in lowered:
                return level
    return 'INFO'
def get_container_diagnostics(self, container_name_or_id: str) -> ContainerDiagnostics:
    """
    Collect inspect data and logs for a container.

    Args:
        container_name_or_id: Container name or ID

    Returns:
        ``ContainerDiagnostics`` with status "unknown" when Docker is not
        available, "not_found" when the container cannot be inspected, and
        the inspected state, mounts and resource limits otherwise
    """
    if not self.docker_available:
        return ContainerDiagnostics(
            container_id=None,
            status="unknown",
            exit_code=None,
            error="Docker not available",
            logs=[],
            resource_usage={},
            volume_mounts=[]
        )

    # Inspect first, then fetch logs (logs are fetched even if inspect fails)
    inspect_data = self._get_container_inspect(container_name_or_id)
    logs = self.get_container_logs(container_name_or_id)

    if not inspect_data:
        return ContainerDiagnostics(
            container_id=container_name_or_id,
            status="not_found",
            exit_code=None,
            error=f"Container {container_name_or_id} not found",
            logs=logs,
            resource_usage={},
            volume_mounts=[]
        )

    state = inspect_data.get('State', {})
    host_config = inspect_data.get('HostConfig', {})

    # Output key -> key in the `docker inspect` mount record
    mount_fields = (
        ('source', 'Source'),
        ('destination', 'Destination'),
        ('mode', 'Mode'),
        ('type', 'Type'),
    )
    volume_mounts = []
    for mount in inspect_data.get('Mounts', []):
        volume_mounts.append(
            {target: mount.get(origin, '') for target, origin in mount_fields}
        )

    # Resource limits as configured on the host side
    resource_usage = {
        'memory_limit': host_config.get('Memory', 0),
        'cpu_limit': host_config.get('CpuQuota', 0),
        'cpu_period': host_config.get('CpuPeriod', 0)
    }

    return ContainerDiagnostics(
        container_id=container_name_or_id,
        status=state.get('Status', 'unknown'),
        exit_code=state.get('ExitCode'),
        error=state.get('Error', ''),
        logs=logs,
        resource_usage=resource_usage,
        volume_mounts=volume_mounts
    )
def _get_container_inspect(self, container_name_or_id: str) -> Optional[Dict[str, Any]]:
    """Return the first record of ``docker inspect <container>``.

    Returns:
        The first element of the decoded JSON output, or None when the
        command exits non-zero, the output is empty, or anything raises
        (failures are logged at debug level).
    """
    try:
        completed = subprocess.run(
            ['docker', 'inspect', container_name_or_id],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if completed.returncode != 0:
            return None
        records = json.loads(completed.stdout)
        if records:
            return records[0]
        return None
    except Exception as e:
        # Timeouts and JSON decoding errors are Exception subclasses too
        logger.debug(f"Failed to inspect container {container_name_or_id}: {e}")
        return None
def analyze_error_patterns(self, logs: List[ContainerLogEntry]) -> Dict[str, List[str]]:
    """
    Group log messages by the error categories they match.

    Args:
        logs: List of log entries to analyze

    Returns:
        Dictionary mapping each error type with at least one hit to the
        matching messages (each message listed once per error type, in log
        order); error types without hits are omitted
    """
    findings = {}
    for error_type, patterns in self.error_patterns.items():
        hits = [
            entry.message
            for entry in logs
            if any(re.search(pattern, entry.message, re.IGNORECASE) for pattern in patterns)
        ]
        if hits:
            findings[error_type] = hits
    return findings
def get_container_names_by_label(self, label_filter: str) -> List[str]:
    """
    List the names of all containers (running or not) carrying a label.

    Runs ``docker ps -a --filter label=<label_filter>`` with a 5 second
    timeout.

    Args:
        label_filter: Label filter (e.g., "prefect.flow-run-id=12345")

    Returns:
        List of container names; empty when Docker is unavailable, the
        command fails, or any error occurs
    """
    if not self.docker_available:
        return []

    try:
        command = ['docker', 'ps', '-a', '--filter', f'label={label_filter}', '--format', '{{.Names}}']
        completed = subprocess.run(command, capture_output=True, text=True, timeout=5)
        if completed.returncode != 0:
            return []
        names = []
        for raw_name in completed.stdout.strip().split('\n'):
            cleaned = raw_name.strip()
            if cleaned:
                names.append(cleaned)
        return names
    except Exception as e:
        logger.debug(f"Failed to get containers by label {label_filter}: {e}")
        return []
def suggest_fixes(self, error_analysis: Dict[str, List[str]]) -> List[str]:
    """
    Turn detected error categories into troubleshooting hints.

    Args:
        error_analysis: Result from analyze_error_patterns()

    Returns:
        Hints for every known category present as a key, in the fixed
        category order below; a single generic hint when none is present
    """
    hints_by_category = (
        ('permission_denied', (
            "Check file permissions on the target path",
            "Ensure the Docker daemon has access to the mounted volumes",
            "Try running with elevated privileges or adjust volume ownership",
        )),
        ('out_of_memory', (
            "Increase memory limits for the workflow",
            "Check if the target files are too large for available memory",
            "Consider using streaming processing for large datasets",
        )),
        ('image_pull_failed', (
            "Check network connectivity to Docker registry",
            "Verify image name and tag are correct",
            "Ensure Docker registry credentials are configured",
        )),
        ('volume_mount_failed', (
            "Verify the target path exists and is accessible",
            "Check volume mount syntax and permissions",
            "Ensure the path is not already in use by another process",
        )),
        ('network_error', (
            "Check network connectivity",
            "Verify backend services are running (docker-compose up -d)",
            "Check firewall settings and port availability",
        )),
        ('prefect_error', (
            "Check Prefect server connectivity",
            "Verify workflow deployment is successful",
            "Review workflow-specific parameters and requirements",
        )),
    )

    suggestions = []
    for category, hints in hints_by_category:
        if category in error_analysis:
            suggestions.extend(hints)

    if not suggestions:
        suggestions.append("Review the container logs above for specific error details")
    return suggestions
# Global instance for easy access
# NOTE: DockerLogIntegration.__init__ calls _check_docker_availability(), which
# runs `docker version`; importing this module therefore spawns that command once.
docker_integration = DockerLogIntegration()
+553
View File
@@ -0,0 +1,553 @@
"""
Enhanced exceptions for FuzzForge SDK with rich context and Docker integration.
Provides comprehensive error information including container logs, diagnostics,
and actionable suggestions for troubleshooting.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import json
import re
from typing import Optional, Dict, Any, List, Union
from dataclasses import dataclass, asdict
from .docker_logs import docker_integration, ContainerDiagnostics
@dataclass
class ErrorContext:
    """Optional details attached to a FuzzForge error.

    ``suggested_fixes`` and ``error_patterns`` default to ``None`` in the
    signature and are replaced by a fresh empty list / dict after
    construction, so instances never share those containers.
    """
    url: Optional[str] = None
    request_method: Optional[str] = None
    request_data: Optional[Dict[str, Any]] = None
    response_data: Optional[Dict[str, Any]] = None
    container_diagnostics: Optional[ContainerDiagnostics] = None
    suggested_fixes: List[str] = None
    error_patterns: Dict[str, List[str]] = None
    related_run_id: Optional[str] = None
    workflow_name: Optional[str] = None

    def __post_init__(self):
        # None means "not supplied": give this instance its own empty container
        for attribute, factory in (("suggested_fixes", list), ("error_patterns", dict)):
            if getattr(self, attribute) is None:
                setattr(self, attribute, factory())

    def to_dict(self) -> Dict[str, Any]:
        """Return the context as a plain dictionary (via ``dataclasses.asdict``)."""
        as_mapping = asdict(self)
        return as_mapping
class FuzzForgeError(Exception):
    """Base exception for all FuzzForge SDK errors with rich context.

    Attributes:
        message: The human-readable error message.
        context: ``ErrorContext`` with additional details (never None).
        original_exception: The exception this error wraps, if any.
    """

    def __init__(
        self,
        message: str,
        context: Optional[ErrorContext] = None,
        original_exception: Optional[Exception] = None
    ):
        """Create the error and, when a run ID is known, collect diagnostics.

        Args:
            message: Human-readable error message.
            context: Extra details; a fresh ``ErrorContext`` is used when omitted.
            original_exception: Underlying exception, if any.
        """
        super().__init__(message)
        self.message = message
        self.context = context or ErrorContext()
        self.original_exception = original_exception

        # Auto-populate container diagnostics if we have a run ID
        if self.context.related_run_id and not self.context.container_diagnostics:
            self._fetch_container_diagnostics()

    def _fetch_container_diagnostics(self):
        """Fetch container diagnostics for the related run (best effort).

        Looks up containers labelled ``prefect.flow-run-id=<run id>`` and
        stores the diagnostics of the first one on the context. Log-based
        error patterns and suggestions are added only when the container
        produced logs. Any failure is ignored so that building the error
        never raises.
        """
        if not self.context.related_run_id:
            return

        try:
            # Try to find containers by Prefect run ID label
            label_filter = f"prefect.flow-run-id={self.context.related_run_id}"
            container_names = docker_integration.get_container_names_by_label(label_filter)
            if not container_names:
                return

            # The first listed container is used (presumably the most
            # recent one, given `docker ps` ordering -- TODO confirm)
            diagnostics = docker_integration.get_container_diagnostics(container_names[0])

            # Keep status / exit code even when the container has no logs,
            # matching how DeploymentError stores its diagnostics.
            self.context.container_diagnostics = diagnostics

            # Analyze error patterns in logs
            if diagnostics.logs:
                error_analysis = docker_integration.analyze_error_patterns(diagnostics.logs)
                suggestions = docker_integration.suggest_fixes(error_analysis)
                self.context.error_patterns = error_analysis
                self.context.suggested_fixes.extend(suggestions)
        except Exception:
            # Deliberate best effort: don't fail the main error because of
            # diagnostics issues
            pass

    def get_summary(self) -> str:
        """Get a one-line summary: message, container state and detected issues."""
        parts = [self.message]

        if self.context.container_diagnostics:
            diag = self.context.container_diagnostics
            if diag.status != 'running':
                parts.append(f"Container status: {diag.status}")
            if diag.exit_code is not None:
                parts.append(f"Exit code: {diag.exit_code}")

        if self.context.error_patterns:
            detected = list(self.context.error_patterns.keys())
            parts.append(f"Detected issues: {', '.join(detected)}")

        return " | ".join(parts)

    def get_detailed_info(self) -> Dict[str, Any]:
        """Get detailed error information for rich display.

        Returns:
            Dictionary with "message", "type" (the class name) and every
            field of the error context.
        """
        info = {
            "message": self.message,
            "type": self.__class__.__name__,
        }
        if self.context:
            info.update(self.context.to_dict())
        return info

    def __str__(self) -> str:
        """Return the summary produced by ``get_summary()``."""
        return self.get_summary()
class FuzzForgeHTTPError(FuzzForgeError):
    """Error raised for a failed HTTP exchange with the FuzzForge API.

    Args:
        message: Human-readable description of the failure.
        status_code: HTTP status code of the response.
        response_text: Raw response body, if any. A non-empty body is stored
            on ``context.response_data``: parsed when it is valid JSON,
            otherwise wrapped as ``{"raw": response_text}``.
        context: Error context to enrich; created when omitted.
        original_exception: Underlying exception, if any.
    """

    def __init__(
        self,
        message: str,
        status_code: int,
        response_text: Optional[str] = None,
        context: Optional[ErrorContext] = None,
        original_exception: Optional[Exception] = None
    ):
        context = ErrorContext() if context is None else context

        # An empty or missing body leaves context.response_data untouched.
        if response_text:
            try:
                payload = json.loads(response_text)
            except (json.JSONDecodeError, TypeError):
                payload = {"raw": response_text}
            context.response_data = payload

        super().__init__(message, context, original_exception)
        self.status_code = status_code
        self.response_text = response_text

    def get_summary(self) -> str:
        """Return ``"HTTP <code>: <message>"``, plus a non-zero container exit code if known."""
        summary = f"HTTP {self.status_code}: {self.message}"

        diag = self.context.container_diagnostics
        if diag and diag.exit_code is not None and diag.exit_code != 0:
            summary += f" (Container exit code: {diag.exit_code})"

        return summary
class DeploymentError(FuzzForgeHTTPError):
    """HTTP error for a failed workflow deployment, enriched with container diagnostics.

    Args:
        workflow_name: Workflow whose deployment failed; also recorded on the
            error context.
        message: Failure description, embedded in the final message.
        deployment_id: Identifier of the deployment, stored on the instance.
        container_name: When given, diagnostics for this container are
            fetched immediately (best effort).
        status_code: HTTP status code (defaults to 500).
        response_text: Raw response body, if any.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        workflow_name: str,
        message: str,
        deployment_id: Optional[str] = None,
        container_name: Optional[str] = None,
        status_code: int = 500,
        response_text: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context
        context.workflow_name = workflow_name

        # With a known container we can collect its diagnostics right away.
        if container_name:
            try:
                diag = docker_integration.get_container_diagnostics(container_name)
                context.container_diagnostics = diag

                # Scan the logs for known error patterns, if there are any logs.
                if diag.logs:
                    patterns = docker_integration.analyze_error_patterns(diag.logs)
                    fixes = docker_integration.suggest_fixes(patterns)
                    context.error_patterns = patterns
                    context.suggested_fixes.extend(fixes)
            except Exception:
                # Diagnostics are optional; never let them mask the real error.
                pass

        super().__init__(
            f"Deployment failed for workflow '{workflow_name}': {message}",
            status_code,
            response_text,
            context,
        )
        self.workflow_name = workflow_name
        self.deployment_id = deployment_id
class WorkflowExecutionError(FuzzForgeHTTPError):
    """HTTP error raised when a workflow run fails during execution.

    The workflow name and run ID are written to the error context before the
    base initialisers run. Because the context then carries a run ID,
    ``FuzzForgeError.__init__`` attempts to attach container diagnostics.

    Args:
        workflow_name: Name of the workflow that failed.
        run_id: Identifier of the failed run.
        message: Failure description, embedded in the final message.
        status_code: HTTP status code (defaults to 400).
        response_text: Raw response body, if any.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        workflow_name: str,
        run_id: str,
        message: str,
        status_code: int = 400,
        response_text: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context
        context.workflow_name = workflow_name
        context.related_run_id = run_id

        super().__init__(
            f"Workflow '{workflow_name}' execution failed (run: {run_id}): {message}",
            status_code,
            response_text,
            context,
        )
        self.workflow_name = workflow_name
        self.run_id = run_id
class WorkflowNotFoundError(FuzzForgeHTTPError):
    """HTTP 404 error raised when a requested workflow does not exist.

    Args:
        workflow_name: Name of the workflow that could not be found; also
            recorded on the error context.
        available_workflows: Names of the workflows that do exist. When
            non-empty, they replace ``context.suggested_fixes`` with hints
            listing them. ``None`` (the default) means "unknown".
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        workflow_name: str,
        # Annotated as Optional because None is the default value.
        available_workflows: Optional[List[str]] = None,
        context: Optional[ErrorContext] = None
    ):
        if context is None:
            context = ErrorContext()

        context.workflow_name = workflow_name

        if available_workflows:
            context.suggested_fixes = [
                f"Available workflows: {', '.join(available_workflows)}",
                "Use 'fuzzforge workflows list' to see all available workflows",
                "Check workflow name spelling and case sensitivity"
            ]

        message = f"Workflow not found: {workflow_name}"

        super().__init__(message, 404, context=context)
        self.workflow_name = workflow_name
        # Always expose a list so callers can iterate without a None check.
        self.available_workflows = available_workflows or []
class RunNotFoundError(FuzzForgeHTTPError):
    """HTTP 404 error raised when a requested run does not exist.

    The run ID is recorded on the error context and
    ``context.suggested_fixes`` is replaced with generic troubleshooting
    hints.

    Args:
        run_id: Identifier of the run that could not be found.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        run_id: str,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        context.related_run_id = run_id
        hints = [
            "Use 'fuzzforge runs list' to see available runs",
            "Check if the run ID is correct and complete",
            "Ensure the run hasn't been deleted or expired"
        ]
        context.suggested_fixes = hints

        super().__init__(f"Run not found: {run_id}", 404, context=context)
        self.run_id = run_id
class ContainerError(FuzzForgeError):
    """Error tied to a specific container, enriched with its diagnostics.

    Diagnostics for ``container_name`` are fetched on construction (best
    effort) and, when logs are present, scanned for known error patterns.

    Args:
        container_name: Name of the failing container.
        message: Failure description, embedded in the final message.
        exit_code: Container exit code; appended to the message when given.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        container_name: str,
        message: str,
        exit_code: Optional[int] = None,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        # Collect diagnostics straight away; failures here are non-fatal.
        try:
            diag = docker_integration.get_container_diagnostics(container_name)
            context.container_diagnostics = diag

            # Look for known error patterns in the container logs.
            if diag.logs:
                patterns = docker_integration.analyze_error_patterns(diag.logs)
                fixes = docker_integration.suggest_fixes(patterns)
                context.error_patterns = patterns
                context.suggested_fixes.extend(fixes)
        except Exception:
            # Diagnostics are optional; never let them mask the real error.
            pass

        text = f"Container error ({container_name}): {message}"
        if exit_code is not None:
            text = text + f" (exit code: {exit_code})"

        super().__init__(text, context)
        self.container_name = container_name
        self.exit_code = exit_code
class VolumeError(FuzzForgeError):
    """Error related to mounting a volume.

    ``context.suggested_fixes`` is replaced with a fixed checklist of
    volume-related troubleshooting steps.

    Args:
        volume_path: Path of the volume involved; stored on the instance.
        message: Failure description, embedded in the final message.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        volume_path: str,
        message: str,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        checklist = [
            "Check if the volume path exists and is accessible",
            "Verify file permissions (Docker needs read access)",
            "Ensure the path is not in use by another process",
            "Try using an absolute path instead of relative path",
            "Check if SELinux or AppArmor is blocking access"
        ]
        context.suggested_fixes = checklist

        super().__init__(f"Volume error ({volume_path}): {message}", context)
        self.volume_path = volume_path
class ResourceLimitError(FuzzForgeError):
    """Error raised when a resource limit is involved in a failure.

    ``context.suggested_fixes`` is replaced with hints that mention the
    affected resource type.

    Args:
        resource_type: Kind of resource (e.g. ``"memory"``); title-cased in
            the message.
        message: Failure description, embedded in the final message.
        current_usage: Usage figures at the time of the error; stored as an
            empty dict when omitted.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        resource_type: str,
        message: str,
        current_usage: Optional[Dict[str, Any]] = None,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        hints = [
            f"Increase {resource_type} limits in workflow configuration",
            "Check system resource availability",
            "Consider using a smaller dataset or batch size",
            "Monitor resource usage during execution"
        ]
        context.suggested_fixes = hints

        super().__init__(f"{resource_type.title()} limit error: {message}", context)
        self.resource_type = resource_type
        self.current_usage = current_usage or {}
class ValidationError(FuzzForgeError):
    """Data validation error, optionally tied to a specific field.

    Two call forms are supported:

    * ``ValidationError(field_name, message, ...)`` - the message becomes
      ``"Validation error for '<field_name>': <message>"``.
    * ``ValidationError(message)`` - single-argument form used by several
      helpers in this SDK. The text is used verbatim as the error message
      and ``field_name`` is stored as ``None``. Previously this form raised
      ``TypeError`` because ``message`` was a required parameter.

    Args:
        field_name: Name of the offending field, or the complete message in
            the single-argument form.
        message: Description of what is wrong with the field.
        provided_value: The rejected value; stored on the instance.
        expected_format: Description of the accepted format. When given,
            ``context.suggested_fixes`` is replaced with hints quoting it.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        field_name: str,
        message: Optional[str] = None,
        provided_value: Any = None,
        expected_format: Optional[str] = None,
        context: Optional[ErrorContext] = None
    ):
        if context is None:
            context = ErrorContext()

        if expected_format:
            context.suggested_fixes = [
                f"Expected format: {expected_format}",
                f"Provided value: {provided_value}",
                "Check parameter documentation for valid values"
            ]

        if message is None:
            # Single-argument form: the only positional argument is the
            # message itself and no field name is known.
            full_message = field_name
            field_name = None
        else:
            full_message = f"Validation error for '{field_name}': {message}"

        super().__init__(full_message, context)
        self.field_name = field_name
        self.provided_value = provided_value
        self.expected_format = expected_format
class ConnectionError(FuzzForgeError):
    """Error raised when the SDK cannot reach an endpoint.

    NOTE: inside this module the name shadows Python's builtin
    ``ConnectionError``.

    ``context.suggested_fixes`` is replaced with a fixed checklist of
    connectivity troubleshooting steps.

    Args:
        endpoint: Endpoint that could not be reached; stored on the instance.
        message: Failure description, embedded in the final message.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        endpoint: str,
        message: str,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        checklist = [
            "Check if FuzzForge backend is running (docker-compose up -d)",
            "Verify the API endpoint URL is correct",
            "Check network connectivity and firewall settings",
            "Ensure all required services are healthy",
            "Try restarting the FuzzForge services"
        ]
        context.suggested_fixes = checklist

        super().__init__(f"Connection error to {endpoint}: {message}", context)
        self.endpoint = endpoint
class TimeoutError(FuzzForgeError):
    """Error raised when an operation exceeds its time budget.

    NOTE: inside this module the name shadows Python's builtin
    ``TimeoutError``.

    ``context.suggested_fixes`` is replaced with hints that quote the
    timeout that was in effect.

    Args:
        operation: Description of the operation that timed out.
        timeout_seconds: The timeout that was exceeded, in seconds.
        context: Error context to enrich; created when omitted.
    """

    def __init__(
        self,
        operation: str,
        timeout_seconds: int,
        context: Optional[ErrorContext] = None
    ):
        context = ErrorContext() if context is None else context

        hints = [
            f"Increase timeout value (current: {timeout_seconds}s)",
            "Check if the operation is resource-intensive",
            "Verify backend services are responsive",
            "Consider breaking down large operations into smaller chunks"
        ]
        context.suggested_fixes = hints

        super().__init__(
            f"Timeout error for {operation} after {timeout_seconds} seconds",
            context,
        )
        self.operation = operation
        self.timeout_seconds = timeout_seconds
class WebSocketError(FuzzForgeError):
    """Error raised for WebSocket failures; adds nothing to ``FuzzForgeError``."""
class SSEError(FuzzForgeError):
    """Error raised for Server-Sent Events failures; adds nothing to ``FuzzForgeError``."""
# Pulls a workflow name out of free-form error text such as
# "workflow: foo" or "workflow 'foo'"; compiled once instead of per call.
_WORKFLOW_NAME_RE = re.compile(r"workflow['\"]?\s*[:\-]?\s*['\"]?(\w+)", re.IGNORECASE)


def _first_url_segment_after(url: str, marker: str) -> str:
    """Return the path segment that directly follows ``marker`` in ``url``.

    Anything from the next ``/``, ``?`` or ``#`` onwards is dropped, so a
    query string or fragment never leaks into the result. Returns an empty
    string when nothing follows the marker. ``marker`` must occur in ``url``.
    """
    tail = url.split(marker, 1)[1]
    for separator in ("/", "?", "#"):
        tail = tail.split(separator, 1)[0]
    return tail


def _guess_workflow_name(response_text: str) -> str:
    """Return the workflow name mentioned in ``response_text`` or ``"unknown"``."""
    match = _WORKFLOW_NAME_RE.search(response_text)
    return match.group(1) if match else "unknown"


def from_http_error(status_code: int, response_text: str, url: str) -> FuzzForgeError:
    """
    Create appropriate exception from HTTP error response with enhanced context.

    The exception type is chosen from the status code, the request URL and
    keywords found in the response body; anything unrecognised becomes a
    generic ``FuzzForgeHTTPError``.

    Args:
        status_code: HTTP status code
        response_text: Response body text. ``None`` or an empty body is
            tolerated and simply matches none of the keyword checks.
        url: Request URL that failed

    Returns:
        Appropriate FuzzForgeError subclass with rich context. Note that
        ``VolumeError`` and ``ResourceLimitError`` are not
        ``FuzzForgeHTTPError`` subclasses.
    """
    context = ErrorContext(url=url, response_data={"raw": response_text})

    # Try to parse JSON response for more context
    try:
        response_data = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        # Unable to parse JSON, keep the raw text stored above
        pass
    else:
        context.response_data = response_data

        # Extract additional context from structured error responses
        if isinstance(response_data, dict):
            if "run_id" in response_data:
                context.related_run_id = response_data["run_id"]
            if "workflow" in response_data:
                context.workflow_name = response_data["workflow"]

    # Lower-cased once for all keyword checks; a missing body becomes "".
    body = (response_text or "").lower()

    # Handle specific error types based on URL patterns and status codes
    if status_code == 404:
        if "/workflows/" in url and "/submit" not in url:
            workflow_name = _first_url_segment_after(url, "/workflows/")
            # An empty segment carries no name; fall through to the generic error.
            if workflow_name:
                return WorkflowNotFoundError(workflow_name, context=context)
        elif "/runs/" in url:
            run_id = _first_url_segment_after(url, "/runs/")
            if run_id:
                return RunNotFoundError(run_id, context)

    elif status_code == 400:
        # Check for specific error patterns in response
        if "deployment" in body and "not found" in body:
            return DeploymentError(
                workflow_name=_guess_workflow_name(response_text),
                message="Deployment not found",
                status_code=status_code,
                response_text=response_text,
                context=context
            )
        elif "volume" in body or "mount" in body:
            return VolumeError(
                volume_path="unknown",
                message=response_text,
                context=context
            )
        elif "memory" in body or "resource" in body:
            return ResourceLimitError(
                resource_type="memory",
                message=response_text,
                context=context
            )

    elif status_code == 500:
        # Server errors might be deployment or execution issues
        if "deployment" in body or "container" in body:
            return DeploymentError(
                workflow_name=_guess_workflow_name(response_text),
                message=response_text,
                status_code=status_code,
                response_text=response_text,
                context=context
            )

    # Generic HTTP error with enhanced context
    return FuzzForgeHTTPError(
        message=f"HTTP request failed: {response_text}",
        status_code=status_code,
        response_text=response_text,
        context=context
    )
+205
View File
@@ -0,0 +1,205 @@
"""
Pydantic models for FuzzForge API data structures.
These models mirror the backend API models and provide type-safe data validation
and serialization for all API requests and responses.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
from pydantic import BaseModel, Field, validator
from typing import Dict, Any, Optional, Literal, List, Union
from datetime import datetime
from pathlib import Path
class ResourceLimits(BaseModel):
    """Optional CPU and memory constraints for a workflow execution.

    Every field defaults to ``None``; the values are plain strings and are
    not validated by this model.
    """

    # Upper bound on CPU usage.
    cpu_limit: Optional[str] = Field(
        default=None,
        description="CPU limit (e.g., '2' for 2 cores, '500m' for 0.5 cores)",
    )
    # Upper bound on memory usage.
    memory_limit: Optional[str] = Field(
        default=None,
        description="Memory limit (e.g., '1Gi', '512Mi')",
    )
    # Guaranteed CPU share.
    cpu_request: Optional[str] = Field(
        default=None,
        description="CPU request (guaranteed)",
    )
    # Guaranteed memory share.
    memory_request: Optional[str] = Field(
        default=None,
        description="Memory request (guaranteed)",
    )
class VolumeMount(BaseModel):
    """One host directory mounted into the workflow container.

    Both paths must be absolute; ``mode`` is either ``"ro"`` (the default)
    or ``"rw"``.
    """

    host_path: str = Field(..., description="Host path to mount")
    container_path: str = Field(..., description="Container path for mount")
    mode: Literal["ro", "rw"] = Field(default="ro", description="Mount mode")

    @validator("host_path")
    def validate_host_path(cls, v):
        """Reject relative host paths; return the value as ``str(Path(v))``."""
        host = Path(v)
        if host.is_absolute():
            return str(host)
        raise ValueError(f"Host path must be absolute: {v}")

    @validator("container_path")
    def validate_container_path(cls, v):
        """Reject container paths that do not start with ``/``; return the value unchanged."""
        if v.startswith('/'):
            return v
        raise ValueError(f"Container path must be absolute: {v}")
class WorkflowSubmission(BaseModel):
    """Request payload for submitting a workflow run.

    Only ``target_path`` is required; it must be absolute. ``timeout`` is
    bounded to the range 1 second .. 7 days when given.
    """

    target_path: str = Field(..., description="Absolute path to analyze")
    volume_mode: Literal["ro", "rw"] = Field(
        default="ro",
        description="Volume mount mode: read-only (ro) or read-write (rw)",
    )
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description="Workflow-specific parameters",
    )
    timeout: Optional[int] = Field(
        default=None,
        description="Timeout in seconds (None for workflow default)",
        ge=1,
        le=7 * 24 * 60 * 60,  # 604800 seconds = 7 days
    )
    resource_limits: Optional[ResourceLimits] = Field(
        default=None,
        description="Resource limits for workflow container",
    )
    additional_volumes: List[VolumeMount] = Field(
        default_factory=list,
        description="Additional volume mounts",
    )

    @validator("target_path")
    def validate_path(cls, v):
        """Reject relative target paths; return the value as ``str(Path(v))``."""
        target = Path(v)
        if target.is_absolute():
            return str(target)
        raise ValueError(f"Path must be absolute: {v}")
class WorkflowListItem(BaseModel):
    """Short description of a workflow, as used in list views.

    ``name``, ``version`` and ``description`` are required; ``author``
    defaults to ``None`` and ``tags`` to an empty list.
    """

    name: str = Field(..., description="Workflow name")
    version: str = Field(..., description="Semantic version")
    description: str = Field(..., description="Workflow description")
    author: Optional[str] = Field(default=None, description="Workflow author")
    tags: List[str] = Field(default_factory=list, description="Workflow tags")
class WorkflowMetadata(BaseModel):
    """Full description of a workflow, including its parameter schema.

    Extends the summary fields (name, version, description, author, tags)
    with the parameter schema, default parameter values, required modules,
    supported volume modes and a flag for a custom Dockerfile.
    """

    # Summary fields.
    name: str = Field(..., description="Workflow name")
    version: str = Field(..., description="Semantic version")
    description: str = Field(..., description="Workflow description")
    author: Optional[str] = Field(default=None, description="Workflow author")
    tags: List[str] = Field(default_factory=list, description="Workflow tags")

    # Parameter schema (required) and its defaults.
    parameters: Dict[str, Any] = Field(..., description="Parameters schema")
    default_parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description="Default parameter values",
    )

    # Runtime requirements and capabilities.
    required_modules: List[str] = Field(
        default_factory=list,
        description="Required module names",
    )
    supported_volume_modes: List[Literal["ro", "rw"]] = Field(
        default=["ro", "rw"],
        description="Supported volume mount modes",
    )
    has_custom_docker: bool = Field(
        default=False,
        description="Whether workflow has custom Dockerfile",
    )
class WorkflowParametersResponse(BaseModel):
    """Payload returned by the workflow parameters endpoint.

    ``parameters`` is required; ``defaults`` and ``required`` fall back to
    an empty dict and an empty list.
    """

    parameters: Dict[str, Any] = Field(
        ...,
        description="Parameters schema",
    )
    defaults: Dict[str, Any] = Field(
        default_factory=dict,
        description="Default values",
    )
    required: List[str] = Field(
        default_factory=list,
        description="Required parameter names",
    )
class RunSubmissionResponse(BaseModel):
    """Payload returned right after a workflow has been submitted.

    ``run_id``, ``status`` and ``workflow`` are required; ``message`` has a
    fixed default text.
    """

    run_id: str = Field(
        ...,
        description="Unique run identifier",
    )
    status: str = Field(
        ...,
        description="Initial status",
    )
    workflow: str = Field(
        ...,
        description="Workflow name",
    )
    message: str = Field(
        default="Workflow submitted successfully",
    )
class WorkflowStatus(BaseModel):
    """Snapshot of the state of one workflow run.

    All fields are required: identifiers, the status string, three boolean
    state flags and the creation / last-update timestamps.
    """

    # Identification.
    run_id: str = Field(..., description="Unique run identifier")
    workflow: str = Field(..., description="Workflow name")

    # Current state.
    status: str = Field(..., description="Current status")
    is_completed: bool = Field(..., description="Whether the run is completed")
    is_failed: bool = Field(..., description="Whether the run failed")
    is_running: bool = Field(..., description="Whether the run is currently running")

    # Timestamps.
    created_at: datetime = Field(..., description="Run creation time")
    updated_at: datetime = Field(..., description="Last update time")
class WorkflowFindings(BaseModel):
    """Findings of one workflow run, carried as a SARIF document.

    ``workflow``, ``run_id`` and ``sarif`` are required; ``metadata``
    defaults to an empty dict.
    """

    workflow: str = Field(
        ...,
        description="Workflow name",
    )
    run_id: str = Field(
        ...,
        description="Unique run identifier",
    )
    sarif: Dict[str, Any] = Field(
        ...,
        description="SARIF formatted findings",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )
class FuzzingStats(BaseModel):
    """Live statistics of a fuzzing run.

    Only ``run_id`` and ``workflow`` are required. Counters default to zero;
    ``coverage`` and ``last_crash_time`` default to ``None``.
    """

    # Identification.
    run_id: str = Field(..., description="Unique run identifier")
    workflow: str = Field(..., description="Workflow name")

    # Execution counters.
    executions: int = Field(default=0, description="Total executions")
    executions_per_sec: float = Field(default=0.0, description="Current execution rate")

    # Crash counters.
    crashes: int = Field(default=0, description="Total crashes found")
    unique_crashes: int = Field(default=0, description="Unique crashes")

    # Progress indicators.
    coverage: Optional[float] = Field(default=None, description="Code coverage percentage")
    corpus_size: int = Field(default=0, description="Current corpus size")
    elapsed_time: int = Field(default=0, description="Elapsed time in seconds")
    last_crash_time: Optional[datetime] = Field(default=None, description="Time of last crash")
class CrashReport(BaseModel):
    """A single crash discovered during fuzzing.

    ``run_id`` and ``crash_id`` are required. ``timestamp`` defaults to the
    value of ``datetime.utcnow()`` at creation time (a naive datetime) and
    ``severity`` to ``"medium"``; every other field defaults to ``None``.
    """

    # Identification.
    run_id: str = Field(..., description="Run identifier")
    crash_id: str = Field(..., description="Unique crash identifier")
    timestamp: datetime = Field(default_factory=datetime.utcnow)

    # What happened.
    signal: Optional[str] = Field(default=None, description="Crash signal (SIGSEGV, etc.)")
    crash_type: Optional[str] = Field(default=None, description="Type of crash")
    stack_trace: Optional[str] = Field(default=None, description="Stack trace")

    # How to reproduce it.
    input_file: Optional[str] = Field(default=None, description="Path to crashing input")
    reproducer: Optional[str] = Field(default=None, description="Minimized reproducer")

    # Triage information.
    severity: str = Field(default="medium", description="Crash severity")
    exploitability: Optional[str] = Field(default=None, description="Exploitability assessment")
class APIStatus(BaseModel):
    """Payload returned by the API root endpoint; all fields are required."""

    name: str = Field(
        ...,
        description="API name",
    )
    version: str = Field(
        ...,
        description="API version",
    )
    status: str = Field(
        ...,
        description="API status",
    )
    workflows_loaded: int = Field(
        ...,
        description="Number of loaded workflows",
    )
class WebSocketMessage(BaseModel):
    """Envelope for real-time updates sent over a WebSocket.

    Both the message ``type`` and its ``data`` payload are required.
    """

    type: str = Field(
        ...,
        description="Message type",
    )
    data: Dict[str, Any] = Field(
        ...,
        description="Message payload",
    )
class SSEMessage(BaseModel):
    """Envelope for a Server-Sent Event.

    ``data`` is declared as ``FuzzingStats``, ``CrashReport`` or a plain
    dict, in that order; both fields are required.
    """

    type: str = Field(
        ...,
        description="Event type",
    )
    data: Union[FuzzingStats, CrashReport, Dict[str, Any]] = Field(
        ...,
        description="Event data",
    )
+415
View File
@@ -0,0 +1,415 @@
"""
Automated testing utilities for FuzzForge workflows.
This module provides high-level testing capabilities for validating
workflow functionality, performance, and expected results.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import time
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from dataclasses import dataclass
from datetime import datetime
import logging
from .client import FuzzForgeClient
from .models import WorkflowSubmission
from .utils import validate_absolute_path, create_workflow_submission
from .exceptions import FuzzForgeError, ValidationError
logger = logging.getLogger(__name__)
@dataclass
class TestResult:
    """Outcome of running one workflow against one test project.

    Only ``workflow_name``, ``test_project_path`` and ``passed`` are
    required; the remaining fields describe the run and default to
    "nothing recorded" values.
    """

    workflow_name: str
    test_project_path: str
    passed: bool
    run_id: Optional[str] = None
    findings_count: int = 0
    execution_time: float = 0.0
    error: Optional[str] = None
    expected_min_findings: int = 0
    # None is only a placeholder; __post_init__ swaps it for a fresh dict so
    # that instances never share one mutable default.
    details: Dict[str, Any] = None

    def __post_init__(self):
        """Replace a missing ``details`` value with a new empty dict."""
        self.details = {} if self.details is None else self.details
@dataclass
class TestSummary:
    """Aggregated outcome of a batch of workflow tests.

    Holds the counters, the individual ``TestResult`` objects and the timing
    of the whole batch.
    """

    total: int
    passed: int
    failed: int
    tests: List[TestResult]
    start_time: datetime
    end_time: Optional[datetime] = None
    total_duration: float = 0.0

    @property
    def failed_tests(self) -> List[TestResult]:
        """Return the results in ``tests`` whose ``passed`` flag is falsy."""
        return [result for result in self.tests if not result.passed]

    @property
    def success_rate(self) -> float:
        """Return ``passed / total`` as a percentage, or 0.0 when ``total`` is 0."""
        return 0.0 if self.total == 0 else (self.passed / self.total) * 100
# Built-in test configuration, keyed by workflow name. Each entry names the
# test project directory, the minimum number of findings required for the
# test to pass, the timeout in seconds and a short description.
DEFAULT_TEST_CONFIG = {
    "static_analysis_scan": {
        "test_project": "static_analysis_vulnerable",
        # At least 3 findings expected.
        "expected_min_findings": 3,
        # 5 minutes.
        "timeout": 300,
        "description": "Tests OpenGrep and Bandit static analysis tools",
    },
    "secret_detection_scan": {
        "test_project": "secret_detection_vulnerable",
        # At least 5 secrets expected.
        "expected_min_findings": 5,
        # 3 minutes.
        "timeout": 180,
        "description": "Tests TruffleHog and Gitleaks secret detection",
    },
    "infrastructure_scan": {
        "test_project": "infrastructure_vulnerable",
        # At least 8 IaC issues expected.
        "expected_min_findings": 8,
        # 4 minutes.
        "timeout": 240,
        "description": "Tests Checkov, Hadolint, and other IaC security tools",
    },
    "penetration_testing_scan": {
        "test_project": "penetration_testing_vulnerable",
        # At least 4 vulnerabilities expected.
        "expected_min_findings": 4,
        # 7 minutes (needs time to start services).
        "timeout": 420,
        "description": "Tests Nuclei penetration testing tools",
    },
    "security_assessment": {
        "test_project": "security_assessment_comprehensive",
        # At least 10 mixed findings expected.
        "expected_min_findings": 10,
        # 10 minutes (comprehensive scan).
        "timeout": 600,
        "description": "Comprehensive security assessment across all categories",
    },
}
class WorkflowTester:
    """
    High-level testing utilities for FuzzForge workflows.

    This class provides methods to easily test individual workflows or
    run comprehensive test suites against all available workflows.
    """

    def __init__(self, client: FuzzForgeClient, test_projects_base_path: Optional[str] = None):
        """
        Initialize the workflow tester.

        When no base path is given, a ``test_projects`` directory is searched
        for next to the current working directory, in its parent, and finally
        under ``/app``. If none exists, ``<cwd>/test_projects`` is used anyway
        and a warning is logged.

        Args:
            client: FuzzForge client instance
            test_projects_base_path: Base path to test projects directory
        """
        self.client = client
        self.test_projects_base_path = test_projects_base_path

        if not test_projects_base_path:
            # Try to auto-detect test projects path
            current_dir = Path.cwd()
            candidates = [
                current_dir / "test_projects",
                current_dir.parent / "test_projects",
                current_dir / ".." / "test_projects",
                Path("/app/test_projects"),  # Inside Docker container (last resort)
            ]

            for candidate in candidates:
                if candidate.exists() and candidate.is_dir():
                    self.test_projects_base_path = str(candidate.resolve())
                    logger.info(f"Auto-detected test projects at: {self.test_projects_base_path}")
                    break

            if not self.test_projects_base_path:
                logger.warning("Could not auto-detect test projects path. Please specify explicitly.")
                self.test_projects_base_path = str(current_dir / "test_projects")

    @staticmethod
    def _count_findings(findings: Any) -> int:
        """Return the number of findings carried by a findings object.

        A top-level ``total_findings`` entry in the SARIF dict wins when
        present. Otherwise the entries of ``runs[*].results`` are counted,
        which is where a standard SARIF document keeps its results. Returns 0
        when there is no SARIF data at all.
        """
        sarif = getattr(findings, 'sarif', None)
        if not sarif:
            return 0
        if 'total_findings' in sarif:
            return sarif.get('total_findings', 0)
        return sum(
            len(run.get('results') or [])
            for run in (sarif.get('runs') or [])
            if isinstance(run, dict)
        )

    def test_workflow(
        self,
        workflow_name: str,
        test_project_path: Optional[str] = None,
        expected_min_findings: Optional[int] = None,
        timeout: int = 300,
        **workflow_params
    ) -> TestResult:
        """
        Test a single workflow against a test project.

        The test passes when the run completes, is not marked as failed and
        reports at least ``expected_min_findings`` findings. Any exception
        raised along the way is caught and reported as a failed
        ``TestResult``; this method does not raise.

        Args:
            workflow_name: Name of the workflow to test
            test_project_path: Path to test project (or relative to base path)
            expected_min_findings: Minimum expected findings for test to pass
            timeout: Timeout in seconds. The default value 300 is treated as
                "not specified" and replaced by the timeout configured for
                the workflow in ``DEFAULT_TEST_CONFIG``, if any.
            **workflow_params: Additional workflow parameters

        Returns:
            TestResult with test outcome and details
        """
        start_time = time.time()

        try:
            # Get test configuration
            config = DEFAULT_TEST_CONFIG.get(workflow_name, {})

            if expected_min_findings is None:
                expected_min_findings = config.get("expected_min_findings", 0)

            if timeout == 300:  # Use config timeout if default
                timeout = config.get("timeout", 300)

            # Resolve test project path
            if test_project_path is None:
                test_project_name = config.get("test_project")
                if not test_project_name:
                    # ValidationError takes (field_name, message).
                    raise ValidationError(
                        "workflow_name",
                        f"No test project configured for workflow: {workflow_name}"
                    )
                test_project_path = str(Path(self.test_projects_base_path) / test_project_name)
            elif not Path(test_project_path).is_absolute():
                test_project_path = str(Path(self.test_projects_base_path) / test_project_path)

            # Validate path exists
            test_path = validate_absolute_path(test_project_path)

            logger.info(f"Testing workflow '{workflow_name}' with project: {test_path}")

            # Create workflow submission
            submission = create_workflow_submission(
                target_path=str(test_path),
                volume_mode="ro",
                **workflow_params
            )

            # Submit workflow
            response = self.client.submit_workflow(workflow_name, submission)
            run_id = response.run_id

            logger.info(f"Workflow submitted with run_id: {run_id}")

            # Wait for completion
            final_status = self.client.wait_for_completion(
                run_id=run_id,
                timeout=timeout,
                poll_interval=5
            )

            # Get findings and count them from the SARIF data, if available
            findings = self.client.get_run_findings(run_id)
            findings_count = self._count_findings(findings)

            execution_time = time.time() - start_time

            # Determine if test passed
            passed = (
                final_status.is_completed and
                not final_status.is_failed and
                findings_count >= expected_min_findings
            )

            result = TestResult(
                workflow_name=workflow_name,
                test_project_path=test_project_path,
                passed=passed,
                run_id=run_id,
                findings_count=findings_count,
                execution_time=execution_time,
                expected_min_findings=expected_min_findings,
                details={
                    "status": final_status.status,
                    "sarif_summary": getattr(findings, 'sarif', {}),
                    "config_used": config.get("description", ""),
                    "timeout_used": timeout
                }
            )

            if not passed:
                if final_status.is_failed:
                    result.error = f"Workflow execution failed with status: {final_status.status}"
                elif findings_count < expected_min_findings:
                    result.error = f"Found {findings_count} findings, expected at least {expected_min_findings}"
                else:
                    # Neither failed nor short on findings: the run simply
                    # did not reach a completed state.
                    result.error = f"Workflow did not complete (status: {final_status.status})"

            logger.info(f"Test {'PASSED' if passed else 'FAILED'}: {workflow_name}")
            return result

        except Exception as e:
            execution_time = time.time() - start_time
            error_msg = f"Test execution failed: {str(e)}"
            logger.error(error_msg)

            return TestResult(
                workflow_name=workflow_name,
                test_project_path=test_project_path or "unknown",
                passed=False,
                execution_time=execution_time,
                expected_min_findings=expected_min_findings or 0,
                error=error_msg
            )

    def test_all_workflows(
        self,
        workflows: Optional[List[str]] = None,
        parallel: bool = False
    ) -> TestSummary:
        """
        Test all available workflows.

        Workflows are tested one after another with their default
        configuration. If the suite itself fails (for example the workflow
        list cannot be fetched), a summary with ``total=0``, ``failed=1`` and
        no test results is returned and the error is only logged.

        Args:
            workflows: List of specific workflows to test (defaults to all available)
            parallel: Whether to run tests in parallel (not yet implemented)

        Returns:
            TestSummary with results from all tests
        """
        start_time = datetime.now()

        try:
            # Get available workflows if not specified
            if workflows is None:
                workflow_list = self.client.list_workflows()
                workflows = [w.name for w in workflow_list]

            logger.info(f"Testing {len(workflows)} workflows: {', '.join(workflows)}")

            results = []

            # Test each workflow
            for workflow_name in workflows:
                logger.info(f"Testing workflow: {workflow_name}")
                results.append(self.test_workflow(workflow_name))

            end_time = datetime.now()
            total_duration = (end_time - start_time).total_seconds()

            passed = sum(1 for r in results if r.passed)
            failed = len(results) - passed

            summary = TestSummary(
                total=len(results),
                passed=passed,
                failed=failed,
                tests=results,
                start_time=start_time,
                end_time=end_time,
                total_duration=total_duration
            )

            logger.info(f"Testing complete: {passed}/{len(results)} workflows passed")
            return summary

        except Exception as e:
            logger.error(f"Test suite execution failed: {e}")
            end_time = datetime.now()

            return TestSummary(
                total=0,
                passed=0,
                failed=1,
                tests=[],
                start_time=start_time,
                end_time=end_time,
                total_duration=(end_time - start_time).total_seconds()
            )

    def validate_workflow_deployment(self, workflow_name: str) -> bool:
        """
        Validate that a workflow is properly deployed and available.

        Args:
            workflow_name: Name of the workflow to validate

        Returns:
            True if workflow is available, False otherwise (including when
            the workflow list cannot be retrieved)
        """
        try:
            workflows = self.client.list_workflows()
            available_workflows = [w.name for w in workflows]
            return workflow_name in available_workflows
        except Exception as e:
            logger.error(f"Failed to validate workflow deployment: {e}")
            return False

    def get_test_project_path(self, project_name: str) -> str:
        """
        Get the full path to a test project.

        Args:
            project_name: Name of the test project

        Returns:
            Full path to the test project (base path joined with the name;
            existence is not checked)
        """
        return str(Path(self.test_projects_base_path) / project_name)
def format_test_summary(summary: TestSummary, detailed: bool = False) -> str:
    """
    Format a test summary for display.

    The report always contains a header and the summary statistics. With
    ``detailed=True`` one entry per test follows, prefixed with a pass/fail
    icon. A "Failed Tests" section is appended whenever at least one test in
    the summary failed.

    Args:
        summary: Test summary to format
        detailed: Whether to include detailed results

    Returns:
        Formatted string representation
    """
    rule = "=" * 60

    # Header and summary stats
    lines = [
        rule,
        "FuzzForge Workflow Test Results",
        rule,
        f"Total Tests: {summary.total}",
        f"Passed: {summary.passed}",
        f"Failed: {summary.failed}",
        f"Success Rate: {summary.success_rate:.1f}%",
        f"Total Duration: {summary.total_duration:.1f}s",
        "",
    ]

    if detailed and summary.tests:
        lines.append("Detailed Results:")
        lines.append("-" * 40)

        for test in summary.tests:
            # Both branches were empty strings in the reviewed source, which
            # made passed and failed tests indistinguishable in the output.
            status_icon = "✅" if test.passed else "❌"
            lines.append(f"{status_icon} {test.workflow_name}")
            lines.append(f" Project: {Path(test.test_project_path).name}")
            lines.append(f" Findings: {test.findings_count} (expected ≥{test.expected_min_findings})")
            lines.append(f" Duration: {test.execution_time:.1f}s")

            if test.error:
                lines.append(f" Error: {test.error}")

            lines.append("")

    # Failed tests summary
    failed_tests = summary.failed_tests
    if failed_tests:
        lines.append("Failed Tests:")
        lines.append("-" * 40)
        lines.extend(
            f"{test.workflow_name}: {test.error or 'Unknown error'}"
            for test in failed_tests
        )
        lines.append("")

    return "\n".join(lines)
+432
View File
@@ -0,0 +1,432 @@
"""
Utility functions for the FuzzForge SDK.
Provides helper functions for path validation, SARIF processing,
volume mount creation, and other common operations.
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import os
import json
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from datetime import datetime
from .models import VolumeMount, ResourceLimits, WorkflowSubmission
from .exceptions import ValidationError
def validate_absolute_path(path: Union[str, Path]) -> Path:
    """
    Check that a filesystem path is absolute and present on disk.

    Args:
        path: Candidate path, given as a string or a Path

    Returns:
        The candidate wrapped in a Path object (it is not resolved or
        otherwise normalised)

    Raises:
        ValidationError: If the path is relative, or if nothing exists at it
    """
    candidate = Path(path)

    # Absoluteness is decided first, so a relative path is always reported as
    # "must be absolute" even when it also happens not to exist.
    if candidate.is_absolute():
        if candidate.exists():
            return candidate
        raise ValidationError(f"Path does not exist: {path}")

    raise ValidationError(f"Path must be absolute: {path}")
def create_volume_mount(
    host_path: Union[str, Path],
    container_path: str,
    mode: str = "ro"
) -> VolumeMount:
    """
    Build a VolumeMount after checking both sides of the mount.

    Args:
        host_path: Path on the host; must be absolute and must exist
        container_path: Mount point inside the container; must start with '/'
        mode: Access mode, either "ro" or "rw"

    Returns:
        VolumeMount describing the validated mount

    Raises:
        ValidationError: If the host path is relative or missing, the
            container path does not start with '/', or the mode is unknown
    """
    # Checks run in a fixed order (host path, container path, mode); the first
    # failing check determines which error the caller sees.
    checked_host = validate_absolute_path(host_path)

    container_is_absolute = container_path.startswith('/')
    if not container_is_absolute:
        raise ValidationError(f"Container path must be absolute: {container_path}")

    if mode not in ("ro", "rw"):
        raise ValidationError(f"Mode must be 'ro' or 'rw': {mode}")

    host_as_text = str(checked_host)
    return VolumeMount(
        host_path=host_as_text,
        container_path=container_path,
        mode=mode,  # type: ignore
    )
def create_resource_limits(
    cpu_limit: Optional[str] = None,
    memory_limit: Optional[str] = None,
    cpu_request: Optional[str] = None,
    memory_request: Optional[str] = None
) -> ResourceLimits:
    """
    Build a ResourceLimits object after a shallow format check.

    Every argument is optional; a value of None skips the check for that
    field and is passed through unchanged.

    Args:
        cpu_limit: CPU limit (e.g., "2", "500m")
        memory_limit: Memory limit (e.g., "1Gi", "512Mi")
        cpu_request: CPU request (guaranteed)
        memory_request: Memory request (guaranteed)

    Returns:
        ResourceLimits carrying the four values as given

    Raises:
        ValidationError: If a supplied value fails its format check
    """
    memory_suffixes = ('Ki', 'Mi', 'Gi', 'Ti', 'K', 'M', 'G', 'T')

    def _looks_like_cpu(spec: str) -> bool:
        # Accepts all-digit strings or anything ending in 'm'.
        # NOTE(review): the part before 'm' is not checked, and fractional
        # values such as "0.5" are rejected -- confirm this is intended.
        return spec.endswith('m') or spec.isdigit()

    def _looks_like_memory(spec: str) -> bool:
        # Accepts all-digit strings or anything ending in a known unit
        # suffix; the part before the suffix is not inspected.
        return spec.endswith(memory_suffixes) or spec.isdigit()

    # CPU fields are checked before memory fields, limit before request, so
    # the first offending value in that order is the one reported.
    if cpu_limit is not None and not _looks_like_cpu(cpu_limit):
        raise ValidationError(f"Invalid CPU limit format: {cpu_limit}")
    if cpu_request is not None and not _looks_like_cpu(cpu_request):
        raise ValidationError(f"Invalid CPU request format: {cpu_request}")
    if memory_limit is not None and not _looks_like_memory(memory_limit):
        raise ValidationError(f"Invalid memory limit format: {memory_limit}")
    if memory_request is not None and not _looks_like_memory(memory_request):
        raise ValidationError(f"Invalid memory request format: {memory_request}")

    return ResourceLimits(
        cpu_limit=cpu_limit,
        memory_limit=memory_limit,
        cpu_request=cpu_request,
        memory_request=memory_request,
    )
def create_workflow_submission(
    target_path: Union[str, Path],
    volume_mode: str = "ro",
    parameters: Optional[Dict[str, Any]] = None,
    timeout: Optional[int] = None,
    resource_limits: Optional[ResourceLimits] = None,
    additional_volumes: Optional[List[VolumeMount]] = None
) -> WorkflowSubmission:
    """
    Assemble a WorkflowSubmission from validated inputs.

    Args:
        target_path: Path to analyze; must be absolute and must exist
        volume_mode: Mount mode for the target path, "ro" or "rw"
        parameters: Workflow-specific parameters (empty dict when omitted)
        timeout: Execution timeout in seconds, from 1 to 604800 (7 days)
        resource_limits: Resource limits for the container
        additional_volumes: Extra volume mounts (empty list when omitted)

    Returns:
        WorkflowSubmission populated with the validated values

    Raises:
        ValidationError: If the target path, volume mode or timeout is invalid
    """
    # Validation order is target path, then volume mode, then timeout.
    checked_target = validate_absolute_path(target_path)

    if volume_mode not in ("ro", "rw"):
        raise ValidationError(f"Volume mode must be 'ro' or 'rw': {volume_mode}")

    # A timeout of None means "not specified" and is forwarded untouched.
    timeout_out_of_range = timeout is not None and (timeout < 1 or timeout > 604800)
    if timeout_out_of_range:
        raise ValidationError(f"Timeout must be between 1 and 604800 seconds: {timeout}")

    # Falsy containers (None or empty) are replaced by fresh empty ones.
    effective_parameters = parameters if parameters else {}
    effective_volumes = additional_volumes if additional_volumes else []

    return WorkflowSubmission(
        target_path=str(checked_target),
        volume_mode=volume_mode,  # type: ignore
        parameters=effective_parameters,
        timeout=timeout,
        resource_limits=resource_limits,
        additional_volumes=effective_volumes,
    )
def extract_sarif_results(sarif_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Flatten the "results" arrays of every run in a SARIF document.

    Args:
        sarif_data: SARIF formatted data

    Returns:
        All result objects, in document order, across all runs

    Raises:
        ValidationError: If the document is not a dict or "runs" is not a list
    """
    if not isinstance(sarif_data, dict):
        raise ValidationError("SARIF data must be a dictionary")

    all_runs = sarif_data.get("runs", [])
    if not isinstance(all_runs, list):
        raise ValidationError("SARIF runs must be a list")

    # Malformed entries are tolerated rather than rejected: runs that are not
    # dicts, and "results" values that are not lists, are silently skipped.
    per_run_results = (
        run.get("results", []) for run in all_runs if isinstance(run, dict)
    )

    flattened = []
    for chunk in per_run_results:
        if isinstance(chunk, list):
            flattened += chunk
    return flattened
def count_sarif_severity_levels(sarif_data: Dict[str, Any]) -> Dict[str, int]:
    """
    Tally SARIF results by their "level" field.

    Args:
        sarif_data: SARIF formatted data

    Returns:
        Dictionary with exactly the keys "error", "warning", "note" and
        "info", each mapped to the number of results at that level
    """
    tally = {bucket: 0 for bucket in ("error", "warning", "note", "info")}

    for finding in extract_sarif_results(sarif_data):
        reported = finding.get("level", "warning")
        # A missing level, or any value outside the four known buckets, is
        # counted as a warning.
        bucket = reported if reported in tally else "warning"
        tally[bucket] += 1

    return tally
def format_sarif_summary(sarif_data: Dict[str, Any]) -> str:
    """
    Render a one-line, human-readable summary of SARIF findings.

    Args:
        sarif_data: SARIF formatted data

    Returns:
        "No findings detected." when there are no results; otherwise the
        total followed by each non-zero severity count, joined with " | "
    """
    counts_by_level = count_sarif_severity_levels(sarif_data)
    grand_total = sum(counts_by_level.values())

    if grand_total == 0:
        return "No findings detected."

    # Levels with a zero count are left out to keep the line short.
    per_level = [
        f"{level.title()}: {count}"
        for level, count in counts_by_level.items()
        if count > 0
    ]
    return " | ".join([f"Total findings: {grand_total}", *per_level])
def save_sarif_to_file(sarif_data: Dict[str, Any], file_path: Union[str, Path]) -> None:
    """
    Save SARIF data to a JSON file.

    The data is written as UTF-8 JSON with a two-space indent and without
    ASCII escaping. Missing parent directories are created.

    Args:
        sarif_data: SARIF formatted data
        file_path: Path to save the file

    Raises:
        ValidationError: If the data cannot be serialised to JSON or the
            file cannot be written
    """
    try:
        # Serialise before touching the filesystem so that a serialisation
        # failure does not leave an empty or truncated file behind.
        payload = json.dumps(sarif_data, indent=2, ensure_ascii=False)

        path_obj = Path(file_path)
        # Create parent directories if they don't exist
        path_obj.parent.mkdir(parents=True, exist_ok=True)
        with open(path_obj, 'w', encoding='utf-8') as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        # The json module has no JSONEncodeError: encoding problems surface as
        # TypeError (unserialisable object) or ValueError (e.g. circular
        # reference), and filesystem problems as OSError.
        raise ValidationError(f"Failed to save SARIF file: {e}") from e
def format_duration(seconds: int) -> str:
    """
    Turn a number of seconds into a compact "h/m/s" string.

    Args:
        seconds: Duration in seconds

    Returns:
        "<s>s" below one minute, "<m>m <s>s" below one hour, and
        "<h>h <m>m <s>s" otherwise (hours are not rolled up into days)
    """
    # Anything under a minute, including negative input, is shown as-is.
    if seconds < 60:
        return f"{seconds}s"

    if seconds < 3600:
        whole_minutes, leftover_secs = divmod(seconds, 60)
        return f"{whole_minutes}m {leftover_secs}s"

    whole_hours, below_hour = divmod(seconds, 3600)
    whole_minutes, leftover_secs = divmod(below_hour, 60)
    return f"{whole_hours}h {whole_minutes}m {leftover_secs}s"
def format_execution_rate(executions_per_sec: float) -> str:
    """
    Render an execution rate with precision suited to its magnitude.

    Args:
        executions_per_sec: Executions per second

    Returns:
        Two decimals below 1 exec/s, one decimal below 1000 exec/s, and
        thousands with a "k" suffix and one decimal from 1000 exec/s upwards
    """
    rate = executions_per_sec

    # Slow rates get an extra decimal so they do not collapse to "0.0".
    if rate < 1:
        return f"{rate:.2f} exec/s"

    if rate < 1000:
        return f"{rate:.1f} exec/s"

    in_thousands = rate / 1000
    return f"{in_thousands:.1f}k exec/s"
def format_memory_size(size_bytes: int) -> str:
    """
    Express a byte count in the largest fitting binary-scaled unit.

    Args:
        size_bytes: Size in bytes

    Returns:
        The size with one decimal and a unit from B up to PB; each step
        divides by 1024, and values beyond the PB range stay in PB
    """
    scaled = size_bytes

    for unit in ("B", "KB", "MB", "GB", "TB"):
        if scaled < 1024.0:
            break
        scaled = scaled / 1024.0
    else:
        # Ran out of smaller units without dropping below 1024.
        unit = "PB"

    return f"{scaled:.1f} {unit}"
def get_project_files(
    project_path: Union[str, Path],
    extensions: Optional[List[str]] = None,
    exclude_dirs: Optional[List[str]] = None
) -> List[Path]:
    """
    Get list of files in a project directory.

    The tree is walked recursively. Directories whose name is excluded are
    pruned, so nothing beneath them is visited.

    Args:
        project_path: Path to project directory (absolute, must exist)
        extensions: List of file extensions to include (e.g., ['.py', '.js']).
            None or an empty list means "include every file".
        exclude_dirs: List of directory names to exclude (e.g., ['.git',
            'node_modules']). None selects the built-in defaults; an empty
            list disables exclusion entirely.

    Returns:
        Sorted list of file paths

    Raises:
        ValidationError: If project path is invalid or is not a directory
    """
    project_path_obj = validate_absolute_path(project_path)
    if not project_path_obj.is_dir():
        raise ValidationError(f"Project path must be a directory: {project_path}")

    # Only fall back to the defaults when the caller passed nothing; an
    # explicit empty list must be honoured as "exclude nothing".
    if exclude_dirs is None:
        exclude_dirs = ['.git', '__pycache__', 'node_modules', '.pytest_cache']
    excluded_names = set(exclude_dirs)

    # str.endswith accepts a tuple, which checks all suffixes in one call.
    wanted_suffixes = tuple(extensions or ())

    files = []
    for root, dirs, filenames in os.walk(project_path_obj):
        # Assign in place: os.walk only skips a directory if it is removed
        # from the very list object it handed out.
        dirs[:] = [d for d in dirs if d not in excluded_names]

        root_path = Path(root)
        for filename in filenames:
            # Filter by extensions if specified
            if wanted_suffixes and not filename.endswith(wanted_suffixes):
                continue
            files.append(root_path / filename)

    return sorted(files)
def estimate_analysis_time(
    project_path: Union[str, Path],
    workflow_type: str = "static"
) -> int:
    """
    Produce a very rough run-time estimate for analysing a project.

    The estimate is a size-based baseline for the workflow type, multiplied
    by a factor that grows with the number of files.

    Args:
        project_path: Path to project directory
        workflow_type: Type of workflow ("static", "dynamic", "fuzzing");
            any other value is treated as an unknown type

    Returns:
        Estimated time in seconds

    Raises:
        ValidationError: If project path is invalid
    """
    project_files = get_project_files(project_path)
    total_bytes = sum(
        entry.stat().st_size for entry in project_files if entry.exists()
    )

    # workflow type -> (minimum seconds, bytes assumed processed per second)
    throughput_table = {
        "static": (30, 1024 * 1024),   # ~1MB per second
        "dynamic": (60, 512 * 1024),   # dynamic analysis is slower
    }
    unknown_type = (60, 1024 * 1024)

    if workflow_type == "fuzzing":
        # Fuzzing can run for hours/days, so size is ignored: default 1 hour.
        baseline = 3600
    else:
        floor_seconds, bytes_per_second = throughput_table.get(
            workflow_type, unknown_type
        )
        baseline = max(floor_seconds, total_bytes // bytes_per_second)

    # One multiplier step per 100 files, never below 1. This also scales the
    # fixed fuzzing baseline.
    file_multiplier = max(1, len(project_files) // 100)
    return baseline * file_multiplier