Initial commit

This commit is contained in:
Tanguy Duhamel
2025-09-29 21:26:41 +02:00
parent f0fd367ed8
commit 323a434c73
208 changed files with 72069 additions and 53 deletions
+11
View File
@@ -0,0 +1,11 @@
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
+325
View File
@@ -0,0 +1,325 @@
"""
API endpoints for fuzzing workflow management and real-time monitoring
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import logging
from typing import List, Dict, Any
from fastapi import APIRouter, HTTPException, Depends, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse
import asyncio
import json
from datetime import datetime
from src.models.findings import (
FuzzingStats,
CrashReport
)
from src.core.workflow_discovery import WorkflowDiscovery
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Routes declared on this router are registered under the /fuzzing prefix.
router = APIRouter(prefix="/fuzzing", tags=["fuzzing"])
# In-memory storage for real-time stats (in production, use Redis or similar)
# Every mapping below is keyed by run_id.
fuzzing_stats: Dict[str, FuzzingStats] = {}  # latest stats snapshot per run
crash_reports: Dict[str, List[CrashReport]] = {}  # crash reports received per run
active_connections: Dict[str, List[WebSocket]] = {}  # WebSocket subscribers per run
def initialize_fuzzing_tracking(run_id: str, workflow_name: str):
    """
    Register a run with the in-memory fuzzing tracking stores.

    Meant to be called when a workflow is submitted so that stats, crash
    reports and live subscribers can be tracked for the run. Any entries
    already stored under ``run_id`` are replaced.

    Args:
        run_id: The run identifier
        workflow_name: Name of the workflow
    """
    # Build the stats object first so nothing is registered if it fails.
    initial_stats = FuzzingStats(run_id=run_id, workflow=workflow_name)

    fuzzing_stats[run_id] = initial_stats
    crash_reports[run_id] = []
    active_connections[run_id] = []
@router.get("/{run_id}/stats", response_model=FuzzingStats)
async def get_fuzzing_stats(run_id: str) -> FuzzingStats:
    """
    Return the latest statistics snapshot stored for a fuzzing run.

    Args:
        run_id: The fuzzing run ID

    Returns:
        Current fuzzing statistics

    Raises:
        HTTPException: 404 if run not found
    """
    try:
        return fuzzing_stats[run_id]
    except KeyError:
        # Unknown run: report it as a 404 rather than an internal error.
        raise HTTPException(
            status_code=404,
            detail=f"Fuzzing run not found: {run_id}"
        ) from None
@router.get("/{run_id}/crashes", response_model=List[CrashReport])
async def get_crash_reports(run_id: str) -> List[CrashReport]:
    """
    Return every crash report recorded so far for a fuzzing run.

    Args:
        run_id: The fuzzing run ID

    Returns:
        List of crash reports

    Raises:
        HTTPException: 404 if run not found
    """
    try:
        return crash_reports[run_id]
    except KeyError:
        # Unknown run: report it as a 404 rather than an internal error.
        raise HTTPException(
            status_code=404,
            detail=f"Fuzzing run not found: {run_id}"
        ) from None
@router.post("/{run_id}/stats")
async def update_fuzzing_stats(run_id: str, stats: FuzzingStats):
    """
    Update fuzzing statistics (called by fuzzing workflows).

    Stores the new snapshot for the run and pushes it as a ``stats_update``
    message to every WebSocket client currently subscribed to the run.

    Args:
        run_id: The fuzzing run ID
        stats: Updated statistics

    Raises:
        HTTPException: 404 if run not found
    """
    if run_id not in fuzzing_stats:
        raise HTTPException(
            status_code=404,
            detail=f"Fuzzing run not found: {run_id}"
        )

    # Update stats
    fuzzing_stats[run_id] = stats

    # Debug: log reception for live instrumentation
    try:
        logger.info(
            "Received fuzzing stats update: run_id=%s exec=%s eps=%.2f crashes=%s corpus=%s elapsed=%ss",
            run_id,
            stats.executions,
            stats.executions_per_sec,
            stats.crashes,
            stats.corpus_size,
            stats.elapsed_time,
        )
    except Exception:
        # Logging is best-effort and must never fail the update itself.
        logger.debug("Could not log stats update for run %s", run_id, exc_info=True)

    # Notify connected WebSocket clients
    subscribers = active_connections.get(run_id)
    if not subscribers:
        return

    # Serialize once, outside the send loop. mode="json" converts datetimes
    # (e.g. last_crash_time) into JSON-safe values; a plain model_dump() makes
    # json.dumps raise, which the loop below would misread as a dead client.
    payload = json.dumps({
        "type": "stats_update",
        "data": stats.model_dump(mode="json")
    })

    for websocket in list(subscribers):  # Copy to avoid modification during iteration
        try:
            await websocket.send_text(payload)
        except Exception:
            # Remove disconnected clients; the socket's own handler may have
            # removed it already while we were awaiting.
            if websocket in subscribers:
                subscribers.remove(websocket)
@router.post("/{run_id}/crash")
async def report_crash(run_id: str, crash: CrashReport):
    """
    Report a new crash (called by fuzzing workflows).

    The crash is appended to the run's crash list (created on demand), the
    run's stats are updated if the run is tracked, and a ``crash_report``
    message is pushed to every subscribed WebSocket client.

    Args:
        run_id: The fuzzing run ID
        crash: Crash report details
    """
    # Add crash report
    crash_reports.setdefault(run_id, []).append(crash)

    # Update stats
    run_stats = fuzzing_stats.get(run_id)
    if run_stats is not None:
        run_stats.crashes += 1
        run_stats.last_crash_time = crash.timestamp

    # Notify connected WebSocket clients
    subscribers = active_connections.get(run_id)
    if not subscribers:
        return

    # Serialize once, outside the send loop. mode="json" turns the crash
    # timestamp into a JSON-safe value; with a plain model_dump() json.dumps
    # raises and the loop below would evict every healthy subscriber.
    payload = json.dumps({
        "type": "crash_report",
        "data": crash.model_dump(mode="json")
    })

    for websocket in list(subscribers):  # Copy to avoid modification during iteration
        try:
            await websocket.send_text(payload)
        except Exception:
            # Remove disconnected clients; the socket's own handler may have
            # removed it already while we were awaiting.
            if websocket in subscribers:
                subscribers.remove(websocket)
@router.websocket("/{run_id}/live")
async def websocket_endpoint(websocket: WebSocket, run_id: str):
    """
    WebSocket endpoint for real-time fuzzing updates.

    After accepting the connection the socket is registered as a subscriber
    of the run and, if stats exist, receives the current snapshot. The
    handler then answers ``"ping"`` with ``"pong"`` and sends a heartbeat
    whenever 30 seconds pass without a message from the client.

    Args:
        websocket: WebSocket connection
        run_id: The fuzzing run ID to monitor
    """
    await websocket.accept()

    # Initialize connection tracking
    active_connections.setdefault(run_id, []).append(websocket)

    try:
        # Send current stats on connection
        if run_id in fuzzing_stats:
            current = fuzzing_stats[run_id]
            if isinstance(current, dict):
                payload = current
            elif hasattr(current, "model_dump"):
                # mode="json" keeps datetime fields serializable by json.dumps.
                payload = current.model_dump(mode="json")
            elif hasattr(current, "dict"):
                payload = current.dict()
            else:
                payload = getattr(current, "__dict__", {"run_id": run_id})
            message = {"type": "stats_update", "data": payload}
            await websocket.send_text(json.dumps(message))

        # Keep connection alive
        while True:
            try:
                # Wait for ping or handle disconnect
                data = await asyncio.wait_for(websocket.receive_text(), timeout=30.0)
                # Echo back for ping-pong
                if data == "ping":
                    await websocket.send_text("pong")
            except asyncio.TimeoutError:
                # Send periodic heartbeat
                await websocket.send_text(json.dumps({"type": "heartbeat"}))
    except WebSocketDisconnect:
        # Normal client disconnect; cleanup happens in ``finally``.
        logger.debug("WebSocket client disconnected from run %s", run_id)
    except Exception as e:
        logger.error(f"WebSocket error for run {run_id}: {e}")
    finally:
        # Single cleanup path that also runs when the handler is cancelled.
        # Look the list up again: it may have been replaced or removed since.
        connections = active_connections.get(run_id)
        if connections is not None and websocket in connections:
            connections.remove(websocket)
@router.get("/{run_id}/stream")
async def stream_fuzzing_updates(run_id: str):
    """
    Server-Sent Events endpoint for real-time fuzzing updates.

    Every 5 seconds the stream emits the current stats snapshot followed by
    any crash reports recorded since the previous tick. Crashes recorded
    before the stream was opened are not replayed.

    Args:
        run_id: The fuzzing run ID to monitor

    Returns:
        Streaming response with real-time updates

    Raises:
        HTTPException: 404 if run not found
    """
    if run_id not in fuzzing_stats:
        raise HTTPException(
            status_code=404,
            detail=f"Fuzzing run not found: {run_id}"
        )

    async def event_stream():
        """Generate server-sent events for fuzzing updates"""
        # Track new crashes by position in the run's crash list, not by
        # timestamp. Timestamps come from the reporter, so comparing them to
        # the server clock can miss crashes, and comparing a timezone-aware
        # timestamp to a naive datetime raises TypeError.
        sent_crashes = len(crash_reports.get(run_id, []))

        while True:
            try:
                # Send current stats
                if run_id in fuzzing_stats:
                    current_stats = fuzzing_stats[run_id]
                    if isinstance(current_stats, dict):
                        stats_payload = current_stats
                    elif hasattr(current_stats, "model_dump"):
                        # mode="json" keeps datetime fields JSON-serializable.
                        stats_payload = current_stats.model_dump(mode="json")
                    elif hasattr(current_stats, "dict"):
                        stats_payload = current_stats.dict()
                    else:
                        stats_payload = getattr(current_stats, "__dict__", {"run_id": run_id})
                    event_data = f"data: {json.dumps({'type': 'stats', 'data': stats_payload})}\n\n"
                    yield event_data

                # Send recent crashes
                run_crashes = crash_reports.get(run_id, [])
                # The list may have been reset (run cleaned up / re-created).
                sent_crashes = min(sent_crashes, len(run_crashes))
                # Slice copy: reports appended while we yield wait for the next tick.
                for crash in run_crashes[sent_crashes:]:
                    event_data = f"data: {json.dumps({'type': 'crash', 'data': crash.model_dump(mode='json')})}\n\n"
                    yield event_data
                    sent_crashes += 1

                await asyncio.sleep(5)  # Update every 5 seconds
            except Exception as e:
                logger.error(f"Error in event stream for run {run_id}: {e}")
                break

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        }
    )
@router.delete("/{run_id}")
async def cleanup_fuzzing_run(run_id: str):
    """
    Clean up fuzzing run data.

    Removes the stored stats and crash reports for the run and closes every
    WebSocket still subscribed to it. Unknown run IDs are accepted silently.

    Args:
        run_id: The fuzzing run ID to clean up

    Returns:
        A dict with a confirmation message
    """
    # Clean up tracking data
    fuzzing_stats.pop(run_id, None)
    crash_reports.pop(run_id, None)

    # Detach the subscriber list before awaiting anything. Socket handlers
    # remove themselves from the registry when closed, so iterating the live
    # list across awaits would skip entries, and a later ``del`` could raise
    # KeyError if another cleanup for the same run ran concurrently.
    connections = active_connections.pop(run_id, [])

    # Close any active WebSocket connections
    for websocket in list(connections):
        try:
            await websocket.close()
        except Exception:
            # Best effort: the socket may already be closed.
            logger.debug(
                "Ignoring error while closing WebSocket for run %s",
                run_id,
                exc_info=True,
            )

    return {"message": f"Cleaned up fuzzing run {run_id}"}
+184
View File
@@ -0,0 +1,184 @@
"""
API endpoints for workflow run management and findings retrieval
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import logging
from typing import Dict, Any
from fastapi import APIRouter, HTTPException, Depends
from src.models.findings import WorkflowFindings, WorkflowStatus
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Routes declared on this router are registered under the /runs prefix.
router = APIRouter(prefix="/runs", tags=["runs"])
def get_prefect_manager():
    """
    FastAPI dependency returning the Prefect manager instance.

    The import is performed at call time rather than at module import time
    (presumably to avoid a circular import with ``src.main`` -- confirm).
    """
    from src.main import prefect_mgr as manager
    return manager
@router.get("/{run_id}/status", response_model=WorkflowStatus)
async def get_run_status(
    run_id: str,
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowStatus:
    """
    Report the current state of a workflow run.

    Args:
        run_id: The flow run ID

    Returns:
        Status information including state, timestamps, and completion flags

    Raises:
        HTTPException: 404 if the status cannot be retrieved (any failure
            while fetching or assembling it is reported as "not found")
    """
    try:
        status = await prefect_mgr.get_flow_run_status(run_id)

        # Map the deployment id reported for the run back to a workflow name;
        # fall back to "unknown" when no registered deployment matches.
        wanted_deployment = str(status.get("workflow", ""))
        workflow_name = next(
            (
                name
                for name, deployment_id in prefect_mgr.deployments.items()
                if str(deployment_id) == wanted_deployment
            ),
            "unknown",
        )

        return WorkflowStatus(
            run_id=status["run_id"],
            workflow=workflow_name,
            status=status["status"],
            is_completed=status["is_completed"],
            is_failed=status["is_failed"],
            is_running=status["is_running"],
            created_at=status["created_at"],
            updated_at=status["updated_at"]
        )
    except Exception as e:
        logger.error(f"Failed to get status for run {run_id}: {e}")
        raise HTTPException(
            status_code=404,
            detail=f"Run not found: {run_id}"
        )
@router.get("/{run_id}/findings", response_model=WorkflowFindings)
async def get_run_findings(
    run_id: str,
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Return the findings produced by a completed workflow run.

    Args:
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution

    Raises:
        HTTPException: 400 if the run is not completed (still running,
            failed, or in any other non-completed state), 500 if the status
            or findings cannot be retrieved
    """
    try:
        # Get run status first
        status = await prefect_mgr.get_flow_run_status(run_id)

        if not status["is_completed"]:
            # Choose the message that matches the run's state; every
            # non-completed state is answered with a 400.
            if status["is_running"]:
                detail = f"Run {run_id} is still running. Current status: {status['status']}"
            elif status["is_failed"]:
                detail = f"Run {run_id} failed. Status: {status['status']}"
            else:
                detail = f"Run {run_id} not completed. Status: {status['status']}"
            raise HTTPException(status_code=400, detail=detail)

        # Get the findings
        findings = await prefect_mgr.get_flow_run_findings(run_id)

        # Map the run's deployment id back to a workflow name.
        wanted_deployment = str(status.get("workflow", ""))
        workflow_name = next(
            (
                name
                for name, deployment_id in prefect_mgr.deployments.items()
                if str(deployment_id) == wanted_deployment
            ),
            "unknown",
        )

        # Get workflow version if available
        metadata = {
            "completion_time": status["updated_at"],
            "workflow_version": "unknown"
        }
        registered = prefect_mgr.workflows
        if workflow_name in registered:
            metadata["workflow_version"] = registered[workflow_name].metadata.get("version", "unknown")

        return WorkflowFindings(
            workflow=workflow_name,
            run_id=run_id,
            sarif=findings,
            metadata=metadata
        )
    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Failed to get findings for run {run_id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to retrieve findings: {str(e)}"
        )
@router.get("/{workflow_name}/findings/{run_id}", response_model=WorkflowFindings)
async def get_workflow_findings(
    workflow_name: str,
    run_id: str,
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowFindings:
    """
    Return findings for a run, addressed through its workflow name.

    Alternative to the run-only findings route; the workflow name is only
    checked for existence before delegating to ``get_run_findings``.

    Args:
        workflow_name: Name of the workflow
        run_id: The flow run ID

    Returns:
        SARIF-formatted findings from the workflow execution

    Raises:
        HTTPException: 404 if the workflow is not registered; otherwise
            whatever ``get_run_findings`` raises
    """
    if workflow_name in prefect_mgr.workflows:
        # Delegate to the main findings endpoint
        return await get_run_findings(run_id, prefect_mgr)

    raise HTTPException(
        status_code=404,
        detail=f"Workflow not found: {workflow_name}"
    )
+386
View File
@@ -0,0 +1,386 @@
"""
API endpoints for workflow management with enhanced error handling
"""
# Copyright (c) 2025 FuzzingLabs
#
# Licensed under the Business Source License 1.1 (BSL). See the LICENSE file
# at the root of this repository for details.
#
# After the Change Date (four years from publication), this version of the
# Licensed Work will be made available under the Apache License, Version 2.0.
# See the LICENSE-APACHE file or http://www.apache.org/licenses/LICENSE-2.0
#
# Additional attribution and requirements are provided in the NOTICE file.
import logging
import traceback
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, HTTPException, Depends
from pathlib import Path
from src.models.findings import (
WorkflowSubmission,
WorkflowMetadata,
WorkflowListItem,
RunSubmissionResponse
)
from src.core.workflow_discovery import WorkflowDiscovery
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Routes declared on this router are registered under the /workflows prefix.
router = APIRouter(prefix="/workflows", tags=["workflows"])
def create_structured_error_response(
    error_type: str,
    message: str,
    workflow_name: Optional[str] = None,
    run_id: Optional[str] = None,
    container_info: Optional[Dict[str, Any]] = None,
    deployment_info: Optional[Dict[str, Any]] = None,
    suggestions: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Create a structured error response with rich context.

    Args:
        error_type: Short machine-readable error category
        message: Human-readable error description
        workflow_name: Workflow the error relates to, if any
        run_id: Run the error relates to, if any
        container_info: Extra container details, if any
        deployment_info: Extra deployment details, if any
        suggestions: Hints for resolving the error, if any

    Returns:
        ``{"error": {...}}`` where the inner dict always holds ``type``,
        ``message`` and a UTC ``timestamp`` (ISO 8601 with a trailing "Z").
        Each optional field is included only when its value is truthy.
    """
    # Imported locally: the module does not import datetime at file level.
    from datetime import datetime, timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the text stays "<naive ISO>Z", not "+00:00Z".
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    error: Dict[str, Any] = {
        "type": error_type,
        "message": message,
        "timestamp": timestamp,
    }

    # Response key -> value. Falsy values (None, "", {}, []) are left out.
    optional_fields = {
        "workflow_name": workflow_name,
        "run_id": run_id,
        "container": container_info,
        "deployment": deployment_info,
        "suggestions": suggestions,
    }
    error.update({key: value for key, value in optional_fields.items() if value})

    return {"error": error}
def get_prefect_manager():
    """
    FastAPI dependency returning the Prefect manager instance.

    The import is performed at call time rather than at module import time
    (presumably to avoid a circular import with ``src.main`` -- confirm).
    """
    from src.main import prefect_mgr as manager
    return manager
@router.get("/", response_model=List[WorkflowListItem])
async def list_workflows(
    prefect_mgr=Depends(get_prefect_manager)
) -> List[WorkflowListItem]:
    """
    List every workflow known to the Prefect manager.

    Each entry summarizes one workflow: name, version, description, author
    and tags, read from the workflow's metadata. A missing version is
    reported as "0.6.0", a missing description as "" and missing tags as [].
    """
    return [
        WorkflowListItem(
            name=workflow_name,
            version=workflow.metadata.get("version", "0.6.0"),
            description=workflow.metadata.get("description", ""),
            author=workflow.metadata.get("author"),
            tags=workflow.metadata.get("tags", [])
        )
        for workflow_name, workflow in prefect_mgr.workflows.items()
    ]
@router.get("/metadata/schema")
async def get_metadata_schema() -> Dict[str, Any]:
    """
    Return the JSON schema for workflow metadata files.

    The schema describes the structure and requirements of the
    metadata.yaml file that must accompany each workflow. It is obtained
    from ``WorkflowDiscovery.get_metadata_schema()``.
    """
    schema = WorkflowDiscovery.get_metadata_schema()
    return schema
@router.get("/{workflow_name}/metadata", response_model=WorkflowMetadata)
async def get_workflow_metadata(
    workflow_name: str,
    prefect_mgr=Depends(get_prefect_manager)
) -> WorkflowMetadata:
    """
    Return the full metadata record of one workflow.

    Args:
        workflow_name: Name of the workflow

    Returns:
        Complete metadata including parameters schema, supported volume modes,
        required modules, and more.

    Raises:
        HTTPException: 404 (with a structured error body) if workflow not found
    """
    registry = prefect_mgr.workflows

    if workflow_name not in registry:
        known_names = list(registry.keys())
        hints = [
            f"Available workflows: {', '.join(known_names)}",
            "Use GET /workflows/ to see all available workflows",
            "Check workflow name spelling and case sensitivity"
        ]
        raise HTTPException(
            status_code=404,
            detail=create_structured_error_response(
                error_type="WorkflowNotFound",
                message=f"Workflow '{workflow_name}' not found",
                workflow_name=workflow_name,
                suggestions=hints
            )
        )

    workflow = registry[workflow_name]
    meta = workflow.metadata

    # Fields missing from the metadata fall back to the defaults below.
    return WorkflowMetadata(
        name=workflow_name,
        version=meta.get("version", "0.6.0"),
        description=meta.get("description", ""),
        author=meta.get("author"),
        tags=meta.get("tags", []),
        parameters=meta.get("parameters", {}),
        default_parameters=meta.get("default_parameters", {}),
        required_modules=meta.get("required_modules", []),
        supported_volume_modes=meta.get("supported_volume_modes", ["ro", "rw"]),
        has_custom_docker=workflow.has_docker
    )
@router.post("/{workflow_name}/submit", response_model=RunSubmissionResponse)
async def submit_workflow(
    workflow_name: str,
    submission: WorkflowSubmission,
    prefect_mgr=Depends(get_prefect_manager)
) -> RunSubmissionResponse:
    """
    Submit a workflow for execution with volume mounting.

    After a successful submission, fuzzing tracking is initialized when the
    workflow carries the "fuzzing" tag or has "fuzz" in its name. A failure
    of that bookkeeping is logged but does not fail the request, because the
    run already exists and the caller needs its run_id.

    Args:
        workflow_name: Name of the workflow to execute
        submission: Submission parameters including target path and volume mode

    Returns:
        Run submission response with run_id and initial status

    Raises:
        HTTPException: 404 if workflow not found, 400 when a ValueError is
            raised during submission, 500 for any other submission failure.
            All error bodies are structured error responses.
    """
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows",
                "Check workflow name spelling and case sensitivity"
            ]
        )
        raise HTTPException(
            status_code=404,
            detail=error_response
        )

    try:
        # Convert ResourceLimits to dict if provided
        resource_limits_dict = None
        if submission.resource_limits:
            resource_limits_dict = {
                "cpu_limit": submission.resource_limits.cpu_limit,
                "memory_limit": submission.resource_limits.memory_limit,
                "cpu_request": submission.resource_limits.cpu_request,
                "memory_request": submission.resource_limits.memory_request
            }

        # Submit the workflow with enhanced parameters
        flow_run = await prefect_mgr.submit_workflow(
            workflow_name=workflow_name,
            target_path=submission.target_path,
            volume_mode=submission.volume_mode,
            parameters=submission.parameters,
            resource_limits=resource_limits_dict,
            additional_volumes=submission.additional_volumes,
            timeout=submission.timeout
        )

        run_id = str(flow_run.id)

        # Initialize fuzzing tracking if this looks like a fuzzing workflow.
        # The run is already submitted here, so a tracking failure must not be
        # reported as a failed submission (that would lose the run_id).
        try:
            workflow_info = prefect_mgr.workflows.get(workflow_name, {})
            workflow_tags = workflow_info.metadata.get("tags", []) if hasattr(workflow_info, 'metadata') else []
            if "fuzzing" in (workflow_tags or []) or "fuzz" in workflow_name.lower():
                from src.api.fuzzing import initialize_fuzzing_tracking
                initialize_fuzzing_tracking(run_id, workflow_name)
        except Exception:
            logger.exception(
                "Run %s of workflow '%s' was submitted but fuzzing tracking could not be initialized",
                run_id,
                workflow_name,
            )

        return RunSubmissionResponse(
            run_id=run_id,
            status=flow_run.state.name if flow_run.state else "PENDING",
            workflow=workflow_name,
            message=f"Workflow '{workflow_name}' submitted successfully"
        )

    except ValueError as e:
        # Parameter validation errors
        error_response = create_structured_error_response(
            error_type="ValidationError",
            message=str(e),
            workflow_name=workflow_name,
            suggestions=[
                "Check parameter types and values",
                "Use GET /workflows/{workflow_name}/parameters for schema",
                "Ensure all required parameters are provided"
            ]
        )
        raise HTTPException(status_code=400, detail=error_response)

    except Exception as e:
        logger.error(f"Failed to submit workflow '{workflow_name}': {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")

        # Try to get more context about the error
        deployment_info = None
        error_message = str(e)
        error_type = "WorkflowSubmissionError"

        # Detect specific error patterns via case-insensitive keyword search
        # in the exception text; the first matching category wins.
        lowered = error_message.lower()
        if "deployment" in lowered:
            error_type = "DeploymentError"
            deployment_info = {
                "status": "failed",
                "error": error_message
            }
            suggestions = [
                "Check if Prefect server is running and accessible",
                "Verify Docker is running and has sufficient resources",
                "Check container image availability",
                "Ensure volume paths exist and are accessible"
            ]
        elif "volume" in lowered or "mount" in lowered:
            error_type = "VolumeError"
            suggestions = [
                "Check if the target path exists and is accessible",
                "Verify file permissions (Docker needs read access)",
                "Ensure the path is not in use by another process",
                "Try using an absolute path instead of relative path"
            ]
        elif "memory" in lowered or "resource" in lowered:
            error_type = "ResourceError"
            suggestions = [
                "Check system memory and CPU availability",
                "Consider reducing resource limits or dataset size",
                "Monitor Docker resource usage",
                "Increase Docker memory limits if needed"
            ]
        elif "image" in lowered:
            error_type = "ImageError"
            suggestions = [
                "Check if the workflow image exists",
                "Verify Docker registry access",
                "Try rebuilding the workflow image",
                "Check network connectivity to registries"
            ]
        else:
            suggestions = [
                "Check FuzzForge backend logs for details",
                "Verify all services are running (docker-compose up -d)",
                "Try restarting the workflow deployment",
                "Contact support if the issue persists"
            ]

        # No container details are collected here, so ``container_info`` is
        # left at its default (None) and omitted from the response.
        error_response = create_structured_error_response(
            error_type=error_type,
            message=f"Failed to submit workflow: {error_message}",
            workflow_name=workflow_name,
            deployment_info=deployment_info,
            suggestions=suggestions
        )

        raise HTTPException(
            status_code=500,
            detail=error_response
        )
@router.get("/{workflow_name}/parameters")
async def get_workflow_parameters(
    workflow_name: str,
    prefect_mgr=Depends(get_prefect_manager)
) -> Dict[str, Any]:
    """
    Get the parameters schema for a workflow.

    Args:
        workflow_name: Name of the workflow

    Returns:
        A dict with the workflow name, the parameter definitions (each
        enriched with its default value when one is configured), the
        default parameters, and the names of parameters whose definition
        sets ``required`` to a truthy value.

    Raises:
        HTTPException: 404 (with a structured error body) if workflow not found
    """
    if workflow_name not in prefect_mgr.workflows:
        available_workflows = list(prefect_mgr.workflows.keys())
        error_response = create_structured_error_response(
            error_type="WorkflowNotFound",
            message=f"Workflow '{workflow_name}' not found",
            workflow_name=workflow_name,
            suggestions=[
                f"Available workflows: {', '.join(available_workflows)}",
                "Use GET /workflows/ to see all available workflows"
            ]
        )
        raise HTTPException(
            status_code=404,
            detail=error_response
        )

    info = prefect_mgr.workflows[workflow_name]
    metadata = info.metadata

    # Return parameters with enhanced schema information
    parameters_schema = metadata.get("parameters", {})

    # Extract the actual parameter definitions from JSON schema structure
    if "properties" in parameters_schema:
        raw_definitions = parameters_schema["properties"]
    else:
        raw_definitions = parameters_schema

    # Add default values to the schema. Enriched entries are shallow copies:
    # writing "default" into the original dicts would mutate the metadata
    # held by the Prefect manager, which other endpoints read.
    default_params = metadata.get("default_parameters", {})
    param_definitions = {}
    for param_name, param_schema in raw_definitions.items():
        if isinstance(param_schema, dict) and param_name in default_params:
            param_schema = {**param_schema, "default": default_params[param_name]}
        param_definitions[param_name] = param_schema

    return {
        "workflow": workflow_name,
        "parameters": param_definitions,
        "default_parameters": default_params,
        "required_parameters": [
            name for name, schema in param_definitions.items()
            if isinstance(schema, dict) and schema.get("required", False)
        ]
    }