feat: FuzzForge AI - complete rewrite for OSS release

This commit is contained in:
AFredefon
2026-01-30 09:57:48 +01:00
commit b46f050aef
226 changed files with 12943 additions and 0 deletions
@@ -0,0 +1,9 @@
"""FuzzForge Runner - Direct execution engine for FuzzForge OSS."""
from fuzzforge_runner.runner import Runner
from fuzzforge_runner.settings import Settings
__all__ = [
"Runner",
"Settings",
]
@@ -0,0 +1,28 @@
"""FuzzForge Runner CLI entry point."""
from fuzzforge_runner.runner import Runner
from fuzzforge_runner.settings import Settings
def main() -> None:
"""Entry point for the FuzzForge Runner CLI.
This is a minimal entry point that can be used for testing
and direct execution. The primary interface is via the MCP server.
"""
import argparse
parser = argparse.ArgumentParser(description="FuzzForge Runner")
parser.add_argument("--version", action="store_true", help="Print version and exit")
args = parser.parse_args()
if args.version:
print("fuzzforge-runner 0.0.1") # noqa: T201
return
print("FuzzForge Runner - Use via MCP server or programmatically") # noqa: T201
if __name__ == "__main__":
main()
@@ -0,0 +1,16 @@
"""FuzzForge Runner constants."""
#: Default directory name for module input inside sandbox.
SANDBOX_INPUT_DIRECTORY: str = "/data/input"
#: Default directory name for module output inside sandbox.
SANDBOX_OUTPUT_DIRECTORY: str = "/data/output"
#: Default archive filename for results.
RESULTS_ARCHIVE_FILENAME: str = "results.tar.gz"
#: Default configuration filename.
MODULE_CONFIG_FILENAME: str = "config.json"
#: Module entrypoint script name.
MODULE_ENTRYPOINT: str = "module"
@@ -0,0 +1,27 @@
"""FuzzForge Runner exceptions."""
from __future__ import annotations
class RunnerError(Exception):
"""Base exception for all Runner errors."""
class ModuleNotFoundError(RunnerError):
"""Raised when a module cannot be found."""
class ModuleExecutionError(RunnerError):
"""Raised when module execution fails."""
class WorkflowExecutionError(RunnerError):
"""Raised when workflow execution fails."""
class StorageError(RunnerError):
"""Raised when storage operations fail."""
class SandboxError(RunnerError):
"""Raised when sandbox operations fail."""
@@ -0,0 +1,635 @@
"""FuzzForge Runner - Direct module execution engine.
This module provides direct execution of FuzzForge modules without
requiring Temporal workflow orchestration. It's designed for local
development and OSS deployment scenarios.
"""
from __future__ import annotations
import json
from io import BytesIO
from pathlib import Path, PurePath
from tarfile import TarFile, TarInfo
from tarfile import open as Archive # noqa: N812
from tempfile import NamedTemporaryFile, TemporaryDirectory
from typing import TYPE_CHECKING, Any, cast
from fuzzforge_common.sandboxes.engines.docker.configuration import DockerConfiguration
from fuzzforge_common.sandboxes.engines.podman.configuration import PodmanConfiguration
from fuzzforge_types.executions import FuzzForgeExecutionIdentifier
from fuzzforge_runner.constants import (
MODULE_ENTRYPOINT,
RESULTS_ARCHIVE_FILENAME,
SANDBOX_INPUT_DIRECTORY,
SANDBOX_OUTPUT_DIRECTORY,
)
from fuzzforge_runner.exceptions import ModuleExecutionError, SandboxError
if TYPE_CHECKING:
from fuzzforge_common.sandboxes.engines.base.engine import AbstractFuzzForgeSandboxEngine
from fuzzforge_runner.settings import EngineSettings, Settings
from structlog.stdlib import BoundLogger
def get_logger() -> BoundLogger:
"""Get structlog logger instance.
:returns: Configured structlog logger.
"""
from structlog import get_logger # noqa: PLC0415
return cast("BoundLogger", get_logger())
class ModuleExecutor:
"""Direct executor for FuzzForge modules.
Handles the complete lifecycle of module execution:
- Spawning isolated sandbox containers
- Pushing input assets and configuration
- Running the module
- Pulling output results
- Cleanup
"""
#: Full settings including engine and registry.
_settings: Settings
#: Engine settings for container operations.
_engine_settings: EngineSettings
def __init__(self, settings: Settings) -> None:
"""Initialize an instance of the class.
:param settings: FuzzForge runner settings.
"""
self._settings = settings
self._engine_settings = settings.engine
def _get_engine_configuration(self) -> DockerConfiguration | PodmanConfiguration:
"""Get the appropriate engine configuration.
:returns: Engine configuration based on settings.
Note: This is only used when socket mode is explicitly needed.
The default is now PodmanCLI with custom storage paths.
"""
from fuzzforge_common.sandboxes.engines.enumeration import FuzzForgeSandboxEngines
# Ensure socket has proper scheme
socket = self._engine_settings.socket
if not socket.startswith(("unix://", "tcp://", "http://", "ssh://")):
socket = f"unix://{socket}"
if self._engine_settings.type == "docker":
return DockerConfiguration(
kind=FuzzForgeSandboxEngines.DOCKER,
socket=socket,
)
return PodmanConfiguration(
kind=FuzzForgeSandboxEngines.PODMAN,
socket=socket,
)
def _get_engine(self) -> AbstractFuzzForgeSandboxEngine:
"""Get the container engine instance.
Uses PodmanCLI with custom storage paths by default for Podman,
providing isolation from system Podman configuration and avoiding
issues with VS Code snap's XDG_DATA_HOME override.
:returns: Configured container engine.
"""
from fuzzforge_common.sandboxes.engines.podman import PodmanCLI
# Use PodmanCLI with custom storage paths for Podman
if self._engine_settings.type == "podman":
return PodmanCLI(
graphroot=self._engine_settings.graphroot,
runroot=self._engine_settings.runroot,
)
# Fall back to socket-based engine for Docker
return self._get_engine_configuration().into_engine()
def _check_image_exists(self, module_identifier: str) -> bool:
"""Check if a module image exists locally.
:param module_identifier: Name/identifier of the module image.
:returns: True if image exists, False otherwise.
"""
engine = self._get_engine()
# Try both common tags: latest and 0.0.1
tags_to_check = ["latest", "0.0.1"]
# Try both naming conventions:
# - localhost/fuzzforge-module-{name}:{tag} (standard convention)
# - localhost/{name}:{tag} (legacy/short form)
name_prefixes = [f"fuzzforge-module-{module_identifier}", module_identifier]
for prefix in name_prefixes:
for tag in tags_to_check:
image_name = f"localhost/{prefix}:{tag}"
if engine.image_exists(image_name):
return True
return False
def _get_local_image_name(self, module_identifier: str) -> str:
"""Get the full local image name for a module.
:param module_identifier: Name/identifier of the module.
:returns: Full image name with localhost prefix.
"""
engine = self._get_engine()
# Check fuzzforge-module- prefix first (standard convention)
prefixed_name = f"localhost/fuzzforge-module-{module_identifier}:latest"
if engine.image_exists(prefixed_name):
return prefixed_name
# Fall back to legacy short form
return f"localhost/{module_identifier}:latest"
def _pull_module_image(self, module_identifier: str, registry_url: str = "ghcr.io/fuzzinglabs", tag: str = "latest") -> None:
"""Pull a module image from the container registry.
:param module_identifier: Name/identifier of the module to pull.
:param registry_url: Container registry URL.
:param tag: Image tag to pull.
:raises SandboxError: If pull fails.
"""
logger = get_logger()
engine = self._get_engine()
# Construct full image name
remote_image = f"{registry_url}/fuzzforge-module-{module_identifier}:{tag}"
local_image = f"localhost/{module_identifier}:{tag}"
logger.info("pulling module image from registry", module=module_identifier, remote_image=remote_image)
try:
# Pull the image using engine abstraction
engine.pull_image(remote_image, timeout=300)
logger.info("module image pulled successfully", module=module_identifier)
# Tag the image locally for consistency
engine.tag_image(remote_image, local_image)
logger.debug("tagged image locally", local_image=local_image)
except TimeoutError as exc:
message = f"Module image pull timed out after 5 minutes: {module_identifier}"
raise SandboxError(message) from exc
except Exception as exc:
message = (
f"Failed to pull module image '{module_identifier}': {exc}\n"
f"Registry: {registry_url}\n"
f"Image: {remote_image}"
)
raise SandboxError(message) from exc
def _ensure_module_image(self, module_identifier: str, registry_url: str = "ghcr.io/fuzzinglabs", tag: str = "latest") -> None:
"""Ensure module image exists, pulling it if necessary.
:param module_identifier: Name/identifier of the module image.
:param registry_url: Container registry URL to pull from.
:param tag: Image tag to pull.
:raises SandboxError: If image check or pull fails.
"""
logger = get_logger()
if self._check_image_exists(module_identifier):
logger.debug("module image exists locally", module=module_identifier)
return
logger.info(
"module image not found locally, pulling from registry",
module=module_identifier,
registry=registry_url,
info="This may take a moment on first run",
)
self._pull_module_image(module_identifier, registry_url, tag)
# Verify image now exists
if not self._check_image_exists(module_identifier):
message = (
f"Module image '{module_identifier}' still not found after pull attempt.\n"
f"Tried to pull from: {registry_url}/fuzzforge-module-{module_identifier}:{tag}"
)
raise SandboxError(message)
def spawn_sandbox(self, module_identifier: str, input_volume: Path | None = None) -> str:
"""Create and prepare a sandbox container for module execution.
Automatically pulls the module image from registry if it doesn't exist locally.
:param module_identifier: Name/identifier of the module image.
:param input_volume: Optional path to mount as /data/input in the container.
:returns: The sandbox container identifier.
:raises SandboxError: If sandbox creation fails.
"""
logger = get_logger()
engine = self._get_engine()
# Ensure module image exists (auto-pull if needed)
# Use registry settings from configuration
registry_url = self._settings.registry.url
tag = self._settings.registry.default_tag
self._ensure_module_image(module_identifier, registry_url, tag)
logger.info("spawning sandbox", module=module_identifier)
try:
image = self._get_local_image_name(module_identifier)
# Build volume mappings
volumes: dict[str, str] | None = None
if input_volume:
volumes = {str(input_volume): SANDBOX_INPUT_DIRECTORY}
sandbox_id = engine.create_container(image=image, volumes=volumes)
logger.info("sandbox spawned", sandbox=sandbox_id, module=module_identifier)
return sandbox_id
except TimeoutError as exc:
message = f"Container creation timed out for module {module_identifier}"
raise SandboxError(message) from exc
except Exception as exc:
message = f"Failed to spawn sandbox for module {module_identifier}"
raise SandboxError(message) from exc
def prepare_input_directory(
self,
assets_path: Path,
configuration: dict[str, Any] | None = None,
) -> Path:
"""Prepare input directory with assets and configuration.
Creates a temporary directory with input.json describing all resources.
This directory can be volume-mounted into the container.
:param assets_path: Path to the assets (file or directory).
:param configuration: Optional module configuration dict.
:returns: Path to prepared input directory.
:raises SandboxError: If preparation fails.
"""
logger = get_logger()
logger.info("preparing input directory", assets=str(assets_path))
try:
# Create temporary directory - caller must clean it up after container finishes
from tempfile import mkdtemp
temp_path = Path(mkdtemp(prefix="fuzzforge-input-"))
# Copy assets to temp directory
if assets_path.exists():
if assets_path.is_file():
# Check if it's a tar.gz archive that needs extraction
if assets_path.suffix == ".gz" or assets_path.name.endswith(".tar.gz"):
# Extract archive contents
import tarfile
with tarfile.open(assets_path, "r:gz") as tar:
tar.extractall(path=temp_path)
logger.debug("extracted tar.gz archive", archive=str(assets_path))
else:
# Single file - copy it
import shutil
shutil.copy2(assets_path, temp_path / assets_path.name)
else:
# Directory - copy all files (including subdirectories)
import shutil
for item in assets_path.iterdir():
if item.is_file():
shutil.copy2(item, temp_path / item.name)
elif item.is_dir():
shutil.copytree(item, temp_path / item.name)
# Scan files and directories and build resource list
resources = []
for item in temp_path.iterdir():
if item.name == "input.json":
continue
if item.is_file():
resources.append({
"name": item.stem,
"description": f"Input file: {item.name}",
"kind": "unknown",
"path": f"/data/input/{item.name}",
})
elif item.is_dir():
resources.append({
"name": item.name,
"description": f"Input directory: {item.name}",
"kind": "unknown",
"path": f"/data/input/{item.name}",
})
# Create input.json with settings and resources
input_data = {
"settings": configuration or {},
"resources": resources,
}
input_json_path = temp_path / "input.json"
input_json_path.write_text(json.dumps(input_data, indent=2))
logger.debug("prepared input directory", resources=len(resources), path=str(temp_path))
return temp_path
except Exception as exc:
message = f"Failed to prepare input directory"
raise SandboxError(message) from exc
def _push_config_to_sandbox(self, sandbox: str, configuration: dict[str, Any]) -> None:
"""Write module configuration to sandbox as config.json.
:param sandbox: The sandbox container identifier.
:param configuration: Configuration dictionary to write.
"""
logger = get_logger()
engine = self._get_engine()
logger.info("writing configuration to sandbox", sandbox=sandbox)
with NamedTemporaryFile(mode="w", suffix=".json", delete=False) as config_file:
config_path = Path(config_file.name)
config_file.write(json.dumps(configuration, indent=2))
try:
engine.copy_to_container(sandbox, config_path, SANDBOX_INPUT_DIRECTORY)
except Exception as exc:
message = f"Failed to copy config.json: {exc}"
raise SandboxError(message) from exc
finally:
config_path.unlink()
def run_module(self, sandbox: str) -> None:
"""Start the sandbox and execute the module.
:param sandbox: The sandbox container identifier.
:raises ModuleExecutionError: If module execution fails.
"""
logger = get_logger()
engine = self._get_engine()
logger.info("starting sandbox and running module", sandbox=sandbox)
try:
# The container runs its ENTRYPOINT (uv run module) when started
exit_code, stdout, stderr = engine.start_container_attached(sandbox, timeout=600)
if exit_code != 0:
logger.error("module execution failed", sandbox=sandbox, stderr=stderr)
message = f"Module execution failed: {stderr}"
raise ModuleExecutionError(message)
logger.info("module execution completed", sandbox=sandbox)
except TimeoutError as exc:
message = f"Module execution timed out after 10 minutes in sandbox {sandbox}"
raise ModuleExecutionError(message) from exc
except ModuleExecutionError:
raise
except Exception as exc:
message = f"Module execution failed in sandbox {sandbox}"
raise ModuleExecutionError(message) from exc
def pull_results_from_sandbox(self, sandbox: str) -> Path:
"""Pull the results archive from the sandbox.
:param sandbox: The sandbox container identifier.
:returns: Path to the downloaded results archive (tar.gz file).
:raises SandboxError: If pull operation fails.
"""
logger = get_logger()
engine = self._get_engine()
logger.info("pulling results from sandbox", sandbox=sandbox)
try:
# Create temporary directory for results
from tempfile import mkdtemp
temp_dir = Path(mkdtemp(prefix="fuzzforge-results-"))
# Copy entire output directory from container
try:
engine.copy_from_container(sandbox, SANDBOX_OUTPUT_DIRECTORY, temp_dir)
except Exception:
# If output directory doesn't exist, that's okay - module may not have produced results
logger.warning("no results found in sandbox", sandbox=sandbox)
# Create tar archive from results directory
import tarfile
archive_file = NamedTemporaryFile(delete=False, suffix=".tar.gz")
archive_path = Path(archive_file.name)
archive_file.close()
with tarfile.open(archive_path, "w:gz") as tar:
# The output is extracted into a subdirectory named after the source
output_subdir = temp_dir / "output"
if output_subdir.exists():
for item in output_subdir.iterdir():
tar.add(item, arcname=item.name)
else:
for item in temp_dir.iterdir():
tar.add(item, arcname=item.name)
# Clean up temp directory
import shutil
shutil.rmtree(temp_dir, ignore_errors=True)
logger.info("results pulled successfully", sandbox=sandbox, archive=str(archive_path))
return archive_path
except TimeoutError as exc:
message = f"Timeout pulling results from sandbox {sandbox}"
raise SandboxError(message) from exc
except Exception as exc:
message = f"Failed to pull results from sandbox {sandbox}"
raise SandboxError(message) from exc
def terminate_sandbox(self, sandbox: str) -> None:
"""Terminate and cleanup the sandbox container.
:param sandbox: The sandbox container identifier.
"""
logger = get_logger()
engine = self._get_engine()
logger.info("terminating sandbox", sandbox=sandbox)
try:
engine.remove_container(sandbox, force=True)
logger.info("sandbox terminated", sandbox=sandbox)
except Exception as exc:
# Log but don't raise - cleanup should be best-effort
logger.warning("failed to terminate sandbox", sandbox=sandbox, error=str(exc))
async def execute(
self,
module_identifier: str,
assets_path: Path,
configuration: dict[str, Any] | None = None,
) -> Path:
"""Execute a module end-to-end.
This is the main entry point that handles the complete execution flow:
1. Spawn sandbox
2. Push assets and configuration
3. Run module
4. Pull results
5. Terminate sandbox
:param module_identifier: Name/identifier of the module to execute.
:param assets_path: Path to the input assets archive.
:param configuration: Optional module configuration.
:returns: Path to the results archive.
:raises ModuleExecutionError: If any step fails.
"""
logger = get_logger()
sandbox: str | None = None
input_dir: Path | None = None
try:
# 1. Prepare input directory with assets
input_dir = self.prepare_input_directory(assets_path, configuration)
# 2. Spawn sandbox with volume mount
sandbox = self.spawn_sandbox(module_identifier, input_volume=input_dir)
# 3. Run module
self.run_module(sandbox)
# 4. Pull results
results_path = self.pull_results_from_sandbox(sandbox)
logger.info(
"module execution completed successfully",
module=module_identifier,
results=str(results_path),
)
return results_path
finally:
# 5. Always cleanup
if sandbox:
self.terminate_sandbox(sandbox)
if input_dir and input_dir.exists():
import shutil
shutil.rmtree(input_dir, ignore_errors=True)
# -------------------------------------------------------------------------
# Continuous/Background Execution Methods
# -------------------------------------------------------------------------
def start_module_continuous(
self,
module_identifier: str,
assets_path: Path,
configuration: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Start a module in continuous/background mode without waiting.
Returns immediately with container info. Use read_module_output() to
get current status and stop_module_continuous() to stop.
:param module_identifier: Name/identifier of the module to execute.
:param assets_path: Path to the input assets archive.
:param configuration: Optional module configuration.
:returns: Dict with container_id, input_dir for later cleanup.
"""
logger = get_logger()
# 1. Prepare input directory with assets
input_dir = self.prepare_input_directory(assets_path, configuration)
# 2. Spawn sandbox with volume mount
sandbox = self.spawn_sandbox(module_identifier, input_volume=input_dir)
# 3. Start container (non-blocking)
engine = self._get_engine()
engine.start_container(sandbox)
logger.info(
"module started in continuous mode",
module=module_identifier,
container_id=sandbox,
)
return {
"container_id": sandbox,
"input_dir": str(input_dir),
"module": module_identifier,
}
def read_module_output(self, container_id: str, output_file: str = "/data/output/stream.jsonl") -> str:
"""Read output file from a running module container.
:param container_id: The container identifier.
:param output_file: Path to output file inside container.
:returns: File contents as string.
"""
engine = self._get_engine()
return engine.read_file_from_container(container_id, output_file)
def get_module_status(self, container_id: str) -> str:
"""Get the status of a running module container.
:param container_id: The container identifier.
:returns: Container status (e.g., "running", "exited").
"""
engine = self._get_engine()
return engine.get_container_status(container_id)
def stop_module_continuous(self, container_id: str, input_dir: str | None = None) -> Path:
"""Stop a continuously running module and collect results.
:param container_id: The container identifier.
:param input_dir: Optional input directory to cleanup.
:returns: Path to the results archive.
"""
logger = get_logger()
engine = self._get_engine()
try:
# 1. Stop the container gracefully
status = engine.get_container_status(container_id)
if status == "running":
engine.stop_container(container_id, timeout=10)
logger.info("stopped running container", container_id=container_id)
# 2. Pull results
results_path = self.pull_results_from_sandbox(container_id)
logger.info("collected results from continuous session", results=str(results_path))
return results_path
finally:
# 3. Cleanup
self.terminate_sandbox(container_id)
if input_dir:
import shutil
shutil.rmtree(input_dir, ignore_errors=True)
@@ -0,0 +1,360 @@
"""FuzzForge Runner - Workflow orchestration without Temporal.
This module provides simplified workflow orchestration for sequential
module execution without requiring Temporal infrastructure.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast
from uuid import uuid4
from fuzzforge_types.executions import FuzzForgeExecutionIdentifier
from fuzzforge_runner.exceptions import WorkflowExecutionError
from fuzzforge_runner.executor import ModuleExecutor
if TYPE_CHECKING:
from fuzzforge_runner.settings import Settings
from fuzzforge_runner.storage import LocalStorage
from structlog.stdlib import BoundLogger
def get_logger() -> BoundLogger:
"""Get structlog logger instance.
:returns: Configured structlog logger.
"""
from structlog import get_logger # noqa: PLC0415
return cast("BoundLogger", get_logger())
@dataclass
class WorkflowStep:
"""Represents a single step in a workflow."""
#: Module identifier to execute.
module_identifier: str
#: Optional configuration for the module.
configuration: dict[str, Any] | None = None
#: Step name/label for logging.
name: str | None = None
@dataclass
class WorkflowDefinition:
"""Defines a workflow as a sequence of module executions."""
#: Workflow name.
name: str
#: Ordered list of steps to execute.
steps: list[WorkflowStep] = field(default_factory=list)
#: Optional workflow description.
description: str | None = None
@dataclass
class StepResult:
"""Result of a single workflow step execution."""
#: Step index (0-based).
step_index: int
#: Module that was executed.
module_identifier: str
#: Path to the results archive.
results_path: Path
#: Execution identifier.
execution_id: str
#: Execution start time.
started_at: datetime
#: Execution end time.
completed_at: datetime
#: Whether execution was successful.
success: bool = True
#: Error message if failed.
error: str | None = None
@dataclass
class WorkflowResult:
"""Result of a complete workflow execution."""
#: Workflow execution identifier.
execution_id: str
#: Workflow name.
name: str
#: Results for each step.
steps: list[StepResult] = field(default_factory=list)
#: Overall success status.
success: bool = True
#: Final results path (from last step).
final_results_path: Path | None = None
class WorkflowOrchestrator:
"""Orchestrates sequential workflow execution.
Executes workflow steps sequentially, passing output from each
module as input to the next. No Temporal required.
"""
#: Module executor instance.
_executor: ModuleExecutor
#: Storage backend.
_storage: LocalStorage
def __init__(self, executor: ModuleExecutor, storage: LocalStorage) -> None:
"""Initialize an instance of the class.
:param executor: Module executor for running modules.
:param storage: Storage backend for managing assets.
"""
self._executor = executor
self._storage = storage
def _generate_execution_id(self) -> str:
"""Generate a unique execution identifier.
:returns: UUID string for execution tracking.
"""
return str(uuid4())
async def execute_workflow(
self,
workflow: WorkflowDefinition,
project_path: Path,
initial_assets_path: Path | None = None,
) -> WorkflowResult:
"""Execute a workflow as a sequence of module executions.
Each step receives the output of the previous step as input.
The first step receives the initial assets.
:param workflow: Workflow definition with steps to execute.
:param project_path: Path to the project directory.
:param initial_assets_path: Path to initial assets (optional).
:returns: Workflow execution result.
:raises WorkflowExecutionError: If workflow execution fails.
"""
logger = get_logger()
workflow_id = self._generate_execution_id()
logger.info(
"starting workflow execution",
workflow=workflow.name,
execution_id=workflow_id,
steps=len(workflow.steps),
)
result = WorkflowResult(
execution_id=workflow_id,
name=workflow.name,
)
if not workflow.steps:
logger.warning("workflow has no steps", workflow=workflow.name)
return result
# Track current assets path - starts with initial assets, then uses previous step output
current_assets: Path | None = initial_assets_path
# If no initial assets, try to get from project
if current_assets is None:
current_assets = self._storage.get_project_assets_path(project_path)
try:
for step_index, step in enumerate(workflow.steps):
step_name = step.name or f"step-{step_index}"
step_execution_id = self._generate_execution_id()
logger.info(
"executing workflow step",
workflow=workflow.name,
step=step_name,
step_index=step_index,
module=step.module_identifier,
execution_id=step_execution_id,
)
started_at = datetime.now(UTC)
try:
# Ensure we have assets for this step
if current_assets is None or not current_assets.exists():
if step_index == 0:
# First step with no assets - create empty archive
current_assets = self._storage.create_empty_assets_archive(project_path)
else:
message = f"No assets available for step {step_index}"
raise WorkflowExecutionError(message)
# Execute the module
results_path = await self._executor.execute(
module_identifier=step.module_identifier,
assets_path=current_assets,
configuration=step.configuration,
)
completed_at = datetime.now(UTC)
# Store results to persistent storage
stored_path = self._storage.store_execution_results(
project_path=project_path,
workflow_id=workflow_id,
step_index=step_index,
execution_id=step_execution_id,
results_path=results_path,
)
# Clean up temporary results archive after storing
try:
if results_path.exists() and results_path != stored_path:
results_path.unlink()
except Exception as cleanup_exc:
logger.warning("failed to clean up temporary results", path=str(results_path), error=str(cleanup_exc))
# Record step result with stored path
step_result = StepResult(
step_index=step_index,
module_identifier=step.module_identifier,
results_path=stored_path,
execution_id=step_execution_id,
started_at=started_at,
completed_at=completed_at,
success=True,
)
result.steps.append(step_result)
# Next step uses this step's output
current_assets = stored_path
logger.info(
"workflow step completed",
step=step_name,
step_index=step_index,
duration_seconds=(completed_at - started_at).total_seconds(),
)
except Exception as exc:
completed_at = datetime.now(UTC)
error_msg = str(exc)
step_result = StepResult(
step_index=step_index,
module_identifier=step.module_identifier,
results_path=Path(),
execution_id=step_execution_id,
started_at=started_at,
completed_at=completed_at,
success=False,
error=error_msg,
)
result.steps.append(step_result)
result.success = False
logger.error(
"workflow step failed",
step=step_name,
step_index=step_index,
error=error_msg,
)
# Stop workflow on failure
break
# Set final results path
if result.steps and result.steps[-1].success:
result.final_results_path = result.steps[-1].results_path
logger.info(
"workflow execution completed",
workflow=workflow.name,
execution_id=workflow_id,
success=result.success,
completed_steps=len([s for s in result.steps if s.success]),
total_steps=len(workflow.steps),
)
return result
except Exception as exc:
message = f"Workflow execution failed: {exc}"
logger.exception("workflow execution error", workflow=workflow.name)
raise WorkflowExecutionError(message) from exc
async def execute_single_module(
self,
module_identifier: str,
project_path: Path,
assets_path: Path | None = None,
configuration: dict[str, Any] | None = None,
) -> StepResult:
"""Execute a single module (convenience method).
This is a simplified interface for executing a single module
outside of a workflow context.
:param module_identifier: Module to execute.
:param project_path: Project directory path.
:param assets_path: Optional path to input assets.
:param configuration: Optional module configuration.
:returns: Execution result.
"""
workflow = WorkflowDefinition(
name=f"single-{module_identifier}",
steps=[
WorkflowStep(
module_identifier=module_identifier,
configuration=configuration,
name="main",
)
],
)
result = await self.execute_workflow(
workflow=workflow,
project_path=project_path,
initial_assets_path=assets_path,
)
if result.steps:
return result.steps[0]
# Should not happen, but handle gracefully
return StepResult(
step_index=0,
module_identifier=module_identifier,
results_path=Path(),
execution_id=result.execution_id,
started_at=datetime.now(UTC),
completed_at=datetime.now(UTC),
success=False,
error="No step results produced",
)
@@ -0,0 +1,378 @@
"""FuzzForge Runner - Main runner interface.
This module provides the high-level interface for FuzzForge OSS,
coordinating module execution, workflow orchestration, and storage.
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast
from fuzzforge_runner.executor import ModuleExecutor
from fuzzforge_runner.orchestrator import (
StepResult,
WorkflowDefinition,
WorkflowOrchestrator,
WorkflowResult,
WorkflowStep,
)
from fuzzforge_runner.settings import Settings
from fuzzforge_runner.storage import LocalStorage
if TYPE_CHECKING:
from structlog.stdlib import BoundLogger
def get_logger() -> BoundLogger:
"""Get structlog logger instance.
:returns: Configured structlog logger.
"""
from structlog import get_logger # noqa: PLC0415
return cast("BoundLogger", get_logger())
@dataclass
class ModuleInfo:
"""Information about an available module."""
#: Module identifier/name.
identifier: str
#: Module description.
description: str | None = None
#: Module version.
version: str | None = None
#: Whether module image exists locally.
available: bool = True
class Runner:
"""Main FuzzForge Runner interface.
Provides a unified interface for:
- Module discovery and execution
- Workflow orchestration
- Project and asset management
This is the primary entry point for OSS users and the MCP server.
"""
#: Runner settings.
_settings: Settings
#: Module executor.
_executor: ModuleExecutor
#: Local storage backend.
_storage: LocalStorage
#: Workflow orchestrator.
_orchestrator: WorkflowOrchestrator
def __init__(self, settings: Settings | None = None) -> None:
"""Initialize an instance of the class.
:param settings: Runner settings. If None, loads from environment.
"""
self._settings = settings or Settings()
self._executor = ModuleExecutor(self._settings)
self._storage = LocalStorage(self._settings.storage.path)
self._orchestrator = WorkflowOrchestrator(self._executor, self._storage)
@property
def settings(self) -> Settings:
"""Get runner settings.
:returns: Current settings instance.
"""
return self._settings
@property
def storage(self) -> LocalStorage:
"""Get storage backend.
:returns: Storage instance.
"""
return self._storage
# -------------------------------------------------------------------------
# Project Management
# -------------------------------------------------------------------------
def init_project(self, project_path: Path) -> Path:
"""Initialize a new project.
Creates necessary storage directories for a project.
:param project_path: Path to the project directory.
:returns: Path to the project storage directory.
"""
logger = get_logger()
logger.info("initializing project", path=str(project_path))
return self._storage.init_project(project_path)
def set_project_assets(self, project_path: Path, assets_path: Path) -> Path:
"""Set initial assets for a project.
:param project_path: Path to the project directory.
:param assets_path: Path to assets (file or directory).
:returns: Path to stored assets.
"""
logger = get_logger()
logger.info("setting project assets", project=str(project_path), assets=str(assets_path))
return self._storage.store_assets(project_path, assets_path)
# -------------------------------------------------------------------------
# Module Discovery
# -------------------------------------------------------------------------
def list_modules(self) -> list[ModuleInfo]:
"""List available modules.
Discovers modules from the configured modules directory.
:returns: List of available modules.
"""
logger = get_logger()
modules: list[ModuleInfo] = []
modules_path = self._settings.modules_path
if not modules_path.exists():
logger.warning("modules directory not found", path=str(modules_path))
return modules
# Look for module directories (each should have a Dockerfile or be a built image)
for item in modules_path.iterdir():
if item.is_dir():
# Check for module markers
has_dockerfile = (item / "Dockerfile").exists()
has_pyproject = (item / "pyproject.toml").exists()
if has_dockerfile or has_pyproject:
modules.append(
ModuleInfo(
identifier=item.name,
available=has_dockerfile,
)
)
logger.info("discovered modules", count=len(modules))
return modules
def list_module_images(self, filter_prefix: str = "localhost/") -> list[ModuleInfo]:
"""List available module images from the container engine.
Uses the container engine API to discover built module images.
:param filter_prefix: Prefix to filter images (default: "localhost/").
:returns: List of available module images.
"""
logger = get_logger()
modules: list[ModuleInfo] = []
seen: set[str] = set()
# Infrastructure images to skip
skip_images = {"fuzzforge-modules-sdk", "fuzzforge-runner", "fuzzforge-api"}
engine = self._executor._get_engine()
images = engine.list_images(filter_prefix=filter_prefix)
for image in images:
# Only include :latest images
if image.tag != "latest":
continue
# Extract module name from repository
full_name = image.repository.split("/")[-1]
# Skip infrastructure images
if full_name in skip_images:
continue
# Extract clean module name (remove fuzzforge-module- prefix if present)
if full_name.startswith("fuzzforge-module-"):
module_name = full_name.replace("fuzzforge-module-", "")
else:
module_name = full_name
# Skip UUID-like names (temporary/broken containers)
if module_name.count("-") >= 4 and len(module_name) > 30:
continue
# Add unique modules
if module_name not in seen:
seen.add(module_name)
modules.append(
ModuleInfo(
identifier=module_name,
description=None,
version=image.tag,
available=True,
)
)
logger.info("listed module images", count=len(modules))
return modules
def get_module_info(self, module_identifier: str) -> ModuleInfo | None:
"""Get information about a specific module.
:param module_identifier: Module identifier to look up.
:returns: Module info, or None if not found.
"""
modules = self.list_modules()
for module in modules:
if module.identifier == module_identifier:
return module
return None
# -------------------------------------------------------------------------
# Module Execution
# -------------------------------------------------------------------------
async def execute_module(
self,
module_identifier: str,
project_path: Path,
configuration: dict[str, Any] | None = None,
assets_path: Path | None = None,
) -> StepResult:
"""Execute a single module.
:param module_identifier: Module to execute.
:param project_path: Path to the project directory.
:param configuration: Optional module configuration.
:param assets_path: Optional path to input assets.
:returns: Execution result.
"""
logger = get_logger()
logger.info(
"executing module",
module=module_identifier,
project=str(project_path),
)
return await self._orchestrator.execute_single_module(
module_identifier=module_identifier,
project_path=project_path,
assets_path=assets_path,
configuration=configuration,
)
# -------------------------------------------------------------------------
# Workflow Execution
# -------------------------------------------------------------------------
async def execute_workflow(
self,
workflow: WorkflowDefinition,
project_path: Path,
initial_assets_path: Path | None = None,
) -> WorkflowResult:
"""Execute a workflow.
:param workflow: Workflow definition with steps.
:param project_path: Path to the project directory.
:param initial_assets_path: Optional path to initial assets.
:returns: Workflow execution result.
"""
logger = get_logger()
logger.info(
"executing workflow",
workflow=workflow.name,
project=str(project_path),
steps=len(workflow.steps),
)
return await self._orchestrator.execute_workflow(
workflow=workflow,
project_path=project_path,
initial_assets_path=initial_assets_path,
)
def create_workflow(
self,
name: str,
steps: list[tuple[str, dict[str, Any] | None]],
description: str | None = None,
) -> WorkflowDefinition:
"""Create a workflow definition.
Convenience method for creating workflows programmatically.
:param name: Workflow name.
:param steps: List of (module_identifier, configuration) tuples.
:param description: Optional workflow description.
:returns: Workflow definition.
"""
workflow_steps = [
WorkflowStep(
module_identifier=module_id,
configuration=config,
name=f"step-{i}",
)
for i, (module_id, config) in enumerate(steps)
]
return WorkflowDefinition(
name=name,
steps=workflow_steps,
description=description,
)
# -------------------------------------------------------------------------
# Results Management
# -------------------------------------------------------------------------
def get_execution_results(
self,
project_path: Path,
execution_id: str,
) -> Path | None:
"""Get results for an execution.
:param project_path: Path to the project directory.
:param execution_id: Execution ID.
:returns: Path to results archive, or None if not found.
"""
return self._storage.get_execution_results(project_path, execution_id)
def list_executions(self, project_path: Path) -> list[str]:
"""List all executions for a project.
:param project_path: Path to the project directory.
:returns: List of execution IDs.
"""
return self._storage.list_executions(project_path)
def extract_results(self, results_path: Path, destination: Path) -> Path:
"""Extract results archive to a directory.
:param results_path: Path to results archive.
:param destination: Destination directory.
:returns: Path to extracted directory.
"""
return self._storage.extract_results(results_path, destination)
@@ -0,0 +1,114 @@
"""FuzzForge Runner settings configuration."""
from __future__ import annotations
from enum import StrEnum
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class EngineType(StrEnum):
"""Supported container engine types."""
DOCKER = "docker"
PODMAN = "podman"
class EngineSettings(BaseModel):
"""Container engine configuration."""
#: Type of container engine to use.
type: EngineType = EngineType.PODMAN
#: Path to the container engine socket (only used as fallback).
socket: str = Field(default="")
#: Custom graph root for container storage (isolated from system).
#: When set, uses CLI mode instead of socket for better portability.
graphroot: Path = Field(default=Path.home() / ".fuzzforge" / "containers" / "storage")
#: Custom run root for container runtime state.
runroot: Path = Field(default=Path.home() / ".fuzzforge" / "containers" / "run")
class StorageSettings(BaseModel):
"""Storage configuration for local or S3 storage."""
#: Storage backend type.
type: Literal["local", "s3"] = "local"
#: Base path for local storage (used when type is "local").
path: Path = Field(default=Path.home() / ".fuzzforge" / "storage")
#: S3 endpoint URL (used when type is "s3").
s3_endpoint: str | None = None
#: S3 access key (used when type is "s3").
s3_access_key: str | None = None
#: S3 secret key (used when type is "s3").
s3_secret_key: str | None = None
class ProjectSettings(BaseModel):
"""Project configuration."""
#: Default path for FuzzForge projects.
default_path: Path = Field(default=Path.home() / ".fuzzforge" / "projects")
class RegistrySettings(BaseModel):
"""Container registry configuration for module images."""
#: Registry URL for pulling module images.
url: str = Field(default="ghcr.io/fuzzinglabs")
#: Default tag to use when pulling images.
default_tag: str = Field(default="latest")
#: Registry username for authentication (optional).
username: str | None = None
#: Registry password/token for authentication (optional).
password: str | None = None
class Settings(BaseSettings):
"""FuzzForge Runner settings.
Settings can be configured via environment variables with the prefix
``FUZZFORGE_``. Nested settings use underscore as delimiter.
Example:
``FUZZFORGE_ENGINE_TYPE=docker``
``FUZZFORGE_STORAGE_PATH=/data/fuzzforge``
``FUZZFORGE_MODULES_PATH=/path/to/modules``
"""
model_config = SettingsConfigDict(
case_sensitive=False,
env_nested_delimiter="__",
env_prefix="FUZZFORGE_",
)
#: Container engine settings.
engine: EngineSettings = Field(default_factory=EngineSettings)
#: Storage settings.
storage: StorageSettings = Field(default_factory=StorageSettings)
#: Project settings.
project: ProjectSettings = Field(default_factory=ProjectSettings)
#: Container registry settings.
registry: RegistrySettings = Field(default_factory=RegistrySettings)
#: Path to modules directory (for development/local builds).
modules_path: Path = Field(default=Path.home() / ".fuzzforge" / "modules")
#: Enable debug logging.
debug: bool = False
@@ -0,0 +1,359 @@
"""FuzzForge Runner - Local filesystem storage.
This module provides local filesystem storage as an alternative to S3,
enabling zero-configuration operation for OSS deployments.
"""
from __future__ import annotations
import shutil
from pathlib import Path, PurePath
from tarfile import open as Archive # noqa: N812
from tempfile import NamedTemporaryFile, TemporaryDirectory
from typing import TYPE_CHECKING, cast
from fuzzforge_runner.constants import RESULTS_ARCHIVE_FILENAME
from fuzzforge_runner.exceptions import StorageError
if TYPE_CHECKING:
from structlog.stdlib import BoundLogger
def get_logger() -> BoundLogger:
"""Get structlog logger instance.
:returns: Configured structlog logger.
"""
from structlog import get_logger # noqa: PLC0415
return cast("BoundLogger", get_logger())
class LocalStorage:
"""Local filesystem storage backend.
Provides S3-like operations using local filesystem, enabling
FuzzForge operation without external storage infrastructure.
Directory structure:
{base_path}/
projects/
{project_id}/
assets/ # Initial project assets
runs/
{execution_id}/
results.tar.gz
{workflow_id}/
modules/
step-0-{exec_id}/
results.tar.gz
"""
#: Base path for all storage operations.
_base_path: Path
def __init__(self, base_path: Path) -> None:
"""Initialize an instance of the class.
:param base_path: Root directory for storage.
"""
self._base_path = base_path
self._ensure_base_path()
def _ensure_base_path(self) -> None:
"""Ensure the base storage directory exists."""
self._base_path.mkdir(parents=True, exist_ok=True)
def _get_project_path(self, project_path: Path) -> Path:
"""Get the storage path for a project.
:param project_path: Original project path (used as identifier).
:returns: Storage path for the project.
"""
# Use project path name as identifier
project_id = project_path.name
return self._base_path / "projects" / project_id
def init_project(self, project_path: Path) -> Path:
"""Initialize storage for a new project.
:param project_path: Path to the project directory.
:returns: Path to the project storage directory.
"""
logger = get_logger()
storage_path = self._get_project_path(project_path)
# Create directory structure
(storage_path / "assets").mkdir(parents=True, exist_ok=True)
(storage_path / "runs").mkdir(parents=True, exist_ok=True)
logger.info("initialized project storage", project=project_path.name, storage=str(storage_path))
return storage_path
def get_project_assets_path(self, project_path: Path) -> Path | None:
"""Get the path to project assets archive.
:param project_path: Path to the project directory.
:returns: Path to assets archive, or None if not found.
"""
storage_path = self._get_project_path(project_path)
assets_dir = storage_path / "assets"
# Look for assets archive
archive_path = assets_dir / "assets.tar.gz"
if archive_path.exists():
return archive_path
# Check if there are any files in assets directory
if assets_dir.exists() and any(assets_dir.iterdir()):
# Create archive from directory contents
return self._create_archive_from_directory(assets_dir)
return None
def _create_archive_from_directory(self, directory: Path) -> Path:
"""Create a tar.gz archive from a directory's contents.
:param directory: Directory to archive.
:returns: Path to the created archive.
"""
archive_path = directory.parent / f"{directory.name}.tar.gz"
with Archive(archive_path, "w:gz") as tar:
for item in directory.iterdir():
tar.add(item, arcname=item.name)
return archive_path
def create_empty_assets_archive(self, project_path: Path) -> Path:
"""Create an empty assets archive for a project.
:param project_path: Path to the project directory.
:returns: Path to the empty archive.
"""
storage_path = self._get_project_path(project_path)
assets_dir = storage_path / "assets"
assets_dir.mkdir(parents=True, exist_ok=True)
archive_path = assets_dir / "assets.tar.gz"
# Create empty archive
with Archive(archive_path, "w:gz") as tar:
pass # Empty archive
return archive_path
def store_assets(self, project_path: Path, assets_path: Path) -> Path:
"""Store project assets from a local path.
:param project_path: Path to the project directory.
:param assets_path: Source path (file or directory) to store.
:returns: Path to the stored assets.
:raises StorageError: If storage operation fails.
"""
logger = get_logger()
storage_path = self._get_project_path(project_path)
assets_dir = storage_path / "assets"
assets_dir.mkdir(parents=True, exist_ok=True)
try:
if assets_path.is_file():
# Copy archive directly
dest_path = assets_dir / "assets.tar.gz"
shutil.copy2(assets_path, dest_path)
else:
# Create archive from directory
dest_path = assets_dir / "assets.tar.gz"
with Archive(dest_path, "w:gz") as tar:
for item in assets_path.iterdir():
tar.add(item, arcname=item.name)
logger.info("stored project assets", project=project_path.name, path=str(dest_path))
return dest_path
except Exception as exc:
message = f"Failed to store assets: {exc}"
raise StorageError(message) from exc
def store_execution_results(
self,
project_path: Path,
workflow_id: str | None,
step_index: int,
execution_id: str,
results_path: Path,
) -> Path:
"""Store execution results.
:param project_path: Path to the project directory.
:param workflow_id: Workflow execution ID (None for standalone).
:param step_index: Step index in workflow.
:param execution_id: Module execution ID.
:param results_path: Path to results archive to store.
:returns: Path to the stored results.
:raises StorageError: If storage operation fails.
"""
logger = get_logger()
storage_path = self._get_project_path(project_path)
try:
if workflow_id:
# Part of workflow
dest_dir = storage_path / "runs" / workflow_id / "modules" / f"step-{step_index}-{execution_id}"
else:
# Standalone execution
dest_dir = storage_path / "runs" / execution_id
dest_dir.mkdir(parents=True, exist_ok=True)
dest_path = dest_dir / RESULTS_ARCHIVE_FILENAME
shutil.copy2(results_path, dest_path)
logger.info(
"stored execution results",
execution_id=execution_id,
path=str(dest_path),
)
return dest_path
except Exception as exc:
message = f"Failed to store results: {exc}"
raise StorageError(message) from exc
def get_execution_results(
self,
project_path: Path,
execution_id: str,
workflow_id: str | None = None,
step_index: int | None = None,
) -> Path | None:
"""Retrieve execution results.
:param project_path: Path to the project directory.
:param execution_id: Module execution ID.
:param workflow_id: Workflow execution ID (None for standalone).
:param step_index: Step index in workflow.
:returns: Path to results archive, or None if not found.
"""
storage_path = self._get_project_path(project_path)
if workflow_id and step_index is not None:
# Direct workflow path lookup
results_path = (
storage_path / "runs" / workflow_id / "modules" / f"step-{step_index}-{execution_id}" / RESULTS_ARCHIVE_FILENAME
)
if results_path.exists():
return results_path
# Try standalone path
results_path = storage_path / "runs" / execution_id / RESULTS_ARCHIVE_FILENAME
if results_path.exists():
return results_path
# Search for execution_id in all workflow runs
runs_dir = storage_path / "runs"
if runs_dir.exists():
for workflow_dir in runs_dir.iterdir():
if not workflow_dir.is_dir():
continue
# Check if this is a workflow directory (has 'modules' subdirectory)
modules_dir = workflow_dir / "modules"
if modules_dir.exists() and modules_dir.is_dir():
# Search for step directories containing this execution_id
for step_dir in modules_dir.iterdir():
if step_dir.is_dir() and execution_id in step_dir.name:
results_path = step_dir / RESULTS_ARCHIVE_FILENAME
if results_path.exists():
return results_path
return None
def list_executions(self, project_path: Path) -> list[str]:
"""List all execution IDs for a project.
:param project_path: Path to the project directory.
:returns: List of execution IDs.
"""
storage_path = self._get_project_path(project_path)
runs_dir = storage_path / "runs"
if not runs_dir.exists():
return []
return [d.name for d in runs_dir.iterdir() if d.is_dir()]
def delete_execution(self, project_path: Path, execution_id: str) -> bool:
"""Delete an execution and its results.
:param project_path: Path to the project directory.
:param execution_id: Execution ID to delete.
:returns: True if deleted, False if not found.
"""
logger = get_logger()
storage_path = self._get_project_path(project_path)
exec_path = storage_path / "runs" / execution_id
if exec_path.exists():
shutil.rmtree(exec_path)
logger.info("deleted execution", execution_id=execution_id)
return True
return False
def delete_project(self, project_path: Path) -> bool:
"""Delete all storage for a project.
:param project_path: Path to the project directory.
:returns: True if deleted, False if not found.
"""
logger = get_logger()
storage_path = self._get_project_path(project_path)
if storage_path.exists():
shutil.rmtree(storage_path)
logger.info("deleted project storage", project=project_path.name)
return True
return False
def extract_results(self, results_path: Path, destination: Path) -> Path:
"""Extract a results archive to a destination directory.
:param results_path: Path to the results archive.
:param destination: Directory to extract to.
:returns: Path to extracted directory.
:raises StorageError: If extraction fails.
"""
logger = get_logger()
try:
destination.mkdir(parents=True, exist_ok=True)
with Archive(results_path, "r:gz") as tar:
tar.extractall(path=destination)
logger.info("extracted results", source=str(results_path), destination=str(destination))
return destination
except Exception as exc:
message = f"Failed to extract results: {exc}"
raise StorageError(message) from exc