mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-06-09 06:13:53 +02:00
feat: FuzzForge AI - complete rewrite for OSS release
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
"""FuzzForge Runner - Direct execution engine for FuzzForge OSS."""
|
||||
|
||||
from fuzzforge_runner.runner import Runner
|
||||
from fuzzforge_runner.settings import Settings
|
||||
|
||||
__all__ = [
|
||||
"Runner",
|
||||
"Settings",
|
||||
]
|
||||
@@ -0,0 +1,28 @@
|
||||
"""FuzzForge Runner CLI entry point."""
|
||||
|
||||
from fuzzforge_runner.runner import Runner
|
||||
from fuzzforge_runner.settings import Settings
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Entry point for the FuzzForge Runner CLI.
|
||||
|
||||
This is a minimal entry point that can be used for testing
|
||||
and direct execution. The primary interface is via the MCP server.
|
||||
|
||||
"""
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="FuzzForge Runner")
|
||||
parser.add_argument("--version", action="store_true", help="Print version and exit")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.version:
|
||||
print("fuzzforge-runner 0.0.1") # noqa: T201
|
||||
return
|
||||
|
||||
print("FuzzForge Runner - Use via MCP server or programmatically") # noqa: T201
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,16 @@
|
||||
"""FuzzForge Runner constants."""
|
||||
|
||||
#: Default directory name for module input inside sandbox.
|
||||
SANDBOX_INPUT_DIRECTORY: str = "/data/input"
|
||||
|
||||
#: Default directory name for module output inside sandbox.
|
||||
SANDBOX_OUTPUT_DIRECTORY: str = "/data/output"
|
||||
|
||||
#: Default archive filename for results.
|
||||
RESULTS_ARCHIVE_FILENAME: str = "results.tar.gz"
|
||||
|
||||
#: Default configuration filename.
|
||||
MODULE_CONFIG_FILENAME: str = "config.json"
|
||||
|
||||
#: Module entrypoint script name.
|
||||
MODULE_ENTRYPOINT: str = "module"
|
||||
@@ -0,0 +1,27 @@
|
||||
"""FuzzForge Runner exceptions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class RunnerError(Exception):
|
||||
"""Base exception for all Runner errors."""
|
||||
|
||||
|
||||
class ModuleNotFoundError(RunnerError):
|
||||
"""Raised when a module cannot be found."""
|
||||
|
||||
|
||||
class ModuleExecutionError(RunnerError):
|
||||
"""Raised when module execution fails."""
|
||||
|
||||
|
||||
class WorkflowExecutionError(RunnerError):
|
||||
"""Raised when workflow execution fails."""
|
||||
|
||||
|
||||
class StorageError(RunnerError):
|
||||
"""Raised when storage operations fail."""
|
||||
|
||||
|
||||
class SandboxError(RunnerError):
|
||||
"""Raised when sandbox operations fail."""
|
||||
@@ -0,0 +1,635 @@
|
||||
"""FuzzForge Runner - Direct module execution engine.
|
||||
|
||||
This module provides direct execution of FuzzForge modules without
|
||||
requiring Temporal workflow orchestration. It's designed for local
|
||||
development and OSS deployment scenarios.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePath
|
||||
from tarfile import TarFile, TarInfo
|
||||
from tarfile import open as Archive # noqa: N812
|
||||
from tempfile import NamedTemporaryFile, TemporaryDirectory
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from fuzzforge_common.sandboxes.engines.docker.configuration import DockerConfiguration
|
||||
from fuzzforge_common.sandboxes.engines.podman.configuration import PodmanConfiguration
|
||||
from fuzzforge_types.executions import FuzzForgeExecutionIdentifier
|
||||
|
||||
from fuzzforge_runner.constants import (
|
||||
MODULE_ENTRYPOINT,
|
||||
RESULTS_ARCHIVE_FILENAME,
|
||||
SANDBOX_INPUT_DIRECTORY,
|
||||
SANDBOX_OUTPUT_DIRECTORY,
|
||||
)
|
||||
from fuzzforge_runner.exceptions import ModuleExecutionError, SandboxError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fuzzforge_common.sandboxes.engines.base.engine import AbstractFuzzForgeSandboxEngine
|
||||
from fuzzforge_runner.settings import EngineSettings, Settings
|
||||
from structlog.stdlib import BoundLogger
|
||||
|
||||
|
||||
def get_logger() -> BoundLogger:
|
||||
"""Get structlog logger instance.
|
||||
|
||||
:returns: Configured structlog logger.
|
||||
|
||||
"""
|
||||
from structlog import get_logger # noqa: PLC0415
|
||||
|
||||
return cast("BoundLogger", get_logger())
|
||||
|
||||
|
||||
class ModuleExecutor:
|
||||
"""Direct executor for FuzzForge modules.
|
||||
|
||||
Handles the complete lifecycle of module execution:
|
||||
- Spawning isolated sandbox containers
|
||||
- Pushing input assets and configuration
|
||||
- Running the module
|
||||
- Pulling output results
|
||||
- Cleanup
|
||||
|
||||
"""
|
||||
|
||||
#: Full settings including engine and registry.
|
||||
_settings: Settings
|
||||
#: Engine settings for container operations.
|
||||
_engine_settings: EngineSettings
|
||||
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
"""Initialize an instance of the class.
|
||||
|
||||
:param settings: FuzzForge runner settings.
|
||||
|
||||
"""
|
||||
self._settings = settings
|
||||
self._engine_settings = settings.engine
|
||||
|
||||
def _get_engine_configuration(self) -> DockerConfiguration | PodmanConfiguration:
|
||||
"""Get the appropriate engine configuration.
|
||||
|
||||
:returns: Engine configuration based on settings.
|
||||
|
||||
Note: This is only used when socket mode is explicitly needed.
|
||||
The default is now PodmanCLI with custom storage paths.
|
||||
|
||||
"""
|
||||
from fuzzforge_common.sandboxes.engines.enumeration import FuzzForgeSandboxEngines
|
||||
|
||||
# Ensure socket has proper scheme
|
||||
socket = self._engine_settings.socket
|
||||
if not socket.startswith(("unix://", "tcp://", "http://", "ssh://")):
|
||||
socket = f"unix://{socket}"
|
||||
|
||||
if self._engine_settings.type == "docker":
|
||||
return DockerConfiguration(
|
||||
kind=FuzzForgeSandboxEngines.DOCKER,
|
||||
socket=socket,
|
||||
)
|
||||
return PodmanConfiguration(
|
||||
kind=FuzzForgeSandboxEngines.PODMAN,
|
||||
socket=socket,
|
||||
)
|
||||
|
||||
def _get_engine(self) -> AbstractFuzzForgeSandboxEngine:
|
||||
"""Get the container engine instance.
|
||||
|
||||
Uses PodmanCLI with custom storage paths by default for Podman,
|
||||
providing isolation from system Podman configuration and avoiding
|
||||
issues with VS Code snap's XDG_DATA_HOME override.
|
||||
|
||||
:returns: Configured container engine.
|
||||
|
||||
"""
|
||||
from fuzzforge_common.sandboxes.engines.podman import PodmanCLI
|
||||
|
||||
# Use PodmanCLI with custom storage paths for Podman
|
||||
if self._engine_settings.type == "podman":
|
||||
return PodmanCLI(
|
||||
graphroot=self._engine_settings.graphroot,
|
||||
runroot=self._engine_settings.runroot,
|
||||
)
|
||||
|
||||
# Fall back to socket-based engine for Docker
|
||||
return self._get_engine_configuration().into_engine()
|
||||
|
||||
def _check_image_exists(self, module_identifier: str) -> bool:
|
||||
"""Check if a module image exists locally.
|
||||
|
||||
:param module_identifier: Name/identifier of the module image.
|
||||
:returns: True if image exists, False otherwise.
|
||||
|
||||
"""
|
||||
engine = self._get_engine()
|
||||
|
||||
# Try both common tags: latest and 0.0.1
|
||||
tags_to_check = ["latest", "0.0.1"]
|
||||
|
||||
# Try both naming conventions:
|
||||
# - localhost/fuzzforge-module-{name}:{tag} (standard convention)
|
||||
# - localhost/{name}:{tag} (legacy/short form)
|
||||
name_prefixes = [f"fuzzforge-module-{module_identifier}", module_identifier]
|
||||
|
||||
for prefix in name_prefixes:
|
||||
for tag in tags_to_check:
|
||||
image_name = f"localhost/{prefix}:{tag}"
|
||||
if engine.image_exists(image_name):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _get_local_image_name(self, module_identifier: str) -> str:
|
||||
"""Get the full local image name for a module.
|
||||
|
||||
:param module_identifier: Name/identifier of the module.
|
||||
:returns: Full image name with localhost prefix.
|
||||
|
||||
"""
|
||||
engine = self._get_engine()
|
||||
|
||||
# Check fuzzforge-module- prefix first (standard convention)
|
||||
prefixed_name = f"localhost/fuzzforge-module-{module_identifier}:latest"
|
||||
if engine.image_exists(prefixed_name):
|
||||
return prefixed_name
|
||||
|
||||
# Fall back to legacy short form
|
||||
return f"localhost/{module_identifier}:latest"
|
||||
|
||||
def _pull_module_image(self, module_identifier: str, registry_url: str = "ghcr.io/fuzzinglabs", tag: str = "latest") -> None:
|
||||
"""Pull a module image from the container registry.
|
||||
|
||||
:param module_identifier: Name/identifier of the module to pull.
|
||||
:param registry_url: Container registry URL.
|
||||
:param tag: Image tag to pull.
|
||||
:raises SandboxError: If pull fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
# Construct full image name
|
||||
remote_image = f"{registry_url}/fuzzforge-module-{module_identifier}:{tag}"
|
||||
local_image = f"localhost/{module_identifier}:{tag}"
|
||||
|
||||
logger.info("pulling module image from registry", module=module_identifier, remote_image=remote_image)
|
||||
|
||||
try:
|
||||
# Pull the image using engine abstraction
|
||||
engine.pull_image(remote_image, timeout=300)
|
||||
|
||||
logger.info("module image pulled successfully", module=module_identifier)
|
||||
|
||||
# Tag the image locally for consistency
|
||||
engine.tag_image(remote_image, local_image)
|
||||
|
||||
logger.debug("tagged image locally", local_image=local_image)
|
||||
|
||||
except TimeoutError as exc:
|
||||
message = f"Module image pull timed out after 5 minutes: {module_identifier}"
|
||||
raise SandboxError(message) from exc
|
||||
except Exception as exc:
|
||||
message = (
|
||||
f"Failed to pull module image '{module_identifier}': {exc}\n"
|
||||
f"Registry: {registry_url}\n"
|
||||
f"Image: {remote_image}"
|
||||
)
|
||||
raise SandboxError(message) from exc
|
||||
|
||||
def _ensure_module_image(self, module_identifier: str, registry_url: str = "ghcr.io/fuzzinglabs", tag: str = "latest") -> None:
|
||||
"""Ensure module image exists, pulling it if necessary.
|
||||
|
||||
:param module_identifier: Name/identifier of the module image.
|
||||
:param registry_url: Container registry URL to pull from.
|
||||
:param tag: Image tag to pull.
|
||||
:raises SandboxError: If image check or pull fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
|
||||
if self._check_image_exists(module_identifier):
|
||||
logger.debug("module image exists locally", module=module_identifier)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
"module image not found locally, pulling from registry",
|
||||
module=module_identifier,
|
||||
registry=registry_url,
|
||||
info="This may take a moment on first run",
|
||||
)
|
||||
self._pull_module_image(module_identifier, registry_url, tag)
|
||||
|
||||
# Verify image now exists
|
||||
if not self._check_image_exists(module_identifier):
|
||||
message = (
|
||||
f"Module image '{module_identifier}' still not found after pull attempt.\n"
|
||||
f"Tried to pull from: {registry_url}/fuzzforge-module-{module_identifier}:{tag}"
|
||||
)
|
||||
raise SandboxError(message)
|
||||
|
||||
def spawn_sandbox(self, module_identifier: str, input_volume: Path | None = None) -> str:
|
||||
"""Create and prepare a sandbox container for module execution.
|
||||
|
||||
Automatically pulls the module image from registry if it doesn't exist locally.
|
||||
|
||||
:param module_identifier: Name/identifier of the module image.
|
||||
:param input_volume: Optional path to mount as /data/input in the container.
|
||||
:returns: The sandbox container identifier.
|
||||
:raises SandboxError: If sandbox creation fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
# Ensure module image exists (auto-pull if needed)
|
||||
# Use registry settings from configuration
|
||||
registry_url = self._settings.registry.url
|
||||
tag = self._settings.registry.default_tag
|
||||
self._ensure_module_image(module_identifier, registry_url, tag)
|
||||
|
||||
logger.info("spawning sandbox", module=module_identifier)
|
||||
try:
|
||||
image = self._get_local_image_name(module_identifier)
|
||||
|
||||
# Build volume mappings
|
||||
volumes: dict[str, str] | None = None
|
||||
if input_volume:
|
||||
volumes = {str(input_volume): SANDBOX_INPUT_DIRECTORY}
|
||||
|
||||
sandbox_id = engine.create_container(image=image, volumes=volumes)
|
||||
logger.info("sandbox spawned", sandbox=sandbox_id, module=module_identifier)
|
||||
return sandbox_id
|
||||
|
||||
except TimeoutError as exc:
|
||||
message = f"Container creation timed out for module {module_identifier}"
|
||||
raise SandboxError(message) from exc
|
||||
except Exception as exc:
|
||||
message = f"Failed to spawn sandbox for module {module_identifier}"
|
||||
raise SandboxError(message) from exc
|
||||
|
||||
def prepare_input_directory(
|
||||
self,
|
||||
assets_path: Path,
|
||||
configuration: dict[str, Any] | None = None,
|
||||
) -> Path:
|
||||
"""Prepare input directory with assets and configuration.
|
||||
|
||||
Creates a temporary directory with input.json describing all resources.
|
||||
This directory can be volume-mounted into the container.
|
||||
|
||||
:param assets_path: Path to the assets (file or directory).
|
||||
:param configuration: Optional module configuration dict.
|
||||
:returns: Path to prepared input directory.
|
||||
:raises SandboxError: If preparation fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
|
||||
logger.info("preparing input directory", assets=str(assets_path))
|
||||
|
||||
try:
|
||||
# Create temporary directory - caller must clean it up after container finishes
|
||||
from tempfile import mkdtemp
|
||||
temp_path = Path(mkdtemp(prefix="fuzzforge-input-"))
|
||||
|
||||
# Copy assets to temp directory
|
||||
if assets_path.exists():
|
||||
if assets_path.is_file():
|
||||
# Check if it's a tar.gz archive that needs extraction
|
||||
if assets_path.suffix == ".gz" or assets_path.name.endswith(".tar.gz"):
|
||||
# Extract archive contents
|
||||
import tarfile
|
||||
with tarfile.open(assets_path, "r:gz") as tar:
|
||||
tar.extractall(path=temp_path)
|
||||
logger.debug("extracted tar.gz archive", archive=str(assets_path))
|
||||
else:
|
||||
# Single file - copy it
|
||||
import shutil
|
||||
shutil.copy2(assets_path, temp_path / assets_path.name)
|
||||
else:
|
||||
# Directory - copy all files (including subdirectories)
|
||||
import shutil
|
||||
for item in assets_path.iterdir():
|
||||
if item.is_file():
|
||||
shutil.copy2(item, temp_path / item.name)
|
||||
elif item.is_dir():
|
||||
shutil.copytree(item, temp_path / item.name)
|
||||
|
||||
# Scan files and directories and build resource list
|
||||
resources = []
|
||||
for item in temp_path.iterdir():
|
||||
if item.name == "input.json":
|
||||
continue
|
||||
if item.is_file():
|
||||
resources.append({
|
||||
"name": item.stem,
|
||||
"description": f"Input file: {item.name}",
|
||||
"kind": "unknown",
|
||||
"path": f"/data/input/{item.name}",
|
||||
})
|
||||
elif item.is_dir():
|
||||
resources.append({
|
||||
"name": item.name,
|
||||
"description": f"Input directory: {item.name}",
|
||||
"kind": "unknown",
|
||||
"path": f"/data/input/{item.name}",
|
||||
})
|
||||
|
||||
# Create input.json with settings and resources
|
||||
input_data = {
|
||||
"settings": configuration or {},
|
||||
"resources": resources,
|
||||
}
|
||||
input_json_path = temp_path / "input.json"
|
||||
input_json_path.write_text(json.dumps(input_data, indent=2))
|
||||
|
||||
logger.debug("prepared input directory", resources=len(resources), path=str(temp_path))
|
||||
return temp_path
|
||||
|
||||
except Exception as exc:
|
||||
message = f"Failed to prepare input directory"
|
||||
raise SandboxError(message) from exc
|
||||
|
||||
def _push_config_to_sandbox(self, sandbox: str, configuration: dict[str, Any]) -> None:
|
||||
"""Write module configuration to sandbox as config.json.
|
||||
|
||||
:param sandbox: The sandbox container identifier.
|
||||
:param configuration: Configuration dictionary to write.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
logger.info("writing configuration to sandbox", sandbox=sandbox)
|
||||
|
||||
with NamedTemporaryFile(mode="w", suffix=".json", delete=False) as config_file:
|
||||
config_path = Path(config_file.name)
|
||||
config_file.write(json.dumps(configuration, indent=2))
|
||||
|
||||
try:
|
||||
engine.copy_to_container(sandbox, config_path, SANDBOX_INPUT_DIRECTORY)
|
||||
except Exception as exc:
|
||||
message = f"Failed to copy config.json: {exc}"
|
||||
raise SandboxError(message) from exc
|
||||
finally:
|
||||
config_path.unlink()
|
||||
|
||||
def run_module(self, sandbox: str) -> None:
|
||||
"""Start the sandbox and execute the module.
|
||||
|
||||
:param sandbox: The sandbox container identifier.
|
||||
:raises ModuleExecutionError: If module execution fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
logger.info("starting sandbox and running module", sandbox=sandbox)
|
||||
try:
|
||||
# The container runs its ENTRYPOINT (uv run module) when started
|
||||
exit_code, stdout, stderr = engine.start_container_attached(sandbox, timeout=600)
|
||||
|
||||
if exit_code != 0:
|
||||
logger.error("module execution failed", sandbox=sandbox, stderr=stderr)
|
||||
message = f"Module execution failed: {stderr}"
|
||||
raise ModuleExecutionError(message)
|
||||
logger.info("module execution completed", sandbox=sandbox)
|
||||
|
||||
except TimeoutError as exc:
|
||||
message = f"Module execution timed out after 10 minutes in sandbox {sandbox}"
|
||||
raise ModuleExecutionError(message) from exc
|
||||
except ModuleExecutionError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
message = f"Module execution failed in sandbox {sandbox}"
|
||||
raise ModuleExecutionError(message) from exc
|
||||
|
||||
def pull_results_from_sandbox(self, sandbox: str) -> Path:
|
||||
"""Pull the results archive from the sandbox.
|
||||
|
||||
:param sandbox: The sandbox container identifier.
|
||||
:returns: Path to the downloaded results archive (tar.gz file).
|
||||
:raises SandboxError: If pull operation fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
logger.info("pulling results from sandbox", sandbox=sandbox)
|
||||
try:
|
||||
# Create temporary directory for results
|
||||
from tempfile import mkdtemp
|
||||
temp_dir = Path(mkdtemp(prefix="fuzzforge-results-"))
|
||||
|
||||
# Copy entire output directory from container
|
||||
try:
|
||||
engine.copy_from_container(sandbox, SANDBOX_OUTPUT_DIRECTORY, temp_dir)
|
||||
except Exception:
|
||||
# If output directory doesn't exist, that's okay - module may not have produced results
|
||||
logger.warning("no results found in sandbox", sandbox=sandbox)
|
||||
|
||||
# Create tar archive from results directory
|
||||
import tarfile
|
||||
|
||||
archive_file = NamedTemporaryFile(delete=False, suffix=".tar.gz")
|
||||
archive_path = Path(archive_file.name)
|
||||
archive_file.close()
|
||||
|
||||
with tarfile.open(archive_path, "w:gz") as tar:
|
||||
# The output is extracted into a subdirectory named after the source
|
||||
output_subdir = temp_dir / "output"
|
||||
if output_subdir.exists():
|
||||
for item in output_subdir.iterdir():
|
||||
tar.add(item, arcname=item.name)
|
||||
else:
|
||||
for item in temp_dir.iterdir():
|
||||
tar.add(item, arcname=item.name)
|
||||
|
||||
# Clean up temp directory
|
||||
import shutil
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
logger.info("results pulled successfully", sandbox=sandbox, archive=str(archive_path))
|
||||
return archive_path
|
||||
|
||||
except TimeoutError as exc:
|
||||
message = f"Timeout pulling results from sandbox {sandbox}"
|
||||
raise SandboxError(message) from exc
|
||||
except Exception as exc:
|
||||
message = f"Failed to pull results from sandbox {sandbox}"
|
||||
raise SandboxError(message) from exc
|
||||
|
||||
def terminate_sandbox(self, sandbox: str) -> None:
|
||||
"""Terminate and cleanup the sandbox container.
|
||||
|
||||
:param sandbox: The sandbox container identifier.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
logger.info("terminating sandbox", sandbox=sandbox)
|
||||
try:
|
||||
engine.remove_container(sandbox, force=True)
|
||||
logger.info("sandbox terminated", sandbox=sandbox)
|
||||
except Exception as exc:
|
||||
# Log but don't raise - cleanup should be best-effort
|
||||
logger.warning("failed to terminate sandbox", sandbox=sandbox, error=str(exc))
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
module_identifier: str,
|
||||
assets_path: Path,
|
||||
configuration: dict[str, Any] | None = None,
|
||||
) -> Path:
|
||||
"""Execute a module end-to-end.
|
||||
|
||||
This is the main entry point that handles the complete execution flow:
|
||||
1. Spawn sandbox
|
||||
2. Push assets and configuration
|
||||
3. Run module
|
||||
4. Pull results
|
||||
5. Terminate sandbox
|
||||
|
||||
:param module_identifier: Name/identifier of the module to execute.
|
||||
:param assets_path: Path to the input assets archive.
|
||||
:param configuration: Optional module configuration.
|
||||
:returns: Path to the results archive.
|
||||
:raises ModuleExecutionError: If any step fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
sandbox: str | None = None
|
||||
input_dir: Path | None = None
|
||||
|
||||
try:
|
||||
# 1. Prepare input directory with assets
|
||||
input_dir = self.prepare_input_directory(assets_path, configuration)
|
||||
|
||||
# 2. Spawn sandbox with volume mount
|
||||
sandbox = self.spawn_sandbox(module_identifier, input_volume=input_dir)
|
||||
|
||||
# 3. Run module
|
||||
self.run_module(sandbox)
|
||||
|
||||
# 4. Pull results
|
||||
results_path = self.pull_results_from_sandbox(sandbox)
|
||||
|
||||
logger.info(
|
||||
"module execution completed successfully",
|
||||
module=module_identifier,
|
||||
results=str(results_path),
|
||||
)
|
||||
|
||||
return results_path
|
||||
|
||||
finally:
|
||||
# 5. Always cleanup
|
||||
if sandbox:
|
||||
self.terminate_sandbox(sandbox)
|
||||
if input_dir and input_dir.exists():
|
||||
import shutil
|
||||
shutil.rmtree(input_dir, ignore_errors=True)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Continuous/Background Execution Methods
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def start_module_continuous(
|
||||
self,
|
||||
module_identifier: str,
|
||||
assets_path: Path,
|
||||
configuration: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Start a module in continuous/background mode without waiting.
|
||||
|
||||
Returns immediately with container info. Use read_module_output() to
|
||||
get current status and stop_module_continuous() to stop.
|
||||
|
||||
:param module_identifier: Name/identifier of the module to execute.
|
||||
:param assets_path: Path to the input assets archive.
|
||||
:param configuration: Optional module configuration.
|
||||
:returns: Dict with container_id, input_dir for later cleanup.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
|
||||
# 1. Prepare input directory with assets
|
||||
input_dir = self.prepare_input_directory(assets_path, configuration)
|
||||
|
||||
# 2. Spawn sandbox with volume mount
|
||||
sandbox = self.spawn_sandbox(module_identifier, input_volume=input_dir)
|
||||
|
||||
# 3. Start container (non-blocking)
|
||||
engine = self._get_engine()
|
||||
engine.start_container(sandbox)
|
||||
|
||||
logger.info(
|
||||
"module started in continuous mode",
|
||||
module=module_identifier,
|
||||
container_id=sandbox,
|
||||
)
|
||||
|
||||
return {
|
||||
"container_id": sandbox,
|
||||
"input_dir": str(input_dir),
|
||||
"module": module_identifier,
|
||||
}
|
||||
|
||||
def read_module_output(self, container_id: str, output_file: str = "/data/output/stream.jsonl") -> str:
|
||||
"""Read output file from a running module container.
|
||||
|
||||
:param container_id: The container identifier.
|
||||
:param output_file: Path to output file inside container.
|
||||
:returns: File contents as string.
|
||||
|
||||
"""
|
||||
engine = self._get_engine()
|
||||
return engine.read_file_from_container(container_id, output_file)
|
||||
|
||||
def get_module_status(self, container_id: str) -> str:
|
||||
"""Get the status of a running module container.
|
||||
|
||||
:param container_id: The container identifier.
|
||||
:returns: Container status (e.g., "running", "exited").
|
||||
|
||||
"""
|
||||
engine = self._get_engine()
|
||||
return engine.get_container_status(container_id)
|
||||
|
||||
def stop_module_continuous(self, container_id: str, input_dir: str | None = None) -> Path:
|
||||
"""Stop a continuously running module and collect results.
|
||||
|
||||
:param container_id: The container identifier.
|
||||
:param input_dir: Optional input directory to cleanup.
|
||||
:returns: Path to the results archive.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
engine = self._get_engine()
|
||||
|
||||
try:
|
||||
# 1. Stop the container gracefully
|
||||
status = engine.get_container_status(container_id)
|
||||
if status == "running":
|
||||
engine.stop_container(container_id, timeout=10)
|
||||
logger.info("stopped running container", container_id=container_id)
|
||||
|
||||
# 2. Pull results
|
||||
results_path = self.pull_results_from_sandbox(container_id)
|
||||
|
||||
logger.info("collected results from continuous session", results=str(results_path))
|
||||
|
||||
return results_path
|
||||
|
||||
finally:
|
||||
# 3. Cleanup
|
||||
self.terminate_sandbox(container_id)
|
||||
if input_dir:
|
||||
import shutil
|
||||
shutil.rmtree(input_dir, ignore_errors=True)
|
||||
@@ -0,0 +1,360 @@
|
||||
"""FuzzForge Runner - Workflow orchestration without Temporal.
|
||||
|
||||
This module provides simplified workflow orchestration for sequential
|
||||
module execution without requiring Temporal infrastructure.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
from uuid import uuid4
|
||||
|
||||
from fuzzforge_types.executions import FuzzForgeExecutionIdentifier
|
||||
|
||||
from fuzzforge_runner.exceptions import WorkflowExecutionError
|
||||
from fuzzforge_runner.executor import ModuleExecutor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fuzzforge_runner.settings import Settings
|
||||
from fuzzforge_runner.storage import LocalStorage
|
||||
from structlog.stdlib import BoundLogger
|
||||
|
||||
|
||||
def get_logger() -> BoundLogger:
|
||||
"""Get structlog logger instance.
|
||||
|
||||
:returns: Configured structlog logger.
|
||||
|
||||
"""
|
||||
from structlog import get_logger # noqa: PLC0415
|
||||
|
||||
return cast("BoundLogger", get_logger())
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkflowStep:
|
||||
"""Represents a single step in a workflow."""
|
||||
|
||||
#: Module identifier to execute.
|
||||
module_identifier: str
|
||||
|
||||
#: Optional configuration for the module.
|
||||
configuration: dict[str, Any] | None = None
|
||||
|
||||
#: Step name/label for logging.
|
||||
name: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkflowDefinition:
|
||||
"""Defines a workflow as a sequence of module executions."""
|
||||
|
||||
#: Workflow name.
|
||||
name: str
|
||||
|
||||
#: Ordered list of steps to execute.
|
||||
steps: list[WorkflowStep] = field(default_factory=list)
|
||||
|
||||
#: Optional workflow description.
|
||||
description: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class StepResult:
|
||||
"""Result of a single workflow step execution."""
|
||||
|
||||
#: Step index (0-based).
|
||||
step_index: int
|
||||
|
||||
#: Module that was executed.
|
||||
module_identifier: str
|
||||
|
||||
#: Path to the results archive.
|
||||
results_path: Path
|
||||
|
||||
#: Execution identifier.
|
||||
execution_id: str
|
||||
|
||||
#: Execution start time.
|
||||
started_at: datetime
|
||||
|
||||
#: Execution end time.
|
||||
completed_at: datetime
|
||||
|
||||
#: Whether execution was successful.
|
||||
success: bool = True
|
||||
|
||||
#: Error message if failed.
|
||||
error: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkflowResult:
|
||||
"""Result of a complete workflow execution."""
|
||||
|
||||
#: Workflow execution identifier.
|
||||
execution_id: str
|
||||
|
||||
#: Workflow name.
|
||||
name: str
|
||||
|
||||
#: Results for each step.
|
||||
steps: list[StepResult] = field(default_factory=list)
|
||||
|
||||
#: Overall success status.
|
||||
success: bool = True
|
||||
|
||||
#: Final results path (from last step).
|
||||
final_results_path: Path | None = None
|
||||
|
||||
|
||||
class WorkflowOrchestrator:
|
||||
"""Orchestrates sequential workflow execution.
|
||||
|
||||
Executes workflow steps sequentially, passing output from each
|
||||
module as input to the next. No Temporal required.
|
||||
|
||||
"""
|
||||
|
||||
#: Module executor instance.
|
||||
_executor: ModuleExecutor
|
||||
|
||||
#: Storage backend.
|
||||
_storage: LocalStorage
|
||||
|
||||
def __init__(self, executor: ModuleExecutor, storage: LocalStorage) -> None:
|
||||
"""Initialize an instance of the class.
|
||||
|
||||
:param executor: Module executor for running modules.
|
||||
:param storage: Storage backend for managing assets.
|
||||
|
||||
"""
|
||||
self._executor = executor
|
||||
self._storage = storage
|
||||
|
||||
def _generate_execution_id(self) -> str:
|
||||
"""Generate a unique execution identifier.
|
||||
|
||||
:returns: UUID string for execution tracking.
|
||||
|
||||
"""
|
||||
return str(uuid4())
|
||||
|
||||
async def execute_workflow(
|
||||
self,
|
||||
workflow: WorkflowDefinition,
|
||||
project_path: Path,
|
||||
initial_assets_path: Path | None = None,
|
||||
) -> WorkflowResult:
|
||||
"""Execute a workflow as a sequence of module executions.
|
||||
|
||||
Each step receives the output of the previous step as input.
|
||||
The first step receives the initial assets.
|
||||
|
||||
:param workflow: Workflow definition with steps to execute.
|
||||
:param project_path: Path to the project directory.
|
||||
:param initial_assets_path: Path to initial assets (optional).
|
||||
:returns: Workflow execution result.
|
||||
:raises WorkflowExecutionError: If workflow execution fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
workflow_id = self._generate_execution_id()
|
||||
|
||||
logger.info(
|
||||
"starting workflow execution",
|
||||
workflow=workflow.name,
|
||||
execution_id=workflow_id,
|
||||
steps=len(workflow.steps),
|
||||
)
|
||||
|
||||
result = WorkflowResult(
|
||||
execution_id=workflow_id,
|
||||
name=workflow.name,
|
||||
)
|
||||
|
||||
if not workflow.steps:
|
||||
logger.warning("workflow has no steps", workflow=workflow.name)
|
||||
return result
|
||||
|
||||
# Track current assets path - starts with initial assets, then uses previous step output
|
||||
current_assets: Path | None = initial_assets_path
|
||||
|
||||
# If no initial assets, try to get from project
|
||||
if current_assets is None:
|
||||
current_assets = self._storage.get_project_assets_path(project_path)
|
||||
|
||||
try:
|
||||
for step_index, step in enumerate(workflow.steps):
|
||||
step_name = step.name or f"step-{step_index}"
|
||||
step_execution_id = self._generate_execution_id()
|
||||
|
||||
logger.info(
|
||||
"executing workflow step",
|
||||
workflow=workflow.name,
|
||||
step=step_name,
|
||||
step_index=step_index,
|
||||
module=step.module_identifier,
|
||||
execution_id=step_execution_id,
|
||||
)
|
||||
|
||||
started_at = datetime.now(UTC)
|
||||
|
||||
try:
|
||||
# Ensure we have assets for this step
|
||||
if current_assets is None or not current_assets.exists():
|
||||
if step_index == 0:
|
||||
# First step with no assets - create empty archive
|
||||
current_assets = self._storage.create_empty_assets_archive(project_path)
|
||||
else:
|
||||
message = f"No assets available for step {step_index}"
|
||||
raise WorkflowExecutionError(message)
|
||||
|
||||
# Execute the module
|
||||
results_path = await self._executor.execute(
|
||||
module_identifier=step.module_identifier,
|
||||
assets_path=current_assets,
|
||||
configuration=step.configuration,
|
||||
)
|
||||
|
||||
completed_at = datetime.now(UTC)
|
||||
|
||||
# Store results to persistent storage
|
||||
stored_path = self._storage.store_execution_results(
|
||||
project_path=project_path,
|
||||
workflow_id=workflow_id,
|
||||
step_index=step_index,
|
||||
execution_id=step_execution_id,
|
||||
results_path=results_path,
|
||||
)
|
||||
|
||||
# Clean up temporary results archive after storing
|
||||
try:
|
||||
if results_path.exists() and results_path != stored_path:
|
||||
results_path.unlink()
|
||||
except Exception as cleanup_exc:
|
||||
logger.warning("failed to clean up temporary results", path=str(results_path), error=str(cleanup_exc))
|
||||
|
||||
# Record step result with stored path
|
||||
step_result = StepResult(
|
||||
step_index=step_index,
|
||||
module_identifier=step.module_identifier,
|
||||
results_path=stored_path,
|
||||
execution_id=step_execution_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
success=True,
|
||||
)
|
||||
result.steps.append(step_result)
|
||||
|
||||
# Next step uses this step's output
|
||||
current_assets = stored_path
|
||||
|
||||
logger.info(
|
||||
"workflow step completed",
|
||||
step=step_name,
|
||||
step_index=step_index,
|
||||
duration_seconds=(completed_at - started_at).total_seconds(),
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
completed_at = datetime.now(UTC)
|
||||
error_msg = str(exc)
|
||||
|
||||
step_result = StepResult(
|
||||
step_index=step_index,
|
||||
module_identifier=step.module_identifier,
|
||||
results_path=Path(),
|
||||
execution_id=step_execution_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
success=False,
|
||||
error=error_msg,
|
||||
)
|
||||
result.steps.append(step_result)
|
||||
result.success = False
|
||||
|
||||
logger.error(
|
||||
"workflow step failed",
|
||||
step=step_name,
|
||||
step_index=step_index,
|
||||
error=error_msg,
|
||||
)
|
||||
|
||||
# Stop workflow on failure
|
||||
break
|
||||
|
||||
# Set final results path
|
||||
if result.steps and result.steps[-1].success:
|
||||
result.final_results_path = result.steps[-1].results_path
|
||||
|
||||
logger.info(
|
||||
"workflow execution completed",
|
||||
workflow=workflow.name,
|
||||
execution_id=workflow_id,
|
||||
success=result.success,
|
||||
completed_steps=len([s for s in result.steps if s.success]),
|
||||
total_steps=len(workflow.steps),
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as exc:
|
||||
message = f"Workflow execution failed: {exc}"
|
||||
logger.exception("workflow execution error", workflow=workflow.name)
|
||||
raise WorkflowExecutionError(message) from exc
|
||||
|
||||
async def execute_single_module(
|
||||
self,
|
||||
module_identifier: str,
|
||||
project_path: Path,
|
||||
assets_path: Path | None = None,
|
||||
configuration: dict[str, Any] | None = None,
|
||||
) -> StepResult:
|
||||
"""Execute a single module (convenience method).
|
||||
|
||||
This is a simplified interface for executing a single module
|
||||
outside of a workflow context.
|
||||
|
||||
:param module_identifier: Module to execute.
|
||||
:param project_path: Project directory path.
|
||||
:param assets_path: Optional path to input assets.
|
||||
:param configuration: Optional module configuration.
|
||||
:returns: Execution result.
|
||||
|
||||
"""
|
||||
workflow = WorkflowDefinition(
|
||||
name=f"single-{module_identifier}",
|
||||
steps=[
|
||||
WorkflowStep(
|
||||
module_identifier=module_identifier,
|
||||
configuration=configuration,
|
||||
name="main",
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
result = await self.execute_workflow(
|
||||
workflow=workflow,
|
||||
project_path=project_path,
|
||||
initial_assets_path=assets_path,
|
||||
)
|
||||
|
||||
if result.steps:
|
||||
return result.steps[0]
|
||||
|
||||
# Should not happen, but handle gracefully
|
||||
return StepResult(
|
||||
step_index=0,
|
||||
module_identifier=module_identifier,
|
||||
results_path=Path(),
|
||||
execution_id=result.execution_id,
|
||||
started_at=datetime.now(UTC),
|
||||
completed_at=datetime.now(UTC),
|
||||
success=False,
|
||||
error="No step results produced",
|
||||
)
|
||||
@@ -0,0 +1,378 @@
|
||||
"""FuzzForge Runner - Main runner interface.
|
||||
|
||||
This module provides the high-level interface for FuzzForge OSS,
|
||||
coordinating module execution, workflow orchestration, and storage.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from fuzzforge_runner.executor import ModuleExecutor
|
||||
from fuzzforge_runner.orchestrator import (
|
||||
StepResult,
|
||||
WorkflowDefinition,
|
||||
WorkflowOrchestrator,
|
||||
WorkflowResult,
|
||||
WorkflowStep,
|
||||
)
|
||||
from fuzzforge_runner.settings import Settings
|
||||
from fuzzforge_runner.storage import LocalStorage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from structlog.stdlib import BoundLogger
|
||||
|
||||
|
||||
def get_logger() -> BoundLogger:
|
||||
"""Get structlog logger instance.
|
||||
|
||||
:returns: Configured structlog logger.
|
||||
|
||||
"""
|
||||
from structlog import get_logger # noqa: PLC0415
|
||||
|
||||
return cast("BoundLogger", get_logger())
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModuleInfo:
|
||||
"""Information about an available module."""
|
||||
|
||||
#: Module identifier/name.
|
||||
identifier: str
|
||||
|
||||
#: Module description.
|
||||
description: str | None = None
|
||||
|
||||
#: Module version.
|
||||
version: str | None = None
|
||||
|
||||
#: Whether module image exists locally.
|
||||
available: bool = True
|
||||
|
||||
|
||||
class Runner:
|
||||
"""Main FuzzForge Runner interface.
|
||||
|
||||
Provides a unified interface for:
|
||||
- Module discovery and execution
|
||||
- Workflow orchestration
|
||||
- Project and asset management
|
||||
|
||||
This is the primary entry point for OSS users and the MCP server.
|
||||
|
||||
"""
|
||||
|
||||
#: Runner settings.
|
||||
_settings: Settings
|
||||
|
||||
#: Module executor.
|
||||
_executor: ModuleExecutor
|
||||
|
||||
#: Local storage backend.
|
||||
_storage: LocalStorage
|
||||
|
||||
#: Workflow orchestrator.
|
||||
_orchestrator: WorkflowOrchestrator
|
||||
|
||||
def __init__(self, settings: Settings | None = None) -> None:
|
||||
"""Initialize an instance of the class.
|
||||
|
||||
:param settings: Runner settings. If None, loads from environment.
|
||||
|
||||
"""
|
||||
self._settings = settings or Settings()
|
||||
self._executor = ModuleExecutor(self._settings)
|
||||
self._storage = LocalStorage(self._settings.storage.path)
|
||||
self._orchestrator = WorkflowOrchestrator(self._executor, self._storage)
|
||||
|
||||
@property
|
||||
def settings(self) -> Settings:
|
||||
"""Get runner settings.
|
||||
|
||||
:returns: Current settings instance.
|
||||
|
||||
"""
|
||||
return self._settings
|
||||
|
||||
@property
|
||||
def storage(self) -> LocalStorage:
|
||||
"""Get storage backend.
|
||||
|
||||
:returns: Storage instance.
|
||||
|
||||
"""
|
||||
return self._storage
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Project Management
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def init_project(self, project_path: Path) -> Path:
|
||||
"""Initialize a new project.
|
||||
|
||||
Creates necessary storage directories for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: Path to the project storage directory.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
logger.info("initializing project", path=str(project_path))
|
||||
return self._storage.init_project(project_path)
|
||||
|
||||
def set_project_assets(self, project_path: Path, assets_path: Path) -> Path:
|
||||
"""Set initial assets for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param assets_path: Path to assets (file or directory).
|
||||
:returns: Path to stored assets.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
logger.info("setting project assets", project=str(project_path), assets=str(assets_path))
|
||||
return self._storage.store_assets(project_path, assets_path)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Module Discovery
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def list_modules(self) -> list[ModuleInfo]:
|
||||
"""List available modules.
|
||||
|
||||
Discovers modules from the configured modules directory.
|
||||
|
||||
:returns: List of available modules.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
modules: list[ModuleInfo] = []
|
||||
|
||||
modules_path = self._settings.modules_path
|
||||
if not modules_path.exists():
|
||||
logger.warning("modules directory not found", path=str(modules_path))
|
||||
return modules
|
||||
|
||||
# Look for module directories (each should have a Dockerfile or be a built image)
|
||||
for item in modules_path.iterdir():
|
||||
if item.is_dir():
|
||||
# Check for module markers
|
||||
has_dockerfile = (item / "Dockerfile").exists()
|
||||
has_pyproject = (item / "pyproject.toml").exists()
|
||||
|
||||
if has_dockerfile or has_pyproject:
|
||||
modules.append(
|
||||
ModuleInfo(
|
||||
identifier=item.name,
|
||||
available=has_dockerfile,
|
||||
)
|
||||
)
|
||||
|
||||
logger.info("discovered modules", count=len(modules))
|
||||
return modules
|
||||
|
||||
def list_module_images(self, filter_prefix: str = "localhost/") -> list[ModuleInfo]:
|
||||
"""List available module images from the container engine.
|
||||
|
||||
Uses the container engine API to discover built module images.
|
||||
|
||||
:param filter_prefix: Prefix to filter images (default: "localhost/").
|
||||
:returns: List of available module images.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
modules: list[ModuleInfo] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
# Infrastructure images to skip
|
||||
skip_images = {"fuzzforge-modules-sdk", "fuzzforge-runner", "fuzzforge-api"}
|
||||
|
||||
engine = self._executor._get_engine()
|
||||
images = engine.list_images(filter_prefix=filter_prefix)
|
||||
|
||||
for image in images:
|
||||
# Only include :latest images
|
||||
if image.tag != "latest":
|
||||
continue
|
||||
|
||||
# Extract module name from repository
|
||||
full_name = image.repository.split("/")[-1]
|
||||
|
||||
# Skip infrastructure images
|
||||
if full_name in skip_images:
|
||||
continue
|
||||
|
||||
# Extract clean module name (remove fuzzforge-module- prefix if present)
|
||||
if full_name.startswith("fuzzforge-module-"):
|
||||
module_name = full_name.replace("fuzzforge-module-", "")
|
||||
else:
|
||||
module_name = full_name
|
||||
|
||||
# Skip UUID-like names (temporary/broken containers)
|
||||
if module_name.count("-") >= 4 and len(module_name) > 30:
|
||||
continue
|
||||
|
||||
# Add unique modules
|
||||
if module_name not in seen:
|
||||
seen.add(module_name)
|
||||
modules.append(
|
||||
ModuleInfo(
|
||||
identifier=module_name,
|
||||
description=None,
|
||||
version=image.tag,
|
||||
available=True,
|
||||
)
|
||||
)
|
||||
|
||||
logger.info("listed module images", count=len(modules))
|
||||
return modules
|
||||
|
||||
def get_module_info(self, module_identifier: str) -> ModuleInfo | None:
|
||||
"""Get information about a specific module.
|
||||
|
||||
:param module_identifier: Module identifier to look up.
|
||||
:returns: Module info, or None if not found.
|
||||
|
||||
"""
|
||||
modules = self.list_modules()
|
||||
for module in modules:
|
||||
if module.identifier == module_identifier:
|
||||
return module
|
||||
return None
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Module Execution
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def execute_module(
|
||||
self,
|
||||
module_identifier: str,
|
||||
project_path: Path,
|
||||
configuration: dict[str, Any] | None = None,
|
||||
assets_path: Path | None = None,
|
||||
) -> StepResult:
|
||||
"""Execute a single module.
|
||||
|
||||
:param module_identifier: Module to execute.
|
||||
:param project_path: Path to the project directory.
|
||||
:param configuration: Optional module configuration.
|
||||
:param assets_path: Optional path to input assets.
|
||||
:returns: Execution result.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
logger.info(
|
||||
"executing module",
|
||||
module=module_identifier,
|
||||
project=str(project_path),
|
||||
)
|
||||
|
||||
return await self._orchestrator.execute_single_module(
|
||||
module_identifier=module_identifier,
|
||||
project_path=project_path,
|
||||
assets_path=assets_path,
|
||||
configuration=configuration,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Workflow Execution
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def execute_workflow(
|
||||
self,
|
||||
workflow: WorkflowDefinition,
|
||||
project_path: Path,
|
||||
initial_assets_path: Path | None = None,
|
||||
) -> WorkflowResult:
|
||||
"""Execute a workflow.
|
||||
|
||||
:param workflow: Workflow definition with steps.
|
||||
:param project_path: Path to the project directory.
|
||||
:param initial_assets_path: Optional path to initial assets.
|
||||
:returns: Workflow execution result.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
logger.info(
|
||||
"executing workflow",
|
||||
workflow=workflow.name,
|
||||
project=str(project_path),
|
||||
steps=len(workflow.steps),
|
||||
)
|
||||
|
||||
return await self._orchestrator.execute_workflow(
|
||||
workflow=workflow,
|
||||
project_path=project_path,
|
||||
initial_assets_path=initial_assets_path,
|
||||
)
|
||||
|
||||
def create_workflow(
|
||||
self,
|
||||
name: str,
|
||||
steps: list[tuple[str, dict[str, Any] | None]],
|
||||
description: str | None = None,
|
||||
) -> WorkflowDefinition:
|
||||
"""Create a workflow definition.
|
||||
|
||||
Convenience method for creating workflows programmatically.
|
||||
|
||||
:param name: Workflow name.
|
||||
:param steps: List of (module_identifier, configuration) tuples.
|
||||
:param description: Optional workflow description.
|
||||
:returns: Workflow definition.
|
||||
|
||||
"""
|
||||
workflow_steps = [
|
||||
WorkflowStep(
|
||||
module_identifier=module_id,
|
||||
configuration=config,
|
||||
name=f"step-{i}",
|
||||
)
|
||||
for i, (module_id, config) in enumerate(steps)
|
||||
]
|
||||
|
||||
return WorkflowDefinition(
|
||||
name=name,
|
||||
steps=workflow_steps,
|
||||
description=description,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Results Management
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def get_execution_results(
|
||||
self,
|
||||
project_path: Path,
|
||||
execution_id: str,
|
||||
) -> Path | None:
|
||||
"""Get results for an execution.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param execution_id: Execution ID.
|
||||
:returns: Path to results archive, or None if not found.
|
||||
|
||||
"""
|
||||
return self._storage.get_execution_results(project_path, execution_id)
|
||||
|
||||
def list_executions(self, project_path: Path) -> list[str]:
|
||||
"""List all executions for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: List of execution IDs.
|
||||
|
||||
"""
|
||||
return self._storage.list_executions(project_path)
|
||||
|
||||
def extract_results(self, results_path: Path, destination: Path) -> Path:
|
||||
"""Extract results archive to a directory.
|
||||
|
||||
:param results_path: Path to results archive.
|
||||
:param destination: Destination directory.
|
||||
:returns: Path to extracted directory.
|
||||
|
||||
"""
|
||||
return self._storage.extract_results(results_path, destination)
|
||||
@@ -0,0 +1,114 @@
|
||||
"""FuzzForge Runner settings configuration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class EngineType(StrEnum):
|
||||
"""Supported container engine types."""
|
||||
|
||||
DOCKER = "docker"
|
||||
PODMAN = "podman"
|
||||
|
||||
|
||||
class EngineSettings(BaseModel):
|
||||
"""Container engine configuration."""
|
||||
|
||||
#: Type of container engine to use.
|
||||
type: EngineType = EngineType.PODMAN
|
||||
|
||||
#: Path to the container engine socket (only used as fallback).
|
||||
socket: str = Field(default="")
|
||||
|
||||
#: Custom graph root for container storage (isolated from system).
|
||||
#: When set, uses CLI mode instead of socket for better portability.
|
||||
graphroot: Path = Field(default=Path.home() / ".fuzzforge" / "containers" / "storage")
|
||||
|
||||
#: Custom run root for container runtime state.
|
||||
runroot: Path = Field(default=Path.home() / ".fuzzforge" / "containers" / "run")
|
||||
|
||||
|
||||
class StorageSettings(BaseModel):
|
||||
"""Storage configuration for local or S3 storage."""
|
||||
|
||||
#: Storage backend type.
|
||||
type: Literal["local", "s3"] = "local"
|
||||
|
||||
#: Base path for local storage (used when type is "local").
|
||||
path: Path = Field(default=Path.home() / ".fuzzforge" / "storage")
|
||||
|
||||
#: S3 endpoint URL (used when type is "s3").
|
||||
s3_endpoint: str | None = None
|
||||
|
||||
#: S3 access key (used when type is "s3").
|
||||
s3_access_key: str | None = None
|
||||
|
||||
#: S3 secret key (used when type is "s3").
|
||||
s3_secret_key: str | None = None
|
||||
|
||||
|
||||
class ProjectSettings(BaseModel):
|
||||
"""Project configuration."""
|
||||
|
||||
#: Default path for FuzzForge projects.
|
||||
default_path: Path = Field(default=Path.home() / ".fuzzforge" / "projects")
|
||||
|
||||
|
||||
class RegistrySettings(BaseModel):
|
||||
"""Container registry configuration for module images."""
|
||||
|
||||
#: Registry URL for pulling module images.
|
||||
url: str = Field(default="ghcr.io/fuzzinglabs")
|
||||
|
||||
#: Default tag to use when pulling images.
|
||||
default_tag: str = Field(default="latest")
|
||||
|
||||
#: Registry username for authentication (optional).
|
||||
username: str | None = None
|
||||
|
||||
#: Registry password/token for authentication (optional).
|
||||
password: str | None = None
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""FuzzForge Runner settings.
|
||||
|
||||
Settings can be configured via environment variables with the prefix
|
||||
``FUZZFORGE_``. Nested settings use underscore as delimiter.
|
||||
|
||||
Example:
|
||||
``FUZZFORGE_ENGINE_TYPE=docker``
|
||||
``FUZZFORGE_STORAGE_PATH=/data/fuzzforge``
|
||||
``FUZZFORGE_MODULES_PATH=/path/to/modules``
|
||||
|
||||
"""
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
case_sensitive=False,
|
||||
env_nested_delimiter="__",
|
||||
env_prefix="FUZZFORGE_",
|
||||
)
|
||||
|
||||
#: Container engine settings.
|
||||
engine: EngineSettings = Field(default_factory=EngineSettings)
|
||||
|
||||
#: Storage settings.
|
||||
storage: StorageSettings = Field(default_factory=StorageSettings)
|
||||
|
||||
#: Project settings.
|
||||
project: ProjectSettings = Field(default_factory=ProjectSettings)
|
||||
|
||||
#: Container registry settings.
|
||||
registry: RegistrySettings = Field(default_factory=RegistrySettings)
|
||||
|
||||
#: Path to modules directory (for development/local builds).
|
||||
modules_path: Path = Field(default=Path.home() / ".fuzzforge" / "modules")
|
||||
|
||||
#: Enable debug logging.
|
||||
debug: bool = False
|
||||
@@ -0,0 +1,359 @@
|
||||
"""FuzzForge Runner - Local filesystem storage.
|
||||
|
||||
This module provides local filesystem storage as an alternative to S3,
|
||||
enabling zero-configuration operation for OSS deployments.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from pathlib import Path, PurePath
|
||||
from tarfile import open as Archive # noqa: N812
|
||||
from tempfile import NamedTemporaryFile, TemporaryDirectory
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from fuzzforge_runner.constants import RESULTS_ARCHIVE_FILENAME
|
||||
from fuzzforge_runner.exceptions import StorageError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from structlog.stdlib import BoundLogger
|
||||
|
||||
|
||||
def get_logger() -> BoundLogger:
|
||||
"""Get structlog logger instance.
|
||||
|
||||
:returns: Configured structlog logger.
|
||||
|
||||
"""
|
||||
from structlog import get_logger # noqa: PLC0415
|
||||
|
||||
return cast("BoundLogger", get_logger())
|
||||
|
||||
|
||||
class LocalStorage:
|
||||
"""Local filesystem storage backend.
|
||||
|
||||
Provides S3-like operations using local filesystem, enabling
|
||||
FuzzForge operation without external storage infrastructure.
|
||||
|
||||
Directory structure:
|
||||
{base_path}/
|
||||
projects/
|
||||
{project_id}/
|
||||
assets/ # Initial project assets
|
||||
runs/
|
||||
{execution_id}/
|
||||
results.tar.gz
|
||||
{workflow_id}/
|
||||
modules/
|
||||
step-0-{exec_id}/
|
||||
results.tar.gz
|
||||
|
||||
"""
|
||||
|
||||
#: Base path for all storage operations.
|
||||
_base_path: Path
|
||||
|
||||
def __init__(self, base_path: Path) -> None:
|
||||
"""Initialize an instance of the class.
|
||||
|
||||
:param base_path: Root directory for storage.
|
||||
|
||||
"""
|
||||
self._base_path = base_path
|
||||
self._ensure_base_path()
|
||||
|
||||
def _ensure_base_path(self) -> None:
|
||||
"""Ensure the base storage directory exists."""
|
||||
self._base_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _get_project_path(self, project_path: Path) -> Path:
|
||||
"""Get the storage path for a project.
|
||||
|
||||
:param project_path: Original project path (used as identifier).
|
||||
:returns: Storage path for the project.
|
||||
|
||||
"""
|
||||
# Use project path name as identifier
|
||||
project_id = project_path.name
|
||||
return self._base_path / "projects" / project_id
|
||||
|
||||
def init_project(self, project_path: Path) -> Path:
|
||||
"""Initialize storage for a new project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: Path to the project storage directory.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
storage_path = self._get_project_path(project_path)
|
||||
|
||||
# Create directory structure
|
||||
(storage_path / "assets").mkdir(parents=True, exist_ok=True)
|
||||
(storage_path / "runs").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.info("initialized project storage", project=project_path.name, storage=str(storage_path))
|
||||
|
||||
return storage_path
|
||||
|
||||
def get_project_assets_path(self, project_path: Path) -> Path | None:
|
||||
"""Get the path to project assets archive.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: Path to assets archive, or None if not found.
|
||||
|
||||
"""
|
||||
storage_path = self._get_project_path(project_path)
|
||||
assets_dir = storage_path / "assets"
|
||||
|
||||
# Look for assets archive
|
||||
archive_path = assets_dir / "assets.tar.gz"
|
||||
if archive_path.exists():
|
||||
return archive_path
|
||||
|
||||
# Check if there are any files in assets directory
|
||||
if assets_dir.exists() and any(assets_dir.iterdir()):
|
||||
# Create archive from directory contents
|
||||
return self._create_archive_from_directory(assets_dir)
|
||||
|
||||
return None
|
||||
|
||||
def _create_archive_from_directory(self, directory: Path) -> Path:
|
||||
"""Create a tar.gz archive from a directory's contents.
|
||||
|
||||
:param directory: Directory to archive.
|
||||
:returns: Path to the created archive.
|
||||
|
||||
"""
|
||||
archive_path = directory.parent / f"{directory.name}.tar.gz"
|
||||
|
||||
with Archive(archive_path, "w:gz") as tar:
|
||||
for item in directory.iterdir():
|
||||
tar.add(item, arcname=item.name)
|
||||
|
||||
return archive_path
|
||||
|
||||
def create_empty_assets_archive(self, project_path: Path) -> Path:
|
||||
"""Create an empty assets archive for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: Path to the empty archive.
|
||||
|
||||
"""
|
||||
storage_path = self._get_project_path(project_path)
|
||||
assets_dir = storage_path / "assets"
|
||||
assets_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
archive_path = assets_dir / "assets.tar.gz"
|
||||
|
||||
# Create empty archive
|
||||
with Archive(archive_path, "w:gz") as tar:
|
||||
pass # Empty archive
|
||||
|
||||
return archive_path
|
||||
|
||||
def store_assets(self, project_path: Path, assets_path: Path) -> Path:
|
||||
"""Store project assets from a local path.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param assets_path: Source path (file or directory) to store.
|
||||
:returns: Path to the stored assets.
|
||||
:raises StorageError: If storage operation fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
storage_path = self._get_project_path(project_path)
|
||||
assets_dir = storage_path / "assets"
|
||||
assets_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
if assets_path.is_file():
|
||||
# Copy archive directly
|
||||
dest_path = assets_dir / "assets.tar.gz"
|
||||
shutil.copy2(assets_path, dest_path)
|
||||
else:
|
||||
# Create archive from directory
|
||||
dest_path = assets_dir / "assets.tar.gz"
|
||||
with Archive(dest_path, "w:gz") as tar:
|
||||
for item in assets_path.iterdir():
|
||||
tar.add(item, arcname=item.name)
|
||||
|
||||
logger.info("stored project assets", project=project_path.name, path=str(dest_path))
|
||||
return dest_path
|
||||
|
||||
except Exception as exc:
|
||||
message = f"Failed to store assets: {exc}"
|
||||
raise StorageError(message) from exc
|
||||
|
||||
def store_execution_results(
|
||||
self,
|
||||
project_path: Path,
|
||||
workflow_id: str | None,
|
||||
step_index: int,
|
||||
execution_id: str,
|
||||
results_path: Path,
|
||||
) -> Path:
|
||||
"""Store execution results.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param workflow_id: Workflow execution ID (None for standalone).
|
||||
:param step_index: Step index in workflow.
|
||||
:param execution_id: Module execution ID.
|
||||
:param results_path: Path to results archive to store.
|
||||
:returns: Path to the stored results.
|
||||
:raises StorageError: If storage operation fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
storage_path = self._get_project_path(project_path)
|
||||
|
||||
try:
|
||||
if workflow_id:
|
||||
# Part of workflow
|
||||
dest_dir = storage_path / "runs" / workflow_id / "modules" / f"step-{step_index}-{execution_id}"
|
||||
else:
|
||||
# Standalone execution
|
||||
dest_dir = storage_path / "runs" / execution_id
|
||||
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest_path = dest_dir / RESULTS_ARCHIVE_FILENAME
|
||||
|
||||
shutil.copy2(results_path, dest_path)
|
||||
|
||||
logger.info(
|
||||
"stored execution results",
|
||||
execution_id=execution_id,
|
||||
path=str(dest_path),
|
||||
)
|
||||
|
||||
return dest_path
|
||||
|
||||
except Exception as exc:
|
||||
message = f"Failed to store results: {exc}"
|
||||
raise StorageError(message) from exc
|
||||
|
||||
def get_execution_results(
|
||||
self,
|
||||
project_path: Path,
|
||||
execution_id: str,
|
||||
workflow_id: str | None = None,
|
||||
step_index: int | None = None,
|
||||
) -> Path | None:
|
||||
"""Retrieve execution results.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param execution_id: Module execution ID.
|
||||
:param workflow_id: Workflow execution ID (None for standalone).
|
||||
:param step_index: Step index in workflow.
|
||||
:returns: Path to results archive, or None if not found.
|
||||
|
||||
"""
|
||||
storage_path = self._get_project_path(project_path)
|
||||
|
||||
if workflow_id and step_index is not None:
|
||||
# Direct workflow path lookup
|
||||
results_path = (
|
||||
storage_path / "runs" / workflow_id / "modules" / f"step-{step_index}-{execution_id}" / RESULTS_ARCHIVE_FILENAME
|
||||
)
|
||||
if results_path.exists():
|
||||
return results_path
|
||||
|
||||
# Try standalone path
|
||||
results_path = storage_path / "runs" / execution_id / RESULTS_ARCHIVE_FILENAME
|
||||
if results_path.exists():
|
||||
return results_path
|
||||
|
||||
# Search for execution_id in all workflow runs
|
||||
runs_dir = storage_path / "runs"
|
||||
if runs_dir.exists():
|
||||
for workflow_dir in runs_dir.iterdir():
|
||||
if not workflow_dir.is_dir():
|
||||
continue
|
||||
|
||||
# Check if this is a workflow directory (has 'modules' subdirectory)
|
||||
modules_dir = workflow_dir / "modules"
|
||||
if modules_dir.exists() and modules_dir.is_dir():
|
||||
# Search for step directories containing this execution_id
|
||||
for step_dir in modules_dir.iterdir():
|
||||
if step_dir.is_dir() and execution_id in step_dir.name:
|
||||
results_path = step_dir / RESULTS_ARCHIVE_FILENAME
|
||||
if results_path.exists():
|
||||
return results_path
|
||||
|
||||
return None
|
||||
|
||||
def list_executions(self, project_path: Path) -> list[str]:
|
||||
"""List all execution IDs for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: List of execution IDs.
|
||||
|
||||
"""
|
||||
storage_path = self._get_project_path(project_path)
|
||||
runs_dir = storage_path / "runs"
|
||||
|
||||
if not runs_dir.exists():
|
||||
return []
|
||||
|
||||
return [d.name for d in runs_dir.iterdir() if d.is_dir()]
|
||||
|
||||
def delete_execution(self, project_path: Path, execution_id: str) -> bool:
|
||||
"""Delete an execution and its results.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:param execution_id: Execution ID to delete.
|
||||
:returns: True if deleted, False if not found.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
storage_path = self._get_project_path(project_path)
|
||||
exec_path = storage_path / "runs" / execution_id
|
||||
|
||||
if exec_path.exists():
|
||||
shutil.rmtree(exec_path)
|
||||
logger.info("deleted execution", execution_id=execution_id)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def delete_project(self, project_path: Path) -> bool:
|
||||
"""Delete all storage for a project.
|
||||
|
||||
:param project_path: Path to the project directory.
|
||||
:returns: True if deleted, False if not found.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
storage_path = self._get_project_path(project_path)
|
||||
|
||||
if storage_path.exists():
|
||||
shutil.rmtree(storage_path)
|
||||
logger.info("deleted project storage", project=project_path.name)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def extract_results(self, results_path: Path, destination: Path) -> Path:
|
||||
"""Extract a results archive to a destination directory.
|
||||
|
||||
:param results_path: Path to the results archive.
|
||||
:param destination: Directory to extract to.
|
||||
:returns: Path to extracted directory.
|
||||
:raises StorageError: If extraction fails.
|
||||
|
||||
"""
|
||||
logger = get_logger()
|
||||
|
||||
try:
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with Archive(results_path, "r:gz") as tar:
|
||||
tar.extractall(path=destination)
|
||||
|
||||
logger.info("extracted results", source=str(results_path), destination=str(destination))
|
||||
return destination
|
||||
|
||||
except Exception as exc:
|
||||
message = f"Failed to extract results: {exc}"
|
||||
raise StorageError(message) from exc
|
||||
Reference in New Issue
Block a user