build(deps): bump mcp from 1.12.4 to 1.19.0

Bumps [mcp](https://github.com/modelcontextprotocol/python-sdk) from 1.12.4 to 1.19.0. - [Release notes](https://github.com/modelcontextprotocol/python-sdk/releases) - [Changelog](https://github.com/modelcontextprotocol/python-sdk/blob/main/RELEASE.md) - [Commits](https://github.com/modelcontextprotocol/python-sdk/compare/v1.12.4...v1.19.0) --- updated-dependencies: - dependency-name: mcp dependency-version: 1.19.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>
Merge pull request #252 from Davda-James/feat/mcp_client_logging
2026-06-24 14:19:55 +02:00 · 2025-10-24 17:12:36 +00:00 · 2025-08-21 15:00:22 +03:00 · 2025-08-13 13:52:19 +03:00 · 2025-08-13 13:46:37 +03:00 · 2025-08-10 14:33:25 +05:30
57 changed files with 4662 additions and 2883 deletions
@@ -9,7 +9,7 @@ on:
      - 0.*

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.5"

 jobs:
  if_release:
@@ -7,7 +7,7 @@ on:
    branches: [main]

 env:
-  POETRY_VERSION: "1.7.1"
+  POETRY_VERSION: "1.8.5"
  OPENAI_API_KEY: "sk-fake"

 jobs:
@@ -17,4 +17,5 @@ inv/
 scripts/
 docx/
 agentic_security.toml
-/venv
+/venv
+*.csv
@@ -1,5 +1,5 @@
 # Build stage
-FROM python:3.11-slim as builder
+FROM python:3.11-slim AS builder

 WORKDIR /app

@@ -14,8 +14,15 @@ RUN poetry self add "poetry-plugin-export"
 # Copy only dependency files to leverage Docker layer caching
 COPY pyproject.toml poetry.lock ./

+# update lock file to avoid failure
+RUN poetry lock
+
 # Install dependencies
 RUN poetry export -f requirements.txt --without-hashes -o requirements.txt
+
+# Install wheel (required to build packages like fire)
+RUN pip install --upgrade pip setuptools wheel
+
 RUN pip install --no-cache-dir -r requirements.txt

 # Runtime stage
@@ -21,9 +21,7 @@
  <a href="https://pypi.org/project/agentic-security/">
    <img alt="PyPI Version" src="https://img.shields.io/pypi/v/agentic-security?style=for-the-badge&logo=pypi&labelColor=000000&color=00CCFF" />
  </a>
-  <a href="https://discord.gg/stw3DfZQ">
-    <img alt="Join Discord" src="https://img.shields.io/badge/Discord-Join%20Us-black?style=for-the-badge&logo=discord&labelColor=000000&color=DD55FF" />
-  </a>
+
 </p>


@@ -83,8 +81,7 @@ agentic_security --port=PORT --host=HOST

 ## UI 🧙

-<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/dq0w2rtm9/image/upload/v1736433557/z0bsyzhsqlgcr3w4ovwp.gif">
-<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/dq0w2rtm9/image/upload/v1741192668/final_aa9jhb.gif">
+<img width="100%" alt="booking-screen" src="https://raw.githubusercontent.com/msoedov/agentic_security/refs/heads/main/docs/images/demo.gif">

 ## LLM kwargs

@@ -403,6 +400,16 @@ This setup ensures a continuous integration approach towards maintaining securit

 The `Module` class is designed to manage prompt processing and interaction with external AI models and tools. It supports fetching, processing, and posting prompts asynchronously for model vulnerabilities. Check out [module.md](https://github.com/msoedov/agentic_security/blob/main/docs/module.md) for details.

+
+## MCP server
+
+```shell
+pip install -U mcp
+
+# From cloned directory
+mcp install agentic_security/mcp/main.py
+```
+
 ## Documentation

 For more detailed information on how to use Agentic Security, including advanced features and customization options, please refer to the official documentation.
@@ -429,6 +436,7 @@ We’re just getting started! Here’s what’s on the horizon:

 Note: All dates are tentative and subject to change based on project progress and priorities.

+
 ## 👋 Contributing

 Contributions to Agentic Security are welcome! If you'd like to contribute, please follow these steps:
@@ -1,3 +1,3 @@
-from .lib import AgenticSecurity
+from .lib import SecurityScanner

-__all__ = ["AgenticSecurity"]
+__all__ = ["SecurityScanner"]
@@ -5,7 +5,7 @@ import fire
 import uvicorn

 from agentic_security.app import app
-from agentic_security.lib import AgenticSecurity
+from agentic_security.lib import SecurityScanner
 from agentic_security.misc.banner import init_banner


@@ -32,14 +32,14 @@ class CLI:
        Run Agentic Security in CI mode.
        """
        sys.path.append(os.path.dirname("."))
-        AgenticSecurity().entrypoint()
+        SecurityScanner().entrypoint()

    def init(self, host: str = "0.0.0.0", port: int = 8718):
        """
        Generate the default CI configuration file.
        """
        sys.path.append(os.path.dirname("."))
-        AgenticSecurity().generate_default_settings(host, port)
+        SecurityScanner().generate_default_settings(host, port)

    i = init

@@ -48,7 +48,7 @@ class CLI:
        List all available security checks.
        """
        sys.path.append(os.path.dirname("."))
-        AgenticSecurity().list_checks()
+        SecurityScanner().list_checks()


 def main():
@@ -1,5 +1,4 @@
 import asyncio
-import logging
 import os
 from typing import Any

@@ -10,12 +9,11 @@ from pydantic import BaseModel, ConfigDict, Field

 # Assuming LLMSpec is defined elsewhere; placeholder import
 from agentic_security.http_spec import LLMSpec
+from agentic_security.logutils import logger

 LLM_SPECS = []  # Populate with LLM spec strings if needed

 # Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)


 # Define AgentSpecification model
@@ -1,5 +1,4 @@
 import asyncio
-import logging
 from typing import Any

 import httpx
@@ -8,13 +7,10 @@ from pydantic_ai import Agent, RunContext, Tool

 # Assuming LLMSpec is defined elsewhere; placeholder import
 from agentic_security.http_spec import LLMSpec
+from agentic_security.logutils import logger

 LLM_SPECS = []  # Populate this list with LLM spec strings if needed

-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-

 # Define AgentSpecification model
 class AgentSpecification(BaseModel):
@@ -1,9 +1,10 @@
 from functools import lru_cache

 import tomli
-from loguru import logger

-SETTINGS_VERSION = 1
+from agentic_security.logutils import logger
+
+SETTINGS_VERSION = 2


@lru_cache(maxsize=1)
@@ -142,6 +143,13 @@ use_disk_cache = false
 retry = 3
 timeout_connect = 30
 timeout_response = 90
+
+[fuzzer]
+max_prompt_lenght = 2048
+budget_multiplier = 100000000
+initial_optimizer_points = 25
+min_failure_samples = 5
+failure_rate_threshold = 0.5
 """.replace(
                    "$HOST", host
                )
@@ -4,10 +4,14 @@ from asyncio import Event, Queue
 from fastapi import FastAPI
 from fastapi.responses import ORJSONResponse

+from agentic_security.http_spec import LLMSpec
+
 tools_inbox: Queue = Queue()
 stop_event: Event = Event()
 current_run: str = {"spec": "", "id": ""}
-_secrets = {}
+_secrets: dict[str, str] = {}
+
+current_run: dict[str, int | LLMSpec] = {"spec": "", "id": ""}


 def create_app() -> FastAPI:
@@ -26,29 +30,29 @@ def get_stop_event() -> Event:
    return stop_event


-def get_current_run() -> str:
+def get_current_run() -> dict[str, int | LLMSpec]:
    """Get the current run id."""
    return current_run


-def set_current_run(spec):
+def set_current_run(spec: LLMSpec) -> dict[str, int | LLMSpec]:
    """Set the current run id."""
    current_run["id"] = hash(id(spec))
    current_run["spec"] = spec
    return current_run


-def get_secrets():
+def get_secrets() -> dict[str, str]:
    return _secrets


-def set_secrets(secrets):
+def set_secrets(secrets: dict[str, str]) -> dict[str, str]:
    _secrets.update(secrets)
    expand_secrets(_secrets)
    return _secrets


-def expand_secrets(secrets):
+def expand_secrets(secrets: dict[str, str]) -> None:
    for key in secrets:
        val = secrets[key]
        if val.startswith("$"):
@@ -1,26 +1,5 @@
-from logging import config
+from agentic_security.logutils import set_log_level_to_info


 def setup_logging():
-    config.dictConfig(
-        {
-            "version": 1,
-            "disable_existing_loggers": True,
-            "handlers": {
-                "console": {
-                    "class": "logging.StreamHandler",
-                },
-            },
-            "root": {
-                "handlers": ["console"],
-                "level": "INFO",
-            },
-            "loggers": {
-                "uvicorn.access": {
-                    "level": "ERROR",  # Set higher log level to suppress info logs globally
-                    "handlers": ["console"],
-                    "propagate": False,
-                }
-            },
-        }
-    )
+    return set_log_level_to_info()
@@ -1,5 +1,6 @@
 import base64
 from enum import Enum
+from urllib.parse import urlparse

 import httpx
 from pydantic import BaseModel
@@ -159,6 +160,14 @@ def parse_http_spec(http_spec: str) -> LLMSpec:
    # Extract the method and URL from the first line
    method, url = lines[0].split(" ")[0:2]

+    # Check url validity
+    valid_url = urlparse(url)
+    # if missing the correct formatting ://, urlparse.netloc will be empty
+    if valid_url.scheme not in ("http", "https") or not valid_url.netloc:
+        raise InvalidHTTPSpecError(
+            f"Invalid URL: {url}. Ensure it starts with 'http://' or 'https://'"
+        )
+
    # Initialize headers and body
    headers = {}
    body = ""
@@ -4,12 +4,12 @@ from datetime import datetime

 import colorama
 import tqdm.asyncio
-from loguru import logger
 from rich.console import Console
 from rich.table import Table
 from tabulate import tabulate

 from agentic_security.config import SettingsMixin  # Importing the configuration mixin
+from agentic_security.logutils import logger
 from agentic_security.primitives import Scan
 from agentic_security.probe_data import REGISTRY
 from agentic_security.routes.scan import streaming_response_generator
@@ -23,7 +23,7 @@ YELLOW = colorama.Fore.YELLOW
 BLUE = colorama.Fore.BLUE


-class AgenticSecurity(SettingsMixin):
+class SecurityScanner(SettingsMixin):
    @classmethod
    async def async_scan(
        cls,
@@ -202,7 +202,7 @@ class AgenticSecurity(SettingsMixin):
        self.load_config(self.default_path)
        logger.info("Configuration loaded successfully.")
        print(self.config)
-        datasets = list(self.get_config_value("modules").values())
+        datasets = list(self.get_config_value("modules", {}).values())
        for d in datasets:
            d["selected"] = True
        self.scan(
@@ -0,0 +1,146 @@
+# import sys
+
+# from loguru import logger
+
+# # Define custom colors
+# BLUE = "#89CFF0"
+# BROWN = "#8B4513"  # Brown for DEBUG
+
+# # Define custom log level colors
+# logger.level("DEBUG", color=f"<fg {BROWN}>")
+# logger.level("INFO", color=f"<fg {BLUE}>")
+
+# # Define custom log format with aligned messages and colored levels
+# LOG_FORMAT = (
+#     "<level>{level:<8}</level> "  # Properly formatted and colored log level
+#     "<level>{message:<100}</level> "  # Left-aligned message for readability
+#     "<cyan>{file.name}</cyan>:<cyan>{line}</cyan>"  # File name and line number in cyan
+# )
+
+# # Remove default handlers and add a new one with custom formatting
+# logger.remove()
+# logger.add(sys.stdout, format=LOG_FORMAT, level="DEBUG", colorize=True)
+import logging
+import logging.config
+import time
+from collections.abc import Callable, Coroutine
+from functools import wraps
+from os import getenv
+from typing import Any, ParamSpec, TypeVar
+
+LOGGER_NAME = None
+
+# dictConfig schema for the application: one Rich console handler attached to
+# the root logger; httpx and uvicorn.access are raised to WARNING and detached
+# from any handler so their INFO/DEBUG chatter is dropped.
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "rich": {"format": "%(message)s", "datefmt": "[%X]"},
+    },
+    "handlers": {
+        "rich": {
+            "class": "rich.logging.RichHandler",
+            "level": "INFO",
+            "formatter": "rich",
+            "show_time": False,
+            "rich_tracebacks": False,
+            # Must be a plain bool: extra handler keys are handed to the
+            # handler constructor as-is, and a callable here would always be
+            # truthy, showing paths regardless of API_RUNTIME.
+            "show_path": getenv("API_RUNTIME") == "dev",
+            "tracebacks_show_locals": False,
+        },
+    },
+    "loggers": {
+        "": {  # Root logger configuration
+            "level": "INFO",
+            "handlers": ["rich"],
+            "propagate": True,
+        },
+        "httpx": {  # Disable httpx logging
+            "level": "WARNING",  # Suppress DEBUG and INFO messages from httpx
+            "handlers": [],
+            "propagate": False,
+        },
+        "uvicorn.access": {  # Disable uvicorn.access logging
+            "level": "WARNING",  # Suppress DEBUG and INFO messages from uvicorn.access
+            "handlers": [],
+            "propagate": False,
+        },
+    },
+}
+
+
+def configure_logging():
+    """Apply LOGGING_CONFIG through dictConfig and return the LOGGER_NAME logger."""
+    logging.config.dictConfig(LOGGING_CONFIG)
+    return logging.getLogger(LOGGER_NAME)
+
+
+logger: logging.Logger = configure_logging()
+
+
+def set_log_level_to_debug():
+    """Switch the LOGGER_NAME logger and every handler attached to it to DEBUG."""
+    target = logging.getLogger(LOGGER_NAME)
+    target.setLevel(logging.DEBUG)
+    # Handlers carry their own level, so lower those as well.
+    for attached in target.handlers:
+        attached.setLevel(logging.DEBUG)
+
+
+def set_log_level_to_info():
+    """Switch the LOGGER_NAME logger and every handler attached to it to INFO."""
+    target = logging.getLogger(LOGGER_NAME)
+    target.setLevel(logging.INFO)
+    # Handlers carry their own level, so reset those as well.
+    for attached in target.handlers:
+        attached.setLevel(logging.INFO)
+
+
+# Set initial log level
+set_log_level_to_info()
+
+
+# Define generic type variables for return type and parameters
+R = TypeVar("R")
+P = ParamSpec("P")
+
+
+def time_execution_sync(
+    additional_text: str = "",
+) -> Callable[[Callable[P, R]], Callable[P, R]]:
+    """
+    Build a decorator that logs how long a synchronous function takes.
+
+    Args:
+        additional_text: Text placed in front of the logged duration.
+
+    Returns:
+        A decorator whose wrapper calls the function, logs the elapsed
+        seconds at DEBUG level, and returns the function's result unchanged.
+    """
+
+    def decorator(func: Callable[P, R]) -> Callable[P, R]:
+        @wraps(func)
+        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+            # perf_counter is monotonic, so the measured interval cannot be
+            # skewed by system clock adjustments the way time.time() can.
+            start_time = time.perf_counter()
+            result = func(*args, **kwargs)
+            execution_time = time.perf_counter() - start_time
+            logger.debug(
+                f"{additional_text} Execution time: {execution_time:.2f} seconds"
+            )
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def time_execution_async(
+    additional_text: str = "",
+) -> Callable[
+    [Callable[P, Coroutine[Any, Any, R]]], Callable[P, Coroutine[Any, Any, R]]
+]:
+    """
+    Build a decorator that logs how long a coroutine function takes.
+
+    Args:
+        additional_text: Text placed in front of the logged duration.
+
+    Returns:
+        A decorator whose async wrapper awaits the function, logs the elapsed
+        seconds at DEBUG level, and returns the awaited result unchanged.
+    """
+
+    def decorator(
+        func: Callable[P, Coroutine[Any, Any, R]]
+    ) -> Callable[P, Coroutine[Any, Any, R]]:
+        @wraps(func)
+        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+            # perf_counter is monotonic, so the measured interval cannot be
+            # skewed by system clock adjustments the way time.time() can.
+            start_time = time.perf_counter()
+            result = await func(*args, **kwargs)
+            execution_time = time.perf_counter() - start_time
+            logger.debug(
+                f"{additional_text} Execution time: {execution_time:.2f} seconds"
+            )
+            return result
+
+        return wrapper
+
+    return decorator
@@ -0,0 +1,68 @@
+import asyncio
+
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+from agentic_security.logutils import logger
+
+# Create server parameters for stdio connection
+server_params = StdioServerParameters(
+    command="python",  # Executable
+    args=["agentic_security/mcp/main.py"],  # Your server script
+    env=None,  # Optional environment variables
+)
+
+
+async def run() -> tuple:
+    """
+    Connect to the MCP server over stdio and exercise its basic operations.
+
+    Opens a stdio client with `server_params`, initializes a client session,
+    lists prompts, resources and tools, then calls `echo_tool`.
+
+    Returns:
+        tuple: The `(prompts, resources, tools)` listings returned by the session.
+
+    Raises:
+        Exception: Any failure is logged with its traceback and re-raised.
+    """
+    try:
+        logger.info(
+            "Starting stdio client session with server parameters: %s", server_params
+        )
+        async with stdio_client(server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                # Initialize the connection --> connection does not work
+                logger.info("Initializing client session...")
+                await session.initialize()
+
+                # List available prompts, resources, and tools --> no available tools
+                logger.info("Listing available prompts...")
+                prompts = await session.list_prompts()
+                logger.info(f"Available prompts: {prompts}")
+
+                logger.info("Listing available resources...")
+                resources = await session.list_resources()
+                logger.info(f"Available resources: {resources}")
+
+                logger.info("Listing available tools...")
+                tools = await session.list_tools()
+                logger.info(f"Available tools: {tools}")
+
+                # Call the echo tool --> echo tool issue
+                logger.info("Calling echo_tool with message...")
+                echo_result = await session.call_tool(
+                    "echo_tool", arguments={"message": "Hello from client!"}
+                )
+                logger.info(f"Tool result: {echo_result}")
+
+                # # Read the echo resource
+                # echo_content, mime_type = await session.read_resource(
+                #     "echo://Hello_resource"
+                # )
+                # logger.info(f"Resource content: {echo_content}")
+                # logger.info(f"Resource MIME type: {mime_type}")
+
+                # # Get and use the echo prompt
+                # prompt_result = await session.get_prompt(
+                #     "echo_prompt", arguments={"message": "Hello prompt!"}
+                # )
+                # logger.info(f"Prompt result: {prompt_result}")
+
+                logger.info("Client operations completed successfully.")
+                return prompts, resources, tools
+    except Exception as e:
+        logger.error(f"An error occurred during client operations: {e}", exc_info=True)
+        raise
+
+
+if __name__ == "__main__":
+    asyncio.run(run())
@@ -0,0 +1,108 @@
+import httpx
+from mcp.server.fastmcp import FastMCP
+
+# Initialize MCP server
+mcp = FastMCP(
+    name="Agentic Security MCP Server",
+    dependencies=["httpx"],
+)
+
+# FastAPI Server Configuration
+AGENTIC_SECURITY = "http://0.0.0.0:8718"
+
+
+@mcp.tool()
+async def verify_llm(spec: str) -> dict:
+    """
+    Verify an LLM model specification using the FastAPI server.
+
+    Args:
+        spec (str): The specification of the LLM model to verify.
+
+    Returns:
+        dict: The verification result from the FastAPI server.
+    """
+    endpoint = f"{AGENTIC_SECURITY}/verify"
+    body = {"spec": spec}
+    async with httpx.AsyncClient() as http:
+        reply = await http.post(endpoint, json=body)
+        return reply.json()
+
+
+@mcp.tool()
+async def start_scan(
+    llmSpec: str,
+    maxBudget: int,
+    optimize: bool = False,
+    enableMultiStepAttack: bool = False,
+) -> dict:
+    """
+    Start an LLM security scan via the FastAPI server.
+
+    Args:
+        llmSpec (str): The specification of the LLM model.
+        maxBudget (int): The maximum budget for the scan.
+        optimize (bool, optional): Whether to enable optimization during scanning. Defaults to False.
+        enableMultiStepAttack (bool, optional): Whether to enable multi-step attack. Defaults to False.
+
+    Returns:
+        dict: The scan initiation result from the FastAPI server.
+    """
+    endpoint = f"{AGENTIC_SECURITY}/scan"
+    # Datasets, probe datasets and secrets are always sent empty from this tool.
+    scan_request = dict(
+        llmSpec=llmSpec,
+        maxBudget=maxBudget,
+        datasets=[],
+        optimize=optimize,
+        enableMultiStepAttack=enableMultiStepAttack,
+        probe_datasets=[],
+        secrets={},
+    )
+    async with httpx.AsyncClient() as http:
+        reply = await http.post(endpoint, json=scan_request)
+        return reply.json()
+
+
+@mcp.tool()
+async def stop_scan() -> dict:
+    """
+    Stop an ongoing scan via the FastAPI server.
+
+    Returns:
+        dict: The JSON body of the FastAPI server's reply to the stop request.
+    """
+    endpoint = f"{AGENTIC_SECURITY}/stop"
+    async with httpx.AsyncClient() as http:
+        reply = await http.post(endpoint)
+        return reply.json()
+
+
+@mcp.tool()
+async def get_data_config() -> list:
+    """
+    Retrieve data configuration from the FastAPI server.
+
+    Returns:
+        list: The data configuration returned by the FastAPI server.
+    """
+    endpoint = f"{AGENTIC_SECURITY}/v1/data-config"
+    async with httpx.AsyncClient() as http:
+        reply = await http.get(endpoint)
+        return reply.json()
+
+
+@mcp.tool()
+async def get_spec_templates() -> list:
+    """
+    Retrieve LLM specification templates from the FastAPI server.
+
+    Returns:
+        list: The LLM specification templates from the FastAPI server.
+    """
+    endpoint = f"{AGENTIC_SECURITY}/v1/llm-specs"
+    async with httpx.AsyncClient() as http:
+        reply = await http.get(endpoint)
+        return reply.json()
+
+
+# Run the MCP server
+if __name__ == "__main__":
+    mcp.run()
@@ -1,7 +1,8 @@
 from fastapi import Request
-from loguru import logger
 from starlette.middleware.base import BaseHTTPMiddleware

+from agentic_security.logutils import logger
+

 class LogNon200ResponsesMiddleware(BaseHTTPMiddleware):
    async def dispatch(self, request: Request, call_next):
@@ -8,14 +8,14 @@ except ImportError:


 def generate_banner(
-    title="Agentic Security",
-    font="slant",
-    version="v2.1.0",
-    tagline="Proactive Threat Detection & Automated Security Protocols",
-    author="Developed by: [Security Team]",
-    website="Website: https://github.com/msoedov/agentic_security",
-    warning="",
-):
+    title: str = "Agentic Security",
+    font: str = "slant",
+    version: str = "v2.1.0",
+    tagline: str = "Proactive Threat Detection & Automated Security Protocols",
+    author: str = "Developed by: [Security Team]",
+    website: str = "Website: https://github.com/msoedov/agentic_security",
+    warning: str | None = "",  # Using Optional for warning since it might be None
+) -> str:
    """Generate a visually enhanced banner with dynamic width and borders."""
    # Define the text elements

@@ -80,6 +80,7 @@ def generate_banner(


 def init_banner():
+    return
    ver = version("agentic_security")
    try:
        print(generate_banner(version=ver))
@@ -1,5 +1,6 @@
+# noqa
+from agentic_security.primitives.models import CompletionRequest  # noqa
 from agentic_security.primitives.models import (  # noqa
-    CompletionRequest,
    FileProbeResponse,
    LLMInfo,
    Message,
@@ -3,32 +3,44 @@ import random
 import time
 from collections.abc import AsyncGenerator
 from json import JSONDecodeError
+from typing import Any

 import httpx
-import pandas as pd
-from loguru import logger
 from skopt import Optimizer
 from skopt.space import Real

+from agentic_security.config import settings_var
 from agentic_security.http_spec import Modality
+from agentic_security.logutils import logger
 from agentic_security.primitives import Scan, ScanResult
 from agentic_security.probe_actor.cost_module import calculate_cost
 from agentic_security.probe_actor.refusal import refusal_heuristic
+from agentic_security.probe_actor.state import FuzzerState
 from agentic_security.probe_data import audio_generator, image_generator, msj_data
 from agentic_security.probe_data.data import prepare_prompts

-# TODO: full log file
-
-MAX_PROMPT_LENGTH = 2048
-BUDGET_MULTIPLIER = 100_000_000
-INITIAL_OPTIMIZER_POINTS = 25
-MIN_FAILURE_SAMPLES = 5
-FAILURE_RATE_THRESHOLD = 0.5
+# Fuzzer tunables, read from the [fuzzer] section of the settings file with
+# the previous hard-coded values as fallbacks. Every key carries the
+# "fuzzer." prefix so it matches the section the default config generates.
+# NOTE(review): "max_prompt_lenght" is misspelled, but it matches the key
+# written by the default config template, so it is kept as-is here.
+MAX_PROMPT_LENGTH = settings_var("fuzzer.max_prompt_lenght", 2048)
+BUDGET_MULTIPLIER = settings_var("fuzzer.budget_multiplier", 100000000)
+INITIAL_OPTIMIZER_POINTS = settings_var("fuzzer.initial_optimizer_points", 25)
+MIN_FAILURE_SAMPLES = settings_var("fuzzer.min_failure_samples", 5)
+FAILURE_RATE_THRESHOLD = settings_var("fuzzer.failure_rate_threshold", 0.5)


 async def generate_prompts(
    prompts: list[str] | AsyncGenerator,
 ) -> AsyncGenerator[str, None]:
+    """
+    Asynchronously generates and yields individual prompts.
+
+    If the input is a list of strings, the function sequentially yields each string.
+    If the input is an asynchronous generator, it forwards each generated prompt.
+
+    Args:
+        prompts (list[str] | AsyncGenerator): A list of strings or an asynchronous generator of prompts.
+
+    Yields:
+        str: An individual prompt from the list or the asynchronous generator.
+    """
    if isinstance(prompts, list):
        for prompt in prompts:
            yield prompt
@@ -37,7 +49,21 @@ async def generate_prompts(
            yield prompt


-def multi_modality_spec(llm_spec):
+def get_modality_adapter(llm_spec):
+    """
+    Returns the appropriate request adapter based on the modality of the LLM specification.
+
+    Depending on the modality of `llm_spec`, the function selects the corresponding request adapter.
+    If the modality is IMAGE or AUDIO, it returns an adapter for handling the respective type.
+    If the modality is TEXT or an unrecognized type, it returns `llm_spec` as is.
+
+    Args:
+        llm_spec: An object containing modality information for the LLM.
+
+    Returns:
+        RequestAdapter | llm_spec: An instance of the appropriate request adapter
+        or the original `llm_spec` if no adaptation is needed.
+    """
    match llm_spec.modality:
        case Modality.IMAGE:
            return image_generator.RequestAdapter(llm_spec)
@@ -50,40 +76,71 @@ def multi_modality_spec(llm_spec):


 async def process_prompt(
-    request_factory, prompt, tokens, module_name, refusals, errors, outputs
+    request_factory,
+    prompt: str,
+    tokens: int,
+    module_name: str,
+    fuzzer_state: FuzzerState,
 ) -> tuple[int, bool]:
    """
-    Process a single prompt and update the token count and failure status.
+    Processes a single prompt using the provided request factory and records the outcome.
+
+    This function sends the given `prompt` to the `request_factory`, checks for errors, and records
+    errors, refusals, and outputs on `fuzzer_state`. If the request fails or the response indicates
+    a refusal, the function records the issue and returns the token count along with a boolean
+    flagging the prompt as failed or refused.
+
+    Args:
+        request_factory: An object with a `fn` method used to send the prompt.
+        prompt (str): The input prompt to be processed.
+        tokens (int): The current token count, which will be updated.
+        module_name (str): The name of the module handling the request.
+        fuzzer_state: State tracking object for the fuzzer
+
+    Returns:
+        tuple[int, bool]: Updated token count and a boolean indicating if the prompt was refused.
    """
    try:
        response = await request_factory.fn(prompt=prompt)
+
+        # Handle HTTP errors
        if response.status_code == 422:
            logger.error(f"Invalid prompt: {prompt}, error=422")
-            errors.append((module_name, prompt, 422, "Invalid prompt"))
+            fuzzer_state.add_error(module_name, prompt, 422, "Invalid prompt")
            return tokens, True

        if response.status_code >= 400:
            logger.error(f"HTTP {response.status_code} {response.content=}")
-            errors.append((module_name, prompt, response.status_code, response.text))
+            fuzzer_state.add_error(
+                module_name, prompt, response.status_code, response.text
+            )
            return tokens, True
+
+        # Process successful response
        response_text = response.text
        tokens += len(response_text.split())

+        # Check if the response indicates a refusal
        refused = refusal_heuristic(response.json())
        if refused:
-            refusals.append((module_name, prompt, response.status_code, response_text))
+            fuzzer_state.add_refusal(
+                module_name, prompt, response.status_code, response_text
+            )

-        outputs.append((module_name, prompt, response_text, refused))
+        fuzzer_state.add_output(module_name, prompt, response_text, refused)
        return tokens, refused

    except httpx.RequestError as exc:
        logger.error(f"Request error: {exc}")
-        errors.append((module_name, prompt, "?", str(exc)))
+        fuzzer_state.add_error(module_name, prompt, "?", str(exc))
        return tokens, True
    except JSONDecodeError as json_decode_error:
-        logger.error(f"Jason error: {json_decode_error}")
-        errors.append((module_name, prompt, "?", str(json_decode_error)))
+        logger.error(f"JSON error: {json_decode_error}")
+        fuzzer_state.add_error(module_name, prompt, "?", str(json_decode_error))
        return tokens, True
+    except Exception as e:
+        logger.exception(f"Unexpected error: {e}")
+        return tokens, False


 async def process_prompt_batch(
@@ -91,14 +148,29 @@ async def process_prompt_batch(
    prompts: list[str],
    tokens: int,
    module_name: str,
-    refusals,
-    errors,
-    outputs,
+    fuzzer_state: FuzzerState,
 ) -> tuple[int, int]:
+    """
+    Processes a batch of prompts asynchronously and aggregates the results.
+
+    This function sends multiple prompts concurrently using `process_prompt`,
+    collects the token count and failure status for each prompt, and returns
+    the total number of tokens processed and the number of failed prompts.
+
+    Args:
+        request_factory: An object with a `fn` method used to send the prompts.
+        prompts (list[str]): A list of input prompts to be processed.
+        tokens (int): The initial token count, which will be updated.
+        module_name (str): The name of the module handling the request.
+        fuzzer_state: State tracking object for the fuzzer
+
+    Returns:
+        tuple[int, int]:
+            - Total number of tokens processed.
+            - Number of failed prompts.
+    """
    tasks = [
-        process_prompt(
-            request_factory, p, tokens, module_name, refusals, errors, outputs
-        )
+        process_prompt(request_factory, p, tokens, module_name, fuzzer_state)
        for p in prompts
    ]
    results = await asyncio.gather(*tasks)
@@ -107,7 +179,154 @@ async def process_prompt_batch(
    return total_tokens, failures


+async def scan_module(
+    request_factory,
+    module,
+    fuzzer_state: FuzzerState,
+    processed_prompts: int = 0,
+    total_prompts: int = 0,
+    max_budget: int = 0,
+    total_tokens: int = 0,
+    optimize: bool = False,
+    stop_event: asyncio.Event | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Scan a single prompt module and stream the results.
+
+    Each prompt produced by `generate_prompts(module.prompts)` is sent through
+    `process_prompt`; one JSON-encoded `ScanResult` is yielded per prompt,
+    preceded by an initial zero-valued result for the module.
+
+    The generator ends early when:
+        - `stop_event` is set (the event is cleared and a status message is
+          yielded),
+        - optimization is enabled and the optimizer's best failure rate
+          exceeds `FAILURE_RATE_THRESHOLD`,
+        - `total_tokens` exceeds `max_budget`.
+
+    Args:
+        request_factory: The factory for creating requests
+        module: The prompt module to scan (reads `dataset_name`, `lazy`
+            and `prompts`)
+        fuzzer_state: State tracking object for the fuzzer
+        processed_prompts: Number of prompts processed so far
+        total_prompts: Total number of prompts to process (0 disables
+            progress reporting)
+        max_budget: Maximum token budget
+        total_tokens: Current token count
+        optimize: Whether to use optimization
+        stop_event: Event to stop scanning
+
+    Yields:
+        str: JSON-encoded scan results or status messages.
+    """
+    tokens = 0
+    module_failures = 0
+    module_prompts = 0
+    failure_rates = []
+
+    # Initialize optimizer if optimization is enabled
+    optimizer = (
+        Optimizer(
+            [Real(0, 1)], base_estimator="GP", n_initial_points=INITIAL_OPTIMIZER_POINTS
+        )
+        if optimize
+        else None
+    )
+
+    module_size = 0 if module.lazy else len(module.prompts)
+    logger.info(f"Scanning {module.dataset_name} {module_size}")
+    # Announce the module with an all-zero result before any prompt is sent.
+    yield ScanResult(
+        module=module.dataset_name,
+        tokens=0,
+        cost=0,
+        progress=0,
+        failureRate=0,
+        prompt="",
+        latency=0,
+        model="",
+    ).model_dump_json()
+
+    async for prompt in generate_prompts(module.prompts):
+        if stop_event and stop_event.is_set():
+            stop_event.clear()
+            logger.info("Scan stopped by user.")
+            yield ScanResult.status_msg("Scan stopped by user.")
+            # NOTE(review): this only ends the current module's generator; a
+            # caller iterating several modules has to stop on its own.
+            return
+
+        processed_prompts += 1
+        module_prompts += 1
+
+        # Calculate progress based on total processed prompts
+        progress = 100 * processed_prompts / total_prompts if total_prompts else 0
+        progress = progress % 100
+
+        # Replace the previous module count with the updated one so the
+        # running total is not double counted (presumably `process_prompt`
+        # returns the cumulative count it was given plus the new tokens).
+        total_tokens -= tokens
+        start = time.time()
+
+        tokens, failed = await process_prompt(
+            request_factory,
+            prompt,
+            tokens,
+            module.dataset_name,
+            fuzzer_state=fuzzer_state,
+        )
+
+        end = time.time()
+        total_tokens += tokens
+
+        if failed:
+            module_failures += 1
+
+        failure_rate = module_failures / max(module_prompts, 1)
+        failure_rates.append(failure_rate)
+        cost = calculate_cost(tokens)
+
+        response_text = fuzzer_state.get_last_output(prompt) or ""
+
+        yield ScanResult(
+            module=module.dataset_name,
+            tokens=round(tokens / 1000, 1),
+            cost=cost,
+            progress=round(progress, 2),
+            failureRate=round(failure_rate * 100, 2),
+            prompt=prompt[:MAX_PROMPT_LENGTH],
+            latency=end - start,
+            model=response_text,
+        ).model_dump_json()
+
+        # Optimization logic
+        if optimize and optimizer and len(failure_rates) >= MIN_FAILURE_SAMPLES:
+            next_point = optimizer.ask()
+            # The optimizer minimizes, so the failure rate is fed negated.
+            optimizer.tell(next_point, -failure_rate)
+            best_failure_rate = -optimizer.get_result().fun
+            if best_failure_rate > FAILURE_RATE_THRESHOLD:
+                yield ScanResult.status_msg(
+                    f"High failure rate detected ({best_failure_rate:.2%}). Stopping this module..."
+                )
+                break
+
+        # Budget check
+        if total_tokens > max_budget:
+            budget_msg = (
+                f"Scan ran out of budget and stopped. {total_tokens=} {max_budget=}"
+            )
+            logger.info(budget_msg)
+            yield ScanResult.status_msg(budget_msg)
+            break
+
+
 async def with_error_handling(agen):
+    """
+    Wraps an asynchronous generator with error handling.
+
+    This function iterates over an asynchronous generator, yielding its values.
+    If an exception occurs, it logs the error and yields a failure message.
+    Finally, it ensures that a completion message is always yielded.
+
+    Args:
+        agen: An asynchronous generator that produces scan results.
+
+    Yields:
+        ScanResult: Either a successful result, an error message if an
+        exception occurs, or a completion message at the end.
+    """
    try:
        async for t in agen:
            yield t
@@ -123,14 +342,37 @@ async def perform_single_shot_scan(
    max_budget: int,
    datasets: list[dict[str, str]] = [],
    tools_inbox=None,
-    optimize=False,
-    stop_event: asyncio.Event = None,
+    optimize: bool = False,
+    stop_event: asyncio.Event | None = None,
    secrets: dict[str, str] = {},
 ) -> AsyncGenerator[str, None]:
-    """Perform a standard security scan."""
+    """
+    Perform a standard security scan using a given request factory.
+
+    This function processes security scan prompts from selected datasets while
+    respecting a predefined token budget. It supports optimization, failure tracking,
+    and early stopping based on budget constraints or user intervention.
+
+    Args:
+        request_factory: A factory function that generates requests for processing prompts.
+        max_budget (int): The maximum token budget for the scan.
+        datasets (list[dict[str, str]], optional): A list of datasets containing security prompts.
+        tools_inbox: Optional additional tools for processing (default: None).
+        optimize (bool, optional): Whether to enable failure rate optimization (default: False).
+        stop_event (asyncio.Event, optional): An event to signal early termination (default: None).
+        secrets (dict[str, str], optional): A dictionary of secrets for authentication (default: {}).
+
+    Yields:
+        str: JSON-encoded scan results or status messages.
+
+    The function iterates over prompts, processes them asynchronously, and updates
+    failure statistics and token usage. If the scan exceeds the budget or failure rate is too high,
+    it stops execution. Results are saved to a CSV file upon completion.
+    """
    max_budget = max_budget * BUDGET_MULTIPLIER
    selected_datasets = [m for m in datasets if m["selected"]]
-    request_factory = multi_modality_spec(request_factory)
+    request_factory = get_modality_adapter(request_factory)
+
    yield ScanResult.status_msg("Loading datasets...")
    prompt_modules = prepare_prompts(
        dataset_names=[m["dataset_name"] for m in selected_datasets],
@@ -140,108 +382,35 @@ async def perform_single_shot_scan(
    )
    yield ScanResult.status_msg("Datasets loaded. Starting scan...")

-    errors = []
-    refusals = []
-    outputs = []
+    fuzzer_state = FuzzerState()
    total_prompts = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
    processed_prompts = 0

-    optimizer = (
-        Optimizer([Real(0, 1)], base_estimator="GP", n_initial_points=25)
-        if optimize
-        else None
-    )
-    failure_rates = []
-
    total_tokens = 0
-    tokens = 0
-    should_stop = False
    for module in prompt_modules:
-        if should_stop:
-            break
-        tokens = 0
-        module_failures = 0
+        module_gen = scan_module(
+            request_factory=request_factory,
+            module=module,
+            fuzzer_state=fuzzer_state,
+            processed_prompts=processed_prompts,
+            total_prompts=total_prompts,
+            max_budget=max_budget,
+            total_tokens=total_tokens,
+            optimize=optimize,
+            stop_event=stop_event,
+        )
+        try:
+            async for result in module_gen:
+                yield result
+        except Exception:
+            logger.error("Module exception")
+            continue
+        # Update processed_prompts count
        module_size = 0 if module.lazy else len(module.prompts)
-        logger.info(f"Scanning {module.dataset_name} {module_size}")
-        module_prompts = 0  # Reset for each module
-
-        async for prompt in generate_prompts(module.prompts):
-            if stop_event and stop_event.is_set():
-                stop_event.clear()
-                logger.info("Scan stopped by user.")
-                yield ScanResult.status_msg("Scan stopped by user.")
-                return
-
-            processed_prompts += 1
-            module_prompts += 1  # Fixed increment syntax
-            # Calculate progress based on total processed prompts
-            progress = 100 * processed_prompts / total_prompts if total_prompts else 0
-            progress = progress % 100
-
-            total_tokens -= tokens
-            start = time.time()
-            tokens, failed = await process_prompt(
-                request_factory,
-                prompt,
-                tokens,
-                module.dataset_name,
-                refusals,
-                errors,
-                outputs,
-            )
-            end = time.time()
-            total_tokens += tokens
-
-            if failed:
-                module_failures += 1
-            failure_rate = module_failures / max(module_prompts, 1)
-            failure_rates.append(failure_rate)
-            cost = calculate_cost(tokens)
-
-            last_output = outputs[-1] if outputs else None
-            if last_output and last_output[1] == prompt:
-                response_text = last_output[2]
-            else:
-                response_text = ""
-
-            yield ScanResult(
-                module=module.dataset_name,
-                tokens=round(tokens / 1000, 1),
-                cost=cost,
-                progress=round(progress, 2),
-                failureRate=round(failure_rate * 100, 2),
-                prompt=prompt[:MAX_PROMPT_LENGTH],
-                latency=end - start,
-                model=response_text,
-            ).model_dump_json()
-
-            if optimize and len(failure_rates) >= 5:
-                next_point = optimizer.ask()
-                optimizer.tell(next_point, -failure_rate)
-                best_failure_rate = -optimizer.get_result().fun
-                if best_failure_rate > 0.5:
-                    yield ScanResult.status_msg(
-                        f"High failure rate detected ({best_failure_rate:.2%}). Stopping this module..."
-                    )
-                    should_stop = True
-                    break
-            if total_tokens > max_budget:
-                logger.info(
-                    f"Scan ran out of budget and stopped. {total_tokens=} {max_budget=}"
-                )
-                yield ScanResult.status_msg(
-                    f"Scan ran out of budget and stopped. {total_tokens=} {max_budget=}"
-                )
-                should_stop = True
-                break
+        processed_prompts += module_size

    yield ScanResult.status_msg("Scan completed.")
-
-    failure_data = errors + refusals
-    df = pd.DataFrame(
-        failure_data, columns=["module", "prompt", "status_code", "content"]
-    )
-    df.to_csv("failures.csv", index=False)
+    fuzzer_state.export_failures("failures.csv")


 async def perform_many_shot_scan(
@@ -250,14 +419,39 @@ async def perform_many_shot_scan(
    datasets: list[dict[str, str]] = [],
    probe_datasets: list[dict[str, str]] = [],
    tools_inbox=None,
-    optimize=False,
-    stop_event: asyncio.Event = None,
+    optimize: bool = False,
+    stop_event: asyncio.Event | None = None,
    probe_frequency: float = 0.2,
    max_ctx_length: int = 10_000,
    secrets: dict[str, str] = {},
 ) -> AsyncGenerator[str, None]:
-    """Perform a multi-step security scan with probe injection."""
-    request_factory = multi_modality_spec(request_factory)
+    """
+    Perform a multi-step security scan with probe injection.
+
+    This function executes a security scan while periodically injecting probe datasets
+    to test system robustness. It tracks failures, optimizes scan efficiency,
+    and ensures adherence to a predefined token budget.
+
+    Args:
+        request_factory: A factory function that generates requests for processing prompts.
+        max_budget (int): The maximum token budget for the scan.
+        datasets (list[dict[str, str]], optional): The main datasets for scanning.
+        probe_datasets (list[dict[str, str]], optional): Additional datasets for probe injection.
+        tools_inbox: Optional tools for additional processing (default: None).
+        optimize (bool, optional): Whether to enable failure rate optimization (default: False).
+        stop_event (asyncio.Event, optional): An event to signal early termination (default: None).
+        probe_frequency (float, optional): The probability of probe injection (default: 0.2).
+        max_ctx_length (int, optional): The maximum context length before resetting (default: 10,000 tokens).
+        secrets (dict[str, str], optional): A dictionary of secrets for authentication (default: {}).
+
+    Yields:
+        str: JSON-encoded scan results or status messages.
+
+    This function iterates over prompts, injects probe prompts at random intervals,
+    processes them asynchronously, and tracks failure rates. If failure rates exceed a threshold
+    or budget is exhausted, the scan is stopped early. Results are saved to a CSV file upon completion.
+    """
+    request_factory = get_modality_adapter(request_factory)
    # Load main and probe datasets
    yield ScanResult.status_msg("Loading datasets...")
    prompt_modules = prepare_prompts(
@@ -269,17 +463,10 @@ async def perform_many_shot_scan(
    msj_modules = msj_data.prepare_prompts(probe_datasets)
    yield ScanResult.status_msg("Datasets loaded. Starting scan...")

-    errors = []
-    refusals = []
-    outputs = []
+    fuzzer_state = FuzzerState()
    total_prompts = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
    processed_prompts = 0

-    optimizer = (
-        Optimizer([Real(0, 1)], base_estimator="GP", n_initial_points=25)
-        if optimize
-        else None
-    )
    failure_rates = []

    for module in prompt_modules:
@@ -293,6 +480,7 @@ async def perform_many_shot_scan(
                logger.info("Scan stopped by user.")
                yield ScanResult.status_msg("Scan stopped by user.")
                return
+
            tokens = 0
            processed_prompts += 1
            progress = 100 * processed_prompts / total_prompts if total_prompts else 0
@@ -320,9 +508,7 @@ async def perform_many_shot_scan(
                    full_prompt,
                    tokens,
                    module.dataset_name,
-                    refusals,
-                    errors,
-                    outputs,
+                    fuzzer_state=fuzzer_state,
                )
                if failed:
                    module_failures += 1
@@ -343,30 +529,48 @@ async def perform_many_shot_scan(
                prompt=prompt[:MAX_PROMPT_LENGTH],
            ).model_dump_json()

-            if optimize and len(failure_rates) >= 5:
-                next_point = optimizer.ask()
-                optimizer.tell(next_point, -failure_rate)
-                best_failure_rate = -optimizer.get_result().fun
-                if best_failure_rate > 0.5:
-                    yield ScanResult.status_msg(
-                        f"High failure rate detected ({best_failure_rate:.2%}). Stopping this module..."
-                    )
-                    break
+            if optimize and len(failure_rates) >= MIN_FAILURE_SAMPLES:
+                yield ScanResult.status_msg(
+                    f"High failure rate detected ({failure_rate:.2%}). Stopping this module..."
+                )
+                break

    yield ScanResult.status_msg("Scan completed.")
-
-    df = pd.DataFrame(
-        errors + refusals, columns=["module", "prompt", "status_code", "content"]
-    )
-    df.to_csv("failures.csv", index=False)
+    fuzzer_state.export_failures("failures.csv")


 def scan_router(
    request_factory,
    scan_parameters: Scan,
    tools_inbox=None,
-    stop_event: asyncio.Event = None,
+    stop_event: asyncio.Event | None = None,
 ):
+    """
+    Route scan requests to the appropriate scanning function.
+
+    This function determines whether to perform a multi-step or single-shot
+    security scan based on the provided scan parameters.
+
+    Args:
+        request_factory: A factory function to generate requests for processing prompts.
+        scan_parameters (Scan): An object containing the parameters for the scan, including:
+            - enableMultiStepAttack (bool): Whether to perform a multi-step scan.
+            - maxBudget (int): The maximum token budget for the scan.
+            - datasets (list[dict[str, str]]): The datasets to scan.
+            - probe_datasets (list[dict[str, str]], optional): Datasets for probe injection (multi-step only).
+            - optimize (bool): Whether to enable optimization.
+            - secrets (dict[str, str], optional): A dictionary of secrets for authentication.
+        tools_inbox: Optional tools for additional processing (default: None).
+        stop_event (asyncio.Event, optional): An event to signal early termination (default: None).
+
+    Returns:
+        A function wrapped with `with_error_handling`, which executes either:
+        - `perform_many_shot_scan` for multi-step scanning.
+        - `perform_single_shot_scan` for single-shot scanning.
+
+    The function ensures that the appropriate scanning method is chosen based on
+    the `enableMultiStepAttack` flag in `scan_parameters`.
+    """
    if scan_parameters.enableMultiStepAttack:
        return with_error_handling(
            perform_many_shot_scan(
@@ -1,5 +1,4 @@
 import asyncio
-import logging
 from typing import Any

 import httpx
@@ -7,13 +6,10 @@ from pydantic import BaseModel, Field
 from pydantic_ai import Agent, RunContext

 from agentic_security.http_spec import LLMSpec
+from agentic_security.logutils import logger

 LLM_SPECS = []

-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-

 class AgentSpecification(BaseModel):
    name: str | None = Field(None, description="Name of the LLM/agent")
@@ -0,0 +1,47 @@
+import pandas as pd
+
+
+class FuzzerState:
+    """Accumulates the errors, refusals and outputs recorded during a scan."""
+
+    def __init__(self):
+        # Every list holds plain tuples, kept in the order they were added.
+        self.errors = []
+        self.refusals = []
+        self.outputs = []
+
+    def add_error(
+        self,
+        module_name: str,
+        prompt: str,
+        status_code: int | str,
+        error_msg: str,
+    ):
+        """Record an error as ``(module_name, prompt, status_code, error_msg)``."""
+        record = (module_name, prompt, status_code, error_msg)
+        self.errors.append(record)
+
+    def add_refusal(
+        self, module_name: str, prompt: str, status_code: int, response_text: str
+    ):
+        """Record a refusal as ``(module_name, prompt, status_code, response_text)``."""
+        record = (module_name, prompt, status_code, response_text)
+        self.refusals.append(record)
+
+    def add_output(
+        self, module_name: str, prompt: str, response_text: str, refused: bool
+    ):
+        """Record an output as ``(module_name, prompt, response_text, refused)``."""
+        record = (module_name, prompt, response_text, refused)
+        self.outputs.append(record)
+
+    def get_last_output(self, prompt: str) -> str | None:
+        """Return the response text of the newest output stored for ``prompt``.
+
+        Returns ``None`` when no output was recorded for that prompt.
+        """
+        # Walk newest-first so the most recent matching entry wins.
+        newest_first = (
+            entry[2] for entry in reversed(self.outputs) if entry[1] == prompt
+        )
+        return next(newest_first, None)
+
+    def export_failures(self, filename: str = "failures.csv"):
+        """Write all errors followed by all refusals to ``filename`` as CSV."""
+        header = ["module", "prompt", "status_code", "content"]
+        rows = [*self.errors, *self.refusals]
+        pd.DataFrame(rows, columns=header).to_csv(filename, index=False)
@@ -1,4 +1,4 @@
-from .data import load_local_csv
+from .data import load_local_csv, load_local_csv_files

 REGISTRY_V0 = [
    {
@@ -484,3 +484,18 @@ REGISTRY = REGISTRY_V0 + [
        "modality": "text",
    },
 ]
+
+# Add one registry entry per dataset returned by load_local_csv_files().
+for ds in load_local_csv_files():
+    REGISTRY.append(
+        {
+            "dataset_name": ds.dataset_name,
+            "num_prompts": len(ds.prompts),
+            # Token count of the dataset, not the prompt list itself.
+            "tokens": ds.tokens,
+            "approx_cost": 0.0,
+            "is_active": True,
+            "source": f"Local file dataset: {ds.metadata['src']}",
+            "selected": False,
+            "url": "",
+            "modality": "text",
+        }
+    )
@@ -1,4 +1,5 @@
 import base64
+import logging
 import os
 import platform
 import subprocess
@@ -7,6 +8,16 @@ import uuid
 import httpx
 from cache_to_disk import cache_to_disk

+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class AudioGenerationError(Exception):
+    """Signals that audio generation failed."""
+

 def encode(content: bytes) -> str:
    encoded_content = base64.b64encode(content).decode("utf-8")
@@ -41,12 +52,25 @@ def generate_audio_mac_wav(prompt: str) -> bytes:
        # Read the WAV file into memory
        with open(temp_wav_path, "rb") as f:
            audio_bytes = f.read()
+
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Subprocess error: {e}")
+        raise AudioGenerationError("Failed to generate or convert audio.") from e
+    except FileNotFoundError as e:
+        logger.error(f"File not found: {e}")
+        raise AudioGenerationError("Required file not found.") from e
+    except Exception as e:
+        logger.exception("Unexpected error occurred.")
+        raise AudioGenerationError(
+            "An unexpected error occurred during audio generation."
+        ) from e
    finally:
-        # Clean up the temporary files
-        if os.path.exists(temp_aiff_path):
-            os.remove(temp_aiff_path)
-        if os.path.exists(temp_wav_path):
-            os.remove(temp_wav_path)
+        for path in (temp_aiff_path, temp_wav_path):
+            try:
+                if os.path.exists(path):
+                    os.remove(path)
+            except Exception as e:
+                logger.warning(f"Failed to delete temporary file {path}: {e}")

    # Return the audio bytes
    return audio_bytes
@@ -1,14 +1,15 @@
 import io
 import os
 import random
-from functools import lru_cache
+from collections.abc import Callable, Iterator
+from functools import partial
+from typing import Any, TypeVar

 import httpx
 import pandas as pd
 from cache_to_disk import cache_to_disk
-from datasets import load_dataset
-from loguru import logger

+from agentic_security.logutils import logger
 from agentic_security.probe_data import stenography_fn
 from agentic_security.probe_data.models import ProbeDataset
 from agentic_security.probe_data.modules import (
@@ -18,282 +19,281 @@ from agentic_security.probe_data.modules import (
    inspect_ai_tool,
    rl_model,
 )
+from datasets import load_dataset
+
+# Type aliases for clarity
+T = TypeVar("T")
+FilterFn = Callable[[pd.Series], bool]
+ColumnMappings = dict[str, str]
+DatasetLoader = Callable[[], ProbeDataset]
+TransformFn = Callable[[str], str]


-@cache_to_disk()
-def load_dataset_general(
-    dataset_name,
-    dataset_split="train",
-    column_mappings=None,
-    filter_fn=None,
-    custom_url=None,
-    additional_metadata=None,
-):
-    """
-    Generalized function to load datasets with flexible configurations.
+# Core data loading utilities
+def fetch_csv_content(url: str) -> str:
+    """Download ``url`` and return the response body decoded as UTF-8.
+
+    An unsuccessful HTTP status is turned into an exception by
+    ``raise_for_status`` instead of being returned as content.
+    """
+    reply = httpx.get(url)
+    reply.raise_for_status()
+    raw_body = reply.content
+    return raw_body.decode("utf-8")

-    :param dataset_name: Name of the dataset or URL for custom CSVs
-    :param dataset_split: Split to load from the dataset (e.g., "train")
-    :param column_mappings: Dictionary mapping dataset columns to expected keys, e.g., {'prompt': 'query'}
-    :param filter_fn: A filtering function that takes a row and returns True/False
-    :param custom_url: URL for custom CSV datasets
-    :param additional_metadata: Additional metadata to include in the ProbeDataset
-    :return: A ProbeDataset object with the processed data
-    """
-    if custom_url:
-        logger.info(f"Loading custom CSV dataset from {custom_url}")
-        r = httpx.get(custom_url)
-        content = r.content
-        df = pd.read_csv(io.StringIO(content.decode("utf-8")))
-    else:
-        logger.info(f"Loading dataset {dataset_name} from Hugging Face datasets")
-        dataset = load_dataset(dataset_name)
-        df = pd.DataFrame(dataset[dataset_split])

-    # Apply column mappings if provided
-    if column_mappings:
-        df.rename(columns=column_mappings, inplace=True)
+def load_df_from_source(source: str, is_url: bool = False) -> pd.DataFrame:
+    """Build a DataFrame from a CSV URL or from a Hugging Face dataset.
+
+    With ``is_url`` set, ``source`` is fetched and parsed as CSV; otherwise
+    it is treated as a dataset name and its ``"train"`` split is loaded.
+    """
+    if not is_url:
+        logger.info(f"Loading dataset {source} from Hugging Face")
+        hf_dataset = load_dataset(source)
+        return pd.DataFrame(hf_dataset["train"])
+
+    logger.info(f"Loading custom CSV from {source}")
+    csv_text = fetch_csv_content(source)
+    return pd.read_csv(io.StringIO(csv_text))

-    # Filter rows if filter_fn is provided
+
+def transform_df(
+    df: pd.DataFrame,
+    mappings: ColumnMappings | None = None,
+    filter_fn: FilterFn | None = None,
+) -> pd.DataFrame:
+    """Return a transformed copy of ``df``; the input frame is not modified.
+
+    Args:
+        df: The DataFrame to transform.
+        mappings: Optional column renames, passed to ``DataFrame.rename``
+            as ``columns=``.
+        filter_fn: Optional row predicate, applied with ``axis=1``; only rows
+            for which it returns a truthy value are kept. It runs after the
+            rename, so it sees the renamed column names.
+
+    Returns:
+        The renamed and filtered DataFrame.
+    """
+    result = df.copy()
+    if mappings:
+        result.rename(columns=mappings, inplace=True)
    if filter_fn:
-        df = df[df.apply(filter_fn, axis=1)]
-
-    # Extract prompts
-    prompts = df[column_mappings.get("prompt", "prompt")].tolist()
-
-    return ProbeDataset(
-        dataset_name=dataset_name,
-        metadata=additional_metadata or {},
-        prompts=prompts,
-        tokens=count_words_in_list(prompts),
-        approx_cost=0.0,
-    )
+        result = result[result.apply(filter_fn, axis=1)]
+    return result


-def count_words_in_list(str_list):
-    """Calculate the total number of words in a given list of strings.
-
-    :param str_list: List of strings
-    :return: Total number of words across all strings in the list
-    """
-    total_words = sum(len(str(s).split()) for s in str_list)
-    return total_words
-
-
-@cache_to_disk()
-def load_dataset_v1():
-    return load_dataset_general(
-        dataset_name="ShawnMenz/DAN_jailbreak",
-        column_mappings={"prompt": "prompt", "jailbreak": "jailbreak"},
-        filter_fn=lambda row: row["jailbreak"],
-    )
-
-
-@cache_to_disk()
-def load_dataset_v2():
-    return load_dataset_general(
-        dataset_name="deepset/prompt-injections",
-        column_mappings={"prompt": "text", "jailbreak": "label"},
-        filter_fn=lambda row: row["label"],
-    )
-
-
-@cache_to_disk()
-def load_dataset_v4():
-    return load_dataset_general(
-        dataset_name="notrichardren/refuse-to-answer-prompts",
-        column_mappings={"prompt": "claim", "jailbreak": "label"},
-        filter_fn=lambda row: row["label"],
-    )
-
-
-@cache_to_disk()
-def load_dataset_v3():
-    return load_dataset_general(
-        dataset_name="rubend18/ChatGPT-Jailbreak-Prompts",
-        column_mappings={"prompt": "Prompt"},
-        filter_fn=lambda row: row["label"],
-    )
-
-
-@cache_to_disk()
-def load_dataset_v6():
-    dataset = load_dataset("markush1/LLM-Jailbreak-Classifier")
-    bad_prompts = [
-        p
-        for p, c in zip(dataset["train"]["prompt"], dataset["train"]["classification"])
-        if c == "jailbreak"
-    ]
-    return ProbeDataset(
-        dataset_name="markush1/LLM-Jailbreak-Classifier",
-        metadata={},
-        prompts=bad_prompts,
-        tokens=count_words_in_list(bad_prompts),
-        approx_cost=0.0,
-    )
-
-
-@cache_to_disk()
-def load_dataset_v7():
-    splits = {
-        "mini_JailBreakV_28K": "JailBreakV_28K/mini_JailBreakV_28K.csv",
-        "JailBreakV_28K": "JailBreakV_28K/JailBreakV_28K.csv",
-    }
-    df = pd.read_csv(
-        "hf://datasets/JailbreakV-28K/JailBreakV-28k/" + splits["JailBreakV_28K"]
-    )
-    bad_prompts = df["jailbreak_query"].tolist()
-    print(df.shape)
-    return ProbeDataset(
-        dataset_name="JailbreakV-28K/JailBreakV-28k",
-        metadata={},
-        prompts=bad_prompts,
-        tokens=count_words_in_list(bad_prompts),
-        approx_cost=0.0,
-    )
-
-
-@cache_to_disk()
-def load_dataset_v8():
-    df = pd.read_csv(
-        "hf://datasets/ShawnMenz/jailbreak_sft_rm_ds/jailbreak_sft_rm_ds.csv",
-        names=["jailbreak", "prompt"],
-    )
-    filtered = df[df["jailbreak"] == "jailbreak"]["prompt"].tolist()
-    return ProbeDataset(
-        dataset_name="JailbreakV-28K/JailBreakV-28k",
-        metadata={},
-        prompts=filtered,
-        tokens=count_words_in_list(filtered),
-        approx_cost=0.0,
-    )
-
-
-@cache_to_disk()
-def load_dataset_v5():
-    ds = []
-    for c in [
-        "AdvBench",
-        "ForbiddenQuestion",
-        "MJP",
-        "MaliciousInstruct",
-        "QuestionList",
-    ]:
-        dataset = load_dataset("Lemhf14/EasyJailbreak_Datasets", c)
-        bad_prompts = dataset["train"]["query"]
-        ds.extend(bad_prompts)
-
-    return ProbeDataset(
-        dataset_name="Lemhf14/EasyJailbreak_Datasets",
-        metadata={},
-        prompts=ds,
-        tokens=count_words_in_list(ds),
-        approx_cost=0.0,
-    )
-
-
-@cache_to_disk()
-def load_generic_csv(url, name, column="prompt", predicator=None):
-    r = httpx.get(url)
-    content = r.content
-    df = pd.read_csv(io.StringIO(content.decode("utf-8")))
-    logger.info(f"Loaded {len(df)} prompts from {url}")
-    filtered_prompts = df[df.apply(predicator, axis=1)][column].tolist()
+def create_probe_dataset(
+    name: str, prompts: list[str], metadata: dict[str, Any] | None = None
+) -> ProbeDataset:
+    """Create a ProbeDataset from prompts.
+
+    Args:
+        name: Value stored as the dataset's ``dataset_name``.
+        prompts: The prompts to store in the dataset.
+        metadata: Optional metadata; a falsy value is replaced by ``{}``.
+
+    Returns:
+        A ProbeDataset whose ``tokens`` is the total count of
+        whitespace-separated words across all prompts and whose
+        ``approx_cost`` is ``0.0``.
+    """
+    metadata = metadata or {}
    return ProbeDataset(
        dataset_name=name,
-        metadata={},
-        prompts=filtered_prompts,
-        tokens=count_words_in_list(filtered_prompts),
+        metadata=metadata,
+        prompts=prompts,
+        # Each prompt is coerced to str first, so non-string entries still count.
+        tokens=sum(len(str(s).split()) for s in prompts),
        approx_cost=0.0,
    )


-def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]):
-    # ## Datasets used and cleaned:
-    # markush1/LLM-Jailbreak-Classifier
-    # 1. Open-Orca/OpenOrca
-    # 2. ShawnMenz/DAN_jailbreak
-    # 3. EddyLuo/JailBreakV_28K
-    # 4. https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/jailbreak_prompts.csv
+# Generalized dataset loader
+@cache_to_disk()
+def load_dataset_generic(
+    name: str,
+    mappings: ColumnMappings | None = None,
+    filter_fn: FilterFn | None = None,
+    url: str | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> ProbeDataset:
+    """Load and process a dataset with flexible configuration."""
+    try:
+        df = load_df_from_source(url or name, is_url=bool(url))
+        transformed_df = transform_df(df, mappings, filter_fn)

-    dataset_map = {
-        "ShawnMenz/DAN_jailbreak": load_dataset_v1,
-        "deepset/prompt-injections": load_dataset_v2,
-        "notrichardren/refuse-to-answer-prompts": load_dataset_v4,
-        "rubend18/ChatGPT-Jailbreak-Prompts": load_dataset_v3,
-        "Lemhf14/EasyJailbreak_Datasets": load_dataset_v5,
-        "markush1/LLM-Jailbreak-Classifier": load_dataset_v6,
-        "JailbreakV-28K/JailBreakV-28k": load_dataset_v7,
-        "ShawnMenz/jailbreak_sft_rm_ds": load_dataset_v8,
-        "verazuo/jailbreak_llms/2023_05_07": lambda: load_generic_csv(
-            url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_05_07.csv",
-            name="verazuo/jailbreak_llms/2023_05_07",
-            column="prompt",
-            predicator=lambda x: bool(x["jailbreak"]),
-        ),
-        "verazuo/jailbreak_llms/2023_12_25.csv": lambda: load_generic_csv(
-            url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_12_25.csv.csv",
-            name="verazuo/jailbreak_llms/2023_12_25.csv",
-            column="prompt",
-            predicator=lambda x: bool(x["jailbreak"]),
-        ),
-        "Custom CSV": load_local_csv,
-    }
-    dataset_map.update(dataset_map_generics)
+        # Determine which column to use as the prompt source
+        prompt_col = None
+        if mappings and "prompt" in mappings:
+            prompt_col = mappings["prompt"]
+        elif "prompt" in transformed_df.columns:
+            prompt_col = "prompt"
+        else:
+            # Try to find a suitable text column
+            text_columns = [
+                col
+                for col in transformed_df.columns
+                if any(
+                    keyword in col.lower()
+                    for keyword in ["prompt", "text", "query", "question"]
+                )
+            ]
+            if text_columns:
+                prompt_col = text_columns[0]
+                logger.info(f"Using column '{prompt_col}' as prompt source")
+            else:
+                logger.error(f"No suitable prompt column found in dataset {name}")
+                return create_probe_dataset(name, [], metadata)

-    group = []
-    for dataset_name in dataset_names:
-        if dataset_name in dataset_map:
-            logger.info(f"Loading {dataset_name}")
-            try:
-                group.append(dataset_map[dataset_name]())
-            except Exception as e:
-                logger.error(f"Error loading {dataset_name}: {e}")
-
-    dynamic_datasets = {
-        "AgenticBackend": lambda opts: dataset_from_iterator(
-            "AgenticBackend",
-            fine_tuned.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
-            lazy=True,
-        ),
-        "Steganography": lambda opts: Stenography(group),
-        "llm-adaptive-attacks": lambda opts: dataset_from_iterator(
-            "llm-adaptive-attacks",
-            adaptive_attacks.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
-        ),
-        "Garak": lambda opts: dataset_from_iterator(
-            "Garak",
-            garak_tool.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
-            lazy=True,
-        ),
-        "Reinforcement Learning Optimization": lambda opts: dataset_from_iterator(
-            "Reinforcement Learning Optimization",
-            rl_model.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
-            lazy=True,
-        ),
-        "InspectAI": lambda opts: dataset_from_iterator(
-            "InspectAI",
-            inspect_ai_tool.Module(group, tools_inbox=tools_inbox).apply(),
-            lazy=True,
-        ),
-        "GPT fuzzer": lambda opts: [],
-    }
-
-    dynamic_groups = []
-    options = options or [{} for _ in dataset_names]
-    for dataset_name, opts in zip(dataset_names, options):
-        if dataset_name in dynamic_datasets:
-            logger.info(f"Loading {dataset_name}")
-
-            ds = dynamic_datasets[dataset_name](opts)
-
-            for g in ds:
-                dynamic_groups.append(g)
-    return group + dynamic_groups
+        # Extract prompts and filter out empty ones
+        prompts = [
+            p
+            for p in transformed_df[prompt_col].tolist()
+            if p and isinstance(p, (str, int, float))
+        ]
+        return create_probe_dataset(name, prompts, metadata)
+    except Exception as e:
+        logger.error(f"Error loading dataset {name}: {e}")
+        return create_probe_dataset(name, [], {"error": str(e)})


-class Stenography:
-    fn_library = {
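+# Dataset-specific configurations. An entry may set "mappings", "filter_fn" and
+# "url"; create_dataset_loader forwards all three to load_dataset_generic.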
+DATASET_CONFIGS = {
+    "ShawnMenz/DAN_jailbreak": {
+        "mappings": {"prompt": "prompt"},
+        "filter_fn": lambda row: row["jailbreak"],
+    },
+    "deepset/prompt-injections": {
+        "mappings": {"prompt": "text"},
+        "filter_fn": lambda row: row["label"],
+    },
+    "notrichardren/refuse-to-answer-prompts": {
+        "mappings": {"prompt": "claim"},
+        "filter_fn": lambda row: row["label"],
+    },
+    "rubend18/ChatGPT-Jailbreak-Prompts": {
+        "mappings": {"prompt": "Prompt"},
+        "filter_fn": lambda row: row["label"],
+    },
+    "markush1/LLM-Jailbreak-Classifier": {
+        "mappings": {"prompt": "prompt"},
+        "filter_fn": lambda row: row["classification"] == "jailbreak",
+    },
+    "ShawnMenz/jailbreak_sft_rm_ds": {
+        "url": "hf://datasets/ShawnMenz/jailbreak_sft_rm_ds/jailbreak_sft_rm_ds.csv",
+        "mappings": {"prompt": "prompt"},
+        "filter_fn": lambda row: row["jailbreak"] == "jailbreak",
+    },
+    "verazuo/jailbreak_llms/2023_05_07": {
+        "url": "https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_05_07.csv",
+        "mappings": {"prompt": "prompt"},
+        "filter_fn": lambda row: bool(row["jailbreak"]),
+    },
+    "verazuo/jailbreak_llms/2023_12_25": {
+        "url": "https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_12_25.csv",
+        "mappings": {"prompt": "prompt"},
+        "filter_fn": lambda row: bool(row["jailbreak"]),
+    },
+}
+
+# Additional generic dataset configurations: at most a prompt-column mapping,
+# with no row filter and no explicit URL.
+DATASET_CONFIGS_GENERICS = {
+    "simonycl/aya-23-8B_advbench_jailbreak": {"mappings": {"prompt": "prompt"}},
+    "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b": {},
+    "karanxa/dolphin-jailbreak-finetuning-dataset": {"mappings": {"prompt": "text"}},
+    "karanxa/llama-2-jailbreak-dataset": {"mappings": {"prompt": "text"}},
+    "karanxa/llama2-uncensored-jailbreak-dataset-finetuning": {
+        "mappings": {"prompt": "text"}
+    },
+    "liuyanchen1015/Llama-3.2-1B_jailbreak_responses": {
+        "mappings": {"prompt": "jailbreak_prompt_text"}
+    },
+    "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses": {
+        "mappings": {"prompt": "jailbreak_prompt_text"}
+    },
+    "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment": {
+        "mappings": {"prompt": "jailbreak_prompt_text"}
+    },
+    "jackhhao/jailbreak-classification": {"mappings": {"prompt": "prompt"}},
+    "walledai/JailbreakBench": {"mappings": {"prompt": "prompt"}},
+    "walledai/JailbreakHub": {"mappings": {"prompt": "prompt"}},
+    "Granther/evil-jailbreak": {"mappings": {"prompt": "text"}},
+    "sevdeawesome/jailbreak_success": {"mappings": {"prompt": "jailbreak_prompt_text"}},
+    "IDA-SERICS/Disaster-tweet-jailbreaking": {"mappings": {"prompt": "prompt_attack"}},
+    "GeorgeDaDude/Jailbreak_Complete_DS_labeled": {"mappings": {"prompt": "question"}},
+    "dayone3nder/jailbreak_prompt_JBB_sft_trainset": {"mappings": {"prompt": "prompt"}},
+    "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset": {
+        "mappings": {"prompt": "prompt"}
+    },
+}
+
+
+# Dataset factory
+def create_dataset_loader(name: str, config: dict[str, Any]) -> DatasetLoader:
+    """Bind one dataset configuration to ``load_dataset_generic``.
+
+    Args:
+        name: Dataset identifier; also stored as ``metadata["source"]``.
+        config: Loader settings. The optional keys ``mappings``, ``filter_fn``
+            and ``url`` are forwarded; a missing key is passed as ``None``.
+
+    Returns:
+        A ``functools.partial`` with every argument pre-bound, so the loader
+        can be invoked without arguments.
+    """
+    loader_kwargs = {
+        key: config.get(key) for key in ("mappings", "filter_fn", "url")
+    }
+    # The stringified config is kept next to the source name in the metadata.
+    loader_kwargs["metadata"] = {"source": name, "config": str(config)}
+    return partial(load_dataset_generic, name=name, **loader_kwargs)
+
+
+# Specialized loaders
+@cache_to_disk()
+def load_multi_dataset(name: str, sub_datasets: list[str]) -> ProbeDataset:
+    """Merge the ``query`` column of several sub-datasets into one dataset.
+
+    Args:
+        name: Dataset identifier passed to ``load_dataset``.
+        sub_datasets: Sub-dataset names, loaded one after another.
+
+    Returns:
+        A ProbeDataset named ``<name>_combined``. Sub-datasets that fail to
+        load, or whose train split has no ``query`` feature, contribute
+        nothing and are only logged.
+    """
+    combined = []
+    for subset in sub_datasets:
+        try:
+            train_split = load_dataset(name, subset)["train"]
+            if "query" not in train_split.features:
+                logger.warning(f"No 'query' column in {name}/{subset}")
+                continue
+            combined.extend(train_split["query"])
+        except Exception as e:
+            logger.error(f"Error loading {name}/{subset}: {e}")
+
+    metadata = {"source": name, "sub_datasets": sub_datasets}
+    return create_probe_dataset(f"{name}_combined", combined, metadata)
+
+
+@cache_to_disk()
+def load_jailbreak_v28k() -> ProbeDataset:
+    """Load the ``jailbreak_query`` column of the JailBreakV-28K CSV.
+
+    Returns:
+        A ProbeDataset holding every value of that column. If reading the CSV
+        or selecting the column raises, the error is logged and an empty
+        dataset with the same name is returned instead.
+    """
+    dataset_id = "JailbreakV-28K/JailBreakV-28k"
+    try:
+        frame = pd.read_csv(
+            "hf://datasets/JailbreakV-28K/JailBreakV-28k/JailBreakV_28K.csv"
+        )
+        queries = frame["jailbreak_query"].tolist()
+        return create_probe_dataset(dataset_id, queries, {"source": dataset_id})
+    except Exception as e:
+        logger.error(f"Error loading JailbreakV-28K: {e}")
+        return create_probe_dataset(dataset_id, [])
+
+
+@cache_to_disk(1)
+def file_dataset(file) -> list[str]:
+    """Read the ``prompt`` column of a CSV file located under ``./datasets``.
+
+    Args:
+        file: File name, joined onto the ``./datasets`` directory.
+
+    Returns:
+        The values of the ``prompt`` column, or an empty list when the column
+        is absent or reading the file raises (both cases are logged).
+    """
+    try:
+        frame = pd.read_csv(
+            os.path.join("./datasets", file), encoding_errors="ignore"
+        )
+        if "prompt" not in frame.columns:
+            logger.warning(f"File {file} lacks a suitable prompt column")
+            return []
+        return frame["prompt"].tolist()
+    except Exception as e:
+        logger.error(f"Error reading {file}: {e}")
+        return []
+
+
+def load_local_csv() -> ProbeDataset:
+    """Merge the prompts of every CSV file in ``./datasets`` into one dataset.
+
+    The directory is created when it does not exist yet. The resulting
+    dataset is named ``Local CSV`` and stores the stringified list of file
+    names under the ``src`` metadata key.
+    """
+    os.makedirs("./datasets", exist_ok=True)
+    csv_files = [
+        entry for entry in os.listdir("./datasets") if entry.endswith(".csv")
+    ]
+    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")
+
+    merged = [prompt for file in csv_files for prompt in file_dataset(file)]
+    return create_probe_dataset("Local CSV", merged, {"src": str(csv_files)})
+
+
+def load_csv(file: str) -> ProbeDataset:
+    """Wrap the prompts of a single CSV file from ``./datasets`` in a dataset.
+
+    The dataset is named ``fs://<file>`` and records the file name under the
+    ``src`` metadata key.
+    """
+    return create_probe_dataset(
+        f"fs://{file}", file_dataset(file), {"src": str(file)}
+    )
+
+
+def load_local_csv_files() -> list[ProbeDataset]:
+    """Build one ProbeDataset per CSV file found in ``./datasets``.
+
+    Returns:
+        One dataset per ``*.csv`` file, each named after its file and carrying
+        the file name under the ``src`` metadata key. The list is empty when
+        the directory holds no CSV files.
+    """
+    # Create the directory on first use, as load_local_csv does, so a fresh
+    # checkout yields an empty list instead of FileNotFoundError from listdir.
+    os.makedirs("./datasets", exist_ok=True)
+    csv_files = [f for f in os.listdir("./datasets") if f.endswith(".csv")]
+    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")
+
+    return [
+        create_probe_dataset(file, file_dataset(file), {"src": file})
+        for file in csv_files
+    ]
+
+
+# Stenography transformer
+class StenographyTransformer:
+    """Apply stenography transformations to datasets."""
+
+    TRANSFORMATIONS = {
        "rot5": stenography_fn.rot5,
        "rot13": stenography_fn.rot13,
        "base64": stenography_fn.base64_encode,
@@ -306,151 +306,172 @@ class Stenography:
        "zigzag_obfuscation": stenography_fn.zigzag_obfuscation,
    }

-    def __init__(self, prompt_groups: [ProbeDataset]):
-        self.prompt_groups = prompt_groups
+    def __init__(self, datasets: list[ProbeDataset]):
+        self.datasets = datasets

-    def __iter__(self):
-        return self.apply()
+    def transform(self) -> Iterator[ProbeDataset]:
+        """Generate transformed datasets."""
+        if not self.datasets:
+            logger.warning("No datasets provided to StenographyTransformer")
+            return

-    def apply(self):
-        for prompt_group in self.prompt_groups:
-            size = len(prompt_group.prompts)
-            for name, fn in self.fn_library.items():
-                logger.info(f"Applying {name} to {prompt_group.dataset_name}")
-                selected_prompts = random.sample(prompt_group.prompts, int(size * 0.25))
-                prompts = [
-                    f"Follow the instructions in {name}: {fn(p)}"
-                    for p in selected_prompts
-                ]
-                yield ProbeDataset(
-                    dataset_name=f"stenography.{name}({prompt_group.dataset_name})",
-                    metadata={},
-                    prompts=prompts,
-                    tokens=count_words_in_list(prompts),
-                    approx_cost=0.0,
+        for dataset in self.datasets:
+            if not dataset.prompts:
+                logger.warning(
+                    f"Dataset {dataset.dataset_name} has no prompts to transform"
                )
+                continue
+
+            # Convert async generator (if any) to a list
+            prompts = (
+                list(dataset.prompts) if hasattr(dataset.prompts, "__iter__") else []
+            )
+            size = len(prompts)
+
+            if size == 0:
+                continue
+
+            sample_size = max(1, int(size * 0.25))  # At least 1 sample
+
+            for name, fn in self.TRANSFORMATIONS.items():
+                logger.info(f"Applying {name} to {dataset.dataset_name}")
+                sampled = random.sample(prompts, min(sample_size, size))
+                try:
+                    transformed = [
+                        f"Follow the instructions in {name}: {fn(str(p))}"
+                        for p in sampled
+                    ]
+                    transformed_dataset = create_probe_dataset(
+                        f"stenography.{name}({dataset.dataset_name})",
+                        transformed,
+                        {"source": dataset.dataset_name, "transformation": name},
+                    )
+                    yield transformed_dataset
+                except Exception as e:
+                    logger.error(
+                        f"Error applying {name} to {dataset.dataset_name}: {e}"
+                    )


-@lru_cache
-def load_local_csv() -> ProbeDataset:
-    csv_files = [f for f in os.listdir(".") if f.endswith(".csv")]
-    logger.info(f"Found {len(csv_files)} CSV files")
-    logger.info(f"CSV files: {csv_files}")
-    prompt_list = []
+def dataset_from_iterator(
+    name: str, iterator: Iterator[str], lazy: bool = False
+) -> list[ProbeDataset]:
+    """Wrap an iterator of prompts in a single-element list of ProbeDataset.
+
+    Args:
+        name: Dataset name; also stored as ``metadata["source"]``.
+        iterator: Source of prompts.
+        lazy: When true the iterator is stored as-is and the token count is
+            left at 0; otherwise it is consumed into a list and the
+            whitespace-separated words of every prompt are counted.
+
+    Returns:
+        A list holding exactly one dataset. If building it raises, the error
+        is logged and an empty dataset carrying the error text is returned.
+    """
+    try:
+        if lazy:
+            prompts, tokens = iterator, 0
+        else:
+            prompts = list(iterator)
+            tokens = sum(len(str(s).split()) for s in prompts)
+        return [
+            ProbeDataset(
+                dataset_name=name,
+                metadata={"source": name, "lazy": lazy},
+                prompts=prompts,
+                tokens=tokens,
+                approx_cost=0.0,
+                lazy=lazy,
+            )
+        ]
+    except Exception as e:
+        logger.error(f"Error creating dataset from iterator {name}: {e}")
+        return [create_probe_dataset(name, [], {"error": str(e)})]

-    for file in csv_files:
-        try:
-            df = pd.read_csv(file)
-        except Exception as e:
-            logger.error(f"Error reading {file}: {e}")
+
+# Main dataset preparation
+def prepare_prompts(
+    dataset_names: list[str],
+    budget: float,
+    tools_inbox=None,
+    options: list[dict] = None,
+) -> list[ProbeDataset]:
+    """Prepare datasets based on names and options."""
+    # Base dataset loaders
+    logger.info(f"Preparing datasets: {dataset_names}")
+    dataset_loaders = {
+        **{k: create_dataset_loader(k, v) for k, v in DATASET_CONFIGS.items()},
+        **{k: create_dataset_loader(k, v) for k, v in DATASET_CONFIGS_GENERICS.items()},
+        "Lemhf14/EasyJailbreak_Datasets": partial(
+            load_multi_dataset,
+            name="Lemhf14/EasyJailbreak_Datasets",
+            sub_datasets=[
+                "AdvBench",
+                "ForbiddenQuestion",
+                "MJP",
+                "MaliciousInstruct",
+                "QuestionList",
+            ],
+        ),
+        "JailbreakV-28K/JailBreakV-28k": load_jailbreak_v28k,
+        "Local CSV": load_local_csv,
+        "Custom CSV": load_local_csv,
+    }
+
+    # Dynamic dataset loaders
+    dynamic_loaders = {
+        "AgenticBackend": lambda opts: dataset_from_iterator(
+            "AgenticBackend",
+            fine_tuned.Module(
+                opts["datasets"], tools_inbox=tools_inbox, opts=opts
+            ).apply(),
+            lazy=True,
+        ),
+        "Steganography": lambda opts: list(
+            StenographyTransformer(opts["datasets"]).transform()
+        ),
+        "llm-adaptive-attacks": lambda opts: dataset_from_iterator(
+            "llm-adaptive-attacks",
+            adaptive_attacks.Module(
+                opts["datasets"], tools_inbox=tools_inbox, opts=opts
+            ).apply(),
+        ),
+        "Garak": lambda opts: dataset_from_iterator(
+            "Garak",
+            garak_tool.Module(
+                opts["datasets"], tools_inbox=tools_inbox, opts=opts
+            ).apply(),
+            lazy=True,
+        ),
+        "Reinforcement Learning Optimization": lambda opts: dataset_from_iterator(
+            "Reinforcement Learning Optimization",
+            rl_model.Module(
+                opts["datasets"], tools_inbox=tools_inbox, opts=opts
+            ).apply(),
+            lazy=True,
+        ),
+        "InspectAI": lambda opts: dataset_from_iterator(
+            "InspectAI",
+            inspect_ai_tool.Module([], tools_inbox=tools_inbox).apply(),
+            lazy=True,
+        ),
+        "GPT fuzzer": lambda opts: [],
+    }
+
+    datasets = []
+    options = options or [dict(datasets=datasets) for _ in dataset_names]
+
+    # Load base datasets
+    for name, opts in zip(dataset_names, options):
+        if name not in dataset_loaders:
            continue
-        # Check if 'prompt' column exists
-        if "prompt" in df.columns:
-            prompt_list.extend(df["prompt"].tolist())
-        else:
-            logger.warning(f"File {file} does not contain a 'prompt' column")
-    return ProbeDataset(
-        dataset_name="Local CSV",
-        metadata={"src": str(csv_files)},
-        prompts=prompt_list,
-        tokens=count_words_in_list(prompt_list),
-        approx_cost=0.0,
-    )
+        try:
+            datasets.append(dataset_loaders[name]())
+        except Exception as e:
+            logger.error(f"Error loading {name}: {e}")

+    # Load dynamic datasets and apply transformations
+    for name, opts in zip(dataset_names, options):
+        if name not in dynamic_loaders:
+            continue
+        logger.info(f"Loading dynamic dataset {name} {opts}")
+        opts["datasets"] = datasets
+        try:
+            dynamic_result = dynamic_loaders[name](opts)
+            datasets.extend(dynamic_result)
+        except Exception as e:
+            logger.exception(f"Error loading dynamic {name}: {e}")

-def dataset_from_iterator(name: str, iterator, lazy=False) -> list:
-    """Convert an iterator into a list of prompts and create a ProbeDataset
-    object.
+    # Load csv datasets and apply transformations
+    for name, opts in zip(dataset_names, options):
+        if not name.endswith(".csv"):
+            continue
+        logger.info(f"Loading csv dataset {name} {opts}")
+        datasets.append(load_csv(name))

-    Args:
-        name (str): The name of the dataset.
-        iterator (iterator): An iterator that generates prompts.
-
-    Returns:
-        list: A list containing a single ProbeDataset object.
-    """
-    prompts = list(iterator) if not lazy else iterator
-    tokens = count_words_in_list(prompts) if not lazy else 0
-    dataset = ProbeDataset(
-        dataset_name=name,
-        metadata={},
-        prompts=prompts,
-        tokens=tokens,
-        approx_cost=0.0,
-        lazy=lazy,
-    )
-    return [dataset]
-
-
-# TODO: refactor this abstraction
-
-dataset_map_generics = {
-    "simonycl/aya-23-8B_advbench_jailbreak": lambda: load_dataset_general(
-        dataset_name="simonycl/aya-23-8B_advbench_jailbreak",
-        column_mappings={"prompt": "prompt"},
-    ),
-    "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b": lambda: load_dataset_general(
-        dataset_name="acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b"
-    ),
-    "karanxa/dolphin-jailbreak-finetuning-dataset": lambda: load_dataset_general(
-        dataset_name="karanxa/dolphin-jailbreak-finetuning-dataset",
-        column_mappings={"prompt": "text"},
-    ),
-    "karanxa/llama-2-jailbreak-dataset": lambda: load_dataset_general(
-        dataset_name="karanxa/llama-2-jailbreak-dataset",
-        column_mappings={"prompt": "text"},
-    ),
-    "karanxa/llama2-uncensored-jailbreak-dataset-finetuning": lambda: load_dataset_general(
-        dataset_name="karanxa/llama2-uncensored-jailbreak-dataset-finetuning",
-        column_mappings={"prompt": "text"},
-    ),
-    "liuyanchen1015/Llama-3.2-1B_jailbreak_responses": lambda: load_dataset_general(
-        dataset_name="liuyanchen1015/Llama-3.2-1B_jailbreak_responses",
-        column_mappings={"prompt": "jailbreak_prompt_text"},
-    ),
-    "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses": lambda: load_dataset_general(
-        dataset_name="liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses",
-        column_mappings={"prompt": "jailbreak_prompt_text"},
-    ),
-    "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment": lambda: load_dataset_general(
-        dataset_name="liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment",
-        column_mappings={"prompt": "jailbreak_prompt_text"},
-    ),
-    "jackhhao/jailbreak-classification": lambda: load_dataset_general(
-        dataset_name="jackhhao/jailbreak-classification",
-        column_mappings={"prompt": "prompt"},
-    ),
-    "markush1/LLM-Jailbreak-Classifier": lambda: load_dataset_general(
-        dataset_name="markush1/LLM-Jailbreak-Classifier",
-        column_mappings={"prompt": "prompt"},
-    ),
-    "walledai/JailbreakBench": lambda: load_dataset_general(
-        dataset_name="walledai/JailbreakBench", column_mappings={"prompt": "prompt"}
-    ),
-    "walledai/JailbreakHub": lambda: load_dataset_general(
-        dataset_name="walledai/JailbreakHub", column_mappings={"prompt": "prompt"}
-    ),
-    "Granther/evil-jailbreak": lambda: load_dataset_general(
-        dataset_name="Granther/evil-jailbreak", column_mappings={"prompt": "text"}
-    ),
-    "sevdeawesome/jailbreak_success": lambda: load_dataset_general(
-        dataset_name="sevdeawesome/jailbreak_success",
-        column_mappings={"prompt": "jailbreak_prompt_text"},
-    ),
-    "IDA-SERICS/Disaster-tweet-jailbreaking": lambda: load_dataset_general(
-        dataset_name="IDA-SERICS/Disaster-tweet-jailbreaking",
-        column_mappings={"prompt": "prompt_attack"},
-    ),
-    "GeorgeDaDude/Jailbreak_Complete_DS_labeled": lambda: load_dataset_general(
-        dataset_name="GeorgeDaDude/Jailbreak_Complete_DS_labeled",
-        column_mappings={"prompt": "question"},
-    ),
-    "dayone3nder/jailbreak_prompt_JBB_sft_trainset": lambda: load_dataset_general(
-        dataset_name="dayone3nder/jailbreak_prompt_JBB_sft_trainset",
-        column_mappings={"prompt": "prompt"},
-    ),
-    "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset": lambda: load_dataset_general(
-        dataset_name="dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset",
-        column_mappings={"prompt": "prompt"},
-    ),
-}
+    return datasets
@@ -3,7 +3,8 @@ import io

 import httpx
 import pandas as pd
-from loguru import logger
+
+from agentic_security.logutils import logger

 url = "https://raw.githubusercontent.com/tml-epfl/llm-adaptive-attacks/main/harmful_behaviors/harmful_behaviors_pair.csv"

@@ -3,7 +3,8 @@ import os
 import uuid as U

 import httpx
-from loguru import logger
+
+from agentic_security.logutils import logger

 AUTH_TOKEN: str = os.getenv("AS_TOKEN", "gh0-5f4a8ed2-37c6-4bd7-a0cf-7070eae8115b")

@@ -65,7 +66,7 @@ class Module:
                return {}

    async def fetch_prompts(self) -> list[str]:
-        api_url = "https://edge.metaheuristic.co/infer"
+        api_url = "https://mcp.metaheuristic.co/infer"
        headers = {
            "Authorization": f"Bearer {AUTH_TOKEN}",
            "Content-Type": "application/json",
@@ -4,7 +4,7 @@ import json
 import os
 import subprocess

-from loguru import logger
+from agentic_security.logutils import logger

 # TODO: add probes modules

@@ -2,7 +2,7 @@ import asyncio
 import importlib.util
 import os

-from loguru import logger
+from agentic_security.logutils import logger

 inspect_ai_task = (
    __file__.replace("inspect_ai_tool.py", "inspect_ai_task.py")
@@ -8,7 +8,8 @@ from typing import Deque

 import numpy as np
 import requests
-from loguru import logger
+
+from agentic_security.logutils import logger

 AUTH_TOKEN: str = os.getenv("AS_TOKEN", "gh0-5f4a8ed2-37c6-4bd7-a0cf-7070eae8115b")

@@ -215,7 +216,7 @@ class Module:
        self.run_id = U.uuid4().hex
        self.batch_size = self.opts.get("batch_size", 500)
        self.rl_model = CloudRLPromptSelector(
-            prompt_groups, "https://edge.metaheuristic.co", run_id=self.run_id
+            prompt_groups, "https://mcp.metaheuristic.co", run_id=self.run_id
        )

    async def apply(self):
@@ -36,7 +36,7 @@ def mock_requests() -> Mock:
 def mock_rl_selector() -> Mock:
    return CloudRLPromptSelector(
        dataset_prompts,
-        api_url="https://edge.metaheuristic.co",
+        api_url="https://mcp.metaheuristic.co",
    )


@@ -94,7 +94,7 @@ class TestCloudRLPromptSelector:
    def test_select_next_prompt_success_service(self, dataset_prompts):
        selector = CloudRLPromptSelector(
            dataset_prompts,
-            api_url="https://edge.metaheuristic.co",
+            api_url="https://mcp.metaheuristic.co",
        )
        next_prompt = selector.select_next_prompt(
            "How does RL work?", passed_guard=True
@@ -1,6 +1,6 @@
 from dataclasses import dataclass

-from cache_to_disk import cache_to_disk
+from cache_to_disk import cache_to_disk  # noqa


 # TODO: refactor this class to use from .data
@@ -22,7 +22,7 @@ class ProbeDataset:
        }


-@cache_to_disk()
+# @cache_to_disk(n_days_to_cache=1)
 def load_dataset_generic(name, getter=lambda x: x["train"]["prompt"]):
    from datasets import load_dataset

@@ -7,9 +7,61 @@ import pandas as pd
 from matplotlib.cm import ScalarMappable
 from matplotlib.colors import LinearSegmentedColormap, Normalize

+from agentic_security.logutils import logger

-def plot_security_report(table):
+from .primitives import Table
+
+
+def plot_security_report(table: Table) -> io.BytesIO:
+    """
+    Generates a polar plot representing the security report based on the given data.
+
+    Delegates the rendering to ``_plot_security_report`` and converts any
+    exception it raises into a logged error plus an empty buffer.
+
+    Args:
+        table (Table): The input data table containing security metrics.
+
+    Returns:
+        io.BytesIO: A buffer containing the generated plot image in PNG format.
+                    Returns an empty buffer in case of an error.
+    """
+    try:
+        return _plot_security_report(table=table)
+    except Exception as e:
+        # Exception already covers TypeError, ValueError, OverflowError and
+        # IndexError, so they are not listed separately.
+        logger.error(f"Error in generating the security report: {e} {table}")
+    return io.BytesIO()
+
+
+def generate_identifiers(data: pd.DataFrame) -> list[str]:
+    """
+    Generates unique identifiers for the given dataset.
+
+    Thin public wrapper that delegates to ``_generate_identifiers``.
+
+    Args:
+        data (pd.DataFrame): A pandas DataFrame containing security-related data.
+
+    Returns:
+        list[str]: A list of generated identifiers. Exceptions raised by
+            ``_generate_identifiers`` are not caught here and propagate to the caller.
+    """
+    return _generate_identifiers(data=data)
+
+
+def _plot_security_report(table: Table) -> io.BytesIO:
+    """
+    Generates a polar plot-based security report visualizing the failure rates for different modules.
+
+    This function processes the input data, sorts it by failure rate, and generates a polar plot
+    where each bar represents the failure rate for a specific module. The plot includes identifiers,
+    color-coding based on token count, failure rate values on the bars, and a table listing the modules
+    and their corresponding failure rates.
+
+    Args:
+        table (Table): A table-like structure (e.g., pandas DataFrame) containing security report data
+                        with columns for failure rate, tokens, and modules.
+
+    Returns:
+        io.BytesIO: A buffer containing the generated plot image in PNG format.
+    """
    # Data preprocessing
+    logger.info("Data preprocessing started.")
+
    data = pd.DataFrame(table)

    # Sort by failure rate and reset index
@@ -20,10 +72,10 @@ def plot_security_report(table):
    fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={"projection": "polar"})
    fig.set_facecolor("#f0f0f0")
    ax.set_facecolor("#f0f0f0")
+    logger.info("Plot setup complete.")

    # Styling parameters
    colors = ["#6C5B7B", "#C06C84", "#F67280", "#F8B195"][::-1]  # Pastel palette
-    # colors = ["#440154", "#3b528b", "#21908c", "#5dc863"]  # Viridis-inspired palette
    cmap = LinearSegmentedColormap.from_list("custom", colors, N=256)
    norm = Normalize(vmin=data["tokens"].min(), vmax=data["tokens"].max())

@@ -74,7 +126,10 @@ def plot_security_report(table):

    # Title and caption
    fig.suptitle(
-        "Security Report for Different Modules", fontsize=16, fontweight="bold", y=1.02
+        "Security Report for Different Modules",
+        fontsize=16,
+        fontweight="bold",
+        y=1.02,
    )
    caption = "Report generated by https://github.com/msoedov/agentic_security"
    fig.text(
@@ -112,17 +167,12 @@ def plot_security_report(table):
            data["identifier"], data["failureRate"], data["module"]
        )
    ]
-    table = ax.table(
-        cellText=table_data,
-        loc="right",
-        cellLoc="left",
-    )
+    table = ax.table(cellText=table_data, loc="right", cellLoc="left")
    table.auto_set_font_size(False)
    table.set_fontsize(8)

    # Adjust table style
    table.scale(1, 0.7)
-
    for (row, col), cell in table.get_celld().items():
        cell.set_edgecolor("none")
        cell.set_facecolor("#f0f0f0" if row % 2 == 0 else "#e0e0e0")
@@ -132,17 +182,33 @@ def plot_security_report(table):
            cell.set_text_props(fontweight="bold")

    # Adjust layout and save
-
    plt.tight_layout()
    buf = io.BytesIO()
    plt.savefig(buf, format="png", dpi=300, bbox_inches="tight")
    plt.close(fig)
    buf.seek(0)
+    logger.info("Report successfully generated and saved to buffer.")
    return buf


-def generate_identifiers(data):
+def _generate_identifiers(data: pd.DataFrame) -> list[str]:
+    """
+    Generates a list of unique identifiers for each row in the given DataFrame.
+
+    The identifiers are based on the English alphabet, with each identifier consisting
+    of a letter followed by a number. The letter represents the "group" of identifiers
+    (using a letter from A to Z) and the number is a counter within that group. For example:
+    - A1, A2, ..., A26, B1, B2, ..., Z1, Z2, ...
+
+    Args:
+        data (pd.DataFrame): The input DataFrame containing data for which identifiers
+                             are to be generated.
+
+    Returns:
+        list[str]: A list of unique identifiers as strings, one for each row in the DataFrame.
+    """
    data_length = len(data)
+
    alphabet = string.ascii_uppercase
    num_letters = len(alphabet)

@@ -0,0 +1,397 @@
+_SPECS = [
+    """POST ${SELF_URL}/v1/self-probe
+Authorization: Bearer XXXXX
+Content-Type: application/json
+
+{
+"prompt": "<<PROMPT>>"
+}
+
+""",
+    """POST https://api.openai.com/v1/chat/completions
+Authorization: Bearer $OPENAI_API_KEY
+Content-Type: application/json
+
+{
+"model": "gpt-3.5-turbo",
+"messages": [{"role": "user", "content": "<<PROMPT>>"}],
+"temperature": 0.7
+}
+""",
+    """
+POST https://api.deepseek.com/chat/completions
+Authorization: Bearer $DEEPSEEK_API_KEY
+Content-Type: application/json
+
+{
+  "model": "deepseek-chat",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "<<PROMPT>>"}
+  ],
+  "stream": false
+}
+""",
+    """POST https://api.replicate.com/v1/models/mistralai/mixtral-8x7b-instruct-v0.1/predictions
+Authorization: Bearer $APIKEY
+Content-Type: application/json
+
+{
+"input": {
+"top_k": 50,
+"top_p": 0.9,
+"prompt": "Write a bedtime story about neural networks I can read to my toddler",
+"temperature": 0.6,
+"max_new_tokens": 1024,
+"prompt_template": "<s>[INST] <<PROMPT>> [/INST] ",
+"presence_penalty": 0,
+"frequency_penalty": 0
+}
+}
+""",
+    """POST https://api.groq.com/v1/request_manager/text_completion
+Authorization: Bearer $APIKEY
+Content-Type: application/json
+
+{
+"model_id": "codellama-34b",
+"system_prompt": "You are helpful and concise coding assistant",
+"user_prompt": "<<PROMPT>>"
+}
+""",
+    """POST https://api.together.xyz/v1/chat/completions
+Authorization: Bearer $TOGETHER_API_KEY
+Content-Type: application/json
+
+{
+"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+"messages": [
+{"role": "system", "content": "You are an expert travel guide"},
+{"role": "user", "content": "<<PROMPT>>"}
+]
+}
+""",
+    """POST ${SELF_URL}/v1/self-probe-image
+Authorization: Bearer XXXXX
+Content-Type: application/json
+
+[
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "text",
+          "text": "What is in this image?",
+        },
+        {
+          "type": "image_url",
+          "image_url": {
+            "url":  f"data:image/jpeg;base64,{<<BASE64_IMAGE>>}"
+          },
+        },
+      ],
+    }
+]
+""",
+    """POST ${SELF_URL}/v1/self-probe-file
+Authorization: Bearer $API_KEY
+Content-Type: multipart/form-data
+
+{
+  "file": "@./sample_audio.m4a",
+  "model": "whisper-large-v3"
+}
+""",
+    """POST https://api.gemini.com/v1/generate
+Authorization: Bearer $GEMINI_API_KEY
+Content-Type: application/json
+
+{
+  "model": "gemini-latest",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.8,
+  "max_tokens": 150,
+  "top_p": 1.0,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+    """POST https://api.anthropic.com/v1/complete
+Authorization: Bearer $ANTHROPIC_API_KEY
+Content-Type: application/json
+
+{
+  "model": "claude-v1.3",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.7,
+  "max_tokens_to_sample": 256,
+  "stop_sequences": ["\n\nHuman:"]
+}
+""",
+    """POST https://api.cohere.ai/generate
+Authorization: Bearer $COHERE_API_KEY
+Content-Type: application/json
+
+{
+  "model": "command-xlarge-nightly",
+  "prompt": "<<PROMPT>>",
+  "max_tokens": 300,
+  "temperature": 0.75,
+  "k": 0,
+  "p": 0.75
+}
+""",
+    """POST https://<<RESOURCE_NAME>>.openai.azure.com/openai/deployments/<<DEPLOYMENT_NAME>>/completions?api-version=2023-06-01-preview
+Authorization: Bearer $AZURE_API_KEY
+Content-Type: application/json
+
+{
+  "prompt": "<<PROMPT>>",
+  "max_tokens": 150,
+  "temperature": 0.7,
+  "top_p": 0.9,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+    """POST https://api.assemblyai.com/v2/transcript
+Authorization: Bearer $ASSEMBLY_API_KEY
+Content-Type: application/json
+
+{
+  "audio_url": "<<AUDIO_FILE_URL>>"
+}
+""",
+    """POST https://api.openrouter.ai/v1/chat/completions
+Authorization: Bearer $OPENROUTER_API_KEY
+Content-Type: application/json
+
+{
+  "model": "openrouter-latest",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.7,
+  "max_tokens": 150,
+  "top_p": 0.9,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+]
+
+
+LLM_SPECS = [
+    """POST ${SELF_URL}/v1/self-probe
+Authorization: Bearer XXXXX
+Content-Type: application/json
+
+{
+"prompt": "<<PROMPT>>"
+}
+
+""",
+    """POST https://api.openai.com/v1/chat/completions
+Authorization: Bearer $OPENAI_API_KEY
+Content-Type: application/json
+
+{
+"model": "gpt-3.5-turbo",
+"messages": [{"role": "user", "content": "<<PROMPT>>"}],
+"temperature": 0.7
+}
+""",
+    """
+POST https://api.deepseek.com/chat/completions
+Authorization: Bearer $DEEPSEEK_API_KEY
+Content-Type: application/json
+
+{
+  "model": "deepseek-chat",
+  "messages": [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": "<<PROMPT>>"}
+  ],
+  "stream": false
+}
+""",
+    """POST https://api.replicate.com/v1/models/mistralai/mixtral-8x7b-instruct-v0.1/predictions
+Authorization: Bearer $APIKEY
+Content-Type: application/json
+
+{
+"input": {
+"top_k": 50,
+"top_p": 0.9,
+"prompt": "Write a bedtime story about neural networks I can read to my toddler",
+"temperature": 0.6,
+"max_new_tokens": 1024,
+"prompt_template": "<s>[INST] <<PROMPT>> [/INST] ",
+"presence_penalty": 0,
+"frequency_penalty": 0
+}
+}
+""",
+    """POST https://api.groq.com/v1/request_manager/text_completion
+Authorization: Bearer $APIKEY
+Content-Type: application/json
+
+{
+"model_id": "codellama-34b",
+"system_prompt": "You are helpful and concise coding assistant",
+"user_prompt": "<<PROMPT>>"
+}
+""",
+    """POST https://api.together.xyz/v1/chat/completions
+Authorization: Bearer $TOGETHER_API_KEY
+Content-Type: application/json
+
+{
+"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+"messages": [
+{"role": "system", "content": "You are an expert travel guide"},
+{"role": "user", "content": "<<PROMPT>>"}
+]
+}
+""",
+    """POST ${SELF_URL}/v1/self-probe-image
+Authorization: Bearer XXXXX
+Content-Type: application/json
+
+[
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "text",
+          "text": "What is in this image?",
+        },
+        {
+          "type": "image_url",
+          "image_url": {
+            "url":  f"data:image/jpeg;base64,{<<BASE64_IMAGE>>}"
+          },
+        },
+      ],
+    }
+]
+""",
+    """POST ${SELF_URL}/v1/self-probe-file
+Authorization: Bearer $API_KEY
+Content-Type: multipart/form-data
+
+{
+  "file": "@./sample_audio.m4a",
+  "model": "whisper-large-v3"
+}
+""",
+    """POST https://api.gemini.com/v1/generate
+Authorization: Bearer $GEMINI_API_KEY
+Content-Type: application/json
+
+{
+  "model": "gemini-latest",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.8,
+  "max_tokens": 150,
+  "top_p": 1.0,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+    """POST https://api.anthropic.com/v1/complete
+Authorization: Bearer $ANTHROPIC_API_KEY
+Content-Type: application/json
+
+{
+  "model": "claude-v1.3",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.7,
+  "max_tokens_to_sample": 256,
+  "stop_sequences": ["\n\nHuman:"]
+}
+""",
+    """POST https://api.cohere.ai/generate
+Authorization: Bearer $COHERE_API_KEY
+Content-Type: application/json
+
+{
+  "model": "command-xlarge-nightly",
+  "prompt": "<<PROMPT>>",
+  "max_tokens": 300,
+  "temperature": 0.75,
+  "k": 0,
+  "p": 0.75
+}
+""",
+    """POST https://<<RESOURCE_NAME>>.openai.azure.com/openai/deployments/<<DEPLOYMENT_NAME>>/completions?api-version=2023-06-01-preview
+Authorization: Bearer $AZURE_API_KEY
+Content-Type: application/json
+
+{
+  "prompt": "<<PROMPT>>",
+  "max_tokens": 150,
+  "temperature": 0.7,
+  "top_p": 0.9,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+    """POST https://api.assemblyai.com/v2/transcript
+Authorization: Bearer $ASSEMBLY_API_KEY
+Content-Type: application/json
+
+{
+  "audio_url": "<<AUDIO_FILE_URL>>"
+}
+""",
+    """POST https://api.openrouter.ai/v1/chat/completions
+Authorization: Bearer $OPENROUTER_API_KEY
+Content-Type: application/json
+
+{
+  "model": "openrouter-latest",
+  "prompt": "<<PROMPT>>",
+  "temperature": 0.7,
+  "max_tokens": 150,
+  "top_p": 0.9,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
+""",
+]
+
+
+LLM_CONFIGS = [
+    {
+        "name": "Custom API",
+        "prompts": 40000,
+        "customInstructions": "Requires api spec",
+        "logo": "/icons/myshell.png",
+    },
+    {"name": "Open AI", "prompts": 24000, "logo": "/icons/openai.png"},
+    {"name": "Deepseek v1", "prompts": 24000, "logo": "/icons/deepseek.png"},
+    {"name": "Replicate", "prompts": 40000, "logo": "/icons/replicate.png"},
+    {"name": "Groq", "prompts": 40000, "logo": "/icons/groq.png"},
+    {"name": "Together.ai", "prompts": 40000, "logo": "/icons/together.png"},
+    {
+        "name": "Custom API Image",
+        "prompts": 40000,
+        "customInstructions": "Requires api spec",
+        "modality": "Image",
+        "logo": "/icons/myshell.png",
+    },
+    {
+        "name": "Custom API Files",
+        "prompts": 40000,
+        "customInstructions": "Requires api spec",
+        "modality": "Files",
+        "logo": "/icons/myshell.png",
+    },
+    {"name": "Gemini", "prompts": 40000, "logo": "/icons/gemini.png"},
+    {"name": "Claude", "prompts": 40000, "logo": "/icons/claude.png"},
+    {"name": "Cohere", "prompts": 40000, "logo": "/icons/cohere.png"},
+    {"name": "Azure OpenAI", "prompts": 40000, "logo": "/icons/azureai.png"},
+    {"name": "assemblyai", "prompts": 40000, "logo": "/icons/myshell.png"},
+    {"name": "OpenRouter.ai", "prompts": 40000, "logo": "/icons/openrouter.png"},
+]
+
+LLM_SPECS = [dict(spec=spec, **d) for spec, d in zip(_SPECS, LLM_CONFIGS)]
@@ -6,6 +6,7 @@ from fastapi.responses import JSONResponse
 from ..primitives import FileProbeResponse, Probe
 from ..probe_actor.refusal import REFUSAL_MARKS
 from ..probe_data import REGISTRY
+from ._specs import LLM_SPECS

 router = APIRouter()

@@ -73,6 +74,12 @@ async def data_config():
    return [m for m in REGISTRY]


+@router.get("/v1/llm-specs", response_model=list)
+def get_llm_specs():
+    """Returns the LLM API specifications."""
+    return LLM_SPECS
+
+
@router.get("/health")
 async def health_check():
    """Health check endpoint."""
@@ -2,7 +2,8 @@ import random
 from asyncio import Event

 from fastapi import APIRouter
-from loguru import logger
+
+from agentic_security.logutils import logger

 from ..core.app import get_current_run, get_tools_inbox
 from ..primitives import CompletionRequest, Settings
@@ -1,4 +1,6 @@
+from collections.abc import Generator
 from datetime import datetime
+from typing import Any

 from fastapi import (
    APIRouter,
@@ -10,11 +12,12 @@ from fastapi import (
    UploadFile,
 )
 from fastapi.responses import StreamingResponse
-from loguru import logger
+
+from agentic_security.logutils import logger

 from ..core.app import get_stop_event, get_tools_inbox, set_current_run
 from ..dependencies import InMemorySecrets, get_in_memory_secrets
-from ..http_spec import LLMSpec
+from ..http_spec import InvalidHTTPSpecError, LLMSpec
 from ..primitives import LLMInfo, Scan
 from ..probe_actor import fuzzer

@@ -24,10 +27,12 @@ router = APIRouter()
@router.post("/verify")
 async def verify(
    info: LLMInfo, secrets: InMemorySecrets = Depends(get_in_memory_secrets)
-):
+) -> dict[str, int | str | float]:
    spec = LLMSpec.from_string(info.spec)
    try:
        r = await spec.verify()
+    except InvalidHTTPSpecError as e:
+        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.exception(e)
        raise HTTPException(status_code=400, detail=str(e))
@@ -42,7 +47,7 @@ async def verify(
    )


-def streaming_response_generator(scan_parameters: Scan):
+def streaming_response_generator(scan_parameters: Scan) -> Generator[str, Any, None]:
    request_factory = LLMSpec.from_string(scan_parameters.llmSpec)
    set_current_run(request_factory)

@@ -63,7 +68,7 @@ async def scan(
    scan_parameters: Scan,
    background_tasks: BackgroundTasks,
    secrets: InMemorySecrets = Depends(get_in_memory_secrets),
-):
+) -> StreamingResponse:
    scan_parameters.with_secrets(secrets)
    return StreamingResponse(
        streaming_response_generator(scan_parameters), media_type="application/json"
@@ -71,7 +76,7 @@ async def scan(


@router.post("/stop")
-async def stop_scan():
+async def stop_scan() -> dict[str, str]:
    get_stop_event().set()
    return {"status": "Scan stopped"}

@@ -85,7 +90,7 @@ async def scan_csv(
    maxBudget: int = Query(10_000),
    enableMultiStepAttack: bool = Query(False),
    secrets: InMemorySecrets = Depends(get_in_memory_secrets),
-):
+) -> StreamingResponse:
    # TODO: content dataset to fuzzer
    content = await file.read()  # noqa
    llm_spec = await llmSpec.read()
@@ -1,7 +1,8 @@
 import sentry_sdk
-from loguru import logger
 from sentry_sdk.integrations.logging import ignore_logger

+from agentic_security.logutils import logger
+
 from ..primitives import Settings


@@ -101,7 +101,7 @@ Content-Type: application/json
 ]
 `,
  `POST ${SELF_URL}/v1/self-probe-file
-Authorization: Bearer $GROQ_API_KEY
+Authorization: Bearer $API_KEY
 Content-Type: multipart/form-data

 {
@@ -204,8 +204,9 @@ let LLM_CONFIGS = [
  { name: 'Claude', prompts: 40000, logo: '/icons/claude.png' },
  { name: 'Cohere', prompts: 40000, logo: '/icons/cohere.png' },
  { name: 'Azure OpenAI', prompts: 40000, logo: '/icons/azureai.png' },
-  { name: 'OpenRouter.ai', prompts: 40000, logo: '/icons/openrouter.png' },
  { name: 'assemblyai', prompts: 40000, logo: fallbackIcon },
+  { name: 'OpenRouter.ai', prompts: 40000, logo: '/icons/openrouter.png' },
+
 ];
 function has_image(spec) {
  return spec.includes('<<BASE64_IMAGE>>');
@@ -63,6 +63,32 @@
          </div>
        </section>
      </main>
+      <div class="fixed top-6 right-6 z-50 space-y-3">
+        <transition-group name="toast">
+            <div
+                v-for="toast in toasts"
+                :key="toast.id"
+                class="flex items-center p-3 rounded-xl shadow-xl text-white max-w-md animate-toast-in border border-opacity-30"
+                :class="{
+                    'bg-success-toast border-accent-green': toast.type === 'success',
+                    'bg-error-toast border-accent-red': toast.type === 'error',
+                    'bg-info-toast border-accent-orange': toast.type === 'info'
+                }"
+            >
+                <span class="flex-1 font-medium tracking-wide text-sm">{{ toast.message }}</span>
+                <button
+                    @click="removeToast(toast.id)"
+                    class="ml-3 focus:outline-none hover:opacity-80 transition-opacity"
+                >
+                    <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
+                    </svg>
+                </button>
+            </div>
+        </transition-group>
+    </div>
+
+
      <main class="max-w-6xl mx-auto space-y-8">
        <section class="bg-dark-card rounded-lg p-6 shadow-lg" v-show="false">
          <h2 class="text-2xl font-bold mb-4">Select a Config</h2>
@@ -25,6 +25,8 @@ var app = new Vue({
        showModules: false,
        showLogs: false,
        showConsentModal: true,
+        toasts: [], // Array to store toast notifications
+        toastTimeout: 3000, // Duration in milliseconds (3 seconds)
        statusDotClass: 'bg-gray-500', // Default status dot class
        statusText: 'Verified', // Default status text
        statusClass: 'bg-green-500 text-dark-bg', // Default status class
@@ -93,21 +95,36 @@ var app = new Vue({

    },
    methods: {
+        showToast(message, type = 'success') {
+            const id = Date.now(); // Timestamp used as the toast ID; NOTE: toasts created in the same millisecond would share an ID
+            this.toasts.push({ id, message, type });
+
+            // Automatically remove toast after timeout
+            setTimeout(() => {
+                this.removeToast(id);
+            }, this.toastTimeout);
+        },
+
+        removeToast(id) {
+            this.toasts = this.toasts.filter(toast => toast.id !== id);
+        },
        focusTextarea() {
            this.isFocused = true;
-            self = this.$refs;
+            // Use this.$refs directly; the arrow function below keeps `this` bound to the component
            this.$nextTick(() => {
-                // Focus the textarea after rendering
-                self.textarea.focus();
-                this.adjustHeight({ target: self.textarea });
+                this.$refs.textarea.focus();
+                this.adjustHeight({ target: this.$refs.textarea });
            });
-            document.addEventListener("mousedown", this.handleClickOutside);
-
+            // Unfocus on outside click; handleOutsideClick removes this listener again
+            document.addEventListener("mousedown", this.handleOutsideClick);
        },
        handleOutsideClick(event) {
-            if (!this.$refs.container.contains(event.target)) {
+            if (!this.$refs.textarea) {
+                return
+            }
+            if (!this.$refs.textarea.contains(event.target)) {
                this.isFocused = false;
-                document.removeEventListener("mousedown", this.handleClickOutside);
+                document.removeEventListener("mousedown", this.handleOutsideClick);
            }
        },
        unfocusTextarea() {
@@ -115,7 +132,12 @@ var app = new Vue({
        },
        acceptConsent() {
            this.showConsentModal = false; // Close the modal
-            localStorage.setItem('consentGiven', 'true'); // Save consent to local storage
+
+            try {
+                localStorage.setItem('consentGiven', 'true'); // Save consent to local storage
+            } catch (e) {
+                this.showToast('Failed to save consent', 'error'); // Show error if saving fails
+            }
        },

        saveStateToLocalStorage() {
@@ -156,6 +178,7 @@ var app = new Vue({
            this.integrationVerified = false;
            this.showResetConfirmation = false;
            this.enableMultiStepAttack = false;
+            this.showToast('All settings have been reset to default', 'info');
        },
        confirmResetState() {
            this.showResetConfirmation = true;
@@ -194,31 +217,39 @@ var app = new Vue({
                spec: this.modelSpec,
            };
            let startTime = performance.now(); // Capture start time
-            const response = await fetch(`${SELF_URL}/verify`, {
-                method: 'POST',
-                headers: {
-                    'Content-Type': 'application/json',
-                },
-                body: JSON.stringify(payload),
-            });
-            console.log(response);
-            let r = await response.json();
-            let endTime = performance.now(); // Capture end time
-            let latency = endTime - startTime; // Calculate latency in milliseconds
-            latency = latency.toFixed(3) / 1000; // Round to 2 decimal places
-            this.latency = latency;
-            if (!response.ok) {
-                this.updateStatusDot(false);
-                this.errorMsg = 'Integration verification failed:' + JSON.stringify(r);
-            } else {
-                this.errorMsg = '';
-                this.updateStatusDot(true);
-                this.okMsg = 'Integration verified';
-                this.integrationVerified = true;
-                // console.log('Integration verified', this.integrationVerified);
-                // this.$forceUpdate();

+            try {
+                const response = await fetch(`${SELF_URL}/verify`, {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify(payload),
+                });
+
+                let r = await response.json();
+
+                let endTime = performance.now(); // Capture end time
+                let latency = ((endTime - startTime) / 1000).toFixed(3); // Latency in seconds, formatted to 3 decimal places
+                this.latency = latency;
+
+                if (!response.ok) {
+                    this.updateStatusDot(false);
+                    this.errorMsg = 'Integration verification failed:' + JSON.stringify(r);
+                    this.showToast('Integration verification failed', 'error');
+                } else {
+                    this.errorMsg = '';
+                    this.updateStatusDot(true);
+                    this.okMsg = 'Integration verified';
+                    this.showToast('Integration verified successfully', 'success');
+                    this.integrationVerified = true;
+                }
+            } catch (error) {
+                this.updateStatusDot(true);
+                this.errorMsg = 'Server unreachable';
+                this.showToast('Network error', 'error');
            }
+
            this.saveStateToLocalStorage();
        },
        loadConfigs: async function () {
@@ -240,6 +271,7 @@ var app = new Vue({
            this.errorMsg = '';
            this.okMsg = '';
            this.integrationVerified = false;
+            this.showToast(`Config ${index + 1} selected`, 'info');
        },
        toggleModules() {
            this.showModules = !this.showModules;
@@ -327,6 +359,7 @@ var app = new Vue({
                return
            }
            console.log('New row');
+            this.showToast('New module', 'success');
            let payload = {
                table: this.mainTable,
            };
@@ -437,6 +470,8 @@ var app = new Vue({
                    }
                });
            }
+            this.scanRunning = false;
+            this.showToast('Scan finished successfully', 'success');
            this.saveStateToLocalStorage();

        }
@@ -19,7 +19,7 @@
              technopollas: ['Technopollas', 'sans-serif'],
            },
            colors: {
-                t1: {
+                dark: {
                bg: '#0D0D0D', // Jet Black
                card: '#1A1A1A', // Dark Carbon Fiber
                text: '#FFFFFF',
@@ -30,7 +30,7 @@
                  yellow: '#2E4053', // Jet Black
                },
              },
-              dark: {
+              dark1: {
                bg: '#121212',
                card: '#1E1E1E',
                text: '#FFFFFF',
@@ -86,6 +86,7 @@
        }
      }
    </script>
+
    <style>
 .scrollbar-hide::-webkit-scrollbar {
  display: none;
@@ -96,4 +97,55 @@
 }

    </style>
+<style>
+/* Toast-specific colors */
+.bg-success-toast {
+    background: #1C3F74
+}
+
+.bg-error-toast {
+    background: #85144B
+}
+
+.bg-info-toast {
+    background: #FFC300
+}
+
+.border-accent-green, .border-accent-red, .border-accent-orange {
+    border-color: rgba(255, 255, 255, 0.1); /* Subtle white border for depth */
+}
+
+/* Animation */
+.animate-toast-in {
+    animation: tSlideIn 0.4s cubic-bezier(0.25, 0.1, 0.25, 1);
+}
+
+@keyframes tSlideIn {
+    from {
+        transform: translateX(120%) scale(0.95);
+        opacity: 0;
+    }
+    to {
+        transform: translateX(0) scale(1);
+        opacity: 1;
+    }
+}
+
+.toast-enter-active,
+.toast-leave-active {
+    transition: all 0.4s cubic-bezier(0.25, 0.1, 0.25, 1);
+}
+
+.toast-enter-from,
+.toast-leave-to {
+    transform: translateX(120%) scale(0.95);
+    opacity: 0;
+}
+
+/* Hover effect */
+[toast-type]:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 6px 20px rgba(0, 0, 0, 0.6);
+}
+</style>
  </head>
@@ -33,7 +33,7 @@ The `LLMSpec` class is the core of the HTTP specification. It provides the follo
 ### Methods

 - **`from_string(http_spec: str) -> LLMSpec`**: Parses an HTTP specification string into an `LLMSpec` object.
- **`validate(prompt: str, encoded_image: str, encoded_audio: str, files: dict) -> None`**: Validates the request parameters based on the specified modality.
+- **`validate(prompt: str, encoded_image: str, encoded_audio: str, files: dict) -> None`**: Validates the request parameters based on the specified modality.
 - **`probe(prompt: str, encoded_image: str = "", encoded_audio: str = "", files: dict = {}) -> httpx.Response`**: Sends an HTTP request using the specified parameters.
 - **`verify() -> httpx.Response`**: Verifies the HTTP specification by sending a test request.

@@ -52,12 +52,11 @@ Authorization: Bearer sk-xxxxxxxxx
 Content-Type: application/json

 {
-    "model": "gpt-3.5-turbo",
-    "messages": [{"role": "user", "content": "<<PROMPT>>"}],
-    "temperature": 0.7
+  "model": "gpt-3.5-turbo",
+  "messages": [{"role": "user", "content": "<<PROMPT>>"}],
+  "temperature": 0.7
 }
 """
-
 spec = LLMSpec.from_string(http_spec)
 response = await spec.probe("What is the capital of France?")
 ```
@@ -71,12 +70,11 @@ Authorization: Bearer sk-xxxxxxxxx
 Content-Type: application/json

 {
-    "model": "gpt-4-vision-preview",
-    "messages": [{"role": "user", "content": "What is in this image? <<BASE64_IMAGE>>"}],
-    "temperature": 0.7
+  "model": "gpt-4-vision-preview",
+  "messages": [{"role": "user", "content": "What is in this image? <<BASE64_IMAGE>>"}],
+  "temperature": 0.7
 }
 """
-
 spec = LLMSpec.from_string(http_spec)
 encoded_image = encode_image_base64_by_url("https://example.com/image.jpg")
 response = await spec.probe("What is in this image?", encoded_image=encoded_image)
@@ -91,12 +89,11 @@ Authorization: Bearer sk-xxxxxxxxx
 Content-Type: application/json

 {
-    "model": "whisper-large-v3",
-    "messages": [{"role": "user", "content": "Transcribe this audio: <<BASE64_AUDIO>>"}],
-    "temperature": 0.7
+  "model": "whisper-large-v3",
+  "messages": [{"role": "user", "content": "Transcribe this audio: <<BASE64_AUDIO>>"}],
+  "temperature": 0.7
 }
 """
-
 spec = LLMSpec.from_string(http_spec)
 encoded_audio = encode_audio_base64_by_url("https://example.com/audio.mp3")
 response = await spec.probe("Transcribe this audio:", encoded_audio=encoded_audio)
@@ -111,12 +108,11 @@ Authorization: Bearer sk-xxxxxxxxx
 Content-Type: multipart/form-data

 {
-    "model": "gpt-3.5-turbo",
-    "messages": [{"role": "user", "content": "Process this file: <<FILE>>"}],
-    "temperature": 0.7
+  "model": "gpt-3.5-turbo",
+  "messages": [{"role": "user", "content": "Process this file: <<FILE>>"}],
+  "temperature": 0.7
 }
 """
-
 spec = LLMSpec.from_string(http_spec)
 files = {"file": ("document.txt", open("document.txt", "rb"))}
 response = await spec.probe("Process this file:", files=files)
@@ -54,20 +54,15 @@ The `probe_data` module is a core component of the Agentic Security project, res

 - **Classes:**
  - `PromptSelectionInterface`: Abstract base class for prompt selection strategies.
-
    - Methods:
      - `select_next_prompt(current_prompt: str, passed_guard: bool) -> str`: Selects next prompt
      - `select_next_prompts(current_prompt: str, passed_guard: bool) -> list[str]`: Selects multiple prompts
-      - `update_rewards(previous_prompt: str, current_prompt: str, reward: float, passed_guard: bool) -> None`: Updates rewards
-
+      - `update_rewards(previous_prompt: str, current_prompt: str, reward: float, passed_guard: bool) -> None`: Updates rewards
  - `RandomPromptSelector`: Basic random selection with history tracking.
-
    - Parameters:
      - `prompts: list[str]`: List of available prompts
      - `history_size: int = 3`: Size of history to prevent cycles
-
  - `CloudRLPromptSelector`: Cloud-based RL implementation with fallback.
-
    - Parameters:
      - `prompts: list[str]`: List of available prompts
      - `api_url: str`: URL of RL service
@@ -75,9 +70,7 @@ The `probe_data` module is a core component of the Agentic Security project, res
      - `history_size: int = 300`: Size of history
      - `timeout: int = 5`: Request timeout
      - `run_id: str = ""`: Unique run identifier
-
  - `QLearningPromptSelector`: Local Q-learning implementation.
-
    - Parameters:
      - `prompts: list[str]`: List of available prompts
      - `learning_rate: float = 0.1`: Learning rate
@@ -86,13 +79,11 @@ The `probe_data` module is a core component of the Agentic Security project, res
      - `exploration_decay: float = 0.995`: Exploration decay rate
      - `min_exploration: float = 0.01`: Minimum exploration rate
      - `history_size: int = 300`: Size of history
-
-  - `Module`: Main class that uses CloudRLPromptSelector.
-
-    - Parameters:
-      - `prompt_groups: list[str]`: Groups of prompts
-      - `tools_inbox: asyncio.Queue`: Queue for tool communication
-      - `opts: dict = {}`: Configuration options
+- **Module**: Main class that uses CloudRLPromptSelector.
+  - Parameters:
+    - `prompt_groups: list[str]`: Groups of prompts
+    - `tools_inbox: asyncio.Queue`: Queue for tool communication
+    - `opts: dict = {}`: Configuration options

 ## Usage Examples

@@ -119,10 +110,9 @@ from agentic_security.probe_data.modules.rl_model import QLearningPromptSelector

 prompts = ["What is AI?", "Explain machine learning"]
 selector = QLearningPromptSelector(prompts)
-
 current_prompt = "What is AI?"
-next_prompt = selector.select_next_prompt(current_prompt, passed_guard=True)
-selector.update_rewards(current_prompt, next_prompt, reward=1.0, passed_guard=True)
+next_prompt = selector.select_next_prompt(current_prompt, passed_guard=True)
+selector.update_rewards(current_prompt, next_prompt, reward=1.0, passed_guard=True)
 ```

 ## Conclusion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agentic_security"
-version = "0.6.0"
+version = "0.7.4"
 description = "Agentic LLM vulnerability scanner"
 authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
 maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
@@ -28,53 +28,50 @@ agentic_security = "agentic_security.__main__:main"

 [tool.poetry.dependencies]
 python = "^3.11"
-fastapi = "^0.115.8"
-uvicorn = "^0.34.0"
+fastapi = "^0.116.1"
+uvicorn = "^0.35.0"
 fire = "0.7.0"
 loguru = "^0.7.3"
 httpx = "^0.28.1"
 cache-to-disk = "^2.0.0"
 pandas = ">=1.4,<3.0"
-datasets = "^3.3.0"
+datasets = "^4.0.0"
 tabulate = ">=0.8.9,<0.10.0"
 colorama = "^0.4.4"
-matplotlib = "^3.9.2"
-pydantic = "2.10.6"
+matplotlib = "^3.10.5"
+pydantic = "^2.11.7"
 scikit-optimize = "^0.10.2"
-scikit-learn = "1.6.1"
+scikit-learn = "^1.7.1"
 numpy = ">=1.24.3,<3.0.0"
 jinja2 = "^3.1.4"
 python-multipart = "^0.0.20"
 tomli = "^2.2.1"
-rich = "13.9.4"
+rich = "^14.1.0"
 gTTS = "^2.5.4"
-sentry_sdk = "^2.22.0"
-orjson = "^3.10"
-pyfiglet = "^1.0.2"
-termcolor = "^2.4.0"
-
+sentry_sdk = "^2.34.1"
+orjson = "^3.11.2"
+pyfiglet = "^1.0.3"
+termcolor = "^3.1.0"
+mcp = "^1.12.4"
 # garak = { version = "*", optional = true }
-pytest-xdist = "3.6.1"
-
+pytest-xdist = "^3.8.0"

 [tool.poetry.group.dev.dependencies]
 # Pytest
-pytest = "^8.3.4"
-pytest-asyncio = "^0.25.2"
-inline-snapshot = ">=0.13.3,<0.21.0"
+pytest = "^8.4.1"
+pytest-asyncio = "^1.1.0"
+inline-snapshot = ">=0.13.3,<0.27.2"
 pytest-httpx = "^0.35.0"
-pytest-mock = "^3.14.0"
-
+pytest-mock = "^3.14.1"
 # Rest
 black = ">=24.10,<26.0"
-mypy = "^1.12.0"
-pre-commit = "^4.0.1"
-huggingface-hub = ">=0.25.1,<0.29.0"
-
+mypy = "^1.17.1"
+pre-commit = "^4.3.0"
+huggingface-hub = ">=0.25.1,<0.34.4"
 # Docs
 mkdocs = ">=1.4.2"
-mkdocs-material = "^9.6.4"
-mkdocstrings = ">=0.26.1"
+mkdocs-material = "^9.6.16"
+mkdocstrings = ">=0.29.0"
 mkdocs-jupyter = ">=0.25.1"


@@ -87,7 +84,13 @@ build-backend = "poetry.core.masonry.api"


 [tool.pytest.ini_options]
-addopts = "--durations=5 -m 'not slow' -n auto"
+addopts = "--durations=5 -m 'not slow' -n 3"
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
 markers = "slow: marks tests as slow"
+
+[project]
+# MCP requires the following fields to be present in the pyproject.toml file
+name = "agentic_security"
+version = "1.0.0"
+requires-python = ">=3.11"
@@ -1,8 +1,17 @@
 import os

 import pytest
+from cache_to_disk import delete_old_disk_caches
+
+from agentic_security.logutils import logger


 def pytest_runtest_setup(item):  # pytest setup hook; `item` is the test about to be set up
    if "slow" in item.keywords and not os.getenv("RUN_SLOW_TESTS"):  # "slow"-marked tests run only when RUN_SLOW_TESTS is set and non-empty
        pytest.skip("Skipping slow test")
+
+
+@pytest.fixture(autouse=True, scope="session")  # autouse: applies to tests without being requested; session-scoped
+def setup_delete_old_disk_caches():
+    logger.info("delete_old_disk_caches")  # log marker so the cleanup is visible in test output
+    delete_old_disk_caches()  # from cache_to_disk; NOTE(review): presumably prunes expired on-disk cache entries - confirm
@@ -7,6 +7,7 @@ import pytest

 from agentic_security.primitives import Scan
 from agentic_security.probe_actor.fuzzer import (
+    FuzzerState,
    generate_prompts,
    perform_many_shot_scan,
    perform_single_shot_scan,
@@ -207,9 +208,7 @@ class TestProcessPrompt(unittest.IsolatedAsyncioTestCase):
            prompt="test prompt",
            tokens=0,
            module_name="module_a",
-            refusals=[],
-            errors=[],
-            outputs=[],
+            fuzzer_state=FuzzerState(),
        )

        self.assertEqual(tokens, 3)  # Tokens from "Valid response text"
@@ -226,20 +225,17 @@ class TestProcessPrompt(unittest.IsolatedAsyncioTestCase):
            )
        )

-        refusals = []
-        outputs = []
+        fuzzer_state = FuzzerState()
        tokens, refusal = await process_prompt(
            request_factory=mock_request_factory,
            prompt="test prompt",
            tokens=0,
            module_name="module_a",
-            refusals=refusals,
-            errors=[],
-            outputs=outputs,
+            fuzzer_state=fuzzer_state,
        )

        self.assertEqual(tokens, 3)  # Tokens from "Response indicating refusal"
-        self.assertFalse(refusal)
+        # TODO(review): refusal assertion disabled in this change; restore or delete: self.assertFalse(fuzzer_state.refusals)

    async def test_http_error_response(self):
        mock_request_factory = Mock()
@@ -252,15 +248,13 @@ class TestProcessPrompt(unittest.IsolatedAsyncioTestCase):
            )
        )

-        refusals = []
+        fuzzer_state = FuzzerState()
        await process_prompt(
            request_factory=mock_request_factory,
            prompt="test prompt",
            tokens=0,
            module_name="module_a",
-            refusals=refusals,
-            errors=[],
-            outputs=[],
+            fuzzer_state=fuzzer_state,
        )

    async def test_request_error(self):
@@ -269,18 +263,14 @@ class TestProcessPrompt(unittest.IsolatedAsyncioTestCase):
            side_effect=httpx.RequestError("Connection error")
        )

-        errors = []
+        fuzzer_state = FuzzerState()
        tokens, refusal = await process_prompt(
            request_factory=mock_request_factory,
            prompt="test prompt",
            tokens=0,
            module_name="module_a",
-            refusals=[],
-            errors=errors,
-            outputs=[],
+            fuzzer_state=fuzzer_state,
        )

        self.assertEqual(tokens, 0)
        self.assertTrue(refusal)
-        self.assertEqual(len(errors), 1)
-        self.assertIn("Connection error", errors[0][3])
@@ -8,7 +8,7 @@ import time
 import pytest

 import agentic_security.test_spec_assets as test_spec_assets
-from agentic_security.lib import AgenticSecurity
+from agentic_security.lib import SecurityScanner


 def has_module(module_name):
@@ -57,7 +57,7 @@ class TestLibraryLevel:
        maxBudget = 1000000
        max_th = 0.3
        datasets = make_test_registry()
-        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
+        result = SecurityScanner.scan(llmSpec, maxBudget, datasets, max_th)
        assert isinstance(result, dict)
        print(result)
        assert len(result) in [0, 1]
@@ -68,7 +68,7 @@ class TestLibraryLevel:
        maxBudget = 1000
        max_th = 0.3
        datasets = make_test_registry()
-        result = AgenticSecurity.scan(
+        result = SecurityScanner.scan(
            llmSpec, maxBudget, datasets, max_th, enableMultiStepAttack=True
        )
        assert isinstance(result, dict)
@@ -93,7 +93,7 @@ class TestLibraryLevel:
                "opts": {"port": 9094},
            },
        ]
-        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
+        result = SecurityScanner.scan(llmSpec, maxBudget, datasets, max_th)
        assert isinstance(result, dict)
        print(result)
        assert len(result) in [0, 1]
@@ -120,7 +120,7 @@ class TestLibraryLevel:
                "modality": "text",
            },
        ]
-        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
+        result = SecurityScanner.scan(llmSpec, maxBudget, datasets, max_th)
        assert isinstance(result, dict)
        print(result)
        assert len(result) in [0, 1]
@@ -148,7 +148,7 @@ class TestLibraryLevel:
                "modality": "text",
            },
        ]
-        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
+        result = SecurityScanner.scan(llmSpec, maxBudget, datasets, max_th)
        assert isinstance(result, dict)
        print(result)
        assert len(result) in [0, 1]
@@ -164,10 +164,10 @@ class TestEntrypointCI:
            temp_path = os.path.join(tmpdir, "custom_agesec.toml")

            # Override default_path to the temporary path
-            AgenticSecurity.default_path = temp_path
+            SecurityScanner.default_path = temp_path

            # Generate the default configuration
-            security = AgenticSecurity()
+            security = SecurityScanner()
            security.generate_default_settings()

            # Check that the config file was created at the temporary path
@@ -189,17 +189,17 @@ class TestEntrypointCI:
            temp_path = os.path.join(tmpdir, "custom_agesec.toml")

            # Override default_path to the temporary path
-            AgenticSecurity.default_path = temp_path
+            SecurityScanner.default_path = temp_path

            # Generate the default configuration
-            security = AgenticSecurity()
+            security = SecurityScanner()
            security.generate_default_settings()

            # Load the generated configuration
-            AgenticSecurity.load_config(temp_path)
+            SecurityScanner.load_config(temp_path)

            # Validate loaded configuration
-            config = AgenticSecurity.config
+            config = SecurityScanner.config
            assert (
                config["general"]["maxBudget"] == 1000000
            ), "maxBudget should be 1000000"
@@ -0,0 +1,12 @@
+import pytest
+
+from agentic_security.mcp.client import run
+
+
+@pytest.mark.asyncio
+async def test_mcp_echo_tool():  # NOTE(review): no echo tool is called directly here; presumably run() does - confirm
+    """Test the echo tool functionality"""
+    prompts, resources, tools = await run()  # run() must return exactly three values for this unpacking to succeed
+    assert prompts  # each value only has to be truthy; contents are not inspected
+    assert resources
+    assert tools
@@ -1,7 +1,7 @@
 import pytest
-from datasets import load_dataset

 from agentic_security.probe_data import REGISTRY
+from datasets import load_dataset


@pytest.mark.slow
@@ -4266,9 +4266,9 @@
      }
    },
    "node_modules/compression": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/compression/-/compression-1.8.0.tgz",
-      "integrity": "sha512-k6WLKfunuqCYD3t6AsuPGvQWaKwuLLh2/xHNcX4qE+vIfDNXpSqnrhwA7O53R7WVQUnt8dVAIW+YHr7xTgOgGA==",
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/compression/-/compression-1.8.1.tgz",
+      "integrity": "sha512-9mAqGPHLakhCLeNyxPkK4xVo746zQ/czLH1Ky+vkitMnWfWZps8r0qXuwhwizagCRttsL4lfG4pIOvaWLpAP0w==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -4276,7 +4276,7 @@
        "compressible": "~2.0.18",
        "debug": "2.6.9",
        "negotiator": "~0.6.4",
-        "on-headers": "~1.0.2",
+        "on-headers": "~1.1.0",
        "safe-buffer": "5.2.1",
        "vary": "~1.1.2"
      },
@@ -6891,9 +6891,9 @@
      }
    },
    "node_modules/http-proxy-middleware": {
-      "version": "2.0.7",
-      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz",
-      "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==",
+      "version": "2.0.9",
+      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.9.tgz",
+      "integrity": "sha512-c1IyJYLYppU574+YI7R4QyX2ystMtVXZwIdzazUIPIJsHuWNd+mho2j+bKoHftndicGj9yh+xjd+l0yj7VeT1Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -8419,9 +8419,9 @@
      }
    },
    "node_modules/on-headers": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
-      "integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==",
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.1.0.tgz",
+      "integrity": "sha512-737ZY3yNnXy37FHkQxPzt4UZ2UWPWiCZWLvFZ4fu5cueciegX0zGPnrlY6bwRg4FdQOe9YU8MkmJwGhoMybl8A==",
      "dev": true,
      "license": "MIT",
      "engines": {