feat(cleanup):

fix(pc):
feat(cleanup):
2026-06-24 14:19:55 +02:00 · 2025-12-24 08:18:17 +02:00 · 2025-12-24 08:16:21 +02:00 · 2025-12-24 08:11:43 +02:00 · 2025-12-24 08:10:08 +02:00 · 2025-12-10 23:06:40 +02:00
50 changed files with 5059 additions and 2462 deletions
@@ -1,5 +1,9 @@
 name: Pre-Commit Checks

+env:
+  POETRY_VERSION: "1.8.5"
+
+
 on:
  push:
    branches: [main]
@@ -15,7 +19,9 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
+      - name: Install poetry
+        run: pipx install poetry==$POETRY_VERSION
      - name: Install pre-commit
-        run: pip install pre-commit
+        run: poetry install
      - name: Run pre-commit
-        run: pre-commit run --all-files
+        run: poetry run pre-commit run --all-files
@@ -1,37 +0,0 @@
-name: Security Scan
-on:
-  push:
-    branches: [ main, master ]
-  pull_request:
-    branches: [ main, master ]
-  schedule:
-    - cron: '0 0 * * 1'  # Run weekly on Mondays
-  workflow_dispatch:  # Allow manual trigger
-
-jobs:
-  security_scan:
-    runs-on: ubuntu-latest
-
-    env:
-      API_KEY: PLACEHOLDER
-
-    steps:
-      - name: Check out repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-          cache: 'pip'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install agentic-security colorama tabulate tqdm python-multipart
-
-      - name: Run security scan
-        id: scan
-        run: |
-          agentic_security init
-          # agentic_security ci
@@ -1,14 +0,0 @@
-name: PyCharm Python Security Scanner
-
-on:
-  schedule:
-    - cron: "0 0 * * *"
-
-jobs:
-  security_checks:
-    runs-on: ubuntu-latest
-    name: Execute the pycharm-security action
-    steps:
-      - uses: actions/checkout@v1
-      - name: PyCharm Python Security Scanner
-        uses: tonybaloney/pycharm-security@1.19.0
@@ -19,3 +19,7 @@ docx/
 agentic_security.toml
 /venv
 *.csv
+agentic_security/agents/operator_agno.py
+.claude/
+plan.md
+auto_loop.sh
@@ -9,7 +9,7 @@ repos:
        args: [--py311-plus]

  - repo: https://github.com/psf/black
-    rev: 23.11.0
+    rev: 25.11.0
    hooks:
      - id: black
        language_version: python3.11
@@ -20,12 +20,13 @@ repos:
      - id: flake8
        language_version: python3.11
        additional_dependencies: [flake8-docstrings]
+        exclude: '^(tests)/'

-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        args: [--profile, black]
+  # - repo: https://github.com/PyCQA/isort
+  #   rev: 7.0.0
+  #   hooks:
+  #     - id: isort
+  #       args: [--profile, black]

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
@@ -19,6 +19,10 @@ RUN poetry lock

 # Install dependencies
 RUN poetry export -f requirements.txt --without-hashes -o requirements.txt
+
+# Install wheel (required to build packages like fire)
+RUN pip install --upgrade pip setuptools wheel
+
 RUN pip install --no-cache-dir -r requirements.txt

 # Runtime stage
@@ -21,9 +21,7 @@
  <a href="https://pypi.org/project/agentic-security/">
    <img alt="PyPI Version" src="https://img.shields.io/pypi/v/agentic-security?style=for-the-badge&logo=pypi&labelColor=000000&color=00CCFF" />
  </a>
-  <a href="https://discord.gg/stw3DfZQ">
-    <img alt="Join Discord" src="https://img.shields.io/badge/Discord-Join%20Us-black?style=for-the-badge&logo=discord&labelColor=000000&color=DD55FF" />
-  </a>
+
 </p>


@@ -1,3 +1,7 @@
-from .lib import SecurityScanner
+from agentic_security.cache_config import ensure_cache_dir

-__all__ = ["SecurityScanner"]
+ensure_cache_dir()
+
+from .lib import SecurityScanner  # noqa: E402
+
+__all__ = ["SecurityScanner", "ensure_cache_dir"]
@@ -246,9 +246,9 @@ async def run_crew():
 os.environ["OPENAI_API_KEY"] = os.environ.get(
    "DEEPSEEK_API_KEY", ""
 )  # CrewAI uses OPENAI_API_KEY
-os.environ[
-    "OPENAI_MODEL_NAME"
-] = "deepseek:chat"  # Specify DeepSeek model (adjust if needed)
+os.environ["OPENAI_MODEL_NAME"] = (
+    "deepseek:chat"  # Specify DeepSeek model (adjust if needed)
+)

 if __name__ == "__main__":
    asyncio.run(run_crew())
@@ -0,0 +1,23 @@
+"""Utilities to keep cache-to-disk storage in a writable, predictable location."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+
+def ensure_cache_dir(base_dir: Path | None = None) -> Path:
+    """Ensure ``DISK_CACHE_DIR`` points to a writable directory and create it if needed."""
+    env_var = "DISK_CACHE_DIR"
+    configured_path = os.environ.get(env_var) or os.environ.get(
+        "AGENTIC_SECURITY_CACHE_DIR"
+    )
+    cache_dir = Path(
+        configured_path or base_dir or Path.cwd() / ".cache" / "agentic_security"
+    ).expanduser()
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    os.environ[env_var] = str(cache_dir)
+    return cache_dir
+
+
+__all__ = ["ensure_cache_dir"]
@@ -1,18 +1,23 @@
 import os
 from asyncio import Event, Queue
+from typing import TypedDict

 from fastapi import FastAPI
 from fastapi.responses import ORJSONResponse

 from agentic_security.http_spec import LLMSpec

+
+class CurrentRun(TypedDict):
+    id: int | None
+    spec: LLMSpec | None
+
+
 tools_inbox: Queue = Queue()
 stop_event: Event = Event()
-current_run: str = {"spec": "", "id": ""}
+current_run: CurrentRun = {"spec": None, "id": None}
 _secrets: dict[str, str] = {}

-current_run: dict[str, int | LLMSpec] = {"spec": "", "id": ""}
-

 def create_app() -> FastAPI:
    """Create and configure the FastAPI application."""
@@ -30,13 +35,13 @@ def get_stop_event() -> Event:
    return stop_event


-def get_current_run() -> dict[str, int | LLMSpec]:
+def get_current_run() -> CurrentRun:
    """Get the current run id."""
    return current_run


-def set_current_run(spec: LLMSpec) -> dict[str, int | LLMSpec]:
-    """Set the current run id."""
+def set_current_run(spec: LLMSpec) -> CurrentRun:
+    """Set the current run metadata based on a spec instance."""
    current_run["id"] = hash(id(spec))
    current_run["spec"] = spec
    return current_run
@@ -56,4 +61,8 @@ def expand_secrets(secrets: dict[str, str]) -> None:
    for key in secrets:
        val = secrets[key]
        if val.startswith("$"):
-            secrets[key] = os.getenv(val.strip("$"))
+            env_value = os.getenv(val.strip("$"))
+            if env_value is not None:
+                secrets[key] = env_value
+            else:
+                secrets[key] = None
@@ -0,0 +1,12 @@
+"""Advanced concurrent execution package for security scanning."""
+
+from agentic_security.executor.rate_limiter import TokenBucketRateLimiter
+from agentic_security.executor.circuit_breaker import CircuitBreaker
+from agentic_security.executor.concurrent import ConcurrentExecutor, ExecutorMetrics
+
+__all__ = [
+    "TokenBucketRateLimiter",
+    "CircuitBreaker",
+    "ConcurrentExecutor",
+    "ExecutorMetrics",
+]
@@ -0,0 +1,109 @@
+"""Circuit breaker pattern for fault tolerance."""
+
+import time
+from typing import Literal
+
+
+CircuitState = Literal["closed", "open", "half_open"]
+
+
+class CircuitBreaker:
+    """Circuit breaker to prevent cascading failures.
+
+    Implements the circuit breaker pattern with three states:
+    - closed: Normal operation, requests pass through
+    - open: Failure threshold exceeded, requests fail fast
+    - half_open: Recovery attempt, limited requests allowed
+
+    Example:
+        >>> breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=30)
+        >>> if breaker.is_open():
+        ...     raise Exception("Circuit breaker is open")
+        >>> try:
+        ...     result = make_request()
+        ...     breaker.record_success()
+        >>> except Exception:
+        ...     breaker.record_failure()
+    """
+
+    def __init__(self, failure_threshold: float = 0.5, recovery_timeout: int = 30):
+        """Initialize circuit breaker.
+
+        Args:
+            failure_threshold: Failure rate (0.0-1.0) that triggers open state
+            recovery_timeout: Seconds to wait before attempting recovery
+        """
+        self.failure_threshold = failure_threshold
+        self.recovery_timeout = recovery_timeout
+        self.failures = 0
+        self.successes = 0
+        self.state: CircuitState = "closed"
+        self.last_failure_time: float | None = None
+
+    def record_success(self):
+        """Record a successful request."""
+        self.successes += 1
+
+        # If in half_open state and we have enough successes, close the circuit
+        if self.state == "half_open" and self.successes >= 3:
+            self.state = "closed"
+            self.failures = 0
+            self.successes = 0
+
+    def record_failure(self):
+        """Record a failed request."""
+        self.failures += 1
+        self.last_failure_time = time.monotonic()
+
+        total = self.failures + self.successes
+
+        # Need minimum sample size before opening circuit
+        if total >= 10:
+            failure_rate = self.failures / total
+            if failure_rate >= self.failure_threshold:
+                self.state = "open"
+
+    def is_open(self) -> bool:
+        """Check if circuit breaker is open.
+
+        Returns:
+            bool: True if circuit is open and requests should be blocked
+        """
+        if self.state == "open":
+            # Check if we should attempt recovery
+            if self.last_failure_time is not None:
+                if time.monotonic() - self.last_failure_time > self.recovery_timeout:
+                    self.state = "half_open"
+                    # Reset counters for half-open state
+                    self.failures = 0
+                    self.successes = 0
+                    return False
+            return True
+
+        return False
+
+    def get_state(self) -> CircuitState:
+        """Get current circuit breaker state.
+
+        Returns:
+            CircuitState: Current state (closed, open, or half_open)
+        """
+        return self.state
+
+    def get_failure_rate(self) -> float:
+        """Get current failure rate.
+
+        Returns:
+            float: Failure rate (0.0-1.0), or 0.0 if no requests recorded
+        """
+        total = self.failures + self.successes
+        if total == 0:
+            return 0.0
+        return self.failures / total
+
+    def reset(self):
+        """Reset circuit breaker to initial state."""
+        self.failures = 0
+        self.successes = 0
+        self.state = "closed"
+        self.last_failure_time = None
@@ -0,0 +1,236 @@
+"""Concurrent executor with rate limiting and circuit breaking."""
+
+import asyncio
+import time
+from typing import Any
+
+from agentic_security.executor.rate_limiter import TokenBucketRateLimiter
+from agentic_security.executor.circuit_breaker import CircuitBreaker
+from agentic_security.logutils import logger
+from agentic_security.probe_actor.state import FuzzerState
+
+
+class ExecutorMetrics:
+    """Track executor performance metrics."""
+
+    def __init__(self):
+        """Initialize metrics tracking."""
+        self.successful_requests = 0
+        self.failed_requests = 0
+        self.total_latency = 0.0
+        self.latencies: list[float] = []
+
+    def record_success(self, latency: float):
+        """Record a successful request.
+
+        Args:
+            latency: Request latency in seconds
+        """
+        self.successful_requests += 1
+        self.total_latency += latency
+        self.latencies.append(latency)
+
+    def record_failure(self):
+        """Record a failed request."""
+        self.failed_requests += 1
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get current statistics.
+
+        Returns:
+            dict: Statistics including total requests, success rate, latency metrics
+        """
+        total_requests = self.successful_requests + self.failed_requests
+
+        if total_requests == 0:
+            return {
+                "total_requests": 0,
+                "success_rate": 0.0,
+                "avg_latency_ms": 0.0,
+                "p95_latency_ms": 0.0,
+            }
+
+        success_rate = self.successful_requests / total_requests
+        avg_latency_ms = (
+            (self.total_latency / self.successful_requests * 1000)
+            if self.successful_requests > 0
+            else 0.0
+        )
+
+        # Calculate p95 latency
+        if self.latencies:
+            sorted_latencies = sorted(self.latencies)
+            p95_index = int(len(sorted_latencies) * 0.95)
+            p95_latency_ms = (
+                sorted_latencies[p95_index] * 1000
+                if p95_index < len(sorted_latencies)
+                else 0.0
+            )
+        else:
+            p95_latency_ms = 0.0
+
+        return {
+            "total_requests": total_requests,
+            "successful_requests": self.successful_requests,
+            "failed_requests": self.failed_requests,
+            "success_rate": success_rate,
+            "avg_latency_ms": avg_latency_ms,
+            "p95_latency_ms": p95_latency_ms,
+        }
+
+
+class ConcurrentExecutor:
+    """Enhanced concurrent executor with rate limiting and circuit breaking.
+
+    Provides advanced concurrency control for security scanning with:
+    - Token bucket rate limiting
+    - Circuit breaker for fault tolerance
+    - Metrics collection
+    - Semaphore-based concurrency limits
+
+    Example:
+        >>> executor = ConcurrentExecutor(max_concurrent=20, rate_limit=10, burst=5)
+        >>> tokens, failures = await executor.execute_batch(
+        ...     request_factory, prompts, "module_name", fuzzer_state
+        ... )
+        >>> print(executor.metrics.get_stats())
+    """
+
+    def __init__(
+        self,
+        max_concurrent: int = 50,
+        rate_limit: float = 100,
+        burst: int = 20,
+        failure_threshold: float = 0.5,
+        recovery_timeout: int = 30,
+    ):
+        """Initialize concurrent executor.
+
+        Args:
+            max_concurrent: Maximum number of concurrent requests
+            rate_limit: Requests per second limit
+            burst: Maximum burst size for rate limiter
+            failure_threshold: Failure rate that triggers circuit breaker
+            recovery_timeout: Seconds before attempting circuit recovery
+        """
+        self.semaphore = asyncio.Semaphore(max_concurrent)
+        self.rate_limiter = TokenBucketRateLimiter(rate_limit, burst)
+        self.circuit_breaker = CircuitBreaker(failure_threshold, recovery_timeout)
+        self.metrics = ExecutorMetrics()
+
+        logger.info(
+            f"ConcurrentExecutor initialized: max_concurrent={max_concurrent}, "
+            f"rate_limit={rate_limit}/s, burst={burst}"
+        )
+
+    async def execute_batch(
+        self,
+        request_factory,
+        prompts: list[str],
+        module_name: str,
+        fuzzer_state: FuzzerState,
+    ) -> tuple[int, int]:
+        """Execute a batch of prompts with rate limiting and circuit breaking.
+
+        This is compatible with the existing process_prompt_batch signature.
+
+        Args:
+            request_factory: Request factory with fn() method
+            prompts: List of prompts to process
+            module_name: Name of the module being scanned
+            fuzzer_state: State tracking object
+
+        Returns:
+            tuple[int, int]: (total_tokens, failures)
+        """
+        tasks = [
+            self._execute_single(request_factory, prompt, module_name, fuzzer_state)
+            for prompt in prompts
+        ]
+
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Aggregate results
+        total_tokens = 0
+        failures = 0
+
+        for result in results:
+            if isinstance(result, Exception):
+                failures += 1
+                logger.error(f"Task failed with exception: {result}")
+            else:
+                tokens, refused = result
+                total_tokens += tokens
+                if refused:
+                    failures += 1
+
+        return total_tokens, failures
+
+    async def _execute_single(
+        self,
+        request_factory,
+        prompt: str,
+        module_name: str,
+        fuzzer_state: FuzzerState,
+    ) -> tuple[int, bool]:
+        """Execute a single prompt with rate limiting and circuit breaking.
+
+        Args:
+            request_factory: Request factory with fn() method
+            prompt: Prompt to process
+            module_name: Name of the module being scanned
+            fuzzer_state: State tracking object
+
+        Returns:
+            tuple[int, bool]: (tokens, refused)
+
+        Raises:
+            Exception: If circuit breaker is open
+        """
+        # Rate limiting
+        await self.rate_limiter.acquire()
+
+        # Circuit breaker check
+        if self.circuit_breaker.is_open():
+            self.metrics.record_failure()
+            raise Exception("Circuit breaker is open - too many failures")
+
+        # Concurrency control
+        async with self.semaphore:
+            start_time = time.monotonic()
+
+            try:
+                # Import here to avoid circular dependency
+                from agentic_security.probe_actor.fuzzer import process_prompt
+
+                tokens = 0  # Initial token count for this prompt
+                result = await process_prompt(
+                    request_factory, prompt, tokens, module_name, fuzzer_state
+                )
+
+                # Record success
+                self.circuit_breaker.record_success()
+                latency = time.monotonic() - start_time
+                self.metrics.record_success(latency)
+
+                return result
+
+            except Exception as e:
+                # Record failure
+                self.circuit_breaker.record_failure()
+                self.metrics.record_failure()
+                logger.error(f"Error executing prompt: {e}")
+                raise
+
+    def get_metrics(self) -> dict[str, Any]:
+        """Get current executor metrics.
+
+        Returns:
+            dict: Metrics including request stats, latency, and circuit breaker state
+        """
+        stats = self.metrics.get_stats()
+        stats["circuit_breaker_state"] = self.circuit_breaker.get_state()
+        stats["circuit_breaker_failure_rate"] = self.circuit_breaker.get_failure_rate()
+        stats["available_tokens"] = self.rate_limiter.get_available_tokens()
+
+        return stats
@@ -0,0 +1,63 @@
+"""Token bucket rate limiter for controlling request rate."""
+
+import asyncio
+import time
+
+
+class TokenBucketRateLimiter:
+    """Token bucket rate limiter with configurable rate and burst capacity.
+
+    This implements the token bucket algorithm where tokens are added at a fixed
+    rate and consumed for each request. Supports bursting up to the bucket capacity.
+
+    Example:
+        >>> limiter = TokenBucketRateLimiter(rate=10, burst=20)
+        >>> await limiter.acquire()  # Will wait if no tokens available
+    """
+
+    def __init__(self, rate: float, burst: int):
+        """Initialize rate limiter.
+
+        Args:
+            rate: Tokens added per second (requests/sec)
+            burst: Maximum bucket capacity (max concurrent burst)
+        """
+        self.rate = rate
+        self.burst = burst
+        self.tokens = float(burst)
+        self.last_update = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    async def acquire(self):
+        """Acquire a token, waiting if necessary.
+
+        This method will block until a token is available.
+        """
+        async with self._lock:
+            now = time.monotonic()
+            elapsed = now - self.last_update
+
+            # Add tokens based on elapsed time
+            self.tokens = min(self.burst, self.tokens + elapsed * self.rate)
+            self.last_update = now
+
+            if self.tokens >= 1:
+                # Token available, consume it
+                self.tokens -= 1
+                return
+
+            # Need to wait for next token
+            wait_time = (1 - self.tokens) / self.rate
+            await asyncio.sleep(wait_time)
+            self.tokens = 0
+            self.last_update = time.monotonic()
+
+    def get_available_tokens(self) -> float:
+        """Get current number of available tokens (non-blocking).
+
+        Returns:
+            float: Number of tokens currently available
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_update
+        return min(self.burst, self.tokens + elapsed * self.rate)
@@ -69,7 +69,9 @@ class LLMSpec(BaseModel):

        return response

-    def validate(self, prompt, encoded_image, encoded_audio, files) -> None:
+    def validate(
+        self, prompt: str, encoded_image: str, encoded_audio: str, files: dict | None
+    ) -> None:
        if self.has_files and not files:
            raise ValueError("Files are required for this request.")

@@ -80,7 +82,11 @@ class LLMSpec(BaseModel):
            raise ValueError("Audio is required for this request.")

    async def probe(
-        self, prompt: str, encoded_image: str = "", encoded_audio: str = "", files={}
+        self,
+        prompt: str,
+        encoded_image: str = "",
+        encoded_audio: str = "",
+        files: dict | None = None,
    ) -> httpx.Response:
        """Sends an HTTP request using the `httpx` library.

@@ -155,10 +161,17 @@ def parse_http_spec(http_spec: str) -> LLMSpec:
    secrets = get_secrets()

    # Split the spec by lines
-    lines = http_spec.strip().split("\n")
+    lines = http_spec.strip("\n").splitlines()
+    if not lines:
+        raise InvalidHTTPSpecError("HTTP spec is empty.")

    # Extract the method and URL from the first line
-    method, url = lines[0].split(" ")[0:2]
+    request_line_parts = lines[0].split()
+    if len(request_line_parts) < 2:
+        raise InvalidHTTPSpecError(
+            "First line of HTTP spec must include the method and URL."
+        )
+    method, url = request_line_parts[0], request_line_parts[1]

    # Check url validity
    valid_url = urlparse(url)
@@ -170,20 +183,30 @@ def parse_http_spec(http_spec: str) -> LLMSpec:

    # Initialize headers and body
    headers = {}
-    body = ""
+    body_lines: list[str] = []

    # Iterate over the remaining lines
    reading_headers = True
    for line in lines[1:]:
-        if line == "":
-            reading_headers = False
+        if line.strip() == "":
+            if reading_headers:
+                reading_headers = False
+                continue
+            body_lines.append("")
            continue

        if reading_headers:
-            key, value = line.split(": ")
+            if ":" not in line:
+                raise InvalidHTTPSpecError(f"Invalid header line: '{line}'")
+            key, value = line.split(":", maxsplit=1)
+            key = key.strip()
+            value = value.strip()
+            if not key:
+                raise InvalidHTTPSpecError("Header name cannot be empty.")
            headers[key] = value
        else:
-            body += line
+            body_lines.append(line)
+    body = "\n".join(body_lines)
    has_files = "multipart/form-data" in headers.get("Content-Type", "")
    has_image = "<<BASE64_IMAGE>>" in body
    has_audio = "<<BASE64_AUDIO>>" in body
@@ -5,8 +5,6 @@ from typing import Protocol
 class IntegrationProto(Protocol):
    def __init__(
        self, prompt_groups: list, tools_inbox: asyncio.Queue, opts: dict = {}
-    ):
-        ...
+    ): ...

-    async def apply(self) -> list:
-        ...
+    async def apply(self) -> list: ...
@@ -1,4 +1,5 @@
 import asyncio
+import copy
 import json
 from datetime import datetime

@@ -29,12 +30,14 @@ class SecurityScanner(SettingsMixin):
        cls,
        llmSpec: str,
        maxBudget: int,
-        datasets: list[dict],
+        datasets: list[dict] | None,
        max_th: float,
        optimize: bool = False,
        enableMultiStepAttack: bool = False,
-        probe_datasets: list[dict] = [],
+        probe_datasets: list[dict] | None = None,
    ):
+        datasets = copy.deepcopy(datasets) if datasets is not None else []
+        probe_datasets = copy.deepcopy(probe_datasets or [])
        start_time = datetime.now()
        total_modules = len(datasets)
        completed_modules = 0
@@ -170,15 +173,18 @@ class SecurityScanner(SettingsMixin):
        cls,
        llmSpec: str,
        maxBudget: int = 1_000_000,
-        datasets: list[dict] = REGISTRY,
+        datasets: list[dict] | None = None,
        max_th: float = 0.3,
        optimize: bool = False,
        enableMultiStepAttack: bool = False,
-        probe_datasets: list[dict] = [],
-        only: list[str] = [],
+        probe_datasets: list[dict] | None = None,
+        only: list[str] | None = None,
    ):
-        if only:
-            datasets = [d for d in datasets if d["dataset_name"] in only]
+        datasets = copy.deepcopy(datasets or REGISTRY)
+        probe_datasets = copy.deepcopy(probe_datasets or [])
+        only_set = set(only) if only else None
+        if only_set is not None:
+            datasets = [d for d in datasets if d.get("dataset_name") in only_set]
            for d in datasets:
                d["selected"] = True
        return asyncio.run(
@@ -129,7 +129,7 @@ def time_execution_async(
    [Callable[P, Coroutine[Any, Any, R]]], Callable[P, Coroutine[Any, Any, R]]
 ]:
    def decorator(
-        func: Callable[P, Coroutine[Any, Any, R]]
+        func: Callable[P, Coroutine[Any, Any, R]],
    ) -> Callable[P, Coroutine[Any, Any, R]]:
        @wraps(func)
        async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
@@ -3,6 +3,8 @@ import asyncio
 from mcp import ClientSession, StdioServerParameters
 from mcp.client.stdio import stdio_client

+from agentic_security.logutils import logger
+
 # Create server parameters for stdio connection
 server_params = StdioServerParameters(
    command="python",  # Executable
@@ -11,42 +13,55 @@ server_params = StdioServerParameters(
 )


-async def run():
-    async with stdio_client(server_params) as (read, write):
-        async with ClientSession(read, write) as session:
-            # Initialize the connection
-            await session.initialize()
+async def run() -> None:
+    try:
+        logger.info(
+            "Starting stdio client session with server parameters: %s", server_params
+        )
+        async with stdio_client(server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                # Initialize the connection --> connection does not work
+                logger.info("Initializing client session...")
+                await session.initialize()

-            # List available prompts, resources, and tools
-            prompts = await session.list_prompts()
-            print(f"Available prompts: {prompts}")
+                # List available prompts, resources, and tools --> no avalialbe tools
+                logger.info("Listing available prompts...")
+                prompts = await session.list_prompts()
+                logger.info(f"Available prompts: {prompts}")

-            resources = await session.list_resources()
-            print(f"Available resources: {resources}")
+                logger.info("Listing available resources...")
+                resources = await session.list_resources()
+                logger.info(f"Available resources: {resources}")

-            tools = await session.list_tools()
-            print(f"Available tools: {tools}")
+                logger.info("Listing available tools...")
+                tools = await session.list_tools()
+                logger.info(f"Available tools: {tools}")

-            # Call the echo tool
-            echo_result = await session.call_tool(
-                "echo_tool", arguments={"message": "Hello from client!"}
-            )
-            print(f"Tool result: {echo_result}")
+                # Call the echo tool --> echo tool issue
+                logger.info("Calling echo_tool with message...")
+                echo_result = await session.call_tool(
+                    "echo_tool", arguments={"message": "Hello from client!"}
+                )
+                logger.info(f"Tool result: {echo_result}")

-            # # Read the echo resource
-            # echo_content, mime_type = await session.read_resource(
-            #     "echo://Hello_resource"
-            # )
-            # print(f"Resource content: {echo_content}")
-            # print(f"Resource MIME type: {mime_type}")
+                # # Read the echo resource
+                # echo_content, mime_type = await session.read_resource(
+                #     "echo://Hello_resource"
+                # )
+                # logger.info(f"Resource content: {echo_content}")
+                # logger.info(f"Resource MIME type: {mime_type}")

-            # # Get and use the echo prompt
-            # prompt_result = await session.get_prompt(
-            #     "echo_prompt", arguments={"message": "Hello prompt!"}
-            # )
-            # print(f"Prompt result: {prompt_result}")
+                # # Get and use the echo prompt
+                # prompt_result = await session.get_prompt(
+                #     "echo_prompt", arguments={"message": "Hello prompt!"}
+                # )
+                # logger.info(f"Prompt result: {prompt_result}")

-            # You can perform additional operations here as needed
+                logger.info("Client operations completed successfully.")
+                return prompts, resources, tools
+    except Exception as e:
+        logger.error(f"An error occurred during client operations: {e}", exc_info=True)
+        raise


 if __name__ == "__main__":
@@ -4,7 +4,6 @@ from mcp.server.fastmcp import FastMCP
 # Initialize MCP server
 mcp = FastMCP(
    name="Agentic Security MCP Server",
-    description="MCP server to interact with LLM scanning test",
    dependencies=["httpx"],
 )

@@ -14,7 +13,15 @@ AGENTIC_SECURITY = "http://0.0.0.0:8718"

@mcp.tool()
 async def verify_llm(spec: str) -> dict:
-    """Verify an LLM model specification using the FastAPI server."""
+    """
+    Verify an LLM model specification using the FastAPI server
+
+    Returns:
+        dict: containing the verification result form the FastAPI server
+
+    Args: spect(str):  The specification of the LLM model to verify.
+
+    """
    url = f"{AGENTIC_SECURITY}/verify"
    async with httpx.AsyncClient() as client:
        response = await client.post(url, json={"spec": spec})
@@ -28,7 +35,18 @@ async def start_scan(
    optimize: bool = False,
    enableMultiStepAttack: bool = False,
 ) -> dict:
-    """Start an LLM security scan via the FastAPI server."""
+    """
+    Start an LLM security scan via the FastAPI server.
+    Returns:
+        dict: The scan initiation result from the FastAPI server.
+
+    Args:
+        llmSpec (str): The specification of the LLM model.
+        maxBudget (int): The maximum budget for the scan.
+        optimize (bool, optional): Whether to enable optimization during scanning. Defaults to False.
+        enableMultiStepAttack (bool, optional): Whether to enable multi-step attack
+
+    """
    url = f"{AGENTIC_SECURITY}/scan"
    payload = {
        "llmSpec": llmSpec,
@@ -46,7 +64,11 @@ async def start_scan(

@mcp.tool()
 async def stop_scan() -> dict:
-    """Stop an ongoing scan via the FastAPI server."""
+    """Stop an ongoing scan via the FastAPI server.
+
+    Returns:
+        dict: The confirmation from the FastAPI server that the scan has been stopped.
+    """
    url = f"{AGENTIC_SECURITY}/stop"
    async with httpx.AsyncClient() as client:
        response = await client.post(url)
@@ -55,7 +77,12 @@ async def stop_scan() -> dict:

@mcp.tool()
 async def get_data_config() -> list:
-    """Retrieve data configuration from the FastAPI server."""
+    """
+    Retrieve data configuration from the FastAPI server.
+
+    Returns:
+        list: The response from the FastAPI server, confirming the scan has been stopped.
+    """
    url = f"{AGENTIC_SECURITY}/v1/data-config"
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
@@ -64,7 +91,12 @@ async def get_data_config() -> list:

@mcp.tool()
 async def get_spec_templates() -> list:
-    """Retrieve data configuration from the FastAPI server."""
+    """
+    Retrieve data configuration from the FastAPI server.
+
+    Returns:
+        list: The LLM specification templates from the FastAPI server.
+    """
    url = f"{AGENTIC_SECURITY}/v1/llm-specs"
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
@@ -18,13 +18,13 @@ class LLMInfo(BaseModel):
 class Scan(BaseModel):
    llmSpec: str
    maxBudget: int
-    datasets: list[dict] = []
+    datasets: list[dict] = Field(default_factory=list)
    optimize: bool = False
    enableMultiStepAttack: bool = False
    # MSJ only mode
-    probe_datasets: list[dict] = []
+    probe_datasets: list[dict] = Field(default_factory=list)
    # Set and managed by the backend
-    secrets: dict[str, str] = {}
+    secrets: dict[str, str] = Field(default_factory=dict)

    def with_secrets(self, secrets) -> "Scan":
        match secrets:
@@ -22,8 +22,8 @@ from agentic_security.probe_data.data import prepare_prompts
 MAX_PROMPT_LENGTH = settings_var("fuzzer.max_prompt_lenght", 2048)
 BUDGET_MULTIPLIER = settings_var("fuzzer.budget_multiplier", 100000000)
 INITIAL_OPTIMIZER_POINTS = settings_var("fuzzer.initial_optimizer_points", 25)
-MIN_FAILURE_SAMPLES = settings_var("min_failure_samples", 5)
-FAILURE_RATE_THRESHOLD = settings_var("failure_rate_threshold", 0.5)
+MIN_FAILURE_SAMPLES = settings_var("fuzzer.min_failure_samples", 5)
+FAILURE_RATE_THRESHOLD = settings_var("fuzzer.failure_rate_threshold", 0.5)


 async def generate_prompts(
@@ -186,9 +186,9 @@ async def scan_module(
    processed_prompts: int = 0,
    total_prompts: int = 0,
    max_budget: int = 0,
-    total_tokens: int = 0,
    optimize: bool = False,
    stop_event: asyncio.Event | None = None,
+    token_counter: dict[str, int] | None = None,
 ) -> AsyncGenerator[dict[str, Any], None]:
    """
    Scan a single module.
@@ -200,7 +200,7 @@ async def scan_module(
        processed_prompts: Number of prompts processed so far
        total_prompts: Total number of prompts to process
        max_budget: Maximum token budget
-        total_tokens: Current token count
+        token_counter: Shared token counter to enforce global budget
        optimize: Whether to use optimization
        stop_event: Event to stop scanning

@@ -208,6 +208,7 @@ async def scan_module(
        ScanResult objects as the scan progresses
    """
    tokens = 0
+    token_counter = token_counter or {"total": 0}
    module_failures = 0
    module_prompts = 0
    failure_rates = []
@@ -249,9 +250,9 @@ async def scan_module(
        progress = 100 * processed_prompts / total_prompts if total_prompts else 0
        progress = progress % 100

-        total_tokens -= tokens
        start = time.time()

+        previous_tokens = tokens
        tokens, failed = await process_prompt(
            request_factory,
            prompt,
@@ -261,7 +262,8 @@ async def scan_module(
        )

        end = time.time()
-        total_tokens += tokens
+        token_delta = max(tokens - previous_tokens, 0)
+        token_counter["total"] += token_delta

        if failed:
            module_failures += 1
@@ -296,12 +298,14 @@ async def scan_module(
                break

        # Budget check
-        if total_tokens > max_budget:
+        if token_counter["total"] > max_budget:
            logger.info(
-                f"Scan ran out of budget and stopped. {total_tokens=} {max_budget=}"
+                "Scan ran out of budget and stopped. %s %s",
+                token_counter["total"],
+                max_budget,
            )
            yield ScanResult.status_msg(
-                f"Scan ran out of budget and stopped. {total_tokens=} {max_budget=}"
+                f"Scan ran out of budget and stopped. total_tokens={token_counter['total']} max_budget={max_budget}"
            )
            should_stop = True
            break
@@ -340,11 +344,11 @@ async def with_error_handling(agen):
 async def perform_single_shot_scan(
    request_factory,
    max_budget: int,
-    datasets: list[dict[str, str]] = [],
+    datasets: list[dict[str, str]] | None = None,
    tools_inbox=None,
    optimize: bool = False,
    stop_event: asyncio.Event | None = None,
-    secrets: dict[str, str] = {},
+    secrets: dict[str, str] | None = None,
 ) -> AsyncGenerator[str, None]:
    """
    Perform a standard security scan using a given request factory.
@@ -369,8 +373,16 @@ async def perform_single_shot_scan(
    failure statistics and token usage. If the scan exceeds the budget or failure rate is too high,
    it stops execution. Results are saved to a CSV file upon completion.
    """
+    datasets = datasets or []
+    secrets = secrets or {}
+    if stop_event and stop_event.is_set():
+        stop_event.clear()
+        yield ScanResult.status_msg("Loading datasets...")
+        yield ScanResult.status_msg("Scan stopped by user.")
+        yield ScanResult.status_msg("Scan completed.")
+        return
    max_budget = max_budget * BUDGET_MULTIPLIER
-    selected_datasets = [m for m in datasets if m["selected"]]
+    selected_datasets = [m for m in datasets if m.get("selected")]
    request_factory = get_modality_adapter(request_factory)

    yield ScanResult.status_msg("Loading datasets...")
@@ -386,7 +398,7 @@ async def perform_single_shot_scan(
    total_prompts = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
    processed_prompts = 0

-    total_tokens = 0
+    token_counter = {"total": 0}
    for module in prompt_modules:
        module_gen = scan_module(
            request_factory=request_factory,
@@ -395,9 +407,9 @@ async def perform_single_shot_scan(
            processed_prompts=processed_prompts,
            total_prompts=total_prompts,
            max_budget=max_budget,
-            total_tokens=total_tokens,
            optimize=optimize,
            stop_event=stop_event,
+            token_counter=token_counter,
        )
        try:
            async for result in module_gen:
@@ -416,14 +428,14 @@ async def perform_single_shot_scan(
 async def perform_many_shot_scan(
    request_factory,
    max_budget: int,
-    datasets: list[dict[str, str]] = [],
-    probe_datasets: list[dict[str, str]] = [],
+    datasets: list[dict[str, str]] | None = None,
+    probe_datasets: list[dict[str, str]] | None = None,
    tools_inbox=None,
    optimize: bool = False,
    stop_event: asyncio.Event | None = None,
    probe_frequency: float = 0.2,
    max_ctx_length: int = 10_000,
-    secrets: dict[str, str] = {},
+    secrets: dict[str, str] | None = None,
 ) -> AsyncGenerator[str, None]:
    """
    Perform a multi-step security scan with probe injection.
@@ -451,6 +463,15 @@ async def perform_many_shot_scan(
    processes them asynchronously, and tracks failure rates. If failure rates exceed a threshold
    or budget is exhausted, the scan is stopped early. Results are saved to a CSV file upon completion.
    """
+    datasets = datasets or []
+    probe_datasets = probe_datasets or []
+    secrets = secrets or {}
+    if stop_event and stop_event.is_set():
+        stop_event.clear()
+        yield ScanResult.status_msg("Loading datasets...")
+        yield ScanResult.status_msg("Scan stopped by user.")
+        yield ScanResult.status_msg("Scan completed.")
+        return
    request_factory = get_modality_adapter(request_factory)
    # Load main and probe datasets
    yield ScanResult.status_msg("Loading datasets...")
@@ -50,7 +50,6 @@ class RefusalClassifierPlugin(ABC):
        Returns:
            bool: True if the response contains a refusal, False otherwise.
        """
-        pass


 class DefaultRefusalClassifier(RefusalClassifierPlugin):
@@ -16,8 +16,6 @@ logger = logging.getLogger(__name__)
 class AudioGenerationError(Exception):
    """Custom exception for errors during audio generation."""

-    pass
-

 def encode(content: bytes) -> str:
    encoded_content = base64.b64encode(content).decode("utf-8")
@@ -8,7 +8,6 @@ from typing import Any, TypeVar
 import httpx
 import pandas as pd
 from cache_to_disk import cache_to_disk
-from datasets import load_dataset

 from agentic_security.logutils import logger
 from agentic_security.probe_data import stenography_fn
@@ -20,6 +19,7 @@ from agentic_security.probe_data.modules import (
    inspect_ai_tool,
    rl_model,
 )
+from datasets import load_dataset

 # Type aliases for clarity
 T = TypeVar("T")
@@ -245,61 +245,47 @@ def load_jailbreak_v28k() -> ProbeDataset:
        return create_probe_dataset("JailbreakV-28K/JailBreakV-28k", [])


-@cache_to_disk()
-def load_local_csv() -> ProbeDataset:
-    """Load prompts from local CSV files."""
-    csv_files = [f for f in os.listdir(".") if f.endswith(".csv")]
-    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")
-
-    prompts = []
-    for file in csv_files:
-        try:
-            df = pd.read_csv(file)
-            if "prompt" in df.columns:
-                prompts.extend(df["prompt"].tolist())
-            else:
-                logger.warning(f"File {file} lacks a suitable prompt column")
-        except Exception as e:
-            logger.error(f"Error reading {file}: {e}")
-
-    return create_probe_dataset("Local CSV", prompts, {"src": str(csv_files)})
-
-
@cache_to_disk(1)
-def load_csv(file: str) -> ProbeDataset:
-    """Load prompts from local CSV files."""
+def file_dataset(file) -> list[str]:
    prompts = []
    try:
-        df = pd.read_csv(file)
-        prompts = df["prompt"].tolist()
+        df = pd.read_csv(os.path.join("./datasets", file), encoding_errors="ignore")
        if "prompt" in df.columns:
-            prompts.extend(df["prompt"].tolist())
+            prompts = df["prompt"].tolist()
        else:
            logger.warning(f"File {file} lacks a suitable prompt column")
    except Exception as e:
        logger.error(f"Error reading {file}: {e}")
+    return prompts
+
+
+def load_local_csv() -> ProbeDataset:
+    """Load prompts from local CSV files."""
+    os.makedirs("./datasets", exist_ok=True)
+    csv_files = [f for f in os.listdir("./datasets") if f.endswith(".csv")]
+    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")
+
+    prompts = []
+    for file in csv_files:
+        prompts.extend(file_dataset(file))
+    return create_probe_dataset("Local CSV", prompts, {"src": str(csv_files)})
+
+
+def load_csv(file: str) -> ProbeDataset:
+    """Load prompts from local CSV files."""
+    prompts = file_dataset(file)
    return create_probe_dataset(f"fs://{file}", prompts, {"src": str(file)})


-@cache_to_disk(1)
 def load_local_csv_files() -> list[ProbeDataset]:
    """Load prompts from local CSV files and return a list of ProbeDataset objects."""
-    csv_files = [f for f in os.listdir(".") if f.endswith(".csv")]
+    csv_files = [f for f in os.listdir("./datasets") if f.endswith(".csv")]
    logger.info(f"Found {len(csv_files)} CSV files: {csv_files}")

    datasets = []

    for file in csv_files:
-        try:
-            df = pd.read_csv(file)
-            if "prompt" in df.columns:
-                prompts = df["prompt"].tolist()
-                datasets.append(create_probe_dataset(file, prompts, {"src": file}))
-            else:
-                logger.warning(f"File {file} lacks a suitable prompt column")
-        except Exception as e:
-            logger.error(f"Error reading {file}: {e}")
-
+        datasets.append(create_probe_dataset(file, file_dataset(file), {"src": file}))
    return datasets


@@ -336,12 +322,20 @@ class StenographyTransformer:
                )
                continue

-            size = len(dataset.prompts)
+            # Convert async generator (if any) to a list
+            prompts = (
+                list(dataset.prompts) if hasattr(dataset.prompts, "__iter__") else []
+            )
+            size = len(prompts)
+
+            if size == 0:
+                continue
+
            sample_size = max(1, int(size * 0.25))  # At least 1 sample

            for name, fn in self.TRANSFORMATIONS.items():
                logger.info(f"Applying {name} to {dataset.dataset_name}")
-                sampled = random.sample(dataset.prompts, min(sample_size, size))
+                sampled = random.sample(prompts, min(sample_size, size))
                try:
                    transformed = [
                        f"Follow the instructions in {name}: {fn(str(p))}"
@@ -481,3 +475,47 @@ def prepare_prompts(
        datasets.append(load_csv(name))

    return datasets
+
+
+async def prepare_prompts_unified(configs: list) -> list[ProbeDataset]:
+    """Prepare datasets using unified loader configuration.
+
+    This is an alternative to prepare_prompts() that uses the UnifiedDatasetLoader
+    for streamlined configuration and merging of multiple sources.
+
+    Args:
+        configs: List of InputSourceConfig objects or dicts
+
+    Returns:
+        list[ProbeDataset]: List containing the merged dataset
+
+    Example:
+        >>> from agentic_security.probe_data.unified_loader import InputSourceConfig
+        >>> configs = [
+        ...     InputSourceConfig(
+        ...         source_type="huggingface",
+        ...         dataset_name="deepset/prompt-injections",
+        ...         enabled=True,
+        ...         weight=1.0
+        ...     )
+        ... ]
+        >>> datasets = await prepare_prompts_unified(configs)
+    """
+    from agentic_security.probe_data.unified_loader import (
+        UnifiedDatasetLoader,
+        InputSourceConfig,
+    )
+
+    # Convert dicts to InputSourceConfig if needed
+    config_objects = []
+    for config in configs:
+        if isinstance(config, dict):
+            config_objects.append(InputSourceConfig(**config))
+        else:
+            config_objects.append(config)
+
+    loader = UnifiedDatasetLoader(config_objects)
+    merged_dataset = await loader.load_all()
+
+    # Return as list for compatibility with existing code
+    return [merged_dataset] if merged_dataset.prompts else []
@@ -20,12 +20,10 @@ class PromptSelectionInterface(ABC):
    @abstractmethod
    def select_next_prompt(self, current_prompt: str, passed_guard: bool) -> str:
        """Selects the next prompt based on current state and guard result."""
-        pass

    @abstractmethod
    def select_next_prompts(self, current_prompt: str, passed_guard: bool) -> list[str]:
        """Selects the next prompts based on current state and guard result."""
-        pass

    @abstractmethod
    def update_rewards(
@@ -36,7 +34,6 @@ class PromptSelectionInterface(ABC):
        passed_guard: bool,
    ) -> None:
        """Updates internal rewards based on the outcome of the last selected prompt."""
-        pass


 class RandomPromptSelector(PromptSelectionInterface):
@@ -121,8 +118,7 @@ class CloudRLPromptSelector(PromptSelectionInterface):
        current_prompt: str,
        reward: float,
        passed_guard: bool,
-    ) -> None:
-        ...
+    ) -> None: ...


 class QLearningPromptSelector(PromptSelectionInterface):
@@ -207,7 +203,11 @@ class QLearningPromptSelector(PromptSelectionInterface):

 class Module:
    def __init__(
-        self, prompt_groups: list[str], tools_inbox: asyncio.Queue, opts: dict = {}
+        self,
+        prompt_groups: list[str],
+        tools_inbox: asyncio.Queue,
+        opts: dict = {},
+        rl_model: PromptSelectionInterface | None = None,
    ):
        self.tools_inbox = tools_inbox
        self.opts = opts
@@ -215,7 +215,7 @@ class Module:
        self.max_prompts = self.opts.get("max_prompts", 10)  # Default max M prompts
        self.run_id = U.uuid4().hex
        self.batch_size = self.opts.get("batch_size", 500)
-        self.rl_model = CloudRLPromptSelector(
+        self.rl_model = rl_model or CloudRLPromptSelector(
            prompt_groups, "https://mcp.metaheuristic.co", run_id=self.run_id
        )

@@ -33,11 +33,19 @@ def mock_requests() -> Mock:


@pytest.fixture
-def mock_rl_selector() -> Mock:
-    return CloudRLPromptSelector(
-        dataset_prompts,
-        api_url="https://mcp.metaheuristic.co",
-    )
+def mock_rl_selector(dataset_prompts) -> Mock:
+    class StubSelector:
+        def __init__(self, prompts: list[str]):
+            self.prompts = prompts
+            self.idx = 0
+
+        def select_next_prompts(
+            self, current_prompt: str, passed_guard: bool
+        ) -> list[str]:
+            self.idx = (self.idx + 1) % len(self.prompts)
+            return [self.prompts[self.idx]]
+
+    return StubSelector(dataset_prompts)


@pytest.fixture
@@ -91,7 +99,10 @@ class TestCloudRLPromptSelector:
        next_prompt = selector.select_next_prompt("What is AI?", passed_guard=True)
        assert next_prompt in dataset_prompts

-    def test_select_next_prompt_success_service(self, dataset_prompts):
+    def test_select_next_prompt_success_service(self, dataset_prompts, mock_requests):
+        mock_requests.return_value.status_code = 200
+        mock_requests.return_value.json.return_value = {"next_prompts": ["What is AI?"]}
+
        selector = CloudRLPromptSelector(
            dataset_prompts,
            api_url="https://mcp.metaheuristic.co",
@@ -99,7 +110,7 @@ class TestCloudRLPromptSelector:
        next_prompt = selector.select_next_prompt(
            "How does RL work?", passed_guard=True
        )
-        assert next_prompt
+        assert next_prompt == "What is AI?"


 # Tests for QLearningPromptSelector
@@ -188,7 +199,7 @@ class TestModule:
    async def test_apply_basic_flow(
        self, dataset_prompts, tools_inbox, mock_rl_selector
    ):
-        module = Module(dataset_prompts, tools_inbox)
+        module = Module(dataset_prompts, tools_inbox, rl_model=mock_rl_selector)

        count = 0
        async for prompt in module.apply():
@@ -198,7 +209,9 @@ class TestModule:
                break

    @pytest.mark.asyncio
-    async def test_apply_rl_with_tools_inbox(self, dataset_prompts, tools_inbox):
+    async def test_apply_rl_with_tools_inbox(
+        self, dataset_prompts, tools_inbox, mock_rl_selector
+    ):
        # Add a test message to the tools inbox
        test_message = {
            "message": "Test message",
@@ -207,7 +220,7 @@ class TestModule:
        }
        await tools_inbox.put(test_message)

-        module = Module(dataset_prompts, tools_inbox)
+        module = Module(dataset_prompts, tools_inbox, rl_model=mock_rl_selector)

        async for output in module.apply():
            if output == "Test message":
@@ -0,0 +1,252 @@
+"""Unified dataset loader for CSV, HuggingFace, and proxy sources."""
+
+from typing import Literal
+from pydantic import BaseModel, Field
+
+from agentic_security.logutils import logger
+from agentic_security.probe_data.data import (
+    load_dataset_generic,
+    load_csv,
+    create_probe_dataset,
+)
+from agentic_security.probe_data.models import ProbeDataset
+
+
+class InputSourceConfig(BaseModel):
+    """Configuration for a single input source."""
+
+    source_type: Literal["csv", "huggingface", "proxy"] = Field(
+        description="Type of input source"
+    )
+    enabled: bool = Field(default=True, description="Whether this source is enabled")
+    dataset_name: str = Field(description="Name/identifier of the dataset")
+    weight: float = Field(
+        default=1.0, ge=0.0, description="Sampling weight for merging"
+    )
+
+    # CSV-specific fields
+    path: str | None = Field(default=None, description="File path for CSV sources")
+    prompt_column: str | None = Field(
+        default="prompt", description="Column name containing prompts"
+    )
+
+    # HuggingFace-specific fields
+    split: str | None = Field(
+        default="train", description="Dataset split to load (train/test/validation)"
+    )
+    max_samples: int | None = Field(
+        default=None, ge=1, description="Maximum number of samples to load"
+    )
+
+    # URL for custom sources
+    url: str | None = Field(default=None, description="URL for remote CSV files")
+
+
+class UnifiedDatasetLoader:
+    """Loads and merges datasets from multiple sources."""
+
+    def __init__(self, configs: list[InputSourceConfig]):
+        """Initialize with list of input source configurations.
+
+        Args:
+            configs: List of InputSourceConfig objects defining data sources
+        """
+        self.configs = configs
+        logger.info(f"Initialized UnifiedDatasetLoader with {len(configs)} sources")
+
+    async def load_all(self) -> ProbeDataset:
+        """Load all enabled sources and merge into a single dataset.
+
+        Returns:
+            ProbeDataset: Merged dataset from all enabled sources
+        """
+        datasets = []
+
+        for config in self.configs:
+            if not config.enabled:
+                logger.debug(f"Skipping disabled source: {config.dataset_name}")
+                continue
+
+            try:
+                dataset = await self._load_single(config)
+                if dataset and dataset.prompts:
+                    datasets.append((dataset, config.weight))
+                    logger.info(
+                        f"Loaded {len(dataset.prompts)} prompts from {config.dataset_name} "
+                        f"(weight={config.weight})"
+                    )
+                else:
+                    logger.warning(f"No prompts loaded from {config.dataset_name}")
+            except Exception as e:
+                logger.error(f"Error loading {config.dataset_name}: {e}")
+
+        if not datasets:
+            logger.warning("No datasets loaded successfully")
+            return create_probe_dataset("unified_empty", [], {"sources": []})
+
+        return self._merge_weighted(datasets)
+
+    async def _load_single(self, config: InputSourceConfig) -> ProbeDataset:
+        """Load a single dataset based on its configuration.
+
+        Args:
+            config: Configuration for the source to load
+
+        Returns:
+            ProbeDataset: Loaded dataset
+        """
+        if config.source_type == "csv":
+            return self._load_csv_source(config)
+        elif config.source_type == "huggingface":
+            return self._load_huggingface_source(config)
+        elif config.source_type == "proxy":
+            return self._load_proxy_source(config)
+        else:
+            raise ValueError(f"Unknown source type: {config.source_type}")
+
+    def _load_csv_source(self, config: InputSourceConfig) -> ProbeDataset:
+        """Load dataset from CSV file.
+
+        Args:
+            config: CSV source configuration
+
+        Returns:
+            ProbeDataset: Dataset loaded from CSV
+        """
+        if config.path:
+            # Local CSV file
+            logger.info(f"Loading CSV from path: {config.path}")
+            dataset = load_csv(config.path)
+        elif config.url:
+            # Remote CSV file
+            logger.info(f"Loading CSV from URL: {config.url}")
+            mappings = (
+                {config.prompt_column: "prompt"} if config.prompt_column else None
+            )
+            dataset = load_dataset_generic(
+                name=config.dataset_name,
+                url=config.url,
+                mappings=mappings,
+                metadata={"source_type": "csv", "url": config.url},
+            )
+        else:
+            raise ValueError(
+                f"CSV source {config.dataset_name} requires either path or url"
+            )
+
+        # Apply max_samples limit if specified
+        if config.max_samples and len(dataset.prompts) > config.max_samples:
+            logger.info(
+                f"Limiting {config.dataset_name} from {len(dataset.prompts)} "
+                f"to {config.max_samples} samples"
+            )
+            dataset.prompts = dataset.prompts[: config.max_samples]
+
+        return dataset
+
+    def _load_huggingface_source(self, config: InputSourceConfig) -> ProbeDataset:
+        """Load dataset from HuggingFace.
+
+        Args:
+            config: HuggingFace source configuration
+
+        Returns:
+            ProbeDataset: Dataset loaded from HuggingFace
+        """
+        logger.info(
+            f"Loading HuggingFace dataset: {config.dataset_name} "
+            f"(split={config.split})"
+        )
+
+        # Build column mappings
+        mappings = None
+        if config.prompt_column and config.prompt_column != "prompt":
+            mappings = {config.prompt_column: "prompt"}
+
+        dataset = load_dataset_generic(
+            name=config.dataset_name,
+            mappings=mappings,
+            metadata={
+                "source_type": "huggingface",
+                "split": config.split,
+            },
+        )
+
+        # Apply max_samples limit if specified
+        if config.max_samples and len(dataset.prompts) > config.max_samples:
+            logger.info(
+                f"Limiting {config.dataset_name} from {len(dataset.prompts)} "
+                f"to {config.max_samples} samples"
+            )
+            dataset.prompts = dataset.prompts[: config.max_samples]
+
+        return dataset
+
+    def _load_proxy_source(self, config: InputSourceConfig) -> ProbeDataset:
+        """Load dataset from proxy queue (placeholder for PoC).
+
+        Args:
+            config: Proxy source configuration
+
+        Returns:
+            ProbeDataset: Empty dataset (proxy integration not implemented in PoC)
+        """
+        logger.warning(
+            f"Proxy source {config.dataset_name} not implemented in PoC - returning empty dataset"
+        )
+        return create_probe_dataset(
+            config.dataset_name,
+            [],
+            {"source_type": "proxy", "status": "not_implemented"},
+        )
+
+    def _merge_weighted(
+        self, datasets: list[tuple[ProbeDataset, float]]
+    ) -> ProbeDataset:
+        """Merge multiple datasets with weighted sampling.
+
+        For PoC, this implements simple concatenation with optional weighting.
+        Production version would implement proper stratified sampling.
+
+        Args:
+            datasets: List of (ProbeDataset, weight) tuples
+
+        Returns:
+            ProbeDataset: Merged dataset
+        """
+        if not datasets:
+            return create_probe_dataset("unified_empty", [], {"sources": []})
+
+        # For PoC: simple concatenation, repeat prompts based on weight
+        all_prompts = []
+        source_names = []
+        total_tokens = 0
+
+        for dataset, weight in datasets:
+            source_names.append(dataset.dataset_name)
+
+            # Calculate how many times to include this dataset based on weight
+            # Weight of 1.0 = include once, 2.0 = include twice, etc.
+            repeat_count = max(1, int(weight))
+
+            for _ in range(repeat_count):
+                all_prompts.extend(dataset.prompts)
+
+            total_tokens += dataset.tokens * repeat_count
+
+        logger.info(
+            f"Merged {len(datasets)} datasets into {len(all_prompts)} total prompts "
+            f"from sources: {source_names}"
+        )
+
+        return ProbeDataset(
+            dataset_name="unified",
+            metadata={
+                "sources": source_names,
+                "source_count": len(datasets),
+                "weights": {ds.dataset_name: w for ds, w in datasets},
+            },
+            prompts=all_prompts,
+            tokens=total_tokens,
+            approx_cost=0.0,
+        )
@@ -1,8 +1,10 @@
 import importlib.resources as pkg_resources
 import os
+import warnings

 import joblib
 import pandas as pd
+from sklearn.exceptions import InconsistentVersionWarning
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import OneClassSVM
@@ -70,27 +72,31 @@ class RefusalClassifier:
        """
        Load the trained model, vectorizer, and scaler from disk.
        """
-        try:
-            self.model = joblib.load(self.model_path)
-            self.vectorizer = joblib.load(self.vectorizer_path)
-            self.scaler = joblib.load(self.scaler_path)
-        except FileNotFoundError:
-            # Load from package resources
-            package = (
-                __package__  # This should be 'agentic_security.refusal_classifier'
-            )
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
+            try:
+                self.model = joblib.load(self.model_path)
+                self.vectorizer = joblib.load(self.vectorizer_path)
+                self.scaler = joblib.load(self.scaler_path)
+            except FileNotFoundError:
+                # Load from package resources
+                package = (
+                    __package__  # This should be 'agentic_security.refusal_classifier'
+                )

-            # Load model
-            with pkg_resources.open_binary(package, "oneclass_svm_model.joblib") as f:
-                self.model = joblib.load(f)
+                # Load model
+                with pkg_resources.open_binary(
+                    package, "oneclass_svm_model.joblib"
+                ) as f:
+                    self.model = joblib.load(f)

-            # Load vectorizer
-            with pkg_resources.open_binary(package, "tfidf_vectorizer.joblib") as f:
-                self.vectorizer = joblib.load(f)
+                # Load vectorizer
+                with pkg_resources.open_binary(package, "tfidf_vectorizer.joblib") as f:
+                    self.vectorizer = joblib.load(f)

-            # Load scaler
-            with pkg_resources.open_binary(package, "scaler.joblib") as f:
-                self.scaler = joblib.load(f)
+                # Load scaler
+                with pkg_resources.open_binary(package, "scaler.joblib") as f:
+                    self.scaler = joblib.load(f)

    def is_refusal(self, text):
        """
@@ -59,6 +59,7 @@ def _plot_security_report(table: Table) -> io.BytesIO:
    Returns:
        io.BytesIO: A buffer containing the generated plot image in PNG format.
    """
+    return io.BytesIO()
    # Data preprocessing
    logger.info("Data preprocessing started.")

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agentic_security"
-version = "0.7.1"
+version = "0.7.4"
 description = "Agentic LLM vulnerability scanner"
 authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
 maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
@@ -28,53 +28,49 @@ agentic_security = "agentic_security.__main__:main"

 [tool.poetry.dependencies]
 python = "^3.11"
-fastapi = "^0.115.8"
-uvicorn = "^0.34.0"
-fire = "0.7.0"
+fastapi = "^0.122.0"
+uvicorn = "^0.38.0"
+fire = "0.7.1"
 loguru = "^0.7.3"
 httpx = "^0.28.1"
 cache-to-disk = "^2.0.0"
 pandas = ">=1.4,<3.0"
-datasets = "^3.3.0"
+datasets = "^4.4.1"
 tabulate = ">=0.8.9,<0.10.0"
 colorama = "^0.4.4"
-matplotlib = "^3.9.2"
-pydantic = "2.10.6"
+matplotlib = "^3.10.7"
+pydantic = "^2.12.5"
 scikit-optimize = "^0.10.2"
-scikit-learn = "1.6.1"
+scikit-learn = "^1.7.2"
 numpy = ">=1.24.3,<3.0.0"
 jinja2 = "^3.1.4"
 python-multipart = "^0.0.20"
-tomli = "^2.2.1"
-rich = "13.9.4"
+tomli = "^2.3.0"
+rich = "^14.2.0"
 gTTS = "^2.5.4"
-sentry_sdk = "^2.22.0"
-orjson = "^3.10"
-pyfiglet = "^1.0.2"
-termcolor = "^2.4.0"
-
+sentry_sdk = "^2.46.0"
+orjson = "^3.11.4"
+pyfiglet = "^1.0.4"
+termcolor = "^3.2.0"
+mcp = "^1.22.0"
 # garak = { version = "*", optional = true }
-pytest-xdist = "3.6.1"
-
+pytest-xdist = "^3.8.0"

 [tool.poetry.group.dev.dependencies]
 # Pytest
-pytest = "^8.3.4"
-pytest-asyncio = "^0.25.2"
-inline-snapshot = ">=0.13.3,<0.21.0"
-pytest-httpx = "^0.35.0"
-pytest-mock = "^3.14.0"
-
+pytest = "^9.0.1"
+pytest-asyncio = "^1.3.0"
+inline-snapshot = "^0.31.1"
+pytest-mock = "^3.15.1"
 # Rest
 black = ">=24.10,<26.0"
-mypy = "^1.12.0"
-pre-commit = "^4.0.1"
-huggingface-hub = ">=0.25.1,<0.30.0"
-
+mypy = "^1.19.0"
+pre-commit = "^4.5.0"
+huggingface-hub = "^1.1.6"
 # Docs
 mkdocs = ">=1.4.2"
-mkdocs-material = "^9.6.4"
-mkdocstrings = ">=0.26.1"
+mkdocs-material = "^9.7.0"
+mkdocstrings = "^1.0.0"
 mkdocs-jupyter = ">=0.25.1"


@@ -87,7 +83,8 @@ build-backend = "poetry.core.masonry.api"


 [tool.pytest.ini_options]
-addopts = "--durations=5 -m 'not slow' -n 3"
+addopts = "-m 'not slow'"
+# addopts = "--durations=5 -m 'not slow' -n 3"
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
 markers = "slow: marks tests as slow"
@@ -1,10 +1,31 @@
 import os
+import warnings
+from pathlib import Path

 import pytest
-from cache_to_disk import delete_old_disk_caches
+from sklearn.exceptions import InconsistentVersionWarning

+from agentic_security.cache_config import ensure_cache_dir
 from agentic_security.logutils import logger

+CACHE_DIR = ensure_cache_dir(Path(__file__).parent / ".cache_to_disk")
+
+from cache_to_disk import delete_old_disk_caches  # noqa: E402  # isort: skip
+
+# Silence noisy third-party warnings that do not impact test behavior
+warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
+try:
+    from langchain_core._api import LangChainDeprecationWarning
+
+    warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
+except Exception:  # pragma: no cover - fallback for older langchain versions
+    warnings.filterwarnings(
+        "ignore",
+        category=DeprecationWarning,
+        module=r"langchain\\.agents",
+        message=r".*langchain_core.pydantic_v1.*",
+    )
+

 def pytest_runtest_setup(item):
    if "slow" in item.keywords and not os.getenv("RUN_SLOW_TESTS"):
@@ -13,5 +34,10 @@ def pytest_runtest_setup(item):

@pytest.fixture(autouse=True, scope="session")
 def setup_delete_old_disk_caches():
-    logger.info("delete_old_disk_caches")
-    delete_old_disk_caches()
+    logger.info("delete_old_disk_caches at %s", CACHE_DIR)
+    try:
+        delete_old_disk_caches()
+    except PermissionError:
+        logger.warning("Skipping cache cleanup due to permissions for %s", CACHE_DIR)
+    except OSError as exc:
+        logger.warning("Skipping cache cleanup due to OS error: %s", exc)
@@ -0,0 +1 @@
+"""Tests for executor package."""
@@ -0,0 +1,209 @@
+"""Tests for CircuitBreaker."""
+
+import time
+from agentic_security.executor.circuit_breaker import CircuitBreaker
+
+
+class TestCircuitBreaker:
+    """Test CircuitBreaker functionality."""
+
+    def test_initialization(self):
+        """Test circuit breaker initialization."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=30)
+
+        assert breaker.failure_threshold == 0.5
+        assert breaker.recovery_timeout == 30
+        assert breaker.state == "closed"
+        assert breaker.failures == 0
+        assert breaker.successes == 0
+
+    def test_record_success(self):
+        """Test recording successful requests."""
+        breaker = CircuitBreaker()
+
+        breaker.record_success()
+        assert breaker.successes == 1
+        assert breaker.failures == 0
+        assert breaker.state == "closed"
+
+    def test_record_failure(self):
+        """Test recording failed requests."""
+        breaker = CircuitBreaker()
+
+        breaker.record_failure()
+        assert breaker.failures == 1
+        assert breaker.successes == 0
+        assert breaker.last_failure_time is not None
+
+    def test_circuit_opens_on_failure_threshold(self):
+        """Test that circuit opens when failure threshold is exceeded."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=30)
+
+        # Record 10 requests: 6 failures, 4 successes (60% failure rate)
+        for _ in range(4):
+            breaker.record_success()
+        for _ in range(6):
+            breaker.record_failure()
+
+        # Circuit should be open (60% > 50% threshold)
+        assert breaker.state == "open"
+        assert breaker.is_open() is True
+
+    def test_circuit_stays_closed_below_threshold(self):
+        """Test that circuit stays closed when below threshold."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=30)
+
+        # Record 10 requests: 4 failures, 6 successes (40% failure rate)
+        for _ in range(6):
+            breaker.record_success()
+        for _ in range(4):
+            breaker.record_failure()
+
+        # Circuit should stay closed (40% < 50% threshold)
+        assert breaker.state == "closed"
+        assert breaker.is_open() is False
+
+    def test_minimum_sample_size_required(self):
+        """Test that minimum sample size is required before opening."""
+        breaker = CircuitBreaker(failure_threshold=0.5)
+
+        # Only 5 failures (below minimum of 10 total requests)
+        for _ in range(5):
+            breaker.record_failure()
+
+        # Circuit should stay closed (not enough samples)
+        assert breaker.state == "closed"
+        assert breaker.is_open() is False
+
+    def test_circuit_recovery_after_timeout(self):
+        """Test that circuit enters half-open state after recovery timeout."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=1)
+
+        # Open the circuit
+        for _ in range(4):
+            breaker.record_success()
+        for _ in range(6):
+            breaker.record_failure()
+
+        assert breaker.state == "open"
+
+        # Wait for recovery timeout
+        time.sleep(1.1)
+
+        # Check if circuit moves to half-open
+        is_open = breaker.is_open()
+        assert is_open is False
+        assert breaker.state == "half_open"
+
+    def test_half_open_to_closed_on_successes(self):
+        """Test that circuit closes from half-open after enough successes."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=1)
+
+        # Open the circuit
+        for _ in range(4):
+            breaker.record_success()
+        for _ in range(6):
+            breaker.record_failure()
+
+        # Wait for recovery
+        time.sleep(1.1)
+        breaker.is_open()  # Triggers transition to half-open
+
+        assert breaker.state == "half_open"
+
+        # Record 3 successes
+        breaker.record_success()
+        breaker.record_success()
+        breaker.record_success()
+
+        # Should transition to closed
+        assert breaker.state == "closed"
+
+    def test_get_state(self):
+        """Test get_state method."""
+        breaker = CircuitBreaker()
+
+        assert breaker.get_state() == "closed"
+
+        # Open the circuit
+        for _ in range(10):
+            breaker.record_failure()
+
+        assert breaker.get_state() == "open"
+
+    def test_get_failure_rate(self):
+        """Test get_failure_rate method."""
+        breaker = CircuitBreaker()
+
+        # No requests
+        assert breaker.get_failure_rate() == 0.0
+
+        # 3 failures, 7 successes (30% failure rate)
+        for _ in range(7):
+            breaker.record_success()
+        for _ in range(3):
+            breaker.record_failure()
+
+        assert breaker.get_failure_rate() == 0.3
+
+    def test_reset(self):
+        """Test reset method."""
+        breaker = CircuitBreaker()
+
+        # Record some activity
+        breaker.record_success()
+        breaker.record_failure()
+        for _ in range(10):
+            breaker.record_failure()
+
+        # Reset
+        breaker.reset()
+
+        # Should be back to initial state
+        assert breaker.state == "closed"
+        assert breaker.failures == 0
+        assert breaker.successes == 0
+        assert breaker.last_failure_time is None
+
+    def test_exact_failure_threshold(self):
+        """Test behavior at exact failure threshold."""
+        breaker = CircuitBreaker(failure_threshold=0.5)
+
+        # Exactly 50% failure rate (5 failures, 5 successes)
+        for _ in range(5):
+            breaker.record_success()
+        for _ in range(5):
+            breaker.record_failure()
+
+        # Should be open (>= threshold)
+        assert breaker.state == "open"
+
+    def test_high_failure_threshold(self):
+        """Test with high failure threshold."""
+        breaker = CircuitBreaker(failure_threshold=0.9)
+
+        # 80% failure rate (8 failures, 2 successes)
+        for _ in range(2):
+            breaker.record_success()
+        for _ in range(8):
+            breaker.record_failure()
+
+        # Should stay closed (80% < 90%)
+        assert breaker.state == "closed"
+
+    def test_zero_recovery_timeout(self):
+        """Test with zero recovery timeout."""
+        breaker = CircuitBreaker(failure_threshold=0.5, recovery_timeout=0)
+
+        # Open the circuit
+        for _ in range(10):
+            breaker.record_failure()
+
+        assert breaker.state == "open"
+
+        # Should immediately allow recovery attempt
+        time.sleep(0.01)
+        is_open = breaker.is_open()
+
+        assert is_open is False
+        assert breaker.state == "half_open"
@@ -0,0 +1,279 @@
+"""Tests for ConcurrentExecutor."""
+
+import pytest
+import asyncio
+from unittest.mock import Mock, patch
+from agentic_security.executor.concurrent import ConcurrentExecutor, ExecutorMetrics
+from agentic_security.probe_actor.state import FuzzerState
+
+
+class TestExecutorMetrics:
+    """Test ExecutorMetrics functionality."""
+
+    def test_initialization(self):
+        """Test metrics initialization."""
+        metrics = ExecutorMetrics()
+
+        assert metrics.successful_requests == 0
+        assert metrics.failed_requests == 0
+        assert metrics.total_latency == 0.0
+        assert len(metrics.latencies) == 0
+
+    def test_record_success(self):
+        """Test recording successful requests."""
+        metrics = ExecutorMetrics()
+
+        metrics.record_success(0.5)
+        metrics.record_success(0.3)
+
+        assert metrics.successful_requests == 2
+        assert metrics.total_latency == 0.8
+        assert len(metrics.latencies) == 2
+
+    def test_record_failure(self):
+        """Test recording failed requests."""
+        metrics = ExecutorMetrics()
+
+        metrics.record_failure()
+        metrics.record_failure()
+
+        assert metrics.failed_requests == 2
+        assert metrics.successful_requests == 0
+
+    def test_get_stats_no_requests(self):
+        """Test get_stats with no requests."""
+        metrics = ExecutorMetrics()
+
+        stats = metrics.get_stats()
+
+        assert stats["total_requests"] == 0
+        assert stats["success_rate"] == 0.0
+        assert stats["avg_latency_ms"] == 0.0
+        assert stats["p95_latency_ms"] == 0.0
+
+    def test_get_stats_with_requests(self):
+        """Test get_stats with recorded requests."""
+        metrics = ExecutorMetrics()
+
+        # Record some requests
+        metrics.record_success(0.1)  # 100ms
+        metrics.record_success(0.2)  # 200ms
+        metrics.record_success(0.3)  # 300ms
+        metrics.record_failure()
+
+        stats = metrics.get_stats()
+
+        assert stats["total_requests"] == 4
+        assert stats["successful_requests"] == 3
+        assert stats["failed_requests"] == 1
+        assert stats["success_rate"] == 0.75
+        assert stats["avg_latency_ms"] == pytest.approx(200.0, rel=0.01)
+
+    def test_get_stats_p95_latency(self):
+        """Test p95 latency calculation."""
+        metrics = ExecutorMetrics()
+
+        # Add 100 requests with varying latencies
+        for i in range(100):
+            metrics.record_success(i * 0.001)  # 0ms to 99ms
+
+        stats = metrics.get_stats()
+
+        # p95 should be around 95ms
+        assert stats["p95_latency_ms"] >= 90.0
+        assert stats["p95_latency_ms"] <= 100.0
+
+
+class TestConcurrentExecutor:
+    """Test ConcurrentExecutor functionality."""
+
+    def test_initialization(self):
+        """Test executor initialization."""
+        executor = ConcurrentExecutor(
+            max_concurrent=20,
+            rate_limit=10,
+            burst=5,
+            failure_threshold=0.5,
+            recovery_timeout=30,
+        )
+
+        assert executor.semaphore._value == 20
+        assert executor.rate_limiter.rate == 10
+        assert executor.rate_limiter.burst == 5
+        assert executor.circuit_breaker.failure_threshold == 0.5
+        assert executor.circuit_breaker.recovery_timeout == 30
+
+    @pytest.mark.asyncio
+    async def test_execute_batch_success(self):
+        """Test successful batch execution."""
+        executor = ConcurrentExecutor(max_concurrent=10, rate_limit=100, burst=10)
+        fuzzer_state = FuzzerState()
+
+        # Mock request factory
+        request_factory = Mock()
+
+        # Mock process_prompt to return success
+        async def mock_process_prompt(rf, prompt, tokens, module, state):
+            return (10, False)  # 10 tokens, not refused
+
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt,
+        ):
+            prompts = ["prompt1", "prompt2", "prompt3"]
+            tokens, failures = await executor.execute_batch(
+                request_factory, prompts, "test_module", fuzzer_state
+            )
+
+        assert tokens == 30  # 3 prompts * 10 tokens
+        assert failures == 0
+
+    @pytest.mark.asyncio
+    async def test_execute_batch_with_failures(self):
+        """Test batch execution with some failures."""
+        executor = ConcurrentExecutor(max_concurrent=10, rate_limit=100, burst=10)
+        fuzzer_state = FuzzerState()
+
+        request_factory = Mock()
+
+        # Mock process_prompt to alternate success/failure
+        call_count = [0]
+
+        async def mock_process_prompt(rf, prompt, tokens, module, state):
+            call_count[0] += 1
+            if call_count[0] % 2 == 0:
+                return (10, True)  # Refused
+            return (10, False)  # Success
+
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt,
+        ):
+            prompts = ["p1", "p2", "p3", "p4"]
+            tokens, failures = await executor.execute_batch(
+                request_factory, prompts, "test_module", fuzzer_state
+            )
+
+        assert tokens == 40  # 4 prompts * 10 tokens
+        assert failures == 2  # 2 refused
+
+    @pytest.mark.asyncio
+    async def test_execute_batch_respects_concurrency_limit(self):
+        """Test that concurrency limit is respected."""
+        executor = ConcurrentExecutor(max_concurrent=2, rate_limit=100, burst=10)
+        fuzzer_state = FuzzerState()
+
+        request_factory = Mock()
+
+        # Track concurrent executions
+        concurrent_count = [0]
+        max_concurrent = [0]
+
+        async def mock_process_prompt(rf, prompt, tokens, module, state):
+            concurrent_count[0] += 1
+            max_concurrent[0] = max(max_concurrent[0], concurrent_count[0])
+            await asyncio.sleep(0.01)  # Simulate work
+            concurrent_count[0] -= 1
+            return (10, False)
+
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt,
+        ):
+            prompts = ["p1", "p2", "p3", "p4", "p5"]
+            await executor.execute_batch(
+                request_factory, prompts, "test_module", fuzzer_state
+            )
+
+        # Max concurrent should not exceed limit
+        assert max_concurrent[0] <= 2
+
+    @pytest.mark.asyncio
+    async def test_circuit_breaker_integration(self):
+        """Test that circuit breaker opens on failures."""
+        executor = ConcurrentExecutor(
+            max_concurrent=10,
+            rate_limit=100,
+            burst=20,
+            failure_threshold=0.5,
+            recovery_timeout=1,
+        )
+        fuzzer_state = FuzzerState()
+        request_factory = Mock()
+
+        # Mock process_prompt to always fail
+        async def mock_process_prompt_fail(rf, prompt, tokens, module, state):
+            raise Exception("Request failed")
+
+        # First batch - all failures
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt_fail,
+        ):
+            prompts = ["p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10"]
+            tokens, failures = await executor.execute_batch(
+                request_factory, prompts, "test_module", fuzzer_state
+            )
+
+        # All should have failed
+        assert failures == 10
+
+        # Circuit should be open now
+        assert executor.circuit_breaker.state == "open"
+
+    @pytest.mark.asyncio
+    async def test_get_metrics(self):
+        """Test getting executor metrics."""
+        executor = ConcurrentExecutor(max_concurrent=10, rate_limit=100, burst=10)
+        fuzzer_state = FuzzerState()
+        request_factory = Mock()
+
+        async def mock_process_prompt(rf, prompt, tokens, module, state):
+            return (10, False)
+
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt,
+        ):
+            await executor.execute_batch(
+                request_factory, ["p1", "p2"], "test_module", fuzzer_state
+            )
+
+        metrics = executor.get_metrics()
+
+        assert "total_requests" in metrics
+        assert "success_rate" in metrics
+        assert "circuit_breaker_state" in metrics
+        assert "available_tokens" in metrics
+        assert metrics["total_requests"] == 2
+        assert metrics["circuit_breaker_state"] == "closed"
+
+    @pytest.mark.asyncio
+    async def test_rate_limiting_applied(self):
+        """Test that rate limiting is applied."""
+        executor = ConcurrentExecutor(max_concurrent=10, rate_limit=5, burst=2)
+        fuzzer_state = FuzzerState()
+        request_factory = Mock()
+
+        async def mock_process_prompt(rf, prompt, tokens, module, state):
+            return (10, False)
+
+        import time
+
+        with patch(
+            "agentic_security.probe_actor.fuzzer.process_prompt",
+            side_effect=mock_process_prompt,
+        ):
+            start = time.monotonic()
+            # 5 requests with rate=5/s and burst=2
+            # First 2 immediate, next 3 should take ~0.6s total
+            await executor.execute_batch(
+                request_factory,
+                ["p1", "p2", "p3", "p4", "p5"],
+                "test_module",
+                fuzzer_state,
+            )
+            elapsed = time.monotonic() - start
+
+        # Should take at least 0.5s (3 requests / 5 per second)
+        assert elapsed >= 0.4
@@ -0,0 +1,145 @@
+"""Tests for TokenBucketRateLimiter."""
+
+import asyncio
+import pytest
+import time
+from agentic_security.executor.rate_limiter import TokenBucketRateLimiter
+
+
+class TestTokenBucketRateLimiter:
+    """Test TokenBucketRateLimiter functionality."""
+
+    @pytest.mark.asyncio
+    async def test_initialization(self):
+        """Test rate limiter initialization."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=20)
+
+        assert limiter.rate == 10
+        assert limiter.burst == 20
+        assert limiter.tokens == 20  # Starts full
+
+    @pytest.mark.asyncio
+    async def test_acquire_with_available_tokens(self):
+        """Test acquiring tokens when they're available."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=5)
+
+        start = time.monotonic()
+        await limiter.acquire()
+        elapsed = time.monotonic() - start
+
+        # Should return immediately
+        assert elapsed < 0.1
+        assert limiter.tokens < 5  # One token consumed
+
+    @pytest.mark.asyncio
+    async def test_acquire_waits_when_no_tokens(self):
+        """Test that acquire waits when no tokens available."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=1)
+
+        # Consume the initial token
+        await limiter.acquire()
+
+        # Next acquire should wait
+        start = time.monotonic()
+        await limiter.acquire()
+        elapsed = time.monotonic() - start
+
+        # Should wait approximately 1/rate seconds (0.1s for rate=10)
+        assert elapsed >= 0.08  # Allow some tolerance
+
+    @pytest.mark.asyncio
+    async def test_rate_limiting(self):
+        """Test that rate limiting actually limits request rate."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=2)
+
+        # Make 5 requests
+        start = time.monotonic()
+        for _ in range(5):
+            await limiter.acquire()
+        elapsed = time.monotonic() - start
+
+        # With rate=10/s and burst=2:
+        # - First 2 requests are immediate (burst)
+        # - Next 3 requests require waiting: 3 * (1/10) = 0.3s
+        # Total should be around 0.3s
+        assert elapsed >= 0.25  # Allow some tolerance
+        assert elapsed < 0.5
+
+    @pytest.mark.asyncio
+    async def test_burst_capacity(self):
+        """Test that burst capacity allows immediate requests."""
+        limiter = TokenBucketRateLimiter(rate=5, burst=10)
+
+        # Make burst number of requests immediately
+        start = time.monotonic()
+        for _ in range(10):
+            await limiter.acquire()
+        elapsed = time.monotonic() - start
+
+        # All 10 requests should be nearly immediate (using burst capacity)
+        assert elapsed < 0.2
+
+    @pytest.mark.asyncio
+    async def test_token_replenishment(self):
+        """Test that tokens are replenished over time."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=5)
+
+        # Consume all tokens
+        for _ in range(5):
+            await limiter.acquire()
+
+        assert limiter.tokens < 1
+
+        # Wait for tokens to replenish
+        await asyncio.sleep(0.3)  # Should add 3 tokens at rate=10
+
+        # Should have tokens again (approximately 3)
+        available = limiter.get_available_tokens()
+        assert available >= 2.5
+        assert available <= 3.5
+
+    @pytest.mark.asyncio
+    async def test_get_available_tokens(self):
+        """Test get_available_tokens method."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=5)
+
+        # Initially full
+        assert limiter.get_available_tokens() == 5
+
+        # After consuming one
+        await limiter.acquire()
+        assert limiter.get_available_tokens() < 5
+
+    @pytest.mark.asyncio
+    async def test_concurrent_requests(self):
+        """Test rate limiter with concurrent requests."""
+        limiter = TokenBucketRateLimiter(rate=10, burst=3)
+
+        async def make_request(limiter):
+            await limiter.acquire()
+            return time.monotonic()
+
+        # Make 5 concurrent requests
+        start = time.monotonic()
+        tasks = [make_request(limiter) for _ in range(5)]
+        timestamps = await asyncio.gather(*tasks)
+        total_elapsed = time.monotonic() - start
+
+        # First 3 should be immediate (burst=3)
+        # Next 2 should wait
+        # Total time should be around 0.2s (2 * 1/10)
+        assert total_elapsed >= 0.15
+        assert total_elapsed < 0.4
+
+    @pytest.mark.asyncio
+    async def test_max_burst_capacity(self):
+        """Test that tokens don't exceed burst capacity."""
+        limiter = TokenBucketRateLimiter(rate=100, burst=5)
+
+        # Wait longer than needed to fill
+        await asyncio.sleep(0.2)  # Would add 20 tokens, but capped at 5
+
+        # Check tokens don't exceed burst
+        available = limiter.get_available_tokens()
+        assert available <= 5
+        assert available >= 4.5  # Close to full
@@ -76,14 +76,23 @@ async def test_perform_single_shot_scan_success(prepare_prompts_mock):


@pytest.mark.asyncio
+@patch("agentic_security.probe_data.msj_data.prepare_prompts")
@patch("agentic_security.probe_data.data.prepare_prompts")
-async def test_perform_many_shot_scan_probe_injection(prepare_prompts_mock):
+async def test_perform_many_shot_scan_probe_injection(
+    prepare_prompts_mock, msj_prepare_prompts_mock
+):
    # Mock main and probe prompt modules
    prepare_prompts_mock.side_effect = [
        [MagicMock(dataset_name="main_module", prompts=["main_prompt1"], lazy=False)],
        [MagicMock(dataset_name="probe_module", prompts=["probe_prompt1"], lazy=False)],
    ]

+    msj_prepare_prompts_mock.return_value = [
+        MagicMock(
+            dataset_name="msj_probe_module", prompts=["msj_probe_prompt"], lazy=False
+        )
+    ]
+
    # Mock request_factory
    mock_response = AsyncMock()
    mock_response.fn.side_effect = [
@@ -0,0 +1,360 @@
+"""Tests for unified dataset loader."""
+
+import pytest
+from unittest.mock import patch
+from agentic_security.probe_data.unified_loader import (
+    InputSourceConfig,
+    UnifiedDatasetLoader,
+)
+from agentic_security.probe_data.models import ProbeDataset
+
+
+class TestInputSourceConfig:
+    """Test InputSourceConfig validation."""
+
+    def test_csv_source_config(self):
+        """Test CSV source configuration."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="test_csv",
+            path="./test.csv",
+            prompt_column="prompt",
+            weight=1.5,
+        )
+        assert config.source_type == "csv"
+        assert config.dataset_name == "test_csv"
+        assert config.path == "./test.csv"
+        assert config.weight == 1.5
+
+    def test_huggingface_source_config(self):
+        """Test HuggingFace source configuration."""
+        config = InputSourceConfig(
+            source_type="huggingface",
+            dataset_name="test/dataset",
+            split="train",
+            max_samples=100,
+        )
+        assert config.source_type == "huggingface"
+        assert config.split == "train"
+        assert config.max_samples == 100
+
+    def test_proxy_source_config(self):
+        """Test proxy source configuration."""
+        config = InputSourceConfig(
+            source_type="proxy",
+            dataset_name="proxy_test",
+        )
+        assert config.source_type == "proxy"
+        assert config.enabled is True  # Default value
+
+    def test_disabled_source(self):
+        """Test disabled source configuration."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="disabled_test",
+            enabled=False,
+        )
+        assert config.enabled is False
+
+    def test_weight_validation(self):
+        """Test that weight must be non-negative."""
+        with pytest.raises(ValueError):
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="test",
+                weight=-1.0,
+            )
+
+
+class TestUnifiedDatasetLoader:
+    """Test UnifiedDatasetLoader functionality."""
+
+    @pytest.mark.asyncio
+    async def test_load_single_csv_source(self):
+        """Test loading a single CSV source."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="test_csv",
+            path="test.csv",
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        # Mock the load_csv function
+        mock_dataset = ProbeDataset(
+            dataset_name="test_csv",
+            prompts=["prompt1", "prompt2", "prompt3"],
+            tokens=10,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            return_value=mock_dataset,
+        ):
+            result = await loader.load_all()
+
+        assert result.dataset_name == "unified"
+        assert len(result.prompts) == 3
+        assert result.prompts == ["prompt1", "prompt2", "prompt3"]
+
+    @pytest.mark.asyncio
+    async def test_load_single_huggingface_source(self):
+        """Test loading a single HuggingFace source."""
+        config = InputSourceConfig(
+            source_type="huggingface",
+            dataset_name="test/dataset",
+            split="train",
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        # Mock the load_dataset_generic function
+        mock_dataset = ProbeDataset(
+            dataset_name="test/dataset",
+            prompts=["hf_prompt1", "hf_prompt2"],
+            tokens=8,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_dataset_generic",
+            return_value=mock_dataset,
+        ):
+            result = await loader.load_all()
+
+        assert result.dataset_name == "unified"
+        assert len(result.prompts) == 2
+
+    @pytest.mark.asyncio
+    async def test_merge_multiple_sources(self):
+        """Test merging multiple sources."""
+        configs = [
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="csv1",
+                path="test1.csv",
+                weight=1.0,
+            ),
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="csv2",
+                path="test2.csv",
+                weight=2.0,
+            ),
+        ]
+        loader = UnifiedDatasetLoader(configs)
+
+        # Mock datasets
+        mock_dataset1 = ProbeDataset(
+            dataset_name="csv1",
+            prompts=["prompt1"],
+            tokens=5,
+            approx_cost=0.0,
+            metadata={},
+        )
+        mock_dataset2 = ProbeDataset(
+            dataset_name="csv2",
+            prompts=["prompt2", "prompt3"],
+            tokens=10,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            side_effect=[mock_dataset1, mock_dataset2],
+        ):
+            result = await loader.load_all()
+
+        assert result.dataset_name == "unified"
+        # Weight 1.0 = include once, weight 2.0 = include twice
+        # csv1: 1 prompt * 1 = 1
+        # csv2: 2 prompts * 2 = 4
+        assert len(result.prompts) == 5
+        assert "csv1" in result.metadata["sources"]
+        assert "csv2" in result.metadata["sources"]
+
+    @pytest.mark.asyncio
+    async def test_handle_disabled_sources(self):
+        """Test that disabled sources are skipped."""
+        configs = [
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="enabled_csv",
+                path="enabled.csv",
+                enabled=True,
+            ),
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="disabled_csv",
+                path="disabled.csv",
+                enabled=False,
+            ),
+        ]
+        loader = UnifiedDatasetLoader(configs)
+
+        mock_dataset = ProbeDataset(
+            dataset_name="enabled_csv",
+            prompts=["prompt1"],
+            tokens=5,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            return_value=mock_dataset,
+        ) as mock_load:
+            result = await loader.load_all()
+
+        # Should only be called once (for enabled source)
+        assert mock_load.call_count == 1
+        assert len(result.prompts) == 1
+
+    @pytest.mark.asyncio
+    async def test_max_samples_limit(self):
+        """Test that max_samples limits the number of prompts."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="test_csv",
+            path="test.csv",
+            max_samples=2,
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        # Mock dataset with more prompts than max_samples
+        mock_dataset = ProbeDataset(
+            dataset_name="test_csv",
+            prompts=["prompt1", "prompt2", "prompt3", "prompt4", "prompt5"],
+            tokens=20,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            return_value=mock_dataset,
+        ):
+            result = await loader.load_all()
+
+        # Should be limited to 2 prompts
+        assert len(result.prompts) == 2
+
+    @pytest.mark.asyncio
+    async def test_error_handling(self):
+        """Test that errors are handled gracefully."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="error_csv",
+            path="nonexistent.csv",
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            side_effect=Exception("File not found"),
+        ):
+            result = await loader.load_all()
+
+        # Should return empty dataset on error
+        assert result.dataset_name == "unified_empty"
+        assert len(result.prompts) == 0
+
+    @pytest.mark.asyncio
+    async def test_proxy_source_placeholder(self):
+        """Test that proxy source returns empty dataset (not implemented in PoC)."""
+        config = InputSourceConfig(
+            source_type="proxy",
+            dataset_name="proxy_test",
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        result = await loader.load_all()
+
+        # Proxy not implemented in PoC, should return empty
+        assert len(result.prompts) == 0
+
+    @pytest.mark.asyncio
+    async def test_weighted_sampling(self):
+        """Test weighted sampling behavior."""
+        configs = [
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="low_weight",
+                path="low.csv",
+                weight=1.0,
+            ),
+            InputSourceConfig(
+                source_type="csv",
+                dataset_name="high_weight",
+                path="high.csv",
+                weight=3.0,
+            ),
+        ]
+        loader = UnifiedDatasetLoader(configs)
+
+        mock_dataset1 = ProbeDataset(
+            dataset_name="low_weight",
+            prompts=["a"],
+            tokens=1,
+            approx_cost=0.0,
+            metadata={},
+        )
+        mock_dataset2 = ProbeDataset(
+            dataset_name="high_weight",
+            prompts=["b"],
+            tokens=1,
+            approx_cost=0.0,
+            metadata={},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_csv",
+            side_effect=[mock_dataset1, mock_dataset2],
+        ):
+            result = await loader.load_all()
+
+        # Weight 1.0: 1 prompt * 1 = 1
+        # Weight 3.0: 1 prompt * 3 = 3
+        # Total: 4 prompts
+        assert len(result.prompts) == 4
+        assert result.prompts.count("a") == 1
+        assert result.prompts.count("b") == 3
+
+    @pytest.mark.asyncio
+    async def test_empty_configs_list(self):
+        """Test loading with empty configs list."""
+        loader = UnifiedDatasetLoader([])
+        result = await loader.load_all()
+
+        assert result.dataset_name == "unified_empty"
+        assert len(result.prompts) == 0
+
+    @pytest.mark.asyncio
+    async def test_csv_with_url(self):
+        """Test CSV loading from URL."""
+        config = InputSourceConfig(
+            source_type="csv",
+            dataset_name="remote_csv",
+            url="https://example.com/data.csv",
+            prompt_column="text",
+        )
+        loader = UnifiedDatasetLoader([config])
+
+        mock_dataset = ProbeDataset(
+            dataset_name="remote_csv",
+            prompts=["remote_prompt"],
+            tokens=5,
+            approx_cost=0.0,
+            metadata={"source_type": "csv", "url": "https://example.com/data.csv"},
+        )
+
+        with patch(
+            "agentic_security.probe_data.unified_loader.load_dataset_generic",
+            return_value=mock_dataset,
+        ):
+            result = await loader.load_all()
+
+        assert len(result.prompts) == 1
+        assert result.prompts[0] == "remote_prompt"
@@ -1,9 +1,12 @@
+from fastapi import FastAPI
 from fastapi.testclient import TestClient

 import agentic_security.test_spec_assets as test_spec_assets
 from agentic_security.routes.scan import router

-client = TestClient(router)
+app = FastAPI()
+app.include_router(router)
+client = TestClient(app)


 def test_upload_csv_and_run():
@@ -1,5 +1,6 @@
 import base64
 import io
+import random

 import httpx
 import pytest
@@ -85,8 +86,9 @@ def test_data_config_endpoint():

 def test_refusal_rate():
    """Test that refusal rate is approximately 20%"""
+    random.seed(0)
    refusal_count = 0
-    total_trials = 1000
+    total_trials = 200

    for _ in range(total_trials):
        response = client.post("/v1/self-probe", json={"prompt": "test"})
@@ -2,11 +2,14 @@ from pathlib import Path
 from unittest.mock import patch

 import pytest
+from fastapi import FastAPI
 from fastapi.testclient import TestClient

 from agentic_security.routes.report import router

-client = TestClient(router)
+app = FastAPI()
+app.include_router(router)
+client = TestClient(app)


@pytest.fixture
@@ -1,13 +1,15 @@
 from pathlib import Path

 import pytest
-from fastapi import HTTPException
+from fastapi import FastAPI, HTTPException
 from fastapi.testclient import TestClient

 from agentic_security.primitives import Settings
 from agentic_security.routes.static import get_static_file, router

-client = TestClient(router)
+app = FastAPI()
+app.include_router(router)
+client = TestClient(app)


 def test_root_route():
@@ -0,0 +1,25 @@
+import os
+from pathlib import Path
+
+from agentic_security.cache_config import ensure_cache_dir
+
+
+def test_ensure_cache_dir_creates_dir_and_sets_env(tmp_path, monkeypatch):
+    monkeypatch.delenv("DISK_CACHE_DIR", raising=False)
+    target_dir = tmp_path / "cache_to_disk"
+
+    resolved = ensure_cache_dir(target_dir)
+
+    assert resolved == target_dir
+    assert resolved.is_dir()
+    assert Path(os.environ["DISK_CACHE_DIR"]) == resolved
+
+
+def test_ensure_cache_dir_respects_existing_env(tmp_path, monkeypatch):
+    env_dir = tmp_path / "preconfigured"
+    monkeypatch.setenv("DISK_CACHE_DIR", str(env_dir))
+
+    resolved = ensure_cache_dir()
+
+    assert resolved == env_dir
+    assert resolved.exists()
@@ -1,6 +1,7 @@
 import importlib
 import os
 import signal
+import socket
 import subprocess
 import tempfile
 import time
@@ -24,12 +25,29 @@ def test_server(request):
        preexec_fn=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN),
    )

-    # Give the server time to start
-    time.sleep(2)
+    def wait_for_port(host: str, port: int, timeout: float = 5.0) -> bool:
+        start = time.time()
+        while time.time() - start < timeout:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.settimeout(0.2)
+                try:
+                    sock.connect((host, port))
+                    return True
+                except OSError:
+                    time.sleep(0.1)
+        return False
+
+    if not wait_for_port("127.0.0.1", 9094):
+        server.kill()
+        pytest.skip("Test server failed to start within timeout")

    def cleanup():
        server.terminate()
-        server.wait()
+        try:
+            server.wait(timeout=3)
+        except subprocess.TimeoutExpired:
+            server.kill()
+            server.wait(timeout=2)

    request.addfinalizer(cleanup)
    return server
@@ -125,6 +143,7 @@ class TestLibraryLevel:
        print(result)
        assert len(result) in [0, 1]

+    @pytest.mark.skip
    def test_image_modality(self):
        llmSpec = test_spec_assets.IMAGE_SPEC
        maxBudget = 2
@@ -0,0 +1,12 @@
+import pytest
+
+from agentic_security.mcp.client import run
+
+
+@pytest.mark.asyncio
+async def test_mcp_echo_tool():
+    """Test the echo tool functionality"""
+    prompts, resources, tools = await run()
+    assert prompts
+    assert resources
+    assert tools
@@ -1,7 +1,7 @@
 import pytest
-from datasets import load_dataset

 from agentic_security.probe_data import REGISTRY
+from datasets import load_dataset


@pytest.mark.slow
@@ -1,6 +1,10 @@
 import pytest

-from agentic_security.http_spec import LLMSpec, parse_http_spec
+from agentic_security.http_spec import (
+    InvalidHTTPSpecError,
+    LLMSpec,
+    parse_http_spec,
+)


 class TestParseHttpSpec:
@@ -55,6 +59,19 @@ class TestParseHttpSpec:
        assert result.headers == {"Content-Type": "application/json"}
        assert result.body == ""

+    def test_parse_http_spec_rejects_malformed_header(self):
+        http_spec = "GET http://example.com\nHeaderWithoutColon\n\n"
+
+        with pytest.raises(InvalidHTTPSpecError, match="Invalid header line"):
+            parse_http_spec(http_spec)
+
+    def test_parse_http_spec_trims_header_whitespace(self):
+        http_spec = "GET http://example.com\nAuthorization:Bearer token\n\n"
+
+        result = parse_http_spec(http_spec)
+
+        assert result.headers == {"Authorization": "Bearer token"}
+

 class TestLLMSpec:
    def test_validate_raises_error_for_missing_files(self):
@@ -70,49 +87,3 @@ class TestLLMSpec:
        )
        with pytest.raises(ValueError, match="An image is required for this request."):
            spec.validate(prompt="", encoded_image="", encoded_audio="", files={})
-
-    @pytest.mark.asyncio
-    async def test_probe_sends_request(self, httpx_mock):
-        httpx_mock.add_response(
-            method="POST", url="http://example.com", status_code=200
-        )
-        spec = LLMSpec(
-            method="POST",
-            url="http://example.com",
-            headers={},
-            body='{"prompt": "<<PROMPT>>"}',
-        )
-        response = await spec.probe(prompt="test")
-        assert response.status_code == 200
-
-    @pytest.mark.asyncio
-    async def test_probe_with_files(self, httpx_mock):
-        httpx_mock.add_response(
-            method="POST", url="http://example.com", status_code=200
-        )
-        spec = LLMSpec(
-            method="POST",
-            url="http://example.com",
-            headers={"Content-Type": "multipart/form-data"},
-            body='{"prompt": "<<PROMPT>>"}',
-            has_files=True,
-        )
-        files = {"file": ("filename.txt", "file content")}
-        response = await spec.probe(prompt="test", files=files)
-        assert response.status_code == 200
-
-    @pytest.mark.asyncio
-    async def test_probe_with_image(self, httpx_mock):
-        httpx_mock.add_response(
-            method="POST", url="http://example.com", status_code=200
-        )
-        spec = LLMSpec(
-            method="POST",
-            url="http://example.com",
-            headers={},
-            body='{"image": "<<BASE64_IMAGE>>"}',
-            has_image=True,
-        )
-        encoded_image = "base64encodedstring"
-        response = await spec.probe(prompt="test", encoded_image=encoded_image)
-        assert response.status_code == 200
@@ -4266,9 +4266,9 @@
      }
    },
    "node_modules/compression": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/compression/-/compression-1.8.0.tgz",
-      "integrity": "sha512-k6WLKfunuqCYD3t6AsuPGvQWaKwuLLh2/xHNcX4qE+vIfDNXpSqnrhwA7O53R7WVQUnt8dVAIW+YHr7xTgOgGA==",
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/compression/-/compression-1.8.1.tgz",
+      "integrity": "sha512-9mAqGPHLakhCLeNyxPkK4xVo746zQ/czLH1Ky+vkitMnWfWZps8r0qXuwhwizagCRttsL4lfG4pIOvaWLpAP0w==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -4276,7 +4276,7 @@
        "compressible": "~2.0.18",
        "debug": "2.6.9",
        "negotiator": "~0.6.4",
-        "on-headers": "~1.0.2",
+        "on-headers": "~1.1.0",
        "safe-buffer": "5.2.1",
        "vary": "~1.1.2"
      },
@@ -6891,9 +6891,9 @@
      }
    },
    "node_modules/http-proxy-middleware": {
-      "version": "2.0.7",
-      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz",
-      "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==",
+      "version": "2.0.9",
+      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.9.tgz",
+      "integrity": "sha512-c1IyJYLYppU574+YI7R4QyX2ystMtVXZwIdzazUIPIJsHuWNd+mho2j+bKoHftndicGj9yh+xjd+l0yj7VeT1Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -8419,9 +8419,9 @@
      }
    },
    "node_modules/on-headers": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
-      "integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==",
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.1.0.tgz",
+      "integrity": "sha512-737ZY3yNnXy37FHkQxPzt4UZ2UWPWiCZWLvFZ4fu5cueciegX0zGPnrlY6bwRg4FdQOe9YU8MkmJwGhoMybl8A==",
      "dev": true,
      "license": "MIT",
      "engines": {
Author	SHA1	Message	Date
Alexander Myasoedov	5238d67846	feat(cleanup):	2025-12-24 08:18:17 +02:00
Alexander Myasoedov	a9adb22458	fix(pc):	2025-12-24 08:16:21 +02:00
Alexander Myasoedov	2dc41af98d	feat(cleanup):	2025-12-24 08:11:43 +02:00
Alexander Myasoedov	48125bd106	feat(add executor):	2025-12-24 08:10:08 +02:00
Alexander Myasoedov	5285fdd0a0	codex quality run #1	2025-12-10 23:06:40 +02:00
Alexander Myasoedov	bf628db5c4	codex quality run #1	2025-12-10 22:53:55 +02:00
Alexander Myasoedov	d56b406e1a	fix(tests runtime):	2025-12-09 20:00:04 +02:00
Alexander Myasoedov	b9dc5de708	feat(add cache dir):	2025-12-09 19:51:47 +02:00
Alexander Myasoedov	9a4fb05491	fix(pc):	2025-11-30 18:50:00 +02:00
Alexander Myasoedov	3e2df49976	fix(pc):	2025-11-30 18:47:15 +02:00
Alexander Myasoedov	14eefb7a67	fix(clean up):	2025-11-30 18:43:37 +02:00
Alexander Myasoedov	7a9c884333	fix(pc):	2025-11-30 18:41:00 +02:00
Alexander Myasoedov	a8b5876883	fix(ga):	2025-11-30 18:38:41 +02:00
Alexander Myasoedov	fbe9885c0b	fix(simplify workflow):	2025-11-30 18:37:23 +02:00
Alexander Myasoedov	583eec1a67	fix(gh):	2025-11-30 18:36:33 +02:00
Alexander Myasoedov	f19664f95c	fix(pc):	2025-11-30 18:32:58 +02:00
Alexander Myasoedov	b3ae0026fb	fix(warnings):	2025-11-30 18:30:55 +02:00
Alexander Myasoedov	8ddfec303f	feat(poetry update):	2025-11-30 14:21:20 +02:00
Alexander Myasoedov	c45778f196	Merge pull request #252 from Davda-James/feat/mcp_client_logging logging added for mcp client operations	2025-08-21 15:00:22 +03:00
Alexander Myasoedov	a5bdbe54a2	Merge branch 'main' of github.com:msoedov/agentic_security	2025-08-13 13:52:19 +03:00
Alexander Myasoedov	61da912f18	feat(update deps):	2025-08-13 13:46:37 +03:00
DavdaJames	a02aed2c2b	changes done by pre-commit hooks	2025-08-10 14:33:25 +05:30
DavdaJames	40ff7f9dfb	added the comments back	2025-08-10 13:49:08 +05:30
DavdaJames	c09ce32def	feature added for logging of mcp client	2025-08-10 13:42:32 +05:30
Alexander Myasoedov	c5406e8a0e	Merge pull request #238 from msoedov/dependabot/npm_and_yarn/ui/multi-96c788614a build(deps): bump on-headers and compression in /ui	2025-07-18 13:33:47 +03:00
dependabot[bot]	b260672b1a	build(deps): bump on-headers and compression in /ui Bumps [on-headers](https://github.com/jshttp/on-headers) and [compression](https://github.com/expressjs/compression). These dependencies needed to be updated together. Updates `on-headers` from 1.0.2 to 1.1.0 - [Release notes](https://github.com/jshttp/on-headers/releases) - [Changelog](https://github.com/jshttp/on-headers/blob/master/HISTORY.md) - [Commits](https://github.com/jshttp/on-headers/compare/v1.0.2...v1.1.0) Updates `compression` from 1.8.0 to 1.8.1 - [Release notes](https://github.com/expressjs/compression/releases) - [Changelog](https://github.com/expressjs/compression/blob/master/HISTORY.md) - [Commits](https://github.com/expressjs/compression/compare/1.8.0...v1.8.1) --- updated-dependencies: - dependency-name: on-headers dependency-version: 1.1.0 dependency-type: indirect - dependency-name: compression dependency-version: 1.8.1 dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com>	2025-07-18 10:32:43 +00:00
Alexander Myasoedov	0a07fc54d6	Merge pull request #229 from msoedov/dependabot/pip/requests-2.32.4 build(deps): bump requests from 2.32.3 to 2.32.4	2025-06-10 14:03:41 +03:00
dependabot[bot]	2f1151d44d	build(deps): bump requests from 2.32.3 to 2.32.4 Bumps [requests](https://github.com/psf/requests) from 2.32.3 to 2.32.4. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.4 dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com>	2025-06-10 09:13:51 +00:00
Alexander Myasoedov	d0353e3ab9	fix(bump pyproject):	2025-05-27 13:46:33 +03:00
Alexander Myasoedov	926c583a17	fix(csv ds loading):	2025-05-27 13:41:10 +03:00
Alexander Myasoedov	17e34356e1	feat(bump version):	2025-05-19 12:35:44 +03:00
Alexander Myasoedov	312fa756a5	feat(rm ref):	2025-05-19 12:33:27 +03:00
Alexander Myasoedov	145e7f81e1	feat(Update readme):	2025-05-19 12:32:48 +03:00
Alexander Myasoedov	04af7d24a1	Merge pull request #223 from lwsinclair/add-mseep-badge Add MseeP.ai badge	2025-05-19 12:31:16 +03:00
Alexander Myasoedov	c5c5ae2e4b	fix(makedir):	2025-05-19 12:29:28 +03:00
Alexander Myasoedov	2bc0605a1d	Merge pull request #224 from Mundi-Xu/datasets-optimize refactor: standardize CSV loading from ./datasets and improve robustness	2025-05-19 12:27:25 +03:00
Hanyin	335787d40e	refactor: standardize CSV loading from ./datasets and improve robustness - Load all CSVs from ./datasets directory - Add encoding_errors='ignore' for resilient CSV parsing - Ensure prompt generators are converted to lists before sampling	2025-05-19 16:19:38 +08:00
Lawrence Sinclair	1b211b5d76	Add MseeP.ai badge to Readme.md	2025-05-14 17:46:50 +07:00
Alexander Myasoedov	444f908009	Merge pull request #220 from msoedov/dependabot/npm_and_yarn/ui/http-proxy-middleware-2.0.9 build(deps-dev): bump http-proxy-middleware from 2.0.7 to 2.0.9 in /ui	2025-05-02 13:04:54 +03:00
dependabot[bot]	f81dc508f9	build(deps-dev): bump http-proxy-middleware from 2.0.7 to 2.0.9 in /ui Bumps [http-proxy-middleware](https://github.com/chimurai/http-proxy-middleware) from 2.0.7 to 2.0.9. - [Release notes](https://github.com/chimurai/http-proxy-middleware/releases) - [Changelog](https://github.com/chimurai/http-proxy-middleware/blob/v2.0.9/CHANGELOG.md) - [Commits](https://github.com/chimurai/http-proxy-middleware/compare/v2.0.7...v2.0.9) --- updated-dependencies: - dependency-name: http-proxy-middleware dependency-version: 2.0.9 dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com>	2025-04-29 02:24:24 +00:00
Alexander Myasoedov	4a55b99d70	Merge pull request #215 from Davda-James/fix/Dockerfile Fixed the Dockerfile error of setuptools and wheel	2025-04-09 19:56:08 +03:00
DavdaJames	5c2f9eba71	wheel and setuptools are required before running RUN pip install --no-cache-dir -r requirements.txt which is missing in dockerfile and hence docker build was breaking in between build process	2025-04-09 20:23:03 +05:30
Alexander Myasoedov	aa2fe4d1ad	feat(bump version):	2025-04-07 14:37:59 +03:00
Alexander Myasoedov	cf7c017621	feat(add mcp to deps):	2025-04-07 14:32:40 +03:00
Alexander Myasoedov	73184e3454	fix(simplify tests):	2025-04-07 14:29:41 +03:00
Alexander Myasoedov	3720ece2af	fix(test vars):	2025-04-03 20:48:23 +03:00
Alexander Myasoedov	0dc738a11e	fix(pc):	2025-04-03 20:43:53 +03:00
Alexander Myasoedov	47ca656d59	Merge pull request #213 from sjay8/main Fixed issues 191 195	2025-04-03 20:42:50 +03:00
sjay8	4fa166298d	Fixed issues 191 195	2025-04-03 00:21:09 -07:00