Compare commits

...

2 Commits

Author SHA1 Message Date
Songbird
22f01562ba Add env-configurable timeout for proxy providers 2025-10-21 14:26:34 +02:00
Songbird
092a90df5d feat: seed governance config and responses routing 2025-10-18 15:52:59 +02:00
10 changed files with 1134 additions and 32 deletions

6
.gitignore vendored
View File

@@ -188,6 +188,10 @@ logs/
# Docker volume configs (keep .env.example but ignore actual .env)
volumes/env/.env
# Vendored proxy sources (kept locally for reference)
ai/proxy/bifrost/
ai/proxy/litellm/
# Test project databases and configurations
test_projects/*/.fuzzforge/
test_projects/*/findings.db*
@@ -300,4 +304,4 @@ test_projects/*/.npmrc
test_projects/*/.git-credentials
test_projects/*/credentials.*
test_projects/*/api_keys.*
test_projects/*/ci-*.sh
test_projects/*/ci-*.sh

View File

@@ -1,10 +1,17 @@
# Default LiteLLM configuration
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# LITELLM_PROVIDER=gemini
# Default LiteLLM configuration routed through the proxy
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai
# API keys (uncomment and fill as needed)
# GOOGLE_API_KEY=
# OPENAI_API_KEY=
# ANTHROPIC_API_KEY=
# OPENROUTER_API_KEY=
# MISTRAL_API_KEY=
# Proxy connection (override when running locally without Docker networking)
# Use http://localhost:10999 when accessing from the host
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Virtual key issued by Bifrost or LiteLLM proxy for the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default
# Upstream provider keys live inside the proxy container
# BIFROST_OPENAI_KEY=
# BIFROST_ANTHROPIC_KEY=
# BIFROST_GEMINI_KEY=
# BIFROST_MISTRAL_KEY=
# BIFROST_OPENROUTER_KEY=

View File

@@ -43,18 +43,30 @@ cd task_agent
# cp .env.example .env
```
Edit `.env` (or `.env.example`) and add your API keys. The agent must be restarted after changes so the values are picked up:
Edit `.env` (or `.env.example`) and add your proxy + API keys. The agent must be restarted after changes so the values are picked up:
```bash
# Set default model
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# Route every request through the proxy container (use http://localhost:10999 from the host)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Add API keys for providers you want to use
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
ANTHROPIC_API_KEY=your_anthropic_api_key
OPENROUTER_API_KEY=your_openrouter_api_key
# Default model + provider the agent boots with
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai
# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default
# Upstream keys stay inside the proxy (Bifrost config references env.BIFROST_* names)
BIFROST_OPENAI_KEY=your_real_openai_api_key
BIFROST_ANTHROPIC_KEY=your_real_anthropic_key
BIFROST_GEMINI_KEY=your_real_gemini_key
BIFROST_MISTRAL_KEY=your_real_mistral_key
BIFROST_OPENROUTER_KEY=your_real_openrouter_key
```
> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`).
The compose bootstrap container provisions the Bifrost gateway, creates a virtual key for `fuzzforge-task-agent`, and rewrites `volumes/env/.env`. Fill in the `BIFROST_*` upstream secrets before the first launch so the proxy can reach your providers when the bootstrap script runs.
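To confirm the bootstrap finished, check the one-shot container's logs and the rewritten env file (a quick sketch; the service name matches the compose file in this change set):
```bash
# Logs should end with "[llm-proxy-bootstrap] Bootstrap complete"
docker compose logs llm-proxy-bootstrap

# The placeholder should now be a proxy-issued virtual key
grep OPENAI_API_KEY volumes/env/.env
```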
### 2. Install Dependencies
```bash

View File

@@ -4,13 +4,28 @@ from __future__ import annotations
import os
def _normalize_proxy_base_url(raw_value: str | None) -> str | None:
if not raw_value:
return None
cleaned = raw_value.strip()
if not cleaned:
return None
# Avoid double slashes in downstream requests
return cleaned.rstrip("/")
AGENT_NAME = "litellm_agent"
AGENT_DESCRIPTION = (
"A LiteLLM-backed shell that exposes hot-swappable model and prompt controls."
)
DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "gemini-2.0-flash-001")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER")
DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None
PROXY_BASE_URL = _normalize_proxy_base_url(
os.getenv("FF_LLM_PROXY_BASE_URL")
or os.getenv("LITELLM_API_BASE")
or os.getenv("LITELLM_BASE_URL")
)
STATE_PREFIX = "app:litellm_agent/"
STATE_MODEL_KEY = f"{STATE_PREFIX}model"

View File

@@ -3,11 +3,15 @@
from __future__ import annotations
from dataclasses import dataclass
import os
from typing import Any, Mapping, MutableMapping, Optional
import httpx
from .config import (
DEFAULT_MODEL,
DEFAULT_PROVIDER,
PROXY_BASE_URL,
STATE_MODEL_KEY,
STATE_PROMPT_KEY,
STATE_PROVIDER_KEY,
@@ -66,11 +70,109 @@ class HotSwapState:
"""Create a LiteLlm instance for the current state."""
from google.adk.models.lite_llm import LiteLlm # Lazy import to avoid cycle
from google.adk.models.lite_llm import LiteLLMClient
from litellm.types.utils import Choices, Message, ModelResponse, Usage
kwargs = {"model": self.model}
if self.provider:
kwargs["custom_llm_provider"] = self.provider
return LiteLlm(**kwargs)
if PROXY_BASE_URL:
provider = (self.provider or DEFAULT_PROVIDER or "").lower()
if provider and provider != "openai":
kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}"
else:
kwargs["api_base"] = PROXY_BASE_URL
kwargs.setdefault("api_key", os.environ.get("OPENAI_API_KEY"))
provider = (self.provider or DEFAULT_PROVIDER or "").lower()
model_suffix = self.model.split("/", 1)[-1]
use_responses = provider == "openai" and (
model_suffix.startswith("gpt-5") or model_suffix.startswith("o1")
)
if use_responses:
kwargs.setdefault("use_responses_api", True)
llm = LiteLlm(**kwargs)
if use_responses and PROXY_BASE_URL:
class _ResponsesAwareClient(LiteLLMClient):
def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str):
self._base_client = base_client
self._api_base = api_base.rstrip("/")
self._api_key = api_key
async def acompletion(self, model, messages, tools, **kwargs): # type: ignore[override]
use_responses_api = kwargs.pop("use_responses_api", False)
if not use_responses_api:
return await self._base_client.acompletion(
model=model,
messages=messages,
tools=tools,
**kwargs,
)
resolved_model = model
if "/" not in resolved_model:
resolved_model = f"openai/{resolved_model}"
payload = {
"model": resolved_model,
"input": _messages_to_responses_input(messages),
}
timeout = kwargs.get("timeout", 60)
headers = {
"Authorization": f"Bearer {self._api_key}",
"Content-Type": "application/json",
}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(
f"{self._api_base}/v1/responses",
json=payload,
headers=headers,
)
try:
response.raise_for_status()
except httpx.HTTPStatusError as exc:
text = exc.response.text
raise RuntimeError(
f"Bifrost responses request failed: {text}"
) from exc
data = response.json()
text_output = _extract_output_text(data)
usage = data.get("usage", {})
return ModelResponse(
id=data.get("id"),
model=model,
choices=[
Choices(
finish_reason="stop",
index=0,
message=Message(role="assistant", content=text_output),
provider_specific_fields={"bifrost_response": data},
)
],
usage=Usage(
prompt_tokens=usage.get("input_tokens"),
completion_tokens=usage.get("output_tokens"),
reasoning_tokens=usage.get("output_tokens_details", {}).get(
"reasoning_tokens"
),
total_tokens=usage.get("total_tokens"),
),
)
llm.llm_client = _ResponsesAwareClient(
llm.llm_client,
PROXY_BASE_URL,
os.environ.get("OPENAI_API_KEY", ""),
)
return llm
@property
def display_model(self) -> str:
@@ -84,3 +186,69 @@ def apply_state_to_agent(invocation_context, state: HotSwapState) -> None:
agent = invocation_context.agent
agent.model = state.instantiate_llm()
def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
inputs: list[dict[str, Any]] = []
for message in messages:
role = message.get("role", "user")
content = message.get("content", "")
text_segments: list[str] = []
if isinstance(content, list):
for item in content:
if isinstance(item, dict):
text = item.get("text") or item.get("content")
if text:
text_segments.append(str(text))
elif isinstance(item, str):
text_segments.append(item)
elif isinstance(content, str):
text_segments.append(content)
text = "\n".join(segment.strip() for segment in text_segments if segment)
if not text:
continue
entry_type = "input_text"
if role == "assistant":
entry_type = "output_text"
inputs.append(
{
"role": role,
"content": [
{
"type": entry_type,
"text": text,
}
],
}
)
if not inputs:
inputs.append(
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "",
}
],
}
)
return inputs
def _extract_output_text(response_json: dict[str, Any]) -> str:
outputs = response_json.get("output", [])
collected: list[str] = []
for item in outputs:
if isinstance(item, dict) and item.get("type") == "message":
for part in item.get("content", []):
if isinstance(part, dict) and part.get("type") == "output_text":
text = part.get("text", "")
if text:
collected.append(str(text))
return "\n\n".join(collected).strip()

5
ai/proxy/README.md Normal file
View File

@@ -0,0 +1,5 @@
# LLM Proxy Integrations
This directory contains vendored source trees kept only for reference while integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project.
See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents.

View File

@@ -146,6 +146,78 @@ services:
networks:
- fuzzforge-network
# ============================================================================
# LLM Proxy (Default) - Bifrost Gateway
# ============================================================================
llm-proxy:
image: maximhq/bifrost:latest
container_name: fuzzforge-llm-proxy
env_file:
- ./volumes/env/.env
environment:
APP_HOST: 0.0.0.0
APP_PORT: 8080
LOG_STYLE: pretty
OPENAI_API_KEY: ""
ports:
- "10999:8080" # Web UI + OpenAI-compatible API
volumes:
- llm_proxy_data:/app/data
networks:
- fuzzforge-network
restart: unless-stopped
# ============================================================================
# LLM Proxy Bootstrap - Seed providers and virtual keys
# ============================================================================
llm-proxy-bootstrap:
image: python:3.11-slim
container_name: fuzzforge-llm-proxy-bootstrap
depends_on:
llm-proxy:
condition: service_started
env_file:
- ./volumes/env/.env
environment:
PROXY_BASE_URL: http://llm-proxy:8080
ENV_FILE_PATH: /bootstrap/env/.env
CONFIG_FILE_PATH: /bootstrap/data/config.json
volumes:
- ./docker/scripts/bootstrap_llm_proxy.py:/app/bootstrap.py:ro
- ./volumes/env:/bootstrap/env
- llm_proxy_data:/bootstrap/data
networks:
- fuzzforge-network
command: ["python", "/app/bootstrap.py"]
restart: "no"
# ============================================================================
# LLM Proxy (Alternative) - LiteLLM Gateway
# ============================================================================
llm-proxy-litellm:
image: ghcr.io/berriai/litellm:main-stable
container_name: fuzzforge-llm-proxy-litellm
profiles:
- proxy-litellm
env_file:
- ./volumes/env/.env
environment:
DATABASE_URL: sqlite:////var/lib/litellm/litellm.db
STORE_MODEL_IN_DB: "True"
ports:
- "4110:4000" # HTTP API + UI
volumes:
- litellm_proxy_data:/var/lib/litellm
networks:
- fuzzforge-network
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
restart: unless-stopped
# ============================================================================
# Vertical Worker: Rust/Native Security
# ============================================================================
@@ -460,6 +532,9 @@ services:
context: ./ai/agents/task_agent
dockerfile: Dockerfile
container_name: fuzzforge-task-agent
depends_on:
llm-proxy-bootstrap:
condition: service_completed_successfully
ports:
- "10900:8000"
env_file:
@@ -560,6 +635,10 @@ volumes:
name: fuzzforge_worker_ossfuzz_cache
worker_ossfuzz_build:
name: fuzzforge_worker_ossfuzz_build
llm_proxy_data:
name: fuzzforge_llm_proxy_data
litellm_proxy_data:
name: fuzzforge_litellm_proxy_data
# Add more worker caches as you add verticals:
# worker_web_cache:
# worker_ios_cache:
@@ -593,6 +672,8 @@ networks:
# 4. Web UIs:
# - Temporal UI: http://localhost:8233
# - MinIO Console: http://localhost:9001 (user: fuzzforge, pass: fuzzforge123)
# - Bifrost Proxy: http://localhost:10999 (default, no profile required)
# - LiteLLM Proxy: http://localhost:4110 (requires --profile proxy-litellm)
#
# 5. Resource Usage (Baseline):
# - Temporal: ~500MB

View File

@@ -0,0 +1,627 @@
"""Bootstrap the Bifrost proxy with providers and default virtual keys.
This script runs inside a one-shot container during docker-compose startup.
It will:
1. Wait for the proxy health endpoint to respond.
2. Configure any upstream providers for which an env key is present.
3. Create (or reuse) the default virtual key for the task agent.
4. Persist the generated key back into volumes/env/.env so the agent uses it.
The script is idempotent: rerunning it leaves existing configs in place and skips
key generation if OPENAI_API_KEY already contains a proxy-issued key.
"""
from __future__ import annotations
import json
import os
import re
import sys
import time
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from datetime import datetime, timezone
from typing import Iterable, Mapping, Sequence
PROXY_BASE_URL = os.getenv("PROXY_BASE_URL", "http://llm-proxy:8080").rstrip("/")
ENV_FILE_PATH = Path(os.getenv("ENV_FILE_PATH", "/bootstrap/env/.env"))
BIFROST_ENV_FILE_PATH = Path(
os.getenv("BIFROST_ENV_FILE_PATH", "/bootstrap/env/.env.bifrost")
)
CONFIG_FILE_PATH = Path(os.getenv("CONFIG_FILE_PATH", "/bootstrap/data/config.json"))
DEFAULT_VIRTUAL_KEY_NAME = "task-agent default"
DEFAULT_VIRTUAL_KEY_USER = "fuzzforge-task-agent"
PLACEHOLDER_KEY = "sk-proxy-default"
MAX_WAIT_SECONDS = 120
DEFAULT_PROVIDER_MODELS: dict[str, list[str]] = {
"openai": ["gpt-5"],
}
@dataclass(frozen=True)
class ProviderSpec:
name: str
env_var: str
fallback_env_vars: tuple[str, ...] = ()
@property
def env_reference(self) -> str:
return f"env.{self.env_var}"
PROVIDERS: tuple[ProviderSpec, ...] = (
ProviderSpec("openai", "BIFROST_OPENAI_KEY", ("OPENAI_API_KEY",)),
ProviderSpec("anthropic", "BIFROST_ANTHROPIC_KEY", ("ANTHROPIC_API_KEY",)),
ProviderSpec("gemini", "BIFROST_GEMINI_KEY", ("GEMINI_API_KEY",)),
ProviderSpec("mistral", "BIFROST_MISTRAL_KEY", ("MISTRAL_API_KEY",)),
ProviderSpec("openrouter", "BIFROST_OPENROUTER_KEY", ("OPENROUTER_API_KEY",)),
)
UUID_PATTERN = re.compile(
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)
def looks_like_virtual_key(candidate: str | None) -> bool:
if not candidate:
return False
value = candidate.strip()
if not value or value == PLACEHOLDER_KEY:
return False
if UUID_PATTERN.match(value):
return True
if value.startswith("sk-proxy-"):
return True
return False
def set_env_value(lines: list[str], key: str, value: str) -> tuple[list[str], bool]:
prefix = f"{key}="
new_line = f"{prefix}{value}"
for idx, line in enumerate(lines):
stripped = line.lstrip()
if not stripped or stripped.startswith("#"):
continue
if stripped.startswith(prefix):
if lines[idx].lstrip() == new_line:
return lines, False
indent = line[: len(line) - len(stripped)]
lines[idx] = f"{indent}{new_line}"
return lines, True
lines.append(new_line)
return lines, True
def parse_env_lines(lines: list[str]) -> dict[str, str]:
mapping: dict[str, str] = {}
for line in lines:
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
if "=" not in stripped:
continue
key, value = stripped.split("=", 1)
mapping[key] = value
return mapping
def resolve_provider_key(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> tuple[str | None, str | None, str | None]:
candidate = bifrost_map.get(provider.env_var)
if candidate:
value = candidate.strip()
if value:
return value, provider.env_var, "bifrost"
candidate = env_map.get(provider.env_var)
if candidate:
value = candidate.strip()
if value and value != PLACEHOLDER_KEY:
return value, provider.env_var, "env"
candidate = os.getenv(provider.env_var)
if candidate:
value = candidate.strip()
if value and value != PLACEHOLDER_KEY:
return value, provider.env_var, "env"
for var in provider.fallback_env_vars:
raw_value = env_map.get(var) or os.getenv(var)
if not raw_value:
continue
value = raw_value.strip()
if not value or value == PLACEHOLDER_KEY:
continue
if var == "OPENAI_API_KEY" and looks_like_virtual_key(value):
continue
return value, var, "fallback"
return None, None, None
def ensure_provider_env_export(
lines: list[str], provider: ProviderSpec, key_value: str
) -> tuple[list[str], bool]:
# Store provider secrets under their dedicated BIFROST_* variables so future
# restarts inject them into the proxy container environment automatically.
updated_lines, changed = set_env_value(lines, provider.env_var, key_value)
if changed:
os.environ[provider.env_var] = key_value
return updated_lines, changed
def get_models_for_provider(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> list[str]:
env_var = f"BIFROST_{provider.name.upper()}_MODELS"
raw_value = (
os.getenv(env_var)
or env_map.get(env_var)
or bifrost_map.get(env_var)
)
if raw_value:
models = [item.strip() for item in raw_value.split(",") if item.strip()]
if models:
return models
return DEFAULT_PROVIDER_MODELS.get(provider.name, [])
def _should_use_responses_api(
provider: ProviderSpec,
models: list[str],
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> bool:
if provider.name != "openai":
return False
env_var = "BIFROST_OPENAI_USE_RESPONSES_API"
raw_value = (
os.getenv(env_var)
or env_map.get(env_var)
or bifrost_map.get(env_var)
)
if raw_value and raw_value.strip().lower() in {"1", "true", "yes", "on"}:
return True
for model in models:
suffix = model.split("/", 1)[-1]
if suffix.startswith("gpt-5") or suffix.startswith("o1"):
return True
return False
def _read_positive_int(
candidate: str | None,
*,
var_name: str,
) -> int | None:
if candidate is None:
return None
value = candidate.strip()
if not value:
return None
try:
parsed = int(value)
except ValueError:
log(f"Ignoring non-integer timeout for {var_name}: {value}")
return None
if parsed <= 0:
log(f"Ignoring non-positive timeout for {var_name}: {parsed}")
return None
return parsed
def _lookup_timeout_var(
var_name: str,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> int | None:
for source in (
bifrost_map.get(var_name),
env_map.get(var_name),
os.getenv(var_name),
):
parsed = _read_positive_int(source, var_name=var_name)
if parsed is not None:
return parsed
return None
def _get_timeout_seconds(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> int | None:
provider_specific_var = f"BIFROST_{provider.name.upper()}_TIMEOUT_SECONDS"
timeout = _lookup_timeout_var(provider_specific_var, env_map, bifrost_map)
if timeout is not None:
return timeout
return _lookup_timeout_var("BIFROST_DEFAULT_TIMEOUT_SECONDS", env_map, bifrost_map)
def build_network_config(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> dict[str, object] | None:
timeout = _get_timeout_seconds(provider, env_map, bifrost_map)
if timeout is None:
return None
return {"default_request_timeout_in_seconds": timeout}
def build_provider_config_entry(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
*,
network_config: dict[str, object] | None = None,
) -> dict[str, object]:
models = get_models_for_provider(provider, env_map, bifrost_map)
key_entry: dict[str, object] = {
"value": provider.env_reference,
"models": models,
"weight": 1.0,
}
if _should_use_responses_api(provider, models, env_map, bifrost_map):
key_entry["openai_key_config"] = {"use_responses_api": True}
entry: dict[str, object] = {"keys": [key_entry]}
if network_config:
entry["network_config"] = network_config
return entry
def _default_client_config() -> dict[str, object]:
return {"drop_excess_requests": False}
def _default_config_store_config() -> dict[str, object]:
return {
"enabled": True,
"type": "sqlite",
"config": {"path": "./config.db"},
}
def update_config_file(
providers_config: dict[str, dict[str, object]],
virtual_key_value: str | None = None,
) -> None:
if not providers_config:
return
config_data: dict[str, object]
if CONFIG_FILE_PATH.exists():
try:
config_data = json.loads(CONFIG_FILE_PATH.read_text() or "{}")
except json.JSONDecodeError:
log(
"Existing config.json is invalid JSON; regenerating from provider metadata"
)
config_data = {}
else:
config_data = {}
providers_section = config_data.setdefault("providers", {})
config_data.setdefault("client", _default_client_config())
config_data.setdefault("config_store", _default_config_store_config())
changed = False
for name, entry in providers_config.items():
if providers_section.get(name) != entry:
providers_section[name] = entry
changed = True
if virtual_key_value:
governance_section = config_data.setdefault("governance", {})
vk_list: list[dict[str, object]] = governance_section.setdefault(
"virtual_keys", []
)
provider_configs = []
for provider_name, entry in providers_config.items():
allowed_models: list[str] = []
for key_entry in entry.get("keys", []):
models = key_entry.get("models", [])
if models:
allowed_models.extend(models)
provider_configs.append(
{
"provider": provider_name,
"weight": 1.0,
"allowed_models": allowed_models,
}
)
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
virtual_key_entry = {
"id": f"{DEFAULT_VIRTUAL_KEY_USER}-vk",
"name": DEFAULT_VIRTUAL_KEY_NAME,
"description": "Default virtual key issued during bootstrap",
"value": virtual_key_value,
"is_active": True,
"provider_configs": provider_configs,
"created_at": now_iso,
"updated_at": now_iso,
}
matched = False
for existing in vk_list:
if existing.get("name") == DEFAULT_VIRTUAL_KEY_NAME or existing.get(
"id"
) == virtual_key_entry["id"]:
existing.update(virtual_key_entry)
matched = True
changed = True
break
if not matched:
vk_list.append(virtual_key_entry)
changed = True
if not changed:
return
CONFIG_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
CONFIG_FILE_PATH.write_text(json.dumps(config_data, indent=2, sort_keys=True) + "\n")
log(f"Wrote provider config to {CONFIG_FILE_PATH}")
def log(message: str) -> None:
print(f"[llm-proxy-bootstrap] {message}", flush=True)
def wait_for_proxy() -> None:
url = f"{PROXY_BASE_URL}/health"
deadline = time.time() + MAX_WAIT_SECONDS
while time.time() < deadline:
try:
with urllib.request.urlopen(url) as response: # noqa: S310
if response.status == 200:
log("Proxy health endpoint is reachable")
return
except urllib.error.URLError as exc: # pragma: no cover - best effort logging
log(f"Proxy not ready yet: {exc}")
time.sleep(3)
raise TimeoutError(f"Timed out waiting for {url}")
def request_json(path: str, *, method: str = "GET", payload: dict | None = None) -> tuple[int, str]:
url = f"{PROXY_BASE_URL}{path}"
data = None
headers = {"Accept": "application/json"}
if payload is not None:
data = json.dumps(payload).encode("utf-8")
headers["Content-Type"] = "application/json"
request = urllib.request.Request(url, data=data, headers=headers, method=method)
try:
with urllib.request.urlopen(request) as response: # noqa: S310
body = response.read().decode("utf-8")
return response.status, body
except urllib.error.HTTPError as exc:
body = exc.read().decode("utf-8")
return exc.code, body
def post_json(path: str, payload: dict) -> tuple[int, str]:
return request_json(path, method="POST", payload=payload)
def get_json(path: str) -> tuple[int, str]:
return request_json(path, method="GET")
def configure_providers() -> dict[str, dict[str, object]]:
env_map = parse_env_lines(read_env_file())
bifrost_lines = read_bifrost_env_file()
bifrost_map = parse_env_lines(bifrost_lines)
bifrost_lines_changed = False
config_updates: dict[str, dict[str, object]] = {}
for provider in PROVIDERS:
key_value, _source_var, _ = resolve_provider_key(provider, env_map, bifrost_map)
if not key_value:
continue
network_config = build_network_config(provider, env_map, bifrost_map)
payload = {
"provider": provider.name,
"keys": [
{
"value": key_value,
"models": [],
"weight": 1.0,
}
],
}
if network_config:
payload["network_config"] = network_config
status, body = post_json("/api/providers", payload)
if status in {200, 201}:
log(f"Configured provider '{provider.name}'")
elif status == 409:
log(f"Provider '{provider.name}' already exists (409)")
else:
log(
"Failed to configure provider '%s' (%s): %s"
% (provider.name, status, body)
)
continue
os.environ[provider.env_var] = key_value
if bifrost_map.get(provider.env_var, "") != key_value:
bifrost_lines, changed = ensure_provider_env_export(
bifrost_lines, provider, key_value
)
if changed:
bifrost_lines_changed = True
bifrost_map[provider.env_var] = key_value
config_updates[provider.name] = build_provider_config_entry(
provider,
env_map,
bifrost_map,
network_config=network_config,
)
if bifrost_lines_changed:
write_bifrost_env_file(bifrost_lines)
return config_updates
def read_env_file() -> list[str]:
if not ENV_FILE_PATH.exists():
raise FileNotFoundError(
f"Expected env file at {ENV_FILE_PATH}. Copy volumes/env/.env.example first."
)
return ENV_FILE_PATH.read_text().splitlines()
def write_env_file(lines: Iterable[str]) -> None:
ENV_FILE_PATH.write_text("\n".join(lines) + "\n")
def read_bifrost_env_file() -> list[str]:
if not BIFROST_ENV_FILE_PATH.exists():
return []
return BIFROST_ENV_FILE_PATH.read_text().splitlines()
def write_bifrost_env_file(lines: Iterable[str]) -> None:
BIFROST_ENV_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
BIFROST_ENV_FILE_PATH.write_text("\n".join(lines) + "\n")
def current_env_key() -> str | None:
existing = os.getenv("OPENAI_API_KEY")
if existing:
return existing.strip()
# Fall back to reading file if not present in the container environment
for line in read_env_file():
if line.startswith("OPENAI_API_KEY="):
return line.split("=", 1)[1].strip()
return None
def _extract_key_value(record: Mapping[str, object]) -> str | None:
value = record.get("value") or record.get("key")
if value:
return str(value)
budget = record.get("virtual_key") if isinstance(record.get("virtual_key"), Mapping) else None
if isinstance(budget, Mapping):
inner_value = budget.get("value") or budget.get("key")
if inner_value:
return str(inner_value)
return None
def find_existing_virtual_key() -> Mapping[str, object] | None:
status, body = get_json("/api/governance/virtual-keys")
if status != 200:
log(f"Could not list virtual keys ({status}): {body}")
return None
try:
data = json.loads(body)
except json.JSONDecodeError as exc: # pragma: no cover - defensive
log(f"Failed to parse virtual key list: {exc}")
return None
candidates: Sequence[Mapping[str, object]]
if isinstance(data, dict) and "virtual_keys" in data and isinstance(data["virtual_keys"], list):
candidates = [item for item in data["virtual_keys"] if isinstance(item, Mapping)]
elif isinstance(data, list):
candidates = [item for item in data if isinstance(item, Mapping)]
else:
log("Virtual key list response in unexpected format; skipping lookup")
return None
for item in candidates:
if str(item.get("name", "")).strip() == DEFAULT_VIRTUAL_KEY_NAME:
return item
return None
def upsert_virtual_key() -> str | None:
existing_env = current_env_key()
record = find_existing_virtual_key()
if record:
key = _extract_key_value(record)
if key:
log("Reusing existing virtual key from proxy store")
return key
if existing_env and looks_like_virtual_key(existing_env):
log(
"Virtual key present in env but not found in proxy store; issuing a new key"
)
payload = {
"name": DEFAULT_VIRTUAL_KEY_NAME,
"user_id": DEFAULT_VIRTUAL_KEY_USER,
"budget": {"max_limit": 25.0, "reset_duration": "7d"},
}
status, body = post_json("/api/governance/virtual-keys", payload)
if status not in {200, 201}:
log(f"Failed to create virtual key ({status}): {body}")
return None
try:
data = json.loads(body)
except json.JSONDecodeError as exc: # pragma: no cover - defensive
log(f"Could not parse virtual key response: {exc}")
return None
key = _extract_key_value(data)
if not key:
log(f"Virtual key response missing key field: {body}")
return None
log("Generated new virtual key for task agent")
return key
def persist_key_to_env_file(new_key: str) -> None:
lines = read_env_file()
updated = False
for idx, line in enumerate(lines):
if line.startswith("OPENAI_API_KEY="):
lines[idx] = f"OPENAI_API_KEY={new_key}"
updated = True
break
if not updated:
lines.append(f"OPENAI_API_KEY={new_key}")
write_env_file(lines)
log(f"Wrote virtual key to {ENV_FILE_PATH}")
os.environ["OPENAI_API_KEY"] = new_key
def main() -> int:
log("Bootstrapping Bifrost proxy")
try:
wait_for_proxy()
providers_config = configure_providers()
existing_key = current_env_key()
new_key = upsert_virtual_key()
virtual_key_value = new_key or existing_key
if new_key and new_key != existing_key:
persist_key_to_env_file(new_key)
update_config_file(providers_config, virtual_key_value)
log("Bootstrap complete")
return 0
except Exception as exc: # pragma: no cover - startup failure reported to logs
log(f"Bootstrap failed: {exc}")
return 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,149 @@
---
title: "Run the LLM Proxy"
description: "Deploy Bifrost (default) or LiteLLM as an LLM gateway and connect it to the task agent."
---
## Overview
FuzzForge routes every LLM request through a proxy so that usage can be metered, priced, and rate-limited per user. The repository ships Docker Compose services for two supported gateways (only the LiteLLM alternative sits behind a Compose profile):
- **Bifrost** (`maximhq/bifrost`) — default option with granular governance and budgeting
- **LiteLLM Proxy** (`ghcr.io/berriai/litellm`) — drop-in alternative that exposes similar OpenAI-compatible endpoints
Both services read provider credentials from `volumes/env/.env` and persist their internal state in dedicated Docker volumes, so configuration survives container restarts.
## Before You Start
1. Copy `volumes/env/.env.example` to `volumes/env/.env` and fill in:
- Leave `OPENAI_API_KEY=sk-proxy-default`, or paste your raw OpenAI key if you
want the bootstrapper to migrate it automatically into `volumes/env/.env.bifrost`
- `FF_LLM_PROXY_BASE_URL` pointing to the proxy hostname inside Docker
- Optional `LITELLM_MASTER_KEY`/`LITELLM_SALT_KEY` if you plan to run the LiteLLM proxy
2. When running tools outside Docker, change `FF_LLM_PROXY_BASE_URL` to the published host port (for example `http://localhost:10999`). A minimal starting point for the file is sketched below.
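A minimal sketch of `volumes/env/.env` before the first launch, using only values already described in `.env.example` (the secrets shown are illustrative placeholders):
```bash
# volumes/env/.env (sketch)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080    # http://localhost:10999 when running outside Docker
OPENAI_API_KEY=sk-proxy-default                # the bootstrap job replaces this with a virtual key
BIFROST_OPENAI_KEY=sk-your-real-openai-key     # upstream secret consumed only by the proxy
# LITELLM_MASTER_KEY=sk-master-key             # only needed for the LiteLLM profile
# LITELLM_SALT_KEY=choose-a-random-string
```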
## Bifrost Gateway (default)
Start the default service with Docker Compose:
```bash
# Launch the proxy + UI (http://localhost:10999)
docker compose up llm-proxy
```
The container stores its SQLite databases in the named volume `fuzzforge_llm_proxy_data`, so your configuration, request logs, and issued virtual keys persist across restarts. On startup a bootstrap job seeds the default providers, creates the `fuzzforge-task-agent` virtual key, and writes the generated token back to `volumes/env/.env` so the agent picks it up automatically.
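Before relying on the seeded configuration, you can poll the same health endpoint the bootstrap script waits on and peek at the generated `config.json` (a sketch; the volume layout follows the compose definitions in this change set):
```bash
# The bootstrap container blocks on this endpoint before seeding providers
curl -s http://localhost:10999/health

# Inspect the provider/governance config written into the named volume
docker run --rm -v fuzzforge_llm_proxy_data:/data alpine cat /data/config.json
```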
### Configure providers
1. Open `http://localhost:10999` and follow the onboarding flow.
2. Upstream keys are added automatically when the bootstrap job finds standard
   variables such as `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, and so on. The raw secrets
   are mirrored into `volumes/env/.env.bifrost`, so future restarts rehydrate the
   proxy without further manual edits. The same pass also generates `/app/data/config.json`
   (backed by the `fuzzforge_llm_proxy_data` volume) populated with provider entries,
   `client.drop_excess_requests=false`, and an enabled SQLite `config_store`, so
   budgets and UI-driven configuration persist across restarts.
To raise the upstream timeout beyond the 30s default, set `BIFROST_DEFAULT_TIMEOUT_SECONDS`
or provider-specific overrides such as `BIFROST_ANTHROPIC_TIMEOUT_SECONDS` in
`volumes/env/.env` before bootstrapping; the script propagates them to the proxy's
network configuration automatically.
3. (Optional) Set `BIFROST_OPENAI_MODELS` to a comma-separated list if you want
to scope a key to specific models (for example `openai/gpt-5,openai/gpt-5-nano`).
When you target Responses-only models, flip `BIFROST_OPENAI_USE_RESPONSES_API=true`
so the proxy runs them against the newer endpoint. You can still add or rotate
keys manually via **Providers → Add key**—reference either the migrated
`env.BIFROST_*` variables or paste the secret directly.
4. (Optional) Add price caps, context window overrides, or caching policies from the same UI. Settings are stored immediately in the mounted data volume.
If you prefer a file-based bootstrap, mount a `config.json` under `/app/data` that references the same environment variables:
```json
{
"providers": {
"openai": {
"keys": [{ "value": "env.BIFROST_OPENAI_KEY", "weight": 1.0 }]
}
}
}
```
### Issue per-user virtual keys
Virtual keys let you attach budgets and rate limits to each downstream agent. Create them from the UI (**Governance → Virtual Keys**) or via the API:
```bash
curl -X POST http://localhost:10999/api/governance/virtual-keys \
-H "Authorization: Bearer <admin-access-token>" \
-H "Content-Type: application/json" \
-d '{
"name": "task-agent default",
"user_id": "fuzzforge-task-agent",
"budget": {"max_limit": 10.0, "reset_duration": "1d"}
}'
```
Use the returned `key` value as the agent's `OPENAI_API_KEY`. When making requests manually, send the header `x-bf-vk: <virtual-key>` (the task agent handles this automatically once the key is in the environment).
You can generate scoped keys for teammates the same way to give each person isolated quotas and audit trails.
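For a manual smoke test with a scoped key, the `x-bf-vk` header described above can be sent directly (a sketch; substitute a key issued by the gateway):
```bash
curl -X POST http://localhost:10999/v1/chat/completions \
  -H "x-bf-vk: <virtual-key>" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-5",
    "messages": [{"role": "user", "content": "Scoped key check"}]
  }'
```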
## LiteLLM Proxy (alternative)
If you prefer LiteLLM's gateway, enable the second profile:
```bash
# Requires LITELLM_MASTER_KEY + LITELLM_SALT_KEY in volumes/env/.env
docker compose --profile proxy-litellm up llm-proxy-litellm
```
The service exposes the admin UI at `http://localhost:4110/ui` and stores state in the `fuzzforge_litellm_proxy_data` volume (SQLite by default).
Generate user-facing keys with the built-in `/key/generate` endpoint:
```bash
curl http://localhost:4110/key/generate \
-H "Authorization: Bearer $LITELLM_MASTER_KEY" \
-H "Content-Type: application/json" \
-d '{
"models": ["openai/gpt-4o-mini"],
"metadata": {"user": "fuzzforge-task-agent"},
"duration": "7d",
"budget": {"soft": 8.0, "hard": 10.0}
}'
```
Set the returned key as `OPENAI_API_KEY` for the task agent and update its base URL to `http://llm-proxy-litellm:4000` (or `http://localhost:4110` outside Docker).
## Wiring the Task Agent
Both proxies expose an OpenAI-compatible API. The LiteLLM agent only needs the base URL and a bearer token:
```bash
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080 # or http://llm-proxy-litellm:4000 when switching proxies
OPENAI_API_KEY=sk-proxy-default # virtual key issued by the gateway
LITELLM_MODEL=openai/gpt-5
LITELLM_PROVIDER=openai
```
The agent automatically forwards requests to the configured proxy and never touches the raw provider secrets. When you hot-swap models from the UI or CLI, the proxy enforces the budgets and rate limits tied to the virtual key.
To verify end-to-end connectivity, run:
```bash
curl -X POST http://localhost:10999/v1/chat/completions \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-5",
"messages": [{"role": "user", "content": "Proxy health check"}]
}'
```
Replace the host/port with the LiteLLM endpoint when using that gateway.
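For the LiteLLM gateway the same check looks like this (a sketch against the published LiteLLM port, reusing the model from the key-generation example above):
```bash
curl -X POST http://localhost:4110/v1/chat/completions \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-4o-mini",
    "messages": [{"role": "user", "content": "Proxy health check"}]
  }'
```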
## Switching Between Proxies
1. Stop the current proxy container.
2. Update `FF_LLM_PROXY_BASE_URL` in `volumes/env/.env` to the new service endpoint (`http://llm-proxy:8080` or `http://llm-proxy-litellm:4000`).
3. Replace `OPENAI_API_KEY` with the virtual key generated by the selected proxy.
4. Restart the `task-agent` container so it picks up the new environment.
Because the agent only knows about the OpenAI-compatible interface, no code changes are required when switching between Bifrost and LiteLLM.
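As a concrete example, moving from Bifrost to LiteLLM comes down to two env edits plus a restart (a sketch; the `task-agent` service name is assumed from the compose file):
```bash
# 1. In volumes/env/.env (sketch):
#      FF_LLM_PROXY_BASE_URL=http://llm-proxy-litellm:4000
#      OPENAI_API_KEY=<key returned by /key/generate>
# 2. Start the alternative proxy and restart the agent
docker compose --profile proxy-litellm up -d llm-proxy-litellm
docker compose restart task-agent
```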

View File

@@ -1,17 +1,51 @@
# FuzzForge Agent Configuration
# Copy this to .env and configure your API keys
# Copy this to .env and configure your API keys and proxy settings
# LiteLLM Model Configuration
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# LITELLM_PROVIDER=gemini
# LiteLLM Model Configuration (default routed through the proxy)
LITELLM_MODEL=openai/gpt-5
LITELLM_PROVIDER=openai
# API Keys (uncomment and configure as needed)
# GOOGLE_API_KEY=
# OPENAI_API_KEY=
# ANTHROPIC_API_KEY=
# OPENROUTER_API_KEY=
# MISTRAL_API_KEY=
# Proxy configuration
# Base URL is used by the task agent to talk to the proxy container inside Docker.
# When running everything locally without Docker networking, replace with http://localhost:10999.
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Agent Configuration
# Default virtual key issued by the proxy for the task agent.
# Leave as sk-proxy-default and the bootstrap job will replace it on startup.
# If you paste your real OpenAI key here before the first boot, the bootstrap
# job moves it into a generated .env.bifrost file and swaps this value with the
# proxy-issued virtual key so the agent still authenticates via the gateway.
OPENAI_API_KEY=sk-proxy-default
# Upstream provider keys (loaded by the proxy via env.BIFROST_* references).
# You can fill these directly, but normally the bootstrapper manages them for
# you in .env.bifrost after the first startup. To scope keys to specific models,
# set BIFROST_OPENAI_MODELS=openai/gpt-5 (or similar) before launching the proxy.
# BIFROST_OPENAI_KEY=
# BIFROST_OPENAI_MODELS=openai/gpt-5,openai/gpt-5-nano
# BIFROST_OPENAI_USE_RESPONSES_API=true
# Increase the proxy's upstream request timeout (seconds). Applies per provider,
# falling back to BIFROST_DEFAULT_TIMEOUT_SECONDS when the provider-specific
# value is not set.
# BIFROST_DEFAULT_TIMEOUT_SECONDS=60
# BIFROST_OPENAI_TIMEOUT_SECONDS=60
# BIFROST_ANTHROPIC_KEY=
# BIFROST_ANTHROPIC_TIMEOUT_SECONDS=60
# BIFROST_GEMINI_KEY=
# BIFROST_GEMINI_TIMEOUT_SECONDS=60
# BIFROST_MISTRAL_KEY=
# BIFROST_MISTRAL_TIMEOUT_SECONDS=60
# BIFROST_OPENROUTER_KEY=
# BIFROST_OPENROUTER_TIMEOUT_SECONDS=60
# LiteLLM proxy (alternative gateway)
# LITELLM_MASTER_KEY=sk-master-key
# LITELLM_SALT_KEY=choose-a-random-string
# Bifrost gateway (default proxy)
# APP_HOST=0.0.0.0
# APP_PORT=8080
# Agent behaviour
# DEFAULT_TIMEOUT=120
# DEFAULT_CONTEXT_ID=default