Compare commits

...

2 Commits

Author SHA1 Message Date
Songbird
22f01562ba Add env-configurable timeout for proxy providers 2025-10-21 14:26:34 +02:00
Songbird
092a90df5d feat: seed governance config and responses routing 2025-10-18 15:52:59 +02:00
10 changed files with 1134 additions and 32 deletions

6
.gitignore vendored
View File

@@ -188,6 +188,10 @@ logs/
# Docker volume configs (keep .env.example but ignore actual .env)
volumes/env/.env
# Vendored proxy sources (kept locally for reference)
ai/proxy/bifrost/
ai/proxy/litellm/
# Test project databases and configurations
test_projects/*/.fuzzforge/
test_projects/*/findings.db*
@@ -300,4 +304,4 @@ test_projects/*/.npmrc
test_projects/*/.git-credentials
test_projects/*/credentials.*
test_projects/*/api_keys.*
test_projects/*/ci-*.sh
test_projects/*/ci-*.sh

View File

@@ -1,10 +1,17 @@
# Default LiteLLM configuration
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# LITELLM_PROVIDER=gemini
# Default LiteLLM configuration routed through the proxy
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai
# API keys (uncomment and fill as needed)
# GOOGLE_API_KEY=
# OPENAI_API_KEY=
# ANTHROPIC_API_KEY=
# OPENROUTER_API_KEY=
# MISTRAL_API_KEY=
# Proxy connection (override when running locally without Docker networking)
# Use http://localhost:10999 when accessing from the host
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Virtual key issued by Bifrost or LiteLLM proxy for the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default
# Upstream provider keys live inside the proxy container
# BIFROST_OPENAI_KEY=
# BIFROST_ANTHROPIC_KEY=
# BIFROST_GEMINI_KEY=
# BIFROST_MISTRAL_KEY=
# BIFROST_OPENROUTER_KEY=

View File

@@ -43,18 +43,30 @@ cd task_agent
# cp .env.example .env
```
Edit `.env` (or `.env.example`) and add your API keys. The agent must be restarted after changes so the values are picked up:
Edit `.env` (or `.env.example`) and add your proxy + API keys. The agent must be restarted after changes so the values are picked up:
```bash
# Set default model
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# Route every request through the proxy container (use http://localhost:10999 from the host)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Add API keys for providers you want to use
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
ANTHROPIC_API_KEY=your_anthropic_api_key
OPENROUTER_API_KEY=your_openrouter_api_key
# Default model + provider the agent boots with
LITELLM_MODEL=openai/gpt-4o-mini
LITELLM_PROVIDER=openai
# Virtual key issued by the proxy to the task agent (bootstrap replaces the placeholder)
OPENAI_API_KEY=sk-proxy-default
# Upstream keys stay inside the proxy (Bifrost config references env.BIFROST_* names)
BIFROST_OPENAI_KEY=your_real_openai_api_key
BIFROST_ANTHROPIC_KEY=your_real_anthropic_key
BIFROST_GEMINI_KEY=your_real_gemini_key
BIFROST_MISTRAL_KEY=your_real_mistral_key
BIFROST_OPENROUTER_KEY=your_real_openrouter_key
```
> When running the agent outside of Docker, swap `FF_LLM_PROXY_BASE_URL` to the host port (default `http://localhost:10999`).
The compose bootstrap container provisions the Bifrost gateway, creates a virtual key for `fuzzforge-task-agent`, and rewrites `volumes/env/.env`. Fill in the `BIFROST_*` upstream secrets before the first launch so the proxy can reach your providers when the bootstrap script runs.
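To confirm the bootstrap finished, check the one-shot container's logs and the rewritten env file (a quick sketch; the service name matches the compose file in this change set):
```bash
# Logs should end with "[llm-proxy-bootstrap] Bootstrap complete"
docker compose logs llm-proxy-bootstrap

# The placeholder should now be a proxy-issued virtual key
grep OPENAI_API_KEY volumes/env/.env
```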
### 2. Install Dependencies
```bash

View File

@@ -4,13 +4,28 @@ from __future__ import annotations
import os
def _normalize_proxy_base_url(raw_value: str | None) -> str | None:
if not raw_value:
return None
cleaned = raw_value.strip()
if not cleaned:
return None
# Avoid double slashes in downstream requests
return cleaned.rstrip("/")
AGENT_NAME = "litellm_agent"
AGENT_DESCRIPTION = (
"A LiteLLM-backed shell that exposes hot-swappable model and prompt controls."
)
DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "gemini-2.0-flash-001")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER")
DEFAULT_MODEL = os.getenv("LITELLM_MODEL", "openai/gpt-4o-mini")
DEFAULT_PROVIDER = os.getenv("LITELLM_PROVIDER") or None
PROXY_BASE_URL = _normalize_proxy_base_url(
os.getenv("FF_LLM_PROXY_BASE_URL")
or os.getenv("LITELLM_API_BASE")
or os.getenv("LITELLM_BASE_URL")
)
STATE_PREFIX = "app:litellm_agent/"
STATE_MODEL_KEY = f"{STATE_PREFIX}model"

View File

@@ -3,11 +3,15 @@
from __future__ import annotations
from dataclasses import dataclass
import os
from typing import Any, Mapping, MutableMapping, Optional
import httpx
from .config import (
DEFAULT_MODEL,
DEFAULT_PROVIDER,
PROXY_BASE_URL,
STATE_MODEL_KEY,
STATE_PROMPT_KEY,
STATE_PROVIDER_KEY,
@@ -66,11 +70,109 @@ class HotSwapState:
"""Create a LiteLlm instance for the current state."""
from google.adk.models.lite_llm import LiteLlm # Lazy import to avoid cycle
from google.adk.models.lite_llm import LiteLLMClient
from litellm.types.utils import Choices, Message, ModelResponse, Usage
kwargs = {"model": self.model}
if self.provider:
kwargs["custom_llm_provider"] = self.provider
return LiteLlm(**kwargs)
if PROXY_BASE_URL:
provider = (self.provider or DEFAULT_PROVIDER or "").lower()
if provider and provider != "openai":
kwargs["api_base"] = f"{PROXY_BASE_URL.rstrip('/')}/{provider}"
else:
kwargs["api_base"] = PROXY_BASE_URL
kwargs.setdefault("api_key", os.environ.get("OPENAI_API_KEY"))
provider = (self.provider or DEFAULT_PROVIDER or "").lower()
model_suffix = self.model.split("/", 1)[-1]
use_responses = provider == "openai" and (
model_suffix.startswith("gpt-5") or model_suffix.startswith("o1")
)
if use_responses:
kwargs.setdefault("use_responses_api", True)
llm = LiteLlm(**kwargs)
if use_responses and PROXY_BASE_URL:
class _ResponsesAwareClient(LiteLLMClient):
def __init__(self, base_client: LiteLLMClient, api_base: str, api_key: str):
self._base_client = base_client
self._api_base = api_base.rstrip("/")
self._api_key = api_key
async def acompletion(self, model, messages, tools, **kwargs): # type: ignore[override]
use_responses_api = kwargs.pop("use_responses_api", False)
if not use_responses_api:
return await self._base_client.acompletion(
model=model,
messages=messages,
tools=tools,
**kwargs,
)
resolved_model = model
if "/" not in resolved_model:
resolved_model = f"openai/{resolved_model}"
payload = {
"model": resolved_model,
"input": _messages_to_responses_input(messages),
}
timeout = kwargs.get("timeout", 60)
headers = {
"Authorization": f"Bearer {self._api_key}",
"Content-Type": "application/json",
}
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(
f"{self._api_base}/v1/responses",
json=payload,
headers=headers,
)
try:
response.raise_for_status()
except httpx.HTTPStatusError as exc:
text = exc.response.text
raise RuntimeError(
f"Bifrost responses request failed: {text}"
) from exc
data = response.json()
text_output = _extract_output_text(data)
usage = data.get("usage", {})
return ModelResponse(
id=data.get("id"),
model=model,
choices=[
Choices(
finish_reason="stop",
index=0,
message=Message(role="assistant", content=text_output),
provider_specific_fields={"bifrost_response": data},
)
],
usage=Usage(
prompt_tokens=usage.get("input_tokens"),
completion_tokens=usage.get("output_tokens"),
reasoning_tokens=usage.get("output_tokens_details", {}).get(
"reasoning_tokens"
),
total_tokens=usage.get("total_tokens"),
),
)
llm.llm_client = _ResponsesAwareClient(
llm.llm_client,
PROXY_BASE_URL,
os.environ.get("OPENAI_API_KEY", ""),
)
return llm
@property
def display_model(self) -> str:
@@ -84,3 +186,69 @@ def apply_state_to_agent(invocation_context, state: HotSwapState) -> None:
agent = invocation_context.agent
agent.model = state.instantiate_llm()
def _messages_to_responses_input(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
inputs: list[dict[str, Any]] = []
for message in messages:
role = message.get("role", "user")
content = message.get("content", "")
text_segments: list[str] = []
if isinstance(content, list):
for item in content:
if isinstance(item, dict):
text = item.get("text") or item.get("content")
if text:
text_segments.append(str(text))
elif isinstance(item, str):
text_segments.append(item)
elif isinstance(content, str):
text_segments.append(content)
text = "\n".join(segment.strip() for segment in text_segments if segment)
if not text:
continue
entry_type = "input_text"
if role == "assistant":
entry_type = "output_text"
inputs.append(
{
"role": role,
"content": [
{
"type": entry_type,
"text": text,
}
],
}
)
if not inputs:
inputs.append(
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "",
}
],
}
)
return inputs
def _extract_output_text(response_json: dict[str, Any]) -> str:
outputs = response_json.get("output", [])
collected: list[str] = []
for item in outputs:
if isinstance(item, dict) and item.get("type") == "message":
for part in item.get("content", []):
if isinstance(part, dict) and part.get("type") == "output_text":
text = part.get("text", "")
if text:
collected.append(str(text))
return "\n\n".join(collected).strip()

5
ai/proxy/README.md Normal file
View File

@@ -0,0 +1,5 @@
# LLM Proxy Integrations
This directory contains vendored source trees kept only for reference while integrating LLM gateways. The actual FuzzForge deployment uses the official Docker images for each project.
See `docs/docs/how-to/llm-proxy.md` for up-to-date instructions on running the proxy services and issuing keys for the agents.

View File

@@ -146,6 +146,78 @@ services:
networks:
- fuzzforge-network
# ============================================================================
# LLM Proxy (Default) - Bifrost Gateway
# ============================================================================
llm-proxy:
image: maximhq/bifrost:latest
container_name: fuzzforge-llm-proxy
env_file:
- ./volumes/env/.env
environment:
APP_HOST: 0.0.0.0
APP_PORT: 8080
LOG_STYLE: pretty
OPENAI_API_KEY: ""
ports:
- "10999:8080" # Web UI + OpenAI-compatible API
volumes:
- llm_proxy_data:/app/data
networks:
- fuzzforge-network
restart: unless-stopped
# ============================================================================
# LLM Proxy Bootstrap - Seed providers and virtual keys
# ============================================================================
llm-proxy-bootstrap:
image: python:3.11-slim
container_name: fuzzforge-llm-proxy-bootstrap
depends_on:
llm-proxy:
condition: service_started
env_file:
- ./volumes/env/.env
environment:
PROXY_BASE_URL: http://llm-proxy:8080
ENV_FILE_PATH: /bootstrap/env/.env
CONFIG_FILE_PATH: /bootstrap/data/config.json
volumes:
- ./docker/scripts/bootstrap_llm_proxy.py:/app/bootstrap.py:ro
- ./volumes/env:/bootstrap/env
- llm_proxy_data:/bootstrap/data
networks:
- fuzzforge-network
command: ["python", "/app/bootstrap.py"]
restart: "no"
# ============================================================================
# LLM Proxy (Alternative) - LiteLLM Gateway
# ============================================================================
llm-proxy-litellm:
image: ghcr.io/berriai/litellm:main-stable
container_name: fuzzforge-llm-proxy-litellm
profiles:
- proxy-litellm
env_file:
- ./volumes/env/.env
environment:
DATABASE_URL: sqlite:////var/lib/litellm/litellm.db
STORE_MODEL_IN_DB: "True"
ports:
- "4110:4000" # HTTP API + UI
volumes:
- litellm_proxy_data:/var/lib/litellm
networks:
- fuzzforge-network
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
restart: unless-stopped
# ============================================================================
# Vertical Worker: Rust/Native Security
# ============================================================================
@@ -460,6 +532,9 @@ services:
context: ./ai/agents/task_agent
dockerfile: Dockerfile
container_name: fuzzforge-task-agent
depends_on:
llm-proxy-bootstrap:
condition: service_completed_successfully
ports:
- "10900:8000"
env_file:
@@ -560,6 +635,10 @@ volumes:
name: fuzzforge_worker_ossfuzz_cache
worker_ossfuzz_build:
name: fuzzforge_worker_ossfuzz_build
llm_proxy_data:
name: fuzzforge_llm_proxy_data
litellm_proxy_data:
name: fuzzforge_litellm_proxy_data
# Add more worker caches as you add verticals:
# worker_web_cache:
# worker_ios_cache:
@@ -593,6 +672,8 @@ networks:
# 4. Web UIs:
# - Temporal UI: http://localhost:8233
# - MinIO Console: http://localhost:9001 (user: fuzzforge, pass: fuzzforge123)
# - Bifrost Proxy: http://localhost:10999 (default, no profile required)
# - LiteLLM Proxy: http://localhost:4110 (requires --profile proxy-litellm)
#
# 5. Resource Usage (Baseline):
# - Temporal: ~500MB

View File

@@ -0,0 +1,627 @@
"""Bootstrap the Bifrost proxy with providers and default virtual keys.
This script runs inside a one-shot container during docker-compose startup.
It will:
1. Wait for the proxy health endpoint to respond.
2. Configure any upstream providers for which an env key is present.
3. Create (or reuse) the default virtual key for the task agent.
4. Persist the generated key back into volumes/env/.env so the agent uses it.
The script is idempotent: rerunning it leaves existing configs in place and skips
key generation if OPENAI_API_KEY already contains a proxy-issued key.
"""
from __future__ import annotations
import json
import os
import re
import sys
import time
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from datetime import datetime, timezone
from typing import Iterable, Mapping, Sequence
PROXY_BASE_URL = os.getenv("PROXY_BASE_URL", "http://llm-proxy:8080").rstrip("/")
ENV_FILE_PATH = Path(os.getenv("ENV_FILE_PATH", "/bootstrap/env/.env"))
BIFROST_ENV_FILE_PATH = Path(
os.getenv("BIFROST_ENV_FILE_PATH", "/bootstrap/env/.env.bifrost")
)
CONFIG_FILE_PATH = Path(os.getenv("CONFIG_FILE_PATH", "/bootstrap/data/config.json"))
DEFAULT_VIRTUAL_KEY_NAME = "task-agent default"
DEFAULT_VIRTUAL_KEY_USER = "fuzzforge-task-agent"
PLACEHOLDER_KEY = "sk-proxy-default"
MAX_WAIT_SECONDS = 120
DEFAULT_PROVIDER_MODELS: dict[str, list[str]] = {
"openai": ["gpt-5"],
}
@dataclass(frozen=True)
class ProviderSpec:
name: str
env_var: str
fallback_env_vars: tuple[str, ...] = ()
@property
def env_reference(self) -> str:
return f"env.{self.env_var}"
PROVIDERS: tuple[ProviderSpec, ...] = (
ProviderSpec("openai", "BIFROST_OPENAI_KEY", ("OPENAI_API_KEY",)),
ProviderSpec("anthropic", "BIFROST_ANTHROPIC_KEY", ("ANTHROPIC_API_KEY",)),
ProviderSpec("gemini", "BIFROST_GEMINI_KEY", ("GEMINI_API_KEY",)),
ProviderSpec("mistral", "BIFROST_MISTRAL_KEY", ("MISTRAL_API_KEY",)),
ProviderSpec("openrouter", "BIFROST_OPENROUTER_KEY", ("OPENROUTER_API_KEY",)),
)
UUID_PATTERN = re.compile(
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)
def looks_like_virtual_key(candidate: str | None) -> bool:
if not candidate:
return False
value = candidate.strip()
if not value or value == PLACEHOLDER_KEY:
return False
if UUID_PATTERN.match(value):
return True
if value.startswith("sk-proxy-"):
return True
return False
def set_env_value(lines: list[str], key: str, value: str) -> tuple[list[str], bool]:
prefix = f"{key}="
new_line = f"{prefix}{value}"
for idx, line in enumerate(lines):
stripped = line.lstrip()
if not stripped or stripped.startswith("#"):
continue
if stripped.startswith(prefix):
if lines[idx].lstrip() == new_line:
return lines, False
indent = line[: len(line) - len(stripped)]
lines[idx] = f"{indent}{new_line}"
return lines, True
lines.append(new_line)
return lines, True
def parse_env_lines(lines: list[str]) -> dict[str, str]:
mapping: dict[str, str] = {}
for line in lines:
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
if "=" not in stripped:
continue
key, value = stripped.split("=", 1)
mapping[key] = value
return mapping
def resolve_provider_key(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> tuple[str | None, str | None, str | None]:
candidate = bifrost_map.get(provider.env_var)
if candidate:
value = candidate.strip()
if value:
return value, provider.env_var, "bifrost"
candidate = env_map.get(provider.env_var)
if candidate:
value = candidate.strip()
if value and value != PLACEHOLDER_KEY:
return value, provider.env_var, "env"
candidate = os.getenv(provider.env_var)
if candidate:
value = candidate.strip()
if value and value != PLACEHOLDER_KEY:
return value, provider.env_var, "env"
for var in provider.fallback_env_vars:
raw_value = env_map.get(var) or os.getenv(var)
if not raw_value:
continue
value = raw_value.strip()
if not value or value == PLACEHOLDER_KEY:
continue
if var == "OPENAI_API_KEY" and looks_like_virtual_key(value):
continue
return value, var, "fallback"
return None, None, None
def ensure_provider_env_export(
lines: list[str], provider: ProviderSpec, key_value: str
) -> tuple[list[str], bool]:
# Store provider secrets under their dedicated BIFROST_* variables so future
# restarts inject them into the proxy container environment automatically.
updated_lines, changed = set_env_value(lines, provider.env_var, key_value)
if changed:
os.environ[provider.env_var] = key_value
return updated_lines, changed
def get_models_for_provider(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> list[str]:
env_var = f"BIFROST_{provider.name.upper()}_MODELS"
raw_value = (
os.getenv(env_var)
or env_map.get(env_var)
or bifrost_map.get(env_var)
)
if raw_value:
models = [item.strip() for item in raw_value.split(",") if item.strip()]
if models:
return models
return DEFAULT_PROVIDER_MODELS.get(provider.name, [])
def _should_use_responses_api(
provider: ProviderSpec,
models: list[str],
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> bool:
if provider.name != "openai":
return False
env_var = "BIFROST_OPENAI_USE_RESPONSES_API"
raw_value = (
os.getenv(env_var)
or env_map.get(env_var)
or bifrost_map.get(env_var)
)
if raw_value and raw_value.strip().lower() in {"1", "true", "yes", "on"}:
return True
for model in models:
suffix = model.split("/", 1)[-1]
if suffix.startswith("gpt-5") or suffix.startswith("o1"):
return True
return False
def _read_positive_int(
candidate: str | None,
*,
var_name: str,
) -> int | None:
if candidate is None:
return None
value = candidate.strip()
if not value:
return None
try:
parsed = int(value)
except ValueError:
log(f"Ignoring non-integer timeout for {var_name}: {value}")
return None
if parsed <= 0:
log(f"Ignoring non-positive timeout for {var_name}: {parsed}")
return None
return parsed
def _lookup_timeout_var(
var_name: str,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> int | None:
for source in (
bifrost_map.get(var_name),
env_map.get(var_name),
os.getenv(var_name),
):
parsed = _read_positive_int(source, var_name=var_name)
if parsed is not None:
return parsed
return None
def _get_timeout_seconds(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> int | None:
provider_specific_var = f"BIFROST_{provider.name.upper()}_TIMEOUT_SECONDS"
timeout = _lookup_timeout_var(provider_specific_var, env_map, bifrost_map)
if timeout is not None:
return timeout
return _lookup_timeout_var("BIFROST_DEFAULT_TIMEOUT_SECONDS", env_map, bifrost_map)
def build_network_config(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
) -> dict[str, object] | None:
timeout = _get_timeout_seconds(provider, env_map, bifrost_map)
if timeout is None:
return None
return {"default_request_timeout_in_seconds": timeout}
def build_provider_config_entry(
provider: ProviderSpec,
env_map: dict[str, str],
bifrost_map: dict[str, str],
*,
network_config: dict[str, object] | None = None,
) -> dict[str, object]:
models = get_models_for_provider(provider, env_map, bifrost_map)
key_entry: dict[str, object] = {
"value": provider.env_reference,
"models": models,
"weight": 1.0,
}
if _should_use_responses_api(provider, models, env_map, bifrost_map):
key_entry["openai_key_config"] = {"use_responses_api": True}
entry: dict[str, object] = {"keys": [key_entry]}
if network_config:
entry["network_config"] = network_config
return entry
def _default_client_config() -> dict[str, object]:
return {"drop_excess_requests": False}
def _default_config_store_config() -> dict[str, object]:
return {
"enabled": True,
"type": "sqlite",
"config": {"path": "./config.db"},
}
def update_config_file(
providers_config: dict[str, dict[str, object]],
virtual_key_value: str | None = None,
) -> None:
if not providers_config:
return
config_data: dict[str, object]
if CONFIG_FILE_PATH.exists():
try:
config_data = json.loads(CONFIG_FILE_PATH.read_text() or "{}")
except json.JSONDecodeError:
log(
"Existing config.json is invalid JSON; regenerating from provider metadata"
)
config_data = {}
else:
config_data = {}
providers_section = config_data.setdefault("providers", {})
config_data.setdefault("client", _default_client_config())
config_data.setdefault("config_store", _default_config_store_config())
changed = False
for name, entry in providers_config.items():
if providers_section.get(name) != entry:
providers_section[name] = entry
changed = True
if virtual_key_value:
governance_section = config_data.setdefault("governance", {})
vk_list: list[dict[str, object]] = governance_section.setdefault(
"virtual_keys", []
)
provider_configs = []
for provider_name, entry in providers_config.items():
allowed_models: list[str] = []
for key_entry in entry.get("keys", []):
models = key_entry.get("models", [])
if models:
allowed_models.extend(models)
provider_configs.append(
{
"provider": provider_name,
"weight": 1.0,
"allowed_models": allowed_models,
}
)
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
virtual_key_entry = {
"id": f"{DEFAULT_VIRTUAL_KEY_USER}-vk",
"name": DEFAULT_VIRTUAL_KEY_NAME,
"description": "Default virtual key issued during bootstrap",
"value": virtual_key_value,
"is_active": True,
"provider_configs": provider_configs,
"created_at": now_iso,
"updated_at": now_iso,
}
matched = False
for existing in vk_list:
if existing.get("name") == DEFAULT_VIRTUAL_KEY_NAME or existing.get(
"id"
) == virtual_key_entry["id"]:
existing.update(virtual_key_entry)
matched = True
changed = True
break
if not matched:
vk_list.append(virtual_key_entry)
changed = True
if not changed:
return
CONFIG_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
CONFIG_FILE_PATH.write_text(json.dumps(config_data, indent=2, sort_keys=True) + "\n")
log(f"Wrote provider config to {CONFIG_FILE_PATH}")
def log(message: str) -> None:
print(f"[llm-proxy-bootstrap] {message}", flush=True)
def wait_for_proxy() -> None:
url = f"{PROXY_BASE_URL}/health"
deadline = time.time() + MAX_WAIT_SECONDS
while time.time() < deadline:
try:
with urllib.request.urlopen(url) as response: # noqa: S310
if response.status == 200:
log("Proxy health endpoint is reachable")
return
except urllib.error.URLError as exc: # pragma: no cover - best effort logging
log(f"Proxy not ready yet: {exc}")
time.sleep(3)
raise TimeoutError(f"Timed out waiting for {url}")
def request_json(path: str, *, method: str = "GET", payload: dict | None = None) -> tuple[int, str]:
url = f"{PROXY_BASE_URL}{path}"
data = None
headers = {"Accept": "application/json"}
if payload is not None:
data = json.dumps(payload).encode("utf-8")
headers["Content-Type"] = "application/json"
request = urllib.request.Request(url, data=data, headers=headers, method=method)
try:
with urllib.request.urlopen(request) as response: # noqa: S310
body = response.read().decode("utf-8")
return response.status, body
except urllib.error.HTTPError as exc:
body = exc.read().decode("utf-8")
return exc.code, body
def post_json(path: str, payload: dict) -> tuple[int, str]:
return request_json(path, method="POST", payload=payload)
def get_json(path: str) -> tuple[int, str]:
return request_json(path, method="GET")
def configure_providers() -> dict[str, dict[str, object]]:
env_map = parse_env_lines(read_env_file())
bifrost_lines = read_bifrost_env_file()
bifrost_map = parse_env_lines(bifrost_lines)
bifrost_lines_changed = False
config_updates: dict[str, dict[str, object]] = {}
for provider in PROVIDERS:
key_value, _source_var, _ = resolve_provider_key(provider, env_map, bifrost_map)
if not key_value:
continue
network_config = build_network_config(provider, env_map, bifrost_map)
payload = {
"provider": provider.name,
"keys": [
{
"value": key_value,
"models": [],
"weight": 1.0,
}
],
}
if network_config:
payload["network_config"] = network_config
status, body = post_json("/api/providers", payload)
if status in {200, 201}:
log(f"Configured provider '{provider.name}'")
elif status == 409:
log(f"Provider '{provider.name}' already exists (409)")
else:
log(
"Failed to configure provider '%s' (%s): %s"
% (provider.name, status, body)
)
continue
os.environ[provider.env_var] = key_value
if bifrost_map.get(provider.env_var, "") != key_value:
bifrost_lines, changed = ensure_provider_env_export(
bifrost_lines, provider, key_value
)
if changed:
bifrost_lines_changed = True
bifrost_map[provider.env_var] = key_value
config_updates[provider.name] = build_provider_config_entry(
provider,
env_map,
bifrost_map,
network_config=network_config,
)
if bifrost_lines_changed:
write_bifrost_env_file(bifrost_lines)
return config_updates
def read_env_file() -> list[str]:
if not ENV_FILE_PATH.exists():
raise FileNotFoundError(
f"Expected env file at {ENV_FILE_PATH}. Copy volumes/env/.env.example first."
)
return ENV_FILE_PATH.read_text().splitlines()
def write_env_file(lines: Iterable[str]) -> None:
ENV_FILE_PATH.write_text("\n".join(lines) + "\n")
def read_bifrost_env_file() -> list[str]:
if not BIFROST_ENV_FILE_PATH.exists():
return []
return BIFROST_ENV_FILE_PATH.read_text().splitlines()
def write_bifrost_env_file(lines: Iterable[str]) -> None:
BIFROST_ENV_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
BIFROST_ENV_FILE_PATH.write_text("\n".join(lines) + "\n")
def current_env_key() -> str | None:
existing = os.getenv("OPENAI_API_KEY")
if existing:
return existing.strip()
# Fall back to reading file if not present in the container environment
for line in read_env_file():
if line.startswith("OPENAI_API_KEY="):
return line.split("=", 1)[1].strip()
return None
def _extract_key_value(record: Mapping[str, object]) -> str | None:
value = record.get("value") or record.get("key")
if value:
return str(value)
budget = record.get("virtual_key") if isinstance(record.get("virtual_key"), Mapping) else None
if isinstance(budget, Mapping):
inner_value = budget.get("value") or budget.get("key")
if inner_value:
return str(inner_value)
return None
def find_existing_virtual_key() -> Mapping[str, object] | None:
status, body = get_json("/api/governance/virtual-keys")
if status != 200:
log(f"Could not list virtual keys ({status}): {body}")
return None
try:
data = json.loads(body)
except json.JSONDecodeError as exc: # pragma: no cover - defensive
log(f"Failed to parse virtual key list: {exc}")
return None
candidates: Sequence[Mapping[str, object]]
if isinstance(data, dict) and "virtual_keys" in data and isinstance(data["virtual_keys"], list):
candidates = [item for item in data["virtual_keys"] if isinstance(item, Mapping)]
elif isinstance(data, list):
candidates = [item for item in data if isinstance(item, Mapping)]
else:
log("Virtual key list response in unexpected format; skipping lookup")
return None
for item in candidates:
if str(item.get("name", "")).strip() == DEFAULT_VIRTUAL_KEY_NAME:
return item
return None
def upsert_virtual_key() -> str | None:
existing_env = current_env_key()
record = find_existing_virtual_key()
if record:
key = _extract_key_value(record)
if key:
log("Reusing existing virtual key from proxy store")
return key
if existing_env and looks_like_virtual_key(existing_env):
log(
"Virtual key present in env but not found in proxy store; issuing a new key"
)
payload = {
"name": DEFAULT_VIRTUAL_KEY_NAME,
"user_id": DEFAULT_VIRTUAL_KEY_USER,
"budget": {"max_limit": 25.0, "reset_duration": "7d"},
}
status, body = post_json("/api/governance/virtual-keys", payload)
if status not in {200, 201}:
log(f"Failed to create virtual key ({status}): {body}")
return None
try:
data = json.loads(body)
except json.JSONDecodeError as exc: # pragma: no cover - defensive
log(f"Could not parse virtual key response: {exc}")
return None
key = _extract_key_value(data)
if not key:
log(f"Virtual key response missing key field: {body}")
return None
log("Generated new virtual key for task agent")
return key
def persist_key_to_env_file(new_key: str) -> None:
lines = read_env_file()
updated = False
for idx, line in enumerate(lines):
if line.startswith("OPENAI_API_KEY="):
lines[idx] = f"OPENAI_API_KEY={new_key}"
updated = True
break
if not updated:
lines.append(f"OPENAI_API_KEY={new_key}")
write_env_file(lines)
log(f"Wrote virtual key to {ENV_FILE_PATH}")
os.environ["OPENAI_API_KEY"] = new_key
def main() -> int:
log("Bootstrapping Bifrost proxy")
try:
wait_for_proxy()
providers_config = configure_providers()
existing_key = current_env_key()
new_key = upsert_virtual_key()
virtual_key_value = new_key or existing_key
if new_key and new_key != existing_key:
persist_key_to_env_file(new_key)
update_config_file(providers_config, virtual_key_value)
log("Bootstrap complete")
return 0
except Exception as exc: # pragma: no cover - startup failure reported to logs
log(f"Bootstrap failed: {exc}")
return 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,149 @@
---
title: "Run the LLM Proxy"
description: "Deploy Bifrost (default) or LiteLLM as an LLM gateway and connect it to the task agent."
---
## Overview
FuzzForge routes every LLM request through a proxy so that usage can be metered, priced, and rate-limited per user. The repository ships Docker Compose services for two supported gateways (only the LiteLLM alternative sits behind a Compose profile):
- **Bifrost** (`maximhq/bifrost`) — default option with granular governance and budgeting
- **LiteLLM Proxy** (`ghcr.io/berriai/litellm`) — drop-in alternative that exposes similar OpenAI-compatible endpoints
Both services read provider credentials from `volumes/env/.env` and persist their internal state in dedicated Docker volumes, so configuration survives container restarts.
## Before You Start
1. Copy `volumes/env/.env.example` to `volumes/env/.env` and fill in:
- Leave `OPENAI_API_KEY=sk-proxy-default`, or paste your raw OpenAI key if you
want the bootstrapper to migrate it automatically into `volumes/env/.env.bifrost`
- `FF_LLM_PROXY_BASE_URL` pointing to the proxy hostname inside Docker
- Optional `LITELLM_MASTER_KEY`/`LITELLM_SALT_KEY` if you plan to run the LiteLLM proxy
2. When running tools outside Docker, change `FF_LLM_PROXY_BASE_URL` to the published host port (for example `http://localhost:10999`). A minimal starting point for the file is sketched below.
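A minimal sketch of `volumes/env/.env` before the first launch, using only values already described in `.env.example` (the secrets shown are illustrative placeholders):
```bash
# volumes/env/.env (sketch)
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080    # http://localhost:10999 when running outside Docker
OPENAI_API_KEY=sk-proxy-default                # the bootstrap job replaces this with a virtual key
BIFROST_OPENAI_KEY=sk-your-real-openai-key     # upstream secret consumed only by the proxy
# LITELLM_MASTER_KEY=sk-master-key             # only needed for the LiteLLM profile
# LITELLM_SALT_KEY=choose-a-random-string
```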
## Bifrost Gateway (default)
Start the default service with Docker Compose:
```bash
# Launch the proxy + UI (http://localhost:10999)
docker compose up llm-proxy
```
The container stores its SQLite databases in the named volume `fuzzforge_llm_proxy_data`, so your configuration, request logs, and issued virtual keys persist across restarts. On startup a bootstrap job seeds the default providers, creates the `fuzzforge-task-agent` virtual key, and writes the generated token back to `volumes/env/.env` so the agent picks it up automatically.
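Before relying on the seeded configuration, you can poll the same health endpoint the bootstrap script waits on and peek at the generated `config.json` (a sketch; the volume layout follows the compose definitions in this change set):
```bash
# The bootstrap container blocks on this endpoint before seeding providers
curl -s http://localhost:10999/health

# Inspect the provider/governance config written into the named volume
docker run --rm -v fuzzforge_llm_proxy_data:/data alpine cat /data/config.json
```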
### Configure providers
1. Open `http://localhost:10999` and follow the onboarding flow.
2. Upstream keys are added automatically when the bootstrap job finds standard
   variables such as `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, and so on. The raw secrets
   are mirrored into `volumes/env/.env.bifrost`, so future restarts rehydrate the
   proxy without further manual edits. The same pass also generates `/app/data/config.json`
   (backed by the `fuzzforge_llm_proxy_data` volume) populated with provider entries,
   `client.drop_excess_requests=false`, and an enabled SQLite `config_store`, so
   budgets and UI-driven configuration persist across restarts.
To raise the upstream timeout beyond the 30s default, set `BIFROST_DEFAULT_TIMEOUT_SECONDS`
or provider-specific overrides such as `BIFROST_ANTHROPIC_TIMEOUT_SECONDS` in
`volumes/env/.env` before bootstrapping; the script propagates them to the proxy's
network configuration automatically.
3. (Optional) Set `BIFROST_OPENAI_MODELS` to a comma-separated list if you want
to scope a key to specific models (for example `openai/gpt-5,openai/gpt-5-nano`).
When you target Responses-only models, flip `BIFROST_OPENAI_USE_RESPONSES_API=true`
so the proxy runs them against the newer endpoint. You can still add or rotate
keys manually via **Providers → Add key**—reference either the migrated
`env.BIFROST_*` variables or paste the secret directly.
4. (Optional) Add price caps, context window overrides, or caching policies from the same UI. Settings are stored immediately in the mounted data volume.
If you prefer a file-based bootstrap, mount a `config.json` under `/app/data` that references the same environment variables:
```json
{
"providers": {
"openai": {
"keys": [{ "value": "env.BIFROST_OPENAI_KEY", "weight": 1.0 }]
}
}
}
```
### Issue per-user virtual keys
Virtual keys let you attach budgets and rate limits to each downstream agent. Create them from the UI (**Governance → Virtual Keys**) or via the API:
```bash
curl -X POST http://localhost:10999/api/governance/virtual-keys \
-H "Authorization: Bearer <admin-access-token>" \
-H "Content-Type: application/json" \
-d '{
"name": "task-agent default",
"user_id": "fuzzforge-task-agent",
"budget": {"max_limit": 10.0, "reset_duration": "1d"}
}'
```
Use the returned `key` value as the agent's `OPENAI_API_KEY`. When making requests manually, send the header `x-bf-vk: <virtual-key>` (the task agent handles this automatically once the key is in the environment).
You can generate scoped keys for teammates the same way to give each person isolated quotas and audit trails.
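For a manual smoke test with a scoped key, the `x-bf-vk` header described above can be sent directly (a sketch; substitute a key issued by the gateway):
```bash
curl -X POST http://localhost:10999/v1/chat/completions \
  -H "x-bf-vk: <virtual-key>" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-5",
    "messages": [{"role": "user", "content": "Scoped key check"}]
  }'
```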
## LiteLLM Proxy (alternative)
If you prefer LiteLLM's gateway, enable the second profile:
```bash
# Requires LITELLM_MASTER_KEY + LITELLM_SALT_KEY in volumes/env/.env
docker compose --profile proxy-litellm up llm-proxy-litellm
```
The service exposes the admin UI at `http://localhost:4110/ui` and stores state in the `fuzzforge_litellm_proxy_data` volume (SQLite by default).
Generate user-facing keys with the built-in `/key/generate` endpoint:
```bash
curl http://localhost:4110/key/generate \
-H "Authorization: Bearer $LITELLM_MASTER_KEY" \
-H "Content-Type: application/json" \
-d '{
"models": ["openai/gpt-4o-mini"],
"metadata": {"user": "fuzzforge-task-agent"},
"duration": "7d",
"budget": {"soft": 8.0, "hard": 10.0}
}'
```
Set the returned key as `OPENAI_API_KEY` for the task agent and update its base URL to `http://llm-proxy-litellm:4000` (or `http://localhost:4110` outside Docker).
## Wiring the Task Agent
Both proxies expose an OpenAI-compatible API. The LiteLLM agent only needs the base URL and a bearer token:
```bash
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080 # or http://llm-proxy-litellm:4000 when switching proxies
OPENAI_API_KEY=sk-proxy-default # virtual key issued by the gateway
LITELLM_MODEL=openai/gpt-5
LITELLM_PROVIDER=openai
```
The agent automatically forwards requests to the configured proxy and never touches the raw provider secrets. When you hot-swap models from the UI or CLI, the proxy enforces the budgets and rate limits tied to the virtual key.
To verify end-to-end connectivity, run:
```bash
curl -X POST http://localhost:10999/v1/chat/completions \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-5",
"messages": [{"role": "user", "content": "Proxy health check"}]
}'
```
Replace the host/port with the LiteLLM endpoint when using that gateway.
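For the LiteLLM gateway the same check looks like this (a sketch against the published LiteLLM port, reusing the model from the key-generation example above):
```bash
curl -X POST http://localhost:4110/v1/chat/completions \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-4o-mini",
    "messages": [{"role": "user", "content": "Proxy health check"}]
  }'
```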
## Switching Between Proxies
1. Stop the current proxy container.
2. Update `FF_LLM_PROXY_BASE_URL` in `volumes/env/.env` to the new service endpoint (`http://llm-proxy:8080` or `http://llm-proxy-litellm:4000`).
3. Replace `OPENAI_API_KEY` with the virtual key generated by the selected proxy.
4. Restart the `task-agent` container so it picks up the new environment.
Because the agent only knows about the OpenAI-compatible interface, no code changes are required when switching between Bifrost and LiteLLM.
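As a concrete example, moving from Bifrost to LiteLLM comes down to two env edits plus a restart (a sketch; the `task-agent` service name is assumed from the compose file):
```bash
# 1. In volumes/env/.env (sketch):
#      FF_LLM_PROXY_BASE_URL=http://llm-proxy-litellm:4000
#      OPENAI_API_KEY=<key returned by /key/generate>
# 2. Start the alternative proxy and restart the agent
docker compose --profile proxy-litellm up -d llm-proxy-litellm
docker compose restart task-agent
```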

View File

@@ -1,17 +1,51 @@
# FuzzForge Agent Configuration
# Copy this to .env and configure your API keys
# Copy this to .env and configure your API keys and proxy settings
# LiteLLM Model Configuration
LITELLM_MODEL=gemini/gemini-2.0-flash-001
# LITELLM_PROVIDER=gemini
# LiteLLM Model Configuration (default routed through the proxy)
LITELLM_MODEL=openai/gpt-5
LITELLM_PROVIDER=openai
# API Keys (uncomment and configure as needed)
# GOOGLE_API_KEY=
# OPENAI_API_KEY=
# ANTHROPIC_API_KEY=
# OPENROUTER_API_KEY=
# MISTRAL_API_KEY=
# Proxy configuration
# Base URL is used by the task agent to talk to the proxy container inside Docker.
# When running everything locally without Docker networking, replace with http://localhost:10999.
FF_LLM_PROXY_BASE_URL=http://llm-proxy:8080
# Agent Configuration
# Default virtual key issued by the proxy for the task agent.
# Leave as sk-proxy-default and the bootstrap job will replace it on startup.
# If you paste your real OpenAI key here before the first boot, the bootstrap
# job moves it into a generated .env.bifrost file and swaps this value with the
# proxy-issued virtual key so the agent still authenticates via the gateway.
OPENAI_API_KEY=sk-proxy-default
# Upstream provider keys (loaded by the proxy via env.BIFROST_* references).
# You can fill these directly, but normally the bootstrapper manages them for
# you in .env.bifrost after the first startup. To scope keys to specific models,
# set BIFROST_OPENAI_MODELS=openai/gpt-5 (or similar) before launching the proxy.
# BIFROST_OPENAI_KEY=
# BIFROST_OPENAI_MODELS=openai/gpt-5,openai/gpt-5-nano
# BIFROST_OPENAI_USE_RESPONSES_API=true
# Increase the proxy's upstream request timeout (seconds). Applies per provider,
# falling back to BIFROST_DEFAULT_TIMEOUT_SECONDS when the provider-specific
# value is not set.
# BIFROST_DEFAULT_TIMEOUT_SECONDS=60
# BIFROST_OPENAI_TIMEOUT_SECONDS=60
# BIFROST_ANTHROPIC_KEY=
# BIFROST_ANTHROPIC_TIMEOUT_SECONDS=60
# BIFROST_GEMINI_KEY=
# BIFROST_GEMINI_TIMEOUT_SECONDS=60
# BIFROST_MISTRAL_KEY=
# BIFROST_MISTRAL_TIMEOUT_SECONDS=60
# BIFROST_OPENROUTER_KEY=
# BIFROST_OPENROUTER_TIMEOUT_SECONDS=60
# LiteLLM proxy (alternative gateway)
# LITELLM_MASTER_KEY=sk-master-key
# LITELLM_SALT_KEY=choose-a-random-string
# Bifrost gateway (default proxy)
# APP_HOST=0.0.0.0
# APP_PORT=8080
# Agent behaviour
# DEFAULT_TIMEOUT=120
# DEFAULT_CONTEXT_ID=default