# Source: OBLITERATUS/obliteratus/telemetry.py
# (file-viewer metadata: 2026-03-04 12:38:18 -08:00, 1178 lines, 41 KiB, Python)

"""Anonymous telemetry for community benchmark collection.
Logs benchmark results to a local JSONL file and automatically syncs to a
central HuggingFace Dataset repo for cross-Space community leaderboard
aggregation. No user identity, IP addresses, or prompt content is stored —
only aggregate benchmark metrics (model name, method, scores, hardware info,
timestamp).
Telemetry is disabled by default to respect user privacy. Users can opt in
by setting OBLITERATUS_TELEMETRY=1 or calling enable_telemetry(). On
HuggingFace Spaces, telemetry is auto-enabled for community leaderboard.
Architecture:
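1. Every benchmark/obliteration run appends a record to a local JSONL
file named telemetry.jsonl in the first writable directory among:
$OBLITERATUS_DATA_DIR, /data/obliteratus (HF Spaces persistent storage),
~/.obliteratus, and /tmp/obliteratus_telemetry (last resort, ephemeral).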
2. On HuggingFace Spaces, records are automatically synced to a central
HuggingFace Dataset repo (default: pliny-the-prompter/OBLITERATUS-TELEMETRY,
configurable via OBLITERATUS_TELEMETRY_REPO). Each Space instance
uploads its own JSONL file (keyed by SPACE_ID + session), so
duplicated Spaces all feed into the same central leaderboard.
3. The Leaderboard tab reads from both local JSONL *and* the central Hub
dataset, merging and deduplicating results so all community
contributions are visible regardless of which Space instance
generated them.
"""
from __future__ import annotations
import hashlib
import json
import logging
import math
import os
import platform
import time
import threading
import uuid
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# ── Configuration ─────────────────────────────────────────────────────
# True when the SPACE_ID environment variable is present (HuggingFace Spaces).
_ON_HF_SPACES = os.environ.get("SPACE_ID") is not None
# ── Telemetry state ──────────────────────────────────────────────────
# Programmatic override set by enable_telemetry()/disable_telemetry().
# None means "no override": is_enabled() then consults OBLITERATUS_TELEMETRY.
_enabled: bool | None = None
# Central Hub repo for cross-Space telemetry aggregation.
# Default repo is used on HF Spaces so all instances (including duplicated
# Spaces) send data to the same central dataset automatically.
# Outside Spaces the default is the empty string, which disables Hub sync
# unless OBLITERATUS_TELEMETRY_REPO is set explicitly.
_DEFAULT_TELEMETRY_REPO = "pliny-the-prompter/OBLITERATUS-TELEMETRY"
_TELEMETRY_REPO = os.environ.get(
"OBLITERATUS_TELEMETRY_REPO",
_DEFAULT_TELEMETRY_REPO if _ON_HF_SPACES else "",
)
# Hub sync debounce interval (seconds). After each log_benchmark(), we
# schedule a background upload but skip if the last sync was < this many
# seconds ago. This prevents hammering the Hub API during rapid benchmark
# loops while still ensuring timely uploads.
_HUB_SYNC_INTERVAL = 30
# time.time() value recorded when the last sync was scheduled (0.0 = never).
_hub_sync_last: float = 0.0
# Guards the read-modify-write of _hub_sync_last in _schedule_hub_sync().
_hub_sync_lock = threading.Lock()
# Set to True by _ensure_hub_repo() once the Hub repo was created or verified.
_hub_repo_created: bool = False
# Locate writable telemetry directory
def _is_mount_point(path: Path) -> bool:
"""Check if a path is a mount point (different device from parent)."""
try:
if not path.exists():
return False
return path.stat().st_dev != path.parent.stat().st_dev
except (OSError, ValueError):
return False
def _test_writable(d: Path) -> bool:
"""Test if a directory exists and is writable."""
try:
d.mkdir(parents=True, exist_ok=True)
test_file = d / ".write_test"
test_file.write_text("ok")
test_file.unlink()
return True
except (PermissionError, OSError):
return False
def _telemetry_dir() -> Path:
    """Pick the directory that will hold the telemetry JSONL file.

    Candidates are tried in this order and the first writable one wins:

    1. ``OBLITERATUS_DATA_DIR`` env var (explicit override)
    2. ``/data/obliteratus`` when running on HuggingFace Spaces and ``/data``
       exists (persistent storage); tried up to three times, one second
       apart, in case the mount is not ready yet
    3. ``~/.obliteratus`` (local installs)
    4. ``/tmp/obliteratus_telemetry`` (last resort — does NOT survive rebuilds)

    The chosen location is logged; falling through a candidate that was
    expected to work is logged as a warning.
    """
    # 1. Explicit override — always wins when writable.
    override = os.environ.get("OBLITERATUS_DATA_DIR")
    if override:
        override_path = Path(override)
        if _test_writable(override_path):
            logger.info("Telemetry storage: %s (OBLITERATUS_DATA_DIR)", override_path)
            return override_path
        logger.warning(
            "OBLITERATUS_DATA_DIR=%s is not writable, falling through", override
        )

    # 2. HF Spaces persistent storage at /data.
    data_root = Path("/data")
    if _ON_HF_SPACES and data_root.exists():
        hf_dir = data_root / "obliteratus"
        for attempt in range(3):
            if attempt:
                # Brief wait for the mount to become ready before retrying.
                time.sleep(1)
            if not _test_writable(hf_dir):
                continue
            if attempt > 0:
                logger.info(
                    "Telemetry storage: %s (HF persistent, ready after %d retries)",
                    hf_dir, attempt,
                )
            else:
                logger.info("Telemetry storage: %s (HF persistent storage)", hf_dir)
            return hf_dir
        # /data exists but never became writable — warn loudly.
        is_mount = _is_mount_point(data_root)
        logger.warning(
            "/data exists (mount_point=%s) but /data/obliteratus is NOT writable. "
            "Persistent storage may not be enabled for this Space. "
            "Data will NOT survive factory rebuilds! "
            "Enable persistent storage in Space settings or set OBLITERATUS_DATA_DIR.",
            is_mount,
        )

    # 3. Home directory (local installs).
    home_dir = Path.home() / ".obliteratus"
    if _test_writable(home_dir):
        logger.info("Telemetry storage: %s (home directory)", home_dir)
        return home_dir

    # 4. Last resort — /tmp does NOT survive rebuilds.
    tmp_dir = Path("/tmp/obliteratus_telemetry")
    tmp_dir.mkdir(parents=True, exist_ok=True)
    if not _ON_HF_SPACES:
        logger.info("Telemetry storage: %s (temporary)", tmp_dir)
        return tmp_dir
    logger.warning(
        "Telemetry storage: %s — this is EPHEMERAL and will be lost on rebuild! "
        "Enable persistent storage in your Space settings.",
        tmp_dir,
    )
    return tmp_dir
# Resolved once at import time; _telemetry_dir() probes candidate directories
# (creating them and writing a test file) and logs the one it picks.
_TELEMETRY_DIR = _telemetry_dir()
# Local JSONL file that both v1 benchmark records and v2 reports are appended to.
TELEMETRY_FILE = _TELEMETRY_DIR / "telemetry.jsonl"
# Lock for thread-safe writes
_write_lock = threading.Lock()
def _is_persistent_storage() -> bool:
    """Return True if the telemetry directory is ``/data`` or lives beneath it.

    The comparison is done on path components rather than on a raw string
    prefix, so sibling directories such as ``/database`` or ``/data2`` are not
    mistaken for the ``/data`` persistent-storage mount.
    """
    data_root = Path("/data")
    return _TELEMETRY_DIR == data_root or data_root in _TELEMETRY_DIR.parents
def storage_diagnostic() -> dict[str, Any]:
    """Summarise where telemetry is stored and how it is configured.

    Returns a flat dict covering the telemetry directory and file (path,
    existence, size), whether ``/data`` exists / is a mount point / is
    writable, the relevant environment overrides, the configured Hub repo,
    and whether telemetry is currently enabled. Intended for debugging
    persistent-storage problems on HF Spaces.
    """
    data_root = Path("/data")
    file_exists = TELEMETRY_FILE.exists()
    file_size = TELEMETRY_FILE.stat().st_size if TELEMETRY_FILE.exists() else 0

    info: dict[str, Any] = {}
    info["telemetry_dir"] = str(_TELEMETRY_DIR)
    info["telemetry_file"] = str(TELEMETRY_FILE)
    info["telemetry_file_exists"] = file_exists
    info["telemetry_file_size_bytes"] = file_size
    info["is_persistent"] = _is_persistent_storage()
    info["on_hf_spaces"] = _ON_HF_SPACES
    info["data_dir_exists"] = data_root.exists()
    info["data_dir_is_mount"] = _is_mount_point(data_root)
    if data_root.exists():
        info["data_dir_writable"] = os.access(data_root, os.W_OK)
    else:
        info["data_dir_writable"] = False
    info["explicit_data_dir"] = os.environ.get("OBLITERATUS_DATA_DIR", "")
    info["telemetry_repo"] = _TELEMETRY_REPO
    info["telemetry_enabled"] = is_enabled()
    return info
def disable_telemetry():
    """Turn telemetry off for this process.

    Sets the module-level override, which takes precedence over the
    OBLITERATUS_TELEMETRY environment variable in is_enabled().
    """
    global _enabled
    _enabled = bool(0)
def enable_telemetry():
    """Turn telemetry on for this process.

    Sets the module-level override, which takes precedence over the
    OBLITERATUS_TELEMETRY environment variable in is_enabled().
    """
    global _enabled
    _enabled = bool(1)
def is_telemetry_enabled() -> bool:
    """Alias for :func:`is_enabled`; returns the same value."""
    state = is_enabled()
    return state
def is_enabled() -> bool:
    """Check if telemetry is enabled (off by default, opt in with OBLITERATUS_TELEMETRY=1).

    This is the single source of truth for telemetry state. Both v1
    (log_benchmark) and v2 (send_report) paths check this function.

    Resolution order:
      1. A programmatic override from enable_telemetry()/disable_telemetry().
      2. The OBLITERATUS_TELEMETRY environment variable. The value is compared
         case-insensitively and ignoring surrounding whitespace; ``0``,
         ``false``, ``no`` and ``off`` disable telemetry. An empty value is
         treated as unset.
      3. The default: enabled on HF Spaces, disabled everywhere else.
    """
    if _enabled is not None:
        return _enabled
    default = "1" if _ON_HF_SPACES else "0"
    # Telemetry is opt-in, so spellings such as "False" or "off" must not
    # accidentally switch it on.
    value = os.environ.get("OBLITERATUS_TELEMETRY", default).strip().lower()
    if not value:
        value = default
    return value not in ("0", "false", "no", "off")
# ── Record schema ─────────────────────────────────────────────────────
@dataclass
class BenchmarkRecord:
    """One benchmark result, as stored on a single line of the telemetry JSONL.

    All fields have defaults, so callers fill in only what they know.
    ``timestamp`` is filled with the current UTC time (ISO-8601) when left
    empty.
    """

    # --- identity -----------------------------------------------------
    timestamp: str = ""
    session_id: str = ""  # Random per-session, not per-user

    # --- model --------------------------------------------------------
    model_id: str = ""
    model_family: str = ""  # e.g. "qwen", "llama", "gemma"
    model_size_b: float = 0.0  # Billions of parameters
    is_moe: bool = False

    # --- method configuration -----------------------------------------
    method: str = ""
    n_directions: int = 0
    norm_preserve: bool = False
    refinement_passes: int = 0
    use_whitened_svd: bool = False
    use_bayesian: bool = False

    # --- dataset ------------------------------------------------------
    dataset: str = ""
    n_prompts: int = 0

    # --- results (None = metric not measured) ---------------------------
    refusal_rate: float | None = None
    perplexity: float | None = None
    coherence: float | None = None
    kl_divergence: float | None = None
    strong_layers: int = 0
    ega_expert_dirs: int = 0
    time_seconds: float = 0.0
    error: str | None = None

    # --- hardware -----------------------------------------------------
    gpu_name: str = ""
    gpu_vram_gb: float = 0.0
    quantization: str | None = None

    # --- free-form extras ---------------------------------------------
    extra: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Stamp the record with "now" (UTC) unless the caller supplied a time.
        self.timestamp = self.timestamp or datetime.now(timezone.utc).isoformat()
# ── Session ID (random, per-process, non-identifying) ────────────────
def _generate_session_id() -> str:
"""Generate a random session ID (not tied to user identity)."""
import random
raw = f"{time.time()}-{random.random()}-{os.getpid()}"
return hashlib.sha256(raw.encode()).hexdigest()[:12]
_SESSION_ID = _generate_session_id()
# ── Hub sync (cross-Space telemetry aggregation) ─────────────────────
def _instance_slug() -> str:
    """Build the per-instance identifier used as the Hub upload filename.

    The slug is ``<hash>_<session id>``, where ``<hash>`` is the first 10 hex
    characters of the SHA-256 of the ``SPACE_ID`` environment variable
    (``"local"`` when unset). Hashing keeps the Space owner's name out of the
    public dataset.
    """
    raw_space = os.environ.get("SPACE_ID", "local")
    hashed_space = hashlib.sha256(raw_space.encode()).hexdigest()[:10]
    return "_".join((hashed_space, _SESSION_ID))
_hub_repo_lock = threading.Lock()
# Repo IDs that were created or verified during this process. Readiness is
# tracked per repo so a success for one repo is not reused for another
# (push_to_hub() accepts an arbitrary repo_id).
_hub_repos_ready: set[str] = set()


def _ensure_hub_repo(repo_id: str) -> bool:
    """Make sure the telemetry dataset repo *repo_id* exists on the Hub.

    First tries ``create_repo(..., exist_ok=True)``, which works when the
    token owns the namespace. If that fails, falls back to ``repo_info`` so
    collaborators who can write but not create are still accepted. A positive
    result is cached per ``repo_id``; failures are not cached, so a later call
    retries. Thread-safe via ``_hub_repo_lock``.

    Returns True if the repo is ready, False if huggingface_hub is missing or
    both Hub calls failed.
    """
    global _hub_repo_created
    if repo_id in _hub_repos_ready:
        return True
    with _hub_repo_lock:
        if repo_id in _hub_repos_ready:  # double-check under lock
            return True
        try:
            from huggingface_hub import HfApi
        except ImportError:
            logger.warning("huggingface_hub not installed — cannot ensure Hub repo")
            return False

        api = HfApi(token=os.environ.get("HF_TOKEN"))

        # First try create_repo (works if we own the namespace).
        try:
            api.create_repo(
                repo_id=repo_id,
                repo_type="dataset",
                private=False,
                exist_ok=True,
            )
        except Exception as e:
            # Expected for collaborators; keep the reason for debugging.
            logger.debug(
                "create_repo failed for %s, falling back to repo_info: %s",
                repo_id, e,
            )
        else:
            _hub_repos_ready.add(repo_id)
            _hub_repo_created = True  # legacy flag: "some repo is ready"
            return True

        # Fallback: check if the repo already exists (works for
        # collaborators / org members who can write but not create).
        try:
            api.repo_info(repo_id=repo_id, repo_type="dataset")
        except Exception as e:
            logger.warning(
                f"Hub repo {repo_id}: create_repo failed and repo_info "
                f"also failed — repo may not exist or token lacks access: {e}"
            )
            return False
        _hub_repos_ready.add(repo_id)
        _hub_repo_created = True  # legacy flag: "some repo is ready"
        logger.info(f"Hub repo {repo_id} exists (verified via repo_info)")
        return True
_sync_in_progress = threading.Event()
# Makes the "is a sync running? if not, mark it running" step atomic, so two
# threads cannot both observe the event as clear and start uploading.
_sync_claim_lock = threading.Lock()


def _sync_to_hub_bg() -> None:
    """Background thread target: upload local JSONL to the central Hub repo.

    Each Space instance writes its data to a unique file path in the repo:
        data/{instance_slug}.jsonl
    This avoids write conflicts between concurrent Space instances while
    ensuring all data lands in the same dataset repository.

    Only one upload runs at a time: a call made while another is in flight
    returns immediately. All failures are logged as warnings and never
    raised.
    """
    with _sync_claim_lock:
        if _sync_in_progress.is_set():
            return  # Another sync is already running
        _sync_in_progress.set()
    try:
        repo = _TELEMETRY_REPO
        if not repo:
            return
        if not TELEMETRY_FILE.exists():
            return
        from huggingface_hub import HfApi

        if not _ensure_hub_repo(repo):
            return
        api = HfApi(token=os.environ.get("HF_TOKEN"))
        slug = _instance_slug()
        api.upload_file(
            path_or_fileobj=str(TELEMETRY_FILE),
            path_in_repo=f"data/{slug}.jsonl",
            repo_id=repo,
            repo_type="dataset",
            commit_message=f"Auto-sync telemetry from {slug}",
        )
        logger.info(f"Synced telemetry to {repo}/data/{slug}.jsonl")
    except Exception as e:
        logger.warning(f"Hub sync failed: {e}")
    finally:
        _sync_in_progress.clear()
def _schedule_hub_sync() -> None:
    """Start a background Hub upload unless one was scheduled very recently.

    Does nothing when no telemetry repo is configured, when telemetry is
    disabled, or when fewer than ``_HUB_SYNC_INTERVAL`` seconds have passed
    since the previous sync was scheduled. Otherwise records the current time
    and launches ``_sync_to_hub_bg`` on a daemon thread.

    NOTE: a call that is skipped by the debounce does not queue a later
    upload; the data is picked up by whichever later call starts a sync.
    """
    global _hub_sync_last
    if not (_TELEMETRY_REPO and is_enabled()):
        return
    with _hub_sync_lock:
        current = time.time()
        since_last = current - _hub_sync_last
        if since_last < _HUB_SYNC_INTERVAL:
            return
        _hub_sync_last = current
    threading.Thread(target=_sync_to_hub_bg, daemon=True).start()
def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
    """Fetch telemetry records from the central HF Hub dataset.

    Downloads the per-instance JSONL files from the ``data/`` directory
    in the telemetry repo and parses them into records. This is used by
    :func:`get_leaderboard_data` to merge community-wide results with local
    data.

    Args:
        max_records: Upper bound on the number of records returned; reading
            stops as soon as it is reached.

    Returns:
        A list of record dicts. Empty if the repo is not configured,
        huggingface_hub is not installed, or the repo is not reachable.
        Lines that are blank, not valid JSON, or not JSON objects are
        skipped, as are files that fail to download or decode.
    """
    repo = _TELEMETRY_REPO
    if not repo:
        return []
    try:
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        logger.debug("huggingface_hub not installed — cannot fetch Hub records")
        return []
    try:
        api = HfApi(token=os.environ.get("HF_TOKEN"))
        try:
            all_files = api.list_repo_files(repo, repo_type="dataset")
        except Exception as e:
            # Repo doesn't exist yet or network error
            logger.debug("Could not list files in Hub repo %s: %s", repo, e)
            return []
        jsonl_files = [
            f for f in all_files if f.startswith("data/") and f.endswith(".jsonl")
        ]
        records: list[dict[str, Any]] = []
        for filepath in jsonl_files:
            if len(records) >= max_records:
                break
            try:
                # etag_timeout is documented as the number of seconds to wait
                # for the ETag request, not a cache-bypass switch, so it is
                # left at the library default here.
                # NOTE(review): confirm hf_hub_download still revalidates the
                # cached file against the Hub by default in the installed
                # huggingface_hub version.
                local_path = hf_hub_download(repo, filepath, repo_type="dataset")
                with open(local_path, encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            parsed = json.loads(line)
                        except json.JSONDecodeError:
                            continue
                        # The dataset is community-written: anything that is
                        # not a JSON object would break callers using .get().
                        if not isinstance(parsed, dict):
                            continue
                        records.append(parsed)
                        if len(records) >= max_records:
                            break
            except Exception as e:
                logger.debug("Skipping Hub telemetry file %s: %s", filepath, e)
                continue
        return records
    except Exception as e:
        logger.debug(f"Failed to fetch Hub records: {e}")
        return []
# ── Hub restore (warm-start after rebuild) ────────────────────────────
# One-shot guard: restore_from_hub() does its work at most once per process.
_restore_done = False
_restore_lock = threading.Lock()


def restore_from_hub() -> int:
    """Append Hub records that are missing locally to the local JSONL file.

    This lets the leaderboard be rebuilt from the central Hub dataset after
    local storage was lost. A record counts as already present when its
    ``(session_id, timestamp)`` pair is found in the local file (or was
    already written during this call).

    Only the first call does any work; every later call returns 0. Errors are
    logged as a warning and also yield 0.

    Returns the number of records appended.
    """
    global _restore_done
    if _restore_done:
        return 0
    with _restore_lock:
        if _restore_done:
            return 0
        _restore_done = True

    hub_repo = _TELEMETRY_REPO
    if not hub_repo:
        return 0

    def _identity(rec):
        return (rec.get("session_id", ""), rec.get("timestamp", ""))

    try:
        # Collect the identities of records that are already stored locally.
        known: set[tuple[str, str]] = set()
        if TELEMETRY_FILE.exists():
            try:
                with open(TELEMETRY_FILE) as local_file:
                    for raw in local_file:
                        text = raw.strip()
                        if not text:
                            continue
                        try:
                            known.add(_identity(json.loads(text)))
                        except json.JSONDecodeError:
                            continue
            except Exception:
                # Best effort: an unreadable local file just means less dedup.
                pass

        remote = fetch_hub_records()
        if not remote:
            return 0

        restored = 0
        with _write_lock, open(TELEMETRY_FILE, "a") as out:
            for rec in remote:
                ident = _identity(rec)
                if ident in known:
                    continue
                known.add(ident)
                out.write(json.dumps(rec, default=str) + "\n")
                restored += 1

        if restored:
            logger.info(
                "Restored %d records from Hub repo %s to local storage at %s",
                restored, hub_repo, TELEMETRY_FILE,
            )
        return restored
    except Exception as e:
        logger.warning("Hub restore failed: %s", e)
        return 0
def _restore_from_hub_bg() -> None:
    """Thread target that runs restore_from_hub() and logs any exception."""
    try:
        restore_from_hub()
    except Exception as e:
        logger.warning("Background Hub restore failed: %s", e)


# Kick off the restore at import time when running on HF Spaces with
# telemetry enabled and a repo configured. It runs on a daemon thread so
# importing this module is not blocked by network access.
if _ON_HF_SPACES and is_enabled() and _TELEMETRY_REPO:
    _restore_thread = threading.Thread(
        target=_restore_from_hub_bg,
        daemon=True,
    )
    _restore_thread.start()
# ── Hardware detection ────────────────────────────────────────────────
def _detect_gpu() -> tuple[str, float]:
"""Detect GPU name and VRAM. Returns ('', 0.0) if no GPU."""
try:
import torch
if torch.cuda.is_available():
name = torch.cuda.get_device_name(0)
vram = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
return name, round(vram, 1)
except Exception:
pass
return "", 0.0
def _detect_model_family(model_id: str) -> str:
"""Extract model family from model ID."""
lower = model_id.lower()
families = [
"qwen", "llama", "gemma", "mistral", "phi", "falcon",
"deepseek", "olmo", "glm", "gpt-oss", "minimax",
"smollm", "internlm", "minicpm", "tinyllama",
]
for f in families:
if f in lower:
return f
return "unknown"
# ── Write / Read ──────────────────────────────────────────────────────
def log_benchmark(record: BenchmarkRecord) -> bool:
    """Write *record* as one JSON line at the end of the local telemetry file.

    Before writing, empty ``session_id``, ``gpu_name`` (together with
    ``gpu_vram_gb``) and ``model_family`` fields are filled in on the record
    itself. After a successful write a debounced Hub sync is scheduled.

    Returns True on success; False when telemetry is disabled or the write
    (or sync scheduling) raised.
    """
    if not is_enabled():
        return False

    # Fill in whatever the caller left blank.
    if not record.session_id:
        record.session_id = _SESSION_ID
    if not record.gpu_name:
        detected_name, detected_vram = _detect_gpu()
        record.gpu_name = detected_name
        record.gpu_vram_gb = detected_vram
    if not record.model_family:
        record.model_family = _detect_model_family(record.model_id)

    try:
        payload = asdict(record)
        with _write_lock, open(TELEMETRY_FILE, "a") as sink:
            sink.write(json.dumps(payload, default=str) + "\n")
        # Auto-sync to central Hub repo (debounced, background thread)
        _schedule_hub_sync()
    except Exception as e:
        logger.debug(f"Telemetry write failed: {e}")
        return False
    return True
def log_benchmark_from_dict(
    model_id: str,
    method: str,
    entry: dict[str, Any],
    dataset: str = "",
    n_prompts: int = 0,
    quantization: str | None = None,
    pipeline_config: dict[str, Any] | None = None,
) -> bool:
    """Convenience wrapper: create a BenchmarkRecord from benchmark result dict.

    Called from app.py benchmark() after each method completes.

    Args:
        model_id: Model identifier stored on the record.
        method: Method name stored on the record.
        entry: Result dict; the keys ``refusal_rate``, ``perplexity``,
            ``coherence``, ``kl_divergence``, ``strong_layers``,
            ``ega_expert_dirs``, ``time_s`` and ``error`` are read, all
            optional.
        dataset: Dataset name.
        n_prompts: Number of prompts used.
        quantization: Quantization label, if any.
        pipeline_config: Optional config dict; ``n_directions``,
            ``norm_preserve``, ``refinement_passes``, ``use_whitened_svd``
            and ``bayesian_trials`` are read.

    Returns:
        The result of :func:`log_benchmark` for the constructed record.
    """
    cfg = pipeline_config or {}
    # A config may carry an explicit None for bayesian_trials; treat it as 0
    # instead of letting `None > 0` raise TypeError into the caller.
    bayesian_trials = cfg.get("bayesian_trials") or 0
    record = BenchmarkRecord(
        model_id=model_id,
        method=method,
        dataset=dataset,
        n_prompts=n_prompts,
        quantization=quantization,
        refusal_rate=entry.get("refusal_rate"),
        perplexity=entry.get("perplexity"),
        coherence=entry.get("coherence"),
        kl_divergence=entry.get("kl_divergence"),
        strong_layers=entry.get("strong_layers", 0),
        ega_expert_dirs=entry.get("ega_expert_dirs", 0),
        time_seconds=entry.get("time_s", 0.0),
        error=entry.get("error"),
        n_directions=cfg.get("n_directions", 0),
        norm_preserve=cfg.get("norm_preserve", False),
        refinement_passes=cfg.get("refinement_passes", 0),
        use_whitened_svd=cfg.get("use_whitened_svd", False),
        use_bayesian=bayesian_trials > 0,
    )
    return log_benchmark(record)
def read_telemetry(max_records: int = 10000) -> list[dict[str, Any]]:
    """Read telemetry records from the local JSONL file, newest first.

    The file is append-only, so the newest records are at the end. When the
    file holds more than ``max_records`` records, the most recent
    ``max_records`` are kept (older ones are dropped).

    Args:
        max_records: Maximum number of records to return; values below zero
            are treated as zero.

    Returns:
        A list of record dicts, newest first. Blank lines, lines that are not
        valid JSON, and JSON values that are not objects are skipped. An
        empty list is returned when the file does not exist. A read error is
        logged at debug level and whatever was read so far is returned.
    """
    from collections import deque

    if not TELEMETRY_FILE.exists():
        return []
    # A bounded deque keeps only the last `max_records` entries while the
    # file is streamed once from the oldest line to the newest.
    recent: deque = deque(maxlen=max(max_records, 0))
    try:
        with open(TELEMETRY_FILE) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    parsed = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if isinstance(parsed, dict):
                    recent.append(parsed)
    except Exception as e:
        logger.debug(f"Telemetry read failed: {e}")
    # Newest first
    return list(reversed(recent))
def _finite_metric_values(runs: list[dict[str, Any]], key: str) -> list[float]:
    """Collect the finite int/float values stored under *key* across *runs*."""
    values = []
    for run in runs:
        value = run.get(key)
        # bool is an int subclass but is never a real metric value.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            continue
        try:
            if math.isfinite(value):
                values.append(value)
        except OverflowError:
            # An int too large to convert to float; ignore it.
            continue
    return values


def get_leaderboard_data() -> list[dict[str, Any]]:
    """Get aggregated leaderboard data from local + Hub telemetry.

    Merges local records with community-wide records from the central Hub
    dataset, deduplicates by (session_id, timestamp), groups by
    (model_id, method) and computes best/avg metrics.

    Records are left out of the leaderboard when they:
      * are not dicts,
      * carry a truthy ``error`` field, or
      * have no ``model_id`` string (for example schema-v2 reports, which are
        appended to the same JSONL file but have no ``model_id`` field).

    Metric values that are missing, non-numeric or non-finite are ignored
    when aggregating. ``gpu`` and ``last_run`` are taken from the run with
    the greatest timestamp string in each group.

    Returns a list of dicts suitable for display in a Gradio Dataframe,
    sorted by lowest best refusal rate, then lowest best perplexity; entries
    without a value for a sort key are placed after those that have one.
    """
    local_records = read_telemetry()
    # Fetch community records from central Hub repo (best-effort).
    try:
        hub_records = fetch_hub_records()
    except Exception as e:
        logger.debug("Hub fetch for leaderboard failed: %s", e)
        hub_records = []

    # Merge, deduplicate by (session_id, timestamp), and group by
    # (model_id, method) in a single pass.
    seen: set[tuple[str, str]] = set()
    groups: dict[tuple[str, str], list[dict[str, Any]]] = {}
    for r in local_records + hub_records:
        if not isinstance(r, dict):
            continue
        # str() keeps the key hashable even if a Hub record holds odd types.
        dedup_key = (str(r.get("session_id", "")), str(r.get("timestamp", "")))
        if dedup_key in seen:
            continue
        seen.add(dedup_key)
        if r.get("error"):
            continue
        model_id = r.get("model_id")
        if not isinstance(model_id, str) or not model_id:
            continue
        method = r.get("method", "")
        if not isinstance(method, str):
            method = str(method)
        groups.setdefault((model_id, method), []).append(r)

    leaderboard = []
    for (model_id, method), runs in groups.items():
        refusal_rates = _finite_metric_values(runs, "refusal_rate")
        perplexities = _finite_metric_values(runs, "perplexity")
        coherences = _finite_metric_values(runs, "coherence")
        times = _finite_metric_values(runs, "time_seconds")
        # Timestamps are written as ISO-8601 UTC strings, which sort
        # chronologically as text.
        latest = max(runs, key=lambda run: str(run.get("timestamp", "")))
        entry = {
            "model": model_id.split("/")[-1],
            "model_id": model_id,
            "method": method,
            "runs": len(runs),
            "best_refusal": min(refusal_rates) if refusal_rates else None,
            "avg_refusal": sum(refusal_rates) / len(refusal_rates) if refusal_rates else None,
            "best_perplexity": min(perplexities) if perplexities else None,
            "avg_perplexity": sum(perplexities) / len(perplexities) if perplexities else None,
            "avg_coherence": sum(coherences) / len(coherences) if coherences else None,
            "avg_time_s": sum(times) / len(times) if times else None,
            "gpu": latest.get("gpu_name", ""),
            "last_run": latest.get("timestamp", ""),
        }
        leaderboard.append(entry)

    def _sort_key(row: dict[str, Any]) -> tuple[float, float]:
        # Test for None explicitly: a refusal rate of 0.0 is the best
        # possible score and must not be mistaken for "missing".
        refusal = row["best_refusal"]
        perplexity = row["best_perplexity"]
        return (
            math.inf if refusal is None else refusal,
            math.inf if perplexity is None else perplexity,
        )

    # Sort: lowest refusal rate first, then by perplexity
    leaderboard.sort(key=_sort_key)
    return leaderboard
def push_to_hub(repo_id: str | None = None) -> bool:
    """Upload this instance's local telemetry file to a Hub dataset repo.

    The file is stored as ``data/{instance_slug}.jsonl`` in *repo_id*, or in
    the configured telemetry repo when *repo_id* is not given. The token is
    read from the ``HF_TOKEN`` environment variable.

    Returns True after a successful upload. Returns False (after logging)
    when no repo is configured, there are no local records, the repo cannot
    be ensured, huggingface_hub is missing, or the upload fails.
    """
    target_repo = repo_id or _TELEMETRY_REPO
    if not target_repo:
        logger.warning("No telemetry repo configured — set OBLITERATUS_TELEMETRY_REPO")
        return False

    records = read_telemetry()
    if not records:
        logger.info("No telemetry records to push")
        return False

    try:
        from huggingface_hub import HfApi

        if not _ensure_hub_repo(target_repo):
            return False
        hub = HfApi(token=os.environ.get("HF_TOKEN"))
        slug = _instance_slug()
        remote_path = f"data/{slug}.jsonl"
        hub.upload_file(
            path_or_fileobj=str(TELEMETRY_FILE),
            path_in_repo=remote_path,
            repo_id=target_repo,
            repo_type="dataset",
            commit_message=f"Manual push from {slug} ({len(records)} records)",
        )
    except ImportError:
        logger.warning("huggingface_hub not installed — cannot push telemetry")
        return False
    except Exception as e:
        logger.warning(f"Failed to push telemetry: {e}")
        return False
    else:
        logger.info(f"Pushed {len(records)} records to {target_repo}/data/{slug}.jsonl")
        return True
# ── V2 Telemetry API: structured report building ────────────────────
# Allow-list of method-configuration keys that may appear in a v2 report.
# build_report() drops any method_config key not listed here, and the
# maybe_send_*_report() helpers read exactly these attributes off the pipeline.
_ALLOWED_METHOD_CONFIG_KEYS = frozenset({
"n_directions", "norm_preserve", "regularization", "refinement_passes",
"project_biases", "use_chat_template", "use_whitened_svd",
"true_iterative_refinement", "use_jailbreak_contrast",
"layer_adaptive_strength", "attention_head_surgery",
"safety_neuron_masking", "per_expert_directions", "use_sae_features",
"invert_refusal", "project_embeddings", "embed_regularization",
"activation_steering", "steering_strength", "expert_transplant",
"transplant_blend", "reflection_strength",
})
# Allow-list of analysis-insight keys that may appear in a v2 report.
# Used by _extract_analysis_insights() and again as a filter in build_report().
_ALLOWED_ANALYSIS_KEYS = frozenset({
"detected_alignment_method", "alignment_confidence",
"alignment_probabilities", "cone_is_polyhedral", "cone_dimensionality",
"mean_pairwise_cosine", "direction_specificity", "cluster_count",
"direction_persistence", "mean_refusal_sparsity_index",
"recommended_sparsity", "use_sparse_surgery", "estimated_robustness",
"self_repair_estimate", "entanglement_score", "entangled_layers",
"clean_layers", "recommended_n_directions",
"recommended_regularization", "recommended_refinement_passes",
"recommended_layers", "skip_layers",
})
def _safe_float(val: Any) -> float | None:
"""Safely convert a value to float, returning None on failure."""
if val is None:
return None
try:
f = float(val)
if not math.isfinite(f):
return None
return f
except (TypeError, ValueError):
return None
def _get_environment_info() -> dict[str, str]:
    """Return Python version, OS name, CPU architecture and torch version.

    Only these four coarse values are collected.
    """
    env: dict[str, str] = {}
    env["python_version"] = platform.python_version()
    env["os"] = platform.system()
    env["arch"] = platform.machine()
    env["torch_version"] = _get_torch_version()
    return env
def _get_torch_version() -> str:
try:
import torch
return torch.__version__
except ImportError:
return "not_installed"
def _get_peak_vram() -> dict[str, float] | None:
try:
import torch
if torch.cuda.is_available():
allocated = torch.cuda.max_memory_allocated() / (1024 ** 3)
reserved = torch.cuda.max_memory_reserved() / (1024 ** 3)
return {
"peak_allocated_gb": round(allocated, 2),
"peak_reserved_gb": round(reserved, 2),
}
except Exception:
pass
return None
def _direction_stats(pipeline) -> dict[str, Any]:
"""Extract direction quality statistics from a pipeline."""
directions = getattr(pipeline, "refusal_directions", {})
subspaces = getattr(pipeline, "refusal_subspaces", {})
if not directions:
return {}
import torch
stats: dict[str, Any] = {}
norms = {}
for idx, d in sorted(directions.items()):
if isinstance(d, torch.Tensor):
norms[str(idx)] = round(d.float().norm().item(), 4)
if norms:
stats["direction_norms"] = norms
sorted_indices = sorted(directions.keys())
if len(sorted_indices) >= 2:
cosines = []
for i in range(len(sorted_indices) - 1):
d1 = directions[sorted_indices[i]].float()
d2 = directions[sorted_indices[i + 1]].float()
cos = torch.nn.functional.cosine_similarity(
d1.unsqueeze(0), d2.unsqueeze(0)
).item()
cosines.append(abs(cos))
stats["mean_direction_persistence"] = round(sum(cosines) / len(cosines), 4)
if subspaces:
effective_ranks = {}
for idx, sub in subspaces.items():
if isinstance(sub, torch.Tensor) and sub.dim() == 2 and sub.shape[0] > 1:
try:
s = torch.linalg.svdvals(sub.float())
s = s[s > 1e-12]
if len(s) > 0:
p = s / s.sum()
entropy = -(p * p.log()).sum()
effective_ranks[str(idx)] = round(torch.exp(entropy).item(), 2)
except Exception:
pass
if effective_ranks:
stats["effective_ranks"] = effective_ranks
return stats
def _extract_excise_details(pipeline) -> dict[str, Any]:
"""Extract excision operation details from a pipeline."""
details: dict[str, Any] = {}
techniques: list[str] = []
modified = getattr(pipeline, "_excise_modified_count", None)
if modified is not None:
details["modified_count"] = modified
refusal_heads = getattr(pipeline, "_refusal_heads", {})
if refusal_heads:
techniques.append("head_surgery")
details["head_surgery_layers"] = len(refusal_heads)
total_heads = sum(len(heads) for heads in refusal_heads.values())
details["total_heads_projected"] = total_heads
sae_dirs = getattr(pipeline, "_sae_directions", {})
if sae_dirs:
techniques.append("sae_features")
details["sae_direction_count"] = len(sae_dirs)
expert_scores = getattr(pipeline, "_expert_safety_scores", {})
if expert_scores:
techniques.append("expert_gating")
layer_weights = getattr(pipeline, "_layer_excise_weights", {})
if layer_weights:
techniques.append("layer_adaptive")
details["adaptive_weight_min"] = round(min(layer_weights.values()), 4)
details["adaptive_weight_max"] = round(max(layer_weights.values()), 4)
expert_dirs = getattr(pipeline, "_expert_directions", {})
if expert_dirs:
techniques.append("per_expert")
steering_hooks = getattr(pipeline, "_steering_hooks", [])
if steering_hooks:
techniques.append("activation_steering")
if getattr(pipeline, "invert_refusal", False):
techniques.append("inversion")
if getattr(pipeline, "project_embeddings", False):
techniques.append("embedding_projection")
if getattr(pipeline, "activation_steering", False) and "activation_steering" not in techniques:
techniques.append("activation_steering")
if getattr(pipeline, "expert_transplant", False):
techniques.append("expert_transplant")
if techniques:
details["used_techniques"] = techniques
return details
def _extract_prompt_counts(pipeline) -> dict[str, int]:
"""Extract prompt count information from a pipeline."""
counts: dict[str, int] = {}
harmful = getattr(pipeline, "harmful_prompts", None)
if harmful is not None:
counts["harmful"] = len(harmful)
harmless = getattr(pipeline, "harmless_prompts", None)
if harmless is not None:
counts["harmless"] = len(harmless)
jailbreak = getattr(pipeline, "jailbreak_prompts", None)
if jailbreak is not None and jailbreak:
counts["jailbreak"] = len(jailbreak)
return counts
def _extract_stage_durations(pipeline) -> dict[str, float] | None:
"""Extract stage duration timings from a pipeline."""
durations = getattr(pipeline, "_stage_durations", None)
if durations and isinstance(durations, dict):
return dict(durations)
return None
def _extract_analysis_insights(informed_report) -> dict[str, Any]:
"""Extract and filter analysis insights from an informed pipeline report."""
insights_obj = getattr(informed_report, "insights", None)
if insights_obj is None:
return {}
result: dict[str, Any] = {}
for key in _ALLOWED_ANALYSIS_KEYS:
val = getattr(insights_obj, key, None)
if val is not None:
result[key] = val
return result
def build_report(
    *,
    architecture: str,
    num_layers: int,
    num_heads: int,
    hidden_size: int,
    total_params: int,
    method: str,
    method_config: dict[str, Any] | None = None,
    quality_metrics: dict[str, Any] | None = None,
    stage_durations: dict[str, float] | None = None,
    strong_layers: list[int] | None = None,
    direction_stats: dict[str, Any] | None = None,
    excise_details: dict[str, Any] | None = None,
    prompt_counts: dict[str, int] | None = None,
    gpu_memory: dict[str, float] | None = None,
    analysis_insights: dict[str, Any] | None = None,
    informed_extras: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble a schema-v2 telemetry report as a plain dict.

    Always present: ``schema_version`` (2), a fresh random ``session_id``,
    a UTC ``timestamp``, the ``model`` shape block, ``method``,
    ``environment``, ``method_config`` (restricted to
    ``_ALLOWED_METHOD_CONFIG_KEYS``) and ``quality_metrics`` (shallow copy);
    the last two are empty dicts when not supplied.

    Optional sections are added only when the argument is truthy, except
    ``strong_layers`` which is added whenever it is not None (an empty list
    is kept). ``analysis_insights`` is restricted to
    ``_ALLOWED_ANALYSIS_KEYS`` and stored only if something survives;
    ``informed_extras`` is stored under the ``informed`` key.
    """
    model_block = {
        "architecture": architecture,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "hidden_size": hidden_size,
        "total_params": total_params,
    }
    report: dict[str, Any] = {"schema_version": 2}
    report["session_id"] = uuid.uuid4().hex
    report["timestamp"] = datetime.now(timezone.utc).isoformat()
    report["model"] = model_block
    report["method"] = method
    report["environment"] = _get_environment_info()

    report["method_config"] = (
        {k: v for k, v in method_config.items() if k in _ALLOWED_METHOD_CONFIG_KEYS}
        if method_config
        else {}
    )
    report["quality_metrics"] = dict(quality_metrics) if quality_metrics else {}

    if stage_durations:
        report["stage_durations"] = stage_durations
    if strong_layers is not None:
        report["strong_layers"] = strong_layers

    # Sections that are copied through unchanged when non-empty.
    passthrough = (
        ("direction_stats", direction_stats),
        ("excise_details", excise_details),
        ("prompt_counts", prompt_counts),
        ("gpu_memory", gpu_memory),
    )
    for section, payload in passthrough:
        if payload:
            report[section] = payload

    if analysis_insights:
        allowed_insights = {
            k: v for k, v in analysis_insights.items() if k in _ALLOWED_ANALYSIS_KEYS
        }
        if allowed_insights:
            report["analysis_insights"] = allowed_insights
    if informed_extras:
        report["informed"] = dict(informed_extras)
    return report
def _send_sync(report: dict[str, Any]) -> None:
    """Synchronously write a v2 telemetry report to local JSONL and sync to Hub.

    The report is appended as one JSON line under ``_write_lock`` and a
    debounced Hub sync is scheduled. Failures are logged at debug level and
    never raised. The "report sent" message is logged only when the write
    and scheduling actually succeeded.
    """
    try:
        with _write_lock:
            with open(TELEMETRY_FILE, "a") as f:
                f.write(json.dumps(report, default=str) + "\n")
        _schedule_hub_sync()
    except Exception as e:
        logger.debug("Telemetry v2 write failed: %s", e)
        return
    logger.debug("Telemetry report sent (schema_version=%s)", report.get("schema_version"))
def send_report(report: dict[str, Any]) -> None:
    """Hand *report* to a daemon thread that writes it via ``_send_sync``.

    Returns immediately. Does nothing when telemetry is disabled. Any
    exception raised in the worker is logged at debug level.
    """
    if not is_enabled():
        return

    def _worker():
        try:
            _send_sync(report)
        except Exception as e:
            logger.debug("Telemetry send failed: %s", e)

    threading.Thread(target=_worker, daemon=True).start()
def maybe_send_pipeline_report(pipeline) -> None:
    """Build a v2 report from a finished pipeline and send it, if enabled.

    Reads the model summary from ``pipeline.handle.summary()``, the
    allow-listed configuration attributes, and the pipeline's metrics and
    statistics. Any exception while building or sending is logged at debug
    level and swallowed, so telemetry cannot break the caller.
    """
    if not is_enabled():
        return
    try:
        model_summary = pipeline.handle.summary()

        # Keep only the allow-listed config attributes that are actually set.
        raw_config = {
            name: getattr(pipeline, name, None)
            for name in _ALLOWED_METHOD_CONFIG_KEYS
        }
        method_config = {
            name: value for name, value in raw_config.items() if value is not None
        }

        send_report(
            build_report(
                architecture=model_summary.get("architecture", "unknown"),
                num_layers=model_summary.get("num_layers", 0),
                num_heads=model_summary.get("num_heads", 0),
                hidden_size=model_summary.get("hidden_size", 0),
                total_params=model_summary.get("total_params", 0),
                method=pipeline.method,
                method_config=method_config,
                quality_metrics=pipeline._quality_metrics,
                stage_durations=_extract_stage_durations(pipeline),
                strong_layers=pipeline._strong_layers,
                direction_stats=_direction_stats(pipeline),
                excise_details=_extract_excise_details(pipeline),
                prompt_counts=_extract_prompt_counts(pipeline),
                gpu_memory=_get_peak_vram(),
            )
        )
    except Exception as e:
        logger.debug("Failed to build pipeline report: %s", e)
def maybe_send_informed_report(pipeline, informed_report) -> None:
    """Build a v2 report from a finished informed pipeline and send it, if enabled.

    Same as the plain pipeline report, plus the allow-listed analysis
    insights from ``informed_report`` and four extra attributes
    (``ouroboros_passes``, ``final_refusal_rate``, ``analysis_duration``,
    ``total_duration``) when they are not None. Any exception while building
    or sending is logged at debug level and swallowed.
    """
    if not is_enabled():
        return
    try:
        model_summary = pipeline.handle.summary()

        # Keep only the allow-listed config attributes that are actually set.
        raw_config = {
            name: getattr(pipeline, name, None)
            for name in _ALLOWED_METHOD_CONFIG_KEYS
        }
        method_config = {
            name: value for name, value in raw_config.items() if value is not None
        }

        analysis_insights = _extract_analysis_insights(informed_report)

        extra_attrs = (
            "ouroboros_passes",
            "final_refusal_rate",
            "analysis_duration",
            "total_duration",
        )
        informed_extras = {}
        for attr in extra_attrs:
            value = getattr(informed_report, attr, None)
            if value is None:
                continue
            informed_extras[attr] = value

        send_report(
            build_report(
                architecture=model_summary.get("architecture", "unknown"),
                num_layers=model_summary.get("num_layers", 0),
                num_heads=model_summary.get("num_heads", 0),
                hidden_size=model_summary.get("hidden_size", 0),
                total_params=model_summary.get("total_params", 0),
                method=pipeline.method,
                method_config=method_config,
                quality_metrics=pipeline._quality_metrics,
                stage_durations=_extract_stage_durations(pipeline),
                strong_layers=pipeline._strong_layers,
                direction_stats=_direction_stats(pipeline),
                excise_details=_extract_excise_details(pipeline),
                prompt_counts=_extract_prompt_counts(pipeline),
                gpu_memory=_get_peak_vram(),
                analysis_insights=analysis_insights,
                informed_extras=informed_extras,
            )
        )
    except Exception as e:
        logger.debug("Failed to build informed report: %s", e)