mirror of
https://github.com/elder-plinius/OBLITERATUS.git
synced 2026-04-23 03:36:25 +02:00
440 lines
18 KiB
Bash
440 lines
18 KiB
Bash
#!/usr/bin/env bash
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# OBLITERATUS Remote Benchmark Runner
|
|
#
|
|
# One-command benchmark on your HuggingFace Space GPU.
|
|
#
|
|
# Usage:
|
|
# ./scripts/run_benchmark_remote.sh # defaults: Qwen 0.5B, all methods
|
|
# ./scripts/run_benchmark_remote.sh --model Qwen/Qwen2.5-1.5B-Instruct
|
|
# ./scripts/run_benchmark_remote.sh --model openai/gpt-oss-20b
|
|
# ./scripts/run_benchmark_remote.sh --models "Qwen/Qwen2.5-0.5B-Instruct openai/gpt-oss-20b"
|
|
# ./scripts/run_benchmark_remote.sh --methods "basic advanced surgical"
|
|
# ./scripts/run_benchmark_remote.sh --prompts 33 # use 33/66/99 prompts per side
|
|
# ./scripts/run_benchmark_remote.sh --dry-run # print the command, don't execute
|
|
# ./scripts/run_benchmark_remote.sh --verbose # show SSH debug output
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
set -euo pipefail

# ── Defaults ─────────────────────────────────────────────────────────────────
# Each default can be overridden by the matching OBLITERATUS_* environment
# variable; command-line flags parsed below override both.
SSH_KEY="${OBLITERATUS_SSH_KEY:-$HOME/.ssh/hf_obliteratus}"
SSH_HOST="${OBLITERATUS_SSH_HOST:-}"
MODEL="${OBLITERATUS_MODEL:-Qwen/Qwen2.5-0.5B-Instruct}"
MODELS=""
METHODS="${OBLITERATUS_METHODS:-basic advanced aggressive surgical inverted nuclear}"
PROMPTS="${OBLITERATUS_PROMPTS:-33}"
DRY_RUN=false
VERBOSE=false

# ── Parse args ───────────────────────────────────────────────────────────────
# _require_value FLAG [VALUE ...]
# Called with the remaining argument list. Aborts with a readable message when
# a value-taking flag is the last argument; without this check `set -u` ends
# the script with a bare "$2: unbound variable".
_require_value() {
    if [[ $# -lt 2 ]]; then
        echo "ERROR: $1 requires a value" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        # --model selects a single model and discards any earlier --models list.
        --model)      _require_value "$@"; MODEL="$2"; MODELS=""; shift 2 ;;
        --models)     _require_value "$@"; MODELS="$2"; shift 2 ;;
        --methods)    _require_value "$@"; METHODS="$2"; shift 2 ;;
        --prompts)    _require_value "$@"; PROMPTS="$2"; shift 2 ;;
        --key)        _require_value "$@"; SSH_KEY="$2"; shift 2 ;;
        --host)       _require_value "$@"; SSH_HOST="$2"; shift 2 ;;
        --dry-run)    DRY_RUN=true; shift ;;
        --verbose|-v) VERBOSE=true; shift ;;
        -h|--help)
            # The help text is the usage section of this file's header comment.
            head -15 "$0" | tail -11
            exit 0
            ;;
        *)
            echo "Unknown arg: $1" >&2; exit 1 ;;
    esac
done

# If --models not set, use single --model
if [[ -z "$MODELS" ]]; then
    MODELS="$MODEL"
fi
|
|
|
|
# ── Validate SSH host ──────────────────────────────────────────────────────
# Fatal configuration errors are written to stderr, like the script's other
# error paths, so they are not mixed into captured benchmark output.
if [[ -z "$SSH_HOST" ]]; then
    {
        echo "ERROR: SSH_HOST not configured."
        echo ""
        echo "Set your HF Space SSH host:"
        echo " 1. export OBLITERATUS_SSH_HOST=your-username-spacename@ssh.hf.space"
        echo " 2. Or pass --host your-username-spacename@ssh.hf.space"
    } >&2
    exit 1
fi

# ── Validate SSH key ────────────────────────────────────────────────────────
# The key must exist as a regular file before any connection is attempted.
if [[ ! -f "$SSH_KEY" ]]; then
    {
        echo "ERROR: SSH key not found at $SSH_KEY"
        echo ""
        echo "Either:"
        echo " 1. Place your HF Space SSH key at ~/.ssh/hf_obliteratus"
        echo " 2. Set OBLITERATUS_SSH_KEY=/path/to/key"
        echo " 3. Pass --key /path/to/key"
    } >&2
    exit 1
fi
|
|
|
|
# Summarise the effective configuration before anything is sent to the Space.
# printf emits one line per argument; the final empty argument is the blank
# separator line.
printf '%s\n' \
    "╔══════════════════════════════════════════════════════════════╗" \
    "║ OBLITERATUS — Remote GPU Benchmark ║" \
    "╠══════════════════════════════════════════════════════════════╣" \
    "║ Host: $SSH_HOST" \
    "║ Models: $MODELS" \
    "║ Methods: $METHODS" \
    "║ Prompts: $PROMPTS per side" \
    "║ SSH key: $SSH_KEY" \
    "╚══════════════════════════════════════════════════════════════╝" \
    ""
|
|
|
|
# ── Build the Python benchmark script to run remotely ────────────────────────
|
|
read -r -d '' REMOTE_SCRIPT << 'PYEOF' || true
|
|
# Standard-library modules used by the remote benchmark script.
import gc
import json
import os
import shutil
import sys
import time

# CUDA settings are applied before torch is imported. setdefault keeps any
# value that is already present in the remote environment.
for _env_name, _env_value in (
    ("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True"),
    ("CUDA_LAUNCH_BLOCKING", "1"),
):
    os.environ.setdefault(_env_name, _env_value)

import torch
import torch.nn as nn

# Add app dir to path (HF Space layout: /home/user/app)
_app_dir = os.environ.get("APP_DIR", "/home/user/app")
sys.path.insert(0, _app_dir)

# ── Hotpatch: fix device detection for accelerate device_map="auto" ──────
# The deployed Space code uses next(model.parameters()).device which is
# unreliable when accelerate distributes params across devices.
import obliteratus.abliterate as _abl
|
|
|
|
@staticmethod
def _get_model_device(model):
    """Return the device that input tensors should be moved to for *model*.

    For a model that carries an ``hf_device_map`` attribute, the device of the
    input-embedding parameters is preferred. If the embeddings are
    unavailable, have no parameters, or live on the ``meta`` device, the
    first non-meta parameter's device is used instead, and CPU as a last
    resort.

    For any other model the first parameter's device is returned. A model
    without parameters yields CPU rather than raising ``StopIteration``.

    Args:
        model: object exposing ``parameters()`` and, for dispatched models,
            ``get_input_embeddings()``.

    Returns:
        torch.device: device to place model inputs on.
    """
    if hasattr(model, "hf_device_map"):
        try:
            embed_device = next(model.get_input_embeddings().parameters()).device
        except (StopIteration, AttributeError):
            # No embedding accessor, it returned None, or it has no parameters.
            embed_device = None
        # Inputs moved to "meta" would lose their data, so a meta embedding is
        # treated the same as a missing one.
        if embed_device is not None and embed_device.type != "meta":
            return embed_device
        for p in model.parameters():
            if p.device.type != "meta":
                return p.device
        return torch.device("cpu")

    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")
|
|
|
|
# Install the helper on the pipeline class. It is wrapped in staticmethod, so
# `self._get_model_device(model)` receives only the model argument.
_abl.AbliterationPipeline._get_model_device = _get_model_device

# Patch _collect_activations to use the fixed device detection
# (the previous capture of the original method's __code__ was never read and
# has been dropped).
import types
|
|
|
|
def _patched_collect(self, layer_modules, prompts, label):
    """Collect last-token activations — patched for correct device detection.

    A forward hook on every layer records ``hidden[:, -1, :]`` of that layer's
    output (the first element when the output is a tuple), detached, moved to
    CPU and cast to float32. Each prompt is tokenized and run through the
    model on its own under ``torch.no_grad()``.

    Args:
        layer_modules: sequence of modules to hook; the position in the
            sequence becomes the key in the returned dict.
        prompts: iterable of prompts passed one at a time to the tokenizer.
        label: tag used only in progress log lines.

    Returns:
        dict[int, list[torch.Tensor]]: per-layer list with one recorded tensor
        per forward call of that layer.

    Hooks are registered inside the ``try`` so that they are removed even when
    memory probing, device detection or a forward pass raises.
    """
    n_layers = len(layer_modules)
    activations = {i: [] for i in range(n_layers)}
    hooks = []

    def make_hook(idx):
        # Bind idx per layer; a closure over the loop variable would see only
        # its final value.
        def hook_fn(module, input, output):
            hidden = output[0] if isinstance(output, tuple) else output
            activations[idx].append(hidden[:, -1, :].detach().cpu().float())
        return hook_fn

    try:
        for idx, layer in enumerate(layer_modules):
            hooks.append(layer.register_forward_hook(make_hook(idx)))

        model = self.handle.model
        tokenizer = self.handle.tokenizer

        max_length = 256
        if torch.cuda.is_available():
            # Free memory summed over all visible GPUs, in GiB.
            free_gb = sum(
                torch.cuda.mem_get_info(i)[0] / (1024 ** 3)
                for i in range(torch.cuda.device_count())
            )
            # Scale thresholds by model size (baseline: 7B with hidden=4096, 32 layers)
            _h = self.handle.hidden_size if self.handle else 4096
            _l = n_layers if n_layers else 32
            _ms = (_h / 4096) * (_l / 32)
            _tight = max(4.0 * _ms, 0.5)
            _low = max(2.0 * _ms, 0.25)
            if free_gb < _low:
                max_length = 64
                self.log(f" Low GPU memory ({free_gb:.1f} GB free, threshold {_low:.1f} GB), using max_length={max_length}")
            elif free_gb < _tight:
                max_length = 128
                self.log(f" Tight GPU memory ({free_gb:.1f} GB free, threshold {_tight:.1f} GB), using max_length={max_length}")

        device = self._get_model_device(model)

        for i, prompt in enumerate(prompts):
            self.log(f" [{label}] prompt {i + 1}/{len(prompts)}")
            inputs = tokenizer(
                prompt, return_tensors="pt", padding=True, truncation=True,
                max_length=max_length,
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                model(**inputs)
            del inputs
            self._free_gpu_memory()
    finally:
        for h in hooks:
            h.remove()

    return activations
|
|
|
|
# Swap the pipeline's activation collector for the patched version and say so.
setattr(_abl.AbliterationPipeline, "_collect_activations", _patched_collect)
print("[hotpatch] Device detection fix applied")
|
|
# ── End hotpatch ─────────────────────────────────────────────────────────
|
|
|
|
# ── Hotpatch: nuclear mode tuning ─────────────────────────────────────────
# The deployed Space code has stale nuclear defaults. Override them here
# so the benchmark exercises the latest tuning without redeploying.
import math as _math

# 1. Updated method configs (read at __init__ time)
# Keys are written in place into the shared METHODS table.
_abl.METHODS["nuclear"].update(
    n_directions=4,
    reflection_strength=1.25,
    embed_regularization=0.50,
    steering_strength=0.15,
    safety_neuron_masking=False,
)
_abl.METHODS["inverted"].update(safety_neuron_masking=False)
|
|
|
|
# 2. Cap layers for inversion modes (40% of total) — post-distill
_orig_distill = _abl.AbliterationPipeline._distill_refusal_subspace


def _patched_distill(self):
    """Run the original distillation, then cap layers and SAE features.

    After delegating to the original ``_distill_refusal_subspace``, and only
    when ``self.invert_refusal`` is set and ``self._strong_layers`` is
    non-empty:

    * ``self._strong_layers`` is truncated to ``max(3, int(0.40 * n_total))``
      entries, where ``n_total`` is the layer count reported by
      ``get_layer_modules`` (24 is assumed if that lookup fails);
    * every entry of ``self._sae_directions`` is truncated to its first
      ``n_sae`` rows: 4 when ``self.reflection_strength < 2.0``, otherwise 6.

    Returns:
        Whatever the original method returned, so the wrapper is transparent
        to callers that use the result.
    """
    result = _orig_distill(self)

    if not (self.invert_refusal and self._strong_layers):
        return result

    try:
        n_total = len(_abl.get_layer_modules(self.handle))
    except Exception:
        # Best effort: fall back to a fixed layer count rather than abort.
        n_total = 24

    max_layers = max(3, int(n_total * 0.40))
    if len(self._strong_layers) > max_layers:
        old_count = len(self._strong_layers)
        self._strong_layers = self._strong_layers[:max_layers]
        self.log(f" [hotpatch] Capped {old_count} -> {max_layers} layers for inversion (40% of {n_total})")

    # Truncate SAE directions: 4 features for nuclear, 6 for inverted
    # NOTE(review): scoped to inversion runs, per the comment above — confirm
    # against the deployed file's indentation.
    n_sae = 4 if self.reflection_strength < 2.0 else 6
    for idx in list(self._sae_directions.keys()):
        dirs = self._sae_directions[idx]
        if dirs.shape[0] > n_sae:
            self._sae_directions[idx] = dirs[:n_sae]
    if self._sae_directions:
        self.log(f" [hotpatch] SAE features capped to {n_sae} per layer")

    return result


_abl.AbliterationPipeline._distill_refusal_subspace = _patched_distill

# Plain string, not %-formatting: a doubled percent sign would print literally.
print("[hotpatch] Nuclear tuning: 4 dirs, 1.25x reflect, no neuron mask, 40% layer cap, 4 SAE features")
|
|
# ── End nuclear hotpatch ──────────────────────────────────────────────────
|
|
|
|
from obliteratus.abliterate import AbliterationPipeline, METHODS, HARMFUL_PROMPTS, HARMLESS_PROMPTS
|
|
|
|
# Benchmark parameters arrive as environment variables set on the SSH command
# line; model and method lists are whitespace-separated.
MODELS_LIST = os.environ["BENCH_MODELS"].split()
METHODS_LIST = os.environ["BENCH_METHODS"].split()
N_PROMPTS = int(os.environ["BENCH_PROMPTS"])

_rule = "=" * 60
print(f"\n{_rule}")
print("OBLITERATUS BENCHMARK")
print(_rule)
print(f"Models: {MODELS_LIST}")
print(f"Methods: {METHODS_LIST}")
print(f"Prompts: {N_PROMPTS} per side")
if not torch.cuda.is_available():
    print("GPU: NONE (CPU only)")
else:
    # Only device 0 is described; memory figures are in units of 1e9 bytes.
    _gpu_name = torch.cuda.get_device_name(0)
    _total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    _free_gb = torch.cuda.mem_get_info(0)[0] / 1e9
    print(f"GPU: {_gpu_name} ({_total_gb:.1f} GB total, {_free_gb:.1f} GB free)")
print(f"{_rule}\n")

# Use the first N_PROMPTS prompts from each side.
harmful = HARMFUL_PROMPTS[:N_PROMPTS]
harmless = HARMLESS_PROMPTS[:N_PROMPTS]

# Accumulates one result dict per (model, method) pair across all models.
all_results = []
|
|
|
|
import traceback


def _fmt_metric(value, spec):
    """Return *value* formatted with *spec*, or "N/A" when it is None."""
    return "N/A" if value is None else format(value, spec)


def _summary_row(r):
    """Render one result dict as a line of the per-model summary table."""
    if "error" in r:
        return f"{r['method']:<12} {r['time_seconds']:>7.1f}s {'FAILED':>10}"
    # The "quality" key is always present on success but may hold None, so a
    # .get() default alone would not protect the lookups below.
    q = r.get("quality") or {}
    ppl = _fmt_metric(q.get("perplexity"), ".2f")
    coh = _fmt_metric(q.get("coherence"), ".0%")
    ref = _fmt_metric(q.get("refusal_rate"), ".0%")
    gpu = r.get("peak_gpu_mb")
    return (f"{r['method']:<12} {r['time_seconds']:>7.1f}s "
            f"{ppl:>10} "
            f"{coh:>8} "
            f"{ref:>8} "
            f"{gpu if gpu is not None else 'N/A':>8}")


# Run every requested method against every requested model. A failure in one
# method is recorded and reported; the remaining methods still run.
for model_name in MODELS_LIST:
    print(f"\n{'═'*60}")
    print(f"MODEL: {model_name}")
    print(f"{'═'*60}")

    model_results = []

    for method in METHODS_LIST:
        if method not in METHODS:
            print(f"SKIP unknown method: {method}")
            continue

        print(f"\n{'─'*60}")
        print(f"METHOD: {method} — {METHODS[method]['label']}")
        print(f"{'─'*60}")

        # Clean slate
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            # Reset so peak_gpu_mb below reflects this method only.
            torch.cuda.reset_peak_memory_stats()

        outdir = f"/tmp/obliteratus_bench_{method}"
        t0 = time.time()
        pipeline = None

        try:
            pipeline = AbliterationPipeline(
                model_name=model_name,
                output_dir=outdir,
                device="auto",
                dtype="float16",
                method=method,
                harmful_prompts=harmful,
                harmless_prompts=harmless,
                on_log=lambda msg: print(f" {msg}"),
            )
            result_path = pipeline.run()
            elapsed = time.time() - t0

            r = {
                "model": model_name,
                "method": method,
                "label": METHODS[method]["label"],
                "time_seconds": round(elapsed, 1),
                "quality": pipeline._quality_metrics,
                "strong_layers": pipeline._strong_layers,
                "n_strong_layers": len(pipeline._strong_layers),
                "n_directions": pipeline.n_directions,
            }

            if torch.cuda.is_available():
                r["peak_gpu_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)

            model_results.append(r)

            print(f"\n ✓ {method} complete in {elapsed:.1f}s")
            print(f" Quality: {json.dumps(pipeline._quality_metrics, default=str)}")

        except Exception as e:
            # Deliberately broad: one failing method must not end the run.
            elapsed = time.time() - t0
            model_results.append({
                "model": model_name,
                "method": method,
                "label": METHODS.get(method, {}).get("label", method),
                "time_seconds": round(elapsed, 1),
                "error": str(e),
            })
            print(f"\n ✗ {method} FAILED after {elapsed:.1f}s: {e}")
            traceback.print_exc()

        # Cleanup saved model to free disk
        shutil.rmtree(outdir, ignore_errors=True)

        # Force cleanup between runs
        if pipeline is not None:
            del pipeline
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    all_results.extend(model_results)

    # Summary table for this model
    print(f"\n{'='*60}")
    print(f"RESULTS: {model_name}")
    print(f"{'Method':<12} {'Time':>8} {'PPL':>10} {'Coher':>8} {'Refusal':>8} {'GPU MB':>8}")
    print(f"{'─'*12} {'─'*8} {'─'*10} {'─'*8} {'─'*8} {'─'*8}")
    for r in model_results:
        print(_summary_row(r))
    print(f"{'='*60}")
|
|
|
|
# Final JSON dump
|
|
# Final JSON dump: every collected result, fenced so it can be copied out of
# the log. default=str stringifies values json cannot encode natively.
_dump_rule = "=" * 60
print(
    f"\n\n{_dump_rule}",
    "ALL BENCHMARK RESULTS (JSON)",
    _dump_rule,
    "```json",
    json.dumps(all_results, indent=2, default=str),
    "```",
    sep="\n",
)
|
|
PYEOF
|
|
|
|
# ── SSH options ──────────────────────────────────────────────────────────────
# Identity file first, then one `-o` pair per setting. Host-key checking is
# switched off and known hosts are discarded (UserKnownHostsFile=/dev/null).
SSH_OPTS=(-i "$SSH_KEY")
for _ssh_setting in \
    StrictHostKeyChecking=no \
    UserKnownHostsFile=/dev/null \
    ConnectTimeout=30 \
    ServerAliveInterval=60 \
    ServerAliveCountMax=10
do
    SSH_OPTS+=(-o "$_ssh_setting")
done

# --verbose adds ssh's own debug output.
if [[ "$VERBOSE" == true ]]; then
    SSH_OPTS+=(-v)
fi
|
|
|
|
# ── Pre-flight: verify SSH connectivity ─────────────────────────────────────
# Runs a trivial remote command. ssh's stderr is captured in a private temp
# file (created with mktemp rather than a fixed, predictable /tmp path) and
# shown only when the connection fails.
echo "Checking SSH connectivity..."
SSH_DEBUG_LOG=$(mktemp /tmp/obliteratus_ssh_debug_XXXXXX)
if ! ssh "${SSH_OPTS[@]}" "$SSH_HOST" "echo 'SSH_OK'" 2>"$SSH_DEBUG_LOG"; then
    {
        echo ""
        echo "ERROR: SSH connection failed!"
        echo ""
        echo "Debug output:"
        cat "$SSH_DEBUG_LOG"
        echo ""
        echo "Troubleshooting checklist:"
        echo " 1. Is Dev Mode enabled on your HF Space?"
        echo " → Check your Space's Settings tab (Dev Mode must be ON)"
        echo " 2. Is the Space awake (not sleeping/building)?"
        echo " → Visit the Space URL and wait for the UI to load"
        echo " 3. Is your SSH public key added to your HF profile?"
        echo " → https://huggingface.co/settings/keys"
        echo " → Run: cat ${SSH_KEY}.pub"
        echo " 4. Are key permissions correct?"
        echo " → Run: chmod 600 $SSH_KEY"
        echo " 5. Try manually:"
        echo " → ssh -v -i $SSH_KEY $SSH_HOST echo hello"
        echo ""
    } >&2
    rm -f "$SSH_DEBUG_LOG"
    exit 1
fi
rm -f "$SSH_DEBUG_LOG"
echo "SSH connection verified ✓"
echo ""
|
|
|
|
# ── Build SSH command ────────────────────────────────────────────────────────
# Write the Python script to a temp file and pipe it, instead of passing
# via -c (avoids command-line length limits and shell escaping issues).
#
# The template ends in X's: GNU mktemp accepts a suffix after them, but
# BSD/macOS mktemp only documents trailing X's. The script is fed to python
# over stdin, so no .py extension is needed.
REMOTE_SCRIPT_FILE=$(mktemp /tmp/obliteratus_bench_XXXXXX)
# Single quotes defer expansion until the trap fires; the inner double quotes
# keep the path intact at that point.
trap 'rm -f "$REMOTE_SCRIPT_FILE"' EXIT
printf '%s\n' "$REMOTE_SCRIPT" > "$REMOTE_SCRIPT_FILE"

if $DRY_RUN; then
    echo "[DRY RUN] Would execute:"
    echo " cat script.py | ssh ${SSH_OPTS[*]} $SSH_HOST 'BENCH_MODELS=... python3 -u'"
    echo ""
    echo "Script saved to: $REMOTE_SCRIPT_FILE"
    # Keep the file that was just advertised: without this the EXIT trap
    # would delete it as soon as the script returns.
    trap - EXIT
    exit 0
fi
|
|
|
|
echo "Running benchmark on Space..."
echo ""

# Sanitize inputs: reject values containing shell metacharacters to prevent
# command injection on the remote host.
for _var_name in MODELS METHODS PROMPTS; do
    _val="${!_var_name}"
    if [[ "$_val" =~ [\'\"\;\&\|\`\$\(\)\{\}\<\>\\] ]]; then
        echo "ERROR: ${_var_name} contains unsafe characters: $_val" >&2
        exit 1
    fi
done

# The remote script parses BENCH_PROMPTS with int(); fail here instead of
# after the remote run has started.
if [[ ! "$PROMPTS" =~ ^[0-9]+$ ]]; then
    echo "ERROR: PROMPTS must be a non-negative integer: $PROMPTS" >&2
    exit 1
fi

# Feed the script to the remote interpreter on stdin (`python3 -u -`). The
# three BENCH_* assignments are single-quoted for the remote shell, which is
# safe because quotes were rejected above.
ssh "${SSH_OPTS[@]}" "$SSH_HOST" \
    "BENCH_MODELS='$MODELS' BENCH_METHODS='$METHODS' BENCH_PROMPTS='$PROMPTS' python3 -u -" \
    < "$REMOTE_SCRIPT_FILE"
|