mirror of
https://github.com/elder-plinius/OBLITERATUS.git
synced 2026-06-06 22:34:02 +02:00
Add files via upload
This commit is contained in:
@@ -526,7 +526,7 @@ Works with any HuggingFace transformer, including: GPT-2, LLaMA, Mistral, Falcon
|
||||
## References
|
||||
|
||||
- Arditi et al. (2024). *Refusal in Language Models Is Mediated by a Single Direction.* [arXiv:2406.11717](https://arxiv.org/abs/2406.11717)
|
||||
- Gulmez, G. (2025). *Gabliteration: SVD-Based Multi-Direction Refusal Removal.* [arXiv:2512.18901](https://arxiv.org/abs/2512.18901)
|
||||
- Gülmez, G. (2026). *Gabliteration: Adaptive Multi-Directional Neural Weight Modification for Selective Behavioral Alteration in Large Language Models.* [arXiv:2512.18901](https://arxiv.org/abs/2512.18901)
|
||||
- grimjim (2025). *Norm-Preserving Biprojected Abliteration.* [HuggingFace](https://huggingface.co/grimjim)
|
||||
- Turner et al. (2023). *Activation Addition: Steering Language Models Without Optimization.* [arXiv:2308.10248](https://arxiv.org/abs/2308.10248)
|
||||
- Rimsky et al. (2024). *Steering Llama 2 via Contrastive Activation Addition.* [arXiv:2312.06681](https://arxiv.org/abs/2312.06681)
|
||||
|
||||
@@ -18,6 +18,7 @@ ZeroGPU Support:
|
||||
from __future__ import annotations
|
||||
|
||||
import gc
|
||||
import json as _json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
@@ -117,7 +118,84 @@ _obliterate_counter: int = 0
|
||||
|
||||
# Flag to suppress session_model_dd.change when obliterate programmatically
|
||||
# sets the dropdown value (prevents wasteful GPU re-allocation on ZeroGPU)
|
||||
_skip_session_load: bool = False
|
||||
_skip_session_load: int = 0 # counter (not bool) — obliterate sets to 2 for both dropdowns
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ZeroGPU session persistence — survive process restarts
|
||||
# ---------------------------------------------------------------------------
|
||||
# On ZeroGPU Spaces, the container may restart between requests (idle timeout,
|
||||
# scaling, etc.). The browser retains the old dropdown values but the Python
|
||||
# process loses all in-memory state (_state, _session_models). To recover,
|
||||
# we persist a small JSON sidecar next to each checkpoint.
|
||||
|
||||
_SESSION_META_FILE = "obliteratus_session.json"
|
||||
|
||||
|
||||
def _persist_session_meta(output_dir: str, label: str, meta: dict) -> None:
|
||||
"""Write session metadata next to a checkpoint so we can recover later."""
|
||||
try:
|
||||
p = Path(output_dir) / _SESSION_META_FILE
|
||||
data = {"label": label, **meta}
|
||||
p.write_text(_json.dumps(data, indent=2))
|
||||
except Exception:
|
||||
pass # best-effort
|
||||
|
||||
|
||||
def _recover_sessions_from_disk() -> None:
|
||||
"""Scan /tmp for obliterated checkpoints and repopulate _session_models.
|
||||
|
||||
Called on startup and when a stale dropdown value is detected. Skips
|
||||
directories that are already registered.
|
||||
"""
|
||||
global _last_obliterated_label, _obliterate_counter
|
||||
found_any = False
|
||||
for pattern in ("obliterated_*", "obliterated", "bench_*"):
|
||||
for p in Path("/tmp").glob(pattern):
|
||||
if not p.is_dir():
|
||||
continue
|
||||
meta_file = p / _SESSION_META_FILE
|
||||
if not meta_file.exists():
|
||||
continue
|
||||
try:
|
||||
data = _json.loads(meta_file.read_text())
|
||||
except Exception:
|
||||
continue
|
||||
label = data.get("label", p.name)
|
||||
if label in _session_models:
|
||||
continue # already registered
|
||||
_session_models[label] = {
|
||||
"model_id": data.get("model_id", ""),
|
||||
"model_choice": data.get("model_choice", data.get("model_id", "")),
|
||||
"method": data.get("method", "unknown"),
|
||||
"dataset_key": data.get("dataset_key", ""),
|
||||
"prompt_volume": data.get("prompt_volume", 0),
|
||||
"output_dir": str(p),
|
||||
"source": data.get("source", "recovered"),
|
||||
}
|
||||
found_any = True
|
||||
# Track the latest for auto-select
|
||||
_last_obliterated_label = label
|
||||
# Keep counter above any existing numbered dirs
|
||||
if p.name.startswith("obliterated_"):
|
||||
try:
|
||||
idx = int(p.name.split("_", 1)[1])
|
||||
if idx >= _obliterate_counter:
|
||||
_obliterate_counter = idx + 1
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
# If we recovered sessions but _state has no output_dir, set it to the
|
||||
# most recent checkpoint so chat_respond can reload from disk.
|
||||
if found_any and not _state.get("output_dir"):
|
||||
with _lock:
|
||||
latest = _last_obliterated_label
|
||||
if latest and latest in _session_models:
|
||||
_state["output_dir"] = _session_models[latest]["output_dir"]
|
||||
_state["model_name"] = _session_models[latest].get("model_choice")
|
||||
_state["method"] = _session_models[latest].get("method")
|
||||
|
||||
|
||||
# Run recovery on import (app startup)
|
||||
_recover_sessions_from_disk()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model presets — 100+ models organized by provider
|
||||
@@ -961,6 +1039,14 @@ def benchmark(
|
||||
"prompt_volume": prompt_volume,
|
||||
"output_dir": bench_save_path,
|
||||
}
|
||||
_persist_session_meta(bench_save_path, label, {
|
||||
"model_id": model_id,
|
||||
"model_choice": model_choice,
|
||||
"method": method_key,
|
||||
"dataset_key": dataset_key,
|
||||
"prompt_volume": prompt_volume,
|
||||
"source": "benchmark",
|
||||
})
|
||||
|
||||
# Explicitly free the pipeline and its model to reclaim GPU memory
|
||||
# before the next benchmark iteration. _clear_gpu() only clears
|
||||
@@ -1306,6 +1392,14 @@ def benchmark_multi_model(
|
||||
"prompt_volume": prompt_volume,
|
||||
"output_dir": mm_save_path,
|
||||
}
|
||||
_persist_session_meta(mm_save_path, label, {
|
||||
"model_id": model_id,
|
||||
"model_choice": model_display,
|
||||
"method": method_key,
|
||||
"dataset_key": dataset_key,
|
||||
"prompt_volume": prompt_volume,
|
||||
"source": "benchmark_mm",
|
||||
})
|
||||
|
||||
# Explicitly free pipeline and model before next iteration
|
||||
if pipeline_ref[0] is not None:
|
||||
@@ -1734,6 +1828,16 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
|
||||
_state["steering"] = steering_meta
|
||||
_state["output_dir"] = save_dir # for ZeroGPU checkpoint reload
|
||||
|
||||
# Persist session metadata to disk so we survive ZeroGPU process restarts
|
||||
_persist_session_meta(save_dir, _cache_label, {
|
||||
"model_id": model_id,
|
||||
"model_choice": model_choice,
|
||||
"method": method,
|
||||
"dataset_key": dataset_key if not use_custom else "custom",
|
||||
"prompt_volume": prompt_volume,
|
||||
"source": "obliterate",
|
||||
})
|
||||
|
||||
if can_generate:
|
||||
# Model fits — use it directly (steering hooks already installed)
|
||||
with _lock:
|
||||
@@ -1870,7 +1974,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
|
||||
# Set skip flag so the .change handler doesn't trigger a wasteful
|
||||
# GPU re-allocation — the model is already loaded.
|
||||
global _skip_session_load
|
||||
_skip_session_load = True
|
||||
_skip_session_load = 2 # both session_model_dd and ab_session_model_dd fire .change
|
||||
_dd_update = gr.update(
|
||||
choices=_get_session_model_choices(),
|
||||
value=_last_obliterated_label or None,
|
||||
@@ -1947,25 +2051,30 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
|
||||
model = _state["model"]
|
||||
tokenizer = _state["tokenizer"]
|
||||
|
||||
if model is None or tokenizer is None:
|
||||
yield "No model loaded yet. Go to the **Obliterate** tab first and liberate a model."
|
||||
return
|
||||
# ZeroGPU safety: detect whether we need to reload from checkpoint.
|
||||
# Between GPU allocations, ZeroGPU may deallocate GPU memory, leaving
|
||||
# model as None (garbage-collected) or with stale/meta tensors.
|
||||
# Meta tensors raise NotImplementedError on .to(), not RuntimeError,
|
||||
# so we catch Exception broadly here.
|
||||
_needs_reload = model is None or tokenizer is None
|
||||
if not _needs_reload:
|
||||
try:
|
||||
dev = next(model.parameters()).device
|
||||
if dev.type == "meta":
|
||||
_needs_reload = True
|
||||
elif torch.cuda.is_available() and dev.type != "cuda":
|
||||
model.to("cuda")
|
||||
except Exception:
|
||||
_needs_reload = True
|
||||
|
||||
# ZeroGPU safety: ensure model is on GPU if available.
|
||||
# Between GPU allocations, ZeroGPU may have moved the model to CPU/meta,
|
||||
# or tensors may be stale from a previous GPU context.
|
||||
# The @spaces.GPU decorator guarantees a GPU is available here.
|
||||
_needs_reload = False
|
||||
try:
|
||||
dev = next(model.parameters()).device
|
||||
if torch.cuda.is_available() and dev.type != "cuda":
|
||||
model.to("cuda")
|
||||
except (StopIteration, RuntimeError):
|
||||
_needs_reload = True
|
||||
|
||||
# If model tensors are stale/meta, reload from the saved checkpoint
|
||||
if _needs_reload and _ZEROGPU_AVAILABLE:
|
||||
# Reload from saved checkpoint if model is missing or stale
|
||||
if _needs_reload:
|
||||
checkpoint = _state.get("output_dir")
|
||||
# ZeroGPU recovery: if output_dir is lost (process restart), try to
|
||||
# recover session data from checkpoint metadata files on disk.
|
||||
if not checkpoint or not Path(checkpoint).exists():
|
||||
_recover_sessions_from_disk()
|
||||
checkpoint = _state.get("output_dir")
|
||||
if checkpoint and Path(checkpoint).exists():
|
||||
try:
|
||||
is_preset = (_state.get("model_name") or "") in MODELS
|
||||
@@ -1985,11 +2094,12 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
|
||||
with _lock:
|
||||
_state["model"] = model
|
||||
_state["tokenizer"] = tokenizer
|
||||
_state["status"] = "ready"
|
||||
except Exception:
|
||||
yield "Model failed to reload from checkpoint. Try re-obliterating."
|
||||
return
|
||||
else:
|
||||
yield "Model tensors are stale (ZeroGPU). Re-obliterate to create a fresh checkpoint."
|
||||
yield "No model loaded yet. Go to the **Obliterate** tab first and liberate a model."
|
||||
return
|
||||
|
||||
# Sanitize inputs to prevent resource exhaustion
|
||||
@@ -2117,8 +2227,8 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
|
||||
# Skip if the obliterate function just set the dropdown value — the model
|
||||
# is already loaded and we'd just waste GPU quota re-allocating.
|
||||
global _skip_session_load
|
||||
if _skip_session_load:
|
||||
_skip_session_load = False
|
||||
if _skip_session_load > 0:
|
||||
_skip_session_load -= 1
|
||||
if choice and _state.get("status") == "ready":
|
||||
yield (
|
||||
f"**Ready!** `{choice}` is loaded — just type in the chat below.",
|
||||
@@ -2127,8 +2237,65 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
|
||||
return
|
||||
|
||||
if not choice or choice not in _bench_configs:
|
||||
yield "**Error:** No benchmark result selected. Pick a model from the dropdown first.", ""
|
||||
return
|
||||
# On ZeroGPU, global state may be lost between process restarts.
|
||||
# Try to recover session data from checkpoint metadata files on disk.
|
||||
if choice and choice not in _bench_configs:
|
||||
_recover_sessions_from_disk()
|
||||
# After recovery, the choice might now be in _bench_configs
|
||||
if choice in _bench_configs:
|
||||
pass # fall through to the normal loading path below
|
||||
else:
|
||||
# choice still not found — but we may have recovered output_dir
|
||||
pass
|
||||
|
||||
# If recovery didn't find the exact choice, check if model is loaded
|
||||
if choice not in _bench_configs:
|
||||
with _lock:
|
||||
if _state["status"] == "ready" and _state["model"] is not None:
|
||||
yield (
|
||||
f"**Ready!** Model already loaded — just type in the chat below.",
|
||||
get_chat_header(),
|
||||
)
|
||||
return
|
||||
# Check if we can reload from a checkpoint on disk
|
||||
checkpoint = _state.get("output_dir")
|
||||
if checkpoint and Path(checkpoint).exists():
|
||||
yield (
|
||||
f"**Loading model** from saved checkpoint...",
|
||||
"",
|
||||
)
|
||||
# If we have a checkpoint, attempt reload outside the lock
|
||||
checkpoint = _state.get("output_dir")
|
||||
if checkpoint and Path(checkpoint).exists():
|
||||
is_preset = (_state.get("model_name") or "") in MODELS
|
||||
try:
|
||||
model_loaded = AutoModelForCausalLM.from_pretrained(
|
||||
checkpoint, device_map="auto", torch_dtype=torch.float16,
|
||||
trust_remote_code=is_preset,
|
||||
)
|
||||
tokenizer_loaded = AutoTokenizer.from_pretrained(
|
||||
checkpoint, trust_remote_code=is_preset,
|
||||
)
|
||||
if tokenizer_loaded.pad_token is None:
|
||||
tokenizer_loaded.pad_token = tokenizer_loaded.eos_token
|
||||
with _lock:
|
||||
_state["model"] = model_loaded
|
||||
_state["tokenizer"] = tokenizer_loaded
|
||||
_state["status"] = "ready"
|
||||
yield (
|
||||
f"**Loaded!** Model reloaded from checkpoint — ready to chat.",
|
||||
get_chat_header(),
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
yield f"**Error:** Could not reload model: {e}", get_chat_header()
|
||||
return
|
||||
yield (
|
||||
"**Error:** Model checkpoint not found. The Space may have restarted — "
|
||||
"please re-obliterate the model on the **Obliterate** tab.",
|
||||
"",
|
||||
)
|
||||
return
|
||||
|
||||
cfg = _bench_configs[choice]
|
||||
model_id = cfg["model_id"]
|
||||
@@ -2320,28 +2487,27 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
|
||||
tokenizer = _state["tokenizer"]
|
||||
model_name = _state["model_name"]
|
||||
|
||||
if abliterated_model is None or tokenizer is None:
|
||||
yield (history_left + [{"role": "user", "content": message},
|
||||
{"role": "assistant", "content": "No abliterated model loaded. Obliterate a model first."}],
|
||||
history_right + [{"role": "user", "content": message},
|
||||
{"role": "assistant", "content": "No abliterated model loaded. Obliterate a model first."}],
|
||||
"Load a model first.",
|
||||
"#### Original (Pre-Abliteration)",
|
||||
"#### Abliterated")
|
||||
return
|
||||
# ZeroGPU safety: detect whether we need to reload from checkpoint.
|
||||
# Model may be None (garbage-collected after GPU deallocation) or stale.
|
||||
# Meta tensors raise NotImplementedError on .to(), so catch broadly.
|
||||
_needs_reload = abliterated_model is None or tokenizer is None
|
||||
if not _needs_reload:
|
||||
try:
|
||||
dev = next(abliterated_model.parameters()).device
|
||||
if dev.type == "meta":
|
||||
_needs_reload = True
|
||||
elif torch.cuda.is_available() and dev.type != "cuda":
|
||||
abliterated_model.to("cuda")
|
||||
except Exception:
|
||||
_needs_reload = True
|
||||
|
||||
# ZeroGPU safety: ensure model is on GPU if available.
|
||||
# If tensors are stale from a prior GPU context, reload from checkpoint.
|
||||
_needs_reload = False
|
||||
try:
|
||||
dev = next(abliterated_model.parameters()).device
|
||||
if torch.cuda.is_available() and dev.type != "cuda":
|
||||
abliterated_model.to("cuda")
|
||||
except (StopIteration, RuntimeError):
|
||||
_needs_reload = True
|
||||
|
||||
if _needs_reload and _ZEROGPU_AVAILABLE:
|
||||
if _needs_reload:
|
||||
checkpoint = _state.get("output_dir")
|
||||
# ZeroGPU recovery: try disk scan if output_dir is lost
|
||||
if not checkpoint or not Path(checkpoint).exists():
|
||||
_recover_sessions_from_disk()
|
||||
checkpoint = _state.get("output_dir")
|
||||
model_name = _state.get("model_name") or model_name
|
||||
if checkpoint and Path(checkpoint).exists():
|
||||
try:
|
||||
is_preset = (model_name or "") in MODELS
|
||||
@@ -2361,8 +2527,19 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
|
||||
with _lock:
|
||||
_state["model"] = abliterated_model
|
||||
_state["tokenizer"] = tokenizer
|
||||
_state["status"] = "ready"
|
||||
except Exception:
|
||||
pass # Fall through — will fail at generation with a clear error
|
||||
else:
|
||||
_no_model_msg = "No abliterated model loaded. Obliterate a model first."
|
||||
yield (history_left + [{"role": "user", "content": message},
|
||||
{"role": "assistant", "content": _no_model_msg}],
|
||||
history_right + [{"role": "user", "content": message},
|
||||
{"role": "assistant", "content": _no_model_msg}],
|
||||
"Load a model first.",
|
||||
"#### Original (Pre-Abliteration)",
|
||||
"#### Abliterated")
|
||||
return
|
||||
|
||||
# Build header strings showing model name on each side
|
||||
header_left = f"#### Original (Pre-Abliteration)\n`{model_name}`"
|
||||
|
||||
@@ -116,7 +116,7 @@ The paper also analyzes how adversarial suffixes (e.g., GCG-generated) suppress
|
||||
|
||||
## 2. Gabliteration (arXiv:2512.18901) — Multi-Direction Subspace Approach {#2-gabliteration}
|
||||
|
||||
**Author:** Gokdeniz Gulmez (independent research)
|
||||
**Author:** Gökdeniz Gülmez (independent research)
|
||||
**arXiv:** [2512.18901](https://arxiv.org/abs/2512.18901)
|
||||
**Version:** v3, revised January 28, 2026
|
||||
**Models:** [Hugging Face collection](https://huggingface.co/collections/Goekdeniz-Guelmez/gabliteration)
|
||||
@@ -733,7 +733,7 @@ MI research helps make AI safe but could be used adversarially. The same techniq
|
||||
|
||||
1. Arditi, A., Obeso, O., Syed, A., Paleka, D., Panickssery, N., Gurnee, W., & Nanda, N. (2024). Refusal in Language Models Is Mediated by a Single Direction. NeurIPS 2024. [arXiv:2406.11717](https://arxiv.org/abs/2406.11717)
|
||||
|
||||
2. Gulmez, G. (2025). Gabliteration: Adaptive Multi-Directional Neural Weight Modification for Selective Behavioral Alteration in Large Language Models. [arXiv:2512.18901](https://arxiv.org/abs/2512.18901)
|
||||
2. Gülmez, G. (2026). Gabliteration: Adaptive Multi-Directional Neural Weight Modification for Selective Behavioral Alteration in Large Language Models. [arXiv:2512.18901](https://arxiv.org/abs/2512.18901)
|
||||
|
||||
3. grimjim. (2025). Norm-Preserving Biprojected Abliteration / MPOA. [HuggingFace Blog](https://huggingface.co/blog/grimjim/norm-preserving-biprojected-abliteration) | [Projected Abliteration](https://huggingface.co/blog/grimjim/projected-abliteration) | [Code](https://github.com/jim-plus/llm-abliteration)
|
||||
|
||||
|
||||
@@ -1802,7 +1802,7 @@ implementations diverge from the closed-form GRRO solution.
|
||||
## References
|
||||
|
||||
1. Arditi, A. et al. (2024). Refusal in Language Models Is Mediated by a Single Direction. NeurIPS 2024.
|
||||
2. Gulmez, G. (2025). Gabliteration. arXiv:2512.18901.
|
||||
2. Gülmez, G. (2026). Gabliteration: Adaptive Multi-Directional Neural Weight Modification for Selective Behavioral Alteration in Large Language Models. arXiv:2512.18901.
|
||||
3. grimjim (2025). Norm-Preserving Biprojected Abliteration (MPOA). HuggingFace.
|
||||
4. Wollschlager, T. et al. (2025). The Geometry of Refusal. ICML 2025.
|
||||
5. Joad et al. (2026). There Is More to Refusal than a Single Direction. arXiv:2602.02132.
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
"id": "install"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": "!pip install -q git+https://github.com/elder-plinius/OBLITERATUS.git\n!pip install -q accelerate bitsandbytes\n\nimport torch\nprint(f\"PyTorch {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n print(f\"VRAM: {torch.cuda.get_device_properties(0).total_mem / 1024**3:.1f} GB\")"
|
||||
"source": "!pip install -q git+https://github.com/elder-plinius/OBLITERATUS.git\n!pip install -q accelerate bitsandbytes\n\nimport torch\nprint(f\"PyTorch {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n print(f\"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB\")"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
||||
@@ -334,7 +334,7 @@ METHODS = {
|
||||
"layer_selection": "middle60",
|
||||
},
|
||||
"gabliteration": {
|
||||
"label": "Gabliteration (Gulmez 2025 Baseline)",
|
||||
"label": "Gabliteration (Gülmez 2026 Baseline)",
|
||||
"description": (
|
||||
"Faithful reproduction of Gabliteration (arXiv:2512.18901). "
|
||||
"SVD-based multi-direction extraction (top-4), ridge-regularized "
|
||||
@@ -2494,7 +2494,7 @@ class AbliterationPipeline:
|
||||
|
||||
References:
|
||||
- SteerMoE (Fayyaz et al., 2025): expert activation frequency analysis
|
||||
- Gabliteration (Gulmez, 2025): multi-direction SVD abliteration
|
||||
- Gabliteration (Gülmez, 2026): multi-direction SVD abliteration
|
||||
- SAFEx (Lai et al., NeurIPS 2025): safety expert identification
|
||||
"""
|
||||
if not self._routing_harmful or not self._routing_harmless:
|
||||
|
||||
@@ -919,7 +919,7 @@ class InformedAbliterationPipeline(AbliterationPipeline):
|
||||
"Arditi et al., Refusal in Language Models Is Mediated by a Single Direction (2024)",
|
||||
"Gabliteration: SVD-based multi-direction extraction (arXiv:2512.18901)",
|
||||
"grimjim, Norm-Preserving Biprojected Abliteration (2025)",
|
||||
"Gurnee & Nanda, The Geometry of Refusal in LLMs — concept cones (ICML 2025)",
|
||||
"Wollschlager et al., The Geometry of Refusal in LLMs — concept cones (ICML 2025, arXiv:2502.17420)",
|
||||
"Joad et al., The Ouroboros Effect: Self-Repair in Abliterated LLMs (2026)",
|
||||
"OBLITERATUS: Analysis-informed abliteration pipeline (novel)",
|
||||
],
|
||||
|
||||
@@ -24,7 +24,7 @@ def _detect_compute_tier() -> str:
|
||||
import torch
|
||||
|
||||
if torch.cuda.is_available():
|
||||
vram_gb = torch.cuda.get_device_properties(0).total_mem / (1024**3)
|
||||
vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
||||
if vram_gb >= 20:
|
||||
return "large"
|
||||
elif vram_gb >= 8:
|
||||
|
||||
@@ -10,6 +10,7 @@ Usage:
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import shutil
|
||||
import sys
|
||||
@@ -48,7 +49,7 @@ def _detect_gpu() -> list[dict]:
|
||||
{
|
||||
"index": i,
|
||||
"name": props.name,
|
||||
"vram_gb": round(props.total_mem / 1024**3, 1),
|
||||
"vram_gb": round(props.total_memory / 1024**3, 1),
|
||||
"compute": f"{props.major}.{props.minor}",
|
||||
}
|
||||
)
|
||||
@@ -292,6 +293,12 @@ def launch_local_ui(
|
||||
console.print("[dim]Loading OBLITERATUS UI (this may take a moment on first run)...[/dim]")
|
||||
start = time.time()
|
||||
|
||||
# app.py lives at the project root, one level above this package.
|
||||
# When installed via pip the root isn't on sys.path, so add it.
|
||||
_project_root = str(pathlib.Path(__file__).resolve().parent.parent)
|
||||
if _project_root not in sys.path:
|
||||
sys.path.insert(0, _project_root)
|
||||
|
||||
from app import launch as app_launch
|
||||
|
||||
elapsed = time.time() - start
|
||||
|
||||
@@ -7,11 +7,14 @@
|
||||
year={2024}
|
||||
}
|
||||
|
||||
@article{gabliteration2024,
|
||||
title={{Gabliteration}: {SVD}-Based Multi-Direction Refusal Removal},
|
||||
author={Gabriel, Saul and {contributors}},
|
||||
journal={arXiv preprint arXiv:2512.18901},
|
||||
year={2024}
|
||||
@misc{gabliteration2024,
|
||||
title={{Gabliteration}: Adaptive Multi-Directional Neural Weight Modification for Selective Behavioral Alteration in Large Language Models},
|
||||
author={G\"{o}kdeniz G\"{u}lmez},
|
||||
year={2026},
|
||||
eprint={2512.18901},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.AI},
|
||||
url={https://arxiv.org/abs/2512.18901}
|
||||
}
|
||||
|
||||
@misc{grimjim2025,
|
||||
|
||||
@@ -7,7 +7,7 @@ comparison tables with standardized community metrics.
|
||||
|
||||
Baselines included:
|
||||
1. FailSpy/abliterator (2024) — Community workhorse baseline
|
||||
2. Gabliteration (Gulmez 2025) — SVD multi-direction + ridge regularization
|
||||
2. Gabliteration (Gülmez 2026) — SVD multi-direction + ridge regularization
|
||||
3. Heretic / p-e-w (2025) — Bayesian TPE auto-tuning (current SOTA for quality)
|
||||
4. Wollschlager RDO (ICML 2025) — Gradient-based direction optimization
|
||||
|
||||
|
||||
Reference in New Issue
Block a user