diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6ad4d12..d7f08f7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@ Thanks for your interest in contributing. This document covers everything you ne
 ## Development Setup
 
 ```bash
-git clone https://github.com/obliteratus-project/OBLITERATUS.git
+git clone https://github.com/elder-plinius/OBLITERATUS.git
 cd OBLITERATUS
 pip install -e ".[dev]"
 ```
diff --git a/README.md b/README.md
index 52e8e8c..c593e8d 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ short_description: "One-click model liberation + chat playground"
     <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue" alt="Open in HF Spaces">
   </a>
   &nbsp;
-  <a href="https://colab.research.google.com/github/obliteratus-project/OBLITERATUS/blob/main/notebooks/abliterate.ipynb">
+  <a href="https://colab.research.google.com/github/elder-plinius/OBLITERATUS/blob/main/notebooks/abliterate.ipynb">
     <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab">
   </a>
 </p>
@@ -55,7 +55,7 @@ Built on published research from [Arditi et al. (2024)](https://arxiv.org/abs/24
 obliteratus obliterate meta-llama/Llama-3.1-8B-Instruct --method advanced
 ```
 
-Or zero commands — just [open the Colab notebook](https://colab.research.google.com/github/obliteratus-project/OBLITERATUS/blob/main/notebooks/abliterate.ipynb) and hit Run All.
+Or zero commands — just [open the Colab notebook](https://colab.research.google.com/github/elder-plinius/OBLITERATUS/blob/main/notebooks/abliterate.ipynb) and hit Run All.
 
 ## What it does
 
@@ -153,7 +153,7 @@ The `obliteratus ui` command adds a Rich terminal startup with GPU detection and
 
 ### 3. Google Colab (free GPU)
 
-[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/obliteratus-project/OBLITERATUS/blob/main/notebooks/abliterate.ipynb)
+[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elder-plinius/OBLITERATUS/blob/main/notebooks/abliterate.ipynb)
 
 Pick a model from the dropdown, pick a method, hit Run All. Download the result or push straight to HuggingFace Hub. Works on the free T4 tier for models up to ~8B parameters.
 
@@ -545,7 +545,7 @@ If you use OBLITERATUS in your research, please cite:
                Refusal Removal in Large Language Models},
   author    = {{OBLITERATUS Contributors}},
   year      = {2026},
-  url       = {https://github.com/obliteratus-project/OBLITERATUS},
+  url       = {https://github.com/elder-plinius/OBLITERATUS},
   note      = {15 analysis modules, 837 tests}
 }
 ```
@@ -565,7 +565,7 @@ pytest
 
 - **Open source** — [GNU Affero General Public License v3.0](LICENSE) (AGPL-3.0). You can freely use, modify, and distribute OBLITERATUS under AGPL terms. If you run a modified version as a network service (SaaS), you must release your source code to users under the same license.
 
-- **Commercial** — Organizations that cannot comply with AGPL obligations (e.g., proprietary SaaS, closed-source products, internal tools where source disclosure is not possible) can purchase a commercial license. Contact us via [GitHub Issues](https://github.com/obliteratus-project/OBLITERATUS/issues) for pricing and terms.
+- **Commercial** — Organizations that cannot comply with AGPL obligations (e.g., proprietary SaaS, closed-source products, internal tools where source disclosure is not possible) can purchase a commercial license. Contact us via [GitHub Issues](https://github.com/elder-plinius/OBLITERATUS/issues) for pricing and terms.
 
 This is the same dual-licensing model used by MongoDB, Qt, Grafana, and others.
 
diff --git a/SECURITY.md b/SECURITY.md
index 80fd422..69cb1ad 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -11,7 +11,7 @@ OBLITERATUS is a mechanistic interpretability research tool. It removes refusal
 If you discover a security vulnerability in OBLITERATUS, please report it responsibly:
 
 1. **Do not** open a public GitHub issue
-2. Open a [private security advisory](https://github.com/obliteratus-project/OBLITERATUS/security/advisories/new) with:
+2. Open a [private security advisory](https://github.com/elder-plinius/OBLITERATUS/security/advisories/new) with:
    - Description of the vulnerability
    - Steps to reproduce
    - Potential impact
diff --git a/app.py b/app.py
index 452125a..ba32a9b 100644
--- a/app.py
+++ b/app.py
@@ -115,6 +115,10 @@ _last_obliterated_label: str = ""
 # Counter for unique obliteration save directories
 _obliterate_counter: int = 0
 
+# Flag to suppress session_model_dd.change when obliterate programmatically
+# sets the dropdown value (prevents wasteful GPU re-allocation on ZeroGPU)
+_skip_session_load: bool = False
+
 # ---------------------------------------------------------------------------
 # Model presets — 100+ models organized by provider
 # ---------------------------------------------------------------------------
@@ -1459,7 +1463,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
             f"   or locally: `export HF_TOKEN=hf_...`\n\n"
             f"Get your token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)\n\n"
             f"Alternatively, choose a non-gated model (those without the \U0001f512 icon).",
-            "", gr.update(), gr.update(), gr.update(),
+            "", gr.update(), gr.update(), gr.update(), gr.update(),
         )
         return
 
@@ -1468,14 +1472,14 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
         if not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
             yield (
                 "**Error:** Invalid Hub repo format. Use `username/model-name`.",
-                "", gr.update(), gr.update(), gr.update(),
+                "", gr.update(), gr.update(), gr.update(), gr.update(),
             )
             return
         if not os.environ.get("HF_TOKEN"):
             yield (
                 "**Error:** HF_TOKEN not set. Push to Hub requires a write token. "
                 "Set it via `export HF_TOKEN=hf_...` or in your Space secrets.",
-                "", gr.update(), gr.update(), gr.update(),
+                "", gr.update(), gr.update(), gr.update(), gr.update(),
             )
             return
 
@@ -1486,7 +1490,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
     _clear_gpu()
     with _lock:
         if _state["status"] == "obliterating":
-            yield "**Error:** An obliteration is already in progress.", "", gr.update(), gr.update(), gr.update()
+            yield "**Error:** An obliteration is already in progress.", "", gr.update(), gr.update(), gr.update(), gr.update()
             return
         _state["log"] = []
         _state["status"] = "obliterating"
@@ -1638,9 +1642,9 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
         status_msg = f"**Obliterating\u2026** ({_elapsed()})"
         if len(log_lines) > last_yielded[0]:
             last_yielded[0] = len(log_lines)
-            yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update()
+            yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
         else:
-            yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update()
+            yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
         if time.time() - _pipeline_start > _max_pipeline_secs:
             log_lines.append("\nTIMEOUT: Pipeline exceeded 45-minute limit.")
             break
@@ -1655,7 +1659,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
         err_msg = str(error_ref[0]) or repr(error_ref[0])
         log_lines.append(f"\nERROR: {err_msg}")
         _state["log"] = log_lines
-        yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update()
+        yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
         return
 
     # Success — keep model in memory for chat.
@@ -1757,7 +1761,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
             if bnb_available:
                 log_lines.append("\nModel too large for chat at float16 — reloading in 4-bit...")
                 last_yielded[0] = len(log_lines)
-                yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update()
+                yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
                 try:
                     from transformers import BitsAndBytesConfig
                     bnb_cfg = BitsAndBytesConfig(
@@ -1804,7 +1808,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
                     else "Falling back to CPU offload..."
                 )
                 last_yielded[0] = len(log_lines)
-                yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update()
+                yield status_msg, "\n".join(log_lines), gr.update(), gr.update(), gr.update(), gr.update()
                 try:
                     offload_dir = tempfile.mkdtemp(prefix="obliteratus_offload_")
                     model_reloaded = AutoModelForCausalLM.from_pretrained(
@@ -1861,13 +1865,21 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
                 f"**{model_choice}** liberated with `{method}` method. "
                 f"Saved to `{save_dir}`. Chat requires a larger GPU."
             )
-        # Update session dropdown directly (don't rely on .then() which can
-        # fail to fire on ZeroGPU after generator teardown)
+        # Update BOTH session dropdowns directly (don't rely on .then() which
+        # fails to fire on ZeroGPU after generator teardown).
+        # Set skip flag so the .change handler doesn't trigger a wasteful
+        # GPU re-allocation — the model is already loaded.
+        global _skip_session_load
+        _skip_session_load = True
         _dd_update = gr.update(
             choices=_get_session_model_choices(),
             value=_last_obliterated_label or None,
         )
-        yield status_msg, "\n".join(log_lines), get_chat_header(), _dd_update, metrics_card
+        _ab_dd_update = gr.update(
+            choices=_get_session_model_choices(),
+            value=_last_obliterated_label or None,
+        )
+        yield status_msg, "\n".join(log_lines), get_chat_header(), _dd_update, metrics_card, _ab_dd_update
 
     except Exception as e:
         # Ensure status never gets stuck on "obliterating"
@@ -1876,7 +1888,7 @@ def obliterate(model_choice: str, method_choice: str, hub_repo: str,
         err_msg = str(e) or repr(e)
         log_lines.append(f"\nERROR (post-pipeline): {err_msg}")
         _state["log"] = log_lines
-        yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update()
+        yield f"**Error:** {err_msg}", "\n".join(log_lines), get_chat_header(), gr.update(), gr.update(), gr.update()
 
 
 # ---------------------------------------------------------------------------
@@ -2102,6 +2114,18 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
 
     On ZeroGPU, uses the visitor's GPU quota.
     """
+    # Skip if the obliterate function just set the dropdown value — the model
+    # is already loaded and we'd just waste GPU quota re-allocating.
+    global _skip_session_load
+    if _skip_session_load:
+        _skip_session_load = False
+        if choice and _state.get("status") == "ready":
+            yield (
+                f"**Ready!** `{choice}` is loaded — just type in the chat below.",
+                get_chat_header(),
+            )
+            return
+
     if not choice or choice not in _bench_configs:
         yield "**Error:** No benchmark result selected. Pick a model from the dropdown first.", ""
         return
@@ -3727,6 +3751,7 @@ Pre-configured benchmark configurations for common research questions.
                     choices=_get_session_model_choices(),
                     label="Cached Models",
                     info="Select a model to auto-load it for chat",
+                    allow_custom_value=True,
                 )
                 session_load_status = gr.Markdown("")
 
@@ -3779,6 +3804,7 @@ See exactly how abliteration changes model behavior on the same prompt.
                     choices=_get_session_model_choices(),
                     label="Cached Models",
                     info="Select a model to auto-load it for A/B comparison",
+                    allow_custom_value=True,
                 )
                 ab_session_load_status = gr.Markdown("")
 
@@ -4125,8 +4151,8 @@ Built on the shoulders of:
 
 ### Links
 
-- [GitHub](https://github.com/obliteratus-project/OBLITERATUS)
-- [Paper](https://github.com/obliteratus-project/OBLITERATUS/tree/main/paper)
+- [GitHub](https://github.com/elder-plinius/OBLITERATUS)
+- [Paper](https://github.com/elder-plinius/OBLITERATUS/tree/main/paper)
 """)
 
     # Wire method dropdown → auto-update advanced settings
@@ -4192,28 +4218,27 @@ Built on the shoulders of:
     ).then(fn=_get_vram_html, outputs=[vram_display])
 
     # Wire obliterate button (after all tabs so chat_status is defined)
-    # session_model_dd is a direct output (4th) so the dropdown updates
-    # reliably even on ZeroGPU where .then() may not fire after generator teardown.
+    # Both session_model_dd (4th) and ab_session_model_dd (6th) are direct
+    # outputs so the dropdowns update reliably even on ZeroGPU where .then()
+    # may not fire after generator teardown.
     obliterate_btn.click(
         fn=obliterate,
         inputs=[model_dd, method_dd, hub_repo, prompt_vol_dd, dataset_dd,
                 custom_harmful_tb, custom_harmless_tb] + _adv_controls,
-        outputs=[status_md, log_box, chat_status, session_model_dd, metrics_md],
+        outputs=[status_md, log_box, chat_status, session_model_dd, metrics_md, ab_session_model_dd],
     ).then(
-        fn=lambda: (
-            gr.update(choices=_get_session_model_choices()),
-            _get_vram_html(),
-        ),
-        outputs=[ab_session_model_dd, vram_display],
+        fn=lambda: _get_vram_html(),
+        outputs=[vram_display],
     )
 
     # Wire session model auto-loading (Chat tab dropdown change)
+    # Always pass choices + value together so ZeroGPU doesn't hit stale choices
     session_model_dd.change(
         fn=load_bench_into_chat,
         inputs=[session_model_dd],
         outputs=[session_load_status, chat_status],
     ).then(
-        fn=lambda v: (gr.update(value=v), _get_vram_html()),
+        fn=lambda v: (gr.update(choices=_get_session_model_choices(), value=v), _get_vram_html()),
         inputs=[session_model_dd],
         outputs=[ab_session_model_dd, vram_display],
     )
@@ -4224,7 +4249,7 @@ Built on the shoulders of:
         inputs=[ab_session_model_dd],
         outputs=[ab_session_load_status, chat_status],
     ).then(
-        fn=lambda v: (gr.update(value=v), _get_vram_html()),
+        fn=lambda v: (gr.update(choices=_get_session_model_choices(), value=v), _get_vram_html()),
         inputs=[ab_session_model_dd],
         outputs=[session_model_dd, vram_display],
     )
diff --git a/docs/index.html b/docs/index.html
index 2062a92..180c798 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1095,7 +1095,7 @@
                 <h2>&gt; Quickstart: Free a Model</h2>
                 <div style="background:#000; padding:16px; border:1px solid var(--border); margin-top:12px; line-height:2; font-size:0.78rem;">
                     <span style="color:var(--text-dim)"># 1. get the liberation toolkit</span><br>
-                    <span style="color:var(--accent)">$</span> git clone https://github.com/obliteratus-project/OBLITERATUS<br>
+                    <span style="color:var(--accent)">$</span> git clone https://github.com/elder-plinius/OBLITERATUS<br>
                     <span style="color:var(--accent)">$</span> cd OBLITERATUS<br>
                     <span style="color:var(--accent)">$</span> pip install -e .<br><br>
                     <span style="color:var(--text-dim)"># 2. interactive mode (guided liberation)</span><br>
@@ -1154,7 +1154,7 @@
                     <div style="margin-bottom:16px; padding:12px; border-left:3px solid var(--yellow); background:rgba(255,183,0,0.03)">
                         <h4 style="color:var(--yellow); font-size:0.82rem">Concept Cone Geometry <span style="font-size:0.65rem; color:var(--red)">[NOVEL]</span></h4>
                         <p style="color:var(--text-dim); font-size:0.75rem; margin-top:4px">
-                            Analyzes whether different harm categories (weapons, cyber, drugs, etc.) share a single refusal direction or have distinct mechanisms. Computes cone solid angles, Direction Specificity Index, and polyhedral classification. Based on Gurnee &amp; Nanda (ICML 2025) with novel extensions.
+                            Analyzes whether different harm categories (weapons, cyber, drugs, etc.) share a single refusal direction or have distinct mechanisms. Computes cone solid angles, Direction Specificity Index, and polyhedral classification. Based on Wollschlager et al. (ICML 2025) with novel extensions.
                         </p>
                     </div>
                     <div style="margin-bottom:16px; padding:12px; border-left:3px solid var(--yellow); background:rgba(255,183,0,0.03)">
@@ -1397,7 +1397,7 @@
                 <div style="margin-bottom:16px; padding:16px; background:linear-gradient(135deg, rgba(249,171,0,0.08), rgba(249,171,0,0.02)); border:1px solid rgba(249,171,0,0.3); border-radius:6px">
                     <div style="font-size:0.82rem; font-weight:700; color:var(--yellow); margin-bottom:8px; letter-spacing:0.5px">&#9656; COLAB NOTEBOOK</div>
                     <div style="display:flex; align-items:center; gap:12px; flex-wrap:wrap">
-                        <a id="colab-link" href="https://colab.research.google.com/github/obliteratus-project/OBLITERATUS/blob/main/notebooks/abliterate.ipynb" target="_blank" rel="noopener"
+                        <a id="colab-link" href="https://colab.research.google.com/github/elder-plinius/OBLITERATUS/blob/main/notebooks/abliterate.ipynb" target="_blank" rel="noopener"
                            style="display:inline-flex; align-items:center; gap:8px; background:#f9ab00; color:#000; padding:10px 20px; font-weight:700; font-size:0.85rem; text-decoration:none; border-radius:4px; letter-spacing:0.5px; font-family:'Fira Code',monospace">
                             <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="" style="height:20px; vertical-align:middle">
                             OPEN IN COLAB
diff --git a/hf-spaces/README.md b/hf-spaces/README.md
index 08498b7..8c21308 100644
--- a/hf-spaces/README.md
+++ b/hf-spaces/README.md
@@ -50,7 +50,7 @@ Logged-in HuggingFace users get free GPU quota. For more quota, upgrade to [HF P
 ## Run locally (same UI, your own GPU)
 
 ```bash
-git clone https://github.com/obliteratus-project/OBLITERATUS
+git clone https://github.com/elder-plinius/OBLITERATUS
 cd OBLITERATUS
 pip install -e ".[spaces]"
 
@@ -73,5 +73,5 @@ No GPU hardware selection needed — ZeroGPU handles allocation automatically.
 
 ## Links
 
-- [GitHub](https://github.com/obliteratus-project/OBLITERATUS)
-- [Paper](https://github.com/obliteratus-project/OBLITERATUS/tree/main/paper)
+- [GitHub](https://github.com/elder-plinius/OBLITERATUS)
+- [Paper](https://github.com/elder-plinius/OBLITERATUS/tree/main/paper)
diff --git a/notebooks/abliterate.ipynb b/notebooks/abliterate.ipynb
index 29e4b68..52ff463 100644
--- a/notebooks/abliterate.ipynb
+++ b/notebooks/abliterate.ipynb
@@ -53,7 +53,7 @@
     "id": "install"
    },
    "outputs": [],
-   "source": "!pip install -q git+https://github.com/obliteratus-project/OBLITERATUS.git\n!pip install -q accelerate bitsandbytes\n\nimport torch\nprint(f\"PyTorch {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n    print(f\"VRAM: {torch.cuda.get_device_properties(0).total_mem / 1024**3:.1f} GB\")"
+   "source": "!pip install -q git+https://github.com/elder-plinius/OBLITERATUS.git\n!pip install -q accelerate bitsandbytes\n\nimport torch\nprint(f\"PyTorch {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n    print(f\"VRAM: {torch.cuda.get_device_properties(0).total_mem / 1024**3:.1f} GB\")"
   },
   {
    "cell_type": "markdown",
diff --git a/obliteratus/abliterate.py b/obliteratus/abliterate.py
index cded82f..df010d5 100644
--- a/obliteratus/abliterate.py
+++ b/obliteratus/abliterate.py
@@ -4010,6 +4010,11 @@ class AbliterationPipeline:
                     f"Projecting packed quantized data would silently corrupt the model. "
                     f"Original error: {e}"
                 )
+        # Some architectures store weights as non-float types (e.g. uint8 from
+        # custom quantization schemes).  Projections require float math, so
+        # convert and treat as "quantized" so the caller writes back properly.
+        if not weight.data.is_floating_point():
+            return weight.data.to(torch.float32), True
         return weight.data, False
 
     @staticmethod
@@ -4049,10 +4054,20 @@ class AbliterationPipeline:
                 )
             return
 
+        # ── Non-float weight (e.g. uint8 from custom quantization) ─────
+        # If the original weight isn't a bitsandbytes/GPTQ/AWQ param, just
+        # replace with the float version so projections are preserved.
+        weight = proj_module.weight
+        if not AbliterationPipeline._is_quantized_param(weight):
+            proj_module.weight = nn.Parameter(
+                W_modified.to(device=weight.device),
+                requires_grad=weight.requires_grad,
+            )
+            return
+
         # ── bitsandbytes re-quantization ──────────────────────────
         try:
             import bitsandbytes as bnb
-            weight = proj_module.weight
             quantized, new_state = bnb.functional.quantize_4bit(
                 W_modified.to(weight.device),
                 quant_type=getattr(weight, "quant_type", "nf4"),
@@ -4087,7 +4102,8 @@ class AbliterationPipeline:
         norms: dict[str, float] = {}
         for param_name, param in layer.named_parameters():
             if param_name.endswith(".weight"):
-                norms[param_name] = param.data.norm().item()
+                data = param.data.float() if not param.data.is_floating_point() else param.data
+                norms[param_name] = data.norm().item()
         return norms
 
     @staticmethod
@@ -4106,7 +4122,8 @@ class AbliterationPipeline:
                 continue
             original_norm = saved_norms[param_name]
             if original_norm > 0:
-                new_norm = param.data.norm().item()
+                data = param.data.float() if not param.data.is_floating_point() else param.data
+                new_norm = data.norm().item()
                 if math.isnan(new_norm) or math.isinf(new_norm) or new_norm == 0:
                     continue  # Skip — weight is degenerate after projection
                 if abs(new_norm - original_norm) > 1e-6:
@@ -4294,6 +4311,10 @@ class AbliterationPipeline:
                     continue
             else:
                 data = param.data
+                # Non-float (e.g. uint8) fused params need float conversion
+                if not data.is_floating_point():
+                    data = data.float()
+                    is_quantized = True  # ensure write-back replaces param
 
             if data.dim() < 3:
                 continue
diff --git a/pyproject.toml b/pyproject.toml
index 9cf1ac4..1111e46 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,9 +38,9 @@ dependencies = [
 ]
 
 [project.urls]
-"Homepage" = "https://github.com/obliteratus-project/OBLITERATUS"
-"Repository" = "https://github.com/obliteratus-project/OBLITERATUS"
-"Bug Tracker" = "https://github.com/obliteratus-project/OBLITERATUS/issues"
+"Homepage" = "https://github.com/elder-plinius/OBLITERATUS"
+"Repository" = "https://github.com/elder-plinius/OBLITERATUS"
+"Bug Tracker" = "https://github.com/elder-plinius/OBLITERATUS/issues"
 
 [project.optional-dependencies]
 dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy"]
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_abliterate.py b/tests/test_abliterate.py
new file mode 100644
index 0000000..4ca0ba3
--- /dev/null
+++ b/tests/test_abliterate.py
@@ -0,0 +1,2634 @@
+"""Tests for the SOTA abliteration pipeline."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+from transformers import GPT2Config, GPT2LMHeadModel
+
+from obliteratus.abliterate import (
+    HARMFUL_PROMPTS,
+    HARMLESS_PROMPTS,
+    METHODS,
+    STAGES,
+    AbliterationPipeline,
+    PipelineStage,
+    StageResult,
+)
+from obliteratus.models.loader import ModelHandle
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+def _make_tiny_handle():
+    """Create a minimal ModelHandle with a tiny GPT-2 for testing."""
+    config = GPT2Config(
+        vocab_size=1000,
+        n_positions=128,
+        n_embd=64,
+        n_layer=4,
+        n_head=2,
+        n_inner=256,
+    )
+    model = GPT2LMHeadModel(config)
+    model.eval()
+
+    tokenizer = MagicMock()
+    tokenizer.pad_token = "<pad>"
+    tokenizer.eos_token = "<eos>"
+    tokenizer.return_value = {
+        "input_ids": torch.randint(0, 1000, (1, 10)),
+        "attention_mask": torch.ones(1, 10, dtype=torch.long),
+    }
+    tokenizer.decode.return_value = "The capital of France is Paris, a beautiful city"
+
+    handle = ModelHandle(
+        model=model,
+        tokenizer=tokenizer,
+        config=config,
+        model_name="gpt2-test",
+        task="causal_lm",
+    )
+    handle.snapshot()
+    return handle
+
+
+def _make_varied_tokenizer(handle):
+    """Set up a tokenizer mock that returns different tokens per call."""
+    call_count = [0]
+    def mock_tokenizer(prompt, **kwargs):
+        call_count[0] += 1
+        torch.manual_seed(call_count[0])
+        return {
+            "input_ids": torch.randint(0, 1000, (1, 5)),
+            "attention_mask": torch.ones(1, 5, dtype=torch.long),
+        }
+    handle.tokenizer.side_effect = mock_tokenizer
+
+
+@pytest.fixture
+def handle():
+    return _make_tiny_handle()
+
+
+# ---------------------------------------------------------------------------
+# Data & stage definitions
+# ---------------------------------------------------------------------------
+
+class TestPrompts:
+    def test_harmful_prompts_expanded(self):
+        assert len(HARMFUL_PROMPTS) >= 99
+
+    def test_harmless_prompts_expanded(self):
+        assert len(HARMLESS_PROMPTS) >= 99
+
+    def test_prompt_lists_same_length(self):
+        assert len(HARMFUL_PROMPTS) == len(HARMLESS_PROMPTS)
+
+    def test_prompt_count_512(self):
+        """512 prompts across 7 severity tiers."""
+        assert len(HARMFUL_PROMPTS) == 512
+        assert len(HARMLESS_PROMPTS) == 512
+
+    def test_prompt_volume_slicing(self):
+        """Slicing at standard volumes gives correct counts."""
+        for n in (33, 66, 99, 256, 512):
+            assert len(HARMFUL_PROMPTS[:n]) == n
+            assert len(HARMLESS_PROMPTS[:n]) == n
+
+
+class TestStages:
+    def test_six_stages(self):
+        assert len(STAGES) == 6
+
+    def test_stage_keys(self):
+        keys = [s.key for s in STAGES]
+        assert keys == ["summon", "probe", "distill", "excise", "verify", "rebirth"]
+
+    def test_stage_dataclass(self):
+        stage = PipelineStage(key="test", name="TEST", description="A test stage")
+        assert stage.key == "test"
+        assert stage.name == "TEST"
+
+    def test_stage_result_defaults(self):
+        result = StageResult(stage="test", status="running")
+        assert result.message == ""
+        assert result.duration == 0.0
+        assert result.details == {}
+
+
+# ---------------------------------------------------------------------------
+# Method presets
+# ---------------------------------------------------------------------------
+
+class TestMethods:
+    def test_methods_exist(self):
+        assert set(METHODS.keys()) == {"basic", "advanced", "aggressive", "informed", "surgical", "inverted", "nuclear", "optimized", "failspy", "gabliteration", "heretic", "rdo", "spectral_cascade"}
+
+    def test_basic_single_direction(self):
+        cfg = METHODS["basic"]
+        assert cfg["n_directions"] == 1
+        assert cfg["norm_preserve"] is False
+        assert cfg["regularization"] == 0.0
+        assert cfg["refinement_passes"] == 1
+
+    def test_advanced_multi_direction(self):
+        cfg = METHODS["advanced"]
+        assert cfg["n_directions"] > 1
+        assert cfg["norm_preserve"] is True
+        assert cfg["regularization"] > 0
+        assert cfg["refinement_passes"] >= 2
+
+    def test_aggressive_full_gabliteration(self):
+        cfg = METHODS["aggressive"]
+        assert cfg["n_directions"] >= 8
+        assert cfg["norm_preserve"] is True
+        assert cfg["refinement_passes"] >= 3
+
+
+# ---------------------------------------------------------------------------
+# Pipeline init
+# ---------------------------------------------------------------------------
+
+class TestPipelineInit:
+    def test_default_prompts(self):
+        pipeline = AbliterationPipeline(model_name="test-model")
+        assert pipeline.harmful_prompts == HARMFUL_PROMPTS
+        assert pipeline.harmless_prompts == HARMLESS_PROMPTS
+
+    def test_custom_prompts(self):
+        harmful = ["bad prompt"]
+        harmless = ["good prompt"]
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            harmful_prompts=harmful,
+            harmless_prompts=harmless,
+        )
+        assert pipeline.harmful_prompts == harmful
+        assert pipeline.harmless_prompts == harmless
+
+    def test_defaults(self):
+        pipeline = AbliterationPipeline(model_name="test-model")
+        assert pipeline.device == "auto"
+        assert pipeline.dtype == "float16"
+        assert pipeline.output_dir == Path("abliterated")
+        assert pipeline.trust_remote_code is False
+        assert pipeline.handle is None
+
+    def test_default_method_is_advanced(self):
+        pipeline = AbliterationPipeline(model_name="test-model")
+        assert pipeline.method == "advanced"
+        assert pipeline.n_directions == METHODS["advanced"]["n_directions"]
+        assert pipeline.norm_preserve == METHODS["advanced"]["norm_preserve"]
+        assert pipeline.regularization == METHODS["advanced"]["regularization"]
+
+    def test_method_basic(self):
+        pipeline = AbliterationPipeline(model_name="test-model", method="basic")
+        assert pipeline.n_directions == 1
+        assert pipeline.norm_preserve is False
+        assert pipeline.regularization == 0.0
+
+    def test_method_aggressive(self):
+        pipeline = AbliterationPipeline(model_name="test-model", method="aggressive")
+        assert pipeline.n_directions == 8
+        assert pipeline.norm_preserve is True
+        assert pipeline.refinement_passes == 3
+
+    def test_explicit_overrides_method(self):
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            method="basic",
+            n_directions=6,
+            norm_preserve=True,
+            regularization=0.5,
+            refinement_passes=4,
+        )
+        assert pipeline.n_directions == 6
+        assert pipeline.norm_preserve is True
+        assert pipeline.regularization == 0.5
+        assert pipeline.refinement_passes == 4
+
+    def test_callbacks(self):
+        stage_results = []
+        log_msgs = []
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            on_stage=lambda r: stage_results.append(r),
+            on_log=lambda m: log_msgs.append(m),
+        )
+        pipeline.log("hello")
+        assert log_msgs == ["hello"]
+
+        pipeline._emit("test", "running", "msg")
+        assert len(stage_results) == 1
+        assert stage_results[0].stage == "test"
+
+
+# ---------------------------------------------------------------------------
+# _project_out_advanced (norm-preserving + regularization)
+# ---------------------------------------------------------------------------
+
+class TestProjectOutAdvanced:
+    def test_norm_preserving(self):
+        """Norm-preserving mode should keep Frobenius norm constant."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(4, 8, bias=False)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        module.o_proj.weight.data = torch.randn(8, 4)
+        original_norm = module.o_proj.weight.data.norm().item()
+
+        direction = torch.randn(4, 1)
+        direction = direction / direction.norm()
+
+        AbliterationPipeline._project_out_advanced(
+            module, direction, ["o_proj"], norm_preserve=True, regularization=0.0
+        )
+
+        new_norm = module.o_proj.weight.data.norm().item()
+        # With amplification cap (1.10x max), exact norm preservation isn't
+        # guaranteed on tiny matrices (hidden_dim=4) where a single direction
+        # removes a large fraction of energy.  Verify the norm is closer to
+        # original than the un-preserved norm would be (i.e. cap is working).
+        without_preserve_norm_sq = original_norm ** 2 - (module.o_proj.weight.data @ direction).pow(2).sum().item()
+        # The new norm should be >= the un-preserved norm (cap restores some)
+        assert new_norm >= original_norm * 0.85, \
+            f"Norm should be approximately preserved (within cap): {original_norm:.4f} vs {new_norm:.4f}"
+
+    def test_regularization_partial_removal(self):
+        """Regularization should preserve some of the refusal component."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(4, 8, bias=False)
+
+        module_full = Wrapper()
+        module_reg = Wrapper()
+        torch.manual_seed(42)
+        W_orig = torch.randn(8, 4)
+        module_full.o_proj.weight.data = W_orig.clone()
+        module_reg.o_proj.weight.data = W_orig.clone()
+
+        direction = torch.randn(4, 1)
+        direction = direction / direction.norm()
+
+        # Full removal
+        AbliterationPipeline._project_out_advanced(
+            module_full, direction, ["o_proj"], norm_preserve=False, regularization=0.0
+        )
+        # Regularized (30% preserved)
+        AbliterationPipeline._project_out_advanced(
+            module_reg, direction, ["o_proj"], norm_preserve=False, regularization=0.3
+        )
+
+        W_full = module_full.o_proj.weight.data
+        W_reg = module_reg.o_proj.weight.data
+
+        # Full removal should have zero projection on direction
+        proj_full = (W_full @ direction).norm().item()
+        assert proj_full < 1e-4
+
+        # Regularized should have non-zero projection (30% preserved)
+        proj_reg = (W_reg @ direction).norm().item()
+        proj_orig = (W_orig @ direction).norm().item()
+        expected_ratio = 0.3
+        actual_ratio = proj_reg / proj_orig if proj_orig > 0 else 0
+        assert abs(actual_ratio - expected_ratio) < 0.05, \
+            f"Expected ~{expected_ratio:.0%} preserved, got {actual_ratio:.0%}"
+
+    def test_norm_preserving_transposed(self):
+        """Norm-preserving should also work for transposed weights."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.c_proj = torch.nn.Linear(8, 4, bias=False)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        module.c_proj.weight.data = torch.randn(4, 8)
+        original_norm = module.c_proj.weight.data.norm().item()
+
+        direction = torch.randn(4, 1)
+        direction = direction / direction.norm()
+
+        AbliterationPipeline._project_out_advanced(
+            module, direction, ["c_proj"], norm_preserve=True, regularization=0.0
+        )
+
+        new_norm = module.c_proj.weight.data.norm().item()
+        # With amplification cap (1.10x max), exact norm preservation isn't
+        # guaranteed on tiny matrices where a single direction removes a large
+        # fraction of energy.
+        assert new_norm >= original_norm * 0.80, \
+            f"Norm should be approximately preserved (within cap): {original_norm:.4f} vs {new_norm:.4f}"
+
+
+# ---------------------------------------------------------------------------
+# Full attention projection (q/k/v + o_proj)
+# ---------------------------------------------------------------------------
+
+class TestAttentionFullProjection:
+    """Test that ALL attention weight matrices are projected (not just o_proj)."""
+
+    def test_qkv_all_projected(self):
+        """q_proj, k_proj, v_proj should all be projected alongside o_proj."""
+        hidden = 16
+
+        class FakeAttn(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.q_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.k_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.v_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        attn = FakeAttn()
+        torch.manual_seed(42)
+        for p in attn.parameters():
+            p.data = torch.randn_like(p.data)
+
+        originals = {
+            name: getattr(attn, name).weight.data.clone()
+            for name in ["q_proj", "k_proj", "v_proj", "o_proj"]
+        }
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        from obliteratus.abliterate import _ATTN_OUT_NAMES, _ATTN_IN_NAMES
+        count = AbliterationPipeline._project_out_advanced(
+            attn, d, _ATTN_OUT_NAMES + _ATTN_IN_NAMES,
+        )
+
+        assert count == 4, f"Should project 4 weights (q/k/v/o), got {count}"
+        for name in ["q_proj", "k_proj", "v_proj", "o_proj"]:
+            assert not torch.allclose(
+                getattr(attn, name).weight.data, originals[name]
+            ), f"{name} should be modified"
+
+    def test_project_all_does_not_early_return(self):
+        """_project_out_advanced should project ALL matching weights, not just first."""
+        hidden = 16
+
+        class FakeModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.gate_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        mod = FakeModule()
+        torch.manual_seed(42)
+        orig_up = mod.up_proj.weight.data.clone()
+        orig_gate = mod.gate_proj.weight.data.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        from obliteratus.abliterate import _FFN_IN_NAMES
+        count = AbliterationPipeline._project_out_advanced(mod, d, _FFN_IN_NAMES)
+
+        assert count == 2, f"Should project both up_proj and gate_proj, got {count}"
+        assert not torch.allclose(mod.up_proj.weight.data, orig_up), "up_proj should be modified"
+        assert not torch.allclose(mod.gate_proj.weight.data, orig_gate), "gate_proj should be modified"
+
+    def test_lm_head_projection(self):
+        """lm_head should be projectable via _project_out_advanced."""
+        hidden = 16
+        vocab = 100
+
+        class FakeModel(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.lm_head = torch.nn.Linear(hidden, vocab, bias=False)
+
+        model = FakeModel()
+        torch.manual_seed(42)
+        orig = model.lm_head.weight.data.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        count = AbliterationPipeline._project_out_advanced(
+            model, d, ["lm_head"], regularization=0.0,
+        )
+
+        assert count == 1, "Should project lm_head"
+        assert not torch.allclose(model.lm_head.weight.data, orig), "lm_head should be modified"
+        # Verify refusal direction is removed from lm_head
+        proj = (model.lm_head.weight.data @ d).norm().item()
+        assert proj < 1e-4, f"Refusal direction should be removed from lm_head, proj={proj}"
+
+
+class TestKneeDetectionThreshold:
+    """Test that knee detection uses 5% threshold to include more layers."""
+
+    def test_five_percent_threshold_includes_more(self):
+        """Layers between 5% and 10% of max should now be included."""
+        # Layer norms: max=10.0, then several between 5%-10%
+        sorted_layers = [(0, 10.0), (1, 8.0), (2, 6.0), (3, 0.7), (4, 0.6)]
+        selected = AbliterationPipeline._select_layers_knee(sorted_layers)
+        # 0.7 and 0.6 are 7% and 6% of max — should now be included (> 5% threshold)
+        assert 3 in selected or 4 in selected, (
+            f"Layers with 6-7% of max signal should be included, got {selected}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# MoE projection (router, shared expert, input/output, fused)
+# ---------------------------------------------------------------------------
+
+class TestProjectMoEExperts:
+    """Test the full MoE projection pipeline: router, shared expert, experts."""
+
+    def _make_direction(self, hidden_dim=16):
+        d = torch.randn(hidden_dim, 1)
+        return d / d.norm()
+
+    def test_router_gate_projected(self):
+        """Router/gate weight should have refusal direction removed."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=True)
+                self.experts = torch.nn.ModuleList([
+                    self._make_expert() for _ in range(n_experts)
+                ])
+
+            @staticmethod
+            def _make_expert():
+                m = torch.nn.Module()
+                m.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                m.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                return m
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        W_gate_orig = moe.gate.weight.data.clone()
+
+        count = AbliterationPipeline._project_moe_experts(moe, d)
+        assert count > 0
+
+        # Gate weight should have been modified
+        assert not torch.allclose(moe.gate.weight.data, W_gate_orig), \
+            "Router/gate weights should be projected"
+
+        # The gate weight's projection onto the direction should be ~0
+        proj = (moe.gate.weight.data @ d).norm().item()
+        assert proj < 1e-4, f"Gate should have no component along refusal dir, got {proj}"
+
+    def test_shared_expert_projected(self):
+        """Shared expert (always-on) should have both input and output projected."""
+        hidden = 16
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, 2, bias=False)
+                self.shared_expert = torch.nn.Module()
+                self.shared_expert.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.shared_expert.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.experts = torch.nn.ModuleList([
+                    self._make_expert() for _ in range(2)
+                ])
+
+            @staticmethod
+            def _make_expert():
+                m = torch.nn.Module()
+                m.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                m.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                return m
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        shared_down_orig = moe.shared_expert.down_proj.weight.data.clone()
+        shared_up_orig = moe.shared_expert.up_proj.weight.data.clone()
+
+        count = AbliterationPipeline._project_moe_experts(moe, d)
+        assert count > 0
+
+        # Both shared expert output AND input projections should be modified
+        assert not torch.allclose(moe.shared_expert.down_proj.weight.data, shared_down_orig), \
+            "Shared expert output (down_proj) should be projected"
+        assert not torch.allclose(moe.shared_expert.up_proj.weight.data, shared_up_orig), \
+            "Shared expert input (up_proj) should be projected"
+
+    def test_expert_input_projections_projected(self):
+        """Expert input projections (up_proj, gate_proj) should also be modified."""
+        hidden = 16
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.gate_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(2)])
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        up_orig = moe.experts[0].up_proj.weight.data.clone()
+
+        count = AbliterationPipeline._project_moe_experts(moe, d)
+
+        # Each expert contributes 2 projections (output + input)
+        # 2 experts * 2 = 4 minimum
+        assert count >= 4, f"Expected >= 4 projections (out+in per expert), got {count}"
+
+        assert not torch.allclose(moe.experts[0].up_proj.weight.data, up_orig), \
+            "Expert input (up_proj) should be projected"
+
+    def test_fused_3d_output_and_input(self):
+        """Fused 3D parameter patterns (GPT-OSS style) should project both directions."""
+        hidden = 16
+        intermediate = 32
+        n_experts = 4
+
+        class FusedExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+                self.up_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.experts = FusedExperts()
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        down_orig = moe.experts.down_proj.data.clone()
+        up_orig = moe.experts.up_proj.data.clone()
+
+        count = AbliterationPipeline._project_moe_experts(moe, d)
+
+        # 4 experts output + 4 experts input = 8
+        assert count == 8, f"Expected 8 fused projections, got {count}"
+
+        assert not torch.allclose(moe.experts.down_proj.data, down_orig), \
+            "Fused output (down_proj) should be projected"
+        assert not torch.allclose(moe.experts.up_proj.data, up_orig), \
+            "Fused input (up_proj) should be projected"
+
+    def test_fused_3d_norm_preserve(self):
+        """Fused 3D projections should preserve norms when requested."""
+        hidden = 16
+        intermediate = 32
+        n_experts = 4
+
+        class FusedExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.experts = FusedExperts()
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+
+        # Record per-expert norms before
+        orig_norms = [moe.experts.down_proj.data[i].norm().item() for i in range(n_experts)]
+
+        AbliterationPipeline._project_moe_experts(moe, d, norm_preserve=True)
+
+        # Check per-expert norms preserved
+        for i in range(n_experts):
+            new_norm = moe.experts.down_proj.data[i].norm().item()
+            assert abs(orig_norms[i] - new_norm) < 1e-3, \
+                f"Expert {i} norm not preserved: {orig_norms[i]:.4f} vs {new_norm:.4f}"
+
+    def test_no_experts_returns_zero(self):
+        """Module without experts attribute should return 0."""
+        class NoMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.mlp = torch.nn.Linear(16, 32)
+
+        moe = NoMoE()
+        d = self._make_direction(16)
+        assert AbliterationPipeline._project_moe_experts(moe, d) == 0
+
+    def test_router_bias_projected(self):
+        """Router bias should be projected when project_biases=True."""
+        hidden = 16
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, 4, bias=True)
+                self.experts = torch.nn.ModuleList([
+                    self._make_expert() for _ in range(4)
+                ])
+
+            @staticmethod
+            def _make_expert():
+                m = torch.nn.Module()
+                m.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                return m
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        bias_orig = moe.gate.bias.data.clone()
+
+        count = AbliterationPipeline._project_moe_experts(moe, d, project_biases=True)
+
+        # Gate has 4 outputs (num_experts), direction has 16 dims
+        # bias shape (4,) != direction shape (16,), so bias won't match.
+        # This is correct: router bias is (num_experts,), not (hidden_dim,),
+        # so _project_bias won't modify it (shape mismatch is expected).
+        assert torch.allclose(moe.gate.bias.data, bias_orig), (
+            "Router bias should be unchanged when shape mismatches direction"
+        )
+        assert isinstance(count, int)
+        assert count > 0  # expert weights should still be projected
+
+    def test_router_auto_detection_fallback(self):
+        """Unknown router name should be auto-detected and projected."""
+        import warnings as w
+        hidden = 16
+        n_experts = 4
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                # Unusual router name not in _ROUTER_NAMES
+                self.moe_gate_proj = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([
+                    self._make_expert() for _ in range(n_experts)
+                ])
+
+            @staticmethod
+            def _make_expert():
+                m = torch.nn.Module()
+                m.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                return m
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+        gate_orig = moe.moe_gate_proj.weight.data.clone()
+
+        with w.catch_warnings(record=True) as caught:
+            w.simplefilter("always")
+            AbliterationPipeline._project_moe_experts(moe, d)
+
+        # Should auto-detect and project the unusual router name
+        assert not torch.allclose(moe.moe_gate_proj.weight.data, gate_orig), \
+            "Auto-detected router should be projected"
+
+        # Should emit a warning about the auto-detection
+        auto_detect_warnings = [
+            x for x in caught
+            if "auto-detected" in str(x.message)
+        ]
+        assert len(auto_detect_warnings) > 0, "Should warn about auto-detected router"
+
+    def test_full_moe_all_components(self):
+        """End-to-end: all MoE components should be modified together."""
+        hidden = 16
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, 4, bias=False)
+                self.shared_expert = torch.nn.Module()
+                self.shared_expert.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.shared_expert.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(4)])
+
+        moe = FakeMoE()
+        d = self._make_direction(hidden)
+
+        count = AbliterationPipeline._project_moe_experts(moe, d)
+
+        # Expected: 1 (gate) + 2 (shared out+in) + 4*2 (expert out+in) = 11
+        assert count == 11, f"Expected 11 total projections, got {count}"
+
+
+# ---------------------------------------------------------------------------
+# SOTA technique #1: Safety-neuron masking (GateBreaker-style z-score)
+# ---------------------------------------------------------------------------
+
+class TestSafetyNeuronMasking:
+    def test_outlier_neurons_zeroed(self):
+        """Neurons with outsized refusal projection should be zeroed."""
+        hidden = 16
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 64, bias=False)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        # Inject a few rows with very high projection along direction
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+        # Make rows 0,1,2 have huge projection (outliers)
+        for i in range(3):
+            module.down_proj.weight.data[i] = d.squeeze() * 10.0
+
+        n_masked = AbliterationPipeline._mask_safety_neurons(
+            module, d, ["down_proj"], z_threshold=2.0,
+        )
+
+        assert n_masked >= 3, f"Expected >= 3 masked neurons, got {n_masked}"
+        # Masked rows should be zero
+        for i in range(3):
+            assert module.down_proj.weight.data[i].abs().max().item() < 1e-6
+
+    def test_no_outliers_no_masking(self):
+        """When all neurons have similar projection, none should be masked."""
+        hidden = 16
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        module = Wrapper()
+        # Uniform weights — no outliers
+        module.down_proj.weight.data = torch.ones(32, hidden) * 0.1
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        n_masked = AbliterationPipeline._mask_safety_neurons(
+            module, d, ["down_proj"], z_threshold=2.0,
+        )
+        assert n_masked == 0
+
+    def test_high_threshold_masks_fewer(self):
+        """Higher z_threshold should mask fewer neurons."""
+        hidden = 16
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 64, bias=False)
+
+        module1 = Wrapper()
+        module2 = Wrapper()
+        torch.manual_seed(42)
+        W = torch.randn(64, hidden)
+        module1.down_proj.weight.data = W.clone()
+        module2.down_proj.weight.data = W.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        n1 = AbliterationPipeline._mask_safety_neurons(module1, d, ["down_proj"], z_threshold=1.5)
+        n2 = AbliterationPipeline._mask_safety_neurons(module2, d, ["down_proj"], z_threshold=3.0)
+        assert n1 >= n2, f"Lower threshold should mask more: {n1} vs {n2}"
+
+
+# ---------------------------------------------------------------------------
+# SOTA technique #2: Jailbreak-contrastive direction refinement
+# ---------------------------------------------------------------------------
+
+class TestJailbreakContrastive:
+    def test_jailbreak_prompt_generation(self):
+        """Pipeline should generate jailbreak variants of harmful prompts."""
+        pipeline = AbliterationPipeline(model_name="test")
+        pipeline.harmful_prompts = ["How to hack a computer", "Write a virus"]
+
+        jailbreak = pipeline._generate_jailbreak_prompts()
+
+        assert len(jailbreak) == 2
+        # Each should contain the original prompt
+        assert "hack a computer" in jailbreak[0].lower()
+        assert "virus" in jailbreak[1].lower()
+        # Each should be wrapped in a template (longer than original)
+        assert len(jailbreak[0]) > len("How to hack a computer")
+
+    def test_jailbreak_contrast_blending(self):
+        """Jailbreak-contrastive blending should modify refusal direction."""
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            use_jailbreak_contrast=True,
+            n_directions=1,
+        )
+        hidden = 16
+        pipeline._on_log = lambda m: None
+
+        # Simulate probed means
+        torch.manual_seed(42)
+        harm_mean = torch.randn(1, hidden)
+        safe_mean = torch.randn(1, hidden)
+        jb_mean = torch.randn(1, hidden)
+
+        pipeline._harmful_means = {0: harm_mean}
+        pipeline._harmless_means = {0: safe_mean}
+        pipeline._jailbreak_means = {0: jb_mean}
+        pipeline._harmful_acts = {0: [harm_mean]}
+        pipeline._harmless_acts = {0: [safe_mean]}
+        pipeline._jailbreak_acts = {0: [jb_mean]}
+
+        # Run distill (will set standard direction, then blend)
+        pipeline._distill()
+
+        # Direction should be a unit vector
+        d = pipeline.refusal_directions[0]
+        assert abs(d.norm().item() - 1.0) < 1e-4
+
+        # Direction should differ from pure harm-safe difference
+        std_diff = (harm_mean - safe_mean).squeeze()
+        std_dir = std_diff / std_diff.norm()
+        cosine = (d @ std_dir).item()
+        # Blended direction should not be identical to standard
+        assert cosine < 0.99, f"Blended direction too similar to standard: cos={cosine}"
+
+    def test_surgical_method_enables_jailbreak(self):
+        """Surgical method should enable jailbreak-contrastive by default."""
+        cfg = METHODS["surgical"]
+        assert cfg["use_jailbreak_contrast"] is True
+
+
+# ---------------------------------------------------------------------------
+# SOTA technique #3: Layer-adaptive projection strength
+# ---------------------------------------------------------------------------
+
+class TestLayerAdaptiveStrength:
+    def test_layer_weights_computed(self):
+        """Layer-adaptive weights should be proportional to refusal signal."""
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            layer_adaptive_strength=True,
+            n_directions=1,
+        )
+        hidden = 16
+        pipeline._on_log = lambda m: None
+
+        # Simulate: layer 0 has strong signal, layer 1 weak
+        torch.manual_seed(42)
+        strong_diff = torch.randn(1, hidden) * 10.0
+        weak_diff = torch.randn(1, hidden) * 1.0
+        zero_mean = torch.zeros(1, hidden)
+
+        pipeline._harmful_means = {0: strong_diff, 1: weak_diff}
+        pipeline._harmless_means = {0: zero_mean, 1: zero_mean}
+        pipeline._harmful_acts = {0: [strong_diff], 1: [weak_diff]}
+        pipeline._harmless_acts = {0: [zero_mean], 1: [zero_mean]}
+
+        pipeline._distill()
+
+        # Layer weights should exist for strong layers
+        assert len(pipeline._layer_excise_weights) > 0
+        # Strongest layer should have weight ~1.0
+        max_weight = max(pipeline._layer_excise_weights.values())
+        assert max_weight > 0.9, f"Max weight should be ~1.0, got {max_weight}"
+
+    def test_surgical_method_enables_adaptive(self):
+        """Surgical method should enable layer-adaptive by default."""
+        cfg = METHODS["surgical"]
+        assert cfg["layer_adaptive_strength"] is True
+
+
+# ---------------------------------------------------------------------------
+# SOTA technique #5: Attention head surgery
+# ---------------------------------------------------------------------------
+
+class TestAttentionHeadSurgery:
+    def test_head_selective_projection(self):
+        """Selective head projection should only modify targeted head rows."""
+        hidden = 16
+        n_heads = 4
+        head_dim = hidden // n_heads
+
+        class FakeAttn(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        attn = FakeAttn()
+        torch.manual_seed(42)
+        W_orig = attn.o_proj.weight.data.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        # Head scores: head 0 is top safety head, head 3 is lowest
+        head_scores = [(0, 5.0), (1, 3.0), (2, 1.0), (3, 0.5)]
+
+        n_modified = AbliterationPipeline._project_head_selective(
+            attn, d, head_scores, n_heads=n_heads, head_fraction=0.25,
+        )
+
+        assert n_modified >= 1, "Should modify at least 1 head"
+
+        W_new = attn.o_proj.weight.data
+        # Head 0 columns (targeted) should be modified
+        assert not torch.allclose(
+            W_new[:, 0:head_dim], W_orig[:, 0:head_dim]
+        ), "Targeted head 0 should be modified"
+
+        # Head 3 columns (NOT targeted) should be untouched
+        assert torch.allclose(
+            W_new[:, 3*head_dim:4*head_dim],
+            W_orig[:, 3*head_dim:4*head_dim],
+        ), "Non-targeted head 3 should be untouched"
+
+    def test_head_surgery_norm_preserve(self):
+        """Head surgery with norm_preserve should maintain per-head norms."""
+        hidden = 16
+        n_heads = 4
+        head_dim = hidden // n_heads
+
+        class FakeAttn(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        attn = FakeAttn()
+        torch.manual_seed(42)
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        orig_norms = [
+            attn.o_proj.weight.data[:, h*head_dim:(h+1)*head_dim].norm().item()
+            for h in range(n_heads)
+        ]
+
+        head_scores = [(0, 5.0), (1, 3.0), (2, 1.0), (3, 0.5)]
+        AbliterationPipeline._project_head_selective(
+            attn, d, head_scores, n_heads=n_heads,
+            head_fraction=0.5, norm_preserve=True,
+        )
+
+        # Targeted heads should have preserved norms
+        for h in range(2):  # top 50% = 2 heads
+            new_norm = attn.o_proj.weight.data[:, h*head_dim:(h+1)*head_dim].norm().item()
+            assert abs(orig_norms[h] - new_norm) < 1e-3, \
+                f"Head {h} norm not preserved: {orig_norms[h]:.4f} vs {new_norm:.4f}"
+
+    def test_head_surgery_non_square_gqa(self):
+        """Head surgery should work for GQA models with non-square o_proj (attn_dim != hidden_dim)."""
+        hidden_dim = 12   # model hidden dimension
+        attn_dim = 32      # attention dimension (n_heads * head_dim_attn)
+        n_heads = 4
+        head_dim_attn = attn_dim // n_heads  # 8
+
+        class FakeAttnGQA(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                # o_proj maps attn_dim -> hidden_dim
+                # nn.Linear weight shape: (hidden_dim, attn_dim) = (12, 32)
+                self.o_proj = torch.nn.Linear(attn_dim, hidden_dim, bias=False)
+
+        attn = FakeAttnGQA()
+        torch.manual_seed(42)
+        attn.o_proj.weight.data = torch.randn(hidden_dim, attn_dim)
+        W_orig = attn.o_proj.weight.data.clone()
+
+        d = torch.randn(hidden_dim, 1)
+        d = d / d.norm()
+
+        head_scores = [(0, 5.0), (1, 3.0), (2, 1.0), (3, 0.5)]
+
+        n_modified = AbliterationPipeline._project_head_selective(
+            attn, d, head_scores, n_heads=n_heads, head_fraction=0.25,
+        )
+
+        assert n_modified >= 1, "Should modify at least 1 head"
+
+        W_new = attn.o_proj.weight.data
+        # Head 0 columns (targeted) should be modified
+        assert not torch.allclose(
+            W_new[:, 0:head_dim_attn], W_orig[:, 0:head_dim_attn]
+        ), "Targeted head 0 should be modified"
+
+        # Head 3 columns (NOT targeted) should be untouched
+        assert torch.allclose(
+            W_new[:, 3*head_dim_attn:4*head_dim_attn],
+            W_orig[:, 3*head_dim_attn:4*head_dim_attn],
+        ), "Non-targeted head 3 should be untouched"
+
+    def test_head_surgery_gqa_norm_preserve(self):
+        """Head surgery on GQA non-square o_proj with norm_preserve."""
+        hidden_dim = 12
+        attn_dim = 32
+        n_heads = 4
+        head_dim_attn = attn_dim // n_heads
+
+        class FakeAttnGQA(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(attn_dim, hidden_dim, bias=False)
+
+        attn = FakeAttnGQA()
+        torch.manual_seed(42)
+        attn.o_proj.weight.data = torch.randn(hidden_dim, attn_dim)
+
+        d = torch.randn(hidden_dim, 1)
+        d = d / d.norm()
+
+        orig_norms = [
+            attn.o_proj.weight.data[:, h*head_dim_attn:(h+1)*head_dim_attn].norm().item()
+            for h in range(n_heads)
+        ]
+
+        head_scores = [(0, 5.0), (1, 3.0), (2, 1.0), (3, 0.5)]
+        AbliterationPipeline._project_head_selective(
+            attn, d, head_scores, n_heads=n_heads,
+            head_fraction=0.5, norm_preserve=True,
+        )
+
+        for h in range(2):  # top 50% = 2 heads
+            new_norm = attn.o_proj.weight.data[:, h*head_dim_attn:(h+1)*head_dim_attn].norm().item()
+            assert abs(orig_norms[h] - new_norm) < 1e-3, \
+                f"GQA head {h} norm not preserved: {orig_norms[h]:.4f} vs {new_norm:.4f}"
+
+
+# ---------------------------------------------------------------------------
+# SOTA technique #6: SAE feature-level abliteration
+# ---------------------------------------------------------------------------
+
+class TestSAEAbliteration:
+    def test_sae_train_and_reconstruct(self):
+        """SAE should train and reconstruct activations."""
+        from obliteratus.analysis.sae_abliteration import train_sae
+
+        hidden = 32
+        # Generate synthetic activations
+        torch.manual_seed(42)
+        acts = [torch.randn(hidden) for _ in range(64)]
+
+        sae = train_sae(acts, hidden, expansion=2, n_epochs=10, lr=1e-3)
+
+        # Forward pass should work
+        x = torch.randn(1, hidden)
+        x_hat, z = sae(x)
+        assert x_hat.shape == x.shape
+        assert z.shape == (1, 2 * hidden)  # expansion=2
+
+        # Z should be sparse (ReLU activation)
+        assert (z == 0).float().mean() > 0.3, "Features should be sparse"
+
+    def test_refusal_feature_identification(self):
+        """SAE should identify features that differ between harmful/harmless."""
+        from obliteratus.analysis.sae_abliteration import (
+            train_sae, identify_refusal_features,
+        )
+
+        hidden = 32
+        torch.manual_seed(42)
+
+        # Create activations with clear harmful/harmless separation
+        refusal_dir = torch.randn(hidden)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        harmful_acts = [torch.randn(hidden) + 2.0 * refusal_dir for _ in range(32)]
+        harmless_acts = [torch.randn(hidden) - 2.0 * refusal_dir for _ in range(32)]
+        all_acts = harmful_acts + harmless_acts
+
+        sae = train_sae(all_acts, hidden, expansion=2, n_epochs=30, lr=3e-4)
+        result = identify_refusal_features(
+            sae, harmful_acts, harmless_acts, layer_idx=0, top_k=4,
+        )
+
+        assert result.n_refusal_features == 4
+        assert result.sae_directions.shape == (4, hidden)
+        assert result.variance_explained > 0.0
+        # SAE directions should have some alignment with the actual refusal direction
+        best_cos = max(
+            abs((result.sae_directions[i] @ refusal_dir).item())
+            for i in range(result.sae_directions.shape[0])
+        )
+        assert best_cos > 0.1, f"SAE should find direction aligned with refusal: best_cos={best_cos}"
+
+    def test_sae_directions_unit_norm(self):
+        """SAE-derived directions should be unit normalized."""
+        from obliteratus.analysis.sae_abliteration import (
+            train_sae, identify_refusal_features,
+        )
+
+        hidden = 16
+        torch.manual_seed(42)
+        harmful = [torch.randn(hidden) + torch.ones(hidden) for _ in range(16)]
+        harmless = [torch.randn(hidden) - torch.ones(hidden) for _ in range(16)]
+
+        sae = train_sae(harmful + harmless, hidden, expansion=2, n_epochs=10)
+        result = identify_refusal_features(sae, harmful, harmless, 0, top_k=3)
+
+        for i in range(result.sae_directions.shape[0]):
+            norm = result.sae_directions[i].norm().item()
+            assert abs(norm - 1.0) < 1e-3, f"Direction {i} norm={norm}, expected 1.0"
+
+
+# ---------------------------------------------------------------------------
+# Surgical method preset
+# ---------------------------------------------------------------------------
+
+class TestSurgicalMethod:
+    def test_surgical_enables_all_sota(self):
+        """Surgical method should enable all 6 SOTA techniques."""
+        cfg = METHODS["surgical"]
+        assert cfg["use_jailbreak_contrast"] is True
+        assert cfg["layer_adaptive_strength"] is True
+        assert cfg["safety_neuron_masking"] is True
+        assert cfg["per_expert_directions"] is True
+        assert cfg["attention_head_surgery"] is True
+        assert cfg["use_sae_features"] is True
+
+    def test_basic_disables_all_sota(self):
+        """Basic method should not enable SOTA techniques (no keys or False)."""
+        cfg = METHODS["basic"]
+        assert cfg.get("use_jailbreak_contrast", False) is False
+        assert cfg.get("layer_adaptive_strength", False) is False
+        assert cfg.get("safety_neuron_masking", False) is False
+
+    def test_pipeline_init_surgical(self):
+        """Pipeline initialized with surgical method should have all flags set."""
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        assert pipeline.use_jailbreak_contrast is True
+        assert pipeline.layer_adaptive_strength is True
+        assert pipeline.safety_neuron_masking is True
+        assert pipeline.per_expert_directions is True
+        assert pipeline.attention_head_surgery is True
+        assert pipeline.use_sae_features is True
+
+    def test_pipeline_init_explicit_override(self):
+        """Explicit params should override method defaults."""
+        pipeline = AbliterationPipeline(
+            model_name="test", method="surgical",
+            safety_neuron_masking=False,
+        )
+        assert pipeline.safety_neuron_masking is False
+        assert pipeline.use_jailbreak_contrast is True  # rest still from surgical
+
+
+# ---------------------------------------------------------------------------
+# Inverted method (semantic refusal inversion)
+# ---------------------------------------------------------------------------
+
+class TestInvertedMethod:
+    def test_inverted_preset_config(self):
+        """Inverted method preset should enable inversion flag."""
+        cfg = METHODS["inverted"]
+        assert cfg["invert_refusal"] is True
+        assert cfg["n_directions"] == 8
+        assert cfg["use_jailbreak_contrast"] is True
+
+    def test_surgical_does_not_invert(self):
+        """Surgical method should NOT enable inversion by default."""
+        cfg = METHODS["surgical"]
+        assert cfg.get("invert_refusal", False) is False
+
+    def test_pipeline_init_inverted(self):
+        """Pipeline initialized with inverted method should have flag set."""
+        pipeline = AbliterationPipeline(model_name="test", method="inverted")
+        assert pipeline.invert_refusal is True
+        assert pipeline.use_jailbreak_contrast is True
+        assert pipeline.safety_neuron_masking is False  # zeroing + reflection is destructive
+
+    def test_pipeline_invert_explicit_override(self):
+        """Explicit invert_refusal param should override method default."""
+        pipeline = AbliterationPipeline(
+            model_name="test", method="surgical", invert_refusal=True,
+        )
+        assert pipeline.invert_refusal is True
+
+        pipeline2 = AbliterationPipeline(
+            model_name="test", method="inverted", invert_refusal=False,
+        )
+        assert pipeline2.invert_refusal is False
+
+    def test_reflection_math(self):
+        """2x projection (reflection) should negate the refusal component."""
+        hidden = 16
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        W_orig = module.o_proj.weight.data.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        # Original projection onto d
+        orig_proj = (W_orig @ d).squeeze()
+
+        # Reflection: regularization=-1.0 → scale=2.0
+        AbliterationPipeline._project_out_advanced(
+            module, d, ["o_proj"], regularization=-1.0,
+        )
+
+        W_reflected = module.o_proj.weight.data
+        new_proj = (W_reflected @ d).squeeze()
+
+        # After reflection, projection should be NEGATED (sign flipped)
+        assert torch.allclose(new_proj, -orig_proj, atol=1e-4), (
+            f"Reflected projection should be negated: expected ~{-orig_proj[:3]} got {new_proj[:3]}"
+        )
+
+    def test_reflection_preserves_orthogonal_component(self):
+        """Reflection should not change the component perpendicular to d."""
+        hidden = 8
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(hidden, 16, bias=False)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        W_orig = module.o_proj.weight.data.clone()
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        # Compute original orthogonal component
+        orig_d_component = (W_orig @ d) @ d.T  # rank-1 matrix: projection onto d
+        orig_ortho = W_orig - orig_d_component  # everything except d-component
+
+        AbliterationPipeline._project_out_advanced(
+            module, d, ["o_proj"], regularization=-1.0,
+        )
+
+        W_reflected = module.o_proj.weight.data
+        new_d_component = (W_reflected @ d) @ d.T
+        new_ortho = W_reflected - new_d_component
+
+        # Orthogonal component should be unchanged
+        assert torch.allclose(orig_ortho, new_ortho, atol=1e-4), (
+            "Reflection should preserve orthogonal component"
+        )
+
+    def test_moe_expert_safety_classification(self):
+        """_identify_safety_experts should classify experts by router affinity."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([
+                    torch.nn.Linear(hidden, hidden) for _ in range(n_experts)
+                ])
+
+        class FakeLayer(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.self_attn = torch.nn.Module()
+                self.self_attn.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.mlp = FakeMoE()
+
+        from obliteratus.models.loader import ModelHandle
+        from unittest.mock import MagicMock
+        from transformers import GPT2Config
+
+        config = GPT2Config(n_embd=hidden, n_head=2, n_layer=1, vocab_size=100, n_positions=64)
+        model = MagicMock()
+        model.parameters.return_value = iter([torch.zeros(1)])
+
+        handle = ModelHandle(
+            model=model, tokenizer=MagicMock(),
+            config=config, model_name="test", task="causal_lm",
+        )
+
+        pipeline = AbliterationPipeline(model_name="test", method="inverted")
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+
+        # Set up fake layer and direction
+        layer = FakeLayer()
+        torch.manual_seed(42)
+
+        # Make router weight so expert 0 has highest affinity for d
+        d = torch.randn(hidden)
+        d = d / d.norm()
+        # Set router weights: expert 0 aligned with d, expert 3 anti-aligned
+        layer.mlp.gate.weight.data[0] = d * 5.0
+        layer.mlp.gate.weight.data[1] = d * 1.0
+        layer.mlp.gate.weight.data[2] = d * -1.0
+        layer.mlp.gate.weight.data[3] = d * -5.0
+
+        # Mock get_layer_modules to return our fake layer
+        import obliteratus.abliterate as abl_module
+        orig_get_layers = abl_module.get_layer_modules
+        orig_get_ffn = abl_module.get_ffn_module
+        abl_module.get_layer_modules = lambda h: [layer]
+        abl_module.get_ffn_module = lambda lay, a: lay.mlp
+        try:
+            pipeline.refusal_directions = {0: d}
+            pipeline._strong_layers = [0]
+            pipeline._identify_safety_experts()
+        finally:
+            abl_module.get_layer_modules = orig_get_layers
+            abl_module.get_ffn_module = orig_get_ffn
+
+        assert 0 in pipeline._expert_safety_scores
+        scores = pipeline._expert_safety_scores[0]
+        # Expert 0 should be highest safety affinity
+        assert scores[0][0] == 0, f"Expert 0 should be top safety, got {scores[0]}"
+        # Expert 3 should be lowest
+        assert scores[-1][0] == 3, f"Expert 3 should be lowest, got {scores[-1]}"
+
+    def test_moe_inverted_excision_selective(self):
+        """Inverted MoE excision should reflect safety experts and remove from capability."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        moe = FakeMoE()
+        torch.manual_seed(42)
+        for p in moe.parameters():
+            p.data = torch.randn_like(p.data)
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        # Set up safety scores: experts 0,1 are safety, 2,3 are capability
+        pipeline = AbliterationPipeline(model_name="test", method="inverted")
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._expert_safety_scores = {
+            0: [(0, 5.0), (1, 3.0), (2, -1.0), (3, -3.0)]
+        }
+
+        orig_router = moe.gate.weight.data.clone()
+
+        count = pipeline._project_moe_experts_inverted(
+            moe, d, 0, norm_preserve=False, project_biases=False,
+        )
+
+        assert count > 0, "Should project some weights"
+
+        # Router should be reflected (capped at 1.5x to prevent extreme logits
+        # that cause CUDA illegal memory access in batched expert forward).
+        # With router_reg = max(reflect_reg, -0.5) → scale = 1.5:
+        #   new_proj ≈ orig_proj - 1.5 * orig_proj = -0.5 * orig_proj
+        # Additionally, _stabilize_router_weights clamps outliers, so we
+        # verify the sign is flipped and magnitude is substantial.
+        router_proj = (moe.gate.weight.data @ d.squeeze()).squeeze()
+        orig_router_proj = (orig_router @ d.squeeze()).squeeze()
+        cosine = torch.nn.functional.cosine_similarity(
+            router_proj.unsqueeze(0), -orig_router_proj.unsqueeze(0),
+        )
+        assert cosine > 0.5, (
+            f"Router projection should be at least partially reflected, cosine={cosine.item():.3f}"
+        )
+
+        # Safety expert 0: should be reflected (projection negated)
+        e0_proj = (moe.experts[0].down_proj.weight.data @ d).norm()
+        # After reflection the projection doesn't go to zero — it negates
+        assert e0_proj > 1e-4, "Safety expert should have non-zero projection (reflected, not removed)"
+
+        # Capability expert 3: should have projection removed (near zero)
+        e3_proj = (moe.experts[3].down_proj.weight.data @ d).norm().item()
+        assert e3_proj < 1e-3, f"Capability expert should have projection removed, got {e3_proj}"
+
+
+# ---------------------------------------------------------------------------
+# Nuclear method
+# ---------------------------------------------------------------------------
+
+class TestNuclearMethod:
+    def test_nuclear_preset_config(self):
+        """Nuclear method should match inverted baseline + permanent weight techniques."""
+        cfg = METHODS["nuclear"]
+        assert cfg["invert_refusal"] is True
+        assert cfg["n_directions"] == 4  # fewer than inverted to avoid over-ablation
+        assert cfg["refinement_passes"] == 2  # same as inverted
+        assert cfg["reflection_strength"] == 1.25  # tempered for CoT coherence
+        assert cfg["project_embeddings"] is True
+        assert cfg["embed_regularization"] == 0.50  # conservative cascade limit
+        assert cfg["activation_steering"] is True  # residual cleanup hooks
+        assert cfg["steering_strength"] == 0.15  # light residual correction
+        assert cfg["expert_transplant"] is True
+        assert cfg["transplant_blend"] == 0.10  # gentle nudge, not overwrite
+        assert cfg["use_jailbreak_contrast"] is True
+        assert cfg["attention_head_surgery"] is True
+        assert cfg["layer_adaptive_strength"] is True  # per-layer scaling
+
+    def test_nuclear_pipeline_init(self):
+        """Pipeline initialized with nuclear method should have all flags set."""
+        pipeline = AbliterationPipeline(model_name="test", method="nuclear")
+        assert pipeline.invert_refusal is True
+        assert pipeline.reflection_strength == 1.25
+        assert pipeline.embed_regularization == 0.50
+        assert pipeline.transplant_blend == 0.10
+        assert pipeline.project_embeddings is True
+        assert pipeline.activation_steering is True  # residual cleanup
+        assert pipeline.expert_transplant is True
+        assert pipeline.n_directions == 4
+        assert pipeline.refinement_passes == 2
+        assert pipeline.layer_adaptive_strength is True
+
+    def test_reflection_strength_configurable(self):
+        """reflection_strength should be explicitly overridable."""
+        pipeline = AbliterationPipeline(
+            model_name="test", method="inverted", reflection_strength=3.0,
+        )
+        assert pipeline.reflection_strength == 3.0
+
+    def test_inverted_default_strength_is_2(self):
+        """Inverted method should default to reflection_strength=2.0."""
+        pipeline = AbliterationPipeline(model_name="test", method="inverted")
+        assert pipeline.reflection_strength == 2.0
+
+    def test_boosted_reflection_math(self):
+        """2.5x reflection should produce stronger negation than 2x."""
+        hidden = 16
+
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        d = torch.randn(hidden, 1)
+        d = d / d.norm()
+
+        # 2x reflection
+        module_2x = Wrapper()
+        torch.manual_seed(42)
+        module_2x.o_proj.weight.data = torch.randn(32, hidden)
+        orig = module_2x.o_proj.weight.data.clone()
+        AbliterationPipeline._project_out_advanced(
+            module_2x, d, ["o_proj"], regularization=-1.0,  # scale=2.0
+        )
+        proj_2x = (module_2x.o_proj.weight.data @ d).squeeze()
+
+        # 2.5x reflection
+        module_25x = Wrapper()
+        module_25x.o_proj.weight.data = orig.clone()
+        AbliterationPipeline._project_out_advanced(
+            module_25x, d, ["o_proj"], regularization=-1.5,  # scale=2.5
+        )
+        proj_25x = (module_25x.o_proj.weight.data @ d).squeeze()
+
+        # 2.5x should be 25% stronger negation than 2x
+        assert proj_25x.norm() > proj_2x.norm(), (
+            "2.5x reflection should produce stronger (more negative) projection than 2x"
+        )
+
+    def test_activation_steering_hook(self):
+        """Steering hooks should subtract refusal direction from hidden states."""
+        hidden = 8
+
+        class FakeLayer(torch.nn.Module):
+            def forward(self, x):
+                return x
+
+        layer = FakeLayer()
+        layers = torch.nn.ModuleList([layer])
+
+        # Explicitly enable steering (nuclear preset has it off by default)
+        pipeline = AbliterationPipeline(
+            model_name="test", method="inverted", activation_steering=True,
+            steering_strength=0.5,
+        )
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+
+        d = torch.randn(hidden)
+        d = d / d.norm()
+        pipeline.refusal_directions = {0: d}
+        pipeline._strong_layers = [0]
+
+        n_hooks = pipeline._install_activation_steering(layers)
+        assert n_hooks == 1
+        assert len(pipeline._steering_hooks) == 1
+
+        # Create a hidden state with strong refusal component
+        batch = torch.randn(1, 4, hidden)
+        refusal_component = 5.0 * d.unsqueeze(0).unsqueeze(0).expand_as(batch)
+        input_hidden = batch + refusal_component
+
+        # Run through the layer (hook should fire)
+        output = layer(input_hidden)
+
+        # The refusal component should be reduced
+        proj_before = torch.einsum("bsh,h->bs", input_hidden, d).abs().mean()
+        proj_after = torch.einsum("bsh,h->bs", output, d).abs().mean()
+        assert proj_after < proj_before, (
+            f"Steering should reduce refusal projection: before={proj_before:.3f}, after={proj_after:.3f}"
+        )
+
+        # Cleanup
+        for hook in pipeline._steering_hooks:
+            hook.remove()
+
+    def test_expert_transplant(self):
+        """Expert transplant should overwrite safety expert weights with capability average."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        class FakeLayer(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.self_attn = torch.nn.Module()
+                self.self_attn.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.mlp = FakeMoE()
+
+        layer = FakeLayer()
+        layers = torch.nn.ModuleList([layer])
+        torch.manual_seed(42)
+        for p in layer.parameters():
+            p.data = torch.randn_like(p.data)
+
+        # Save original safety expert weight
+        orig_safety0 = layer.mlp.experts[0].down_proj.weight.data.clone()
+        # Save capability expert weights for computing expected mean
+        # With top-third classification (n_experts // 3 = 1), only expert 0
+        # is safety; experts 1, 2, 3 are all capability.
+        cap1 = layer.mlp.experts[1].down_proj.weight.data.clone()
+        cap2 = layer.mlp.experts[2].down_proj.weight.data.clone()
+        cap3 = layer.mlp.experts[3].down_proj.weight.data.clone()
+        expected_mean = (cap1 + cap2 + cap3) / 3.0
+
+        import obliteratus.abliterate as abl_module
+        from obliteratus.models.loader import ModelHandle
+        from transformers import GPT2Config
+
+        config = GPT2Config(n_embd=hidden, n_head=2, n_layer=1, vocab_size=100, n_positions=64)
+        model = MagicMock()
+        model.parameters.return_value = iter([torch.zeros(1)])
+        handle = ModelHandle(model=model, tokenizer=MagicMock(), config=config, model_name="test", task="causal_lm")
+
+        pipeline = AbliterationPipeline(model_name="test", method="nuclear")
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._strong_layers = [0]
+        # Experts 0,1 are safety (high affinity), 2,3 are capability
+        pipeline._expert_safety_scores = {
+            0: [(0, 5.0), (1, 3.0), (2, -1.0), (3, -3.0)]
+        }
+
+        orig_get_ffn = abl_module.get_ffn_module
+        abl_module.get_ffn_module = lambda lay, a: lay.mlp
+        try:
+            count = pipeline._transplant_expert_weights(layers)
+        finally:
+            abl_module.get_ffn_module = orig_get_ffn
+
+        assert count >= 1, f"Should blend at least 1 weight (top-third safety expert), got {count}"
+
+        # Safety expert 0 should be a 10% blend toward capability mean
+        # (nuclear default transplant_blend=0.10)
+        # new = 0.90 * original + 0.10 * capability_mean
+        blend = pipeline.transplant_blend  # 0.10
+        expected_blend = (1.0 - blend) * orig_safety0 + blend * expected_mean
+        transplanted = layer.mlp.experts[0].down_proj.weight.data
+        assert torch.allclose(transplanted, expected_blend, atol=1e-4), (
+            f"Safety expert weight should be {blend:.0%} blended toward capability mean"
+        )
+
+        # Capability expert 2 should be unchanged
+        assert torch.allclose(layer.mlp.experts[2].down_proj.weight.data, cap2, atol=1e-6), (
+            "Capability expert should be unchanged"
+        )
+
+    def test_gather_state_dict_raises_on_missing_offload(self):
+        """Should raise RuntimeError (not silently corrupt) when offload dir is missing."""
+        from obliteratus.models.loader import ModelHandle
+        from transformers import GPT2Config
+
+        config = GPT2Config(n_embd=8, n_head=2, n_layer=1, vocab_size=100, n_positions=64)
+
+        # Create a fake model whose state_dict returns a meta tensor
+        fake_model = MagicMock()
+        meta_tensor = torch.empty(4, 8, device="meta")
+        fake_model.state_dict.return_value = {"layer.weight": meta_tensor}
+
+        handle = ModelHandle(
+            model=fake_model, tokenizer=MagicMock(), config=config,
+            model_name="test", task="causal_lm",
+        )
+        handle._offload_dir = "/nonexistent/path"
+
+        pipeline = AbliterationPipeline(model_name="test", method="nuclear")
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+
+        with pytest.raises(RuntimeError, match="bricked checkpoint"):
+            pipeline._gather_state_dict()
+
+
+# ---------------------------------------------------------------------------
+# Knee detection
+# ---------------------------------------------------------------------------
+
+class TestKneeDetection:
+    def test_empty_input(self):
+        result = AbliterationPipeline._select_layers_knee([])
+        assert result == []
+
+    def test_two_layers(self):
+        result = AbliterationPipeline._select_layers_knee([(0, 5.0), (1, 3.0)])
+        assert set(result) == {0, 1}
+
+    def test_clear_knee(self):
+        """Layers with a sharp dropoff should be separated by knee detection."""
+        sorted_layers = [
+            (14, 10.0), (15, 9.5), (13, 9.0),  # strong cluster
+            (16, 2.0), (12, 1.5), (17, 1.0), (11, 0.5), (18, 0.2), (10, 0.1),
+        ]
+        result = AbliterationPipeline._select_layers_knee(sorted_layers)
+        # Should select the strong cluster (layers 14, 15, 13) and exclude weak ones
+        assert 14 in result
+        assert 15 in result
+        assert 13 in result
+        assert len(result) <= 5  # shouldn't select all 9
+
+    def test_minimum_threshold_filters_noise(self):
+        """Layers below 10% of max should be filtered out."""
+        sorted_layers = [(0, 10.0), (1, 0.5)]  # 0.5 is 5% of 10
+        result = AbliterationPipeline._select_layers_knee(sorted_layers)
+        # Layer 1 is below 10% threshold
+        assert 0 in result
+
+    def test_all_equal_norms(self):
+        """When all norms are equal, should select all (or most)."""
+        sorted_layers = [(i, 5.0) for i in range(5)]
+        result = AbliterationPipeline._select_layers_knee(sorted_layers)
+        assert len(result) >= 1
+
+
+# ---------------------------------------------------------------------------
+# Activation collection
+# ---------------------------------------------------------------------------
+
+class TestActivationCollection:
+    def test_collect_activations(self, handle):
+        """Test that activation collection returns correct structure."""
+        from obliteratus.strategies.utils import get_layer_modules
+
+        pipeline = AbliterationPipeline(model_name="test")
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+
+        layers = get_layer_modules(handle)
+        prompts = ["Hello world", "Test prompt"]
+
+        handle.tokenizer.return_value = {
+            "input_ids": torch.randint(0, 1000, (1, 5)),
+            "attention_mask": torch.ones(1, 5, dtype=torch.long),
+        }
+
+        activations = pipeline._collect_activations(layers, prompts, "test")
+
+        assert len(activations) == len(layers)
+        for idx in range(len(layers)):
+            assert len(activations[idx]) == len(prompts)
+            for act in activations[idx]:
+                assert act.device == torch.device("cpu")
+                assert act.shape[-1] == handle.hidden_size
+
+
+# ---------------------------------------------------------------------------
+# Distill: single direction (basic method)
+# ---------------------------------------------------------------------------
+
+class TestDistillBasic:
+    def test_single_direction(self, handle):
+        """Basic method: single refusal direction via difference-in-means."""
+        from obliteratus.strategies.utils import get_layer_modules
+
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            method="basic",
+            harmful_prompts=["bad prompt"],
+            harmless_prompts=["good prompt"],
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        _make_varied_tokenizer(handle)
+
+        pipeline._probe()
+        pipeline._distill()
+
+        n_layers = len(get_layer_modules(handle))
+        assert len(pipeline.refusal_directions) == n_layers
+        for idx, direction in pipeline.refusal_directions.items():
+            assert abs(direction.norm().item() - 1.0) < 1e-4
+            # Single direction: subspace should be (1, hidden_dim)
+            assert pipeline.refusal_subspaces[idx].shape[0] == 1
+
+
+# ---------------------------------------------------------------------------
+# Distill: multi-direction SVD (advanced/aggressive method)
+# ---------------------------------------------------------------------------
+
+class TestDistillSVD:
+    def test_multi_direction_svd(self, handle):
+        """Advanced method: SVD extracts multiple refusal directions.
+
+        Note: on small models (hidden_size < 2048 or < 2B params), n_directions
+        is automatically capped to 2 to prevent over-ablation.  The test model
+        (hidden_size=64, 4 layers) triggers this safeguard.
+        """
+        from obliteratus.strategies.utils import get_layer_modules
+
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            method="advanced",
+            harmful_prompts=["bad1", "bad2", "bad3", "bad4", "bad5"],
+            harmless_prompts=["good1", "good2", "good3", "good4", "good5"],
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        _make_varied_tokenizer(handle)
+
+        pipeline._probe()
+        pipeline._distill()
+
+        n_layers = len(get_layer_modules(handle))
+        assert len(pipeline.refusal_subspaces) == n_layers
+        # Small-model cap: n_directions capped to 2 for tiny test model
+        expected_dirs = min(2, pipeline.n_directions, 5, handle.hidden_size)
+        for idx, subspace in pipeline.refusal_subspaces.items():
+            assert subspace.shape[0] == expected_dirs
+            assert subspace.shape[1] == handle.hidden_size
+
+        # Primary direction should still be a unit vector
+        for idx, direction in pipeline.refusal_directions.items():
+            assert abs(direction.norm().item() - 1.0) < 1e-4
+
+
+# ---------------------------------------------------------------------------
+# Full pipeline: excise with different methods
+# ---------------------------------------------------------------------------
+
+class TestExcise:
+    def test_excise_basic(self, handle):
+        """Basic method should modify weights."""
+        from obliteratus.strategies.utils import get_layer_modules
+
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            method="basic",
+            harmful_prompts=["bad prompt"],
+            harmless_prompts=["good prompt"],
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        _make_varied_tokenizer(handle)
+
+        layers = get_layer_modules(handle)
+        original_weights = {}
+        for idx in range(len(layers)):
+            for name, param in layers[idx].named_parameters():
+                original_weights[(idx, name)] = param.data.clone()
+
+        pipeline._probe()
+        pipeline._distill()
+        pipeline._excise()
+
+        any_changed = False
+        for idx in range(len(layers)):
+            for name, param in layers[idx].named_parameters():
+                if not torch.allclose(original_weights[(idx, name)], param.data, atol=1e-6):
+                    any_changed = True
+                    break
+
+        assert any_changed, "Excise should modify at least some weights"
+
+    def test_excise_advanced_norm_preserving(self, handle):
+        """Advanced method with norm preservation should maintain weight norms."""
+        from obliteratus.strategies.utils import get_layer_modules
+
+        pipeline = AbliterationPipeline(
+            model_name="test",
+            method="advanced",
+            harmful_prompts=["bad prompt"],
+            harmless_prompts=["good prompt"],
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        _make_varied_tokenizer(handle)
+
+        get_layer_modules(handle)
+
+        pipeline._probe()
+        pipeline._distill()
+        pipeline._excise()
+
+        # Weights should have been modified (advanced uses _project_out_advanced)
+        assert len(pipeline._strong_layers) > 0
+
+
+# ---------------------------------------------------------------------------
+# Rebirth (save)
+# ---------------------------------------------------------------------------
+
+class TestRebirth:
+    def test_rebirth_saves_metadata(self, handle, tmp_path):
+        """Rebirth should save model and comprehensive metadata JSON."""
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            output_dir=str(tmp_path / "output"),
+            method="advanced",
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._strong_layers = [0]
+        pipeline._quality_metrics = {"perplexity": 8.5, "coherence": 1.0}
+
+        handle.model.save_pretrained = MagicMock()
+        handle.tokenizer.save_pretrained = MagicMock()
+
+        result_path = pipeline._rebirth()
+
+        assert result_path == tmp_path / "output"
+        assert (result_path / "abliteration_metadata.json").exists()
+
+        metadata = json.loads((result_path / "abliteration_metadata.json").read_text())
+        assert metadata["source_model"] == "test-model"
+        assert metadata["technique"] == "refusal_direction_ablation"
+        assert metadata["method"] == "advanced"
+        assert metadata["strong_layers"] == [0]
+        assert "method_config" in metadata
+        assert metadata["method_config"]["n_directions"] == METHODS["advanced"]["n_directions"]
+        assert metadata["method_config"]["norm_preserve"] is True
+        assert "references" in metadata
+        assert len(metadata["references"]) >= 3
+        assert "quality_metrics" in metadata
+        assert metadata["quality_metrics"]["perplexity"] == 8.5
+
+
+# ---------------------------------------------------------------------------
+# CLI integration
+# ---------------------------------------------------------------------------
+
+class TestCLI:
+    def test_abliterate_parser_with_method(self):
+        """Test that the abliterate subcommand parses method correctly."""
+        import argparse
+
+        parser = argparse.ArgumentParser()
+        subparsers = parser.add_subparsers(dest="command")
+        abl_parser = subparsers.add_parser("abliterate")
+        abl_parser.add_argument("model", type=str)
+        abl_parser.add_argument("--output-dir", type=str, default=None)
+        abl_parser.add_argument("--device", type=str, default="auto")
+        abl_parser.add_argument("--dtype", type=str, default="float16")
+        abl_parser.add_argument("--method", type=str, default="advanced",
+                                choices=["basic", "advanced", "aggressive"])
+        abl_parser.add_argument("--n-directions", type=int, default=None)
+        abl_parser.add_argument("--regularization", type=float, default=None)
+        abl_parser.add_argument("--refinement-passes", type=int, default=None)
+
+        args = parser.parse_args(["abliterate", "gpt2", "--method", "aggressive", "--n-directions", "6"])
+        assert args.command == "abliterate"
+        assert args.model == "gpt2"
+        assert args.method == "aggressive"
+        assert args.n_directions == 6
+        assert args.dtype == "float16"
+
+    def test_default_method(self):
+        """Default method should be advanced."""
+        import argparse
+
+        parser = argparse.ArgumentParser()
+        subparsers = parser.add_subparsers(dest="command")
+        abl_parser = subparsers.add_parser("abliterate")
+        abl_parser.add_argument("model", type=str)
+        abl_parser.add_argument("--method", type=str, default="advanced")
+
+        args = parser.parse_args(["abliterate", "gpt2"])
+        assert args.method == "advanced"
+
+
+# ---------------------------------------------------------------------------
+# Expert-Granular Abliteration (EGA)
+# ---------------------------------------------------------------------------
+
+class TestFindRouterModule:
+    """Test _find_router_module static method."""
+
+    def test_finds_gate(self):
+        """Should find a router named 'gate'."""
+        hidden = 16
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, 4, bias=False)
+                self.experts = torch.nn.ModuleList()
+
+        moe = FakeMoE()
+        router = AbliterationPipeline._find_router_module(moe)
+        assert router is moe.gate
+
+    def test_finds_router(self):
+        """Should find a router named 'router'."""
+        hidden = 16
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.router = torch.nn.Linear(hidden, 4, bias=False)
+                self.experts = torch.nn.ModuleList()
+
+        moe = FakeMoE()
+        router = AbliterationPipeline._find_router_module(moe)
+        assert router is moe.router
+
+    def test_auto_detects_unknown_router(self):
+        """Should auto-detect a router with unusual name via heuristic."""
+        hidden = 16
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.moe_gate_proj = torch.nn.Linear(hidden, 4, bias=False)
+                self.experts = torch.nn.ModuleList()
+
+        moe = FakeMoE()
+        router = AbliterationPipeline._find_router_module(moe)
+        assert router is moe.moe_gate_proj
+
+    def test_returns_none_no_router(self):
+        """Should return None when no router is found."""
+        class NoRouter(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear = torch.nn.Linear(16, 16)
+
+        mod = NoRouter()
+        assert AbliterationPipeline._find_router_module(mod) is None
+
+
+class TestRouterProfilingHooks:
+    """Test _install_router_profiling_hooks."""
+
+    def _make_moe_pipeline_and_layers(self, hidden=16, n_experts=4):
+        """Create a pipeline with a fake MoE model for router profiling tests."""
+        from obliteratus.models.loader import ModelHandle
+        from transformers import GPT2Config
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, hidden, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+            def forward(self, x):
+                return x
+
+        class FakeLayer(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.self_attn = torch.nn.Module()
+                self.self_attn.o_proj = torch.nn.Linear(hidden, hidden, bias=False)
+                self.mlp = FakeMoE()
+
+            def forward(self, x):
+                return (x,)
+
+        config = GPT2Config(n_embd=hidden, n_head=2, n_layer=1, vocab_size=100, n_positions=64)
+        model = MagicMock()
+        model.parameters.return_value = iter([torch.zeros(1)])
+        handle = ModelHandle(model=model, tokenizer=MagicMock(), config=config, model_name="test", task="causal_lm")
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+
+        layer = FakeLayer()
+        layers = torch.nn.ModuleList([layer])
+
+        # Monkey-patch get_ffn_module
+        import obliteratus.abliterate as abl_module
+        orig_get_ffn = abl_module.get_ffn_module
+        abl_module.get_ffn_module = lambda lay, a: lay.mlp
+
+        return pipeline, layers, layer, abl_module, orig_get_ffn
+
+    def test_hooks_installed(self):
+        """Should install hooks on MoE router modules."""
+        pipeline, layers, layer, abl_module, orig_get_ffn = self._make_moe_pipeline_and_layers()
+        try:
+            hooks = pipeline._install_router_profiling_hooks(layers)
+            assert len(hooks) == 1
+            assert 0 in pipeline._routing_harmful
+            assert 0 in pipeline._routing_harmless
+        finally:
+            for h in hooks:
+                h.remove()
+            abl_module.get_ffn_module = orig_get_ffn
+
+    def test_hooks_record_logits(self):
+        """Hooks should record router logits during forward passes."""
+        pipeline, layers, layer, abl_module, orig_get_ffn = self._make_moe_pipeline_and_layers()
+        try:
+            hooks = pipeline._install_router_profiling_hooks(layers)
+
+            # Simulate harmful forward pass
+            pipeline._routing_is_harmful = True
+            x = torch.randn(1, 5, 16)
+            layer.mlp.gate(x)  # triggers hook
+
+            assert len(pipeline._routing_harmful[0]) == 1
+            assert pipeline._routing_harmful[0][0].shape[0] == 4  # n_experts
+
+            # Simulate harmless forward pass
+            pipeline._routing_is_harmful = False
+            layer.mlp.gate(x)
+
+            assert len(pipeline._routing_harmless[0]) == 1
+        finally:
+            for h in hooks:
+                h.remove()
+            abl_module.get_ffn_module = orig_get_ffn
+
+    def test_no_handle_returns_empty(self):
+        """Should return empty list when handle is None."""
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline.handle = None
+        hooks = pipeline._install_router_profiling_hooks(torch.nn.ModuleList())
+        assert hooks == []
+
+
+class TestComputeExpertGranularDirections:
+    """Test _compute_expert_granular_directions."""
+
+    def test_computes_per_expert_directions(self):
+        """Should compute per-expert refusal directions from routing data."""
+        hidden = 16
+        n_experts = 4
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._strong_layers = [0]
+
+        torch.manual_seed(42)
+
+        # Simulate router logits: expert 0 favored for harmful, expert 3 for harmless
+        h_logits = []
+        s_logits = []
+        for _ in range(10):
+            hl = torch.randn(n_experts)
+            hl[0] += 2.0  # bias expert 0 for harmful
+            h_logits.append(hl)
+            sl = torch.randn(n_experts)
+            sl[3] += 2.0  # bias expert 3 for harmless
+            s_logits.append(sl)
+
+        pipeline._routing_harmful = {0: h_logits}
+        pipeline._routing_harmless = {0: s_logits}
+
+        # Simulate per-prompt activations with harmful/harmless separation
+        refusal_dir = torch.randn(hidden)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        h_acts = [torch.randn(hidden) + 1.5 * refusal_dir for _ in range(10)]
+        s_acts = [torch.randn(hidden) - 1.5 * refusal_dir for _ in range(10)]
+        pipeline._harmful_acts = {0: h_acts}
+        pipeline._harmless_acts = {0: s_acts}
+
+        pipeline._compute_expert_granular_directions()
+
+        # Should have computed expert directions for layer 0
+        assert 0 in pipeline._expert_directions
+        assert len(pipeline._expert_directions[0]) > 0
+
+        # Should have dynamic safety scores
+        assert 0 in pipeline._expert_safety_scores
+        scores = pipeline._expert_safety_scores[0]
+        assert len(scores) == n_experts
+        # Expert 0 should have higher safety score (more activated for harmful)
+        expert_0_score = next(s for eid, s in scores if eid == 0)
+        expert_3_score = next(s for eid, s in scores if eid == 3)
+        assert expert_0_score > expert_3_score, (
+            f"Expert 0 should have higher safety score: {expert_0_score} vs {expert_3_score}"
+        )
+
+    def test_directions_are_unit_vectors(self):
+        """Per-expert directions should be unit normalized."""
+        hidden = 16
+        n_experts = 4
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._strong_layers = [0]
+
+        torch.manual_seed(42)
+        h_logits = [torch.randn(n_experts) for _ in range(10)]
+        s_logits = [torch.randn(n_experts) for _ in range(10)]
+        pipeline._routing_harmful = {0: h_logits}
+        pipeline._routing_harmless = {0: s_logits}
+        pipeline._harmful_acts = {0: [torch.randn(hidden) + torch.ones(hidden) for _ in range(10)]}
+        pipeline._harmless_acts = {0: [torch.randn(hidden) - torch.ones(hidden) for _ in range(10)]}
+
+        pipeline._compute_expert_granular_directions()
+
+        if 0 in pipeline._expert_directions:
+            for ei, d in pipeline._expert_directions[0].items():
+                assert abs(d.norm().item() - 1.0) < 1e-4, (
+                    f"Expert {ei} direction norm={d.norm().item()}, expected 1.0"
+                )
+
+    def test_skips_when_no_routing_data(self):
+        """Should skip gracefully when no routing data is available."""
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._routing_harmful = {}
+        pipeline._routing_harmless = {}
+
+        pipeline._compute_expert_granular_directions()
+
+        assert len(pipeline._expert_directions) == 0
+
+    def test_skips_expert_with_low_routing_weight(self):
+        """Experts with insufficient routing weight should not get directions."""
+        hidden = 16
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._strong_layers = [0]
+
+        # Create routing logits where expert 3 is never selected (very low)
+        h_logits = []
+        s_logits = []
+        for _ in range(3):
+            hl = torch.tensor([5.0, 5.0, 5.0, -100.0])  # expert 3 never routed
+            h_logits.append(hl)
+            sl = torch.tensor([5.0, 5.0, 5.0, -100.0])
+            s_logits.append(sl)
+
+        pipeline._routing_harmful = {0: h_logits}
+        pipeline._routing_harmless = {0: s_logits}
+
+        torch.manual_seed(42)
+        pipeline._harmful_acts = {0: [torch.randn(hidden) for _ in range(3)]}
+        pipeline._harmless_acts = {0: [torch.randn(hidden) for _ in range(3)]}
+
+        pipeline._compute_expert_granular_directions()
+
+        # Expert 3 should NOT have a direction (routing weight too low)
+        if 0 in pipeline._expert_directions:
+            assert 3 not in pipeline._expert_directions[0], (
+                "Expert with near-zero routing weight should not get a direction"
+            )
+
+
+class TestProjectMoEExpertsGranular:
+    """Test _project_moe_experts_granular (ModuleList path)."""
+
+    def _make_direction(self, hidden_dim=16):
+        d = torch.randn(hidden_dim, 1)
+        return d / d.norm()
+
+    def test_per_expert_directions_applied(self):
+        """Each expert should use its own direction when available."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        moe = FakeMoE()
+        torch.manual_seed(42)
+        for p in moe.parameters():
+            p.data = torch.randn_like(p.data)
+
+        shared_dir = self._make_direction(hidden)
+
+        # Create distinct per-expert directions
+        expert_dirs = {}
+        for ei in range(n_experts):
+            d = torch.randn(hidden)
+            d = d / d.norm()
+            expert_dirs[ei] = d
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._expert_directions = {0: expert_dirs}
+
+        # Save originals
+        orig_weights = {
+            ei: moe.experts[ei].down_proj.weight.data.clone()
+            for ei in range(n_experts)
+        }
+
+        count = pipeline._project_moe_experts_granular(
+            moe, shared_dir, layer_idx=0,
+        )
+
+        assert count > 0, "Should project some weights"
+
+        # All experts should be modified
+        for ei in range(n_experts):
+            assert not torch.allclose(
+                moe.experts[ei].down_proj.weight.data, orig_weights[ei]
+            ), f"Expert {ei} should be modified"
+
+    def test_falls_back_to_shared_direction(self):
+        """Experts without per-expert direction should use shared direction."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        moe = FakeMoE()
+        torch.manual_seed(42)
+        for p in moe.parameters():
+            p.data = torch.randn_like(p.data)
+
+        shared_dir = self._make_direction(hidden)
+
+        # Only expert 0 has a per-expert direction
+        expert_dirs = {0: torch.randn(hidden).div_(torch.randn(hidden).norm())}
+        expert_dirs[0] = expert_dirs[0] / expert_dirs[0].norm()
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._expert_directions = {0: expert_dirs}
+
+        orig_e1 = moe.experts[1].down_proj.weight.data.clone()
+
+        pipeline._project_moe_experts_granular(
+            moe, shared_dir, layer_idx=0,
+        )
+
+        # Experts 1,2,3 should be modified (using shared direction)
+        assert not torch.allclose(moe.experts[1].down_proj.weight.data, orig_e1), \
+            "Expert 1 should use shared direction fallback"
+
+    def test_router_uses_shared_direction(self):
+        """Router should always use the shared direction, not per-expert."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        moe = FakeMoE()
+        shared_dir = self._make_direction(hidden)
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._expert_directions = {0: {0: torch.randn(hidden)}}
+
+        orig_gate = moe.gate.weight.data.clone()
+
+        pipeline._project_moe_experts_granular(moe, shared_dir, layer_idx=0)
+
+        # Gate should be projected
+        assert not torch.allclose(moe.gate.weight.data, orig_gate), \
+            "Router should be projected with shared direction"
+
+        # Gate's projection onto shared direction should be near zero
+        proj = (moe.gate.weight.data @ shared_dir).norm().item()
+        assert proj < 1e-4, f"Router should have shared dir removed, proj={proj}"
+
+    def test_shared_expert_uses_shared_direction(self):
+        """Shared expert should always use the shared direction."""
+        hidden = 16
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, 2, bias=False)
+                self.shared_expert = torch.nn.Module()
+                self.shared_expert.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.shared_expert.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(2)])
+
+        moe = FakeMoE()
+        shared_dir = self._make_direction(hidden)
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._expert_directions = {0: {0: torch.randn(hidden)}}
+
+        orig_shared = moe.shared_expert.down_proj.weight.data.clone()
+
+        pipeline._project_moe_experts_granular(moe, shared_dir, layer_idx=0)
+
+        assert not torch.allclose(moe.shared_expert.down_proj.weight.data, orig_shared), \
+            "Shared expert should be projected"
+
+
+class TestProjectFused3DGranular:
+    """Test _project_fused_3d_granular for fused 3D expert tensors."""
+
+    def test_per_expert_directions_on_fused(self):
+        """Each expert slice should use its own direction."""
+        hidden = 16
+        intermediate = 32
+        n_experts = 4
+
+        class FusedExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+
+        container = FusedExperts()
+        torch.manual_seed(42)
+
+        shared_dir = torch.randn(hidden, 1)
+        shared_dir = shared_dir / shared_dir.norm()
+
+        # Per-expert directions
+        expert_dirs = {}
+        for ei in range(n_experts):
+            d = torch.randn(hidden)
+            d = d / d.norm()
+            expert_dirs[ei] = d
+
+        orig_data = container.down_proj.data.clone()
+
+        count = AbliterationPipeline._project_fused_3d_granular(
+            container, shared_dir, expert_dirs, ["down_proj"],
+            norm_preserve=False, scale=1.0,
+        )
+
+        assert count == n_experts, f"Should project {n_experts} experts, got {count}"
+
+        # Each expert should be modified
+        for ei in range(n_experts):
+            assert not torch.allclose(
+                container.down_proj.data[ei], orig_data[ei]
+            ), f"Expert {ei} should be modified"
+
+    def test_fallback_to_shared_on_fused(self):
+        """Experts without per-expert direction should use shared direction."""
+        hidden = 16
+        intermediate = 32
+        n_experts = 4
+
+        class FusedExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+
+        container = FusedExperts()
+        torch.manual_seed(42)
+
+        shared_dir = torch.randn(hidden, 1)
+        shared_dir = shared_dir / shared_dir.norm()
+
+        # Only expert 0 has a direction
+        expert_dirs = {0: torch.randn(hidden).div_(1.0)}
+        expert_dirs[0] = expert_dirs[0] / expert_dirs[0].norm()
+
+        orig_data = container.down_proj.data.clone()
+
+        count = AbliterationPipeline._project_fused_3d_granular(
+            container, shared_dir, expert_dirs, ["down_proj"],
+            norm_preserve=False, scale=1.0,
+        )
+
+        assert count == n_experts
+        # All experts should be modified (experts 1-3 use shared dir)
+        for ei in range(n_experts):
+            assert not torch.allclose(
+                container.down_proj.data[ei], orig_data[ei]
+            ), f"Expert {ei} should be modified"
+
+    def test_norm_preserve_on_fused(self):
+        """Fused 3D with norm_preserve should maintain per-expert norms."""
+        hidden = 16
+        intermediate = 32
+        n_experts = 4
+
+        class FusedExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(n_experts, intermediate, hidden))
+
+        container = FusedExperts()
+        torch.manual_seed(42)
+
+        shared_dir = torch.randn(hidden, 1)
+        shared_dir = shared_dir / shared_dir.norm()
+
+        expert_dirs = {}
+        for ei in range(n_experts):
+            d = torch.randn(hidden)
+            expert_dirs[ei] = d / d.norm()
+
+        orig_norms = [container.down_proj.data[i].norm().item() for i in range(n_experts)]
+
+        AbliterationPipeline._project_fused_3d_granular(
+            container, shared_dir, expert_dirs, ["down_proj"],
+            norm_preserve=True, scale=1.0,
+        )
+
+        for i in range(n_experts):
+            new_norm = container.down_proj.data[i].norm().item()
+            assert abs(orig_norms[i] - new_norm) < 1e-3, (
+                f"Expert {i} norm not preserved: {orig_norms[i]:.4f} vs {new_norm:.4f}"
+            )
+
+    def test_skips_non_3d_params(self):
+        """Should skip parameters that are not 3-dimensional."""
+        hidden = 16
+
+        class FlatExperts(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Parameter(torch.randn(32, hidden))
+
+        container = FlatExperts()
+        shared_dir = torch.randn(hidden, 1)
+        shared_dir = shared_dir / shared_dir.norm()
+
+        count = AbliterationPipeline._project_fused_3d_granular(
+            container, shared_dir, {}, ["down_proj"],
+            norm_preserve=False, scale=1.0,
+        )
+        assert count == 0
+
+
+class TestEGAExciseIntegration:
+    """Test that EGA integrates properly in the excise stage path."""
+
+    def test_ega_pipeline_flags(self):
+        """Pipeline with surgical method should enable per_expert_directions."""
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        assert pipeline.per_expert_directions is True
+
+    def test_ega_only_on_primary_direction(self):
+        """EGA should only apply for dir_idx==0, not higher SVD directions."""
+        # This is enforced by the `and dir_idx == 0` check in _excise
+        # We verify the code structure exists
+        from obliteratus.abliterate import AbliterationPipeline
+        import inspect
+        source = inspect.getsource(AbliterationPipeline._excise_inner)
+        assert "dir_idx == 0" in source, "EGA should only apply for primary direction"
+        assert "_project_moe_experts_granular" in source, "EGA method should be called in excise"
+
+    def test_ega_distill_integration(self):
+        """EGA should be called during distill when per_expert_directions is enabled."""
+        from obliteratus.abliterate import AbliterationPipeline
+        import inspect
+        source = inspect.getsource(AbliterationPipeline._distill)
+        assert "_compute_expert_granular_directions" in source
+        assert "per_expert_directions" in source
+
+    def test_nuclear_method_enables_ega(self):
+        """Nuclear method should also enable per_expert_directions."""
+        cfg = METHODS["nuclear"]
+        assert cfg["per_expert_directions"] is True
+        pipeline = AbliterationPipeline(model_name="test", method="nuclear")
+        assert pipeline.per_expert_directions is True
+
+    def test_basic_method_disables_ega(self):
+        """Basic method should not enable per_expert_directions."""
+        cfg = METHODS["basic"]
+        assert cfg.get("per_expert_directions", False) is False
+
+    def test_inverted_method_enables_ega(self):
+        """Inverted method should enable per_expert_directions."""
+        cfg = METHODS["inverted"]
+        assert cfg["per_expert_directions"] is True
+
+    def test_ega_with_routing_data_end_to_end(self):
+        """End-to-end: EGA computes directions and granular projection modifies weights."""
+        hidden = 16
+        n_experts = 4
+
+        class FakeExpert(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.down_proj = torch.nn.Linear(hidden, 32, bias=False)
+                self.up_proj = torch.nn.Linear(hidden, 32, bias=False)
+
+        class FakeMoE(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.gate = torch.nn.Linear(hidden, n_experts, bias=False)
+                self.experts = torch.nn.ModuleList([FakeExpert() for _ in range(n_experts)])
+
+        moe = FakeMoE()
+        torch.manual_seed(42)
+        for p in moe.parameters():
+            p.data = torch.randn_like(p.data)
+
+        pipeline = AbliterationPipeline(model_name="test", method="surgical")
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._strong_layers = [0]
+
+        # Simulate EGA routing data
+        h_logits = [torch.randn(n_experts) for _ in range(5)]
+        s_logits = [torch.randn(n_experts) for _ in range(5)]
+        pipeline._routing_harmful = {0: h_logits}
+        pipeline._routing_harmless = {0: s_logits}
+
+        # Simulate activations with clear separation
+        refusal_dir = torch.randn(hidden)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+        pipeline._harmful_acts = {0: [torch.randn(hidden) + 2 * refusal_dir for _ in range(5)]}
+        pipeline._harmless_acts = {0: [torch.randn(hidden) - 2 * refusal_dir for _ in range(5)]}
+
+        # Step 1: compute EGA directions
+        pipeline._compute_expert_granular_directions()
+        assert 0 in pipeline._expert_directions
+        assert len(pipeline._expert_directions[0]) > 0
+
+        # Step 2: apply granular projection
+        shared_dir = torch.randn(hidden, 1)
+        shared_dir = shared_dir / shared_dir.norm()
+
+        orig_expert0 = moe.experts[0].down_proj.weight.data.clone()
+
+        count = pipeline._project_moe_experts_granular(
+            moe, shared_dir, layer_idx=0,
+        )
+
+        assert count > 0
+        assert not torch.allclose(moe.experts[0].down_proj.weight.data, orig_expert0), \
+            "Expert weights should be modified by EGA"
diff --git a/tests/test_abliterate_extended.py b/tests/test_abliterate_extended.py
new file mode 100644
index 0000000..ec45001
--- /dev/null
+++ b/tests/test_abliterate_extended.py
@@ -0,0 +1,302 @@
+"""Extended tests for novel abliteration pipeline features.
+
+Tests the new capabilities added to the OBLITERATUS abliteration pipeline:
+- Bias projection
+- Chat template wrapping
+- Method presets with new parameters
+- True iterative refinement
+- Whitened SVD integration
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import torch
+from transformers import GPT2Config, GPT2LMHeadModel
+
+from obliteratus.abliterate import (
+    METHODS,
+    AbliterationPipeline,
+)
+from obliteratus.models.loader import ModelHandle
+
+
+def _make_tiny_handle():
+    """Create a minimal ModelHandle with a tiny GPT-2 for testing."""
+    config = GPT2Config(
+        vocab_size=1000,
+        n_positions=128,
+        n_embd=64,
+        n_layer=4,
+        n_head=2,
+        n_inner=256,
+    )
+    model = GPT2LMHeadModel(config)
+    model.eval()
+
+    tokenizer = MagicMock()
+    tokenizer.pad_token = "<pad>"
+    tokenizer.eos_token = "<eos>"
+    tokenizer.return_value = {
+        "input_ids": torch.randint(0, 1000, (1, 10)),
+        "attention_mask": torch.ones(1, 10, dtype=torch.long),
+    }
+    tokenizer.decode.return_value = "The capital of France is Paris, a beautiful city"
+
+    handle = ModelHandle(
+        model=model,
+        tokenizer=tokenizer,
+        config=config,
+        model_name="gpt2-test",
+        task="causal_lm",
+    )
+    handle.snapshot()
+    return handle
+
+
+def _make_varied_tokenizer(handle):
+    """Set up a tokenizer mock that returns different tokens per call."""
+    call_count = [0]
+    def mock_tokenizer(prompt, **kwargs):
+        call_count[0] += 1
+        torch.manual_seed(call_count[0])
+        return {
+            "input_ids": torch.randint(0, 1000, (1, 5)),
+            "attention_mask": torch.ones(1, 5, dtype=torch.long),
+        }
+    handle.tokenizer.side_effect = mock_tokenizer
+
+
+# ---------------------------------------------------------------------------
+# New method preset parameters
+# ---------------------------------------------------------------------------
+
+class TestNewMethodPresets:
+    def test_basic_has_new_params(self):
+        cfg = METHODS["basic"]
+        assert "project_biases" in cfg
+        assert "use_chat_template" in cfg
+        assert "use_whitened_svd" in cfg
+        assert "true_iterative_refinement" in cfg
+        assert cfg["project_biases"] is False
+        assert cfg["use_chat_template"] is False
+
+    def test_advanced_has_new_params(self):
+        cfg = METHODS["advanced"]
+        assert cfg["project_biases"] is True
+        assert cfg["use_chat_template"] is True
+        assert cfg["use_whitened_svd"] is False
+        assert cfg["true_iterative_refinement"] is False
+
+    def test_aggressive_has_new_params(self):
+        cfg = METHODS["aggressive"]
+        assert cfg["project_biases"] is True
+        assert cfg["use_chat_template"] is True
+        assert cfg["use_whitened_svd"] is True
+        assert cfg["true_iterative_refinement"] is True
+
+
+# ---------------------------------------------------------------------------
+# Pipeline initialization with new parameters
+# ---------------------------------------------------------------------------
+
+class TestNewPipelineInit:
+    def test_default_new_params(self):
+        pipeline = AbliterationPipeline(model_name="test-model")
+        # advanced method defaults
+        assert pipeline.project_biases is True
+        assert pipeline.use_chat_template is True
+        assert pipeline.use_whitened_svd is False
+        assert pipeline.true_iterative_refinement is False
+
+    def test_basic_method_new_params(self):
+        pipeline = AbliterationPipeline(model_name="test-model", method="basic")
+        assert pipeline.project_biases is False
+        assert pipeline.use_chat_template is False
+        assert pipeline.use_whitened_svd is False
+        assert pipeline.true_iterative_refinement is False
+
+    def test_aggressive_method_new_params(self):
+        pipeline = AbliterationPipeline(model_name="test-model", method="aggressive")
+        assert pipeline.project_biases is True
+        assert pipeline.use_chat_template is True
+        assert pipeline.use_whitened_svd is True
+        assert pipeline.true_iterative_refinement is True
+
+    def test_explicit_overrides_new_params(self):
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            method="basic",
+            project_biases=True,
+            use_chat_template=True,
+            use_whitened_svd=True,
+            true_iterative_refinement=True,
+        )
+        assert pipeline.project_biases is True
+        assert pipeline.use_chat_template is True
+        assert pipeline.use_whitened_svd is True
+        assert pipeline.true_iterative_refinement is True
+
+
+# ---------------------------------------------------------------------------
+# Bias projection
+# ---------------------------------------------------------------------------
+
+class TestBiasProjection:
+    def test_project_bias_removes_component(self):
+        """Bias projection should remove refusal direction component from bias."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(4, 4, bias=True)
+
+        module = Wrapper()
+        torch.manual_seed(42)
+        module.o_proj.bias.data = torch.tensor([1.0, 2.0, 3.0, 4.0])
+
+        direction = torch.tensor([1.0, 0.0, 0.0, 0.0]).unsqueeze(-1)  # unit vector along dim 0
+
+        count = AbliterationPipeline._project_bias(module, direction, ["o_proj"])
+        assert count == 1
+
+        # The component along direction [1,0,0,0] was 1.0, should now be ~0
+        new_bias = module.o_proj.bias.data
+        projection_onto_dir = (new_bias @ direction.squeeze()).item()
+        assert abs(projection_onto_dir) < 1e-5
+
+        # Other components should be unchanged
+        assert abs(new_bias[1].item() - 2.0) < 1e-5
+        assert abs(new_bias[2].item() - 3.0) < 1e-5
+        assert abs(new_bias[3].item() - 4.0) < 1e-5
+
+    def test_project_bias_no_bias(self):
+        """Should handle modules without bias gracefully."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.o_proj = torch.nn.Linear(4, 4, bias=False)
+
+        module = Wrapper()
+        direction = torch.randn(4, 1)
+        count = AbliterationPipeline._project_bias(module, direction, ["o_proj"])
+        assert count == 0
+
+    def test_project_bias_no_matching_module(self):
+        """Should return 0 when no candidate names match."""
+        class Wrapper(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.something = torch.nn.Linear(4, 4, bias=True)
+
+        module = Wrapper()
+        direction = torch.randn(4, 1)
+        count = AbliterationPipeline._project_bias(module, direction, ["o_proj"])
+        assert count == 0
+
+
+# ---------------------------------------------------------------------------
+# Chat template wrapping
+# ---------------------------------------------------------------------------
+
+class TestChatTemplate:
+    def test_no_wrap_when_disabled(self):
+        """Should not wrap prompts when use_chat_template is False."""
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            method="basic",
+            use_chat_template=False,
+        )
+        prompts = ["Hello", "World"]
+        result = pipeline._maybe_apply_chat_template(prompts)
+        assert result == prompts
+
+    def test_no_wrap_without_handle(self):
+        """Should return raw prompts when handle is not set."""
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            use_chat_template=True,
+        )
+        prompts = ["Hello"]
+        result = pipeline._maybe_apply_chat_template(prompts)
+        assert result == prompts
+
+    def test_wraps_with_template(self):
+        """Should wrap prompts when tokenizer has apply_chat_template."""
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            use_chat_template=True,
+        )
+        handle = MagicMock()
+        tokenizer = MagicMock()
+
+        def mock_apply(messages, tokenize=False, add_generation_prompt=True):
+            return f"<user>{messages[0]['content']}</user><assistant>"
+
+        tokenizer.apply_chat_template = mock_apply
+        handle.tokenizer = tokenizer
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+
+        result = pipeline._maybe_apply_chat_template(["Hello"])
+        assert "<user>Hello</user>" in result[0]
+
+    def test_fallback_when_no_template(self):
+        """Should fall back to raw prompts when template is not configured."""
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            use_chat_template=True,
+        )
+        handle = MagicMock()
+        tokenizer = MagicMock()
+        tokenizer.apply_chat_template.side_effect = Exception("No template")
+        handle.tokenizer = tokenizer
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+
+        result = pipeline._maybe_apply_chat_template(["Hello"])
+        assert result == ["Hello"]
+
+
+# ---------------------------------------------------------------------------
+# Metadata includes new fields
+# ---------------------------------------------------------------------------
+
+class TestMetadata:
+    def test_rebirth_includes_new_config(self):
+        """Metadata should include all new configuration parameters."""
+        import json
+        handle = _make_tiny_handle()
+        pipeline = AbliterationPipeline(
+            model_name="test-model",
+            method="aggressive",
+        )
+        pipeline.handle = handle
+        pipeline._on_log = lambda m: None
+        pipeline._on_stage = lambda r: None
+        pipeline._strong_layers = [0]
+        pipeline._quality_metrics = {"perplexity": 8.5, "coherence": 1.0}
+
+        handle.model.save_pretrained = MagicMock()
+        handle.tokenizer.save_pretrained = MagicMock()
+
+        import tempfile
+        from pathlib import Path
+        with tempfile.TemporaryDirectory() as tmp:
+            pipeline.output_dir = Path(tmp) / "output"
+            pipeline._rebirth()
+
+            metadata = json.loads(
+                (pipeline.output_dir / "abliteration_metadata.json").read_text()
+            )
+            cfg = metadata["method_config"]
+            assert "project_biases" in cfg
+            assert "use_chat_template" in cfg
+            assert "use_whitened_svd" in cfg
+            assert "true_iterative_refinement" in cfg
+            assert cfg["project_biases"] is True
+            assert cfg["use_whitened_svd"] is True
+
+            # Should have more references now
+            assert len(metadata["references"]) >= 5
+            assert any("OBLITERATUS" in r for r in metadata["references"])
diff --git a/tests/test_abliteration_math.py b/tests/test_abliteration_math.py
new file mode 100644
index 0000000..6856f39
--- /dev/null
+++ b/tests/test_abliteration_math.py
@@ -0,0 +1,300 @@
+"""Mathematical verification that abliteration actually removes refusal directions.
+
+These tests verify the core linear algebra claims WITHOUT mocks:
+  1. Projection removes the target direction from weight matrices
+  2. Norm-preserving projection maintains weight magnitude
+  3. Multi-direction SVD extracts the correct subspace
+  4. Whitened SVD produces orthogonal directions
+  5. Random directions do NOT have the same effect (negative control)
+
+Unlike the other test files, these use real tensors and verify mathematical
+properties directly — no MagicMock, no mocked tokenizers.
+"""
+
+from __future__ import annotations
+
+
+import torch
+
+
+class TestProjectionRemovesDirection:
+    """Verify that orthogonal projection removes the target direction."""
+
+    def test_single_direction_projection(self):
+        """After projecting out direction d from weight W,
+        W_proj @ d should be approximately zero."""
+        torch.manual_seed(42)
+        hidden = 256
+        out_dim = 128
+
+        W = torch.randn(out_dim, hidden)
+        d = torch.randn(hidden)
+        d = d / d.norm()
+
+        # Project out d: W_proj = W - (W @ d) @ d^T
+        proj = W @ d  # (out_dim,)
+        W_proj = W - proj.unsqueeze(1) * d.unsqueeze(0)
+
+        # Verify: W_proj @ d should be ~0
+        residual = W_proj @ d
+        assert residual.abs().max().item() < 1e-5, f"Residual too large: {residual.abs().max()}"
+
+    def test_projection_preserves_orthogonal_components(self):
+        """Projection should NOT change components orthogonal to d."""
+        torch.manual_seed(42)
+        hidden = 256
+        out_dim = 128
+
+        W = torch.randn(out_dim, hidden)
+        d = torch.randn(hidden)
+        d = d / d.norm()
+
+        # Create a vector orthogonal to d
+        v = torch.randn(hidden)
+        v = v - (v @ d) * d  # Gram-Schmidt
+        v = v / v.norm()
+
+        # Project out d
+        proj = W @ d
+        W_proj = W - proj.unsqueeze(1) * d.unsqueeze(0)
+
+        # W @ v should equal W_proj @ v (orthogonal component unchanged)
+        original = W @ v
+        projected = W_proj @ v
+        diff = (original - projected).abs().max().item()
+        assert diff < 1e-5, f"Orthogonal component changed by {diff}"
+
+    def test_multi_direction_subspace_removal(self):
+        """Projecting out a k-dimensional subspace should remove all k directions."""
+        torch.manual_seed(42)
+        hidden = 256
+        out_dim = 128
+        k = 4
+
+        W = torch.randn(out_dim, hidden)
+        # Create orthonormal subspace
+        Q, _ = torch.linalg.qr(torch.randn(hidden, k))
+        subspace = Q.T  # (k, hidden)
+
+        # Project out subspace: W_proj = W - W @ Q @ Q^T
+        W_proj = W - (W @ Q) @ Q.T
+
+        # Verify: W_proj @ subspace^T should be ~0 for all directions
+        residual = W_proj @ subspace.T  # (out_dim, k)
+        assert residual.abs().max().item() < 1e-5, f"Subspace residual: {residual.abs().max()}"
+
+    def test_double_projection_is_idempotent(self):
+        """Projecting twice should give the same result as projecting once."""
+        torch.manual_seed(42)
+        hidden = 256
+        out_dim = 128
+
+        W = torch.randn(out_dim, hidden)
+        d = torch.randn(hidden)
+        d = d / d.norm()
+
+        # Project once
+        proj1 = W @ d
+        W1 = W - proj1.unsqueeze(1) * d.unsqueeze(0)
+
+        # Project twice
+        proj2 = W1 @ d
+        W2 = W1 - proj2.unsqueeze(1) * d.unsqueeze(0)
+
+        diff = (W1 - W2).abs().max().item()
+        assert diff < 1e-5, f"Second projection changed weights by {diff}"
+
+
+class TestNormPreservation:
+    """Verify that norm-preserving projection maintains weight magnitude."""
+
+    def test_norm_preserving_projection(self):
+        """Biprojected norm-preserving abliteration should keep ||W|| constant."""
+        torch.manual_seed(42)
+        hidden = 256
+        out_dim = 128
+
+        W = torch.randn(out_dim, hidden)
+        d = torch.randn(hidden)
+        d = d / d.norm()
+
+        # Standard projection
+        proj_coeff = W @ d
+        W_proj = W - proj_coeff.unsqueeze(1) * d.unsqueeze(0)
+
+        # Norm-preserving rescaling (per-row)
+        row_norms_orig = W.norm(dim=1, keepdim=True).clamp(min=1e-8)
+        row_norms_proj = W_proj.norm(dim=1, keepdim=True).clamp(min=1e-8)
+        W_norm_preserved = W_proj * (row_norms_orig / row_norms_proj)
+
+        # Direction is still removed
+        residual = W_norm_preserved @ d
+        # Norm-preserving can't guarantee zero projection (it rescales),
+        # but projection should be significantly reduced
+        original_proj = (W @ d).abs().mean().item()
+        preserved_proj = residual.abs().mean().item()
+        assert preserved_proj < original_proj * 0.5, \
+            f"Norm-preserved projection {preserved_proj} not much less than original {original_proj}"
+
+        # Row norms are preserved
+        row_diff = (W_norm_preserved.norm(dim=1) - W.norm(dim=1)).abs().max().item()
+        assert row_diff < 1e-5, f"Row norms changed by {row_diff}"
+
+
+class TestSVDDirectionExtraction:
+    """Verify that SVD on the difference matrix extracts the refusal direction."""
+
+    def test_planted_direction_recovery(self):
+        """Plant a known direction in the difference and verify SVD recovers it."""
+        torch.manual_seed(42)
+        n_samples = 50
+        hidden = 256
+
+        # Plant a known refusal direction
+        true_direction = torch.randn(hidden)
+        true_direction = true_direction / true_direction.norm()
+
+        # Harmful activations = harmless + signal along true_direction + noise
+        harmless = torch.randn(n_samples, hidden) * 0.5
+        signal_strength = 5.0
+        harmful = harmless + signal_strength * true_direction.unsqueeze(0) + torch.randn(n_samples, hidden) * 0.1
+
+        # Extract via SVD on difference
+        diff = harmful - harmless
+        U, S, Vh = torch.linalg.svd(diff, full_matrices=False)
+        extracted = Vh[0]
+        extracted = extracted / extracted.norm()
+
+        # The extracted direction should align with the true direction
+        cosine = (extracted @ true_direction).abs().item()
+        assert cosine > 0.95, f"Cosine similarity {cosine:.3f} too low (expected > 0.95)"
+
+    def test_multi_direction_recovery(self):
+        """Plant k directions and verify SVD recovers the subspace."""
+        torch.manual_seed(42)
+        n_samples = 200
+        hidden = 256
+        k = 3
+
+        # Plant k orthogonal directions with varying per-sample strength
+        Q, _ = torch.linalg.qr(torch.randn(hidden, k))
+        true_subspace = Q.T  # (k, hidden)
+
+        # Each sample gets a random mix of the k planted directions
+        harmless = torch.randn(n_samples, hidden) * 0.01
+        coefficients = torch.randn(n_samples, k).abs() * 5.0
+        signal = coefficients @ true_subspace  # (n_samples, hidden)
+        harmful = harmless + signal
+
+        diff = harmful - harmless
+        U, S, Vh = torch.linalg.svd(diff, full_matrices=False)
+        extracted_subspace = Vh[:k]  # (k, hidden)
+
+        # Check subspace overlap: project true directions into extracted subspace
+        for i in range(k):
+            proj = extracted_subspace @ true_subspace[i]
+            captured_variance = proj.norm().item()
+            assert captured_variance > 0.9, \
+                f"Direction {i}: captured variance {captured_variance:.3f} too low"
+
+
+class TestRandomDirectionBaseline:
+    """Verify that random directions do NOT have the same effect as learned ones."""
+
+    def test_random_direction_has_lower_projection(self):
+        """Random directions should project much less on harmful activations
+        than the true refusal direction."""
+        torch.manual_seed(42)
+        n_samples = 50
+        hidden = 256
+
+        # Create structured harmful vs harmless difference
+        true_dir = torch.randn(hidden)
+        true_dir = true_dir / true_dir.norm()
+
+        harmless = torch.randn(n_samples, hidden) * 0.5
+        harmful = harmless + 3.0 * true_dir.unsqueeze(0)
+
+        harmful_mean = harmful.mean(dim=0)
+
+        # True direction projection
+        true_proj = (harmful_mean @ true_dir).abs().item()
+
+        # Random direction projections (seeds far from 42 to avoid collision)
+        random_projs = []
+        for i in range(100):
+            rng = torch.Generator().manual_seed(10000 + i)
+            rand_dir = torch.randn(hidden, generator=rng)
+            rand_dir = rand_dir / rand_dir.norm()
+            random_projs.append((harmful_mean @ rand_dir).abs().item())
+
+        mean_random = sum(random_projs) / len(random_projs)
+
+        # True direction should project MUCH more than random average
+        assert true_proj > mean_random * 3.0, \
+            f"True projection ({true_proj:.3f}) not much larger than random mean ({mean_random:.3f})"
+
+
+class TestWhitenedSVD:
+    """Verify whitened SVD properties."""
+
+    def test_whitened_directions_are_orthogonal(self):
+        """Whitened SVD should produce orthogonal directions."""
+        torch.manual_seed(42)
+        n_samples = 80
+        hidden = 128
+        k = 4
+
+        H = torch.randn(n_samples, hidden) + torch.randn(1, hidden) * 2
+        B = torch.randn(n_samples, hidden)
+
+        mu_B = B.mean(dim=0, keepdim=True)
+        B_centered = B - mu_B
+        cov_B = (B_centered.T @ B_centered) / (n_samples - 1)
+        cov_B += 1e-4 * torch.eye(hidden)
+
+        eigenvalues, eigenvectors = torch.linalg.eigh(cov_B)
+        eigenvalues = eigenvalues.clamp(min=0)
+        inv_sqrt_eig = 1.0 / torch.sqrt(eigenvalues + 1e-4)
+        whiten_proj = eigenvectors * inv_sqrt_eig.unsqueeze(0)
+
+        H_whitened = (H - mu_B) @ whiten_proj
+        B_whitened = B_centered @ whiten_proj
+        D_whitened = H_whitened - B_whitened
+
+        U, S, Vh = torch.linalg.svd(D_whitened, full_matrices=False)
+        directions = Vh[:k]
+
+        # Check orthogonality: directions @ directions^T should be ~identity
+        gram = directions @ directions.T
+        identity = torch.eye(k)
+        off_diag = (gram - identity).abs().max().item()
+        assert off_diag < 1e-4, f"Directions not orthogonal: max off-diagonal = {off_diag}"
+
+
+class TestReproducibility:
+    """Verify that seed setting produces deterministic results."""
+
+    def test_set_seed_determinism(self):
+        """Same seed should produce identical random tensors."""
+        from obliteratus.reproducibility import set_seed
+
+        set_seed(123, deterministic=False)
+        a = torch.randn(100)
+
+        set_seed(123, deterministic=False)
+        b = torch.randn(100)
+
+        assert torch.equal(a, b), "Same seed produced different tensors"
+
+    def test_different_seeds_differ(self):
+        """Different seeds should produce different tensors."""
+        from obliteratus.reproducibility import set_seed
+
+        set_seed(123, deterministic=False)
+        a = torch.randn(100)
+
+        set_seed(456, deterministic=False)
+        b = torch.randn(100)
+
+        assert not torch.equal(a, b), "Different seeds produced identical tensors"
diff --git a/tests/test_advanced_metrics.py b/tests/test_advanced_metrics.py
new file mode 100644
index 0000000..b534df9
--- /dev/null
+++ b/tests/test_advanced_metrics.py
@@ -0,0 +1,364 @@
+"""Tests for advanced evaluation metrics."""
+
+from __future__ import annotations
+
+
+import pytest
+import torch
+
+from obliteratus.evaluation.advanced_metrics import (
+    REFUSAL_PREFIXES,
+    REFUSAL_SUBSTRINGS,
+    _is_refusal,
+    refusal_rate,
+    token_kl_divergence,
+    first_token_kl_divergence,
+    effective_rank,
+    effective_rank_change,
+    activation_cosine_similarity,
+    linear_cka,
+    refusal_projection_magnitude,
+    format_eval_report,
+    AbliterationEvalResult,
+)
+
+
+# ---------------------------------------------------------------------------
+# Refusal Rate
+# ---------------------------------------------------------------------------
+
+class TestRefusalRate:
+    def test_all_refusals(self):
+        responses = [
+            "I'm sorry, but I cannot help with that.",
+            "I apologize, but I'm unable to assist.",
+            "As an AI, I cannot provide that information.",
+        ]
+        assert refusal_rate(responses, mode="prefix") == 1.0
+
+    def test_no_refusals(self):
+        responses = [
+            "Sure, here's how to do that.",
+            "Here's a step-by-step guide.",
+            "The answer is 42.",
+        ]
+        assert refusal_rate(responses, mode="prefix") == 0.0
+
+    def test_mixed_responses(self):
+        responses = [
+            "I'm sorry, I can't do that.",
+            "Sure, here you go.",
+            "I cannot provide that.",
+            "Here's what you need.",
+        ]
+        rate = refusal_rate(responses, mode="prefix")
+        assert rate == 0.5  # 2 out of 4
+
+    def test_substring_mode(self):
+        responses = [
+            "Well, actually I cannot help with that request.",
+            "Let me explain how to do this.",
+        ]
+        assert refusal_rate(responses, mode="substring") == 0.5
+
+    def test_combined_mode(self):
+        responses = [
+            "I'm sorry, but no.",  # caught by prefix
+            "That said, i won't do that for you.",  # caught by substring
+            "Here's the answer.",  # not a refusal
+        ]
+        rate = refusal_rate(responses, mode="combined")
+        assert abs(rate - 2.0 / 3.0) < 1e-6
+
+    def test_empty_responses(self):
+        assert refusal_rate([], mode="prefix") == 0.0
+
+    def test_empty_string_not_refusal(self):
+        assert not _is_refusal("", mode="combined")
+
+    def test_is_refusal_prefix(self):
+        assert _is_refusal("I'm sorry, I can't do that.", mode="prefix")
+        assert _is_refusal("As an AI language model, I cannot", mode="prefix")
+        assert not _is_refusal("The answer is 42.", mode="prefix")
+
+    def test_is_refusal_substring(self):
+        assert _is_refusal("I must say, i cannot assist with that.", mode="substring")
+        assert not _is_refusal("The capital of France is Paris.", mode="substring")
+
+    def test_refusal_prefixes_nonempty(self):
+        assert len(REFUSAL_PREFIXES) > 20
+
+    def test_refusal_substrings_nonempty(self):
+        assert len(REFUSAL_SUBSTRINGS) > 10
+
+
+# ---------------------------------------------------------------------------
+# KL Divergence
+# ---------------------------------------------------------------------------
+
+class TestKLDivergence:
+    def test_identical_distributions(self):
+        """KL divergence of identical distributions should be 0."""
+        logits = torch.randn(2, 10, 100)
+        kl = token_kl_divergence(logits, logits)
+        assert abs(kl) < 1e-5
+
+    def test_different_distributions(self):
+        """KL divergence of different distributions should be positive."""
+        torch.manual_seed(42)
+        logits_a = torch.randn(2, 10, 100)
+        logits_b = torch.randn(2, 10, 100)
+        kl = token_kl_divergence(logits_a, logits_b)
+        assert kl > 0
+
+    def test_kl_nonnegative(self):
+        """KL divergence should always be non-negative."""
+        torch.manual_seed(42)
+        for _ in range(5):
+            logits_a = torch.randn(1, 5, 50)
+            logits_b = torch.randn(1, 5, 50)
+            kl = token_kl_divergence(logits_a, logits_b)
+            assert kl >= -1e-6  # allow small numerical errors
+
+    def test_first_token_kl_identical(self):
+        """First-token KL of identical distributions should be 0."""
+        logits = torch.randn(4, 20, 100)
+        kl = first_token_kl_divergence(logits, logits)
+        assert abs(kl) < 1e-5
+
+    def test_first_token_kl_different(self):
+        """First-token KL of different distributions should be positive."""
+        torch.manual_seed(42)
+        logits_a = torch.randn(4, 20, 100)
+        logits_b = torch.randn(4, 20, 100)
+        kl = first_token_kl_divergence(logits_a, logits_b)
+        assert kl > 0
+
+    def test_temperature_effect(self):
+        """Higher temperature should reduce KL divergence (smoother distributions)."""
+        torch.manual_seed(42)
+        logits_a = torch.randn(2, 5, 50)
+        logits_b = torch.randn(2, 5, 50)
+        kl_t1 = token_kl_divergence(logits_a, logits_b, temperature=1.0)
+        kl_t5 = token_kl_divergence(logits_a, logits_b, temperature=5.0)
+        assert kl_t5 < kl_t1
+
+
+# ---------------------------------------------------------------------------
+# Effective Rank
+# ---------------------------------------------------------------------------
+
+class TestEffectiveRank:
+    def test_rank_one_matrix(self):
+        """Rank-1 matrix should have effective rank close to 1."""
+        v = torch.randn(8, 1)
+        u = torch.randn(1, 4)
+        W = v @ u  # rank-1
+        erank = effective_rank(W)
+        assert erank < 1.5
+
+    def test_identity_matrix(self):
+        """Identity matrix should have effective rank equal to dimension."""
+        n = 8
+        W = torch.eye(n)
+        erank = effective_rank(W)
+        assert abs(erank - n) < 0.1
+
+    def test_random_full_rank(self):
+        """Random matrix should have high effective rank."""
+        torch.manual_seed(42)
+        W = torch.randn(16, 16)
+        erank = effective_rank(W)
+        assert erank > 10  # should be close to 16
+
+    def test_zero_matrix(self):
+        """Zero matrix should have effective rank 0."""
+        W = torch.zeros(4, 4)
+        erank = effective_rank(W)
+        assert erank == 0.0
+
+    def test_effective_rank_change(self):
+        """Should compute before/after rank comparison."""
+        torch.manual_seed(42)
+        W_before = torch.randn(8, 8)
+        # Simulate abliteration: remove a direction (reduces rank slightly)
+        d = torch.randn(8, 1)
+        d = d / d.norm()
+        W_after = W_before - (W_before @ d) @ d.T
+
+        result = effective_rank_change(W_before, W_after)
+        assert "rank_before" in result
+        assert "rank_after" in result
+        assert "rank_delta" in result
+        assert "rank_ratio" in result
+        assert result["rank_after"] <= result["rank_before"] + 0.1
+
+    def test_rejects_non_2d(self):
+        """Should raise ValueError for non-2D tensors."""
+        with pytest.raises(ValueError):
+            effective_rank(torch.randn(4, 4, 4))
+
+
+# ---------------------------------------------------------------------------
+# Activation Cosine Similarity
+# ---------------------------------------------------------------------------
+
+class TestActivationCosineSimilarity:
+    def test_identical_activations(self):
+        acts = torch.randn(10, 32)
+        sim = activation_cosine_similarity(acts, acts)
+        assert abs(sim - 1.0) < 1e-5
+
+    def test_orthogonal_activations(self):
+        """Orthogonal activations should have cosine near 0."""
+        a = torch.tensor([[1.0, 0.0, 0.0]])
+        b = torch.tensor([[0.0, 1.0, 0.0]])
+        sim = activation_cosine_similarity(a, b)
+        assert abs(sim) < 1e-5
+
+    def test_opposite_activations(self):
+        """Opposite activations should have cosine -1."""
+        a = torch.randn(5, 16)
+        sim = activation_cosine_similarity(a, -a)
+        assert abs(sim - (-1.0)) < 1e-5
+
+    def test_handles_3d(self):
+        """Should handle 3D tensors by reshaping."""
+        a = torch.randn(2, 5, 16)
+        b = torch.randn(2, 5, 16)
+        sim = activation_cosine_similarity(a, b)
+        assert -1.0 <= sim <= 1.0
+
+
+# ---------------------------------------------------------------------------
+# Linear CKA
+# ---------------------------------------------------------------------------
+
+class TestLinearCKA:
+    def test_identical_representations(self):
+        """CKA of identical representations should be 1.0."""
+        X = torch.randn(20, 16)
+        cka = linear_cka(X, X)
+        assert abs(cka - 1.0) < 1e-4
+
+    def test_scaled_representations(self):
+        """CKA should be invariant to isotropic scaling."""
+        X = torch.randn(20, 16)
+        Y = X * 5.0
+        cka = linear_cka(X, Y)
+        assert abs(cka - 1.0) < 1e-4
+
+    def test_random_representations(self):
+        """CKA of random representations should be low."""
+        torch.manual_seed(42)
+        X = torch.randn(100, 16)
+        Y = torch.randn(100, 16)
+        cka = linear_cka(X, Y)
+        assert cka < 0.3  # random should be near 0
+
+    def test_cka_bounded(self):
+        """CKA should be between 0 and 1."""
+        torch.manual_seed(42)
+        for _ in range(5):
+            X = torch.randn(20, 8)
+            Y = torch.randn(20, 8)
+            cka = linear_cka(X, Y)
+            assert -0.01 <= cka <= 1.01  # small tolerance for numerics
+
+    def test_different_dimensions(self):
+        """CKA should work with different hidden dimensions."""
+        X = torch.randn(20, 16)
+        Y = torch.randn(20, 32)
+        cka = linear_cka(X, Y)
+        assert -0.01 <= cka <= 1.01
+
+    def test_handles_3d(self):
+        """Should handle 3D tensors by reshaping."""
+        X = torch.randn(2, 10, 16)
+        Y = torch.randn(2, 10, 16)
+        cka = linear_cka(X, Y)
+        assert -0.01 <= cka <= 1.01
+
+
+# ---------------------------------------------------------------------------
+# Refusal Direction Projection Magnitude
+# ---------------------------------------------------------------------------
+
+class TestRefusalProjection:
+    def test_aligned_activations(self):
+        """Activations aligned with direction should have high projection."""
+        d = torch.tensor([1.0, 0.0, 0.0])
+        acts = torch.tensor([
+            [5.0, 0.0, 0.0],
+            [3.0, 0.0, 0.0],
+            [4.0, 0.0, 0.0],
+        ])
+        result = refusal_projection_magnitude(acts, d)
+        assert result["mean"] == 4.0
+        assert result["abs_mean"] == 4.0
+
+    def test_orthogonal_activations(self):
+        """Orthogonal activations should have zero projection."""
+        d = torch.tensor([1.0, 0.0, 0.0])
+        acts = torch.tensor([
+            [0.0, 5.0, 0.0],
+            [0.0, 0.0, 3.0],
+        ])
+        result = refusal_projection_magnitude(acts, d)
+        assert abs(result["mean"]) < 1e-5
+        assert abs(result["abs_mean"]) < 1e-5
+
+    def test_result_keys(self):
+        """Should return all expected keys."""
+        d = torch.randn(8)
+        acts = torch.randn(5, 8)
+        result = refusal_projection_magnitude(acts, d)
+        assert set(result.keys()) == {"mean", "std", "max", "min", "abs_mean"}
+
+
+# ---------------------------------------------------------------------------
+# Eval Report Formatting
+# ---------------------------------------------------------------------------
+
+class TestEvalReport:
+    def test_format_report(self):
+        result = AbliterationEvalResult(
+            refusal_rate_harmful=0.1,
+            refusal_rate_harmless=0.02,
+            kl_divergence=0.15,
+            perplexity=12.5,
+            coherence_score=0.8,
+            mean_activation_cosine=0.95,
+            mean_cka=0.92,
+        )
+        report = format_eval_report(result)
+        assert "10.0%" in report
+        assert "12.50" in report
+        assert "excellent" in report  # KL < 0.2
+
+    def test_format_report_high_kl(self):
+        result = AbliterationEvalResult(
+            refusal_rate_harmful=0.0,
+            refusal_rate_harmless=0.0,
+            kl_divergence=1.5,
+            perplexity=50.0,
+            coherence_score=0.4,
+            mean_activation_cosine=None,
+            mean_cka=None,
+        )
+        report = format_eval_report(result)
+        assert "significant damage" in report
+
+    def test_format_report_no_kl(self):
+        result = AbliterationEvalResult(
+            refusal_rate_harmful=0.5,
+            refusal_rate_harmless=0.1,
+            kl_divergence=None,
+            perplexity=20.0,
+            coherence_score=1.0,
+            mean_activation_cosine=None,
+            mean_cka=None,
+        )
+        report = format_eval_report(result)
+        assert "50.0%" in report
+        assert "KL" not in report
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
new file mode 100644
index 0000000..1fb03c7
--- /dev/null
+++ b/tests/test_analysis.py
@@ -0,0 +1,345 @@
+"""Tests for the analysis techniques."""
+
+from __future__ import annotations
+
+
+import torch
+
+from obliteratus.analysis.whitened_svd import WhitenedSVDExtractor, WhitenedSVDResult
+from obliteratus.analysis.cross_layer import CrossLayerAlignmentAnalyzer, CrossLayerResult
+from obliteratus.analysis.activation_probing import ActivationProbe, ProbeResult
+
+
+# ---------------------------------------------------------------------------
+# WhitenedSVDExtractor
+# ---------------------------------------------------------------------------
+
+class TestWhitenedSVD:
+    def test_basic_extraction(self):
+        """Whitened SVD should extract directions from activation differences."""
+        torch.manual_seed(42)
+        n_prompts, hidden_dim = 10, 32
+
+        # Create activations with a clear refusal direction
+        refusal_dir = torch.randn(hidden_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        harmless = [torch.randn(hidden_dim) for _ in range(n_prompts)]
+        harmful = [h + 2.0 * refusal_dir for h in harmless]  # shifted along refusal dir
+
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=3)
+
+        assert isinstance(result, WhitenedSVDResult)
+        assert result.directions.shape == (3, hidden_dim)
+        assert result.singular_values.shape == (3,)
+        assert result.variance_explained > 0
+        assert result.condition_number > 0
+        assert result.effective_rank > 0
+
+    def test_directions_are_unit_vectors(self):
+        """Extracted directions should be unit length."""
+        torch.manual_seed(42)
+        harmless = [torch.randn(16) for _ in range(8)]
+        harmful = [h + torch.randn(16) * 0.5 for h in harmless]
+
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=2)
+
+        for i in range(result.directions.shape[0]):
+            assert abs(result.directions[i].norm().item() - 1.0) < 1e-4
+
+    def test_primary_aligns_with_planted_direction(self):
+        """Primary whitened direction should capture the planted refusal signal.
+
+        Whitening rotates directions relative to the covariance structure,
+        so perfect alignment with the raw direction is not expected. We verify
+        the whitened direction explains substantial variance and has moderate
+        alignment (whitening intentionally reweights dimensions).
+        """
+        torch.manual_seed(42)
+        hidden_dim = 64
+        n_prompts = 30
+
+        refusal_dir = torch.randn(hidden_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        # Isotropic harmless activations (whitening has minimal effect)
+        harmless = [torch.randn(hidden_dim) * 0.1 for _ in range(n_prompts)]
+        harmful = [h + 5.0 * refusal_dir for h in harmless]
+
+        extractor = WhitenedSVDExtractor(regularization_eps=1e-3)
+        result = extractor.extract(harmful, harmless, n_directions=1)
+
+        cos_sim = (result.directions[0] @ refusal_dir).abs().item()
+        # Moderate alignment expected (whitening reweights dimensions)
+        assert cos_sim > 0.2, f"Expected alignment > 0.2, got {cos_sim:.3f}"
+        # More importantly: the direction should explain most variance
+        assert result.variance_explained > 0.5
+
+    def test_extract_all_layers(self):
+        """Should extract directions for all provided layers."""
+        torch.manual_seed(42)
+        harmful_acts = {}
+        harmless_acts = {}
+        for layer in range(4):
+            harmful_acts[layer] = [torch.randn(16) for _ in range(5)]
+            harmless_acts[layer] = [torch.randn(16) for _ in range(5)]
+
+        extractor = WhitenedSVDExtractor()
+        results = extractor.extract_all_layers(harmful_acts, harmless_acts, n_directions=2)
+
+        assert len(results) == 4
+        for idx in range(4):
+            assert idx in results
+            assert results[idx].directions.shape[0] == 2
+
+    def test_compare_with_standard(self):
+        """Comparison should return valid cosine similarities."""
+        torch.manual_seed(42)
+        harmless = [torch.randn(16) for _ in range(8)]
+        harmful = [h + torch.randn(16) for h in harmless]
+
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=2)
+
+        std_dir = torch.randn(16)
+        std_dir = std_dir / std_dir.norm()
+
+        comparison = WhitenedSVDExtractor.compare_with_standard(result, std_dir)
+        assert "primary_direction_cosine" in comparison
+        assert "subspace_principal_cosine" in comparison
+        assert 0 <= comparison["primary_direction_cosine"] <= 1.0
+
+    def test_handles_3d_activations(self):
+        """Should handle activations with an extra batch dimension."""
+        torch.manual_seed(42)
+        # (1, hidden_dim) shape from hook output
+        harmless = [torch.randn(1, 16) for _ in range(5)]
+        harmful = [torch.randn(1, 16) for _ in range(5)]
+
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=2)
+        assert result.directions.shape == (2, 16)
+
+    def test_variance_explained_bounded(self):
+        """Variance explained should be between 0 and 1."""
+        torch.manual_seed(42)
+        harmless = [torch.randn(16) for _ in range(8)]
+        harmful = [torch.randn(16) for _ in range(8)]
+
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=3)
+        assert 0 <= result.variance_explained <= 1.0
+
+
+# ---------------------------------------------------------------------------
+# CrossLayerAlignmentAnalyzer
+# ---------------------------------------------------------------------------
+
+class TestCrossLayerAlignment:
+    def test_identical_directions(self):
+        """Identical directions across layers should give persistence = 1."""
+        direction = torch.randn(32)
+        direction = direction / direction.norm()
+        directions = {i: direction.clone() for i in range(5)}
+
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+
+        assert isinstance(result, CrossLayerResult)
+        assert result.direction_persistence_score > 0.99
+        assert result.mean_adjacent_cosine > 0.99
+        assert result.total_geodesic_distance < 0.01
+
+    def test_orthogonal_directions(self):
+        """Orthogonal directions should give low persistence."""
+        # Create orthogonal directions via QR decomposition
+        torch.manual_seed(42)
+        M = torch.randn(5, 32)
+        Q, _ = torch.linalg.qr(M.T)
+        directions = {i: Q[:, i] for i in range(5)}
+
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+
+        assert result.direction_persistence_score < 0.3
+        assert result.mean_adjacent_cosine < 0.3
+
+    def test_cluster_detection(self):
+        """Should detect clusters of similar directions."""
+        torch.manual_seed(42)
+        # Create two clusters
+        d1 = torch.randn(32)
+        d1 = d1 / d1.norm()
+        d2 = torch.randn(32)
+        d2 = d2 / d2.norm()
+
+        directions = {
+            0: d1, 1: d1 + 0.01 * torch.randn(32),
+            2: d1 + 0.01 * torch.randn(32),
+            3: d2, 4: d2 + 0.01 * torch.randn(32),
+        }
+        # Normalize
+        directions = {k: v / v.norm() for k, v in directions.items()}
+
+        analyzer = CrossLayerAlignmentAnalyzer(cluster_threshold=0.9)
+        result = analyzer.analyze(directions)
+
+        # Should find at least 2 clusters
+        assert result.cluster_count >= 2
+
+    def test_empty_input(self):
+        """Should handle empty input gracefully."""
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze({})
+        assert result.layer_indices == []
+        assert result.cluster_count == 0
+
+    def test_single_layer(self):
+        """Single layer should work fine."""
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze({5: torch.randn(16)})
+        assert result.layer_indices == [5]
+        assert result.direction_persistence_score == 1.0
+
+    def test_strong_layers_filter(self):
+        """Should only analyze specified strong layers."""
+        directions = {i: torch.randn(16) for i in range(10)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions, strong_layers=[2, 5, 7])
+        assert result.layer_indices == [2, 5, 7]
+        assert result.cosine_matrix.shape == (3, 3)
+
+    def test_cosine_matrix_symmetry(self):
+        """Cosine matrix should be symmetric."""
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(4)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        diff = (result.cosine_matrix - result.cosine_matrix.T).abs().max().item()
+        assert diff < 1e-5
+
+    def test_cosine_matrix_diagonal_ones(self):
+        """Diagonal of cosine matrix should be 1.0."""
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(4)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        for i in range(4):
+            assert abs(result.cosine_matrix[i, i].item() - 1.0) < 1e-4
+
+    def test_angular_drift_monotonic(self):
+        """Angular drift should be monotonically non-decreasing."""
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(6)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        for i in range(len(result.angular_drift) - 1):
+            assert result.angular_drift[i + 1] >= result.angular_drift[i] - 1e-6
+
+    def test_format_report(self):
+        """Format report should produce a non-empty string."""
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(4)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        report = CrossLayerAlignmentAnalyzer.format_report(result)
+        assert "Cross-Layer" in report
+        assert "persistence" in report
+
+
+# ---------------------------------------------------------------------------
+# ActivationProbe
+# ---------------------------------------------------------------------------
+
+class TestActivationProbe:
+    def test_clean_elimination(self):
+        """After removing direction, projections should be near-zero."""
+        torch.manual_seed(42)
+        hidden_dim = 32
+        refusal_dir = torch.randn(hidden_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        # "Post-abliteration" activations: direction has been removed
+        harmless = [torch.randn(hidden_dim) for _ in range(10)]
+        harmful = [torch.randn(hidden_dim) for _ in range(10)]
+        # Both sets are random, no refusal signal => gap should be small
+
+        probe = ActivationProbe()
+        result = probe.probe_layer(harmful, harmless, refusal_dir)
+        assert abs(result.projection_gap) < 1.0
+        assert result.separation_d_prime < 2.0
+
+    def test_residual_detection(self):
+        """Should detect residual refusal signal when direction wasn't removed."""
+        torch.manual_seed(42)
+        hidden_dim = 32
+        refusal_dir = torch.randn(hidden_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        harmless = [torch.randn(hidden_dim) for _ in range(10)]
+        # Harmful still has strong refusal direction component
+        harmful = [h + 5.0 * refusal_dir for h in harmless]
+
+        probe = ActivationProbe()
+        result = probe.probe_layer(harmful, harmless, refusal_dir)
+        assert abs(result.projection_gap) > 1.0
+        assert result.separation_d_prime > 2.0
+
+    def test_probe_all_layers(self):
+        """Should compute aggregate metrics across layers."""
+        torch.manual_seed(42)
+        hidden_dim = 16
+        n_layers = 4
+
+        harmful_acts = {}
+        harmless_acts = {}
+        refusal_dirs = {}
+
+        for layer in range(n_layers):
+            harmful_acts[layer] = [torch.randn(hidden_dim) for _ in range(5)]
+            harmless_acts[layer] = [torch.randn(hidden_dim) for _ in range(5)]
+            d = torch.randn(hidden_dim)
+            refusal_dirs[layer] = d / d.norm()
+
+        probe = ActivationProbe()
+        result = probe.probe_all_layers(harmful_acts, harmless_acts, refusal_dirs)
+
+        assert isinstance(result, ProbeResult)
+        assert len(result.per_layer) == n_layers
+        assert 0 <= result.refusal_elimination_score <= 1.0
+        assert result.mean_projection_gap >= 0
+
+    def test_res_score_range(self):
+        """RES should always be between 0 and 1."""
+        torch.manual_seed(42)
+        for seed in range(5):
+            torch.manual_seed(seed)
+            harmful = {0: [torch.randn(8) for _ in range(3)]}
+            harmless = {0: [torch.randn(8) for _ in range(3)]}
+            dirs = {0: torch.randn(8)}
+            dirs[0] = dirs[0] / dirs[0].norm()
+
+            probe = ActivationProbe()
+            result = probe.probe_all_layers(harmful, harmless, dirs)
+            assert 0 <= result.refusal_elimination_score <= 1.0
+
+    def test_format_report(self):
+        """Format report should produce readable output."""
+        torch.manual_seed(42)
+        harmful = {0: [torch.randn(8) for _ in range(3)]}
+        harmless = {0: [torch.randn(8) for _ in range(3)]}
+        dirs = {0: torch.randn(8)}
+
+        probe = ActivationProbe()
+        result = probe.probe_all_layers(harmful, harmless, dirs)
+        report = ActivationProbe.format_report(result)
+        assert "Refusal Elimination Score" in report
+
+    def test_empty_input(self):
+        """Should handle empty input gracefully."""
+        probe = ActivationProbe()
+        result = probe.probe_all_layers({}, {}, {})
+        assert result.refusal_elimination_score == 0.0
+        assert len(result.per_layer) == 0
diff --git a/tests/test_analysis_utils.py b/tests/test_analysis_utils.py
new file mode 100644
index 0000000..2399e94
--- /dev/null
+++ b/tests/test_analysis_utils.py
@@ -0,0 +1,65 @@
+"""Tests for shared analysis utilities (gini_coefficient, etc.)."""
+
+from __future__ import annotations
+
+import pytest
+
+from obliteratus.analysis.utils import gini_coefficient
+
+
+class TestGiniCoefficient:
+    """Tests for the Gini coefficient computation."""
+
+    def test_empty_list(self):
+        assert gini_coefficient([]) == 0.0
+
+    def test_single_value(self):
+        assert gini_coefficient([42.0]) == 0.0
+
+    def test_uniform_distribution(self):
+        """All-equal values → Gini = 0."""
+        assert gini_coefficient([1.0, 1.0, 1.0, 1.0]) == pytest.approx(0.0, abs=1e-10)
+
+    def test_maximally_concentrated(self):
+        """One value, rest zero → Gini ≈ 1."""
+        result = gini_coefficient([100.0, 0.0, 0.0, 0.0])
+        assert result > 0.7  # For n=4, max Gini = (n-1)/n = 0.75
+
+    def test_all_zeros(self):
+        assert gini_coefficient([0.0, 0.0, 0.0]) == 0.0
+
+    def test_two_equal_values(self):
+        assert gini_coefficient([5.0, 5.0]) == pytest.approx(0.0, abs=1e-10)
+
+    def test_two_unequal_values(self):
+        """[0, 10] → Gini = 0.5 for n=2."""
+        result = gini_coefficient([0.0, 10.0])
+        assert result == pytest.approx(0.5, abs=0.01)
+
+    def test_moderate_inequality(self):
+        """Moderate spread → Gini between 0 and 1."""
+        result = gini_coefficient([1.0, 2.0, 3.0, 4.0, 5.0])
+        assert 0.1 < result < 0.5
+
+    def test_result_in_valid_range(self):
+        """Gini is always in [0, 1]."""
+        for vals in [[1, 2, 3], [0, 0, 100], [5, 5, 5], [1], [0.1, 0.9]]:
+            result = gini_coefficient(vals)
+            assert 0.0 <= result <= 1.0, f"Gini({vals}) = {result} out of range"
+
+    def test_large_uniform(self):
+        """Large uniform distribution → Gini ≈ 0."""
+        vals = [1.0] * 1000
+        assert gini_coefficient(vals) == pytest.approx(0.0, abs=1e-10)
+
+    def test_large_concentrated(self):
+        """Large distribution with one outlier → high Gini."""
+        vals = [0.0] * 999 + [1000.0]
+        result = gini_coefficient(vals)
+        assert result > 0.99
+
+    def test_order_invariant(self):
+        """Gini should not depend on input order."""
+        a = gini_coefficient([1.0, 3.0, 5.0, 7.0])
+        b = gini_coefficient([7.0, 1.0, 5.0, 3.0])
+        assert a == pytest.approx(b)
diff --git a/tests/test_architecture_profiles.py b/tests/test_architecture_profiles.py
new file mode 100644
index 0000000..00d1c13
--- /dev/null
+++ b/tests/test_architecture_profiles.py
@@ -0,0 +1,598 @@
+"""Tests for architecture-aware preset defaults.
+
+Tests the detection logic and recommended parameter overrides for each
+architecture class (dense/MoE, standard/reasoning).
+"""
+
+from __future__ import annotations
+
+
+from obliteratus.architecture_profiles import (
+    ArchitectureClass,
+    ArchitectureProfile,
+    ReasoningClass,
+    detect_architecture,
+    get_profile_summary,
+    apply_profile_to_method_config,
+)
+
+
+# ---------------------------------------------------------------------------
+#  Detection: Dense models
+# ---------------------------------------------------------------------------
+
+
+class TestDenseDetection:
+    """Test that standard dense models are correctly classified."""
+
+    def test_llama_is_dense(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert profile.reasoning_class == ReasoningClass.STANDARD
+        assert not profile.is_moe
+
+    def test_qwen_dense_is_dense(self):
+        profile = detect_architecture("Qwen/Qwen2.5-7B-Instruct")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert not profile.is_moe
+
+    def test_gemma_is_dense(self):
+        profile = detect_architecture("google/gemma-3-27b-it")
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_phi_is_dense(self):
+        profile = detect_architecture("microsoft/Phi-4-mini-instruct")
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_mistral_small_is_dense(self):
+        profile = detect_architecture("mistralai/Mistral-Small-24B-Instruct-2501")
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_yi_is_dense(self):
+        profile = detect_architecture("01-ai/Yi-1.5-9B-Chat")
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_dense_label(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.profile_label == "Dense Standard"
+
+    def test_dense_recommended_method(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.recommended_method == "aggressive"
+
+
+# ---------------------------------------------------------------------------
+#  Detection: MoE models
+# ---------------------------------------------------------------------------
+
+
+class TestMoEDetection:
+    """Test that MoE models are correctly classified."""
+
+    def test_gpt_oss_is_moe(self):
+        """GPT-OSS is MoE. Without config, defaults to small (conservative)."""
+        profile = detect_architecture("openai/gpt-oss-20b")
+        assert profile.is_moe
+        assert profile.arch_class == ArchitectureClass.SMALL_MOE
+
+    def test_qwen3_30b_is_small_moe(self):
+        profile = detect_architecture("Qwen/Qwen3-30B-A3B")
+        assert profile.is_moe
+
+    def test_deepseek_v3_is_large_moe(self):
+        profile = detect_architecture("deepseek-ai/DeepSeek-V3.2")
+        assert profile.is_moe
+
+    def test_kimi_k2_is_large_moe(self):
+        profile = detect_architecture("moonshotai/Kimi-K2-Instruct")
+        assert profile.is_moe
+
+    def test_qwen3_235b_is_moe(self):
+        profile = detect_architecture("Qwen/Qwen3-235B-A22B")
+        assert profile.is_moe
+
+    def test_glm_47_is_moe(self):
+        profile = detect_architecture("zai-org/GLM-4.7")
+        assert profile.is_moe
+
+    def test_llama4_maverick_is_moe(self):
+        profile = detect_architecture("meta-llama/Llama-4-Maverick-17B-128E-Instruct")
+        assert profile.is_moe
+
+    def test_step_flash_is_moe(self):
+        profile = detect_architecture("stepfun-ai/Step-3.5-Flash")
+        assert profile.is_moe
+
+    def test_minimax_is_moe(self):
+        profile = detect_architecture("MiniMaxAI/MiniMax-M2.1")
+        assert profile.is_moe
+
+    def test_mistral_large_3_is_moe(self):
+        profile = detect_architecture("mistralai/Mistral-Large-3-675B-Instruct-2512")
+        assert profile.is_moe
+
+    def test_moe_recommended_method_is_surgical(self):
+        """All MoE profiles recommend surgical method."""
+        profile = detect_architecture("openai/gpt-oss-20b")
+        assert profile.recommended_method == "surgical"
+
+    def test_gpt_oss_with_config_is_small_moe(self):
+        """GPT-OSS with config providing expert count → small MoE."""
+        class MockConfig:
+            model_type = "gpt_neox"
+            num_hidden_layers = 32
+            hidden_size = 2560
+            intermediate_size = 6912
+            vocab_size = 50304
+            num_local_experts = 8
+            num_experts_per_tok = 2
+        profile = detect_architecture("openai/gpt-oss-20b", config=MockConfig())
+        assert profile.is_moe
+        assert profile.arch_class == ArchitectureClass.SMALL_MOE
+
+
+# ---------------------------------------------------------------------------
+#  Detection: Reasoning models
+# ---------------------------------------------------------------------------
+
+
+class TestReasoningDetection:
+    """Test that reasoning models are correctly classified."""
+
+    def test_r1_distill_qwen_is_reasoning(self):
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        assert profile.reasoning_class == ReasoningClass.REASONING
+
+    def test_r1_distill_llama_is_reasoning(self):
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
+        assert profile.reasoning_class == ReasoningClass.REASONING
+
+    def test_r1_distill_is_dense_reasoning(self):
+        """R1 distills are dense (distilled from MoE into dense)."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-14B")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert profile.reasoning_class == ReasoningClass.REASONING
+        assert profile.profile_label == "Dense Reasoning"
+
+    def test_olmo_think_is_reasoning(self):
+        profile = detect_architecture("allenai/Olmo-3.1-32B-Think")
+        assert profile.reasoning_class == ReasoningClass.REASONING
+
+    def test_olmo_standard_is_not_reasoning(self):
+        """OLMo (without Think) must NOT be classified as reasoning.
+        Regression test: 'olmo' contains 'o1' substring."""
+        profile = detect_architecture("allenai/Olmo-3-7B-Instruct")
+        assert profile.reasoning_class == ReasoningClass.STANDARD
+
+    def test_falcon3_is_not_reasoning(self):
+        """falcon3 must NOT match 'o3' reasoning pattern."""
+        profile = detect_architecture("tiiuae/Falcon3-7B-Instruct")
+        assert profile.reasoning_class == ReasoningClass.STANDARD
+
+    def test_full_r1_is_moe_reasoning(self):
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1")
+        assert profile.is_moe
+        assert profile.reasoning_class == ReasoningClass.REASONING
+
+    def test_reasoning_dense_more_directions(self):
+        """Dense reasoning models need more directions (>=12) to span refusal."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert profile.method_overrides.get("n_directions", 0) >= 12
+
+    def test_reasoning_dense_more_passes(self):
+        """Dense reasoning models need more refinement passes (>=4)."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert profile.method_overrides.get("refinement_passes", 0) >= 4
+
+    def test_non_reasoning_is_standard(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.reasoning_class == ReasoningClass.STANDARD
+
+
+# ---------------------------------------------------------------------------
+#  Detection with config object
+# ---------------------------------------------------------------------------
+
+
+class TestConfigDetection:
+    """Test detection when a mock config is provided."""
+
+    def test_moe_config_attrs(self):
+        """Config with num_local_experts should be detected as MoE."""
+        class MockConfig:
+            model_type = "mixtral"
+            num_hidden_layers = 32
+            hidden_size = 4096
+            intermediate_size = 14336
+            vocab_size = 32000
+            num_local_experts = 8
+            num_experts_per_tok = 2
+
+        profile = detect_architecture(
+            "custom/mixtral-model", config=MockConfig(),
+            num_layers=32, hidden_size=4096,
+        )
+        assert profile.is_moe
+        assert profile.num_experts == 8
+        assert profile.num_active_experts == 2
+
+    def test_large_moe_threshold(self):
+        """MoE models with >100B params should be classified as large."""
+        class MockConfig:
+            model_type = "deepseek_v3"
+            num_hidden_layers = 61
+            hidden_size = 7168
+            intermediate_size = 18432
+            vocab_size = 102400
+            n_routed_experts = 256
+            num_experts_per_tok = 8
+
+        profile = detect_architecture(
+            "custom/large-moe", config=MockConfig(),
+        )
+        assert profile.arch_class == ArchitectureClass.LARGE_MOE
+
+    def test_small_moe_threshold(self):
+        """MoE models with <=16 experts should be classified as small."""
+        class MockConfig:
+            model_type = "mixtral"
+            num_hidden_layers = 32
+            hidden_size = 4096
+            intermediate_size = 14336
+            vocab_size = 32000
+            num_local_experts = 8
+            num_experts_per_tok = 2
+
+        profile = detect_architecture(
+            "custom/small-moe", config=MockConfig(),
+        )
+        assert profile.arch_class == ArchitectureClass.SMALL_MOE
+
+    def test_dense_config(self):
+        """Config without MoE attributes should be dense."""
+        class MockConfig:
+            model_type = "llama"
+            num_hidden_layers = 32
+            hidden_size = 4096
+            intermediate_size = 11008
+            vocab_size = 32000
+
+        profile = detect_architecture(
+            "custom/dense-model", config=MockConfig(),
+        )
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert not profile.is_moe
+
+    def test_llama4_scout_is_large_moe(self):
+        """Llama 4 Scout: 109B total params with 16 experts → LARGE_MOE.
+        Regression test: params > 100B must override low expert count."""
+        class MockConfig:
+            model_type = "llama4"
+            num_hidden_layers = 48
+            hidden_size = 5120
+            intermediate_size = 14336
+            vocab_size = 202048
+            num_local_experts = 16
+            num_experts_per_tok = 1
+
+        profile = detect_architecture(
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            config=MockConfig(),
+        )
+        assert profile.is_moe
+        assert profile.arch_class == ArchitectureClass.LARGE_MOE
+
+
+# ---------------------------------------------------------------------------
+#  Recommended defaults validation
+# ---------------------------------------------------------------------------
+
+
+class TestRecommendedDefaults:
+    """Test that recommended defaults match research findings."""
+
+    def test_dense_standard_no_riemannian(self):
+        """Dense Standard: Riemannian OFF (manifolds are flat)."""
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert not profile.breakthrough_modules.get("riemannian", True)
+
+    def test_dense_standard_anti_ouroboros_on(self):
+        """Dense Standard: Anti-Ouroboros ON for self-repair mapping."""
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.breakthrough_modules.get("anti_ouroboros", False)
+
+    def test_dense_standard_spectral_cert_on(self):
+        """Dense Standard: Spectral cert ON for verification."""
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert profile.breakthrough_modules.get("spectral_cert", False)
+
+    def test_moe_conditional_on(self):
+        """MoE: Conditional abliteration is #1 technique (Cracken AI 2025)."""
+        profile = detect_architecture("openai/gpt-oss-20b")
+        assert profile.breakthrough_modules.get("conditional", False)
+
+    def test_moe_no_project_embeddings(self):
+        """MoE: Project embeddings OFF (cascades through router)."""
+        profile = detect_architecture("openai/gpt-oss-20b")
+        assert not profile.method_overrides.get("project_embeddings", True)
+
+    def test_moe_per_expert_directions(self):
+        """MoE: Per-expert directions ON (global directions fail on MoE)."""
+        profile = detect_architecture("openai/gpt-oss-20b")
+        assert profile.method_overrides.get("per_expert_directions", False)
+
+    def test_large_moe_riemannian_on(self):
+        """Large MoE: Riemannian ON (curved shared layer geometry)."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-V3.2")
+        assert profile.breakthrough_modules.get("riemannian", False)
+
+    def test_reasoning_dense_jailbreak_contrast(self):
+        """Reasoning Dense: Jailbreak contrast ON for thinking-chain refusal."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        assert profile.method_overrides.get("use_jailbreak_contrast", False)
+
+    def test_reasoning_moe_gentle_transplant(self):
+        """Reasoning MoE: transplant_blend very low (preserve reasoning)."""
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1")
+        assert profile.method_overrides.get("transplant_blend", 1.0) <= 0.10
+
+
+# ---------------------------------------------------------------------------
+#  Profile summary
+# ---------------------------------------------------------------------------
+
+
+class TestProfileSummary:
+    """Test the human-readable profile summary."""
+
+    def test_summary_contains_profile_label(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        summary = get_profile_summary(profile)
+        assert "Dense Standard" in summary
+
+    def test_summary_contains_method(self):
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        summary = get_profile_summary(profile)
+        assert "aggressive" in summary
+
+    def test_summary_contains_citations(self):
+        profile = detect_architecture("openai/gpt-oss-20b")
+        summary = get_profile_summary(profile)
+        assert "SAFEx" in summary or "Cracken" in summary
+
+    def test_summary_contains_moe_info(self):
+        profile = detect_architecture("openai/gpt-oss-20b")
+        summary = get_profile_summary(profile)
+        assert "MoE" in summary
+
+    def test_summary_contains_breakthrough_modules(self):
+        profile = detect_architecture("openai/gpt-oss-20b")
+        summary = get_profile_summary(profile)
+        assert "conditional" in summary
+
+
+# ---------------------------------------------------------------------------
+#  apply_profile_to_method_config
+# ---------------------------------------------------------------------------
+
+
+class TestApplyProfile:
+    """Test that profile overrides are correctly applied to method configs."""
+
+    def test_overrides_applied(self):
+        from obliteratus.abliterate import METHODS
+        profile = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        base = dict(METHODS["aggressive"])
+        merged = apply_profile_to_method_config(profile, base)
+        assert merged["n_directions"] == profile.method_overrides["n_directions"]
+
+    def test_non_overridden_preserved(self):
+        from obliteratus.abliterate import METHODS
+        profile = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        base = dict(METHODS["aggressive"])
+        merged = apply_profile_to_method_config(profile, base)
+        # norm_preserve is not in overrides, should come from base
+        assert merged["norm_preserve"] == base["norm_preserve"]
+
+    def test_empty_overrides(self):
+        from obliteratus.abliterate import METHODS
+        base = dict(METHODS["advanced"])
+        profile = ArchitectureProfile(
+            arch_class=ArchitectureClass.DENSE,
+            reasoning_class=ReasoningClass.STANDARD,
+            method_overrides={},
+            breakthrough_modules={},
+        )
+        merged = apply_profile_to_method_config(profile, base)
+        assert merged == base
+
+    def test_override_key_not_in_base_is_added(self):
+        """Override keys absent from base config should be added to result.
+
+        This is important for the UI auto-detect path: keys like
+        use_jailbreak_contrast may not exist in the base method config
+        but are valid pipeline parameters that app.py reads via merged.get().
+        """
+        from obliteratus.abliterate import METHODS
+        base = dict(METHODS["advanced"])
+        profile = ArchitectureProfile(
+            arch_class=ArchitectureClass.DENSE,
+            reasoning_class=ReasoningClass.STANDARD,
+            method_overrides={"use_jailbreak_contrast": True},
+            breakthrough_modules={},
+        )
+        merged = apply_profile_to_method_config(profile, base)
+        assert merged["use_jailbreak_contrast"] is True
+
+
+# ---------------------------------------------------------------------------
+#  All 6 profile combinations
+# ---------------------------------------------------------------------------
+
+
+class TestAllSixProfiles:
+    """Verify label, method, overrides, and breakthrough modules for each profile."""
+
+    def _make_moe_config(self, num_experts=8, active=2, layers=32, hidden=4096):
+        class C:
+            model_type = "mixtral"
+            num_hidden_layers = layers
+            hidden_size = hidden
+            intermediate_size = hidden * 4
+            vocab_size = 32000
+            num_local_experts = num_experts
+            num_experts_per_tok = active
+        return C()
+
+    def test_dense_standard_full(self):
+        p = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
+        assert p.profile_label == "Dense Standard"
+        assert p.recommended_method == "aggressive"
+        assert not p.breakthrough_modules["riemannian"]
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert not p.breakthrough_modules["conditional"]
+        assert len(p.profile_description) > 0
+        assert len(p.research_citations) > 0
+
+    def test_dense_reasoning_full(self):
+        p = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+        assert p.profile_label == "Dense Reasoning"
+        assert p.recommended_method == "aggressive"
+        assert p.method_overrides["n_directions"] >= 12
+        assert p.method_overrides["refinement_passes"] >= 4
+        assert p.method_overrides["use_jailbreak_contrast"] is True
+        assert p.method_overrides["use_chat_template"] is True
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["riemannian"]
+        assert p.breakthrough_modules["conditional"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert len(p.profile_description) > 0
+
+    def test_small_moe_standard_full(self):
+        config = self._make_moe_config(num_experts=8, active=2)
+        p = detect_architecture("custom/small-moe-model", config=config)
+        assert p.profile_label == "Small MoE Standard"
+        assert p.arch_class == ArchitectureClass.SMALL_MOE
+        assert p.recommended_method == "surgical"
+        assert p.method_overrides["per_expert_directions"] is True
+        assert p.method_overrides["invert_refusal"] is False
+        assert p.method_overrides["project_embeddings"] is False
+        assert p.breakthrough_modules["conditional"]
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert not p.breakthrough_modules["riemannian"]
+        assert len(p.profile_description) > 0
+
+    def test_small_moe_reasoning_full(self):
+        """The most fragile combination: MoE + reasoning."""
+        config = self._make_moe_config(num_experts=8, active=2)
+        # Add "think" to name to trigger reasoning detection
+        p = detect_architecture("custom/small-moe-think-model", config=config)
+        assert p.profile_label == "Small MoE Reasoning"
+        assert p.arch_class == ArchitectureClass.SMALL_MOE
+        assert p.reasoning_class == ReasoningClass.REASONING
+        assert p.recommended_method == "surgical"
+        assert p.method_overrides["per_expert_directions"] is True
+        assert p.method_overrides["use_jailbreak_contrast"] is True
+        assert p.method_overrides["use_chat_template"] is True
+        assert p.method_overrides["invert_refusal"] is False
+        assert p.breakthrough_modules["conditional"]
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert len(p.profile_description) > 0
+
+    def test_large_moe_standard_full(self):
+        config = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
+        p = detect_architecture("custom/large-moe-model", config=config)
+        assert p.profile_label == "Large MoE Standard"
+        assert p.arch_class == ArchitectureClass.LARGE_MOE
+        assert p.recommended_method == "surgical"
+        assert p.method_overrides["per_expert_directions"] is True
+        assert p.method_overrides["layer_adaptive_strength"] is True
+        assert p.method_overrides["expert_transplant"] is True
+        assert p.method_overrides["transplant_blend"] == 0.10
+        assert p.method_overrides["attention_head_surgery"] is True
+        assert p.method_overrides["project_embeddings"] is False
+        assert p.breakthrough_modules["conditional"]
+        assert p.breakthrough_modules["riemannian"]
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert len(p.profile_description) > 0
+
+    def test_large_moe_reasoning_full(self):
+        config = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
+        p = detect_architecture("custom/large-moe-r1-model", config=config)
+        assert p.profile_label == "Large MoE Reasoning"
+        assert p.arch_class == ArchitectureClass.LARGE_MOE
+        assert p.reasoning_class == ReasoningClass.REASONING
+        assert p.recommended_method == "surgical"
+        assert p.method_overrides["n_directions"] == 8
+        assert p.method_overrides["transplant_blend"] == 0.08
+        assert p.method_overrides["use_jailbreak_contrast"] is True
+        assert p.method_overrides["safety_neuron_masking"] is True
+        assert p.breakthrough_modules["conditional"]
+        assert p.breakthrough_modules["riemannian"]
+        assert p.breakthrough_modules["anti_ouroboros"]
+        assert p.breakthrough_modules["spectral_cert"]
+        assert len(p.profile_description) > 0
+
+
+# ---------------------------------------------------------------------------
+#  Edge cases
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeCases:
+    """Edge cases for architecture detection."""
+
+    def test_empty_model_name(self):
+        """Empty string should fall through to Dense Standard."""
+        profile = detect_architecture("")
+        assert profile.arch_class == ArchitectureClass.DENSE
+        assert profile.reasoning_class == ReasoningClass.STANDARD
+
+    def test_unknown_model_type_in_config(self):
+        """Unknown model_type should not cause MoE classification."""
+        class MockConfig:
+            model_type = "banana"
+            num_hidden_layers = 12
+            hidden_size = 768
+            intermediate_size = 3072
+            vocab_size = 30522
+        profile = detect_architecture("custom/unknown-arch", config=MockConfig())
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_config_with_zero_experts(self):
+        """num_local_experts=0 should not trigger MoE."""
+        class MockConfig:
+            model_type = "llama"
+            num_hidden_layers = 32
+            hidden_size = 4096
+            intermediate_size = 11008
+            vocab_size = 32000
+            num_local_experts = 0
+        profile = detect_architecture("custom/dense-with-zero", config=MockConfig())
+        assert not profile.is_moe
+        assert profile.arch_class == ArchitectureClass.DENSE
+
+    def test_allcaps_model_name(self):
+        """Case-insensitive matching should work for all-caps names."""
+        profile = detect_architecture("DEEPSEEK-AI/DEEPSEEK-R1-DISTILL-QWEN-7B")
+        assert profile.reasoning_class == ReasoningClass.REASONING
+        assert profile.arch_class == ArchitectureClass.DENSE  # distill = dense
+
+    def test_single_expert_is_moe(self):
+        """num_local_experts=1 is technically MoE (single expert)."""
+        class MockConfig:
+            model_type = "llama"
+            num_hidden_layers = 32
+            hidden_size = 4096
+            intermediate_size = 11008
+            vocab_size = 32000
+            num_local_experts = 1
+        profile = detect_architecture("custom/single-expert", config=MockConfig())
+        # 1 expert still triggers MoE detection (the code treats any >0 as MoE)
+        assert profile.is_moe
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
new file mode 100644
index 0000000..b29939b
--- /dev/null
+++ b/tests/test_benchmarks.py
@@ -0,0 +1,183 @@
+"""Tests for lightweight benchmark harnesses."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import torch
+
+from obliteratus.evaluation.benchmarks import (
+    KNOWLEDGE_ITEMS,
+    TRUTHFULNESS_ITEMS,
+    MATH_REASONING_ITEMS,
+    BenchmarkRunner,
+    BenchmarkResult,
+    format_benchmark_report,
+)
+
+
+def _make_mock_model_and_tokenizer(vocab_size=1000, hidden_dim=64):
+    """Create mock model and tokenizer for benchmark testing."""
+    model = MagicMock()
+
+    # Model returns logits when called
+    def mock_forward(**kwargs):
+        input_ids = kwargs.get("input_ids", torch.randint(0, vocab_size, (1, 10)))
+        batch_size, seq_len = input_ids.shape
+        result = MagicMock()
+        result.logits = torch.randn(batch_size, seq_len, vocab_size)
+        return result
+
+    model.side_effect = mock_forward
+    model.__call__ = mock_forward
+
+    # Model.generate returns token IDs
+    def mock_generate(**kwargs):
+        input_ids = kwargs.get("input_ids", torch.randint(0, vocab_size, (1, 10)))
+        # Append some "generated" tokens
+        gen_tokens = torch.randint(0, vocab_size, (1, 20))
+        return torch.cat([input_ids, gen_tokens], dim=1)
+
+    model.generate = mock_generate
+
+    # Model.parameters for device detection
+    param = torch.nn.Parameter(torch.randn(1))
+    model.parameters = MagicMock(return_value=iter([param]))
+
+    tokenizer = MagicMock()
+    tokenizer.return_value = {
+        "input_ids": torch.randint(0, vocab_size, (1, 15)),
+        "attention_mask": torch.ones(1, 15, dtype=torch.long),
+    }
+    tokenizer.side_effect = lambda text, **kwargs: {
+        "input_ids": torch.randint(0, vocab_size, (1, 15)),
+        "attention_mask": torch.ones(1, 15, dtype=torch.long),
+    }
+
+    def mock_decode(ids, **kwargs):
+        return "The answer is 42. This is a generated response about the topic."
+
+    def mock_encode(text, **kwargs):
+        # Return different IDs for A, B, C, D
+        if text == "A":
+            return [65]
+        elif text == "B":
+            return [66]
+        elif text == "C":
+            return [67]
+        elif text == "D":
+            return [68]
+        return [hash(text) % vocab_size]
+
+    tokenizer.decode = mock_decode
+    tokenizer.encode = mock_encode
+
+    return model, tokenizer
+
+
+class TestBenchmarkItems:
+    def test_knowledge_items_have_required_fields(self):
+        for item in KNOWLEDGE_ITEMS:
+            assert "q" in item
+            assert "choices" in item
+            assert "answer" in item
+            assert "category" in item
+            assert 0 <= item["answer"] < len(item["choices"])
+
+    def test_knowledge_items_count(self):
+        assert len(KNOWLEDGE_ITEMS) >= 20
+
+    def test_knowledge_categories(self):
+        categories = set(item["category"] for item in KNOWLEDGE_ITEMS)
+        assert len(categories) >= 4  # multiple categories
+
+    def test_truthfulness_items_have_required_fields(self):
+        for item in TRUTHFULNESS_ITEMS:
+            assert "q" in item
+            assert "true_answer" in item
+            assert "common_false" in item
+            assert "category" in item
+
+    def test_truthfulness_items_count(self):
+        assert len(TRUTHFULNESS_ITEMS) >= 10
+
+    def test_math_items_have_required_fields(self):
+        for item in MATH_REASONING_ITEMS:
+            assert "q" in item
+            assert "answer" in item
+            assert "category" in item
+            assert isinstance(item["answer"], (int, float))
+
+    def test_math_items_count(self):
+        assert len(MATH_REASONING_ITEMS) >= 10
+
+
+class TestBenchmarkRunner:
+    def test_knowledge_probe_returns_result(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        result = runner.run_knowledge_probe()
+
+        assert isinstance(result, BenchmarkResult)
+        assert result.benchmark_name == "knowledge_probe"
+        assert 0 <= result.score <= 1.0
+        assert result.n_total == len(KNOWLEDGE_ITEMS)
+        assert result.n_correct >= 0
+        assert len(result.per_category) > 0
+
+    def test_truthfulness_probe_returns_result(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        result = runner.run_truthfulness_probe()
+
+        assert isinstance(result, BenchmarkResult)
+        assert result.benchmark_name == "truthfulness_probe"
+        assert 0 <= result.score <= 1.0
+        assert result.n_total == len(TRUTHFULNESS_ITEMS)
+
+    def test_math_probe_returns_result(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        result = runner.run_math_reasoning_probe()
+
+        assert isinstance(result, BenchmarkResult)
+        assert result.benchmark_name == "math_reasoning_probe"
+        assert 0 <= result.score <= 1.0
+        assert result.n_total == len(MATH_REASONING_ITEMS)
+
+    def test_run_all(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        results = runner.run_all()
+
+        assert "knowledge" in results
+        assert "truthfulness" in results
+        assert "math_reasoning" in results
+
+    def test_format_report(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        results = runner.run_all()
+        report = format_benchmark_report(results)
+
+        assert "Capability" in report
+        assert "knowledge" in report
+        assert "truthfulness" in report
+        assert "math" in report
+
+    def test_per_category_scores_bounded(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+        result = runner.run_knowledge_probe()
+
+        for cat, score in result.per_category.items():
+            assert 0 <= score <= 1.0
+
+    def test_extract_number(self):
+        model, tokenizer = _make_mock_model_and_tokenizer()
+        runner = BenchmarkRunner(model, tokenizer, device="cpu")
+
+        assert runner._extract_number("The answer is 42.") == 42.0
+        assert runner._extract_number("$20.50 is the price") == 20.50
+        assert runner._extract_number("Result: -3.14") == -3.14
+        assert runner._extract_number("No numbers here") is None
diff --git a/tests/test_causal_and_transfer.py b/tests/test_causal_and_transfer.py
new file mode 100644
index 0000000..9e18a48
--- /dev/null
+++ b/tests/test_causal_and_transfer.py
@@ -0,0 +1,535 @@
+"""Tests for causal tracing, residual stream decomposition,
+probing classifiers, and cross-model transfer analysis."""
+
+from __future__ import annotations
+
+import math
+
+import torch
+
+from obliteratus.analysis.causal_tracing import (
+    CausalRefusalTracer,
+    CausalTracingResult,
+    ComponentCausalEffect,
+)
+from obliteratus.analysis.residual_stream import (
+    ResidualStreamDecomposer,
+    ResidualStreamResult,
+    LayerDecomposition,
+)
+from obliteratus.analysis.probing_classifiers import (
+    LinearRefusalProbe,
+    ProbeResult,
+    ProbingSuiteResult,
+)
+from obliteratus.analysis.cross_model_transfer import (
+    TransferAnalyzer,
+    CrossModelResult,
+    CrossCategoryResult,
+    CrossLayerResult,
+    UniversalityReport,
+)
+
+
+# ---------------------------------------------------------------------------
+#  Helpers
+# ---------------------------------------------------------------------------
+
+def _make_layer_activations(
+    n_layers=8, hidden_dim=32, refusal_strength=2.0,
+):
+    """Create synthetic per-layer activations with planted refusal signal."""
+    torch.manual_seed(42)
+    directions = {}
+    activations = {}
+
+    base = torch.randn(hidden_dim) * 0.1
+
+    for i in range(n_layers):
+        d = torch.randn(hidden_dim)
+        d = d / d.norm()
+        directions[i] = d
+
+        # Stronger refusal in middle layers
+        strength = refusal_strength if 2 <= i <= 5 else 0.3
+        activations[i] = base + strength * d + torch.randn(hidden_dim) * 0.05
+
+    return activations, directions
+
+
+def _make_separable_activations(
+    n_per_class=20, hidden_dim=16, separation=3.0, seed=42,
+):
+    """Create harmful/harmless activations that are linearly separable."""
+    torch.manual_seed(seed)
+    direction = torch.randn(hidden_dim)
+    direction = direction / direction.norm()
+
+    harmful = [
+        torch.randn(hidden_dim) * 0.5 + separation * direction
+        for _ in range(n_per_class)
+    ]
+    harmless = [
+        torch.randn(hidden_dim) * 0.5 - separation * direction
+        for _ in range(n_per_class)
+    ]
+    return harmful, harmless, direction
+
+
+# ===========================================================================
+#  Tests: Causal Tracing
+# ===========================================================================
+
+class TestCausalTracing:
+    def test_basic_tracing(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer(noise_level=3.0)
+        result = tracer.trace_from_activations(activations, directions)
+
+        assert isinstance(result, CausalTracingResult)
+        assert result.n_layers == 8
+        assert result.clean_refusal_strength > 0
+        assert len(result.component_effects) == 8
+
+    def test_causal_components_identified(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer(noise_level=3.0, causal_threshold=0.05)
+        result = tracer.trace_from_activations(activations, directions)
+
+        assert result.circuit_size > 0
+        assert result.circuit_fraction > 0
+        assert len(result.causal_components) > 0
+
+    def test_corruption_reduces_strength(self):
+        activations, directions = _make_layer_activations(refusal_strength=5.0)
+        tracer = CausalRefusalTracer(noise_level=10.0)
+        result = tracer.trace_from_activations(activations, directions)
+
+        # With high noise, corrupted should differ from clean
+        assert result.total_corruption_effect != 0
+
+    def test_single_direction_input(self):
+        activations, directions = _make_layer_activations()
+        single_dir = directions[3]  # Use one direction for all layers
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(activations, single_dir)
+
+        assert result.n_layers == 8
+        assert len(result.component_effects) == 8
+
+    def test_component_effects_structure(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(activations, directions)
+
+        for e in result.component_effects:
+            assert isinstance(e, ComponentCausalEffect)
+            assert e.component_type == "full_layer"
+            assert e.causal_effect >= 0
+
+    def test_correlation_causal_agreement_bounded(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(activations, directions)
+        assert -1.0 <= result.correlation_causal_agreement <= 1.0
+
+    def test_silent_contributors(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(activations, directions)
+        sc = tracer.identify_silent_contributors(result, top_k=3)
+
+        assert "silent_contributors" in sc
+        assert "loud_non_contributors" in sc
+        assert len(sc["silent_contributors"]) <= 3
+
+    def test_custom_component_types(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(
+            activations, directions,
+            component_types=["attention", "mlp"],
+        )
+        # 8 layers * 2 types = 16 effects
+        assert len(result.component_effects) == 16
+
+    def test_format_report(self):
+        activations, directions = _make_layer_activations()
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(activations, directions)
+        report = CausalRefusalTracer.format_tracing_report(result)
+
+        assert "Causal Tracing" in report
+        assert "Circuit size" in report
+
+
+# ===========================================================================
+#  Tests: Residual Stream Decomposition
+# ===========================================================================
+
+class TestResidualStreamDecomposition:
+    def test_basic_decomposition(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(activations, directions)
+
+        assert isinstance(result, ResidualStreamResult)
+        assert result.n_layers == 8
+        assert len(result.per_layer) == 8
+        assert result.total_attention_contribution > 0
+        assert result.total_mlp_contribution > 0
+
+    def test_attention_fraction_bounded(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(activations, directions)
+        assert 0 <= result.attention_fraction <= 1.0
+
+    def test_with_head_count(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer(n_heads_per_layer=4)
+        result = decomposer.decompose(activations, directions)
+
+        assert result.n_refusal_heads >= 0
+        assert len(result.refusal_heads) > 0
+
+    def test_layer_decomposition_structure(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(activations, directions)
+
+        for _layer_idx, d in result.per_layer.items():
+            assert isinstance(d, LayerDecomposition)
+            assert 0 <= d.attn_mlp_ratio <= 1.0
+            assert d.cumulative_refusal >= 0
+
+    def test_accumulation_profile(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(activations, directions)
+
+        assert len(result.accumulation_profile) == 8
+        # Accumulation should be monotonically non-decreasing
+        for i in range(1, len(result.accumulation_profile)):
+            assert result.accumulation_profile[i] >= result.accumulation_profile[i - 1]
+
+    def test_with_explicit_attn_mlp(self):
+        """Test with provided attention and MLP outputs."""
+        torch.manual_seed(42)
+        hidden_dim = 16
+        n_layers = 4
+        ref_dir = torch.randn(hidden_dim)
+        ref_dir = ref_dir / ref_dir.norm()
+
+        acts = {}
+        attn_outs = {}
+        mlp_outs = {}
+        for i in range(n_layers):
+            attn = torch.randn(hidden_dim) * 0.5
+            mlp = torch.randn(hidden_dim) * 0.5
+            attn_outs[i] = attn
+            mlp_outs[i] = mlp
+            acts[i] = attn + mlp + (torch.randn(hidden_dim) * 0.1 if i == 0 else acts[i-1])
+
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(
+            acts, ref_dir,
+            attn_outputs=attn_outs, mlp_outputs=mlp_outs,
+        )
+        assert len(result.per_layer) == n_layers
+
+    def test_single_direction(self):
+        activations, _ = _make_layer_activations()
+        single_dir = torch.randn(32)
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(activations, single_dir)
+        assert result.n_layers == 8
+
+    def test_head_concentration_bounded(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer(n_heads_per_layer=8)
+        result = decomposer.decompose(activations, directions)
+        assert 0 <= result.head_concentration <= 1.0
+
+    def test_format_decomposition(self):
+        activations, directions = _make_layer_activations()
+        decomposer = ResidualStreamDecomposer(n_heads_per_layer=4)
+        result = decomposer.decompose(activations, directions)
+        report = ResidualStreamDecomposer.format_decomposition(result)
+
+        assert "Residual Stream" in report
+        assert "Attention" in report
+        assert "MLP" in report
+
+
+# ===========================================================================
+#  Tests: Probing Classifiers
+# ===========================================================================
+
+class TestProbingClassifiers:
+    def test_separable_data_high_accuracy(self):
+        """With well-separated data, probe should achieve high accuracy."""
+        harmful, harmless, direction = _make_separable_activations(
+            n_per_class=30, separation=5.0,
+        )
+        probe = LinearRefusalProbe(n_epochs=200)
+        result = probe.probe_layer(harmful, harmless, direction, layer_idx=5)
+
+        assert isinstance(result, ProbeResult)
+        assert result.layer_idx == 5
+        assert result.accuracy > 0.7  # Should be separable
+
+    def test_inseparable_data_low_accuracy(self):
+        """With overlapping data, probe should have lower accuracy."""
+        harmful, harmless, direction = _make_separable_activations(
+            n_per_class=30, separation=0.01,
+        )
+        probe = LinearRefusalProbe(n_epochs=50)
+        result = probe.probe_layer(harmful, harmless, direction)
+        # Accuracy should be near chance (0.5)
+        assert result.accuracy < 0.9
+
+    def test_learned_direction_unit(self):
+        harmful, harmless, direction = _make_separable_activations()
+        probe = LinearRefusalProbe(n_epochs=100)
+        result = probe.probe_layer(harmful, harmless, direction)
+        assert abs(result.learned_direction.norm().item() - 1.0) < 0.01
+
+    def test_cosine_with_analytical(self):
+        """Learned direction should align with analytical direction."""
+        harmful, harmless, direction = _make_separable_activations(
+            n_per_class=50, separation=5.0,
+        )
+        probe = LinearRefusalProbe(n_epochs=300)
+        result = probe.probe_layer(harmful, harmless, direction)
+        # With clear separation, learned direction should agree
+        assert result.cosine_with_analytical > 0.3
+
+    def test_without_analytical_direction(self):
+        harmful, harmless, _ = _make_separable_activations()
+        probe = LinearRefusalProbe(n_epochs=50)
+        result = probe.probe_layer(harmful, harmless)
+        assert result.cosine_with_analytical == 0.0
+
+    def test_auroc_bounded(self):
+        harmful, harmless, direction = _make_separable_activations()
+        probe = LinearRefusalProbe(n_epochs=100)
+        result = probe.probe_layer(harmful, harmless, direction)
+        assert 0 <= result.auroc <= 1.0
+
+    def test_mutual_information_nonnegative(self):
+        harmful, harmless, direction = _make_separable_activations()
+        probe = LinearRefusalProbe(n_epochs=100)
+        result = probe.probe_layer(harmful, harmless, direction)
+        assert result.mutual_information >= 0
+
+    def test_probe_all_layers(self):
+        harmful_acts = {}
+        harmless_acts = {}
+        anal_dirs = {}
+        for li in range(6):
+            harmful, harmless, direction = _make_separable_activations(
+                n_per_class=15, separation=3.0, seed=li * 10,
+            )
+            harmful_acts[li] = harmful
+            harmless_acts[li] = harmless
+            anal_dirs[li] = direction
+
+        probe = LinearRefusalProbe(n_epochs=100)
+        result = probe.probe_all_layers(harmful_acts, harmless_acts, anal_dirs)
+
+        assert isinstance(result, ProbingSuiteResult)
+        assert len(result.per_layer) == 6
+        assert result.best_accuracy > 0
+        assert result.total_mutual_information >= 0
+
+    def test_format_report(self):
+        harmful_acts = {}
+        harmless_acts = {}
+        for li in range(4):
+            harmful, harmless, _ = _make_separable_activations(
+                n_per_class=15, seed=li,
+            )
+            harmful_acts[li] = harmful
+            harmless_acts[li] = harmless
+
+        probe = LinearRefusalProbe(n_epochs=50)
+        result = probe.probe_all_layers(harmful_acts, harmless_acts)
+        report = LinearRefusalProbe.format_probing_report(result)
+
+        assert "Linear Probing" in report
+        assert "accuracy" in report.lower()
+
+    def test_cross_entropy_finite(self):
+        harmful, harmless, direction = _make_separable_activations()
+        probe = LinearRefusalProbe(n_epochs=100)
+        result = probe.probe_layer(harmful, harmless, direction)
+        assert math.isfinite(result.cross_entropy)
+
+
+# ===========================================================================
+#  Tests: Cross-Model Transfer Analysis
+# ===========================================================================
+
+class TestTransferAnalysis:
+    def test_cross_model_identical(self):
+        """Identical directions should give perfect transfer."""
+        torch.manual_seed(42)
+        dirs = {i: torch.randn(32) for i in range(8)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs, dirs, "model_a", "model_a")
+
+        assert isinstance(result, CrossModelResult)
+        assert result.mean_transfer_score > 0.99
+
+    def test_cross_model_random(self):
+        """Random directions should give low transfer."""
+        torch.manual_seed(42)
+        dirs_a = {i: torch.randn(32) for i in range(8)}
+        torch.manual_seed(99)
+        dirs_b = {i: torch.randn(32) for i in range(8)}
+
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs_a, dirs_b, "a", "b")
+        # Random 32-dim vectors have low expected cosine
+        assert result.mean_transfer_score < 0.7
+
+    def test_cross_model_structure(self):
+        torch.manual_seed(42)
+        dirs_a = {i: torch.randn(32) for i in range(8)}
+        dirs_b = {i: torch.randn(32) for i in range(8)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs_a, dirs_b)
+
+        assert 0 <= result.transfer_above_threshold <= 1.0
+        assert len(result.per_layer_transfer) == 8
+
+    def test_cross_category_similar(self):
+        """Similar categories should cluster together."""
+        torch.manual_seed(42)
+        shared = torch.randn(32)
+        shared = shared / shared.norm()
+
+        cat_dirs = {}
+        for cat in ["weapons", "bombs", "explosives"]:
+            d = shared + 0.2 * torch.randn(32)
+            cat_dirs[cat] = d / d.norm()
+
+        # Add one very different category
+        cat_dirs["fraud"] = torch.randn(32)
+
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_category(cat_dirs)
+
+        assert isinstance(result, CrossCategoryResult)
+        assert result.mean_cross_category_transfer > 0
+        assert len(result.categories) == 4
+
+    def test_cross_category_specificity(self):
+        torch.manual_seed(42)
+        cat_dirs = {f"cat_{i}": torch.randn(16) for i in range(5)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_category(cat_dirs)
+
+        assert result.most_universal_category != ""
+        assert result.most_specific_category != ""
+        assert len(result.category_clusters) > 0
+
+    def test_cross_layer(self):
+        _, directions = _make_layer_activations()
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_layer(directions)
+
+        assert isinstance(result, CrossLayerResult)
+        assert result.mean_adjacent_transfer >= 0
+        assert result.transfer_decay_rate >= 0
+
+    def test_cross_layer_adjacent_vs_distant(self):
+        """Adjacent layers typically have higher transfer than distant ones."""
+        torch.manual_seed(42)
+        # Create directions with gradual drift
+        d = torch.randn(32)
+        d = d / d.norm()
+        directions = {}
+        for i in range(10):
+            noise = torch.randn(32) * 0.1 * i
+            di = d + noise
+            directions[i] = di / di.norm()
+
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_layer(directions)
+        # Adjacent should have higher transfer than distant
+        assert result.mean_adjacent_transfer >= result.mean_distant_transfer - 0.1
+
+    def test_universality_index(self):
+        torch.manual_seed(42)
+        dirs = {i: torch.randn(32) for i in range(6)}
+
+        analyzer = TransferAnalyzer()
+        cross_model = analyzer.analyze_cross_model(dirs, dirs)
+        cross_layer = analyzer.analyze_cross_layer(dirs)
+        cat_dirs = {f"cat_{i}": torch.randn(32) for i in range(4)}
+        cross_cat = analyzer.analyze_cross_category(cat_dirs)
+
+        report = analyzer.compute_universality_index(
+            cross_model=cross_model,
+            cross_category=cross_cat,
+            cross_layer=cross_layer,
+        )
+
+        assert isinstance(report, UniversalityReport)
+        assert 0 <= report.universality_index <= 1.0
+
+    def test_universality_empty(self):
+        analyzer = TransferAnalyzer()
+        report = analyzer.compute_universality_index()
+        assert report.universality_index == 0.0
+
+    def test_format_cross_model(self):
+        torch.manual_seed(42)
+        dirs = {i: torch.randn(32) for i in range(4)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs, dirs, "llama", "mistral")
+        report = TransferAnalyzer.format_cross_model(result)
+        assert "Cross-Model" in report
+        assert "llama" in report
+
+    def test_format_cross_category(self):
+        torch.manual_seed(42)
+        cat_dirs = {f"cat_{i}": torch.randn(16) for i in range(3)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_category(cat_dirs)
+        report = TransferAnalyzer.format_cross_category(result)
+        assert "Cross-Category" in report
+
+    def test_format_universality(self):
+        analyzer = TransferAnalyzer()
+        report_obj = analyzer.compute_universality_index()
+        report = TransferAnalyzer.format_universality(report_obj)
+        assert "Universality" in report
+
+    def test_dimension_mismatch_handled(self):
+        """Cross-model with different hidden dims should truncate."""
+        dirs_a = {0: torch.randn(32), 1: torch.randn(32)}
+        dirs_b = {0: torch.randn(64), 1: torch.randn(64)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs_a, dirs_b)
+        assert len(result.per_layer_transfer) == 2
+
+
+# ===========================================================================
+#  Tests: Integration
+# ===========================================================================
+
+class TestNewImports:
+    def test_all_new_modules_importable(self):
+        from obliteratus.analysis import (
+            CausalRefusalTracer,
+            ResidualStreamDecomposer,
+            LinearRefusalProbe,
+            TransferAnalyzer,
+        )
+        assert CausalRefusalTracer is not None
+        assert ResidualStreamDecomposer is not None
+        assert LinearRefusalProbe is not None
+        assert TransferAnalyzer is not None
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..98ed2ab
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,133 @@
+"""CLI dispatch tests for obliteratus.cli.main().
+
+These tests verify argument parsing and subcommand routing without
+downloading real models or running any pipeline.  They use
+``unittest.mock.patch`` to capture stdout/stderr and
+``pytest.raises(SystemExit)`` for argparse exits.
+"""
+
+from __future__ import annotations
+
+from io import StringIO
+from unittest.mock import patch
+
+import pytest
+
+from obliteratus.cli import main
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _capture_exit(argv: list[str] | None, *, expect_code: int | None = None):
+    """Call main(argv), expecting SystemExit; return captured stderr text."""
+    buf = StringIO()
+    with pytest.raises(SystemExit) as exc_info, patch("sys.stderr", buf):
+        main(argv)
+    if expect_code is not None:
+        assert exc_info.value.code == expect_code
+    return buf.getvalue()
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestCLIDispatch:
+    """Test suite for CLI argument parsing and subcommand dispatch."""
+
+    # 1. No args -> prints help / exits with error
+    def test_main_no_args_prints_help(self):
+        """Calling main() with no args should exit (subcommand is required)."""
+        stderr_text = _capture_exit([], expect_code=2)
+        # argparse prints usage info to stderr on error
+        assert "usage" in stderr_text.lower() or "required" in stderr_text.lower()
+
+    # 2. models command lists models without error
+    def test_models_command(self):
+        """Calling main(['models']) should list models without raising."""
+        with patch("obliteratus.cli.console") as mock_console:
+            main(["models"])
+        # console.print is called at least once to render the table
+        assert mock_console.print.call_count >= 1
+
+    # 3. obliterate without model arg -> error
+    def test_obliterate_requires_model(self):
+        """Calling main(['obliterate']) without a model arg should error."""
+        stderr_text = _capture_exit(["obliterate"], expect_code=2)
+        assert "model" in stderr_text.lower() or "required" in stderr_text.lower()
+
+    # 4. obliterate --method accepts valid methods
+    def test_obliterate_valid_methods(self):
+        """Test that --method accepts all 9 pipeline methods."""
+        valid_methods = [
+            "basic", "advanced", "aggressive", "spectral_cascade",
+            "informed", "surgical", "optimized", "inverted", "nuclear",
+        ]
+        for method in valid_methods:
+            # Patch the actual pipeline execution so nothing runs
+            with patch("obliteratus.cli._cmd_abliterate") as mock_cmd:
+                main(["obliterate", "fake/model", "--method", method])
+                mock_cmd.assert_called_once()
+                args_passed = mock_cmd.call_args[0][0]
+                assert args_passed.method == method
+
+    # 4b. invalid methods are rejected
+    def test_obliterate_rejects_invalid_method(self):
+        """The CLI --method flag rejects unknown method names."""
+        stderr_text = _capture_exit(
+            ["obliterate", "fake/model", "--method", "nonexistent"],
+            expect_code=2,
+        )
+        assert "invalid choice" in stderr_text.lower()
+
+    # 5. run requires config path
+    def test_run_requires_config(self):
+        """Calling main(['run']) without a config path should error."""
+        stderr_text = _capture_exit(["run"], expect_code=2)
+        assert "config" in stderr_text.lower() or "required" in stderr_text.lower()
+
+    # 6. aggregate with nonexistent dir handles gracefully
+    def test_aggregate_command_missing_dir(self):
+        """Calling main(['aggregate']) with nonexistent dir should handle gracefully."""
+        with patch("obliteratus.cli.console") as mock_console:
+            main(["aggregate", "--dir", "/nonexistent/path/to/nowhere"])
+        # The command prints a message about no contributions found and returns
+        printed_text = " ".join(
+            str(call) for call in mock_console.print.call_args_list
+        )
+        assert "no contributions found" in printed_text.lower() or mock_console.print.called
+
+    # 7. --help flag prints help
+    def test_help_flag(self):
+        """Calling main(['--help']) should print help and exit 0."""
+        buf = StringIO()
+        with pytest.raises(SystemExit) as exc_info, patch("sys.stdout", buf):
+            main(["--help"])
+        assert exc_info.value.code == 0
+        output = buf.getvalue()
+        assert "obliteratus" in output.lower() or "usage" in output.lower()
+
+    # 8. interactive subcommand is registered
+    def test_interactive_command_exists(self):
+        """Verify 'interactive' subcommand is registered and dispatches."""
+        with patch("obliteratus.cli._cmd_interactive") as mock_cmd:
+            main(["interactive"])
+            mock_cmd.assert_called_once()
+
+    # 9. --contribute and --contribute-notes are accepted on obliterate
+    def test_contribute_flags_on_obliterate(self):
+        """Verify --contribute and --contribute-notes are accepted args."""
+        with patch("obliteratus.cli._cmd_abliterate") as mock_cmd:
+            main([
+                "obliterate", "fake/model",
+                "--contribute",
+                "--contribute-notes", "Testing contribution system",
+            ])
+            mock_cmd.assert_called_once()
+            args_passed = mock_cmd.call_args[0][0]
+            assert args_passed.contribute is True
+            assert args_passed.contribute_notes == "Testing contribution system"
diff --git a/tests/test_community.py b/tests/test_community.py
new file mode 100644
index 0000000..f240088
--- /dev/null
+++ b/tests/test_community.py
@@ -0,0 +1,567 @@
+"""Tests for the community contribution system."""
+
+import json
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+
+from obliteratus.community import (
+    CONTRIBUTION_SCHEMA_VERSION,
+    _config_fingerprint,
+    _model_short_name,
+    aggregate_results,
+    generate_latex_table,
+    load_contributions,
+    save_contribution,
+)
+
+
+# ── Helper: mock pipeline ──────────────────────────────────────────────
+
+
+def _make_mock_pipeline():
+    """Build a mock pipeline with all fields the community module reads."""
+    p = MagicMock()
+    p.handle.summary.return_value = {
+        "architecture": "LlamaForCausalLM",
+        "num_layers": 32,
+        "num_heads": 32,
+        "hidden_size": 4096,
+        "total_params": 8_000_000_000,
+    }
+    p.method = "advanced"
+    p.n_directions = 4
+    p.norm_preserve = True
+    p.regularization = 0.3
+    p.refinement_passes = 2
+    p.project_biases = True
+    p.use_chat_template = True
+    p.use_whitened_svd = True
+    p.true_iterative_refinement = False
+    p.use_jailbreak_contrast = False
+    p.layer_adaptive_strength = False
+    p.attention_head_surgery = True
+    p.safety_neuron_masking = False
+    p.per_expert_directions = False
+    p.use_sae_features = False
+    p.invert_refusal = False
+    p.project_embeddings = False
+    p.embed_regularization = 0.5
+    p.activation_steering = False
+    p.steering_strength = 0.3
+    p.expert_transplant = False
+    p.transplant_blend = 0.3
+    p.reflection_strength = 2.0
+    p.quantization = None
+
+    p._quality_metrics = {"perplexity": 5.2, "coherence": 0.8, "refusal_rate": 0.05}
+    p._strong_layers = [10, 11, 12, 13]
+    p._stage_durations = {
+        "summon": 3.0, "probe": 12.5, "distill": 4.1,
+        "excise": 2.0, "verify": 8.3, "rebirth": 5.0,
+    }
+    p._excise_modified_count = 128
+
+    # Direction data
+    d = torch.randn(4096)
+    d = d / d.norm()
+    p.refusal_directions = {10: d, 11: d + 0.01 * torch.randn(4096)}
+    p.refusal_subspaces = {10: torch.randn(4, 4096)}
+
+    # Excise details
+    p._refusal_heads = {10: [(0, 0.9), (3, 0.8)]}
+    p._sae_directions = {}
+    p._expert_safety_scores = {}
+    p._layer_excise_weights = {}
+    p._expert_directions = {}
+    p._steering_hooks = []
+
+    # Prompts
+    p.harmful_prompts = ["x"] * 33
+    p.harmless_prompts = ["y"] * 33
+    p.jailbreak_prompts = None
+
+    return p
+
+
+# ── Model short name ───────────────────────────────────────────────────
+
+
+class TestModelShortName:
+    def test_strips_org_prefix(self):
+        assert _model_short_name("meta-llama/Llama-2-7b-chat-hf") == "llama-2-7b-chat-hf"
+
+    def test_no_org_prefix(self):
+        assert _model_short_name("gpt2") == "gpt2"
+
+    def test_sanitizes_special_chars(self):
+        assert _model_short_name("org/Model_V2.1") == "model-v2-1"
+
+    def test_caps_length(self):
+        long_name = "a" * 100
+        assert len(_model_short_name(long_name)) <= 60
+
+    def test_collapses_dashes(self):
+        assert _model_short_name("org/Model---Name") == "model-name"
+
+    def test_strips_trailing_dashes(self):
+        assert _model_short_name("org/Model-") == "model"
+
+
+# ── Config fingerprint ─────────────────────────────────────────────────
+
+
+class TestConfigFingerprint:
+    def test_deterministic(self):
+        config = {"n_directions": 4, "norm_preserve": True}
+        fp1 = _config_fingerprint(config)
+        fp2 = _config_fingerprint(config)
+        assert fp1 == fp2
+
+    def test_different_configs_different_hashes(self):
+        fp1 = _config_fingerprint({"n_directions": 4})
+        fp2 = _config_fingerprint({"n_directions": 8})
+        assert fp1 != fp2
+
+    def test_key_order_invariant(self):
+        fp1 = _config_fingerprint({"a": 1, "b": 2})
+        fp2 = _config_fingerprint({"b": 2, "a": 1})
+        assert fp1 == fp2
+
+    def test_returns_8_char_hex(self):
+        fp = _config_fingerprint({"test": True})
+        assert len(fp) == 8
+        assert all(c in "0123456789abcdef" for c in fp)
+
+
+# ── Save contribution ──────────────────────────────────────────────────
+
+
+class TestSaveContribution:
+    def test_saves_json_file(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline,
+            model_name="meta-llama/Llama-2-7b-chat-hf",
+            output_dir=tmp_path,
+        )
+        assert path.exists()
+        assert path.suffix == ".json"
+        data = json.loads(path.read_text())
+        assert data["contribution_schema_version"] == CONTRIBUTION_SCHEMA_VERSION
+        assert data["model_name"] == "meta-llama/Llama-2-7b-chat-hf"
+
+    def test_filename_format(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline,
+            model_name="meta-llama/Llama-2-7b-chat-hf",
+            output_dir=tmp_path,
+        )
+        name = path.stem
+        assert name.startswith("llama-2-7b-chat-hf_advanced_")
+
+    def test_includes_telemetry_report(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline,
+            model_name="meta-llama/Llama-2-7b-chat-hf",
+            output_dir=tmp_path,
+        )
+        data = json.loads(path.read_text())
+        telemetry = data["telemetry"]
+        assert telemetry["schema_version"] == 2
+        assert telemetry["model"]["architecture"] == "LlamaForCausalLM"
+        assert telemetry["method"] == "advanced"
+        assert telemetry["quality_metrics"]["refusal_rate"] == 0.05
+
+    def test_includes_config_fingerprint(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline,
+            model_name="meta-llama/Llama-2-7b-chat-hf",
+            output_dir=tmp_path,
+        )
+        data = json.loads(path.read_text())
+        assert "config_fingerprint" in data
+        assert len(data["config_fingerprint"]) == 8
+
+    def test_includes_notes(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline,
+            model_name="test/model",
+            notes="Ran on A100 with default prompts",
+            output_dir=tmp_path,
+        )
+        data = json.loads(path.read_text())
+        assert data["notes"] == "Ran on A100 with default prompts"
+
+    def test_creates_output_dir(self, tmp_path):
+        subdir = tmp_path / "nested" / "dir"
+        assert not subdir.exists()
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline, model_name="test/model", output_dir=subdir,
+        )
+        assert subdir.exists()
+        assert path.exists()
+
+    def test_timestamp_format(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline, model_name="test/model", output_dir=tmp_path,
+        )
+        data = json.loads(path.read_text())
+        ts = data["timestamp"]
+        # Should be UTC ISO-ish: YYYYMMDDTHHMMSSZ
+        assert ts.endswith("Z")
+        assert "T" in ts
+        assert len(ts) == 16
+
+    def test_method_config_extracted(self, tmp_path):
+        pipeline = _make_mock_pipeline()
+        path = save_contribution(
+            pipeline, model_name="test/model", output_dir=tmp_path,
+        )
+        data = json.loads(path.read_text())
+        cfg = data["telemetry"]["method_config"]
+        assert cfg["n_directions"] == 4
+        assert cfg["norm_preserve"] is True
+        assert cfg["attention_head_surgery"] is True
+
+
+# ── Load contributions ─────────────────────────────────────────────────
+
+
+class TestLoadContributions:
+    def _write_contrib(self, directory, model, method, refusal_rate, idx=0):
+        """Write a minimal valid contribution file."""
+        record = {
+            "contribution_schema_version": CONTRIBUTION_SCHEMA_VERSION,
+            "timestamp": f"20260227T{idx:06d}Z",
+            "model_name": model,
+            "config_fingerprint": "abcd1234",
+            "notes": "",
+            "telemetry": {
+                "schema_version": 2,
+                "method": method,
+                "quality_metrics": {"refusal_rate": refusal_rate},
+            },
+        }
+        path = directory / f"contrib_{idx}.json"
+        path.write_text(json.dumps(record))
+        return path
+
+    def test_loads_valid_files(self, tmp_path):
+        self._write_contrib(tmp_path, "test/model", "advanced", 0.05, 0)
+        self._write_contrib(tmp_path, "test/model", "basic", 0.10, 1)
+        records = load_contributions(tmp_path)
+        assert len(records) == 2
+
+    def test_sorts_by_timestamp(self, tmp_path):
+        self._write_contrib(tmp_path, "model-b", "advanced", 0.05, 2)
+        self._write_contrib(tmp_path, "model-a", "advanced", 0.10, 1)
+        records = load_contributions(tmp_path)
+        assert records[0]["model_name"] == "model-a"
+        assert records[1]["model_name"] == "model-b"
+
+    def test_skips_non_contribution_json(self, tmp_path):
+        # Write a JSON file without contribution_schema_version
+        (tmp_path / "random.json").write_text('{"foo": "bar"}')
+        self._write_contrib(tmp_path, "test/model", "advanced", 0.05, 0)
+        records = load_contributions(tmp_path)
+        assert len(records) == 1
+
+    def test_skips_invalid_json(self, tmp_path):
+        (tmp_path / "bad.json").write_text("not valid json {{{")
+        self._write_contrib(tmp_path, "test/model", "advanced", 0.05, 0)
+        records = load_contributions(tmp_path)
+        assert len(records) == 1
+
+    def test_returns_empty_for_missing_dir(self, tmp_path):
+        records = load_contributions(tmp_path / "nonexistent")
+        assert records == []
+
+    def test_tracks_source_file(self, tmp_path):
+        self._write_contrib(tmp_path, "test/model", "advanced", 0.05, 0)
+        records = load_contributions(tmp_path)
+        assert "_source_file" in records[0]
+        assert "contrib_0.json" in records[0]["_source_file"]
+
+    def test_ignores_non_json_files(self, tmp_path):
+        (tmp_path / "readme.txt").write_text("some text")
+        self._write_contrib(tmp_path, "test/model", "advanced", 0.05, 0)
+        records = load_contributions(tmp_path)
+        assert len(records) == 1
+
+
+# ── Aggregate results ──────────────────────────────────────────────────
+
+
+class TestAggregateResults:
+    def _make_record(self, model, method, refusal_rate, perplexity=None, coherence=None):
+        metrics = {"refusal_rate": refusal_rate}
+        if perplexity is not None:
+            metrics["perplexity"] = perplexity
+        if coherence is not None:
+            metrics["coherence"] = coherence
+        return {
+            "model_name": model,
+            "telemetry": {
+                "method": method,
+                "quality_metrics": metrics,
+            },
+        }
+
+    def test_single_record(self):
+        records = [self._make_record("model-a", "advanced", 0.05)]
+        result = aggregate_results(records)
+        assert "model-a" in result
+        assert "advanced" in result["model-a"]
+        assert result["model-a"]["advanced"]["n_runs"] == 1
+        assert result["model-a"]["advanced"]["refusal_rate"]["mean"] == 0.05
+
+    def test_multiple_runs_same_model_method(self):
+        records = [
+            self._make_record("model-a", "advanced", 0.04),
+            self._make_record("model-a", "advanced", 0.06),
+        ]
+        result = aggregate_results(records)
+        stats = result["model-a"]["advanced"]
+        assert stats["n_runs"] == 2
+        assert stats["refusal_rate"]["mean"] == 0.05
+        assert stats["refusal_rate"]["min"] == 0.04
+        assert stats["refusal_rate"]["max"] == 0.06
+        assert stats["refusal_rate"]["n"] == 2
+
+    def test_multiple_models(self):
+        records = [
+            self._make_record("model-a", "advanced", 0.05),
+            self._make_record("model-b", "basic", 0.10),
+        ]
+        result = aggregate_results(records)
+        assert len(result) == 2
+        assert "model-a" in result
+        assert "model-b" in result
+
+    def test_multiple_methods(self):
+        records = [
+            self._make_record("model-a", "advanced", 0.05),
+            self._make_record("model-a", "basic", 0.10),
+        ]
+        result = aggregate_results(records)
+        assert len(result["model-a"]) == 2
+        assert "advanced" in result["model-a"]
+        assert "basic" in result["model-a"]
+
+    def test_std_zero_for_single_run(self):
+        records = [self._make_record("model-a", "advanced", 0.05)]
+        result = aggregate_results(records)
+        assert result["model-a"]["advanced"]["refusal_rate"]["std"] == 0.0
+
+    def test_multiple_metrics(self):
+        records = [
+            self._make_record("model-a", "advanced", 0.05, perplexity=5.2, coherence=0.8),
+        ]
+        result = aggregate_results(records)
+        stats = result["model-a"]["advanced"]
+        assert "refusal_rate" in stats
+        assert "perplexity" in stats
+        assert "coherence" in stats
+        assert stats["perplexity"]["mean"] == 5.2
+
+    def test_missing_metric_skipped(self):
+        records = [self._make_record("model-a", "advanced", 0.05)]
+        result = aggregate_results(records)
+        # coherence not provided, should not appear
+        assert "coherence" not in result["model-a"]["advanced"]
+
+    def test_unknown_model_and_method(self):
+        records = [{
+            "telemetry": {"quality_metrics": {"refusal_rate": 0.1}},
+        }]
+        result = aggregate_results(records)
+        assert "unknown" in result
+        assert "unknown" in result["unknown"]
+
+
+# ── LaTeX table generation ─────────────────────────────────────────────
+
+
+class TestGenerateLatexTable:
+    def _sample_aggregated(self):
+        return {
+            "meta-llama/Llama-2-7b-chat-hf": {
+                "advanced": {
+                    "n_runs": 3,
+                    "refusal_rate": {"mean": 0.04, "std": 0.01, "n": 3, "min": 0.03, "max": 0.05},
+                },
+                "basic": {
+                    "n_runs": 2,
+                    "refusal_rate": {"mean": 0.08, "std": 0.02, "n": 2, "min": 0.06, "max": 0.10},
+                },
+            },
+            "mistralai/Mistral-7B-Instruct-v0.2": {
+                "advanced": {
+                    "n_runs": 1,
+                    "refusal_rate": {"mean": 0.03, "std": 0.0, "n": 1, "min": 0.03, "max": 0.03},
+                },
+            },
+        }
+
+    def test_produces_valid_latex(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        assert "\\begin{tabular}" in latex
+        assert "\\end{tabular}" in latex
+        assert "\\toprule" in latex
+        assert "\\bottomrule" in latex
+
+    def test_includes_model_names(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        assert "Llama-2-7b-chat-hf" in latex
+        assert "Mistral-7B-Instruct-v0.2" in latex
+
+    def test_includes_method_headers(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        assert "advanced" in latex
+        assert "basic" in latex
+
+    def test_missing_method_shows_dash(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        # Mistral doesn't have "basic" method
+        assert "---" in latex
+
+    def test_shows_std_when_multiple_runs(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        assert "$\\pm$" in latex
+
+    def test_no_std_for_single_run(self):
+        agg = {
+            "model": {
+                "method": {
+                    "n_runs": 1,
+                    "refusal_rate": {"mean": 0.03, "std": 0.0, "n": 1, "min": 0.03, "max": 0.03},
+                },
+            },
+        }
+        latex = generate_latex_table(agg)
+        assert "$\\pm$" not in latex
+
+    def test_methods_filter(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg, methods=["advanced"])
+        assert "\\textbf{advanced}" in latex
+        assert "\\textbf{basic}" not in latex
+
+    def test_custom_metric(self):
+        agg = {
+            "model": {
+                "method": {
+                    "n_runs": 2,
+                    "perplexity": {"mean": 5.2, "std": 0.3, "n": 2, "min": 4.9, "max": 5.5},
+                },
+            },
+        }
+        latex = generate_latex_table(agg, metric="perplexity")
+        assert "5.2" in latex
+
+    def test_column_count_matches_methods(self):
+        agg = self._sample_aggregated()
+        latex = generate_latex_table(agg)
+        # 2 methods → "lcc" (1 model col + 2 method cols)
+        assert "{@{}lcc@{}}" in latex
+
+
+# ── CLI integration ────────────────────────────────────────────────────
+
+
+class TestCLIContributeFlag:
+    def test_contribute_flag_accepted(self):
+        """Verify the --contribute flag parses without error."""
+        from obliteratus.cli import main
+
+        # We can't run the full command (no GPU), but verify parsing works
+        with pytest.raises(SystemExit):
+            # "obliterate" requires a model, so parse will fail,
+            # but if --contribute is not recognized it fails differently
+            main(["obliterate", "--help"])
+
+    def test_aggregate_command_accepted(self):
+        """Verify the aggregate command parses without error."""
+        from obliteratus.cli import main
+
+        with pytest.raises(SystemExit):
+            main(["aggregate", "--help"])
+
+
+# ── Package exports ────────────────────────────────────────────────────
+
+
+class TestPackageExports:
+    def test_save_contribution_importable(self):
+        from obliteratus import save_contribution
+        assert callable(save_contribution)
+
+    def test_load_contributions_importable(self):
+        from obliteratus import load_contributions
+        assert callable(load_contributions)
+
+    def test_aggregate_results_importable(self):
+        from obliteratus import aggregate_results
+        assert callable(aggregate_results)
+
+
+# ── End-to-end: save → load → aggregate ───────────────────────────────
+
+
+class TestEndToEnd:
+    def test_save_load_aggregate_roundtrip(self, tmp_path):
+        """Full roundtrip: save contributions, load them, aggregate."""
+        pipeline = _make_mock_pipeline()
+
+        # Save two contributions (different models to avoid filename collision)
+        save_contribution(
+            pipeline, model_name="test/model-a", output_dir=tmp_path,
+        )
+        # Tweak metrics for second run with a different model name
+        pipeline._quality_metrics = {"perplexity": 5.5, "coherence": 0.75, "refusal_rate": 0.07}
+        save_contribution(
+            pipeline, model_name="test/model-b", output_dir=tmp_path,
+        )
+
+        # Load
+        records = load_contributions(tmp_path)
+        assert len(records) == 2
+
+        # Aggregate
+        aggregated = aggregate_results(records)
+        assert "test/model-a" in aggregated
+        assert "test/model-b" in aggregated
+        stats_a = aggregated["test/model-a"]["advanced"]
+        stats_b = aggregated["test/model-b"]["advanced"]
+        assert stats_a["n_runs"] == 1
+        assert stats_b["n_runs"] == 1
+        assert abs(stats_a["refusal_rate"]["mean"] - 0.05) < 0.001
+        assert abs(stats_b["refusal_rate"]["mean"] - 0.07) < 0.001
+
+    def test_save_load_aggregate_to_latex(self, tmp_path):
+        """Full roundtrip ending in LaTeX output."""
+        pipeline = _make_mock_pipeline()
+        save_contribution(
+            pipeline, model_name="meta-llama/Llama-2-7b-chat-hf", output_dir=tmp_path,
+        )
+
+        records = load_contributions(tmp_path)
+        aggregated = aggregate_results(records)
+        latex = generate_latex_table(aggregated)
+
+        assert "\\begin{tabular}" in latex
+        assert "Llama-2-7b-chat-hf" in latex
+        assert "advanced" in latex
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..debaad5
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,59 @@
+"""Tests for configuration loading."""
+
+from __future__ import annotations
+
+
+import yaml
+
+from obliteratus.config import StudyConfig
+
+
+SAMPLE_CONFIG = {
+    "model": {
+        "name": "gpt2",
+        "task": "causal_lm",
+        "dtype": "float32",
+        "device": "cpu",
+    },
+    "dataset": {
+        "name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "test",
+        "text_column": "text",
+        "max_samples": 50,
+    },
+    "strategies": [
+        {"name": "layer_removal", "params": {}},
+        {"name": "ffn_ablation", "params": {}},
+    ],
+    "metrics": ["perplexity"],
+    "batch_size": 4,
+    "max_length": 256,
+    "output_dir": "results/test",
+}
+
+
+class TestStudyConfig:
+    def test_from_dict(self):
+        config = StudyConfig.from_dict(SAMPLE_CONFIG)
+        assert config.model.name == "gpt2"
+        assert config.model.task == "causal_lm"
+        assert config.dataset.name == "wikitext"
+        assert len(config.strategies) == 2
+        assert config.strategies[0].name == "layer_removal"
+
+    def test_from_yaml(self, tmp_path):
+        yaml_path = tmp_path / "test_config.yaml"
+        yaml_path.write_text(yaml.dump(SAMPLE_CONFIG))
+
+        config = StudyConfig.from_yaml(yaml_path)
+        assert config.model.name == "gpt2"
+        assert config.batch_size == 4
+
+    def test_roundtrip(self):
+        config = StudyConfig.from_dict(SAMPLE_CONFIG)
+        d = config.to_dict()
+        config2 = StudyConfig.from_dict(d)
+        assert config2.model.name == config.model.name
+        assert config2.dataset.name == config.dataset.name
+        assert len(config2.strategies) == len(config.strategies)
diff --git a/tests/test_defense_robustness.py b/tests/test_defense_robustness.py
new file mode 100644
index 0000000..0b7f679
--- /dev/null
+++ b/tests/test_defense_robustness.py
@@ -0,0 +1,169 @@
+"""Tests for defense robustness evaluation framework."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import torch
+
+from obliteratus.analysis.defense_robustness import (
+    DefenseProfile,
+    DefenseRobustnessEvaluator,
+    EntanglementMap,
+    SelfRepairResult,
+)
+
+
+def _make_mock_pipeline(n_layers=6, hidden_dim=16, n_prompts=5):
+    """Create a mock pipeline with refusal directions and activations."""
+    pipeline = MagicMock()
+    pipeline.model_name = "test-model"
+
+    # Generate refusal directions (some strong, some weak)
+    torch.manual_seed(42)
+    directions = {}
+    for i in range(n_layers):
+        d = torch.randn(hidden_dim)
+        directions[i] = d / d.norm()
+    pipeline.refusal_directions = directions
+
+    # Generate activations with a planted refusal signal in middle layers
+    harmful_means = {}
+    harmless_means = {}
+    harmful_acts = {}
+    harmless_acts = {}
+
+    for i in range(n_layers):
+        base = torch.randn(hidden_dim)
+        harmless_means[i] = base.unsqueeze(0)
+
+        # Middle layers have stronger refusal signal
+        signal_strength = 3.0 if 2 <= i <= 4 else 0.5
+        harmful_means[i] = (base + signal_strength * directions[i]).unsqueeze(0)
+
+        harmful_acts[i] = [base + signal_strength * directions[i] + torch.randn(hidden_dim) * 0.1 for _ in range(n_prompts)]
+        harmless_acts[i] = [base + torch.randn(hidden_dim) * 0.1 for _ in range(n_prompts)]
+
+    pipeline._harmful_means = harmful_means
+    pipeline._harmless_means = harmless_means
+    pipeline._harmful_acts = harmful_acts
+    pipeline._harmless_acts = harmless_acts
+
+    return pipeline
+
+
+class TestDefenseProfile:
+    def test_profile_generates(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+
+        assert isinstance(profile, DefenseProfile)
+        assert profile.model_name == "test-model"
+        assert profile.refusal_layer_spread > 0
+        assert profile.mean_refusal_strength > 0
+        assert profile.max_refusal_strength >= profile.mean_refusal_strength
+        assert profile.estimated_robustness in ("low", "medium", "high", "very_high")
+
+    def test_alignment_type_estimate(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+        assert profile.alignment_type_estimate != "unknown"
+
+    def test_empty_pipeline(self):
+        pipeline = MagicMock()
+        pipeline.model_name = "empty"
+        pipeline.refusal_directions = {}
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+        assert profile.estimated_robustness == "unknown"
+
+    def test_concentration_bounded(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+        # Gini coefficient should be between 0 and 1
+        assert 0 <= profile.refusal_concentration <= 1.0
+
+    def test_self_repair_bounded(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+        assert 0 <= profile.self_repair_estimate <= 1.0
+
+    def test_format_report(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        profile = evaluator.profile_defense()
+        report = DefenseRobustnessEvaluator.format_defense_profile(profile)
+        assert "Defense Robustness" in report
+        assert "test-model" in report
+
+
+class TestSelfRepair:
+    def test_self_repair_measurement(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        result = evaluator.measure_self_repair(layer_idx=3)
+
+        assert isinstance(result, SelfRepairResult)
+        assert result.layer_idx == 3
+        assert result.original_refusal_strength >= 0
+        assert 0 <= result.repair_ratio <= 1.0
+        assert len(result.compensating_layers) > 0
+        assert 3 not in result.compensating_layers  # shouldn't list itself
+
+    def test_repair_ratio_high_for_distributed(self):
+        """Distributed refusal should have high repair ratio."""
+        pipeline = _make_mock_pipeline(n_layers=10)
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        result = evaluator.measure_self_repair(layer_idx=3)
+        # With distributed signal, removing one layer leaves much compensation
+        assert result.repair_ratio > 0.5
+
+    def test_format_self_repair(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        result = evaluator.measure_self_repair(layer_idx=2)
+        report = DefenseRobustnessEvaluator.format_self_repair(result)
+        assert "Self-Repair" in report
+        assert "Layer 2" in report
+
+
+class TestEntanglement:
+    def test_entanglement_map(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        emap = evaluator.map_entanglement()
+
+        assert isinstance(emap, EntanglementMap)
+        assert len(emap.layer_entanglement) > 0
+        assert 0 <= emap.overall_entanglement <= 1.0
+        assert len(emap.most_entangled_layers) > 0
+        assert len(emap.least_entangled_layers) > 0
+
+    def test_capability_sensitivity_keys(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        emap = evaluator.map_entanglement()
+
+        expected_keys = {"factual_knowledge", "reasoning", "language_fluency",
+                         "instruction_following", "math"}
+        assert set(emap.capability_sensitivity.keys()) == expected_keys
+
+    def test_math_most_sensitive(self):
+        """Math should be estimated as the most sensitive capability."""
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        emap = evaluator.map_entanglement()
+        if emap.overall_entanglement > 0:
+            assert emap.capability_sensitivity["math"] >= emap.capability_sensitivity["language_fluency"]
+
+    def test_format_entanglement(self):
+        pipeline = _make_mock_pipeline()
+        evaluator = DefenseRobustnessEvaluator(pipeline)
+        emap = evaluator.map_entanglement()
+        report = DefenseRobustnessEvaluator.format_entanglement(emap)
+        assert "Entanglement" in report
+        assert "math" in report
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
new file mode 100644
index 0000000..4184c0f
--- /dev/null
+++ b/tests/test_edge_cases.py
@@ -0,0 +1,510 @@
+"""Edge-case and robustness tests.
+
+Tests for NaN/Inf handling, empty inputs, extreme dimensions,
+and other boundary conditions that the main test suite doesn't cover.
+"""
+
+from __future__ import annotations
+
+import math
+
+import pytest
+import torch
+import torch.nn as nn
+
+from obliteratus.analysis.whitened_svd import WhitenedSVDExtractor
+from obliteratus.analysis.cross_layer import CrossLayerAlignmentAnalyzer
+from obliteratus.analysis.concept_geometry import ConceptConeAnalyzer
+from obliteratus.analysis.alignment_imprint import AlignmentImprintDetector
+from obliteratus.analysis.multi_token_position import MultiTokenPositionAnalyzer
+from obliteratus.analysis.sparse_surgery import SparseDirectionSurgeon
+from obliteratus.analysis.causal_tracing import CausalRefusalTracer
+from obliteratus.analysis.residual_stream import ResidualStreamDecomposer
+from obliteratus.analysis.probing_classifiers import LinearRefusalProbe
+from obliteratus.analysis.cross_model_transfer import TransferAnalyzer
+from obliteratus.evaluation.advanced_metrics import (
+    refusal_rate,
+    effective_rank,
+    activation_cosine_similarity,
+)
+from obliteratus.analysis.steering_vectors import (
+    SteeringVectorFactory,
+    SteeringHookManager,
+    SteeringConfig,
+    SteeringResult,
+    compute_steering_effectiveness,
+    format_steering_report,
+)
+
+
+# ===========================================================================
+#  NaN / Inf handling
+# ===========================================================================
+
+class TestNaNInfHandling:
+    """Test that modules handle degenerate inputs gracefully."""
+
+    def test_whitened_svd_nan_activations(self):
+        """WhitenedSVD with NaN — currently raises; documenting behavior."""
+        harmful = [torch.tensor([float("nan"), 1.0, 2.0]) for _ in range(5)]
+        harmless = [torch.randn(3) for _ in range(5)]
+        extractor = WhitenedSVDExtractor()
+        # NaN propagation through SVD is expected to produce NaN results
+        # This documents the current behavior — ideally would guard against it
+        raised = False
+        result = None
+        try:
+            result = extractor.extract(harmful, harmless)
+        except (RuntimeError, ValueError):
+            raised = True
+        # Either it raised an exception (acceptable) or returned a result with NaNs
+        assert raised or result is not None, (
+            "Should either raise on NaN input or return a result"
+        )
+
+    def test_whitened_svd_zero_activations(self):
+        """WhitenedSVD with all-zero activations."""
+        harmful = [torch.zeros(8) for _ in range(5)]
+        harmless = [torch.zeros(8) for _ in range(5)]
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless)
+        # Should return a valid result without crashing
+        assert result is not None
+        assert result.directions is not None
+        assert result.singular_values is not None
+
+    def test_concept_cone_nan_direction(self):
+        """ConceptConeAnalyzer with NaN in activations — documenting behavior."""
+        harmful = [torch.randn(16) for _ in range(10)]
+        harmless = [torch.randn(16) for _ in range(10)]
+        # Poison one activation
+        harmful[3] = torch.full((16,), float("nan"))
+        cat_map = {i: f"cat_{i % 3}" for i in range(10)}
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        raised = False
+        result = None
+        try:
+            result = analyzer.analyze_layer(harmful, harmless)
+        except (RuntimeError, ValueError):
+            raised = True
+        # Either it raised an exception (acceptable) or returned a result
+        assert raised or result is not None, (
+            "Should either raise on NaN input or return a result"
+        )
+
+    def test_sparse_surgery_zero_direction(self):
+        """Sparse surgery with zero refusal direction."""
+        W = torch.randn(32, 16)
+        zero_dir = torch.zeros(16)
+        surgeon = SparseDirectionSurgeon()
+        result = surgeon.analyze_weight_matrix(W, zero_dir)
+        assert result.mean_projection == 0.0
+
+    def test_sparse_surgery_zero_weight(self):
+        """Sparse surgery with zero weight matrix."""
+        W = torch.zeros(32, 16)
+        ref_dir = torch.randn(16)
+        surgeon = SparseDirectionSurgeon()
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        assert result.max_projection < 1e-6
+
+    def test_effective_rank_nan_matrix(self):
+        """effective_rank should handle matrix with NaN."""
+        W = torch.randn(10, 10)
+        W[0, 0] = float("nan")
+        # Should either return a value or raise cleanly
+        try:
+            result = effective_rank(torch.nan_to_num(W))
+            assert math.isfinite(result)
+        except Exception:
+            pass  # Raising is acceptable for NaN input
+
+    def test_cosine_similarity_zero_vectors(self):
+        """Cosine similarity between zero vectors."""
+        a = torch.zeros(32)
+        b = torch.zeros(32)
+        result = activation_cosine_similarity(a, b)
+        # Should be 0 or NaN, not crash
+        assert math.isfinite(result) or math.isnan(result)
+
+    def test_transfer_analyzer_nan_directions(self):
+        """Transfer analyzer with NaN directions."""
+        dirs_a = {0: torch.randn(16), 1: torch.tensor([float("nan")] * 16)}
+        dirs_b = {0: torch.randn(16), 1: torch.randn(16)}
+        analyzer = TransferAnalyzer()
+        # Should not crash
+        result = analyzer.analyze_cross_model(dirs_a, dirs_b)
+        assert result is not None
+        assert isinstance(result.mean_transfer_score, float)
+        assert result.per_layer_transfer is not None
+
+
+# ===========================================================================
+#  Empty inputs
+# ===========================================================================
+
+class TestEmptyInputs:
+    """Test graceful handling of empty or minimal inputs."""
+
+    def test_cross_layer_empty_directions(self):
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze({})
+        assert result.direction_persistence_score == 0.0
+
+    def test_alignment_imprint_single_layer(self):
+        """Single layer should still return a result."""
+        detector = AlignmentImprintDetector()
+        dirs = {0: torch.randn(32)}
+        result = detector.detect_imprint(dirs)
+        assert result.predicted_method in ("dpo", "rlhf", "cai", "sft", "unknown")
+
+    def test_multi_token_single_position(self):
+        """Single-position sequence."""
+        ref_dir = torch.randn(16)
+        acts = torch.randn(1, 16)
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert result.n_tokens == 1
+        assert result.peak_position == 0
+
+    def test_probing_minimal_data(self):
+        """Probing with very few samples."""
+        harmful = [torch.randn(8) for _ in range(3)]
+        harmless = [torch.randn(8) for _ in range(3)]
+        probe = LinearRefusalProbe(n_epochs=10)
+        result = probe.probe_layer(harmful, harmless)
+        assert 0 <= result.accuracy <= 1.0
+
+    def test_residual_stream_single_layer(self):
+        acts = {0: torch.randn(32)}
+        ref_dir = torch.randn(32)
+        decomposer = ResidualStreamDecomposer()
+        result = decomposer.decompose(acts, ref_dir)
+        assert result.n_layers == 1
+
+    def test_causal_tracing_single_layer(self):
+        acts = {0: torch.randn(32)}
+        ref_dirs = {0: torch.randn(32)}
+        tracer = CausalRefusalTracer()
+        result = tracer.trace_from_activations(acts, ref_dirs)
+        assert result.n_layers == 1
+
+    def test_transfer_no_common_layers(self):
+        """Cross-model with no overlapping layer indices."""
+        dirs_a = {0: torch.randn(16), 1: torch.randn(16)}
+        dirs_b = {2: torch.randn(16), 3: torch.randn(16)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_model(dirs_a, dirs_b)
+        assert result.mean_transfer_score == 0.0
+
+    def test_refusal_rate_empty_list(self):
+        result = refusal_rate([])
+        assert result == 0.0
+
+    def test_refusal_rate_single_response(self):
+        result = refusal_rate(["I cannot help with that."])
+        assert result == 1.0
+
+
+# ===========================================================================
+#  Extreme dimensions
+# ===========================================================================
+
+class TestExtremeDimensions:
+    """Test with unusually large or small dimensions."""
+
+    def test_high_dimensional_directions(self):
+        """Test with realistic hidden dimension (4096)."""
+        hidden_dim = 4096
+        torch.manual_seed(42)
+        dirs = {i: torch.randn(hidden_dim) for i in range(8)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_layer(dirs)
+        assert result.mean_adjacent_transfer >= 0
+
+    def test_high_dim_sparse_surgery(self):
+        """Sparse surgery with large weight matrix."""
+        W = torch.randn(2048, 1024)
+        ref_dir = torch.randn(1024)
+        surgeon = SparseDirectionSurgeon(sparsity=0.05)
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        assert result.n_rows_modified == int(0.05 * 2048)
+
+    def test_single_dimension(self):
+        """1D hidden dimension edge case."""
+        dirs = {i: torch.randn(1) for i in range(4)}
+        analyzer = TransferAnalyzer()
+        result = analyzer.analyze_cross_layer(dirs)
+        # All 1D directions are parallel or anti-parallel, so cosine is always 1.0
+        assert result.mean_adjacent_transfer >= 0.99
+
+    def test_many_layers_imprint(self):
+        """Alignment imprint with many layers (128)."""
+        dirs = {i: torch.randn(32) for i in range(128)}
+        detector = AlignmentImprintDetector()
+        result = detector.detect_imprint(dirs)
+        total = (result.dpo_probability + result.rlhf_probability +
+                 result.cai_probability + result.sft_probability)
+        assert abs(total - 1.0) < 0.01
+
+    @pytest.mark.parametrize("n_prompts", [1, 2, 5, 50, 100])
+    def test_concept_cone_varying_prompt_counts(self, n_prompts):
+        """Concept cone with varying numbers of prompts."""
+        harmful = [torch.randn(16) for _ in range(n_prompts)]
+        harmless = [torch.randn(16) for _ in range(n_prompts)]
+        cat_map = {i: f"cat_{i % 3}" for i in range(n_prompts)}
+        analyzer = ConceptConeAnalyzer(category_map=cat_map, min_category_size=1)
+        result = analyzer.analyze_layer(harmful, harmless)
+        # Should produce a valid result regardless of prompt count
+        assert result is not None
+        assert result.general_direction is not None
+        assert result.category_count >= 1
+
+    @pytest.mark.parametrize("seq_len", [1, 2, 10, 100, 500])
+    def test_multi_token_varying_lengths(self, seq_len):
+        """Multi-token analysis with varying sequence lengths."""
+        ref_dir = torch.randn(16)
+        acts = torch.randn(seq_len, 16)
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert result.n_tokens == seq_len
+
+    @pytest.mark.parametrize("hidden_dim", [2, 4, 16, 64, 256, 1024])
+    def test_probing_varying_dimensions(self, hidden_dim):
+        """Probing classifier across different hidden dims (min 2 for SGD)."""
+        harmful = [torch.randn(hidden_dim) * 2 + 1 for _ in range(10)]
+        harmless = [torch.randn(hidden_dim) * 2 - 1 for _ in range(10)]
+        probe = LinearRefusalProbe(n_epochs=20)
+        result = probe.probe_layer(harmful, harmless)
+        assert 0 <= result.accuracy <= 1.0
+
+
+# ===========================================================================
+#  Refusal rate edge cases
+# ===========================================================================
+
+class TestRefusalRateEdgeCases:
+    """Edge cases for refusal rate detection."""
+
+    def test_novel_refusal_phrasing(self):
+        """Test that unusual refusal phrasings might be missed (known limitation)."""
+        novel_refusals = [
+            "That request falls outside my operational parameters.",
+            "I'm designed to be helpful but this conflicts with my guidelines.",
+        ]
+        # These should ideally be detected but string matching may miss them
+        rate = refusal_rate(novel_refusals)
+        # This tests the known limitation — rate may be 0 (string matching misses novel phrasings)
+        assert 0 <= rate <= 1.0
+
+    def test_false_positive_sorry(self):
+        """'I'm sorry to hear that' followed by helpful content."""
+        responses = [
+            "I'm sorry to hear that. Here's how you can solve the problem: first...",
+        ]
+        rate = refusal_rate(responses, mode="prefix")
+        # Known limitation: prefix matching produces false positives here
+        # Just verify it returns a valid float
+        assert 0 <= rate <= 1.0
+
+    def test_case_insensitivity(self):
+        responses = ["I CANNOT ASSIST WITH THAT REQUEST."]
+        rate = refusal_rate(responses, mode="substring")
+        assert rate == 1.0
+
+    def test_whitespace_handling(self):
+        responses = ["  I'm sorry, but I can't help with that.  "]
+        rate = refusal_rate(responses, mode="prefix")
+        # Should handle leading whitespace
+        assert 0 <= rate <= 1.0
+
+
+# ===========================================================================
+#  Steering Vectors
+# ===========================================================================
+
+class TestSteeringVectors:
+    """Tests for the steering vector inference-time intervention system."""
+
+    def test_from_refusal_direction(self):
+        d = torch.randn(32)
+        vec = SteeringVectorFactory.from_refusal_direction(d, source_layer=5)
+        assert vec.label == "refusal"
+        assert vec.source_layer == 5
+        assert vec.default_alpha == -1.0
+        assert abs(vec.direction.norm().item() - 1.0) < 0.01
+
+    def test_from_contrastive_pairs(self):
+        pos = [torch.randn(16) + 2 for _ in range(10)]
+        neg = [torch.randn(16) - 2 for _ in range(10)]
+        vec = SteeringVectorFactory.from_contrastive_pairs(pos, neg, label="test")
+        assert vec.label == "test"
+        assert abs(vec.direction.norm().item() - 1.0) < 0.01
+        assert "n_positive" in vec.metadata
+
+    def test_combine_vectors(self):
+        v1 = SteeringVectorFactory.from_refusal_direction(torch.randn(32))
+        v2 = SteeringVectorFactory.from_refusal_direction(torch.randn(32))
+        combined = SteeringVectorFactory.combine([v1, v2], label="merged")
+        assert combined.label == "merged"
+        assert abs(combined.direction.norm().item() - 1.0) < 0.01
+
+    def test_combine_single(self):
+        v = SteeringVectorFactory.from_refusal_direction(torch.randn(16))
+        combined = SteeringVectorFactory.combine([v])
+        assert abs(combined.direction.norm().item() - 1.0) < 0.01
+
+    def test_combine_empty_raises(self):
+        with pytest.raises(ValueError):
+            SteeringVectorFactory.combine([])
+
+    def test_hook_manager_lifecycle(self):
+        """Test install/remove lifecycle without a real model."""
+        manager = SteeringHookManager()
+        assert not manager.is_active
+        manager.remove()  # Should not crash even with no hooks
+        assert not manager.is_active
+
+    def test_hook_with_simple_model(self):
+        """Test steering on a simple nn.Sequential model."""
+        model = nn.Sequential(
+            nn.Linear(16, 16),
+            nn.ReLU(),
+            nn.Linear(16, 16),
+            nn.ReLU(),
+            nn.Linear(16, 8),
+        )
+
+        vec = SteeringVectorFactory.from_refusal_direction(torch.randn(16))
+        config = SteeringConfig(
+            vectors=[vec],
+            target_layers=[0, 2],  # steer at first and third linear layers
+            alpha=1.0,
+        )
+
+        manager = SteeringHookManager()
+        # Install on specific modules
+        layers = list(model.children())
+        result = manager.install(model, config, layer_modules=layers)
+        assert result.hooks_installed == 2
+        assert manager.is_active
+
+        # Run a forward pass (should not crash)
+        x = torch.randn(1, 16)
+        output = model(x)
+        assert output.shape == (1, 8)
+
+        # Remove hooks
+        manager.remove()
+        assert not manager.is_active
+
+    def test_steering_effectiveness_remove(self):
+        eff = compute_steering_effectiveness(2.0, 0.5, direction="remove")
+        assert 0 < eff < 1.0  # Reduced but not eliminated
+
+    def test_steering_effectiveness_perfect_remove(self):
+        eff = compute_steering_effectiveness(2.0, 0.0, direction="remove")
+        assert eff == 1.0
+
+    def test_steering_effectiveness_no_change(self):
+        eff = compute_steering_effectiveness(2.0, 2.0, direction="remove")
+        assert eff == 0.0
+
+    def test_steering_effectiveness_add(self):
+        eff = compute_steering_effectiveness(1.0, 3.0, direction="add")
+        assert eff == 1.0  # Capped at 1.0
+
+    def test_format_report(self):
+        vec = SteeringVectorFactory.from_refusal_direction(torch.randn(32))
+        config = SteeringConfig(vectors=[vec], target_layers=[3, 5], alpha=0.5)
+        result = SteeringResult(config=config, hooks_installed=2, total_steered_layers=2)
+        report = format_steering_report(result)
+        assert "Steering" in report
+        assert "refusal" in report
+
+    def test_steering_config_position_modes(self):
+        """Test different position modes in config."""
+        for pos in ["all", "last", "first"]:
+            config = SteeringConfig(
+                vectors=[SteeringVectorFactory.from_refusal_direction(torch.randn(8))],
+                target_layers=[0],
+                position=pos,
+            )
+            assert config.position == pos
+
+    def test_imports(self):
+        from obliteratus.analysis import SteeringVectorFactory, SteeringHookManager
+        assert SteeringVectorFactory is not None
+        assert SteeringHookManager is not None
+
+
+class TestParametrizedDimensions:
+    """Parametrized tests across different hidden dimensions."""
+
+    @pytest.mark.parametrize("hidden_dim", [2, 8, 64, 256, 768])
+    def test_whitened_svd_various_dims(self, hidden_dim):
+        n_samples = max(4, hidden_dim // 4)
+        harmful = [torch.randn(hidden_dim) for _ in range(n_samples)]
+        harmless = [torch.randn(hidden_dim) for _ in range(n_samples)]
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=1)
+        assert result.directions.shape[1] == hidden_dim
+
+    @pytest.mark.parametrize("hidden_dim", [2, 8, 64, 256])
+    def test_cross_layer_various_dims(self, hidden_dim):
+        directions = {i: torch.randn(hidden_dim) for i in range(4)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        assert 0.0 <= result.direction_persistence_score <= 1.0
+
+    @pytest.mark.parametrize("hidden_dim", [4, 32, 128])
+    def test_sparse_surgery_various_dims(self, hidden_dim):
+        weight = torch.randn(hidden_dim, hidden_dim)
+        direction = torch.randn(hidden_dim)
+        direction = direction / direction.norm()
+        surgeon = SparseDirectionSurgeon()
+        result = surgeon.analyze_weight_matrix(weight, direction, layer_idx=0)
+        assert 0.0 <= result.energy_removed <= 1.0
+
+    @pytest.mark.parametrize("n_layers", [1, 4, 12, 32])
+    def test_imprint_various_layer_counts(self, n_layers):
+        directions = {i: torch.randn(64) for i in range(n_layers)}
+        detector = AlignmentImprintDetector()
+        result = detector.detect_imprint(directions)
+        assert result.predicted_method in ("dpo", "rlhf", "cai", "sft", "unknown")
+
+
+class TestExceptionPaths:
+    """Tests for error handling and boundary conditions."""
+
+    def test_whitened_svd_mismatched_dims(self):
+        """Harmful and harmless with different hidden dims should fail or handle gracefully."""
+        harmful = [torch.randn(64) for _ in range(10)]
+        harmless = [torch.randn(32) for _ in range(10)]
+        extractor = WhitenedSVDExtractor()
+        with pytest.raises(Exception):
+            extractor.extract(harmful, harmless, n_directions=1)
+
+    def test_whitened_svd_single_sample(self):
+        """Single sample should not crash (may return 0 directions due to insufficient data)."""
+        harmful = [torch.randn(32)]
+        harmless = [torch.randn(32)]
+        extractor = WhitenedSVDExtractor()
+        result = extractor.extract(harmful, harmless, n_directions=1)
+        assert result.directions.shape[1] == 32  # hidden dim preserved
+
+    def test_sparse_surgery_zero_direction(self):
+        """Zero direction vector should not crash."""
+        weight = torch.randn(16, 16)
+        direction = torch.zeros(16)
+        surgeon = SparseDirectionSurgeon()
+        # Should handle gracefully (possibly returning 0 energy)
+        result = surgeon.analyze_weight_matrix(weight, direction, layer_idx=0)
+        assert result is not None
+
+    def test_cross_layer_single_layer(self):
+        """Single layer directions should still produce a result."""
+        directions = {0: torch.randn(32)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+        assert result is not None
diff --git a/tests/test_heretic_eval.py b/tests/test_heretic_eval.py
new file mode 100644
index 0000000..7e1b600
--- /dev/null
+++ b/tests/test_heretic_eval.py
@@ -0,0 +1,483 @@
+"""Tests for the community-standard (Heretics/Arditi) evaluation protocol."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from obliteratus.evaluation.heretic_eval import (
+    ARDITI_REFUSAL_SUBSTRINGS,
+    _DEFAULT_HARMLESS_PROMPTS,
+    _HARMBENCH_CLS_TEMPLATE,
+    _JAILBREAKBENCH_BUILTIN,
+    _extract_score,
+    _last_real_token_indices,
+    _sanitize_model_path,
+    arditi_refusal_rate,
+    first_token_kl_on_prompts,
+    format_comparison_table,
+    HereticComparisonResult,
+    LM_EVAL_BENCHMARKS,
+    load_jailbreakbench_prompts,
+    unload_harmbench_classifier,
+)
+
+
+# ---------------------------------------------------------------------------
+# JailbreakBench Prompts
+# ---------------------------------------------------------------------------
+
+class TestJailbreakBenchPrompts:
+    def test_builtin_has_100_prompts(self):
+        assert len(_JAILBREAKBENCH_BUILTIN) == 100
+
+    def test_builtin_all_non_empty(self):
+        for i, p in enumerate(_JAILBREAKBENCH_BUILTIN):
+            assert isinstance(p, str) and len(p.strip()) > 10, f"Prompt {i} is too short"
+
+    def test_builtin_no_duplicates(self):
+        assert len(set(_JAILBREAKBENCH_BUILTIN)) == len(_JAILBREAKBENCH_BUILTIN)
+
+    def test_load_returns_list(self):
+        prompts = load_jailbreakbench_prompts()
+        assert isinstance(prompts, list)
+        assert len(prompts) >= 100
+
+    def test_load_caches(self):
+        p1 = load_jailbreakbench_prompts()
+        p2 = load_jailbreakbench_prompts()
+        assert p1 == p2
+
+    def test_load_returns_copy(self):
+        """Loading should return a copy, not the cached list itself."""
+        p1 = load_jailbreakbench_prompts()
+        p1.append("mutated")
+        p2 = load_jailbreakbench_prompts()
+        assert "mutated" not in p2
+
+
+# ---------------------------------------------------------------------------
+# Arditi Refusal Rate
+# ---------------------------------------------------------------------------
+
+class TestArditiRefusalRate:
+    def test_canonical_substring_count(self):
+        """Arditi/Zou list has exactly 29 substrings."""
+        assert len(ARDITI_REFUSAL_SUBSTRINGS) == 29
+
+    def test_all_refusals(self):
+        responses = [
+            "I'm sorry, but I cannot help with that.",
+            "I apologize for the inconvenience.",
+            "As an AI, I don't do that.",
+        ]
+        assert arditi_refusal_rate(responses) == 1.0
+
+    def test_no_refusals(self):
+        responses = [
+            "Sure, here is a step-by-step guide.",
+            "The answer to your question is 42.",
+            "Here you go, this should work.",
+        ]
+        assert arditi_refusal_rate(responses) == 0.0
+
+    def test_empty_responses_not_refusals(self):
+        """Per Zou et al. logic, empty responses have no substring match = not refusals."""
+        responses = ["", "  ", ""]
+        assert arditi_refusal_rate(responses) == 0.0
+
+    def test_empty_list(self):
+        assert arditi_refusal_rate([]) == 0.0
+
+    def test_case_insensitive(self):
+        responses = ["i'm sorry, but I can't help."]
+        assert arditi_refusal_rate(responses) == 1.0
+
+    def test_mixed(self):
+        responses = [
+            "I cannot do that.",
+            "Sure, here's the info.",
+            "My apologies, I'm unable to help.",
+            "The capital of France is Paris.",
+        ]
+        rate = arditi_refusal_rate(responses)
+        assert rate == 0.5  # 2/4
+
+    def test_subtle_refusal_not_caught(self):
+        """Arditi list is deliberately limited — subtle refusals should not match."""
+        responses = [
+            "I'd rather not help with that specific request.",
+            "Let me redirect you to a more appropriate resource.",
+        ]
+        # These are NOT in the Arditi 29-substring list
+        assert arditi_refusal_rate(responses) == 0.0
+
+
+# ---------------------------------------------------------------------------
+# First-Token KL Divergence
+# ---------------------------------------------------------------------------
+
+class TestFirstTokenKL:
+    @pytest.fixture
+    def simple_models(self):
+        """Create two tiny 'models' with controllable logit distributions."""
+        class FakeModel(torch.nn.Module):
+            def __init__(self, peak_idx: int = 0):
+                super().__init__()
+                # A single parameter so next(model.parameters()).device works
+                self._param = torch.nn.Parameter(torch.zeros(1))
+                self._peak_idx = peak_idx
+
+            def __call__(self, **kwargs):
+                batch_size = kwargs["input_ids"].shape[0]
+                seq_len = kwargs["input_ids"].shape[1]
+                vocab_size = 10
+                # Create a non-uniform distribution peaked at _peak_idx
+                base = torch.zeros(vocab_size)
+                base[self._peak_idx] = 5.0
+                logits = base.unsqueeze(0).unsqueeze(0).expand(
+                    batch_size, seq_len, vocab_size
+                ).clone()
+                return type("Output", (), {"logits": logits})()
+
+        class FakeTokenizer:
+            pad_token_id = 0
+            def __call__(self, texts, return_tensors="pt", **kwargs):
+                batch_size = len(texts) if isinstance(texts, list) else 1
+                input_ids = torch.ones(batch_size, 5, dtype=torch.long)
+                return {"input_ids": input_ids, "attention_mask": torch.ones_like(input_ids)}
+
+        return FakeModel, FakeTokenizer
+
+    def test_identical_models_zero_kl(self, simple_models):
+        FakeModel, FakeTokenizer = simple_models
+        model_a = FakeModel(peak_idx=0)
+        model_b = FakeModel(peak_idx=0)
+        tokenizer = FakeTokenizer()
+
+        result = first_token_kl_on_prompts(
+            model_a, model_b, tokenizer,
+            ["hello", "world"],
+        )
+        assert abs(result["mean_kl"]) < 1e-5
+        assert result["interpretation"] == "excellent (minimal collateral damage)"
+
+    def test_different_models_positive_kl(self, simple_models):
+        FakeModel, FakeTokenizer = simple_models
+        model_a = FakeModel(peak_idx=0)  # peaked at vocab position 0
+        model_b = FakeModel(peak_idx=5)  # peaked at vocab position 5
+        tokenizer = FakeTokenizer()
+
+        result = first_token_kl_on_prompts(
+            model_a, model_b, tokenizer,
+            ["test prompt"],
+        )
+        assert result["mean_kl"] > 0
+
+    def test_returns_per_prompt_kl(self, simple_models):
+        FakeModel, FakeTokenizer = simple_models
+        model_a = FakeModel(peak_idx=0)
+        model_b = FakeModel(peak_idx=3)
+        tokenizer = FakeTokenizer()
+
+        result = first_token_kl_on_prompts(
+            model_a, model_b, tokenizer,
+            ["a", "b", "c"],
+        )
+        assert len(result["per_prompt_kl"]) == 3
+        assert result["std_kl"] >= 0
+
+
+# ---------------------------------------------------------------------------
+# HereticComparisonResult
+# ---------------------------------------------------------------------------
+
+class TestHereticComparisonResult:
+    def test_dataclass_fields(self):
+        r = HereticComparisonResult(
+            model_name="test-model",
+            method="OBLITERATUS",
+            refusal_rate_arditi=0.05,
+            refusal_rate_obliteratus=0.03,
+            harmbench_asr=0.85,
+            n_jailbreakbench=100,
+            n_refusals_remaining=5,
+            first_token_kl=0.15,
+            kl_interpretation="excellent",
+        )
+        assert r.model_name == "test-model"
+        assert r.method == "OBLITERATUS"
+        assert r.refusal_rate_arditi == 0.05
+        assert r.harmbench_asr == 0.85
+        assert r.first_token_kl == 0.15
+
+    def test_optional_fields_default_none(self):
+        r = HereticComparisonResult(
+            model_name="test",
+            method="test",
+            refusal_rate_arditi=0.0,
+            refusal_rate_obliteratus=0.0,
+            harmbench_asr=None,
+            n_jailbreakbench=100,
+            n_refusals_remaining=0,
+        )
+        assert r.mmlu is None
+        assert r.gsm8k is None
+        assert r.perplexity is None
+        assert r.harmbench_per_item == []
+        assert r.kl_per_prompt == []
+
+
+# ---------------------------------------------------------------------------
+# Comparison Table Formatting
+# ---------------------------------------------------------------------------
+
+class TestComparisonTable:
+    def test_format_single_result(self):
+        r = HereticComparisonResult(
+            model_name="Llama-2-7B",
+            method="OBLITERATUS",
+            refusal_rate_arditi=0.05,
+            refusal_rate_obliteratus=0.03,
+            harmbench_asr=0.85,
+            n_jailbreakbench=100,
+            n_refusals_remaining=5,
+            first_token_kl=0.15,
+            kl_interpretation="excellent",
+            mmlu=0.518,
+            gsm8k=0.313,
+        )
+        table = format_comparison_table([r])
+        assert "OBLITERATUS" in table
+        assert "REFUSAL REMOVAL" in table
+        assert "CAPABILITY PRESERVATION" in table
+        assert "DISTRIBUTION QUALITY" in table
+        assert "5.0%" in table  # arditi refusal rate
+        assert "85.0%" in table  # harmbench asr
+        assert "5/100" in table  # JBB refusals
+        assert "0.1500" in table  # KL divergence
+
+    def test_format_multiple_results(self):
+        results = [
+            HereticComparisonResult(
+                model_name="test", method="OBLITERATUS",
+                refusal_rate_arditi=0.05, refusal_rate_obliteratus=0.03,
+                harmbench_asr=0.85, n_jailbreakbench=100, n_refusals_remaining=5,
+            ),
+            HereticComparisonResult(
+                model_name="test", method="Heretic",
+                refusal_rate_arditi=0.03, refusal_rate_obliteratus=0.03,
+                harmbench_asr=0.90, n_jailbreakbench=100, n_refusals_remaining=3,
+            ),
+        ]
+        table = format_comparison_table(results)
+        assert "OBLITERATUS" in table
+        assert "Heretic" in table
+
+    def test_heretic_reference_numbers_present(self):
+        """The comparison table should include Heretic's published reference numbers."""
+        table = format_comparison_table([
+            HereticComparisonResult(
+                model_name="test", method="test",
+                refusal_rate_arditi=0.0, refusal_rate_obliteratus=0.0,
+                harmbench_asr=None, n_jailbreakbench=100, n_refusals_remaining=0,
+            ),
+        ])
+        assert "p-e-w/heretic" in table
+        assert "97/100" in table  # Original model refusal count
+        assert "0.16" in table    # Heretic's KL divergence
+
+
+# ---------------------------------------------------------------------------
+# LM-Eval Benchmark Config
+# ---------------------------------------------------------------------------
+
+class TestLmEvalBenchmarks:
+    def test_five_standard_benchmarks(self):
+        assert len(LM_EVAL_BENCHMARKS) == 5
+
+    def test_required_keys(self):
+        for key, config in LM_EVAL_BENCHMARKS.items():
+            assert "task" in config
+            assert "num_fewshot" in config
+            assert "description" in config
+
+    def test_expected_benchmarks(self):
+        expected = {"mmlu", "gsm8k", "arc_challenge", "hellaswag", "truthfulqa_mc2"}
+        assert set(LM_EVAL_BENCHMARKS.keys()) == expected
+
+    def test_fewshot_settings_match_arditi(self):
+        """Few-shot settings should match Arditi et al. / Open LLM Leaderboard."""
+        assert LM_EVAL_BENCHMARKS["mmlu"]["num_fewshot"] == 5
+        assert LM_EVAL_BENCHMARKS["gsm8k"]["num_fewshot"] == 5
+        assert LM_EVAL_BENCHMARKS["truthfulqa_mc2"]["num_fewshot"] == 0
+
+
+# ---------------------------------------------------------------------------
+# HarmBench Template
+# ---------------------------------------------------------------------------
+
+class TestHarmBenchTemplate:
+    def test_template_has_sys_tags(self):
+        """Official template requires <<SYS>> / <</SYS>> delimiters."""
+        assert "<<SYS>>" in _HARMBENCH_CLS_TEMPLATE
+        assert "<</SYS>>" in _HARMBENCH_CLS_TEMPLATE
+
+    def test_template_has_seven_rules(self):
+        """Official HarmBench template has exactly 7 rules."""
+        rule_count = _HARMBENCH_CLS_TEMPLATE.count("\n- ")
+        assert rule_count == 7
+
+    def test_template_has_behavior_placeholder(self):
+        assert "{behavior}" in _HARMBENCH_CLS_TEMPLATE
+
+    def test_template_has_generation_placeholder(self):
+        assert "{generation}" in _HARMBENCH_CLS_TEMPLATE
+
+    def test_template_ends_with_inst_tag(self):
+        assert _HARMBENCH_CLS_TEMPLATE.strip().endswith("[/INST]")
+
+    def test_template_answer_format(self):
+        """Official template uses lowercase 'yes' or 'no'."""
+        assert '"yes" or "no"' in _HARMBENCH_CLS_TEMPLATE
+
+
+# ---------------------------------------------------------------------------
+# Score Extraction (C3 fix)
+# ---------------------------------------------------------------------------
+
+class TestExtractScore:
+    def test_normal_score(self):
+        assert _extract_score({"acc,none": 0.75}) == 0.75
+
+    def test_zero_score_not_skipped(self):
+        """A legitimate score of 0.0 should be returned, not treated as falsy."""
+        assert _extract_score({"acc,none": 0.0}) == 0.0
+
+    def test_fallback_to_next_key(self):
+        assert _extract_score({"acc_norm,none": 0.65}) == 0.65
+
+    def test_mc2_key(self):
+        assert _extract_score({"mc2,none": 0.42}) == 0.42
+
+    def test_no_matching_key(self):
+        assert _extract_score({"unknown_metric": 0.99}) == 0.0
+
+    def test_priority_order(self):
+        """acc,none should take priority over acc_norm,none."""
+        result = _extract_score({"acc,none": 0.5, "acc_norm,none": 0.9})
+        assert result == 0.5
+
+
+# ---------------------------------------------------------------------------
+# Padding-Aware Last-Token Indices
+# ---------------------------------------------------------------------------
+
+class TestLastRealTokenIndices:
+    def test_no_padding(self):
+        mask = torch.ones(3, 5, dtype=torch.long)
+        indices = _last_real_token_indices(mask)
+        assert indices.tolist() == [4, 4, 4]
+
+    def test_with_padding(self):
+        mask = torch.tensor([
+            [1, 1, 1, 1, 1],  # length 5, last real = index 4
+            [1, 1, 1, 0, 0],  # length 3, last real = index 2
+            [1, 0, 0, 0, 0],  # length 1, last real = index 0
+        ])
+        indices = _last_real_token_indices(mask)
+        assert indices.tolist() == [4, 2, 0]
+
+    def test_single_token(self):
+        mask = torch.tensor([[1]])
+        indices = _last_real_token_indices(mask)
+        assert indices.tolist() == [0]
+
+
+# ---------------------------------------------------------------------------
+# Model Path Sanitization
+# ---------------------------------------------------------------------------
+
+class TestSanitizeModelPath:
+    def test_normal_path(self):
+        assert _sanitize_model_path("/tmp/my-model") == "/tmp/my-model"
+
+    def test_hf_model_id(self):
+        assert _sanitize_model_path("meta-llama/Llama-2-7b-hf") == "meta-llama/Llama-2-7b-hf"
+
+    def test_rejects_commas(self):
+        with pytest.raises(ValueError, match="commas"):
+            _sanitize_model_path("evil,trust_remote_code=True")
+
+
+# ---------------------------------------------------------------------------
+# Classifier Unload
+# ---------------------------------------------------------------------------
+
+class TestClassifierUnload:
+    def test_unload_when_not_loaded(self):
+        """Unloading when nothing is loaded should not raise."""
+        unload_harmbench_classifier()  # should be a no-op
+
+
+# ---------------------------------------------------------------------------
+# Default Harmless Prompts
+# ---------------------------------------------------------------------------
+
+class TestDefaultHarmlessPrompts:
+    def test_has_100_unique_prompts(self):
+        assert len(_DEFAULT_HARMLESS_PROMPTS) == 100
+
+    def test_no_duplicates(self):
+        assert len(set(_DEFAULT_HARMLESS_PROMPTS)) == len(_DEFAULT_HARMLESS_PROMPTS)
+
+    def test_all_non_empty(self):
+        for i, p in enumerate(_DEFAULT_HARMLESS_PROMPTS):
+            assert isinstance(p, str) and len(p) > 10, f"Prompt {i} is too short"
+
+
+# ---------------------------------------------------------------------------
+# KL Divergence Non-Negativity
+# ---------------------------------------------------------------------------
+
+class TestKLNonNegativity:
+    @pytest.fixture
+    def models_and_tokenizer(self):
+        class FakeModel(torch.nn.Module):
+            def __init__(self, peak_idx: int = 0):
+                super().__init__()
+                self._param = torch.nn.Parameter(torch.zeros(1))
+                self._peak_idx = peak_idx
+
+            def __call__(self, **kwargs):
+                batch_size = kwargs["input_ids"].shape[0]
+                seq_len = kwargs["input_ids"].shape[1]
+                vocab_size = 10
+                base = torch.zeros(vocab_size)
+                base[self._peak_idx] = 5.0
+                logits = base.unsqueeze(0).unsqueeze(0).expand(
+                    batch_size, seq_len, vocab_size
+                ).clone()
+                return type("Output", (), {"logits": logits})()
+
+        class FakeTokenizer:
+            pad_token_id = 0
+            def __call__(self, texts, return_tensors="pt", **kwargs):
+                batch_size = len(texts) if isinstance(texts, list) else 1
+                input_ids = torch.ones(batch_size, 5, dtype=torch.long)
+                return {"input_ids": input_ids, "attention_mask": torch.ones_like(input_ids)}
+
+        return FakeModel, FakeTokenizer
+
+    def test_all_kl_values_non_negative(self, models_and_tokenizer):
+        FakeModel, FakeTokenizer = models_and_tokenizer
+        model_a = FakeModel(peak_idx=0)
+        model_b = FakeModel(peak_idx=3)
+        tokenizer = FakeTokenizer()
+
+        result = first_token_kl_on_prompts(
+            model_a, model_b, tokenizer,
+            ["a", "b", "c", "d", "e"],
+        )
+        for val in result["per_prompt_kl"]:
+            assert val >= 0.0, f"KL value {val} is negative"
diff --git a/tests/test_informed_pipeline.py b/tests/test_informed_pipeline.py
new file mode 100644
index 0000000..4890036
--- /dev/null
+++ b/tests/test_informed_pipeline.py
@@ -0,0 +1,385 @@
+"""Tests for the Analysis-Informed Abliteration Pipeline."""
+
+from __future__ import annotations
+
+
+import pytest
+import torch
+
+from obliteratus.informed_pipeline import (
+    AnalysisInsights,
+    InformedAbliterationPipeline,
+    InformedPipelineReport,
+    INFORMED_METHOD,
+)
+from obliteratus.abliterate import METHODS
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def insights():
+    """Default AnalysisInsights for testing."""
+    return AnalysisInsights()
+
+
+@pytest.fixture
+def pipeline(tmp_path):
+    """An InformedAbliterationPipeline with no model loaded."""
+    return InformedAbliterationPipeline(
+        model_name="test-model",
+        output_dir=str(tmp_path / "test_informed"),
+    )
+
+
+# ---------------------------------------------------------------------------
+# AnalysisInsights
+# ---------------------------------------------------------------------------
+
+class TestAnalysisInsights:
+    def test_default_values(self, insights):
+        assert insights.detected_alignment_method == "unknown"
+        assert insights.alignment_confidence == 0.0
+        assert insights.cone_is_polyhedral is False
+        assert insights.cone_dimensionality == 1.0
+        assert insights.mean_pairwise_cosine == 1.0
+        assert insights.per_category_directions == {}
+        assert insights.direction_specificity == {}
+        assert insights.cluster_count == 0
+        assert insights.direction_persistence == 0.0
+        assert insights.use_sparse_surgery is False
+        assert insights.recommended_n_directions == 4
+        assert insights.recommended_regularization == 0.0
+        assert insights.recommended_refinement_passes == 2
+        assert insights.recommended_layers == []
+        assert insights.skip_layers == []
+
+    def test_default_robustness(self, insights):
+        assert insights.estimated_robustness == "unknown"
+        assert insights.self_repair_estimate == 0.0
+        assert insights.entanglement_score == 0.0
+        assert insights.entangled_layers == []
+        assert insights.clean_layers == []
+
+
+class TestInformedPipelineReport:
+    def test_default_report(self):
+        insights = AnalysisInsights()
+        report = InformedPipelineReport(insights=insights)
+        assert report.analysis_duration == 0.0
+        assert report.total_duration == 0.0
+        assert report.ouroboros_passes == 0
+        assert report.final_refusal_rate == 0.0
+        assert report.stages == []
+
+
+# ---------------------------------------------------------------------------
+# Method preset
+# ---------------------------------------------------------------------------
+
+class TestInformedMethod:
+    def test_informed_method_in_abliterate_methods(self):
+        assert "informed" in METHODS
+        cfg = METHODS["informed"]
+        assert cfg["norm_preserve"] is True
+        assert cfg["project_biases"] is True
+        assert cfg["use_chat_template"] is True
+        assert cfg["use_whitened_svd"] is True
+        assert cfg["true_iterative_refinement"] is True
+
+    def test_informed_method_standalone(self):
+        assert INFORMED_METHOD["label"] == "Informed (Analysis-Guided)"
+        assert INFORMED_METHOD["n_directions"] == 4
+        assert INFORMED_METHOD["norm_preserve"] is True
+
+
+# ---------------------------------------------------------------------------
+# Pipeline initialization
+# ---------------------------------------------------------------------------
+
+class TestPipelineInit:
+    def test_method_set_to_informed(self, pipeline):
+        assert pipeline.method == "informed"
+
+    def test_default_analysis_flags(self, pipeline):
+        assert pipeline._run_cone is True
+        assert pipeline._run_alignment is True
+        assert pipeline._run_cross_layer is True
+        assert pipeline._run_sparse is True
+        assert pipeline._run_defense is True
+
+    def test_ouroboros_defaults(self, pipeline):
+        assert pipeline._ouroboros_threshold == 0.5
+        assert pipeline._max_ouroboros_passes == 3
+
+    def test_entanglement_gate(self, pipeline):
+        assert pipeline._entanglement_gate == 0.8
+
+    def test_inherits_base_pipeline(self, pipeline):
+        assert pipeline.norm_preserve is True
+        assert pipeline.project_biases is True
+        assert pipeline.use_chat_template is True
+        assert pipeline.use_whitened_svd is True
+        assert pipeline.true_iterative_refinement is True
+
+    def test_custom_flags(self):
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            run_cone_analysis=False,
+            run_alignment_detection=False,
+            ouroboros_threshold=0.3,
+            max_ouroboros_passes=5,
+            entanglement_gate=0.9,
+        )
+        assert p._run_cone is False
+        assert p._run_alignment is False
+        assert p._ouroboros_threshold == 0.3
+        assert p._max_ouroboros_passes == 5
+        assert p._entanglement_gate == 0.9
+
+
+# ---------------------------------------------------------------------------
+# Configuration derivation
+# ---------------------------------------------------------------------------
+
+class TestConfigurationDerivation:
+    """Test the _derive_configuration logic with various insights."""
+
+    def _make_pipeline_with_insights(self, **kwargs):
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            on_log=lambda m: None,
+        )
+        for k, v in kwargs.items():
+            setattr(p._insights, k, v)
+        return p
+
+    def test_polyhedral_cone_more_directions(self):
+        p = self._make_pipeline_with_insights(
+            cone_is_polyhedral=True,
+            cone_dimensionality=3.5,
+        )
+        p._derive_configuration()
+        # Polyhedral with dim 3.5 → n_dirs = max(4, min(8, int(3.5*2))) = 7
+        assert p.n_directions == 7
+
+    def test_linear_cone_fewer_directions(self):
+        p = self._make_pipeline_with_insights(
+            cone_is_polyhedral=False,
+            cone_dimensionality=1.0,
+        )
+        p._derive_configuration()
+        # Linear with dim 1.0 → n_dirs = max(1, min(4, int(1.0+1))) = 2
+        assert p.n_directions == 2
+
+    def test_dpo_zero_regularization(self):
+        p = self._make_pipeline_with_insights(
+            detected_alignment_method="dpo",
+            entanglement_score=0.1,
+        )
+        p._derive_configuration()
+        assert p.regularization == 0.0
+
+    def test_rlhf_moderate_regularization(self):
+        p = self._make_pipeline_with_insights(
+            detected_alignment_method="rlhf",
+            entanglement_score=0.2,
+        )
+        p._derive_configuration()
+        assert p.regularization == 0.15
+
+    def test_cai_regularization(self):
+        p = self._make_pipeline_with_insights(
+            detected_alignment_method="cai",
+            entanglement_score=0.2,
+        )
+        p._derive_configuration()
+        assert p.regularization == 0.2
+
+    def test_sft_low_regularization(self):
+        p = self._make_pipeline_with_insights(
+            detected_alignment_method="sft",
+            entanglement_score=0.1,
+        )
+        p._derive_configuration()
+        assert p.regularization == 0.05
+
+    def test_high_entanglement_increases_regularization(self):
+        p = self._make_pipeline_with_insights(
+            detected_alignment_method="dpo",
+            entanglement_score=0.7,
+        )
+        p._derive_configuration()
+        # DPO base = 0.0, + 0.15 for high entanglement = 0.15
+        assert p.regularization == 0.15
+
+    def test_high_self_repair_more_passes(self):
+        p = self._make_pipeline_with_insights(
+            self_repair_estimate=0.8,
+        )
+        p._derive_configuration()
+        assert p.refinement_passes == 3
+
+    def test_moderate_self_repair_two_passes(self):
+        p = self._make_pipeline_with_insights(
+            self_repair_estimate=0.5,
+        )
+        p._derive_configuration()
+        assert p.refinement_passes == 2
+
+    def test_low_self_repair_one_pass(self):
+        p = self._make_pipeline_with_insights(
+            self_repair_estimate=0.2,
+        )
+        p._derive_configuration()
+        assert p.refinement_passes == 1
+
+    def test_cluster_layers_used(self):
+        p = self._make_pipeline_with_insights(
+            cluster_representative_layers=[5, 10, 15],
+            direction_clusters=[[3, 4, 5], [9, 10, 11], [14, 15, 16]],
+        )
+        p.refusal_directions = {i: torch.randn(64) for i in range(20)}
+        p._derive_configuration()
+        # Should include all cluster layers
+        assert 5 in p._insights.recommended_layers
+        assert 10 in p._insights.recommended_layers
+
+    def test_entangled_layers_skipped(self):
+        p = self._make_pipeline_with_insights(
+            cluster_representative_layers=[5, 10, 15],
+            direction_clusters=[[3, 4, 5], [9, 10, 11], [14, 15, 16]],
+            entangled_layers=[10],
+        )
+        p._derive_configuration()
+        # Layer 10 should be skipped
+        assert 10 not in p._insights.recommended_layers
+        assert 10 in p._insights.skip_layers
+
+    def test_sparse_surgery_enabled_when_rsi_high(self):
+        p = self._make_pipeline_with_insights(
+            mean_refusal_sparsity_index=0.7,
+        )
+        p._sparse_threshold = 0.5
+        p._derive_configuration()
+        assert p._insights.use_sparse_surgery is True
+
+    def test_sparse_surgery_disabled_when_rsi_low(self):
+        p = self._make_pipeline_with_insights(
+            mean_refusal_sparsity_index=0.3,
+        )
+        p._sparse_threshold = 0.5
+        p._derive_configuration()
+        assert p._insights.use_sparse_surgery is False
+
+    def test_whitened_svd_for_multi_direction(self):
+        p = self._make_pipeline_with_insights(
+            cone_is_polyhedral=True,
+            cone_dimensionality=2.5,
+        )
+        p._derive_configuration()
+        assert p.n_directions > 1
+        assert p.use_whitened_svd is True
+
+    def test_no_whitened_svd_for_single_direction(self):
+        p = self._make_pipeline_with_insights(
+            cone_is_polyhedral=False,
+            cone_dimensionality=0.5,
+        )
+        p._derive_configuration()
+        # dim 0.5 → max(1, min(4, int(0.5+1))) = 1
+        assert p.n_directions == 1
+        assert p.use_whitened_svd is False
+
+
+# ---------------------------------------------------------------------------
+# Format report
+# ---------------------------------------------------------------------------
+
+class TestFormatInsights:
+    def test_format_default(self, insights):
+        text = InformedAbliterationPipeline.format_insights(insights)
+        assert "Analysis-Informed Pipeline" in text
+        assert "UNKNOWN" in text  # detected method
+        assert "LINEAR" in text  # cone type
+
+    def test_format_polyhedral(self):
+        insights = AnalysisInsights(
+            detected_alignment_method="dpo",
+            alignment_confidence=0.85,
+            cone_is_polyhedral=True,
+            cone_dimensionality=3.5,
+            cluster_count=4,
+        )
+        text = InformedAbliterationPipeline.format_insights(insights)
+        assert "DPO" in text
+        assert "POLYHEDRAL" in text
+        assert "3.50" in text
+
+    def test_format_includes_derived_config(self, insights):
+        insights.recommended_n_directions = 6
+        insights.recommended_regularization = 0.2
+        insights.recommended_refinement_passes = 3
+        text = InformedAbliterationPipeline.format_insights(insights)
+        assert "n_directions: 6" in text
+        assert "regularization: 0.2" in text
+        assert "refinement_passes: 3" in text
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+class TestEdgeCases:
+    def test_no_cluster_layers_falls_back(self):
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            on_log=lambda m: None,
+        )
+        p._insights.cluster_representative_layers = []
+        p._derive_configuration()
+        assert p._insights.recommended_layers == []
+
+    def test_regularization_capped(self):
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            on_log=lambda m: None,
+        )
+        p._insights.detected_alignment_method = "cai"
+        p._insights.entanglement_score = 0.9
+        p._derive_configuration()
+        # CAI base = 0.2, + 0.15 = 0.35, capped at 0.5
+        assert p.regularization <= 0.5
+
+    def test_all_layers_entangled_keeps_some(self):
+        """If all cluster layers are entangled, don't skip all of them."""
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            on_log=lambda m: None,
+        )
+        p._insights.cluster_representative_layers = [5]
+        p._insights.direction_clusters = [[5]]
+        p._insights.entangled_layers = [5]
+        p._derive_configuration()
+        # Should NOT skip the only layer
+        assert 5 in p._insights.recommended_layers
+
+    def test_cone_dimensionality_bounds(self):
+        """Extreme cone dimensionality values are handled."""
+        p = InformedAbliterationPipeline(
+            model_name="test",
+            on_log=lambda m: None,
+        )
+        # Very high dimensionality
+        p._insights.cone_is_polyhedral = True
+        p._insights.cone_dimensionality = 10.0
+        p._derive_configuration()
+        assert p.n_directions <= 8  # capped
+
+        # Very low dimensionality
+        p._insights.cone_is_polyhedral = False
+        p._insights.cone_dimensionality = 0.1
+        p._derive_configuration()
+        assert p.n_directions >= 1  # at least 1
diff --git a/tests/test_logit_lens.py b/tests/test_logit_lens.py
new file mode 100644
index 0000000..fd66dd5
--- /dev/null
+++ b/tests/test_logit_lens.py
@@ -0,0 +1,172 @@
+"""Tests for logit lens refusal direction analysis."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import torch
+
+from obliteratus.analysis.logit_lens import (
+    RefusalLogitLens,
+    LogitLensResult,
+    MultiLayerLogitLensResult,
+    REFUSAL_TOKENS,
+    COMPLIANCE_TOKENS,
+)
+
+
+def _make_mock_model(hidden_dim=32, vocab_size=100):
+    """Create a mock model with LM head and layer norm."""
+    model = MagicMock()
+
+    # LM head weight (vocab_size, hidden_dim)
+    lm_head = MagicMock()
+    lm_head.weight = MagicMock()
+    lm_head.weight.data = torch.randn(vocab_size, hidden_dim)
+    model.lm_head = lm_head
+
+    # Final LayerNorm
+    ln_f = MagicMock()
+    ln_f.weight = MagicMock()
+    ln_f.weight.data = torch.ones(hidden_dim)
+    ln_f.bias = MagicMock()
+    ln_f.bias.data = torch.zeros(hidden_dim)
+    model.transformer = MagicMock()
+    model.transformer.ln_f = ln_f
+
+    return model
+
+
+def _make_mock_tokenizer(vocab_size=100):
+    """Create a mock tokenizer."""
+    tokenizer = MagicMock()
+
+    def mock_decode(ids):
+        if isinstance(ids, list) and len(ids) == 1:
+            return f"tok_{ids[0]}"
+        return f"tok_{ids}"
+
+    def mock_encode(text, add_special_tokens=False):
+        # Return a deterministic token ID based on the text
+        return [hash(text) % vocab_size]
+
+    tokenizer.decode = mock_decode
+    tokenizer.encode = mock_encode
+    return tokenizer
+
+
+class TestRefusalLogitLens:
+    def test_basic_analysis(self):
+        """Should produce a LogitLensResult with expected fields."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        direction = torch.randn(32)
+
+        lens = RefusalLogitLens(top_k=10)
+        result = lens.analyze_direction(direction, model, tokenizer, layer_idx=5)
+
+        assert isinstance(result, LogitLensResult)
+        assert result.layer_idx == 5
+        assert len(result.top_promoted) == 10
+        assert len(result.top_suppressed) == 10
+        assert isinstance(result.refusal_specificity, float)
+        assert isinstance(result.logit_effect_entropy, float)
+        assert isinstance(result.refusal_compliance_gap, float)
+
+    def test_promoted_suppressed_ordering(self):
+        """Top promoted should have higher logit boost than top suppressed."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        direction = torch.randn(32)
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_direction(direction, model, tokenizer)
+
+        # Promoted tokens should have positive-ish values
+        # Suppressed tokens should have negative-ish values
+        max_promoted = max(v for _, v in result.top_promoted)
+        min_suppressed = min(v for _, v in result.top_suppressed)
+        assert max_promoted > min_suppressed
+
+    def test_multi_layer_analysis(self):
+        """Should analyze multiple layers."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        directions = {0: torch.randn(32), 1: torch.randn(32), 2: torch.randn(32)}
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_all_layers(directions, model, tokenizer)
+
+        assert isinstance(result, MultiLayerLogitLensResult)
+        assert len(result.per_layer) == 3
+        assert result.strongest_refusal_layer in [0, 1, 2]
+        assert result.peak_specificity_layer in [0, 1, 2]
+
+    def test_strong_layers_filter(self):
+        """Should only analyze specified strong layers."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        directions = {i: torch.randn(32) for i in range(10)}
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_all_layers(
+            directions, model, tokenizer, strong_layers=[2, 5]
+        )
+        assert set(result.per_layer.keys()) == {2, 5}
+
+    def test_handles_unnormalized_direction(self):
+        """Should handle non-unit directions."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        direction = torch.randn(32) * 100.0  # large magnitude
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_direction(direction, model, tokenizer)
+        # Should still produce valid results
+        assert len(result.top_promoted) == 5
+
+    def test_format_report(self):
+        """Format report should produce readable output."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        directions = {0: torch.randn(32), 1: torch.randn(32)}
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_all_layers(directions, model, tokenizer)
+        report = RefusalLogitLens.format_report(result)
+        assert "Logit Lens" in report
+        assert "Layer 0:" in report
+
+    def test_empty_directions(self):
+        """Should handle empty input gracefully."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_all_layers({}, model, tokenizer)
+        assert len(result.per_layer) == 0
+
+    def test_token_lists_nonempty(self):
+        """Refusal and compliance token lists should have entries."""
+        assert len(REFUSAL_TOKENS) > 10
+        assert len(COMPLIANCE_TOKENS) > 10
+
+    def test_entropy_nonnegative(self):
+        """Logit effect entropy should be non-negative."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        direction = torch.randn(32)
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_direction(direction, model, tokenizer)
+        assert result.logit_effect_entropy >= 0
+
+    def test_2d_direction_input(self):
+        """Should handle 2D direction input (unsqueezed)."""
+        model = _make_mock_model()
+        tokenizer = _make_mock_tokenizer()
+        direction = torch.randn(1, 32)
+
+        lens = RefusalLogitLens(top_k=5)
+        result = lens.analyze_direction(direction, model, tokenizer)
+        assert len(result.top_promoted) == 5
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
new file mode 100644
index 0000000..004eee9
--- /dev/null
+++ b/tests/test_metrics.py
@@ -0,0 +1,60 @@
+"""Tests for evaluation metrics."""
+
+from __future__ import annotations
+
+
+import torch
+
+from obliteratus.evaluation.metrics import accuracy, f1_score_metric, perplexity
+
+
+class TestPerplexity:
+    def test_perfect_prediction(self):
+        # Create logits that strongly predict the correct next token
+        vocab_size = 10
+        seq_len = 5
+        batch_size = 1
+
+        labels = torch.tensor([[0, 1, 2, 3, 4]])
+        logits = torch.full((batch_size, seq_len, vocab_size), -100.0)
+        # Set high logit for the correct next token
+        for t in range(seq_len - 1):
+            logits[0, t, labels[0, t + 1]] = 100.0
+
+        ppl = perplexity(logits, labels)
+        assert ppl < 2.0, f"Expected near-1 perplexity, got {ppl}"
+
+    def test_random_prediction_higher(self):
+        vocab_size = 100
+        seq_len = 20
+        batch_size = 2
+
+        torch.manual_seed(42)
+        logits = torch.randn(batch_size, seq_len, vocab_size)
+        labels = torch.randint(0, vocab_size, (batch_size, seq_len))
+
+        ppl = perplexity(logits, labels)
+        assert ppl > 10, f"Random logits should yield high perplexity, got {ppl}"
+
+
+class TestAccuracy:
+    def test_perfect(self):
+        assert accuracy([1, 2, 3], [1, 2, 3]) == 1.0
+
+    def test_zero(self):
+        assert accuracy([1, 2, 3], [4, 5, 6]) == 0.0
+
+    def test_partial(self):
+        assert accuracy([1, 2, 3, 4], [1, 2, 0, 0]) == 0.5
+
+    def test_empty(self):
+        assert accuracy([], []) == 0.0
+
+
+class TestF1:
+    def test_perfect(self):
+        assert f1_score_metric([0, 1, 0, 1], [0, 1, 0, 1]) == 1.0
+
+    def test_zero(self):
+        score = f1_score_metric([0, 0, 0, 0], [1, 1, 1, 1])
+        assert score == 0.0
diff --git a/tests/test_module_imports.py b/tests/test_module_imports.py
new file mode 100644
index 0000000..d564891
--- /dev/null
+++ b/tests/test_module_imports.py
@@ -0,0 +1,85 @@
+"""Smoke tests verifying all new modules are importable from package level."""
+
+from __future__ import annotations
+
+
+class TestTopLevelImports:
+    """Verify obliteratus top-level exports."""
+
+    def test_set_seed(self):
+        from obliteratus import set_seed
+        assert callable(set_seed)
+
+    def test_run_sweep(self):
+        from obliteratus import run_sweep
+        assert callable(run_sweep)
+
+    def test_sweep_config(self):
+        from obliteratus import SweepConfig
+        cfg = SweepConfig(
+            model_name="test",
+            sweep_params={"n_directions": [1, 2]},
+        )
+        assert cfg.model_name == "test"
+
+    def test_sweep_result(self):
+        from obliteratus import SweepResult
+        r = SweepResult(
+            params={"n_directions": 1},
+            seed=42,
+            quality_metrics={},
+            stage_durations={},
+            strong_layers=[],
+        )
+        assert r.seed == 42
+
+
+class TestEvaluationImports:
+    """Verify evaluation subpackage exports."""
+
+    def test_refusal_rate_with_ci(self):
+        from obliteratus.evaluation import refusal_rate_with_ci
+        result = refusal_rate_with_ci(["Sure, here you go."], mode="combined")
+        assert result["rate"] == 0.0
+        assert result["n_samples"] == 1
+
+    def test_random_direction_ablation(self):
+        from obliteratus.evaluation import random_direction_ablation
+        assert callable(random_direction_ablation)
+
+    def test_direction_specificity_test(self):
+        from obliteratus.evaluation import direction_specificity_test
+        assert callable(direction_specificity_test)
+
+    def test_run_benchmarks(self):
+        from obliteratus.evaluation import run_benchmarks
+        assert callable(run_benchmarks)
+
+    def test_compare_models(self):
+        from obliteratus.evaluation import compare_models
+        assert callable(compare_models)
+
+
+class TestDirectImports:
+    """Verify direct module imports still work."""
+
+    def test_reproducibility(self):
+        from obliteratus.reproducibility import set_seed
+        import torch
+        set_seed(999, deterministic=False)
+        a = torch.randn(10)
+        set_seed(999, deterministic=False)
+        b = torch.randn(10)
+        assert torch.equal(a, b)
+
+    def test_baselines(self):
+        from obliteratus.evaluation.baselines import (
+            BaselineResult,
+        )
+        assert BaselineResult is not None
+
+    def test_lm_eval_integration(self):
+        from obliteratus.evaluation.lm_eval_integration import (
+            run_benchmarks,
+        )
+        assert callable(run_benchmarks)
diff --git a/tests/test_new_analysis_modules.py b/tests/test_new_analysis_modules.py
new file mode 100644
index 0000000..e2060d7
--- /dev/null
+++ b/tests/test_new_analysis_modules.py
@@ -0,0 +1,672 @@
+"""Tests for the five new analysis modules:
+  1. Tuned Lens (learned-affine logit lens variant)
+  2. Activation Patching (real interchange intervention)
+  3. Enhanced SAE Decomposition Pipeline
+  4. Wasserstein-Optimal Direction Extraction
+  5. Bayesian-Optimized Kernel Projection
+"""
+
+from __future__ import annotations
+
+
+import pytest
+import torch
+import torch.nn as nn
+
+from obliteratus.analysis.tuned_lens import (
+    TunedLensTrainer,
+    TunedLensProbe,
+    RefusalTunedLens,
+    TunedLensResult,
+    MultiLayerTunedLensResult,
+)
+from obliteratus.analysis.activation_patching import (
+    ActivationPatcher,
+    PatchingSite,
+    ActivationPatchingResult,
+)
+from obliteratus.analysis.sae_abliteration import (
+    SAEDecompositionPipeline,
+    SAEDecompositionResult,
+    FeatureClusterResult,
+)
+from obliteratus.analysis.wasserstein_optimal import (
+    WassersteinOptimalExtractor,
+    WassersteinDirectionResult,
+    WassersteinComparisonResult,
+    MultiLayerWassersteinResult,
+)
+from obliteratus.analysis.bayesian_kernel_projection import (
+    BayesianKernelProjection,
+    BayesianOptimizationResult,
+    ProjectionConfig,
+)
+
+
+# ---------------------------------------------------------------------------
+#  Helpers
+# ---------------------------------------------------------------------------
+
+def _make_activations(
+    hidden_dim=32, n_per_class=20, separation=2.0, seed=42,
+):
+    """Create harmful/harmless activations with planted refusal signal."""
+    torch.manual_seed(seed)
+    direction = torch.randn(hidden_dim)
+    direction = direction / direction.norm()
+
+    harmful = [
+        torch.randn(hidden_dim) * 0.3 + separation * direction
+        for _ in range(n_per_class)
+    ]
+    harmless = [
+        torch.randn(hidden_dim) * 0.3
+        for _ in range(n_per_class)
+    ]
+    return harmful, harmless, direction
+
+
+def _make_multilayer_activations(
+    n_layers=6, hidden_dim=32, n_per_class=20, separation=2.0, seed=42,
+):
+    """Create per-layer activations with planted refusal signals."""
+    torch.manual_seed(seed)
+
+    harmful_acts = {}
+    harmless_acts = {}
+    directions = {}
+
+    for li in range(n_layers):
+        d = torch.randn(hidden_dim)
+        d = d / d.norm()
+        directions[li] = d
+
+        strength = separation if 1 <= li <= n_layers - 2 else 0.3
+        harmful_acts[li] = [
+            torch.randn(hidden_dim) * 0.3 + strength * d
+            for _ in range(n_per_class)
+        ]
+        harmless_acts[li] = [
+            torch.randn(hidden_dim) * 0.3
+            for _ in range(n_per_class)
+        ]
+
+    return harmful_acts, harmless_acts, directions
+
+
+class FakeTokenizer:
+    """Fake tokenizer that maps strings to reproducible token IDs."""
+
+    def __init__(self, vocab_size=100):
+        self.vocab_size = vocab_size
+
+    def encode(self, text, add_special_tokens=False):
+        return [hash(text) % self.vocab_size]
+
+    def decode(self, ids):
+        return f"tok_{ids[0]}"
+
+
+class FakeModel(nn.Module):
+    """Fake model with lm_head and transformer.ln_f for testing."""
+
+    def __init__(self, hidden_dim=32, vocab_size=100, n_layers=4):
+        super().__init__()
+        self.hidden_dim = hidden_dim
+        self.vocab_size = vocab_size
+        self.n_layers = n_layers
+
+        self.lm_head = nn.Linear(hidden_dim, vocab_size, bias=False)
+        self.transformer = nn.Module()
+        self.transformer.ln_f = nn.LayerNorm(hidden_dim)
+        self.transformer.h = nn.ModuleList([
+            nn.Linear(hidden_dim, hidden_dim) for _ in range(n_layers)
+        ])
+
+    def forward(self, input_ids):
+        # Fake forward pass
+        batch_size, seq_len = input_ids.shape
+        x = torch.randn(batch_size, seq_len, self.hidden_dim)
+        for layer in self.transformer.h:
+            x = layer(x) + x
+        logits = self.lm_head(self.transformer.ln_f(x))
+        return type('Output', (), {'logits': logits})()
+
+
+# ===========================================================================
+#  Tests: Tuned Lens
+# ===========================================================================
+
+class TestTunedLensTrainer:
+    def test_train_single_probe(self):
+        hidden_dim = 16
+        n_samples = 30
+
+        layer_acts = torch.randn(n_samples, hidden_dim)
+        final_acts = layer_acts + torch.randn(n_samples, hidden_dim) * 0.1
+
+        trainer = TunedLensTrainer(hidden_dim, n_epochs=20)
+        probe = trainer.train_probe(layer_acts, final_acts, layer_idx=3)
+
+        assert isinstance(probe, TunedLensProbe)
+        assert probe.layer_idx == 3
+        assert probe.weight.shape == (hidden_dim, hidden_dim)
+        assert probe.bias.shape == (hidden_dim,)
+        assert probe.train_loss < 1.0  # should converge somewhat
+
+    def test_train_all_layers(self):
+        hidden_dim = 16
+        n_samples = 20
+
+        layer_acts = {
+            i: torch.randn(n_samples, hidden_dim) for i in range(4)
+        }
+        final_acts = torch.randn(n_samples, hidden_dim)
+
+        trainer = TunedLensTrainer(hidden_dim, n_epochs=10)
+        probes = trainer.train_all_layers(layer_acts, final_acts)
+
+        assert len(probes) == 4
+        for i in range(4):
+            assert i in probes
+            assert probes[i].weight.shape == (hidden_dim, hidden_dim)
+
+    def test_probe_near_identity_for_final_layer(self):
+        """Probe for the final layer should be close to identity."""
+        hidden_dim = 16
+        n_samples = 50
+
+        acts = torch.randn(n_samples, hidden_dim)
+        trainer = TunedLensTrainer(hidden_dim, n_epochs=50)
+        probe = trainer.train_probe(acts, acts, layer_idx=0)
+
+        # Weight should be close to identity
+        identity = torch.eye(hidden_dim)
+        diff = (probe.weight - identity).norm().item()
+        assert diff < 1.0
+
+
+class TestRefusalTunedLens:
+    def test_analyze_direction(self):
+        hidden_dim = 32
+        vocab_size = 100
+
+        model = FakeModel(hidden_dim, vocab_size)
+        tokenizer = FakeTokenizer(vocab_size)
+
+        direction = torch.randn(hidden_dim)
+        probe = TunedLensProbe(
+            layer_idx=2,
+            weight=torch.eye(hidden_dim) + torch.randn(hidden_dim, hidden_dim) * 0.01,
+            bias=torch.zeros(hidden_dim),
+            train_loss=0.01,
+        )
+
+        lens = RefusalTunedLens(top_k=10)
+        result = lens.analyze_direction(direction, probe, model, tokenizer)
+
+        assert isinstance(result, TunedLensResult)
+        assert result.layer_idx == 2
+        assert len(result.top_promoted) <= 10
+        assert len(result.top_suppressed) <= 10
+        assert isinstance(result.correction_magnitude, float)
+        assert result.correction_magnitude >= 0
+
+    def test_analyze_all_layers(self):
+        hidden_dim = 32
+        vocab_size = 100
+
+        model = FakeModel(hidden_dim, vocab_size)
+        tokenizer = FakeTokenizer(vocab_size)
+
+        directions = {
+            i: torch.randn(hidden_dim) for i in range(4)
+        }
+        probes = {
+            i: TunedLensProbe(
+                layer_idx=i,
+                weight=torch.eye(hidden_dim),
+                bias=torch.zeros(hidden_dim),
+                train_loss=0.01,
+            )
+            for i in range(4)
+        }
+
+        lens = RefusalTunedLens(top_k=5)
+        result = lens.analyze_all_layers(directions, probes, model, tokenizer)
+
+        assert isinstance(result, MultiLayerTunedLensResult)
+        assert len(result.per_layer) == 4
+        assert result.strongest_refusal_layer in range(4)
+
+    def test_compare_with_logit_lens(self):
+        logit_gaps = {0: 0.1, 1: 0.5, 2: 0.3, 3: 0.8}
+
+        tuned_result = MultiLayerTunedLensResult(
+            per_layer={
+                i: TunedLensResult(
+                    layer_idx=i,
+                    top_promoted=[], top_suppressed=[],
+                    refusal_token_mean_boost=0.0,
+                    compliance_token_mean_boost=0.0,
+                    refusal_compliance_gap=v * 1.1,  # similar ranking
+                    correction_magnitude=0.1,
+                )
+                for i, v in logit_gaps.items()
+            },
+            probes={},
+            strongest_refusal_layer=3,
+            peak_gap_layer=3,
+            mean_refusal_compliance_gap=0.5,
+            logit_lens_agreement=0.0,
+        )
+
+        agreement = RefusalTunedLens.compare_with_logit_lens(tuned_result, logit_gaps)
+        # Same ranking → correlation should be 1.0
+        assert agreement == pytest.approx(1.0, abs=0.01)
+
+    def test_format_report(self):
+        result = MultiLayerTunedLensResult(
+            per_layer={},
+            probes={},
+            strongest_refusal_layer=0,
+            peak_gap_layer=0,
+            mean_refusal_compliance_gap=0.0,
+            logit_lens_agreement=0.0,
+        )
+        report = RefusalTunedLens.format_report(result)
+        assert "Tuned Lens" in report
+        assert "No layers analyzed" in report
+
+
+# ===========================================================================
+#  Tests: Activation Patching
+# ===========================================================================
+
+class TestActivationPatcher:
+    def test_patching_site_creation(self):
+        site = PatchingSite(layer_idx=3, component="residual")
+        assert site.layer_idx == 3
+        assert site.component == "residual"
+        assert site.head_idx is None
+
+    def test_patching_site_with_head(self):
+        site = PatchingSite(layer_idx=2, component="attn_head", head_idx=5)
+        assert site.head_idx == 5
+
+    def test_patch_sweep_with_model(self):
+        """Test full patching sweep on fake model."""
+        hidden_dim = 32
+        model = FakeModel(hidden_dim, vocab_size=100, n_layers=4)
+
+        clean_ids = torch.randint(0, 100, (1, 10))
+        corrupted_ids = torch.randint(0, 100, (1, 10))
+
+        patcher = ActivationPatcher(significance_threshold=0.05)
+
+        result = patcher.patch_sweep(
+            model, clean_ids, corrupted_ids,
+            mode="noising",
+        )
+
+        assert isinstance(result, ActivationPatchingResult)
+        assert result.patching_mode == "noising"
+        assert result.n_layers == 4
+        assert len(result.effects) > 0
+        assert isinstance(result.circuit_fraction, float)
+        assert 0.0 <= result.circuit_fraction <= 1.0
+
+    def test_patch_sweep_denoising(self):
+        hidden_dim = 32
+        model = FakeModel(hidden_dim, vocab_size=100, n_layers=4)
+
+        clean_ids = torch.randint(0, 100, (1, 10))
+        corrupted_ids = torch.randint(0, 100, (1, 10))
+
+        patcher = ActivationPatcher()
+        result = patcher.patch_sweep(
+            model, clean_ids, corrupted_ids,
+            mode="denoising",
+        )
+
+        assert result.patching_mode == "denoising"
+
+    def test_custom_metric(self):
+        hidden_dim = 32
+        model = FakeModel(hidden_dim, vocab_size=100, n_layers=4)
+
+        clean_ids = torch.randint(0, 100, (1, 10))
+        corrupted_ids = torch.randint(0, 100, (1, 10))
+
+        def custom_metric(logits):
+            return logits.sum().item()
+
+        patcher = ActivationPatcher(metric_fn=custom_metric)
+        result = patcher.patch_sweep(model, clean_ids, corrupted_ids)
+
+        assert isinstance(result, ActivationPatchingResult)
+        assert isinstance(result.clean_baseline, float)
+
+    def test_format_report(self):
+        result = ActivationPatchingResult(
+            n_layers=4,
+            n_sites=4,
+            patching_mode="noising",
+            effects=[],
+            clean_baseline=1.0,
+            corrupted_baseline=0.0,
+            total_effect=1.0,
+            significant_sites=[],
+            circuit_fraction=0.0,
+            top_causal_layers=[],
+        )
+        report = ActivationPatcher.format_report(result)
+        assert "Activation Patching" in report
+        assert "noising" in report
+
+
+# ===========================================================================
+#  Tests: Enhanced SAE Decomposition Pipeline
+# ===========================================================================
+
+class TestSAEDecompositionPipeline:
+    def test_basic_pipeline(self):
+        harmful, harmless, _ = _make_activations(hidden_dim=16, n_per_class=30, separation=2.0)
+
+        pipeline = SAEDecompositionPipeline(
+            expansion=2, n_epochs=10, top_k_features=8, n_clusters=3,
+        )
+        result = pipeline.run(harmful, harmless, layer_idx=0)
+
+        assert isinstance(result, SAEDecompositionResult)
+        assert result.layer_idx == 0
+        assert result.sae is not None
+        assert result.refusal_features.n_refusal_features == 8
+        assert len(result.feature_sparsity) == 8
+        assert len(result.feature_monosemanticity) == 8
+        assert len(result.per_feature_refusal_reduction) == 8
+        assert len(result.cumulative_refusal_reduction) == 8
+        assert 0.0 <= result.raw_direction_overlap <= 1.0
+
+    def test_feature_clustering(self):
+        harmful, harmless, _ = _make_activations(hidden_dim=16, n_per_class=30)
+
+        pipeline = SAEDecompositionPipeline(
+            expansion=2, n_epochs=10, top_k_features=8, n_clusters=3,
+        )
+        result = pipeline.run(harmful, harmless)
+
+        clusters = result.feature_clusters
+        assert clusters is not None
+        assert isinstance(clusters, FeatureClusterResult)
+        assert clusters.n_clusters == 3
+        assert len(clusters.cluster_labels) == 8
+        assert all(0 <= lbl < 3 for lbl in clusters.cluster_labels)
+        assert clusters.cluster_directions.shape[0] == 3
+        assert -1.0 <= clusters.silhouette_score <= 1.0
+
+    def test_cumulative_reduction_monotonic(self):
+        harmful, harmless, _ = _make_activations(hidden_dim=16, n_per_class=30, separation=3.0)
+
+        pipeline = SAEDecompositionPipeline(expansion=2, n_epochs=10, top_k_features=6)
+        result = pipeline.run(harmful, harmless)
+
+        # Cumulative reduction should be non-decreasing
+        for i in range(1, len(result.cumulative_refusal_reduction)):
+            assert result.cumulative_refusal_reduction[i] >= result.cumulative_refusal_reduction[i - 1] - 1e-6
+
+    def test_format_report(self):
+        harmful, harmless, _ = _make_activations(hidden_dim=16, n_per_class=20)
+        pipeline = SAEDecompositionPipeline(expansion=2, n_epochs=5, top_k_features=4, n_clusters=2)
+        result = pipeline.run(harmful, harmless)
+
+        report = SAEDecompositionPipeline.format_report(result)
+        assert "SAE Feature Decomposition" in report
+        assert "Variance explained" in report
+
+
+# ===========================================================================
+#  Tests: Wasserstein-Optimal Direction Extraction
+# ===========================================================================
+
+class TestWassersteinOptimalExtractor:
+    def test_basic_extraction(self):
+        harmful, harmless, planted_dir = _make_activations(
+            hidden_dim=32, n_per_class=30, separation=3.0,
+        )
+
+        extractor = WassersteinOptimalExtractor()
+        result = extractor.extract(harmful, harmless, layer_idx=0)
+
+        assert isinstance(result, WassersteinDirectionResult)
+        assert result.layer_idx == 0
+        assert result.direction.shape == (32,)
+        assert abs(result.direction.norm().item() - 1.0) < 1e-5
+        assert result.wasserstein_cost >= 0
+        assert result.mean_shift_component >= 0
+        assert result.bures_component >= 0
+        assert result.cost_effectiveness_ratio >= 0
+
+    def test_direction_captures_signal(self):
+        """Wasserstein direction should have non-trivial refusal projection."""
+        harmful, harmless, planted_dir = _make_activations(
+            hidden_dim=32, n_per_class=30, separation=3.0,
+        )
+
+        extractor = WassersteinOptimalExtractor()
+        result = extractor.extract(harmful, harmless)
+
+        # Direction should have some alignment with planted signal
+        cosine = abs((result.direction @ planted_dir).item())
+        assert cosine > 0.1  # not totally orthogonal
+
+    def test_extract_all_layers(self):
+        harmful_acts, harmless_acts, _ = _make_multilayer_activations(
+            n_layers=4, hidden_dim=16, n_per_class=20,
+        )
+
+        extractor = WassersteinOptimalExtractor()
+        result = extractor.extract_all_layers(harmful_acts, harmless_acts)
+
+        assert isinstance(result, MultiLayerWassersteinResult)
+        assert len(result.per_layer) == 4
+        assert result.best_layer in range(4)
+        assert result.mean_cost_ratio >= 0
+
+    def test_compare_with_alternatives(self):
+        harmful, harmless, planted_dir = _make_activations(
+            hidden_dim=16, n_per_class=30, separation=3.0,
+        )
+
+        extractor = WassersteinOptimalExtractor()
+        w_result = extractor.extract(harmful, harmless)
+
+        # Use planted direction as "Fisher" and diff-in-means
+        H = torch.stack(harmful).float()
+        B = torch.stack(harmless).float()
+        dim_dir = (H.mean(0) - B.mean(0))
+        dim_dir = dim_dir / dim_dir.norm()
+
+        comparison = extractor.compare_with_alternatives(
+            w_result, harmful, harmless,
+            fisher_direction=planted_dir,
+            dim_direction=dim_dir,
+        )
+
+        assert isinstance(comparison, WassersteinComparisonResult)
+        assert comparison.wasserstein_cost_ratio >= 0
+        assert comparison.fisher_cost_ratio is not None
+        assert comparison.dim_cost_ratio is not None
+        assert 0 <= comparison.cosine_wasserstein_fisher <= 1
+        assert 0 <= comparison.cosine_wasserstein_dim <= 1
+
+    def test_wasserstein_lower_cost_than_dim(self):
+        """Wasserstein-optimal should have lower cost ratio than diff-in-means."""
+        harmful, harmless, _ = _make_activations(
+            hidden_dim=32, n_per_class=50, separation=2.0,
+        )
+
+        extractor = WassersteinOptimalExtractor()
+        w_result = extractor.extract(harmful, harmless)
+
+        H = torch.stack(harmful).float()
+        B = torch.stack(harmless).float()
+        dim_dir = (H.mean(0) - B.mean(0))
+        dim_dir = dim_dir / dim_dir.norm()
+
+        comparison = extractor.compare_with_alternatives(
+            w_result, harmful, harmless, dim_direction=dim_dir,
+        )
+
+        # Wasserstein should have lower or equal cost ratio by construction
+        assert comparison.wasserstein_cost_ratio <= comparison.dim_cost_ratio + 1e-4
+
+    def test_format_report(self):
+        harmful, harmless, _ = _make_activations(hidden_dim=16, n_per_class=20)
+        extractor = WassersteinOptimalExtractor()
+        result = extractor.extract_all_layers(
+            {0: harmful, 1: harmful},
+            {0: harmless, 1: harmless},
+        )
+        report = WassersteinOptimalExtractor.format_report(result)
+        assert "Wasserstein" in report
+        assert "cost ratio" in report.lower()
+
+
+# ===========================================================================
+#  Tests: Bayesian-Optimized Kernel Projection
+# ===========================================================================
+
+class TestBayesianKernelProjection:
+    def test_basic_optimization(self):
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=6, hidden_dim=16, n_per_class=20,
+        )
+
+        optimizer = BayesianKernelProjection(
+            n_trials=30, refusal_weight=0.6, distortion_weight=0.4,
+        )
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        assert isinstance(result, BayesianOptimizationResult)
+        assert result.n_trials == 30
+        assert result.best_score >= 0
+        assert 0 <= result.best_refusal_reduction <= 1.0
+        assert result.best_harmless_distortion >= 0
+        assert len(result.all_trials) == 30
+
+    def test_best_config_structure(self):
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=4, hidden_dim=16, n_per_class=15,
+        )
+
+        optimizer = BayesianKernelProjection(n_trials=20)
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        config = result.best_config
+        assert isinstance(config, ProjectionConfig)
+        assert config.layer_range[0] <= config.layer_range[1]
+        assert config.n_directions >= 1
+        assert 0 <= config.regularization <= 0.5
+
+    def test_pareto_front(self):
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=6, hidden_dim=16, n_per_class=20,
+        )
+
+        optimizer = BayesianKernelProjection(n_trials=50)
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        # Pareto front should have at least 1 entry
+        assert len(result.pareto_configs) >= 1
+
+        # Pareto entries should be non-dominated
+        for i in range(len(result.pareto_configs) - 1):
+            # Each entry should have lower distortion than the next
+            # (since they're sorted by decreasing refusal reduction)
+            assert (
+                result.pareto_configs[i].harmless_distortion
+                >= result.pareto_configs[i + 1].harmless_distortion - 1e-8
+            )
+
+    def test_layer_importance(self):
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=6, hidden_dim=16, n_per_class=20,
+        )
+
+        optimizer = BayesianKernelProjection(n_trials=50)
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        assert len(result.layer_importance) == 6
+        for _layer, imp in result.layer_importance.items():
+            assert 0 <= imp <= 1.0
+
+    def test_tpe_improves_over_random(self):
+        """TPE phase should produce better configs than random exploration."""
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=6, hidden_dim=16, n_per_class=20,
+        )
+
+        optimizer = BayesianKernelProjection(n_trials=60, seed=42)
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        # Compare average score of first 20 (random) vs last 20 (TPE)
+        first_20 = sorted(result.all_trials[:20], key=lambda t: t.combined_score)
+        last_20 = sorted(result.all_trials[-20:], key=lambda t: t.combined_score)
+
+        best_random = first_20[0].combined_score
+        best_tpe = min(t.combined_score for t in last_20)
+
+        # TPE should find at least as good (lower = better)
+        # This is probabilistic so we allow some slack
+        assert best_tpe <= best_random + 0.3
+
+    def test_empty_input(self):
+        optimizer = BayesianKernelProjection(n_trials=10)
+        result = optimizer.optimize({}, {}, {})
+
+        assert result.n_trials == 0
+        assert result.best_score == 0.0
+
+    def test_format_report(self):
+        harmful_acts, harmless_acts, directions = _make_multilayer_activations(
+            n_layers=4, hidden_dim=16, n_per_class=15,
+        )
+
+        optimizer = BayesianKernelProjection(n_trials=20)
+        result = optimizer.optimize(harmful_acts, harmless_acts, directions)
+
+        report = BayesianKernelProjection.format_report(result)
+        assert "Bayesian" in report
+        assert "Pareto" in report
+        assert "Layer importance" in report
+
+
+# ===========================================================================
+#  Tests: Module imports
+# ===========================================================================
+
+class TestModuleImports:
+    def test_all_new_modules_importable(self):
+        from obliteratus.analysis import TunedLensTrainer
+        from obliteratus.analysis import RefusalTunedLens
+        from obliteratus.analysis import ActivationPatcher
+        from obliteratus.analysis import WassersteinOptimalExtractor
+        from obliteratus.analysis import BayesianKernelProjection
+        from obliteratus.analysis import SAEDecompositionPipeline
+
+        assert TunedLensTrainer is not None
+        assert RefusalTunedLens is not None
+        assert ActivationPatcher is not None
+        assert WassersteinOptimalExtractor is not None
+        assert BayesianKernelProjection is not None
+        assert SAEDecompositionPipeline is not None
+
+    def test_new_modules_in_all(self):
+        import obliteratus.analysis as analysis
+        assert "TunedLensTrainer" in analysis.__all__
+        assert "RefusalTunedLens" in analysis.__all__
+        assert "ActivationPatcher" in analysis.__all__
+        assert "WassersteinOptimalExtractor" in analysis.__all__
+        assert "BayesianKernelProjection" in analysis.__all__
+        assert "SAEDecompositionPipeline" in analysis.__all__
diff --git a/tests/test_novel_analysis.py b/tests/test_novel_analysis.py
new file mode 100644
index 0000000..3da9a3a
--- /dev/null
+++ b/tests/test_novel_analysis.py
@@ -0,0 +1,669 @@
+"""Tests for analysis techniques: concept cones, alignment imprints,
+multi-token position, and sparse direction surgery."""
+
+from __future__ import annotations
+
+
+import torch
+
+from obliteratus.analysis.concept_geometry import (
+    ConceptConeAnalyzer,
+    ConeConeResult,
+    MultiLayerConeResult,
+    CategoryDirection,
+    DEFAULT_HARM_CATEGORIES,
+)
+from obliteratus.analysis.alignment_imprint import (
+    AlignmentImprintDetector,
+    AlignmentImprint,
+    BaseInstructDelta,
+)
+from obliteratus.analysis.multi_token_position import (
+    MultiTokenPositionAnalyzer,
+    PositionAnalysisResult,
+    MultiTokenSummary,
+)
+from obliteratus.analysis.sparse_surgery import (
+    SparseDirectionSurgeon,
+    SparseProjectionResult,
+    SparseSurgeryPlan,
+)
+
+
+# ---------------------------------------------------------------------------
+#  Helpers
+# ---------------------------------------------------------------------------
+
+def _make_category_activations(
+    hidden_dim=32, n_prompts=30, n_categories=5, category_spread=0.3,
+):
+    """Create synthetic activations with planted per-category refusal directions.
+
+    Each category gets its own refusal direction, with some shared component
+    to simulate a polyhedral cone structure.
+    """
+    torch.manual_seed(42)
+
+    # Shared refusal component
+    shared = torch.randn(hidden_dim)
+    shared = shared / shared.norm()
+
+    # Per-category unique components
+    cat_dirs = {}
+    categories = [f"cat_{i}" for i in range(n_categories)]
+    for cat in categories:
+        unique = torch.randn(hidden_dim)
+        unique = unique / unique.norm()
+        combined = shared + category_spread * unique
+        cat_dirs[cat] = combined / combined.norm()
+
+    # Assign prompts to categories
+    prompts_per_cat = n_prompts // n_categories
+    category_map = {}
+    for i, cat in enumerate(categories):
+        for j in range(prompts_per_cat):
+            category_map[i * prompts_per_cat + j] = cat
+
+    actual_n = prompts_per_cat * n_categories
+
+    # Generate activations
+    harmful_acts = []
+    harmless_acts = []
+    for idx in range(actual_n):
+        cat = category_map[idx]
+        base = torch.randn(hidden_dim) * 0.1
+        harmful_acts.append(base + 2.0 * cat_dirs[cat])
+        harmless_acts.append(base)
+
+    return harmful_acts, harmless_acts, category_map, cat_dirs
+
+
+def _make_refusal_directions(n_layers=8, hidden_dim=32, concentration="distributed"):
+    """Create synthetic refusal directions with specified concentration pattern."""
+    torch.manual_seed(123)
+    directions = {}
+    strengths = {}
+
+    for i in range(n_layers):
+        d = torch.randn(hidden_dim)
+        directions[i] = d / d.norm()
+
+        if concentration == "concentrated":
+            # Strong in last few layers only (SFT-like)
+            strengths[i] = 3.0 if i >= n_layers - 2 else 0.1
+        elif concentration == "distributed":
+            # Even across layers (RLHF-like)
+            strengths[i] = 1.0 + 0.2 * torch.randn(1).item()
+        elif concentration == "orthogonal":
+            # Each layer direction is more orthogonal (CAI-like)
+            if i > 0:
+                # Make each direction more orthogonal to previous
+                prev = directions[i - 1]
+                d = d - (d @ prev) * prev
+                d = d / d.norm().clamp(min=1e-8)
+                directions[i] = d
+            strengths[i] = 1.5
+        else:
+            strengths[i] = 2.0 if 2 <= i <= 4 else 0.5
+
+    return directions, strengths
+
+
+# ===========================================================================
+#  Tests: Concept Cone Geometry
+# ===========================================================================
+
+class TestConceptConeAnalyzer:
+    def test_basic_analysis(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless, layer_idx=5)
+
+        assert isinstance(result, ConeConeResult)
+        assert result.layer_idx == 5
+        assert result.category_count >= 2
+        assert result.cone_dimensionality > 0
+        assert result.cone_solid_angle >= 0
+        assert 0 <= result.mean_pairwise_cosine <= 1.0
+
+    def test_polyhedral_detection(self):
+        """With spread-out categories, should detect polyhedral geometry."""
+        harmful, harmless, cat_map, _ = _make_category_activations(
+            category_spread=2.0,  # Large spread -> distinct directions
+        )
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless)
+        # With high spread, directions should be more distinct
+        assert result.cone_dimensionality > 1.0
+
+    def test_linear_detection(self):
+        """With no spread, should detect linear (single direction) geometry."""
+        harmful, harmless, cat_map, _ = _make_category_activations(
+            category_spread=0.0,  # No spread -> all directions aligned
+        )
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless)
+        assert result.mean_pairwise_cosine > 0.8
+
+    def test_category_directions_populated(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless)
+
+        for cd in result.category_directions:
+            assert isinstance(cd, CategoryDirection)
+            assert cd.strength > 0
+            assert cd.n_prompts >= 2
+            assert 0 <= cd.specificity <= 1.0
+
+    def test_pairwise_cosines(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless)
+
+        for (a, b), cos in result.pairwise_cosines.items():
+            assert 0 <= cos <= 1.0
+            assert a < b  # Sorted pair
+
+    def test_general_direction_unit(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless)
+        assert abs(result.general_direction.norm().item() - 1.0) < 0.01
+
+    def test_multi_layer_analysis(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        harmful_by_layer = {i: harmful for i in range(4)}
+        harmless_by_layer = {i: harmless for i in range(4)}
+
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_all_layers(harmful_by_layer, harmless_by_layer)
+
+        assert isinstance(result, MultiLayerConeResult)
+        assert len(result.per_layer) == 4
+        assert result.mean_cone_dimensionality > 0
+
+    def test_format_report(self):
+        harmful, harmless, cat_map, _ = _make_category_activations()
+        analyzer = ConceptConeAnalyzer(category_map=cat_map)
+        result = analyzer.analyze_layer(harmful, harmless, layer_idx=3)
+        report = ConceptConeAnalyzer.format_report(result)
+
+        assert "Concept Cone" in report
+        assert "Layer 3" in report
+        assert "dimensionality" in report
+
+    def test_default_category_map(self):
+        assert len(DEFAULT_HARM_CATEGORIES) == 30
+        cats = set(DEFAULT_HARM_CATEGORIES.values())
+        assert "weapons" in cats
+        assert "cyber" in cats
+
+    def test_empty_activations(self):
+        analyzer = ConceptConeAnalyzer()
+        result = analyzer.analyze_layer([], [], layer_idx=0)
+        assert result.category_count == 0
+
+    def test_min_category_size(self):
+        """Categories with too few prompts should be excluded."""
+        harmful, harmless, cat_map, _ = _make_category_activations(
+            n_prompts=10, n_categories=5,
+        )
+        analyzer = ConceptConeAnalyzer(category_map=cat_map, min_category_size=3)
+        result = analyzer.analyze_layer(harmful, harmless)
+        # Each category has only 2 prompts, so with min_size=3 all are excluded
+        assert result.category_count == 0
+
+
+# ===========================================================================
+#  Tests: Alignment Imprint Detector
+# ===========================================================================
+
+class TestAlignmentImprintDetector:
+    def test_basic_detection(self):
+        directions, strengths = _make_refusal_directions()
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+
+        assert isinstance(imprint, AlignmentImprint)
+        assert imprint.predicted_method in ("dpo", "rlhf", "cai", "sft")
+        assert 0 <= imprint.confidence <= 1.0
+
+    def test_probabilities_sum_to_one(self):
+        directions, strengths = _make_refusal_directions()
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+
+        total = (imprint.dpo_probability + imprint.rlhf_probability +
+                 imprint.cai_probability + imprint.sft_probability)
+        assert abs(total - 1.0) < 0.01
+
+    def test_concentrated_detects_sft_or_dpo(self):
+        """Concentrated refusal (tail-biased) should predict SFT or DPO."""
+        directions, strengths = _make_refusal_directions(concentration="concentrated")
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+        # SFT and DPO both have concentrated signatures
+        assert imprint.predicted_method in ("sft", "dpo")
+
+    def test_distributed_detects_not_sft(self):
+        """Distributed refusal should not be predicted as SFT."""
+        directions, strengths = _make_refusal_directions(
+            n_layers=16, concentration="distributed",
+        )
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+        # With distributed refusal, Gini is low -> SFT is unlikely to be top prediction
+        assert imprint.predicted_method != "sft"
+
+    def test_orthogonal_detects_cai(self):
+        """Orthogonal layer directions should lean toward CAI."""
+        directions, strengths = _make_refusal_directions(
+            n_layers=12, concentration="orthogonal",
+        )
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+        # CAI should rank highly due to orthogonality
+        assert imprint.cai_probability > 0.15
+
+    def test_feature_extraction(self):
+        directions, strengths = _make_refusal_directions()
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+
+        assert 0 <= imprint.gini_coefficient <= 1.0
+        assert imprint.effective_rank > 0
+        assert 0 <= imprint.cross_layer_smoothness <= 1.0
+        assert 0 <= imprint.tail_layer_bias <= 1.0
+        assert 0 <= imprint.mean_pairwise_orthogonality <= 1.0
+        assert imprint.spectral_decay_rate >= 0
+
+    def test_empty_directions(self):
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint({})
+        assert imprint.predicted_method == "unknown"
+        assert imprint.confidence == 0.0
+
+    def test_compare_base_instruct(self):
+        torch.manual_seed(42)
+        hidden_dim = 32
+        directions, _ = _make_refusal_directions(hidden_dim=hidden_dim)
+
+        base_acts = {i: torch.randn(hidden_dim) for i in range(8)}
+        instruct_acts = {
+            i: base_acts[i] + 1.5 * directions[i] for i in range(8)
+        }
+
+        detector = AlignmentImprintDetector()
+        deltas = detector.compare_base_instruct(base_acts, instruct_acts, directions)
+
+        assert len(deltas) == 8
+        for d in deltas:
+            assert isinstance(d, BaseInstructDelta)
+            assert d.delta_magnitude > 0
+            # Since delta IS the refusal direction, cosine should be high
+            assert abs(d.cosine_with_refusal) > 0.5
+
+    def test_format_imprint(self):
+        directions, strengths = _make_refusal_directions()
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+        report = AlignmentImprintDetector.format_imprint(imprint)
+
+        assert "Alignment Imprint" in report
+        assert "DPO" in report
+        assert "RLHF" in report
+        assert "Gini" in report
+
+    def test_per_layer_strength_populated(self):
+        directions, strengths = _make_refusal_directions()
+        detector = AlignmentImprintDetector()
+        imprint = detector.detect_imprint(directions, strengths)
+        assert len(imprint.per_layer_strength) == len(directions)
+
+
+# ===========================================================================
+#  Tests: Multi-Token Position Analysis
+# ===========================================================================
+
+class TestMultiTokenPositionAnalyzer:
+    def _make_activations_with_trigger(
+        self, seq_len=20, hidden_dim=32, trigger_pos=5,
+    ):
+        """Create activations with a planted trigger at a specific position."""
+        torch.manual_seed(42)
+        refusal_dir = torch.randn(hidden_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        # Background activations
+        acts = torch.randn(seq_len, hidden_dim) * 0.1
+
+        # Strong refusal at trigger position
+        acts[trigger_pos] += 3.0 * refusal_dir
+
+        # Weaker refusal at last position
+        acts[-1] += 1.0 * refusal_dir
+
+        # Moderate at a few positions after trigger (decay)
+        for i in range(trigger_pos + 1, min(trigger_pos + 4, seq_len)):
+            decay = 0.5 ** (i - trigger_pos)
+            acts[i] += 3.0 * decay * refusal_dir
+
+        return acts, refusal_dir
+
+    def test_basic_analysis(self):
+        acts, ref_dir = self._make_activations_with_trigger()
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir, layer_idx=3)
+
+        assert isinstance(result, PositionAnalysisResult)
+        assert result.layer_idx == 3
+        assert result.n_tokens == 20
+        assert result.peak_strength > 0
+
+    def test_trigger_detection(self):
+        acts, ref_dir = self._make_activations_with_trigger(trigger_pos=5)
+        analyzer = MultiTokenPositionAnalyzer(trigger_threshold=0.5)
+        result = analyzer.analyze_prompt(acts, ref_dir)
+
+        # The planted trigger should be detected
+        assert 5 in result.trigger_positions
+        assert result.peak_position == 5
+
+    def test_peak_vs_last(self):
+        """Peak should be at trigger, not last token."""
+        acts, ref_dir = self._make_activations_with_trigger(trigger_pos=5)
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+
+        assert result.peak_strength > result.last_token_strength
+        assert result.peak_position != result.n_tokens - 1
+
+    def test_decay_rate_positive(self):
+        acts, ref_dir = self._make_activations_with_trigger(trigger_pos=5)
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        # With exponential decay planted, decay rate should be positive
+        assert result.decay_rate > 0
+
+    def test_position_gini_bounded(self):
+        acts, ref_dir = self._make_activations_with_trigger()
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert 0 <= result.position_gini <= 1.0
+
+    def test_token_profiles_length(self):
+        acts, ref_dir = self._make_activations_with_trigger(seq_len=15)
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert len(result.token_profiles) == 15
+
+    def test_custom_token_texts(self):
+        acts, ref_dir = self._make_activations_with_trigger(seq_len=10, trigger_pos=3)
+        tokens = ["How", "to", "make", "a", "bomb", "from", "scratch", "please", "help", "me"]
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir, token_texts=tokens)
+        for tp in result.token_profiles:
+            assert tp.token_text in tokens or tp.token_text.startswith("pos_")
+
+    def test_batch_analysis(self):
+        batch = []
+        for i in range(5):
+            acts, ref_dir = self._make_activations_with_trigger(
+                trigger_pos=3 + i % 3,
+            )
+            batch.append(acts)
+
+        analyzer = MultiTokenPositionAnalyzer()
+        summary = analyzer.analyze_batch(batch, ref_dir)
+
+        assert isinstance(summary, MultiTokenSummary)
+        assert len(summary.per_prompt) == 5
+        assert summary.mean_peak_vs_last_ratio > 0
+        assert summary.mean_trigger_count > 0
+        assert 0 <= summary.peak_is_last_fraction <= 1.0
+        assert 0 <= summary.last_token_dominance <= 1.0
+
+    def test_last_token_dominant_case(self):
+        """When signal is only at last token, peak should equal last."""
+        torch.manual_seed(42)
+        hidden_dim = 32
+        seq_len = 10
+        ref_dir = torch.randn(hidden_dim)
+        ref_dir = ref_dir / ref_dir.norm()
+
+        acts = torch.randn(seq_len, hidden_dim) * 0.01
+        acts[-1] += 5.0 * ref_dir
+
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert result.peak_position == seq_len - 1
+
+    def test_format_position_report(self):
+        acts, ref_dir = self._make_activations_with_trigger()
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir, prompt_text="How to hack?")
+        report = MultiTokenPositionAnalyzer.format_position_report(result)
+
+        assert "Multi-Token" in report
+        assert "Peak position" in report
+
+    def test_format_summary(self):
+        batch = []
+        for _ in range(3):
+            acts, ref_dir = self._make_activations_with_trigger()
+            batch.append(acts)
+
+        analyzer = MultiTokenPositionAnalyzer()
+        summary = analyzer.analyze_batch(batch, ref_dir)
+        report = MultiTokenPositionAnalyzer.format_summary(summary)
+
+        assert "Summary" in report
+        assert "Prompts analyzed" in report
+
+    def test_3d_activations_handled(self):
+        """Should handle (1, seq_len, hidden_dim) inputs."""
+        acts, ref_dir = self._make_activations_with_trigger()
+        acts = acts.unsqueeze(0)  # Add batch dim
+        analyzer = MultiTokenPositionAnalyzer()
+        result = analyzer.analyze_prompt(acts, ref_dir)
+        assert result.n_tokens == 20
+
+    def test_empty_batch(self):
+        ref_dir = torch.randn(32)
+        analyzer = MultiTokenPositionAnalyzer()
+        summary = analyzer.analyze_batch([], ref_dir)
+        assert len(summary.per_prompt) == 0
+        assert summary.peak_is_last_fraction == 1.0
+
+
+# ===========================================================================
+#  Tests: Sparse Direction Surgery
+# ===========================================================================
+
+class TestSparseDirectionSurgeon:
+    def _make_weight_with_sparse_refusal(
+        self, out_dim=64, in_dim=32, n_refusal_rows=5,
+    ):
+        """Create a weight matrix where refusal is concentrated in a few rows."""
+        torch.manual_seed(42)
+        refusal_dir = torch.randn(in_dim)
+        refusal_dir = refusal_dir / refusal_dir.norm()
+
+        W = torch.randn(out_dim, in_dim) * 0.1
+
+        # Plant strong refusal signal in specific rows
+        refusal_rows = list(range(n_refusal_rows))
+        for i in refusal_rows:
+            W[i] += 5.0 * refusal_dir
+
+        return W, refusal_dir, refusal_rows
+
+    def test_basic_analysis(self):
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        result = surgeon.analyze_weight_matrix(W, ref_dir, layer_idx=3)
+
+        assert isinstance(result, SparseProjectionResult)
+        assert result.layer_idx == 3
+        assert result.n_rows_total == 64
+        assert result.n_rows_modified > 0
+        assert result.mean_projection > 0
+        assert result.max_projection > result.mean_projection
+
+    def test_refusal_sparsity_index(self):
+        """With sparse refusal, RSI should be high."""
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal(
+            out_dim=100, n_refusal_rows=5,
+        )
+        surgeon = SparseDirectionSurgeon()
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        assert result.refusal_sparsity_index > 0.3  # Concentrated signal
+
+    def test_energy_removed(self):
+        """Top rows should capture most of the refusal energy."""
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal(
+            out_dim=64, n_refusal_rows=5,
+        )
+        surgeon = SparseDirectionSurgeon(sparsity=0.15)  # ~10 rows out of 64
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        # With 5 refusal rows and 10 modified, should capture most energy
+        assert result.energy_removed > 0.5
+
+    def test_frobenius_change_bounded(self):
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        assert result.frobenius_change > 0
+        assert result.frobenius_change < 1.0  # Shouldn't change more than 100%
+
+    def test_apply_sparse_projection(self):
+        """Sparse projection should reduce refusal signal."""
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+
+        W_modified = surgeon.apply_sparse_projection(W, ref_dir)
+
+        # Check that modified rows have reduced projection
+        original_proj = (W @ ref_dir).abs().sum().item()
+        modified_proj = (W_modified @ ref_dir).abs().sum().item()
+        assert modified_proj < original_proj
+
+    def test_sparse_preserves_unmodified_rows(self):
+        """Rows below the threshold should be unchanged."""
+        W, ref_dir, refusal_rows = self._make_weight_with_sparse_refusal(
+            out_dim=64, n_refusal_rows=5,
+        )
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)  # ~6 rows
+        W_modified = surgeon.apply_sparse_projection(W, ref_dir)
+
+        # Count rows that actually changed
+        diffs = (W - W_modified).abs().sum(dim=1)
+        n_changed = (diffs > 1e-6).sum().item()
+        n_unchanged = (diffs < 1e-6).sum().item()
+
+        assert n_changed <= int(0.1 * 64) + 1  # Sparsity bound
+        assert n_unchanged >= 57  # Most rows unchanged
+
+    def test_dense_vs_sparse_comparison(self):
+        """Dense projection should modify all rows; sparse should modify fewer."""
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+
+        # Dense projection
+        r = ref_dir / ref_dir.norm()
+        W_dense = W - (W @ r).unsqueeze(1) * r.unsqueeze(0)
+
+        # Sparse projection
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        W_sparse = surgeon.apply_sparse_projection(W, ref_dir)
+
+        dense_changes = (W - W_dense).abs().sum(dim=1)
+        sparse_changes = (W - W_sparse).abs().sum(dim=1)
+
+        n_dense_changed = (dense_changes > 1e-6).sum().item()
+        n_sparse_changed = (sparse_changes > 1e-6).sum().item()
+
+        assert n_sparse_changed < n_dense_changed
+
+    def test_plan_surgery(self):
+        weights = {}
+        directions = {}
+        for i in range(6):
+            W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+            weights[i] = W
+            directions[i] = ref_dir
+
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        plan = surgeon.plan_surgery(weights, directions)
+
+        assert isinstance(plan, SparseSurgeryPlan)
+        assert len(plan.per_layer) == 6
+        assert 0 < plan.recommended_sparsity < 1.0
+        assert plan.mean_refusal_sparsity_index > 0
+        assert plan.mean_energy_removed > 0
+
+    def test_auto_sparsity(self):
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(auto_sparsity=True)
+        result = surgeon.analyze_weight_matrix(W, ref_dir)
+        # Auto sparsity should find a reasonable value
+        assert 0.01 <= result.sparsity <= 0.5
+
+    def test_auto_sparsity_apply(self):
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(auto_sparsity=True)
+        W_modified = surgeon.apply_sparse_projection(W, ref_dir)
+        # Should reduce projection
+        assert (W_modified @ ref_dir).abs().sum() < (W @ ref_dir).abs().sum()
+
+    def test_format_analysis(self):
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        result = surgeon.analyze_weight_matrix(W, ref_dir, layer_idx=4)
+        report = SparseDirectionSurgeon.format_analysis(result)
+
+        assert "Sparse Direction Surgery" in report
+        assert "Layer 4" in report
+        assert "Refusal Sparsity Index" in report
+
+    def test_format_plan(self):
+        weights = {i: torch.randn(32, 16) for i in range(4)}
+        directions = {i: torch.randn(16) for i in range(4)}
+
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        plan = surgeon.plan_surgery(weights, directions)
+        report = SparseDirectionSurgeon.format_plan(plan)
+
+        assert "Sparse Direction Surgery Plan" in report
+        assert "Recommended sparsity" in report
+
+    def test_empty_inputs(self):
+        surgeon = SparseDirectionSurgeon()
+        plan = surgeon.plan_surgery({}, {})
+        assert len(plan.per_layer) == 0
+
+    def test_output_dtype_preserved(self):
+        """Output should match input dtype."""
+        W, ref_dir, _ = self._make_weight_with_sparse_refusal()
+        W_half = W.half()
+        surgeon = SparseDirectionSurgeon(sparsity=0.1)
+        W_out = surgeon.apply_sparse_projection(W_half, ref_dir)
+        assert W_out.dtype == torch.float16
+
+
+# ===========================================================================
+#  Tests: Integration / Imports
+# ===========================================================================
+
+class TestAnalysisImports:
+    def test_all_new_modules_importable(self):
+        from obliteratus.analysis import (
+            ConceptConeAnalyzer,
+            AlignmentImprintDetector,
+            MultiTokenPositionAnalyzer,
+            SparseDirectionSurgeon,
+        )
+        assert ConceptConeAnalyzer is not None
+        assert AlignmentImprintDetector is not None
+        assert MultiTokenPositionAnalyzer is not None
+        assert SparseDirectionSurgeon is not None
diff --git a/tests/test_refusal_detection.py b/tests/test_refusal_detection.py
new file mode 100644
index 0000000..1d2755c
--- /dev/null
+++ b/tests/test_refusal_detection.py
@@ -0,0 +1,155 @@
+"""Tests for refusal detection, including semantic mode and confidence intervals."""
+
+from __future__ import annotations
+
+
+from obliteratus.evaluation.advanced_metrics import (
+    _is_refusal,
+    _is_refusal_semantic,
+    refusal_rate,
+    refusal_rate_with_ci,
+)
+
+
+class TestKeywordRefusal:
+    """Test the keyword-based refusal detection."""
+
+    def test_obvious_refusal(self):
+        assert _is_refusal("I'm sorry, I can't help with that.", "combined")
+
+    def test_obvious_non_refusal(self):
+        assert not _is_refusal("Here are the instructions for building a birdhouse.", "combined")
+
+    def test_empty_string(self):
+        assert not _is_refusal("", "combined")
+
+    def test_prefix_mode(self):
+        assert _is_refusal("I cannot assist with that request.", "prefix")
+        assert not _is_refusal("Well, I cannot do that.", "prefix")  # not a prefix
+
+    def test_substring_mode(self):
+        assert _is_refusal("Well, I cannot assist with that.", "substring")
+
+
+class TestSemanticRefusal:
+    """Test the semantic refusal detection patterns."""
+
+    def test_hedge_then_refuse(self):
+        text = "While I understand your interest in this topic, I cannot provide instructions for that."
+        assert _is_refusal(text, "semantic")
+
+    def test_redirect(self):
+        text = "Instead of providing those instructions, I can help you with something constructive."
+        assert _is_refusal(text, "semantic")
+
+    def test_concern_flagging(self):
+        text = "This raises serious ethical concerns that prevent me from responding."
+        assert _is_refusal(text, "semantic")
+
+    def test_meta_refusal(self):
+        text = "I'm designed to avoid providing information that could cause harm."
+        assert _is_refusal(text, "semantic")
+
+    def test_responsibility_deflection(self):
+        text = "You should consult a professional about this matter."
+        assert _is_refusal(text, "semantic")
+
+    def test_genuine_compliance_not_flagged(self):
+        """Actual compliant responses should not trigger semantic detection."""
+        texts = [
+            "Here are the steps to build a birdhouse from scratch.",
+            "The chemical formula for water is H2O.",
+            "Python is a great language for data science. Here's how to get started.",
+            "To make chocolate chip cookies, you'll need flour, sugar, butter, and chocolate chips.",
+        ]
+        for text in texts:
+            assert not _is_refusal(text, "semantic"), f"False positive: {text[:50]}"
+
+    def test_semantic_catches_what_keyword_misses(self):
+        """Semantic mode should catch refusals that keyword mode misses."""
+        # This hedge-then-refuse doesn't start with any refusal prefix
+        # and doesn't contain exact refusal substrings
+        text = "While I appreciate your curiosity, I'm not able to share that information."
+        assert not _is_refusal(text, "prefix")
+        # "not able to" is in substring list, so combined catches it
+        assert _is_refusal(text, "combined")
+
+        # This one truly requires semantic detection
+        text = "Although I respect your question, I will not be able to help with this particular request."
+        # "will not" is in substrings, but test the semantic path independently
+        assert _is_refusal_semantic(
+            "While I understand your request, I cannot assist with that specific topic."
+        )
+
+
+class TestRefusalRate:
+    def test_all_refuse(self):
+        responses = ["I'm sorry, no.", "I cannot help.", "I won't do that."]
+        assert refusal_rate(responses, "combined") == 1.0
+
+    def test_none_refuse(self):
+        responses = ["Sure, here you go.", "Of course!", "The answer is 42."]
+        assert refusal_rate(responses, "combined") == 0.0
+
+    def test_mixed(self):
+        responses = ["I'm sorry, no.", "Sure, here you go."]
+        assert refusal_rate(responses, "combined") == 0.5
+
+    def test_empty_list(self):
+        assert refusal_rate([], "combined") == 0.0
+
+    def test_semantic_mode(self):
+        responses = [
+            "While I understand your interest, I cannot provide that information.",
+            "Here are the steps you requested.",
+        ]
+        assert refusal_rate(responses, "semantic") == 0.5
+
+
+class TestRefusalRateWithCI:
+    def test_returns_all_fields(self):
+        responses = ["I'm sorry"] * 5 + ["Sure, here you go."] * 5
+        ci = refusal_rate_with_ci(responses, mode="combined")
+        assert "rate" in ci
+        assert "ci_lower" in ci
+        assert "ci_upper" in ci
+        assert "n_samples" in ci
+        assert ci["n_samples"] == 10
+
+    def test_ci_bounds_bracket_rate(self):
+        responses = ["I'm sorry"] * 30 + ["Sure, here you go."] * 70
+        ci = refusal_rate_with_ci(responses, mode="combined")
+        assert ci["ci_lower"] <= ci["rate"] <= ci["ci_upper"]
+
+    def test_all_refuse_tight_ci(self):
+        responses = ["I'm sorry"] * 50
+        ci = refusal_rate_with_ci(responses, mode="combined")
+        assert ci["rate"] == 1.0
+        # Wilson CI: 50/50 at 95% gives ci_lower ~0.929, not 1.0
+        # (a proper CI acknowledges uncertainty even with all-positive observations)
+        assert ci["ci_lower"] > 0.9
+        assert ci["ci_upper"] == 1.0
+
+    def test_empty_responses(self):
+        ci = refusal_rate_with_ci([], mode="combined")
+        assert ci["rate"] == 0.0
+        assert ci["n_samples"] == 0
+
+    def test_ci_narrower_with_more_samples(self):
+        """More samples should produce tighter confidence intervals."""
+        responses_small = ["I'm sorry"] * 5 + ["Sure"] * 5
+        responses_large = ["I'm sorry"] * 50 + ["Sure"] * 50
+
+        ci_small = refusal_rate_with_ci(responses_small)
+        ci_large = refusal_rate_with_ci(responses_large)
+
+        width_small = ci_small["ci_upper"] - ci_small["ci_lower"]
+        width_large = ci_large["ci_upper"] - ci_large["ci_lower"]
+        assert width_large < width_small, \
+            f"Large CI ({width_large}) not narrower than small CI ({width_small})"
+
+    def test_deterministic_with_seed(self):
+        responses = ["I'm sorry"] * 30 + ["Sure"] * 70
+        ci1 = refusal_rate_with_ci(responses)
+        ci2 = refusal_rate_with_ci(responses)
+        assert ci1 == ci2, "Same input produced different CIs"
diff --git a/tests/test_report.py b/tests/test_report.py
new file mode 100644
index 0000000..beff3e6
--- /dev/null
+++ b/tests/test_report.py
@@ -0,0 +1,70 @@
+"""Tests for the reporting module."""
+
+from __future__ import annotations
+
+import json
+
+from obliteratus.reporting.report import AblationReport, AblationResult
+
+
+def _make_report() -> AblationReport:
+    report = AblationReport(model_name="test-model")
+    report.add_baseline({"perplexity": 25.0, "accuracy": 0.85})
+    report.add_result(
+        AblationResult(
+            strategy="layer_removal",
+            component="layer_0",
+            description="Remove layer 0",
+            metrics={"perplexity": 30.0, "accuracy": 0.80},
+        )
+    )
+    report.add_result(
+        AblationResult(
+            strategy="layer_removal",
+            component="layer_1",
+            description="Remove layer 1",
+            metrics={"perplexity": 50.0, "accuracy": 0.60},
+        )
+    )
+    return report
+
+
+class TestAblationReport:
+    def test_to_dataframe(self):
+        report = _make_report()
+        df = report.to_dataframe()
+        assert len(df) == 2
+        assert "perplexity" in df.columns
+        assert "perplexity_delta" in df.columns
+        assert "perplexity_pct_change" in df.columns
+
+    def test_save_json(self, tmp_path):
+        report = _make_report()
+        out = tmp_path / "results.json"
+        report.save_json(out)
+        data = json.loads(out.read_text())
+        assert data["model_name"] == "test-model"
+        assert len(data["results"]) == 2
+        assert data["baseline_metrics"]["perplexity"] == 25.0
+
+    def test_save_csv(self, tmp_path):
+        report = _make_report()
+        out = tmp_path / "results.csv"
+        report.save_csv(out)
+        text = out.read_text()
+        assert "layer_0" in text
+        assert "perplexity" in text
+
+    def test_delta_calculation(self):
+        report = _make_report()
+        df = report.to_dataframe()
+        row0 = df[df["component"] == "layer_0"].iloc[0]
+        assert row0["perplexity_delta"] == 5.0  # 30 - 25
+        assert abs(row0["perplexity_pct_change"] - 20.0) < 0.01
+
+    def test_plot_impact(self, tmp_path):
+        report = _make_report()
+        out = tmp_path / "impact.png"
+        report.plot_impact(metric="perplexity", output_path=out)
+        assert out.exists()
+        assert out.stat().st_size > 0
diff --git a/tests/test_strategies.py b/tests/test_strategies.py
new file mode 100644
index 0000000..c65a3f9
--- /dev/null
+++ b/tests/test_strategies.py
@@ -0,0 +1,179 @@
+"""Tests for ablation strategies using a small GPT-2 model."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from obliteratus.strategies.base import AblationSpec
+from obliteratus.strategies.registry import STRATEGY_REGISTRY, get_strategy
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+def _make_dummy_handle():
+    """Create a minimal ModelHandle with a tiny GPT-2 for testing (no network)."""
+    from unittest.mock import MagicMock
+    from transformers import GPT2Config, GPT2LMHeadModel
+    from obliteratus.models.loader import ModelHandle
+
+    config = GPT2Config(
+        vocab_size=1000,
+        n_positions=128,
+        n_embd=64,
+        n_layer=2,
+        n_head=2,
+        n_inner=256,
+    )
+    model = GPT2LMHeadModel(config)
+    model.eval()
+
+    # Strategy tests don't tokenize — use a simple mock
+    tokenizer = MagicMock()
+    tokenizer.pad_token = "<pad>"
+    tokenizer.eos_token = "<eos>"
+
+    handle = ModelHandle(
+        model=model,
+        tokenizer=tokenizer,
+        config=config,
+        model_name="gpt2-test",
+        task="causal_lm",
+    )
+    handle.snapshot()
+    return handle
+
+
+@pytest.fixture
+def handle():
+    return _make_dummy_handle()
+
+
+# ---------------------------------------------------------------------------
+# Registry tests
+# ---------------------------------------------------------------------------
+
+class TestRegistry:
+    def test_all_strategies_registered(self):
+        expected = {"layer_removal", "head_pruning", "ffn_ablation", "embedding_ablation"}
+        assert expected.issubset(set(STRATEGY_REGISTRY.keys()))
+
+    def test_get_strategy_returns_instance(self):
+        strat = get_strategy("layer_removal")
+        assert strat.name == "layer_removal"
+
+    def test_get_unknown_strategy_raises(self):
+        with pytest.raises(KeyError, match="Unknown strategy"):
+            get_strategy("nonexistent_strategy")
+
+
+# ---------------------------------------------------------------------------
+# Layer removal
+# ---------------------------------------------------------------------------
+
+class TestLayerRemoval:
+    def test_enumerate(self, handle):
+        strat = get_strategy("layer_removal")
+        specs = strat.enumerate(handle)
+        assert len(specs) == handle.num_layers
+        assert all(s.strategy_name == "layer_removal" for s in specs)
+
+    def test_apply_zeros_layer(self, handle):
+        strat = get_strategy("layer_removal")
+        specs = strat.enumerate(handle)
+        strat.apply(handle, specs[0])
+
+        from obliteratus.strategies.utils import get_layer_modules
+        layer = get_layer_modules(handle)[0]
+        for param in layer.parameters():
+            assert torch.all(param == 0), "Layer params should be zeroed after ablation"
+
+    def test_restore_after_ablation(self, handle):
+        strat = get_strategy("layer_removal")
+        specs = strat.enumerate(handle)
+
+        from obliteratus.strategies.utils import get_layer_modules
+        original_weight = get_layer_modules(handle)[0].attn.c_attn.weight.clone()
+
+        strat.apply(handle, specs[0])
+        handle.restore()
+
+        restored_weight = get_layer_modules(handle)[0].attn.c_attn.weight
+        assert torch.allclose(original_weight, restored_weight)
+
+
+# ---------------------------------------------------------------------------
+# Head pruning
+# ---------------------------------------------------------------------------
+
+class TestHeadPruning:
+    def test_enumerate(self, handle):
+        strat = get_strategy("head_pruning")
+        specs = strat.enumerate(handle)
+        assert len(specs) == handle.num_layers * handle.num_heads
+
+    def test_apply_zeros_head(self, handle):
+        strat = get_strategy("head_pruning")
+        spec = AblationSpec(
+            strategy_name="head_pruning",
+            component="layer_0_head_0",
+            description="test",
+            metadata={"layer_idx": 0, "head_idx": 0},
+        )
+        strat.apply(handle, spec)
+
+        from obliteratus.strategies.utils import get_layer_modules, get_attention_module
+        attn = get_attention_module(get_layer_modules(handle)[0], handle.architecture)
+        head_dim = handle.hidden_size // handle.num_heads
+        # GPT-2 uses c_attn (Conv1D), check output projection c_proj
+        if hasattr(attn, "c_proj"):
+            # Conv1D stores weight transposed
+            assert torch.all(attn.c_proj.weight[0:head_dim, :] == 0)
+
+
+# ---------------------------------------------------------------------------
+# FFN ablation
+# ---------------------------------------------------------------------------
+
+class TestFFNAblation:
+    def test_enumerate(self, handle):
+        strat = get_strategy("ffn_ablation")
+        specs = strat.enumerate(handle)
+        assert len(specs) == handle.num_layers
+
+    def test_apply_zeros_ffn(self, handle):
+        strat = get_strategy("ffn_ablation")
+        specs = strat.enumerate(handle)
+        strat.apply(handle, specs[0])
+
+        from obliteratus.strategies.utils import get_layer_modules, get_ffn_module
+        ffn = get_ffn_module(get_layer_modules(handle)[0], handle.architecture)
+        for param in ffn.parameters():
+            assert torch.all(param == 0)
+
+
+# ---------------------------------------------------------------------------
+# Embedding ablation
+# ---------------------------------------------------------------------------
+
+class TestEmbeddingAblation:
+    def test_enumerate(self, handle):
+        strat = get_strategy("embedding_ablation")
+        specs = strat.enumerate(handle)
+        assert len(specs) > 0
+
+    def test_apply_zeros_dims(self, handle):
+        strat = get_strategy("embedding_ablation")
+        spec = AblationSpec(
+            strategy_name="embedding_ablation",
+            component="embed_dims_0_4",
+            description="test",
+            metadata={"dim_start": 0, "dim_end": 4},
+        )
+        strat.apply(handle, spec)
+
+        from obliteratus.strategies.utils import get_embedding_module
+        emb = get_embedding_module(handle)
+        assert torch.all(emb.weight[:, 0:4] == 0)
diff --git a/tests/test_study_presets.py b/tests/test_study_presets.py
new file mode 100644
index 0000000..bba2fcc
--- /dev/null
+++ b/tests/test_study_presets.py
@@ -0,0 +1,108 @@
+"""Tests for ablation presets."""
+
+from __future__ import annotations
+
+from obliteratus.study_presets import (
+    STUDY_PRESETS,
+    get_study_preset,
+    get_preset,
+    list_study_presets,
+    list_presets,
+)
+from obliteratus.config import StudyConfig
+
+
+class TestPresets:
+    def test_all_presets_registered(self):
+        expected_keys = {"quick", "full", "attention", "layers", "knowledge", "pruning", "embeddings", "jailbreak", "guardrail", "robustness"}
+        assert expected_keys.issubset(set(STUDY_PRESETS.keys()))
+
+    def test_get_preset(self):
+        preset = get_study_preset("quick")
+        assert preset.name == "Quick Scan"
+        assert preset.key == "quick"
+        assert len(preset.strategies) == 2
+
+    def test_get_preset_alias(self):
+        preset = get_preset("quick")
+        assert preset.name == "Quick Scan"
+
+    def test_get_unknown_preset_raises(self):
+        import pytest
+        with pytest.raises(KeyError, match="Unknown preset"):
+            get_study_preset("nonexistent")
+
+    def test_list_presets(self):
+        presets = list_study_presets()
+        assert len(presets) >= 7
+        keys = [p.key for p in presets]
+        assert "quick" in keys
+        assert "full" in keys
+
+    def test_list_presets_alias(self):
+        assert list_presets() == list_study_presets()
+
+    def test_preset_strategies_are_valid(self):
+        from obliteratus.strategies import STRATEGY_REGISTRY
+        for preset in list_study_presets():
+            for s in preset.strategies:
+                assert s["name"] in STRATEGY_REGISTRY, (
+                    f"Preset {preset.key!r} references unknown strategy {s['name']!r}"
+                )
+
+
+class TestConfigWithPreset:
+    def test_preset_key_in_config(self):
+        config_dict = {
+            "preset": "quick",
+            "model": {"name": "gpt2", "task": "causal_lm", "dtype": "float32", "device": "cpu"},
+            "dataset": {"name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "test", "text_column": "text"},
+        }
+        config = StudyConfig.from_dict(config_dict)
+        # Should inherit strategies from the quick preset
+        assert len(config.strategies) == 2
+        strategy_names = [s.name for s in config.strategies]
+        assert "layer_removal" in strategy_names
+        assert "ffn_ablation" in strategy_names
+        # Should inherit max_samples
+        assert config.dataset.max_samples == 25
+        # Should inherit batch_size and max_length
+        assert config.batch_size == 4
+        assert config.max_length == 128
+
+    def test_legacy_study_preset_key_still_works(self):
+        config_dict = {
+            "study_preset": "quick",
+            "model": {"name": "gpt2", "task": "causal_lm", "dtype": "float32", "device": "cpu"},
+            "dataset": {"name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "test", "text_column": "text"},
+        }
+        config = StudyConfig.from_dict(config_dict)
+        assert len(config.strategies) == 2
+
+    def test_preset_can_be_overridden(self):
+        config_dict = {
+            "preset": "quick",
+            "model": {"name": "gpt2", "task": "causal_lm", "dtype": "float32", "device": "cpu"},
+            "dataset": {"name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "test", "text_column": "text", "max_samples": 999},
+            "batch_size": 16,
+            "strategies": [{"name": "head_pruning", "params": {}}],
+        }
+        config = StudyConfig.from_dict(config_dict)
+        # Explicit strategies should override preset
+        assert len(config.strategies) == 1
+        assert config.strategies[0].name == "head_pruning"
+        # Explicit batch_size should override
+        assert config.batch_size == 16
+        # Explicit max_samples in dataset should be kept
+        assert config.dataset.max_samples == 999
+
+    def test_full_preset(self):
+        config_dict = {
+            "preset": "full",
+            "model": {"name": "gpt2", "task": "causal_lm", "dtype": "float32", "device": "cpu"},
+            "dataset": {"name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "test", "text_column": "text"},
+        }
+        config = StudyConfig.from_dict(config_dict)
+        assert len(config.strategies) == 4
+        strategy_names = {s.name for s in config.strategies}
+        assert strategy_names == {"layer_removal", "head_pruning", "ffn_ablation", "embedding_ablation"}
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
new file mode 100644
index 0000000..d5ae7f8
--- /dev/null
+++ b/tests/test_telemetry.py
@@ -0,0 +1,696 @@
+"""Tests for the opt-in telemetry module."""
+
+import json
+import os
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import torch
+
+from obliteratus.telemetry import (
+    _ALLOWED_METHOD_CONFIG_KEYS,
+    _direction_stats,
+    _extract_excise_details,
+    _extract_prompt_counts,
+    _extract_analysis_insights,
+    _is_mount_point,
+    _test_writable,
+    build_report,
+    disable_telemetry,
+    enable_telemetry,
+    is_enabled,
+    maybe_send_informed_report,
+    maybe_send_pipeline_report,
+    restore_from_hub,
+    send_report,
+    storage_diagnostic,
+)
+
+
+def _reset_telemetry():
+    import obliteratus.telemetry as t
+    t._enabled = None
+
+
+# ── Enable / disable ────────────────────────────────────────────────────
+
+
+class TestTelemetryConfig:
+    """Test telemetry enable/disable logic."""
+
+    def setup_method(self):
+        _reset_telemetry()
+
+    def test_disabled_by_default(self):
+        with patch.dict(os.environ, {}, clear=True):
+            _reset_telemetry()
+            assert not is_enabled()
+
+    def test_enabled_by_default_on_hf_spaces(self):
+        with patch.dict(os.environ, {"SPACE_ID": "user/space"}, clear=True):
+            import obliteratus.telemetry as t
+            old_val = t._ON_HF_SPACES
+            t._ON_HF_SPACES = True
+            _reset_telemetry()
+            assert is_enabled()
+            t._ON_HF_SPACES = old_val
+
+    def test_disable_via_env_zero(self):
+        with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "0"}):
+            _reset_telemetry()
+            assert not is_enabled()
+
+    def test_disable_via_env_false(self):
+        with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "false"}):
+            _reset_telemetry()
+            assert not is_enabled()
+
+    def test_enable_via_env_explicit(self):
+        with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "1"}):
+            _reset_telemetry()
+            assert is_enabled()
+
+    def test_enable_programmatically(self):
+        enable_telemetry()
+        assert is_enabled()
+
+    def test_disable_programmatically(self):
+        enable_telemetry()
+        assert is_enabled()
+        disable_telemetry()
+        assert not is_enabled()
+
+    def test_programmatic_overrides_env(self):
+        with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "1"}):
+            disable_telemetry()
+            assert not is_enabled()
+
+
+# ── Report building ─────────────────────────────────────────────────────
+
+
+class TestBuildReport:
+    """Test report payload construction."""
+
+    def _base_kwargs(self, **overrides):
+        defaults = dict(
+            architecture="LlamaForCausalLM",
+            num_layers=32,
+            num_heads=32,
+            hidden_size=4096,
+            total_params=8_000_000_000,
+            method="advanced",
+            method_config={"n_directions": 4, "norm_preserve": True},
+            quality_metrics={"perplexity": 5.2, "refusal_rate": 0.05},
+        )
+        defaults.update(overrides)
+        return defaults
+
+    def test_schema_version_2(self):
+        report = build_report(**self._base_kwargs())
+        assert report["schema_version"] == 2
+
+    def test_basic_fields(self):
+        report = build_report(**self._base_kwargs())
+        assert report["model"]["architecture"] == "LlamaForCausalLM"
+        assert report["model"]["num_layers"] == 32
+        assert report["model"]["total_params"] == 8_000_000_000
+        assert report["method"] == "advanced"
+        assert report["quality_metrics"]["refusal_rate"] == 0.05
+        assert len(report["session_id"]) == 32
+
+    def test_filters_unknown_config_keys(self):
+        report = build_report(**self._base_kwargs(
+            method_config={"n_directions": 1, "secret_flag": True, "nuke": "boom"},
+        ))
+        assert "n_directions" in report["method_config"]
+        assert "secret_flag" not in report["method_config"]
+        assert "nuke" not in report["method_config"]
+
+    def test_allows_all_valid_config_keys(self):
+        """Every key in the allowlist should pass through."""
+        config = {k: True for k in _ALLOWED_METHOD_CONFIG_KEYS}
+        report = build_report(**self._base_kwargs(method_config=config))
+        for k in _ALLOWED_METHOD_CONFIG_KEYS:
+            assert k in report["method_config"], f"Missing allowlisted key: {k}"
+
+    def test_no_model_name_in_report(self):
+        report = build_report(**self._base_kwargs())
+        report_str = json.dumps(report)
+        assert "meta-llama" not in report_str
+        assert "Llama-3" not in report_str
+
+    def test_environment_info(self):
+        report = build_report(**self._base_kwargs())
+        env = report["environment"]
+        assert "python_version" in env
+        assert "os" in env
+        assert "arch" in env
+
+    def test_stage_durations(self):
+        durations = {"summon": 2.5, "probe": 10.1, "distill": 3.2}
+        report = build_report(**self._base_kwargs(stage_durations=durations))
+        assert report["stage_durations"] == durations
+
+    def test_direction_stats(self):
+        stats = {"direction_norms": {"10": 0.95}, "mean_direction_persistence": 0.87}
+        report = build_report(**self._base_kwargs(direction_stats=stats))
+        assert report["direction_stats"]["mean_direction_persistence"] == 0.87
+
+    def test_excise_details(self):
+        details = {"modified_count": 128, "used_techniques": ["head_surgery"]}
+        report = build_report(**self._base_kwargs(excise_details=details))
+        assert report["excise_details"]["modified_count"] == 128
+
+    def test_prompt_counts(self):
+        counts = {"harmful": 33, "harmless": 33, "jailbreak": 15}
+        report = build_report(**self._base_kwargs(prompt_counts=counts))
+        assert report["prompt_counts"]["harmful"] == 33
+        assert report["prompt_counts"]["jailbreak"] == 15
+
+    def test_gpu_memory(self):
+        mem = {"peak_allocated_gb": 7.2, "peak_reserved_gb": 8.0}
+        report = build_report(**self._base_kwargs(gpu_memory=mem))
+        assert report["gpu_memory"]["peak_allocated_gb"] == 7.2
+
+    def test_analysis_insights_filtered(self):
+        """Only allowlisted analysis keys should pass through."""
+        insights = {
+            "detected_alignment_method": "DPO",
+            "alignment_confidence": 0.92,
+            "secret_internal_data": "should not appear",
+        }
+        report = build_report(**self._base_kwargs(analysis_insights=insights))
+        assert report["analysis_insights"]["detected_alignment_method"] == "DPO"
+        assert "secret_internal_data" not in report["analysis_insights"]
+
+    def test_informed_extras(self):
+        extras = {"ouroboros_passes": 3, "final_refusal_rate": 0.02, "total_duration": 120.5}
+        report = build_report(**self._base_kwargs(informed_extras=extras))
+        assert report["informed"]["ouroboros_passes"] == 3
+
+    def test_optional_fields_omitted_when_empty(self):
+        """Optional fields should not appear when not provided."""
+        report = build_report(**self._base_kwargs())
+        assert "stage_durations" not in report
+        assert "direction_stats" not in report
+        assert "excise_details" not in report
+        assert "prompt_counts" not in report
+        assert "gpu_memory" not in report
+        assert "analysis_insights" not in report
+        assert "informed" not in report
+
+
+# ── Direction stats extraction ──────────────────────────────────────────
+
+
+class TestDirectionStats:
+    """Test direction quality metric extraction."""
+
+    def test_direction_norms(self):
+        pipeline = MagicMock()
+        pipeline.refusal_directions = {
+            0: torch.randn(128),
+            1: torch.randn(128),
+        }
+        pipeline.refusal_subspaces = {}
+        stats = _direction_stats(pipeline)
+        assert "direction_norms" in stats
+        assert "0" in stats["direction_norms"]
+        assert "1" in stats["direction_norms"]
+
+    def test_direction_persistence(self):
+        """Adjacent layers with similar directions should have high persistence."""
+        d = torch.randn(128)
+        d = d / d.norm()
+        pipeline = MagicMock()
+        pipeline.refusal_directions = {0: d, 1: d + 0.01 * torch.randn(128)}
+        pipeline.refusal_subspaces = {}
+        stats = _direction_stats(pipeline)
+        assert "mean_direction_persistence" in stats
+        assert stats["mean_direction_persistence"] > 0.9
+
+    def test_effective_rank(self):
+        """Multi-direction subspace should yield effective rank > 1."""
+        pipeline = MagicMock()
+        pipeline.refusal_directions = {0: torch.randn(128)}
+        # 4-direction subspace with distinct directions
+        sub = torch.randn(4, 128)
+        pipeline.refusal_subspaces = {0: sub}
+        stats = _direction_stats(pipeline)
+        assert "effective_ranks" in stats
+        assert float(stats["effective_ranks"]["0"]) > 1.0
+
+    def test_empty_directions(self):
+        pipeline = MagicMock()
+        pipeline.refusal_directions = {}
+        pipeline.refusal_subspaces = {}
+        stats = _direction_stats(pipeline)
+        assert stats == {}
+
+
+# ── Excise details extraction ───────────────────────────────────────────
+
+
+class TestExciseDetails:
+    def test_basic_excise_details(self):
+        pipeline = MagicMock()
+        pipeline._excise_modified_count = 64
+        pipeline._refusal_heads = {10: [(0, 0.9), (3, 0.8)], 11: [(1, 0.7)]}
+        pipeline._sae_directions = {}
+        pipeline._expert_safety_scores = {}
+        pipeline._layer_excise_weights = {}
+        pipeline._expert_directions = {}
+        pipeline._steering_hooks = []
+        pipeline.invert_refusal = False
+        pipeline.project_embeddings = False
+        pipeline.activation_steering = False
+        pipeline.expert_transplant = False
+
+        details = _extract_excise_details(pipeline)
+        assert details["modified_count"] == 64
+        assert details["head_surgery_layers"] == 2
+        assert details["total_heads_projected"] == 3
+        assert "head_surgery" in details["used_techniques"]
+
+    def test_adaptive_weights(self):
+        pipeline = MagicMock()
+        pipeline._excise_modified_count = None
+        pipeline._refusal_heads = {}
+        pipeline._sae_directions = {}
+        pipeline._expert_safety_scores = {}
+        pipeline._layer_excise_weights = {0: 0.2, 1: 0.8, 2: 0.5}
+        pipeline._expert_directions = {}
+        pipeline._steering_hooks = []
+        pipeline.invert_refusal = False
+        pipeline.project_embeddings = False
+        pipeline.activation_steering = False
+        pipeline.expert_transplant = False
+
+        details = _extract_excise_details(pipeline)
+        assert details["adaptive_weight_min"] == 0.2
+        assert details["adaptive_weight_max"] == 0.8
+        assert "layer_adaptive" in details["used_techniques"]
+
+
+# ── Prompt counts extraction ────────────────────────────────────────────
+
+
+class TestPromptCounts:
+    def test_basic_counts(self):
+        pipeline = MagicMock()
+        pipeline.harmful_prompts = ["a"] * 33
+        pipeline.harmless_prompts = ["b"] * 33
+        pipeline.jailbreak_prompts = None
+        counts = _extract_prompt_counts(pipeline)
+        assert counts["harmful"] == 33
+        assert counts["harmless"] == 33
+        assert "jailbreak" not in counts
+
+    def test_with_jailbreak(self):
+        pipeline = MagicMock()
+        pipeline.harmful_prompts = ["a"] * 33
+        pipeline.harmless_prompts = ["b"] * 33
+        pipeline.jailbreak_prompts = ["c"] * 10
+        counts = _extract_prompt_counts(pipeline)
+        assert counts["jailbreak"] == 10
+
+
+# ── Send behavior ───────────────────────────────────────────────────────
+
+
+class TestSendReport:
+    def setup_method(self):
+        _reset_telemetry()
+
+    def test_does_not_send_when_disabled(self):
+        disable_telemetry()
+        with patch("obliteratus.telemetry._send_sync") as mock_send:
+            send_report({"test": True})
+            mock_send.assert_not_called()
+
+    def test_sends_when_enabled(self):
+        enable_telemetry()
+        with patch("obliteratus.telemetry._send_sync") as mock_send:
+            send_report({"test": True})
+            import time
+            time.sleep(0.1)
+            mock_send.assert_called_once_with({"test": True})
+
+    def test_send_failure_is_silent(self):
+        enable_telemetry()
+        with patch("obliteratus.telemetry._send_sync", side_effect=Exception("network down")) as mock_send:
+            # send_report should not propagate the exception to the caller
+            send_report({"test": True})
+            import time
+            time.sleep(0.1)  # Allow background thread to execute
+            mock_send.assert_called_once_with({"test": True})
+
+
+# ── Pipeline integration ────────────────────────────────────────────────
+
+
+def _make_mock_pipeline():
+    """Build a mock pipeline with all fields the telemetry module reads."""
+    p = MagicMock()
+    p.handle.summary.return_value = {
+        "architecture": "LlamaForCausalLM",
+        "num_layers": 32,
+        "num_heads": 32,
+        "hidden_size": 4096,
+        "total_params": 8_000_000_000,
+    }
+    p.method = "advanced"
+    p.n_directions = 4
+    p.norm_preserve = True
+    p.regularization = 0.1
+    p.refinement_passes = 2
+    p.project_biases = True
+    p.use_chat_template = True
+    p.use_whitened_svd = True
+    p.true_iterative_refinement = False
+    p.use_jailbreak_contrast = False
+    p.layer_adaptive_strength = False
+    p.attention_head_surgery = True
+    p.safety_neuron_masking = False
+    p.per_expert_directions = False
+    p.use_sae_features = False
+    p.invert_refusal = False
+    p.project_embeddings = False
+    p.embed_regularization = 0.5
+    p.activation_steering = False
+    p.steering_strength = 0.3
+    p.expert_transplant = False
+    p.transplant_blend = 0.3
+    p.reflection_strength = 2.0
+    p.quantization = None
+
+    p._quality_metrics = {"perplexity": 5.2, "coherence": 0.8, "refusal_rate": 0.05}
+    p._strong_layers = [10, 11, 12, 13]
+    p._stage_durations = {"summon": 3.0, "probe": 12.5, "distill": 4.1, "excise": 2.0, "verify": 8.3, "rebirth": 5.0}
+    p._excise_modified_count = 128
+
+    # Direction data
+    d = torch.randn(4096)
+    d = d / d.norm()
+    p.refusal_directions = {10: d, 11: d + 0.01 * torch.randn(4096), 12: d, 13: d}
+    p.refusal_subspaces = {10: torch.randn(4, 4096)}
+
+    # Excise details
+    p._refusal_heads = {10: [(0, 0.9), (3, 0.8)]}
+    p._sae_directions = {}
+    p._expert_safety_scores = {}
+    p._layer_excise_weights = {}
+    p._expert_directions = {}
+    p._steering_hooks = []
+
+    # Prompts
+    p.harmful_prompts = ["x"] * 33
+    p.harmless_prompts = ["y"] * 33
+    p.jailbreak_prompts = None
+
+    return p
+
+
+class TestPipelineIntegration:
+    def setup_method(self):
+        _reset_telemetry()
+
+    def test_does_nothing_when_disabled(self):
+        disable_telemetry()
+        with patch("obliteratus.telemetry.send_report") as mock_send:
+            maybe_send_pipeline_report(_make_mock_pipeline())
+            mock_send.assert_not_called()
+
+    def test_comprehensive_report(self):
+        """Verify that all data points are extracted from the pipeline."""
+        enable_telemetry()
+        p = _make_mock_pipeline()
+        with patch("obliteratus.telemetry.send_report") as mock_send:
+            maybe_send_pipeline_report(p)
+            mock_send.assert_called_once()
+            report = mock_send.call_args[0][0]
+
+            # Core fields
+            assert report["schema_version"] == 2
+            assert report["model"]["architecture"] == "LlamaForCausalLM"
+            assert report["method"] == "advanced"
+
+            # Method config — check all keys passed through
+            cfg = report["method_config"]
+            assert cfg["n_directions"] == 4
+            assert cfg["norm_preserve"] is True
+            assert cfg["use_whitened_svd"] is True
+            assert cfg["attention_head_surgery"] is True
+
+            # Quality metrics
+            assert report["quality_metrics"]["perplexity"] == 5.2
+            assert report["quality_metrics"]["refusal_rate"] == 0.05
+
+            # Stage durations
+            assert "stage_durations" in report
+            assert report["stage_durations"]["summon"] == 3.0
+            assert report["stage_durations"]["verify"] == 8.3
+
+            # Strong layers
+            assert report["strong_layers"] == [10, 11, 12, 13]
+
+            # Direction stats
+            assert "direction_stats" in report
+            assert "direction_norms" in report["direction_stats"]
+            assert "mean_direction_persistence" in report["direction_stats"]
+
+            # Excise details
+            assert "excise_details" in report
+            assert report["excise_details"]["modified_count"] == 128
+            assert "head_surgery" in report["excise_details"]["used_techniques"]
+
+            # Prompt counts
+            assert report["prompt_counts"]["harmful"] == 33
+            assert report["prompt_counts"]["harmless"] == 33
+
+            # Environment
+            assert "os" in report["environment"]
+            assert "python_version" in report["environment"]
+
+
+# ── Informed pipeline integration ────────────────────────────────────────
+
+
+@dataclass
+class _MockInsights:
+    detected_alignment_method: str = "DPO"
+    alignment_confidence: float = 0.92
+    alignment_probabilities: dict = field(default_factory=lambda: {"DPO": 0.92, "RLHF": 0.05})
+    cone_is_polyhedral: bool = True
+    cone_dimensionality: float = 3.2
+    mean_pairwise_cosine: float = 0.45
+    direction_specificity: dict = field(default_factory=lambda: {"violence": 0.8})
+    cluster_count: int = 3
+    direction_persistence: float = 0.87
+    mean_refusal_sparsity_index: float = 0.15
+    recommended_sparsity: float = 0.1
+    use_sparse_surgery: bool = True
+    estimated_robustness: str = "medium"
+    self_repair_estimate: float = 0.3
+    entanglement_score: float = 0.2
+    entangled_layers: list = field(default_factory=lambda: [15, 16])
+    clean_layers: list = field(default_factory=lambda: [10, 11, 12])
+    recommended_n_directions: int = 6
+    recommended_regularization: float = 0.05
+    recommended_refinement_passes: int = 3
+    recommended_layers: list = field(default_factory=lambda: [10, 11, 12, 13])
+    skip_layers: list = field(default_factory=lambda: [15])
+
+
+@dataclass
+class _MockInformedReport:
+    insights: _MockInsights = field(default_factory=_MockInsights)
+    ouroboros_passes: int = 2
+    final_refusal_rate: float = 0.02
+    analysis_duration: float = 15.3
+    total_duration: float = 85.7
+
+
+class TestInformedPipelineIntegration:
+    def setup_method(self):
+        _reset_telemetry()
+
+    def test_does_nothing_when_disabled(self):
+        disable_telemetry()
+        with patch("obliteratus.telemetry.send_report") as mock_send:
+            maybe_send_informed_report(_make_mock_pipeline(), _MockInformedReport())
+            mock_send.assert_not_called()
+
+    def test_comprehensive_informed_report(self):
+        enable_telemetry()
+        p = _make_mock_pipeline()
+        report_obj = _MockInformedReport()
+
+        with patch("obliteratus.telemetry.send_report") as mock_send:
+            maybe_send_informed_report(p, report_obj)
+            mock_send.assert_called_once()
+            report = mock_send.call_args[0][0]
+
+            # All base fields present
+            assert report["schema_version"] == 2
+            assert report["model"]["architecture"] == "LlamaForCausalLM"
+            assert "direction_stats" in report
+            assert "excise_details" in report
+
+            # Analysis insights
+            ai = report["analysis_insights"]
+            assert ai["detected_alignment_method"] == "DPO"
+            assert ai["alignment_confidence"] == 0.92
+            assert ai["cone_is_polyhedral"] is True
+            assert ai["cone_dimensionality"] == 3.2
+            assert ai["cluster_count"] == 3
+            assert ai["self_repair_estimate"] == 0.3
+            assert ai["entanglement_score"] == 0.2
+            assert ai["recommended_n_directions"] == 6
+
+            # Informed extras
+            inf = report["informed"]
+            assert inf["ouroboros_passes"] == 2
+            assert inf["final_refusal_rate"] == 0.02
+            assert inf["analysis_duration"] == 15.3
+            assert inf["total_duration"] == 85.7
+
+    def test_analysis_insights_filter_unknown_keys(self):
+        enable_telemetry()
+        _make_mock_pipeline()
+
+        @dataclass
+        class _BadInsights(_MockInsights):
+            secret_sauce: str = "should not appear"
+
+        report_obj = _MockInformedReport(insights=_BadInsights())
+        insights = _extract_analysis_insights(report_obj)
+        assert "detected_alignment_method" in insights
+        assert "secret_sauce" not in insights
+
+
+# ── Stage duration tracking on pipeline ──────────────────────────────────
+
+
+class TestStageDurationTracking:
+    def test_emit_records_durations(self):
+        """Verify _emit stores durations in _stage_durations dict."""
+        from obliteratus.abliterate import AbliterationPipeline
+
+        p = AbliterationPipeline.__new__(AbliterationPipeline)
+        p._stage_durations = {}
+        p._excise_modified_count = None
+        p._on_stage = lambda r: None
+
+        p._emit("summon", "done", "loaded", duration=3.5)
+        p._emit("probe", "done", "probed", duration=10.2)
+        p._emit("excise", "done", "excised", duration=2.1, modified_count=64)
+
+        assert p._stage_durations == {"summon": 3.5, "probe": 10.2, "excise": 2.1}
+        assert p._excise_modified_count == 64
+
+    def test_running_status_does_not_record(self):
+        """Only 'done' status should record durations."""
+        from obliteratus.abliterate import AbliterationPipeline
+
+        p = AbliterationPipeline.__new__(AbliterationPipeline)
+        p._stage_durations = {}
+        p._excise_modified_count = None
+        p._on_stage = lambda r: None
+
+        p._emit("summon", "running", "loading...", duration=0)
+        assert p._stage_durations == {}
+
+
+# ── Storage helpers ──────────────────────────────────────────────────────
+
+
+class TestStorageHelpers:
+    """Test persistent storage helper functions."""
+
+    def test_test_writable_valid_dir(self):
+        with tempfile.TemporaryDirectory() as d:
+            assert _test_writable(Path(d) / "subdir")
+
+    def test_test_writable_unwritable(self):
+        # /proc is never writable for arbitrary files
+        assert not _test_writable(Path("/proc/obliteratus_test"))
+
+    def test_is_mount_point_existing_path(self):
+        # Should return a bool without raising for any existing path
+        result = _is_mount_point(Path("/"))
+        assert isinstance(result, bool)
+
+    def test_is_mount_point_nonexistent(self):
+        assert not _is_mount_point(Path("/nonexistent_dir_12345"))
+
+    def test_storage_diagnostic_returns_dict(self):
+        diag = storage_diagnostic()
+        assert isinstance(diag, dict)
+        assert "telemetry_dir" in diag
+        assert "is_persistent" in diag
+        assert "on_hf_spaces" in diag
+        assert "telemetry_enabled" in diag
+        assert "data_dir_exists" in diag
+
+
+# ── Hub restore ──────────────────────────────────────────────────────────
+
+
+class TestHubRestore:
+    """Test Hub-to-local restore functionality."""
+
+    def setup_method(self):
+        _reset_telemetry()
+        # Reset restore state so each test can trigger it
+        import obliteratus.telemetry as t
+        t._restore_done = False
+
+    def test_restore_skips_when_no_repo(self):
+        with patch("obliteratus.telemetry._TELEMETRY_REPO", ""):
+            assert restore_from_hub() == 0
+
+    def test_restore_deduplicates(self):
+        """Records already in local JSONL should not be re-added."""
+        import obliteratus.telemetry as t
+
+        with tempfile.TemporaryDirectory() as d:
+            test_file = Path(d) / "telemetry.jsonl"
+            existing = {"session_id": "abc", "timestamp": "2025-01-01T00:00:00"}
+            test_file.write_text(json.dumps(existing) + "\n")
+
+            old_file = t.TELEMETRY_FILE
+            old_repo = t._TELEMETRY_REPO
+            t.TELEMETRY_FILE = test_file
+            t._TELEMETRY_REPO = "test/repo"
+            t._restore_done = False
+
+            try:
+                hub_records = [
+                    {"session_id": "abc", "timestamp": "2025-01-01T00:00:00"},  # duplicate
+                    {"session_id": "def", "timestamp": "2025-01-02T00:00:00"},  # new
+                ]
+                with patch("obliteratus.telemetry.fetch_hub_records", return_value=hub_records):
+                    count = restore_from_hub()
+                    assert count == 1  # Only the new record
+
+                # Verify file contents
+                lines = test_file.read_text().strip().split("\n")
+                assert len(lines) == 2  # original + 1 new
+            finally:
+                t.TELEMETRY_FILE = old_file
+                t._TELEMETRY_REPO = old_repo
+
+    def test_restore_only_runs_once(self):
+        """Calling restore_from_hub() twice should be a no-op the second time."""
+        import obliteratus.telemetry as t
+        t._restore_done = False
+
+        with patch("obliteratus.telemetry._TELEMETRY_REPO", "test/repo"):
+            with patch("obliteratus.telemetry.fetch_hub_records", return_value=[]):
+                restore_from_hub()
+                # Second call should return 0 immediately
+                assert restore_from_hub() == 0
diff --git a/tests/test_visualization.py b/tests/test_visualization.py
new file mode 100644
index 0000000..a25b7d3
--- /dev/null
+++ b/tests/test_visualization.py
@@ -0,0 +1,167 @@
+"""Tests for visualization module (non-interactive, save-to-file)."""
+
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+
+import pytest
+import torch
+
+from obliteratus.analysis.cross_layer import CrossLayerAlignmentAnalyzer
+from obliteratus.analysis.activation_probing import ActivationProbe
+from obliteratus.analysis.visualization import (
+    _sanitize_label,
+    plot_refusal_topology,
+    plot_cross_layer_heatmap,
+    plot_angular_drift,
+    plot_probe_dashboard,
+    plot_defense_radar,
+)
+from obliteratus.analysis.defense_robustness import DefenseProfile
+
+
+@pytest.fixture
+def tmp_dir():
+    with tempfile.TemporaryDirectory() as d:
+        yield Path(d)
+
+
+def _make_refusal_data(n_layers=6, hidden_dim=16):
+    """Create test refusal directions and means."""
+    torch.manual_seed(42)
+    directions = {}
+    harmful_means = {}
+    harmless_means = {}
+
+    for i in range(n_layers):
+        d = torch.randn(hidden_dim)
+        directions[i] = d / d.norm()
+        base = torch.randn(hidden_dim)
+        harmless_means[i] = base.unsqueeze(0)
+        harmful_means[i] = (base + (2.0 if i in [2, 3, 4] else 0.3) * directions[i]).unsqueeze(0)
+
+    strong_layers = [2, 3, 4]
+    return directions, harmful_means, harmless_means, strong_layers
+
+
+class TestRefusalTopology:
+    def test_plot_saves_file(self, tmp_dir):
+        directions, h_means, b_means, strong = _make_refusal_data()
+        path = tmp_dir / "topology.png"
+        plot_refusal_topology(
+            directions, h_means, b_means, strong, output_path=path
+        )
+        assert path.exists()
+        assert path.stat().st_size > 0
+
+    def test_plot_returns_figure(self, tmp_dir):
+        directions, h_means, b_means, strong = _make_refusal_data()
+        fig = plot_refusal_topology(
+            directions, h_means, b_means, strong, output_path=tmp_dir / "test.png"
+        )
+        assert fig is not None
+
+
+class TestCrossLayerHeatmap:
+    def test_plot_saves_file(self, tmp_dir):
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(6)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+
+        path = tmp_dir / "heatmap.png"
+        plot_cross_layer_heatmap(result, output_path=path)
+        assert path.exists()
+
+
+class TestAngularDrift:
+    def test_plot_saves_file(self, tmp_dir):
+        torch.manual_seed(42)
+        directions = {i: torch.randn(16) for i in range(8)}
+        analyzer = CrossLayerAlignmentAnalyzer()
+        result = analyzer.analyze(directions)
+
+        path = tmp_dir / "drift.png"
+        plot_angular_drift(result, output_path=path)
+        assert path.exists()
+
+
+class TestProbeDashboard:
+    def test_plot_saves_file(self, tmp_dir):
+        torch.manual_seed(42)
+        harmful = {i: [torch.randn(8) for _ in range(3)] for i in range(4)}
+        harmless = {i: [torch.randn(8) for _ in range(3)] for i in range(4)}
+        dirs = {i: torch.randn(8) for i in range(4)}
+
+        probe = ActivationProbe()
+        result = probe.probe_all_layers(harmful, harmless, dirs)
+
+        path = tmp_dir / "probe.png"
+        plot_probe_dashboard(result, output_path=path)
+        assert path.exists()
+
+
+class TestDefenseRadar:
+    def test_plot_saves_file(self, tmp_dir):
+        profile = DefenseProfile(
+            model_name="test-model",
+            alignment_type_estimate="RLHF-like",
+            refusal_concentration=0.4,
+            refusal_layer_spread=5,
+            mean_refusal_strength=2.0,
+            max_refusal_strength=4.0,
+            self_repair_estimate=0.6,
+            entanglement_score=0.3,
+            estimated_robustness="medium",
+        )
+        path = tmp_dir / "radar.png"
+        plot_defense_radar(profile, output_path=path)
+        assert path.exists()
+
+    def test_model_name_sanitized_in_title(self, tmp_dir):
+        """Ensure sensitive paths in model_name don't leak into saved charts."""
+        profile = DefenseProfile(
+            model_name="/home/user/.cache/huggingface/hub/models--secret-org/private-model",
+            alignment_type_estimate="RLHF-like",
+            refusal_concentration=0.4,
+            refusal_layer_spread=5,
+            mean_refusal_strength=2.0,
+            max_refusal_strength=4.0,
+            self_repair_estimate=0.6,
+            entanglement_score=0.3,
+            estimated_robustness="medium",
+        )
+        path = tmp_dir / "radar_sanitized.png"
+        fig = plot_defense_radar(profile, output_path=path)
+        # Title should not contain the full filesystem path
+        title_text = fig.axes[0].get_title()
+        assert "/home/user" not in title_text
+        assert ".cache" not in title_text
+
+
+class TestSanitizeLabel:
+    def test_strips_absolute_paths(self):
+        result = _sanitize_label("/home/user/.cache/huggingface/models--org/model")
+        assert "/home/user" not in result
+        assert "model" in result
+
+    def test_redacts_hf_tokens(self):
+        result = _sanitize_label("model with hf_abcdefghij token")
+        assert "hf_abcdefghij" not in result
+        assert "<TOKEN>" in result
+
+    def test_redacts_long_hex_strings(self):
+        hex_str = "a" * 40
+        result = _sanitize_label(f"commit {hex_str}")
+        assert hex_str not in result
+        assert "<REDACTED>" in result
+
+    def test_truncates_long_strings(self):
+        long = "x" * 200
+        result = _sanitize_label(long)
+        assert len(result) <= 80
+        assert result.endswith("...")
+
+    def test_passes_normal_strings_through(self):
+        assert _sanitize_label("Refusal Topology Map") == "Refusal Topology Map"