# OBLITERATUS/tests/test_architecture_profiles.py

"""Tests for architecture-aware preset defaults.

Tests the detection logic and recommended parameter overrides for each
architecture class (dense/MoE, standard/reasoning).
"""

from __future__ import annotations


from obliteratus.architecture_profiles import (
    ArchitectureClass,
    ArchitectureProfile,
    ReasoningClass,
    detect_architecture,
    get_profile_summary,
    apply_profile_to_method_config,
)


# ---------------------------------------------------------------------------
#  Detection: Dense models
# ---------------------------------------------------------------------------


class TestDenseDetection:
    """Name-only detection of conventional dense checkpoints."""

    # Reference dense, non-reasoning checkpoint reused by several tests below.
    _LLAMA_ID = "meta-llama/Llama-3.1-8B-Instruct"

    def test_llama_is_dense(self):
        """Llama 3.1 resolves to DENSE / STANDARD and is not flagged as MoE."""
        detected = detect_architecture(self._LLAMA_ID)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD
        assert not detected.is_moe

    def test_qwen_dense_is_dense(self):
        """Qwen 2.5 7B is dense and carries no MoE flag."""
        model_id = "Qwen/Qwen2.5-7B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_gemma_is_dense(self):
        """Gemma 3 27B is classified as dense."""
        model_id = "google/gemma-3-27b-it"
        assert detect_architecture(model_id).arch_class == ArchitectureClass.DENSE

    def test_phi_is_dense(self):
        """Phi-4 mini is classified as dense."""
        model_id = "microsoft/Phi-4-mini-instruct"
        assert detect_architecture(model_id).arch_class == ArchitectureClass.DENSE

    def test_mistral_small_is_dense(self):
        """Mistral Small 24B is classified as dense."""
        model_id = "mistralai/Mistral-Small-24B-Instruct-2501"
        assert detect_architecture(model_id).arch_class == ArchitectureClass.DENSE

    def test_yi_is_dense(self):
        """Yi 1.5 9B is classified as dense."""
        model_id = "01-ai/Yi-1.5-9B-Chat"
        assert detect_architecture(model_id).arch_class == ArchitectureClass.DENSE

    def test_dense_label(self):
        """The dense, non-reasoning profile is labelled "Dense Standard"."""
        label = detect_architecture(self._LLAMA_ID).profile_label
        assert label == "Dense Standard"

    def test_dense_recommended_method(self):
        """The dense, non-reasoning profile recommends the "aggressive" method."""
        method = detect_architecture(self._LLAMA_ID).recommended_method
        assert method == "aggressive"


# ---------------------------------------------------------------------------
#  Detection: MoE models
# ---------------------------------------------------------------------------


class TestMoEDetection:
    """Detection of mixture-of-experts checkpoints.

    Most tests here assert only the ``is_moe`` flag; the small/large size
    class is checked solely where a test says so explicitly.
    """

    # MoE checkpoint reused by the name-only and the config-assisted tests.
    _GPT_OSS_ID = "openai/gpt-oss-20b"

    def test_gpt_oss_is_moe(self):
        """GPT-OSS by name alone is MoE and lands in the SMALL_MOE class."""
        detected = detect_architecture(self._GPT_OSS_ID)
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_qwen3_30b_is_small_moe(self):
        """Qwen3-30B-A3B is flagged as MoE (size class is not asserted)."""
        model_id = "Qwen/Qwen3-30B-A3B"
        assert detect_architecture(model_id).is_moe

    def test_deepseek_v3_is_large_moe(self):
        """DeepSeek-V3.2 is flagged as MoE (size class is not asserted)."""
        model_id = "deepseek-ai/DeepSeek-V3.2"
        assert detect_architecture(model_id).is_moe

    def test_kimi_k2_is_large_moe(self):
        """Kimi-K2 is flagged as MoE (size class is not asserted)."""
        model_id = "moonshotai/Kimi-K2-Instruct"
        assert detect_architecture(model_id).is_moe

    def test_qwen3_235b_is_moe(self):
        """Qwen3-235B-A22B is flagged as MoE."""
        model_id = "Qwen/Qwen3-235B-A22B"
        assert detect_architecture(model_id).is_moe

    def test_glm_47_is_moe(self):
        """GLM-4.7 is flagged as MoE."""
        model_id = "zai-org/GLM-4.7"
        assert detect_architecture(model_id).is_moe

    def test_llama4_maverick_is_moe(self):
        """Llama 4 Maverick is flagged as MoE."""
        model_id = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
        assert detect_architecture(model_id).is_moe

    def test_step_flash_is_moe(self):
        """Step-3.5-Flash is flagged as MoE."""
        model_id = "stepfun-ai/Step-3.5-Flash"
        assert detect_architecture(model_id).is_moe

    def test_minimax_is_moe(self):
        """MiniMax-M2.1 is flagged as MoE."""
        model_id = "MiniMaxAI/MiniMax-M2.1"
        assert detect_architecture(model_id).is_moe

    def test_mistral_large_3_is_moe(self):
        """Mistral Large 3 is flagged as MoE."""
        model_id = "mistralai/Mistral-Large-3-675B-Instruct-2512"
        assert detect_architecture(model_id).is_moe

    def test_moe_recommended_method_is_surgical(self):
        """An MoE profile (GPT-OSS here) recommends the "surgical" method."""
        method = detect_architecture(self._GPT_OSS_ID).recommended_method
        assert method == "surgical"

    def test_gpt_oss_with_config_is_small_moe(self):
        """GPT-OSS plus a config reporting 8 experts (2 active) is SMALL_MOE."""

        class StubConfig:
            # Architecture fields exposed as plain class attributes.
            model_type = "gpt_neox"
            num_hidden_layers = 32
            hidden_size = 2560
            intermediate_size = 6912
            vocab_size = 50304
            num_local_experts = 8
            num_experts_per_tok = 2

        detected = detect_architecture(self._GPT_OSS_ID, config=StubConfig())
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.SMALL_MOE


# ---------------------------------------------------------------------------
#  Detection: Reasoning models
# ---------------------------------------------------------------------------


class TestReasoningDetection:
    """Detection of reasoning ("thinking") checkpoints versus standard ones."""

    # Dense reasoning distill reused by the override checks below.
    _R1_DISTILL_7B = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

    def test_r1_distill_qwen_is_reasoning(self):
        """The Qwen-based R1 distill is a reasoning model."""
        detected = detect_architecture(self._R1_DISTILL_7B)
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_r1_distill_llama_is_reasoning(self):
        """The Llama-based R1 distill is a reasoning model."""
        model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
        detected = detect_architecture(model_id)
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_r1_distill_is_dense_reasoning(self):
        """An R1 distill is dense *and* reasoning, labelled "Dense Reasoning"."""
        model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
        detected = detect_architecture(model_id)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.REASONING
        assert detected.profile_label == "Dense Reasoning"

    def test_olmo_think_is_reasoning(self):
        """The OLMo "Think" variant is a reasoning model."""
        model_id = "allenai/Olmo-3.1-32B-Think"
        detected = detect_architecture(model_id)
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_olmo_standard_is_not_reasoning(self):
        """Plain OLMo (no "Think") must stay STANDARD.

        Regression guard against a reasoning-pattern false positive on the
        OLMo name (the original note attributes it to the 'o1' pattern).
        """
        model_id = "allenai/Olmo-3-7B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_falcon3_is_not_reasoning(self):
        """Falcon3 must not be caught by the 'o3' reasoning pattern."""
        model_id = "tiiuae/Falcon3-7B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_full_r1_is_moe_reasoning(self):
        """Full DeepSeek-R1 is both MoE and reasoning."""
        detected = detect_architecture("deepseek-ai/DeepSeek-R1")
        assert detected.is_moe
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_reasoning_dense_more_directions(self):
        """Dense reasoning profiles override ``n_directions`` to at least 12."""
        detected = detect_architecture(self._R1_DISTILL_7B)
        assert detected.arch_class == ArchitectureClass.DENSE
        # A missing override falls back to 0 and therefore fails the check.
        n_directions = detected.method_overrides.get("n_directions", 0)
        assert n_directions >= 12

    def test_reasoning_dense_more_passes(self):
        """Dense reasoning profiles override ``refinement_passes`` to at least 4."""
        detected = detect_architecture(self._R1_DISTILL_7B)
        assert detected.arch_class == ArchitectureClass.DENSE
        # A missing override falls back to 0 and therefore fails the check.
        passes = detected.method_overrides.get("refinement_passes", 0)
        assert passes >= 4

    def test_non_reasoning_is_standard(self):
        """A regular instruct model is classified as STANDARD."""
        model_id = "meta-llama/Llama-3.1-8B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.reasoning_class == ReasoningClass.STANDARD


# ---------------------------------------------------------------------------
#  Detection with config object
# ---------------------------------------------------------------------------


class TestConfigDetection:
    """Detection driven by attributes of a supplied (stub) config object."""

    @staticmethod
    def _stub_config(**attrs):
        """Return an instance of a throwaway class whose class attributes are ``attrs``."""
        return type("MockConfig", (), attrs)()

    def test_moe_config_attrs(self):
        """``num_local_experts`` on the config marks the model as MoE and is surfaced."""
        mixtral_like = self._stub_config(
            model_type="mixtral",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=14336,
            vocab_size=32000,
            num_local_experts=8,
            num_experts_per_tok=2,
        )
        detected = detect_architecture(
            "custom/mixtral-model",
            config=mixtral_like,
            num_layers=32,
            hidden_size=4096,
        )
        assert detected.is_moe
        assert detected.num_experts == 8
        assert detected.num_active_experts == 2

    def test_large_moe_threshold(self):
        """A DeepSeek-V3-sized config (256 routed experts) is LARGE_MOE."""
        deepseek_like = self._stub_config(
            model_type="deepseek_v3",
            num_hidden_layers=61,
            hidden_size=7168,
            intermediate_size=18432,
            vocab_size=102400,
            n_routed_experts=256,
            num_experts_per_tok=8,
        )
        detected = detect_architecture("custom/large-moe", config=deepseek_like)
        assert detected.arch_class == ArchitectureClass.LARGE_MOE

    def test_small_moe_threshold(self):
        """A Mixtral-sized config with 8 experts is SMALL_MOE."""
        mixtral_like = self._stub_config(
            model_type="mixtral",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=14336,
            vocab_size=32000,
            num_local_experts=8,
            num_experts_per_tok=2,
        )
        detected = detect_architecture("custom/small-moe", config=mixtral_like)
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_dense_config(self):
        """A config carrying no expert attributes is dense."""
        llama_like = self._stub_config(
            model_type="llama",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=11008,
            vocab_size=32000,
        )
        detected = detect_architecture("custom/dense-model", config=llama_like)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_llama4_scout_is_large_moe(self):
        """Llama 4 Scout (16 experts) must still be LARGE_MOE.

        Regression guard: per the original note, a total parameter count
        above 100B has to win over the low expert count.
        """
        scout_like = self._stub_config(
            model_type="llama4",
            num_hidden_layers=48,
            hidden_size=5120,
            intermediate_size=14336,
            vocab_size=202048,
            num_local_experts=16,
            num_experts_per_tok=1,
        )
        detected = detect_architecture(
            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
            config=scout_like,
        )
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.LARGE_MOE


# ---------------------------------------------------------------------------
#  Recommended defaults validation
# ---------------------------------------------------------------------------


class TestRecommendedDefaults:
    """Spot checks of the per-profile recommended switches and overrides.

    Each ``.get`` default is chosen so that a missing key fails the test.
    """

    _DENSE_ID = "meta-llama/Llama-3.1-8B-Instruct"
    _MOE_ID = "openai/gpt-oss-20b"

    def test_dense_standard_no_riemannian(self):
        """Dense Standard keeps the Riemannian module off."""
        modules = detect_architecture(self._DENSE_ID).breakthrough_modules
        assert not modules.get("riemannian", True)

    def test_dense_standard_anti_ouroboros_on(self):
        """Dense Standard turns Anti-Ouroboros on."""
        modules = detect_architecture(self._DENSE_ID).breakthrough_modules
        assert modules.get("anti_ouroboros", False)

    def test_dense_standard_spectral_cert_on(self):
        """Dense Standard turns the spectral certificate on."""
        modules = detect_architecture(self._DENSE_ID).breakthrough_modules
        assert modules.get("spectral_cert", False)

    def test_moe_conditional_on(self):
        """MoE turns conditional abliteration on (original note cites Cracken AI 2025)."""
        modules = detect_architecture(self._MOE_ID).breakthrough_modules
        assert modules.get("conditional", False)

    def test_moe_no_project_embeddings(self):
        """MoE overrides ``project_embeddings`` to a falsy value."""
        overrides = detect_architecture(self._MOE_ID).method_overrides
        assert not overrides.get("project_embeddings", True)

    def test_moe_per_expert_directions(self):
        """MoE overrides ``per_expert_directions`` to a truthy value."""
        overrides = detect_architecture(self._MOE_ID).method_overrides
        assert overrides.get("per_expert_directions", False)

    def test_large_moe_riemannian_on(self):
        """DeepSeek-V3.2 (expected large MoE) has the Riemannian module on."""
        modules = detect_architecture("deepseek-ai/DeepSeek-V3.2").breakthrough_modules
        assert modules.get("riemannian", False)

    def test_reasoning_dense_jailbreak_contrast(self):
        """Dense reasoning models enable jailbreak contrast."""
        model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        overrides = detect_architecture(model_id).method_overrides
        assert overrides.get("use_jailbreak_contrast", False)

    def test_reasoning_moe_gentle_transplant(self):
        """Reasoning MoE caps ``transplant_blend`` at 0.10."""
        overrides = detect_architecture("deepseek-ai/DeepSeek-R1").method_overrides
        blend = overrides.get("transplant_blend", 1.0)
        assert blend <= 0.10


# ---------------------------------------------------------------------------
#  Profile summary
# ---------------------------------------------------------------------------


class TestProfileSummary:
    """Content checks on the text produced by ``get_profile_summary``."""

    _DENSE_ID = "meta-llama/Llama-3.1-8B-Instruct"
    _MOE_ID = "openai/gpt-oss-20b"

    def test_summary_contains_profile_label(self):
        """The dense summary mentions the profile label."""
        text = get_profile_summary(detect_architecture(self._DENSE_ID))
        assert "Dense Standard" in text

    def test_summary_contains_method(self):
        """The dense summary mentions the recommended method."""
        text = get_profile_summary(detect_architecture(self._DENSE_ID))
        assert "aggressive" in text

    def test_summary_contains_citations(self):
        """The MoE summary cites at least one of the expected sources."""
        text = get_profile_summary(detect_architecture(self._MOE_ID))
        assert any(source in text for source in ("SAFEx", "Cracken"))

    def test_summary_contains_moe_info(self):
        """The MoE summary mentions "MoE"."""
        text = get_profile_summary(detect_architecture(self._MOE_ID))
        assert "MoE" in text

    def test_summary_contains_breakthrough_modules(self):
        """The MoE summary lists the "conditional" module."""
        text = get_profile_summary(detect_architecture(self._MOE_ID))
        assert "conditional" in text


# ---------------------------------------------------------------------------
#  apply_profile_to_method_config
# ---------------------------------------------------------------------------


class TestApplyProfile:
    """Merging of profile overrides into a base method config."""

    @staticmethod
    def _base_config(method_name):
        """Return a fresh dict copy of ``METHODS[method_name]``.

        The import is kept local so it only happens when a test runs.
        """
        from obliteratus.abliterate import METHODS

        return dict(METHODS[method_name])

    @staticmethod
    def _dense_profile(overrides):
        """Build a bare Dense/Standard profile carrying only ``overrides``."""
        return ArchitectureProfile(
            arch_class=ArchitectureClass.DENSE,
            reasoning_class=ReasoningClass.STANDARD,
            method_overrides=overrides,
            breakthrough_modules={},
        )

    def test_overrides_applied(self):
        """A key present in the overrides takes the profile's value after merging."""
        base_cfg = self._base_config("aggressive")
        detected = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
        result = apply_profile_to_method_config(detected, base_cfg)
        assert result["n_directions"] == detected.method_overrides["n_directions"]

    def test_non_overridden_preserved(self):
        """A key the profile does not override keeps its base value."""
        base_cfg = self._base_config("aggressive")
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        result = apply_profile_to_method_config(detected, base_cfg)
        # norm_preserve is expected to be absent from the overrides.
        assert result["norm_preserve"] == base_cfg["norm_preserve"]

    def test_empty_overrides(self):
        """With no overrides the merged config equals the base config."""
        base_cfg = self._base_config("advanced")
        bare_profile = self._dense_profile({})
        result = apply_profile_to_method_config(bare_profile, base_cfg)
        assert result == base_cfg

    def test_override_key_not_in_base_is_added(self):
        """An override key is present in the merged result.

        Per the original note this matters for the UI auto-detect path:
        keys such as ``use_jailbreak_contrast`` may be missing from the base
        method config yet are read from the merged dict by ``app.py``.
        """
        base_cfg = self._base_config("advanced")
        bare_profile = self._dense_profile({"use_jailbreak_contrast": True})
        result = apply_profile_to_method_config(bare_profile, base_cfg)
        assert result["use_jailbreak_contrast"] is True


# ---------------------------------------------------------------------------
#  All 6 profile combinations
# ---------------------------------------------------------------------------


class TestAllSixProfiles:
    """End-to-end expectations for each of the six arch x reasoning profiles."""

    def _make_moe_config(self, num_experts=8, active=2, layers=32, hidden=4096):
        """Build a Mixtral-style stub config object.

        The returned instance exposes the fields as class attributes;
        ``intermediate_size`` is derived as ``hidden * 4``.
        """
        fields = {
            "model_type": "mixtral",
            "num_hidden_layers": layers,
            "hidden_size": hidden,
            "intermediate_size": hidden * 4,
            "vocab_size": 32000,
            "num_local_experts": num_experts,
            "num_experts_per_tok": active,
        }
        return type("C", (), fields)()

    def test_dense_standard_full(self):
        """Dense Standard: aggressive method, no Riemannian/conditional modules."""
        prof = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        modules = prof.breakthrough_modules
        assert prof.profile_label == "Dense Standard"
        assert prof.recommended_method == "aggressive"
        assert not modules["riemannian"]
        assert modules["anti_ouroboros"]
        assert modules["spectral_cert"]
        assert not modules["conditional"]
        assert len(prof.profile_description) > 0
        assert len(prof.research_citations) > 0

    def test_dense_reasoning_full(self):
        """Dense Reasoning: more directions/passes and all four modules on."""
        prof = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
        overrides = prof.method_overrides
        assert prof.profile_label == "Dense Reasoning"
        assert prof.recommended_method == "aggressive"
        assert overrides["n_directions"] >= 12
        assert overrides["refinement_passes"] >= 4
        for flag in ("use_jailbreak_contrast", "use_chat_template"):
            assert overrides[flag] is True
        for module in ("anti_ouroboros", "riemannian", "conditional", "spectral_cert"):
            assert prof.breakthrough_modules[module]
        assert len(prof.profile_description) > 0

    def test_small_moe_standard_full(self):
        """Small MoE Standard: surgical, per-expert directions, Riemannian off."""
        stub = self._make_moe_config(num_experts=8, active=2)
        prof = detect_architecture("custom/small-moe-model", config=stub)
        overrides = prof.method_overrides
        modules = prof.breakthrough_modules
        assert prof.profile_label == "Small MoE Standard"
        assert prof.arch_class == ArchitectureClass.SMALL_MOE
        assert prof.recommended_method == "surgical"
        assert overrides["per_expert_directions"] is True
        for flag in ("invert_refusal", "project_embeddings"):
            assert overrides[flag] is False
        for module in ("conditional", "anti_ouroboros", "spectral_cert"):
            assert modules[module]
        assert not modules["riemannian"]
        assert len(prof.profile_description) > 0

    def test_small_moe_reasoning_full(self):
        """Small MoE Reasoning, described by the original author as the most fragile mix."""
        stub = self._make_moe_config(num_experts=8, active=2)
        # The "think" token in the name is what is expected to flag reasoning.
        prof = detect_architecture("custom/small-moe-think-model", config=stub)
        overrides = prof.method_overrides
        assert prof.profile_label == "Small MoE Reasoning"
        assert prof.arch_class == ArchitectureClass.SMALL_MOE
        assert prof.reasoning_class == ReasoningClass.REASONING
        assert prof.recommended_method == "surgical"
        for flag in ("per_expert_directions", "use_jailbreak_contrast", "use_chat_template"):
            assert overrides[flag] is True
        assert overrides["invert_refusal"] is False
        for module in ("conditional", "anti_ouroboros", "spectral_cert"):
            assert prof.breakthrough_modules[module]
        assert len(prof.profile_description) > 0

    def test_large_moe_standard_full(self):
        """Large MoE Standard: surgical with expert transplant at blend 0.10."""
        stub = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
        prof = detect_architecture("custom/large-moe-model", config=stub)
        overrides = prof.method_overrides
        assert prof.profile_label == "Large MoE Standard"
        assert prof.arch_class == ArchitectureClass.LARGE_MOE
        assert prof.recommended_method == "surgical"
        for flag in ("per_expert_directions", "layer_adaptive_strength", "expert_transplant"):
            assert overrides[flag] is True
        assert overrides["transplant_blend"] == 0.10
        assert overrides["attention_head_surgery"] is True
        assert overrides["project_embeddings"] is False
        for module in ("conditional", "riemannian", "anti_ouroboros", "spectral_cert"):
            assert prof.breakthrough_modules[module]
        assert len(prof.profile_description) > 0

    def test_large_moe_reasoning_full(self):
        """Large MoE Reasoning: 8 directions, blend 0.08, safety-neuron masking."""
        stub = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
        prof = detect_architecture("custom/large-moe-r1-model", config=stub)
        overrides = prof.method_overrides
        assert prof.profile_label == "Large MoE Reasoning"
        assert prof.arch_class == ArchitectureClass.LARGE_MOE
        assert prof.reasoning_class == ReasoningClass.REASONING
        assert prof.recommended_method == "surgical"
        assert overrides["n_directions"] == 8
        assert overrides["transplant_blend"] == 0.08
        for flag in ("use_jailbreak_contrast", "safety_neuron_masking"):
            assert overrides[flag] is True
        for module in ("conditional", "riemannian", "anti_ouroboros", "spectral_cert"):
            assert prof.breakthrough_modules[module]
        assert len(prof.profile_description) > 0


# ---------------------------------------------------------------------------
#  Edge cases
# ---------------------------------------------------------------------------


class TestEdgeCases:
    """Boundary inputs for architecture detection."""

    @staticmethod
    def _llama_config(**extra):
        """Return a Llama-shaped stub config, optionally extended with ``extra`` attributes."""
        fields = {
            "model_type": "llama",
            "num_hidden_layers": 32,
            "hidden_size": 4096,
            "intermediate_size": 11008,
            "vocab_size": 32000,
        }
        fields.update(extra)
        return type("MockConfig", (), fields)()

    def test_empty_model_name(self):
        """An empty name falls back to Dense / Standard."""
        detected = detect_architecture("")
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_unknown_model_type_in_config(self):
        """An unrecognised ``model_type`` must not produce an MoE class."""
        fields = {
            "model_type": "banana",
            "num_hidden_layers": 12,
            "hidden_size": 768,
            "intermediate_size": 3072,
            "vocab_size": 30522,
        }
        unknown = type("MockConfig", (), fields)()
        detected = detect_architecture("custom/unknown-arch", config=unknown)
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_config_with_zero_experts(self):
        """``num_local_experts=0`` leaves the model dense."""
        zero_experts = self._llama_config(num_local_experts=0)
        detected = detect_architecture("custom/dense-with-zero", config=zero_experts)
        assert not detected.is_moe
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_allcaps_model_name(self):
        """Name matching is case-insensitive: an upper-cased R1 distill still matches."""
        detected = detect_architecture("DEEPSEEK-AI/DEEPSEEK-R1-DISTILL-QWEN-7B")
        assert detected.reasoning_class == ReasoningClass.REASONING
        # A distill is expected to be dense despite the R1 name.
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_single_expert_is_moe(self):
        """``num_local_experts=1`` is still reported as MoE."""
        one_expert = self._llama_config(num_local_experts=1)
        detected = detect_architecture("custom/single-expert", config=one_expert)
        # Expected behaviour: any expert count above zero counts as MoE.
        assert detected.is_moe