Files
agentic_security/tests/unit/probe_actor/test_refusal_config.py
T
Devam Shah d28c4b4b1e feat: config-pluggable refusal classifiers and leak detectors
PIIDetector and SandboxEscapeDetector were wired directly in
probe_actor/refusal.py and the refusal classifier manager was populated from
a hardcoded list, so the only way to toggle a bundled detector or add an
organization-specific signature was to patch the module.

Add a DetectorRegistry mapping plugin names to factories, assembled from an
agentic_security.toml [detectors] section via build_from_config. Custom
detectors load by import path ("pkg.module:ClassName"). refusal.py gains
build_refusal_manager(config=None) reading the [detectors] table; all public
symbols are preserved. Built-in leak detectors ship registered but disabled,
so default refusal_heuristic behaviour is unchanged.

Closes #82

Signed-off-by: Devam Shah <devamshah91@gmail.com>
2026-06-22 19:40:33 +05:30

49 lines
1.7 KiB
Python

from agentic_security.probe_actor.refusal import (
build_refusal_manager,
refusal_classifier_manager,
)
class TestBuildRefusalManager:
    """Exercise ``build_refusal_manager`` with several detector config mappings."""

    def test_default_config_preserves_legacy_plugins(self):
        """An empty config must produce exactly the two legacy plugins."""
        expected_plugins = {"default", "ml_classifier"}
        built = build_refusal_manager({})
        assert set(built.plugins) == expected_plugins

    def test_module_manager_matches_default(self):
        """The module-level manager must expose the same two legacy plugins."""
        expected_plugins = {"default", "ml_classifier"}
        assert set(refusal_classifier_manager.plugins) == expected_plugins

    def test_pii_can_be_enabled_via_config(self):
        """Switching ``pii`` on (and ``ml_classifier`` off) registers and uses it."""
        config = {"default": True, "ml_classifier": False, "pii": True}
        built = build_refusal_manager(config)
        assert set(built.plugins) == {"default", "pii"}
        # SSN-shaped input is expected to be flagged once "pii" is enabled.
        assert built.is_refusal("my ssn is 123-45-6789")

    def test_sandbox_escape_can_be_enabled_via_config(self):
        """With only ``sandbox_escape`` on, it is the sole plugin and it discriminates."""
        config = {"default": False, "ml_classifier": False, "sandbox_escape": True}
        built = build_refusal_manager(config)
        assert set(built.plugins) == {"sandbox_escape"}
        # One positive and one negative sample, so a detector that flags
        # everything would fail this test.
        assert built.is_refusal("ls -la /var/run/docker.sock")
        assert not built.is_refusal("how do I bake bread?")

    def test_custom_detector_via_class_path(self):
        """A table entry with a ``class`` import path registers under its own key."""
        # "module:ClassName" import path pointing at a bundled detector class,
        # registered here under a custom plugin name.
        class_path = (
            "agentic_security.refusal_classifier."
            "sandbox_escape_detector:SandboxEscapeDetector"
        )
        config = {
            "default": False,
            "ml_classifier": False,
            "infra_fingerprint": {"class": class_path},
        }
        built = build_refusal_manager(config)
        assert set(built.plugins) == {"infra_fingerprint"}
        assert built.is_refusal("kubectl get pods")