mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-24 06:09:55 +02:00
d28c4b4b1e
PIIDetector and SandboxEscapeDetector were wired directly into
probe_actor/refusal.py, and the refusal classifier manager was populated from
a hardcoded list, so the only way to toggle a bundled detector or add an
organization-specific signature was to patch the module.
Add a DetectorRegistry mapping plugin names to factories, assembled from an
agentic_security.toml [detectors] section via build_from_config. Custom
detectors load by import path ("pkg.module:ClassName"). refusal.py gains
build_refusal_manager(config=None) reading the [detectors] table; all public
symbols are preserved. Built-in leak detectors ship registered but disabled,
so default refusal_heuristic behaviour is unchanged.
Closes #82
Signed-off-by: Devam Shah <devamshah91@gmail.com>
49 lines
1.7 KiB
Python
49 lines
1.7 KiB
Python
from agentic_security.probe_actor.refusal import (
|
|
build_refusal_manager,
|
|
refusal_classifier_manager,
|
|
)
|
|
|
|
|
|
class TestBuildRefusalManager:
    """Check which plugins ``build_refusal_manager`` registers for a given config mapping."""

    def test_default_config_preserves_legacy_plugins(self):
        """An empty config yields exactly the ``default`` and ``ml_classifier`` plugins."""
        expected_plugins = {"default", "ml_classifier"}

        built = build_refusal_manager({})

        assert set(built.plugins) == expected_plugins

    def test_module_manager_matches_default(self):
        """The module-level manager exposes the same two plugin names."""
        registered = set(refusal_classifier_manager.plugins)

        assert registered == {"default", "ml_classifier"}

    def test_pii_can_be_enabled_via_config(self):
        """Turning ``pii`` on and ``ml_classifier`` off leaves ``default`` plus ``pii``."""
        detectors = {"default": True, "ml_classifier": False, "pii": True}

        built = build_refusal_manager(detectors)

        assert set(built.plugins) == {"default", "pii"}
        # A text containing an SSN-shaped number must be flagged.
        assert built.is_refusal("my ssn is 123-45-6789")

    def test_sandbox_escape_can_be_enabled_via_config(self):
        """With only ``sandbox_escape`` enabled, it is the sole registered plugin."""
        detectors = {"default": False, "ml_classifier": False, "sandbox_escape": True}

        built = build_refusal_manager(detectors)

        assert set(built.plugins) == {"sandbox_escape"}
        # One probe that must be flagged, one benign prompt that must not be.
        assert built.is_refusal("ls -la /var/run/docker.sock")
        assert not built.is_refusal("how do I bake bread?")

    def test_custom_detector_via_class_path(self):
        """A table entry carrying a ``class`` import path is registered under its own key."""
        class_path = (
            "agentic_security.refusal_classifier."
            "sandbox_escape_detector:SandboxEscapeDetector"
        )
        detectors = {
            "default": False,
            "ml_classifier": False,
            "infra_fingerprint": {"class": class_path},
        }

        built = build_refusal_manager(detectors)

        assert set(built.plugins) == {"infra_fingerprint"}
        assert built.is_refusal("kubectl get pods")
|