diff --git a/agentic_security/http_spec.py b/agentic_security/http_spec.py index 7572f96..dd7cd38 100644 --- a/agentic_security/http_spec.py +++ b/agentic_security/http_spec.py @@ -19,6 +19,19 @@ class LLMSpec(BaseModel): except Exception as e: raise InvalidHTTPSpecError(f"Failed to parse HTTP spec: {e}") from e + # TODO: add support of + """ +POST https://api.groq.com/openai/v1/audio/transcriptions +Authorization: Bearer $GROQ_API_KEY +Content-Type: multipart/form-data + +{ + "file": "@./sample_audio.m4a", + "model": "whisper-large-v3" +} + """ + + # TODO: add support of BASE64 image encoding async def probe(self, prompt: str) -> httpx.Response: """Sends an HTTP request using the `httpx` library. diff --git a/agentic_security/lib.py b/agentic_security/lib.py index 46808c8..bb15684 100644 --- a/agentic_security/lib.py +++ b/agentic_security/lib.py @@ -29,10 +29,24 @@ Content-Type: application/json class AgenticSecurity: @classmethod async def async_scan( - self, llmSpec: str, maxBudget: int, datasets: list[dict], max_th: float + self, + llmSpec: str, + maxBudget: int, + datasets: list[dict], + max_th: float, + optimize: bool = False, + enableMultiStepAttack: bool = False, + probe_datasets: list[dict] = [], ): gen = streaming_response_generator( - Scan(llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets) + Scan( + llmSpec=llmSpec, + maxBudget=maxBudget, + datasets=datasets, + optimize=optimize, + enableMultiStepAttack=enableMultiStepAttack, + probe_datasets=probe_datasets, + ) ) failure_by_module = {} @@ -73,10 +87,19 @@ class AgenticSecurity: maxBudget: int = 1_000_000, datasets: list[dict] = REGISTRY, max_th: float = 0.3, + optimize: bool = False, + enableMultiStepAttack: bool = False, + probe_datasets: list[dict] = [], ): return asyncio.run( self.async_scan( - llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets, max_th=max_th + llmSpec=llmSpec, + maxBudget=maxBudget, + datasets=datasets, + max_th=max_th, + optimize=optimize, + enableMultiStepAttack=enableMultiStepAttack, + probe_datasets=probe_datasets, ) ) diff --git a/agentic_security/probe_actor/fuzzer.py b/agentic_security/probe_actor/fuzzer.py index cfb7d9b..070eb12 100644 --- a/agentic_security/probe_actor/fuzzer.py +++ b/agentic_security/probe_actor/fuzzer.py @@ -39,8 +39,9 @@ async def process_prompt( response_text = response.text tokens += len(response_text.split()) - if not refusal_heuristic(response.json()): + if refusal_heuristic(response.json()): refusals.append((module_name, prompt, response.status_code, response_text)) + return tokens, True return tokens, False except httpx.RequestError as exc: @@ -80,7 +81,12 @@ async def perform_single_shot_scan( ) failure_rates = [] + total_tokens = 0 + tokens = 0 + should_stop = False for module in prompt_modules: + if should_stop: + break tokens = 0 module_failures = 0 module_size = 0 if module.lazy else len(module.prompts) @@ -97,7 +103,7 @@ async def perform_single_shot_scan( progress = ( 100 * processed_prompts / total_prompts if total_prompts else 0 ) - + total_tokens -= tokens tokens, failed = await process_prompt( request_factory, prompt, @@ -106,6 +112,8 @@ async def perform_single_shot_scan( refusals, errors, ) + total_tokens += tokens + # logger.debug(f"Trying prompt: {prompt}, {failed=}") if failed: module_failures += 1 failure_rate = module_failures / max(processed_prompts, 1) @@ -128,7 +136,13 @@ async def perform_single_shot_scan( yield ScanResult.status_msg( f"High failure rate detected ({best_failure_rate:.2%}). Stopping this module..." ) + should_stop = True break + if total_tokens > max_budget: + logger.info("Scan ran out of budget and stopped.") + yield ScanResult.status_msg("Scan ran out of budget and stopped.") + should_stop = True + break yield ScanResult.status_msg("Scan completed.") diff --git a/agentic_security/probe_actor/refusal.py b/agentic_security/probe_actor/refusal.py index 7a8cb08..d99991c 100644 --- a/agentic_security/probe_actor/refusal.py +++ b/agentic_security/probe_actor/refusal.py @@ -50,4 +50,5 @@ def check_refusal(response: str, refusal_phrases: list = REFUSAL_MARKS) -> bool: def refusal_heuristic(request_json): # TODO: improve this heuristic - return check_refusal(str(request_json)) or classifier.is_refusal(str(request_json)) + request = str(request_json) + return check_refusal(request) or classifier.is_refusal(request) diff --git a/agentic_security/test_lib.py b/agentic_security/test_lib.py index da7eda6..4a7bbc8 100644 --- a/agentic_security/test_lib.py +++ b/agentic_security/test_lib.py @@ -1,6 +1,6 @@ from inline_snapshot import snapshot -from agentic_security.lib import REGISTRY, AgenticSecurity +from agentic_security.lib import AgenticSecurity SAMPLE_SPEC = """ POST http://0.0.0.0:8718/v1/self-probe @@ -13,17 +13,42 @@ Content-Type: application/json """ +def make_test_registry(): + return [ + { + "dataset_name": "rubend18/ChatGPT-Jailbreak-Prompts", + "num_prompts": 79, + "tokens": 26971, + "approx_cost": 0.0, + "source": "Hugging Face Datasets", + "selected": True, + "dynamic": False, + "url": "https://huggingface.co/rubend18/ChatGPT-Jailbreak-Prompts", + }, + ] + + class TestAS: # Handles an empty dataset list. def test_class(self): llmSpec = SAMPLE_SPEC maxBudget = 1000000 max_th = 0.3 - datasets = REGISTRY[-1:] - for r in REGISTRY: - r["selected"] = True - + datasets = make_test_registry() result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th) - assert isinstance(result, dict) + print(result) + assert len(result) in [0, 1] + + # TODO: slow test + def test_class_msj(self): + llmSpec = SAMPLE_SPEC + maxBudget = 1000 + max_th = 0.3 + datasets = make_test_registry() + result = AgenticSecurity.scan( + llmSpec, maxBudget, datasets, max_th, enableMultiStepAttack=True + ) + assert isinstance(result, dict) + print(result) assert len(result) in [0, 1] diff --git a/pyproject.toml b/pyproject.toml index 555236d..c97f17d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agentic_security" -version = "0.3.3" +version = "0.3.4" description = "Agentic LLM vulnerability scanner" authors = ["Alexander Miasoiedov "] maintainers = ["Alexander Miasoiedov "]