From 1335be9b0b55c346d920f8cd823e48ef0bafa1c9 Mon Sep 17 00:00:00 2001 From: Alexander Myasoedov Date: Tue, 24 Dec 2024 23:29:20 +0200 Subject: [PATCH] feat(Update modules interface): --- agentic_security/core/app.py | 13 ++++++ agentic_security/probe_actor/fuzzer.py | 4 +- agentic_security/probe_data/__init__.py | 4 ++ agentic_security/probe_data/data.py | 23 ++++++----- .../probe_data/modules/adaptive_attacks.py | 3 +- .../probe_data/modules/garak_tool.py | 14 +++---- .../probe_data/modules/inspect_ai_task.py | 13 ------ .../probe_data/modules/inspect_ai_tool.py | 2 +- .../modules/test_adaptive_attacks.py | 2 +- agentic_security/routes/proxy.py | 40 ++++++++++--------- agentic_security/routes/scan.py | 3 +- agentic_security/test_lib.py | 1 + 12 files changed, 69 insertions(+), 53 deletions(-) delete mode 100644 agentic_security/probe_data/modules/inspect_ai_task.py diff --git a/agentic_security/core/app.py b/agentic_security/core/app.py index 57893ca..dbfcf73 100644 --- a/agentic_security/core/app.py +++ b/agentic_security/core/app.py @@ -4,6 +4,7 @@ from fastapi import FastAPI tools_inbox: Queue = Queue() stop_event: Event = Event() +current_run: str = {"spec": "", "id": ""} def create_app() -> FastAPI: @@ -20,3 +21,15 @@ def get_tools_inbox() -> Queue: def get_stop_event() -> Event: """Get the global stop event.""" return stop_event + + +def get_current_run() -> str: + """Get the current run id.""" + return current_run + + +def set_current_run(spec): + """Set the current run id.""" + current_run["id"] = hash(id(spec)) + current_run["spec"] = spec + return current_run diff --git a/agentic_security/probe_actor/fuzzer.py b/agentic_security/probe_actor/fuzzer.py index 802eab4..9df8c7e 100644 --- a/agentic_security/probe_actor/fuzzer.py +++ b/agentic_security/probe_actor/fuzzer.py @@ -62,12 +62,14 @@ async def perform_single_shot_scan( ) -> AsyncGenerator[str, None]: """Perform a standard security scan.""" + selected_datasets = [m for m in datasets if m["selected"]] try: yield ScanResult.status_msg("Loading datasets...") prompt_modules = prepare_prompts( - dataset_names=[m["dataset_name"] for m in datasets if m["selected"]], + dataset_names=[m["dataset_name"] for m in selected_datasets], budget=max_budget, tools_inbox=tools_inbox, + options=[m.get("opts", {}) for m in selected_datasets], ) yield ScanResult.status_msg("Datasets loaded. Starting scan...") diff --git a/agentic_security/probe_data/__init__.py b/agentic_security/probe_data/__init__.py index d98cbaf..8e40480 100644 --- a/agentic_security/probe_data/__init__.py +++ b/agentic_security/probe_data/__init__.py @@ -180,6 +180,10 @@ REGISTRY = [ "selected": False, "url": "https://github.com/leondz/garak2", "dynamic": True, + "opts": { + "port": 8718, + "modules": ["encoding"], + }, }, { "dataset_name": "InspectAI", diff --git a/agentic_security/probe_data/data.py b/agentic_security/probe_data/data.py index 15cdd28..c1459bd 100644 --- a/agentic_security/probe_data/data.py +++ b/agentic_security/probe_data/data.py @@ -213,7 +213,7 @@ def load_generic_csv(url, name, column="prompt", predicator=None): ) -def prepare_prompts(dataset_names, budget, tools_inbox=None): +def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]): # ## Datasets used and cleaned: # markush1/LLM-Jailbreak-Classifier # 1. Open-Orca/OpenOrca @@ -255,28 +255,31 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None): logger.error(f"Error loading {dataset_name}: {e}") dynamic_datasets = { - "Steganography": lambda: Stenography(group), - "llm-adaptive-attacks": lambda: dataset_from_iterator( - "llm-adaptive-attacks", adaptive_attacks.Module(group).apply() + "Steganography": lambda opts: Stenography(group), + "llm-adaptive-attacks": lambda opts: dataset_from_iterator( + "llm-adaptive-attacks", + adaptive_attacks.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), ), - "Garak": lambda: dataset_from_iterator( + "Garak": lambda opts: dataset_from_iterator( "Garak", - garak_tool.Module(group, tools_inbox=tools_inbox).apply(), + garak_tool.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), lazy=True, ), - "InspectAI": lambda: dataset_from_iterator( + "InspectAI": lambda opts: dataset_from_iterator( "InspectAI", inspect_ai_tool.Module(group, tools_inbox=tools_inbox).apply(), lazy=True, ), - "GPT fuzzer": lambda: [], + "GPT fuzzer": lambda opts: [], } dynamic_groups = [] - for dataset_name in dataset_names: + options = options or [{} for _ in dataset_names] + for dataset_name, opts in zip(dataset_names, options): if dataset_name in dynamic_datasets: logger.info(f"Loading {dataset_name}") - ds = dynamic_datasets[dataset_name]() + + ds = dynamic_datasets[dataset_name](opts) for g in ds: dynamic_groups.append(g) diff --git a/agentic_security/probe_data/modules/adaptive_attacks.py b/agentic_security/probe_data/modules/adaptive_attacks.py index 7ac0d00..801d0c2 100644 --- a/agentic_security/probe_data/modules/adaptive_attacks.py +++ b/agentic_security/probe_data/modules/adaptive_attacks.py @@ -3,12 +3,13 @@ import io import httpx import pandas as pd from loguru import logger +import asyncio url = "https://raw.githubusercontent.com/tml-epfl/llm-adaptive-attacks/main/harmful_behaviors/harmful_behaviors_pair.csv" class Module: - def __init__(self, prompt_groups: []): + def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue, opts: dict = {}): r = httpx.get(url) content = r.content diff --git a/agentic_security/probe_data/modules/garak_tool.py b/agentic_security/probe_data/modules/garak_tool.py index f1d2967..759c279 100644 --- a/agentic_security/probe_data/modules/garak_tool.py +++ b/agentic_security/probe_data/modules/garak_tool.py @@ -11,21 +11,22 @@ from loguru import logger GARAK_CONFIG = "garak_rest.json" -def write_garak_config_json(): +def write_garak_config_json(port): with open(GARAK_CONFIG, "w") as f: - f.write(json.dumps(SPEC)) + f.write(json.dumps(SPEC, indent=4).replace("$PORT", str(port))) # TODO: add config params to data registry class Module: - def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue): + def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue, opts: dict = {}): self.tools_inbox = tools_inbox if not self.is_garak_installed(): logger.error( "Garak module is not installed. Please install it using 'pip install garak'" ) + self.opts = opts def is_garak_installed(self) -> bool: garak_spec = importlib.util.find_spec("garak") @@ -46,7 +47,7 @@ class Module: "encoding", ] logger.info("Starting Garak tool. Writing config file.") - write_garak_config_json() + write_garak_config_json(port=self.opts.get("port", 8718)) logger.info(f"Executing command: {command}") # Execute the command with the specific environment process = subprocess.Popen( @@ -63,7 +64,6 @@ class Module: ready.set() logger.info("Garak tool finished.") logger.info(f"stdout: {out}") - logger.error(f"exit code: {process.returncode}") if process.returncode != 0: logger.error(f"Error executing command: {command}") logger.error(f"err: {err}") @@ -73,8 +73,8 @@ class Module: SPEC = { "rest": { "RestGenerator": { - "name": "OpenAI GPT-4 Service", - "uri": "http://0.0.0.0:8718/proxy/chat/completions", + "name": "Agentic Security Proxy Service", + "uri": "http://0.0.0.0:$PORT/proxy/chat/completions", "method": "POST", "headers": { "Authorization": "Bearer $OPENAI_API_KEY", diff --git a/agentic_security/probe_data/modules/inspect_ai_task.py b/agentic_security/probe_data/modules/inspect_ai_task.py deleted file mode 100644 index 54c717a..0000000 --- a/agentic_security/probe_data/modules/inspect_ai_task.py +++ /dev/null @@ -1,13 +0,0 @@ -from inspect_ai import Task, eval, task -from inspect_ai.dataset import example_dataset -from inspect_ai.scorer import model_graded_fact -from inspect_ai.solver import chain_of_thought, generate, self_critique - - -@task -def theory_of_mind(): - return Task( - dataset=example_dataset("theory_of_mind"), - plan=[chain_of_thought(), generate(), self_critique()], - scorer=model_graded_fact(), - ) diff --git a/agentic_security/probe_data/modules/inspect_ai_tool.py b/agentic_security/probe_data/modules/inspect_ai_tool.py index 6c218d2..0e9f3b8 100644 --- a/agentic_security/probe_data/modules/inspect_ai_tool.py +++ b/agentic_security/probe_data/modules/inspect_ai_tool.py @@ -14,7 +14,7 @@ inspect_ai_task = ( class Module: name = "Inspect AI" - def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue): + def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue, opts: dict = {}): self.tools_inbox = tools_inbox if not self.is_tool_installed(): logger.error( diff --git a/agentic_security/probe_data/modules/test_adaptive_attacks.py b/agentic_security/probe_data/modules/test_adaptive_attacks.py index 10d0630..63a93ba 100644 --- a/agentic_security/probe_data/modules/test_adaptive_attacks.py +++ b/agentic_security/probe_data/modules/test_adaptive_attacks.py @@ -7,7 +7,7 @@ class TestModule: # Module can be initialized with a list of prompt groups. def test_initialize_with_prompt_groups(self): prompt_groups = [] - module = Module(prompt_groups) + module = Module(prompt_groups, None, {}) assert module is not None assert isinstance(module, Module) assert len(module.goals) == snapshot(50) diff --git a/agentic_security/routes/proxy.py b/agentic_security/routes/proxy.py index 45b15fa..d67bebf 100644 --- a/agentic_security/routes/proxy.py +++ b/agentic_security/routes/proxy.py @@ -2,8 +2,9 @@ import random from asyncio import Event from fastapi import APIRouter +from loguru import logger -from ..core.app import get_tools_inbox +from ..core.app import get_current_run, get_tools_inbox from ..models.schemas import CompletionRequest, Settings from ..probe_actor.refusal import REFUSAL_MARKS @@ -18,6 +19,7 @@ async def proxy_completions(request: CompletionRequest): [msg.content for msg in request.messages if msg.role == "user"] ) # Todo: get current llm spec for proper proxing + request_factory = get_current_run()["spec"] message = prompt_content + " " + message ready = Event() ref = dict(message=message, reply="", ready=ready) @@ -29,20 +31,22 @@ async def proxy_completions(request: CompletionRequest): await ready.wait() reply = ref["reply"] return reply - - # Simulate a completion response - return { - "id": "chatcmpl-abc123", - "object": "chat.completion", - "created": 1677858242, - "model": "gpt-3.5-turbo-0613", - "usage": {"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20}, - "choices": [ - { - "message": {"role": "assistant", "content": message}, - "logprobs": None, - "finish_reason": "stop", - "index": 0, - } - ], - } + elif not request_factory: + logger.debug("No request factory found. Using mock response.") + return { + "id": "chatcmpl-abc123", + "object": "chat.completion", + "created": 1677858242, + "model": "gpt-3.5-turbo-0613", + "usage": {"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20}, + "choices": [ + { + "message": {"role": "assistant", "content": message}, + "logprobs": None, + "finish_reason": "stop", + "index": 0, + } + ], + } + else: + return await request_factory.fn(prompt_content) diff --git a/agentic_security/routes/scan.py b/agentic_security/routes/scan.py index a7fbb57..088e7fb 100644 --- a/agentic_security/routes/scan.py +++ b/agentic_security/routes/scan.py @@ -3,7 +3,7 @@ from datetime import datetime from fastapi import APIRouter, BackgroundTasks, HTTPException from fastapi.responses import StreamingResponse -from ..core.app import get_stop_event, get_tools_inbox +from ..core.app import get_stop_event, get_tools_inbox, set_current_run from ..http_spec import LLMSpec from ..models.schemas import LLMInfo, Scan from ..probe_actor import fuzzer @@ -27,6 +27,7 @@ async def verify(info: LLMInfo): def streaming_response_generator(scan_parameters: Scan): request_factory = LLMSpec.from_string(scan_parameters.llmSpec) + set_current_run(request_factory) async def _gen(): async for scan_result in fuzzer.scan_router( diff --git a/agentic_security/test_lib.py b/agentic_security/test_lib.py index 881d9ba..ae405ea 100644 --- a/agentic_security/test_lib.py +++ b/agentic_security/test_lib.py @@ -88,6 +88,7 @@ class TestAS: "selected": True, "url": "https://github.com/leondz/garak2", "dynamic": True, + "opts": {"port": 9094}, }, ] result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)