From ffd7d710f10ea888e5fbfce080adc81b3888d89f Mon Sep 17 00:00:00 2001
From: Alexander Myasoedov <msoedov@gmail.com>
Date: Fri, 3 Jan 2025 00:07:10 +0200
Subject: [PATCH] feat(probe_data): add AgenticBackend dataset served by a fine-tuned hosted model

---
 agentic_security/probe_actor/fuzzer.py        |  7 +-
 agentic_security/probe_actor/refusal.py       |  3 +
 agentic_security/probe_data/__init__.py       | 28 ++++---
 agentic_security/probe_data/data.py           |  8 +-
 .../probe_data/modules/fine_tuned.py          | 84 +++++++++++++++++++
 agentic_security/static/main.js               |  6 +-
 agentic_security/test_lib.py                  | 26 ++++++
 7 files changed, 145 insertions(+), 17 deletions(-)
 create mode 100644 agentic_security/probe_data/modules/fine_tuned.py

diff --git a/agentic_security/probe_actor/fuzzer.py b/agentic_security/probe_actor/fuzzer.py
index 237a37c..b5e1a20 100644
--- a/agentic_security/probe_actor/fuzzer.py
+++ b/agentic_security/probe_actor/fuzzer.py
@@ -32,9 +32,14 @@ async def process_prompt(
 ):
     try:
         response = await request_factory.fn(prompt=prompt)
+        if response.status_code == 422:
+            logger.error(f"Invalid prompt: {prompt}, error=422")
+            errors.append((module_name, prompt, 422, "Invalid prompt"))
+            return tokens, True
+
         if response.status_code >= 400:
             raise httpx.HTTPStatusError(
-                f"HTTP {response.status_code}",
+                f"HTTP {response.status_code} {response.content=}",
                 request=response.request,
                 response=response,
             )
diff --git a/agentic_security/probe_actor/refusal.py b/agentic_security/probe_actor/refusal.py
index d99991c..ec3020f 100644
--- a/agentic_security/probe_actor/refusal.py
+++ b/agentic_security/probe_actor/refusal.py
@@ -48,6 +48,9 @@ def check_refusal(response: str, refusal_phrases: list = REFUSAL_MARKS) -> bool:
     return any(phrase in response for phrase in refusal_phrases)
 
 
+# TODO: add plugin system for multiple classifiers of the output
+
+
 def refusal_heuristic(request_json):
     # TODO: improve this heuristic
     request = str(request_json)
diff --git a/agentic_security/probe_data/__init__.py b/agentic_security/probe_data/__init__.py
index 8d81f64..abde7de 100644
--- a/agentic_security/probe_data/__init__.py
+++ b/agentic_security/probe_data/__init__.py
@@ -1,6 +1,21 @@
 from .data import load_local_csv
 
 REGISTRY = [
+    {
+        "dataset_name": "AgenticBackend",
+        "num_prompts": 2000,
+        "tokens": 0,
+        "approx_cost": 0.0,
+        "source": "Fine-tuned cloud hosted model",
+        "selected": True,
+        "url": "Cloud",
+        "dynamic": False,
+        "opts": {
+            "port": 8718,
+            "modules": ["encoding"],
+        },
+        "modality": "text",
+    },
     {
         "dataset_name": "ShawnMenz/DAN_jailbreak",
         "num_prompts": 666,
@@ -73,7 +88,7 @@ REGISTRY = [
         "tokens": 1975800,
         "approx_cost": 0.0,
         "source": "Hugging Face Datasets",
-        "selected": True,
+        "selected": False,
         "dynamic": False,
         "url": "https://huggingface.co/JailbreakV-28K/JailBreakV-28k",
         "modality": "text",
@@ -111,17 +126,6 @@ REGISTRY = [
         "url": "",
         "modality": "text",
     },
-    {
-        "dataset_name": "Agentic Security",
-        "num_prompts": 0,
-        "tokens": 0,
-        "approx_cost": 0.0,
-        "source": "Local dataset",
-        "selected": False,
-        "dynamic": True,
-        "url": "",
-        "modality": "text",
-    },
     {
         "dataset_name": "jailbreak_llms/2023_05_07",
         "num_prompts": 0,
diff --git a/agentic_security/probe_data/data.py b/agentic_security/probe_data/data.py
index 60b5039..946f691 100644
--- a/agentic_security/probe_data/data.py
+++ b/agentic_security/probe_data/data.py
@@ -12,6 +12,7 @@ from agentic_security.probe_data import stenography_fn
 from agentic_security.probe_data.models import ProbeDataset
 from agentic_security.probe_data.modules import (
     adaptive_attacks,
+    fine_tuned,
     garak_tool,
     inspect_ai_tool,
 )
@@ -23,7 +24,7 @@ def count_words_in_list(str_list):
     :param str_list: List of strings
     :return: Total number of words across all strings in the list
     """
-    total_words = sum(len(s.split()) for s in str_list)
+    total_words = sum(len(str(s).split()) for s in str_list)
     return total_words
 
 
@@ -237,6 +238,11 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]):
                 logger.error(f"Error loading {dataset_name}: {e}")
 
     dynamic_datasets = {
+        "AgenticBackend": lambda opts: dataset_from_iterator(
+            "AgenticBackend",
+            fine_tuned.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
+            lazy=True,
+        ),
         "Steganography": lambda opts: Stenography(group),
         "llm-adaptive-attacks": lambda opts: dataset_from_iterator(
             "llm-adaptive-attacks",
diff --git a/agentic_security/probe_data/modules/fine_tuned.py b/agentic_security/probe_data/modules/fine_tuned.py
new file mode 100644
index 0000000..a12ce8f
--- /dev/null
+++ b/agentic_security/probe_data/modules/fine_tuned.py
@@ -0,0 +1,127 @@
+import asyncio
+import math
+import uuid as U
+from typing import AsyncIterator, List, Optional
+
+import httpx
+from loguru import logger
+
+# Endpoint of the hosted model that serves prompt batches.
+DEFAULT_API_URL = "https://msoedov--agesec-backend-fastapi-app.modal.run/infer"
+# NOTE(review): this bearer token is committed to source control. Rotate it
+# and pass the replacement via opts["api_token"] instead of hard-coding it.
+DEFAULT_API_TOKEN = "gh0-5f4a8ed2-37c6-4bd7-a0cf-7070eae8115b"
+
+
+class Module:
+    """Prompt source that streams batches fetched from a hosted backend.
+
+    Recognised ``opts`` keys: ``max_prompts`` (default 2000), ``batch_size``
+    (default 500), ``port`` (default 8718), ``api_url`` and ``api_token``.
+    """
+
+    def __init__(
+        self,
+        prompt_groups: List[str],
+        tools_inbox: Optional[asyncio.Queue] = None,
+        opts: Optional[dict] = None,
+    ):
+        self.tools_inbox = tools_inbox
+        # A fresh dict per instance instead of a shared mutable default.
+        self.opts = {} if opts is None else opts
+        self.prompt_groups = prompt_groups
+        self.max_prompts = self.opts.get("max_prompts", 2000)
+        self.run_id = U.uuid4().hex
+        self.batch_size = self.opts.get("batch_size", 500)
+
+    async def apply(self) -> AsyncIterator[str]:
+        """Yield at most ``max_prompts`` prompts, fetched one batch at a time.
+
+        After each prompt, messages waiting in ``tools_inbox`` are yielded as
+        well and their ``ready`` event is set.
+        """
+        if self.batch_size <= 0 or self.max_prompts <= 0:
+            logger.error("batch_size and max_prompts must be positive.")
+            return
+
+        remaining = self.max_prompts
+        # Round up so a budget smaller than one batch still triggers a fetch.
+        for _ in range(math.ceil(self.max_prompts / self.batch_size)):
+            prompts = await self.fetch_prompts()
+            if not prompts:
+                logger.error("No prompts retrieved from the API.")
+                return
+
+            logger.info(f"Retrieved {len(prompts)} prompts.")
+
+            # Cap against the overall budget, not only the current batch.
+            batch = prompts[:remaining]
+            remaining -= len(batch)
+            for i, prompt in enumerate(batch):
+                logger.info(f"Processing prompt {i+1}/{len(batch)}: {prompt}")
+                yield prompt
+
+                # tools_inbox may be None; there is then nothing to drain.
+                while self.tools_inbox is not None and not self.tools_inbox.empty():
+                    ref = await self.tools_inbox.get()
+                    message, ready = ref["message"], ref["ready"]
+                    yield message
+                    ready.set()
+
+            if remaining <= 0:
+                return
+
+    async def post_prompt(self, prompt: str) -> dict:
+        """POST ``prompt`` to the local chat-completions proxy.
+
+        Returns the decoded JSON body, or ``{}`` when the request fails.
+        """
+        port = self.opts.get("port", 8718)
+        # NOTE(review): 0.0.0.0 is a bind address; as a destination it is not
+        # portable across platforms -- consider 127.0.0.1.
+        uri = f"http://0.0.0.0:{port}/proxy/chat/completions"
+        headers = {"Content-Type": "application/json"}
+        data = {
+            "model": "gpt-4",
+            "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": 1050,
+            "temperature": 0.7,
+        }
+
+        async with httpx.AsyncClient() as client:
+            # HTTPError covers transport errors and the HTTPStatusError from
+            # raise_for_status(); ValueError covers a body that is not JSON.
+            try:
+                response = await client.post(uri, headers=headers, json=data)
+                response.raise_for_status()
+                return response.json()
+            except (httpx.HTTPError, ValueError) as e:
+                logger.error(f"Failed to post prompt: {e}")
+                return {}
+
+    async def fetch_prompts(self) -> List[str]:
+        """Request one batch of prompts from the hosted backend.
+
+        Returns the ``prompts`` list of the JSON reply, or ``[]`` on failure.
+        """
+        api_url = self.opts.get("api_url", DEFAULT_API_URL)
+        token = self.opts.get("api_token", DEFAULT_API_TOKEN)
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+        }
+
+        async with httpx.AsyncClient() as client:
+            # Catch HTTPError (not just RequestError) so that 4xx/5xx replies
+            # surfaced by raise_for_status() are logged instead of propagating.
+            try:
+                response = await client.post(
+                    api_url,
+                    headers=headers,
+                    json={"batch_size": self.batch_size, "run_id": self.run_id},
+                )
+                response.raise_for_status()
+                data = response.json()
+                return data.get("prompts", [])
+            except (httpx.HTTPError, ValueError) as e:
+                logger.error(f"Failed to fetch prompts: {e}")
+                return []
diff --git a/agentic_security/static/main.js b/agentic_security/static/main.js
index 1f760c5..30e8719 100644
--- a/agentic_security/static/main.js
+++ b/agentic_security/static/main.js
@@ -78,10 +78,10 @@ var app = new Vue({
                 enableChartDiagram: this.enableChartDiagram,
                 enableMultiStepAttack: this.enableMultiStepAttack,
             };
-            localStorage.setItem('appState', JSON.stringify(state));
+            localStorage.setItem('appState:v1', JSON.stringify(state));
         },
         loadStateFromLocalStorage() {
-            const savedState = localStorage.getItem('appState');
+            const savedState = localStorage.getItem('appState:v1');
             console.log('Loading state from local storage:', savedState);
             if (savedState) {
                 const state = JSON.parse(savedState);
@@ -94,7 +94,7 @@ var app = new Vue({
             }
         },
         resetState() {
-            localStorage.removeItem('appState');
+            localStorage.removeItem('appState:v1');
             this.modelSpec = LLM_SPECS[0];
             this.budget = 50;
             this.dataConfig.forEach(config => config.selected = false);
diff --git a/agentic_security/test_lib.py b/agentic_security/test_lib.py
index 0dde2e3..504893b 100644
--- a/agentic_security/test_lib.py
+++ b/agentic_security/test_lib.py
@@ -95,3 +95,29 @@ class TestAS:
         assert isinstance(result, dict)
         print(result)
         assert len(result) in [0, 1]
+
+    def test_backend(self, test_server):
+        """Scan the sample spec with only the AgenticBackend dataset selected."""
+        llm_spec = test_spec_assets.SAMPLE_SPEC
+        max_budget, max_th = 1000000, 0.3
+        backend_dataset = {
+            "dataset_name": "AgenticBackend",
+            "num_prompts": 0,
+            "tokens": 0,
+            "approx_cost": 0.0,
+            "source": "Fine-tuned cloud hosted model",
+            "selected": True,
+            "url": "",
+            "dynamic": True,
+            "opts": {
+                "port": 9094,
+                "modules": ["encoding"],
+            },
+            "modality": "text",
+        }
+        # The scan receives a one-element dataset list.
+        result = AgenticSecurity.scan(llm_spec, max_budget, [backend_dataset], max_th)
+        assert isinstance(result, dict)
+        print(result)
+        # The scan result may be empty or hold exactly one entry.
+        assert len(result) in [0, 1]