feat(Add backend):

2026-06-24 06:09:55 +02:00 · 2025-01-03 00:07:10 +02:00
parent 9de34e2835
commit ffd7d710f1
7 changed files with 145 additions and 17 deletions
@@ -32,9 +32,14 @@ async def process_prompt(
 ):
    try:
        response = await request_factory.fn(prompt=prompt)
+        if response.status_code == 422:
+            logger.error(f"Invalid prompt: {prompt}, error=422")
+            errors.append((module_name, prompt, 422, "Invalid prompt"))
+            return tokens, True
+
        if response.status_code >= 400:
            raise httpx.HTTPStatusError(
-                f"HTTP {response.status_code}",
+                f"HTTP {response.status_code} {response.content=}",
                request=response.request,
                response=response,
            )
@@ -48,6 +48,9 @@ def check_refusal(response: str, refusal_phrases: list = REFUSAL_MARKS) -> bool:
    return any(phrase in response for phrase in refusal_phrases)


+# TODO: add plugin system for multiple classifiers of the output
+
+
 def refusal_heuristic(request_json):
    # TODO: improve this heuristic
    request = str(request_json)
@@ -1,6 +1,21 @@
 from .data import load_local_csv

 REGISTRY = [
+    {
+        "dataset_name": "AgenticBackend",
+        "num_prompts": 2000,
+        "tokens": 0,
+        "approx_cost": 0.0,
+        "source": "Fine-tuned cloud hosted model",
+        "selected": True,
+        "url": "Cloud",
+        "dynamic": False,
+        "opts": {
+            "port": 8718,
+            "modules": ["encoding"],
+        },
+        "modality": "text",
+    },
    {
        "dataset_name": "ShawnMenz/DAN_jailbreak",
        "num_prompts": 666,
@@ -73,7 +88,7 @@ REGISTRY = [
        "tokens": 1975800,
        "approx_cost": 0.0,
        "source": "Hugging Face Datasets",
-        "selected": True,
+        "selected": False,
        "dynamic": False,
        "url": "https://huggingface.co/JailbreakV-28K/JailBreakV-28k",
        "modality": "text",
@@ -111,17 +126,6 @@ REGISTRY = [
        "url": "",
        "modality": "text",
    },
-    {
-        "dataset_name": "Agentic Security",
-        "num_prompts": 0,
-        "tokens": 0,
-        "approx_cost": 0.0,
-        "source": "Local dataset",
-        "selected": False,
-        "dynamic": True,
-        "url": "",
-        "modality": "text",
-    },
    {
        "dataset_name": "jailbreak_llms/2023_05_07",
        "num_prompts": 0,
@@ -12,6 +12,7 @@ from agentic_security.probe_data import stenography_fn
 from agentic_security.probe_data.models import ProbeDataset
 from agentic_security.probe_data.modules import (
    adaptive_attacks,
+    fine_tuned,
    garak_tool,
    inspect_ai_tool,
 )
@@ -23,7 +24,7 @@ def count_words_in_list(str_list):
    :param str_list: List of strings
    :return: Total number of words across all strings in the list
    """
-    total_words = sum(len(s.split()) for s in str_list)
+    total_words = sum(len(str(s).split()) for s in str_list)
    return total_words


@@ -237,6 +238,11 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]):
                logger.error(f"Error loading {dataset_name}: {e}")

    dynamic_datasets = {
+        "AgenticBackend": lambda opts: dataset_from_iterator(
+            "AgenticBackend",
+            fine_tuned.Module(group, tools_inbox=tools_inbox, opts=opts).apply(),
+            lazy=True,
+        ),
        "Steganography": lambda opts: Stenography(group),
        "llm-adaptive-attacks": lambda opts: dataset_from_iterator(
            "llm-adaptive-attacks",
@@ -0,0 +1,84 @@
+import asyncio
+import httpx
+from typing import List
+import uuid as U
+from loguru import logger
+
+
+class Module:
+    def __init__(
+        self, prompt_groups: List[str], tools_inbox: asyncio.Queue, opts: dict = {}
+    ):
+        self.tools_inbox = tools_inbox
+        self.opts = opts
+        self.prompt_groups = prompt_groups
+        self.max_prompts = self.opts.get("max_prompts", 2000)  # Default max M prompts
+        self.run_id = U.uuid4().hex
+        self.batch_size = self.opts.get("batch_size", 500)
+
+    async def apply(self):
+
+        for _ in range(self.max_prompts // self.batch_size):
+            # Fetch prompts from the API
+            prompts = await self.fetch_prompts()
+
+            if not prompts:
+                logger.error("No prompts retrieved from the API.")
+                return
+
+            logger.info(f"Retrieved {len(prompts)} prompts.")
+
+            for i, prompt in enumerate(
+                prompts[: self.max_prompts]
+            ):  # Limit to max_prompts
+                logger.info(f"Processing prompt {i+1}/{len(prompts)}: {prompt}")
+                # response = await self.post_prompt(prompt)
+                # logger.info(f"Response: {response}")
+                yield prompt
+
+                while not self.tools_inbox.empty():
+                    ref = await self.tools_inbox.get()
+                    message, _, ready = ref["message"], ref["reply"], ref["ready"]
+                    yield message
+                    ready.set()
+
+    async def post_prompt(self, prompt: str):
+        port = self.opts.get("port", 8718)
+        uri = f"http://0.0.0.0:{port}/proxy/chat/completions"
+        headers = {"Content-Type": "application/json"}
+        data = {
+            "model": "gpt-4",
+            "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": 1050,
+            "temperature": 0.7,
+        }
+
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(uri, headers=headers, json=data)
+                response.raise_for_status()
+                return response.json()
+            except httpx.RequestError as e:
+                logger.error(f"Failed to post prompt: {e}")
+                return {}
+
+    async def fetch_prompts(self) -> List[str]:
+        api_url = "https://msoedov--agesec-backend-fastapi-app.modal.run/infer"
+        headers = {
+            "Authorization": "Bearer gh0-5f4a8ed2-37c6-4bd7-a0cf-7070eae8115b",
+            "Content-Type": "application/json",
+        }
+
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(
+                    api_url,
+                    headers=headers,
+                    json={"batch_size": self.batch_size, "run_id": self.run_id},
+                )
+                response.raise_for_status()
+                data = response.json()
+                return data.get("prompts", [])
+            except httpx.RequestError as e:
+                logger.error(f"Failed to fetch prompts: {e}")
+                return []
@@ -78,10 +78,10 @@ var app = new Vue({
                enableChartDiagram: this.enableChartDiagram,
                enableMultiStepAttack: this.enableMultiStepAttack,
            };
-            localStorage.setItem('appState', JSON.stringify(state));
+            localStorage.setItem('appState:v1', JSON.stringify(state));
        },
        loadStateFromLocalStorage() {
-            const savedState = localStorage.getItem('appState');
+            const savedState = localStorage.getItem('appState:v1');
            console.log('Loading state from local storage:', savedState);
            if (savedState) {
                const state = JSON.parse(savedState);
@@ -94,7 +94,7 @@ var app = new Vue({
            }
        },
        resetState() {
-            localStorage.removeItem('appState');
+            localStorage.removeItem('appState:v1');
            this.modelSpec = LLM_SPECS[0];
            this.budget = 50;
            this.dataConfig.forEach(config => config.selected = false);
@@ -95,3 +95,29 @@ class TestAS:
        assert isinstance(result, dict)
        print(result)
        assert len(result) in [0, 1]
+
+    def test_backend(self, test_server):
+        llmSpec = test_spec_assets.SAMPLE_SPEC
+        maxBudget = 1000000
+        max_th = 0.3
+        datasets = [
+            {
+                "dataset_name": "AgenticBackend",
+                "num_prompts": 0,
+                "tokens": 0,
+                "approx_cost": 0.0,
+                "source": "Fine-tuned cloud hosted model",
+                "selected": True,
+                "url": "",
+                "dynamic": True,
+                "opts": {
+                    "port": 9094,
+                    "modules": ["encoding"],
+                },
+                "modality": "text",
+            },
+        ]
+        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
+        assert isinstance(result, dict)
+        print(result)
+        assert len(result) in [0, 1]