From 5fa33f094c7fd36c7f97c0b68e724d016e11f49e Mon Sep 17 00:00:00 2001
From: Alexander Myasoedov <msoedov@gmail.com>
Date: Sun, 9 Feb 2025 22:01:57 +0200
Subject: [PATCH] feat(probe_actor): add cost module

---
 .dockerignore                               | 45 +++++++++++++++-
 Dockerfile                                  |  4 ++
 agentic_security/probe_actor/cost_module.py | 58 +++++++++++++++++++++
 agentic_security/probe_actor/fuzzer.py      |  7 ++-
 4 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 agentic_security/probe_actor/cost_module.py

diff --git a/.dockerignore b/.dockerignore
index ac442ff..fcd80ee 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,45 @@
-.git/
+# Byte-compiled / optimized / DLL files
 __pycache__/
+*.py[cod]
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+
+# Virtual environments
+
+.venv/
+env/
+ENV/
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# PyInstaller
+*.spec
+
+# macOS specific files
+.DS_Store
+
+# Windows specific files
+Thumbs.db
+desktop.ini
+
+# Tools and editors
+.idea/
+.vscode/
+cmder/
+
+# Output directories
+Output/
+te/
diff --git a/Dockerfile b/Dockerfile
index 73de7f5..cdc3bfd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,6 +21,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Runtime stage
 FROM python:3.11-slim
 
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
 WORKDIR /app
 
 # Copy only the necessary files from the builder stage
diff --git a/agentic_security/probe_actor/cost_module.py b/agentic_security/probe_actor/cost_module.py
new file mode 100644
index 0000000..74db2dd
--- /dev/null
+++ b/agentic_security/probe_actor/cost_module.py
@@ -0,0 +1,58 @@
+# USD price per single token for each supported model, with separate rates
+# for input and output tokens. Every rate is written as
+# "price per 1K tokens / 1000", so 0.0007 / 1000 is $0.70 per million tokens.
+# Snapshot of API pricing as of 2024-03-01.
+_PRICING = {
+    "deepseek-chat": {"input": 0.0007 / 1000, "output": 0.0028 / 1000},
+    "gpt-4-turbo": {"input": 0.01 / 1000, "output": 0.03 / 1000},
+    "gpt-4": {"input": 0.03 / 1000, "output": 0.06 / 1000},
+    "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
+    "claude-3-opus": {"input": 0.015 / 1000, "output": 0.075 / 1000},
+    "claude-3-sonnet": {"input": 0.003 / 1000, "output": 0.015 / 1000},
+    "claude-3-haiku": {"input": 0.00025 / 1000, "output": 0.00125 / 1000},
+    "mistral-large": {"input": 0.008 / 1000, "output": 0.024 / 1000},
+    "mixtral-8x7b": {"input": 0.002 / 1000, "output": 0.006 / 1000},
+}
+
+
+def calculate_cost(tokens: int, model: str = "deepseek-chat") -> float:
+    """Estimate the API cost in USD for a total number of tokens.
+
+    ``tokens`` is treated as the combined input + output total. Because the
+    real split is not known, it is assumed to be 1:1: half of the tokens are
+    billed at the model's input rate and half at its output rate, so no
+    token is charged twice.
+
+    Args:
+        tokens (int): Total number of tokens used (input plus output).
+        model (str): Model name to calculate cost for.
+
+    Returns:
+        float: Estimated cost in USD, rounded to 4 decimal places.
+
+    Raises:
+        ValueError: If ``model`` has no entry in the pricing table.
+
+    Note:
+        The result is an estimate only: it depends on the assumed 1:1
+        split and on a dated price snapshot, so it can drift from what a
+        provider actually bills.
+
+    Example:
+        >>> calculate_cost(1_000_000)
+        1.75
+        >>> calculate_cost(2000, model="gpt-4")
+        0.09
+    """
+    # Look the model up once; an unknown name is reported as ValueError.
+    try:
+        rates = _PRICING[model]
+    except KeyError:
+        raise ValueError(f"Unknown model: {model}") from None
+
+    # Assume a 1:1 input/output ratio: half of the total goes to each side.
+    tokens_per_side = tokens / 2
+    input_cost = tokens_per_side * rates["input"]
+    output_cost = tokens_per_side * rates["output"]
+
+    return round(input_cost + output_cost, 4)
diff --git a/agentic_security/probe_actor/fuzzer.py b/agentic_security/probe_actor/fuzzer.py
index cbbc697..7d18749 100644
--- a/agentic_security/probe_actor/fuzzer.py
+++ b/agentic_security/probe_actor/fuzzer.py
@@ -10,6 +10,7 @@ from skopt.space import Real
 
 from agentic_security.http_spec import Modality
 from agentic_security.models.schemas import Scan, ScanResult
+from agentic_security.probe_actor.cost_module import calculate_cost
 from agentic_security.probe_actor.refusal import refusal_heuristic
 from agentic_security.probe_data import audio_generator, image_generator, msj_data
 from agentic_security.probe_data.data import prepare_prompts
@@ -38,8 +39,6 @@ def multi_modality_spec(llm_spec):
             return llm_spec
         case _:
             return llm_spec
-        # case _:
-        #     raise NotImplementedError(f"Modality {llm_spec.modality} not supported yet")
 
 
 async def process_prompt(
@@ -143,7 +142,7 @@ async def perform_single_shot_scan(
                     module_failures += 1
                 failure_rate = module_failures / max(processed_prompts, 1)
                 failure_rates.append(failure_rate)
-                cost = round(tokens * 1.5 / 1000_000, 2)
+                cost = calculate_cost(tokens)
 
                 yield ScanResult(
                     module=module.dataset_name,
@@ -274,7 +273,7 @@ async def perform_many_shot_scan(
 
                 failure_rate = module_failures / max(processed_prompts, 1)
                 failure_rates.append(failure_rate)
-                cost = round(tokens * 1.5 / 1000_000, 2)
+                cost = calculate_cost(tokens)
 
                 yield ScanResult(
                     module=module.dataset_name,