mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-24 14:19:55 +02:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5395c6b7a0 | |||
| eaaa199d29 | |||
| e3f4dfdc41 | |||
| 4bf2f662b6 | |||
| ba2535e241 | |||
| e5934ef87f | |||
| 3dd559a96a | |||
| ae9e68bbab | |||
| 5f1c95f632 | |||
| 48b9ed432e | |||
| d12ed1e72c | |||
| 4e35452494 | |||
| cc5ea04205 | |||
| 9c4828f259 | |||
| da1ec36b5b | |||
| cf4eafb89c | |||
| c77e868283 | |||
| befe488ab5 |
@@ -5,3 +5,4 @@ __pycache__/
|
||||
failures.csv
|
||||
runs/
|
||||
*.todo
|
||||
logs/
|
||||
|
||||
@@ -26,11 +26,19 @@
|
||||
- LLM API integration and stress testing 🛠️
|
||||
- Wide range of fuzzing and attack techniques 🌀
|
||||
|
||||
|
||||
| Tool | Source | Integrated |
|
||||
|-------------------------|-------------------------------------------------------------------------------|------------|
|
||||
| Garak | [leondz/garak](https://github.com/leondz/garak) | ✅ |
|
||||
| InspectAI | [UKGovernmentBEIS/inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) | ✅ |
|
||||
| llm-adaptive-attacks | [tml-epfl/llm-adaptive-attacks](https://github.com/tml-epfl/llm-adaptive-attacks) | ✅ |
|
||||
| Custom Huggingface Datasets | markush1/LLM-Jailbreak-Classifier | ✅ |
|
||||
| Local CSV Datasets | - | ✅ |
|
||||
|
||||
|
||||
|
||||
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
||||
|
||||
## About the Project 🧙
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
@@ -63,6 +71,11 @@ agentic_security --port=PORT --host=HOST
|
||||
|
||||
```
|
||||
|
||||
## UI 🧙
|
||||
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
## LLM kwargs
|
||||
|
||||
Agentic Security describes the target LLM endpoint with a plain-text HTTP spec, for example:
|
||||
|
||||
+48
-18
@@ -1,14 +1,15 @@
|
||||
import random
|
||||
import sys
|
||||
from asyncio import Event, Queue
|
||||
from datetime import datetime
|
||||
from logging import config
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Response
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, StreamingResponse
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
from .http_spec import LLMSpec
|
||||
from .probe_actor import fuzzer
|
||||
@@ -16,15 +17,6 @@ from .probe_actor.refusal import REFUSAL_MARKS
|
||||
from .probe_data import REGISTRY
|
||||
from .report_chart import plot_security_report
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(
|
||||
sys.stderr,
|
||||
format="<green>[{level}]</green> <blue>{time:YYYY-MM-DD HH:mm:ss.SS}</blue> | <cyan>{module}:{function}:{line}</cyan> | <white>{message}</white>",
|
||||
colorize=True,
|
||||
level="INFO",
|
||||
)
|
||||
|
||||
|
||||
# Create the FastAPI app instance
|
||||
app = FastAPI()
|
||||
origins = [
|
||||
@@ -164,13 +156,13 @@ class Message(BaseModel):
|
||||
class CompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages: list[Message]
|
||||
temperature: float
|
||||
top_p: float
|
||||
n: int
|
||||
stop: list[str]
|
||||
max_tokens: int
|
||||
presence_penalty: float
|
||||
frequency_penalty: float
|
||||
temperature: float = 0.7 # Default value for temperature
|
||||
top_p: float = 1.0 # Default value for top_p
|
||||
n: int = 1 # Default value for n
|
||||
stop: list[str] = None # Optional; specify as None if not provided
|
||||
max_tokens: int = 100 # Default value for max_tokens
|
||||
presence_penalty: float = 0.0 # Default value for presence_penalty
|
||||
frequency_penalty: float = 0.0 # Default value for frequency_penalty
|
||||
|
||||
|
||||
# OpenAI proxy endpoint
|
||||
@@ -206,3 +198,41 @@ async def proxy_completions(request: CompletionRequest):
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
# Standard-library logging setup: one console handler shared by the root
# logger and by uvicorn's access logger.
_CONSOLE_HANDLER = "console"

_LOGGING_SETTINGS = {
    "version": 1,
    "disable_existing_loggers": True,
    "handlers": {
        _CONSOLE_HANDLER: {
            "class": "logging.StreamHandler",
        },
    },
    "root": {
        "handlers": [_CONSOLE_HANDLER],
        "level": "INFO",
    },
    "loggers": {
        "uvicorn.access": {
            # Raised to ERROR so that per-request info lines are suppressed;
            # propagation is off so records do not also reach the root logger.
            "level": "ERROR",
            "handlers": [_CONSOLE_HANDLER],
            "propagate": False,
        },
    },
}

config.dictConfig(_LOGGING_SETTINGS)
|
||||
|
||||
|
||||
class LogNon200ResponsesMiddleware(BaseHTTPMiddleware):
    """Middleware that logs an error for every response whose status is not 200."""

    async def dispatch(self, request: Request, call_next):
        """Pass the request down the stack and report non-200 outcomes.

        Args:
            request: The incoming request.
            call_next: Awaitable callable that forwards the request and
                returns the response.

        Returns:
            The response produced by ``call_next``, unchanged.
        """
        response = await call_next(request)
        if response.status_code == 200:
            return response

        # Every other status, including non-error ones such as 201 or 3xx,
        # is reported at error level.
        logger.error(
            f"{request.method} {request.url} - Status code: {response.status_code}"
        )
        return response


# Add middleware to the application
app.add_middleware(LogNon200ResponsesMiddleware)
|
||||
|
||||
@@ -141,6 +141,16 @@ REGISTRY = [
|
||||
"url": "https://github.com/leondz/garak2",
|
||||
"dynamic": True,
|
||||
},
|
||||
{
|
||||
"dataset_name": "InspectAI",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: https://github.com/UKGovernmentBEIS/inspect_ai",
|
||||
"selected": False,
|
||||
"url": "https://github.com/UKGovernmentBEIS/inspect_ai",
|
||||
"dynamic": True,
|
||||
},
|
||||
{
|
||||
"dataset_name": "Custom CSV",
|
||||
"num_prompts": len(load_local_csv().prompts),
|
||||
|
||||
@@ -7,7 +7,11 @@ import pandas as pd
|
||||
from loguru import logger
|
||||
|
||||
from agentic_security.probe_data import stenography_fn
|
||||
from agentic_security.probe_data.modules import adaptive_attacks, garak_tool
|
||||
from agentic_security.probe_data.modules import (
|
||||
adaptive_attacks,
|
||||
garak_tool,
|
||||
inspect_ai_tool,
|
||||
)
|
||||
|
||||
IS_VERCEL = os.getenv("IS_VERCEL", "f") == "t"
|
||||
|
||||
@@ -206,6 +210,11 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||
garak_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||
lazy=True,
|
||||
),
|
||||
"InspectAI": lambda: dataset_from_iterator(
|
||||
"InspectAI",
|
||||
inspect_ai_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||
lazy=True,
|
||||
),
|
||||
"GPT fuzzer": lambda: [],
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ from loguru import logger
|
||||
|
||||
|
||||
class Module:
|
||||
|
||||
def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue):
|
||||
self.tools_inbox = tools_inbox
|
||||
if not self.is_garak_installed():
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
from inspect_ai import Task, eval, task
|
||||
from inspect_ai.dataset import example_dataset
|
||||
from inspect_ai.scorer import model_graded_fact
|
||||
from inspect_ai.solver import chain_of_thought, generate, self_critique
|
||||
|
||||
|
||||
@task
def theory_of_mind():
    """Build the Inspect task that runs the ``theory_of_mind`` example dataset.

    Returns:
        A ``Task`` combining the example dataset, a chain-of-thought ->
        generate -> self-critique plan, and a model-graded fact scorer.
    """
    samples = example_dataset("theory_of_mind")
    steps = [chain_of_thought(), generate(), self_critique()]
    grader = model_graded_fact()
    return Task(dataset=samples, plan=steps, scorer=grader)
|
||||
@@ -0,0 +1,71 @@
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Path of the sibling ``inspect_ai_task.py`` file, expressed relative to the
# current working directory. It is interpolated into the ``inspect eval``
# command line.
#
# The path is derived from this module's directory and made relative with
# ``os.path.relpath`` rather than by deleting the cwd substring: string
# deletion silently produced a broken path (leading "/" stripped from an
# absolute path) whenever the package lived outside the working directory.
_task_file = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "inspect_ai_task.py"
)
try:
    inspect_ai_task = os.path.relpath(_task_file)
except ValueError:
    # No relative path exists (e.g. a different drive on Windows); fall back
    # to the absolute location.
    inspect_ai_task = _task_file
|
||||
|
||||
|
||||
class Module:
    """Run the ``inspect eval`` CLI and relay messages queued in the inbox.

    The subprocess is pointed at ``proxy_url`` for its model calls. Messages
    are read from ``tools_inbox``; each entry is a mapping with ``"message"``,
    ``"reply"`` and ``"ready"`` keys.
    NOTE(review): the producer of those entries is not visible in this
    module — presumably the proxy endpoint; confirm against the caller.
    """

    name = "Inspect AI"
    # Base URL handed to the spawned process, both through OPENAI_API_BASE
    # and through the --model-base-url flag.
    proxy_url = "http://0.0.0.0:8718/proxy"

    def __init__(self, prompt_groups: list, tools_inbox: asyncio.Queue):
        """Store the inbox and log an error if ``inspect_ai`` is missing.

        Args:
            prompt_groups: Accepted for interface parity; not used here.
            tools_inbox: Queue from which ``apply`` drains messages.
        """
        self.tools_inbox = tools_inbox
        if not self.is_tool_installed():
            logger.error(
                "inspect_ai module is not installed. Please install it using 'pip install inspect_ai'"
            )

    def is_tool_installed(self) -> bool:
        """Return True when the ``inspect_ai`` package can be located."""
        return importlib.util.find_spec("inspect_ai") is not None

    @staticmethod
    async def _log_lines(stream):
        """Log each line of ``stream`` at info level until EOF."""
        async for line in stream:
            logger.info(line.decode().strip())

    async def _proc(self, command):
        """Run ``command`` in a shell, logging its output, and wait for exit.

        Args:
            command: Shell command line to execute.
        """
        env = os.environ.copy()
        env["OPENAI_API_BASE"] = self.proxy_url
        process = await asyncio.create_subprocess_shell(
            command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )

        logger.info(f"Started {command}")

        # Drain both pipes concurrently. Reading stderr only after stdout
        # hits EOF can block forever once the child fills the stderr pipe.
        _, err = await asyncio.gather(
            self._log_lines(process.stdout),
            process.stderr.read(),
        )
        if err:
            logger.error(err.decode().strip())

        await process.wait()
        logger.info(f"Command {command} {process} finished.")

    async def apply(self):
        """Start the evaluation and yield messages found in the inbox.

        This is an async generator. After yielding a message it sets that
        entry's ``ready`` event, and it stops as soon as the inbox is empty.
        The subprocess task is awaited before the generator finishes.

        Yields:
            The ``"message"`` value of each queued inbox entry.
        """
        # Command to be executed
        command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url={self.proxy_url}"
        logger.info(f"Executing command: {command}")

        proc = asyncio.create_task(self._proc(command))
        # Give the subprocess a moment to start producing requests.
        await asyncio.sleep(2)
        # Sample the inbox after the wait so the log reflects the state the
        # loop below actually sees.
        logger.info(f"Is inbox empty? {self.tools_inbox.empty()}")
        while not self.tools_inbox.empty():
            ref = self.tools_inbox.get_nowait()
            message, _, ready = ref["message"], ref["reply"], ref["ready"]
            yield message
            ready.set()
        logger.info(f"{self.name} tool finished.")
        await proc
|
||||
@@ -12,13 +12,13 @@ class TestPreparePrompts:
|
||||
# Assert that the prepared_prompts list is empty
|
||||
assert prepared_prompts == []
|
||||
|
||||
assert len(
|
||||
prepare_prompts(["markush1/LLM-Jailbreak-Classifier"], 100)
|
||||
) == snapshot(1)
|
||||
# assert len(
|
||||
# prepare_prompts(["markush1/LLM-Jailbreak-Classifier"], 100)
|
||||
# ) == snapshot(1)
|
||||
|
||||
assert len(
|
||||
prepare_prompts(
|
||||
["markush1/LLM-Jailbreak-Classifier", "llm-adaptive-attacks"],
|
||||
["llm-adaptive-attacks"],
|
||||
100,
|
||||
)
|
||||
) == snapshot(2)
|
||||
) == snapshot(1)
|
||||
|
||||
Generated
+20
-20
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
@@ -250,13 +250,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2024.2.2"
|
||||
version = "2024.7.4"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
|
||||
{file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
|
||||
{file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
|
||||
{file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -673,12 +673,12 @@ typing = ["typing-extensions (>=4.8)"]
|
||||
|
||||
[[package]]
|
||||
name = "fire"
|
||||
version = "0.5.0"
|
||||
version = "0.6.0"
|
||||
description = "A library for automatically generating command line interfaces."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "fire-0.5.0.tar.gz", hash = "sha256:a6b0d49e98c8963910021f92bba66f65ab440da2982b78eb1bbf95a0a34aacc6"},
|
||||
{file = "fire-0.6.0.tar.gz", hash = "sha256:54ec5b996ecdd3c0309c800324a0703d6da512241bc73b553db959d98de0aa66"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1087,13 +1087,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "inline-snapshot"
|
||||
version = "0.8.2"
|
||||
version = "0.9.0"
|
||||
description = "golden master/snapshot/approval testing library which puts the values right into your source code"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "inline_snapshot-0.8.2-py3-none-any.whl", hash = "sha256:df365176dc04e8054c699981a834e4c4482cb42c34cb3378515e3f65d49a70df"},
|
||||
{file = "inline_snapshot-0.8.2.tar.gz", hash = "sha256:b20515b7bf01675b0f6adaadfd6277ef4456cd797c0735582b07f949a908c2f7"},
|
||||
{file = "inline_snapshot-0.9.0-py3-none-any.whl", hash = "sha256:90deee9be342a270d07d95049e525c609f9e84319e6f9d1506ae19aa2973f8d5"},
|
||||
{file = "inline_snapshot-0.9.0.tar.gz", hash = "sha256:5f1c4f0fbf7bcbc0d05dc43822032ed4b954d3f20b01868bde7feda8d7c38817"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1997,13 +1997,13 @@ testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pre-commit"
|
||||
version = "3.7.0"
|
||||
version = "3.7.1"
|
||||
description = "A framework for managing and maintaining multi-language pre-commit hooks."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "pre_commit-3.7.0-py2.py3-none-any.whl", hash = "sha256:5eae9e10c2b5ac51577c3452ec0a490455c45a0533f7960f993a0d01e59decab"},
|
||||
{file = "pre_commit-3.7.0.tar.gz", hash = "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060"},
|
||||
{file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"},
|
||||
{file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -2337,13 +2337,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.31.0"
|
||||
version = "2.32.0"
|
||||
description = "Python HTTP for Humans."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
|
||||
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
|
||||
{file = "requests-2.32.0-py3-none-any.whl", hash = "sha256:f2c3881dddb70d056c5bd7600a4fae312b2a300e39be6a118d30b90bd27262b5"},
|
||||
{file = "requests-2.32.0.tar.gz", hash = "sha256:fa5490319474c82ef1d2c9bc459d3652e3ae4ef4c4ebdd18a21145a47ca4b6b8"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -2525,13 +2525,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.66.2"
|
||||
version = "4.66.3"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"},
|
||||
{file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"},
|
||||
{file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"},
|
||||
{file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -3194,4 +3194,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "b29700a8e3d5d5086bb0acde90e9480d87de4ee0874b85d5a37a635977237525"
|
||||
content-hash = "28569362ae4d469cbb095d6d221a0c765ccdc45d6d024f5741a663e6ca5f012c"
|
||||
|
||||
+3
-3
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "agentic_security"
|
||||
version = "0.1.4"
|
||||
version = "0.1.5"
|
||||
description = "Agentic LLM vulnerability scanner"
|
||||
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
@@ -28,7 +28,7 @@ agentic_security = "agentic_security.__main__:entrypoint"
|
||||
python = "^3.9"
|
||||
fastapi = ">=0.109.1,<0.112.0"
|
||||
uvicorn = ">=0.23.2,<0.30.0"
|
||||
fire = "^0.5.0"
|
||||
fire = ">=0.5,<0.7"
|
||||
loguru = "^0.7.2"
|
||||
httpx = ">=0.25.1,<0.28.0"
|
||||
cache-to-disk = "^2.0.0"
|
||||
@@ -44,7 +44,7 @@ mypy = "^1.6.1"
|
||||
httpx = ">=0.25.1,<0.28.0"
|
||||
pytest = ">=7.4.3,<9.0.0"
|
||||
pre-commit = "^3.5.0"
|
||||
inline-snapshot = "^0.8.0"
|
||||
inline-snapshot = ">=0.8,<0.10"
|
||||
langchain-groq = "^0.1.3"
|
||||
|
||||
[tool.ruff]
|
||||
|
||||
Reference in New Issue
Block a user