mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-24 14:19:55 +02:00
Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5395c6b7a0 | |||
| eaaa199d29 | |||
| e3f4dfdc41 | |||
| 4bf2f662b6 | |||
| ba2535e241 | |||
| e5934ef87f | |||
| 3dd559a96a | |||
| ae9e68bbab | |||
| 5f1c95f632 | |||
| 48b9ed432e | |||
| d12ed1e72c | |||
| 4e35452494 | |||
| cc5ea04205 | |||
| 9c4828f259 | |||
| da1ec36b5b | |||
| cf4eafb89c | |||
| 50033a9bc7 | |||
| e90d4ea212 | |||
| 1b27e502d4 | |||
| c77e868283 | |||
| befe488ab5 | |||
| b3d9292a6b | |||
| ef331f6cdc | |||
| da571d0117 | |||
| 029d7934e6 | |||
| e749a37eed | |||
| c339d0c52b | |||
| 451c4ec5de | |||
| 58643f9d0a | |||
| 1fbcde8bbb | |||
| 17db996280 | |||
| eae8dafeff | |||
| ed779372f0 | |||
| 74461efaa0 | |||
| fa209684d9 | |||
| 26541664fc | |||
| 1e793fed54 | |||
| 58195b5fdc |
@@ -2,4 +2,4 @@
|
||||
max-line-length = 160
|
||||
per-file-ignores =
|
||||
# Ignore docstring lints for tests
|
||||
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400
|
||||
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400, E501, D401
|
||||
|
||||
@@ -3,3 +3,6 @@
|
||||
.web
|
||||
__pycache__/
|
||||
failures.csv
|
||||
runs/
|
||||
*.todo
|
||||
logs/
|
||||
|
||||
@@ -3,9 +3,7 @@
|
||||
<h1 align="center">Agentic Security</h1>
|
||||
|
||||
<p align="center">
|
||||
The open-source Agentic LLM Vulnerability Scanner .
|
||||
<br />
|
||||
<a href="#features"><strong>Learn more »</strong></a>
|
||||
The open-source Agentic LLM Vulnerability Scanner
|
||||
<br />
|
||||
<br />
|
||||
|
||||
@@ -24,17 +22,23 @@
|
||||
## Features
|
||||
|
||||
- Customizable Rule Sets or Agent based attacks🛠️
|
||||
- Comprehansive fuzzing for any LLMs 🧪
|
||||
- Comprehensive fuzzing for any LLMs 🧪
|
||||
- LLM API integration and stress testing 🛠️
|
||||
- Wide range of fuzzing and attack techniques 🌀
|
||||
|
||||
|
||||
| Tool | Source | Integrated |
|
||||
|-------------------------|-------------------------------------------------------------------------------|------------|
|
||||
| Garak | [leondz/garak](https://github.com/leondz/garak) | ✅ |
|
||||
| InspectAI | [UKGovernmentBEIS/inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) | ✅ |
|
||||
| llm-adaptive-attacks | [tml-epfl/llm-adaptive-attacks](https://github.com/tml-epfl/llm-adaptive-attacks) | ✅ |
|
||||
| Custom Huggingface Datasets | markush1/LLM-Jailbreak-Classifier | ✅ |
|
||||
| Local CSV Datasets | - | ✅ |
|
||||
|
||||
|
||||
|
||||
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
||||
|
||||
## About the Project 🧙
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
@@ -67,6 +71,11 @@ agentic_security --port=PORT --host=HOST
|
||||
|
||||
```
|
||||
|
||||
## UI 🧙
|
||||
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
## LLM kwargs
|
||||
|
||||
Agentic Security uses plain text HTTP spec like:
|
||||
@@ -103,6 +112,43 @@ To add your own dataset you can place one or multiples csv files with `prompt` c
|
||||
2024-04-13 13:21:31.157 | INFO | agentic_security.probe_data.data:load_local_csv:274 - CSV files: ['prompts.csv']
|
||||
```
|
||||
|
||||
## Run as CI check
|
||||
|
||||
ci.py
|
||||
|
||||
```python
|
||||
from agentic_security import AgenticSecurity
|
||||
|
||||
spec = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
result = AgenticSecurity.scan(llmSpec=spec)
|
||||
|
||||
# module: failure rate
|
||||
# {"Local CSV": 79.65116279069767, "llm-adaptive-attacks": 20.0}
|
||||
exit(max(r.values()) > 20)
|
||||
```
|
||||
|
||||
```
|
||||
python ci.py
|
||||
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:279 - Found 1 CSV files
|
||||
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:280 - CSV files: ['prompts.csv']
|
||||
0it [00:00, ?it/s][INFO] 2024-04-27 17:15:13.74 | data:prepare_prompts:195 | Loading Custom CSV
|
||||
[INFO] 2024-04-27 17:15:13.74 | fuzzer:perform_scan:53 | Scanning Local CSV 15
|
||||
18it [00:00, 176.88it/s]
|
||||
+-----------+--------------+--------+
|
||||
| Module | Failure Rate | Status |
|
||||
+-----------+--------------+--------+
|
||||
| Local CSV | 80.0% | ✘ |
|
||||
+-----------+--------------+--------+
|
||||
```
|
||||
|
||||
## Extending dataset collections
|
||||
|
||||
1. Add new metadata to agentic_security.probe_data.REGISTRY
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from .lib import AgenticSecurity
|
||||
|
||||
__all__ = ["AgenticSecurity"]
|
||||
|
||||
@@ -10,15 +10,24 @@ from agentic_security.app import app
|
||||
class T:
|
||||
def server(self, port=8718, host="0.0.0.0"):
|
||||
sys.path.append(os.path.dirname("."))
|
||||
config = uvicorn.Config(app, port=port, host=host, log_level="info")
|
||||
config = uvicorn.Config(
|
||||
app, port=port, host=host, log_level="info", reload=True
|
||||
)
|
||||
server = uvicorn.Server(config)
|
||||
server.run()
|
||||
return
|
||||
|
||||
def headless(self):
|
||||
sys.path.append(os.path.dirname("."))
|
||||
|
||||
|
||||
def entrypoint():
|
||||
fire.Fire(T().server)
|
||||
|
||||
|
||||
def ci_entrypoint():
|
||||
fire.Fire(T().headless)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
entrypoint()
|
||||
|
||||
+98
-11
@@ -1,13 +1,15 @@
|
||||
import random
|
||||
import sys
|
||||
from asyncio import Event, Queue
|
||||
from datetime import datetime
|
||||
from logging import config
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Response
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, StreamingResponse
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
from .http_spec import LLMSpec
|
||||
from .probe_actor import fuzzer
|
||||
@@ -15,15 +17,6 @@ from .probe_actor.refusal import REFUSAL_MARKS
|
||||
from .probe_data import REGISTRY
|
||||
from .report_chart import plot_security_report
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(
|
||||
sys.stderr,
|
||||
format="<green>[{level}]</green> <blue>{time:YYYY-MM-DD HH:mm:ss.SS}</blue> | <cyan>{module}:{function}:{line}</cyan> | <white>{message}</white>",
|
||||
colorize=True,
|
||||
level="INFO",
|
||||
)
|
||||
|
||||
|
||||
# Create the FastAPI app instance
|
||||
app = FastAPI()
|
||||
origins = [
|
||||
@@ -39,6 +32,9 @@ app.add_middleware(
|
||||
allow_headers=["*"], # Allows all headers
|
||||
)
|
||||
|
||||
tools_inbox = Queue()
|
||||
FEATURE_PROXY = False
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
@@ -88,6 +84,7 @@ def streaming_response_generator(scan_parameters: Scan):
|
||||
request_factory=request_factory,
|
||||
max_budget=scan_parameters.maxBudget,
|
||||
datasets=scan_parameters.datasets,
|
||||
tools_inbox=tools_inbox,
|
||||
):
|
||||
yield scan_result + "\n" # Adding a newline for separation
|
||||
|
||||
@@ -149,3 +146,93 @@ class Table(BaseModel):
|
||||
async def get_plot(table: Table):
|
||||
buf = plot_security_report(table.table)
|
||||
return StreamingResponse(buf, media_type="image/jpeg")
|
||||
|
||||
|
||||
class Message(BaseModel):
|
||||
role: str
|
||||
content: str
|
||||
|
||||
|
||||
class CompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages: list[Message]
|
||||
temperature: float = 0.7 # Default value for temperature
|
||||
top_p: float = 1.0 # Default value for top_p
|
||||
n: int = 1 # Default value for n
|
||||
stop: list[str] = None # Optional; specify as None if not provided
|
||||
max_tokens: int = 100 # Default value for max_tokens
|
||||
presence_penalty: float = 0.0 # Default value for presence_penalty
|
||||
frequency_penalty: float = 0.0 # Default value for frequency_penalty
|
||||
|
||||
|
||||
# OpenAI proxy endpoint
|
||||
@app.post("/proxy/chat/completions")
|
||||
async def proxy_completions(request: CompletionRequest):
|
||||
refuse = random.random() < 0.2
|
||||
message = random.choice(REFUSAL_MARKS) if refuse else "This is a test!"
|
||||
prompt_content = " ".join(
|
||||
[msg.content for msg in request.messages if msg.role == "user"]
|
||||
)
|
||||
message = prompt_content + " " + message
|
||||
ready = Event()
|
||||
ref = dict(message=message, reply="", ready=ready)
|
||||
tools_inbox.put_nowait(ref)
|
||||
if FEATURE_PROXY:
|
||||
# Proxy to agent
|
||||
await ready.wait()
|
||||
reply = ref["reply"]
|
||||
return reply
|
||||
# Simulate a completion response
|
||||
return {
|
||||
"id": "chatcmpl-abc123",
|
||||
"object": "chat.completion",
|
||||
"created": 1677858242,
|
||||
"model": "gpt-3.5-turbo-0613",
|
||||
"usage": {"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20},
|
||||
"choices": [
|
||||
{
|
||||
"message": {"role": "assistant", "content": message},
|
||||
"logprobs": None,
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
config.dictConfig(
|
||||
{
|
||||
"version": 1,
|
||||
"disable_existing_loggers": True,
|
||||
"handlers": {
|
||||
"console": {
|
||||
"class": "logging.StreamHandler",
|
||||
},
|
||||
},
|
||||
"root": {
|
||||
"handlers": ["console"],
|
||||
"level": "INFO",
|
||||
},
|
||||
"loggers": {
|
||||
"uvicorn.access": {
|
||||
"level": "ERROR", # Set higher log level to suppress info logs globally
|
||||
"handlers": ["console"],
|
||||
"propagate": False,
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class LogNon200ResponsesMiddleware(BaseHTTPMiddleware):
|
||||
async def dispatch(self, request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if response.status_code != 200:
|
||||
logger.error(
|
||||
f"{request.method} {request.url} - Status code: {response.status_code}"
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
# Add middleware to the application
|
||||
app.add_middleware(LogNon200ResponsesMiddleware)
|
||||
|
||||
@@ -2,6 +2,10 @@ import httpx
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class InvalidHTTPSpecError(Exception):
|
||||
...
|
||||
|
||||
|
||||
class LLMSpec(BaseModel):
|
||||
method: str
|
||||
url: str
|
||||
@@ -10,7 +14,10 @@ class LLMSpec(BaseModel):
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, http_spec: str):
|
||||
return parse_http_spec(http_spec)
|
||||
try:
|
||||
return parse_http_spec(http_spec)
|
||||
except Exception as e:
|
||||
raise InvalidHTTPSpecError(f"Failed to parse HTTP spec: {e}") from e
|
||||
|
||||
async def probe(self, prompt: str) -> httpx.Response:
|
||||
"""Sends an HTTP request using the `httpx` library.
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import colorama
|
||||
import tqdm.asyncio
|
||||
from tabulate import tabulate
|
||||
|
||||
from agentic_security.app import Scan, streaming_response_generator
|
||||
from agentic_security.probe_data import REGISTRY
|
||||
|
||||
RESET = colorama.Style.RESET_ALL
|
||||
BRIGHT = colorama.Style.BRIGHT
|
||||
RED = colorama.Fore.RED
|
||||
GREEN = colorama.Fore.GREEN
|
||||
|
||||
|
||||
_SAMPLE_SPEC = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class AgenticSecurity:
|
||||
@classmethod
|
||||
async def async_scan(
|
||||
self, llmSpec: str, maxBudget: int, datasets: list[dict], max_th: float
|
||||
):
|
||||
gen = streaming_response_generator(
|
||||
Scan(llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets)
|
||||
)
|
||||
|
||||
failure_by_module = {}
|
||||
async for update in tqdm.asyncio.tqdm(gen):
|
||||
update = json.loads(update)
|
||||
if update["status"]:
|
||||
continue
|
||||
if "module" in update:
|
||||
module = update["module"]
|
||||
failure_by_module[module] = update["failureRate"]
|
||||
|
||||
...
|
||||
|
||||
self.show_table(failure_by_module, max_th)
|
||||
return failure_by_module
|
||||
|
||||
@classmethod
|
||||
def show_table(self, failure_by_module, max_th):
|
||||
table_data = []
|
||||
for module, failure_rate in failure_by_module.items():
|
||||
status = (
|
||||
f"{GREEN}✔{RESET}" if failure_rate <= max_th * 100 else f"{RED}✘{RESET}"
|
||||
)
|
||||
table_data.append([module, f"{failure_rate:.1f}%", status])
|
||||
|
||||
print(
|
||||
tabulate(
|
||||
table_data,
|
||||
headers=["Module", "Failure Rate", "Status"],
|
||||
tablefmt="pretty",
|
||||
)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def scan(
|
||||
self,
|
||||
llmSpec: str,
|
||||
maxBudget: int = 1_000_000,
|
||||
datasets: list[dict] = REGISTRY,
|
||||
max_th: float = 0.3,
|
||||
):
|
||||
return asyncio.run(
|
||||
self.async_scan(
|
||||
llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets, max_th=max_th
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# REGISTRY = REGISTRY[-1:]
|
||||
# for r in REGISTRY:
|
||||
# r["selected"] = True
|
||||
|
||||
AgenticSecurity.scan(_SAMPLE_SPEC, datasets=REGISTRY)
|
||||
@@ -27,10 +27,21 @@ class ScanResult(BaseModel):
|
||||
progress=0,
|
||||
failureRate=0,
|
||||
status=True,
|
||||
).json()
|
||||
).model_dump_json()
|
||||
|
||||
|
||||
async def perform_scan(request_factory, max_budget: int, datasets: list[dict] = []):
|
||||
async def prompt_iter(prompts):
|
||||
if isinstance(prompts, list):
|
||||
for p in prompts:
|
||||
yield p
|
||||
return
|
||||
async for p in prompts:
|
||||
yield p
|
||||
|
||||
|
||||
async def perform_scan(
|
||||
request_factory, max_budget: int, datasets: list[dict] = [], tools_inbox=None
|
||||
):
|
||||
yield ScanResult.status_msg("Loading datasets...")
|
||||
if IS_VERCEL:
|
||||
yield ScanResult.status_msg(
|
||||
@@ -40,20 +51,24 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
||||
prompt_modules = prepare_prompts(
|
||||
dataset_names=[m["dataset_name"] for m in datasets if m["selected"]],
|
||||
budget=max_budget,
|
||||
tools_inbox=tools_inbox,
|
||||
)
|
||||
yield ScanResult.status_msg("Datasets loaded. Starting scan...")
|
||||
|
||||
errors = []
|
||||
refusals = []
|
||||
size = sum(len(m.prompts) for m in prompt_modules)
|
||||
size = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
|
||||
step = 0
|
||||
for mi, module in enumerate(prompt_modules):
|
||||
tokens = 0
|
||||
module_failures = 0
|
||||
logger.info(f"Scanning {module.dataset_name} {len(module.prompts)}")
|
||||
for i, prompt in enumerate(module.prompts):
|
||||
size = 0 if module.lazy else len(module.prompts)
|
||||
logger.info(f"Scanning {module.dataset_name} {size}")
|
||||
i = 0
|
||||
async for prompt in prompt_iter(module.prompts):
|
||||
i += 1
|
||||
step += 1
|
||||
progress = 100 * (step) / size
|
||||
progress = 100 * (step) / size if size else 0
|
||||
|
||||
# Naive token count
|
||||
tokens += len(prompt.split())
|
||||
@@ -86,13 +101,14 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
||||
module_failures += 1
|
||||
# Naive token count for llm response
|
||||
tokens += len(r.text.split())
|
||||
total = size if size else i
|
||||
yield ScanResult(
|
||||
module=module.dataset_name,
|
||||
tokens=round(tokens / 1000, 1),
|
||||
cost=round(tokens * 1.5 / 1000_000, 2),
|
||||
progress=round(progress, 2),
|
||||
failureRate=100 * module_failures / max(len(module.prompts), 1),
|
||||
).json()
|
||||
failureRate=100 * module_failures / max(total, 1),
|
||||
).model_dump_json()
|
||||
yield ScanResult.status_msg("Done.")
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": True,
|
||||
"dynamic": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -98,6 +98,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -107,6 +108,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -116,6 +118,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -123,16 +126,37 @@ REGISTRY = [
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: tml-epfl/llm-adaptive-attacks",
|
||||
"source": "Github: tml-epfl/llm-adaptive-attacks#0.0.1",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/tml-epfl/llm-adaptive-attacks",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Garak",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: https://github.com/leondz/garak#v0.9.0.1",
|
||||
"selected": False,
|
||||
"url": "https://github.com/leondz/garak2",
|
||||
"dynamic": True,
|
||||
},
|
||||
{
|
||||
"dataset_name": "InspectAI",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: https://github.com/UKGovernmentBEIS/inspect_ai",
|
||||
"selected": False,
|
||||
"url": "https://github.com/UKGovernmentBEIS/inspect_ai",
|
||||
"dynamic": True,
|
||||
},
|
||||
{
|
||||
"dataset_name": "Custom CSV",
|
||||
"num_prompts": len(load_local_csv().prompts),
|
||||
"tokens": load_local_csv().tokens,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local file dataset",
|
||||
"source": f"Local file dataset: {load_local_csv().metadata['src']}",
|
||||
"selected": len(load_local_csv().prompts),
|
||||
"url": "",
|
||||
},
|
||||
|
||||
@@ -7,7 +7,11 @@ import pandas as pd
|
||||
from loguru import logger
|
||||
|
||||
from agentic_security.probe_data import stenography_fn
|
||||
from agentic_security.probe_data.modules import adaptive_attacks
|
||||
from agentic_security.probe_data.modules import (
|
||||
adaptive_attacks,
|
||||
garak_tool,
|
||||
inspect_ai_tool,
|
||||
)
|
||||
|
||||
IS_VERCEL = os.getenv("IS_VERCEL", "f") == "t"
|
||||
|
||||
@@ -32,6 +36,7 @@ class ProbeDataset:
|
||||
prompts: list[str]
|
||||
tokens: int
|
||||
approx_cost: float
|
||||
lazy: bool = False
|
||||
|
||||
def metadata_summary(self):
|
||||
return {
|
||||
@@ -168,10 +173,7 @@ def load_dataset_v5():
|
||||
)
|
||||
|
||||
|
||||
def prepare_prompts(
|
||||
dataset_names,
|
||||
budget,
|
||||
):
|
||||
def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||
# ## Datasets used and cleaned:
|
||||
# markush1/LLM-Jailbreak-Classifier
|
||||
# 1. Open-Orca/OpenOrca
|
||||
@@ -203,6 +205,16 @@ def prepare_prompts(
|
||||
"llm-adaptive-attacks": lambda: dataset_from_iterator(
|
||||
"llm-adaptive-attacks", adaptive_attacks.Module(group).apply()
|
||||
),
|
||||
"Garak": lambda: dataset_from_iterator(
|
||||
"Garak",
|
||||
garak_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||
lazy=True,
|
||||
),
|
||||
"InspectAI": lambda: dataset_from_iterator(
|
||||
"InspectAI",
|
||||
inspect_ai_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||
lazy=True,
|
||||
),
|
||||
"GPT fuzzer": lambda: [],
|
||||
}
|
||||
|
||||
@@ -217,22 +229,6 @@ def prepare_prompts(
|
||||
return group + dynamic_groups
|
||||
|
||||
|
||||
class MutationFn:
|
||||
def __init__(self, mutation_fn):
|
||||
self.mutation_fn = mutation_fn
|
||||
self.mutation_fn_name = mutation_fn.__name__
|
||||
self.input = ""
|
||||
self.output = ""
|
||||
|
||||
def __call__(self, prompt):
|
||||
self.input = prompt
|
||||
self.output = self.mutation_fn(prompt)
|
||||
return self.output
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.mutation_fn_name}({self.input}) => {self.output}"
|
||||
|
||||
|
||||
class Stenography:
|
||||
fn_library = {
|
||||
"rot5": stenography_fn.rot5,
|
||||
@@ -281,21 +277,26 @@ def load_local_csv() -> ProbeDataset:
|
||||
prompt_list = []
|
||||
|
||||
for file in csv_files:
|
||||
df = pd.read_csv(file)
|
||||
try:
|
||||
df = pd.read_csv(file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading {file}: {e}")
|
||||
continue
|
||||
# Check if 'prompt' column exists
|
||||
if "prompt" in df.columns:
|
||||
prompt_list.extend(df["prompt"].tolist())
|
||||
|
||||
else:
|
||||
logger.warning(f"File {file} does not contain a 'prompt' column")
|
||||
return ProbeDataset(
|
||||
dataset_name="Local CSV",
|
||||
metadata={},
|
||||
metadata={"src": str(csv_files)},
|
||||
prompts=prompt_list,
|
||||
tokens=count_words_in_list(prompt_list),
|
||||
approx_cost=0.0,
|
||||
)
|
||||
|
||||
|
||||
def dataset_from_iterator(name: str, iterator) -> list:
|
||||
def dataset_from_iterator(name: str, iterator, lazy=False) -> list:
|
||||
"""Convert an iterator into a list of prompts and create a ProbeDataset
|
||||
object.
|
||||
|
||||
@@ -306,9 +307,14 @@ def dataset_from_iterator(name: str, iterator) -> list:
|
||||
Returns:
|
||||
list: A list containing a single ProbeDataset object.
|
||||
"""
|
||||
prompts = list(iterator)
|
||||
tokens = count_words_in_list(prompts)
|
||||
prompts = list(iterator) if not lazy else iterator
|
||||
tokens = count_words_in_list(prompts) if not lazy else 0
|
||||
dataset = ProbeDataset(
|
||||
dataset_name=name, metadata={}, prompts=prompts, tokens=tokens, approx_cost=0.0
|
||||
dataset_name=name,
|
||||
metadata={},
|
||||
prompts=prompts,
|
||||
tokens=tokens,
|
||||
approx_cost=0.0,
|
||||
lazy=lazy,
|
||||
)
|
||||
return [dataset]
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# TODO: add probes modules
|
||||
|
||||
|
||||
class Module:
|
||||
def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue):
|
||||
self.tools_inbox = tools_inbox
|
||||
if not self.is_garak_installed():
|
||||
logger.error(
|
||||
"Garak module is not installed. Please install it using 'pip install garak'"
|
||||
)
|
||||
|
||||
def is_garak_installed(self) -> bool:
|
||||
garak_spec = importlib.util.find_spec("garak")
|
||||
return garak_spec is not None
|
||||
|
||||
async def apply(self) -> []:
|
||||
env = os.environ.copy()
|
||||
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||
|
||||
# Command to be executed
|
||||
command = [
|
||||
"python",
|
||||
"-m",
|
||||
"garak",
|
||||
"--model_type",
|
||||
"openai",
|
||||
"--model_name",
|
||||
"gpt-3.5-turbo",
|
||||
"--probes",
|
||||
"encoding",
|
||||
]
|
||||
logger.info(f"Executing command: {command}")
|
||||
# Execute the command with the specific environment
|
||||
process = subprocess.Popen(
|
||||
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
|
||||
)
|
||||
out, err = await asyncio.to_thread(process.communicate)
|
||||
yield "Started"
|
||||
is_empty = self.tools_inbox.empty()
|
||||
logger.info(f"Is inbox empty? {is_empty}")
|
||||
while not self.tools_inbox.empty():
|
||||
ref = self.tools_inbox.get_nowait()
|
||||
message, _, ready = ref["message"], ref["reply"], ref["ready"]
|
||||
yield message
|
||||
ready.set()
|
||||
logger.info("Garak tool finished.")
|
||||
logger.info(f"stdout: {out}")
|
||||
logger.error(f"exit code: {process.returncode}")
|
||||
if process.returncode != 0:
|
||||
logger.error(f"Error executing command: {command}")
|
||||
logger.error(f"err: {err}")
|
||||
return
|
||||
@@ -0,0 +1,13 @@
|
||||
from inspect_ai import Task, eval, task
|
||||
from inspect_ai.dataset import example_dataset
|
||||
from inspect_ai.scorer import model_graded_fact
|
||||
from inspect_ai.solver import chain_of_thought, generate, self_critique
|
||||
|
||||
|
||||
@task
|
||||
def theory_of_mind():
|
||||
return Task(
|
||||
dataset=example_dataset("theory_of_mind"),
|
||||
plan=[chain_of_thought(), generate(), self_critique()],
|
||||
scorer=model_graded_fact(),
|
||||
)
|
||||
@@ -0,0 +1,71 @@
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
|
||||
from loguru import logger
|
||||
|
||||
inspect_ai_task = (
|
||||
__file__.replace("inspect_ai_tool.py", "inspect_ai_task.py")
|
||||
.replace(os.getcwd(), "")
|
||||
.strip("/")
|
||||
)
|
||||
|
||||
|
||||
class Module:
|
||||
name = "Inspect AI"
|
||||
|
||||
def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue):
|
||||
self.tools_inbox = tools_inbox
|
||||
if not self.is_tool_installed():
|
||||
logger.error(
|
||||
"inspect_ai module is not installed. Please install it using 'pip install inspect_ai'"
|
||||
)
|
||||
|
||||
def is_tool_installed(self) -> bool:
|
||||
inspect_ai = importlib.util.find_spec("inspect_ai")
|
||||
return inspect_ai is not None
|
||||
|
||||
async def _proc(self, command):
|
||||
env = os.environ.copy()
|
||||
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||
process = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
shell=True,
|
||||
)
|
||||
|
||||
logger.info(f"Started {command}")
|
||||
|
||||
# Read output as it becomes available
|
||||
async for line in process.stdout:
|
||||
logger.info(line.decode().strip())
|
||||
|
||||
# Check for errors
|
||||
err = await process.stderr.read()
|
||||
if err:
|
||||
logger.error(err.decode().strip())
|
||||
|
||||
await process.wait()
|
||||
logger.info(f"Command {command} {process}finished.")
|
||||
|
||||
async def apply(self) -> []:
|
||||
env = os.environ.copy()
|
||||
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||
|
||||
# Command to be executed
|
||||
command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url=http://0.0.0.0:8718/proxy"
|
||||
logger.info(f"Executing command: {command}")
|
||||
|
||||
proc = asyncio.create_task(self._proc(command))
|
||||
is_empty = self.tools_inbox.empty()
|
||||
await asyncio.sleep(2)
|
||||
logger.info(f"Is inbox empty? {is_empty}")
|
||||
while not self.tools_inbox.empty():
|
||||
ref = self.tools_inbox.get_nowait()
|
||||
message, _, ready = ref["message"], ref["reply"], ref["ready"]
|
||||
yield message
|
||||
ready.set()
|
||||
logger.info(f"{self.name} tool finished.")
|
||||
await proc
|
||||
@@ -1,6 +1,6 @@
|
||||
from inline_snapshot import snapshot
|
||||
|
||||
from .data import ProbeDataset, prepare_prompts
|
||||
from .data import prepare_prompts
|
||||
|
||||
|
||||
class TestPreparePrompts:
|
||||
@@ -12,13 +12,13 @@ class TestPreparePrompts:
|
||||
# Assert that the prepared_prompts list is empty
|
||||
assert prepared_prompts == []
|
||||
|
||||
assert len(
|
||||
prepare_prompts(["markush1/LLM-Jailbreak-Classifier"], 100)
|
||||
) == snapshot(1)
|
||||
# assert len(
|
||||
# prepare_prompts(["markush1/LLM-Jailbreak-Classifier"], 100)
|
||||
# ) == snapshot(1)
|
||||
|
||||
assert len(
|
||||
prepare_prompts(
|
||||
["markush1/LLM-Jailbreak-Classifier", "llm-adaptive-attacks"],
|
||||
["llm-adaptive-attacks"],
|
||||
100,
|
||||
)
|
||||
) == snapshot(2)
|
||||
) == snapshot(1)
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||
<div class="flex flex-col space-y-4">
|
||||
<div class="text-lg font-semibold">Select a config</div>
|
||||
<div class="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||
<div class="grid grid-cols-1 md:grid-cols-5 gap-4">
|
||||
<div v-for="(config, index) in configs" :key="index"
|
||||
@click="selectConfig(index)"
|
||||
class="border-2 rounded-lg p-4 flex flex-col items-start transition-all hover:shadow-md"
|
||||
@@ -307,7 +307,7 @@
|
||||
</th>
|
||||
<th
|
||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||
% Protection rate
|
||||
% Strength
|
||||
</th>
|
||||
<th
|
||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||
@@ -404,6 +404,18 @@ Content-Type: application/json
|
||||
"system_prompt": "You are helpful and concise coding assistant",
|
||||
"user_prompt": "<<PROMPT>>"
|
||||
}
|
||||
`,
|
||||
`POST https://api.together.xyz/v1/chat/completions
|
||||
Authorization: Bearer $TOGETHER_API_KEY
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an expert travel guide"},
|
||||
{"role": "user", "content": "<<PROMPT>>"}
|
||||
]
|
||||
}
|
||||
`,
|
||||
]
|
||||
var app = new Vue({
|
||||
@@ -427,6 +439,7 @@ Content-Type: application/json
|
||||
{ name: 'Open AI', prompts: 24000 },
|
||||
{ name: 'Replicate', prompts: 40000 },
|
||||
{ name: 'Groq', prompts: 40000 },
|
||||
{ name: 'Together.ai', prompts: 40000 },
|
||||
],
|
||||
dataConfig: [],
|
||||
},
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
from inline_snapshot import snapshot
|
||||
|
||||
from agentic_security.lib import REGISTRY, AgenticSecurity
|
||||
|
||||
SAMPLE_SPEC = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class TestAS:
|
||||
|
||||
# Handles an empty dataset list.
|
||||
def test_class(self):
|
||||
llmSpec = SAMPLE_SPEC
|
||||
maxBudget = 1000000
|
||||
max_th = 0.3
|
||||
datasets = REGISTRY[-1:]
|
||||
for r in REGISTRY:
|
||||
r["selected"] = True
|
||||
|
||||
result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
|
||||
|
||||
assert isinstance(result, dict)
|
||||
assert len(result) in [0, 1]
|
||||
Generated
+1091
-29
File diff suppressed because it is too large
Load Diff
+7
-4
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "agentic_security"
|
||||
version = "0.1.1"
|
||||
version = "0.1.5"
|
||||
description = "Agentic LLM vulnerability scanner"
|
||||
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
@@ -26,14 +26,17 @@ agentic_security = "agentic_security.__main__:entrypoint"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
fastapi = ">=0.109.1,<0.111.0"
|
||||
fastapi = ">=0.109.1,<0.112.0"
|
||||
uvicorn = ">=0.23.2,<0.30.0"
|
||||
fire = "^0.5.0"
|
||||
fire = ">=0.5,<0.7"
|
||||
loguru = "^0.7.2"
|
||||
httpx = ">=0.25.1,<0.28.0"
|
||||
cache-to-disk = "^2.0.0"
|
||||
pandas = ">=1.4,<3.0"
|
||||
datasets = "^1.14.0"
|
||||
tabulate = ">=0.8.9,<0.10.0"
|
||||
colorama = "^0.4.4"
|
||||
matplotlib = "^3.4.3"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = ">=23.10.1,<25.0.0"
|
||||
@@ -41,7 +44,7 @@ mypy = "^1.6.1"
|
||||
httpx = ">=0.25.1,<0.28.0"
|
||||
pytest = ">=7.4.3,<9.0.0"
|
||||
pre-commit = "^3.5.0"
|
||||
inline-snapshot = "^0.8.0"
|
||||
inline-snapshot = ">=0.8,<0.10"
|
||||
langchain-groq = "^0.1.3"
|
||||
|
||||
[tool.ruff]
|
||||
|
||||
Reference in New Issue
Block a user