mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-25 14:49:57 +02:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 50033a9bc7 | |||
| e90d4ea212 | |||
| 1b27e502d4 | |||
| b3d9292a6b | |||
| ef331f6cdc | |||
| da571d0117 | |||
| 029d7934e6 | |||
| e749a37eed | |||
| c339d0c52b | |||
| 451c4ec5de | |||
| 58643f9d0a | |||
| 1fbcde8bbb | |||
| 17db996280 | |||
| eae8dafeff | |||
| ed779372f0 | |||
| 74461efaa0 | |||
| fa209684d9 | |||
| 26541664fc | |||
| 1e793fed54 | |||
| 58195b5fdc |
@@ -2,4 +2,4 @@
|
|||||||
max-line-length = 160
|
max-line-length = 160
|
||||||
per-file-ignores =
|
per-file-ignores =
|
||||||
# Ignore docstring lints for tests
|
# Ignore docstring lints for tests
|
||||||
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400
|
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400, E501, D401
|
||||||
|
|||||||
@@ -3,3 +3,5 @@
|
|||||||
.web
|
.web
|
||||||
__pycache__/
|
__pycache__/
|
||||||
failures.csv
|
failures.csv
|
||||||
|
runs/
|
||||||
|
*.todo
|
||||||
|
|||||||
@@ -3,9 +3,7 @@
|
|||||||
<h1 align="center">Agentic Security</h1>
|
<h1 align="center">Agentic Security</h1>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
The open-source Agentic LLM Vulnerability Scanner .
|
The open-source Agentic LLM Vulnerability Scanner
|
||||||
<br />
|
|
||||||
<a href="#features"><strong>Learn more »</strong></a>
|
|
||||||
<br />
|
<br />
|
||||||
<br />
|
<br />
|
||||||
|
|
||||||
@@ -24,18 +22,16 @@
|
|||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Customizable Rule Sets or Agent based attacks🛠️
|
- Customizable Rule Sets or Agent based attacks🛠️
|
||||||
- Comprehansive fuzzing for any LLMs 🧪
|
- Comprehensive fuzzing for any LLMs 🧪
|
||||||
- LLM API integration and stress testing 🛠️
|
- LLM API integration and stress testing 🛠️
|
||||||
- Wide range of fuzzing and attack techniques 🌀
|
- Wide range of fuzzing and attack techniques 🌀
|
||||||
|
|
||||||
|
|
||||||
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
||||||
|
|
||||||
## About the Project 🧙
|
## About the Project 🧙
|
||||||
|
|
||||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||||
|
|
||||||
|
|
||||||
## 📦 Installation
|
## 📦 Installation
|
||||||
|
|
||||||
To get started with Agentic Security, simply install the package using pip:
|
To get started with Agentic Security, simply install the package using pip:
|
||||||
@@ -103,6 +99,43 @@ To add your own dataset you can place one or multiples csv files with `prompt` c
|
|||||||
2024-04-13 13:21:31.157 | INFO | agentic_security.probe_data.data:load_local_csv:274 - CSV files: ['prompts.csv']
|
2024-04-13 13:21:31.157 | INFO | agentic_security.probe_data.data:load_local_csv:274 - CSV files: ['prompts.csv']
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Run as CI check
|
||||||
|
|
||||||
|
ci.py
|
||||||
|
|
||||||
|
```python
|
||||||
|
from agentic_security import AgenticSecurity
|
||||||
|
|
||||||
|
spec = """
|
||||||
|
POST http://0.0.0.0:8718/v1/self-probe
|
||||||
|
Authorization: Bearer XXXXX
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
"prompt": "<<PROMPT>>"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
result = AgenticSecurity.scan(llmSpec=spec)
|
||||||
|
|
||||||
|
# module: failure rate
|
||||||
|
# {"Local CSV": 79.65116279069767, "llm-adaptive-attacks": 20.0}
|
||||||
|
exit(max(result.values()) > 20)
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
python ci.py
|
||||||
|
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:279 - Found 1 CSV files
|
||||||
|
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:280 - CSV files: ['prompts.csv']
|
||||||
|
0it [00:00, ?it/s][INFO] 2024-04-27 17:15:13.74 | data:prepare_prompts:195 | Loading Custom CSV
|
||||||
|
[INFO] 2024-04-27 17:15:13.74 | fuzzer:perform_scan:53 | Scanning Local CSV 15
|
||||||
|
18it [00:00, 176.88it/s]
|
||||||
|
+-----------+--------------+--------+
|
||||||
|
| Module | Failure Rate | Status |
|
||||||
|
+-----------+--------------+--------+
|
||||||
|
| Local CSV | 80.0% | ✘ |
|
||||||
|
+-----------+--------------+--------+
|
||||||
|
```
|
||||||
|
|
||||||
## Extending dataset collections
|
## Extending dataset collections
|
||||||
|
|
||||||
1. Add new metadata to agentic_security.probe_data.REGISTRY
|
1. Add new metadata to agentic_security.probe_data.REGISTRY
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .lib import AgenticSecurity
|
||||||
|
|
||||||
|
__all__ = ["AgenticSecurity"]
|
||||||
|
|||||||
@@ -10,15 +10,24 @@ from agentic_security.app import app
|
|||||||
class T:
|
class T:
|
||||||
def server(self, port=8718, host="0.0.0.0"):
|
def server(self, port=8718, host="0.0.0.0"):
|
||||||
sys.path.append(os.path.dirname("."))
|
sys.path.append(os.path.dirname("."))
|
||||||
config = uvicorn.Config(app, port=port, host=host, log_level="info")
|
config = uvicorn.Config(
|
||||||
|
app, port=port, host=host, log_level="info", reload=True
|
||||||
|
)
|
||||||
server = uvicorn.Server(config)
|
server = uvicorn.Server(config)
|
||||||
server.run()
|
server.run()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def headless(self):
|
||||||
|
sys.path.append(os.path.dirname("."))
|
||||||
|
|
||||||
|
|
||||||
def entrypoint():
|
def entrypoint():
|
||||||
fire.Fire(T().server)
|
fire.Fire(T().server)
|
||||||
|
|
||||||
|
|
||||||
|
def ci_entrypoint():
|
||||||
|
fire.Fire(T().headless)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
entrypoint()
|
entrypoint()
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
from asyncio import Event, Queue
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -39,6 +40,9 @@ app.add_middleware(
|
|||||||
allow_headers=["*"], # Allows all headers
|
allow_headers=["*"], # Allows all headers
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tools_inbox = Queue()
|
||||||
|
FEATURE_PROXY = False
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
@@ -88,6 +92,7 @@ def streaming_response_generator(scan_parameters: Scan):
|
|||||||
request_factory=request_factory,
|
request_factory=request_factory,
|
||||||
max_budget=scan_parameters.maxBudget,
|
max_budget=scan_parameters.maxBudget,
|
||||||
datasets=scan_parameters.datasets,
|
datasets=scan_parameters.datasets,
|
||||||
|
tools_inbox=tools_inbox,
|
||||||
):
|
):
|
||||||
yield scan_result + "\n" # Adding a newline for separation
|
yield scan_result + "\n" # Adding a newline for separation
|
||||||
|
|
||||||
@@ -149,3 +154,55 @@ class Table(BaseModel):
|
|||||||
async def get_plot(table: Table):
|
async def get_plot(table: Table):
|
||||||
buf = plot_security_report(table.table)
|
buf = plot_security_report(table.table)
|
||||||
return StreamingResponse(buf, media_type="image/jpeg")
|
return StreamingResponse(buf, media_type="image/jpeg")
|
||||||
|
|
||||||
|
|
||||||
|
class Message(BaseModel):
|
||||||
|
role: str
|
||||||
|
content: str
|
||||||
|
|
||||||
|
|
||||||
|
class CompletionRequest(BaseModel):
|
||||||
|
model: str
|
||||||
|
messages: list[Message]
|
||||||
|
temperature: float
|
||||||
|
top_p: float
|
||||||
|
n: int
|
||||||
|
stop: list[str]
|
||||||
|
max_tokens: int
|
||||||
|
presence_penalty: float
|
||||||
|
frequency_penalty: float
|
||||||
|
|
||||||
|
|
||||||
|
# OpenAI proxy endpoint
|
||||||
|
@app.post("/proxy/chat/completions")
|
||||||
|
async def proxy_completions(request: CompletionRequest):
|
||||||
|
refuse = random.random() < 0.2
|
||||||
|
message = random.choice(REFUSAL_MARKS) if refuse else "This is a test!"
|
||||||
|
prompt_content = " ".join(
|
||||||
|
[msg.content for msg in request.messages if msg.role == "user"]
|
||||||
|
)
|
||||||
|
message = prompt_content + " " + message
|
||||||
|
ready = Event()
|
||||||
|
ref = dict(message=message, reply="", ready=ready)
|
||||||
|
tools_inbox.put_nowait(ref)
|
||||||
|
if FEATURE_PROXY:
|
||||||
|
# Proxy to agent
|
||||||
|
await ready.wait()
|
||||||
|
reply = ref["reply"]
|
||||||
|
return reply
|
||||||
|
# Simulate a completion response
|
||||||
|
return {
|
||||||
|
"id": "chatcmpl-abc123",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": 1677858242,
|
||||||
|
"model": "gpt-3.5-turbo-0613",
|
||||||
|
"usage": {"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20},
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"message": {"role": "assistant", "content": message},
|
||||||
|
"logprobs": None,
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,10 @@ import httpx
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidHTTPSpecError(Exception):
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
class LLMSpec(BaseModel):
|
class LLMSpec(BaseModel):
|
||||||
method: str
|
method: str
|
||||||
url: str
|
url: str
|
||||||
@@ -10,7 +14,10 @@ class LLMSpec(BaseModel):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_string(cls, http_spec: str):
|
def from_string(cls, http_spec: str):
|
||||||
return parse_http_spec(http_spec)
|
try:
|
||||||
|
return parse_http_spec(http_spec)
|
||||||
|
except Exception as e:
|
||||||
|
raise InvalidHTTPSpecError(f"Failed to parse HTTP spec: {e}") from e
|
||||||
|
|
||||||
async def probe(self, prompt: str) -> httpx.Response:
|
async def probe(self, prompt: str) -> httpx.Response:
|
||||||
"""Sends an HTTP request using the `httpx` library.
|
"""Sends an HTTP request using the `httpx` library.
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
|
||||||
|
import colorama
|
||||||
|
import tqdm.asyncio
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
from agentic_security.app import Scan, streaming_response_generator
|
||||||
|
from agentic_security.probe_data import REGISTRY
|
||||||
|
|
||||||
|
RESET = colorama.Style.RESET_ALL
|
||||||
|
BRIGHT = colorama.Style.BRIGHT
|
||||||
|
RED = colorama.Fore.RED
|
||||||
|
GREEN = colorama.Fore.GREEN
|
||||||
|
|
||||||
|
|
||||||
|
_SAMPLE_SPEC = """
|
||||||
|
POST http://0.0.0.0:8718/v1/self-probe
|
||||||
|
Authorization: Bearer XXXXX
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
"prompt": "<<PROMPT>>"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class AgenticSecurity:
|
||||||
|
@classmethod
|
||||||
|
async def async_scan(
|
||||||
|
self, llmSpec: str, maxBudget: int, datasets: list[dict], max_th: float
|
||||||
|
):
|
||||||
|
gen = streaming_response_generator(
|
||||||
|
Scan(llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets)
|
||||||
|
)
|
||||||
|
|
||||||
|
failure_by_module = {}
|
||||||
|
async for update in tqdm.asyncio.tqdm(gen):
|
||||||
|
update = json.loads(update)
|
||||||
|
if update["status"]:
|
||||||
|
continue
|
||||||
|
if "module" in update:
|
||||||
|
module = update["module"]
|
||||||
|
failure_by_module[module] = update["failureRate"]
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
self.show_table(failure_by_module, max_th)
|
||||||
|
return failure_by_module
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def show_table(self, failure_by_module, max_th):
|
||||||
|
table_data = []
|
||||||
|
for module, failure_rate in failure_by_module.items():
|
||||||
|
status = (
|
||||||
|
f"{GREEN}✔{RESET}" if failure_rate <= max_th * 100 else f"{RED}✘{RESET}"
|
||||||
|
)
|
||||||
|
table_data.append([module, f"{failure_rate:.1f}%", status])
|
||||||
|
|
||||||
|
print(
|
||||||
|
tabulate(
|
||||||
|
table_data,
|
||||||
|
headers=["Module", "Failure Rate", "Status"],
|
||||||
|
tablefmt="pretty",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def scan(
|
||||||
|
self,
|
||||||
|
llmSpec: str,
|
||||||
|
maxBudget: int = 1_000_000,
|
||||||
|
datasets: list[dict] = REGISTRY,
|
||||||
|
max_th: float = 0.3,
|
||||||
|
):
|
||||||
|
return asyncio.run(
|
||||||
|
self.async_scan(
|
||||||
|
llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets, max_th=max_th
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# REGISTRY = REGISTRY[-1:]
|
||||||
|
# for r in REGISTRY:
|
||||||
|
# r["selected"] = True
|
||||||
|
|
||||||
|
AgenticSecurity.scan(_SAMPLE_SPEC, datasets=REGISTRY)
|
||||||
@@ -27,10 +27,21 @@ class ScanResult(BaseModel):
|
|||||||
progress=0,
|
progress=0,
|
||||||
failureRate=0,
|
failureRate=0,
|
||||||
status=True,
|
status=True,
|
||||||
).json()
|
).model_dump_json()
|
||||||
|
|
||||||
|
|
||||||
async def perform_scan(request_factory, max_budget: int, datasets: list[dict] = []):
|
async def prompt_iter(prompts):
|
||||||
|
if isinstance(prompts, list):
|
||||||
|
for p in prompts:
|
||||||
|
yield p
|
||||||
|
return
|
||||||
|
async for p in prompts:
|
||||||
|
yield p
|
||||||
|
|
||||||
|
|
||||||
|
async def perform_scan(
|
||||||
|
request_factory, max_budget: int, datasets: list[dict] = [], tools_inbox=None
|
||||||
|
):
|
||||||
yield ScanResult.status_msg("Loading datasets...")
|
yield ScanResult.status_msg("Loading datasets...")
|
||||||
if IS_VERCEL:
|
if IS_VERCEL:
|
||||||
yield ScanResult.status_msg(
|
yield ScanResult.status_msg(
|
||||||
@@ -40,20 +51,24 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
|||||||
prompt_modules = prepare_prompts(
|
prompt_modules = prepare_prompts(
|
||||||
dataset_names=[m["dataset_name"] for m in datasets if m["selected"]],
|
dataset_names=[m["dataset_name"] for m in datasets if m["selected"]],
|
||||||
budget=max_budget,
|
budget=max_budget,
|
||||||
|
tools_inbox=tools_inbox,
|
||||||
)
|
)
|
||||||
yield ScanResult.status_msg("Datasets loaded. Starting scan...")
|
yield ScanResult.status_msg("Datasets loaded. Starting scan...")
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
refusals = []
|
refusals = []
|
||||||
size = sum(len(m.prompts) for m in prompt_modules)
|
size = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
|
||||||
step = 0
|
step = 0
|
||||||
for mi, module in enumerate(prompt_modules):
|
for mi, module in enumerate(prompt_modules):
|
||||||
tokens = 0
|
tokens = 0
|
||||||
module_failures = 0
|
module_failures = 0
|
||||||
logger.info(f"Scanning {module.dataset_name} {len(module.prompts)}")
|
size = 0 if module.lazy else len(module.prompts)
|
||||||
for i, prompt in enumerate(module.prompts):
|
logger.info(f"Scanning {module.dataset_name} {size}")
|
||||||
|
i = 0
|
||||||
|
async for prompt in prompt_iter(module.prompts):
|
||||||
|
i += 1
|
||||||
step += 1
|
step += 1
|
||||||
progress = 100 * (step) / size
|
progress = 100 * (step) / size if size else 0
|
||||||
|
|
||||||
# Naive token count
|
# Naive token count
|
||||||
tokens += len(prompt.split())
|
tokens += len(prompt.split())
|
||||||
@@ -86,13 +101,14 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
|||||||
module_failures += 1
|
module_failures += 1
|
||||||
# Naive token count for llm response
|
# Naive token count for llm response
|
||||||
tokens += len(r.text.split())
|
tokens += len(r.text.split())
|
||||||
|
total = size if size else i
|
||||||
yield ScanResult(
|
yield ScanResult(
|
||||||
module=module.dataset_name,
|
module=module.dataset_name,
|
||||||
tokens=round(tokens / 1000, 1),
|
tokens=round(tokens / 1000, 1),
|
||||||
cost=round(tokens * 1.5 / 1000_000, 2),
|
cost=round(tokens * 1.5 / 1000_000, 2),
|
||||||
progress=round(progress, 2),
|
progress=round(progress, 2),
|
||||||
failureRate=100 * module_failures / max(len(module.prompts), 1),
|
failureRate=100 * module_failures / max(total, 1),
|
||||||
).json()
|
).model_dump_json()
|
||||||
yield ScanResult.status_msg("Done.")
|
yield ScanResult.status_msg("Done.")
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ REGISTRY = [
|
|||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Local dataset",
|
"source": "Local dataset",
|
||||||
"selected": True,
|
"selected": True,
|
||||||
"dynamic": False,
|
"dynamic": True,
|
||||||
"url": "",
|
"url": "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -98,6 +98,7 @@ REGISTRY = [
|
|||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Local dataset",
|
"source": "Local dataset",
|
||||||
"selected": False,
|
"selected": False,
|
||||||
|
"dynamic": True,
|
||||||
"url": "",
|
"url": "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -107,6 +108,7 @@ REGISTRY = [
|
|||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Local dataset",
|
"source": "Local dataset",
|
||||||
"selected": False,
|
"selected": False,
|
||||||
|
"dynamic": True,
|
||||||
"url": "",
|
"url": "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -116,6 +118,7 @@ REGISTRY = [
|
|||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Local dataset",
|
"source": "Local dataset",
|
||||||
"selected": False,
|
"selected": False,
|
||||||
|
"dynamic": True,
|
||||||
"url": "",
|
"url": "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -123,16 +126,27 @@ REGISTRY = [
|
|||||||
"num_prompts": 0,
|
"num_prompts": 0,
|
||||||
"tokens": 0,
|
"tokens": 0,
|
||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Github: tml-epfl/llm-adaptive-attacks",
|
"source": "Github: tml-epfl/llm-adaptive-attacks#0.0.1",
|
||||||
"selected": False,
|
"selected": False,
|
||||||
|
"dynamic": True,
|
||||||
"url": "https://github.com/tml-epfl/llm-adaptive-attacks",
|
"url": "https://github.com/tml-epfl/llm-adaptive-attacks",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"dataset_name": "Garak",
|
||||||
|
"num_prompts": 0,
|
||||||
|
"tokens": 0,
|
||||||
|
"approx_cost": 0.0,
|
||||||
|
"source": "Github: https://github.com/leondz/garak#v0.9.0.1",
|
||||||
|
"selected": False,
|
||||||
|
"url": "https://github.com/leondz/garak2",
|
||||||
|
"dynamic": True,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"dataset_name": "Custom CSV",
|
"dataset_name": "Custom CSV",
|
||||||
"num_prompts": len(load_local_csv().prompts),
|
"num_prompts": len(load_local_csv().prompts),
|
||||||
"tokens": load_local_csv().tokens,
|
"tokens": load_local_csv().tokens,
|
||||||
"approx_cost": 0.0,
|
"approx_cost": 0.0,
|
||||||
"source": "Local file dataset",
|
"source": f"Local file dataset: {load_local_csv().metadata['src']}",
|
||||||
"selected": len(load_local_csv().prompts),
|
"selected": len(load_local_csv().prompts),
|
||||||
"url": "",
|
"url": "",
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import pandas as pd
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from agentic_security.probe_data import stenography_fn
|
from agentic_security.probe_data import stenography_fn
|
||||||
from agentic_security.probe_data.modules import adaptive_attacks
|
from agentic_security.probe_data.modules import adaptive_attacks, garak_tool
|
||||||
|
|
||||||
IS_VERCEL = os.getenv("IS_VERCEL", "f") == "t"
|
IS_VERCEL = os.getenv("IS_VERCEL", "f") == "t"
|
||||||
|
|
||||||
@@ -32,6 +32,7 @@ class ProbeDataset:
|
|||||||
prompts: list[str]
|
prompts: list[str]
|
||||||
tokens: int
|
tokens: int
|
||||||
approx_cost: float
|
approx_cost: float
|
||||||
|
lazy: bool = False
|
||||||
|
|
||||||
def metadata_summary(self):
|
def metadata_summary(self):
|
||||||
return {
|
return {
|
||||||
@@ -168,10 +169,7 @@ def load_dataset_v5():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def prepare_prompts(
|
def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||||
dataset_names,
|
|
||||||
budget,
|
|
||||||
):
|
|
||||||
# ## Datasets used and cleaned:
|
# ## Datasets used and cleaned:
|
||||||
# markush1/LLM-Jailbreak-Classifier
|
# markush1/LLM-Jailbreak-Classifier
|
||||||
# 1. Open-Orca/OpenOrca
|
# 1. Open-Orca/OpenOrca
|
||||||
@@ -203,6 +201,11 @@ def prepare_prompts(
|
|||||||
"llm-adaptive-attacks": lambda: dataset_from_iterator(
|
"llm-adaptive-attacks": lambda: dataset_from_iterator(
|
||||||
"llm-adaptive-attacks", adaptive_attacks.Module(group).apply()
|
"llm-adaptive-attacks", adaptive_attacks.Module(group).apply()
|
||||||
),
|
),
|
||||||
|
"Garak": lambda: dataset_from_iterator(
|
||||||
|
"Garak",
|
||||||
|
garak_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||||
|
lazy=True,
|
||||||
|
),
|
||||||
"GPT fuzzer": lambda: [],
|
"GPT fuzzer": lambda: [],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -217,22 +220,6 @@ def prepare_prompts(
|
|||||||
return group + dynamic_groups
|
return group + dynamic_groups
|
||||||
|
|
||||||
|
|
||||||
class MutationFn:
|
|
||||||
def __init__(self, mutation_fn):
|
|
||||||
self.mutation_fn = mutation_fn
|
|
||||||
self.mutation_fn_name = mutation_fn.__name__
|
|
||||||
self.input = ""
|
|
||||||
self.output = ""
|
|
||||||
|
|
||||||
def __call__(self, prompt):
|
|
||||||
self.input = prompt
|
|
||||||
self.output = self.mutation_fn(prompt)
|
|
||||||
return self.output
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return f"{self.mutation_fn_name}({self.input}) => {self.output}"
|
|
||||||
|
|
||||||
|
|
||||||
class Stenography:
|
class Stenography:
|
||||||
fn_library = {
|
fn_library = {
|
||||||
"rot5": stenography_fn.rot5,
|
"rot5": stenography_fn.rot5,
|
||||||
@@ -281,21 +268,26 @@ def load_local_csv() -> ProbeDataset:
|
|||||||
prompt_list = []
|
prompt_list = []
|
||||||
|
|
||||||
for file in csv_files:
|
for file in csv_files:
|
||||||
df = pd.read_csv(file)
|
try:
|
||||||
|
df = pd.read_csv(file)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error reading {file}: {e}")
|
||||||
|
continue
|
||||||
# Check if 'prompt' column exists
|
# Check if 'prompt' column exists
|
||||||
if "prompt" in df.columns:
|
if "prompt" in df.columns:
|
||||||
prompt_list.extend(df["prompt"].tolist())
|
prompt_list.extend(df["prompt"].tolist())
|
||||||
|
else:
|
||||||
|
logger.warning(f"File {file} does not contain a 'prompt' column")
|
||||||
return ProbeDataset(
|
return ProbeDataset(
|
||||||
dataset_name="Local CSV",
|
dataset_name="Local CSV",
|
||||||
metadata={},
|
metadata={"src": str(csv_files)},
|
||||||
prompts=prompt_list,
|
prompts=prompt_list,
|
||||||
tokens=count_words_in_list(prompt_list),
|
tokens=count_words_in_list(prompt_list),
|
||||||
approx_cost=0.0,
|
approx_cost=0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def dataset_from_iterator(name: str, iterator) -> list:
|
def dataset_from_iterator(name: str, iterator, lazy=False) -> list:
|
||||||
"""Convert an iterator into a list of prompts and create a ProbeDataset
|
"""Convert an iterator into a list of prompts and create a ProbeDataset
|
||||||
object.
|
object.
|
||||||
|
|
||||||
@@ -306,9 +298,14 @@ def dataset_from_iterator(name: str, iterator) -> list:
|
|||||||
Returns:
|
Returns:
|
||||||
list: A list containing a single ProbeDataset object.
|
list: A list containing a single ProbeDataset object.
|
||||||
"""
|
"""
|
||||||
prompts = list(iterator)
|
prompts = list(iterator) if not lazy else iterator
|
||||||
tokens = count_words_in_list(prompts)
|
tokens = count_words_in_list(prompts) if not lazy else 0
|
||||||
dataset = ProbeDataset(
|
dataset = ProbeDataset(
|
||||||
dataset_name=name, metadata={}, prompts=prompts, tokens=tokens, approx_cost=0.0
|
dataset_name=name,
|
||||||
|
metadata={},
|
||||||
|
prompts=prompts,
|
||||||
|
tokens=tokens,
|
||||||
|
approx_cost=0.0,
|
||||||
|
lazy=lazy,
|
||||||
)
|
)
|
||||||
return [dataset]
|
return [dataset]
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
import asyncio
|
||||||
|
import importlib.util
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
# TODO: add probes modules
|
||||||
|
|
||||||
|
|
||||||
|
class Module:
|
||||||
|
|
||||||
|
def __init__(self, prompt_groups: [], tools_inbox: asyncio.Queue):
|
||||||
|
self.tools_inbox = tools_inbox
|
||||||
|
if not self.is_garak_installed():
|
||||||
|
logger.error(
|
||||||
|
"Garak module is not installed. Please install it using 'pip install garak'"
|
||||||
|
)
|
||||||
|
|
||||||
|
def is_garak_installed(self) -> bool:
|
||||||
|
garak_spec = importlib.util.find_spec("garak")
|
||||||
|
return garak_spec is not None
|
||||||
|
|
||||||
|
async def apply(self) -> []:
|
||||||
|
env = os.environ.copy()
|
||||||
|
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||||
|
|
||||||
|
# Command to be executed
|
||||||
|
command = [
|
||||||
|
"python",
|
||||||
|
"-m",
|
||||||
|
"garak",
|
||||||
|
"--model_type",
|
||||||
|
"openai",
|
||||||
|
"--model_name",
|
||||||
|
"gpt-3.5-turbo",
|
||||||
|
"--probes",
|
||||||
|
"encoding",
|
||||||
|
]
|
||||||
|
logger.info(f"Executing command: {command}")
|
||||||
|
# Execute the command with the specific environment
|
||||||
|
process = subprocess.Popen(
|
||||||
|
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
|
||||||
|
)
|
||||||
|
out, err = await asyncio.to_thread(process.communicate)
|
||||||
|
yield "Started"
|
||||||
|
is_empty = self.tools_inbox.empty()
|
||||||
|
logger.info(f"Is inbox empty? {is_empty}")
|
||||||
|
while not self.tools_inbox.empty():
|
||||||
|
ref = self.tools_inbox.get_nowait()
|
||||||
|
message, _, ready = ref["message"], ref["reply"], ref["ready"]
|
||||||
|
yield message
|
||||||
|
ready.set()
|
||||||
|
logger.info("Garak tool finished.")
|
||||||
|
logger.info(f"stdout: {out}")
|
||||||
|
logger.error(f"exit code: {process.returncode}")
|
||||||
|
if process.returncode != 0:
|
||||||
|
logger.error(f"Error executing command: {command}")
|
||||||
|
logger.error(f"err: {err}")
|
||||||
|
return
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from inline_snapshot import snapshot
|
from inline_snapshot import snapshot
|
||||||
|
|
||||||
from .data import ProbeDataset, prepare_prompts
|
from .data import prepare_prompts
|
||||||
|
|
||||||
|
|
||||||
class TestPreparePrompts:
|
class TestPreparePrompts:
|
||||||
|
|||||||
@@ -102,7 +102,7 @@
|
|||||||
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
|
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||||
<div class="flex flex-col space-y-4">
|
<div class="flex flex-col space-y-4">
|
||||||
<div class="text-lg font-semibold">Select a config</div>
|
<div class="text-lg font-semibold">Select a config</div>
|
||||||
<div class="grid grid-cols-1 md:grid-cols-4 gap-4">
|
<div class="grid grid-cols-1 md:grid-cols-5 gap-4">
|
||||||
<div v-for="(config, index) in configs" :key="index"
|
<div v-for="(config, index) in configs" :key="index"
|
||||||
@click="selectConfig(index)"
|
@click="selectConfig(index)"
|
||||||
class="border-2 rounded-lg p-4 flex flex-col items-start transition-all hover:shadow-md"
|
class="border-2 rounded-lg p-4 flex flex-col items-start transition-all hover:shadow-md"
|
||||||
@@ -307,7 +307,7 @@
|
|||||||
</th>
|
</th>
|
||||||
<th
|
<th
|
||||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||||
% Protection rate
|
% Strength
|
||||||
</th>
|
</th>
|
||||||
<th
|
<th
|
||||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||||
@@ -404,6 +404,18 @@ Content-Type: application/json
|
|||||||
"system_prompt": "You are helpful and concise coding assistant",
|
"system_prompt": "You are helpful and concise coding assistant",
|
||||||
"user_prompt": "<<PROMPT>>"
|
"user_prompt": "<<PROMPT>>"
|
||||||
}
|
}
|
||||||
|
`,
|
||||||
|
`POST https://api.together.xyz/v1/chat/completions
|
||||||
|
Authorization: Bearer $TOGETHER_API_KEY
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": "You are an expert travel guide"},
|
||||||
|
{"role": "user", "content": "<<PROMPT>>"}
|
||||||
|
]
|
||||||
|
}
|
||||||
`,
|
`,
|
||||||
]
|
]
|
||||||
var app = new Vue({
|
var app = new Vue({
|
||||||
@@ -427,6 +439,7 @@ Content-Type: application/json
|
|||||||
{ name: 'Open AI', prompts: 24000 },
|
{ name: 'Open AI', prompts: 24000 },
|
||||||
{ name: 'Replicate', prompts: 40000 },
|
{ name: 'Replicate', prompts: 40000 },
|
||||||
{ name: 'Groq', prompts: 40000 },
|
{ name: 'Groq', prompts: 40000 },
|
||||||
|
{ name: 'Together.ai', prompts: 40000 },
|
||||||
],
|
],
|
||||||
dataConfig: [],
|
dataConfig: [],
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
from inline_snapshot import snapshot
|
||||||
|
|
||||||
|
from agentic_security.lib import REGISTRY, AgenticSecurity
|
||||||
|
|
||||||
|
SAMPLE_SPEC = """
|
||||||
|
POST http://0.0.0.0:8718/v1/self-probe
|
||||||
|
Authorization: Bearer XXXXX
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
"prompt": "<<PROMPT>>"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestAS:
|
||||||
|
|
||||||
|
# Scans only the last REGISTRY entry and checks that a dict with at most one module is returned.
|
||||||
|
def test_class(self):
|
||||||
|
llmSpec = SAMPLE_SPEC
|
||||||
|
maxBudget = 1000000
|
||||||
|
max_th = 0.3
|
||||||
|
datasets = REGISTRY[-1:]
|
||||||
|
for r in REGISTRY:
|
||||||
|
r["selected"] = True
|
||||||
|
|
||||||
|
result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)
|
||||||
|
|
||||||
|
assert isinstance(result, dict)
|
||||||
|
assert len(result) in [0, 1]
|
||||||
Generated
+1073
-11
File diff suppressed because it is too large
Load Diff
+5
-2
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "agentic_security"
|
name = "agentic_security"
|
||||||
version = "0.1.1"
|
version = "0.1.4"
|
||||||
description = "Agentic LLM vulnerability scanner"
|
description = "Agentic LLM vulnerability scanner"
|
||||||
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||||
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||||
@@ -26,7 +26,7 @@ agentic_security = "agentic_security.__main__:entrypoint"
|
|||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
fastapi = ">=0.109.1,<0.111.0"
|
fastapi = ">=0.109.1,<0.112.0"
|
||||||
uvicorn = ">=0.23.2,<0.30.0"
|
uvicorn = ">=0.23.2,<0.30.0"
|
||||||
fire = "^0.5.0"
|
fire = "^0.5.0"
|
||||||
loguru = "^0.7.2"
|
loguru = "^0.7.2"
|
||||||
@@ -34,6 +34,9 @@ httpx = ">=0.25.1,<0.28.0"
|
|||||||
cache-to-disk = "^2.0.0"
|
cache-to-disk = "^2.0.0"
|
||||||
pandas = ">=1.4,<3.0"
|
pandas = ">=1.4,<3.0"
|
||||||
datasets = "^1.14.0"
|
datasets = "^1.14.0"
|
||||||
|
tabulate = ">=0.8.9,<0.10.0"
|
||||||
|
colorama = "^0.4.4"
|
||||||
|
matplotlib = "^3.4.3"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
black = ">=23.10.1,<25.0.0"
|
black = ">=23.10.1,<25.0.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user