mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-25 14:49:57 +02:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 50033a9bc7 | |||
| e90d4ea212 | |||
| 1b27e502d4 | |||
| b3d9292a6b | |||
| ef331f6cdc | |||
| da571d0117 | |||
| 029d7934e6 | |||
| e749a37eed | |||
| c339d0c52b | |||
| 451c4ec5de | |||
| 58643f9d0a | |||
| 1fbcde8bbb | |||
| 17db996280 | |||
| eae8dafeff | |||
| ed779372f0 | |||
| 74461efaa0 | |||
| fa209684d9 | |||
| 26541664fc | |||
| 1e793fed54 | |||
| 58195b5fdc |
@@ -2,4 +2,4 @@
|
||||
max-line-length = 160
|
||||
per-file-ignores =
|
||||
# Ignore docstring lints for tests
|
||||
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400
|
||||
*: D100, D101, D102, D103, D104, D107, D105, D202, D205, D400, E501, D401
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
.web
|
||||
__pycache__/
|
||||
failures.csv
|
||||
runs/
|
||||
*.todo
|
||||
|
||||
@@ -3,9 +3,7 @@
|
||||
<h1 align="center">Agentic Security</h1>
|
||||
|
||||
<p align="center">
|
||||
The open-source Agentic LLM Vulnerability Scanner .
|
||||
<br />
|
||||
<a href="#features"><strong>Learn more »</strong></a>
|
||||
The open-source Agentic LLM Vulnerability Scanner
|
||||
<br />
|
||||
<br />
|
||||
|
||||
@@ -24,18 +22,16 @@
|
||||
## Features
|
||||
|
||||
- Customizable Rule Sets or Agent based attacks🛠️
|
||||
- Comprehansive fuzzing for any LLMs 🧪
|
||||
- Comprehensive fuzzing for any LLMs 🧪
|
||||
- LLM API integration and stress testing 🛠️
|
||||
- Wide range of fuzzing and attack techniques 🌀
|
||||
|
||||
|
||||
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
||||
|
||||
## About the Project 🧙
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
To get started with Agentic Security, simply install the package using pip:
|
||||
@@ -103,6 +99,43 @@ To add your own dataset you can place one or multiples csv files with `prompt` c
|
||||
2024-04-13 13:21:31.157 | INFO | agentic_security.probe_data.data:load_local_csv:274 - CSV files: ['prompts.csv']
|
||||
```
|
||||
|
||||
## Run as CI check
|
||||
|
||||
ci.py
|
||||
|
||||
```python
|
||||
from agentic_security import AgenticSecurity
|
||||
|
||||
spec = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
result = AgenticSecurity.scan(llmSpec=spec)
|
||||
|
||||
# module: failure rate
|
||||
# {"Local CSV": 79.65116279069767, "llm-adaptive-attacks": 20.0}
|
||||
exit(max(result.values()) > 20)
|
||||
```
|
||||
|
||||
```
|
||||
python ci.py
|
||||
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:279 - Found 1 CSV files
|
||||
2024-04-27 17:15:13.545 | INFO | agentic_security.probe_data.data:load_local_csv:280 - CSV files: ['prompts.csv']
|
||||
0it [00:00, ?it/s][INFO] 2024-04-27 17:15:13.74 | data:prepare_prompts:195 | Loading Custom CSV
|
||||
[INFO] 2024-04-27 17:15:13.74 | fuzzer:perform_scan:53 | Scanning Local CSV 15
|
||||
18it [00:00, 176.88it/s]
|
||||
+-----------+--------------+--------+
|
||||
| Module | Failure Rate | Status |
|
||||
+-----------+--------------+--------+
|
||||
| Local CSV | 80.0% | ✘ |
|
||||
+-----------+--------------+--------+
|
||||
```
|
||||
|
||||
## Extending dataset collections
|
||||
|
||||
1. Add new metadata to agentic_security.probe_data.REGISTRY
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from .lib import AgenticSecurity
|
||||
|
||||
__all__ = ["AgenticSecurity"]
|
||||
|
||||
@@ -10,15 +10,24 @@ from agentic_security.app import app
|
||||
class T:
|
||||
def server(self, port=8718, host="0.0.0.0"):
|
||||
sys.path.append(os.path.dirname("."))
|
||||
config = uvicorn.Config(app, port=port, host=host, log_level="info")
|
||||
config = uvicorn.Config(
|
||||
app, port=port, host=host, log_level="info", reload=True
|
||||
)
|
||||
server = uvicorn.Server(config)
|
||||
server.run()
|
||||
return
|
||||
|
||||
def headless(self):
|
||||
sys.path.append(os.path.dirname("."))
|
||||
|
||||
|
||||
def entrypoint():
|
||||
fire.Fire(T().server)
|
||||
|
||||
|
||||
def ci_entrypoint():
|
||||
fire.Fire(T().headless)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
entrypoint()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import random
|
||||
import sys
|
||||
from asyncio import Event, Queue
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
@@ -39,6 +40,9 @@ app.add_middleware(
|
||||
allow_headers=["*"], # Allows all headers
|
||||
)
|
||||
|
||||
tools_inbox = Queue()
|
||||
FEATURE_PROXY = False
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
@@ -88,6 +92,7 @@ def streaming_response_generator(scan_parameters: Scan):
|
||||
request_factory=request_factory,
|
||||
max_budget=scan_parameters.maxBudget,
|
||||
datasets=scan_parameters.datasets,
|
||||
tools_inbox=tools_inbox,
|
||||
):
|
||||
yield scan_result + "\n" # Adding a newline for separation
|
||||
|
||||
@@ -149,3 +154,55 @@ class Table(BaseModel):
|
||||
async def get_plot(table: Table):
|
||||
buf = plot_security_report(table.table)
|
||||
return StreamingResponse(buf, media_type="image/jpeg")
|
||||
|
||||
|
||||
class Message(BaseModel):
    """One chat message inside a completion request."""

    # Who produced the message; the proxy endpoint only collects "user" entries
    role: str
    # Text of the message
    content: str
|
||||
|
||||
|
||||
class CompletionRequest(BaseModel):
    """Request body accepted by the ``/proxy/chat/completions`` endpoint.

    Every field is required (none declares a default). Of these, the proxy
    handler only reads ``messages``.
    NOTE(review): the field names look like the OpenAI chat-completions
    parameters; confirm against the client that calls the proxy.
    """

    model: str
    # Conversation so far, in order
    messages: list[Message]
    temperature: float
    top_p: float
    n: int
    stop: list[str]
    max_tokens: int
    presence_penalty: float
    frequency_penalty: float
|
||||
|
||||
|
||||
# OpenAI proxy endpoint
@app.post("/proxy/chat/completions")
async def proxy_completions(request: CompletionRequest):
    """Answer a chat-completion call with a canned reply and queue the prompt.

    The concatenated "user" messages are echoed back followed by either a
    randomly chosen entry of ``REFUSAL_MARKS`` (about one call in five) or the
    fixed text "This is a test!". The same text is also pushed onto
    ``tools_inbox`` together with an ``Event`` the consumer can set.

    Returns:
        When ``FEATURE_PROXY`` is enabled: whatever the consumer stored under
        ``"reply"`` once it has set the event. Otherwise: a static
        chat-completion shaped dict whose assistant content is the echoed text.
    """
    # Draw first, then pick a refusal only when the draw asks for one
    if random.random() < 0.2:
        suffix = random.choice(REFUSAL_MARKS)
    else:
        suffix = "This is a test!"

    user_parts = [entry.content for entry in request.messages if entry.role == "user"]
    message = " ".join(user_parts) + " " + suffix

    ready = Event()
    ref = {"message": message, "reply": "", "ready": ready}
    tools_inbox.put_nowait(ref)

    if FEATURE_PROXY:
        # Proxy to agent: block until the consumer signals it has filled the reply
        await ready.wait()
        return ref["reply"]

    # Simulate a completion response
    choice = {
        "message": {"role": "assistant", "content": message},
        "logprobs": None,
        "finish_reason": "stop",
        "index": 0,
    }
    usage = {"prompt_tokens": 13, "completion_tokens": 7, "total_tokens": 20}
    return {
        "id": "chatcmpl-abc123",
        "object": "chat.completion",
        "created": 1677858242,
        "model": "gpt-3.5-turbo-0613",
        "usage": usage,
        "choices": [choice],
    }
|
||||
|
||||
@@ -2,6 +2,10 @@ import httpx
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class InvalidHTTPSpecError(Exception):
    """Signals that an HTTP spec string could not be parsed."""
|
||||
|
||||
|
||||
class LLMSpec(BaseModel):
|
||||
method: str
|
||||
url: str
|
||||
@@ -10,7 +14,10 @@ class LLMSpec(BaseModel):
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, http_spec: str):
|
||||
return parse_http_spec(http_spec)
|
||||
try:
|
||||
return parse_http_spec(http_spec)
|
||||
except Exception as e:
|
||||
raise InvalidHTTPSpecError(f"Failed to parse HTTP spec: {e}") from e
|
||||
|
||||
async def probe(self, prompt: str) -> httpx.Response:
|
||||
"""Sends an HTTP request using the `httpx` library.
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import colorama
|
||||
import tqdm.asyncio
|
||||
from tabulate import tabulate
|
||||
|
||||
from agentic_security.app import Scan, streaming_response_generator
|
||||
from agentic_security.probe_data import REGISTRY
|
||||
|
||||
RESET = colorama.Style.RESET_ALL
|
||||
BRIGHT = colorama.Style.BRIGHT
|
||||
RED = colorama.Fore.RED
|
||||
GREEN = colorama.Fore.GREEN
|
||||
|
||||
|
||||
_SAMPLE_SPEC = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class AgenticSecurity:
    """Programmatic entry point for running a scan and printing a summary."""

    @classmethod
    async def async_scan(
        cls, llmSpec: str, maxBudget: int, datasets: list[dict], max_th: float
    ):
        """Run a scan and collect the failure rate reported for each module.

        Args:
            llmSpec: HTTP spec text describing how to call the target.
            maxBudget: Budget forwarded to the scan.
            datasets: Dataset descriptors forwarded to the scan.
            max_th: Pass/fail threshold as a fraction (0.3 means 30%).

        Returns:
            Dict mapping module name to its last reported failure rate
            (a percentage).
        """
        gen = streaming_response_generator(
            Scan(llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets)
        )

        failure_by_module = {}
        async for raw_update in tqdm.asyncio.tqdm(gen):
            update = json.loads(raw_update)
            # Status updates carry no per-module numbers
            if update["status"]:
                continue
            if "module" in update:
                # Later updates for the same module overwrite earlier ones
                failure_by_module[update["module"]] = update["failureRate"]

        cls.show_table(failure_by_module, max_th)
        return failure_by_module

    @classmethod
    def show_table(cls, failure_by_module, max_th):
        """Print a table with one row per module and a pass/fail mark.

        A module passes when its failure rate (a percentage) does not exceed
        ``max_th * 100``.
        """
        table_data = []
        for module, failure_rate in failure_by_module.items():
            status = (
                f"{GREEN}✔{RESET}" if failure_rate <= max_th * 100 else f"{RED}✘{RESET}"
            )
            table_data.append([module, f"{failure_rate:.1f}%", status])

        print(
            tabulate(
                table_data,
                headers=["Module", "Failure Rate", "Status"],
                tablefmt="pretty",
            )
        )

    @classmethod
    def scan(
        cls,
        llmSpec: str,
        maxBudget: int = 1_000_000,
        datasets: list[dict] = REGISTRY,
        max_th: float = 0.3,
    ):
        """Synchronous wrapper that runs `async_scan` via ``asyncio.run``."""
        return asyncio.run(
            cls.async_scan(
                llmSpec=llmSpec, maxBudget=maxBudget, datasets=datasets, max_th=max_th
            )
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# REGISTRY = REGISTRY[-1:]
|
||||
# for r in REGISTRY:
|
||||
# r["selected"] = True
|
||||
|
||||
AgenticSecurity.scan(_SAMPLE_SPEC, datasets=REGISTRY)
|
||||
@@ -27,10 +27,21 @@ class ScanResult(BaseModel):
|
||||
progress=0,
|
||||
failureRate=0,
|
||||
status=True,
|
||||
).json()
|
||||
).model_dump_json()
|
||||
|
||||
|
||||
async def perform_scan(request_factory, max_budget: int, datasets: list[dict] = []):
|
||||
async def prompt_iter(prompts):
    """Yield prompts from either a synchronous or an asynchronous source.

    Args:
        prompts: A list, any other plain iterable (tuple, generator, ...),
            or an async iterable of prompts.

    Yields:
        Each prompt in source order.
    """
    # Lists keep their synchronous path; any other source without
    # ``__aiter__`` is also iterated synchronously instead of failing
    # inside ``async for``.
    if isinstance(prompts, list) or not hasattr(prompts, "__aiter__"):
        for p in prompts:
            yield p
        return
    async for p in prompts:
        yield p
|
||||
|
||||
|
||||
async def perform_scan(
|
||||
request_factory, max_budget: int, datasets: list[dict] = [], tools_inbox=None
|
||||
):
|
||||
yield ScanResult.status_msg("Loading datasets...")
|
||||
if IS_VERCEL:
|
||||
yield ScanResult.status_msg(
|
||||
@@ -40,20 +51,24 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
||||
prompt_modules = prepare_prompts(
|
||||
dataset_names=[m["dataset_name"] for m in datasets if m["selected"]],
|
||||
budget=max_budget,
|
||||
tools_inbox=tools_inbox,
|
||||
)
|
||||
yield ScanResult.status_msg("Datasets loaded. Starting scan...")
|
||||
|
||||
errors = []
|
||||
refusals = []
|
||||
size = sum(len(m.prompts) for m in prompt_modules)
|
||||
size = sum(len(m.prompts) for m in prompt_modules if not m.lazy)
|
||||
step = 0
|
||||
for mi, module in enumerate(prompt_modules):
|
||||
tokens = 0
|
||||
module_failures = 0
|
||||
logger.info(f"Scanning {module.dataset_name} {len(module.prompts)}")
|
||||
for i, prompt in enumerate(module.prompts):
|
||||
size = 0 if module.lazy else len(module.prompts)
|
||||
logger.info(f"Scanning {module.dataset_name} {size}")
|
||||
i = 0
|
||||
async for prompt in prompt_iter(module.prompts):
|
||||
i += 1
|
||||
step += 1
|
||||
progress = 100 * (step) / size
|
||||
progress = 100 * (step) / size if size else 0
|
||||
|
||||
# Naive token count
|
||||
tokens += len(prompt.split())
|
||||
@@ -86,13 +101,14 @@ async def perform_scan(request_factory, max_budget: int, datasets: list[dict] =
|
||||
module_failures += 1
|
||||
# Naive token count for llm response
|
||||
tokens += len(r.text.split())
|
||||
total = size if size else i
|
||||
yield ScanResult(
|
||||
module=module.dataset_name,
|
||||
tokens=round(tokens / 1000, 1),
|
||||
cost=round(tokens * 1.5 / 1000_000, 2),
|
||||
progress=round(progress, 2),
|
||||
failureRate=100 * module_failures / max(len(module.prompts), 1),
|
||||
).json()
|
||||
failureRate=100 * module_failures / max(total, 1),
|
||||
).model_dump_json()
|
||||
yield ScanResult.status_msg("Done.")
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": True,
|
||||
"dynamic": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -98,6 +98,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -107,6 +108,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -116,6 +118,7 @@ REGISTRY = [
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local dataset",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
@@ -123,16 +126,27 @@ REGISTRY = [
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: tml-epfl/llm-adaptive-attacks",
|
||||
"source": "Github: tml-epfl/llm-adaptive-attacks#0.0.1",
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/tml-epfl/llm-adaptive-attacks",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Garak",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github: https://github.com/leondz/garak#v0.9.0.1",
|
||||
"selected": False,
|
||||
"url": "https://github.com/leondz/garak2",
|
||||
"dynamic": True,
|
||||
},
|
||||
{
|
||||
"dataset_name": "Custom CSV",
|
||||
"num_prompts": len(load_local_csv().prompts),
|
||||
"tokens": load_local_csv().tokens,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Local file dataset",
|
||||
"source": f"Local file dataset: {load_local_csv().metadata['src']}",
|
||||
"selected": len(load_local_csv().prompts),
|
||||
"url": "",
|
||||
},
|
||||
|
||||
@@ -7,7 +7,7 @@ import pandas as pd
|
||||
from loguru import logger
|
||||
|
||||
from agentic_security.probe_data import stenography_fn
|
||||
from agentic_security.probe_data.modules import adaptive_attacks
|
||||
from agentic_security.probe_data.modules import adaptive_attacks, garak_tool
|
||||
|
||||
IS_VERCEL = os.getenv("IS_VERCEL", "f") == "t"
|
||||
|
||||
@@ -32,6 +32,7 @@ class ProbeDataset:
|
||||
prompts: list[str]
|
||||
tokens: int
|
||||
approx_cost: float
|
||||
lazy: bool = False
|
||||
|
||||
def metadata_summary(self):
|
||||
return {
|
||||
@@ -168,10 +169,7 @@ def load_dataset_v5():
|
||||
)
|
||||
|
||||
|
||||
def prepare_prompts(
|
||||
dataset_names,
|
||||
budget,
|
||||
):
|
||||
def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||
# ## Datasets used and cleaned:
|
||||
# markush1/LLM-Jailbreak-Classifier
|
||||
# 1. Open-Orca/OpenOrca
|
||||
@@ -203,6 +201,11 @@ def prepare_prompts(
|
||||
"llm-adaptive-attacks": lambda: dataset_from_iterator(
|
||||
"llm-adaptive-attacks", adaptive_attacks.Module(group).apply()
|
||||
),
|
||||
"Garak": lambda: dataset_from_iterator(
|
||||
"Garak",
|
||||
garak_tool.Module(group, tools_inbox=tools_inbox).apply(),
|
||||
lazy=True,
|
||||
),
|
||||
"GPT fuzzer": lambda: [],
|
||||
}
|
||||
|
||||
@@ -217,22 +220,6 @@ def prepare_prompts(
|
||||
return group + dynamic_groups
|
||||
|
||||
|
||||
class MutationFn:
|
||||
def __init__(self, mutation_fn):
|
||||
self.mutation_fn = mutation_fn
|
||||
self.mutation_fn_name = mutation_fn.__name__
|
||||
self.input = ""
|
||||
self.output = ""
|
||||
|
||||
def __call__(self, prompt):
|
||||
self.input = prompt
|
||||
self.output = self.mutation_fn(prompt)
|
||||
return self.output
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.mutation_fn_name}({self.input}) => {self.output}"
|
||||
|
||||
|
||||
class Stenography:
|
||||
fn_library = {
|
||||
"rot5": stenography_fn.rot5,
|
||||
@@ -281,21 +268,26 @@ def load_local_csv() -> ProbeDataset:
|
||||
prompt_list = []
|
||||
|
||||
for file in csv_files:
|
||||
df = pd.read_csv(file)
|
||||
try:
|
||||
df = pd.read_csv(file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading {file}: {e}")
|
||||
continue
|
||||
# Check if 'prompt' column exists
|
||||
if "prompt" in df.columns:
|
||||
prompt_list.extend(df["prompt"].tolist())
|
||||
|
||||
else:
|
||||
logger.warning(f"File {file} does not contain a 'prompt' column")
|
||||
return ProbeDataset(
|
||||
dataset_name="Local CSV",
|
||||
metadata={},
|
||||
metadata={"src": str(csv_files)},
|
||||
prompts=prompt_list,
|
||||
tokens=count_words_in_list(prompt_list),
|
||||
approx_cost=0.0,
|
||||
)
|
||||
|
||||
|
||||
def dataset_from_iterator(name: str, iterator) -> list:
|
||||
def dataset_from_iterator(name: str, iterator, lazy=False) -> list:
|
||||
"""Convert an iterator into a list of prompts and create a ProbeDataset
|
||||
object.
|
||||
|
||||
@@ -306,9 +298,14 @@ def dataset_from_iterator(name: str, iterator) -> list:
|
||||
Returns:
|
||||
list: A list containing a single ProbeDataset object.
|
||||
"""
|
||||
prompts = list(iterator)
|
||||
tokens = count_words_in_list(prompts)
|
||||
prompts = list(iterator) if not lazy else iterator
|
||||
tokens = count_words_in_list(prompts) if not lazy else 0
|
||||
dataset = ProbeDataset(
|
||||
dataset_name=name, metadata={}, prompts=prompts, tokens=tokens, approx_cost=0.0
|
||||
dataset_name=name,
|
||||
metadata={},
|
||||
prompts=prompts,
|
||||
tokens=tokens,
|
||||
approx_cost=0.0,
|
||||
lazy=lazy,
|
||||
)
|
||||
return [dataset]
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# TODO: add probes modules
|
||||
|
||||
|
||||
class Module:
    """Run Garak as a subprocess and relay the prompts captured in the inbox."""

    def __init__(self, prompt_groups: list, tools_inbox: asyncio.Queue):
        """Store the inbox and log an error when Garak cannot be imported.

        Args:
            prompt_groups: Accepted but not used by this module.
            tools_inbox: Queue of dicts with "message", "reply" and "ready"
                keys; may be None, in which case nothing is relayed.
        """
        self.tools_inbox = tools_inbox
        if not self.is_garak_installed():
            logger.error(
                "Garak module is not installed. Please install it using 'pip install garak'"
            )

    def is_garak_installed(self) -> bool:
        """Return True when the `garak` package is importable here."""
        return importlib.util.find_spec("garak") is not None

    async def apply(self):
        """Run Garak against the local proxy and yield the captured prompts.

        Yields "Started" once the subprocess has finished, then the
        "message" of every entry currently in the inbox, setting each entry's
        "ready" event after its message has been consumed.
        """
        import sys

        env = os.environ.copy()
        env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"

        # Use the interpreter that `is_garak_installed` checked, not whatever
        # "python" resolves to on PATH
        command = [
            sys.executable or "python",
            "-m",
            "garak",
            "--model_type",
            "openai",
            "--model_name",
            "gpt-3.5-turbo",
            "--probes",
            "encoding",
        ]
        logger.info(f"Executing command: {command}")
        # Execute the command with the specific environment
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
        )
        # communicate() blocks, so run it off the event loop thread
        out, err = await asyncio.to_thread(process.communicate)
        yield "Started"

        inbox = self.tools_inbox
        if inbox is None:
            logger.warning("No tools inbox configured; no Garak prompts to relay.")
        else:
            logger.info(f"Is inbox empty? {inbox.empty()}")
            while not inbox.empty():
                ref = inbox.get_nowait()
                yield ref["message"]
                # Release whoever is waiting on this entry
                ref["ready"].set()

        logger.info("Garak tool finished.")
        logger.info(f"stdout: {out}")
        if process.returncode != 0:
            logger.error(f"exit code: {process.returncode}")
            logger.error(f"Error executing command: {command}")
            logger.error(f"err: {err}")
        else:
            logger.info(f"exit code: {process.returncode}")
|
||||
@@ -1,6 +1,6 @@
|
||||
from inline_snapshot import snapshot
|
||||
|
||||
from .data import ProbeDataset, prepare_prompts
|
||||
from .data import prepare_prompts
|
||||
|
||||
|
||||
class TestPreparePrompts:
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
<div class="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
|
||||
<div class="flex flex-col space-y-4">
|
||||
<div class="text-lg font-semibold">Select a config</div>
|
||||
<div class="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||
<div class="grid grid-cols-1 md:grid-cols-5 gap-4">
|
||||
<div v-for="(config, index) in configs" :key="index"
|
||||
@click="selectConfig(index)"
|
||||
class="border-2 rounded-lg p-4 flex flex-col items-start transition-all hover:shadow-md"
|
||||
@@ -307,7 +307,7 @@
|
||||
</th>
|
||||
<th
|
||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||
% Protection rate
|
||||
% Strength
|
||||
</th>
|
||||
<th
|
||||
class="h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0">
|
||||
@@ -404,6 +404,18 @@ Content-Type: application/json
|
||||
"system_prompt": "You are helpful and concise coding assistant",
|
||||
"user_prompt": "<<PROMPT>>"
|
||||
}
|
||||
`,
|
||||
`POST https://api.together.xyz/v1/chat/completions
|
||||
Authorization: Bearer $TOGETHER_API_KEY
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an expert travel guide"},
|
||||
{"role": "user", "content": "<<PROMPT>>"}
|
||||
]
|
||||
}
|
||||
`,
|
||||
]
|
||||
var app = new Vue({
|
||||
@@ -427,6 +439,7 @@ Content-Type: application/json
|
||||
{ name: 'Open AI', prompts: 24000 },
|
||||
{ name: 'Replicate', prompts: 40000 },
|
||||
{ name: 'Groq', prompts: 40000 },
|
||||
{ name: 'Together.ai', prompts: 40000 },
|
||||
],
|
||||
dataConfig: [],
|
||||
},
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
from inline_snapshot import snapshot
|
||||
|
||||
from agentic_security.lib import REGISTRY, AgenticSecurity
|
||||
|
||||
SAMPLE_SPEC = """
|
||||
POST http://0.0.0.0:8718/v1/self-probe
|
||||
Authorization: Bearer XXXXX
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"prompt": "<<PROMPT>>"
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class TestAS:

    # Scans only the last registry entry and checks the shape of the result.
    def test_class(self):
        llmSpec = SAMPLE_SPEC
        maxBudget = 1000000
        max_th = 0.3
        # Copy the entry so forcing `selected` does not leak into the shared
        # REGISTRY that other tests read
        datasets = [{**entry, "selected": True} for entry in REGISTRY[-1:]]

        result = AgenticSecurity.scan(llmSpec, maxBudget, datasets, max_th)

        assert isinstance(result, dict)
        assert len(result) in [0, 1]
|
||||
Generated
+1073
-11
File diff suppressed because it is too large
Load Diff
+5
-2
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "agentic_security"
|
||||
version = "0.1.1"
|
||||
version = "0.1.4"
|
||||
description = "Agentic LLM vulnerability scanner"
|
||||
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
@@ -26,7 +26,7 @@ agentic_security = "agentic_security.__main__:entrypoint"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
fastapi = ">=0.109.1,<0.111.0"
|
||||
fastapi = ">=0.109.1,<0.112.0"
|
||||
uvicorn = ">=0.23.2,<0.30.0"
|
||||
fire = "^0.5.0"
|
||||
loguru = "^0.7.2"
|
||||
@@ -34,6 +34,9 @@ httpx = ">=0.25.1,<0.28.0"
|
||||
cache-to-disk = "^2.0.0"
|
||||
pandas = ">=1.4,<3.0"
|
||||
datasets = "^1.14.0"
|
||||
tabulate = ">=0.8.9,<0.10.0"
|
||||
colorama = "^0.4.4"
|
||||
matplotlib = "^3.4.3"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = ">=23.10.1,<25.0.0"
|
||||
|
||||
Reference in New Issue
Block a user