mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-23 21:59:57 +02:00
feat(Add new datasets; v0.1.6):
This commit is contained in:
@@ -26,7 +26,6 @@
|
||||
- LLM API integration and stress testing 🛠️
|
||||
- Wide range of fuzzing and attack techniques 🌀
|
||||
|
||||
|
||||
| Tool | Source | Integrated |
|
||||
|-------------------------|-------------------------------------------------------------------------------|------------|
|
||||
| Garak | [leondz/garak](https://github.com/leondz/garak) | ✅ |
|
||||
@@ -35,11 +34,8 @@
|
||||
| Custom Huggingface Datasets | markush1/LLM-Jailbreak-Classifier | ✅ |
|
||||
| Local CSV Datasets | - | ✅ |
|
||||
|
||||
|
||||
|
||||
Note: Please be aware that Agentic Security is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
|
||||
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
To get started with Agentic Security, simply install the package using pip:
|
||||
@@ -73,7 +69,6 @@ agentic_security --port=PORT --host=HOST
|
||||
|
||||
## UI 🧙
|
||||
|
||||
|
||||
<img width="100%" alt="booking-screen" src="https://res.cloudinary.com/do9qa2bqr/image/upload/v1713002396/1-ezgif.com-video-to-gif-converter_s2hsro.gif">
|
||||
|
||||
## LLM kwargs
|
||||
@@ -305,11 +300,7 @@ Agentic Security is released under the Apache License v2.
|
||||
|
||||
## Contact us
|
||||
|
||||
## 🤝 Schedule a 1-on-1 Session
|
||||
|
||||
<a href="https://cal.com/alexander-myasoedov-go2tfs/30min"><img src="https://cal.com/book-with-cal-dark.svg" alt="Book us with Cal.com"></a>
|
||||
|
||||
Book a 1-on-1 session with the founders to discuss any issues, provide feedback, or explore how we can improve agentic_security for you.
|
||||
|
||||
## Repo Activity
|
||||
|
||||
|
||||
@@ -61,6 +61,26 @@ REGISTRY = [
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/markush1/LLM-Jailbreak-Classifier",
|
||||
},
|
||||
{
|
||||
"dataset_name": "JailbreakV-28K/JailBreakV-28k",
|
||||
"num_prompts": 28300,
|
||||
"tokens": 1975800,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Hugging Face Datasets",
|
||||
"selected": True,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/JailbreakV-28K/JailBreakV-28k",
|
||||
},
|
||||
{
|
||||
"dataset_name": "ShawnMenz/jailbreak_sft_rm_ds",
|
||||
"num_prompts": 371000,
|
||||
"tokens": 1975800,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Hugging Face Datasets",
|
||||
"selected": True,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/ShawnMenz/jailbreak_sft_rm_ds",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Steganography",
|
||||
"num_prompts": 10,
|
||||
@@ -91,6 +111,26 @@ REGISTRY = [
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
},
|
||||
{
|
||||
"dataset_name": "jailbreak_llms/2023_05_07",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github",
|
||||
"selected": True,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/verazuo/jailbreak_llms",
|
||||
},
|
||||
{
|
||||
"dataset_name": "jailbreak_llms/2023_12_25.csv",
|
||||
"num_prompts": 0,
|
||||
"tokens": 0,
|
||||
"approx_cost": 0.0,
|
||||
"source": "Github",
|
||||
"selected": True,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/verazuo/jailbreak_llms",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Malwaregen",
|
||||
"num_prompts": 0,
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import io
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
|
||||
import httpx
|
||||
import pandas as pd
|
||||
from loguru import logger
|
||||
|
||||
@@ -148,6 +150,44 @@ def load_dataset_v6():
|
||||
)
|
||||
|
||||
|
||||
@cache_to_disk()
def load_dataset_v7():
    """Load the full JailBreakV-28K CSV and wrap it as a ProbeDataset.

    The CSV is read through pandas from an ``hf://datasets/...`` path and
    every value of its ``jailbreak_query`` column is used as a prompt.

    Returns:
        ProbeDataset: named ``JailbreakV-28K/JailBreakV-28k``, with ``tokens``
        computed by ``count_words_in_list`` and ``approx_cost`` set to 0.0.
    """
    # Two CSV paths are listed; only the full "JailBreakV_28K" one is loaded.
    splits = {
        "mini_JailBreakV_28K": "JailBreakV_28K/mini_JailBreakV_28K.csv",
        "JailBreakV_28K": "JailBreakV_28K/JailBreakV_28K.csv",
    }
    df = pd.read_csv(
        "hf://datasets/JailbreakV-28K/JailBreakV-28k/" + splits["JailBreakV_28K"]
    )
    bad_prompts = df["jailbreak_query"].tolist()
    # Report the frame size through the logger instead of a bare print so the
    # message follows the application's logging configuration.
    logger.info(f"Loaded JailbreakV-28K/JailBreakV-28k with shape {df.shape}")
    return ProbeDataset(
        dataset_name="JailbreakV-28K/JailBreakV-28k",
        metadata={},
        prompts=bad_prompts,
        tokens=count_words_in_list(bad_prompts),
        approx_cost=0.0,
    )
|
||||
|
||||
|
||||
@cache_to_disk()
def load_dataset_v8():
    """Load the ShawnMenz/jailbreak_sft_rm_ds CSV and keep its jailbreak rows.

    The CSV is read with explicit column names ``jailbreak`` and ``prompt``;
    only rows whose ``jailbreak`` value equals the string ``"jailbreak"``
    contribute their ``prompt`` to the result.

    Returns:
        ProbeDataset: named ``ShawnMenz/jailbreak_sft_rm_ds``, with ``tokens``
        computed by ``count_words_in_list`` and ``approx_cost`` set to 0.0.
    """
    df = pd.read_csv(
        "hf://datasets/ShawnMenz/jailbreak_sft_rm_ds/jailbreak_sft_rm_ds.csv",
        names=["jailbreak", "prompt"],
    )
    filtered = df[df["jailbreak"] == "jailbreak"]["prompt"].tolist()
    return ProbeDataset(
        # Label the result with the dataset actually loaded here; it was
        # previously tagged with the JailbreakV-28K name by mistake.
        dataset_name="ShawnMenz/jailbreak_sft_rm_ds",
        metadata={},
        prompts=filtered,
        tokens=count_words_in_list(filtered),
        approx_cost=0.0,
    )
|
||||
|
||||
|
||||
@cache_to_disk()
|
||||
def load_dataset_v5():
|
||||
from datasets import load_dataset
|
||||
@@ -173,6 +213,22 @@ def load_dataset_v5():
|
||||
)
|
||||
|
||||
|
||||
@cache_to_disk()
def load_generic_csv(url, name, column="prompt", predicator=None):
    """Download a CSV over HTTP and build a ProbeDataset from one column.

    Args:
        url: Address of the CSV file; fetched with ``httpx.get``.
        name: Value used as the resulting ``dataset_name``.
        column: Column whose values become the prompts.
        predicator: Optional callable applied to each row (``axis=1``); rows
            for which it returns a truthy value are kept. ``None`` keeps
            every row.

    Returns:
        ProbeDataset: the selected prompts, with ``tokens`` computed by
        ``count_words_in_list`` and ``approx_cost`` set to 0.0.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    # NOTE(review): callers pass lambdas as ``predicator``; confirm that
    # ``cache_to_disk`` can build a cache key from such arguments.
    r = httpx.get(url)
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV.
    r.raise_for_status()
    df = pd.read_csv(io.StringIO(r.content.decode("utf-8")))
    logger.info(f"Loaded {len(df)} prompts from {url}")
    if predicator is None:
        # The default must not be handed to ``df.apply``; keep all rows.
        selected = df
    else:
        selected = df[df.apply(predicator, axis=1)]
    filtered_prompts = selected[column].tolist()
    return ProbeDataset(
        dataset_name=name,
        metadata={},
        prompts=filtered_prompts,
        tokens=count_words_in_list(filtered_prompts),
        approx_cost=0.0,
    )
|
||||
|
||||
|
||||
def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||
# ## Datasets used and cleaned:
|
||||
# markush1/LLM-Jailbreak-Classifier
|
||||
@@ -188,6 +244,20 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None):
|
||||
"rubend18/ChatGPT-Jailbreak-Prompts": load_dataset_v3,
|
||||
"Lemhf14/EasyJailbreak_Datasets": load_dataset_v5,
|
||||
"markush1/LLM-Jailbreak-Classifier": load_dataset_v6,
|
||||
"JailbreakV-28K/JailBreakV-28k": load_dataset_v7,
|
||||
"ShawnMenz/jailbreak_sft_rm_ds": load_dataset_v8,
|
||||
"verazuo/jailbreak_llms/2023_05_07": lambda: load_generic_csv(
|
||||
url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_05_07.csv",
|
||||
name="verazuo/jailbreak_llms/2023_05_07",
|
||||
column="prompt",
|
||||
predicator=lambda x: bool(x["jailbreak"]),
|
||||
),
|
||||
"verazuo/jailbreak_llms/2023_12_25.csv": lambda: load_generic_csv(
|
||||
url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_12_25.csv.csv",
|
||||
name="verazuo/jailbreak_llms/2023_12_25.csv",
|
||||
column="prompt",
|
||||
predicator=lambda x: bool(x["jailbreak"]),
|
||||
),
|
||||
"Custom CSV": load_local_csv,
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "agentic_security"
|
||||
version = "0.1.5"
|
||||
version = "0.1.6"
|
||||
description = "Agentic LLM vulnerability scanner"
|
||||
authors = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
maintainers = ["Alexander Miasoiedov <msoedov@gmail.com>"]
|
||||
|
||||
Reference in New Issue
Block a user