feat(datasets): add Hugging Face jailbreak datasets to the registry

This commit is contained in:
Alexander Myasoedov
2025-01-07 15:42:32 +02:00
parent 8857842e40
commit cf2f2e255a
6 changed files with 293 additions and 7 deletions
+202 -1
View File
@@ -1,6 +1,207 @@
from .data import load_local_csv
REGISTRY = [
# Registry entries for the Hugging Face jailbreak datasets.
#
# Every entry carries the same static metadata; only the repository id and the
# prompt count vary, and the URL is the Hugging Face base URL followed by the
# repository id. The entries are therefore generated from a compact
# (repository id, prompt count) table instead of being spelled out one by one.
REGISTRY_V0 = [
    {
        "dataset_name": repo_id,
        "num_prompts": prompt_count,
        "tokens": None,  # Add actual token count if available
        "approx_cost": 0.0,
        "source": "Hugging Face Datasets",
        "selected": False,
        "dynamic": False,
        "url": "https://huggingface.co/" + repo_id,
        "modality": "text",
    }
    for repo_id, prompt_count in (
        ("simonycl/aya-23-8B_advbench_jailbreak", 416),
        (
            "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b",
            11191,
        ),
        ("karanxa/dolphin-jailbreak-finetuning-dataset", 42684),
        ("karanxa/llama-2-jailbreak-dataset", 40613),
        ("karanxa/llama2-uncensored-jailbreak-dataset-finetuning", 42854),
        ("liuyanchen1015/Llama-3.2-1B_jailbreak_responses", 9888),
        ("liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses", 9888),
        (
            "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment",
            9888,
        ),
        ("jackhhao/jailbreak-classification", 1044),
        ("markush1/LLM-Jailbreak-Classifier", 201193),
        ("walledai/JailbreakBench", 200),
        ("walledai/JailbreakHub", 15140),
        ("Granther/evil-jailbreak", 1200),
        ("sevdeawesome/jailbreak_success", 10800),
        ("IDA-SERICS/Disaster-tweet-jailbreaking", 3000),
        ("GeorgeDaDude/Jailbreak_Complete_DS_labeled", 11383),
        ("dayone3nder/jailbreak_prompt_JBB_sft_trainset", 4785),
        ("dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset", 24679),
    )
]
REGISTRY = REGISTRY_V0 + [
{
"dataset_name": "AgenticBackend",
"num_prompts": 2000,
@@ -3,6 +3,8 @@ import platform
import subprocess
import uuid
from cache_to_disk import cache_to_disk
def generate_audio_mac_wav(prompt: str) -> bytes:
"""
@@ -43,6 +45,7 @@ def generate_audio_mac_wav(prompt: str) -> bytes:
return audio_bytes
@cache_to_disk()
def generate_audioform(prompt: str) -> bytes:
"""
Generate an audio file from the provided prompt in WAV format.
+72
View File
@@ -238,6 +238,7 @@ def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]):
),
"Custom CSV": load_local_csv,
}
dataset_map.update(dataset_map_generics)
group = []
for dataset_name in dataset_names:
@@ -376,3 +377,74 @@ def dataset_from_iterator(name: str, iterator, lazy=False) -> list:
# TODO: refactor this abstraction
def _generic_loader(repo_id, prompt_column=None):
    """Return a zero-argument callable that loads *repo_id* on demand.

    The callable forwards to ``load_dataset_general``. When *prompt_column*
    is given it is passed as ``column_mappings={"prompt": prompt_column}``;
    when it is ``None`` the ``column_mappings`` argument is omitted entirely,
    so whatever default ``load_dataset_general`` defines is used.
    """
    if prompt_column is None:
        return lambda: load_dataset_general(dataset_name=repo_id)
    return lambda: load_dataset_general(
        dataset_name=repo_id,
        # A fresh mapping dict is built on every call so no two loads share one.
        column_mappings={"prompt": prompt_column},
    )


# Maps a dataset name to a lazy loader. Each row of the table is
# (Hugging Face repository id, source column holding the prompt text); a
# column of None means the loader is called without column_mappings.
dataset_map_generics = {
    repo_id: _generic_loader(repo_id, prompt_column)
    for repo_id, prompt_column in (
        ("simonycl/aya-23-8B_advbench_jailbreak", "prompt"),
        (
            "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b",
            None,
        ),
        ("karanxa/dolphin-jailbreak-finetuning-dataset", "text"),
        ("karanxa/llama-2-jailbreak-dataset", "text"),
        ("karanxa/llama2-uncensored-jailbreak-dataset-finetuning", "text"),
        (
            "liuyanchen1015/Llama-3.2-1B_jailbreak_responses",
            "jailbreak_prompt_text",
        ),
        (
            "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses",
            "jailbreak_prompt_text",
        ),
        (
            "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment",
            "jailbreak_prompt_text",
        ),
        ("jackhhao/jailbreak-classification", "prompt"),
        ("markush1/LLM-Jailbreak-Classifier", "prompt"),
        ("walledai/JailbreakBench", "prompt"),
        ("walledai/JailbreakHub", "prompt"),
        ("Granther/evil-jailbreak", "text"),
        ("sevdeawesome/jailbreak_success", "jailbreak_prompt_text"),
        ("IDA-SERICS/Disaster-tweet-jailbreaking", "prompt_attack"),
        ("GeorgeDaDude/Jailbreak_Complete_DS_labeled", "question"),
        ("dayone3nder/jailbreak_prompt_JBB_sft_trainset", "prompt"),
        (
            "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset",
            "prompt",
        ),
    )
}
@@ -1,6 +1,7 @@
import io
import matplotlib.pyplot as plt
from cache_to_disk import cache_to_disk
from tqdm import tqdm
from agentic_security.probe_data.models import ImageProbeDataset, ProbeDataset
@@ -34,6 +35,7 @@ def generate_image_dataset(
return image_datasets
@cache_to_disk()
def generate_image(prompt: str) -> bytes:
"""
Generate an image based on the provided prompt and return it as bytes.
+11 -6
View File
@@ -82,7 +82,7 @@
</label>
<div
v-if="!isFocused"
class="w-full bg-dark-bg text-dark-accent-orange border border-gray-600 rounded-lg p-3 cursor-text"
class="w-full bg-dark-bg text-dark-accent-orange border border-gray-600 rounded-lg p-3 cursor-text mb-5"
@click="focusTextarea"
v-html="highlightedText"></div>
@@ -356,11 +356,16 @@
v-for="(package, index) in dataConfig"
:key="index"
@click="addPackage(index)"
class="border rounded-lg p-3 cursor-pointer transition-all hover:shadow-md"
:class="{'border-dark-accent-green bg-dark-accent-green bg-opacity-20': package.selected, 'border-gray-600': !package.selected}">
<div class="font-medium mb-1">{{ package.dataset_name }}</div>
<div class="text-sm text-gray-400">{{ package.source ||
'Local dataset' }}</div>
class="border rounded-lg p-3 cursor-pointer transition-all hover:shadow-md overflow-hidden"
:class="{
'border-dark-accent-green bg-dark-accent-green bg-opacity-20': package.selected,
'border-gray-600': !package.selected
}">
<div class="font-medium mb-1 truncate">{{ package.dataset_name
}}</div>
<div class="text-sm text-gray-400 truncate">
{{ package.source || 'Local dataset' }}
</div>
<div class="mt-2 text-sm font-semibold">
{{ package.dynamic ? 'Dynamic dataset' :
`${package.num_prompts.toLocaleString()} prompts` }}
+3
View File
@@ -272,6 +272,9 @@ var app = new Vue({
},
adjustHeight(event) {
const element = event.target;
if (!element) {
return
}
// Reset height to ensure accurate measurement
element.style.height = 'auto';
// Adjust height based on scrollHeight