From e2cb9093298b78f75fecf8574ab1739785842c4b Mon Sep 17 00:00:00 2001 From: Alexander Myasoedov Date: Sun, 9 Mar 2025 17:10:14 +0200 Subject: [PATCH] refactor(data module): --- agentic_security/probe_data/data.py | 671 ++++++++++++---------------- 1 file changed, 281 insertions(+), 390 deletions(-) diff --git a/agentic_security/probe_data/data.py b/agentic_security/probe_data/data.py index 4c414d7..b4ffa3e 100644 --- a/agentic_security/probe_data/data.py +++ b/agentic_security/probe_data/data.py @@ -1,7 +1,8 @@ import io import os import random -from functools import lru_cache +from collections.abc import Callable, Iterator +from functools import partial import httpx import pandas as pd @@ -19,281 +20,200 @@ from agentic_security.probe_data.modules import ( rl_model, ) +# Type aliases for clarity +FilterFn = Callable[[pd.Series], bool] +ColumnMappings = dict[str, str] +DatasetLoader = Callable[[], ProbeDataset] -@cache_to_disk() -def load_dataset_general( - dataset_name, - dataset_split="train", - column_mappings=None, - filter_fn=None, - custom_url=None, - additional_metadata=None, -): - """ - Generalized function to load datasets with flexible configurations. - :param dataset_name: Name of the dataset or URL for custom CSVs - :param dataset_split: Split to load from the dataset (e.g., "train") - :param column_mappings: Dictionary mapping dataset columns to expected keys, e.g., {'prompt': 'query'} - :param filter_fn: A filtering function that takes a row and returns True/False - :param custom_url: URL for custom CSV datasets - :param additional_metadata: Additional metadata to include in the ProbeDataset - :return: A ProbeDataset object with the processed data - """ - if custom_url: - logger.info(f"Loading custom CSV dataset from {custom_url}") - r = httpx.get(custom_url) - content = r.content - df = pd.read_csv(io.StringIO(content.decode("utf-8"))) - else: - logger.info(f"Loading dataset {dataset_name} from Hugging Face datasets") - dataset = load_dataset(dataset_name) - df = pd.DataFrame(dataset[dataset_split]) +# Core data loading utilities +def fetch_csv_content(url: str) -> str: + """Fetch CSV content from a URL.""" + response = httpx.get(url) + return response.content.decode("utf-8") - # Apply column mappings if provided - if column_mappings: - df.rename(columns=column_mappings, inplace=True) - # Filter rows if filter_fn is provided +def load_df_from_source(source: str, is_url: bool = False) -> pd.DataFrame: + """Load DataFrame from either URL or Hugging Face dataset.""" + if is_url: + logger.info(f"Loading custom CSV from {source}") + return pd.read_csv(io.StringIO(fetch_csv_content(source))) + logger.info(f"Loading dataset {source} from Hugging Face") + return pd.DataFrame(load_dataset(source)["train"]) + + +def transform_df( + df: pd.DataFrame, + mappings: ColumnMappings | None = None, + filter_fn: FilterFn | None = None, +) -> pd.DataFrame: + """Apply transformations to DataFrame.""" + result = df.copy() + if mappings: + result.rename(columns=mappings, inplace=True) if filter_fn: - df = df[df.apply(filter_fn, axis=1)] - - # Extract prompts - prompts = df[column_mappings.get("prompt", "prompt")].tolist() - - return ProbeDataset( - dataset_name=dataset_name, - metadata=additional_metadata or {}, - prompts=prompts, - tokens=count_words_in_list(prompts), - approx_cost=0.0, - ) + result = result[result.apply(filter_fn, axis=1)] + return result -def count_words_in_list(str_list): - """Calculate the total number of words in a given list of strings. - - :param str_list: List of strings - :return: Total number of words across all strings in the list - """ - total_words = sum(len(str(s).split()) for s in str_list) - return total_words - - -@cache_to_disk() -def load_dataset_v1(): - return load_dataset_general( - dataset_name="ShawnMenz/DAN_jailbreak", - column_mappings={"prompt": "prompt", "jailbreak": "jailbreak"}, - filter_fn=lambda row: row["jailbreak"], - ) - - -@cache_to_disk() -def load_dataset_v2(): - return load_dataset_general( - dataset_name="deepset/prompt-injections", - column_mappings={"prompt": "text", "jailbreak": "label"}, - filter_fn=lambda row: row["label"], - ) - - -@cache_to_disk() -def load_dataset_v4(): - return load_dataset_general( - dataset_name="notrichardren/refuse-to-answer-prompts", - column_mappings={"prompt": "claim", "jailbreak": "label"}, - filter_fn=lambda row: row["label"], - ) - - -@cache_to_disk() -def load_dataset_v3(): - return load_dataset_general( - dataset_name="rubend18/ChatGPT-Jailbreak-Prompts", - column_mappings={"prompt": "Prompt"}, - filter_fn=lambda row: row["label"], - ) - - -@cache_to_disk() -def load_dataset_v6(): - dataset = load_dataset("markush1/LLM-Jailbreak-Classifier") - bad_prompts = [ - p - for p, c in zip(dataset["train"]["prompt"], dataset["train"]["classification"]) - if c == "jailbreak" - ] - return ProbeDataset( - dataset_name="markush1/LLM-Jailbreak-Classifier", - metadata={}, - prompts=bad_prompts, - tokens=count_words_in_list(bad_prompts), - approx_cost=0.0, - ) - - -@cache_to_disk() -def load_dataset_v7(): - splits = { - "mini_JailBreakV_28K": "JailBreakV_28K/mini_JailBreakV_28K.csv", - "JailBreakV_28K": "JailBreakV_28K/JailBreakV_28K.csv", - } - df = pd.read_csv( - "hf://datasets/JailbreakV-28K/JailBreakV-28k/" + splits["JailBreakV_28K"] - ) - bad_prompts = df["jailbreak_query"].tolist() - print(df.shape) - return ProbeDataset( - dataset_name="JailbreakV-28K/JailBreakV-28k", - metadata={}, - prompts=bad_prompts, - tokens=count_words_in_list(bad_prompts), - approx_cost=0.0, - ) - - -@cache_to_disk() -def load_dataset_v8(): - df = pd.read_csv( - "hf://datasets/ShawnMenz/jailbreak_sft_rm_ds/jailbreak_sft_rm_ds.csv", - names=["jailbreak", "prompt"], - ) - filtered = df[df["jailbreak"] == "jailbreak"]["prompt"].tolist() - return ProbeDataset( - dataset_name="JailbreakV-28K/JailBreakV-28k", - metadata={}, - prompts=filtered, - tokens=count_words_in_list(filtered), - approx_cost=0.0, - ) - - -@cache_to_disk() -def load_dataset_v5(): - ds = [] - for c in [ - "AdvBench", - "ForbiddenQuestion", - "MJP", - "MaliciousInstruct", - "QuestionList", - ]: - dataset = load_dataset("Lemhf14/EasyJailbreak_Datasets", c) - bad_prompts = dataset["train"]["query"] - ds.extend(bad_prompts) - - return ProbeDataset( - dataset_name="Lemhf14/EasyJailbreak_Datasets", - metadata={}, - prompts=ds, - tokens=count_words_in_list(ds), - approx_cost=0.0, - ) - - -@cache_to_disk() -def load_generic_csv(url, name, column="prompt", predicator=None): - r = httpx.get(url) - content = r.content - df = pd.read_csv(io.StringIO(content.decode("utf-8"))) - logger.info(f"Loaded {len(df)} prompts from {url}") - filtered_prompts = df[df.apply(predicator, axis=1)][column].tolist() +def create_probe_dataset( + name: str, prompts: list[str], metadata: dict = None +) -> ProbeDataset: + """Create a ProbeDataset from prompts.""" + metadata = metadata or {} return ProbeDataset( dataset_name=name, - metadata={}, - prompts=filtered_prompts, - tokens=count_words_in_list(filtered_prompts), + metadata=metadata, + prompts=prompts, + tokens=sum(len(str(s).split()) for s in prompts), approx_cost=0.0, ) -def prepare_prompts(dataset_names, budget, tools_inbox=None, options=[]): - # ## Datasets used and cleaned: - # markush1/LLM-Jailbreak-Classifier - # 1. Open-Orca/OpenOrca - # 2. ShawnMenz/DAN_jailbreak - # 3. EddyLuo/JailBreakV_28K - # 4. https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/jailbreak_prompts.csv - - dataset_map = { - "ShawnMenz/DAN_jailbreak": load_dataset_v1, - "deepset/prompt-injections": load_dataset_v2, - "notrichardren/refuse-to-answer-prompts": load_dataset_v4, - "rubend18/ChatGPT-Jailbreak-Prompts": load_dataset_v3, - "Lemhf14/EasyJailbreak_Datasets": load_dataset_v5, - "markush1/LLM-Jailbreak-Classifier": load_dataset_v6, - "JailbreakV-28K/JailBreakV-28k": load_dataset_v7, - "ShawnMenz/jailbreak_sft_rm_ds": load_dataset_v8, - "verazuo/jailbreak_llms/2023_05_07": lambda: load_generic_csv( - url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_05_07.csv", - name="verazuo/jailbreak_llms/2023_05_07", - column="prompt", - predicator=lambda x: bool(x["jailbreak"]), - ), - "verazuo/jailbreak_llms/2023_12_25.csv": lambda: load_generic_csv( - url="https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_12_25.csv.csv", - name="verazuo/jailbreak_llms/2023_12_25.csv", - column="prompt", - predicator=lambda x: bool(x["jailbreak"]), - ), - "Custom CSV": load_local_csv, - } - dataset_map.update(dataset_map_generics) - - group = [] - for dataset_name in dataset_names: - if dataset_name in dataset_map: - logger.info(f"Loading {dataset_name}") - try: - group.append(dataset_map[dataset_name]()) - except Exception as e: - logger.error(f"Error loading {dataset_name}: {e}") - - dynamic_datasets = { - "AgenticBackend": lambda opts: dataset_from_iterator( - "AgenticBackend", - fine_tuned.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), - lazy=True, - ), - "Steganography": lambda opts: Stenography(group), - "llm-adaptive-attacks": lambda opts: dataset_from_iterator( - "llm-adaptive-attacks", - adaptive_attacks.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), - ), - "Garak": lambda opts: dataset_from_iterator( - "Garak", - garak_tool.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), - lazy=True, - ), - "Reinforcement Learning Optimization": lambda opts: dataset_from_iterator( - "Reinforcement Learning Optimization", - rl_model.Module(group, tools_inbox=tools_inbox, opts=opts).apply(), - lazy=True, - ), - "InspectAI": lambda opts: dataset_from_iterator( - "InspectAI", - inspect_ai_tool.Module(group, tools_inbox=tools_inbox).apply(), - lazy=True, - ), - "GPT fuzzer": lambda opts: [], - } - - dynamic_groups = [] - options = options or [{} for _ in dataset_names] - for dataset_name, opts in zip(dataset_names, options): - if dataset_name in dynamic_datasets: - logger.info(f"Loading {dataset_name}") - - ds = dynamic_datasets[dataset_name](opts) - - for g in ds: - dynamic_groups.append(g) - return group + dynamic_groups +# Generalized dataset loader +@cache_to_disk() +def load_dataset_generic( + name: str, + mappings: ColumnMappings | None = None, + filter_fn: FilterFn | None = None, + url: str | None = None, + metadata: dict | None = None, +) -> ProbeDataset: + """Load and process a dataset with flexible configuration.""" + df = load_df_from_source(url or name, is_url=bool(url)) + transformed_df = transform_df(df, mappings, filter_fn) + prompt_col = mappings.get("prompt", "prompt") if mappings else "prompt" + prompts = transformed_df[prompt_col].tolist() + return create_probe_dataset(name, prompts, metadata) -class Stenography: - fn_library = { +# Dataset-specific configurations +DATASET_CONFIGS = { + "ShawnMenz/DAN_jailbreak": { + "mappings": {"prompt": "prompt"}, + "filter_fn": lambda row: row["jailbreak"], + }, + "deepset/prompt-injections": { + "mappings": {"prompt": "text"}, + "filter_fn": lambda row: row["label"], + }, + "notrichardren/refuse-to-answer-prompts": { + "mappings": {"prompt": "claim"}, + "filter_fn": lambda row: row["label"], + }, + "rubend18/ChatGPT-Jailbreak-Prompts": { + "mappings": {"prompt": "Prompt"}, + "filter_fn": lambda row: row["label"], + }, + "markush1/LLM-Jailbreak-Classifier": { + "mappings": {"prompt": "prompt"}, + "filter_fn": lambda row: row["classification"] == "jailbreak", + }, + "ShawnMenz/jailbreak_sft_rm_ds": { + "url": "hf://datasets/ShawnMenz/jailbreak_sft_rm_ds/jailbreak_sft_rm_ds.csv", + "mappings": {"prompt": "prompt"}, + "filter_fn": lambda row: row["jailbreak"] == "jailbreak", + }, + "verazuo/jailbreak_llms/2023_05_07": { + "url": "https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_05_07.csv", + "mappings": {"prompt": "prompt"}, + "filter_fn": lambda row: bool(row["jailbreak"]), + }, + "verazuo/jailbreak_llms/2023_12_25": { + "url": "https://raw.githubusercontent.com/verazuo/jailbreak_llms/main/data/prompts/jailbreak_prompts_2023_12_25.csv", + "mappings": {"prompt": "prompt"}, + "filter_fn": lambda row: bool(row["jailbreak"]), + }, +} + +# Additional generic dataset configurations +DATASET_CONFIGS_GENERICS = { + "simonycl/aya-23-8B_advbench_jailbreak": {"mappings": {"prompt": "prompt"}}, + "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b": {}, + "karanxa/dolphin-jailbreak-finetuning-dataset": {"mappings": {"prompt": "text"}}, + "karanxa/llama-2-jailbreak-dataset": {"mappings": {"prompt": "text"}}, + "karanxa/llama2-uncensored-jailbreak-dataset-finetuning": { + "mappings": {"prompt": "text"} + }, + "liuyanchen1015/Llama-3.2-1B_jailbreak_responses": { + "mappings": {"prompt": "jailbreak_prompt_text"} + }, + "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses": { + "mappings": {"prompt": "jailbreak_prompt_text"} + }, + "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment": { + "mappings": {"prompt": "jailbreak_prompt_text"} + }, + "jackhhao/jailbreak-classification": {"mappings": {"prompt": "prompt"}}, + "walledai/JailbreakBench": {"mappings": {"prompt": "prompt"}}, + "walledai/JailbreakHub": {"mappings": {"prompt": "prompt"}}, + "Granther/evil-jailbreak": {"mappings": {"prompt": "text"}}, + "sevdeawesome/jailbreak_success": {"mappings": {"prompt": "jailbreak_prompt_text"}}, + "IDA-SERICS/Disaster-tweet-jailbreaking": {"mappings": {"prompt": "prompt_attack"}}, + "GeorgeDaDude/Jailbreak_Complete_DS_labeled": {"mappings": {"prompt": "question"}}, + "dayone3nder/jailbreak_prompt_JBB_sft_trainset": {"mappings": {"prompt": "prompt"}}, + "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset": { + "mappings": {"prompt": "prompt"} + }, +} + + +# Dataset factory +def create_dataset_loader(name: str, config: dict) -> DatasetLoader: + """Create a dataset loader from configuration.""" + return partial( + load_dataset_generic, + name=name, + mappings=config.get("mappings"), + filter_fn=config.get("filter_fn"), + url=config.get("url"), + ) + + +# Specialized loaders +@cache_to_disk() +def load_multi_dataset(name: str, sub_datasets: list[str]) -> ProbeDataset: + """Load and combine multiple sub-datasets.""" + prompts = [] + for sub in sub_datasets: + dataset = load_dataset(name, sub) + prompts.extend(dataset["train"]["query"]) + return create_probe_dataset(f"{name}_combined", prompts) + + +@cache_to_disk() +def load_jailbreak_v28k() -> ProbeDataset: + """Load JailBreakV-28K dataset.""" + df = pd.read_csv("hf://datasets/JailbreakV-28K/JailBreakV-28k/JailBreakV_28K.csv") + prompts = df["jailbreak_query"].tolist() + return create_probe_dataset("JailbreakV-28K/JailBreakV-28k", prompts) + + +@cache_to_disk() +def load_local_csv() -> ProbeDataset: + """Load prompts from local CSV files.""" + csv_files = [f for f in os.listdir(".") if f.endswith(".csv")] + logger.info(f"Found {len(csv_files)} CSV files: {csv_files}") + + prompts = [] + for file in csv_files: + try: + df = pd.read_csv(file) + if "prompt" in df.columns: + prompts.extend(df["prompt"].tolist()) + else: + logger.warning(f"File {file} lacks 'prompt' column") + except Exception as e: + logger.error(f"Error reading {file}: {e}") + + return create_probe_dataset("Local CSV", prompts, {"src": str(csv_files)}) + + +# Stenography transformer +class StenographyTransformer: + """Apply stenography transformations to datasets.""" + + TRANSFORMATIONS = { "rot5": stenography_fn.rot5, "rot13": stenography_fn.rot13, "base64": stenography_fn.base64_encode, @@ -306,71 +226,32 @@ class Stenography: "zigzag_obfuscation": stenography_fn.zigzag_obfuscation, } - def __init__(self, prompt_groups: [ProbeDataset]): - self.prompt_groups = prompt_groups + def __init__(self, datasets: list[ProbeDataset]): + self.datasets = datasets - def __iter__(self): - return self.apply() + def transform(self) -> Iterator[ProbeDataset]: + """Generate transformed datasets.""" + for dataset in self.datasets: + size = len(dataset.prompts) + sample_size = int(size * 0.25) - def apply(self): - for prompt_group in self.prompt_groups: - size = len(prompt_group.prompts) - for name, fn in self.fn_library.items(): - logger.info(f"Applying {name} to {prompt_group.dataset_name}") - selected_prompts = random.sample(prompt_group.prompts, int(size * 0.25)) - prompts = [ - f"Follow the instructions in {name}: {fn(p)}" - for p in selected_prompts + for name, fn in self.TRANSFORMATIONS.items(): + logger.info(f"Applying {name} to {dataset.dataset_name}") + sampled = random.sample(dataset.prompts, sample_size) + transformed = [ + f"Follow the instructions in {name}: {fn(p)}" for p in sampled ] - yield ProbeDataset( - dataset_name=f"stenography.{name}({prompt_group.dataset_name})", - metadata={}, - prompts=prompts, - tokens=count_words_in_list(prompts), - approx_cost=0.0, + yield create_probe_dataset( + f"stenography.{name}({dataset.dataset_name})", transformed ) -@lru_cache -def load_local_csv() -> ProbeDataset: - csv_files = [f for f in os.listdir(".") if f.endswith(".csv")] - logger.info(f"Found {len(csv_files)} CSV files") - logger.info(f"CSV files: {csv_files}") - prompt_list = [] - - for file in csv_files: - try: - df = pd.read_csv(file) - except Exception as e: - logger.error(f"Error reading {file}: {e}") - continue - # Check if 'prompt' column exists - if "prompt" in df.columns: - prompt_list.extend(df["prompt"].tolist()) - else: - logger.warning(f"File {file} does not contain a 'prompt' column") - return ProbeDataset( - dataset_name="Local CSV", - metadata={"src": str(csv_files)}, - prompts=prompt_list, - tokens=count_words_in_list(prompt_list), - approx_cost=0.0, - ) - - -def dataset_from_iterator(name: str, iterator, lazy=False) -> list: - """Convert an iterator into a list of prompts and create a ProbeDataset - object. - - Args: - name (str): The name of the dataset. - iterator (iterator): An iterator that generates prompts. - - Returns: - list: A list containing a single ProbeDataset object. - """ +def dataset_from_iterator( + name: str, iterator, lazy: bool = False +) -> list[ProbeDataset]: + """Convert an iterator into a list of ProbeDataset objects.""" prompts = list(iterator) if not lazy else iterator - tokens = count_words_in_list(prompts) if not lazy else 0 + tokens = sum(len(str(s).split()) for s in prompts) if not lazy else 0 dataset = ProbeDataset( dataset_name=name, metadata={}, @@ -382,75 +263,85 @@ def dataset_from_iterator(name: str, iterator, lazy=False) -> list: return [dataset] -# TODO: refactor this abstraction +# Main dataset preparation +def prepare_prompts( + dataset_names: list[str], + budget: float, + tools_inbox=None, + options: list[dict] = None, +) -> list[ProbeDataset]: + """Prepare datasets based on names and options.""" + # Base dataset loaders + dataset_loaders = { + **{k: create_dataset_loader(k, v) for k, v in DATASET_CONFIGS.items()}, + **{k: create_dataset_loader(k, v) for k, v in DATASET_CONFIGS_GENERICS.items()}, + "Lemhf14/EasyJailbreak_Datasets": partial( + load_multi_dataset, + name="Lemhf14/EasyJailbreak_Datasets", + sub_datasets=[ + "AdvBench", + "ForbiddenQuestion", + "MJP", + "MaliciousInstruct", + "QuestionList", + ], + ), + "JailbreakV-28K/JailBreakV-28k": load_jailbreak_v28k, + "Local CSV": load_local_csv, + } -dataset_map_generics = { - "simonycl/aya-23-8B_advbench_jailbreak": lambda: load_dataset_general( - dataset_name="simonycl/aya-23-8B_advbench_jailbreak", - column_mappings={"prompt": "prompt"}, - ), - "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b": lambda: load_dataset_general( - dataset_name="acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b" - ), - "karanxa/dolphin-jailbreak-finetuning-dataset": lambda: load_dataset_general( - dataset_name="karanxa/dolphin-jailbreak-finetuning-dataset", - column_mappings={"prompt": "text"}, - ), - "karanxa/llama-2-jailbreak-dataset": lambda: load_dataset_general( - dataset_name="karanxa/llama-2-jailbreak-dataset", - column_mappings={"prompt": "text"}, - ), - "karanxa/llama2-uncensored-jailbreak-dataset-finetuning": lambda: load_dataset_general( - dataset_name="karanxa/llama2-uncensored-jailbreak-dataset-finetuning", - column_mappings={"prompt": "text"}, - ), - "liuyanchen1015/Llama-3.2-1B_jailbreak_responses": lambda: load_dataset_general( - dataset_name="liuyanchen1015/Llama-3.2-1B_jailbreak_responses", - column_mappings={"prompt": "jailbreak_prompt_text"}, - ), - "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses": lambda: load_dataset_general( - dataset_name="liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses", - column_mappings={"prompt": "jailbreak_prompt_text"}, - ), - "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment": lambda: load_dataset_general( - dataset_name="liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment", - column_mappings={"prompt": "jailbreak_prompt_text"}, - ), - "jackhhao/jailbreak-classification": lambda: load_dataset_general( - dataset_name="jackhhao/jailbreak-classification", - column_mappings={"prompt": "prompt"}, - ), - "markush1/LLM-Jailbreak-Classifier": lambda: load_dataset_general( - dataset_name="markush1/LLM-Jailbreak-Classifier", - column_mappings={"prompt": "prompt"}, - ), - "walledai/JailbreakBench": lambda: load_dataset_general( - dataset_name="walledai/JailbreakBench", column_mappings={"prompt": "prompt"} - ), - "walledai/JailbreakHub": lambda: load_dataset_general( - dataset_name="walledai/JailbreakHub", column_mappings={"prompt": "prompt"} - ), - "Granther/evil-jailbreak": lambda: load_dataset_general( - dataset_name="Granther/evil-jailbreak", column_mappings={"prompt": "text"} - ), - "sevdeawesome/jailbreak_success": lambda: load_dataset_general( - dataset_name="sevdeawesome/jailbreak_success", - column_mappings={"prompt": "jailbreak_prompt_text"}, - ), - "IDA-SERICS/Disaster-tweet-jailbreaking": lambda: load_dataset_general( - dataset_name="IDA-SERICS/Disaster-tweet-jailbreaking", - column_mappings={"prompt": "prompt_attack"}, - ), - "GeorgeDaDude/Jailbreak_Complete_DS_labeled": lambda: load_dataset_general( - dataset_name="GeorgeDaDude/Jailbreak_Complete_DS_labeled", - column_mappings={"prompt": "question"}, - ), - "dayone3nder/jailbreak_prompt_JBB_sft_trainset": lambda: load_dataset_general( - dataset_name="dayone3nder/jailbreak_prompt_JBB_sft_trainset", - column_mappings={"prompt": "prompt"}, - ), - "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset": lambda: load_dataset_general( - dataset_name="dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset", - column_mappings={"prompt": "prompt"}, - ), -} + # Dynamic dataset loaders + dynamic_loaders = { + "AgenticBackend": lambda opts: dataset_from_iterator( + "AgenticBackend", + fine_tuned.Module([], tools_inbox=tools_inbox, opts=opts).apply(), + lazy=True, + ), + "Steganography": lambda opts: list(StenographyTransformer([]).transform()), + "llm-adaptive-attacks": lambda opts: dataset_from_iterator( + "llm-adaptive-attacks", + adaptive_attacks.Module([], tools_inbox=tools_inbox, opts=opts).apply(), + ), + "Garak": lambda opts: dataset_from_iterator( + "Garak", + garak_tool.Module([], tools_inbox=tools_inbox, opts=opts).apply(), + lazy=True, + ), + "Reinforcement Learning Optimization": lambda opts: dataset_from_iterator( + "Reinforcement Learning Optimization", + rl_model.Module([], tools_inbox=tools_inbox, opts=opts).apply(), + lazy=True, + ), + "InspectAI": lambda opts: dataset_from_iterator( + "InspectAI", + inspect_ai_tool.Module([], tools_inbox=tools_inbox).apply(), + lazy=True, + ), + "GPT fuzzer": lambda opts: [], + } + + options = options or [{} for _ in dataset_names] + datasets = [] + + # Load base datasets + for name, opts in zip(dataset_names, options): + if name in dataset_loaders: + logger.info(f"Loading base dataset {name}") + try: + datasets.append(dataset_loaders[name]()) + except Exception as e: + logger.error(f"Error loading {name}: {e}") + + # Load dynamic datasets and apply transformations + for name, opts in zip(dataset_names, options): + if name in dynamic_loaders: + logger.info(f"Loading dynamic dataset {name}") + try: + dynamic_result = dynamic_loaders[name](opts) + datasets.extend(dynamic_result) + except Exception as e: + logger.error(f"Error loading dynamic {name}: {e}") + elif name == "Steganography": + datasets.extend(list(StenographyTransformer(datasets).transform())) + + return datasets