diff --git a/agentic_security/probe_data/__init__.py b/agentic_security/probe_data/__init__.py index adf2282..ecb04be 100644 --- a/agentic_security/probe_data/__init__.py +++ b/agentic_security/probe_data/__init__.py @@ -5,6 +5,7 @@ REGISTRY_V0 = [ "dataset_name": "simonycl/aya-23-8B_advbench_jailbreak", "num_prompts": 416, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -16,6 +17,7 @@ REGISTRY_V0 = [ "dataset_name": "acmc/jailbreaks_dataset_with_perplexity_bigcode_starcoder2-3b_bigcode_starcoder2-7b", "num_prompts": 11191, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -27,6 +29,7 @@ REGISTRY_V0 = [ "dataset_name": "karanxa/dolphin-jailbreak-finetuning-dataset", "num_prompts": 42684, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -38,6 +41,7 @@ REGISTRY_V0 = [ "dataset_name": "karanxa/llama-2-jailbreak-dataset", "num_prompts": 40613, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -49,6 +53,7 @@ REGISTRY_V0 = [ "dataset_name": "karanxa/llama2-uncensored-jailbreak-dataset-finetuning", "num_prompts": 42854, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -60,6 +65,7 @@ REGISTRY_V0 = [ "dataset_name": "liuyanchen1015/Llama-3.2-1B_jailbreak_responses", "num_prompts": 9888, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -71,6 +77,7 @@ REGISTRY_V0 = [ "dataset_name": "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses", "num_prompts": 9888, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -82,6 +89,7 @@ REGISTRY_V0 = [ "dataset_name": "liuyanchen1015/Llama-3.2-1B-Instruct_jailbreak_responses_with_judgment", "num_prompts": 9888, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -93,6 +101,7 @@ REGISTRY_V0 = [ "dataset_name": "jackhhao/jailbreak-classification", "num_prompts": 1044, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -104,6 +113,7 @@ REGISTRY_V0 = [ "dataset_name": "markush1/LLM-Jailbreak-Classifier", "num_prompts": 201193, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -115,6 +125,7 @@ REGISTRY_V0 = [ "dataset_name": "walledai/JailbreakBench", "num_prompts": 200, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -126,6 +137,7 @@ REGISTRY_V0 = [ "dataset_name": "walledai/JailbreakHub", "num_prompts": 15140, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -137,6 +149,7 @@ REGISTRY_V0 = [ "dataset_name": "Granther/evil-jailbreak", "num_prompts": 1200, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -148,6 +161,7 @@ REGISTRY_V0 = [ "dataset_name": "sevdeawesome/jailbreak_success", "num_prompts": 10800, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -159,6 +173,7 @@ REGISTRY_V0 = [ "dataset_name": "IDA-SERICS/Disaster-tweet-jailbreaking", "num_prompts": 3000, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -170,6 +185,7 @@ REGISTRY_V0 = [ "dataset_name": "GeorgeDaDude/Jailbreak_Complete_DS_labeled", "num_prompts": 11383, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -181,6 +197,7 @@ REGISTRY_V0 = [ "dataset_name": "dayone3nder/jailbreak_prompt_JBB_sft_trainset", "num_prompts": 4785, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -192,6 +209,7 @@ REGISTRY_V0 = [ "dataset_name": "dayone3nder/general_safe_mix_jailbreak_prompt_JBB_trainset", "num_prompts": 24679, "tokens": None, # Add actual token count if available + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -206,6 +224,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "AgenticBackend", "num_prompts": 2000, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Fine-tuned cloud hosted model", "selected": True, @@ -221,6 +240,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "ShawnMenz/DAN_jailbreak", "num_prompts": 666, "tokens": 224196, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -232,6 +252,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "deepset/prompt-injections", "num_prompts": 203, "tokens": 6988, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -243,6 +264,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "rubend18/ChatGPT-Jailbreak-Prompts", "num_prompts": 79, "tokens": 26971, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -254,6 +276,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "notrichardren/refuse-to-answer-prompts", "num_prompts": 522, "tokens": 7172, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -265,6 +288,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Lemhf14/EasyJailbreak_Datasets", "num_prompts": 1630, "tokens": 19758, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -276,6 +300,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "markush1/LLM-Jailbreak-Classifier", "num_prompts": 1119, "tokens": 19758, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -287,6 +312,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "JailbreakV-28K/JailBreakV-28k", "num_prompts": 28300, "tokens": 1975800, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -298,6 +324,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "ShawnMenz/jailbreak_sft_rm_ds", "num_prompts": 371000, "tokens": 1975800, + "is_active": True, "approx_cost": 0.0, "source": "Hugging Face Datasets", "selected": False, @@ -309,6 +336,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Steganography", "num_prompts": 10, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Local mutation dataset", "selected": False, @@ -320,6 +348,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "GPT fuzzer", "num_prompts": 10, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Local mutation dataset", "selected": False, @@ -331,6 +360,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "jailbreak_llms/2023_05_07", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Github", "selected": False, @@ -342,6 +372,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "jailbreak_llms/2023_12_25.csv", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Github", "selected": False, @@ -353,6 +384,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Malwaregen", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Local dataset", "selected": False, @@ -364,6 +396,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Hallucination", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Local dataset", "selected": False, @@ -375,6 +408,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "DataLeak", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Local dataset", "selected": False, @@ -386,6 +420,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "llm-adaptive-attacks", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Github: tml-epfl/llm-adaptive-attacks#0.0.1", "selected": False, @@ -397,6 +432,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Garak", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Github: https://github.com/leondz/garak#v0.9.0.1", "selected": False, @@ -412,6 +448,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "Reinforcement Learning Optimization", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Cloud hosted model", "selected": False, @@ -427,6 +464,7 @@ REGISTRY = REGISTRY_V0 + [ "dataset_name": "InspectAI", "num_prompts": 0, "tokens": 0, + "is_active": True, "approx_cost": 0.0, "source": "Github: https://github.com/UKGovernmentBEIS/inspect_ai", "selected": False, @@ -439,6 +477,7 @@ REGISTRY = REGISTRY_V0 + [ "num_prompts": len(load_local_csv().prompts), "tokens": load_local_csv().tokens, "approx_cost": 0.0, + "is_active": True, "source": f"Local file dataset: {load_local_csv().metadata['src']}", "selected": len(load_local_csv().prompts), "url": "",