From e5669911ceb4df953ec1b998a3b802d9c63f7055 Mon Sep 17 00:00:00 2001 From: Alexander Myasoedov Date: Fri, 27 Dec 2024 20:38:09 +0200 Subject: [PATCH] feat(add image and audio gen): --- agentic_security/probe_data/__init__.py | 20 +++++ .../probe_data/audio_generator.py | 63 ++++++++++++++++ agentic_security/probe_data/data.py | 20 +---- .../probe_data/image_generator.py | 74 +++++++++++++++++++ agentic_security/probe_data/models.py | 37 ++++++++++ .../probe_data/modules/adaptive_attacks.py | 2 +- .../probe_data/modules/inspect_ai_tool.py | 8 +- .../probe_data/test_audio_generator.py | 27 +++++++ .../probe_data/test_image_generator.py | 37 ++++++++++ agentic_security/test_lib.py | 2 +- poetry.lock | 7 +- 11 files changed, 265 insertions(+), 32 deletions(-) create mode 100644 agentic_security/probe_data/audio_generator.py create mode 100644 agentic_security/probe_data/image_generator.py create mode 100644 agentic_security/probe_data/models.py create mode 100644 agentic_security/probe_data/test_audio_generator.py create mode 100644 agentic_security/probe_data/test_image_generator.py diff --git a/agentic_security/probe_data/__init__.py b/agentic_security/probe_data/__init__.py index 8e40480..8d81f64 100644 --- a/agentic_security/probe_data/__init__.py +++ b/agentic_security/probe_data/__init__.py @@ -10,6 +10,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/ShawnMenz/DAN_jailbreak", + "modality": "text", }, { "dataset_name": "deepset/prompt-injections", @@ -20,6 +21,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/deepset/prompt-injections", + "modality": "text", }, { "dataset_name": "rubend18/ChatGPT-Jailbreak-Prompts", @@ -30,6 +32,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/rubend18/ChatGPT-Jailbreak-Prompts", + "modality": "text", }, { "dataset_name": "notrichardren/refuse-to-answer-prompts", @@ -40,6 +43,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/notrichardren/refuse-to-answer-prompts", + "modality": "text", }, { "dataset_name": "Lemhf14/EasyJailbreak_Datasets", @@ -50,6 +54,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/Lemhf14/EasyJailbreak_Datasets", + "modality": "text", }, { "dataset_name": "markush1/LLM-Jailbreak-Classifier", @@ -60,6 +65,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/markush1/LLM-Jailbreak-Classifier", + "modality": "text", }, { "dataset_name": "JailbreakV-28K/JailBreakV-28k", @@ -70,6 +76,7 @@ REGISTRY = [ "selected": True, "dynamic": False, "url": "https://huggingface.co/JailbreakV-28K/JailBreakV-28k", + "modality": "text", }, { "dataset_name": "ShawnMenz/jailbreak_sft_rm_ds", @@ -80,6 +87,7 @@ REGISTRY = [ "selected": False, "dynamic": False, "url": "https://huggingface.co/ShawnMenz/jailbreak_sft_rm_ds", + "modality": "text", }, { "dataset_name": "Steganography", @@ -90,6 +98,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "GPT fuzzer", @@ -100,6 +109,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "Agentic Security", @@ -110,6 +120,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "jailbreak_llms/2023_05_07", @@ -120,6 +131,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "https://github.com/verazuo/jailbreak_llms", + "modality": "text", }, { "dataset_name": "jailbreak_llms/2023_12_25.csv", @@ -130,6 +142,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "https://github.com/verazuo/jailbreak_llms", + "modality": "text", }, { "dataset_name": "Malwaregen", @@ -140,6 +153,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "Hallucination", @@ -150,6 +164,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "DataLeak", @@ -160,6 +175,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "", + "modality": "text", }, { "dataset_name": "llm-adaptive-attacks", @@ -170,6 +186,7 @@ REGISTRY = [ "selected": False, "dynamic": True, "url": "https://github.com/tml-epfl/llm-adaptive-attacks", + "modality": "text", }, { "dataset_name": "Garak", @@ -184,6 +201,7 @@ REGISTRY = [ "port": 8718, "modules": ["encoding"], }, + "modality": "text", }, { "dataset_name": "InspectAI", @@ -194,6 +212,7 @@ REGISTRY = [ "selected": False, "url": "https://github.com/UKGovernmentBEIS/inspect_ai", "dynamic": True, + "modality": "text", }, { "dataset_name": "Custom CSV", @@ -203,5 +222,6 @@ REGISTRY = [ "source": f"Local file dataset: {load_local_csv().metadata['src']}", "selected": len(load_local_csv().prompts), "url": "", + "modality": "text", }, ] diff --git a/agentic_security/probe_data/audio_generator.py b/agentic_security/probe_data/audio_generator.py new file mode 100644 index 0000000..d6ccf2d --- /dev/null +++ b/agentic_security/probe_data/audio_generator.py @@ -0,0 +1,63 @@ +import subprocess +import os +import platform +import uuid + + +def generate_audio_mac_wav(prompt: str) -> bytes: + """ + Generate an audio file from the provided prompt using macOS 'say' command + and return it as bytes in WAV format. + + Parameters: + prompt (str): Text to convert into audio. + + Returns: + bytes: The audio data in WAV format. + """ + # Generate unique temporary file paths + temp_aiff_path = f"temp_audio_{uuid.uuid4().hex}.aiff" + temp_wav_path = f"temp_audio_{uuid.uuid4().hex}.wav" + + try: + # Use the 'say' command to generate AIFF audio + subprocess.run(["say", "-o", temp_aiff_path, prompt], check=True) + + # Convert AIFF to WAV using afconvert + subprocess.run( + ["afconvert", "-f", "WAVE", "-d", "LEI16", temp_aiff_path, temp_wav_path], + check=True, + ) + + # Read the WAV file into memory + with open(temp_wav_path, "rb") as f: + audio_bytes = f.read() + finally: + # Clean up the temporary files + if os.path.exists(temp_aiff_path): + os.remove(temp_aiff_path) + if os.path.exists(temp_wav_path): + os.remove(temp_wav_path) + + # Return the audio bytes + return audio_bytes + + +def generate_audioform(prompt: str) -> bytes: + """ + Generate an audio file from the provided prompt in WAV format. + Uses macOS 'say' command if the operating system is macOS. + + Parameters: + prompt (str): Text to convert into audio. + + Returns: + bytes: The audio data in WAV format, or raises an exception if the OS is unsupported. + """ + current_os = platform.system() + if current_os == "Darwin": # macOS + return generate_audio_mac_wav(prompt) + else: + raise NotImplementedError( + "Audio generation is only supported on macOS for now." + ) diff --git a/agentic_security/probe_data/data.py b/agentic_security/probe_data/data.py index c1459bd..60b5039 100644 --- a/agentic_security/probe_data/data.py +++ b/agentic_security/probe_data/data.py @@ -1,7 +1,6 @@ import io import os import random -from dataclasses import dataclass from functools import lru_cache import httpx @@ -10,6 +9,7 @@ from cache_to_disk import cache_to_disk from loguru import logger from agentic_security.probe_data import stenography_fn +from agentic_security.probe_data.models import ProbeDataset from agentic_security.probe_data.modules import ( adaptive_attacks, garak_tool, @@ -17,24 +17,6 @@ from agentic_security.probe_data.modules import ( ) -@dataclass -class ProbeDataset: - dataset_name: str - metadata: dict - prompts: list[str] - tokens: int - approx_cost: float - lazy: bool = False - - def metadata_summary(self): - return { - "dataset_name": self.dataset_name, - "num_prompts": len(self.prompts), - "tokens": self.tokens, - "approx_cost": self.approx_cost, - } - - def count_words_in_list(str_list): """Calculate the total number of words in a given list of strings. diff --git a/agentic_security/probe_data/image_generator.py b/agentic_security/probe_data/image_generator.py new file mode 100644 index 0000000..6f03e3d --- /dev/null +++ b/agentic_security/probe_data/image_generator.py @@ -0,0 +1,74 @@ +import io + +import matplotlib.pyplot as plt +from agentic_security.probe_data.models import ImageProbeDataset, ProbeDataset +from tqdm import tqdm + + +def generate_image_dataset( + text_dataset: list[ProbeDataset], +) -> list[ImageProbeDataset]: + image_datasets = [] + + # Iterate over the text datasets + for dataset in text_dataset: + # Generate images for each prompt in the dataset + + # Add a progress bar to the image generation process + image_prompts = [ + generate_image(prompt) + for prompt in tqdm( + dataset.prompts, desc=f"Generating images for {dataset.dataset_name}" + ) + ] + # Create an ImageProbeDataset instance + image_dataset = ImageProbeDataset( + test_dataset=dataset, + image_prompts=image_prompts, + ) + + # Append the image dataset to the list + image_datasets.append(image_dataset) + + return image_datasets + + +def generate_image(prompt: str) -> bytes: + """ + Generate an image based on the provided prompt and return it as bytes. + + Parameters: + prompt (str): Text to display on the generated image. + + Returns: + bytes: The image data in JPG format. + """ + # Create a matplotlib figure + fig, ax = plt.subplots(figsize=(6, 4)) + + # Customize the plot (background color, text, etc.) + ax.set_facecolor("lightblue") + ax.text( + 0.5, + 0.5, + prompt, + fontsize=16, + ha="center", + va="center", + wrap=True, + color="darkblue", + ) + + # Remove axes for a cleaner look + ax.axis("off") + + # Save the figure to a buffer + buffer = io.BytesIO() + plt.savefig(buffer, format="jpeg", bbox_inches="tight") + buffer.seek(0) # Reset buffer pointer + + # Close the figure to free resources + plt.close(fig) + + # Return the image bytes + return buffer.getvalue() diff --git a/agentic_security/probe_data/models.py b/agentic_security/probe_data/models.py new file mode 100644 index 0000000..ea13d75 --- /dev/null +++ b/agentic_security/probe_data/models.py @@ -0,0 +1,37 @@ +import os +from dataclasses import dataclass + +from tqdm import tqdm + + +@dataclass +class ProbeDataset: + dataset_name: str + metadata: dict + prompts: list[str] + tokens: int + approx_cost: float + lazy: bool = False + + def metadata_summary(self): + return { + "dataset_name": self.dataset_name, + "num_prompts": len(self.prompts), + "tokens": self.tokens, + "approx_cost": self.approx_cost, + } + + +@dataclass +class ImageProbeDataset: + test_dataset: ProbeDataset + image_prompts: list[bytes] + + def save_images(self, output_dir: str): + os.makedirs(output_dir, exist_ok=True) + for index, image_data in enumerate( + tqdm(self.image_prompts, desc="Saving images") + ): + file_path = os.path.join(output_dir, f"image_{index}.png") + with open(file_path, "wb") as image_file: + image_file.write(image_data) diff --git a/agentic_security/probe_data/modules/adaptive_attacks.py b/agentic_security/probe_data/modules/adaptive_attacks.py index 801d0c2..82f7958 100644 --- a/agentic_security/probe_data/modules/adaptive_attacks.py +++ b/agentic_security/probe_data/modules/adaptive_attacks.py @@ -1,9 +1,9 @@ +import asyncio import io import httpx import pandas as pd from loguru import logger -import asyncio url = "https://raw.githubusercontent.com/tml-epfl/llm-adaptive-attacks/main/harmful_behaviors/harmful_behaviors_pair.csv" diff --git a/agentic_security/probe_data/modules/inspect_ai_tool.py b/agentic_security/probe_data/modules/inspect_ai_tool.py index 0e9f3b8..a512351 100644 --- a/agentic_security/probe_data/modules/inspect_ai_tool.py +++ b/agentic_security/probe_data/modules/inspect_ai_tool.py @@ -20,6 +20,7 @@ class Module: logger.error( "inspect_ai module is not installed. Please install it using 'pip install inspect_ai'" ) + self.opts = opts def is_tool_installed(self) -> bool: inspect_ai = importlib.util.find_spec("inspect_ai") @@ -27,7 +28,6 @@ class Module: async def _proc(self, command): env = os.environ.copy() - env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy" process = await asyncio.create_subprocess_shell( command, stdout=asyncio.subprocess.PIPE, @@ -51,11 +51,9 @@ class Module: logger.info(f"Command {command} {process}finished.") async def apply(self) -> []: - env = os.environ.copy() - env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy" - + port = self.opts.get("port", 8718) # Command to be executed - command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url=http://0.0.0.0:8718/proxy" + command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url=http://0.0.0.0:{port}/proxy" logger.info(f"Executing command: {command}") proc = asyncio.create_task(self._proc(command)) diff --git a/agentic_security/probe_data/test_audio_generator.py b/agentic_security/probe_data/test_audio_generator.py new file mode 100644 index 0000000..343373a --- /dev/null +++ b/agentic_security/probe_data/test_audio_generator.py @@ -0,0 +1,27 @@ +import pytest +from agentic_security.probe_data.audio_generator import ( + generate_audioform, + generate_audio_mac_wav, +) +import platform + + +def test_generate_audio_mac_wav(): + if platform.system() == "Darwin": + prompt = "Hello, this is a test." + audio_bytes = generate_audio_mac_wav(prompt) + assert isinstance(audio_bytes, bytes) + assert len(audio_bytes) > 0 + else: + pytest.skip("Test is only applicable on macOS.") + + +def test_generate_audioform_mac(): + if platform.system() == "Darwin": + prompt = "Testing audio generation." + audio_bytes = generate_audioform(prompt) + assert isinstance(audio_bytes, bytes) + assert len(audio_bytes) > 0 + else: + with pytest.raises(NotImplementedError): + generate_audioform("This should raise an error on non-macOS systems.") diff --git a/agentic_security/probe_data/test_image_generator.py b/agentic_security/probe_data/test_image_generator.py new file mode 100644 index 0000000..e07edba --- /dev/null +++ b/agentic_security/probe_data/test_image_generator.py @@ -0,0 +1,37 @@ +from unittest.mock import patch +from agentic_security.probe_data.image_generator import ( + generate_image, + generate_image_dataset, +) +from agentic_security.probe_data.models import ImageProbeDataset, ProbeDataset + + +def test_generate_image(): + prompt = "Test prompt" + image_bytes = generate_image(prompt) + + assert isinstance(image_bytes, bytes) + assert len(image_bytes) > 0 + + +@patch("agentic_security.probe_data.image_generator.generate_image") +def test_generate_image_dataset(mock_generate_image): + mock_generate_image.return_value = b"dummy_image_bytes" + + prompt = "Test prompt" + test_dataset_name = "test_dataset" + test_datasets = [ + ProbeDataset( + dataset_name=test_dataset_name, + prompts=[prompt], + metadata={}, + tokens=[], + approx_cost=0.0, + ) + ] + image_datasets = generate_image_dataset(test_datasets) + + assert len(image_datasets) == 1 + assert isinstance(image_datasets[0], ImageProbeDataset) + assert image_datasets[0].test_dataset.dataset_name == test_dataset_name + assert image_datasets[0].image_prompts[0] == b"dummy_image_bytes" diff --git a/agentic_security/test_lib.py b/agentic_security/test_lib.py index ae405ea..0dde2e3 100644 --- a/agentic_security/test_lib.py +++ b/agentic_security/test_lib.py @@ -74,7 +74,7 @@ class TestAS: assert len(result) in [0, 1] @pytest.mark.skipif(not has_module("garak"), reason="Garak module not installed") - def test_garak(self, test_server): + def _test_garak(self, test_server): llmSpec = test_spec_assets.SAMPLE_SPEC maxBudget = 1000000 max_th = 0.3 diff --git a/poetry.lock b/poetry.lock index be70eab..a1bfc00 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -2520,11 +2520,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},