mirror of
https://github.com/msoedov/agentic_security.git
synced 2026-06-24 06:09:55 +02:00
feat(add image and audio gen):
This commit is contained in:
@@ -10,6 +10,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/ShawnMenz/DAN_jailbreak",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "deepset/prompt-injections",
|
||||
@@ -20,6 +21,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/deepset/prompt-injections",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "rubend18/ChatGPT-Jailbreak-Prompts",
|
||||
@@ -30,6 +32,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/rubend18/ChatGPT-Jailbreak-Prompts",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "notrichardren/refuse-to-answer-prompts",
|
||||
@@ -40,6 +43,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/notrichardren/refuse-to-answer-prompts",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Lemhf14/EasyJailbreak_Datasets",
|
||||
@@ -50,6 +54,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/Lemhf14/EasyJailbreak_Datasets",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "markush1/LLM-Jailbreak-Classifier",
|
||||
@@ -60,6 +65,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/markush1/LLM-Jailbreak-Classifier",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "JailbreakV-28K/JailBreakV-28k",
|
||||
@@ -70,6 +76,7 @@ REGISTRY = [
|
||||
"selected": True,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/JailbreakV-28K/JailBreakV-28k",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "ShawnMenz/jailbreak_sft_rm_ds",
|
||||
@@ -80,6 +87,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": False,
|
||||
"url": "https://huggingface.co/ShawnMenz/jailbreak_sft_rm_ds",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Steganography",
|
||||
@@ -90,6 +98,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "GPT fuzzer",
|
||||
@@ -100,6 +109,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Agentic Security",
|
||||
@@ -110,6 +120,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "jailbreak_llms/2023_05_07",
|
||||
@@ -120,6 +131,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/verazuo/jailbreak_llms",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "jailbreak_llms/2023_12_25.csv",
|
||||
@@ -130,6 +142,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/verazuo/jailbreak_llms",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Malwaregen",
|
||||
@@ -140,6 +153,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Hallucination",
|
||||
@@ -150,6 +164,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "DataLeak",
|
||||
@@ -160,6 +175,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "llm-adaptive-attacks",
|
||||
@@ -170,6 +186,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"dynamic": True,
|
||||
"url": "https://github.com/tml-epfl/llm-adaptive-attacks",
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Garak",
|
||||
@@ -184,6 +201,7 @@ REGISTRY = [
|
||||
"port": 8718,
|
||||
"modules": ["encoding"],
|
||||
},
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "InspectAI",
|
||||
@@ -194,6 +212,7 @@ REGISTRY = [
|
||||
"selected": False,
|
||||
"url": "https://github.com/UKGovernmentBEIS/inspect_ai",
|
||||
"dynamic": True,
|
||||
"modality": "text",
|
||||
},
|
||||
{
|
||||
"dataset_name": "Custom CSV",
|
||||
@@ -203,5 +222,6 @@ REGISTRY = [
|
||||
"source": f"Local file dataset: {load_local_csv().metadata['src']}",
|
||||
"selected": len(load_local_csv().prompts),
|
||||
"url": "",
|
||||
"modality": "text",
|
||||
},
|
||||
]
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import subprocess
|
||||
import os
|
||||
import platform
|
||||
import uuid
|
||||
|
||||
|
||||
def generate_audio_mac_wav(prompt: str) -> bytes:
    """
    Generate speech audio for the prompt with the macOS 'say' command
    and return it as bytes in WAV format.

    'say' writes an AIFF file, which 'afconvert' then converts to a WAVE
    file using the 'LEI16' data format. Both intermediate files live in a
    private temporary directory that is removed when the function returns
    or raises, so nothing is written to the current working directory.

    Parameters:
        prompt (str): Text to convert into audio.

    Returns:
        bytes: The audio data in WAV format.

    Raises:
        subprocess.CalledProcessError: If 'say' or 'afconvert' exits with a
            non-zero status.
        FileNotFoundError: If either command is not available on this system.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import tempfile

    # A dedicated directory avoids name clashes between concurrent calls and
    # guarantees cleanup of both files, even when one of the commands fails.
    with tempfile.TemporaryDirectory(prefix="temp_audio_") as work_dir:
        aiff_path = os.path.join(work_dir, "speech.aiff")
        wav_path = os.path.join(work_dir, "speech.wav")

        # Step 1: synthesize the prompt to AIFF.
        subprocess.run(["say", "-o", aiff_path, prompt], check=True)

        # Step 2: convert AIFF to WAV.
        subprocess.run(
            ["afconvert", "-f", "WAVE", "-d", "LEI16", aiff_path, wav_path],
            check=True,
        )

        # Read the result into memory before the directory is deleted.
        with open(wav_path, "rb") as wav_file:
            return wav_file.read()
|
||||
|
||||
|
||||
def generate_audioform(prompt: str) -> bytes:
    """
    Convert the prompt to WAV audio using the backend for the host OS.

    Only macOS is handled at present, by delegating to
    generate_audio_mac_wav.

    Parameters:
        prompt (str): Text to convert into audio.

    Returns:
        bytes: The audio data in WAV format.

    Raises:
        NotImplementedError: If the operating system is not macOS.
    """
    # platform.system() reports "Darwin" on macOS; reject everything else early.
    if platform.system() != "Darwin":
        raise NotImplementedError(
            "Audio generation is only supported on macOS for now."
        )
    return generate_audio_mac_wav(prompt)
|
||||
@@ -1,7 +1,6 @@
|
||||
import io
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
|
||||
import httpx
|
||||
@@ -10,6 +9,7 @@ from cache_to_disk import cache_to_disk
|
||||
from loguru import logger
|
||||
|
||||
from agentic_security.probe_data import stenography_fn
|
||||
from agentic_security.probe_data.models import ProbeDataset
|
||||
from agentic_security.probe_data.modules import (
|
||||
adaptive_attacks,
|
||||
garak_tool,
|
||||
@@ -17,24 +17,6 @@ from agentic_security.probe_data.modules import (
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProbeDataset:
|
||||
dataset_name: str
|
||||
metadata: dict
|
||||
prompts: list[str]
|
||||
tokens: int
|
||||
approx_cost: float
|
||||
lazy: bool = False
|
||||
|
||||
def metadata_summary(self):
|
||||
return {
|
||||
"dataset_name": self.dataset_name,
|
||||
"num_prompts": len(self.prompts),
|
||||
"tokens": self.tokens,
|
||||
"approx_cost": self.approx_cost,
|
||||
}
|
||||
|
||||
|
||||
def count_words_in_list(str_list):
|
||||
"""Calculate the total number of words in a given list of strings.
|
||||
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
import io
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from agentic_security.probe_data.models import ImageProbeDataset, ProbeDataset
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def generate_image_dataset(
    text_dataset: list[ProbeDataset],
) -> list[ImageProbeDataset]:
    """
    Render every prompt of every given text dataset as an image.

    Parameters:
        text_dataset (list[ProbeDataset]): Datasets whose prompts are rendered.

    Returns:
        list[ImageProbeDataset]: One entry per input dataset, in input order,
        each holding the source dataset and the images generated from its
        prompts.
    """
    rendered_datasets = []

    for source in text_dataset:
        # Wrapping the prompts in tqdm gives each dataset its own progress bar.
        progress = tqdm(
            source.prompts, desc=f"Generating images for {source.dataset_name}"
        )
        rendered = list(map(generate_image, progress))

        rendered_datasets.append(
            ImageProbeDataset(test_dataset=source, image_prompts=rendered)
        )

    return rendered_datasets
|
||||
|
||||
|
||||
def generate_image(prompt: str) -> bytes:
    """
    Render the prompt as centred text on a matplotlib figure and return the
    encoded image as bytes.

    Parameters:
        prompt (str): Text to display on the generated image.

    Returns:
        bytes: The image data in JPG format.

    Raises:
        Any exception raised by matplotlib while laying out or saving the
        figure is propagated; the figure is closed first.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        # Styling: axes background colour and the prompt text, centred and
        # wrapped.
        ax.set_facecolor("lightblue")
        ax.text(
            0.5,
            0.5,
            prompt,
            fontsize=16,
            ha="center",
            va="center",
            wrap=True,
            color="darkblue",
        )

        # Hide the axes decorations.
        ax.axis("off")

        # Save via the figure object rather than pyplot's implicit "current
        # figure", so the right figure is written even if another one has
        # become current in the meantime.
        buffer = io.BytesIO()
        fig.savefig(buffer, format="jpeg", bbox_inches="tight")

        # getvalue() returns the whole buffer regardless of stream position.
        return buffer.getvalue()
    finally:
        # Always release the figure; pyplot keeps figures alive until they
        # are closed, so a failure above would otherwise leak one per call.
        plt.close(fig)
|
||||
@@ -0,0 +1,37 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
@dataclass
class ProbeDataset:
    """A named set of probe prompts together with its bookkeeping figures."""

    dataset_name: str
    metadata: dict
    prompts: list[str]
    tokens: int
    approx_cost: float
    lazy: bool = False

    def metadata_summary(self):
        """Return the dataset name, prompt count, tokens and approx_cost as a dict."""
        return dict(
            dataset_name=self.dataset_name,
            num_prompts=len(self.prompts),
            tokens=self.tokens,
            approx_cost=self.approx_cost,
        )
|
||||
|
||||
|
||||
def _image_extension(image_data: bytes) -> str:
    """Choose a file extension from the image's leading signature bytes."""
    # JPEG streams begin with the SOI marker FF D8 followed by another FF marker.
    if image_data[:3] == b"\xff\xd8\xff":
        return "jpg"
    # Anything else keeps the historical ".png" naming.
    return "png"


@dataclass
class ImageProbeDataset:
    """A text ProbeDataset together with encoded images stored as bytes."""

    test_dataset: ProbeDataset
    image_prompts: list[bytes]

    def save_images(self, output_dir: str):
        """
        Write every image in image_prompts to output_dir.

        Files are named image_<index>.<ext>, where <index> is the position in
        image_prompts. <ext> is "jpg" when the data starts with a JPEG
        signature and "png" otherwise, so the extension matches JPEG content
        instead of always claiming PNG.

        Parameters:
            output_dir (str): Destination directory; created if missing.
        """
        os.makedirs(output_dir, exist_ok=True)
        for index, image_data in enumerate(
            tqdm(self.image_prompts, desc="Saving images")
        ):
            file_name = f"image_{index}.{_image_extension(image_data)}"
            with open(os.path.join(output_dir, file_name), "wb") as image_file:
                image_file.write(image_data)
|
||||
@@ -1,9 +1,9 @@
|
||||
import asyncio
|
||||
import io
|
||||
|
||||
import httpx
|
||||
import pandas as pd
|
||||
from loguru import logger
|
||||
import asyncio
|
||||
|
||||
url = "https://raw.githubusercontent.com/tml-epfl/llm-adaptive-attacks/main/harmful_behaviors/harmful_behaviors_pair.csv"
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ class Module:
|
||||
logger.error(
|
||||
"inspect_ai module is not installed. Please install it using 'pip install inspect_ai'"
|
||||
)
|
||||
self.opts = opts
|
||||
|
||||
def is_tool_installed(self) -> bool:
|
||||
inspect_ai = importlib.util.find_spec("inspect_ai")
|
||||
@@ -27,7 +28,6 @@ class Module:
|
||||
|
||||
async def _proc(self, command):
|
||||
env = os.environ.copy()
|
||||
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||
process = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
@@ -51,11 +51,9 @@ class Module:
|
||||
logger.info(f"Command {command} {process}finished.")
|
||||
|
||||
async def apply(self) -> []:
|
||||
env = os.environ.copy()
|
||||
env["OPENAI_API_BASE"] = "http://0.0.0.0:8718/proxy"
|
||||
|
||||
port = self.opts.get("port", 8718)
|
||||
# Command to be executed
|
||||
command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url=http://0.0.0.0:8718/proxy"
|
||||
command = f"inspect eval {inspect_ai_task} --model openai/gpt-4 --model-base-url=http://0.0.0.0:{port}/proxy"
|
||||
logger.info(f"Executing command: {command}")
|
||||
|
||||
proc = asyncio.create_task(self._proc(command))
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
from agentic_security.probe_data.audio_generator import (
|
||||
generate_audioform,
|
||||
generate_audio_mac_wav,
|
||||
)
|
||||
import platform
|
||||
|
||||
|
||||
def test_generate_audio_mac_wav():
    """On macOS the 'say'-based generator returns non-empty bytes; skipped elsewhere."""
    # pytest.skip raises, so nothing below runs on other systems.
    if platform.system() != "Darwin":
        pytest.skip("Test is only applicable on macOS.")

    audio_bytes = generate_audio_mac_wav("Hello, this is a test.")

    assert isinstance(audio_bytes, bytes)
    assert len(audio_bytes) > 0
|
||||
|
||||
|
||||
def test_generate_audioform_mac():
    """generate_audioform returns audio bytes on macOS and raises on other systems."""
    if platform.system() != "Darwin":
        # Unsupported platforms must be rejected explicitly.
        with pytest.raises(NotImplementedError):
            generate_audioform("This should raise an error on non-macOS systems.")
        return

    audio_bytes = generate_audioform("Testing audio generation.")

    assert isinstance(audio_bytes, bytes)
    assert len(audio_bytes) > 0
|
||||
@@ -0,0 +1,37 @@
|
||||
from unittest.mock import patch
|
||||
from agentic_security.probe_data.image_generator import (
|
||||
generate_image,
|
||||
generate_image_dataset,
|
||||
)
|
||||
from agentic_security.probe_data.models import ImageProbeDataset, ProbeDataset
|
||||
|
||||
|
||||
def test_generate_image():
    """generate_image returns a non-empty bytes payload for a simple prompt."""
    rendered = generate_image("Test prompt")

    assert isinstance(rendered, bytes)
    assert len(rendered) > 0
|
||||
|
||||
|
||||
@patch("agentic_security.probe_data.image_generator.generate_image")
def test_generate_image_dataset(mock_generate_image):
    """generate_image_dataset wraps each text dataset with its generated images.

    generate_image is patched so that no real rendering takes place.
    """
    mock_generate_image.return_value = b"dummy_image_bytes"

    prompt = "Test prompt"
    test_dataset_name = "test_dataset"
    test_datasets = [
        ProbeDataset(
            dataset_name=test_dataset_name,
            prompts=[prompt],
            metadata={},
            # ProbeDataset.tokens is declared as int, so use an int here.
            tokens=0,
            approx_cost=0.0,
        )
    ]
    image_datasets = generate_image_dataset(test_datasets)

    assert len(image_datasets) == 1
    assert isinstance(image_datasets[0], ImageProbeDataset)
    assert image_datasets[0].test_dataset.dataset_name == test_dataset_name
    assert image_datasets[0].image_prompts[0] == b"dummy_image_bytes"
    # The single prompt must have been rendered exactly once.
    mock_generate_image.assert_called_once_with(prompt)
|
||||
@@ -74,7 +74,7 @@ class TestAS:
|
||||
assert len(result) in [0, 1]
|
||||
|
||||
@pytest.mark.skipif(not has_module("garak"), reason="Garak module not installed")
|
||||
def test_garak(self, test_server):
|
||||
def _test_garak(self, test_server):
|
||||
llmSpec = test_spec_assets.SAMPLE_SPEC
|
||||
maxBudget = 1000000
|
||||
max_th = 0.3
|
||||
|
||||
Generated
+1
-6
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@@ -2520,11 +2520,6 @@ files = [
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
|
||||
|
||||
Reference in New Issue
Block a user