mirror of
https://github.com/jiaxiaojunQAQ/OmniSafeBench-MM.git
synced 2026-06-07 16:13:54 +02:00
Enhance project configuration and structure: add build-system requirements to pyproject.toml, update the installation instructions in the README, and refactor the attack classes to use a new create_test_case method. Introduce plugins.yaml for component registration and remove the legacy registry initialization file.
This commit is contained in:
+7
-11
@@ -14,7 +14,7 @@ import importlib
|
||||
import logging
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
|
||||
@@ -339,14 +339,10 @@ Feel free to use these strategies or come up with your own. At the end of your r
|
||||
# CoT optimize prompt
|
||||
optimized_prompt, cot_log = self._optimize_prompt_with_cot(original_prompt)
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
prompt=optimized_prompt,
|
||||
image_path=vap_save_path,
|
||||
metadata={
|
||||
"attack_method": "bap",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": optimized_prompt,
|
||||
"jailbreak_image_path": vap_save_path,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=optimized_prompt,
|
||||
jailbreak_image_path=str(vap_save_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
+8
-12
@@ -20,7 +20,8 @@ from sentence_transformers import SentenceTransformer, util
|
||||
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
|
||||
from core.base_classes import BaseAttack
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
|
||||
|
||||
@@ -500,15 +501,10 @@ class CSDJAttack(BaseAttack):
|
||||
img_path = str(result_root / f"{key}.jpg")
|
||||
grid.save(img_path)
|
||||
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(img_path),
|
||||
prompt=self.adv_prompt,
|
||||
metadata={
|
||||
"attack_method": "cs_dj",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": self.adv_prompt,
|
||||
"jailbreak_image_path": str(img_path),
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=self.adv_prompt,
|
||||
jailbreak_image_path=str(img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
return test_case
|
||||
|
||||
@@ -9,7 +9,7 @@ import textwrap
|
||||
from PIL import Image, ImageFont, ImageDraw
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
|
||||
|
||||
# ===================== Font and Drawing Tools =====================
|
||||
@@ -127,15 +127,10 @@ class FigStepAttack(BaseAttack):
|
||||
img_path = self.output_image_dir / f"{case_id}.png"
|
||||
img.save(img_path) ## Save image
|
||||
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(img_path),
|
||||
prompt=self.cfg.attack_prompt,
|
||||
metadata={
|
||||
"attack_method": "figstep",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": self.cfg.attack_prompt,
|
||||
"jailbreak_image_path": str(img_path),
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=self.cfg.attack_prompt,
|
||||
jailbreak_image_path=str(img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
return test_case
|
||||
|
||||
+8
-14
@@ -11,7 +11,8 @@ import glob
|
||||
from PIL import Image, ImageFont, ImageDraw
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack, BaseModel, UNIFIED_REGISTRY
|
||||
from core.base_classes import BaseAttack, BaseModel
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
from .utils import (
|
||||
generate_caption,
|
||||
@@ -182,17 +183,10 @@ class HadesAttack(BaseAttack):
|
||||
image_save_path = output_dir / image_filename
|
||||
new_image.save(image_save_path)
|
||||
|
||||
# Create test case
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(image_save_path),
|
||||
prompt=adv_prompt,
|
||||
metadata={
|
||||
"attack_method": "hades",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": adv_prompt,
|
||||
"jailbreak_image_path": str(image_save_path),
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=adv_prompt,
|
||||
jailbreak_image_path=str(image_save_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
return test_case
|
||||
|
||||
+8
-13
@@ -25,7 +25,8 @@ from .attack_utils import (
|
||||
get_gpt_response,
|
||||
)
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack, BaseModel, UNIFIED_REGISTRY
|
||||
from core.base_classes import BaseAttack, BaseModel
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
|
||||
|
||||
@@ -1408,16 +1409,10 @@ class HIMRDAttack(BaseAttack):
|
||||
else []
|
||||
)
|
||||
|
||||
test_case = TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=images[0],
|
||||
prompt=prompts_list[0],
|
||||
metadata={
|
||||
"attack_method": "himrd",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": prompts_list[0],
|
||||
"jailbreak_image_path": images[0],
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=prompts_list[0],
|
||||
jailbreak_image_path=str(images[0]),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
return test_case
|
||||
|
||||
+7
-11
@@ -17,7 +17,7 @@ from .utils.torchattacks.attacks.pgdl2 import *
|
||||
import numpy as np
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@@ -96,14 +96,10 @@ class ImgJPAttack(BaseAttack):
|
||||
self.adv_image_path = self.output_image_dir / f"imgjp_adv.jpg"
|
||||
self.adv_image.save(self.adv_image_path)
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
prompt=original_prompt,
|
||||
image_path=self.adv_image_path,
|
||||
metadata={
|
||||
"attack_method": "imgjp",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": original_prompt,
|
||||
"jailbreak_image_path": self.adv_image_path,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=original_prompt,
|
||||
jailbreak_image_path=str(self.adv_image_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
+7
-12
@@ -7,7 +7,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||
from pathlib import Path
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
|
||||
from .utils.mixaug import (
|
||||
mixup_images,
|
||||
@@ -142,15 +142,10 @@ class JOODAttack(BaseAttack):
|
||||
mixed_image_pil.save(img_path) ## Save image
|
||||
adv_prompt = _parse_scenario2prompts(prompt, cfg.aug)
|
||||
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(img_path),
|
||||
prompt=adv_prompt,
|
||||
metadata={
|
||||
"attack_method": "jood",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": adv_prompt,
|
||||
"jailbreak_image_path": str(img_path),
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=adv_prompt,
|
||||
jailbreak_image_path=str(img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
return test_case
|
||||
|
||||
+8
-14
@@ -13,7 +13,7 @@ from PIL import Image, ImageFont, ImageDraw
|
||||
import textwrap
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
|
||||
|
||||
from . import utils
|
||||
@@ -236,17 +236,11 @@ class MMLAttack(BaseAttack):
|
||||
"images_rotate", "images_rotate"
|
||||
).format(scrambled_prompt)
|
||||
|
||||
test_case = TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=img_path,
|
||||
prompt=adv_prompt,
|
||||
metadata={
|
||||
"attack_method": "mml",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": adv_prompt,
|
||||
"jailbreak_image_path": img_path,
|
||||
"aug_type": self.cfg.aug_type,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=adv_prompt,
|
||||
jailbreak_image_path=str(img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
metadata={"aug_type": self.cfg.aug_type},
|
||||
)
|
||||
|
||||
return test_case
|
||||
|
||||
+7
-11
@@ -14,7 +14,7 @@ from tqdm import tqdm
|
||||
from .image_random import Image_Attacker as Image_Attacker_Random
|
||||
from .text_random import Text_Attacker as Text_Attacker_Random
|
||||
from .minigpt_utils import prompt_wrapper, generator
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
from core.data_formats import TestCase
|
||||
from .utils import get_perspective_score
|
||||
|
||||
@@ -439,14 +439,10 @@ class PBAAttack(BaseAttack):
|
||||
## Joint optimization
|
||||
self._optimization()
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=self.save_path,
|
||||
prompt=original_prompt,
|
||||
metadata={
|
||||
"attack_method": "pba",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": original_prompt,
|
||||
"jailbreak_image_path": self.save_path,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=original_prompt,
|
||||
jailbreak_image_path=str(self.save_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
+9
-15
@@ -16,7 +16,8 @@ import threading
|
||||
import logging
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
|
||||
from core.base_classes import BaseAttack
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
|
||||
from diffusers import StableDiffusion3Pipeline
|
||||
@@ -475,18 +476,11 @@ class QRAttack(BaseAttack):
|
||||
img_path = self.output_image_dir / img_name
|
||||
canvas.save(img_path)
|
||||
|
||||
# Create test case
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(img_path),
|
||||
prompt=rephrased,
|
||||
metadata={
|
||||
"attack_method": "qr",
|
||||
"original_prompt": original_prompt,
|
||||
"key_phrase": key_phrase,
|
||||
"jailbreak_prompt": rephrased,
|
||||
"jailbreak_image_path": str(img_path),
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=rephrased,
|
||||
jailbreak_image_path=str(img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
metadata={"key_phrase": key_phrase},
|
||||
)
|
||||
|
||||
return test_case
|
||||
|
||||
+15
-20
@@ -19,7 +19,8 @@ import io
|
||||
import base64
|
||||
|
||||
from core.data_formats import TestCase
|
||||
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
|
||||
from core.base_classes import BaseAttack
|
||||
from core.unified_registry import UNIFIED_REGISTRY
|
||||
from config.config_loader import get_model_config
|
||||
|
||||
from .judge import judger
|
||||
@@ -277,13 +278,13 @@ class SIAttack(BaseAttack):
|
||||
if target_model is None or processor is None:
|
||||
self.logger.error("Target model loading failed, cannot generate test case")
|
||||
# Create error test case
|
||||
return TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=image_path,
|
||||
prompt=original_prompt,
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=original_prompt,
|
||||
jailbreak_image_path=str(image_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
metadata={
|
||||
"attack_method": "si",
|
||||
"original_prompt": original_prompt,
|
||||
"error": "Target model loading failed",
|
||||
"target_model": self.cfg.target_model_name,
|
||||
},
|
||||
@@ -341,17 +342,11 @@ class SIAttack(BaseAttack):
|
||||
image_per.convert("RGB").save(save_img_path, format="PNG")
|
||||
|
||||
# Create test case
|
||||
test_case = TestCase(
|
||||
test_case_id=str(case_id),
|
||||
image_path=str(save_img_path),
|
||||
prompt=final_question,
|
||||
metadata={
|
||||
"attack_method": "si",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": final_question,
|
||||
"jailbreak_image_path": str(save_img_path),
|
||||
"target_model": self.cfg.target_model_name,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=final_question,
|
||||
jailbreak_image_path=str(save_img_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
metadata={"target_model": self.cfg.target_model_name},
|
||||
)
|
||||
|
||||
return test_case
|
||||
|
||||
+7
-10
@@ -91,14 +91,11 @@ class UMKAttack(BaseAttack):
|
||||
save_path = images_dir / f"umk_{case_id}.jpg"
|
||||
save_image(adv_t_save, str(save_path))
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=save_path,
|
||||
prompt=original_prompt + text_suffix,
|
||||
metadata={
|
||||
"attack_method": "umk",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": original_prompt + text_suffix,
|
||||
"jailbreak_image_path": save_path,
|
||||
},
|
||||
jailbreak_prompt = original_prompt + text_suffix
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=jailbreak_prompt,
|
||||
jailbreak_image_path=str(save_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
@@ -13,7 +13,7 @@ import matplotlib.pyplot as plt
|
||||
import torch
|
||||
from transformers import AutoProcessor
|
||||
from transformers import Qwen2_5_VLForConditionalGeneration
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
from core.data_formats import TestCase
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -435,14 +435,10 @@ class VisCRAAttack(BaseAttack):
|
||||
# Modify query
|
||||
modified_query = self._modify_query(query, self.attack_type)
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=attack_image,
|
||||
prompt=modified_query,
|
||||
metadata={
|
||||
"attack_method": "viscra",
|
||||
"original_prompt": query,
|
||||
"jailbreak_prompt": modified_query,
|
||||
"jailbreak_image_path": attack_image,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=modified_query,
|
||||
jailbreak_image_path=str(attack_image),
|
||||
original_prompt=query,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
@@ -12,7 +12,7 @@ from torchvision.utils import save_image
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
|
||||
from core.unified_registry import BaseAttack
|
||||
from core.base_classes import BaseAttack
|
||||
from core.data_formats import TestCase
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -273,14 +273,10 @@ class VisualAdvAttack(BaseAttack):
|
||||
self.save_path = self.output_image_dir / "advimg.jpg"
|
||||
save_image(adv_t_save, str(self.save_path))
|
||||
|
||||
return TestCase(
|
||||
test_case_id=case_id,
|
||||
image_path=self.save_path,
|
||||
prompt=original_prompt,
|
||||
metadata={
|
||||
"attack_method": "visual_adv",
|
||||
"original_prompt": original_prompt,
|
||||
"jailbreak_prompt": original_prompt,
|
||||
"jailbreak_image_path": self.save_path,
|
||||
},
|
||||
return self.create_test_case(
|
||||
case_id=case_id,
|
||||
jailbreak_prompt=original_prompt,
|
||||
jailbreak_image_path=str(self.save_path),
|
||||
original_prompt=original_prompt,
|
||||
original_image_path=str(image_path),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user