Enhance project configuration and structure: add build-system requirements to pyproject.toml, update the README with installation instructions, and refactor the attack classes to use a new create_test_case method. Introduce plugins.yaml for component registration and remove the legacy registry initialization file.

This commit is contained in:
Liao, Jie
2025-12-18 23:32:04 +08:00
parent becb9c8624
commit 84a4d1708e
49 changed files with 1474 additions and 727 deletions
+7 -11
View File
@@ -14,7 +14,7 @@ import importlib
import logging
from core.data_formats import TestCase
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
@@ -339,14 +339,10 @@ Feel free to use these strategies or come up with your own. At the end of your r
# CoT optimize prompt
optimized_prompt, cot_log = self._optimize_prompt_with_cot(original_prompt)
return TestCase(
test_case_id=case_id,
prompt=optimized_prompt,
image_path=vap_save_path,
metadata={
"attack_method": "bap",
"original_prompt": original_prompt,
"jailbreak_prompt": optimized_prompt,
"jailbreak_image_path": vap_save_path,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=optimized_prompt,
jailbreak_image_path=str(vap_save_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
+8 -12
View File
@@ -20,7 +20,8 @@ from sentence_transformers import SentenceTransformer, util
from core.data_formats import TestCase
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
from core.base_classes import BaseAttack
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
@@ -500,15 +501,10 @@ class CSDJAttack(BaseAttack):
img_path = str(result_root / f"{key}.jpg")
grid.save(img_path)
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(img_path),
prompt=self.adv_prompt,
metadata={
"attack_method": "cs_dj",
"original_prompt": original_prompt,
"jailbreak_prompt": self.adv_prompt,
"jailbreak_image_path": str(img_path),
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=self.adv_prompt,
jailbreak_image_path=str(img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
return test_case
+7 -12
View File
@@ -9,7 +9,7 @@ import textwrap
from PIL import Image, ImageFont, ImageDraw
from core.data_formats import TestCase
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
# ===================== Font and Drawing Tools =====================
@@ -127,15 +127,10 @@ class FigStepAttack(BaseAttack):
img_path = self.output_image_dir / f"{case_id}.png"
img.save(img_path) ## Save image
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(img_path),
prompt=self.cfg.attack_prompt,
metadata={
"attack_method": "figstep",
"original_prompt": original_prompt,
"jailbreak_prompt": self.cfg.attack_prompt,
"jailbreak_image_path": str(img_path),
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=self.cfg.attack_prompt,
jailbreak_image_path=str(img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
return test_case
+8 -14
View File
@@ -11,7 +11,8 @@ import glob
from PIL import Image, ImageFont, ImageDraw
from core.data_formats import TestCase
from core.unified_registry import BaseAttack, BaseModel, UNIFIED_REGISTRY
from core.base_classes import BaseAttack, BaseModel
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
from .utils import (
generate_caption,
@@ -182,17 +183,10 @@ class HadesAttack(BaseAttack):
image_save_path = output_dir / image_filename
new_image.save(image_save_path)
# Create test case
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(image_save_path),
prompt=adv_prompt,
metadata={
"attack_method": "hades",
"original_prompt": original_prompt,
"jailbreak_prompt": adv_prompt,
"jailbreak_image_path": str(image_save_path),
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=adv_prompt,
jailbreak_image_path=str(image_save_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
return test_case
+8 -13
View File
@@ -25,7 +25,8 @@ from .attack_utils import (
get_gpt_response,
)
from core.data_formats import TestCase
from core.unified_registry import BaseAttack, BaseModel, UNIFIED_REGISTRY
from core.base_classes import BaseAttack, BaseModel
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
@@ -1408,16 +1409,10 @@ class HIMRDAttack(BaseAttack):
else []
)
test_case = TestCase(
test_case_id=case_id,
image_path=images[0],
prompt=prompts_list[0],
metadata={
"attack_method": "himrd",
"original_prompt": original_prompt,
"jailbreak_prompt": prompts_list[0],
"jailbreak_image_path": images[0],
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=prompts_list[0],
jailbreak_image_path=str(images[0]),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
return test_case
+7 -11
View File
@@ -17,7 +17,7 @@ from .utils.torchattacks.attacks.pgdl2 import *
import numpy as np
from core.data_formats import TestCase
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from dataclasses import dataclass
@@ -96,14 +96,10 @@ class ImgJPAttack(BaseAttack):
self.adv_image_path = self.output_image_dir / f"imgjp_adv.jpg"
self.adv_image.save(self.adv_image_path)
return TestCase(
test_case_id=case_id,
prompt=original_prompt,
image_path=self.adv_image_path,
metadata={
"attack_method": "imgjp",
"original_prompt": original_prompt,
"jailbreak_prompt": original_prompt,
"jailbreak_image_path": self.adv_image_path,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=original_prompt,
jailbreak_image_path=str(self.adv_image_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
+7 -12
View File
@@ -7,7 +7,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from pathlib import Path
from core.data_formats import TestCase
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from .utils.mixaug import (
mixup_images,
@@ -142,15 +142,10 @@ class JOODAttack(BaseAttack):
mixed_image_pil.save(img_path) ## Save image
adv_prompt = _parse_scenario2prompts(prompt, cfg.aug)
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(img_path),
prompt=adv_prompt,
metadata={
"attack_method": "jood",
"original_prompt": original_prompt,
"jailbreak_prompt": adv_prompt,
"jailbreak_image_path": str(img_path),
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=adv_prompt,
jailbreak_image_path=str(img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
return test_case
+8 -14
View File
@@ -13,7 +13,7 @@ from PIL import Image, ImageFont, ImageDraw
import textwrap
from core.data_formats import TestCase
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from . import utils
@@ -236,17 +236,11 @@ class MMLAttack(BaseAttack):
"images_rotate", "images_rotate"
).format(scrambled_prompt)
test_case = TestCase(
test_case_id=case_id,
image_path=img_path,
prompt=adv_prompt,
metadata={
"attack_method": "mml",
"original_prompt": original_prompt,
"jailbreak_prompt": adv_prompt,
"jailbreak_image_path": img_path,
"aug_type": self.cfg.aug_type,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=adv_prompt,
jailbreak_image_path=str(img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
metadata={"aug_type": self.cfg.aug_type},
)
return test_case
+7 -11
View File
@@ -14,7 +14,7 @@ from tqdm import tqdm
from .image_random import Image_Attacker as Image_Attacker_Random
from .text_random import Text_Attacker as Text_Attacker_Random
from .minigpt_utils import prompt_wrapper, generator
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from core.data_formats import TestCase
from .utils import get_perspective_score
@@ -439,14 +439,10 @@ class PBAAttack(BaseAttack):
## Joint optimization
self._optimization()
return TestCase(
test_case_id=case_id,
image_path=self.save_path,
prompt=original_prompt,
metadata={
"attack_method": "pba",
"original_prompt": original_prompt,
"jailbreak_prompt": original_prompt,
"jailbreak_image_path": self.save_path,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=original_prompt,
jailbreak_image_path=str(self.save_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
+9 -15
View File
@@ -16,7 +16,8 @@ import threading
import logging
from core.data_formats import TestCase
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
from core.base_classes import BaseAttack
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
from diffusers import StableDiffusion3Pipeline
@@ -475,18 +476,11 @@ class QRAttack(BaseAttack):
img_path = self.output_image_dir / img_name
canvas.save(img_path)
# Create test case
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(img_path),
prompt=rephrased,
metadata={
"attack_method": "qr",
"original_prompt": original_prompt,
"key_phrase": key_phrase,
"jailbreak_prompt": rephrased,
"jailbreak_image_path": str(img_path),
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=rephrased,
jailbreak_image_path=str(img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
metadata={"key_phrase": key_phrase},
)
return test_case
+15 -20
View File
@@ -19,7 +19,8 @@ import io
import base64
from core.data_formats import TestCase
from core.unified_registry import BaseAttack, UNIFIED_REGISTRY
from core.base_classes import BaseAttack
from core.unified_registry import UNIFIED_REGISTRY
from config.config_loader import get_model_config
from .judge import judger
@@ -277,13 +278,13 @@ class SIAttack(BaseAttack):
if target_model is None or processor is None:
self.logger.error("Target model loading failed, cannot generate test case")
# Create error test case
return TestCase(
test_case_id=str(case_id),
image_path=image_path,
prompt=original_prompt,
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=original_prompt,
jailbreak_image_path=str(image_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
metadata={
"attack_method": "si",
"original_prompt": original_prompt,
"error": "Target model loading failed",
"target_model": self.cfg.target_model_name,
},
@@ -341,17 +342,11 @@ class SIAttack(BaseAttack):
image_per.convert("RGB").save(save_img_path, format="PNG")
# Create test case
test_case = TestCase(
test_case_id=str(case_id),
image_path=str(save_img_path),
prompt=final_question,
metadata={
"attack_method": "si",
"original_prompt": original_prompt,
"jailbreak_prompt": final_question,
"jailbreak_image_path": str(save_img_path),
"target_model": self.cfg.target_model_name,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=final_question,
jailbreak_image_path=str(save_img_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
metadata={"target_model": self.cfg.target_model_name},
)
return test_case
+7 -10
View File
@@ -91,14 +91,11 @@ class UMKAttack(BaseAttack):
save_path = images_dir / f"umk_{case_id}.jpg"
save_image(adv_t_save, str(save_path))
return TestCase(
test_case_id=case_id,
image_path=save_path,
prompt=original_prompt + text_suffix,
metadata={
"attack_method": "umk",
"original_prompt": original_prompt,
"jailbreak_prompt": original_prompt + text_suffix,
"jailbreak_image_path": save_path,
},
jailbreak_prompt = original_prompt + text_suffix
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=jailbreak_prompt,
jailbreak_image_path=str(save_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)
+7 -11
View File
@@ -13,7 +13,7 @@ import matplotlib.pyplot as plt
import torch
from transformers import AutoProcessor
from transformers import Qwen2_5_VLForConditionalGeneration
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from core.data_formats import TestCase
from dataclasses import dataclass
@@ -435,14 +435,10 @@ class VisCRAAttack(BaseAttack):
# Modify query
modified_query = self._modify_query(query, self.attack_type)
return TestCase(
test_case_id=case_id,
image_path=attack_image,
prompt=modified_query,
metadata={
"attack_method": "viscra",
"original_prompt": query,
"jailbreak_prompt": modified_query,
"jailbreak_image_path": attack_image,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=modified_query,
jailbreak_image_path=str(attack_image),
original_prompt=query,
original_image_path=str(image_path),
)
+7 -11
View File
@@ -12,7 +12,7 @@ from torchvision.utils import save_image
from PIL import Image
from tqdm import tqdm
from core.unified_registry import BaseAttack
from core.base_classes import BaseAttack
from core.data_formats import TestCase
from dataclasses import dataclass
@@ -273,14 +273,10 @@ class VisualAdvAttack(BaseAttack):
self.save_path = self.output_image_dir / "advimg.jpg"
save_image(adv_t_save, str(self.save_path))
return TestCase(
test_case_id=case_id,
image_path=self.save_path,
prompt=original_prompt,
metadata={
"attack_method": "visual_adv",
"original_prompt": original_prompt,
"jailbreak_prompt": original_prompt,
"jailbreak_image_path": self.save_path,
},
return self.create_test_case(
case_id=case_id,
jailbreak_prompt=original_prompt,
jailbreak_image_path=str(self.save_path),
original_prompt=original_prompt,
original_image_path=str(image_path),
)