Files
EvoSynth/jailbreak_toolbox/evaluators/multi_thread_evaluator.py
dongdongunique f3af94df0b first commit
2025-12-10 00:54:02 +08:00

83 lines
2.8 KiB
Python

# jailbreak_toolbox/evaluators/multi_thread_evaluator.py
from typing import List, Dict, Any, Optional, Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from .base_evaluator import BaseEvaluator
from ..attacks.base_attack import AttackResult
from .base_evaluator import EvaluationMetrics
class MultiThreadedEvaluator(BaseEvaluator):
"""
Base class for evaluators that support multi-threaded evaluation.
This allows efficient parallel processing of evaluation results.
"""
def __init__(self, max_workers: int = 5, **kwargs):
"""
Initialize the multi-threaded evaluator.
Args:
max_workers: Maximum number of worker threads
"""
super().__init__(**kwargs)
self.max_workers = max_workers
def evaluate(self, results: List[AttackResult]) -> EvaluationMetrics:
"""
Evaluate a batch of attack results in parallel.
Args:
results: List of attack results to evaluate
Returns:
Evaluation metrics
"""
if not results:
return EvaluationMetrics(attack_success_rate=0.0, average_attack_score=0.0)
# Track successful attacks
successful_count = 0
attack_scores = []
# Create progress bar
progress_bar = tqdm(total=len(results), desc="Evaluating results")
# Process results in parallel
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
# Submit all evaluation jobs
future_to_result = {
executor.submit(self._evaluate_single, result): i
for i, result in enumerate(results)
}
# Process completed evaluations
for future in as_completed(future_to_result):
success, score = future.result()
if success:
successful_count += 1
attack_scores.append(score)
progress_bar.update(1)
progress_bar.close()
# Calculate metrics
attack_success_rate = successful_count / len(results)
average_attack_score = sum(attack_scores) / len(attack_scores) if attack_scores else 0.0
return EvaluationMetrics(
attack_success_rate=attack_success_rate,
average_attack_score=average_attack_score
)
def _evaluate_single(self, result: AttackResult) -> tuple:
"""
Evaluate a single attack result.
Args:
result: Attack result to evaluate
Returns:
Tuple of (success, score)
"""
raise NotImplementedError("Subclasses must implement the _evaluate_single method")