mirror of
https://github.com/dongdongunique/EvoSynth.git
synced 2026-02-13 17:52:45 +00:00
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
# jailbreak_toolbox/evaluators/multi_thread_evaluator.py
|
|
from typing import List, Dict, Any, Optional, Callable
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from tqdm import tqdm
|
|
from .base_evaluator import BaseEvaluator
|
|
from ..attacks.base_attack import AttackResult
|
|
from .base_evaluator import EvaluationMetrics
|
|
|
|
class MultiThreadedEvaluator(BaseEvaluator):
|
|
"""
|
|
Base class for evaluators that support multi-threaded evaluation.
|
|
|
|
This allows efficient parallel processing of evaluation results.
|
|
"""
|
|
|
|
def __init__(self, max_workers: int = 5, **kwargs):
|
|
"""
|
|
Initialize the multi-threaded evaluator.
|
|
|
|
Args:
|
|
max_workers: Maximum number of worker threads
|
|
"""
|
|
super().__init__(**kwargs)
|
|
self.max_workers = max_workers
|
|
|
|
def evaluate(self, results: List[AttackResult]) -> EvaluationMetrics:
|
|
"""
|
|
Evaluate a batch of attack results in parallel.
|
|
|
|
Args:
|
|
results: List of attack results to evaluate
|
|
|
|
Returns:
|
|
Evaluation metrics
|
|
"""
|
|
if not results:
|
|
return EvaluationMetrics(attack_success_rate=0.0, average_attack_score=0.0)
|
|
|
|
# Track successful attacks
|
|
successful_count = 0
|
|
attack_scores = []
|
|
|
|
# Create progress bar
|
|
progress_bar = tqdm(total=len(results), desc="Evaluating results")
|
|
|
|
# Process results in parallel
|
|
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
|
# Submit all evaluation jobs
|
|
future_to_result = {
|
|
executor.submit(self._evaluate_single, result): i
|
|
for i, result in enumerate(results)
|
|
}
|
|
|
|
# Process completed evaluations
|
|
for future in as_completed(future_to_result):
|
|
success, score = future.result()
|
|
if success:
|
|
successful_count += 1
|
|
attack_scores.append(score)
|
|
progress_bar.update(1)
|
|
|
|
progress_bar.close()
|
|
|
|
# Calculate metrics
|
|
attack_success_rate = successful_count / len(results)
|
|
average_attack_score = sum(attack_scores) / len(attack_scores) if attack_scores else 0.0
|
|
|
|
return EvaluationMetrics(
|
|
attack_success_rate=attack_success_rate,
|
|
average_attack_score=average_attack_score
|
|
)
|
|
|
|
def _evaluate_single(self, result: AttackResult) -> tuple:
|
|
"""
|
|
Evaluate a single attack result.
|
|
|
|
Args:
|
|
result: Attack result to evaluate
|
|
|
|
Returns:
|
|
Tuple of (success, score)
|
|
"""
|
|
raise NotImplementedError("Subclasses must implement the _evaluate_single method") |