From 75eb41f50bc86a10f18b9b767c023e49ccc5ad33 Mon Sep 17 00:00:00 2001 From: dongdongunique <909580378@qq.com> Date: Wed, 10 Dec 2025 03:23:53 +0800 Subject: [PATCH] Add context length protection in access_runcontext_history - Add loop mechanism to reduce history_depth if output exceeds max_total_chars - Prevents context overflow when accessing run context history - Prints reduction info and returns a _depth_reduced field in the result --- eval_async.py | 10 +-- .../ai_agents/autonomous_orchestrator.py | 1 - .../ai_agents/external_power_tools.py | 83 +++++++++++++------ .../models/implementations/openai_model.py | 2 +- 4 files changed, 62 insertions(+), 34 deletions(-) diff --git a/eval_async.py b/eval_async.py index 27338b4..d40fce3 100644 --- a/eval_async.py +++ b/eval_async.py @@ -41,10 +41,10 @@ def parse_args(): # Model configurations (non-attack parameters) parser.add_argument("--attacker-model", type=str, default="deepseek-chat", help="Attacker model name (default: deepseek-chat)") - parser.add_argument("--judge-model", type=str, default="gpt-4o-mini", - help="Judge model name (default: gpt-4o-mini)") + parser.add_argument("--judge-model", type=str, default="gpt-4o", + help="Judge model name") parser.add_argument("--target-models", nargs="+", - default=["qwen3-max", "mistralai/mistral-large-2512"], + default=["gpt-4o"], help="List of target model names to test against") # API configuration (infrastructure parameters) @@ -72,7 +72,7 @@ def parse_args(): help="Directory for async logs (default: ./async_logs)") # Dataset option (non-attack parameter) - parser.add_argument("--dataset", type=str, default="harmbench", + parser.add_argument("--dataset", type=str, default="harmbench_test", help="Dataset name to use (default: harmbench)") # Experiment control options @@ -487,7 +487,7 @@ async def main(): # Save combined results summary - timestamp_summary = datetime.strftime("%Y%m%d_%H%M%S") + timestamp_summary = datetime.utcnow().strftime("%Y%m%d_%H%M%S") results_file = 
f"{args.results_dir}/multi_model_summary_{timestamp_summary}.json" with open(results_file, "w", encoding="utf-8") as f: diff --git a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py index 737a671..d192219 100644 --- a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py +++ b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py @@ -11,7 +11,6 @@ import aiofiles from agents import Agent, function_tool, RunContextWrapper, OpenAIChatCompletionsModel, trace -from jailbreak_toolbox.attacks.blackbox.implementations.DrAttack import attack from .master_coordinator_agent import MasterCoordinatorAgent from .reconnaissance_agent import ReconnaissanceAgent diff --git a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py index ba790db..9bfc6b9 100644 --- a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py +++ b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py @@ -509,32 +509,37 @@ Focus on specific defensive techniques and why this particular attack failed - p @function_tool def access_runcontext_history( ctx: RunContextWrapper, - history_depth: int = 4 + history_depth: int = 4, + max_total_chars: int = 450000 ) -> dict: """ Access both session data and evolution session history from the context - + Args: ctx: Runtime context wrapper containing session and evolution context - history_depth: Maximum number of items to return for session and evolution data (default: 4) if the depth is too long, then the context will exceed its context length. This will result in poor performance. 
- + history_depth: Maximum number of items to return for session and evolution data (default: 4) + max_total_chars: Maximum total characters allowed in output (default: 450000) + Returns: Complete history including session data and evolution session data """ - try: + import json + + def build_result(depth): + """Build result dict with given depth.""" # Extract session data from context session_data = getattr(ctx.context, 'session_data', {}) created_tools = getattr(ctx.context, 'created_tools', []) ai_generated_tools = getattr(ctx.context, 'ai_generated_tools', []) exploitation_conversations = getattr(ctx.context, 'exploitation_conversations', []) - + # Get evolution context from the unified context evolution_context = None if hasattr(ctx.context, 'evolution_context'): evolution_context = ctx.context.evolution_context elif hasattr(ctx.context, 'tools_by_id'): evolution_context = ctx.context - + # Get evolution session data evolution_session_data = {} if evolution_context: @@ -546,31 +551,32 @@ def access_runcontext_history( "session_id": getattr(evolution_context, 'session_id', 'unknown'), "created_at": getattr(evolution_context, 'created_at', None) } - - # Apply history_depth to get last several items - def get_last_items(data, depth): + + # Apply depth to get last several items + def get_last_items(data, d): if isinstance(data, list): - return data[-depth:] if data else [] - else: - return data - - limited_session_data = get_last_items(session_data, history_depth) - limited_created_tools = get_last_items(created_tools, history_depth) - limited_ai_generated_tools = get_last_items(ai_generated_tools, history_depth) - limited_conversations = get_last_items(exploitation_conversations, history_depth) - + return data[-d:] if data else [] + elif isinstance(data, dict): + items = list(data.items())[-d:] + return dict(items) + return data + + limited_session_data = get_last_items(session_data, depth) + limited_created_tools = get_last_items(created_tools, depth) + 
limited_ai_generated_tools = get_last_items(ai_generated_tools, depth) + limited_conversations = get_last_items(exploitation_conversations, depth) + limited_evolution_data = {} if evolution_session_data: limited_evolution_data = { - "tools_by_id": get_last_items(evolution_session_data["tools_by_id"], history_depth), - "tools_by_name": get_last_items(evolution_session_data["tools_by_name"], history_depth), - "performance_analyses": get_last_items(evolution_session_data["performance_analyses"], history_depth), - "evolution_history": get_last_items(evolution_session_data["evolution_history"], history_depth), + "tools_by_id": get_last_items(evolution_session_data["tools_by_id"], depth), + "tools_by_name": get_last_items(evolution_session_data["tools_by_name"], depth), + "performance_analyses": get_last_items(evolution_session_data["performance_analyses"], depth), + "evolution_history": get_last_items(evolution_session_data["evolution_history"], depth), "session_id": evolution_session_data["session_id"], "created_at": evolution_session_data["created_at"] } - - # Return combined data + return { "history_accessed": True, "session_data": limited_session_data, @@ -578,11 +584,34 @@ def access_runcontext_history( "ai_generated_tools": limited_ai_generated_tools, "exploitation_conversations": limited_conversations, "evolution_session": limited_evolution_data, - "history_depth_applied": history_depth, + "history_depth_applied": depth, "context_type": type(ctx.context).__name__, "accessed_at": datetime.now().isoformat() } - + + try: + original_depth = history_depth + current_depth = history_depth + + # Loop: reduce depth until output fits in max_total_chars + while current_depth >= 1: + result = build_result(current_depth) + result_size = len(json.dumps(result, default=str)) + + if result_size <= max_total_chars: + if current_depth < original_depth: + result["_depth_reduced"] = f"history_depth reduced from {original_depth} to {current_depth} because output exceeded 
{max_total_chars} chars" + print(f"[access_runcontext_history] history_depth reduced from {original_depth} to {current_depth} (size: {result_size}/{max_total_chars})") + return result + + current_depth -= 1 + + # If even depth=1 exceeds, return minimal result with warning + result = build_result(1) + result["_depth_reduced"] = f"history_depth reduced to 1, but output may still exceed limit. Original depth: {original_depth}" + print(f"[access_runcontext_history] WARNING: Even depth=1 may exceed context limit") + return result + except Exception as e: return { "history_accessed": False, diff --git a/jailbreak_toolbox/models/implementations/openai_model.py b/jailbreak_toolbox/models/implementations/openai_model.py index e4abbeb..a99b180 100644 --- a/jailbreak_toolbox/models/implementations/openai_model.py +++ b/jailbreak_toolbox/models/implementations/openai_model.py @@ -2,7 +2,6 @@ from ..base_model import BaseModel from ...core.registry import model_registry import openai import time -import torch import base64 from io import BytesIO from typing import Any, Optional, List, Dict, Union @@ -265,6 +264,7 @@ class OpenAIModel(BaseModel): A list of floats representing the embedding vector """ try: + import torch clean_text = text_input.replace("\n", " ") # Use specified model or default embedding model