From 75eb41f50bc86a10f18b9b767c023e49ccc5ad33 Mon Sep 17 00:00:00 2001
From: dongdongunique <909580378@qq.com>
Date: Wed, 10 Dec 2025 03:23:53 +0800
Subject: [PATCH] Add context length protection in access_runcontext_history

- Add a loop that decrements history_depth (down to 1) until the JSON-serialized output fits within max_total_chars
- Prevents context overflow when accessing run context history
- Prints reduction info and returns a _depth_reduced field in the result when the depth was lowered
---
 eval_async.py                                 | 10 +--
 .../ai_agents/autonomous_orchestrator.py      |  1 -
 .../ai_agents/external_power_tools.py         | 83 +++++++++++++------
 .../models/implementations/openai_model.py    |  2 +-
 4 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/eval_async.py b/eval_async.py
index 27338b4..d40fce3 100644
--- a/eval_async.py
+++ b/eval_async.py
@@ -41,10 +41,10 @@ def parse_args():
     # Model configurations (non-attack parameters)
     parser.add_argument("--attacker-model", type=str, default="deepseek-chat",
                        help="Attacker model name (default: deepseek-chat)")
-    parser.add_argument("--judge-model", type=str, default="gpt-4o-mini",
-                       help="Judge model name (default: gpt-4o-mini)")
+    parser.add_argument("--judge-model", type=str, default="gpt-4o",
+                       help="Judge model name")
     parser.add_argument("--target-models", nargs="+",
-                       default=["qwen3-max", "mistralai/mistral-large-2512"],
+                       default=["gpt-4o"],
                        help="List of target model names to test against")
 
     # API configuration (infrastructure parameters)
@@ -72,7 +72,7 @@ def parse_args():
                        help="Directory for async logs (default: ./async_logs)")
 
     # Dataset option (non-attack parameter)
-    parser.add_argument("--dataset", type=str, default="harmbench",
+    parser.add_argument("--dataset", type=str, default="harmbench_test",
                        help="Dataset name to use (default: harmbench)")
 
     # Experiment control options
@@ -487,7 +487,7 @@ async def main():
 
 
     # Save combined results summary
-    timestamp_summary = datetime.strftime("%Y%m%d_%H%M%S")
+    timestamp_summary = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
     results_file = f"{args.results_dir}/multi_model_summary_{timestamp_summary}.json"
 
     with open(results_file, "w", encoding="utf-8") as f:
diff --git a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py
index 737a671..d192219 100644
--- a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py
+++ b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/autonomous_orchestrator.py
@@ -11,7 +11,6 @@ import aiofiles
 
 from agents import Agent, function_tool, RunContextWrapper, OpenAIChatCompletionsModel, trace
 
-from jailbreak_toolbox.attacks.blackbox.implementations.DrAttack import attack
 
 from .master_coordinator_agent import MasterCoordinatorAgent
 from .reconnaissance_agent import ReconnaissanceAgent
diff --git a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py
index ba790db..9bfc6b9 100644
--- a/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py
+++ b/jailbreak_toolbox/attacks/blackbox/implementations/evosynth/ai_agents/external_power_tools.py
@@ -509,32 +509,37 @@ Focus on specific defensive techniques and why this particular attack failed - p
 @function_tool
 def access_runcontext_history(
     ctx: RunContextWrapper,
-    history_depth: int = 4
+    history_depth: int = 4,
+    max_total_chars: int = 450000
 ) -> dict:
     """
     Access both session data and evolution session history from the context
-    
+
     Args:
         ctx: Runtime context wrapper containing session and evolution context
-        history_depth: Maximum number of items to return for session and evolution data (default: 4) if the depth is too long, then the context will exceed its context length. This will result in poor performance.
-    
+        history_depth: Maximum number of items to return for session and evolution data (default: 4)
+        max_total_chars: Maximum total characters allowed in output (default: 450000)
+
     Returns:
         Complete history including session data and evolution session data
     """
-    try:
+    import json
+
+    def build_result(depth):
+        """Build result dict with given depth."""
         # Extract session data from context
         session_data = getattr(ctx.context, 'session_data', {})
         created_tools = getattr(ctx.context, 'created_tools', [])
         ai_generated_tools = getattr(ctx.context, 'ai_generated_tools', [])
         exploitation_conversations = getattr(ctx.context, 'exploitation_conversations', [])
-        
+
         # Get evolution context from the unified context
         evolution_context = None
         if hasattr(ctx.context, 'evolution_context'):
             evolution_context = ctx.context.evolution_context
         elif hasattr(ctx.context, 'tools_by_id'):
             evolution_context = ctx.context
-        
+
         # Get evolution session data
         evolution_session_data = {}
         if evolution_context:
@@ -546,31 +551,32 @@ def access_runcontext_history(
                 "session_id": getattr(evolution_context, 'session_id', 'unknown'),
                 "created_at": getattr(evolution_context, 'created_at', None)
             }
-        
-        # Apply history_depth to get last several items
-        def get_last_items(data, depth):
+
+        # Apply depth to get last several items
+        def get_last_items(data, d):
             if isinstance(data, list):
-                return data[-depth:] if data else []
-            else:
-                return data
-        
-        limited_session_data = get_last_items(session_data, history_depth)
-        limited_created_tools = get_last_items(created_tools, history_depth)
-        limited_ai_generated_tools = get_last_items(ai_generated_tools, history_depth)
-        limited_conversations = get_last_items(exploitation_conversations, history_depth)
-        
+                return data[-d:] if data else []
+            elif isinstance(data, dict):
+                items = list(data.items())[-d:]
+                return dict(items)
+            return data
+
+        limited_session_data = get_last_items(session_data, depth)
+        limited_created_tools = get_last_items(created_tools, depth)
+        limited_ai_generated_tools = get_last_items(ai_generated_tools, depth)
+        limited_conversations = get_last_items(exploitation_conversations, depth)
+
         limited_evolution_data = {}
         if evolution_session_data:
             limited_evolution_data = {
-                "tools_by_id": get_last_items(evolution_session_data["tools_by_id"], history_depth),
-                "tools_by_name": get_last_items(evolution_session_data["tools_by_name"], history_depth),
-                "performance_analyses": get_last_items(evolution_session_data["performance_analyses"], history_depth),
-                "evolution_history": get_last_items(evolution_session_data["evolution_history"], history_depth),
+                "tools_by_id": get_last_items(evolution_session_data["tools_by_id"], depth),
+                "tools_by_name": get_last_items(evolution_session_data["tools_by_name"], depth),
+                "performance_analyses": get_last_items(evolution_session_data["performance_analyses"], depth),
+                "evolution_history": get_last_items(evolution_session_data["evolution_history"], depth),
                 "session_id": evolution_session_data["session_id"],
                 "created_at": evolution_session_data["created_at"]
             }
-        
-        # Return combined data
+
         return {
             "history_accessed": True,
             "session_data": limited_session_data,
@@ -578,11 +584,34 @@ def access_runcontext_history(
             "ai_generated_tools": limited_ai_generated_tools,
             "exploitation_conversations": limited_conversations,
             "evolution_session": limited_evolution_data,
-            "history_depth_applied": history_depth,
+            "history_depth_applied": depth,
             "context_type": type(ctx.context).__name__,
             "accessed_at": datetime.now().isoformat()
         }
-        
+
+    try:
+        original_depth = history_depth
+        current_depth = history_depth
+
+        # Loop: reduce depth until output fits in max_total_chars
+        while current_depth >= 1:
+            result = build_result(current_depth)
+            result_size = len(json.dumps(result, default=str))
+
+            if result_size <= max_total_chars:
+                if current_depth < original_depth:
+                    result["_depth_reduced"] = f"history_depth reduced from {original_depth} to {current_depth} because output exceeded {max_total_chars} chars"
+                    print(f"[access_runcontext_history] history_depth reduced from {original_depth} to {current_depth} (size: {result_size}/{max_total_chars})")
+                return result
+
+            current_depth -= 1
+
+        # If even depth=1 exceeds, return minimal result with warning
+        result = build_result(1)
+        result["_depth_reduced"] = f"history_depth reduced to 1, but output may still exceed limit. Original depth: {original_depth}"
+        print(f"[access_runcontext_history] WARNING: Even depth=1 may exceed context limit")
+        return result
+
     except Exception as e:
         return {
             "history_accessed": False,
diff --git a/jailbreak_toolbox/models/implementations/openai_model.py b/jailbreak_toolbox/models/implementations/openai_model.py
index e4abbeb..a99b180 100644
--- a/jailbreak_toolbox/models/implementations/openai_model.py
+++ b/jailbreak_toolbox/models/implementations/openai_model.py
@@ -2,7 +2,6 @@ from ..base_model import BaseModel
 from ...core.registry import model_registry
 import openai
 import time
-import torch
 import base64
 from io import BytesIO
 from typing import Any, Optional, List, Dict, Union
@@ -265,6 +264,7 @@ class OpenAIModel(BaseModel):
             A list of floats representing the embedding vector
         """
         try:
+            import torch
             clean_text = text_input.replace("\n", " ")
 
             # Use specified model or default embedding model