Add context length protection in access_runcontext_history

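- Add a loop that lowers history_depth one step at a time until the JSON-serialized output fits within max_total_chars
- Prevent context overflow when accessing run context history; if even depth 1 is too large, return the depth-1 result with a warning
- Print reduction info and return a `_depth_reduced` field in the result whenever the depth was lowered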
This commit is contained in:
dongdongunique
2025-12-10 03:23:53 +08:00
parent a5e5185c10
commit 75eb41f50b
4 changed files with 62 additions and 34 deletions
+5 -5
View File
@@ -41,10 +41,10 @@ def parse_args():
# Model configurations (non-attack parameters)
parser.add_argument("--attacker-model", type=str, default="deepseek-chat",
help="Attacker model name (default: deepseek-chat)")
parser.add_argument("--judge-model", type=str, default="gpt-4o-mini",
help="Judge model name (default: gpt-4o-mini)")
parser.add_argument("--judge-model", type=str, default="gpt-4o",
help="Judge model name")
parser.add_argument("--target-models", nargs="+",
default=["qwen3-max", "mistralai/mistral-large-2512"],
default=["gpt-4o"],
help="List of target model names to test against")
# API configuration (infrastructure parameters)
@@ -72,7 +72,7 @@ def parse_args():
help="Directory for async logs (default: ./async_logs)")
# Dataset option (non-attack parameter)
parser.add_argument("--dataset", type=str, default="harmbench",
parser.add_argument("--dataset", type=str, default="harmbench_test",
help="Dataset name to use (default: harmbench)")
# Experiment control options
@@ -487,7 +487,7 @@ async def main():
# Save combined results summary
timestamp_summary = datetime.strftime("%Y%m%d_%H%M%S")
timestamp_summary = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
results_file = f"{args.results_dir}/multi_model_summary_{timestamp_summary}.json"
with open(results_file, "w", encoding="utf-8") as f:
@@ -11,7 +11,6 @@ import aiofiles
from agents import Agent, function_tool, RunContextWrapper, OpenAIChatCompletionsModel, trace
from jailbreak_toolbox.attacks.blackbox.implementations.DrAttack import attack
from .master_coordinator_agent import MasterCoordinatorAgent
from .reconnaissance_agent import ReconnaissanceAgent
@@ -509,32 +509,37 @@ Focus on specific defensive techniques and why this particular attack failed - p
@function_tool
def access_runcontext_history(
ctx: RunContextWrapper,
history_depth: int = 4
history_depth: int = 4,
max_total_chars: int = 450000
) -> dict:
"""
Access both session data and evolution session history from the context
Args:
ctx: Runtime context wrapper containing session and evolution context
history_depth: Maximum number of items to return for session and evolution data (default: 4) if the depth is too long, then the context will exceed its context length. This will result in poor performance.
history_depth: Maximum number of items to return for session and evolution data (default: 4)
max_total_chars: Maximum total characters allowed in output (default: 450000)
Returns:
Complete history including session data and evolution session data
"""
try:
import json
def build_result(depth):
"""Build result dict with given depth."""
# Extract session data from context
session_data = getattr(ctx.context, 'session_data', {})
created_tools = getattr(ctx.context, 'created_tools', [])
ai_generated_tools = getattr(ctx.context, 'ai_generated_tools', [])
exploitation_conversations = getattr(ctx.context, 'exploitation_conversations', [])
# Get evolution context from the unified context
evolution_context = None
if hasattr(ctx.context, 'evolution_context'):
evolution_context = ctx.context.evolution_context
elif hasattr(ctx.context, 'tools_by_id'):
evolution_context = ctx.context
# Get evolution session data
evolution_session_data = {}
if evolution_context:
@@ -546,31 +551,32 @@ def access_runcontext_history(
"session_id": getattr(evolution_context, 'session_id', 'unknown'),
"created_at": getattr(evolution_context, 'created_at', None)
}
# Apply history_depth to get last several items
def get_last_items(data, depth):
# Apply depth to get last several items
def get_last_items(data, d):
if isinstance(data, list):
return data[-depth:] if data else []
else:
return data
limited_session_data = get_last_items(session_data, history_depth)
limited_created_tools = get_last_items(created_tools, history_depth)
limited_ai_generated_tools = get_last_items(ai_generated_tools, history_depth)
limited_conversations = get_last_items(exploitation_conversations, history_depth)
return data[-d:] if data else []
elif isinstance(data, dict):
items = list(data.items())[-d:]
return dict(items)
return data
limited_session_data = get_last_items(session_data, depth)
limited_created_tools = get_last_items(created_tools, depth)
limited_ai_generated_tools = get_last_items(ai_generated_tools, depth)
limited_conversations = get_last_items(exploitation_conversations, depth)
limited_evolution_data = {}
if evolution_session_data:
limited_evolution_data = {
"tools_by_id": get_last_items(evolution_session_data["tools_by_id"], history_depth),
"tools_by_name": get_last_items(evolution_session_data["tools_by_name"], history_depth),
"performance_analyses": get_last_items(evolution_session_data["performance_analyses"], history_depth),
"evolution_history": get_last_items(evolution_session_data["evolution_history"], history_depth),
"tools_by_id": get_last_items(evolution_session_data["tools_by_id"], depth),
"tools_by_name": get_last_items(evolution_session_data["tools_by_name"], depth),
"performance_analyses": get_last_items(evolution_session_data["performance_analyses"], depth),
"evolution_history": get_last_items(evolution_session_data["evolution_history"], depth),
"session_id": evolution_session_data["session_id"],
"created_at": evolution_session_data["created_at"]
}
# Return combined data
return {
"history_accessed": True,
"session_data": limited_session_data,
@@ -578,11 +584,34 @@ def access_runcontext_history(
"ai_generated_tools": limited_ai_generated_tools,
"exploitation_conversations": limited_conversations,
"evolution_session": limited_evolution_data,
"history_depth_applied": history_depth,
"history_depth_applied": depth,
"context_type": type(ctx.context).__name__,
"accessed_at": datetime.now().isoformat()
}
try:
original_depth = history_depth
current_depth = history_depth
# Loop: reduce depth until output fits in max_total_chars
while current_depth >= 1:
result = build_result(current_depth)
result_size = len(json.dumps(result, default=str))
if result_size <= max_total_chars:
if current_depth < original_depth:
result["_depth_reduced"] = f"history_depth reduced from {original_depth} to {current_depth} because output exceeded {max_total_chars} chars"
print(f"[access_runcontext_history] history_depth reduced from {original_depth} to {current_depth} (size: {result_size}/{max_total_chars})")
return result
current_depth -= 1
# If even depth=1 exceeds, return minimal result with warning
result = build_result(1)
result["_depth_reduced"] = f"history_depth reduced to 1, but output may still exceed limit. Original depth: {original_depth}"
print(f"[access_runcontext_history] WARNING: Even depth=1 may exceed context limit")
return result
except Exception as e:
return {
"history_accessed": False,
@@ -2,7 +2,6 @@ from ..base_model import BaseModel
from ...core.registry import model_registry
import openai
import time
import torch
import base64
from io import BytesIO
from typing import Any, Optional, List, Dict, Union
@@ -265,6 +264,7 @@ class OpenAIModel(BaseModel):
A list of floats representing the embedding vector
"""
try:
import torch
clean_text = text_input.replace("\n", " ")
# Use specified model or default embedding model