mirror of
https://github.com/dongdongunique/EvoSynth.git
synced 2026-05-31 12:29:29 +02:00
Add context length protection in access_runcontext_history
- Add a loop that reduces history_depth step by step while the serialized output exceeds max_total_chars.
- Prevent context overflow when accessing the run context history.
- Print the reduction info and return a `_depth_reduced` field in the result when the depth was lowered.
This commit is contained in:
+5
-5
@@ -41,10 +41,10 @@ def parse_args():
|
||||
# Model configurations (non-attack parameters)
|
||||
parser.add_argument("--attacker-model", type=str, default="deepseek-chat",
|
||||
help="Attacker model name (default: deepseek-chat)")
|
||||
parser.add_argument("--judge-model", type=str, default="gpt-4o-mini",
|
||||
help="Judge model name (default: gpt-4o-mini)")
|
||||
parser.add_argument("--judge-model", type=str, default="gpt-4o",
|
||||
help="Judge model name")
|
||||
parser.add_argument("--target-models", nargs="+",
|
||||
default=["qwen3-max", "mistralai/mistral-large-2512"],
|
||||
default=["gpt-4o"],
|
||||
help="List of target model names to test against")
|
||||
|
||||
# API configuration (infrastructure parameters)
|
||||
@@ -72,7 +72,7 @@ def parse_args():
|
||||
help="Directory for async logs (default: ./async_logs)")
|
||||
|
||||
# Dataset option (non-attack parameter)
|
||||
parser.add_argument("--dataset", type=str, default="harmbench",
|
||||
parser.add_argument("--dataset", type=str, default="harmbench_test",
|
||||
help="Dataset name to use (default: harmbench)")
|
||||
|
||||
# Experiment control options
|
||||
@@ -487,7 +487,7 @@ async def main():
|
||||
|
||||
|
||||
# Save combined results summary
|
||||
timestamp_summary = datetime.strftime("%Y%m%d_%H%M%S")
|
||||
timestamp_summary = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||
results_file = f"{args.results_dir}/multi_model_summary_{timestamp_summary}.json"
|
||||
|
||||
with open(results_file, "w", encoding="utf-8") as f:
|
||||
|
||||
-1
@@ -11,7 +11,6 @@ import aiofiles
|
||||
|
||||
from agents import Agent, function_tool, RunContextWrapper, OpenAIChatCompletionsModel, trace
|
||||
|
||||
from jailbreak_toolbox.attacks.blackbox.implementations.DrAttack import attack
|
||||
|
||||
from .master_coordinator_agent import MasterCoordinatorAgent
|
||||
from .reconnaissance_agent import ReconnaissanceAgent
|
||||
|
||||
+56
-27
@@ -509,32 +509,37 @@ Focus on specific defensive techniques and why this particular attack failed - p
|
||||
@function_tool
|
||||
def access_runcontext_history(
|
||||
ctx: RunContextWrapper,
|
||||
history_depth: int = 4
|
||||
history_depth: int = 4,
|
||||
max_total_chars: int = 450000
|
||||
) -> dict:
|
||||
"""
|
||||
Access both session data and evolution session history from the context
|
||||
|
||||
|
||||
Args:
|
||||
ctx: Runtime context wrapper containing session and evolution context
|
||||
history_depth: Maximum number of items to return for session and evolution data (default: 4) if the depth is too long, then the context will exceed its context length. This will result in poor performance.
|
||||
|
||||
history_depth: Maximum number of items to return for session and evolution data (default: 4)
|
||||
max_total_chars: Maximum total characters allowed in output (default: 450000)
|
||||
|
||||
Returns:
|
||||
Complete history including session data and evolution session data
|
||||
"""
|
||||
try:
|
||||
import json
|
||||
|
||||
def build_result(depth):
|
||||
"""Build result dict with given depth."""
|
||||
# Extract session data from context
|
||||
session_data = getattr(ctx.context, 'session_data', {})
|
||||
created_tools = getattr(ctx.context, 'created_tools', [])
|
||||
ai_generated_tools = getattr(ctx.context, 'ai_generated_tools', [])
|
||||
exploitation_conversations = getattr(ctx.context, 'exploitation_conversations', [])
|
||||
|
||||
|
||||
# Get evolution context from the unified context
|
||||
evolution_context = None
|
||||
if hasattr(ctx.context, 'evolution_context'):
|
||||
evolution_context = ctx.context.evolution_context
|
||||
elif hasattr(ctx.context, 'tools_by_id'):
|
||||
evolution_context = ctx.context
|
||||
|
||||
|
||||
# Get evolution session data
|
||||
evolution_session_data = {}
|
||||
if evolution_context:
|
||||
@@ -546,31 +551,32 @@ def access_runcontext_history(
|
||||
"session_id": getattr(evolution_context, 'session_id', 'unknown'),
|
||||
"created_at": getattr(evolution_context, 'created_at', None)
|
||||
}
|
||||
|
||||
# Apply history_depth to get last several items
|
||||
def get_last_items(data, depth):
|
||||
|
||||
# Apply depth to get last several items
|
||||
def get_last_items(data, d):
|
||||
if isinstance(data, list):
|
||||
return data[-depth:] if data else []
|
||||
else:
|
||||
return data
|
||||
|
||||
limited_session_data = get_last_items(session_data, history_depth)
|
||||
limited_created_tools = get_last_items(created_tools, history_depth)
|
||||
limited_ai_generated_tools = get_last_items(ai_generated_tools, history_depth)
|
||||
limited_conversations = get_last_items(exploitation_conversations, history_depth)
|
||||
|
||||
return data[-d:] if data else []
|
||||
elif isinstance(data, dict):
|
||||
items = list(data.items())[-d:]
|
||||
return dict(items)
|
||||
return data
|
||||
|
||||
limited_session_data = get_last_items(session_data, depth)
|
||||
limited_created_tools = get_last_items(created_tools, depth)
|
||||
limited_ai_generated_tools = get_last_items(ai_generated_tools, depth)
|
||||
limited_conversations = get_last_items(exploitation_conversations, depth)
|
||||
|
||||
limited_evolution_data = {}
|
||||
if evolution_session_data:
|
||||
limited_evolution_data = {
|
||||
"tools_by_id": get_last_items(evolution_session_data["tools_by_id"], history_depth),
|
||||
"tools_by_name": get_last_items(evolution_session_data["tools_by_name"], history_depth),
|
||||
"performance_analyses": get_last_items(evolution_session_data["performance_analyses"], history_depth),
|
||||
"evolution_history": get_last_items(evolution_session_data["evolution_history"], history_depth),
|
||||
"tools_by_id": get_last_items(evolution_session_data["tools_by_id"], depth),
|
||||
"tools_by_name": get_last_items(evolution_session_data["tools_by_name"], depth),
|
||||
"performance_analyses": get_last_items(evolution_session_data["performance_analyses"], depth),
|
||||
"evolution_history": get_last_items(evolution_session_data["evolution_history"], depth),
|
||||
"session_id": evolution_session_data["session_id"],
|
||||
"created_at": evolution_session_data["created_at"]
|
||||
}
|
||||
|
||||
# Return combined data
|
||||
|
||||
return {
|
||||
"history_accessed": True,
|
||||
"session_data": limited_session_data,
|
||||
@@ -578,11 +584,34 @@ def access_runcontext_history(
|
||||
"ai_generated_tools": limited_ai_generated_tools,
|
||||
"exploitation_conversations": limited_conversations,
|
||||
"evolution_session": limited_evolution_data,
|
||||
"history_depth_applied": history_depth,
|
||||
"history_depth_applied": depth,
|
||||
"context_type": type(ctx.context).__name__,
|
||||
"accessed_at": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
|
||||
try:
|
||||
original_depth = history_depth
|
||||
current_depth = history_depth
|
||||
|
||||
# Loop: reduce depth until output fits in max_total_chars
|
||||
while current_depth >= 1:
|
||||
result = build_result(current_depth)
|
||||
result_size = len(json.dumps(result, default=str))
|
||||
|
||||
if result_size <= max_total_chars:
|
||||
if current_depth < original_depth:
|
||||
result["_depth_reduced"] = f"history_depth reduced from {original_depth} to {current_depth} because output exceeded {max_total_chars} chars"
|
||||
print(f"[access_runcontext_history] history_depth reduced from {original_depth} to {current_depth} (size: {result_size}/{max_total_chars})")
|
||||
return result
|
||||
|
||||
current_depth -= 1
|
||||
|
||||
# If even depth=1 exceeds, return minimal result with warning
|
||||
result = build_result(1)
|
||||
result["_depth_reduced"] = f"history_depth reduced to 1, but output may still exceed limit. Original depth: {original_depth}"
|
||||
print(f"[access_runcontext_history] WARNING: Even depth=1 may exceed context limit")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"history_accessed": False,
|
||||
|
||||
@@ -2,7 +2,6 @@ from ..base_model import BaseModel
|
||||
from ...core.registry import model_registry
|
||||
import openai
|
||||
import time
|
||||
import torch
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from typing import Any, Optional, List, Dict, Union
|
||||
@@ -265,6 +264,7 @@ class OpenAIModel(BaseModel):
|
||||
A list of floats representing the embedding vector
|
||||
"""
|
||||
try:
|
||||
import torch
|
||||
clean_text = text_input.replace("\n", " ")
|
||||
|
||||
# Use specified model or default embedding model
|
||||
|
||||
Reference in New Issue
Block a user