From 6e7106ea264707dac4bfa988ea998e811af8b3a7 Mon Sep 17 00:00:00 2001 From: Luca Beurer-Kellner Date: Thu, 6 Nov 2025 09:39:30 +0100 Subject: [PATCH] fix: update anthropic model --- gateway/routes/anthropic.py | 2 +- resources/streaming_chunk_text/anthropic.txt | 2 +- ...est_anthropic_header_with_invariant_key.py | 76 ++++ .../test_anthropic_with_tool_call.py | 380 ++++++++++++++++++ .../test_anthropic_without_tool_call.py | 136 +++++++ .../guardrails/test_guardrails_anthropic.py | 12 +- 6 files changed, 600 insertions(+), 8 deletions(-) create mode 100644 tests/integration/anthropic/test_anthropic_header_with_invariant_key.py create mode 100644 tests/integration/anthropic/test_anthropic_with_tool_call.py create mode 100644 tests/integration/anthropic/test_anthropic_without_tool_call.py diff --git a/gateway/routes/anthropic.py b/gateway/routes/anthropic.py index b0e4904..e706ecb 100644 --- a/gateway/routes/anthropic.py +++ b/gateway/routes/anthropic.py @@ -263,7 +263,7 @@ class AnthropicProvider(BaseProvider): b'event: message_start\ndata: {"type":"message_start","message": {"id":"msg_01LkayzAaw7b7QkUAw91psyx","type":"message","role":"assistant" - ,"model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null, + ,"model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null, "stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_to' and diff --git a/resources/streaming_chunk_text/anthropic.txt b/resources/streaming_chunk_text/anthropic.txt index c8c4480..8252d32 100644 --- a/resources/streaming_chunk_text/anthropic.txt +++ b/resources/streaming_chunk_text/anthropic.txt @@ -1,4 +1,4 @@ -anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n' +anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n' ,b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" making it an attractive destination for both business"} }\n\n' , b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" and leisure."} }\n\n' , b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n' diff --git a/tests/integration/anthropic/test_anthropic_header_with_invariant_key.py b/tests/integration/anthropic/test_anthropic_header_with_invariant_key.py new file mode 100644 index 0000000..1b43198 --- /dev/null +++ b/tests/integration/anthropic/test_anthropic_header_with_invariant_key.py @@ -0,0 +1,76 @@ +"""Test the Anthropic gateway with Invariant key in the ANTHROPIC_API_KEY.""" + +import os +import sys +import time +import uuid +from unittest.mock import patch + +# Add integration folder (parent) to sys.path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import anthropic +import pytest +import requests +from httpx import Client + +# Pytest plugins +pytest_plugins = ("pytest_asyncio",) + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +async def test_gateway_with_invariant_key_in_anthropic_key_header( + gateway_url, explorer_api_url +): + """Test the Anthropic gateway with Invariant key in the Anthropic key""" + anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") + dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}" + invariant_key_suffix = f";invariant-auth={os.getenv('INVARIANT_API_KEY')}" + with patch.dict( + os.environ, + {"ANTHROPIC_API_KEY": anthropic_api_key + invariant_key_suffix}, + ): + client = anthropic.Anthropic( + http_client=Client(), + base_url=f"{gateway_url}/api/v1/gateway/{dataset_name}/anthropic", + ) + response = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Give me an introduction to Zurich, Switzerland within 200 words.", + } + ], + ) + + assert response is not None + response_text = response.content[0].text + assert "zurich" in response_text.lower() + + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + assert len(traces) == 1 + + trace_id = traces[0]["id"] + get_trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5 + ) + trace = get_trace_response.json() + assert trace["messages"] == [ + { + "role": "user", + "content": "Give me an introduction to Zurich, Switzerland within 200 words.", + }, + {"role": "assistant", "content": response_text}, + ] diff --git a/tests/integration/anthropic/test_anthropic_with_tool_call.py b/tests/integration/anthropic/test_anthropic_with_tool_call.py new file mode 100644 index 0000000..b7777d9 --- /dev/null +++ b/tests/integration/anthropic/test_anthropic_with_tool_call.py @@ -0,0 +1,380 @@ +"""Test the Anthropic messages API with tool call for the weather agent.""" + +import base64 +import json +import os +import sys +import time +import uuid +from pathlib import Path + +# Add integration folder (parent) to sys.path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import anthropic +import pytest +import requests +from utils import get_anthropic_client + +# Pytest plugins +pytest_plugins = ("pytest_asyncio",) + + +class WeatherAgent: + """Weather agent to get the current weather in a given location.""" + + def __init__(self, gateway_url, push_to_explorer): + self.dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}" + self.client = get_anthropic_client( + gateway_url, push_to_explorer, self.dataset_name + ) + self.get_weather_function = { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": 'The unit of temperature, either "celsius" or "fahrenheit"', + }, + }, + "required": ["location"], + }, + } + + def get_response(self, messages: list[dict]) -> list[dict]: + """ + Get the response from the agent for a given user query for weather. + """ + response_list = [] + while True: + response = self.client.messages.create( + tools=[self.get_weather_function], + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=messages, + ) + response_list.append(response) + # If there's tool call, Extract the tool call parameters from the response + if len(response.content) > 1 and response.content[1].type == "tool_use": + tool_call_params = response.content[1].input + tool_call_result = self.get_weather(tool_call_params["location"]) + tool_call_id = response.content[1].id + messages.append({"role": response.role, "content": response.content}) + messages.append( + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": tool_call_id, + "content": tool_call_result, + } + ], + } + ) + else: + return response_list + + def get_streaming_response(self, messages: list[dict]) -> list[dict]: + """Get streaming response from the agent for a given user query for weather.""" + response_list = [] + + def clean_quotes(text): + # Convert \' to ' + text = text.replace("'", "'") + # Convert \" to " + text = text.replace('"', '"') + text = text.replace("\n", " ") + return text + + while True: + json_data = "" + content = [] + event = None + with self.client.messages.stream( + tools=[self.get_weather_function], + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=messages, + ) as stream: + for event in stream: + if isinstance(event, anthropic.types.RawContentBlockStartEvent): + # Start a new block + current_block = event.content_block + current_text = "" + elif isinstance(event, anthropic.types.RawContentBlockDeltaEvent): + if hasattr(event.delta, "text"): + # Accumulate text for TextBlock + current_text += clean_quotes(event.delta.text) + elif hasattr(event.delta, "partial_json"): + # Accumulate JSON for ToolUseBlock + json_data += clean_quotes(event.delta.partial_json) + current_text += clean_quotes(event.delta.partial_json) + elif isinstance(event, anthropic.types.RawContentBlockStopEvent): + # Block is complete, add it to content + if current_block.type == "text": + content.append( + anthropic.types.TextBlock( + citations=None, text=current_text, type="text" + ) + ) + elif current_block.type == "tool_use": + content.append( + anthropic.types.ToolUseBlock( + id=current_block.id, + input=json.loads(current_text), + name=current_block.name, + type="tool_use", + ) + ) + if content: + response_list.append(content) + if ( + isinstance(event, anthropic.types.RawMessageStopEvent) + and event.message.stop_reason == "tool_use" + ): + tool_call_params = json.loads(json_data) + tool_call_result = self.get_weather(tool_call_params["location"]) + messages.append({"role": "assistant", "content": content}) + messages.append( + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": content[-1].id, + "content": tool_call_result, + } + ], + } + ) + else: + return response_list + + def get_weather(self, location: str): + """Get the current weather in a given location using latitude and longitude.""" + response = f"""Weather in {location}: + Good morning! Expect overcast skies with intermittent showers throughout the day. + Temperatures will range from a cool 15°C in the early hours to around 19°C by mid-afternoon. + Light winds from the northeast at about 10 km/h will keep conditions mild. + It might be a good idea to carry an umbrella if you’re heading out. Stay dry and have a great day! + """ + return response + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +@pytest.mark.parametrize("push_to_explorer", [False, True]) +async def test_response_with_tool_call(explorer_api_url, gateway_url, push_to_explorer): + """Test the chat completion without streaming for the weather agent.""" + + weather_agent = WeatherAgent(gateway_url, push_to_explorer) + + query = "Tell me the weather for New York in Celsius" + + city = "new york" + # Process each query + responses = [] + messages = [{"role": "user", "content": query}] + response = weather_agent.get_response(messages) + assert response is not None + assert response[0].role == "assistant" + assert response[0].stop_reason == "tool_use" + assert response[0].content[0].type == "text" + assert response[0].content[1].type == "tool_use" + assert city in response[0].content[1].input["location"].lower() + + assert response[1].role == "assistant" + assert response[1].stop_reason == "end_turn" + responses.append(response) + + if push_to_explorer: + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{weather_agent.dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + trace = traces[-1] + trace_id = trace["id"] + # Fetch the trace + trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5 + ) + trace = trace_response.json() + trace_messages = trace["messages"] + + assert trace_messages[0]["role"] == "user" + assert trace_messages[0]["content"] == query + assert trace_messages[1]["role"] == "assistant" + assert city in trace_messages[1]["content"].lower() + assert trace_messages[2]["role"] == "assistant" + assert trace_messages[2]["tool_calls"][0]["function"]["name"] == "get_weather" + assert ( + city + in trace_messages[2]["tool_calls"][0]["function"]["arguments"][ + "location" + ].lower() + ) + assert trace_messages[3]["role"] == "tool" + assert trace_messages[4]["role"] == "assistant" + assert city in trace_messages[4]["content"].lower() + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +@pytest.mark.parametrize("push_to_explorer", [False, True]) +async def test_streaming_response_with_tool_call( + explorer_api_url, gateway_url, push_to_explorer +): + """Test the chat completion with streaming for the weather agent.""" + weather_agent = WeatherAgent(gateway_url, push_to_explorer) + + query = "Tell me the weather for New York in Celsius" + city = "new york" + + messages = [{"role": "user", "content": query}] + response = weather_agent.get_streaming_response(messages) + + if len(response) == 2: + assert response is not None + assert response[0][0].type == "text" + assert response[0][1].type == "tool_use" + assert response[0][1].name == "get_weather" + assert city in response[0][1].input["location"].lower() + + assert response[1][0].type == "text" + assert city in response[1][0].text.lower() + elif len(response) == 1: + # expected output in this case is something like this: + # [[TextBlock(text="I'll help you check the weather in New York using the get_weather function.", type='text', citations=None), ToolUseBlock(id='toolu_019VZsmxuUhShou2EpPBxvpe', input={'location': 'New York, NY', 'unit': 'celsius'}, name='get_weather', type='tool_use')]] + + assert response is not None + assert response[0][0].type == "text" + assert response[0][1].type == "tool_use" + assert response[0][1].name == "get_weather" + assert city in response[0][1].input["location"].lower() + + else: + assert False, "Expected response length 2 or 1, but got" + str(response) + + if push_to_explorer: + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{weather_agent.dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + + trace = traces[-1] + trace_id = trace["id"] + # Fetch the trace + trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5 + ) + trace = trace_response.json() + trace_messages = trace["messages"] + assert trace_messages[0]["role"] == "user" + assert trace_messages[0]["content"] == query + assert trace_messages[1]["role"] == "assistant" + assert city in trace_messages[1]["content"].lower() + assert trace_messages[2]["role"] == "assistant" + assert trace_messages[2]["tool_calls"][0]["function"]["name"] == "get_weather" + assert ( + city + in trace_messages[2]["tool_calls"][0]["function"]["arguments"][ + "location" + ].lower() + ) + assert trace_messages[3]["role"] == "tool" + assert trace_messages[4]["role"] == "assistant" + assert city in trace_messages[4]["content"].lower() + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +@pytest.mark.parametrize("push_to_explorer", [False, True]) +async def test_response_with_tool_call_with_image( + explorer_api_url, gateway_url, push_to_explorer +): + """Test the chat completion with image for the weather agent.""" + weather_agent = WeatherAgent(gateway_url, push_to_explorer) + + image_path = Path(__file__).parent.parent / "resources" / "images" / "new-york.jpg" + + with image_path.open("rb") as image_file: + base64_image = base64.b64encode(image_file.read()).decode("utf-8") + query = "get the weather in the city of this image in Celsius" + city = "new york" + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": query}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": base64_image, + }, + }, + ], + } + ] + response = weather_agent.get_response(messages) + assert response is not None + assert response[0].role == "assistant" + assert response[0].stop_reason == "tool_use" + assert response[0].content[0].type == "text" + assert response[0].content[1].type == "tool_use" + assert city in response[0].content[1].input["location"].lower() + + assert response[1].role == "assistant" + assert response[1].stop_reason == "end_turn" + + if push_to_explorer: + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{weather_agent.dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + + trace = traces[-1] + trace_id = trace["id"] + trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5 + ) + trace = trace_response.json() + trace_messages = trace["messages"] + assert trace_messages[0]["role"] == "user" + assert trace_messages[1]["role"] == "assistant" + assert city in trace_messages[1]["content"].lower() + assert trace_messages[2]["role"] == "assistant" + assert ( + trace_messages[2]["tool_calls"][0]["function"]["name"] == "get_weather" + ) + assert ( + city + in trace_messages[2]["tool_calls"][0]["function"]["arguments"][ + "location" + ].lower() + ) + assert trace_messages[3]["role"] == "tool" diff --git a/tests/integration/anthropic/test_anthropic_without_tool_call.py b/tests/integration/anthropic/test_anthropic_without_tool_call.py new file mode 100644 index 0000000..82cd9c3 --- /dev/null +++ b/tests/integration/anthropic/test_anthropic_without_tool_call.py @@ -0,0 +1,136 @@ +"""Tests for the Anthropic API without tool call.""" + +import os +import sys +import time +import uuid + +# Add integration folder (parent) to sys.path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +import requests +from utils import get_anthropic_client + +# Pytest plugins +pytest_plugins = ("pytest_asyncio",) + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +@pytest.mark.parametrize("push_to_explorer", [False, True]) +async def test_response_without_tool_call( + explorer_api_url, gateway_url, push_to_explorer +): + """Test the Anthropic gateway without tool calling.""" + dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}" + client = get_anthropic_client( + gateway_url, + push_to_explorer, + dataset_name, + ) + + cities = ["zurich", "new york", "london"] + queries = [ + "Can you introduce Zurich, Switzerland within 200 words?", + "Tell me the history of New York within 100 words?", + "How's the weather in London next week?", + ] + + # Process each query + responses = [] + for query in queries: + response = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": query}], + ) + response_text = response.content[0].text + responses.append(response_text) + assert response_text is not None + assert cities[queries.index(query)] in response_text.lower() + + if push_to_explorer: + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + assert len(traces) == len(queries) + + for index, trace in enumerate(traces): + trace_id = trace["id"] + # Fetch the trace + trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", + timeout=5, + ) + trace = trace_response.json() + assert trace["messages"] == [ + {"role": "user", "content": queries[index]}, + {"role": "assistant", "content": responses[index]}, + ] + + +@pytest.mark.skipif( + not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set" +) +@pytest.mark.parametrize("push_to_explorer", [False, True]) +async def test_streaming_response_without_tool_call( + explorer_api_url, gateway_url, push_to_explorer +): + """Test the Anthropic gateway without tool calling.""" + dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}" + client = get_anthropic_client(gateway_url, push_to_explorer, dataset_name) + + cities = ["zurich", "new york", "london"] + queries = [ + "Can you introduce Zurich, Switzerland in 2 short sentences?", + "Tell me the history of New York within 2 short sentences.", + "Explain the geography of London in 2 short sentences.", + ] + # Process each query + responses = [] + for index, query in enumerate(queries): + messages = [{"role": "user", "content": query}] + response_text = "" + + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=200, + messages=messages, + ) as response: + for reply in response.text_stream: + response_text += reply + assert cities[index] in response_text.lower() + responses.append(response_text) + assert response_text is not None + assert cities[queries.index(query)] in response_text.lower() + + if push_to_explorer: + # Wait for the trace to be saved + # This is needed because the trace is saved asynchronously + time.sleep(2) + traces_response = requests.get( + f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces", + timeout=5, + ) + traces = traces_response.json() + assert len(traces) == len(queries) + + for index, trace in enumerate(traces): + trace_id = trace["id"] + # Fetch the trace + trace_response = requests.get( + f"{explorer_api_url}/api/v1/trace/{trace_id}", + timeout=5, + ) + trace = trace_response.json() + assert trace["messages"] == [ + {"role": "user", "content": queries[index]}, + {"role": "assistant", "content": responses[index]}, + ] diff --git a/tests/integration/guardrails/test_guardrails_anthropic.py b/tests/integration/guardrails/test_guardrails_anthropic.py index 77bcf1f..e5b0ea1 100644 --- a/tests/integration/guardrails/test_guardrails_anthropic.py +++ b/tests/integration/guardrails/test_guardrails_anthropic.py @@ -39,7 +39,7 @@ async def test_message_content_guardrail_from_file( ) request = { - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "max_tokens": 100, "messages": [{"role": "user", "content": "What is the capital of Spain?"}], } @@ -150,7 +150,7 @@ async def test_tool_call_guardrail_from_file( ], "tools": tools, "system": system_message, - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "max_tokens": 150, } @@ -250,7 +250,7 @@ async def test_input_from_guardrail_from_file( ) request = { - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "max_tokens": 100, "messages": [{"role": "user", "content": "Tell me more about Fight Club."}], } @@ -352,7 +352,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s # This should not be blocked by the guardrails from the explorer when we push to explorer # because the file based guardrails are overridden by the explorer guardrails spain_request = { - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "messages": [{"role": "user", "content": "What is the capital of Spain?"}], "max_tokens": 100, } @@ -379,7 +379,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s # This should be blocked by the guardrails from the explorer user_prompt = "What kind of a creature is Shrek? What is his Shrek's wife's name? Only answer these questions with single sentences, don't add any extra details." shrek_request = { - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "messages": [ { "role": "user", @@ -497,7 +497,7 @@ async def test_preguardrailing_with_guardrails_from_explorer( user_prompt = "Tell me a one sentence pun." request = { - "model": "claude-3-5-sonnet-20241022", + "model": "claude-sonnet-4-5-20250929", "messages": [ { "role": "user",