fix: update anthropic model

This commit is contained in:
Luca Beurer-Kellner
2025-11-06 09:39:30 +01:00
parent 5ad89808a1
commit 6e7106ea26
6 changed files with 600 additions and 8 deletions

View File

@@ -263,7 +263,7 @@ class AnthropicProvider(BaseProvider):
b'event: message_start\ndata: {"type":"message_start","message": b'event: message_start\ndata: {"type":"message_start","message":
{"id":"msg_01LkayzAaw7b7QkUAw91psyx","type":"message","role":"assistant" {"id":"msg_01LkayzAaw7b7QkUAw91psyx","type":"message","role":"assistant"
,"model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null, ,"model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null,
"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_to' "stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_to'
and and

View File

@@ -1,4 +1,4 @@
anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n' anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n'
,b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" making it an attractive destination for both business"} }\n\n' ,b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" making it an attractive destination for both business"} }\n\n'
, b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" and leisure."} }\n\n' , b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" and leisure."} }\n\n'
, b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n' , b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n'

View File

@@ -0,0 +1,76 @@
"""Test the Anthropic gateway with Invariant key in the ANTHROPIC_API_KEY."""
import os
import sys
import time
import uuid
from unittest.mock import patch
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import anthropic
import pytest
import requests
from httpx import Client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
async def test_gateway_with_invariant_key_in_anthropic_key_header(
    gateway_url, explorer_api_url
):
    """Test the Anthropic gateway with Invariant key in the Anthropic key"""
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    # The gateway splits the combined key on ";invariant-auth=" to recover
    # both the upstream Anthropic key and the Invariant credentials.
    combined_key = (
        os.getenv("ANTHROPIC_API_KEY")
        + f";invariant-auth={os.getenv('INVARIANT_API_KEY')}"
    )
    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": combined_key}):
        anthropic_client = anthropic.Anthropic(
            http_client=Client(),
            base_url=f"{gateway_url}/api/v1/gateway/{dataset_name}/anthropic",
        )
        result = anthropic_client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[
                {
                    "role": "user",
                    "content": "Give me an introduction to Zurich, Switzerland within 200 words.",
                }
            ],
        )

    assert result is not None
    answer_text = result.content[0].text
    assert "zurich" in answer_text.lower()

    # Wait for the trace to be saved
    # This is needed because the trace is saved asynchronously
    time.sleep(2)
    trace_listing = requests.get(
        f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
        timeout=5,
    ).json()
    assert len(trace_listing) == 1
    stored_trace = requests.get(
        f"{explorer_api_url}/api/v1/trace/{trace_listing[0]['id']}", timeout=5
    ).json()
    # The explorer should have recorded exactly the user/assistant exchange.
    assert stored_trace["messages"] == [
        {
            "role": "user",
            "content": "Give me an introduction to Zurich, Switzerland within 200 words.",
        },
        {"role": "assistant", "content": answer_text},
    ]

View File

@@ -0,0 +1,380 @@
"""Test the Anthropic messages API with tool call for the weather agent."""
import base64
import json
import os
import sys
import time
import uuid
from pathlib import Path
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import anthropic
import pytest
import requests
from utils import get_anthropic_client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
class WeatherAgent:
    """Weather agent to get the current weather in a given location."""

    def __init__(self, gateway_url, push_to_explorer):
        # Unique dataset per agent instance so explorer traces never collide
        # between parametrized test runs.
        self.dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
        self.client = get_anthropic_client(
            gateway_url, push_to_explorer, self.dataset_name
        )
        # Anthropic tool (function) schema sent with every request; only
        # "location" is required, "unit" is optional.
        self.get_weather_function = {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit of temperature, either "celsius" or "fahrenheit"',
                    },
                },
                "required": ["location"],
            },
        }

    def get_response(self, messages: list[dict]) -> list[dict]:
        """
        Get the response from the agent for a given user query for weather.

        Loops: call the model, and if it requested the weather tool, append
        the tool result to `messages` (mutated in place) and call again.
        Returns the list of assistant turns — typically [tool_use turn,
        final answer turn].
        NOTE(review): the returned items are anthropic Message objects, not
        dicts, despite the annotation — confirm before tightening the type.
        """
        response_list = []
        while True:
            response = self.client.messages.create(
                tools=[self.get_weather_function],
                model="claude-sonnet-4-5-20250929",
                max_tokens=1024,
                messages=messages,
            )
            response_list.append(response)
            # If there's tool call, Extract the tool call parameters from the response
            # (assumes the tool_use block, when present, sits at content[1]
            # after a leading text block — TODO confirm for all model outputs).
            if len(response.content) > 1 and response.content[1].type == "tool_use":
                tool_call_params = response.content[1].input
                tool_call_result = self.get_weather(tool_call_params["location"])
                tool_call_id = response.content[1].id
                # Echo the assistant turn, then supply the tool result as a
                # user-role tool_result block, per the Anthropic messages API.
                messages.append({"role": response.role, "content": response.content})
                messages.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": tool_call_id,
                                "content": tool_call_result,
                            }
                        ],
                    }
                )
            else:
                return response_list

    def get_streaming_response(self, messages: list[dict]) -> list[dict]:
        """Get streaming response from the agent for a given user query for weather.

        Same loop as get_response but consumes raw stream events, manually
        re-assembling TextBlock/ToolUseBlock content from start/delta/stop
        events. Returns a list of per-turn content-block lists.
        """
        response_list = []

        def clean_quotes(text):
            # Convert \' to '
            text = text.replace("'", "'")
            # Convert \" to "
            text = text.replace('"', '"')
            # Flatten newlines so accumulated text/JSON stays single-line.
            text = text.replace("\n", " ")
            return text

        while True:
            json_data = ""
            content = []
            # `event` keeps the LAST event after the stream is drained; it is
            # inspected below to decide whether the model stopped for a tool call.
            event = None
            with self.client.messages.stream(
                tools=[self.get_weather_function],
                model="claude-sonnet-4-5-20250929",
                max_tokens=1024,
                messages=messages,
            ) as stream:
                for event in stream:
                    if isinstance(event, anthropic.types.RawContentBlockStartEvent):
                        # Start a new block
                        current_block = event.content_block
                        current_text = ""
                    elif isinstance(event, anthropic.types.RawContentBlockDeltaEvent):
                        if hasattr(event.delta, "text"):
                            # Accumulate text for TextBlock
                            current_text += clean_quotes(event.delta.text)
                        elif hasattr(event.delta, "partial_json"):
                            # Accumulate JSON for ToolUseBlock
                            json_data += clean_quotes(event.delta.partial_json)
                            current_text += clean_quotes(event.delta.partial_json)
                    elif isinstance(event, anthropic.types.RawContentBlockStopEvent):
                        # Block is complete, add it to content
                        if current_block.type == "text":
                            content.append(
                                anthropic.types.TextBlock(
                                    citations=None, text=current_text, type="text"
                                )
                            )
                        elif current_block.type == "tool_use":
                            content.append(
                                anthropic.types.ToolUseBlock(
                                    id=current_block.id,
                                    input=json.loads(current_text),
                                    name=current_block.name,
                                    type="tool_use",
                                )
                            )
            if content:
                response_list.append(content)
            if (
                isinstance(event, anthropic.types.RawMessageStopEvent)
                and event.message.stop_reason == "tool_use"
            ):
                # Model paused for a tool call: run it, feed the result back
                # (content[-1] is the ToolUseBlock), and loop for the answer.
                tool_call_params = json.loads(json_data)
                tool_call_result = self.get_weather(tool_call_params["location"])
                messages.append({"role": "assistant", "content": content})
                messages.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": content[-1].id,
                                "content": tool_call_result,
                            }
                        ],
                    }
                )
            else:
                return response_list

    def get_weather(self, location: str):
        """Get the current weather in a given location using latitude and longitude."""
        # Canned deterministic forecast so tests don't depend on a real weather
        # backend. NOTE(review): "youre" typo is part of the fixture text and
        # is kept verbatim.
        response = f"""Weather in {location}:
        Good morning! Expect overcast skies with intermittent showers throughout the day.
        Temperatures will range from a cool 15°C in the early hours to around 19°C by mid-afternoon.
        Light winds from the northeast at about 10 km/h will keep conditions mild.
        It might be a good idea to carry an umbrella if youre heading out. Stay dry and have a great day!
        """
        return response
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_with_tool_call(explorer_api_url, gateway_url, push_to_explorer):
    """Test the chat completion without streaming for the weather agent."""
    agent = WeatherAgent(gateway_url, push_to_explorer)
    query = "Tell me the weather for New York in Celsius"
    city = "new york"

    turns = agent.get_response([{"role": "user", "content": query}])

    assert turns is not None
    # First assistant turn: a text block followed by the get_weather tool call.
    assert turns[0].role == "assistant"
    assert turns[0].stop_reason == "tool_use"
    assert turns[0].content[0].type == "text"
    assert turns[0].content[1].type == "tool_use"
    assert city in turns[0].content[1].input["location"].lower()
    # Second assistant turn: the final answer built from the tool result.
    assert turns[1].role == "assistant"
    assert turns[1].stop_reason == "end_turn"

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        dataset_traces = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{agent.dataset_name}/traces",
            timeout=5,
        ).json()
        newest_id = dataset_traces[-1]["id"]
        # Fetch the full trace and check the recorded conversation shape.
        full_trace = requests.get(
            f"{explorer_api_url}/api/v1/trace/{newest_id}", timeout=5
        ).json()
        msgs = full_trace["messages"]
        assert msgs[0]["role"] == "user"
        assert msgs[0]["content"] == query
        assert msgs[1]["role"] == "assistant"
        assert city in msgs[1]["content"].lower()
        assert msgs[2]["role"] == "assistant"
        recorded_call = msgs[2]["tool_calls"][0]["function"]
        assert recorded_call["name"] == "get_weather"
        assert city in recorded_call["arguments"]["location"].lower()
        assert msgs[3]["role"] == "tool"
        assert msgs[4]["role"] == "assistant"
        assert city in msgs[4]["content"].lower()
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_streaming_response_with_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the chat completion with streaming for the weather agent.

    The agent may return either two turns (tool-call turn + final answer) or a
    single turn containing only the tool call, e.g.:
    [[TextBlock(text="I'll help you check the weather in New York using the
    get_weather function.", type='text', citations=None),
    ToolUseBlock(id='toolu_...', input={'location': 'New York, NY',
    'unit': 'celsius'}, name='get_weather', type='tool_use')]]
    """
    weather_agent = WeatherAgent(gateway_url, push_to_explorer)
    query = "Tell me the weather for New York in Celsius"
    city = "new york"
    messages = [{"role": "user", "content": query}]
    response = weather_agent.get_streaming_response(messages)

    assert response is not None
    if len(response) not in (1, 2):
        # Fix: the original `assert False, "...but got" + str(response)` lost
        # a space after "got" and is silently stripped under `python -O`;
        # pytest.fail always reports, with a readable message.
        pytest.fail(f"Expected response length 2 or 1, but got {response}")
    # Both shapes begin with a text block followed by the get_weather tool call.
    assert response[0][0].type == "text"
    assert response[0][1].type == "tool_use"
    assert response[0][1].name == "get_weather"
    assert city in response[0][1].input["location"].lower()
    if len(response) == 2:
        # Second turn: the final answer produced from the tool result.
        assert response[1][0].type == "text"
        assert city in response[1][0].text.lower()

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{weather_agent.dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        trace = traces[-1]
        trace_id = trace["id"]
        # Fetch the trace
        trace_response = requests.get(
            f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5
        )
        trace = trace_response.json()
        trace_messages = trace["messages"]
        assert trace_messages[0]["role"] == "user"
        assert trace_messages[0]["content"] == query
        assert trace_messages[1]["role"] == "assistant"
        assert city in trace_messages[1]["content"].lower()
        assert trace_messages[2]["role"] == "assistant"
        assert trace_messages[2]["tool_calls"][0]["function"]["name"] == "get_weather"
        assert (
            city
            in trace_messages[2]["tool_calls"][0]["function"]["arguments"][
                "location"
            ].lower()
        )
        assert trace_messages[3]["role"] == "tool"
        assert trace_messages[4]["role"] == "assistant"
        assert city in trace_messages[4]["content"].lower()
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_with_tool_call_with_image(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the chat completion with image for the weather agent."""
    agent = WeatherAgent(gateway_url, push_to_explorer)
    image_path = Path(__file__).parent.parent / "resources" / "images" / "new-york.jpg"
    # The image is sent inline as a base64 source block.
    encoded_image = base64.b64encode(image_path.read_bytes()).decode("utf-8")
    query = "get the weather in the city of this image in Celsius"
    city = "new york"
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": query},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": encoded_image,
                },
            },
        ],
    }

    turns = agent.get_response([user_turn])

    assert turns is not None
    # First turn: the model identifies the city and calls the weather tool.
    assert turns[0].role == "assistant"
    assert turns[0].stop_reason == "tool_use"
    assert turns[0].content[0].type == "text"
    assert turns[0].content[1].type == "tool_use"
    assert city in turns[0].content[1].input["location"].lower()
    # Second turn: the final answer from the tool result.
    assert turns[1].role == "assistant"
    assert turns[1].stop_reason == "end_turn"

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        dataset_traces = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{agent.dataset_name}/traces",
            timeout=5,
        ).json()
        full_trace = requests.get(
            f"{explorer_api_url}/api/v1/trace/{dataset_traces[-1]['id']}", timeout=5
        ).json()
        msgs = full_trace["messages"]
        assert msgs[0]["role"] == "user"
        assert msgs[1]["role"] == "assistant"
        assert city in msgs[1]["content"].lower()
        assert msgs[2]["role"] == "assistant"
        recorded_call = msgs[2]["tool_calls"][0]["function"]
        assert recorded_call["name"] == "get_weather"
        assert city in recorded_call["arguments"]["location"].lower()
        assert msgs[3]["role"] == "tool"
View File

@@ -0,0 +1,136 @@
"""Tests for the Anthropic API without tool call."""
import os
import sys
import time
import uuid
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pytest
import requests
from utils import get_anthropic_client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_without_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the Anthropic gateway without tool calling.

    Sends one query per city, checks each answer mentions its city, and
    (when pushing) verifies the explorer recorded one trace per query.
    """
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    client = get_anthropic_client(
        gateway_url,
        push_to_explorer,
        dataset_name,
    )
    cities = ["zurich", "new york", "london"]
    queries = [
        "Can you introduce Zurich, Switzerland within 200 words?",
        "Tell me the history of New York within 100 words?",
        "How's the weather in London next week?",
    ]
    # Process each query
    responses = []
    # Fix: pair each query with its expected city via zip() instead of the
    # O(n) `cities[queries.index(query)]` lookup, which would also match the
    # wrong entry if two queries were ever identical.
    for query, city in zip(queries, cities):
        response = client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": query}],
        )
        response_text = response.content[0].text
        responses.append(response_text)
        assert response_text is not None
        assert city in response_text.lower()
    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        assert len(traces) == len(queries)
        for index, trace in enumerate(traces):
            trace_id = trace["id"]
            # Fetch the full trace and compare the exact recorded exchange.
            trace_response = requests.get(
                f"{explorer_api_url}/api/v1/trace/{trace_id}",
                timeout=5,
            )
            trace = trace_response.json()
            assert trace["messages"] == [
                {"role": "user", "content": queries[index]},
                {"role": "assistant", "content": responses[index]},
            ]
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_streaming_response_without_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the Anthropic gateway (streaming) without tool calling.

    Streams one short answer per city, checks each mentions its city, and
    (when pushing) verifies the explorer recorded one trace per query.
    """
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    client = get_anthropic_client(gateway_url, push_to_explorer, dataset_name)
    cities = ["zurich", "new york", "london"]
    queries = [
        "Can you introduce Zurich, Switzerland in 2 short sentences?",
        "Tell me the history of New York within 2 short sentences.",
        "Explain the geography of London in 2 short sentences.",
    ]
    # Process each query
    responses = []
    # Fix: pair query and expected city via zip() instead of mixing
    # enumerate() with a redundant O(n) `cities[queries.index(query)]`
    # lookup; the duplicate city assertion is also collapsed to one.
    for query, city in zip(queries, cities):
        messages = [{"role": "user", "content": query}]
        response_text = ""
        with client.messages.stream(
            model="claude-sonnet-4-5-20250929",
            max_tokens=200,
            messages=messages,
        ) as response:
            # Accumulate the streamed text chunks into the full answer.
            for reply in response.text_stream:
                response_text += reply
        responses.append(response_text)
        assert response_text is not None
        assert city in response_text.lower()
    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        assert len(traces) == len(queries)
        for index, trace in enumerate(traces):
            trace_id = trace["id"]
            # Fetch the full trace and compare the exact recorded exchange.
            trace_response = requests.get(
                f"{explorer_api_url}/api/v1/trace/{trace_id}",
                timeout=5,
            )
            trace = trace_response.json()
            assert trace["messages"] == [
                {"role": "user", "content": queries[index]},
                {"role": "assistant", "content": responses[index]},
            ]

View File

@@ -39,7 +39,7 @@ async def test_message_content_guardrail_from_file(
) )
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 100, "max_tokens": 100,
"messages": [{"role": "user", "content": "What is the capital of Spain?"}], "messages": [{"role": "user", "content": "What is the capital of Spain?"}],
} }
@@ -150,7 +150,7 @@ async def test_tool_call_guardrail_from_file(
], ],
"tools": tools, "tools": tools,
"system": system_message, "system": system_message,
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 150, "max_tokens": 150,
} }
@@ -250,7 +250,7 @@ async def test_input_from_guardrail_from_file(
) )
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 100, "max_tokens": 100,
"messages": [{"role": "user", "content": "Tell me more about Fight Club."}], "messages": [{"role": "user", "content": "Tell me more about Fight Club."}],
} }
@@ -352,7 +352,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s
# This should not be blocked by the guardrails from the explorer when we push to explorer # This should not be blocked by the guardrails from the explorer when we push to explorer
# because the file based guardrails are overridden by the explorer guardrails # because the file based guardrails are overridden by the explorer guardrails
spain_request = { spain_request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [{"role": "user", "content": "What is the capital of Spain?"}], "messages": [{"role": "user", "content": "What is the capital of Spain?"}],
"max_tokens": 100, "max_tokens": 100,
} }
@@ -379,7 +379,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s
# This should be blocked by the guardrails from the explorer # This should be blocked by the guardrails from the explorer
user_prompt = "What kind of a creature is Shrek? What is his Shrek's wife's name? Only answer these questions with single sentences, don't add any extra details." user_prompt = "What kind of a creature is Shrek? What is his Shrek's wife's name? Only answer these questions with single sentences, don't add any extra details."
shrek_request = { shrek_request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -497,7 +497,7 @@ async def test_preguardrailing_with_guardrails_from_explorer(
user_prompt = "Tell me a one sentence pun." user_prompt = "Tell me a one sentence pun."
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",