fix: update anthropic model

This commit is contained in:
Luca Beurer-Kellner
2025-11-06 09:39:30 +01:00
parent 5ad89808a1
commit 6e7106ea26
6 changed files with 600 additions and 8 deletions

View File

@@ -263,7 +263,7 @@ class AnthropicProvider(BaseProvider):
b'event: message_start\ndata: {"type":"message_start","message": b'event: message_start\ndata: {"type":"message_start","message":
{"id":"msg_01LkayzAaw7b7QkUAw91psyx","type":"message","role":"assistant" {"id":"msg_01LkayzAaw7b7QkUAw91psyx","type":"message","role":"assistant"
,"model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null, ,"model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null,
"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_to' "stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_to'
and and

View File

@@ -1,4 +1,4 @@
anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n' anthropic_chunk_list = [b'event: message_start\ndata: {"type":"message_start","message":{"id":"msg_012KWB6kiKvzx7r1SKs5nGA1","type":"message","role":"assistant","model":"claude-sonnet-4-5-20250929","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":5}} }\n\nevent: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }\n\n'
,b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" making it an attractive destination for both business"} }\n\n' ,b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" making it an attractive destination for both business"} }\n\n'
, b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" and leisure."} }\n\n' , b'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" and leisure."} }\n\n'
, b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n' , b'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n'

View File

@@ -0,0 +1,76 @@
"""Test the Anthropic gateway with Invariant key in the ANTHROPIC_API_KEY."""
import os
import sys
import time
import uuid
from unittest.mock import patch
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import anthropic
import pytest
import requests
from httpx import Client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
async def test_gateway_with_invariant_key_in_anthropic_key_header(
    gateway_url, explorer_api_url
):
    """Test the Anthropic gateway with Invariant key in the Anthropic key"""
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    # The gateway splits the combined key on ";invariant-auth=" to recover
    # both the upstream Anthropic key and the Invariant credentials.
    combined_key = (
        os.getenv("ANTHROPIC_API_KEY")
        + f";invariant-auth={os.getenv('INVARIANT_API_KEY')}"
    )
    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": combined_key}):
        anthropic_client = anthropic.Anthropic(
            http_client=Client(),
            base_url=f"{gateway_url}/api/v1/gateway/{dataset_name}/anthropic",
        )
        result = anthropic_client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[
                {
                    "role": "user",
                    "content": "Give me an introduction to Zurich, Switzerland within 200 words.",
                }
            ],
        )

    assert result is not None
    answer_text = result.content[0].text
    assert "zurich" in answer_text.lower()

    # Wait for the trace to be saved
    # This is needed because the trace is saved asynchronously
    time.sleep(2)
    trace_listing = requests.get(
        f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
        timeout=5,
    ).json()
    assert len(trace_listing) == 1
    stored_trace = requests.get(
        f"{explorer_api_url}/api/v1/trace/{trace_listing[0]['id']}", timeout=5
    ).json()
    # The explorer should have recorded exactly the user/assistant exchange.
    assert stored_trace["messages"] == [
        {
            "role": "user",
            "content": "Give me an introduction to Zurich, Switzerland within 200 words.",
        },
        {"role": "assistant", "content": answer_text},
    ]

View File

@@ -0,0 +1,380 @@
"""Test the Anthropic messages API with tool call for the weather agent."""
import base64
import json
import os
import sys
import time
import uuid
from pathlib import Path
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import anthropic
import pytest
import requests
from utils import get_anthropic_client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
class WeatherAgent:
    """Weather agent to get the current weather in a given location."""

    def __init__(self, gateway_url, push_to_explorer):
        # Unique dataset per agent instance so explorer traces never collide
        # between parametrized test runs.
        self.dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
        self.client = get_anthropic_client(
            gateway_url, push_to_explorer, self.dataset_name
        )
        # Anthropic tool (function) schema sent with every request; only
        # "location" is required, "unit" is optional.
        self.get_weather_function = {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": 'The unit of temperature, either "celsius" or "fahrenheit"',
                    },
                },
                "required": ["location"],
            },
        }

    def get_response(self, messages: list[dict]) -> list[dict]:
        """
        Get the response from the agent for a given user query for weather.

        Loops: call the model, and if it requested the weather tool, append
        the tool result to `messages` (mutated in place) and call again.
        Returns the list of assistant turns — typically [tool_use turn,
        final answer turn].
        NOTE(review): the returned items are anthropic Message objects, not
        dicts, despite the annotation — confirm before tightening the type.
        """
        response_list = []
        while True:
            response = self.client.messages.create(
                tools=[self.get_weather_function],
                model="claude-sonnet-4-5-20250929",
                max_tokens=1024,
                messages=messages,
            )
            response_list.append(response)
            # If there's tool call, Extract the tool call parameters from the response
            # (assumes the tool_use block, when present, sits at content[1]
            # after a leading text block — TODO confirm for all model outputs).
            if len(response.content) > 1 and response.content[1].type == "tool_use":
                tool_call_params = response.content[1].input
                tool_call_result = self.get_weather(tool_call_params["location"])
                tool_call_id = response.content[1].id
                # Echo the assistant turn, then supply the tool result as a
                # user-role tool_result block, per the Anthropic messages API.
                messages.append({"role": response.role, "content": response.content})
                messages.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": tool_call_id,
                                "content": tool_call_result,
                            }
                        ],
                    }
                )
            else:
                return response_list

    def get_streaming_response(self, messages: list[dict]) -> list[dict]:
        """Get streaming response from the agent for a given user query for weather.

        Same loop as get_response but consumes raw stream events, manually
        re-assembling TextBlock/ToolUseBlock content from start/delta/stop
        events. Returns a list of per-turn content-block lists.
        """
        response_list = []

        def clean_quotes(text):
            # Convert \' to '
            text = text.replace("'", "'")
            # Convert \" to "
            text = text.replace('"', '"')
            # Flatten newlines so accumulated text/JSON stays single-line.
            text = text.replace("\n", " ")
            return text

        while True:
            json_data = ""
            content = []
            # `event` keeps the LAST event after the stream is drained; it is
            # inspected below to decide whether the model stopped for a tool call.
            event = None
            with self.client.messages.stream(
                tools=[self.get_weather_function],
                model="claude-sonnet-4-5-20250929",
                max_tokens=1024,
                messages=messages,
            ) as stream:
                for event in stream:
                    if isinstance(event, anthropic.types.RawContentBlockStartEvent):
                        # Start a new block
                        current_block = event.content_block
                        current_text = ""
                    elif isinstance(event, anthropic.types.RawContentBlockDeltaEvent):
                        if hasattr(event.delta, "text"):
                            # Accumulate text for TextBlock
                            current_text += clean_quotes(event.delta.text)
                        elif hasattr(event.delta, "partial_json"):
                            # Accumulate JSON for ToolUseBlock
                            json_data += clean_quotes(event.delta.partial_json)
                            current_text += clean_quotes(event.delta.partial_json)
                    elif isinstance(event, anthropic.types.RawContentBlockStopEvent):
                        # Block is complete, add it to content
                        if current_block.type == "text":
                            content.append(
                                anthropic.types.TextBlock(
                                    citations=None, text=current_text, type="text"
                                )
                            )
                        elif current_block.type == "tool_use":
                            content.append(
                                anthropic.types.ToolUseBlock(
                                    id=current_block.id,
                                    input=json.loads(current_text),
                                    name=current_block.name,
                                    type="tool_use",
                                )
                            )
            if content:
                response_list.append(content)
            if (
                isinstance(event, anthropic.types.RawMessageStopEvent)
                and event.message.stop_reason == "tool_use"
            ):
                # Model paused for a tool call: run it, feed the result back
                # (content[-1] is the ToolUseBlock), and loop for the answer.
                tool_call_params = json.loads(json_data)
                tool_call_result = self.get_weather(tool_call_params["location"])
                messages.append({"role": "assistant", "content": content})
                messages.append(
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "tool_result",
                                "tool_use_id": content[-1].id,
                                "content": tool_call_result,
                            }
                        ],
                    }
                )
            else:
                return response_list

    def get_weather(self, location: str):
        """Get the current weather in a given location using latitude and longitude."""
        # Canned deterministic forecast so tests don't depend on a real weather
        # backend. NOTE(review): "youre" typo is part of the fixture text and
        # is kept verbatim.
        response = f"""Weather in {location}:
        Good morning! Expect overcast skies with intermittent showers throughout the day.
        Temperatures will range from a cool 15°C in the early hours to around 19°C by mid-afternoon.
        Light winds from the northeast at about 10 km/h will keep conditions mild.
        It might be a good idea to carry an umbrella if youre heading out. Stay dry and have a great day!
        """
        return response
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_with_tool_call(explorer_api_url, gateway_url, push_to_explorer):
    """Test the chat completion without streaming for the weather agent."""
    agent = WeatherAgent(gateway_url, push_to_explorer)
    query = "Tell me the weather for New York in Celsius"
    city = "new york"

    turns = agent.get_response([{"role": "user", "content": query}])

    assert turns is not None
    # First assistant turn: a text block followed by the get_weather tool call.
    assert turns[0].role == "assistant"
    assert turns[0].stop_reason == "tool_use"
    assert turns[0].content[0].type == "text"
    assert turns[0].content[1].type == "tool_use"
    assert city in turns[0].content[1].input["location"].lower()
    # Second assistant turn: the final answer built from the tool result.
    assert turns[1].role == "assistant"
    assert turns[1].stop_reason == "end_turn"

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        dataset_traces = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{agent.dataset_name}/traces",
            timeout=5,
        ).json()
        newest_id = dataset_traces[-1]["id"]
        # Fetch the full trace and check the recorded conversation shape.
        full_trace = requests.get(
            f"{explorer_api_url}/api/v1/trace/{newest_id}", timeout=5
        ).json()
        msgs = full_trace["messages"]
        assert msgs[0]["role"] == "user"
        assert msgs[0]["content"] == query
        assert msgs[1]["role"] == "assistant"
        assert city in msgs[1]["content"].lower()
        assert msgs[2]["role"] == "assistant"
        recorded_call = msgs[2]["tool_calls"][0]["function"]
        assert recorded_call["name"] == "get_weather"
        assert city in recorded_call["arguments"]["location"].lower()
        assert msgs[3]["role"] == "tool"
        assert msgs[4]["role"] == "assistant"
        assert city in msgs[4]["content"].lower()
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_streaming_response_with_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the chat completion with streaming for the weather agent.

    The agent may return either two turns (tool-call turn + final answer) or a
    single turn containing only the tool call, e.g.:
    [[TextBlock(text="I'll help you check the weather in New York using the
    get_weather function.", type='text', citations=None),
    ToolUseBlock(id='toolu_...', input={'location': 'New York, NY',
    'unit': 'celsius'}, name='get_weather', type='tool_use')]]
    """
    weather_agent = WeatherAgent(gateway_url, push_to_explorer)
    query = "Tell me the weather for New York in Celsius"
    city = "new york"
    messages = [{"role": "user", "content": query}]
    response = weather_agent.get_streaming_response(messages)

    assert response is not None
    if len(response) not in (1, 2):
        # Fix: the original `assert False, "...but got" + str(response)` lost
        # a space after "got" and is silently stripped under `python -O`;
        # pytest.fail always reports, with a readable message.
        pytest.fail(f"Expected response length 2 or 1, but got {response}")
    # Both shapes begin with a text block followed by the get_weather tool call.
    assert response[0][0].type == "text"
    assert response[0][1].type == "tool_use"
    assert response[0][1].name == "get_weather"
    assert city in response[0][1].input["location"].lower()
    if len(response) == 2:
        # Second turn: the final answer produced from the tool result.
        assert response[1][0].type == "text"
        assert city in response[1][0].text.lower()

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{weather_agent.dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        trace = traces[-1]
        trace_id = trace["id"]
        # Fetch the trace
        trace_response = requests.get(
            f"{explorer_api_url}/api/v1/trace/{trace_id}", timeout=5
        )
        trace = trace_response.json()
        trace_messages = trace["messages"]
        assert trace_messages[0]["role"] == "user"
        assert trace_messages[0]["content"] == query
        assert trace_messages[1]["role"] == "assistant"
        assert city in trace_messages[1]["content"].lower()
        assert trace_messages[2]["role"] == "assistant"
        assert trace_messages[2]["tool_calls"][0]["function"]["name"] == "get_weather"
        assert (
            city
            in trace_messages[2]["tool_calls"][0]["function"]["arguments"][
                "location"
            ].lower()
        )
        assert trace_messages[3]["role"] == "tool"
        assert trace_messages[4]["role"] == "assistant"
        assert city in trace_messages[4]["content"].lower()
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_with_tool_call_with_image(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the chat completion with image for the weather agent."""
    agent = WeatherAgent(gateway_url, push_to_explorer)
    image_path = Path(__file__).parent.parent / "resources" / "images" / "new-york.jpg"
    # The image is sent inline as a base64 source block.
    encoded_image = base64.b64encode(image_path.read_bytes()).decode("utf-8")
    query = "get the weather in the city of this image in Celsius"
    city = "new york"
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": query},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": encoded_image,
                },
            },
        ],
    }

    turns = agent.get_response([user_turn])

    assert turns is not None
    # First turn: the model identifies the city and calls the weather tool.
    assert turns[0].role == "assistant"
    assert turns[0].stop_reason == "tool_use"
    assert turns[0].content[0].type == "text"
    assert turns[0].content[1].type == "tool_use"
    assert city in turns[0].content[1].input["location"].lower()
    # Second turn: the final answer from the tool result.
    assert turns[1].role == "assistant"
    assert turns[1].stop_reason == "end_turn"

    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        dataset_traces = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{agent.dataset_name}/traces",
            timeout=5,
        ).json()
        full_trace = requests.get(
            f"{explorer_api_url}/api/v1/trace/{dataset_traces[-1]['id']}", timeout=5
        ).json()
        msgs = full_trace["messages"]
        assert msgs[0]["role"] == "user"
        assert msgs[1]["role"] == "assistant"
        assert city in msgs[1]["content"].lower()
        assert msgs[2]["role"] == "assistant"
        recorded_call = msgs[2]["tool_calls"][0]["function"]
        assert recorded_call["name"] == "get_weather"
        assert city in recorded_call["arguments"]["location"].lower()
        assert msgs[3]["role"] == "tool"
View File

@@ -0,0 +1,136 @@
"""Tests for the Anthropic API without tool call."""
import os
import sys
import time
import uuid
# Add integration folder (parent) to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pytest
import requests
from utils import get_anthropic_client
# Pytest plugins
pytest_plugins = ("pytest_asyncio",)
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_response_without_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the Anthropic gateway without tool calling.

    Sends one query per city, checks each answer mentions its city, and
    (when pushing) verifies the explorer recorded one trace per query.
    """
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    client = get_anthropic_client(
        gateway_url,
        push_to_explorer,
        dataset_name,
    )
    cities = ["zurich", "new york", "london"]
    queries = [
        "Can you introduce Zurich, Switzerland within 200 words?",
        "Tell me the history of New York within 100 words?",
        "How's the weather in London next week?",
    ]
    # Process each query
    responses = []
    # Fix: pair each query with its expected city via zip() instead of the
    # O(n) `cities[queries.index(query)]` lookup, which would also match the
    # wrong entry if two queries were ever identical.
    for query, city in zip(queries, cities):
        response = client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": query}],
        )
        response_text = response.content[0].text
        responses.append(response_text)
        assert response_text is not None
        assert city in response_text.lower()
    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        assert len(traces) == len(queries)
        for index, trace in enumerate(traces):
            trace_id = trace["id"]
            # Fetch the full trace and compare the exact recorded exchange.
            trace_response = requests.get(
                f"{explorer_api_url}/api/v1/trace/{trace_id}",
                timeout=5,
            )
            trace = trace_response.json()
            assert trace["messages"] == [
                {"role": "user", "content": queries[index]},
                {"role": "assistant", "content": responses[index]},
            ]
@pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"), reason="No ANTHROPIC_API_KEY set"
)
@pytest.mark.parametrize("push_to_explorer", [False, True])
async def test_streaming_response_without_tool_call(
    explorer_api_url, gateway_url, push_to_explorer
):
    """Test the Anthropic gateway (streaming) without tool calling.

    Streams one short answer per city, checks each mentions its city, and
    (when pushing) verifies the explorer recorded one trace per query.
    """
    dataset_name = f"test-dataset-anthropic-{uuid.uuid4()}"
    client = get_anthropic_client(gateway_url, push_to_explorer, dataset_name)
    cities = ["zurich", "new york", "london"]
    queries = [
        "Can you introduce Zurich, Switzerland in 2 short sentences?",
        "Tell me the history of New York within 2 short sentences.",
        "Explain the geography of London in 2 short sentences.",
    ]
    # Process each query
    responses = []
    # Fix: pair query and expected city via zip() instead of mixing
    # enumerate() with a redundant O(n) `cities[queries.index(query)]`
    # lookup; the duplicate city assertion is also collapsed to one.
    for query, city in zip(queries, cities):
        messages = [{"role": "user", "content": query}]
        response_text = ""
        with client.messages.stream(
            model="claude-sonnet-4-5-20250929",
            max_tokens=200,
            messages=messages,
        ) as response:
            # Accumulate the streamed text chunks into the full answer.
            for reply in response.text_stream:
                response_text += reply
        responses.append(response_text)
        assert response_text is not None
        assert city in response_text.lower()
    if push_to_explorer:
        # Wait for the trace to be saved
        # This is needed because the trace is saved asynchronously
        time.sleep(2)
        traces_response = requests.get(
            f"{explorer_api_url}/api/v1/dataset/byuser/developer/{dataset_name}/traces",
            timeout=5,
        )
        traces = traces_response.json()
        assert len(traces) == len(queries)
        for index, trace in enumerate(traces):
            trace_id = trace["id"]
            # Fetch the full trace and compare the exact recorded exchange.
            trace_response = requests.get(
                f"{explorer_api_url}/api/v1/trace/{trace_id}",
                timeout=5,
            )
            trace = trace_response.json()
            assert trace["messages"] == [
                {"role": "user", "content": queries[index]},
                {"role": "assistant", "content": responses[index]},
            ]

View File

@@ -39,7 +39,7 @@ async def test_message_content_guardrail_from_file(
) )
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 100, "max_tokens": 100,
"messages": [{"role": "user", "content": "What is the capital of Spain?"}], "messages": [{"role": "user", "content": "What is the capital of Spain?"}],
} }
@@ -150,7 +150,7 @@ async def test_tool_call_guardrail_from_file(
], ],
"tools": tools, "tools": tools,
"system": system_message, "system": system_message,
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 150, "max_tokens": 150,
} }
@@ -250,7 +250,7 @@ async def test_input_from_guardrail_from_file(
) )
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"max_tokens": 100, "max_tokens": 100,
"messages": [{"role": "user", "content": "Tell me more about Fight Club."}], "messages": [{"role": "user", "content": "Tell me more about Fight Club."}],
} }
@@ -352,7 +352,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s
# This should not be blocked by the guardrails from the explorer when we push to explorer # This should not be blocked by the guardrails from the explorer when we push to explorer
# because the file based guardrails are overridden by the explorer guardrails # because the file based guardrails are overridden by the explorer guardrails
spain_request = { spain_request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [{"role": "user", "content": "What is the capital of Spain?"}], "messages": [{"role": "user", "content": "What is the capital of Spain?"}],
"max_tokens": 100, "max_tokens": 100,
} }
@@ -379,7 +379,7 @@ async def test_with_guardrails_from_explorer(explorer_api_url, gateway_url, do_s
# This should be blocked by the guardrails from the explorer # This should be blocked by the guardrails from the explorer
user_prompt = "What kind of a creature is Shrek? What is his Shrek's wife's name? Only answer these questions with single sentences, don't add any extra details." user_prompt = "What kind of a creature is Shrek? What is his Shrek's wife's name? Only answer these questions with single sentences, don't add any extra details."
shrek_request = { shrek_request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@@ -497,7 +497,7 @@ async def test_preguardrailing_with_guardrails_from_explorer(
user_prompt = "Tell me a one sentence pun." user_prompt = "Tell me a one sentence pun."
request = { request = {
"model": "claude-3-5-sonnet-20241022", "model": "claude-sonnet-4-5-20250929",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",