diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 1f896d7..1370462 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -213,19 +213,25 @@ class RunImpl: tools = agent.tools # Only reset in the problematic scenarios where loops are likely unintentional if cls._should_reset_tool_choice(agent.model_settings, tools): - agent.model_settings = dataclasses.replace( + # Create a modified copy instead of modifying the original agent + new_model_settings = dataclasses.replace( agent.model_settings, tool_choice="auto" ) + # Create a new internal agent with updated settings + agent = dataclasses.replace(agent, model_settings=new_model_settings) if ( run_config.model_settings and cls._should_reset_tool_choice(run_config.model_settings, tools) ): - run_config.model_settings = dataclasses.replace( + # Also update the run_config model settings with a copy + new_run_config_settings = dataclasses.replace( run_config.model_settings, tool_choice="auto" ) + # Create a new run_config with the new settings + run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings) # Second, check if there are any handoffs if run_handoffs := processed_response.handoffs: diff --git a/tests/test_tool_choice_reset.py b/tests/test_tool_choice_reset.py index b47c4d9..7dae6f6 100644 --- a/tests/test_tool_choice_reset.py +++ b/tests/test_tool_choice_reset.py @@ -1,286 +1,161 @@ -import asyncio -import dataclasses -import json -from unittest import mock +import pytest -from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall +from agents import Agent, ModelSettings, Runner, Tool +from agents._run_impl import RunImpl -from agents import Agent, ModelSettings, RunConfig, Runner, function_tool -from agents.items import Usage -from agents.models.interface import ModelResponse -from agents.tool import Tool - - -@function_tool -def echo(text: str) -> str: - """Echo the input text""" - return text - - -def should_reset_tool_choice(model_settings: ModelSettings, tools: list[Tool]) -> bool: - if model_settings is None or model_settings.tool_choice is None: - return False - - # for specific tool choices - if ( - isinstance(model_settings.tool_choice, str) and - model_settings.tool_choice not in ["auto", "required", "none"] - ): - return True - - # for one tool and required tool choice - if model_settings.tool_choice == "required": - return len(tools) == 1 - - return False - -# Mock model implementation that always calls tools when tool_choice is set -class MockModel: - def __init__(self, tool_call_counter): - self.tool_call_counter = tool_call_counter - - async def get_response(self, **kwargs): - tools = kwargs.get("tools", []) - model_settings = kwargs.get("model_settings") - - # Increment the counter to track how many times this model is called - self.tool_call_counter["count"] += 1 - - # If we've been called many times, we're likely in an infinite loop - if self.tool_call_counter["count"] > 5: - self.tool_call_counter["potential_infinite_loop"] = True - - # Always create a tool call if tool_choice is required/specific - tool_calls = [] - if model_settings and model_settings.tool_choice: - if model_settings.tool_choice in ["required", "echo"] and tools: - # Create a mock function call to the first tool - tool = tools[0] - tool_calls.append( - ResponseFunctionToolCall( - id="call_1", - name=tool.name, - arguments=json.dumps({"text": "This is a test"}), - call_id="call_1", - type="function_call", - ) - ) - - return ModelResponse( - output=tool_calls, - referenceable_id="123", - usage=Usage(input_tokens=10, output_tokens=10, total_tokens=20), - ) +from .fake_model import FakeModel +from .test_responses import ( + get_function_tool, + get_function_tool_call, + get_text_message, +) class TestToolChoiceReset: - async def test_tool_choice_resets_after_call(self): - """Test that tool_choice is reset to 'auto' after tool call when set to 'required'""" - # Create an agent with tool_choice="required" + + def test_should_reset_tool_choice_direct(self): + """ + Test the _should_reset_tool_choice method directly with various inputs + to ensure it correctly identifies cases where reset is needed. + """ + # Case 1: tool_choice = None should not reset + model_settings = ModelSettings(tool_choice=None) + tools1: list[Tool] = [get_function_tool("tool1")] + # Cast to list[Tool] to fix type checking issues + assert not RunImpl._should_reset_tool_choice(model_settings, tools1) + + # Case 2: tool_choice = "auto" should not reset + model_settings = ModelSettings(tool_choice="auto") + assert not RunImpl._should_reset_tool_choice(model_settings, tools1) + + # Case 3: tool_choice = "none" should not reset + model_settings = ModelSettings(tool_choice="none") + assert not RunImpl._should_reset_tool_choice(model_settings, tools1) + + # Case 4: tool_choice = "required" with one tool should reset + model_settings = ModelSettings(tool_choice="required") + assert RunImpl._should_reset_tool_choice(model_settings, tools1) + + # Case 5: tool_choice = "required" with multiple tools should not reset + model_settings = ModelSettings(tool_choice="required") + tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")] + assert not RunImpl._should_reset_tool_choice(model_settings, tools2) + + # Case 6: Specific tool choice should reset + model_settings = ModelSettings(tool_choice="specific_tool") + assert RunImpl._should_reset_tool_choice(model_settings, tools1) + + @pytest.mark.asyncio + async def test_required_tool_choice_with_multiple_runs(self): + """ + Test scenario 1: When multiple runs are executed with tool_choice="required" + Ensure each run works correctly and doesn't get stuck in infinite loop + Also verify that tool_choice remains "required" between runs + """ + # Set up our fake model with responses for two runs + fake_model = FakeModel() + fake_model.add_multiple_turn_outputs([ + [get_text_message("First run response")], + [get_text_message("Second run response")] + ]) + + # Create agent with a custom tool and tool_choice="required" + custom_tool = get_function_tool("custom_tool") agent = Agent( - name="Test agent", - tools=[echo], # Only one tool + name="test_agent", + model=fake_model, + tools=[custom_tool], model_settings=ModelSettings(tool_choice="required"), ) - # Directly modify the model_settings - # Instead of trying to run the full execute_tools_and_side_effects, - # we'll just test the tool_choice reset logic directly - processed_response = mock.MagicMock() - processed_response.functions = [mock.MagicMock()] # At least one function call - processed_response.computer_actions = [] + # First run should work correctly and preserve tool_choice + result1 = await Runner.run(agent, "first run") + assert result1.final_output == "First run response" + assert agent.model_settings.tool_choice == "required", "tool_choice should stay required" - # Create a mock run_config - run_config = mock.MagicMock() - run_config.model_settings = None + # Second run should also work correctly with tool_choice still required + result2 = await Runner.run(agent, "second run") + assert result2.final_output == "Second run response" + assert agent.model_settings.tool_choice == "required", "tool_choice should stay required" - # Execute our code under test - if processed_response.functions: - # Apply the targeted reset logic - tools = agent.tools - if should_reset_tool_choice(agent.model_settings, tools): - agent.model_settings = dataclasses.replace( - agent.model_settings, - tool_choice="auto" # Reset to auto - ) + @pytest.mark.asyncio + async def test_required_with_stop_at_tool_name(self): + """ + Test scenario 2: When using required tool_choice with stop_at_tool_names behavior + Ensure it correctly stops at the specified tool + """ + # Set up fake model to return a tool call for second_tool + fake_model = FakeModel() + fake_model.set_next_output([ + get_function_tool_call("second_tool", "{}") + ]) - # Also reset run_config's model_settings if it exists - if ( - run_config.model_settings and - should_reset_tool_choice(run_config.model_settings, tools) - ): - run_config.model_settings = dataclasses.replace( - run_config.model_settings, - tool_choice="auto" # Reset to auto - ) + # Create agent with two tools and tool_choice="required" and stop_at_tool behavior + first_tool = get_function_tool("first_tool", return_value="first tool result") + second_tool = get_function_tool("second_tool", return_value="second tool result") - # Check that tool_choice was reset to "auto" - assert agent.model_settings.tool_choice == "auto" - - async def test_tool_choice_resets_from_specific_function(self): - """Test tool_choice reset to 'auto' after call when set to specific function name""" - # Create an agent with tool_choice set to a specific function agent = Agent( - name="Test agent", - instructions="You are a test agent", - tools=[echo], - model="gpt-4-0125-preview", - model_settings=ModelSettings(tool_choice="echo"), # Specific function name - ) - - # Execute our code under test - processed_response = mock.MagicMock() - processed_response.functions = [mock.MagicMock()] # At least one function call - processed_response.computer_actions = [] - - # Create a mock run_config - run_config = mock.MagicMock() - run_config.model_settings = None - - # Execute our code under test - if processed_response.functions: - # Apply the targeted reset logic - tools = agent.tools - if should_reset_tool_choice(agent.model_settings, tools): - agent.model_settings = dataclasses.replace( - agent.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Also reset run_config's model_settings if it exists - if ( - run_config.model_settings and - should_reset_tool_choice(run_config.model_settings, tools) - ): - run_config.model_settings = dataclasses.replace( - run_config.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Check that tool_choice was reset to "auto" - assert agent.model_settings.tool_choice == "auto" - - async def test_tool_choice_no_reset_when_auto(self): - """Test that tool_choice is not changed when it's already set to 'auto'""" - # Create an agent with tool_choice="auto" - agent = Agent( - name="Test agent", - tools=[echo], - model_settings=ModelSettings(tool_choice="auto"), - ) - - # Execute our code under test - processed_response = mock.MagicMock() - processed_response.functions = [mock.MagicMock()] # At least one function call - processed_response.computer_actions = [] - - # Create a mock run_config - run_config = mock.MagicMock() - run_config.model_settings = None - - # Execute our code under test - if processed_response.functions: - # Apply the targeted reset logic - tools = agent.tools - if should_reset_tool_choice(agent.model_settings, tools): - agent.model_settings = dataclasses.replace( - agent.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Also reset run_config's model_settings if it exists - if ( - run_config.model_settings and - should_reset_tool_choice(run_config.model_settings, tools) - ): - run_config.model_settings = dataclasses.replace( - run_config.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Check that tool_choice remains "auto" - assert agent.model_settings.tool_choice == "auto" - - async def test_run_config_tool_choice_reset(self): - """Test that run_config.model_settings.tool_choice is reset to 'auto'""" - # Create an agent with default model_settings - agent = Agent( - name="Test agent", - tools=[echo], # Only one tool - model_settings=ModelSettings(tool_choice=None), - ) - - # Create a run_config with tool_choice="required" - run_config = RunConfig() - run_config.model_settings = ModelSettings(tool_choice="required") - - # Execute our code under test - processed_response = mock.MagicMock() - processed_response.functions = [mock.MagicMock()] # At least one function call - processed_response.computer_actions = [] - - # Execute our code under test - if processed_response.functions: - # Apply the targeted reset logic - tools = agent.tools - if should_reset_tool_choice(agent.model_settings, tools): - agent.model_settings = dataclasses.replace( - agent.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Also reset run_config's model_settings if it exists - if ( - run_config.model_settings and - should_reset_tool_choice(run_config.model_settings, tools) - ): - run_config.model_settings = dataclasses.replace( - run_config.model_settings, - tool_choice="auto" # Reset to auto - ) - - # Check that run_config's tool_choice was reset to "auto" - assert run_config.model_settings.tool_choice == "auto" - - @mock.patch("agents.run.Runner._get_model") - async def test_integration_prevents_infinite_loop(self, mock_get_model): - """Integration test to verify that tool_choice reset prevents infinite loops""" - # Create a counter to track model calls and detect potential infinite loops - tool_call_counter = {"count": 0, "potential_infinite_loop": False} - - # Set up our mock model that will always use tools when tool_choice is set - mock_model_instance = MockModel(tool_call_counter) - # Return our mock model directly - mock_get_model.return_value = mock_model_instance - - # Create an agent with tool_choice="required" to force tool usage - agent = Agent( - name="Test agent", - instructions="You are a test agent", - tools=[echo], + name="test_agent", + model=fake_model, + tools=[first_tool, second_tool], model_settings=ModelSettings(tool_choice="required"), - # Use "run_llm_again" to allow LLM to continue after tool calls - # This would cause infinite loops without the tool_choice reset - tool_use_behavior="run_llm_again", + tool_use_behavior={"stop_at_tool_names": ["second_tool"]}, ) - # Set a timeout to catch potential infinite loops that our fix doesn't address - try: - # Run the agent with a timeout - async def run_with_timeout(): - return await Runner.run(agent, input="Test input") + # Run should stop after using second_tool + result = await Runner.run(agent, "run test") + assert result.final_output == "second tool result" - result = await asyncio.wait_for(run_with_timeout(), timeout=2.0) + @pytest.mark.asyncio + async def test_specific_tool_choice(self): + """ + Test scenario 3: When using a specific tool choice name + Ensure it doesn't cause infinite loops + """ + # Set up fake model to return a text message + fake_model = FakeModel() + fake_model.set_next_output([get_text_message("Test message")]) - # Verify the agent ran successfully - assert result is not None + # Create agent with specific tool_choice + tool1 = get_function_tool("tool1") + tool2 = get_function_tool("tool2") + tool3 = get_function_tool("tool3") - # Verify the tool was called at least once but not too many times - # (indicating no infinite loop) - assert tool_call_counter["count"] >= 1 - assert tool_call_counter["count"] < 5 - assert not tool_call_counter["potential_infinite_loop"] + agent = Agent( + name="test_agent", + model=fake_model, + tools=[tool1, tool2, tool3], + model_settings=ModelSettings(tool_choice="tool1"), # Specific tool + ) - except asyncio.TimeoutError as err: - # If we hit a timeout, the test failed - we likely have an infinite loop - raise AssertionError("Timeout occurred, potential infinite loop detected") from err + # Run should complete without infinite loops + result = await Runner.run(agent, "first run") + assert result.final_output == "Test message" + + @pytest.mark.asyncio + async def test_required_with_single_tool(self): + """ + Test scenario 4: When using required tool_choice with only one tool + Ensure it doesn't cause infinite loops + """ + # Set up fake model to return a tool call followed by a text message + fake_model = FakeModel() + fake_model.add_multiple_turn_outputs([ + # First call returns a tool call + [get_function_tool_call("custom_tool", "{}")], + # Second call returns a text message + [get_text_message("Final response")] + ]) + + # Create agent with a single tool and tool_choice="required" + custom_tool = get_function_tool("custom_tool", return_value="tool result") + agent = Agent( + name="test_agent", + model=fake_model, + tools=[custom_tool], + model_settings=ModelSettings(tool_choice="required"), + ) + + # Run should complete without infinite loops + result = await Runner.run(agent, "first run") + assert result.final_output == "Final response"