Fix potential infinite tool call loop by resetting tool_choice after … (#263)

# Fix potential infinite tool call loop by resetting tool_choice after tool execution

## Summary

This PR fixes an issue where setting `tool_choice` to "required" or a specific function name could cause models to get stuck in an infinite tool call loop.

When `tool_choice` is set to force tool usage, this setting persists across model invocations. This PR automatically resets `tool_choice` to "auto" after tool execution, allowing the model to decide whether to make additional tool calls in subsequent turns.

Unlike using `tool_use_behavior="stop_on_first_tool"`, this approach lets the model continue processing tool results while preventing forced repeated tool calls.

## Test plan

- Added tests to verify tool_choice reset behavior for both agent and run_config settings
- Added integration test to verify the solution prevents infinite loops
- All tests pass

## Checks

- [x] I've added new tests for the fix
- [x] I've updated the relevant documentation (added comment in code)
- [x] I've run `make lint` and `make format`
- [x] I've made sure tests pass
2025-03-25 11:30:53 -04:00 · 2025-03-25 11:30:53 -04:00 · 927a29c56b
commit 927a29c56b
parent 13abb6826d 07a4af1fe2
3 changed files with 213 additions and 2 deletions
--- a/docs/agents.md
+++ b/docs/agents.md
@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f

 !!! note

-    If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop, where the LLM produces a tool call , and the tool result is sent to the LLM, and this infinite loops because the LLM is always forced to use a tool.
+    To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:
+    
+    1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
+    2. When `tool_choice` is set to "required" AND there is only one tool available
+    
+    This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
+    
+    If you want the Agent to stop completely after a tool call (rather than continuing in "auto" mode), set [`Agent.tool_use_behavior`] to `"stop_on_first_tool"`, which uses the tool output directly as the final response without further LLM processing.
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import asyncio
+import dataclasses
 import inspect
 from collections.abc import Awaitable
 from dataclasses import dataclass
@ -47,10 +48,11 @@ from .items import (
 )
 from .lifecycle import RunHooks
 from .logger import logger
+from .model_settings import ModelSettings
 from .models.interface import ModelTracing
 from .run_context import RunContextWrapper, TContext
 from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool, FunctionToolResult
+from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
 from .tracing import (
    SpanError,
    Trace,
@ -206,6 +208,29 @@ class RunImpl:
        new_step_items.extend([result.run_item for result in function_results])
        new_step_items.extend(computer_results)

+        # Reset tool_choice to "auto" after tool execution to prevent infinite loops
+        if processed_response.functions or processed_response.computer_actions:
+            tools = agent.tools
+
+            if (
+                run_config.model_settings and
+                cls._should_reset_tool_choice(run_config.model_settings, tools)
+            ):
+                # update the run_config model settings with a copy
+                new_run_config_settings = dataclasses.replace(
+                    run_config.model_settings,
+                    tool_choice="auto"
+                )
+                run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
+
+            if cls._should_reset_tool_choice(agent.model_settings, tools):
+                # Create a modified copy instead of modifying the original agent
+                new_model_settings = dataclasses.replace(
+                    agent.model_settings,
+                    tool_choice="auto"
+                )
+                agent = dataclasses.replace(agent, model_settings=new_model_settings)
+
        # Second, check if there are any handoffs
        if run_handoffs := processed_response.handoffs:
            return await cls.execute_handoffs(
@ -296,6 +321,24 @@ class RunImpl:
                next_step=NextStepRunAgain(),
            )

+    @classmethod
+    def _should_reset_tool_choice(cls, model_settings: ModelSettings, tools: list[Tool]) -> bool:
+        if model_settings is None or model_settings.tool_choice is None:
+            return False  # no tool_choice configured, so there is nothing to reset
+
+        # A string other than the three generic modes names one specific tool: reset it.
+        if (
+            isinstance(model_settings.tool_choice, str) and
+            model_settings.tool_choice not in ["auto", "required", "none"]
+        ):
+            return True
+
+        # "required" is reset only when exactly one tool exists (the call is fully forced).
+        if model_settings.tool_choice == "required":
+            return len(tools) == 1
+
+        return False  # "auto" and "none" are left untouched
+
    @classmethod
    def process_model_response(
        cls,
--- a/tests/test_tool_choice_reset.py
+++ b/tests/test_tool_choice_reset.py
@ -0,0 +1,161 @@
+import pytest
+
+from agents import Agent, ModelSettings, Runner, Tool
+from agents._run_impl import RunImpl
+
+from .fake_model import FakeModel
+from .test_responses import (
+    get_function_tool,
+    get_function_tool_call,
+    get_text_message,
+)
+
+
+class TestToolChoiceReset:
+
+    def test_should_reset_tool_choice_direct(self):
+        """
+        Test the _should_reset_tool_choice method directly with various inputs
+        to ensure it correctly identifies cases where reset is needed.
+        """
+        # Case 1: tool_choice = None should not reset
+        model_settings = ModelSettings(tool_choice=None)
+        tools1: list[Tool] = [get_function_tool("tool1")]
+        # tools1 is annotated as list[Tool] above (not cast) to satisfy the type checker
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 2: tool_choice = "auto" should not reset
+        model_settings = ModelSettings(tool_choice="auto")
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 3: tool_choice = "none" should not reset
+        model_settings = ModelSettings(tool_choice="none")
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 4: tool_choice = "required" with one tool should reset
+        model_settings = ModelSettings(tool_choice="required")
+        assert RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+        # Case 5: tool_choice = "required" with multiple tools should not reset
+        model_settings = ModelSettings(tool_choice="required")
+        tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]
+        assert not RunImpl._should_reset_tool_choice(model_settings, tools2)
+
+        # Case 6: a specific tool name should reset (here with the single-tool list)
+        model_settings = ModelSettings(tool_choice="specific_tool")
+        assert RunImpl._should_reset_tool_choice(model_settings, tools1)
+
+    @pytest.mark.asyncio
+    async def test_required_tool_choice_with_multiple_runs(self):
+        """
+        Test scenario 1: When multiple runs are executed with tool_choice="required"
+        Ensure each run works correctly and doesn't get stuck in infinite loop
+        Also verify that tool_choice remains "required" between runs
+        """
+        # Queue one plain-text response per run; the fake model never emits a tool call here
+        fake_model = FakeModel()
+        fake_model.add_multiple_turn_outputs([
+            [get_text_message("First run response")],
+            [get_text_message("Second run response")]
+        ])
+
+        # Create agent with a single custom tool and tool_choice="required"
+        custom_tool = get_function_tool("custom_tool")
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[custom_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+        )
+
+        # First run should finish and leave the caller's agent.model_settings unchanged
+        result1 = await Runner.run(agent, "first run")
+        assert result1.final_output == "First run response"
+        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
+
+        # Second run reuses the same agent object; tool_choice must still be "required"
+        result2 = await Runner.run(agent, "second run")
+        assert result2.final_output == "Second run response"
+        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
+
+    @pytest.mark.asyncio
+    async def test_required_with_stop_at_tool_name(self):
+        """
+        Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
+        Ensure it correctly stops at the specified tool
+        """
+        # Set up fake model to return a single tool call for second_tool (no follow-up turn)
+        fake_model = FakeModel()
+        fake_model.set_next_output([
+            get_function_tool_call("second_tool", "{}")
+        ])
+
+        # Create agent with two tools, tool_choice="required", and stop_at_tool_names behavior
+        first_tool = get_function_tool("first_tool", return_value="first tool result")
+        second_tool = get_function_tool("second_tool", return_value="second tool result")
+
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[first_tool, second_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+            tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
+        )
+
+        # Run should stop after second_tool, using its return value as the final output
+        result = await Runner.run(agent, "run test")
+        assert result.final_output == "second tool result"
+
+    @pytest.mark.asyncio
+    async def test_specific_tool_choice(self):
+        """
+        Test scenario 3: When using a specific tool choice name
+        Ensure it doesn't cause infinite loops
+        """
+        # Set up fake model to return a text message (no tool call is produced in this test)
+        fake_model = FakeModel()
+        fake_model.set_next_output([get_text_message("Test message")])
+
+        # Create agent with three tools and tool_choice naming one of them
+        tool1 = get_function_tool("tool1")
+        tool2 = get_function_tool("tool2")
+        tool3 = get_function_tool("tool3")
+
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[tool1, tool2, tool3],
+            model_settings=ModelSettings(tool_choice="tool1"),  # Specific tool
+        )
+
+        # Run should complete in one turn with the text message as the final output
+        result = await Runner.run(agent, "first run")
+        assert result.final_output == "Test message"
+
+    @pytest.mark.asyncio
+    async def test_required_with_single_tool(self):
+        """
+        Test scenario 4: When using required tool_choice with only one tool
+        Ensure it doesn't cause infinite loops
+        """
+        # Set up fake model to return a tool call followed by a text message
+        fake_model = FakeModel()
+        fake_model.add_multiple_turn_outputs([
+            # First call returns a tool call
+            [get_function_tool_call("custom_tool", "{}")],
+            # Second call returns a text message
+            [get_text_message("Final response")]
+        ])
+
+        # Create agent with a single tool and tool_choice="required"
+        custom_tool = get_function_tool("custom_tool", return_value="tool result")
+        agent = Agent(
+            name="test_agent",
+            model=fake_model,
+            tools=[custom_tool],
+            model_settings=ModelSettings(tool_choice="required"),
+        )
+
+        # Run should take two turns (tool call, then text) and end with the text output
+        result = await Runner.run(agent, "first run")
+        assert result.final_output == "Final response"