Fix potential infinite tool call loop by resetting tool_choice after … (#263)

# Fix potential infinite tool call loop by resetting tool_choice after
tool execution

## Summary

This PR fixes an issue where setting `tool_choice` to "required" or a
specific function name could cause models to get stuck in an infinite
tool call loop.

When `tool_choice` is set to force tool usage, this setting persists
across model invocations. This PR automatically resets `tool_choice` to
"auto" after tool execution when it names a specific function, or when it
is "required" and the agent has exactly one tool, allowing the model to
decide whether to make additional tool calls in subsequent turns.

Unlike using `tool_use_behavior="stop_on_first_tool"`, this approach
lets the model continue processing tool results while preventing forced
repeated tool calls.

## Test plan

- Added tests to verify tool_choice reset behavior for both agent and
run_config settings
- Added integration test to verify the solution prevents infinite loops
- All tests pass

## Checks

- [x] I've added new tests for the fix
- [x] I've updated the relevant documentation (added comment in code)
- [x] I've run `make lint` and `make format`
- [x] I've made sure tests pass
This commit is contained in:
Rohan Mehta 2025-03-25 11:30:53 -04:00 committed by GitHub
commit 927a29c56b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 213 additions and 2 deletions

View file

@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f
!!! note
If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop: the LLM produces a tool call, the tool result is sent back to the LLM, and the cycle repeats indefinitely because the LLM is always forced to use a tool.
To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:
1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
2. When `tool_choice` is set to "required" AND there is only one tool available
This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
If you want the Agent to stop completely after a tool call (rather than continuing in auto mode), you can set [`Agent.tool_use_behavior`] to `"stop_on_first_tool"`, which uses the tool output directly as the final response without further LLM processing.

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import asyncio
import dataclasses
import inspect
from collections.abc import Awaitable
from dataclasses import dataclass
@ -47,10 +48,11 @@ from .items import (
)
from .lifecycle import RunHooks
from .logger import logger
from .model_settings import ModelSettings
from .models.interface import ModelTracing
from .run_context import RunContextWrapper, TContext
from .stream_events import RunItemStreamEvent, StreamEvent
from .tool import ComputerTool, FunctionTool, FunctionToolResult
from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
from .tracing import (
SpanError,
Trace,
@ -206,6 +208,29 @@ class RunImpl:
new_step_items.extend([result.run_item for result in function_results])
new_step_items.extend(computer_results)
# Reset tool_choice to "auto" after tool execution to prevent infinite loops
if processed_response.functions or processed_response.computer_actions:
tools = agent.tools
if (
run_config.model_settings and
cls._should_reset_tool_choice(run_config.model_settings, tools)
):
# update the run_config model settings with a copy
new_run_config_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto"
)
run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
if cls._should_reset_tool_choice(agent.model_settings, tools):
# Create a modified copy instead of modifying the original agent
new_model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto"
)
agent = dataclasses.replace(agent, model_settings=new_model_settings)
# Second, check if there are any handoffs
if run_handoffs := processed_response.handoffs:
return await cls.execute_handoffs(
@ -296,6 +321,24 @@ class RunImpl:
next_step=NextStepRunAgain(),
)
@classmethod
def _should_reset_tool_choice(cls, model_settings: ModelSettings, tools: list[Tool]) -> bool:
    """Report whether ``tool_choice`` should be reset to "auto" after a tool call.

    Args:
        model_settings: Settings whose ``tool_choice`` is inspected. ``None`` is
            tolerated and treated like an unset ``tool_choice``.
        tools: The tools available to the agent; only their count is used.

    Returns:
        ``True`` when ``tool_choice`` is a string other than "auto", "required"
        or "none" (i.e. it names a specific tool), or when it is "required" and
        exactly one tool is available. ``False`` in every other case.
    """
    choice = None if model_settings is None else model_settings.tool_choice
    if choice is None:
        return False

    if choice == "required":
        # With a single tool, "required" forces that same tool on every turn.
        return len(tools) == 1

    # Any remaining string that is not a mode keyword names a specific tool.
    return isinstance(choice, str) and choice not in ("auto", "none")
@classmethod
def process_model_response(
cls,

View file

@ -0,0 +1,161 @@
import pytest
from agents import Agent, ModelSettings, Runner, Tool
from agents._run_impl import RunImpl
from .fake_model import FakeModel
from .test_responses import (
get_function_tool,
get_function_tool_call,
get_text_message,
)
class TestToolChoiceReset:
    """Tests for the automatic ``tool_choice`` reset applied after tool execution."""

    def test_should_reset_tool_choice_direct(self):
        """
        Test the _should_reset_tool_choice method directly with various inputs
        to ensure it correctly identifies cases where reset is needed.
        """
        # Annotated as list[Tool] to fix type checking issues
        tools1: list[Tool] = [get_function_tool("tool1")]
        tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]

        # Case 1: tool_choice = None should not reset
        model_settings = ModelSettings(tool_choice=None)
        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

        # Case 2: tool_choice = "auto" should not reset
        model_settings = ModelSettings(tool_choice="auto")
        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

        # Case 3: tool_choice = "none" should not reset
        model_settings = ModelSettings(tool_choice="none")
        assert not RunImpl._should_reset_tool_choice(model_settings, tools1)

        # Case 4: tool_choice = "required" with one tool should reset
        model_settings = ModelSettings(tool_choice="required")
        assert RunImpl._should_reset_tool_choice(model_settings, tools1)

        # Case 5: tool_choice = "required" with multiple tools should not reset
        model_settings = ModelSettings(tool_choice="required")
        assert not RunImpl._should_reset_tool_choice(model_settings, tools2)

        # Case 6: Specific tool choice should reset
        model_settings = ModelSettings(tool_choice="specific_tool")
        assert RunImpl._should_reset_tool_choice(model_settings, tools1)

    @pytest.mark.asyncio
    async def test_required_tool_choice_with_multiple_runs(self):
        """
        Test scenario 1: When multiple runs are executed with tool_choice="required"
        Ensure each run works correctly and doesn't get stuck in infinite loop
        Also verify that tool_choice remains "required" between runs
        """
        # Each run makes one tool call and then produces its final text. Without
        # the tool call the reset code path is never reached, and the checks on
        # agent.model_settings below would not prove anything.
        fake_model = FakeModel()
        fake_model.add_multiple_turn_outputs([
            # First run: tool call, then final response
            [get_function_tool_call("custom_tool", "{}")],
            [get_text_message("First run response")],
            # Second run: tool call, then final response
            [get_function_tool_call("custom_tool", "{}")],
            [get_text_message("Second run response")],
        ])

        # Create agent with a custom tool and tool_choice="required"
        custom_tool = get_function_tool("custom_tool", return_value="tool result")
        agent = Agent(
            name="test_agent",
            model=fake_model,
            tools=[custom_tool],
            model_settings=ModelSettings(tool_choice="required"),
        )

        # First run should work correctly and leave the caller's agent untouched
        result1 = await Runner.run(agent, "first run")
        assert result1.final_output == "First run response"
        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

        # Second run should also work correctly with tool_choice still required
        result2 = await Runner.run(agent, "second run")
        assert result2.final_output == "Second run response"
        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

    @pytest.mark.asyncio
    async def test_required_with_stop_at_tool_name(self):
        """
        Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
        Ensure it correctly stops at the specified tool
        """
        # Set up fake model to return a tool call for second_tool
        fake_model = FakeModel()
        fake_model.set_next_output([
            get_function_tool_call("second_tool", "{}")
        ])

        # Create agent with two tools and tool_choice="required" and stop_at_tool behavior
        first_tool = get_function_tool("first_tool", return_value="first tool result")
        second_tool = get_function_tool("second_tool", return_value="second tool result")
        agent = Agent(
            name="test_agent",
            model=fake_model,
            tools=[first_tool, second_tool],
            model_settings=ModelSettings(tool_choice="required"),
            tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
        )

        # Run should stop after using second_tool
        result = await Runner.run(agent, "run test")
        assert result.final_output == "second tool result"

    @pytest.mark.asyncio
    async def test_specific_tool_choice(self):
        """
        Test scenario 3: When using a specific tool choice name
        Ensure it doesn't cause infinite loops
        """
        # The model first calls the forced tool, then answers with text, so the
        # run actually passes through tool execution before finishing.
        fake_model = FakeModel()
        fake_model.add_multiple_turn_outputs([
            # First call returns a call to the specifically chosen tool
            [get_function_tool_call("tool1", "{}")],
            # Second call returns a text message
            [get_text_message("Test message")],
        ])

        # Create agent with specific tool_choice
        tool1 = get_function_tool("tool1", return_value="tool1 result")
        tool2 = get_function_tool("tool2")
        tool3 = get_function_tool("tool3")
        agent = Agent(
            name="test_agent",
            model=fake_model,
            tools=[tool1, tool2, tool3],
            model_settings=ModelSettings(tool_choice="tool1"),  # Specific tool
        )

        # Run should complete without infinite loops
        result = await Runner.run(agent, "first run")
        assert result.final_output == "Test message"
        assert agent.model_settings.tool_choice == "tool1", "caller's agent must not be mutated"

    @pytest.mark.asyncio
    async def test_required_with_single_tool(self):
        """
        Test scenario 4: When using required tool_choice with only one tool
        Ensure it doesn't cause infinite loops
        """
        # Set up fake model to return a tool call followed by a text message
        fake_model = FakeModel()
        fake_model.add_multiple_turn_outputs([
            # First call returns a tool call
            [get_function_tool_call("custom_tool", "{}")],
            # Second call returns a text message
            [get_text_message("Final response")]
        ])

        # Create agent with a single tool and tool_choice="required"
        custom_tool = get_function_tool("custom_tool", return_value="tool result")
        agent = Agent(
            name="test_agent",
            model=fake_model,
            tools=[custom_tool],
            model_settings=ModelSettings(tool_choice="required"),
        )

        # Run should complete without infinite loops
        result = await Runner.run(agent, "first run")
        assert result.final_output == "Final response"