Fix potential infinite tool call loop by resetting tool_choice after … (#263)
# Fix potential infinite tool call loop by resetting tool_choice after tool execution ## Summary This PR fixes an issue where setting `tool_choice` to "required" or a specific function name could cause models to get stuck in an infinite tool call loop. When `tool_choice` is set to force tool usage, this setting persists across model invocations. This PR automatically resets `tool_choice` to "auto" after tool execution, allowing the model to decide whether to make additional tool calls in subsequent turns. Unlike using `tool_use_behavior="stop_on_first_tool"`, this approach lets the model continue processing tool results while preventing forced repeated tool calls. ## Test plan - Added tests to verify tool_choice reset behavior for both agent and run_config settings - Added integration test to verify the solution prevents infinite loops - All tests pass ## Checks - [x] I've added new tests for the fix - [x] I've updated the relevant documentation (added comment in code) - [x] I've run `make lint` and `make format` - [x] I've made sure tests pass
This commit is contained in:
commit
927a29c56b
3 changed files with 213 additions and 2 deletions
|
|
@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f
|
|||
|
||||
!!! note
|
||||
|
||||
If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop: the LLM produces a tool call, the tool result is sent back to the LLM, and the cycle repeats indefinitely because the LLM is always forced to use a tool.
|
||||
To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:
|
||||
|
||||
1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
|
||||
2. When `tool_choice` is set to "required" AND there is only one tool available
|
||||
|
||||
This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
|
||||
|
||||
If you want the Agent to completely stop after a tool call (rather than continuing with auto mode), you can set [`Agent.tool_use_behavior="stop_on_first_tool"`] which will directly use the tool output as the final response without further LLM processing.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import inspect
|
||||
from collections.abc import Awaitable
|
||||
from dataclasses import dataclass
|
||||
|
|
@ -47,10 +48,11 @@ from .items import (
|
|||
)
|
||||
from .lifecycle import RunHooks
|
||||
from .logger import logger
|
||||
from .model_settings import ModelSettings
|
||||
from .models.interface import ModelTracing
|
||||
from .run_context import RunContextWrapper, TContext
|
||||
from .stream_events import RunItemStreamEvent, StreamEvent
|
||||
from .tool import ComputerTool, FunctionTool, FunctionToolResult
|
||||
from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
|
||||
from .tracing import (
|
||||
SpanError,
|
||||
Trace,
|
||||
|
|
@ -206,6 +208,29 @@ class RunImpl:
|
|||
new_step_items.extend([result.run_item for result in function_results])
|
||||
new_step_items.extend(computer_results)
|
||||
|
||||
# Reset tool_choice to "auto" after tool execution to prevent infinite loops
|
||||
if processed_response.functions or processed_response.computer_actions:
|
||||
tools = agent.tools
|
||||
|
||||
if (
|
||||
run_config.model_settings and
|
||||
cls._should_reset_tool_choice(run_config.model_settings, tools)
|
||||
):
|
||||
# update the run_config model settings with a copy
|
||||
new_run_config_settings = dataclasses.replace(
|
||||
run_config.model_settings,
|
||||
tool_choice="auto"
|
||||
)
|
||||
run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
|
||||
|
||||
if cls._should_reset_tool_choice(agent.model_settings, tools):
|
||||
# Create a modified copy instead of modifying the original agent
|
||||
new_model_settings = dataclasses.replace(
|
||||
agent.model_settings,
|
||||
tool_choice="auto"
|
||||
)
|
||||
agent = dataclasses.replace(agent, model_settings=new_model_settings)
|
||||
|
||||
# Second, check if there are any handoffs
|
||||
if run_handoffs := processed_response.handoffs:
|
||||
return await cls.execute_handoffs(
|
||||
|
|
@ -296,6 +321,24 @@ class RunImpl:
|
|||
next_step=NextStepRunAgain(),
|
||||
)
|
||||
|
||||
@classmethod
def _should_reset_tool_choice(
    cls, model_settings: ModelSettings | None, tools: list[Tool]
) -> bool:
    """Return whether ``tool_choice`` should be reset to "auto" after a tool call.

    A reset is requested in exactly two situations:

    * ``tool_choice`` is a string other than "auto", "required" or "none",
      i.e. it names a specific tool.
    * ``tool_choice`` is "required" and ``tools`` contains exactly one tool.

    Args:
        model_settings: Settings to inspect. ``None`` (or a ``tool_choice`` of
            ``None``) means there is nothing to reset.
        tools: The tools available to the agent.

    Returns:
        ``True`` if the caller should replace ``tool_choice`` with "auto",
        ``False`` otherwise.
    """
    if model_settings is None:
        return False

    tool_choice = model_settings.tool_choice
    if tool_choice is None:
        return False

    if tool_choice == "required":
        # "required" only triggers a reset when a single tool is available.
        return len(tools) == 1

    # Any remaining string that is not a generic mode is a specific tool name.
    return isinstance(tool_choice, str) and tool_choice not in ("auto", "none")
|
||||
|
||||
@classmethod
|
||||
def process_model_response(
|
||||
cls,
|
||||
|
|
|
|||
161
tests/test_tool_choice_reset.py
Normal file
161
tests/test_tool_choice_reset.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
import pytest
|
||||
|
||||
from agents import Agent, ModelSettings, Runner, Tool
|
||||
from agents._run_impl import RunImpl
|
||||
|
||||
from .fake_model import FakeModel
|
||||
from .test_responses import (
|
||||
get_function_tool,
|
||||
get_function_tool_call,
|
||||
get_text_message,
|
||||
)
|
||||
|
||||
|
||||
class TestToolChoiceReset:
    """Tests for resetting ``tool_choice`` after a tool call."""

    def test_should_reset_tool_choice_direct(self):
        """Call ``RunImpl._should_reset_tool_choice`` directly for each tool_choice value."""
        single_tool: list[Tool] = [get_function_tool("tool1")]
        two_tools: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]

        # (tool_choice, available tools, whether a reset is expected)
        expectations = [
            (None, single_tool, False),
            ("auto", single_tool, False),
            ("none", single_tool, False),
            ("required", single_tool, True),
            ("required", two_tools, False),
            ("specific_tool", single_tool, True),
        ]

        for choice, available, expected in expectations:
            settings = ModelSettings(tool_choice=choice)
            outcome = RunImpl._should_reset_tool_choice(settings, available)
            assert bool(outcome) is expected

    @pytest.mark.asyncio
    async def test_required_tool_choice_with_multiple_runs(self):
        """Run the same agent twice with tool_choice="required".

        Both runs must return their text output, and the agent's own
        ``model_settings.tool_choice`` must still be "required" after each run.
        """
        model = FakeModel()
        first_turn = [get_text_message("First run response")]
        second_turn = [get_text_message("Second run response")]
        model.add_multiple_turn_outputs([first_turn, second_turn])

        agent = Agent(
            name="test_agent",
            model=model,
            tools=[get_function_tool("custom_tool")],
            model_settings=ModelSettings(tool_choice="required"),
        )

        first_result = await Runner.run(agent, "first run")
        assert first_result.final_output == "First run response"
        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

        second_result = await Runner.run(agent, "second run")
        assert second_result.final_output == "Second run response"
        assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"

    @pytest.mark.asyncio
    async def test_required_with_stop_at_tool_name(self):
        """Combine tool_choice="required" with a ``stop_at_tool_names`` behavior.

        The model calls ``second_tool``; the run's final output must be that
        tool's return value.
        """
        model = FakeModel()
        model.set_next_output([get_function_tool_call("second_tool", "{}")])

        available_tools = [
            get_function_tool("first_tool", return_value="first tool result"),
            get_function_tool("second_tool", return_value="second tool result"),
        ]
        agent = Agent(
            name="test_agent",
            model=model,
            tools=available_tools,
            model_settings=ModelSettings(tool_choice="required"),
            tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
        )

        outcome = await Runner.run(agent, "run test")
        assert outcome.final_output == "second tool result"

    @pytest.mark.asyncio
    async def test_specific_tool_choice(self):
        """Run an agent whose tool_choice names a specific tool; the run must complete."""
        model = FakeModel()
        model.set_next_output([get_text_message("Test message")])

        available_tools = [get_function_tool(name) for name in ("tool1", "tool2", "tool3")]
        agent = Agent(
            name="test_agent",
            model=model,
            tools=available_tools,
            model_settings=ModelSettings(tool_choice="tool1"),  # forces one specific tool
        )

        outcome = await Runner.run(agent, "first run")
        assert outcome.final_output == "Test message"

    @pytest.mark.asyncio
    async def test_required_with_single_tool(self):
        """Run an agent with one tool and tool_choice="required".

        The model first emits a tool call and then a text message; the run
        must finish with that text message as its final output.
        """
        model = FakeModel()
        tool_call_turn = [get_function_tool_call("custom_tool", "{}")]
        text_turn = [get_text_message("Final response")]
        model.add_multiple_turn_outputs([tool_call_turn, text_turn])

        agent = Agent(
            name="test_agent",
            model=model,
            tools=[get_function_tool("custom_tool", return_value="tool result")],
            model_settings=ModelSettings(tool_choice="required"),
        )

        outcome = await Runner.run(agent, "first run")
        assert outcome.final_output == "Final response"
|
||||
Loading…
Reference in a new issue