fix: prevent modifying the original agent's model_settings

This fixes the issue where the original agent's model_settings was being directly modified during the tool choice reset process. The original implementation caused the agent's tool_choice to unintentionally reset to "auto" for subsequent runs, which could be unexpected behavior.

The fix creates new copies of the agent and model settings objects using dataclasses.replace() instead of modifying the original objects. This ensures that the tool choice reset is limited to the current run only, maintaining the expected behavior for sequential runs with the same agent.

Addresses feedback from @baderalfahad about the agent instance being modified when it should maintain its original state between runs.
This commit is contained in:
xianghuijin 2025-03-23 17:20:23 +08:00
parent 8f2f76cb65
commit 6ed0bee672
2 changed files with 148 additions and 267 deletions

View file

@ -213,19 +213,25 @@ class RunImpl:
tools = agent.tools
# Only reset in the problematic scenarios where loops are likely unintentional
if cls._should_reset_tool_choice(agent.model_settings, tools):
agent.model_settings = dataclasses.replace(
# Create a modified copy instead of modifying the original agent
new_model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto"
)
# Create a new internal agent with updated settings
agent = dataclasses.replace(agent, model_settings=new_model_settings)
if (
run_config.model_settings and
cls._should_reset_tool_choice(run_config.model_settings, tools)
):
run_config.model_settings = dataclasses.replace(
# Also update the run_config model settings with a copy
new_run_config_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto"
)
# Create a new run_config with the new settings
run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
# Second, check if there are any handoffs
if run_handoffs := processed_response.handoffs:

View file

@ -1,286 +1,161 @@
import asyncio
import dataclasses
import json
from unittest import mock
import pytest
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from agents import Agent, ModelSettings, Runner, Tool
from agents._run_impl import RunImpl
from agents import Agent, ModelSettings, RunConfig, Runner, function_tool
from agents.items import Usage
from agents.models.interface import ModelResponse
from agents.tool import Tool
@function_tool
def echo(text: str) -> str:
"""Echo the input text"""
return text
def should_reset_tool_choice(model_settings: ModelSettings, tools: list[Tool]) -> bool:
if model_settings is None or model_settings.tool_choice is None:
return False
# for specific tool choices
if (
isinstance(model_settings.tool_choice, str) and
model_settings.tool_choice not in ["auto", "required", "none"]
):
return True
# for one tool and required tool choice
if model_settings.tool_choice == "required":
return len(tools) == 1
return False
# Mock model implementation that always calls tools when tool_choice is set
class MockModel:
def __init__(self, tool_call_counter):
self.tool_call_counter = tool_call_counter
async def get_response(self, **kwargs):
tools = kwargs.get("tools", [])
model_settings = kwargs.get("model_settings")
# Increment the counter to track how many times this model is called
self.tool_call_counter["count"] += 1
# If we've been called many times, we're likely in an infinite loop
if self.tool_call_counter["count"] > 5:
self.tool_call_counter["potential_infinite_loop"] = True
# Always create a tool call if tool_choice is required/specific
tool_calls = []
if model_settings and model_settings.tool_choice:
if model_settings.tool_choice in ["required", "echo"] and tools:
# Create a mock function call to the first tool
tool = tools[0]
tool_calls.append(
ResponseFunctionToolCall(
id="call_1",
name=tool.name,
arguments=json.dumps({"text": "This is a test"}),
call_id="call_1",
type="function_call",
)
)
return ModelResponse(
output=tool_calls,
referenceable_id="123",
usage=Usage(input_tokens=10, output_tokens=10, total_tokens=20),
from .fake_model import FakeModel
from .test_responses import (
get_function_tool,
get_function_tool_call,
get_text_message,
)
class TestToolChoiceReset:
async def test_tool_choice_resets_after_call(self):
"""Test that tool_choice is reset to 'auto' after tool call when set to 'required'"""
# Create an agent with tool_choice="required"
def test_should_reset_tool_choice_direct(self):
"""
Test the _should_reset_tool_choice method directly with various inputs
to ensure it correctly identifies cases where reset is needed.
"""
# Case 1: tool_choice = None should not reset
model_settings = ModelSettings(tool_choice=None)
tools1: list[Tool] = [get_function_tool("tool1")]
# Cast to list[Tool] to fix type checking issues
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
# Case 2: tool_choice = "auto" should not reset
model_settings = ModelSettings(tool_choice="auto")
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
# Case 3: tool_choice = "none" should not reset
model_settings = ModelSettings(tool_choice="none")
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
# Case 4: tool_choice = "required" with one tool should reset
model_settings = ModelSettings(tool_choice="required")
assert RunImpl._should_reset_tool_choice(model_settings, tools1)
# Case 5: tool_choice = "required" with multiple tools should not reset
model_settings = ModelSettings(tool_choice="required")
tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]
assert not RunImpl._should_reset_tool_choice(model_settings, tools2)
# Case 6: Specific tool choice should reset
model_settings = ModelSettings(tool_choice="specific_tool")
assert RunImpl._should_reset_tool_choice(model_settings, tools1)
@pytest.mark.asyncio
async def test_required_tool_choice_with_multiple_runs(self):
"""
Test scenario 1: When multiple runs are executed with tool_choice="required"
Ensure each run works correctly and doesn't get stuck in infinite loop
Also verify that tool_choice remains "required" between runs
"""
# Set up our fake model with responses for two runs
fake_model = FakeModel()
fake_model.add_multiple_turn_outputs([
[get_text_message("First run response")],
[get_text_message("Second run response")]
])
# Create agent with a custom tool and tool_choice="required"
custom_tool = get_function_tool("custom_tool")
agent = Agent(
name="Test agent",
tools=[echo], # Only one tool
name="test_agent",
model=fake_model,
tools=[custom_tool],
model_settings=ModelSettings(tool_choice="required"),
)
# Directly modify the model_settings
# Instead of trying to run the full execute_tools_and_side_effects,
# we'll just test the tool_choice reset logic directly
processed_response = mock.MagicMock()
processed_response.functions = [mock.MagicMock()] # At least one function call
processed_response.computer_actions = []
# First run should work correctly and preserve tool_choice
result1 = await Runner.run(agent, "first run")
assert result1.final_output == "First run response"
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
# Create a mock run_config
run_config = mock.MagicMock()
run_config.model_settings = None
# Second run should also work correctly with tool_choice still required
result2 = await Runner.run(agent, "second run")
assert result2.final_output == "Second run response"
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
# Execute our code under test
if processed_response.functions:
# Apply the targeted reset logic
tools = agent.tools
if should_reset_tool_choice(agent.model_settings, tools):
agent.model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto" # Reset to auto
)
@pytest.mark.asyncio
async def test_required_with_stop_at_tool_name(self):
"""
Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
Ensure it correctly stops at the specified tool
"""
# Set up fake model to return a tool call for second_tool
fake_model = FakeModel()
fake_model.set_next_output([
get_function_tool_call("second_tool", "{}")
])
# Also reset run_config's model_settings if it exists
if (
run_config.model_settings and
should_reset_tool_choice(run_config.model_settings, tools)
):
run_config.model_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto" # Reset to auto
)
# Create agent with two tools and tool_choice="required" and stop_at_tool behavior
first_tool = get_function_tool("first_tool", return_value="first tool result")
second_tool = get_function_tool("second_tool", return_value="second tool result")
# Check that tool_choice was reset to "auto"
assert agent.model_settings.tool_choice == "auto"
async def test_tool_choice_resets_from_specific_function(self):
"""Test tool_choice reset to 'auto' after call when set to specific function name"""
# Create an agent with tool_choice set to a specific function
agent = Agent(
name="Test agent",
instructions="You are a test agent",
tools=[echo],
model="gpt-4-0125-preview",
model_settings=ModelSettings(tool_choice="echo"), # Specific function name
)
# Execute our code under test
processed_response = mock.MagicMock()
processed_response.functions = [mock.MagicMock()] # At least one function call
processed_response.computer_actions = []
# Create a mock run_config
run_config = mock.MagicMock()
run_config.model_settings = None
# Execute our code under test
if processed_response.functions:
# Apply the targeted reset logic
tools = agent.tools
if should_reset_tool_choice(agent.model_settings, tools):
agent.model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto" # Reset to auto
)
# Also reset run_config's model_settings if it exists
if (
run_config.model_settings and
should_reset_tool_choice(run_config.model_settings, tools)
):
run_config.model_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto" # Reset to auto
)
# Check that tool_choice was reset to "auto"
assert agent.model_settings.tool_choice == "auto"
async def test_tool_choice_no_reset_when_auto(self):
"""Test that tool_choice is not changed when it's already set to 'auto'"""
# Create an agent with tool_choice="auto"
agent = Agent(
name="Test agent",
tools=[echo],
model_settings=ModelSettings(tool_choice="auto"),
)
# Execute our code under test
processed_response = mock.MagicMock()
processed_response.functions = [mock.MagicMock()] # At least one function call
processed_response.computer_actions = []
# Create a mock run_config
run_config = mock.MagicMock()
run_config.model_settings = None
# Execute our code under test
if processed_response.functions:
# Apply the targeted reset logic
tools = agent.tools
if should_reset_tool_choice(agent.model_settings, tools):
agent.model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto" # Reset to auto
)
# Also reset run_config's model_settings if it exists
if (
run_config.model_settings and
should_reset_tool_choice(run_config.model_settings, tools)
):
run_config.model_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto" # Reset to auto
)
# Check that tool_choice remains "auto"
assert agent.model_settings.tool_choice == "auto"
async def test_run_config_tool_choice_reset(self):
"""Test that run_config.model_settings.tool_choice is reset to 'auto'"""
# Create an agent with default model_settings
agent = Agent(
name="Test agent",
tools=[echo], # Only one tool
model_settings=ModelSettings(tool_choice=None),
)
# Create a run_config with tool_choice="required"
run_config = RunConfig()
run_config.model_settings = ModelSettings(tool_choice="required")
# Execute our code under test
processed_response = mock.MagicMock()
processed_response.functions = [mock.MagicMock()] # At least one function call
processed_response.computer_actions = []
# Execute our code under test
if processed_response.functions:
# Apply the targeted reset logic
tools = agent.tools
if should_reset_tool_choice(agent.model_settings, tools):
agent.model_settings = dataclasses.replace(
agent.model_settings,
tool_choice="auto" # Reset to auto
)
# Also reset run_config's model_settings if it exists
if (
run_config.model_settings and
should_reset_tool_choice(run_config.model_settings, tools)
):
run_config.model_settings = dataclasses.replace(
run_config.model_settings,
tool_choice="auto" # Reset to auto
)
# Check that run_config's tool_choice was reset to "auto"
assert run_config.model_settings.tool_choice == "auto"
@mock.patch("agents.run.Runner._get_model")
async def test_integration_prevents_infinite_loop(self, mock_get_model):
"""Integration test to verify that tool_choice reset prevents infinite loops"""
# Create a counter to track model calls and detect potential infinite loops
tool_call_counter = {"count": 0, "potential_infinite_loop": False}
# Set up our mock model that will always use tools when tool_choice is set
mock_model_instance = MockModel(tool_call_counter)
# Return our mock model directly
mock_get_model.return_value = mock_model_instance
# Create an agent with tool_choice="required" to force tool usage
agent = Agent(
name="Test agent",
instructions="You are a test agent",
tools=[echo],
name="test_agent",
model=fake_model,
tools=[first_tool, second_tool],
model_settings=ModelSettings(tool_choice="required"),
# Use "run_llm_again" to allow LLM to continue after tool calls
# This would cause infinite loops without the tool_choice reset
tool_use_behavior="run_llm_again",
tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
)
# Set a timeout to catch potential infinite loops that our fix doesn't address
try:
# Run the agent with a timeout
async def run_with_timeout():
return await Runner.run(agent, input="Test input")
# Run should stop after using second_tool
result = await Runner.run(agent, "run test")
assert result.final_output == "second tool result"
result = await asyncio.wait_for(run_with_timeout(), timeout=2.0)
@pytest.mark.asyncio
async def test_specific_tool_choice(self):
"""
Test scenario 3: When using a specific tool choice name
Ensure it doesn't cause infinite loops
"""
# Set up fake model to return a text message
fake_model = FakeModel()
fake_model.set_next_output([get_text_message("Test message")])
# Verify the agent ran successfully
assert result is not None
# Create agent with specific tool_choice
tool1 = get_function_tool("tool1")
tool2 = get_function_tool("tool2")
tool3 = get_function_tool("tool3")
# Verify the tool was called at least once but not too many times
# (indicating no infinite loop)
assert tool_call_counter["count"] >= 1
assert tool_call_counter["count"] < 5
assert not tool_call_counter["potential_infinite_loop"]
agent = Agent(
name="test_agent",
model=fake_model,
tools=[tool1, tool2, tool3],
model_settings=ModelSettings(tool_choice="tool1"), # Specific tool
)
except asyncio.TimeoutError as err:
# If we hit a timeout, the test failed - we likely have an infinite loop
raise AssertionError("Timeout occurred, potential infinite loop detected") from err
# Run should complete without infinite loops
result = await Runner.run(agent, "first run")
assert result.final_output == "Test message"
@pytest.mark.asyncio
async def test_required_with_single_tool(self):
"""
Test scenario 4: When using required tool_choice with only one tool
Ensure it doesn't cause infinite loops
"""
# Set up fake model to return a tool call followed by a text message
fake_model = FakeModel()
fake_model.add_multiple_turn_outputs([
# First call returns a tool call
[get_function_tool_call("custom_tool", "{}")],
# Second call returns a text message
[get_text_message("Final response")]
])
# Create agent with a single tool and tool_choice="required"
custom_tool = get_function_tool("custom_tool", return_value="tool result")
agent = Agent(
name="test_agent",
model=fake_model,
tools=[custom_tool],
model_settings=ModelSettings(tool_choice="required"),
)
# Run should complete without infinite loops
result = await Runner.run(agent, "first run")
assert result.final_output == "Final response"