openai-agents-python/tests/test_items_helpers.py
Rohan Mehta d6f5190d53
Replace referencable_id with response_id (#508)
Minor change - naming. So that it doesn't pollute the next PR.

---
[//]: # (BEGIN SAPLING FOOTER)
* #509
* __->__ #508
2025-04-14 21:37:18 -04:00

283 lines
12 KiB
Python

from __future__ import annotations
from openai.types.responses.response_computer_tool_call import (
ActionScreenshot,
ResponseComputerToolCall,
)
from openai.types.responses.response_computer_tool_call_param import ResponseComputerToolCallParam
from openai.types.responses.response_file_search_tool_call import ResponseFileSearchToolCall
from openai.types.responses.response_file_search_tool_call_param import (
ResponseFileSearchToolCallParam,
)
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam
from openai.types.responses.response_function_web_search import ResponseFunctionWebSearch
from openai.types.responses.response_function_web_search_param import ResponseFunctionWebSearchParam
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_output_message_param import ResponseOutputMessageParam
from openai.types.responses.response_output_refusal import ResponseOutputRefusal
from openai.types.responses.response_output_text import ResponseOutputText
from openai.types.responses.response_reasoning_item import ResponseReasoningItem, Summary
from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam
from agents import (
Agent,
ItemHelpers,
MessageOutputItem,
ModelResponse,
ReasoningItem,
RunItem,
TResponseInputItem,
Usage,
)
def make_message(
    content_items: list[ResponseOutputText | ResponseOutputRefusal],
) -> ResponseOutputMessage:
    """
    Wrap ``content_items`` in an assistant ResponseOutputMessage.

    Every message built here shares the same fixed id ("msg123") and the
    "completed" status; only the content varies between callers.
    """
    message = ResponseOutputMessage(
        type="message",
        role="assistant",
        status="completed",
        id="msg123",
        content=content_items,
    )
    return message


def test_extract_last_content_of_text_message() -> None:
    """extract_last_content returns the text of the final text segment."""
    # Two text segments; only the trailing one should be reported.
    segments: list[ResponseOutputText | ResponseOutputRefusal] = [
        ResponseOutputText(annotations=[], text=chunk, type="output_text")
        for chunk in ("Hello ", "world!")
    ]
    last_content = ItemHelpers.extract_last_content(make_message(segments))
    assert last_content == "world!"


def test_extract_last_content_of_refusal_message() -> None:
    """extract_last_content returns the refusal string when a refusal comes last."""
    leading_text = ResponseOutputText(
        annotations=[], text="Before refusal", type="output_text"
    )
    trailing_refusal = ResponseOutputRefusal(refusal="I cannot do that", type="refusal")
    # The refusal is the final content entry, so its string is what we expect back.
    refusing_message = make_message([leading_text, trailing_refusal])
    extracted = ItemHelpers.extract_last_content(refusing_message)
    assert extracted == "I cannot do that"


def test_extract_last_content_non_message_returns_empty() -> None:
    """A non-message output item (here a function tool call) yields ""."""
    function_call = ResponseFunctionToolCall(
        type="function_call",
        id="tool123",
        call_id="call123",
        name="func",
        arguments="{}",
    )
    extracted = ItemHelpers.extract_last_content(function_call)
    assert extracted == ""


def test_extract_last_text_returns_text_only() -> None:
    """extract_last_text gives the trailing text, or None when the tail is a refusal."""
    part1 = ResponseOutputText(annotations=[], text="part1", type="output_text")
    part2 = ResponseOutputText(annotations=[], text="part2", type="output_text")
    trailing_refusal = ResponseOutputRefusal(refusal="no", type="refusal")

    ends_with_text = make_message([part1, part2])
    ends_with_refusal = make_message([part1, trailing_refusal])

    # Last segment is text -> that text comes back.
    assert ItemHelpers.extract_last_text(ends_with_text) == "part2"
    # Last segment is a refusal -> there is no text to return.
    assert ItemHelpers.extract_last_text(ends_with_refusal) is None


def test_input_to_new_input_list_from_string() -> None:
    """A bare string becomes a one-element list holding a user-role message dict."""
    converted = ItemHelpers.input_to_new_input_list("hi")
    assert isinstance(converted, list)
    # The string is carried as the content of a single user message.
    expected_items = [{"content": "hi", "role": "user"}]
    assert converted == expected_items


def test_input_to_new_input_list_deep_copies_lists() -> None:
    """A list input is returned as an independent deep copy of the original."""
    # Given a list of message dictionaries, ensure the returned list is a deep copy.
    original: list[TResponseInputItem] = [{"content": "abc", "role": "developer"}]
    new_list = ItemHelpers.input_to_new_input_list(original)
    assert new_list == original
    # Equal but distinct objects at both levels: a shallow copy that aliased the
    # element dicts would satisfy the equality above but fail the inner check.
    assert new_list is not original
    assert new_list[0] is not original[0]
    # Mutating the returned list should not mutate the original.
    new_list.pop()
    assert len(original) == 1
    assert "content" in original[0] and original[0].get("content") == "abc"


def test_text_message_output_concatenates_text_segments() -> None:
    """text_message_output joins the text segments and leaves the refusal out."""
    # Text, refusal, text: the expected result contains only the two text pieces.
    segments: list[ResponseOutputText | ResponseOutputRefusal] = [
        ResponseOutputText(annotations=[], text="a", type="output_text"),
        ResponseOutputRefusal(refusal="denied", type="refusal"),
        ResponseOutputText(annotations=[], text="b", type="output_text"),
    ]
    # text_message_output operates on a MessageOutputItem, so wrap the raw message.
    output_item = MessageOutputItem(
        agent=Agent(name="test"),
        raw_item=make_message(segments),
    )
    assert ItemHelpers.text_message_output(output_item) == "ab"


def test_text_message_outputs_across_list_of_runitems() -> None:
    """
    text_message_outputs concatenates text from MessageOutputItem entries only.

    A ReasoningItem is placed between two message items; the expected result
    contains just the two message texts, in order.
    """
    foo_message = make_message(
        [ResponseOutputText(annotations=[], text="foo", type="output_text")]
    )
    bar_message = make_message(
        [ResponseOutputText(annotations=[], text="bar", type="output_text")]
    )
    # A run item of a different kind (a reasoning trace) that carries no message text.
    reasoning_raw = ResponseReasoningItem(id="rid", summary=[], type="reasoning")

    run_items: list[RunItem] = [
        MessageOutputItem(agent=Agent(name="test"), raw_item=foo_message),
        ReasoningItem(agent=Agent(name="test"), raw_item=reasoning_raw),
        MessageOutputItem(agent=Agent(name="test"), raw_item=bar_message),
    ]
    combined = ItemHelpers.text_message_outputs(run_items)
    assert combined == "foobar"


def test_tool_call_output_item_constructs_function_call_output_dict() -> None:
    """tool_call_output_item builds a function_call_output dict for the given call."""
    # Build a simple ResponseFunctionToolCall. The item id and the call_id are
    # deliberately different so the call_id assertion below cannot pass by
    # accident if the helper echoed the item id instead.
    call = ResponseFunctionToolCall(
        id="item-abc",
        arguments='{"x": 1}',
        call_id="call-abc",
        name="do_something",
        type="function_call",
    )
    payload = ItemHelpers.tool_call_output_item(call, "result-string")
    assert isinstance(payload, dict)
    assert payload["type"] == "function_call_output"
    # The output must be linked to the originating call through its call_id.
    assert payload["call_id"] == call.call_id
    assert payload["output"] == "result-string"


# The following tests ensure that every possible output item type defined by
# OpenAI's API can be converted back into an input item dict via
# ModelResponse.to_input_items. The output and input schema for each item are
# intended to be symmetric, so given any ResponseOutputItem, its model_dump
# should produce a dict that can satisfy the corresponding TypedDict input
# type. These tests construct minimal valid instances of each output type,
# invoke to_input_items, and then verify that the resulting dict equals the
# expected input dict, which is annotated with the matching TypedDict param type.
def test_to_input_items_for_message() -> None:
    """to_input_items turns an output message into the equivalent input dict."""
    # The dict should contain exactly the primitive values of the message.
    expected: ResponseOutputMessageParam = {
        "type": "message",
        "id": "m1",
        "role": "assistant",
        "status": "completed",
        "content": [
            {
                "type": "output_text",
                "text": "hello world",
                "annotations": [],
            }
        ],
    }

    text_part = ResponseOutputText(annotations=[], text="hello world", type="output_text")
    output_message = ResponseOutputMessage(
        id="m1",
        content=[text_part],
        role="assistant",
        status="completed",
        type="message",
    )
    response = ModelResponse(output=[output_message], usage=Usage(), response_id=None)

    converted = response.to_input_items()
    assert isinstance(converted, list)
    assert len(converted) == 1
    assert converted[0] == expected


def test_to_input_items_for_function_call() -> None:
    """A function tool call output converts to the matching function call input dict."""
    expected: ResponseFunctionToolCallParam = {
        "type": "function_call",
        "id": "f1",
        "call_id": "c1",
        "name": "func",
        "arguments": "{}",
    }

    function_call = ResponseFunctionToolCall(
        type="function_call",
        id="f1",
        call_id="c1",
        name="func",
        arguments="{}",
    )
    response = ModelResponse(output=[function_call], usage=Usage(), response_id=None)

    converted = response.to_input_items()
    assert isinstance(converted, list)
    assert len(converted) == 1
    assert converted[0] == expected


def test_to_input_items_for_file_search_call() -> None:
    """A file search tool call output converts to the matching file search input dict."""
    expected: ResponseFileSearchToolCallParam = {
        "type": "file_search_call",
        "id": "fs1",
        "status": "completed",
        "queries": ["query"],
    }

    file_search = ResponseFileSearchToolCall(
        type="file_search_call",
        id="fs1",
        status="completed",
        queries=["query"],
    )
    response = ModelResponse(output=[file_search], usage=Usage(), response_id=None)

    converted = response.to_input_items()
    assert isinstance(converted, list)
    assert len(converted) == 1
    assert converted[0] == expected


def test_to_input_items_for_web_search_call() -> None:
    """A web search tool call output converts to the matching web search input dict."""
    expected: ResponseFunctionWebSearchParam = {
        "type": "web_search_call",
        "id": "w1",
        "status": "completed",
    }

    web_search = ResponseFunctionWebSearch(
        type="web_search_call",
        id="w1",
        status="completed",
    )
    response = ModelResponse(output=[web_search], usage=Usage(), response_id=None)

    converted = response.to_input_items()
    assert isinstance(converted, list)
    assert len(converted) == 1
    assert converted[0] == expected


def test_to_input_items_for_computer_call_click() -> None:
    """
    A computer call output converts to the matching computer call input dict.

    NOTE: despite the ``_click`` suffix in the test name, the action exercised
    here is a screenshot action.
    """
    # The converted dict is compared in full against the computer call input shape.
    expected: ResponseComputerToolCallParam = {
        "type": "computer_call",
        "id": "comp1",
        "call_id": "comp1",
        "status": "completed",
        "action": {"type": "screenshot"},
        "pending_safety_checks": [],
    }

    computer_call = ResponseComputerToolCall(
        type="computer_call",
        id="comp1",
        call_id="comp1",
        status="completed",
        action=ActionScreenshot(type="screenshot"),
        pending_safety_checks=[],
    )
    response = ModelResponse(output=[computer_call], usage=Usage(), response_id=None)

    converted = response.to_input_items()
    assert isinstance(converted, list)
    assert len(converted) == 1
    assert converted[0] == expected


def test_to_input_items_for_reasoning() -> None:
    """A reasoning output should produce the same dict as a reasoning input item."""
    rc = Summary(text="why", type="summary_text")
    reasoning = ResponseReasoningItem(id="rid1", summary=[rc], type="reasoning")
    resp = ModelResponse(output=[reasoning], usage=Usage(), response_id=None)
    input_items = resp.to_input_items()
    assert isinstance(input_items, list) and len(input_items) == 1
    converted_dict = input_items[0]
    expected: ResponseReasoningItemParam = {
        "id": "rid1",
        "summary": [{"text": "why", "type": "summary_text"}],
        "type": "reasoning",
    }
    # No debug printing needed: on failure pytest's assertion rewriting already
    # reports both operands of the comparison.
    assert converted_dict == expected