From c374ad064faa6824d5c5a6c5bd9870688526bc81 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Tue, 11 Mar 2025 22:53:48 +0200 Subject: [PATCH 01/16] Run make format --- src/agents/agent_output.py | 2 +- src/agents/model_settings.py | 1 + tests/src/agents/agent_output.py | 2 +- tests/src/agents/model_settings.py | 1 + tests/test_config.py | 9 ++++++--- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py index 8140d8c..0c28800 100644 --- a/src/agents/agent_output.py +++ b/src/agents/agent_output.py @@ -138,7 +138,7 @@ def _type_to_str(t: type[Any]) -> str: # It's a simple type like `str`, `int`, etc. return t.__name__ elif args: - args_str = ', '.join(_type_to_str(arg) for arg in args) + args_str = ", ".join(_type_to_str(arg) for arg in args) return f"{origin.__name__}[{args_str}]" else: return str(t) diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py index 78cf9a8..d8178ae 100644 --- a/src/agents/model_settings.py +++ b/src/agents/model_settings.py @@ -11,6 +11,7 @@ class ModelSettings: This class holds optional model configuration parameters (e.g. temperature, top_p, penalties, truncation, etc.). """ + temperature: float | None = None top_p: float | None = None frequency_penalty: float | None = None diff --git a/tests/src/agents/agent_output.py b/tests/src/agents/agent_output.py index 8140d8c..0c28800 100644 --- a/tests/src/agents/agent_output.py +++ b/tests/src/agents/agent_output.py @@ -138,7 +138,7 @@ def _type_to_str(t: type[Any]) -> str: # It's a simple type like `str`, `int`, etc. 
return t.__name__ elif args: - args_str = ', '.join(_type_to_str(arg) for arg in args) + args_str = ", ".join(_type_to_str(arg) for arg in args) return f"{origin.__name__}[{args_str}]" else: return str(t) diff --git a/tests/src/agents/model_settings.py b/tests/src/agents/model_settings.py index 78cf9a8..d8178ae 100644 --- a/tests/src/agents/model_settings.py +++ b/tests/src/agents/model_settings.py @@ -11,6 +11,7 @@ class ModelSettings: This class holds optional model configuration parameters (e.g. temperature, top_p, penalties, truncation, etc.). """ + temperature: float | None = None top_p: float | None = None frequency_penalty: float | None = None diff --git a/tests/test_config.py b/tests/test_config.py index 8f37200..dba854d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -49,13 +49,16 @@ def test_resp_set_default_openai_client(): def test_set_default_openai_api(): - assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \ + assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), ( "Default should be responses" + ) set_default_openai_api("chat_completions") - assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), \ + assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), ( "Should be chat completions model" + ) set_default_openai_api("responses") - assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \ + assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), ( "Should be responses model" + ) From c03d314fb80181858693e915be3d26848e437fa5 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Tue, 11 Mar 2025 22:57:14 +0200 Subject: [PATCH 02/16] Stronger tracing tests with inline-snapshot --- pyproject.toml | 3 +- tests/test_agent_tracing.py | 115 +++++++- tests/test_responses_tracing.py | 33 ++- tests/test_tracing_errors.py | 279 ++++++++++++++++++- tests/test_tracing_errors_streamed.py | 382 
+++++++++++++++++++++++++- tests/testing_processor.py | 33 +++ uv.lock | 35 +++ 7 files changed, 875 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9c18d5f..17265e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dev = [ "mkdocstrings[python]>=0.28.0", "coverage>=7.6.12", "playwright==1.50.0", + "inline-snapshot>=0.20.5", ] [tool.uv.workspace] members = ["agents"] @@ -116,4 +117,4 @@ filterwarnings = [ ] markers = [ "allow_call_model_methods: mark test as allowing calls to real model implementations", -] \ No newline at end of file +] diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py index 24bd72f..3d7196a 100644 --- a/tests/test_agent_tracing.py +++ b/tests/test_agent_tracing.py @@ -3,12 +3,13 @@ from __future__ import annotations import asyncio import pytest +from inline_snapshot import snapshot from agents import Agent, RunConfig, Runner, trace from .fake_model import FakeModel from .test_responses import get_text_message -from .testing_processor import fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces @pytest.mark.asyncio @@ -25,6 +26,25 @@ async def test_single_run_is_single_trace(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 1, ( f"Got {len(spans)}, but expected 1: the agent span. 
data:" @@ -52,6 +72,39 @@ async def test_multiple_runs_are_multiple_traces(): traces = fetch_traces() assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + } + ], + }, + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + } + ], + }, + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run" @@ -79,6 +132,43 @@ async def test_wrapped_trace_is_single_trace(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "test_workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + }, + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + }, + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + }, + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run" @@ -97,6 +187,8 @@ async def test_parent_disabled_trace_disabled_agent_trace(): traces = fetch_traces() assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}" + assert fetch_normalized_spans() == snapshot([]) + spans = fetch_ordered_spans() assert len(spans) == 0, ( f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}" @@ -116,6 +208,8 @@ async def test_manual_disabling_works(): traces = fetch_traces() assert len(traces) == 0, f"Expected 0 traces, got 
{len(traces)}" + assert fetch_normalized_spans() == snapshot([]) + spans = fetch_ordered_spans() assert len(spans) == 0, f"Got {len(spans)}, but expected no spans" @@ -164,6 +258,25 @@ async def test_not_starting_streaming_creates_trace(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span" diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py index 82b8e75..41b87eb 100644 --- a/tests/test_responses_tracing.py +++ b/tests/test_responses_tracing.py @@ -1,4 +1,5 @@ import pytest +from inline_snapshot import snapshot from openai import AsyncOpenAI from openai.types.responses import ResponseCompletedEvent @@ -6,7 +7,7 @@ from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace from agents.tracing.span_data import ResponseSpanData from tests import fake_model -from .testing_processor import fetch_ordered_spans +from .testing_processor import fetch_normalized_spans, fetch_ordered_spans class DummyTracing: @@ -54,6 +55,15 @@ async def test_get_response_creates_trace(monkeypatch): "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED ) + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "test", + "children": [{"type": "response", "data": {"response_id": "dummy-id"}}], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 1 @@ -82,6 +92,10 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch): "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA ) + assert fetch_normalized_spans() == snapshot( + [{"workflow_name": "test", "children": [{"type": 
"response"}]}] + ) + spans = fetch_ordered_spans() assert len(spans) == 1 assert spans[0].span_data.response is None @@ -107,6 +121,8 @@ async def test_disable_tracing_does_not_create_span(monkeypatch): "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED ) + assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}]) + spans = fetch_ordered_spans() assert len(spans) == 0 @@ -139,6 +155,15 @@ async def test_stream_response_creates_trace(monkeypatch): ): pass + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "test", + "children": [{"type": "response", "data": {"response_id": "dummy-id-123"}}], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 1 assert isinstance(spans[0].span_data, ResponseSpanData) @@ -174,6 +199,10 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch): ): pass + assert fetch_normalized_spans() == snapshot( + [{"workflow_name": "test", "children": [{"type": "response"}]}] + ) + spans = fetch_ordered_spans() assert len(spans) == 1 assert isinstance(spans[0].span_data, ResponseSpanData) @@ -208,5 +237,7 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch): ): pass + assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}]) + spans = fetch_ordered_spans() assert len(spans) == 0 diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py index d57e1a8..5dbd7c1 100644 --- a/tests/test_tracing_errors.py +++ b/tests/test_tracing_errors.py @@ -4,6 +4,7 @@ import json from typing import Any import pytest +from inline_snapshot import snapshot from typing_extensions import TypedDict from agents import ( @@ -27,7 +28,7 @@ from .test_responses import ( get_handoff_tool_call, get_text_message, ) -from .testing_processor import fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces @pytest.mark.asyncio @@ -45,6 +46,34 @@ async def 
test_single_turn_model_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + "children": [ + { + "type": "generation", + "error": { + "message": "Error", + "data": {"name": "ValueError", "message": "test error"}, + }, + } + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}" @@ -80,6 +109,43 @@ async def test_multi_turn_no_handoffs(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent", + "handoffs": [], + "tools": ["foo"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "foo", + "input": '{"a": "b"}', + "output": "tool_result", + }, + }, + { + "type": "generation", + "error": { + "message": "Error", + "data": {"name": "ValueError", "message": "test error"}, + }, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 4, ( f"should have agent, generation, tool, generation, got {len(spans)} with data: " @@ -110,6 +176,39 @@ async def test_tool_call_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent", + "handoffs": [], + "tools": ["foo"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "error": { + "message": "Error running tool", + "data": { + "tool_name": 
"foo", + "error": "Invalid JSON input for tool foo: bad_json", + }, + }, + "data": {"name": "foo", "input": "bad_json"}, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 3, ( f"should have agent, generation, tool spans, got {len(spans)} with data: " @@ -159,6 +258,43 @@ async def test_multiple_handoff_doesnt_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test", + "handoffs": ["test", "test"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + {"type": "handoff", "data": {"from_agent": "test", "to_agent": "test"}}, + ], + }, + { + "type": "agent", + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"}, + "children": [{"type": "generation"}], + }, + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 7, ( f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: " @@ -193,6 +329,21 @@ async def test_multiple_final_output_doesnt_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "Foo"}, + "children": [{"type": "generation"}], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, ( f"should have 1 agent, 1 generation, got {len(spans)} with data: " @@ -251,6 +402,76 @@ async def test_handoffs_lead_to_correct_agent_spans(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got 
{len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"}, + }, + ], + }, + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": ["test_agent_3"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"}, + }, + ], + }, + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [{"type": "generation"}], + }, + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 12, ( f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: " @@ -285,6 +506,38 @@ async def test_max_turns_exceeded(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": {"message": "Max turns exceeded", "data": {"max_turns": 2}}, + "data": { + "name": "test", + "handoffs": [], + "tools": ["foo"], + "output_type": "Foo", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": {"name": "foo", "input": "", "output": "result"}, + 
}, + {"type": "generation"}, + { + "type": "function", + "data": {"name": "foo", "input": "", "output": "result"}, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 5, ( f"should have 1 agent span, 2 generations, 2 function calls, got " @@ -318,6 +571,30 @@ async def test_guardrail_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": { + "message": "Guardrail tripwire triggered", + "data": {"guardrail": "guardrail_function"}, + }, + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"}, + "children": [ + { + "type": "guardrail", + "data": {"name": "guardrail_function", "triggered": True}, + } + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, ( f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py index 00f440e..74cda2d 100644 --- a/tests/test_tracing_errors_streamed.py +++ b/tests/test_tracing_errors_streamed.py @@ -5,6 +5,7 @@ import json from typing import Any import pytest +from inline_snapshot import snapshot from typing_extensions import TypedDict from agents import ( @@ -32,7 +33,7 @@ from .test_responses import ( get_handoff_tool_call, get_text_message, ) -from .testing_processor import fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces @pytest.mark.asyncio @@ -52,6 +53,35 @@ async def test_single_turn_model_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": {"message": "Error in agent run", "data": {"error": "test error"}}, + 
"data": { + "name": "test_agent", + "handoffs": [], + "tools": [], + "output_type": "str", + }, + "children": [ + { + "type": "generation", + "error": { + "message": "Error", + "data": {"name": "ValueError", "message": "test error"}, + }, + } + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}" @@ -89,6 +119,44 @@ async def test_multi_turn_no_handoffs(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": {"message": "Error in agent run", "data": {"error": "test error"}}, + "data": { + "name": "test_agent", + "handoffs": [], + "tools": ["foo"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "foo", + "input": '{"a": "b"}', + "output": "tool_result", + }, + }, + { + "type": "generation", + "error": { + "message": "Error", + "data": {"name": "ValueError", "message": "test error"}, + }, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 4, ( f"should have agent, generation, tool, generation, got {len(spans)} with data: " @@ -121,6 +189,43 @@ async def test_tool_call_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": { + "message": "Error in agent run", + "data": {"error": "Invalid JSON input for tool foo: bad_json"}, + }, + "data": { + "name": "test_agent", + "handoffs": [], + "tools": ["foo"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "error": { + "message": "Error running tool", + "data": { + "tool_name": "foo", + "error": "Invalid JSON input for tool foo: 
bad_json", + }, + }, + "data": {"name": "foo", "input": "bad_json"}, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 3, ( f"should have agent, generation, tool spans, got {len(spans)} with data: " @@ -173,6 +278,43 @@ async def test_multiple_handoff_doesnt_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test", + "handoffs": ["test", "test"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + {"type": "handoff", "data": {"from_agent": "test", "to_agent": "test"}}, + ], + }, + { + "type": "agent", + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"}, + "children": [{"type": "generation"}], + }, + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 7, ( f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: " @@ -211,6 +353,21 @@ async def test_multiple_final_output_no_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "Foo"}, + "children": [{"type": "generation"}], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, ( f"should have 1 agent, 1 generation, got {len(spans)} with data: " @@ -271,12 +428,152 @@ async def test_handoffs_lead_to_correct_agent_spans(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ 
+ { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"}, + }, + ], + }, + { + "type": "agent", + "data": { + "name": "test_agent_1", + "handoffs": ["test_agent_3"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"}, + }, + ], + }, + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [{"type": "generation"}], + }, + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 12, ( f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: " f"{[x.span_data for x in spans]}" ) + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"}, + }, + ], + }, + { + "type": "agent", + 
"data": { + "name": "test_agent_1", + "handoffs": ["test_agent_3"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": { + "name": "some_function", + "input": '{"a": "b"}', + "output": "result", + }, + }, + {"type": "generation"}, + { + "type": "handoff", + "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"}, + }, + ], + }, + { + "type": "agent", + "data": { + "name": "test_agent_3", + "handoffs": ["test_agent_1", "test_agent_2"], + "tools": ["some_function"], + "output_type": "str", + }, + "children": [{"type": "generation"}], + }, + ], + } + ] + ) + @pytest.mark.asyncio async def test_max_turns_exceeded(): @@ -307,6 +604,38 @@ async def test_max_turns_exceeded(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": {"message": "Max turns exceeded", "data": {"max_turns": 2}}, + "data": { + "name": "test", + "handoffs": [], + "tools": ["foo"], + "output_type": "Foo", + }, + "children": [ + {"type": "generation"}, + { + "type": "function", + "data": {"name": "foo", "input": "", "output": "result"}, + }, + {"type": "generation"}, + { + "type": "function", + "data": {"name": "foo", "input": "", "output": "result"}, + }, + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 5, ( f"should have 1 agent, 2 generations, 2 function calls, got " @@ -347,6 +676,33 @@ async def test_input_guardrail_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": { + "message": "Guardrail tripwire triggered", + "data": { + "guardrail": "input_guardrail_function", + "type": "input_guardrail", + }, + }, + 
"data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"}, + "children": [ + { + "type": "guardrail", + "data": {"name": "input_guardrail_function", "triggered": True}, + } + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, ( f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " @@ -387,6 +743,30 @@ async def test_output_guardrail_error(): traces = fetch_traces() assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" + assert fetch_normalized_spans() == snapshot( + [ + { + "workflow_name": "Agent workflow", + "children": [ + { + "type": "agent", + "error": { + "message": "Guardrail tripwire triggered", + "data": {"guardrail": "output_guardrail_function"}, + }, + "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"}, + "children": [ + { + "type": "guardrail", + "data": {"name": "output_guardrail_function", "triggered": True}, + } + ], + } + ], + } + ] + ) + spans = fetch_ordered_spans() assert len(spans) == 2, ( f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " diff --git a/tests/testing_processor.py b/tests/testing_processor.py index 258a08d..e5cb6f5 100644 --- a/tests/testing_processor.py +++ b/tests/testing_processor.py @@ -1,6 +1,7 @@ from __future__ import annotations import threading +from datetime import datetime from typing import Any, Literal from agents.tracing import Span, Trace, TracingProcessor @@ -77,3 +78,35 @@ def fetch_traces() -> list[Trace]: def fetch_events() -> list[TestSpanProcessorEvent]: return SPAN_PROCESSOR_TESTING._events + + +def fetch_normalized_spans(): + nodes: dict[tuple[str, str | None], dict[str, Any]] = {} + traces = [] + for trace_obj in fetch_traces(): + trace = trace_obj.export() + assert trace.pop("object") == "trace" + assert trace.pop("id").startswith("trace_") + trace = {k: v for k, v in trace.items() if v is not None} + nodes[(trace_obj.trace_id, None)] = trace + traces.append(trace) + + if not traces: + assert not 
fetch_ordered_spans() + + for span_obj in fetch_ordered_spans(): + span = span_obj.export() + assert span.pop("object") == "trace.span" + assert span.pop("id").startswith("span_") + assert datetime.fromisoformat(span.pop("started_at")) + assert datetime.fromisoformat(span.pop("ended_at")) + parent_id = span.pop("parent_id") + assert "type" not in span + span_data = span.pop("span_data") + span = {"type": span_data.pop("type")} | {k: v for k, v in span.items() if v is not None} + span_data = {k: v for k, v in span_data.items() if v is not None} + if span_data: + span["data"] = span_data + nodes[(span_obj.trace_id, span_obj.span_id)] = span + nodes[(span.pop("trace_id"), parent_id)].setdefault("children", []).append(span) + return traces diff --git a/uv.lock b/uv.lock index 2bceea7..fd28b2b 100644 --- a/uv.lock +++ b/uv.lock @@ -26,6 +26,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 }, ] +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + [[package]] name = "babel" version = "2.17.0" @@ -240,6 +249,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = 
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, ] +[[package]] +name = "executing" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 }, +] + [[package]] name = "ghp-import" version = "2.1.0" @@ -392,6 +410,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "inline-snapshot" +version = "0.20.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "rich" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/95/9b85a63031c168dd1c479f8cfd5cae42d42d6ac41c18dd760a104bc87ddc/inline_snapshot-0.20.5.tar.gz", hash = "sha256:d8b67c6d533c0a3f566e72608144b54da65dc3da5d0dba4169b2c56b75530fb5", size = 92215 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/71/34e775bbf0bcf81d588d80a1df93437f937b0df9a841f246606a03fc5eff/inline_snapshot-0.20.5-py3-none-any.whl", hash = "sha256:3aa56acf5985d89f17ebd4df4aef00faacc49f10cdf4e6b42be701ffc9702b5a", size = 48071 }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -797,6 +830,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "coverage" }, + { name = 
"inline-snapshot" }, { name = "mkdocs" }, { name = "mkdocs-material" }, { name = "mkdocstrings", extra = ["python"] }, @@ -822,6 +856,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "coverage", specifier = ">=7.6.12" }, + { name = "inline-snapshot", specifier = ">=0.20.5" }, { name = "mkdocs", specifier = ">=1.6.0" }, { name = "mkdocs-material", specifier = ">=9.6.0" }, { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28.0" }, From 26828e5e6834300c32177c560acba24bd50f48d8 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 13 Mar 2025 16:18:40 -0400 Subject: [PATCH 03/16] Fix typo on Agent documentation Argument name is not description but handoff_description --- src/agents/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/agent.py b/src/agents/agent.py index 61c0a89..eb39164 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -27,7 +27,7 @@ class Agent(Generic[TContext]): """An agent is an AI model configured with instructions, tools, guardrails, handoffs and more. We strongly recommend passing `instructions`, which is the "system prompt" for the agent. In - addition, you can pass `description`, which is a human-readable description of the agent, used + addition, you can pass `handoff_description`, which is a human-readable description of the agent, used when the agent is used inside tools/handoffs. Agents are generic on the context type. The context is a (mutable) object you create. 
It is From 792cdea4648f10e3457649bd817dc049f19d0424 Mon Sep 17 00:00:00 2001 From: Kento Yamanaka Date: Thu, 13 Mar 2025 18:26:49 -0700 Subject: [PATCH 04/16] fix: use first_agent instead of second_agent for a task to generate random number --- examples/handoffs/message_filter.py | 4 ++-- examples/handoffs/message_filter_streaming.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/handoffs/message_filter.py b/examples/handoffs/message_filter.py index 9dd56ef..b7fed6c 100644 --- a/examples/handoffs/message_filter.py +++ b/examples/handoffs/message_filter.py @@ -60,9 +60,9 @@ async def main(): print("Step 1 done") - # 2. Ask it to square a number + # 2. Ask it to generate a number result = await Runner.run( - second_agent, + first_agent, input=result.to_input_list() + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}], ) diff --git a/examples/handoffs/message_filter_streaming.py b/examples/handoffs/message_filter_streaming.py index 8d1b420..63cb1de 100644 --- a/examples/handoffs/message_filter_streaming.py +++ b/examples/handoffs/message_filter_streaming.py @@ -60,9 +60,9 @@ async def main(): print("Step 1 done") - # 2. Ask it to square a number + # 2. 
Ask it to generate a number result = await Runner.run( - second_agent, + first_agent, input=result.to_input_list() + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}], ) From 8540b1e65b5cc0ccbf994a2868932fbe62e7da79 Mon Sep 17 00:00:00 2001 From: CCM Date: Fri, 14 Mar 2025 19:14:26 +0800 Subject: [PATCH 05/16] fix typo in agent_lifecycle_example.py --- examples/basic/agent_lifecycle_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/basic/agent_lifecycle_example.py b/examples/basic/agent_lifecycle_example.py index bc0bbe4..29bb18c 100644 --- a/examples/basic/agent_lifecycle_example.py +++ b/examples/basic/agent_lifecycle_example.py @@ -74,7 +74,7 @@ multiply_agent = Agent( start_agent = Agent( name="Start Agent", - instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multipler agent.", + instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multiply agent.", tools=[random_number], output_type=FinalResult, handoffs=[multiply_agent], From f0ef7d71ebe4d6a1c122eb46a4292f4b8103b5a3 Mon Sep 17 00:00:00 2001 From: Alexander Song Date: Fri, 14 Mar 2025 17:50:10 -0700 Subject: [PATCH 06/16] docs: add arize-phoenix to tracing documentation --- docs/tracing.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tracing.md b/docs/tracing.md index d7d0a65..9831ac0 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -90,8 +90,9 @@ To customize this default setup, to send traces to alternative or additional bac External trace processors include: +- [Arize-Phoenix](https://docs.arize.com/phoenix/tracing/integrations-tracing/openai-agents-sdk) - [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk) - [Pydantic Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents) - [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk) -- 
[Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration)) +- [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration) - [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent) From 09d70c074daf210fbb1a3acd31bc2ac048f9ba26 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Sun, 16 Mar 2025 18:48:45 -0400 Subject: [PATCH 07/16] utils directory --- examples/basic/hello_world_jupyter.py | 2 +- src/agents/_run_impl.py | 24 +++++------ src/agents/_utils.py | 61 --------------------------- src/agents/agent.py | 6 +-- src/agents/agent_output.py | 8 ++-- src/agents/guardrail.py | 2 +- src/agents/handoffs.py | 8 ++-- src/agents/run.py | 20 ++++----- src/agents/tool.py | 7 +-- src/agents/util/__init__.py | 0 src/agents/util/_coro.py | 2 + src/agents/util/_error_tracing.py | 16 +++++++ src/agents/util/_json.py | 31 ++++++++++++++ src/agents/util/_transforms.py | 11 +++++ src/agents/util/_types.py | 7 +++ tests/test_function_tool_decorator.py | 3 +- tests/test_output_tool.py | 6 ++- 17 files changed, 111 insertions(+), 103 deletions(-) delete mode 100644 src/agents/_utils.py create mode 100644 src/agents/util/__init__.py create mode 100644 src/agents/util/_coro.py create mode 100644 src/agents/util/_error_tracing.py create mode 100644 src/agents/util/_json.py create mode 100644 src/agents/util/_transforms.py create mode 100644 src/agents/util/_types.py diff --git a/examples/basic/hello_world_jupyter.py b/examples/basic/hello_world_jupyter.py index bb8f14c..c929a7c 100644 --- a/examples/basic/hello_world_jupyter.py +++ b/examples/basic/hello_world_jupyter.py @@ -3,7 +3,7 @@ from agents import Agent, Runner agent = Agent(name="Assistant", instructions="You are a helpful assistant") # Intended for Jupyter notebooks where there's an existing event loop -result = await Runner.run(agent, "Write a haiku about recursion in programming.") # type: 
ignore[top-level-await] # noqa: F704 +result = await Runner.run(agent, "Write a haiku about recursion in programming.") # type: ignore[top-level-await] # noqa: F704 print(result.final_output) # Code within code loops, diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 2c84950..c0c0ebd 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -25,7 +25,6 @@ from openai.types.responses.response_computer_tool_call import ( from openai.types.responses.response_input_param import ComputerCallOutput from openai.types.responses.response_reasoning_item import ResponseReasoningItem -from . import _utils from .agent import Agent from .agent_output import AgentOutputSchema from .computer import AsyncComputer, Computer @@ -59,6 +58,7 @@ from .tracing import ( handoff_span, trace, ) +from .util import _coro, _error_tracing if TYPE_CHECKING: from .run import RunConfig @@ -293,7 +293,7 @@ class RunImpl: elif isinstance(output, ResponseComputerToolCall): items.append(ToolCallItem(raw_item=output, agent=agent)) if not computer_tool: - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Computer tool not found", data={}, @@ -324,7 +324,7 @@ class RunImpl: # Regular function tool call else: if output.name not in function_map: - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Tool not found", data={"tool_name": output.name}, @@ -368,7 +368,7 @@ class RunImpl: ( agent.hooks.on_tool_start(context_wrapper, agent, func_tool) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), func_tool.on_invoke_tool(context_wrapper, tool_call.arguments), ) @@ -378,11 +378,11 @@ class RunImpl: ( agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), ) except Exception as e: - _utils.attach_error_to_current_span( + 
_error_tracing.attach_error_to_current_span( SpanError( message="Error running tool", data={"tool_name": func_tool.name, "error": str(e)}, @@ -502,7 +502,7 @@ class RunImpl: source=agent, ) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), ) @@ -520,7 +520,7 @@ class RunImpl: new_items=tuple(new_step_items), ) if not callable(input_filter): - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( span_handoff, SpanError( message="Invalid input filter", @@ -530,7 +530,7 @@ class RunImpl: raise UserError(f"Invalid input filter: {input_filter}") filtered = input_filter(handoff_input_data) if not isinstance(filtered, HandoffInputData): - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( span_handoff, SpanError( message="Invalid input filter result", @@ -591,7 +591,7 @@ class RunImpl: hooks.on_agent_end(context_wrapper, agent, final_output), agent.hooks.on_end(context_wrapper, agent, final_output) if agent.hooks - else _utils.noop_coroutine(), + else _coro.noop_coroutine(), ) @classmethod @@ -706,7 +706,7 @@ class ComputerAction: ( agent.hooks.on_tool_start(context_wrapper, agent, action.computer_tool) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), output_func, ) @@ -716,7 +716,7 @@ class ComputerAction: ( agent.hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), ) diff --git a/src/agents/_utils.py b/src/agents/_utils.py deleted file mode 100644 index 2a0293a..0000000 --- a/src/agents/_utils.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import annotations - -import re -from collections.abc import Awaitable -from typing import Any, Literal, Union - -from pydantic import TypeAdapter, ValidationError -from typing_extensions import TypeVar - -from .exceptions import ModelBehaviorError -from .logger import logger -from .tracing import Span, SpanError, get_current_span - -T = 
TypeVar("T") - -MaybeAwaitable = Union[Awaitable[T], T] - - -def transform_string_function_style(name: str) -> str: - # Replace spaces with underscores - name = name.replace(" ", "_") - - # Replace non-alphanumeric characters with underscores - name = re.sub(r"[^a-zA-Z0-9]", "_", name) - - return name.lower() - - -def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) -> T: - partial_setting: bool | Literal["off", "on", "trailing-strings"] = ( - "trailing-strings" if partial else False - ) - try: - validated = type_adapter.validate_json(json_str, experimental_allow_partial=partial_setting) - return validated - except ValidationError as e: - attach_error_to_current_span( - SpanError( - message="Invalid JSON provided", - data={}, - ) - ) - raise ModelBehaviorError( - f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}" - ) from e - - -def attach_error_to_span(span: Span[Any], error: SpanError) -> None: - span.set_error(error) - - -def attach_error_to_current_span(error: SpanError) -> None: - span = get_current_span() - if span: - attach_error_to_span(span, error) - else: - logger.warning(f"No span to add error {error} to") - - -async def noop_coroutine() -> None: - pass diff --git a/src/agents/agent.py b/src/agents/agent.py index 61c0a89..84d0ae9 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -6,8 +6,6 @@ from collections.abc import Awaitable from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Callable, Generic, cast -from . 
import _utils -from ._utils import MaybeAwaitable from .guardrail import InputGuardrail, OutputGuardrail from .handoffs import Handoff from .items import ItemHelpers @@ -16,6 +14,8 @@ from .model_settings import ModelSettings from .models.interface import Model from .run_context import RunContextWrapper, TContext from .tool import Tool, function_tool +from .util import _transforms +from .util._types import MaybeAwaitable if TYPE_CHECKING: from .lifecycle import AgentHooks @@ -126,7 +126,7 @@ class Agent(Generic[TContext]): """ @function_tool( - name_override=tool_name or _utils.transform_string_function_style(self.name), + name_override=tool_name or _transforms.transform_string_function_style(self.name), description_override=tool_description or "", ) async def run_agent(context: RunContextWrapper, input: str) -> str: diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py index 0c28800..3262c57 100644 --- a/src/agents/agent_output.py +++ b/src/agents/agent_output.py @@ -4,10 +4,10 @@ from typing import Any from pydantic import BaseModel, TypeAdapter from typing_extensions import TypedDict, get_args, get_origin -from . import _utils from .exceptions import ModelBehaviorError, UserError from .strict_schema import ensure_strict_json_schema from .tracing import SpanError +from .util import _error_tracing, _json _WRAPPER_DICT_KEY = "response" @@ -87,10 +87,10 @@ class AgentOutputSchema: """Validate a JSON string against the output type. Returns the validated object, or raises a `ModelBehaviorError` if the JSON is invalid. 
""" - validated = _utils.validate_json(json_str, self._type_adapter, partial) + validated = _json.validate_json(json_str, self._type_adapter, partial) if self._is_wrapped: if not isinstance(validated, dict): - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Invalid JSON", data={"details": f"Expected a dict, got {type(validated)}"}, @@ -101,7 +101,7 @@ class AgentOutputSchema: ) if _WRAPPER_DICT_KEY not in validated: - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Invalid JSON", data={"details": f"Could not find key {_WRAPPER_DICT_KEY} in JSON"}, diff --git a/src/agents/guardrail.py b/src/agents/guardrail.py index 5bebcd6..a96f0f7 100644 --- a/src/agents/guardrail.py +++ b/src/agents/guardrail.py @@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, Any, Callable, Generic, Union, overload from typing_extensions import TypeVar -from ._utils import MaybeAwaitable from .exceptions import UserError from .items import TResponseInputItem from .run_context import RunContextWrapper, TContext +from .util._types import MaybeAwaitable if TYPE_CHECKING: from .agent import Agent diff --git a/src/agents/handoffs.py b/src/agents/handoffs.py index ac15740..686191f 100644 --- a/src/agents/handoffs.py +++ b/src/agents/handoffs.py @@ -8,12 +8,12 @@ from typing import TYPE_CHECKING, Any, Callable, Generic, cast, overload from pydantic import TypeAdapter from typing_extensions import TypeAlias, TypeVar -from . 
import _utils from .exceptions import ModelBehaviorError, UserError from .items import RunItem, TResponseInputItem from .run_context import RunContextWrapper, TContext from .strict_schema import ensure_strict_json_schema from .tracing.spans import SpanError +from .util import _error_tracing, _json, _transforms if TYPE_CHECKING: from .agent import Agent @@ -104,7 +104,7 @@ class Handoff(Generic[TContext]): @classmethod def default_tool_name(cls, agent: Agent[Any]) -> str: - return _utils.transform_string_function_style(f"transfer_to_{agent.name}") + return _transforms.transform_string_function_style(f"transfer_to_{agent.name}") @classmethod def default_tool_description(cls, agent: Agent[Any]) -> str: @@ -192,7 +192,7 @@ def handoff( ) -> Agent[Any]: if input_type is not None and type_adapter is not None: if input_json is None: - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Handoff function expected non-null input, but got None", data={"details": "input_json is None"}, @@ -200,7 +200,7 @@ def handoff( ) raise ModelBehaviorError("Handoff function expected non-null input, but got None") - validated_input = _utils.validate_json( + validated_input = _json.validate_json( json_str=input_json, type_adapter=type_adapter, partial=False, diff --git a/src/agents/run.py b/src/agents/run.py index dfff7e3..934400f 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -7,7 +7,6 @@ from typing import Any, cast from openai.types.responses import ResponseCompletedEvent -from . 
import Model, _utils from ._run_impl import ( NextStepFinalOutput, NextStepHandoff, @@ -33,7 +32,7 @@ from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem from .lifecycle import RunHooks from .logger import logger from .model_settings import ModelSettings -from .models.interface import ModelProvider +from .models.interface import Model, ModelProvider from .models.openai_provider import OpenAIProvider from .result import RunResult, RunResultStreaming from .run_context import RunContextWrapper, TContext @@ -41,6 +40,7 @@ from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent from .tracing import Span, SpanError, agent_span, get_current_trace, trace from .tracing.span_data import AgentSpanData from .usage import Usage +from .util import _coro, _error_tracing DEFAULT_MAX_TURNS = 10 @@ -193,7 +193,7 @@ class Runner: current_turn += 1 if current_turn > max_turns: - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( current_span, SpanError( message="Max turns exceeded", @@ -447,7 +447,7 @@ class Runner: for done in asyncio.as_completed(guardrail_tasks): result = await done if result.output.tripwire_triggered: - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( parent_span, SpanError( message="Guardrail tripwire triggered", @@ -511,7 +511,7 @@ class Runner: streamed_result.current_turn = current_turn if current_turn > max_turns: - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( current_span, SpanError( message="Max turns exceeded", @@ -583,7 +583,7 @@ class Runner: pass except Exception as e: if current_span: - _utils.attach_error_to_span( + _error_tracing.attach_error_to_span( current_span, SpanError( message="Error in agent run", @@ -615,7 +615,7 @@ class Runner: ( agent.hooks.on_start(context_wrapper, agent) if agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), ) @@ -705,7 +705,7 @@ class Runner: ( agent.hooks.on_start(context_wrapper, agent) if 
agent.hooks - else _utils.noop_coroutine() + else _coro.noop_coroutine() ), ) @@ -796,7 +796,7 @@ class Runner: # Cancel all guardrail tasks if a tripwire is triggered. for t in guardrail_tasks: t.cancel() - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Guardrail tripwire triggered", data={"guardrail": result.guardrail.get_name()}, @@ -834,7 +834,7 @@ class Runner: # Cancel all guardrail tasks if a tripwire is triggered. for t in guardrail_tasks: t.cancel() - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Guardrail tripwire triggered", data={"guardrail": result.guardrail.get_name()}, diff --git a/src/agents/tool.py b/src/agents/tool.py index cbe8794..0baf2c0 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -11,14 +11,15 @@ from openai.types.responses.web_search_tool_param import UserLocation from pydantic import ValidationError from typing_extensions import Concatenate, ParamSpec -from . import _debug, _utils -from ._utils import MaybeAwaitable +from . 
import _debug from .computer import AsyncComputer, Computer from .exceptions import ModelBehaviorError from .function_schema import DocstringStyle, function_schema from .logger import logger from .run_context import RunContextWrapper from .tracing import SpanError +from .util import _error_tracing +from .util._types import MaybeAwaitable ToolParams = ParamSpec("ToolParams") @@ -263,7 +264,7 @@ def function_tool( if inspect.isawaitable(result): return await result - _utils.attach_error_to_current_span( + _error_tracing.attach_error_to_current_span( SpanError( message="Error running tool (non-fatal)", data={ diff --git a/src/agents/util/__init__.py b/src/agents/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/agents/util/_coro.py b/src/agents/util/_coro.py new file mode 100644 index 0000000..647ab86 --- /dev/null +++ b/src/agents/util/_coro.py @@ -0,0 +1,2 @@ +async def noop_coroutine() -> None: + pass diff --git a/src/agents/util/_error_tracing.py b/src/agents/util/_error_tracing.py new file mode 100644 index 0000000..09dbb1d --- /dev/null +++ b/src/agents/util/_error_tracing.py @@ -0,0 +1,16 @@ +from typing import Any + +from ..logger import logger +from ..tracing import Span, SpanError, get_current_span + + +def attach_error_to_span(span: Span[Any], error: SpanError) -> None: + span.set_error(error) + + +def attach_error_to_current_span(error: SpanError) -> None: + span = get_current_span() + if span: + attach_error_to_span(span, error) + else: + logger.warning(f"No span to add error {error} to") diff --git a/src/agents/util/_json.py b/src/agents/util/_json.py new file mode 100644 index 0000000..1e081f6 --- /dev/null +++ b/src/agents/util/_json.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import TypeAdapter, ValidationError +from typing_extensions import TypeVar + +from ..exceptions import ModelBehaviorError +from ..tracing import SpanError +from ._error_tracing import 
attach_error_to_current_span + +T = TypeVar("T") + + +def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) -> T: + partial_setting: bool | Literal["off", "on", "trailing-strings"] = ( + "trailing-strings" if partial else False + ) + try: + validated = type_adapter.validate_json(json_str, experimental_allow_partial=partial_setting) + return validated + except ValidationError as e: + attach_error_to_current_span( + SpanError( + message="Invalid JSON provided", + data={}, + ) + ) + raise ModelBehaviorError( + f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}" + ) from e diff --git a/src/agents/util/_transforms.py b/src/agents/util/_transforms.py new file mode 100644 index 0000000..b303074 --- /dev/null +++ b/src/agents/util/_transforms.py @@ -0,0 +1,11 @@ +import re + + +def transform_string_function_style(name: str) -> str: + # Replace spaces with underscores + name = name.replace(" ", "_") + + # Replace non-alphanumeric characters with underscores + name = re.sub(r"[^a-zA-Z0-9]", "_", name) + + return name.lower() diff --git a/src/agents/util/_types.py b/src/agents/util/_types.py new file mode 100644 index 0000000..8571a69 --- /dev/null +++ b/src/agents/util/_types.py @@ -0,0 +1,7 @@ +from collections.abc import Awaitable +from typing import Union + +from typing_extensions import TypeVar + +T = TypeVar("T") +MaybeAwaitable = Union[Awaitable[T], T] diff --git a/tests/test_function_tool_decorator.py b/tests/test_function_tool_decorator.py index b581660..f146ec7 100644 --- a/tests/test_function_tool_decorator.py +++ b/tests/test_function_tool_decorator.py @@ -175,12 +175,11 @@ def multiple_optional_params_function( return f"{x}_{y}_{z}" - @pytest.mark.asyncio async def test_multiple_optional_params_function(): tool = multiple_optional_params_function - input_data: dict[str,Any] = {} + input_data: dict[str, Any] = {} output = await tool.on_invoke_tool(ctx_wrapper(), json.dumps(input_data)) assert output == "42_hello_no_z" diff --git 
a/tests/test_output_tool.py b/tests/test_output_tool.py index 31ac984..86c4b3b 100644 --- a/tests/test_output_tool.py +++ b/tests/test_output_tool.py @@ -4,8 +4,9 @@ import pytest from pydantic import BaseModel from typing_extensions import TypedDict -from agents import Agent, AgentOutputSchema, ModelBehaviorError, Runner, UserError, _utils +from agents import Agent, AgentOutputSchema, ModelBehaviorError, Runner, UserError from agents.agent_output import _WRAPPER_DICT_KEY +from agents.util import _json def test_plain_text_output(): @@ -77,7 +78,7 @@ def test_bad_json_raises_error(mocker): output_schema = Runner._get_output_schema(agent) assert output_schema, "Should have an output tool config with a structured output type" - mock_validate_json = mocker.patch.object(_utils, "validate_json") + mock_validate_json = mocker.patch.object(_json, "validate_json") mock_validate_json.return_value = ["foo"] with pytest.raises(ModelBehaviorError): @@ -111,3 +112,4 @@ def test_setting_strict_false_works(): output_wrapper = AgentOutputSchema(output_type=Foo, strict_json_schema=False) assert not output_wrapper.strict_json_schema assert output_wrapper.json_schema() == Foo.model_json_schema() + assert output_wrapper.json_schema() == Foo.model_json_schema() From 54a48a39673a3359208d9ba5ee550928348c1291 Mon Sep 17 00:00:00 2001 From: heartkilla Date: Mon, 17 Mar 2025 14:56:43 +0900 Subject: [PATCH 08/16] fix reasoning order in examples --- examples/agent_patterns/input_guardrails.py | 2 +- examples/agent_patterns/llm_as_a_judge.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/agent_patterns/input_guardrails.py b/examples/agent_patterns/input_guardrails.py index 8c8e182..1545355 100644 --- a/examples/agent_patterns/input_guardrails.py +++ b/examples/agent_patterns/input_guardrails.py @@ -30,8 +30,8 @@ If the guardrail trips, we'll respond with a refusal message. ### 1. 
An agent-based guardrail that is triggered if the user is asking to do math homework class MathHomeworkOutput(BaseModel): - is_math_homework: bool reasoning: str + is_math_homework: bool guardrail_agent = Agent( diff --git a/examples/agent_patterns/llm_as_a_judge.py b/examples/agent_patterns/llm_as_a_judge.py index d13a67c..5a46cc3 100644 --- a/examples/agent_patterns/llm_as_a_judge.py +++ b/examples/agent_patterns/llm_as_a_judge.py @@ -23,8 +23,8 @@ story_outline_generator = Agent( @dataclass class EvaluationFeedback: - score: Literal["pass", "needs_improvement", "fail"] feedback: str + score: Literal["pass", "needs_improvement", "fail"] evaluator = Agent[None]( From 1f58528f1c905c7f4a602078173a64674abc4ebc Mon Sep 17 00:00:00 2001 From: Vincenzo Domina <54762917+vincenzodomina@users.noreply.github.com> Date: Mon, 17 Mar 2025 10:37:43 +0100 Subject: [PATCH 09/16] Add TracingProcessor export to __init__.py --- src/agents/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agents/__init__.py b/src/agents/__init__.py index a2d7f24..21a2f2a 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -73,6 +73,7 @@ from .tracing import ( SpanData, SpanError, Trace, + TracingProcessor, add_trace_processor, agent_span, custom_span, @@ -208,6 +209,7 @@ __all__ = [ "set_tracing_disabled", "trace", "Trace", + "TracingProcessor", "SpanError", "Span", "SpanData", From 64e263b61433193fd9d9121b0f35a7ccebb1991c Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Mon, 17 Mar 2025 11:11:39 -0400 Subject: [PATCH 10/16] Pretty print result classes --- Makefile | 8 ++ pyproject.toml | 6 +- src/agents/result.py | 7 ++ src/agents/util/_pretty_print.py | 56 +++++++++ tests/README.md | 25 ++++ tests/test_pretty_print.py | 201 +++++++++++++++++++++++++++++++ uv.lock | 36 ++++++ 7 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 src/agents/util/_pretty_print.py create mode 100644 tests/README.md create mode 100644 tests/test_pretty_print.py diff --git 
a/Makefile b/Makefile index 7dd9bbd..39899d8 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,14 @@ mypy: tests: uv run pytest +.PHONY: snapshots-fix +snapshots-fix: + uv run pytest --inline-snapshot=fix + +.PHONY: snapshots-create +snapshots-create: + uv run pytest --inline-snapshot=create + .PHONY: old_version_tests old_version_tests: UV_PROJECT_ENVIRONMENT=.venv_39 uv run --python 3.9 -m pytest diff --git a/pyproject.toml b/pyproject.toml index 8184a67..3ad1d37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dev = [ "mkdocstrings[python]>=0.28.0", "coverage>=7.6.12", "playwright==1.50.0", + "inline-snapshot>=0.20.7", ] [tool.uv.workspace] members = ["agents"] @@ -116,4 +117,7 @@ filterwarnings = [ ] markers = [ "allow_call_model_methods: mark test as allowing calls to real model implementations", -] \ No newline at end of file +] + +[tool.inline-snapshot] +format-command="ruff format --stdin-filename {filename}" \ No newline at end of file diff --git a/src/agents/result.py b/src/agents/result.py index 6e806b7..40a6480 100644 --- a/src/agents/result.py +++ b/src/agents/result.py @@ -17,6 +17,7 @@ from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem from .logger import logger from .stream_events import StreamEvent from .tracing import Trace +from .util._pretty_print import pretty_print_result, pretty_print_run_result_streaming if TYPE_CHECKING: from ._run_impl import QueueCompleteSentinel @@ -89,6 +90,9 @@ class RunResult(RunResultBase): """The last agent that was run.""" return self._last_agent + def __str__(self) -> str: + return pretty_print_result(self) + @dataclass class RunResultStreaming(RunResultBase): @@ -216,3 +220,6 @@ class RunResultStreaming(RunResultBase): if self._output_guardrails_task and not self._output_guardrails_task.done(): self._output_guardrails_task.cancel() + + def __str__(self) -> str: + return pretty_print_run_result_streaming(self) diff --git a/src/agents/util/_pretty_print.py 
b/src/agents/util/_pretty_print.py new file mode 100644 index 0000000..afd3e2b --- /dev/null +++ b/src/agents/util/_pretty_print.py @@ -0,0 +1,56 @@ +from typing import TYPE_CHECKING + +from pydantic import BaseModel + +if TYPE_CHECKING: + from ..result import RunResult, RunResultBase, RunResultStreaming + + +def _indent(text: str, indent_level: int) -> str: + indent_string = " " * indent_level + return "\n".join(f"{indent_string}{line}" for line in text.splitlines()) + + +def _final_output_str(result: "RunResultBase") -> str: + if result.final_output is None: + return "None" + elif isinstance(result.final_output, str): + return result.final_output + elif isinstance(result.final_output, BaseModel): + return result.final_output.model_dump_json(indent=2) + else: + return str(result.final_output) + + +def pretty_print_result(result: "RunResult") -> str: + output = "RunResult:" + output += f'\n- Last agent: Agent(name="{result.last_agent.name}", ...)' + output += ( + f"\n- Final output ({type(result.final_output).__name__}):\n" + f"{_indent(_final_output_str(result), 2)}" + ) + output += f"\n- {len(result.new_items)} new item(s)" + output += f"\n- {len(result.raw_responses)} raw response(s)" + output += f"\n- {len(result.input_guardrail_results)} input guardrail result(s)" + output += f"\n- {len(result.output_guardrail_results)} output guardrail result(s)" + output += "\n(See `RunResult` for more details)" + + return output + + +def pretty_print_run_result_streaming(result: "RunResultStreaming") -> str: + output = "RunResultStreaming:" + output += f'\n- Current agent: Agent(name="{result.current_agent.name}", ...)' + output += f"\n- Current turn: {result.current_turn}" + output += f"\n- Max turns: {result.max_turns}" + output += f"\n- Is complete: {result.is_complete}" + output += ( + f"\n- Final output ({type(result.final_output).__name__}):\n" + f"{_indent(_final_output_str(result), 2)}" + ) + output += f"\n- {len(result.new_items)} new item(s)" + output += f"\n- 
{len(result.raw_responses)} raw response(s)" + output += f"\n- {len(result.input_guardrail_results)} input guardrail result(s)" + output += f"\n- {len(result.output_guardrail_results)} output guardrail result(s)" + output += "\n(See `RunResultStreaming` for more details)" + return output diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..d68e067 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,25 @@ +# Tests + +Before running any tests, make sure you have `uv` installed (and ideally run `make sync` after). + +## Running tests + +``` +make tests +``` + +## Snapshots + +We use [inline-snapshots](https://15r10nk.github.io/inline-snapshot/latest/) for some tests. If your code adds new snapshot tests or breaks existing ones, you can fix/create them. After fixing/creating snapshots, run `make tests` again to verify the tests pass. + +### Fixing snapshots + +``` +make snapshots-fix +``` + +### Creating snapshots + +``` +make snapshots-create +``` diff --git a/tests/test_pretty_print.py b/tests/test_pretty_print.py new file mode 100644 index 0000000..b2218a2 --- /dev/null +++ b/tests/test_pretty_print.py @@ -0,0 +1,201 @@ +import json + +import pytest +from inline_snapshot import snapshot +from pydantic import BaseModel + +from agents import Agent, Runner +from agents.agent_output import _WRAPPER_DICT_KEY +from agents.util._pretty_print import pretty_print_result, pretty_print_run_result_streaming +from tests.fake_model import FakeModel + +from .test_responses import get_final_output_message, get_text_message + + +@pytest.mark.asyncio +async def test_pretty_result(): + model = FakeModel() + model.set_next_output([get_text_message("Hi there")]) + + agent = Agent(name="test_agent", model=model) + result = await Runner.run(agent, input="Hello") + + assert pretty_print_result(result) == snapshot("""\ +RunResult: +- Last agent: Agent(name="test_agent", ...)
+- Final output (str): + Hi there +- 1 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResult` for more details)\ +""") + + +@pytest.mark.asyncio +async def test_pretty_run_result_streaming(): + model = FakeModel() + model.set_next_output([get_text_message("Hi there")]) + + agent = Agent(name="test_agent", model=model) + result = Runner.run_streamed(agent, input="Hello") + async for _ in result.stream_events(): + pass + + assert pretty_print_run_result_streaming(result) == snapshot("""\ +RunResultStreaming: +- Current agent: Agent(name="test_agent", ...) +- Current turn: 1 +- Max turns: 10 +- Is complete: True +- Final output (str): + Hi there +- 1 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResultStreaming` for more details)\ +""") + + +class Foo(BaseModel): + bar: str + + +@pytest.mark.asyncio +async def test_pretty_run_result_structured_output(): + model = FakeModel() + model.set_next_output( + [ + get_text_message("Test"), + get_final_output_message(Foo(bar="Hi there").model_dump_json()), + ] + ) + + agent = Agent(name="test_agent", model=model, output_type=Foo) + result = await Runner.run(agent, input="Hello") + + assert pretty_print_result(result) == snapshot("""\ +RunResult: +- Last agent: Agent(name="test_agent", ...) 
+- Final output (Foo): + { + "bar": "Hi there" + } +- 2 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResult` for more details)\ +""") + + +@pytest.mark.asyncio +async def test_pretty_run_result_streaming_structured_output(): + model = FakeModel() + model.set_next_output( + [ + get_text_message("Test"), + get_final_output_message(Foo(bar="Hi there").model_dump_json()), + ] + ) + + agent = Agent(name="test_agent", model=model, output_type=Foo) + result = Runner.run_streamed(agent, input="Hello") + + async for _ in result.stream_events(): + pass + + assert pretty_print_run_result_streaming(result) == snapshot("""\ +RunResultStreaming: +- Current agent: Agent(name="test_agent", ...) +- Current turn: 1 +- Max turns: 10 +- Is complete: True +- Final output (Foo): + { + "bar": "Hi there" + } +- 2 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResultStreaming` for more details)\ +""") + + +@pytest.mark.asyncio +async def test_pretty_run_result_list_structured_output(): + model = FakeModel() + model.set_next_output( + [ + get_text_message("Test"), + get_final_output_message( + json.dumps( + { + _WRAPPER_DICT_KEY: [ + Foo(bar="Hi there").model_dump(), + Foo(bar="Hi there 2").model_dump(), + ] + } + ) + ), + ] + ) + + agent = Agent(name="test_agent", model=model, output_type=list[Foo]) + result = await Runner.run(agent, input="Hello") + + assert pretty_print_result(result) == snapshot("""\ +RunResult: +- Last agent: Agent(name="test_agent", ...) 
+- Final output (list): + [Foo(bar='Hi there'), Foo(bar='Hi there 2')] +- 2 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResult` for more details)\ +""") + + +@pytest.mark.asyncio +async def test_pretty_run_result_streaming_list_structured_output(): + model = FakeModel() + model.set_next_output( + [ + get_text_message("Test"), + get_final_output_message( + json.dumps( + { + _WRAPPER_DICT_KEY: [ + Foo(bar="Test").model_dump(), + Foo(bar="Test 2").model_dump(), + ] + } + ) + ), + ] + ) + + agent = Agent(name="test_agent", model=model, output_type=list[Foo]) + result = Runner.run_streamed(agent, input="Hello") + + async for _ in result.stream_events(): + pass + + assert pretty_print_run_result_streaming(result) == snapshot("""\ +RunResultStreaming: +- Current agent: Agent(name="test_agent", ...) +- Current turn: 1 +- Max turns: 10 +- Is complete: True +- Final output (list): + [Foo(bar='Test'), Foo(bar='Test 2')] +- 2 new item(s) +- 1 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResultStreaming` for more details)\ +""") diff --git a/uv.lock b/uv.lock index c3af99b..2c2e05b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.9" [[package]] @@ -25,6 +26,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 }, ] +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + [[package]] name = "babel" version = "2.17.0" @@ -239,6 +249,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, ] +[[package]] +name = "executing" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 }, +] + [[package]] name = "ghp-import" version = "2.1.0" @@ -391,6 +410,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "inline-snapshot" +version = "0.20.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "rich" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b0/41/9bd2ecd10ef789e8aff6fb68dcc7677dc31b33b2d27c306c0d40fc982fbc/inline_snapshot-0.20.7.tar.gz", hash = 
"sha256:d55bbb6254d0727dc304729ca7998cde1c1e984c4bf50281514aa9d727a56cf2", size = 92643 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/8f/1bf23da63ad1a0b14ca2d9114700123ef76732e375548f4f9ca94052817e/inline_snapshot-0.20.7-py3-none-any.whl", hash = "sha256:2df6dd8710d1f0def2c1f9d6c25fd03d7beba01f3addf52fc370343d9ee9959f", size = 48108 }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -796,6 +830,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "coverage" }, + { name = "inline-snapshot" }, { name = "mkdocs" }, { name = "mkdocs-material" }, { name = "mkdocstrings", extra = ["python"] }, @@ -821,6 +856,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "coverage", specifier = ">=7.6.12" }, + { name = "inline-snapshot", specifier = ">=0.20.7" }, { name = "mkdocs", specifier = ">=1.6.0" }, { name = "mkdocs-material", specifier = ">=9.6.0" }, { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28.0" }, From 4ebf0742f11b8c12968ed22bec91f44ae2e3e421 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Mon, 17 Mar 2025 08:47:28 -0700 Subject: [PATCH 11/16] docs: List LangSmith tracing integration --- docs/tracing.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tracing.md b/docs/tracing.md index d7d0a65..5d7477e 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -95,3 +95,4 @@ External trace processors include: - [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk) - [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration)) - [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent) +- [LangSmith](https://docs.smith.langchain.com/observability/how_to_guides/trace_with_openai_agents_sdk) From 370a748bcc824a65688759edb9c62141698e7e96 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 17 Mar 2025 09:13:22 -0700 Subject: [PATCH 12/16] Update tracing.md Add callout 
for ZDR in tracing --- docs/tracing.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/tracing.md b/docs/tracing.md index d7d0a65..622145d 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -9,6 +9,8 @@ The Agents SDK includes built-in tracing, collecting a comprehensive record of e 1. You can globally disable tracing by setting the env var `OPENAI_AGENTS_DISABLE_TRACING=1` 2. You can disable tracing for a single run by setting [`agents.run.RunConfig.tracing_disabled`][] to `True` +***For organizations operating under a Zero Data Retention (ZDR) policy using OpenAI's APIs, tracing is unavailable.*** + ## Traces and spans - **Traces** represent a single end-to-end operation of a "workflow". They're composed of Spans. Traces have the following properties: From 1368e7ffe6a0148fc42c346df0f1a261dab2dc72 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Mon, 17 Mar 2025 14:55:54 -0400 Subject: [PATCH 13/16] Update tracing docs --- README.md | 2 +- docs/tracing.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 210f6f4..51ca3c6 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ The Agents SDK is designed to be highly flexible, allowing you to model a wide r ## Tracing -The Agents SDK automatically traces your agent runs, making it easy to track and debug the behavior of your agents. Tracing is extensible by design, supporting custom spans and a wide variety of external destinations, including [Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents), [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk), [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk), [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration), and [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent).
For more details about how to customize or disable tracing, see [Tracing](http://openai.github.io/openai-agents-python/tracing). +The Agents SDK automatically traces your agent runs, making it easy to track and debug the behavior of your agents. Tracing is extensible by design, supporting custom spans and a wide variety of external destinations, including [Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents), [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk), [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk), [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration), and [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent). For more details about how to customize or disable tracing, see [Tracing](http://openai.github.io/openai-agents-python/tracing), which also includes a larger list of [external tracing processors](http://openai.github.io/openai-agents-python/tracing/#external-tracing-processors-list). ## Development (only needed if you need to edit the SDK/examples) diff --git a/docs/tracing.md b/docs/tracing.md index d7d0a65..372a41a 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -88,10 +88,10 @@ To customize this default setup, to send traces to alternative or additional bac 1. [`add_trace_processor()`][agents.tracing.add_trace_processor] lets you add an **additional** trace processor that will receive traces and spans as they are ready. This lets you do your own processing in addition to sending traces to OpenAI's backend. 2. [`set_trace_processors()`][agents.tracing.set_trace_processors] lets you **replace** the default processors with your own trace processors. This means traces will not be sent to the OpenAI backend unless you include a `TracingProcessor` that does so. 
-External trace processors include: +## External tracing processors list - [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk) - [Pydantic Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents) - [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk) -- [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration)) +- [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration) - [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent) From 64150c8aeab62d8e65e6d3d8a0fab62857139c5c Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 17 Mar 2025 16:14:28 -0400 Subject: [PATCH 14/16] Fix lint --- src/agents/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agents/agent.py b/src/agents/agent.py index eb39164..f8bce7f 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -27,8 +27,8 @@ class Agent(Generic[TContext]): """An agent is an AI model configured with instructions, tools, guardrails, handoffs and more. We strongly recommend passing `instructions`, which is the "system prompt" for the agent. In - addition, you can pass `handoff_description`, which is a human-readable description of the agent, used - when the agent is used inside tools/handoffs. + addition, you can pass `handoff_description`, which is a human-readable description of the + agent, used when the agent is used inside tools/handoffs. Agents are generic on the context type. The context is a (mutable) object you create. It is passed to tool functions, handoffs, guardrails, etc. 
From 7eb2bcee15b8077c4ce002df59af4a44de2b62d8 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Mon, 17 Mar 2025 23:56:42 +0200 Subject: [PATCH 15/16] mypy --- tests/testing_processor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testing_processor.py b/tests/testing_processor.py index e5cb6f5..371ea86 100644 --- a/tests/testing_processor.py +++ b/tests/testing_processor.py @@ -85,6 +85,7 @@ def fetch_normalized_spans(): traces = [] for trace_obj in fetch_traces(): trace = trace_obj.export() + assert trace assert trace.pop("object") == "trace" assert trace.pop("id").startswith("trace_") trace = {k: v for k, v in trace.items() if v is not None} @@ -96,6 +97,7 @@ def fetch_normalized_spans(): for span_obj in fetch_ordered_spans(): span = span_obj.export() + assert span assert span.pop("object") == "trace.span" assert span.pop("id").startswith("span_") assert datetime.fromisoformat(span.pop("started_at")) From a43cf1542b4cfbb7ee00ebf33aa7d49e716a7cba Mon Sep 17 00:00:00 2001 From: Dmitry Pimenov Date: Mon, 17 Mar 2025 15:05:15 -0700 Subject: [PATCH 16/16] clarifying that handoffs are a type of tool call under the hood --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 51ca3c6..fc98b2b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The OpenAI Agents SDK is a lightweight yet powerful framework for building multi ### Core concepts: 1. [**Agents**](https://openai.github.io/openai-agents-python/agents): LLMs configured with instructions, tools, guardrails, and handoffs -2. [**Handoffs**](https://openai.github.io/openai-agents-python/handoffs/): Allow agents to transfer control to other agents for specific tasks +2. [**Handoffs**](https://openai.github.io/openai-agents-python/handoffs/): A specialized tool call used by the Agents SDK for transferring control between agents 3. 
[**Guardrails**](https://openai.github.io/openai-agents-python/guardrails/): Configurable safety checks for input and output validation 4. [**Tracing**](https://openai.github.io/openai-agents-python/tracing/): Built-in tracking of agent runs, allowing you to view, debug and optimize your workflows