From c374ad064faa6824d5c5a6c5bd9870688526bc81 Mon Sep 17 00:00:00 2001
From: Alex Hall <alex.mojaki@gmail.com>
Date: Tue, 11 Mar 2025 22:53:48 +0200
Subject: [PATCH 01/16] Run make format

---
 src/agents/agent_output.py         | 2 +-
 src/agents/model_settings.py       | 1 +
 tests/src/agents/agent_output.py   | 2 +-
 tests/src/agents/model_settings.py | 1 +
 tests/test_config.py               | 9 ++++++---
 5 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py
index 8140d8c..0c28800 100644
--- a/src/agents/agent_output.py
+++ b/src/agents/agent_output.py
@@ -138,7 +138,7 @@ def _type_to_str(t: type[Any]) -> str:
         # It's a simple type like `str`, `int`, etc.
         return t.__name__
     elif args:
-        args_str = ', '.join(_type_to_str(arg) for arg in args)
+        args_str = ", ".join(_type_to_str(arg) for arg in args)
         return f"{origin.__name__}[{args_str}]"
     else:
         return str(t)
diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
index 78cf9a8..d8178ae 100644
--- a/src/agents/model_settings.py
+++ b/src/agents/model_settings.py
@@ -11,6 +11,7 @@ class ModelSettings:
     This class holds optional model configuration parameters (e.g. temperature,
     top_p, penalties, truncation, etc.).
     """
+
     temperature: float | None = None
     top_p: float | None = None
     frequency_penalty: float | None = None
diff --git a/tests/src/agents/agent_output.py b/tests/src/agents/agent_output.py
index 8140d8c..0c28800 100644
--- a/tests/src/agents/agent_output.py
+++ b/tests/src/agents/agent_output.py
@@ -138,7 +138,7 @@ def _type_to_str(t: type[Any]) -> str:
         # It's a simple type like `str`, `int`, etc.
         return t.__name__
     elif args:
-        args_str = ', '.join(_type_to_str(arg) for arg in args)
+        args_str = ", ".join(_type_to_str(arg) for arg in args)
         return f"{origin.__name__}[{args_str}]"
     else:
         return str(t)
diff --git a/tests/src/agents/model_settings.py b/tests/src/agents/model_settings.py
index 78cf9a8..d8178ae 100644
--- a/tests/src/agents/model_settings.py
+++ b/tests/src/agents/model_settings.py
@@ -11,6 +11,7 @@ class ModelSettings:
     This class holds optional model configuration parameters (e.g. temperature,
     top_p, penalties, truncation, etc.).
     """
+
     temperature: float | None = None
     top_p: float | None = None
     frequency_penalty: float | None = None
diff --git a/tests/test_config.py b/tests/test_config.py
index 8f37200..dba854d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -49,13 +49,16 @@ def test_resp_set_default_openai_client():
 
 
 def test_set_default_openai_api():
-    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \
+    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), (
         "Default should be responses"
+    )
 
     set_default_openai_api("chat_completions")
-    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), \
+    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIChatCompletionsModel), (
         "Should be chat completions model"
+    )
 
     set_default_openai_api("responses")
-    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), \
+    assert isinstance(OpenAIProvider().get_model("gpt-4"), OpenAIResponsesModel), (
         "Should be responses model"
+    )

From c03d314fb80181858693e915be3d26848e437fa5 Mon Sep 17 00:00:00 2001
From: Alex Hall <alex.mojaki@gmail.com>
Date: Tue, 11 Mar 2025 22:57:14 +0200
Subject: [PATCH 02/16] Stronger tracing tests with inline-snapshot

---
 pyproject.toml                        |   3 +-
 tests/test_agent_tracing.py           | 115 +++++++-
 tests/test_responses_tracing.py       |  33 ++-
 tests/test_tracing_errors.py          | 279 ++++++++++++++++++-
 tests/test_tracing_errors_streamed.py | 382 +++++++++++++++++++++++++-
 tests/testing_processor.py            |  33 +++
 uv.lock                               |  35 +++
 7 files changed, 875 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9c18d5f..17265e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ dev = [
     "mkdocstrings[python]>=0.28.0",
     "coverage>=7.6.12",
     "playwright==1.50.0",
+    "inline-snapshot>=0.20.5",
 ]
 [tool.uv.workspace]
 members = ["agents"]
@@ -116,4 +117,4 @@ filterwarnings = [
 ]
 markers = [
     "allow_call_model_methods: mark test as allowing calls to real model implementations",
-]
\ No newline at end of file
+]
diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py
index 24bd72f..3d7196a 100644
--- a/tests/test_agent_tracing.py
+++ b/tests/test_agent_tracing.py
@@ -3,12 +3,13 @@ from __future__ import annotations
 import asyncio
 
 import pytest
+from inline_snapshot import snapshot
 
 from agents import Agent, RunConfig, Runner, trace
 
 from .fake_model import FakeModel
 from .test_responses import get_text_message
-from .testing_processor import fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
 
 
 @pytest.mark.asyncio
@@ -25,6 +26,25 @@ async def test_single_run_is_single_trace():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1, (
         f"Got {len(spans)}, but expected 1: the agent span. data:"
@@ -52,6 +72,39 @@ async def test_multiple_runs_are_multiple_traces():
     traces = fetch_traces()
     assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            },
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            },
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
 
@@ -79,6 +132,43 @@ async def test_wrapped_trace_is_single_trace():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test_workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
 
@@ -97,6 +187,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
 
     traces = fetch_traces()
     assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot([])
+
     spans = fetch_ordered_spans()
     assert len(spans) == 0, (
         f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
@@ -116,6 +208,8 @@ async def test_manual_disabling_works():
 
     traces = fetch_traces()
     assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot([])
+
     spans = fetch_ordered_spans()
     assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
 
@@ -164,6 +258,25 @@ async def test_not_starting_streaming_creates_trace():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
 
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index 82b8e75..41b87eb 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -1,4 +1,5 @@
 import pytest
+from inline_snapshot import snapshot
 from openai import AsyncOpenAI
 from openai.types.responses import ResponseCompletedEvent
 
@@ -6,7 +7,7 @@ from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace
 from agents.tracing.span_data import ResponseSpanData
 from tests import fake_model
 
-from .testing_processor import fetch_ordered_spans
+from .testing_processor import fetch_normalized_spans, fetch_ordered_spans
 
 
 class DummyTracing:
@@ -54,6 +55,15 @@ async def test_get_response_creates_trace(monkeypatch):
             "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
         )
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "children": [{"type": "response", "data": {"response_id": "dummy-id"}}],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1
 
@@ -82,6 +92,10 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):
             "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
         )
 
+    assert fetch_normalized_spans() == snapshot(
+        [{"workflow_name": "test", "children": [{"type": "response"}]}]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1
     assert spans[0].span_data.response is None
@@ -107,6 +121,8 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):
             "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
         )
 
+    assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
+
     spans = fetch_ordered_spans()
     assert len(spans) == 0
 
@@ -139,6 +155,15 @@ async def test_stream_response_creates_trace(monkeypatch):
         ):
             pass
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "children": [{"type": "response", "data": {"response_id": "dummy-id-123"}}],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1
     assert isinstance(spans[0].span_data, ResponseSpanData)
@@ -174,6 +199,10 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):
         ):
             pass
 
+    assert fetch_normalized_spans() == snapshot(
+        [{"workflow_name": "test", "children": [{"type": "response"}]}]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 1
     assert isinstance(spans[0].span_data, ResponseSpanData)
@@ -208,5 +237,7 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):
         ):
             pass
 
+    assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
+
     spans = fetch_ordered_spans()
     assert len(spans) == 0
diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py
index d57e1a8..5dbd7c1 100644
--- a/tests/test_tracing_errors.py
+++ b/tests/test_tracing_errors.py
@@ -4,6 +4,7 @@ import json
 from typing import Any
 
 import pytest
+from inline_snapshot import snapshot
 from typing_extensions import TypedDict
 
 from agents import (
@@ -27,7 +28,7 @@ from .test_responses import (
     get_handoff_tool_call,
     get_text_message,
 )
-from .testing_processor import fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
 
 
 @pytest.mark.asyncio
@@ -45,6 +46,34 @@ async def test_single_turn_model_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {
+                                "type": "generation",
+                                "error": {
+                                    "message": "Error",
+                                    "data": {"name": "ValueError", "message": "test error"},
+                                },
+                            }
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
 
@@ -80,6 +109,43 @@ async def test_multi_turn_no_handoffs():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "foo",
+                                    "input": '{"a": "b"}',
+                                    "output": "tool_result",
+                                },
+                            },
+                            {
+                                "type": "generation",
+                                "error": {
+                                    "message": "Error",
+                                    "data": {"name": "ValueError", "message": "test error"},
+                                },
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 4, (
         f"should have agent, generation, tool, generation, got {len(spans)} with data: "
@@ -110,6 +176,39 @@ async def test_tool_call_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "error": {
+                                    "message": "Error running tool",
+                                    "data": {
+                                        "tool_name": "foo",
+                                        "error": "Invalid JSON input for tool foo: bad_json",
+                                    },
+                                },
+                                "data": {"name": "foo", "input": "bad_json"},
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 3, (
         f"should have agent, generation, tool spans, got {len(spans)} with data: "
@@ -159,6 +258,43 @@ async def test_multiple_handoff_doesnt_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test",
+                            "handoffs": ["test", "test"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {"type": "handoff", "data": {"from_agent": "test", "to_agent": "test"}},
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"},
+                        "children": [{"type": "generation"}],
+                    },
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 7, (
         f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
@@ -193,6 +329,21 @@ async def test_multiple_final_output_doesnt_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "Foo"},
+                        "children": [{"type": "generation"}],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, (
         f"should have 1 agent, 1 generation, got {len(spans)} with data: "
@@ -251,6 +402,76 @@ async def test_handoffs_lead_to_correct_agent_spans():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": ["test_agent_3"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [{"type": "generation"}],
+                    },
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 12, (
         f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
@@ -285,6 +506,38 @@ async def test_max_turns_exceeded():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {"message": "Max turns exceeded", "data": {"max_turns": 2}},
+                        "data": {
+                            "name": "test",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "Foo",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {"name": "foo", "input": "", "output": "result"},
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {"name": "foo", "input": "", "output": "result"},
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 5, (
         f"should have 1 agent span, 2 generations, 2 function calls, got "
@@ -318,6 +571,30 @@ async def test_guardrail_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {
+                            "message": "Guardrail tripwire triggered",
+                            "data": {"guardrail": "guardrail_function"},
+                        },
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"},
+                        "children": [
+                            {
+                                "type": "guardrail",
+                                "data": {"name": "guardrail_function", "triggered": True},
+                            }
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, (
         f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py
index 00f440e..74cda2d 100644
--- a/tests/test_tracing_errors_streamed.py
+++ b/tests/test_tracing_errors_streamed.py
@@ -5,6 +5,7 @@ import json
 from typing import Any
 
 import pytest
+from inline_snapshot import snapshot
 from typing_extensions import TypedDict
 
 from agents import (
@@ -32,7 +33,7 @@ from .test_responses import (
     get_handoff_tool_call,
     get_text_message,
 )
-from .testing_processor import fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
 
 
 @pytest.mark.asyncio
@@ -52,6 +53,35 @@ async def test_single_turn_model_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {"message": "Error in agent run", "data": {"error": "test error"}},
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {
+                                "type": "generation",
+                                "error": {
+                                    "message": "Error",
+                                    "data": {"name": "ValueError", "message": "test error"},
+                                },
+                            }
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
 
@@ -89,6 +119,44 @@ async def test_multi_turn_no_handoffs():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {"message": "Error in agent run", "data": {"error": "test error"}},
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "foo",
+                                    "input": '{"a": "b"}',
+                                    "output": "tool_result",
+                                },
+                            },
+                            {
+                                "type": "generation",
+                                "error": {
+                                    "message": "Error",
+                                    "data": {"name": "ValueError", "message": "test error"},
+                                },
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 4, (
         f"should have agent, generation, tool, generation, got {len(spans)} with data: "
@@ -121,6 +189,43 @@ async def test_tool_call_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {
+                            "message": "Error in agent run",
+                            "data": {"error": "Invalid JSON input for tool foo: bad_json"},
+                        },
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "error": {
+                                    "message": "Error running tool",
+                                    "data": {
+                                        "tool_name": "foo",
+                                        "error": "Invalid JSON input for tool foo: bad_json",
+                                    },
+                                },
+                                "data": {"name": "foo", "input": "bad_json"},
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 3, (
         f"should have agent, generation, tool spans, got {len(spans)} with data: "
@@ -173,6 +278,43 @@ async def test_multiple_handoff_doesnt_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test",
+                            "handoffs": ["test", "test"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {"type": "handoff", "data": {"from_agent": "test", "to_agent": "test"}},
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"},
+                        "children": [{"type": "generation"}],
+                    },
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 7, (
         f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
@@ -211,6 +353,21 @@ async def test_multiple_final_output_no_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "Foo"},
+                        "children": [{"type": "generation"}],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, (
         f"should have 1 agent, 1 generation, got {len(spans)} with data: "
@@ -271,12 +428,152 @@ async def test_handoffs_lead_to_correct_agent_spans():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": ["test_agent_3"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [{"type": "generation"}],
+                    },
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 12, (
         f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
         f"{[x.span_data for x in spans]}"
     )
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": ["test_agent_3"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {
+                                    "name": "some_function",
+                                    "input": '{"a": "b"}',
+                                    "output": "result",
+                                },
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"},
+                            },
+                        ],
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_3",
+                            "handoffs": ["test_agent_1", "test_agent_2"],
+                            "tools": ["some_function"],
+                            "output_type": "str",
+                        },
+                        "children": [{"type": "generation"}],
+                    },
+                ],
+            }
+        ]
+    )
+
 
 @pytest.mark.asyncio
 async def test_max_turns_exceeded():
@@ -307,6 +604,38 @@ async def test_max_turns_exceeded():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {"message": "Max turns exceeded", "data": {"max_turns": 2}},
+                        "data": {
+                            "name": "test",
+                            "handoffs": [],
+                            "tools": ["foo"],
+                            "output_type": "Foo",
+                        },
+                        "children": [
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {"name": "foo", "input": "", "output": "result"},
+                            },
+                            {"type": "generation"},
+                            {
+                                "type": "function",
+                                "data": {"name": "foo", "input": "", "output": "result"},
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 5, (
         f"should have 1 agent, 2 generations, 2 function calls, got "
@@ -347,6 +676,33 @@ async def test_input_guardrail_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {
+                            "message": "Guardrail tripwire triggered",
+                            "data": {
+                                "guardrail": "input_guardrail_function",
+                                "type": "input_guardrail",
+                            },
+                        },
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"},
+                        "children": [
+                            {
+                                "type": "guardrail",
+                                "data": {"name": "input_guardrail_function", "triggered": True},
+                            }
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, (
         f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
@@ -387,6 +743,30 @@ async def test_output_guardrail_error():
     traces = fetch_traces()
     assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
 
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "error": {
+                            "message": "Guardrail tripwire triggered",
+                            "data": {"guardrail": "output_guardrail_function"},
+                        },
+                        "data": {"name": "test", "handoffs": [], "tools": [], "output_type": "str"},
+                        "children": [
+                            {
+                                "type": "guardrail",
+                                "data": {"name": "output_guardrail_function", "triggered": True},
+                            }
+                        ],
+                    }
+                ],
+            }
+        ]
+    )
+
     spans = fetch_ordered_spans()
     assert len(spans) == 2, (
         f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
diff --git a/tests/testing_processor.py b/tests/testing_processor.py
index 258a08d..e5cb6f5 100644
--- a/tests/testing_processor.py
+++ b/tests/testing_processor.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import threading
+from datetime import datetime
 from typing import Any, Literal
 
 from agents.tracing import Span, Trace, TracingProcessor
@@ -77,3 +78,35 @@ def fetch_traces() -> list[Trace]:
 
 def fetch_events() -> list[TestSpanProcessorEvent]:
     return SPAN_PROCESSOR_TESTING._events
+
+
+def fetch_normalized_spans():  # nest exported spans under their traces for snapshot tests
+    nodes: dict[tuple[str, str | None], dict[str, Any]] = {}  # (trace_id, span_id) -> node
+    traces = []
+    for trace_obj in fetch_traces():
+        trace = trace_obj.export()
+        assert trace.pop("object") == "trace"  # check the constant field, then drop it
+        assert trace.pop("id").startswith("trace_")  # drop the id, checking only its prefix
+        trace = {k: v for k, v in trace.items() if v is not None}  # omit None-valued fields
+        nodes[(trace_obj.trace_id, None)] = trace  # a None span id marks the trace root
+        traces.append(trace)
+
+    if not traces:
+        assert not fetch_ordered_spans()  # no traces => there must be no spans either
+
+    for span_obj in fetch_ordered_spans():
+        span = span_obj.export()
+        assert span.pop("object") == "trace.span"  # check the constant field, then drop it
+        assert span.pop("id").startswith("span_")  # drop the id, checking only its prefix
+        assert datetime.fromisoformat(span.pop("started_at"))  # must parse; value is dropped
+        assert datetime.fromisoformat(span.pop("ended_at"))  # must parse; value is dropped
+        parent_id = span.pop("parent_id")  # None resolves to the trace node registered above
+        assert "type" not in span  # a top-level "type" would override the one merged in below
+        span_data = span.pop("span_data")
+        span = {"type": span_data.pop("type")} | {k: v for k, v in span.items() if v is not None}
+        span_data = {k: v for k, v in span_data.items() if v is not None}  # omit None values
+        if span_data:  # leave out "data" entirely when nothing remains
+            span["data"] = span_data
+        nodes[(span_obj.trace_id, span_obj.span_id)] = span  # register as a possible parent
+        nodes[(span.pop("trace_id"), parent_id)].setdefault("children", []).append(span)
+    return traces  # only the trace dicts; spans are reachable through "children"
diff --git a/uv.lock b/uv.lock
index 2bceea7..fd28b2b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -26,6 +26,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 },
 ]
 
+[[package]]
+name = "asttokens"
+version = "3.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 },
+]
+
 [[package]]
 name = "babel"
 version = "2.17.0"
@@ -240,6 +249,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 },
 ]
 
+[[package]]
+name = "executing"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
+]
+
 [[package]]
 name = "ghp-import"
 version = "2.1.0"
@@ -392,6 +410,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 },
 ]
 
+[[package]]
+name = "inline-snapshot"
+version = "0.20.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "asttokens" },
+    { name = "executing" },
+    { name = "rich" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3b/95/9b85a63031c168dd1c479f8cfd5cae42d42d6ac41c18dd760a104bc87ddc/inline_snapshot-0.20.5.tar.gz", hash = "sha256:d8b67c6d533c0a3f566e72608144b54da65dc3da5d0dba4169b2c56b75530fb5", size = 92215 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d4/71/34e775bbf0bcf81d588d80a1df93437f937b0df9a841f246606a03fc5eff/inline_snapshot-0.20.5-py3-none-any.whl", hash = "sha256:3aa56acf5985d89f17ebd4df4aef00faacc49f10cdf4e6b42be701ffc9702b5a", size = 48071 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -797,6 +830,7 @@ dependencies = [
 [package.dev-dependencies]
 dev = [
     { name = "coverage" },
+    { name = "inline-snapshot" },
     { name = "mkdocs" },
     { name = "mkdocs-material" },
     { name = "mkdocstrings", extra = ["python"] },
@@ -822,6 +856,7 @@ requires-dist = [
 [package.metadata.requires-dev]
 dev = [
     { name = "coverage", specifier = ">=7.6.12" },
+    { name = "inline-snapshot", specifier = ">=0.20.5" },
     { name = "mkdocs", specifier = ">=1.6.0" },
     { name = "mkdocs-material", specifier = ">=9.6.0" },
     { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28.0" },

From 26828e5e6834300c32177c560acba24bd50f48d8 Mon Sep 17 00:00:00 2001
From: Carlos Souza <caike@users.noreply.github.com>
Date: Thu, 13 Mar 2025 16:18:40 -0400
Subject: [PATCH 03/16] Fix typo on Agent documentation

Argument name is not description but handoff_description
---
 src/agents/agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agents/agent.py b/src/agents/agent.py
index 61c0a89..eb39164 100644
--- a/src/agents/agent.py
+++ b/src/agents/agent.py
@@ -27,7 +27,7 @@ class Agent(Generic[TContext]):
     """An agent is an AI model configured with instructions, tools, guardrails, handoffs and more.
 
     We strongly recommend passing `instructions`, which is the "system prompt" for the agent. In
-    addition, you can pass `description`, which is a human-readable description of the agent, used
+    addition, you can pass `handoff_description`, which is a human-readable description of the agent, used
     when the agent is used inside tools/handoffs.
 
     Agents are generic on the context type. The context is a (mutable) object you create. It is

From 792cdea4648f10e3457649bd817dc049f19d0424 Mon Sep 17 00:00:00 2001
From: Kento Yamanaka <kento.ymk@gmail.com>
Date: Thu, 13 Mar 2025 18:26:49 -0700
Subject: [PATCH 04/16] fix: use first_agent instead of second_agent for a task
 to generate random number

---
 examples/handoffs/message_filter.py           | 4 ++--
 examples/handoffs/message_filter_streaming.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/handoffs/message_filter.py b/examples/handoffs/message_filter.py
index 9dd56ef..b7fed6c 100644
--- a/examples/handoffs/message_filter.py
+++ b/examples/handoffs/message_filter.py
@@ -60,9 +60,9 @@ async def main():
 
         print("Step 1 done")
 
-        # 2. Ask it to square a number
+        # 2. Ask it to generate a number
         result = await Runner.run(
-            second_agent,
+            first_agent,
             input=result.to_input_list()
             + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}],
         )
diff --git a/examples/handoffs/message_filter_streaming.py b/examples/handoffs/message_filter_streaming.py
index 8d1b420..63cb1de 100644
--- a/examples/handoffs/message_filter_streaming.py
+++ b/examples/handoffs/message_filter_streaming.py
@@ -60,9 +60,9 @@ async def main():
 
         print("Step 1 done")
 
-        # 2. Ask it to square a number
+        # 2. Ask it to generate a number
         result = await Runner.run(
-            second_agent,
+            first_agent,
             input=result.to_input_list()
             + [{"content": "Can you generate a random number between 0 and 100?", "role": "user"}],
         )

From 8540b1e65b5cc0ccbf994a2868932fbe62e7da79 Mon Sep 17 00:00:00 2001
From: CCM <ccmien@gmail.com>
Date: Fri, 14 Mar 2025 19:14:26 +0800
Subject: [PATCH 05/16] fix typo in agent_lifecycle_example.py

---
 examples/basic/agent_lifecycle_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/basic/agent_lifecycle_example.py b/examples/basic/agent_lifecycle_example.py
index bc0bbe4..29bb18c 100644
--- a/examples/basic/agent_lifecycle_example.py
+++ b/examples/basic/agent_lifecycle_example.py
@@ -74,7 +74,7 @@ multiply_agent = Agent(
 
 start_agent = Agent(
     name="Start Agent",
-    instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multipler agent.",
+    instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multiply agent.",
     tools=[random_number],
     output_type=FinalResult,
     handoffs=[multiply_agent],

From f0ef7d71ebe4d6a1c122eb46a4292f4b8103b5a3 Mon Sep 17 00:00:00 2001
From: Alexander Song <axiomofjoy@gmail.com>
Date: Fri, 14 Mar 2025 17:50:10 -0700
Subject: [PATCH 06/16] docs: add arize-phoenix to tracing documentation

---
 docs/tracing.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/tracing.md b/docs/tracing.md
index d7d0a65..9831ac0 100644
--- a/docs/tracing.md
+++ b/docs/tracing.md
@@ -90,8 +90,9 @@ To customize this default setup, to send traces to alternative or additional bac
 
 External trace processors include:
 
+-   [Arize-Phoenix](https://docs.arize.com/phoenix/tracing/integrations-tracing/openai-agents-sdk)
 -   [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk)
 -   [Pydantic Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents)
 -   [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk)
--   [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration))
+-   [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration)
 -   [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent)

From 09d70c074daf210fbb1a3acd31bc2ac048f9ba26 Mon Sep 17 00:00:00 2001
From: Rohan Mehta <rm@openai.com>
Date: Sun, 16 Mar 2025 18:48:45 -0400
Subject: [PATCH 07/16] utils directory

---
 examples/basic/hello_world_jupyter.py |  2 +-
 src/agents/_run_impl.py               | 24 +++++------
 src/agents/_utils.py                  | 61 ---------------------------
 src/agents/agent.py                   |  6 +--
 src/agents/agent_output.py            |  8 ++--
 src/agents/guardrail.py               |  2 +-
 src/agents/handoffs.py                |  8 ++--
 src/agents/run.py                     | 20 ++++-----
 src/agents/tool.py                    |  7 +--
 src/agents/util/__init__.py           |  0
 src/agents/util/_coro.py              |  2 +
 src/agents/util/_error_tracing.py     | 16 +++++++
 src/agents/util/_json.py              | 31 ++++++++++++++
 src/agents/util/_transforms.py        | 11 +++++
 src/agents/util/_types.py             |  7 +++
 tests/test_function_tool_decorator.py |  3 +-
 tests/test_output_tool.py             |  6 ++-
 17 files changed, 111 insertions(+), 103 deletions(-)
 delete mode 100644 src/agents/_utils.py
 create mode 100644 src/agents/util/__init__.py
 create mode 100644 src/agents/util/_coro.py
 create mode 100644 src/agents/util/_error_tracing.py
 create mode 100644 src/agents/util/_json.py
 create mode 100644 src/agents/util/_transforms.py
 create mode 100644 src/agents/util/_types.py

diff --git a/examples/basic/hello_world_jupyter.py b/examples/basic/hello_world_jupyter.py
index bb8f14c..c929a7c 100644
--- a/examples/basic/hello_world_jupyter.py
+++ b/examples/basic/hello_world_jupyter.py
@@ -3,7 +3,7 @@ from agents import Agent, Runner
 agent = Agent(name="Assistant", instructions="You are a helpful assistant")
 
 # Intended for Jupyter notebooks where there's an existing event loop
-result = await Runner.run(agent, "Write a haiku about recursion in programming.") # type: ignore[top-level-await]  # noqa: F704
+result = await Runner.run(agent, "Write a haiku about recursion in programming.")  # type: ignore[top-level-await]  # noqa: F704
 print(result.final_output)
 
 # Code within code loops,
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
index 2c84950..c0c0ebd 100644
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@@ -25,7 +25,6 @@ from openai.types.responses.response_computer_tool_call import (
 from openai.types.responses.response_input_param import ComputerCallOutput
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 
-from . import _utils
 from .agent import Agent
 from .agent_output import AgentOutputSchema
 from .computer import AsyncComputer, Computer
@@ -59,6 +58,7 @@ from .tracing import (
     handoff_span,
     trace,
 )
+from .util import _coro, _error_tracing
 
 if TYPE_CHECKING:
     from .run import RunConfig
@@ -293,7 +293,7 @@ class RunImpl:
             elif isinstance(output, ResponseComputerToolCall):
                 items.append(ToolCallItem(raw_item=output, agent=agent))
                 if not computer_tool:
-                    _utils.attach_error_to_current_span(
+                    _error_tracing.attach_error_to_current_span(
                         SpanError(
                             message="Computer tool not found",
                             data={},
@@ -324,7 +324,7 @@ class RunImpl:
             # Regular function tool call
             else:
                 if output.name not in function_map:
-                    _utils.attach_error_to_current_span(
+                    _error_tracing.attach_error_to_current_span(
                         SpanError(
                             message="Tool not found",
                             data={"tool_name": output.name},
@@ -368,7 +368,7 @@ class RunImpl:
                         (
                             agent.hooks.on_tool_start(context_wrapper, agent, func_tool)
                             if agent.hooks
-                            else _utils.noop_coroutine()
+                            else _coro.noop_coroutine()
                         ),
                         func_tool.on_invoke_tool(context_wrapper, tool_call.arguments),
                     )
@@ -378,11 +378,11 @@ class RunImpl:
                         (
                             agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result)
                             if agent.hooks
-                            else _utils.noop_coroutine()
+                            else _coro.noop_coroutine()
                         ),
                     )
                 except Exception as e:
-                    _utils.attach_error_to_current_span(
+                    _error_tracing.attach_error_to_current_span(
                         SpanError(
                             message="Error running tool",
                             data={"tool_name": func_tool.name, "error": str(e)},
@@ -502,7 +502,7 @@ class RunImpl:
                         source=agent,
                     )
                     if agent.hooks
-                    else _utils.noop_coroutine()
+                    else _coro.noop_coroutine()
                 ),
             )
 
@@ -520,7 +520,7 @@ class RunImpl:
                     new_items=tuple(new_step_items),
                 )
                 if not callable(input_filter):
-                    _utils.attach_error_to_span(
+                    _error_tracing.attach_error_to_span(
                         span_handoff,
                         SpanError(
                             message="Invalid input filter",
@@ -530,7 +530,7 @@ class RunImpl:
                     raise UserError(f"Invalid input filter: {input_filter}")
                 filtered = input_filter(handoff_input_data)
                 if not isinstance(filtered, HandoffInputData):
-                    _utils.attach_error_to_span(
+                    _error_tracing.attach_error_to_span(
                         span_handoff,
                         SpanError(
                             message="Invalid input filter result",
@@ -591,7 +591,7 @@ class RunImpl:
             hooks.on_agent_end(context_wrapper, agent, final_output),
             agent.hooks.on_end(context_wrapper, agent, final_output)
             if agent.hooks
-            else _utils.noop_coroutine(),
+            else _coro.noop_coroutine(),
         )
 
     @classmethod
@@ -706,7 +706,7 @@ class ComputerAction:
             (
                 agent.hooks.on_tool_start(context_wrapper, agent, action.computer_tool)
                 if agent.hooks
-                else _utils.noop_coroutine()
+                else _coro.noop_coroutine()
             ),
             output_func,
         )
@@ -716,7 +716,7 @@ class ComputerAction:
             (
                 agent.hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output)
                 if agent.hooks
-                else _utils.noop_coroutine()
+                else _coro.noop_coroutine()
             ),
         )
 
diff --git a/src/agents/_utils.py b/src/agents/_utils.py
deleted file mode 100644
index 2a0293a..0000000
--- a/src/agents/_utils.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import annotations
-
-import re
-from collections.abc import Awaitable
-from typing import Any, Literal, Union
-
-from pydantic import TypeAdapter, ValidationError
-from typing_extensions import TypeVar
-
-from .exceptions import ModelBehaviorError
-from .logger import logger
-from .tracing import Span, SpanError, get_current_span
-
-T = TypeVar("T")
-
-MaybeAwaitable = Union[Awaitable[T], T]
-
-
-def transform_string_function_style(name: str) -> str:
-    # Replace spaces with underscores
-    name = name.replace(" ", "_")
-
-    # Replace non-alphanumeric characters with underscores
-    name = re.sub(r"[^a-zA-Z0-9]", "_", name)
-
-    return name.lower()
-
-
-def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) -> T:
-    partial_setting: bool | Literal["off", "on", "trailing-strings"] = (
-        "trailing-strings" if partial else False
-    )
-    try:
-        validated = type_adapter.validate_json(json_str, experimental_allow_partial=partial_setting)
-        return validated
-    except ValidationError as e:
-        attach_error_to_current_span(
-            SpanError(
-                message="Invalid JSON provided",
-                data={},
-            )
-        )
-        raise ModelBehaviorError(
-            f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}"
-        ) from e
-
-
-def attach_error_to_span(span: Span[Any], error: SpanError) -> None:
-    span.set_error(error)
-
-
-def attach_error_to_current_span(error: SpanError) -> None:
-    span = get_current_span()
-    if span:
-        attach_error_to_span(span, error)
-    else:
-        logger.warning(f"No span to add error {error} to")
-
-
-async def noop_coroutine() -> None:
-    pass
diff --git a/src/agents/agent.py b/src/agents/agent.py
index 61c0a89..84d0ae9 100644
--- a/src/agents/agent.py
+++ b/src/agents/agent.py
@@ -6,8 +6,6 @@ from collections.abc import Awaitable
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Callable, Generic, cast
 
-from . import _utils
-from ._utils import MaybeAwaitable
 from .guardrail import InputGuardrail, OutputGuardrail
 from .handoffs import Handoff
 from .items import ItemHelpers
@@ -16,6 +14,8 @@ from .model_settings import ModelSettings
 from .models.interface import Model
 from .run_context import RunContextWrapper, TContext
 from .tool import Tool, function_tool
+from .util import _transforms
+from .util._types import MaybeAwaitable
 
 if TYPE_CHECKING:
     from .lifecycle import AgentHooks
@@ -126,7 +126,7 @@ class Agent(Generic[TContext]):
         """
 
         @function_tool(
-            name_override=tool_name or _utils.transform_string_function_style(self.name),
+            name_override=tool_name or _transforms.transform_string_function_style(self.name),
             description_override=tool_description or "",
         )
         async def run_agent(context: RunContextWrapper, input: str) -> str:
diff --git a/src/agents/agent_output.py b/src/agents/agent_output.py
index 0c28800..3262c57 100644
--- a/src/agents/agent_output.py
+++ b/src/agents/agent_output.py
@@ -4,10 +4,10 @@ from typing import Any
 from pydantic import BaseModel, TypeAdapter
 from typing_extensions import TypedDict, get_args, get_origin
 
-from . import _utils
 from .exceptions import ModelBehaviorError, UserError
 from .strict_schema import ensure_strict_json_schema
 from .tracing import SpanError
+from .util import _error_tracing, _json
 
 _WRAPPER_DICT_KEY = "response"
 
@@ -87,10 +87,10 @@ class AgentOutputSchema:
         """Validate a JSON string against the output type. Returns the validated object, or raises
         a `ModelBehaviorError` if the JSON is invalid.
         """
-        validated = _utils.validate_json(json_str, self._type_adapter, partial)
+        validated = _json.validate_json(json_str, self._type_adapter, partial)
         if self._is_wrapped:
             if not isinstance(validated, dict):
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Invalid JSON",
                         data={"details": f"Expected a dict, got {type(validated)}"},
@@ -101,7 +101,7 @@ class AgentOutputSchema:
                 )
 
             if _WRAPPER_DICT_KEY not in validated:
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Invalid JSON",
                         data={"details": f"Could not find key {_WRAPPER_DICT_KEY} in JSON"},
diff --git a/src/agents/guardrail.py b/src/agents/guardrail.py
index 5bebcd6..a96f0f7 100644
--- a/src/agents/guardrail.py
+++ b/src/agents/guardrail.py
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, Any, Callable, Generic, Union, overload
 
 from typing_extensions import TypeVar
 
-from ._utils import MaybeAwaitable
 from .exceptions import UserError
 from .items import TResponseInputItem
 from .run_context import RunContextWrapper, TContext
+from .util._types import MaybeAwaitable
 
 if TYPE_CHECKING:
     from .agent import Agent
diff --git a/src/agents/handoffs.py b/src/agents/handoffs.py
index ac15740..686191f 100644
--- a/src/agents/handoffs.py
+++ b/src/agents/handoffs.py
@@ -8,12 +8,12 @@ from typing import TYPE_CHECKING, Any, Callable, Generic, cast, overload
 from pydantic import TypeAdapter
 from typing_extensions import TypeAlias, TypeVar
 
-from . import _utils
 from .exceptions import ModelBehaviorError, UserError
 from .items import RunItem, TResponseInputItem
 from .run_context import RunContextWrapper, TContext
 from .strict_schema import ensure_strict_json_schema
 from .tracing.spans import SpanError
+from .util import _error_tracing, _json, _transforms
 
 if TYPE_CHECKING:
     from .agent import Agent
@@ -104,7 +104,7 @@ class Handoff(Generic[TContext]):
 
     @classmethod
     def default_tool_name(cls, agent: Agent[Any]) -> str:
-        return _utils.transform_string_function_style(f"transfer_to_{agent.name}")
+        return _transforms.transform_string_function_style(f"transfer_to_{agent.name}")
 
     @classmethod
     def default_tool_description(cls, agent: Agent[Any]) -> str:
@@ -192,7 +192,7 @@ def handoff(
     ) -> Agent[Any]:
         if input_type is not None and type_adapter is not None:
             if input_json is None:
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Handoff function expected non-null input, but got None",
                         data={"details": "input_json is None"},
@@ -200,7 +200,7 @@ def handoff(
                 )
                 raise ModelBehaviorError("Handoff function expected non-null input, but got None")
 
-            validated_input = _utils.validate_json(
+            validated_input = _json.validate_json(
                 json_str=input_json,
                 type_adapter=type_adapter,
                 partial=False,
diff --git a/src/agents/run.py b/src/agents/run.py
index dfff7e3..934400f 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -7,7 +7,6 @@ from typing import Any, cast
 
 from openai.types.responses import ResponseCompletedEvent
 
-from . import Model, _utils
 from ._run_impl import (
     NextStepFinalOutput,
     NextStepHandoff,
@@ -33,7 +32,7 @@ from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
 from .lifecycle import RunHooks
 from .logger import logger
 from .model_settings import ModelSettings
-from .models.interface import ModelProvider
+from .models.interface import Model, ModelProvider
 from .models.openai_provider import OpenAIProvider
 from .result import RunResult, RunResultStreaming
 from .run_context import RunContextWrapper, TContext
@@ -41,6 +40,7 @@ from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent
 from .tracing import Span, SpanError, agent_span, get_current_trace, trace
 from .tracing.span_data import AgentSpanData
 from .usage import Usage
+from .util import _coro, _error_tracing
 
 DEFAULT_MAX_TURNS = 10
 
@@ -193,7 +193,7 @@ class Runner:
 
                     current_turn += 1
                     if current_turn > max_turns:
-                        _utils.attach_error_to_span(
+                        _error_tracing.attach_error_to_span(
                             current_span,
                             SpanError(
                                 message="Max turns exceeded",
@@ -447,7 +447,7 @@ class Runner:
             for done in asyncio.as_completed(guardrail_tasks):
                 result = await done
                 if result.output.tripwire_triggered:
-                    _utils.attach_error_to_span(
+                    _error_tracing.attach_error_to_span(
                         parent_span,
                         SpanError(
                             message="Guardrail tripwire triggered",
@@ -511,7 +511,7 @@ class Runner:
                 streamed_result.current_turn = current_turn
 
                 if current_turn > max_turns:
-                    _utils.attach_error_to_span(
+                    _error_tracing.attach_error_to_span(
                         current_span,
                         SpanError(
                             message="Max turns exceeded",
@@ -583,7 +583,7 @@ class Runner:
                         pass
                 except Exception as e:
                     if current_span:
-                        _utils.attach_error_to_span(
+                        _error_tracing.attach_error_to_span(
                             current_span,
                             SpanError(
                                 message="Error in agent run",
@@ -615,7 +615,7 @@ class Runner:
                 (
                     agent.hooks.on_start(context_wrapper, agent)
                     if agent.hooks
-                    else _utils.noop_coroutine()
+                    else _coro.noop_coroutine()
                 ),
             )
 
@@ -705,7 +705,7 @@ class Runner:
                 (
                     agent.hooks.on_start(context_wrapper, agent)
                     if agent.hooks
-                    else _utils.noop_coroutine()
+                    else _coro.noop_coroutine()
                 ),
             )
 
@@ -796,7 +796,7 @@ class Runner:
                 # Cancel all guardrail tasks if a tripwire is triggered.
                 for t in guardrail_tasks:
                     t.cancel()
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Guardrail tripwire triggered",
                         data={"guardrail": result.guardrail.get_name()},
@@ -834,7 +834,7 @@ class Runner:
                 # Cancel all guardrail tasks if a tripwire is triggered.
                 for t in guardrail_tasks:
                     t.cancel()
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Guardrail tripwire triggered",
                         data={"guardrail": result.guardrail.get_name()},
diff --git a/src/agents/tool.py b/src/agents/tool.py
index cbe8794..0baf2c0 100644
--- a/src/agents/tool.py
+++ b/src/agents/tool.py
@@ -11,14 +11,15 @@ from openai.types.responses.web_search_tool_param import UserLocation
 from pydantic import ValidationError
 from typing_extensions import Concatenate, ParamSpec
 
-from . import _debug, _utils
-from ._utils import MaybeAwaitable
+from . import _debug
 from .computer import AsyncComputer, Computer
 from .exceptions import ModelBehaviorError
 from .function_schema import DocstringStyle, function_schema
 from .logger import logger
 from .run_context import RunContextWrapper
 from .tracing import SpanError
+from .util import _error_tracing
+from .util._types import MaybeAwaitable
 
 ToolParams = ParamSpec("ToolParams")
 
@@ -263,7 +264,7 @@ def function_tool(
                 if inspect.isawaitable(result):
                     return await result
 
-                _utils.attach_error_to_current_span(
+                _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Error running tool (non-fatal)",
                         data={
diff --git a/src/agents/util/__init__.py b/src/agents/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/agents/util/_coro.py b/src/agents/util/_coro.py
new file mode 100644
index 0000000..647ab86
--- /dev/null
+++ b/src/agents/util/_coro.py
@@ -0,0 +1,2 @@
+async def noop_coroutine() -> None:
+    pass
diff --git a/src/agents/util/_error_tracing.py b/src/agents/util/_error_tracing.py
new file mode 100644
index 0000000..09dbb1d
--- /dev/null
+++ b/src/agents/util/_error_tracing.py
@@ -0,0 +1,16 @@
+from typing import Any
+
+from ..logger import logger
+from ..tracing import Span, SpanError, get_current_span
+
+
+def attach_error_to_span(span: Span[Any], error: SpanError) -> None:
+    span.set_error(error)
+
+
+def attach_error_to_current_span(error: SpanError) -> None:
+    span = get_current_span()
+    if span:
+        attach_error_to_span(span, error)
+    else:
+        logger.warning(f"No span to add error {error} to")
diff --git a/src/agents/util/_json.py b/src/agents/util/_json.py
new file mode 100644
index 0000000..1e081f6
--- /dev/null
+++ b/src/agents/util/_json.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import TypeAdapter, ValidationError
+from typing_extensions import TypeVar
+
+from ..exceptions import ModelBehaviorError
+from ..tracing import SpanError
+from ._error_tracing import attach_error_to_current_span
+
+T = TypeVar("T")
+
+
+def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) -> T:
+    partial_setting: bool | Literal["off", "on", "trailing-strings"] = (
+        "trailing-strings" if partial else False
+    )
+    try:
+        validated = type_adapter.validate_json(json_str, experimental_allow_partial=partial_setting)
+        return validated
+    except ValidationError as e:
+        attach_error_to_current_span(
+            SpanError(
+                message="Invalid JSON provided",
+                data={},
+            )
+        )
+        raise ModelBehaviorError(
+            f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}"
+        ) from e
diff --git a/src/agents/util/_transforms.py b/src/agents/util/_transforms.py
new file mode 100644
index 0000000..b303074
--- /dev/null
+++ b/src/agents/util/_transforms.py
@@ -0,0 +1,11 @@
+import re
+
+
+def transform_string_function_style(name: str) -> str:
+    # Replace spaces with underscores
+    name = name.replace(" ", "_")
+
+    # Replace non-alphanumeric characters with underscores
+    name = re.sub(r"[^a-zA-Z0-9]", "_", name)
+
+    return name.lower()
diff --git a/src/agents/util/_types.py b/src/agents/util/_types.py
new file mode 100644
index 0000000..8571a69
--- /dev/null
+++ b/src/agents/util/_types.py
@@ -0,0 +1,7 @@
+from collections.abc import Awaitable
+from typing import Union
+
+from typing_extensions import TypeVar
+
+T = TypeVar("T")
+MaybeAwaitable = Union[Awaitable[T], T]
diff --git a/tests/test_function_tool_decorator.py b/tests/test_function_tool_decorator.py
index b581660..f146ec7 100644
--- a/tests/test_function_tool_decorator.py
+++ b/tests/test_function_tool_decorator.py
@@ -175,12 +175,11 @@ def multiple_optional_params_function(
     return f"{x}_{y}_{z}"
 
 
-
 @pytest.mark.asyncio
 async def test_multiple_optional_params_function():
     tool = multiple_optional_params_function
 
-    input_data: dict[str,Any] = {}
+    input_data: dict[str, Any] = {}
     output = await tool.on_invoke_tool(ctx_wrapper(), json.dumps(input_data))
     assert output == "42_hello_no_z"
 
diff --git a/tests/test_output_tool.py b/tests/test_output_tool.py
index 31ac984..86c4b3b 100644
--- a/tests/test_output_tool.py
+++ b/tests/test_output_tool.py
@@ -4,8 +4,9 @@ import pytest
 from pydantic import BaseModel
 from typing_extensions import TypedDict
 
-from agents import Agent, AgentOutputSchema, ModelBehaviorError, Runner, UserError, _utils
+from agents import Agent, AgentOutputSchema, ModelBehaviorError, Runner, UserError
 from agents.agent_output import _WRAPPER_DICT_KEY
+from agents.util import _json
 
 
 def test_plain_text_output():
@@ -77,7 +78,7 @@ def test_bad_json_raises_error(mocker):
     output_schema = Runner._get_output_schema(agent)
     assert output_schema, "Should have an output tool config with a structured output type"
 
-    mock_validate_json = mocker.patch.object(_utils, "validate_json")
+    mock_validate_json = mocker.patch.object(_json, "validate_json")
     mock_validate_json.return_value = ["foo"]
 
     with pytest.raises(ModelBehaviorError):
@@ -111,3 +112,4 @@ def test_setting_strict_false_works():
     output_wrapper = AgentOutputSchema(output_type=Foo, strict_json_schema=False)
     assert not output_wrapper.strict_json_schema
     assert output_wrapper.json_schema() == Foo.model_json_schema()
+    assert output_wrapper.json_schema() == Foo.model_json_schema()

From 54a48a39673a3359208d9ba5ee550928348c1291 Mon Sep 17 00:00:00 2001
From: heartkilla <artem.official.by@gmail.com>
Date: Mon, 17 Mar 2025 14:56:43 +0900
Subject: [PATCH 08/16] fix reasoning order in examples

---
 examples/agent_patterns/input_guardrails.py | 2 +-
 examples/agent_patterns/llm_as_a_judge.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/agent_patterns/input_guardrails.py b/examples/agent_patterns/input_guardrails.py
index 8c8e182..1545355 100644
--- a/examples/agent_patterns/input_guardrails.py
+++ b/examples/agent_patterns/input_guardrails.py
@@ -30,8 +30,8 @@ If the guardrail trips, we'll respond with a refusal message.
 
 ### 1. An agent-based guardrail that is triggered if the user is asking to do math homework
 class MathHomeworkOutput(BaseModel):
-    is_math_homework: bool
     reasoning: str
+    is_math_homework: bool
 
 
 guardrail_agent = Agent(
diff --git a/examples/agent_patterns/llm_as_a_judge.py b/examples/agent_patterns/llm_as_a_judge.py
index d13a67c..5a46cc3 100644
--- a/examples/agent_patterns/llm_as_a_judge.py
+++ b/examples/agent_patterns/llm_as_a_judge.py
@@ -23,8 +23,8 @@ story_outline_generator = Agent(
 
 @dataclass
 class EvaluationFeedback:
-    score: Literal["pass", "needs_improvement", "fail"]
     feedback: str
+    score: Literal["pass", "needs_improvement", "fail"]
 
 
 evaluator = Agent[None](

From 1f58528f1c905c7f4a602078173a64674abc4ebc Mon Sep 17 00:00:00 2001
From: Vincenzo Domina <54762917+vincenzodomina@users.noreply.github.com>
Date: Mon, 17 Mar 2025 10:37:43 +0100
Subject: [PATCH 09/16] Add TracingProcessor export to  __init__.py

---
 src/agents/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/agents/__init__.py b/src/agents/__init__.py
index a2d7f24..21a2f2a 100644
--- a/src/agents/__init__.py
+++ b/src/agents/__init__.py
@@ -73,6 +73,7 @@ from .tracing import (
     SpanData,
     SpanError,
     Trace,
+    TracingProcessor,
     add_trace_processor,
     agent_span,
     custom_span,
@@ -208,6 +209,7 @@ __all__ = [
     "set_tracing_disabled",
     "trace",
     "Trace",
+    "TracingProcessor",
     "SpanError",
     "Span",
     "SpanData",

From 64e263b61433193fd9d9121b0f35a7ccebb1991c Mon Sep 17 00:00:00 2001
From: Rohan Mehta <rm@openai.com>
Date: Mon, 17 Mar 2025 11:11:39 -0400
Subject: [PATCH 10/16] Pretty print result classes

---
 Makefile                         |   8 ++
 pyproject.toml                   |   6 +-
 src/agents/result.py             |   7 ++
 src/agents/util/_pretty_print.py |  56 +++++++++
 tests/README.md                  |  25 ++++
 tests/test_pretty_print.py       | 201 +++++++++++++++++++++++++++++++
 uv.lock                          |  36 ++++++
 7 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 src/agents/util/_pretty_print.py
 create mode 100644 tests/README.md
 create mode 100644 tests/test_pretty_print.py

diff --git a/Makefile b/Makefile
index 7dd9bbd..39899d8 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,14 @@ mypy:
 tests: 
 	uv run pytest 
 
+.PHONY: snapshots-fix
+snapshots-fix: 
+	uv run pytest --inline-snapshot=fix 
+
+.PHONY: snapshots-create 
+snapshots-create: 
+	uv run pytest --inline-snapshot=create 
+
 .PHONY: old_version_tests
 old_version_tests: 
 	UV_PROJECT_ENVIRONMENT=.venv_39 uv run --python 3.9 -m pytest
diff --git a/pyproject.toml b/pyproject.toml
index 8184a67..3ad1d37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ dev = [
     "mkdocstrings[python]>=0.28.0",
     "coverage>=7.6.12",
     "playwright==1.50.0",
+    "inline-snapshot>=0.20.7",
 ]
 [tool.uv.workspace]
 members = ["agents"]
@@ -116,4 +117,7 @@ filterwarnings = [
 ]
 markers = [
     "allow_call_model_methods: mark test as allowing calls to real model implementations",
-]
\ No newline at end of file
+]
+
+[tool.inline-snapshot]
+format-command="ruff format --stdin-filename {filename}"
\ No newline at end of file
diff --git a/src/agents/result.py b/src/agents/result.py
index 6e806b7..40a6480 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -17,6 +17,7 @@ from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
 from .logger import logger
 from .stream_events import StreamEvent
 from .tracing import Trace
+from .util._pretty_print import pretty_print_result, pretty_print_run_result_streaming
 
 if TYPE_CHECKING:
     from ._run_impl import QueueCompleteSentinel
@@ -89,6 +90,9 @@ class RunResult(RunResultBase):
         """The last agent that was run."""
         return self._last_agent
 
+    def __str__(self) -> str:
+        return pretty_print_result(self)
+
 
 @dataclass
 class RunResultStreaming(RunResultBase):
@@ -216,3 +220,6 @@ class RunResultStreaming(RunResultBase):
 
         if self._output_guardrails_task and not self._output_guardrails_task.done():
             self._output_guardrails_task.cancel()
+
+    def __str__(self) -> str:
+        return pretty_print_run_result_streaming(self)
diff --git a/src/agents/util/_pretty_print.py b/src/agents/util/_pretty_print.py
new file mode 100644
index 0000000..afd3e2b
--- /dev/null
+++ b/src/agents/util/_pretty_print.py
@@ -0,0 +1,56 @@
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    from ..result import RunResult, RunResultBase, RunResultStreaming
+
+
+def _indent(text: str, indent_level: int) -> str:
+    indent_string = "  " * indent_level
+    return "\n".join(f"{indent_string}{line}" for line in text.splitlines())
+
+
+def _final_output_str(result: "RunResultBase") -> str:
+    if result.final_output is None:
+        return "None"
+    elif isinstance(result.final_output, str):
+        return result.final_output
+    elif isinstance(result.final_output, BaseModel):
+        return result.final_output.model_dump_json(indent=2)
+    else:
+        return str(result.final_output)
+
+
+def pretty_print_result(result: "RunResult") -> str:
+    output = "RunResult:"
+    output += f'\n- Last agent: Agent(name="{result.last_agent.name}", ...)'
+    output += (
+        f"\n- Final output ({type(result.final_output).__name__}):\n"
+        f"{_indent(_final_output_str(result), 2)}"
+    )
+    output += f"\n- {len(result.new_items)} new item(s)"
+    output += f"\n- {len(result.raw_responses)} raw response(s)"
+    output += f"\n- {len(result.input_guardrail_results)} input guardrail result(s)"
+    output += f"\n- {len(result.output_guardrail_results)} output guardrail result(s)"
+    output += "\n(See `RunResult` for more details)"
+
+    return output
+
+
+def pretty_print_run_result_streaming(result: "RunResultStreaming") -> str:
+    output = "RunResultStreaming:"
+    output += f'\n- Current agent: Agent(name="{result.current_agent.name}", ...)'
+    output += f"\n- Current turn: {result.current_turn}"
+    output += f"\n- Max turns: {result.max_turns}"
+    output += f"\n- Is complete: {result.is_complete}"
+    output += (
+        f"\n- Final output ({type(result.final_output).__name__}):\n"
+        f"{_indent(_final_output_str(result), 2)}"
+    )
+    output += f"\n- {len(result.new_items)} new item(s)"
+    output += f"\n- {len(result.raw_responses)} raw response(s)"
+    output += f"\n- {len(result.input_guardrail_results)} input guardrail result(s)"
+    output += f"\n- {len(result.output_guardrail_results)} output guardrail result(s)"
+    output += "\n(See `RunResultStreaming` for more details)"
+    return output
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..d68e067
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,25 @@
+# Tests
+
+Before running any tests, make sure you have `uv` installed (and ideally run `make sync` after).
+
+## Running tests
+
+```
+make tests
+```
+
+## Snapshots
+
+We use [inline-snapshot](https://15r10nk.github.io/inline-snapshot/latest/) for some tests. If your code adds new snapshot tests or breaks existing ones, you can create or fix the snapshots with the commands below. After fixing/creating snapshots, run `make tests` again to verify the tests pass.
+
+### Fixing snapshots
+
+```
+make snapshots-fix
+```
+
+### Creating snapshots
+
+```
+make snapshots-create
+```
diff --git a/tests/test_pretty_print.py b/tests/test_pretty_print.py
new file mode 100644
index 0000000..b2218a2
--- /dev/null
+++ b/tests/test_pretty_print.py
@@ -0,0 +1,201 @@
+import json
+
+import pytest
+from inline_snapshot import snapshot
+from pydantic import BaseModel
+
+from agents import Agent, Runner
+from agents.agent_output import _WRAPPER_DICT_KEY
+from agents.util._pretty_print import pretty_print_result, pretty_print_run_result_streaming
+from tests.fake_model import FakeModel
+
+from .test_responses import get_final_output_message, get_text_message
+
+
+@pytest.mark.asyncio
+async def test_pretty_result():
+    model = FakeModel()
+    model.set_next_output([get_text_message("Hi there")])
+
+    agent = Agent(name="test_agent", model=model)
+    result = await Runner.run(agent, input="Hello")
+
+    assert pretty_print_result(result) == snapshot("""\
+RunResult:
+- Last agent: Agent(name="test_agent", ...)
+- Final output (str):
+    Hi there
+- 1 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResult` for more details)\
+""")
+
+
+@pytest.mark.asyncio
+async def test_pretty_run_result_streaming():
+    model = FakeModel()
+    model.set_next_output([get_text_message("Hi there")])
+
+    agent = Agent(name="test_agent", model=model)
+    result = Runner.run_streamed(agent, input="Hello")
+    async for _ in result.stream_events():
+        pass
+
+    assert pretty_print_run_result_streaming(result) == snapshot("""\
+RunResultStreaming:
+- Current agent: Agent(name="test_agent", ...)
+- Current turn: 1
+- Max turns: 10
+- Is complete: True
+- Final output (str):
+    Hi there
+- 1 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResultStreaming` for more details)\
+""")
+
+
+class Foo(BaseModel):
+    bar: str
+
+
+@pytest.mark.asyncio
+async def test_pretty_run_result_structured_output():
+    model = FakeModel()
+    model.set_next_output(
+        [
+            get_text_message("Test"),
+            get_final_output_message(Foo(bar="Hi there").model_dump_json()),
+        ]
+    )
+
+    agent = Agent(name="test_agent", model=model, output_type=Foo)
+    result = await Runner.run(agent, input="Hello")
+
+    assert pretty_print_result(result) == snapshot("""\
+RunResult:
+- Last agent: Agent(name="test_agent", ...)
+- Final output (Foo):
+    {
+      "bar": "Hi there"
+    }
+- 2 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResult` for more details)\
+""")
+
+
+@pytest.mark.asyncio
+async def test_pretty_run_result_streaming_structured_output():
+    model = FakeModel()
+    model.set_next_output(
+        [
+            get_text_message("Test"),
+            get_final_output_message(Foo(bar="Hi there").model_dump_json()),
+        ]
+    )
+
+    agent = Agent(name="test_agent", model=model, output_type=Foo)
+    result = Runner.run_streamed(agent, input="Hello")
+
+    async for _ in result.stream_events():
+        pass
+
+    assert pretty_print_run_result_streaming(result) == snapshot("""\
+RunResultStreaming:
+- Current agent: Agent(name="test_agent", ...)
+- Current turn: 1
+- Max turns: 10
+- Is complete: True
+- Final output (Foo):
+    {
+      "bar": "Hi there"
+    }
+- 2 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResultStreaming` for more details)\
+""")
+
+
+@pytest.mark.asyncio
+async def test_pretty_run_result_list_structured_output():
+    model = FakeModel()
+    model.set_next_output(
+        [
+            get_text_message("Test"),
+            get_final_output_message(
+                json.dumps(
+                    {
+                        _WRAPPER_DICT_KEY: [
+                            Foo(bar="Hi there").model_dump(),
+                            Foo(bar="Hi there 2").model_dump(),
+                        ]
+                    }
+                )
+            ),
+        ]
+    )
+
+    agent = Agent(name="test_agent", model=model, output_type=list[Foo])
+    result = await Runner.run(agent, input="Hello")
+
+    assert pretty_print_result(result) == snapshot("""\
+RunResult:
+- Last agent: Agent(name="test_agent", ...)
+- Final output (list):
+    [Foo(bar='Hi there'), Foo(bar='Hi there 2')]
+- 2 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResult` for more details)\
+""")
+
+
+@pytest.mark.asyncio
+async def test_pretty_run_result_streaming_list_structured_output():
+    model = FakeModel()
+    model.set_next_output(
+        [
+            get_text_message("Test"),
+            get_final_output_message(
+                json.dumps(
+                    {
+                        _WRAPPER_DICT_KEY: [
+                            Foo(bar="Test").model_dump(),
+                            Foo(bar="Test 2").model_dump(),
+                        ]
+                    }
+                )
+            ),
+        ]
+    )
+
+    agent = Agent(name="test_agent", model=model, output_type=list[Foo])
+    result = Runner.run_streamed(agent, input="Hello")
+
+    async for _ in result.stream_events():
+        pass
+
+    assert pretty_print_run_result_streaming(result) == snapshot("""\
+RunResultStreaming:
+- Current agent: Agent(name="test_agent", ...)
+- Current turn: 1
+- Max turns: 10
+- Is complete: True
+- Final output (list):
+    [Foo(bar='Test'), Foo(bar='Test 2')]
+- 2 new item(s)
+- 1 raw response(s)
+- 0 input guardrail result(s)
+- 0 output guardrail result(s)
+(See `RunResultStreaming` for more details)\
+""")
diff --git a/uv.lock b/uv.lock
index c3af99b..2c2e05b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,4 +1,5 @@
 version = 1
+revision = 1
 requires-python = ">=3.9"
 
 [[package]]
@@ -25,6 +26,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 },
 ]
 
+[[package]]
+name = "asttokens"
+version = "3.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 },
+]
+
 [[package]]
 name = "babel"
 version = "2.17.0"
@@ -239,6 +249,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 },
 ]
 
+[[package]]
+name = "executing"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
+]
+
 [[package]]
 name = "ghp-import"
 version = "2.1.0"
@@ -391,6 +410,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 },
 ]
 
+[[package]]
+name = "inline-snapshot"
+version = "0.20.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "asttokens" },
+    { name = "executing" },
+    { name = "rich" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b0/41/9bd2ecd10ef789e8aff6fb68dcc7677dc31b33b2d27c306c0d40fc982fbc/inline_snapshot-0.20.7.tar.gz", hash = "sha256:d55bbb6254d0727dc304729ca7998cde1c1e984c4bf50281514aa9d727a56cf2", size = 92643 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/01/8f/1bf23da63ad1a0b14ca2d9114700123ef76732e375548f4f9ca94052817e/inline_snapshot-0.20.7-py3-none-any.whl", hash = "sha256:2df6dd8710d1f0def2c1f9d6c25fd03d7beba01f3addf52fc370343d9ee9959f", size = 48108 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -796,6 +830,7 @@ dependencies = [
 [package.dev-dependencies]
 dev = [
     { name = "coverage" },
+    { name = "inline-snapshot" },
     { name = "mkdocs" },
     { name = "mkdocs-material" },
     { name = "mkdocstrings", extra = ["python"] },
@@ -821,6 +856,7 @@ requires-dist = [
 [package.metadata.requires-dev]
 dev = [
     { name = "coverage", specifier = ">=7.6.12" },
+    { name = "inline-snapshot", specifier = ">=0.20.7" },
     { name = "mkdocs", specifier = ">=1.6.0" },
     { name = "mkdocs-material", specifier = ">=9.6.0" },
     { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28.0" },

From 4ebf0742f11b8c12968ed22bec91f44ae2e3e421 Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Mon, 17 Mar 2025 08:47:28 -0700
Subject: [PATCH 11/16] docs: List LangSmith tracing integration

---
 docs/tracing.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/tracing.md b/docs/tracing.md
index d7d0a65..5d7477e 100644
--- a/docs/tracing.md
+++ b/docs/tracing.md
@@ -95,3 +95,4 @@ External trace processors include:
 -   [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk)
 -   [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration))
 -   [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent)
+-   [LangSmith](https://docs.smith.langchain.com/observability/how_to_guides/trace_with_openai_agents_sdk)

From 370a748bcc824a65688759edb9c62141698e7e96 Mon Sep 17 00:00:00 2001
From: James Hills <70035505+jhills20@users.noreply.github.com>
Date: Mon, 17 Mar 2025 09:13:22 -0700
Subject: [PATCH 12/16] Update tracing.md

Add callout for ZDR in tracing
---
 docs/tracing.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/tracing.md b/docs/tracing.md
index d7d0a65..622145d 100644
--- a/docs/tracing.md
+++ b/docs/tracing.md
@@ -9,6 +9,8 @@ The Agents SDK includes built-in tracing, collecting a comprehensive record of e
     1. You can globally disable tracing by setting the env var `OPENAI_AGENTS_DISABLE_TRACING=1`
     2. You can disable tracing for a single run by setting [`agents.run.RunConfig.tracing_disabled`][] to `True`
 
+***For organizations operating under a Zero Data Retention (ZDR) policy using OpenAI's APIs, tracing is unavailable.***
+
 ## Traces and spans
 
 -   **Traces** represent a single end-to-end operation of a "workflow". They're composed of Spans. Traces have the following properties:

From 1368e7ffe6a0148fc42c346df0f1a261dab2dc72 Mon Sep 17 00:00:00 2001
From: Rohan Mehta <rm@openai.com>
Date: Mon, 17 Mar 2025 14:55:54 -0400
Subject: [PATCH 13/16] Update tracing docs

---
 README.md       | 2 +-
 docs/tracing.md | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 210f6f4..51ca3c6 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,7 @@ The Agents SDK is designed to be highly flexible, allowing you to model a wide r
 
 ## Tracing
 
-The Agents SDK automatically traces your agent runs, making it easy to track and debug the behavior of your agents. Tracing is extensible by design, supporting custom spans and a wide variety of external destinations, including [Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents), [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk), [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk), [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration), and [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent). For more details about how to customize or disable tracing, see [Tracing](http://openai.github.io/openai-agents-python/tracing).
+The Agents SDK automatically traces your agent runs, making it easy to track and debug the behavior of your agents. Tracing is extensible by design, supporting custom spans and a wide variety of external destinations, including [Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents), [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk), [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk), [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration), and [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent). For more details about how to customize or disable tracing, see [Tracing](http://openai.github.io/openai-agents-python/tracing), which also includes a larger list of [external tracing processors](http://openai.github.io/openai-agents-python/tracing/#external-tracing-processors-list).
 
 ## Development (only needed if you need to edit the SDK/examples)
 
diff --git a/docs/tracing.md b/docs/tracing.md
index d7d0a65..372a41a 100644
--- a/docs/tracing.md
+++ b/docs/tracing.md
@@ -88,10 +88,10 @@ To customize this default setup, to send traces to alternative or additional bac
 1. [`add_trace_processor()`][agents.tracing.add_trace_processor] lets you add an **additional** trace processor that will receive traces and spans as they are ready. This lets you do your own processing in addition to sending traces to OpenAI's backend.
 2. [`set_trace_processors()`][agents.tracing.set_trace_processors] lets you **replace** the default processors with your own trace processors. This means traces will not be sent to the OpenAI backend unless you include a `TracingProcessor` that does so.
 
-External trace processors include:
+## External tracing processors list
 
 -   [Braintrust](https://braintrust.dev/docs/guides/traces/integrations#openai-agents-sdk)
 -   [Pydantic Logfire](https://logfire.pydantic.dev/docs/integrations/llms/openai/#openai-agents)
 -   [AgentOps](https://docs.agentops.ai/v1/integrations/agentssdk)
--   [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration))
+-   [Scorecard](https://docs.scorecard.io/docs/documentation/features/tracing#openai-agents-sdk-integration)
 -   [Keywords AI](https://docs.keywordsai.co/integration/development-frameworks/openai-agent)

From 64150c8aeab62d8e65e6d3d8a0fab62857139c5c Mon Sep 17 00:00:00 2001
From: Carlos Souza <carloshrsouza@gmail.com>
Date: Mon, 17 Mar 2025 16:14:28 -0400
Subject: [PATCH 14/16] Fix lint

---
 src/agents/agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agents/agent.py b/src/agents/agent.py
index eb39164..f8bce7f 100644
--- a/src/agents/agent.py
+++ b/src/agents/agent.py
@@ -27,8 +27,8 @@ class Agent(Generic[TContext]):
     """An agent is an AI model configured with instructions, tools, guardrails, handoffs and more.
 
     We strongly recommend passing `instructions`, which is the "system prompt" for the agent. In
-    addition, you can pass `handoff_description`, which is a human-readable description of the agent, used
-    when the agent is used inside tools/handoffs.
+    addition, you can pass `handoff_description`, which is a human-readable description of the
+    agent, used when the agent is used inside tools/handoffs.
 
     Agents are generic on the context type. The context is a (mutable) object you create. It is
     passed to tool functions, handoffs, guardrails, etc.

From 7eb2bcee15b8077c4ce002df59af4a44de2b62d8 Mon Sep 17 00:00:00 2001
From: Alex Hall <alex.mojaki@gmail.com>
Date: Mon, 17 Mar 2025 23:56:42 +0200
Subject: [PATCH 15/16] mypy

---
 tests/testing_processor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/testing_processor.py b/tests/testing_processor.py
index e5cb6f5..371ea86 100644
--- a/tests/testing_processor.py
+++ b/tests/testing_processor.py
@@ -85,6 +85,7 @@ def fetch_normalized_spans():
     traces = []
     for trace_obj in fetch_traces():
         trace = trace_obj.export()
+        assert trace
         assert trace.pop("object") == "trace"
         assert trace.pop("id").startswith("trace_")
         trace = {k: v for k, v in trace.items() if v is not None}
@@ -96,6 +97,7 @@ def fetch_normalized_spans():
 
     for span_obj in fetch_ordered_spans():
         span = span_obj.export()
+        assert span
         assert span.pop("object") == "trace.span"
         assert span.pop("id").startswith("span_")
         assert datetime.fromisoformat(span.pop("started_at"))

From a43cf1542b4cfbb7ee00ebf33aa7d49e716a7cba Mon Sep 17 00:00:00 2001
From: Dmitry Pimenov <dmitry@openai.com>
Date: Mon, 17 Mar 2025 15:05:15 -0700
Subject: [PATCH 16/16] clarifying that handoffs are a type of tool call under
 the hood

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 51ca3c6..fc98b2b 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ The OpenAI Agents SDK is a lightweight yet powerful framework for building multi
 ### Core concepts:
 
 1. [**Agents**](https://openai.github.io/openai-agents-python/agents): LLMs configured with instructions, tools, guardrails, and handoffs
-2. [**Handoffs**](https://openai.github.io/openai-agents-python/handoffs/): Allow agents to transfer control to other agents for specific tasks
+2. [**Handoffs**](https://openai.github.io/openai-agents-python/handoffs/): A specialized tool call used by the Agents SDK for transferring control between agents
 3. [**Guardrails**](https://openai.github.io/openai-agents-python/guardrails/): Configurable safety checks for input and output validation
 4. [**Tracing**](https://openai.github.io/openai-agents-python/tracing/): Built-in tracking of agent runs, allowing you to view, debug and optimize your workflows