Remove redundant weaker tracing assertions

This commit is contained in:
Alex Hall 2025-03-20 13:49:38 +02:00
parent cef3d5357c
commit 7031d4ab87
4 changed files with 2 additions and 310 deletions

View file

@ -23,9 +23,6 @@ async def test_single_run_is_single_trace():
await Runner.run(agent, input="first_test")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -45,12 +42,6 @@ async def test_single_run_is_single_trace():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 1, (
f"Got {len(spans)}, but expected 1: the agent span. data:"
f"{[span.span_data for span in spans]}"
)
@pytest.mark.asyncio
async def test_multiple_runs_are_multiple_traces():
@ -69,9 +60,6 @@ async def test_multiple_runs_are_multiple_traces():
await Runner.run(agent, input="first_test")
await Runner.run(agent, input="second_test")
traces = fetch_traces()
assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -105,9 +93,6 @@ async def test_multiple_runs_are_multiple_traces():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
@pytest.mark.asyncio
async def test_wrapped_trace_is_single_trace():
@ -129,9 +114,6 @@ async def test_wrapped_trace_is_single_trace():
await Runner.run(agent, input="second_test")
await Runner.run(agent, input="third_test")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -169,9 +151,6 @@ async def test_wrapped_trace_is_single_trace():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
@pytest.mark.asyncio
async def test_parent_disabled_trace_disabled_agent_trace():
@ -185,15 +164,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
await Runner.run(agent, input="first_test")
traces = fetch_traces()
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
assert fetch_normalized_spans() == snapshot([])
spans = fetch_ordered_spans()
assert len(spans) == 0, (
f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
)
@pytest.mark.asyncio
async def test_manual_disabling_works():
@ -206,13 +178,8 @@ async def test_manual_disabling_works():
await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True))
traces = fetch_traces()
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
assert fetch_normalized_spans() == snapshot([])
spans = fetch_ordered_spans()
assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
@pytest.mark.asyncio
async def test_trace_config_works():
@ -255,9 +222,6 @@ async def test_not_starting_streaming_creates_trace():
break
await asyncio.sleep(0.1)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -277,9 +241,6 @@ async def test_not_starting_streaming_creates_trace():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
# Await the stream to avoid warnings about it not being awaited
async for _ in result.stream_events():
pass

View file

@ -64,13 +64,6 @@ async def test_get_response_creates_trace(monkeypatch):
]
)
spans = fetch_ordered_spans()
assert len(spans) == 1
assert isinstance(spans[0].span_data, ResponseSpanData)
assert spans[0].span_data.response is not None
assert spans[0].span_data.response.id == "dummy-id"
@pytest.mark.allow_call_model_methods
@pytest.mark.asyncio
@ -164,12 +157,6 @@ async def test_stream_response_creates_trace(monkeypatch):
]
)
spans = fetch_ordered_spans()
assert len(spans) == 1
assert isinstance(spans[0].span_data, ResponseSpanData)
assert spans[0].span_data.response is not None
assert spans[0].span_data.response.id == "dummy-id-123"
@pytest.mark.allow_call_model_methods
@pytest.mark.asyncio

View file

@ -18,7 +18,6 @@ from agents import (
Runner,
TResponseInputItem,
)
from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData
from .fake_model import FakeModel
from .test_responses import (
@ -28,7 +27,7 @@ from .test_responses import (
get_handoff_tool_call,
get_text_message,
)
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
from .testing_processor import fetch_normalized_spans
@pytest.mark.asyncio
@ -43,9 +42,6 @@ async def test_single_turn_model_error():
with pytest.raises(ValueError):
await Runner.run(agent, input="first_test")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -74,13 +70,6 @@ async def test_single_turn_model_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
generation_span = spans[1]
assert isinstance(generation_span.span_data, GenerationSpanData)
assert generation_span.error, "should have error"
@pytest.mark.asyncio
async def test_multi_turn_no_handoffs():
@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs():
with pytest.raises(ValueError):
await Runner.run(agent, input="first_test")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 4, (
f"should have agent, generation, tool, generation, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
assert last_generation_span.error, "should have error"
@pytest.mark.asyncio
async def test_tool_call_error():
@ -173,9 +150,6 @@ async def test_tool_call_error():
with pytest.raises(ModelBehaviorError):
await Runner.run(agent, input="first_test")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -209,15 +183,6 @@ async def test_tool_call_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 3, (
f"should have agent, generation, tool spans, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
assert function_span.error, "should have error"
@pytest.mark.asyncio
async def test_multiple_handoff_doesnt_error():
@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error():
result = await Runner.run(agent_3, input="user_message")
assert result.last_agent == agent_1, "should have picked first handoff"
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 7, (
f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
class Foo(TypedDict):
bar: str
@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error():
result = await Runner.run(agent_1, input="user_message")
assert result.final_output == Foo(bar="abc")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, (
f"should have 1 agent, 1 generation, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
@pytest.mark.asyncio
async def test_handoffs_lead_to_correct_agent_spans():
@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
f"should have ended on the third agent, got {result.last_agent.name}"
)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 12, (
f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
@pytest.mark.asyncio
async def test_max_turns_exceeded():
@ -503,9 +441,6 @@ async def test_max_turns_exceeded():
with pytest.raises(MaxTurnsExceeded):
await Runner.run(agent, input="user_message", max_turns=2)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -538,15 +473,6 @@ async def test_max_turns_exceeded():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 5, (
f"should have 1 agent span, 2 generations, 2 function calls, got "
f"{len(spans)} with data: {[x.span_data for x in spans]}"
)
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
assert agent_span.error, "last agent should have error"
def guardrail_function(
context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@ -568,9 +494,6 @@ async def test_guardrail_error():
with pytest.raises(InputGuardrailTripwireTriggered):
await Runner.run(agent, input="user_message")
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -594,12 +517,3 @@ async def test_guardrail_error():
}
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, (
f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
assert agent_span.error, "last agent should have error"

View file

@ -10,9 +10,6 @@ from typing_extensions import TypedDict
from agents import (
Agent,
AgentSpanData,
FunctionSpanData,
GenerationSpanData,
GuardrailFunctionOutput,
InputGuardrail,
InputGuardrailTripwireTriggered,
@ -33,7 +30,7 @@ from .test_responses import (
get_handoff_tool_call,
get_text_message,
)
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
from .testing_processor import fetch_normalized_spans
@pytest.mark.asyncio
@ -50,9 +47,6 @@ async def test_single_turn_model_error():
async for _ in result.stream_events():
pass
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -82,13 +76,6 @@ async def test_single_turn_model_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
generation_span = spans[1]
assert isinstance(generation_span.span_data, GenerationSpanData)
assert generation_span.error, "should have error"
@pytest.mark.asyncio
async def test_multi_turn_no_handoffs():
@ -116,9 +103,6 @@ async def test_multi_turn_no_handoffs():
async for _ in result.stream_events():
pass
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -157,15 +141,6 @@ async def test_multi_turn_no_handoffs():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 4, (
f"should have agent, generation, tool, generation, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
assert last_generation_span.error, "should have error"
@pytest.mark.asyncio
async def test_tool_call_error():
@ -186,9 +161,6 @@ async def test_tool_call_error():
async for _ in result.stream_events():
pass
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -226,15 +198,6 @@ async def test_tool_call_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 3, (
f"should have agent, generation, tool spans, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
assert function_span.error, "should have error"
@pytest.mark.asyncio
async def test_multiple_handoff_doesnt_error():
@ -275,9 +238,6 @@ async def test_multiple_handoff_doesnt_error():
assert result.last_agent == agent_1, "should have picked first handoff"
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -315,12 +275,6 @@ async def test_multiple_handoff_doesnt_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 7, (
f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
class Foo(TypedDict):
bar: str
@ -350,9 +304,6 @@ async def test_multiple_final_output_no_error():
assert isinstance(result.final_output, dict)
assert result.final_output["bar"] == "abc"
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -368,12 +319,6 @@ async def test_multiple_final_output_no_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, (
f"should have 1 agent, 1 generation, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
@pytest.mark.asyncio
async def test_handoffs_lead_to_correct_agent_spans():
@ -425,85 +370,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
f"should have ended on the third agent, got {result.last_agent.name}"
)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
"workflow_name": "Agent workflow",
"children": [
{
"type": "agent",
"data": {
"name": "test_agent_3",
"handoffs": ["test_agent_1", "test_agent_2"],
"tools": ["some_function"],
"output_type": "str",
},
"children": [
{"type": "generation"},
{
"type": "function",
"data": {
"name": "some_function",
"input": '{"a": "b"}',
"output": "result",
},
},
{"type": "generation"},
{
"type": "handoff",
"data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"},
},
],
},
{
"type": "agent",
"data": {
"name": "test_agent_1",
"handoffs": ["test_agent_3"],
"tools": ["some_function"],
"output_type": "str",
},
"children": [
{"type": "generation"},
{
"type": "function",
"data": {
"name": "some_function",
"input": '{"a": "b"}',
"output": "result",
},
},
{"type": "generation"},
{
"type": "handoff",
"data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"},
},
],
},
{
"type": "agent",
"data": {
"name": "test_agent_3",
"handoffs": ["test_agent_1", "test_agent_2"],
"tools": ["some_function"],
"output_type": "str",
},
"children": [{"type": "generation"}],
},
],
}
]
)
spans = fetch_ordered_spans()
assert len(spans) == 12, (
f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
assert fetch_normalized_spans() == snapshot(
[
{
@ -601,9 +467,6 @@ async def test_max_turns_exceeded():
async for _ in result.stream_events():
pass
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -636,15 +499,6 @@ async def test_max_turns_exceeded():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 5, (
f"should have 1 agent, 2 generations, 2 function calls, got "
f"{len(spans)} with data: {[x.span_data for x in spans]}"
)
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
assert agent_span.error, "last agent should have error"
def input_guardrail_function(
context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@ -673,9 +527,6 @@ async def test_input_guardrail_error():
await asyncio.sleep(1)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -703,15 +554,6 @@ async def test_input_guardrail_error():
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, (
f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
assert agent_span.error, "last agent should have error"
def output_guardrail_function(
context: RunContextWrapper[Any], agent: Agent[Any], agent_output: Any
@ -740,9 +582,6 @@ async def test_output_guardrail_error():
await asyncio.sleep(1)
traces = fetch_traces()
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
assert fetch_normalized_spans() == snapshot(
[
{
@ -766,12 +605,3 @@ async def test_output_guardrail_error():
}
]
)
spans = fetch_ordered_spans()
assert len(spans) == 2, (
f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
f"{[x.span_data for x in spans]}"
)
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
assert agent_span.error, "last agent should have error"