Previous response id (#509)

Allows passing in `previous_response_id` so that the same input data does not have to be
re-sent on every call.

Test plan:
Examples. Adding tests in next PR shortly.

---
[//]: # (BEGIN SAPLING FOOTER)
* __->__ #509
* #508
This commit is contained in:
Rohan Mehta 2025-04-14 22:02:47 -04:00 committed by GitHub
parent 86ad99d798
commit 92d6e3e66c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 161 additions and 13 deletions

View file

@ -169,6 +169,8 @@ class ModelResponse:
response_id: str | None
"""An ID for the response which can be used to refer to the response in subsequent calls to the
model. Not supported by all model providers.
If using OpenAI models via the Responses API, this is the ID of the response, and it can
be passed to `Runner.run` as the `previous_response_id` parameter.
"""
def to_input_items(self) -> list[TResponseInputItem]:

View file

@ -44,6 +44,8 @@ class Model(abc.ABC):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> ModelResponse:
"""Get a response from the model.
@ -55,6 +57,8 @@ class Model(abc.ABC):
output_schema: The output schema to use.
handoffs: The handoffs available to the model.
tracing: Tracing configuration.
previous_response_id: the ID of the previous response. Generally not used by the model,
except for the OpenAI Responses API.
Returns:
The full model response.
@ -71,6 +75,8 @@ class Model(abc.ABC):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> AsyncIterator[TResponseStreamEvent]:
"""Stream a response from the model.
@ -82,6 +88,8 @@ class Model(abc.ABC):
output_schema: The output schema to use.
handoffs: The handoffs available to the model.
tracing: Tracing configuration.
previous_response_id: the ID of the previous response. Generally not used by the model,
except for the OpenAI Responses API.
Returns:
An iterator of response stream events, in OpenAI Responses format.

View file

@ -108,6 +108,7 @@ class OpenAIChatCompletionsModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
previous_response_id: str | None,
) -> ModelResponse:
with generation_span(
model=str(self.model),
@ -168,6 +169,8 @@ class OpenAIChatCompletionsModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> AsyncIterator[TResponseStreamEvent]:
"""
Yields a partial message as it is generated, as well as the usage information.

View file

@ -69,6 +69,7 @@ class OpenAIResponsesModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
previous_response_id: str | None,
) -> ModelResponse:
with response_span(disabled=tracing.is_disabled()) as span_response:
try:
@ -79,6 +80,7 @@ class OpenAIResponsesModel(Model):
tools,
output_schema,
handoffs,
previous_response_id,
stream=False,
)
@ -132,6 +134,7 @@ class OpenAIResponsesModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
previous_response_id: str | None,
) -> AsyncIterator[ResponseStreamEvent]:
"""
Yields a partial message as it is generated, as well as the usage information.
@ -145,6 +148,7 @@ class OpenAIResponsesModel(Model):
tools,
output_schema,
handoffs,
previous_response_id,
stream=True,
)
@ -180,6 +184,7 @@ class OpenAIResponsesModel(Model):
tools: list[Tool],
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
previous_response_id: str | None,
stream: Literal[True],
) -> AsyncStream[ResponseStreamEvent]: ...
@ -192,6 +197,7 @@ class OpenAIResponsesModel(Model):
tools: list[Tool],
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
previous_response_id: str | None,
stream: Literal[False],
) -> Response: ...
@ -203,6 +209,7 @@ class OpenAIResponsesModel(Model):
tools: list[Tool],
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
previous_response_id: str | None,
stream: Literal[True] | Literal[False] = False,
) -> Response | AsyncStream[ResponseStreamEvent]:
list_input = ItemHelpers.input_to_new_input_list(input)
@ -229,9 +236,11 @@ class OpenAIResponsesModel(Model):
f"Stream: {stream}\n"
f"Tool choice: {tool_choice}\n"
f"Response format: {response_format}\n"
f"Previous response id: {previous_response_id}\n"
)
return await self._client.responses.create(
previous_response_id=self._non_null_or_not_given(previous_response_id),
instructions=self._non_null_or_not_given(system_instructions),
model=self.model,
input=list_input,

View file

@ -80,6 +80,14 @@ class RunResultBase(abc.ABC):
return original_items + new_items
@property
def last_response_id(self) -> str | None:
"""Convenience property for the response ID of the last model response.

Returns `None` when `raw_responses` is empty; otherwise returns the `response_id` of the
last entry in `raw_responses`.
"""
if not self.raw_responses:
return None
return self.raw_responses[-1].response_id
@dataclass
class RunResult(RunResultBase):

View file

@ -117,6 +117,7 @@ class Runner:
max_turns: int = DEFAULT_MAX_TURNS,
hooks: RunHooks[TContext] | None = None,
run_config: RunConfig | None = None,
previous_response_id: str | None = None,
) -> RunResult:
"""Run a workflow starting at the given agent. The agent will run in a loop until a final
output is generated. The loop runs like so:
@ -141,6 +142,8 @@ class Runner:
AI invocation (including any tool calls that might occur).
hooks: An object that receives callbacks on various lifecycle events.
run_config: Global settings for the entire agent run.
previous_response_id: The ID of the previous response. If using OpenAI models via the
Responses API, this allows you to skip passing in input from the previous turn.
Returns:
A run result containing all the inputs, guardrail results and the output of the last
@ -230,6 +233,7 @@ class Runner:
run_config=run_config,
should_run_agent_start_hooks=should_run_agent_start_hooks,
tool_use_tracker=tool_use_tracker,
previous_response_id=previous_response_id,
),
)
else:
@ -243,6 +247,7 @@ class Runner:
run_config=run_config,
should_run_agent_start_hooks=should_run_agent_start_hooks,
tool_use_tracker=tool_use_tracker,
previous_response_id=previous_response_id,
)
should_run_agent_start_hooks = False
@ -291,6 +296,7 @@ class Runner:
max_turns: int = DEFAULT_MAX_TURNS,
hooks: RunHooks[TContext] | None = None,
run_config: RunConfig | None = None,
previous_response_id: str | None = None,
) -> RunResult:
"""Run a workflow synchronously, starting at the given agent. Note that this just wraps the
`run` method, so it will not work if there's already an event loop (e.g. inside an async
@ -319,6 +325,8 @@ class Runner:
AI invocation (including any tool calls that might occur).
hooks: An object that receives callbacks on various lifecycle events.
run_config: Global settings for the entire agent run.
previous_response_id: The ID of the previous response. If using OpenAI models via the
Responses API, this allows you to skip passing in input from the previous turn.
Returns:
A run result containing all the inputs, guardrail results and the output of the last
@ -332,6 +340,7 @@ class Runner:
max_turns=max_turns,
hooks=hooks,
run_config=run_config,
previous_response_id=previous_response_id,
)
)
@ -344,6 +353,7 @@ class Runner:
max_turns: int = DEFAULT_MAX_TURNS,
hooks: RunHooks[TContext] | None = None,
run_config: RunConfig | None = None,
previous_response_id: str | None = None,
) -> RunResultStreaming:
"""Run a workflow starting at the given agent in streaming mode. The returned result object
contains a method you can use to stream semantic events as they are generated.
@ -370,7 +380,8 @@ class Runner:
AI invocation (including any tool calls that might occur).
hooks: An object that receives callbacks on various lifecycle events.
run_config: Global settings for the entire agent run.
previous_response_id: The ID of the previous response. If using OpenAI models via the
Responses API, this allows you to skip passing in input from the previous turn.
Returns:
A result object that contains data about the run, as well as a method to stream events.
"""
@ -428,6 +439,7 @@ class Runner:
hooks=hooks,
context_wrapper=context_wrapper,
run_config=run_config,
previous_response_id=previous_response_id,
)
)
return streamed_result
@ -485,6 +497,7 @@ class Runner:
hooks: RunHooks[TContext],
context_wrapper: RunContextWrapper[TContext],
run_config: RunConfig,
previous_response_id: str | None,
):
current_span: Span[AgentSpanData] | None = None
current_agent = starting_agent
@ -554,6 +567,7 @@ class Runner:
should_run_agent_start_hooks,
tool_use_tracker,
all_tools,
previous_response_id,
)
should_run_agent_start_hooks = False
@ -623,6 +637,7 @@ class Runner:
should_run_agent_start_hooks: bool,
tool_use_tracker: AgentToolUseTracker,
all_tools: list[Tool],
previous_response_id: str | None,
) -> SingleStepResult:
if should_run_agent_start_hooks:
await asyncio.gather(
@ -662,6 +677,7 @@ class Runner:
get_model_tracing_impl(
run_config.tracing_disabled, run_config.trace_include_sensitive_data
),
previous_response_id=previous_response_id,
):
if isinstance(event, ResponseCompletedEvent):
usage = (
@ -717,6 +733,7 @@ class Runner:
run_config: RunConfig,
should_run_agent_start_hooks: bool,
tool_use_tracker: AgentToolUseTracker,
previous_response_id: str | None,
) -> SingleStepResult:
# Ensure we run the hooks before anything else
if should_run_agent_start_hooks:
@ -746,6 +763,7 @@ class Runner:
context_wrapper,
run_config,
tool_use_tracker,
previous_response_id,
)
return await cls._get_single_step_result_from_response(
@ -888,6 +906,7 @@ class Runner:
context_wrapper: RunContextWrapper[TContext],
run_config: RunConfig,
tool_use_tracker: AgentToolUseTracker,
previous_response_id: str | None,
) -> ModelResponse:
model = cls._get_model(agent, run_config)
model_settings = agent.model_settings.resolve(run_config.model_settings)
@ -903,6 +922,7 @@ class Runner:
tracing=get_model_tracing_impl(
run_config.tracing_disabled, run_config.trace_include_sensitive_data
),
previous_response_id=previous_response_id,
)
context_wrapper.usage.add(new_response.usage)

View file

@ -54,6 +54,8 @@ class FakeModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> ModelResponse:
self.last_turn_args = {
"system_instructions": system_instructions,
@ -93,6 +95,8 @@ class FakeModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> AsyncIterator[TResponseStreamEvent]:
with generation_span(disabled=not self.tracing_enabled) as span:
output = self.get_next_output()

View file

@ -67,6 +67,7 @@ async def test_get_response_with_text_message(monkeypatch) -> None:
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
)
# Should have produced exactly one output message with one text part
assert isinstance(resp, ModelResponse)
@ -115,6 +116,7 @@ async def test_get_response_with_refusal(monkeypatch) -> None:
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
)
assert len(resp.output) == 1
assert isinstance(resp.output[0], ResponseOutputMessage)
@ -164,6 +166,7 @@ async def test_get_response_with_tool_call(monkeypatch) -> None:
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
)
# Expect a message item followed by a function tool call item.
assert len(resp.output) == 2

View file

@ -79,6 +79,7 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> No
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
):
output_events.append(event)
# We expect a response.created, then a response.output_item.added, content part added,
@ -168,6 +169,7 @@ async def test_stream_response_yields_events_for_refusal_content(monkeypatch) ->
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
):
output_events.append(event)
# Expect sequence similar to text: created, output_item.added, content part added,
@ -255,6 +257,7 @@ async def test_stream_response_yields_events_for_tool_call(monkeypatch) -> None:
output_schema=None,
handoffs=[],
tracing=ModelTracing.DISABLED,
previous_response_id=None,
):
output_events.append(event)
# Sequence should be: response.created, then after loop we expect function call-related events:

View file

@ -44,7 +44,14 @@ async def test_get_response_creates_trace(monkeypatch):
# Mock _fetch_response to return a dummy response with a known id
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
return DummyResponse()
@ -52,7 +59,14 @@ async def test_get_response_creates_trace(monkeypatch):
# Call get_response
await model.get_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.ENABLED,
previous_response_id=None,
)
assert fetch_normalized_spans() == snapshot(
@ -74,7 +88,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):
# Mock _fetch_response to return a dummy response with a known id
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
return DummyResponse()
@ -82,7 +103,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):
# Call get_response
await model.get_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.ENABLED_WITHOUT_DATA,
previous_response_id=None,
)
assert fetch_normalized_spans() == snapshot(
@ -102,7 +130,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):
# Mock _fetch_response to return a dummy response with a known id
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
return DummyResponse()
@ -110,7 +145,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):
# Call get_response
await model.get_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.DISABLED,
previous_response_id=None,
)
assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
@ -127,7 +169,14 @@ async def test_stream_response_creates_trace(monkeypatch):
# Define a dummy fetch function that returns an async stream with a dummy response
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
class DummyStream:
async def __aiter__(self):
@ -142,7 +191,14 @@ async def test_stream_response_creates_trace(monkeypatch):
# Consume the stream to trigger processing of the final response
async for _ in model.stream_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.ENABLED,
previous_response_id=None,
):
pass
@ -165,7 +221,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):
# Define a dummy fetch function that returns an async stream with a dummy response
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
class DummyStream:
async def __aiter__(self):
@ -180,7 +243,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):
# Consume the stream to trigger processing of the final response
async for _ in model.stream_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.ENABLED_WITHOUT_DATA,
previous_response_id=None,
):
pass
@ -202,7 +272,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):
# Define a dummy fetch function that returns an async stream with a dummy response
async def dummy_fetch_response(
system_instructions, input, model_settings, tools, output_schema, handoffs, stream
system_instructions,
input,
model_settings,
tools,
output_schema,
handoffs,
prev_response_id,
stream,
):
class DummyStream:
async def __aiter__(self):
@ -217,7 +294,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):
# Consume the stream to trigger processing of the final response
async for _ in model.stream_response(
"instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
"instr",
"input",
ModelSettings(),
[],
None,
[],
ModelTracing.DISABLED,
previous_response_id=None,
):
pass

View file

@ -51,6 +51,8 @@ class FakeStreamingModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> ModelResponse:
raise NotImplementedError("Not implemented")
@ -63,6 +65,8 @@ class FakeStreamingModel(Model):
output_schema: AgentOutputSchema | None,
handoffs: list[Handoff],
tracing: ModelTracing,
*,
previous_response_id: str | None,
) -> AsyncIterator[TResponseStreamEvent]:
output = self.get_next_output()
for item in output: