Previous response id (#509)

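Allows passing `previous_response_id` to the `Runner` run methods so that, when using OpenAI models via the Responses API, input from previous turns does not have to be sent again on every call. Test plan: Examples. Adding tests in next PR shortly. --- [//]: # (BEGIN SAPLING FOOTER) * __->__ #509 * #508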
2025-04-14 22:02:47 -04:00 · 2025-04-14 22:02:47 -04:00 · 92d6e3e66c
commit 92d6e3e66c
parent 86ad99d798
11 changed files with 161 additions and 13 deletions
--- a/src/agents/items.py
+++ b/src/agents/items.py
@ -169,6 +169,8 @@ class ModelResponse:
    response_id: str | None
    """An ID for the response which can be used to refer to the response in subsequent calls to the
    model. Not supported by all model providers.
+    If using OpenAI models via the Responses API, this is the ID of the response, and it can be
+    passed to `Runner.run` as the `previous_response_id` parameter.
    """

    def to_input_items(self) -> list[TResponseInputItem]:
--- a/src/agents/models/interface.py
+++ b/src/agents/models/interface.py
@ -44,6 +44,8 @@ class Model(abc.ABC):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        """Get a response from the model.

@ -55,6 +57,8 @@ class Model(abc.ABC):
            output_schema: The output schema to use.
            handoffs: The handoffs available to the model.
            tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.

        Returns:
            The full model response.
@ -71,6 +75,8 @@ class Model(abc.ABC):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        """Stream a response from the model.

@ -82,6 +88,8 @@ class Model(abc.ABC):
            output_schema: The output schema to use.
            handoffs: The handoffs available to the model.
            tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.

        Returns:
            An iterator of response stream events, in OpenAI Responses format.
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@ -108,6 +108,7 @@ class OpenAIChatCompletionsModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        with generation_span(
            model=str(self.model),
@ -168,6 +169,8 @@ class OpenAIChatCompletionsModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        """
        Yields a partial message as it is generated, as well as the usage information.
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@ -69,6 +69,7 @@ class OpenAIResponsesModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        with response_span(disabled=tracing.is_disabled()) as span_response:
            try:
@ -79,6 +80,7 @@ class OpenAIResponsesModel(Model):
                    tools,
                    output_schema,
                    handoffs,
+                    previous_response_id,
                    stream=False,
                )

@ -132,6 +134,7 @@ class OpenAIResponsesModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        previous_response_id: str | None,
    ) -> AsyncIterator[ResponseStreamEvent]:
        """
        Yields a partial message as it is generated, as well as the usage information.
@ -145,6 +148,7 @@ class OpenAIResponsesModel(Model):
                    tools,
                    output_schema,
                    handoffs,
+                    previous_response_id,
                    stream=True,
                )

@ -180,6 +184,7 @@ class OpenAIResponsesModel(Model):
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
+        previous_response_id: str | None,
        stream: Literal[True],
    ) -> AsyncStream[ResponseStreamEvent]: ...

@ -192,6 +197,7 @@ class OpenAIResponsesModel(Model):
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
+        previous_response_id: str | None,
        stream: Literal[False],
    ) -> Response: ...

@ -203,6 +209,7 @@ class OpenAIResponsesModel(Model):
        tools: list[Tool],
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
+        previous_response_id: str | None,
        stream: Literal[True] | Literal[False] = False,
    ) -> Response | AsyncStream[ResponseStreamEvent]:
        list_input = ItemHelpers.input_to_new_input_list(input)
@ -229,9 +236,11 @@ class OpenAIResponsesModel(Model):
                f"Stream: {stream}\n"
                f"Tool choice: {tool_choice}\n"
                f"Response format: {response_format}\n"
+                f"Previous response id: {previous_response_id}\n"
            )

        return await self._client.responses.create(
+            previous_response_id=self._non_null_or_not_given(previous_response_id),
            instructions=self._non_null_or_not_given(system_instructions),
            model=self.model,
            input=list_input,
--- a/src/agents/result.py
+++ b/src/agents/result.py
@ -80,6 +80,14 @@ class RunResultBase(abc.ABC):

        return original_items + new_items

+    @property
+    def last_response_id(self) -> str | None:
+        """Convenience property to get the response ID of the last model response."""
+        if not self.raw_responses:
+            return None
+
+        return self.raw_responses[-1].response_id
+

@dataclass
 class RunResult(RunResultBase):
--- a/src/agents/run.py
+++ b/src/agents/run.py
@ -117,6 +117,7 @@ class Runner:
        max_turns: int = DEFAULT_MAX_TURNS,
        hooks: RunHooks[TContext] | None = None,
        run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
    ) -> RunResult:
        """Run a workflow starting at the given agent. The agent will run in a loop until a final
        output is generated. The loop runs like so:
@ -141,6 +142,8 @@ class Runner:
                AI invocation (including any tool calls that might occur).
            hooks: An object that receives callbacks on various lifecycle events.
            run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.

        Returns:
            A run result containing all the inputs, guardrail results and the output of the last
@ -230,6 +233,7 @@ class Runner:
                                run_config=run_config,
                                should_run_agent_start_hooks=should_run_agent_start_hooks,
                                tool_use_tracker=tool_use_tracker,
+                                previous_response_id=previous_response_id,
                            ),
                        )
                    else:
@ -243,6 +247,7 @@ class Runner:
                            run_config=run_config,
                            should_run_agent_start_hooks=should_run_agent_start_hooks,
                            tool_use_tracker=tool_use_tracker,
+                            previous_response_id=previous_response_id,
                        )
                    should_run_agent_start_hooks = False

@ -291,6 +296,7 @@ class Runner:
        max_turns: int = DEFAULT_MAX_TURNS,
        hooks: RunHooks[TContext] | None = None,
        run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
    ) -> RunResult:
        """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
        `run` method, so it will not work if there's already an event loop (e.g. inside an async
@ -319,6 +325,8 @@ class Runner:
                AI invocation (including any tool calls that might occur).
            hooks: An object that receives callbacks on various lifecycle events.
            run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.

        Returns:
            A run result containing all the inputs, guardrail results and the output of the last
@ -332,6 +340,7 @@ class Runner:
                max_turns=max_turns,
                hooks=hooks,
                run_config=run_config,
+                previous_response_id=previous_response_id,
            )
        )

@ -344,6 +353,7 @@ class Runner:
        max_turns: int = DEFAULT_MAX_TURNS,
        hooks: RunHooks[TContext] | None = None,
        run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
    ) -> RunResultStreaming:
        """Run a workflow starting at the given agent in streaming mode. The returned result object
        contains a method you can use to stream semantic events as they are generated.
@ -370,7 +380,8 @@ class Runner:
                AI invocation (including any tool calls that might occur).
            hooks: An object that receives callbacks on various lifecycle events.
            run_config: Global settings for the entire agent run.
-
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing in input from the previous turn.
        Returns:
            A result object that contains data about the run, as well as a method to stream events.
        """
@ -428,6 +439,7 @@ class Runner:
                hooks=hooks,
                context_wrapper=context_wrapper,
                run_config=run_config,
+                previous_response_id=previous_response_id,
            )
        )
        return streamed_result
@ -485,6 +497,7 @@ class Runner:
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        run_config: RunConfig,
+        previous_response_id: str | None,
    ):
        current_span: Span[AgentSpanData] | None = None
        current_agent = starting_agent
@ -554,6 +567,7 @@ class Runner:
                        should_run_agent_start_hooks,
                        tool_use_tracker,
                        all_tools,
+                        previous_response_id,
                    )
                    should_run_agent_start_hooks = False

@ -623,6 +637,7 @@ class Runner:
        should_run_agent_start_hooks: bool,
        tool_use_tracker: AgentToolUseTracker,
        all_tools: list[Tool],
+        previous_response_id: str | None,
    ) -> SingleStepResult:
        if should_run_agent_start_hooks:
            await asyncio.gather(
@ -662,6 +677,7 @@ class Runner:
            get_model_tracing_impl(
                run_config.tracing_disabled, run_config.trace_include_sensitive_data
            ),
+            previous_response_id=previous_response_id,
        ):
            if isinstance(event, ResponseCompletedEvent):
                usage = (
@ -717,6 +733,7 @@ class Runner:
        run_config: RunConfig,
        should_run_agent_start_hooks: bool,
        tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
    ) -> SingleStepResult:
        # Ensure we run the hooks before anything else
        if should_run_agent_start_hooks:
@ -746,6 +763,7 @@ class Runner:
            context_wrapper,
            run_config,
            tool_use_tracker,
+            previous_response_id,
        )

        return await cls._get_single_step_result_from_response(
@ -888,6 +906,7 @@ class Runner:
        context_wrapper: RunContextWrapper[TContext],
        run_config: RunConfig,
        tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        model = cls._get_model(agent, run_config)
        model_settings = agent.model_settings.resolve(run_config.model_settings)
@ -903,6 +922,7 @@ class Runner:
            tracing=get_model_tracing_impl(
                run_config.tracing_disabled, run_config.trace_include_sensitive_data
            ),
+            previous_response_id=previous_response_id,
        )

        context_wrapper.usage.add(new_response.usage)
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@ -54,6 +54,8 @@ class FakeModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        self.last_turn_args = {
            "system_instructions": system_instructions,
@ -93,6 +95,8 @@ class FakeModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        with generation_span(disabled=not self.tracing_enabled) as span:
            output = self.get_next_output()
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@ -67,6 +67,7 @@ async def test_get_response_with_text_message(monkeypatch) -> None:
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    )
    # Should have produced exactly one output message with one text part
    assert isinstance(resp, ModelResponse)
@ -115,6 +116,7 @@ async def test_get_response_with_refusal(monkeypatch) -> None:
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    )
    assert len(resp.output) == 1
    assert isinstance(resp.output[0], ResponseOutputMessage)
@ -164,6 +166,7 @@ async def test_get_response_with_tool_call(monkeypatch) -> None:
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    )
    # Expect a message item followed by a function tool call item.
    assert len(resp.output) == 2
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@ -79,6 +79,7 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> No
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    ):
        output_events.append(event)
    # We expect a response.created, then a response.output_item.added, content part added,
@ -168,6 +169,7 @@ async def test_stream_response_yields_events_for_refusal_content(monkeypatch) ->
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    ):
        output_events.append(event)
    # Expect sequence similar to text: created, output_item.added, content part added,
@ -255,6 +257,7 @@ async def test_stream_response_yields_events_for_tool_call(monkeypatch) -> None:
        output_schema=None,
        handoffs=[],
        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
    ):
        output_events.append(event)
    # Sequence should be: response.created, then after loop we expect function call-related events:
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@ -44,7 +44,14 @@ async def test_get_response_creates_trace(monkeypatch):

        # Mock _fetch_response to return a dummy response with a known id
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            return DummyResponse()

@ -52,7 +59,14 @@ async def test_get_response_creates_trace(monkeypatch):

        # Call get_response
        await model.get_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.ENABLED,
+            previous_response_id=None,
        )

    assert fetch_normalized_spans() == snapshot(
@ -74,7 +88,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):

        # Mock _fetch_response to return a dummy response with a known id
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            return DummyResponse()

@ -82,7 +103,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):

        # Call get_response
        await model.get_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.ENABLED_WITHOUT_DATA,
+            previous_response_id=None,
        )

    assert fetch_normalized_spans() == snapshot(
@ -102,7 +130,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):

        # Mock _fetch_response to return a dummy response with a known id
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            return DummyResponse()

@ -110,7 +145,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):

        # Call get_response
        await model.get_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.DISABLED,
+            previous_response_id=None,
        )

    assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
@ -127,7 +169,14 @@ async def test_stream_response_creates_trace(monkeypatch):

        # Define a dummy fetch function that returns an async stream with a dummy response
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            class DummyStream:
                async def __aiter__(self):
@ -142,7 +191,14 @@ async def test_stream_response_creates_trace(monkeypatch):

        # Consume the stream to trigger processing of the final response
        async for _ in model.stream_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.ENABLED,
+            previous_response_id=None,
        ):
            pass

@ -165,7 +221,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):

        # Define a dummy fetch function that returns an async stream with a dummy response
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            class DummyStream:
                async def __aiter__(self):
@ -180,7 +243,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):

        # Consume the stream to trigger processing of the final response
        async for _ in model.stream_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.ENABLED_WITHOUT_DATA,
+            previous_response_id=None,
        ):
            pass

@ -202,7 +272,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):

        # Define a dummy fetch function that returns an async stream with a dummy response
        async def dummy_fetch_response(
-            system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            prev_response_id,
+            stream,
        ):
            class DummyStream:
                async def __aiter__(self):
@ -217,7 +294,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):

        # Consume the stream to trigger processing of the final response
        async for _ in model.stream_response(
-            "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+            "instr",
+            "input",
+            ModelSettings(),
+            [],
+            None,
+            [],
+            ModelTracing.DISABLED,
+            previous_response_id=None,
        ):
            pass

--- a/tests/voice/test_workflow.py
+++ b/tests/voice/test_workflow.py
@ -51,6 +51,8 @@ class FakeStreamingModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> ModelResponse:
        raise NotImplementedError("Not implemented")

@ -63,6 +65,8 @@ class FakeStreamingModel(Model):
        output_schema: AgentOutputSchema | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        output = self.get_next_output()
        for item in output: