Hosted MCP support (#731)

--- [//]: # (BEGIN SAPLING FOOTER) * #732 * __->__ #731
2025-05-21 15:21:37 -04:00 · 2025-05-21 15:21:37 -04:00 · 9fa5c39d69
commit 9fa5c39d69
parent ce2e2a4571
9 changed files with 332 additions and 11 deletions
--- a/examples/hosted_mcp/init.py
+++ b/examples/hosted_mcp/init.py
--- a/examples/hosted_mcp/approvals.py
+++ b/examples/hosted_mcp/approvals.py
@ -0,0 +1,61 @@
+import argparse
+import asyncio
+
+from agents import (
+    Agent,
+    HostedMCPTool,
+    MCPToolApprovalFunctionResult,
+    MCPToolApprovalRequest,
+    Runner,
+)
+
+"""This example demonstrates how to use the hosted MCP support in the OpenAI Responses API, with
+approval callbacks."""
+
+
+def approval_callback(request: MCPToolApprovalRequest) -> MCPToolApprovalFunctionResult:
+    """Ask the user on stdin whether the requested MCP tool call may run.
+
+    "y" or "yes" (case-insensitive, surrounding whitespace ignored) approves the call; any
+    other answer rejects it and attaches a reason to the result.
+    """
+    answer = input(f"Approve running the tool `{request.data.name}`? (y/n) ")
+    # Normalise the reply so "Y", "yes" or a trailing space are not treated as a denial.
+    if answer.strip().lower() in ("y", "yes"):
+        return {"approve": True}
+    return {"approve": False, "reason": "User denied"}
+
+
+async def main(verbose: bool, stream: bool):
+    """Run the approval-gated hosted MCP example.
+
+    Args:
+        verbose: When true, print every item the run produced once it has finished.
+        stream: When true, use the streaming runner and print each run-item event type.
+    """
+    agent = Agent(
+        name="Assistant",
+        tools=[
+            HostedMCPTool(
+                tool_config={
+                    "type": "mcp",
+                    "server_label": "gitmcp",
+                    "server_url": "https://gitmcp.io/openai/codex",
+                    "require_approval": "always",
+                },
+                on_approval_request=approval_callback,
+            )
+        ],
+    )
+
+    if stream:
+        result = Runner.run_streamed(agent, "Which language is this repo written in?")
+        async for event in result.stream_events():
+            if event.type == "run_item_stream_event":
+                print(f"Got event of type {event.item.__class__.__name__}")
+        print(f"Done streaming; final result: {result.final_output}")
+        new_items = result.new_items
+    else:
+        res = await Runner.run(agent, "Which language is this repo written in?")
+        print(res.final_output)
+        new_items = res.new_items
+
+    # Both branches record their items in `new_items`: the non-streaming branch never binds
+    # `result`, so reading `result.new_items` here failed for `--verbose` without `--stream`.
+    if verbose:
+        for item in new_items:
+            print(item)
+
+
+if __name__ == "__main__":
+    # Two opt-in command-line switches, both off unless passed explicitly.
+    cli = argparse.ArgumentParser()
+    for flag in ("--verbose", "--stream"):
+        cli.add_argument(flag, action="store_true", default=False)
+    options = cli.parse_args()
+
+    asyncio.run(main(options.verbose, options.stream))
--- a/examples/hosted_mcp/simple.py
+++ b/examples/hosted_mcp/simple.py
@ -0,0 +1,47 @@
+import argparse
+import asyncio
+
+from agents import Agent, HostedMCPTool, Runner
+
+"""This example demonstrates how to use the hosted MCP support in the OpenAI Responses API, with
+approvals not required for any tools. You should only use this for trusted MCP servers."""
+
+
+async def main(verbose: bool, stream: bool):
+    """Run the hosted MCP example that never asks for tool approval.
+
+    Args:
+        verbose: When true, print every item the run produced once it has finished.
+        stream: When true, use the streaming runner and print each run-item event type.
+    """
+    agent = Agent(
+        name="Assistant",
+        tools=[
+            HostedMCPTool(
+                tool_config={
+                    "type": "mcp",
+                    "server_label": "gitmcp",
+                    "server_url": "https://gitmcp.io/openai/codex",
+                    "require_approval": "never",
+                }
+            )
+        ],
+    )
+
+    if stream:
+        result = Runner.run_streamed(agent, "Which language is this repo written in?")
+        async for event in result.stream_events():
+            if event.type == "run_item_stream_event":
+                print(f"Got event of type {event.item.__class__.__name__}")
+        print(f"Done streaming; final result: {result.final_output}")
+        new_items = result.new_items
+    else:
+        res = await Runner.run(agent, "Which language is this repo written in?")
+        print(res.final_output)
+        # The repository is primarily written in multiple languages, including Rust and TypeScript...
+        new_items = res.new_items
+
+    # Both branches record their items in `new_items`: the non-streaming branch never binds
+    # `result`, so reading `result.new_items` here failed for `--verbose` without `--stream`.
+    if verbose:
+        for item in new_items:
+            print(item)
+
+
+if __name__ == "__main__":
+    # Two opt-in command-line switches, both off unless passed explicitly.
+    cli = argparse.ArgumentParser()
+    for flag in ("--verbose", "--stream"):
+        cli.add_argument(flag, action="store_true", default=False)
+    options = cli.parse_args()
+
+    asyncio.run(main(options.verbose, options.stream))
--- a/src/agents/init.py
+++ b/src/agents/init.py
@ -58,6 +58,10 @@ from .tool import (
    FileSearchTool,
    FunctionTool,
    FunctionToolResult,
+    HostedMCPTool,
+    MCPToolApprovalFunction,
+    MCPToolApprovalFunctionResult,
+    MCPToolApprovalRequest,
    Tool,
    WebSearchTool,
    default_tool_error_function,
@ -208,6 +212,10 @@ __all__ = [
    "FileSearchTool",
    "Tool",
    "WebSearchTool",
+    "HostedMCPTool",
+    "MCPToolApprovalFunction",
+    "MCPToolApprovalRequest",
+    "MCPToolApprovalFunctionResult",
    "function_tool",
    "Usage",
    "add_trace_processor",
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@ -25,7 +25,8 @@ from openai.types.responses.response_computer_tool_call import (
    ActionType,
    ActionWait,
 )
-from openai.types.responses.response_input_param import ComputerCallOutput
+from openai.types.responses.response_input_param import ComputerCallOutput, McpApprovalResponse
+from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem

 from .agent import Agent, ToolsToFinalOutputResult
@ -38,6 +39,9 @@ from .items import (
    HandoffCallItem,
    HandoffOutputItem,
    ItemHelpers,
+    MCPApprovalRequestItem,
+    MCPApprovalResponseItem,
+    MCPListToolsItem,
    MessageOutputItem,
    ModelResponse,
    ReasoningItem,
@ -52,7 +56,14 @@ from .model_settings import ModelSettings
 from .models.interface import ModelTracing
 from .run_context import RunContextWrapper, TContext
 from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
+from .tool import (
+    ComputerTool,
+    FunctionTool,
+    FunctionToolResult,
+    HostedMCPTool,
+    MCPToolApprovalRequest,
+    Tool,
+)
 from .tracing import (
    SpanError,
    Trace,
@ -112,6 +123,12 @@ class ToolRunComputerAction:
    computer_tool: ComputerTool


+@dataclass
+class ToolRunMCPApprovalRequest:
+    """An MCP approval request paired with the hosted MCP tool that will answer it."""
+
+    # The approval request taken from the model response output.
+    request_item: McpApprovalRequest
+    # The hosted MCP tool whose `server_label` matched the request; its `on_approval_request`
+    # callback is the one invoked for this request.
+    mcp_tool: HostedMCPTool
+
+
@dataclass
 class ProcessedResponse:
    new_items: list[RunItem]
@ -119,8 +136,9 @@ class ProcessedResponse:
    functions: list[ToolRunFunction]
    computer_actions: list[ToolRunComputerAction]
    tools_used: list[str]  # Names of all tools used, including hosted tools
+    mcp_approval_requests: list[ToolRunMCPApprovalRequest]  # Only requests with callbacks

-    def has_tools_to_run(self) -> bool:
+    def has_tools_or_approvals_to_run(self) -> bool:
        # Handoffs, functions and computer actions need local processing
        # Hosted tools have already run, so there's nothing to do.
        return any(
@ -128,6 +146,7 @@ class ProcessedResponse:
                self.handoffs,
                self.functions,
                self.computer_actions,
+                self.mcp_approval_requests,
            ]
        )

@ -226,7 +245,16 @@ class RunImpl:
        new_step_items.extend([result.run_item for result in function_results])
        new_step_items.extend(computer_results)

-        # Second, check if there are any handoffs
+        # Next, run the MCP approval requests
+        if processed_response.mcp_approval_requests:
+            approval_results = await cls.execute_mcp_approval_requests(
+                agent=agent,
+                approval_requests=processed_response.mcp_approval_requests,
+                context_wrapper=context_wrapper,
+            )
+            new_step_items.extend(approval_results)
+
+        # Next, check if there are any handoffs
        if run_handoffs := processed_response.handoffs:
            return await cls.execute_handoffs(
                agent=agent,
@ -240,7 +268,7 @@ class RunImpl:
                run_config=run_config,
            )

-        # Third, we'll check if the tool use should result in a final output
+        # Next, we'll check if the tool use should result in a final output
        check_tool_use = await cls._check_for_final_output_from_tools(
            agent=agent,
            tool_results=function_results,
@ -295,7 +323,7 @@ class RunImpl:
            )
        elif (
            not output_schema or output_schema.is_plain_text()
-        ) and not processed_response.has_tools_to_run():
+        ) and not processed_response.has_tools_or_approvals_to_run():
            return await cls.execute_final_output(
                agent=agent,
                original_input=original_input,
@ -343,10 +371,16 @@ class RunImpl:
        run_handoffs = []
        functions = []
        computer_actions = []
+        mcp_approval_requests = []
        tools_used: list[str] = []
        handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
        function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
        computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
+        hosted_mcp_server_map = {
+            tool.tool_config["server_label"]: tool
+            for tool in all_tools
+            if isinstance(tool, HostedMCPTool)
+        }

        for output in response.output:
            if isinstance(output, ResponseOutputMessage):
@ -375,6 +409,34 @@ class RunImpl:
                computer_actions.append(
                    ToolRunComputerAction(tool_call=output, computer_tool=computer_tool)
                )
+            elif isinstance(output, McpApprovalRequest):
+                items.append(MCPApprovalRequestItem(raw_item=output, agent=agent))
+                if output.server_label not in hosted_mcp_server_map:
+                    _error_tracing.attach_error_to_current_span(
+                        SpanError(
+                            message="MCP server label not found",
+                            data={"server_label": output.server_label},
+                        )
+                    )
+                    raise ModelBehaviorError(f"MCP server label {output.server_label} not found")
+                else:
+                    server = hosted_mcp_server_map[output.server_label]
+                    if server.on_approval_request:
+                        mcp_approval_requests.append(
+                            ToolRunMCPApprovalRequest(
+                                request_item=output,
+                                mcp_tool=server,
+                            )
+                        )
+                    else:
+                        logger.warning(
+                            f"MCP server {output.server_label} has no on_approval_request hook"
+                        )
+            elif isinstance(output, McpListTools):
+                items.append(MCPListToolsItem(raw_item=output, agent=agent))
+            elif isinstance(output, McpCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append(output.name)
            elif not isinstance(output, ResponseFunctionToolCall):
                logger.warning(f"Unexpected output type, ignoring: {type(output)}")
                continue
@ -417,6 +479,7 @@ class RunImpl:
            functions=functions,
            computer_actions=computer_actions,
            tools_used=tools_used,
+            mcp_approval_requests=mcp_approval_requests,
        )

    @classmethod
@ -643,6 +706,40 @@ class RunImpl:
            next_step=NextStepHandoff(new_agent),
        )

+    @classmethod
+    async def execute_mcp_approval_requests(
+        cls,
+        *,
+        agent: Agent[TContext],
+        approval_requests: list[ToolRunMCPApprovalRequest],
+        context_wrapper: RunContextWrapper[TContext],
+    ) -> list[RunItem]:
+        """Resolve MCP approval requests by calling each tool's approval callback.
+
+        Every request is handed to its tool's `on_approval_request` callback, which may be
+        synchronous or return an awaitable. The callbacks are awaited together via
+        `asyncio.gather`, and each result is converted into an `MCPApprovalResponseItem`.
+
+        Args:
+            agent: The agent recorded on each produced response item.
+            approval_requests: The requests to resolve; each tool must have a callback set.
+            context_wrapper: The run context passed through to the callbacks.
+
+        Returns:
+            One response item per request, in the same order as `approval_requests`.
+        """
+
+        async def run_single_approval(approval_request: ToolRunMCPApprovalRequest) -> RunItem:
+            on_approval = approval_request.mcp_tool.on_approval_request
+            assert on_approval is not None, "Callback is required for MCP approval requests"
+
+            outcome = on_approval(
+                MCPToolApprovalRequest(context_wrapper, approval_request.request_item)
+            )
+            # The callback may be sync or async; only await when it handed back an awaitable.
+            if inspect.isawaitable(outcome):
+                outcome = await outcome
+
+            rejection_reason = outcome.get("reason", None)
+            response: McpApprovalResponse = {
+                "approval_request_id": approval_request.request_item.id,
+                "approve": outcome["approve"],
+                "type": "mcp_approval_response",
+            }
+            # A reason is only forwarded for rejections, and only when one was supplied.
+            if not outcome["approve"] and rejection_reason:
+                response["reason"] = rejection_reason
+            return MCPApprovalResponseItem(raw_item=response, agent=agent)
+
+        return await asyncio.gather(
+            *[run_single_approval(pending) for pending in approval_requests]
+        )
+
    @classmethod
    async def execute_final_output(
        cls,
@ -727,6 +824,11 @@ class RunImpl:
                event = RunItemStreamEvent(item=item, name="tool_output")
            elif isinstance(item, ReasoningItem):
                event = RunItemStreamEvent(item=item, name="reasoning_item_created")
+            elif isinstance(item, MCPApprovalRequestItem):
+                event = RunItemStreamEvent(item=item, name="mcp_approval_requested")
+            elif isinstance(item, MCPListToolsItem):
+                event = RunItemStreamEvent(item=item, name="mcp_list_tools")
+
            else:
                logger.warning(f"Unexpected item type: {type(item)}")
                event = None
--- a/src/agents/items.py
+++ b/src/agents/items.py
@ -18,7 +18,12 @@ from openai.types.responses import (
    ResponseOutputText,
    ResponseStreamEvent,
 )
-from openai.types.responses.response_input_item_param import ComputerCallOutput, FunctionCallOutput
+from openai.types.responses.response_input_item_param import (
+    ComputerCallOutput,
+    FunctionCallOutput,
+    McpApprovalResponse,
+)
+from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from pydantic import BaseModel
 from typing_extensions import TypeAlias
@ -108,6 +113,7 @@ ToolCallItemTypes: TypeAlias = Union[
    ResponseComputerToolCall,
    ResponseFileSearchToolCall,
    ResponseFunctionWebSearch,
+    McpCall,
 ]
 """A type that represents a tool call item."""

@ -147,6 +153,36 @@ class ReasoningItem(RunItemBase[ResponseReasoningItem]):
    type: Literal["reasoning_item"] = "reasoning_item"


+@dataclass
+class MCPListToolsItem(RunItemBase[McpListTools]):
+    """Represents a call to an MCP server to list tools.
+
+    Wraps an `McpListTools` entry taken from a model response's output.
+    """
+
+    raw_item: McpListTools
+    """The raw MCP list tools call."""
+
+    # Fixed tag identifying this kind of run item.
+    type: Literal["mcp_list_tools_item"] = "mcp_list_tools_item"
+
+
+@dataclass
+class MCPApprovalRequestItem(RunItemBase[McpApprovalRequest]):
+    """Represents a request for MCP approval.
+
+    Wraps an `McpApprovalRequest` entry taken from a model response's output.
+    """
+
+    raw_item: McpApprovalRequest
+    """The raw MCP approval request."""
+
+    # Fixed tag identifying this kind of run item.
+    type: Literal["mcp_approval_request_item"] = "mcp_approval_request_item"
+
+
+@dataclass
+class MCPApprovalResponseItem(RunItemBase[McpApprovalResponse]):
+    """Represents a response to an MCP approval request.
+
+    Unlike the other MCP items, the raw item is an input-side `McpApprovalResponse` dict
+    (approval request id, approve flag, optional reason) rather than a model output item.
+    """
+
+    raw_item: McpApprovalResponse
+    """The raw MCP approval response."""
+
+    # Fixed tag identifying this kind of run item.
+    type: Literal["mcp_approval_response_item"] = "mcp_approval_response_item"
+
+
 RunItem: TypeAlias = Union[
    MessageOutputItem,
    HandoffCallItem,
@ -154,6 +190,9 @@ RunItem: TypeAlias = Union[
    ToolCallItem,
    ToolCallOutputItem,
    ReasoningItem,
+    MCPListToolsItem,
+    MCPApprovalRequestItem,
+    MCPApprovalResponseItem,
 ]
 """An item generated by an agent."""

--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@ -24,7 +24,7 @@ from ..exceptions import UserError
 from ..handoffs import Handoff
 from ..items import ItemHelpers, ModelResponse, TResponseInputItem
 from ..logger import logger
-from ..tool import ComputerTool, FileSearchTool, FunctionTool, Tool, WebSearchTool
+from ..tool import ComputerTool, FileSearchTool, FunctionTool, HostedMCPTool, Tool, WebSearchTool
 from ..tracing import SpanError, response_span
 from ..usage import Usage
 from ..version import __version__
@ -383,7 +383,9 @@ class Converter:
                "display_height": tool.computer.dimensions[1],
            }
            includes = None
-
+        elif isinstance(tool, HostedMCPTool):
+            converted_tool = tool.tool_config
+            includes = None
        else:
            raise UserError(f"Unknown tool type: {type(tool)}, tool")

--- a/src/agents/stream_events.py
+++ b/src/agents/stream_events.py
@ -35,6 +35,8 @@ class RunItemStreamEvent:
        "tool_called",
        "tool_output",
        "reasoning_item_created",
+        "mcp_approval_requested",
+        "mcp_list_tools",
    ]
    """The name of the event."""

--- a/src/agents/tool.py
+++ b/src/agents/tool.py
@ -7,9 +7,11 @@ from dataclasses import dataclass
 from typing import Any, Callable, Literal, Union, overload

 from openai.types.responses.file_search_tool_param import Filters, RankingOptions
+from openai.types.responses.response_output_item import McpApprovalRequest
+from openai.types.responses.tool_param import Mcp
 from openai.types.responses.web_search_tool_param import UserLocation
 from pydantic import ValidationError
-from typing_extensions import Concatenate, ParamSpec
+from typing_extensions import Concatenate, NotRequired, ParamSpec, TypedDict

 from . import _debug
 from .computer import AsyncComputer, Computer
@ -130,7 +132,55 @@ class ComputerTool:
        return "computer_use_preview"


-Tool = Union[FunctionTool, FileSearchTool, WebSearchTool, ComputerTool]
+@dataclass
+class MCPToolApprovalRequest:
+    """A request to approve a tool call.
+
+    This is the single argument passed to an `MCPToolApprovalFunction`.
+    """
+
+    ctx_wrapper: RunContextWrapper[Any]
+    """The run context."""
+
+    data: McpApprovalRequest
+    """The data from the MCP tool approval request."""
+
+
+class MCPToolApprovalFunctionResult(TypedDict):
+    """The result of an MCP tool approval function.
+
+    Only `approve` is required; `reason` may be omitted entirely.
+    """
+
+    approve: bool
+    """Whether to approve the tool call."""
+
+    reason: NotRequired[str]
+    """An optional reason, if rejected."""
+
+
+MCPToolApprovalFunction = Callable[
+    [MCPToolApprovalRequest], MaybeAwaitable[MCPToolApprovalFunctionResult]
+]
+"""A function that approves or rejects a tool call."""
+
+
+@dataclass
+class HostedMCPTool:
+    """A tool that allows the LLM to use a remote MCP server. The LLM will automatically list and
+    call tools, without requiring a round trip back to your code.
+    If you want to run MCP servers locally via stdio, in a VPC or other non-publicly-accessible
+    environment, or you just prefer to run tool calls locally, then you can instead use the servers
+    in `agents.mcp` and pass `Agent(mcp_servers=[...])` to the agent."""
+
+    tool_config: Mcp
+    """The MCP tool config, which includes the server URL and other settings."""
+
+    on_approval_request: MCPToolApprovalFunction | None = None
+    """An optional function that will be called if approval is requested for an MCP tool. If not
+    provided, you will need to manually add approvals/rejections to the input and call
+    `Runner.run(...)` again."""
+
+    @property
+    def name(self):
+        """Return the fixed tool name, `"hosted_mcp"`, shared by every hosted MCP tool."""
+        return "hosted_mcp"
+
+
+Tool = Union[FunctionTool, FileSearchTool, WebSearchTool, ComputerTool, HostedMCPTool]
 """A tool that can be used in an agent."""


@ -308,3 +358,13 @@ def function_tool(
        return _create_function_tool(real_func)

    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator
+    return decorator