Add support for local shell, image generator, code interpreter tools (#732)

2025-05-21 15:26:22 -04:00 · 2025-05-21 15:26:22 -04:00 · 079764f0ab
commit 079764f0ab
parent 9fa5c39d69
7 changed files with 334 additions and 20 deletions
--- a/examples/tools/code_interpreter.py
+++ b/examples/tools/code_interpreter.py
@ -0,0 +1,34 @@
 import asyncio
 from agents import Agent, CodeInterpreterTool, Runner, trace
 async def main():
    """Stream a run of a code-interpreter agent and print what it does.

    The agent is given a ``CodeInterpreterTool`` configured with an ``auto``
    container. While the run streams, the code of every code interpreter tool
    call is printed, every other run item is reported by its type, and the
    final output is printed once the stream is exhausted.
    """
    agent = Agent(
        name="Code interpreter",
        instructions="You love doing math.",
        tools=[
            CodeInterpreterTool(
                tool_config={"type": "code_interpreter", "container": {"type": "auto"}},
            )
        ],
    )
    # The prompt previously read "square root of273"; the missing space is restored.
    question = "What is the square root of 273 * 312821 plus 1782?"
    with trace("Code interpreter example"):
        print("Solving math problem...")
        result = Runner.run_streamed(agent, question)
        async for event in result.stream_events():
            # Only run-item events are reported; other stream events are ignored.
            if event.type != "run_item_stream_event":
                continue
            item = event.item
            if item.type == "tool_call_item" and item.raw_item.type == "code_interpreter_call":
                print(f"Code interpreter code:\n```\n{item.raw_item.code}\n```\n")
            else:
                print(f"Other event: {item.type}")
        print(f"Final output: {result.final_output}")
 # Run the example only when this file is executed as a script.
 if __name__ == "__main__":
    asyncio.run(main())
--- a/examples/tools/image_generator.py
+++ b/examples/tools/image_generator.py
@ -0,0 +1,54 @@
 import asyncio
 import base64
 import os
 import subprocess
 import sys
 import tempfile
 from agents import Agent, ImageGenerationTool, Runner, trace
 def open_file(path: str) -> None:
    """Open ``path`` with the platform's default application.

    macOS uses the ``open`` command, Windows uses ``os.startfile``, and other
    POSIX systems use ``xdg-open``. On any other platform a message is printed
    and nothing is opened.

    Args:
        path: Path of the file to open.
    """
    if sys.platform.startswith("darwin"):
        subprocess.run(["open", path], check=False)  # macOS
    elif os.name == "nt":  # Windows
        # os.startfile only exists on Windows, hence the type-ignore for checkers.
        os.startfile(path)  # type: ignore
    elif os.name == "posix":
        subprocess.run(["xdg-open", path], check=False)  # Linux/Unix
    else:
        print(f"Don't know how to open files on this platform: {sys.platform}")
 async def main():
    """Generate an image with an agent and open every image it produced.

    Runs an agent equipped with an ``ImageGenerationTool`` (low quality
    setting), prints the final output, then writes each non-empty image
    generation result to a temporary ``.png`` file and opens it.
    """
    image_tool = ImageGenerationTool(
        tool_config={"type": "image_generation", "quality": "low"},
    )
    agent = Agent(
        name="Image generator",
        instructions="You are a helpful agent.",
        tools=[image_tool],
    )
    with trace("Image generation example"):
        print("Generating image, this may take a while...")
        result = await Runner.run(
            agent, "Create an image of a frog eating a pizza, comic book style."
        )
        print(result.final_output)
        for run_item in result.new_items:
            # Skip everything that is not an image generation call with a payload.
            if run_item.type != "tool_call_item":
                continue
            if run_item.raw_item.type != "image_generation_call":
                continue
            encoded_image = run_item.raw_item.result
            if not encoded_image:
                continue
            # delete=False keeps the file on disk after the handle is closed so
            # that it can still be opened afterwards.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as png_file:
                png_file.write(base64.b64decode(encoded_image))
                image_path = png_file.name
            open_file(image_path)
 # Run the example only when this file is executed as a script.
 if __name__ == "__main__":
    asyncio.run(main())
--- a/src/agents/__init__.py
+++ b/src/agents/__init__.py
@ -54,11 +54,16 @@ from .stream_events import (
    StreamEvent,
 )
 from .tool import (
    CodeInterpreterTool,
    ComputerTool,
    FileSearchTool,
    FunctionTool,
    FunctionToolResult,
    HostedMCPTool,
    ImageGenerationTool,
    LocalShellCommandRequest,
    LocalShellExecutor,
    LocalShellTool,
    MCPToolApprovalFunction,
    MCPToolApprovalFunctionResult,
    MCPToolApprovalRequest,
@ -210,6 +215,11 @@ __all__ = [
    "FunctionToolResult",
    "ComputerTool",
    "FileSearchTool",
    "CodeInterpreterTool",
    "ImageGenerationTool",
    "LocalShellCommandRequest",
    "LocalShellExecutor",
    "LocalShellTool",
    "Tool",
    "WebSearchTool",
    "HostedMCPTool",
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@ -14,6 +14,9 @@ from openai.types.responses import (
    ResponseFunctionWebSearch,
    ResponseOutputMessage,
 )
 from openai.types.responses.response_code_interpreter_tool_call import (
    ResponseCodeInterpreterToolCall,
 )
 from openai.types.responses.response_computer_tool_call import (
    ActionClick,
    ActionDoubleClick,
@ -26,7 +29,12 @@ from openai.types.responses.response_computer_tool_call import (
    ActionWait,
 )
 from openai.types.responses.response_input_param import ComputerCallOutput, McpApprovalResponse
-from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
+from openai.types.responses.response_output_item import (
    ImageGenerationCall,
    LocalShellCall,
    McpApprovalRequest,
    McpListTools,
 )
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from .agent import Agent, ToolsToFinalOutputResult
@ -61,6 +69,8 @@ from .tool import (
    FunctionTool,
    FunctionToolResult,
    HostedMCPTool,
    LocalShellCommandRequest,
    LocalShellTool,
    MCPToolApprovalRequest,
    Tool,
 )
@ -129,12 +139,19 @@ class ToolRunMCPApprovalRequest:
    mcp_tool: HostedMCPTool
@dataclass
 class ToolRunLocalShellCall:
    """Pairs a local shell call from the model with the tool that will run it."""
    # The local shell call item found in the model output.
    tool_call: LocalShellCall
    # The LocalShellTool whose executor is invoked for this call.
    local_shell_tool: LocalShellTool
@dataclass
 class ProcessedResponse:
    new_items: list[RunItem]
    handoffs: list[ToolRunHandoff]
    functions: list[ToolRunFunction]
    computer_actions: list[ToolRunComputerAction]
    local_shell_calls: list[ToolRunLocalShellCall]
    tools_used: list[str]  # Names of all tools used, including hosted tools
    mcp_approval_requests: list[ToolRunMCPApprovalRequest]  # Only requests with callbacks
@ -146,6 +163,7 @@ class ProcessedResponse:
                self.handoffs,
                self.functions,
                self.computer_actions,
                self.local_shell_calls,
                self.mcp_approval_requests,
            ]
        )
@ -371,11 +389,15 @@ class RunImpl:
        run_handoffs = []
        functions = []
        computer_actions = []
        local_shell_calls = []
        mcp_approval_requests = []
        tools_used: list[str] = []
        handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
        function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
        computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
        local_shell_tool = next(
            (tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
        )
        hosted_mcp_server_map = {
            tool.tool_config["server_label"]: tool
            for tool in all_tools
@ -434,9 +456,29 @@ class RunImpl:
                        )
            elif isinstance(output, McpListTools):
                items.append(MCPListToolsItem(raw_item=output, agent=agent))
-            elif isinstance(output, McpCall):
+            elif isinstance(output, ImageGenerationCall):
                items.append(ToolCallItem(raw_item=output, agent=agent))
-                tools_used.append(output.name)
+                tools_used.append("image_generation")
            elif isinstance(output, ResponseCodeInterpreterToolCall):
                items.append(ToolCallItem(raw_item=output, agent=agent))
                tools_used.append("code_interpreter")
            elif isinstance(output, LocalShellCall):
                items.append(ToolCallItem(raw_item=output, agent=agent))
                tools_used.append("local_shell")
                if not local_shell_tool:
                    _error_tracing.attach_error_to_current_span(
                        SpanError(
                            message="Local shell tool not found",
                            data={},
                        )
                    )
                    raise ModelBehaviorError(
                        "Model produced local shell call without a local shell tool."
                    )
                local_shell_calls.append(
                    ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
                )
            elif not isinstance(output, ResponseFunctionToolCall):
                logger.warning(f"Unexpected output type, ignoring: {type(output)}")
                continue
@ -478,6 +520,7 @@ class RunImpl:
            handoffs=run_handoffs,
            functions=functions,
            computer_actions=computer_actions,
            local_shell_calls=local_shell_calls,
            tools_used=tools_used,
            mcp_approval_requests=mcp_approval_requests,
        )
@ -552,6 +595,30 @@ class RunImpl:
            for tool_run, result in zip(tool_runs, results)
        ]
    @classmethod
    async def execute_local_shell_calls(
        cls,
        *,
        agent: Agent[TContext],
        calls: list[ToolRunLocalShellCall],
        context_wrapper: RunContextWrapper[TContext],
        hooks: RunHooks[TContext],
        config: RunConfig,
    ) -> list[RunItem]:
        """Execute the given local shell calls in order and collect their run items.

        The calls are awaited one at a time instead of being gathered, because
        each call can affect the local shell state.
        """
        # Each execution is awaited to completion before the next one starts.
        return [
            await LocalShellAction.execute(
                agent=agent,
                call=shell_call,
                hooks=hooks,
                context_wrapper=context_wrapper,
                config=config,
            )
            for shell_call in calls
        ]
    @classmethod
    async def execute_computer_actions(
        cls,
@ -1021,3 +1088,54 @@ class ComputerAction:
            await computer.wait()
        return await computer.screenshot()
 class LocalShellAction:
    """Runs a single local shell tool call and wraps its result as a run item."""

    @classmethod
    async def execute(
        cls,
        *,
        agent: Agent[TContext],
        call: ToolRunLocalShellCall,
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
    ) -> RunItem:
        """Invoke the local shell executor for ``call`` and return its output item.

        Args:
            agent: The agent on whose behalf the call is executed.
            call: The shell call together with the tool that handles it.
            hooks: Run-level hooks notified before and after execution.
            context_wrapper: The run context handed to hooks and the executor.
            config: The run configuration (not used by this method).

        Returns:
            A ``ToolCallOutputItem`` whose ``output`` and raw ``"output"`` field
            both hold the resolved result of the executor.
        """
        shell_tool = call.local_shell_tool

        # Run-level and agent-level start hooks are awaited together.
        await asyncio.gather(
            hooks.on_tool_start(context_wrapper, agent, shell_tool),
            (
                agent.hooks.on_tool_start(context_wrapper, agent, shell_tool)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        request = LocalShellCommandRequest(
            ctx_wrapper=context_wrapper,
            data=call.tool_call,
        )
        # The executor may be sync or async; await only when it returned an awaitable.
        output = shell_tool.executor(request)
        if inspect.isawaitable(output):
            result = await output
        else:
            result = output

        await asyncio.gather(
            hooks.on_tool_end(context_wrapper, agent, shell_tool, result),
            (
                agent.hooks.on_tool_end(context_wrapper, agent, shell_tool, result)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        return ToolCallOutputItem(
            agent=agent,
            # Use the resolved result: for an async executor ``output`` is the
            # already-awaited awaitable, not the command output.
            output=result,
            raw_item={
                "type": "local_shell_call_output",
                "id": call.tool_call.call_id,
                "output": result,
                # "id": "out" + call.tool_call.id,  # TODO remove this, it should be optional
            },
        )
--- a/src/agents/items.py
+++ b/src/agents/items.py
@ -18,12 +18,22 @@ from openai.types.responses import (
    ResponseOutputText,
    ResponseStreamEvent,
 )
 from openai.types.responses.response_code_interpreter_tool_call import (
    ResponseCodeInterpreterToolCall,
 )
 from openai.types.responses.response_input_item_param import (
    ComputerCallOutput,
    FunctionCallOutput,
    LocalShellCallOutput,
    McpApprovalResponse,
 )
-from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
+from openai.types.responses.response_output_item import (
    ImageGenerationCall,
    LocalShellCall,
    McpApprovalRequest,
    McpCall,
    McpListTools,
 )
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from pydantic import BaseModel
 from typing_extensions import TypeAlias
@ -114,6 +124,9 @@ ToolCallItemTypes: TypeAlias = Union[
    ResponseFileSearchToolCall,
    ResponseFunctionWebSearch,
    McpCall,
    ResponseCodeInterpreterToolCall,
    ImageGenerationCall,
    LocalShellCall,
 ]
 """A type that represents a tool call item."""
@ -129,10 +142,12 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):
@dataclass
-class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutput]]):
+class ToolCallOutputItem(
    RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
 ):
    """Represents the output of a tool call."""
-    raw_item: FunctionCallOutput | ComputerCallOutput
+    raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
    """The raw item from the model."""
    output: Any
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@ -24,7 +24,17 @@ from ..exceptions import UserError
 from ..handoffs import Handoff
 from ..items import ItemHelpers, ModelResponse, TResponseInputItem
 from ..logger import logger
-from ..tool import ComputerTool, FileSearchTool, FunctionTool, HostedMCPTool, Tool, WebSearchTool
+from ..tool import (
    CodeInterpreterTool,
    ComputerTool,
    FileSearchTool,
    FunctionTool,
    HostedMCPTool,
    ImageGenerationTool,
    LocalShellTool,
    Tool,
    WebSearchTool,
 )
 from ..tracing import SpanError, response_span
 from ..usage import Usage
 from ..version import __version__
@ -295,6 +305,18 @@ class Converter:
            return {
                "type": "computer_use_preview",
            }
        elif tool_choice == "image_generation":
            return {
                "type": "image_generation",
            }
        elif tool_choice == "code_interpreter":
            return {
                "type": "code_interpreter",
            }
        elif tool_choice == "mcp":
            return {
                "type": "mcp",
            }
        else:
            return {
                "type": "function",
@ -386,6 +408,17 @@ class Converter:
        elif isinstance(tool, HostedMCPTool):
            converted_tool = tool.tool_config
            includes = None
        elif isinstance(tool, ImageGenerationTool):
            converted_tool = tool.tool_config
            includes = None
        elif isinstance(tool, CodeInterpreterTool):
            converted_tool = tool.tool_config
            includes = None
        elif isinstance(tool, LocalShellTool):
            converted_tool = {
                "type": "local_shell",
            }
            includes = None
        else:
            raise UserError(f"Unknown tool type: {type(tool)}, tool")
--- a/src/agents/tool.py
+++ b/src/agents/tool.py
@ -7,8 +7,8 @@ from dataclasses import dataclass
 from typing import Any, Callable, Literal, Union, overload
 from openai.types.responses.file_search_tool_param import Filters, RankingOptions
-from openai.types.responses.response_output_item import McpApprovalRequest
+from openai.types.responses.response_output_item import LocalShellCall, McpApprovalRequest
-from openai.types.responses.tool_param import Mcp
+from openai.types.responses.tool_param import CodeInterpreter, ImageGeneration, Mcp
 from openai.types.responses.web_search_tool_param import UserLocation
 from pydantic import ValidationError
 from typing_extensions import Concatenate, NotRequired, ParamSpec, TypedDict
@ -180,7 +180,67 @@ class HostedMCPTool:
        return "hosted_mcp"
-Tool = Union[FunctionTool, FileSearchTool, WebSearchTool, ComputerTool, HostedMCPTool]
+@dataclass
 class CodeInterpreterTool:
    """A tool that allows the LLM to execute code in a sandboxed environment."""
    tool_config: CodeInterpreter
    """The tool config, which includes the container and other settings."""
    @property
    def name(self) -> str:
        """Return the fixed tool name, ``"code_interpreter"``."""
        return "code_interpreter"
@dataclass
class ImageGenerationTool:
    """Tool that lets the LLM generate images."""

    tool_config: ImageGeneration
    """The tool config, which holds the image generation settings."""

    @property
    def name(self) -> str:
        """Return the fixed tool name, ``"image_generation"``."""
        return "image_generation"
@dataclass
 class LocalShellCommandRequest:
    """A request to execute a command on a shell."""
    # Context wrapper of the run that issued the shell call.
    ctx_wrapper: RunContextWrapper[Any]
    """The run context."""
    # The LocalShellCall item describing what the model asked to run.
    data: LocalShellCall
    """The data from the local shell tool call."""
 # An executor takes the LocalShellCommandRequest and yields a string, either
 # directly or through an awaitable.
 LocalShellExecutor = Callable[[LocalShellCommandRequest], MaybeAwaitable[str]]
 """A function that executes a command on a shell."""
@dataclass
 class LocalShellTool:
    """A tool that allows the LLM to execute commands on a shell."""
    executor: LocalShellExecutor
    """A function that executes a command on a shell."""
    @property
    def name(self) -> str:
        """Return the fixed tool name, ``"local_shell"``."""
        return "local_shell"
 # Union of the tool classes defined in this module.
 Tool = Union[
    FunctionTool,
    FileSearchTool,
    WebSearchTool,
    ComputerTool,
    HostedMCPTool,
    LocalShellTool,
    ImageGenerationTool,
    CodeInterpreterTool,
 ]
 """A tool that can be used in an agent."""
@ -358,13 +418,3 @@ def function_tool(
        return _create_function_tool(real_func)
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator
    return decorator