arcade-mcp/libs/tests/core/test_executor.py
Sam Partee b6b4cd0a4c
🏗️ Restructure: Multi-Package Architecture + uv Migration (#412)
### Overview
Major restructuring from monolithic `arcade-ai` package to modular
library architecture with standardized uv-based dependency management.

![arcade-ai Monorepo
(2)](https://github.com/user-attachments/assets/25f102b0-bb87-4a04-9701-d227d05664b1)

### New Package Structure
- **`arcade-tdk`** - Lightweight toolkit development kit (core
decorators, auth)
- **`arcade-core`** - Core execution engine and catalog functionality  
- **`arcade-serve`** - FastAPI/MCP server components
- **`arcade-ai`** - Meta package that includes CLI functionality.
Optionally include evals via the `evals` extra. Optionally include all
packages via the `all` extra.

### Key Benefits
- **Lighter Dependencies**: Toolkits now depend only on `arcade-tdk` (~2
deps) vs full `arcade-ai` (~30+ deps)
- **Faster Builds**: uv provides 10-100x faster dependency resolution
and installation
- **Better Modularity**: Clear separation of concerns, consumers import
only what they need
- **Standard Tooling**: Eliminates custom poetry scripts, uses standard
Python packaging

### Migration Impact
- All 20 toolkits converted from poetry → uv with `arcade-tdk`
dependencies plus `arcade-ai[evals]` and `arcade-serve` dev
dependencies. When developing locally, devs should install toolkits via
`make install-local`.
- Modern Python 3.10+ type hints throughout
- Standardized build system with hatchling backend
- Enhanced Makefile with robust toolkit management commands
- Removed `arcade dev` CLI command
- Reduced the number of files created by `arcade new` and added an option
to skip generating the tests and evals folders.

This foundation enables faster development cycles and cleaner dependency
chains for the growing toolkit ecosystem.

### Todo After this PR is merged
- [ ] Post-merge workflow(s) (release & publish containers, etc)
- [ ] Release order plan. @EricGustin suggests releasing in the
following order:
    1. `arcade-core` version 0.1.0
    2. `arcade-serve` version 0.1.0 and `arcade-tdk` version 0.1.0
    3. `arcade-ai` version 2.0.0
    4. Patch release for all toolkits (all changes in toolkits are internal
       refactors)
- [ ] [Update docs](https://github.com/ArcadeAI/docs/pull/318)

---------

Co-authored-by: Eric Gustin <eric@arcade.dev>
Co-authored-by: Eric Gustin <34000337+EricGustin@users.noreply.github.com>
2025-06-11 16:48:17 -07:00

185 lines
5.7 KiB
Python

from typing import Annotated
import pytest
from arcade_core.catalog import ToolCatalog
from arcade_core.executor import ToolExecutor
from arcade_core.schema import ToolCallError, ToolCallLog, ToolCallOutput, ToolContext
from arcade_tdk import tool
from arcade_tdk.errors import RetryableToolError, ToolExecutionError
# Baseline fixture: a tool that returns its argument unchanged. The
# parametrized executor test uses it for the plain success case and, by passing
# a dict for `inp`, for the "invalid_input_type" failure case.
# NOTE(review): the docstring is presumably picked up by `@tool` as the tool
# description -- confirm before rewording it.
@tool
def simple_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool"""
    return inp
# Echoes its argument like simple_tool, but is additionally wrapped in
# `tool.deprecated(...)`. The executor test expects a successful call to carry
# exactly one log entry: level "warning", subtype "deprecation", and the
# message given to the decorator below.
@tool.deprecated("Use simple_tool instead")
@tool
def simple_deprecated_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool that is deprecated"""
    return inp
# Fixture that always raises RetryableToolError. The executor test expects the
# resulting error to have can_retry=True and retry_after_ms=1000, with "test"
# as message, developer message and additional prompt content.
# NOTE(review): which positional argument maps to which field is defined by
# RetryableToolError, which is not visible in this file -- confirm there.
@tool
def retryable_error_tool() -> Annotated[str, "output"]:
    """Tool that raises a retryable error"""
    raise RetryableToolError("test", "test", "test", 1000)
# Fixture that always raises ToolExecutionError. The executor test expects the
# error to come back with message "test" and developer message "test", and
# with the retry-related fields left at their defaults.
@tool
def exec_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an error"""
    raise ToolExecutionError("test", "test")
# Fixture that raises an exception type outside the tool error classes
# imported here. The executor test expects it to be reported as
# "Error in execution of UnexpectedErrorTool" with developer message
# "Error in unexpected_error_tool: test", so renaming this function would
# change the expected strings.
@tool
def unexpected_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an unexpected error"""
    raise RuntimeError("test")
# Fixture that deliberately violates its own return annotation: it is declared
# to return a str but returns a dict. The executor test expects this to be
# reported as "Failed to serialize tool output". Do not "fix" the mismatch.
@tool
def bad_output_error_tool() -> Annotated[str, "output"]:
    """tool that returns a bad output type"""
    return {"output": "test"}
# ---- Test Driver ----
# One shared catalog for the whole module. Every fixture tool is registered
# under the same toolkit name, in declaration order.
catalog = ToolCatalog()
for _fixture_tool in (
    simple_tool,
    simple_deprecated_tool,
    retryable_error_tool,
    exec_error_tool,
    unexpected_error_tool,
    bad_output_error_tool,
):
    catalog.add_tool(_fixture_tool, "simple_toolkit")
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "tool_func, inputs, expected_output",
    [
        # Plain success: the value is echoed back.
        pytest.param(
            simple_tool,
            {"inp": "test"},
            ToolCallOutput(value="test"),
            id="simple_tool",
        ),
        # Success plus a deprecation warning in the logs.
        pytest.param(
            simple_deprecated_tool,
            {"inp": "test"},
            ToolCallOutput(
                value="test",
                logs=[
                    ToolCallLog(
                        message="Use simple_tool instead",
                        level="warning",
                        subtype="deprecation",
                    )
                ],
            ),
            id="simple_deprecated_tool",
        ),
        # Retryable failure: retry metadata must be propagated.
        pytest.param(
            retryable_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="test",
                    developer_message="test",
                    additional_prompt_content="test",
                    retry_after_ms=1000,
                    can_retry=True,
                )
            ),
            id="retryable_error_tool",
        ),
        # Non-retryable tool failure.
        pytest.param(
            exec_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="test",
                    developer_message="test",
                )
            ),
            id="exec_error_tool",
        ),
        # Arbitrary exception raised inside the tool body.
        pytest.param(
            unexpected_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="Error in execution of UnexpectedErrorTool",
                    developer_message="Error in unexpected_error_tool: test",
                )
            ),
            id="unexpected_error_tool",
        ),
        # The tool takes a string, not a dict.
        pytest.param(
            simple_tool,
            {"inp": {"test": "test"}},
            ToolCallOutput(
                error=ToolCallError(
                    message="Error in tool input deserialization",
                    # Can't guarantee this text is stable, so it is left unset
                    # and check_output skips the comparison.
                    developer_message=None,
                )
            ),
            id="invalid_input_type",
        ),
        # The tool returns a dict where a string is declared.
        pytest.param(
            bad_output_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="Failed to serialize tool output",
                    # Can't guarantee this text is stable, so it is left unset
                    # and check_output skips the comparison.
                    developer_message=None,
                )
            ),
            id="bad_output_type",
        ),
    ],
)
async def test_tool_executor(tool_func, inputs, expected_output):
    """Run one fixture tool through ToolExecutor and compare with the expectation.

    The tool's definition and its input/output models are looked up in the
    module-level ``catalog``; ``inputs`` is forwarded to ``ToolExecutor.run``
    as keyword arguments together with a fresh ``ToolContext``. The result is
    validated by ``check_output``.
    """
    definition = catalog.find_tool_by_func(tool_func)
    materialized = catalog.get_tool(definition.get_fully_qualified_name())

    result = await ToolExecutor.run(
        func=tool_func,
        definition=definition,
        input_model=materialized.input_model,
        output_model=materialized.output_model,
        context=ToolContext(),
        **inputs,
    )

    check_output(result, expected_output)
def check_output(output: ToolCallOutput, expected_output: ToolCallOutput):
    """Assert that an executor result matches the expected result.

    Args:
        output: The result actually produced by the executor.
        expected_output: The result the test case expects.

    Raises:
        AssertionError: If the two disagree on whether an error occurred, on
            any compared error field, on the value, or on the logs.

    ``developer_message`` and ``traceback_info`` are compared only when the
    expectation sets them, so a case can leave unstable text unspecified.
    """
    if output.error:
        # An error the case did not ask for must be a readable assertion
        # failure, not an AttributeError from dereferencing None below.
        assert expected_output.error is not None, (
            f"unexpected tool error: {output.error.message}"
        )
        actual_err = output.error
        expected_err = expected_output.error

        assert actual_err.message == expected_err.message
        if expected_err.developer_message:
            assert actual_err.developer_message == expected_err.developer_message
        if expected_err.traceback_info:
            assert actual_err.traceback_info == expected_err.traceback_info
        assert actual_err.can_retry == expected_err.can_retry
        assert (
            actual_err.additional_prompt_content
            == expected_err.additional_prompt_content
        )
        assert actual_err.retry_after_ms == expected_err.retry_after_ms
    else:
        # Without this, a case expecting an error would pass whenever the call
        # succeeded with a value equal to the expectation's (usually None).
        assert expected_output.error is None, (
            "expected a tool error but the call succeeded"
        )
        assert output.value == expected_output.value

    # Logs are compared pairwise, in order; a missing list counts as empty.
    output_logs = output.logs or []
    expected_logs = expected_output.logs or []
    assert len(output_logs) == len(expected_logs)
    for output_log, expected_log in zip(output_logs, expected_logs):
        assert output_log.message == expected_log.message
        assert output_log.level == expected_log.level
        assert output_log.subtype == expected_log.subtype