### Overview

Major restructuring from monolithic `arcade-ai` package to modular library architecture with standardized uv-based dependency management.

### New Package Structure

- **`arcade-tdk`** - Lightweight toolkit development kit (core decorators, auth)
- **`arcade-core`** - Core execution engine and catalog functionality
- **`arcade-serve`** - FastAPI/MCP server components
- **`arcade-ai`** - Meta package that includes CLI functionality. Optionally include evals via the `evals` extra. Optionally include all packages via the `all` extra.

### Key Benefits

- **Lighter Dependencies**: Toolkits now depend only on `arcade-tdk` (~2 deps) vs full `arcade-ai` (~30+ deps)
- **Faster Builds**: uv provides 10-100x faster dependency resolution and installation
- **Better Modularity**: Clear separation of concerns, consumers import only what they need
- **Standard Tooling**: Eliminates custom poetry scripts, uses standard Python packaging

### Migration Impact

- All 20 toolkits converted from poetry → uv with `arcade-tdk` dependencies plus `arcade-ai[evals]` and `arcade-serve` dev dependencies. When developing locally, devs should install toolkits via `make install-local`.
- Modern Python 3.10+ type hints throughout
- Standardized build system with hatchling backend
- Enhanced Makefile with robust toolkit management commands
- Removed `arcade dev` CLI command
- Reduced the number of files created by `arcade new` and added an option to not generate the tests and evals folders.

This foundation enables faster development cycles and cleaner dependency chains for the growing toolkit ecosystem.

### Todo After this PR is merged

- [ ] Post-merge workflow(s) (release & publish containers, etc.)
- [ ] Release order plan. @EricGustin suggests releasing in the following order:
  1. `arcade-core` version 0.1.0
  2. `arcade-serve` version 0.1.0 and `arcade-tdk` version 0.1.0
  3. `arcade-ai` version 2.0.0
  4. Patch release for all toolkits (all changes in toolkits are internal refactors)
- [ ] [Update docs](https://github.com/ArcadeAI/docs/pull/318)

---------

Co-authored-by: Eric Gustin <eric@arcade.dev>
Co-authored-by: Eric Gustin <34000337+EricGustin@users.noreply.github.com>
185 lines
5.7 KiB
Python
185 lines
5.7 KiB
Python
from typing import Annotated
|
|
|
|
import pytest
|
|
from arcade_core.catalog import ToolCatalog
|
|
from arcade_core.executor import ToolExecutor
|
|
from arcade_core.schema import ToolCallError, ToolCallLog, ToolCallOutput, ToolContext
|
|
from arcade_tdk import tool
|
|
from arcade_tdk.errors import RetryableToolError, ToolExecutionError
|
|
|
|
|
|
@tool
def simple_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool"""
    # Happy-path fixture: hands the argument straight back to the caller.
    # NOTE(review): the docstring is left verbatim because @tool presumably
    # uses it as the tool description -- confirm before rewording.
    return inp
|
|
|
|
|
|
@tool.deprecated("Use simple_tool instead")
@tool
def simple_deprecated_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool that is deprecated"""
    # Same echo behaviour as simple_tool; the only difference is the
    # deprecation marker applied on top of @tool, whose message the test
    # parametrization in this file expects to see as a warning log entry.
    return inp
|
|
|
|
|
|
@tool
def retryable_error_tool() -> Annotated[str, "output"]:
    """Tool that raises a retryable error"""
    # Always fails with a retryable error and never returns a value.
    # NOTE(review): the positional arguments presumably map to message,
    # developer message, additional prompt content and retry-after (ms),
    # mirroring the expected ToolCallError in this file -- confirm against
    # RetryableToolError's signature.
    raise RetryableToolError("test", "test", "test", 1000)
|
|
|
|
|
|
@tool
def exec_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an error"""
    # Always fails with a ToolExecutionError built from two "test" strings;
    # there is no successful return path.
    raise ToolExecutionError("test", "test")
|
|
|
|
|
|
@tool
def unexpected_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an unexpected error"""
    # Raises a plain built-in exception rather than one of the arcade_tdk
    # error types, to exercise the executor's handling of arbitrary failures.
    raise RuntimeError("test")
|
|
|
|
|
|
@tool
def bad_output_error_tool() -> Annotated[str, "output"]:
    """tool that returns a bad output type"""
    # Deliberately violates the declared ``str`` return annotation by
    # returning a dict, so the output-serialization failure path is hit.
    return {"output": "test"}
|
|
|
|
|
|
# ---- Test Driver ----

# Module-level catalog shared by the tests below. Every fixture tool is
# registered under the same toolkit name, in definition order.
catalog = ToolCatalog()
for _fixture_tool in (
    simple_tool,
    simple_deprecated_tool,
    retryable_error_tool,
    exec_error_tool,
    unexpected_error_tool,
    bad_output_error_tool,
):
    catalog.add_tool(_fixture_tool, "simple_toolkit")
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "tool_func, inputs, expected_output",
    [
        pytest.param(
            simple_tool,
            {"inp": "test"},
            ToolCallOutput(value="test"),
            id="simple_tool",
        ),
        pytest.param(
            simple_deprecated_tool,
            {"inp": "test"},
            ToolCallOutput(
                value="test",
                logs=[
                    ToolCallLog(
                        message="Use simple_tool instead",
                        level="warning",
                        subtype="deprecation",
                    )
                ],
            ),
            id="simple_deprecated_tool",
        ),
        pytest.param(
            retryable_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="test",
                    developer_message="test",
                    additional_prompt_content="test",
                    retry_after_ms=1000,
                    can_retry=True,
                )
            ),
            id="retryable_error_tool",
        ),
        pytest.param(
            exec_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="test",
                    developer_message="test",
                )
            ),
            id="exec_error_tool",
        ),
        pytest.param(
            unexpected_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="Error in execution of UnexpectedErrorTool",
                    developer_message="Error in unexpected_error_tool: test",
                )
            ),
            id="unexpected_error_tool",
        ),
        pytest.param(
            simple_tool,
            {"inp": {"test": "test"}},  # takes in a string, not a dict
            ToolCallOutput(
                error=ToolCallError(
                    message="Error in tool input deserialization",
                    developer_message=None,  # can't guarantee this will be the same
                )
            ),
            id="invalid_input_type",
        ),
        pytest.param(
            bad_output_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="Failed to serialize tool output",
                    developer_message=None,  # can't guarantee this will be the same
                )
            ),
            id="bad_output_type",
        ),
    ],
)
async def test_tool_executor(tool_func, inputs, expected_output):
    """Run one fixture tool through ``ToolExecutor.run`` and verify the result.

    Each parametrized case supplies the tool function, the keyword inputs to
    call it with, and the ``ToolCallOutput`` the executor is expected to
    produce. The comparison itself is delegated to ``check_output``.
    """
    # Look up the definition registered for this function, then fetch the
    # catalog entry that carries the input and output models.
    definition = catalog.find_tool_by_func(tool_func)
    materialized = catalog.get_tool(definition.get_fully_qualified_name())

    result = await ToolExecutor.run(
        func=tool_func,
        definition=definition,
        input_model=materialized.input_model,
        output_model=materialized.output_model,
        context=ToolContext(),  # default-constructed dummy context
        **inputs,
    )

    check_output(result, expected_output)
|
|
|
|
|
|
def check_output(output: ToolCallOutput, expected_output: ToolCallOutput):
    """Assert that ``output`` matches ``expected_output``.

    Compares either the error payload or the returned value, depending on
    whether ``output`` carries an error, and then compares the log entries.

    Args:
        output: The result actually produced by the executor.
        expected_output: The result the test case expects. A falsy
            ``developer_message`` or ``traceback_info`` on the expected error
            means "do not compare this field".

    Raises:
        AssertionError: If any compared field differs, if the call failed
            when no error was expected, or if the call succeeded when an
            error was expected.
    """
    actual_error = output.error
    expected_error = expected_output.error

    if actual_error:
        # The tool failed: an error must have been expected. Checking this
        # first gives a readable assertion failure instead of an
        # AttributeError from dereferencing a missing expected error.
        assert expected_error, f"tool failed unexpectedly: {actual_error.message}"

        assert actual_error.message == expected_error.message
        # These two fields are only compared when the test case pins them.
        if expected_error.developer_message:
            assert actual_error.developer_message == expected_error.developer_message
        if expected_error.traceback_info:
            assert actual_error.traceback_info == expected_error.traceback_info
        assert actual_error.can_retry == expected_error.can_retry
        assert (
            actual_error.additional_prompt_content
            == expected_error.additional_prompt_content
        )
        assert actual_error.retry_after_ms == expected_error.retry_after_ms
    else:
        # The tool succeeded: make sure no error was expected. Without this
        # guard, an expected-error case would pass whenever the tool happened
        # to return the same value (e.g. None == None).
        assert not expected_error, (
            f"expected tool error {expected_error.message!r} but the call succeeded"
        )
        assert output.value == expected_output.value

    # Logs are compared pairwise; a missing list is treated as empty.
    actual_logs = output.logs or []
    wanted_logs = expected_output.logs or []
    assert len(actual_logs) == len(wanted_logs)
    for actual_log, wanted_log in zip(actual_logs, wanted_logs):
        assert actual_log.message == wanted_log.message
        assert actual_log.level == wanted_log.level
        assert actual_log.subtype == wanted_log.subtype
|