# Improvements to Arcade TDK Error Handling
I tried my very best to not make any breaking changes in this PR. So,
you will notice various "Deprecation" notices throughout.
### Instructions for PR reviewers
1. Pull down this PR's branch
2. Pull down the Engine's tool error handling PR's branch
3. Update your installed arcadepy to have the following:
- In `arcadepy/resources/tools/tools.py`, if you want to test out
including stacktraces, update `ToolsResource.execute` to accept an
`include_error_stacktrace` argument and, inside the method body, also
include that `include_error_stacktrace` argument in the POST request sent
to the Engine.
- In `arcadepy/types/execute_tool_response.py` add the following enum
```py
class ErrorKind(str, Enum):
    """Error kind that is comprised of
    - the who (toolkit, tool, upstream)
    - the when (load time, definition parsing time, runtime)
    - the what (bad_definition, bad_input, bad_output, retry,
    context_required, fatal, etc.)"""

    TOOLKIT_LOAD_FAILED = "TOOLKIT_LOAD_FAILED"
    TOOL_DEFINITION_BAD_DEFINITION = "TOOL_DEFINITION_BAD_DEFINITION"
    TOOL_DEFINITION_BAD_INPUT_SCHEMA = "TOOL_DEFINITION_BAD_INPUT_SCHEMA"
    TOOL_DEFINITION_BAD_OUTPUT_SCHEMA = "TOOL_DEFINITION_BAD_OUTPUT_SCHEMA"
    TOOL_RUNTIME_BAD_INPUT_VALUE = "TOOL_RUNTIME_BAD_INPUT_VALUE"
    TOOL_RUNTIME_BAD_OUTPUT_VALUE = "TOOL_RUNTIME_BAD_OUTPUT_VALUE"
    TOOL_RUNTIME_RETRY = "TOOL_RUNTIME_RETRY"
    TOOL_RUNTIME_CONTEXT_REQUIRED = "TOOL_RUNTIME_CONTEXT_REQUIRED"
    TOOL_RUNTIME_FATAL = "TOOL_RUNTIME_FATAL"
    UPSTREAM_RUNTIME_BAD_REQUEST = "UPSTREAM_RUNTIME_BAD_REQUEST"
    UPSTREAM_RUNTIME_AUTH_ERROR = "UPSTREAM_RUNTIME_AUTH_ERROR"
    UPSTREAM_RUNTIME_NOT_FOUND = "UPSTREAM_RUNTIME_NOT_FOUND"
    UPSTREAM_RUNTIME_VALIDATION_ERROR = "UPSTREAM_RUNTIME_VALIDATION_ERROR"
    UPSTREAM_RUNTIME_RATE_LIMIT = "UPSTREAM_RUNTIME_RATE_LIMIT"
    UPSTREAM_RUNTIME_SERVER_ERROR = "UPSTREAM_RUNTIME_SERVER_ERROR"
    UPSTREAM_RUNTIME_UNMAPPED = "UPSTREAM_RUNTIME_UNMAPPED"
    UNKNOWN = "UNKNOWN"
```
- In `arcadepy/types/execute_tool_response.py` add the following fields
to OutputError:
```py
kind: ErrorKind
status_code: Optional[int] = None
stacktrace: Optional[str] = None
extra: Optional[dict[str, Any]] = None
```
### Example Client Usage
```py
# Example of handling an upstream rate limit
error = response.output.error
if error and error.kind == ErrorKind.UPSTREAM_RUNTIME_RATE_LIMIT:
    sleep_time = error.retry_after_ms / 1000
    time.sleep(sleep_time)
    # and then execute again
```
```py
# Examples of determining what type of runtime error it is
error = response.output.error
if error:
    is_retryable_error = error.kind == ErrorKind.TOOL_RUNTIME_RETRY
    is_a_bug_in_the_tool = error.kind == ErrorKind.TOOL_RUNTIME_FATAL
    is_additional_context_required = error.kind == ErrorKind.TOOL_RUNTIME_CONTEXT_REQUIRED
```
### Example Tool Usage
```py
# EXAMPLE 1 letting Arcade handle upstream error handling for you
reddit_client.post(params) # Arcade's httpx adapter will handle error handling for you!
# ------------------------------------
# EXAMPLE 2 handling upstream bad request yourself, but letting Arcade handle the rest
try:
    reddit_client.post(params)
except httpx.HTTPStatusError as e:
    if e.response.status_code == 400:
        raise UpstreamError("My extra custom message") from e
    raise
```
```py
# EXAMPLE 1 letting Arcade handle it for you
risky_element = my_risky_list[42] # Arcade will raise a FatalToolError for you
# ------------------------------------
# EXAMPLE 2 handling it yourself for extra flexibility
try:
    risky_element = my_risky_list[42]
except IndexError as e:
    raise FatalToolError("My extra custom message") from e
```
### Non-runtime Error Message Examples
Example ToolkitLoadError Messages:
```
- [TOOLKIT_LOAD_FAILED] ToolkitLoadError when loading toolkit 'sample_tool': Could not import module mock_module. Reason: Mock import error
- [TOOLKIT_LOAD_FAILED] ToolkitLoadError when loading toolkit 'test_toolkit': Tool 'ValidTool' in toolkit 'test_toolkit' already exists in the catalog.
```
Example ToolDefinitionError Messages
```
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_missing_description': Tool 'tool_missing_description' is missing a description
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_invalid_secret_type': Secret keys must be strings (error in tool ToolWithInvalidSecretType).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_empty_secret': Secrets must have a non-empty key (error in tool ToolWithEmptySecret).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_invalid_metadata_type': Metadata must be strings (error in tool ToolWithInvalidMetadataType).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_metadata_requiring_auth_without_auth': Tool ToolWithMetadataRequiringAuthWithoutAuth declares metadata key 'client_id', which requires that the tool has an auth requirement, but no auth requirement was provided. Please specify an auth requirement.
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_empty_metadata': Metadata must have a non-empty key (error in tool ToolWithEmptyMetadata).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_unsupported_param_type': Unsupported parameter type: <class 'test_catalog.MyFancyTestClass'>
```
Example ToolInputSchemaError Messages
```
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_missing_input_parameter_annotation': Parameter 'input_text' is missing a description
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_no_type_annotation': Parameter param has no type annotation.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_invalid_param_name': Invalid parameter name: '123invalid' is not a valid identifier. Identifiers must start with a letter or underscore, and can only contain letters, digits, or underscores.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_too_many_annotations': Parameter param: Annotated[str, 'name', 'desc', 'extra'] has too many string annotations. Expected 0, 1, or 2, got 3.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_required_union_param': Parameter param is a union type. Only optional types are supported.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_non_callable_default_factory': Default factory for parameter param: Annotated[str, 'Parameter'] = FieldInfo(annotation=NoneType, required=False, default_factory=str) is not callable.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_multiple_tool_contexts': Only one ToolContext parameter is supported, but tool tool_with_multiple_tool_contexts has multiple.
```
Example ToolOutputSchemaError Messages
```
- [TOOL_DEFINITION_BAD_OUTPUT_SCHEMA] ToolOutputSchemaError in definition of tool 'tool_missing_return_type_hint': Tool 'ToolMissingReturnTypeHint' must have a return type
- [TOOL_DEFINITION_BAD_OUTPUT_SCHEMA] ToolOutputSchemaError in definition of tool 'tool_with_unsupported_output_type': Unsupported output type '<class 'test_catalog.MyFancyTestClass'>'. Only built-in Python types, TypedDicts, Pydantic models, and standard collections are supported as tool output types.
```
### Runtime Error Message Examples
Example Tool Runtime Error Messages
```
- [TOOL_RUNTIME_FATAL] FatalToolError during execution of tool 'get_posts_in_subreddit': list index out of range
- [TOOL_RUNTIME_CONTEXT_REQUIRED] ContextRequiredToolError during execution of tool 'get_posts_in_subreddit': Ambiguous username. Please provide a more specific username
- [TOOL_RUNTIME_RETRY] RetryableToolError during execution of tool 'get_posts_in_subreddit': Retry with subreddit=learnpython or subreddit=learnprogramming
```
Example Upstream Runtime Error Messages
```
- [UPSTREAM_RUNTIME_RATE_LIMIT] UpstreamRateLimitError during execution of tool 'get_posts_in_subreddit': 429 Client Error: Too Many Requests
- [UPSTREAM_RUNTIME_BAD_REQUEST] UpstreamError during execution of tool 'get_posts_in_subreddit': 400 Client Error: Bad request. Missing 'id' parameter.
- [UPSTREAM_RUNTIME_BAD_REQUEST] UpstreamError during execution of tool 'search_files': Upstream Google API error: Invalid value '-23'. Values must be within the range: [value: 1\n, value: 1000\n]
```
359 lines
12 KiB
Python
359 lines
12 KiB
Python
from typing import Annotated
|
|
|
|
import pytest
|
|
from arcade_core.catalog import ToolCatalog
|
|
from arcade_core.errors import (
|
|
ContextRequiredToolError,
|
|
ErrorKind,
|
|
ToolRuntimeError,
|
|
UpstreamError,
|
|
UpstreamRateLimitError,
|
|
)
|
|
from arcade_core.executor import ToolExecutor
|
|
from arcade_core.schema import ToolCallError, ToolCallLog, ToolCallOutput, ToolContext
|
|
from arcade_tdk import tool
|
|
from arcade_tdk.errors import (
|
|
RetryableToolError,
|
|
ToolExecutionError,
|
|
)
|
|
from typing_extensions import TypedDict
|
|
|
|
|
|
@tool
def simple_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool"""
    # Happy-path fixture: hand the single string argument straight back.
    echoed = inp
    return echoed
|
|
|
|
|
|
@tool.deprecated("Use simple_tool instead")
@tool
def simple_deprecated_tool(inp: Annotated[str, "input"]) -> Annotated[str, "output"]:
    """Simple tool that is deprecated"""
    # Same echo behaviour as simple_tool; the parametrized test expects the
    # deprecation message above to show up as a warning log entry.
    echoed = inp
    return echoed
|
|
|
|
|
|
@tool
def retryable_error_tool() -> Annotated[str, "output"]:
    """Tool that raises a retryable error"""
    # Per the expected output in the parametrized test, the positional values
    # surface as: message, developer message, additional prompt content and
    # retry_after_ms.
    failure = RetryableToolError(
        "test",
        "test developer message",
        "additional prompt content",
        1000,
    )
    raise failure
|
|
|
|
|
|
@tool
def tool_execution_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an error"""
    # Always fails; the parametrized test expects this to be reported with
    # kind TOOL_RUNTIME_FATAL and can_retry=False.
    failure = ToolExecutionError("test", "test developer message")
    raise failure
|
|
|
|
|
|
@tool
def unexpected_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an unexpected error"""
    # Deliberately raises a plain built-in exception (not an Arcade error
    # type); the parametrized test expects it to be reported as a
    # FatalToolError with status code 500.
    failure = RuntimeError("test")
    raise failure
|
|
|
|
|
|
@tool
def context_required_error_tool() -> Annotated[str, "output"]:
    """Tool that raises a context required error"""
    # No developer message is supplied, so the expected error in the
    # parametrized test leaves developer_message as None.
    failure = ContextRequiredToolError(
        "test",
        additional_prompt_content="need the user to clarify something",
    )
    raise failure
|
|
|
|
|
|
@tool
def upstream_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an upstream error"""
    # TODO: or test raising a httpx error? Do these types of tests belong in adapter tests?
    # Status 400 is expected to be reported as UPSTREAM_RUNTIME_BAD_REQUEST
    # by the parametrized test.
    failure = UpstreamError("test", status_code=400)
    raise failure
|
|
|
|
|
|
@tool
def upstream_ratelimit_error_tool() -> Annotated[str, "output"]:
    """Tool that raises an upstream rate limit error"""
    # The docstring previously duplicated upstream_error_tool's text; it now
    # names the error this fixture actually raises.
    # TODO: or test raising a httpx error? Do these types of tests belong in adapter tests?
    # The second positional value (1000) is expected to surface as
    # retry_after_ms in the parametrized test, with status code 429.
    raise UpstreamRateLimitError("test", 1000)
|
|
|
|
|
|
@tool
def tool_runtime_error_tool() -> Annotated[str, "output"]:
    """Tool that raises a tool runtime error"""
    # Raises the base runtime error directly; the parametrized test expects
    # kind TOOL_RUNTIME_FATAL and can_retry=False.
    failure = ToolRuntimeError("test", "test developer message")
    raise failure
|
|
|
|
|
|
@tool
def bad_output_error_tool() -> Annotated[str, "output"]:
    """tool that returns a bad output type"""
    # Intentional mismatch: the annotation promises a str but a dict is
    # returned, which the parametrized test expects to be reported as
    # TOOL_RUNTIME_BAD_OUTPUT_VALUE.
    wrong_type_payload = {"output": "test"}
    return wrong_type_payload
|
|
|
|
|
|
# TypedDict output tools
|
|
class ResultDict(TypedDict):
    """Result dictionary."""

    # Structured return shape used by the TypedDict output fixtures below.
    # Field order and names are left untouched.
    status: str
    count: int
    items: list[str]
|
|
|
|
|
|
@tool
def typeddict_output_tool() -> Annotated[ResultDict, "Returns a TypedDict"]:
    """Tool that returns a TypedDict."""
    # Fixed payload; the parametrized test compares it against an equal
    # plain dict.
    result = ResultDict(status="success", count=3, items=["a", "b", "c"])
    return result
|
|
|
|
|
|
@tool
def list_typeddict_output_tool() -> Annotated[list[ResultDict], "Returns list of TypedDict"]:
    """Tool that returns a list of TypedDict."""
    # Two fixed entries, returned in this order.
    first = ResultDict(status="first", count=1, items=["x"])
    second = ResultDict(status="second", count=2, items=["y", "z"])
    return [first, second]
|
|
|
|
|
|
@tool
def dict_output_tool() -> Annotated[dict, "Returns a plain dict"]:
    """Tool that returns a plain dict."""
    # Untyped dict with one nested level, to exercise plain-dict outputs.
    payload = {
        "key": "value",
        "number": 42,
        "nested": {"inner": "data"},
    }
    return payload
|
|
|
|
|
|
# ---- Test Driver ----
# Every fixture tool defined above, registered below in one shared catalog so
# the parametrized test can look each one up by its function object.
tools = [
    simple_tool,
    simple_deprecated_tool,
    retryable_error_tool,
    tool_execution_error_tool,
    unexpected_error_tool,
    context_required_error_tool,
    upstream_error_tool,
    upstream_ratelimit_error_tool,
    tool_runtime_error_tool,
    bad_output_error_tool,
    typeddict_output_tool,
    list_typeddict_output_tool,
    dict_output_tool,
]

catalog = ToolCatalog()

# All fixtures are registered under the same toolkit name.
for tool_func in tools:
    catalog.add_tool(tool_func, "simple_toolkit")
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "tool_func, inputs, expected_output",
    [
        pytest.param(
            simple_tool,
            {"inp": "test"},
            ToolCallOutput(value="test"),
            id="simple_tool",
        ),
        pytest.param(
            simple_deprecated_tool,
            {"inp": "test"},
            ToolCallOutput(
                value="test",
                logs=[
                    ToolCallLog(
                        message="Use simple_tool instead",
                        level="warning",
                        subtype="deprecation",
                    )
                ],
            ),
            id="simple_deprecated_tool",
        ),
        pytest.param(
            retryable_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_RETRY] RetryableToolError during execution of tool 'retryable_error_tool': test",
                    kind=ErrorKind.TOOL_RUNTIME_RETRY,
                    developer_message="[TOOL_RUNTIME_RETRY] RetryableToolError during execution of tool 'retryable_error_tool': test developer message",
                    additional_prompt_content="additional prompt content",
                    retry_after_ms=1000,
                    can_retry=True,
                )
            ),
            id="retryable_error_tool",
        ),
        pytest.param(
            tool_execution_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_FATAL] ToolExecutionError during execution of tool 'tool_execution_error_tool': test",
                    kind=ErrorKind.TOOL_RUNTIME_FATAL,
                    developer_message="[TOOL_RUNTIME_FATAL] ToolExecutionError during execution of tool 'tool_execution_error_tool': test developer message",
                    can_retry=False,
                )
            ),
            id="exec_error_tool",
        ),
        pytest.param(
            unexpected_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_FATAL] FatalToolError during execution of tool 'unexpected_error_tool': test",
                    kind=ErrorKind.TOOL_RUNTIME_FATAL,
                    developer_message="[TOOL_RUNTIME_FATAL] FatalToolError during execution of tool 'unexpected_error_tool': test",
                    can_retry=False,
                    status_code=500,
                )
            ),
            id="unexpected_error_tool",
        ),
        pytest.param(
            simple_tool,
            {"inp": {"test": "test"}},  # takes in a string not a dict
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_BAD_INPUT_VALUE] ToolInputError during execution of tool 'simple_tool': Error in tool input deserialization",
                    kind=ErrorKind.TOOL_RUNTIME_BAD_INPUT_VALUE,
                    status_code=400,
                    developer_message=None,  # can't guarantee this will be the same
                )
            ),
            id="invalid_input_type",
        ),
        pytest.param(
            context_required_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_CONTEXT_REQUIRED] ContextRequiredToolError during execution of tool 'context_required_error_tool': test",
                    kind=ErrorKind.TOOL_RUNTIME_CONTEXT_REQUIRED,
                    developer_message=None,
                    additional_prompt_content="need the user to clarify something",
                )
            ),
            id="context_required_error_tool",
        ),
        pytest.param(
            upstream_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[UPSTREAM_RUNTIME_BAD_REQUEST] UpstreamError during execution of tool 'upstream_error_tool': test",
                    kind=ErrorKind.UPSTREAM_RUNTIME_BAD_REQUEST,
                    status_code=400,
                    developer_message=None,
                )
            ),
            id="upstream_error_tool",
        ),
        pytest.param(
            upstream_ratelimit_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[UPSTREAM_RUNTIME_RATE_LIMIT] UpstreamRateLimitError during execution of tool 'upstream_ratelimit_error_tool': test",
                    kind=ErrorKind.UPSTREAM_RUNTIME_RATE_LIMIT,
                    status_code=429,
                    developer_message=None,
                    retry_after_ms=1000,
                    can_retry=True,
                )
            ),
            id="upstream_ratelimit_error_tool",
        ),
        pytest.param(
            tool_runtime_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_FATAL] ToolRuntimeError during execution of tool 'tool_runtime_error_tool': test",
                    kind=ErrorKind.TOOL_RUNTIME_FATAL,
                    developer_message="[TOOL_RUNTIME_FATAL] ToolRuntimeError during execution of tool 'tool_runtime_error_tool': test developer message",
                    can_retry=False,
                )
            ),
            id="tool_runtime_error_tool",
        ),
        pytest.param(
            bad_output_error_tool,
            {},
            ToolCallOutput(
                error=ToolCallError(
                    message="[TOOL_RUNTIME_BAD_OUTPUT_VALUE] ToolOutputError during execution of tool 'bad_output_error_tool': Failed to serialize tool output",
                    kind=ErrorKind.TOOL_RUNTIME_BAD_OUTPUT_VALUE,
                    status_code=500,
                    developer_message=None,  # can't guarantee this will be the same
                )
            ),
            id="bad_output_type",
        ),
        pytest.param(
            typeddict_output_tool,
            {},
            ToolCallOutput(value={"status": "success", "count": 3, "items": ["a", "b", "c"]}),
            id="typeddict_output",
        ),
        pytest.param(
            list_typeddict_output_tool,
            {},
            ToolCallOutput(
                value=[
                    {"status": "first", "count": 1, "items": ["x"]},
                    {"status": "second", "count": 2, "items": ["y", "z"]},
                ]
            ),
            id="list_typeddict_output",
        ),
        pytest.param(
            dict_output_tool,
            {},
            ToolCallOutput(value={"key": "value", "number": 42, "nested": {"inner": "data"}}),
            id="dict_output",
        ),
    ],
)
async def test_tool_executor(tool_func, inputs, expected_output):
    """Run one fixture tool through ToolExecutor and compare with the expectation.

    Each case supplies the tool function, the keyword inputs passed to the
    executor, and the ToolCallOutput the run is expected to produce.
    """
    # Resolve the registered definition, then the catalog entry that carries
    # the input/output models for it.
    definition = catalog.find_tool_by_func(tool_func)
    registered_tool = catalog.get_tool(definition.get_fully_qualified_name())

    # A default-constructed context is all these fixtures are given.
    empty_context = ToolContext()

    actual_output = await ToolExecutor.run(
        func=tool_func,
        definition=definition,
        input_model=registered_tool.input_model,
        output_model=registered_tool.output_model,
        context=empty_context,
        **inputs,
    )

    check_output(actual_output, expected_output)
|
|
|
|
|
|
def check_output_error(output_error: ToolCallError, expected_error: ToolCallError):
    """Assert that an actual tool error matches the expected one field by field.

    Fields are compared in a fixed order and the first mismatch raises
    AssertionError with a short label. ``developer_message`` and
    ``stacktrace`` are only compared when the expected error provides a
    truthy value for them.
    """
    # (attribute, failure label, compare only when the expected value is truthy)
    comparisons = (
        ("message", "message mismatch", False),
        ("kind", "kind mismatch", False),
        ("developer_message", "developer message mismatch", True),
        ("can_retry", "can retry mismatch", False),
        ("additional_prompt_content", "additional prompt content mismatch", False),
        ("retry_after_ms", "retry after ms mismatch", False),
        ("stacktrace", "stacktrace mismatch", True),
        ("status_code", "status code mismatch", False),
        ("extra", "extra mismatch", False),
    )

    for attribute, label, only_when_expected in comparisons:
        wanted = getattr(expected_error, attribute)
        if only_when_expected and not wanted:
            # The expectation left this field unset, so it is not checked.
            continue
        assert getattr(output_error, attribute) == wanted, label
|
|
|
|
|
|
def check_output(output: ToolCallOutput, expected_output: ToolCallOutput):
|
|
# error in ToolCallOutput
|
|
if output.error:
|
|
check_output_error(output.error, expected_output.error)
|
|
|
|
# normal tool execution
|
|
else:
|
|
assert output.value == expected_output.value
|
|
|
|
# check logs
|
|
output_logs = output.logs or []
|
|
expected_logs = expected_output.logs or []
|
|
assert len(output_logs) == len(expected_logs)
|
|
for output_log, expected_log in zip(output_logs, expected_logs, strict=False):
|
|
assert output_log.message == expected_log.message
|
|
assert output_log.level == expected_log.level
|
|
assert output_log.subtype == expected_log.subtype
|