arcade-mcp/libs/tests/core/test_schema_validation.py
Eric Gustin f4558ef3a8
Tool Error Handling (#539)
# Improvements to Arcade TDK Error Handling
I tried my very best to not make any breaking changes in this PR. So,
you will notice various "Deprecation" notices throughout.

### Instructions for PR reviewers
1. Pull down this PR's branch
2. Pull down the Engine's tool error handling PR's branch
3. Update your installed arcadepy to have the following:
- In `arcadepy/resources/tools/tools.py`, if you want to test out
including stacktraces, update `ToolsResource.execute` to accept an
`include_error_stacktrace` argument and, inside that method's body,
include `include_error_stacktrace` in the POST request sent to the
Engine.
- In `arcadepy/types/execute_tool_response.py` add the following enum
      ```py
      class ErrorKind(str, Enum):
          """Error kind that is comprised of
          - the who (toolkit, tool, upstream)
          - the when (load time, definition parsing time, runtime)
          - the what (bad_definition, bad_input, bad_output, retry,
            context_required, fatal, etc.)"""

          TOOLKIT_LOAD_FAILED = "TOOLKIT_LOAD_FAILED"
          TOOL_DEFINITION_BAD_DEFINITION = "TOOL_DEFINITION_BAD_DEFINITION"
          TOOL_DEFINITION_BAD_INPUT_SCHEMA = "TOOL_DEFINITION_BAD_INPUT_SCHEMA"
          TOOL_DEFINITION_BAD_OUTPUT_SCHEMA = "TOOL_DEFINITION_BAD_OUTPUT_SCHEMA"
          TOOL_RUNTIME_BAD_INPUT_VALUE = "TOOL_RUNTIME_BAD_INPUT_VALUE"
          TOOL_RUNTIME_BAD_OUTPUT_VALUE = "TOOL_RUNTIME_BAD_OUTPUT_VALUE"
          TOOL_RUNTIME_RETRY = "TOOL_RUNTIME_RETRY"
          TOOL_RUNTIME_CONTEXT_REQUIRED = "TOOL_RUNTIME_CONTEXT_REQUIRED"
          TOOL_RUNTIME_FATAL = "TOOL_RUNTIME_FATAL"
          UPSTREAM_RUNTIME_BAD_REQUEST = "UPSTREAM_RUNTIME_BAD_REQUEST"
          UPSTREAM_RUNTIME_AUTH_ERROR = "UPSTREAM_RUNTIME_AUTH_ERROR"
          UPSTREAM_RUNTIME_NOT_FOUND = "UPSTREAM_RUNTIME_NOT_FOUND"
          UPSTREAM_RUNTIME_VALIDATION_ERROR = "UPSTREAM_RUNTIME_VALIDATION_ERROR"
          UPSTREAM_RUNTIME_RATE_LIMIT = "UPSTREAM_RUNTIME_RATE_LIMIT"
          UPSTREAM_RUNTIME_SERVER_ERROR = "UPSTREAM_RUNTIME_SERVER_ERROR"
          UPSTREAM_RUNTIME_UNMAPPED = "UPSTREAM_RUNTIME_UNMAPPED"
          UNKNOWN = "UNKNOWN"
      ```
- In `arcadepy/types/execute_tool_response.py` add the following fields
to OutputError:
      ```py
      kind: ErrorKind
      status_code: Optional[int] = None
      stacktrace: Optional[str] = None
      extra: Optional[dict[str, Any]] = None
      ```
### Example Client Usage
```py
# Example of handling an upstream rate limit
error = response.output.error
if error and error.kind == ErrorKind.UPSTREAM_RUNTIME_RATE_LIMIT:
    sleep_time = error.retry_after_ms / 1000
    time.sleep(sleep_time)
    # and then execute again
```
```py
# Examples of determining what type of runtime error it is
error = response.output.error
if error:
    is_retryable_error = error.kind == ErrorKind.TOOL_RUNTIME_RETRY
    is_a_bug_in_the_tool = error.kind == ErrorKind.TOOL_RUNTIME_FATAL
    is_additional_context_required = error.kind == ErrorKind.TOOL_RUNTIME_CONTEXT_REQUIRED
```

### Example Tool Usage
```py
# EXAMPLE 1 letting Arcade handle upstream error handling for you
reddit_client.post(params) # Arcade's httpx adapter will handle error handling for you!

# ------------------------------------

# EXAMPLE 2 handling upstream bad request yourself, but letting Arcade handle the rest
try:
    reddit_client.post(params)
except httpx.HTTPStatusError as e:
    if e.response.status_code == 400:
        raise UpstreamError("My extra custom message") from e
    raise
```
```py
# EXAMPLE 1 letting Arcade handle it for you
risky_element = my_risky_list[42] # Arcade will raise a FatalToolError for you

# ------------------------------------

# EXAMPLE 2 handling it yourself for extra flexibility
try:
    risky_element = my_risky_list[42]
except IndexError as e:
    raise FatalToolError("My extra custom message") from e
```
### Non-runtime Error Message Examples
Example ToolkitLoadError Messages:
```
- [TOOLKIT_LOAD_FAILED] ToolkitLoadError when loading toolkit 'sample_tool': Could not import module mock_module. Reason: Mock import error
- [TOOLKIT_LOAD_FAILED] ToolkitLoadError when loading toolkit 'test_toolkit': Tool 'ValidTool' in toolkit 'test_toolkit' already exists in the catalog.
```
Example ToolDefinitionError Messages
```
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_missing_description': Tool 'tool_missing_description' is missing a description
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_invalid_secret_type': Secret keys must be strings (error in tool ToolWithInvalidSecretType).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_empty_secret': Secrets must have a non-empty key (error in tool ToolWithEmptySecret).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_invalid_metadata_type': Metadata must be strings (error in tool ToolWithInvalidMetadataType).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_metadata_requiring_auth_without_auth': Tool ToolWithMetadataRequiringAuthWithoutAuth declares metadata key 'client_id', which requires that the tool has an auth requirement, but no auth requirement was provided. Please specify an auth requirement.
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_empty_metadata': Metadata must have a non-empty key (error in tool ToolWithEmptyMetadata).
- [TOOL_DEFINITION_BAD_DEFINITION] ToolDefinitionError in definition of tool 'tool_with_unsupported_param_type': Unsupported parameter type: <class 'test_catalog.MyFancyTestClass'>
```
Example ToolInputSchemaError Messages
```
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_missing_input_parameter_annotation': Parameter 'input_text' is missing a description
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_no_type_annotation': Parameter param has no type annotation.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_invalid_param_name': Invalid parameter name: '123invalid' is not a valid identifier. Identifiers must start with a letter or underscore, and can only contain letters, digits, or underscores.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_too_many_annotations': Parameter param: Annotated[str, 'name', 'desc', 'extra'] has too many string annotations. Expected 0, 1, or 2, got 3.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_required_union_param': Parameter param is a union type. Only optional types are supported.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_non_callable_default_factory': Default factory for parameter param: Annotated[str, 'Parameter'] = FieldInfo(annotation=NoneType, required=False, default_factory=str) is not callable.
- [TOOL_DEFINITION_BAD_INPUT_SCHEMA] ToolInputSchemaError in definition of tool 'tool_with_multiple_tool_contexts': Only one ToolContext parameter is supported, but tool tool_with_multiple_tool_contexts has multiple.
```
Example ToolOutputSchemaError Messages
```
- [TOOL_DEFINITION_BAD_OUTPUT_SCHEMA] ToolOutputSchemaError in definition of tool 'tool_missing_return_type_hint': Tool 'ToolMissingReturnTypeHint' must have a return type
- [TOOL_DEFINITION_BAD_OUTPUT_SCHEMA] ToolOutputSchemaError in definition of tool 'tool_with_unsupported_output_type': Unsupported output type '<class 'test_catalog.MyFancyTestClass'>'. Only built-in Python types, TypedDicts, Pydantic models, and standard collections are supported as tool output types.
```
### Runtime Error Message Examples
Example Tool Runtime Error Messages
```
- [TOOL_RUNTIME_FATAL] FatalToolError during execution of tool 'get_posts_in_subreddit': list index out of range
- [TOOL_RUNTIME_CONTEXT_REQUIRED] ContextRequiredToolError during execution of tool 'get_posts_in_subreddit': Ambiguous username. Please provide a more specific username
- [TOOL_RUNTIME_RETRY] RetryableToolError during execution of tool 'get_posts_in_subreddit': Retry with subreddit=learnpython or subreddit=learnprogramming
```

Example Upstream Runtime Error Messages
```
- [UPSTREAM_RUNTIME_RATE_LIMIT] UpstreamRateLimitError during execution of tool 'get_posts_in_subreddit': 429 Client Error: Too Many Requests
- [UPSTREAM_RUNTIME_BAD_REQUEST] UpstreamError during execution of tool 'get_posts_in_subreddit': 400 Client Error: Bad request. Missing 'id' parameter.
- [UPSTREAM_RUNTIME_BAD_REQUEST] UpstreamError during execution of tool 'search_files': Upstream Google API error: Invalid value '-23'. Values must be within the range: [value: 1\n, value: 1000\n]
```
2025-09-10 10:45:18 -07:00

198 lines
6.9 KiB
Python

"""
Tests for ToolCallOutput schema validation with complex types.
"""
import pytest
from arcade_core.errors import ErrorKind
from arcade_core.schema import ToolCallError, ToolCallLog, ToolCallOutput
from pydantic import ValidationError
class TestToolCallOutputValidation:
    """Exercise ``ToolCallOutput`` construction across the value shapes the tests expect it to accept."""

    def test_basic_types(self):
        """Check that scalar values and ``None`` are accepted and read back as given."""
        # String, integer and float are compared by equality.
        for scalar in ("test string", 42, 3.14):
            result = ToolCallOutput(value=scalar)
            assert result.value == scalar

        # Boolean: identity check so that a truthy non-bool would not pass.
        flag_result = ToolCallOutput(value=True)
        assert flag_result.value is True

        # None
        empty_result = ToolCallOutput(value=None)
        assert empty_result.value is None

    def test_dict_types(self):
        """Check that simple, nested, empty and mixed-content dicts are accepted."""
        # Simple dict, nested dict, empty dict: each must read back equal to the input.
        for payload in ({"key": "value"}, {"outer": {"inner": "value"}}, {}):
            assert ToolCallOutput(value=payload).value == payload

        # Dict whose values span every JSON-like type.
        mixed_payload = {
            "string": "text",
            "number": 123,
            "float": 45.6,
            "bool": True,
            "null": None,
            "list": [1, 2, 3],
            "dict": {"nested": "value"},
        }
        stored = ToolCallOutput(value=mixed_payload).value
        assert stored["string"] == "text"
        assert stored["number"] == 123
        assert stored["list"] == [1, 2, 3]

    def test_list_types(self):
        """Check that lists of strings, ints, dicts, mixed items and nested lists are accepted."""
        # List of strings (original type), list of integers,
        # list of dicts (what a TypedDict is at runtime).
        leading_cases = (
            ["a", "b", "c"],
            [1, 2, 3],
            [{"id": 1, "name": "first"}, {"id": 2, "name": "second"}],
        )
        for items in leading_cases:
            assert ToolCallOutput(value=items).value == items

        # Mixed type list: only the length and the trailing dict are checked.
        mixed_items = [1, "two", 3.0, True, None, {"key": "value"}]
        stored_mixed = ToolCallOutput(value=mixed_items).value
        assert len(stored_mixed) == 6
        assert stored_mixed[5] == {"key": "value"}

        # Empty list, then nested lists.
        for items in ([], [[1, 2], [3, 4], [5, 6]]):
            assert ToolCallOutput(value=items).value == items

    def test_complex_nested_structures(self):
        """Check a deeply structured payload shaped like a paginated API response."""
        alice = {
            "id": 1,
            "name": "Alice",
            "roles": ["admin", "user"],
            "metadata": {"last_login": "2024-01-01", "active": True},
        }
        bob = {
            "id": 2,
            "name": "Bob",
            "roles": ["user"],
            "metadata": {"last_login": "2024-01-02", "active": False},
        }
        api_response = {
            "status": "success",
            "data": {
                "users": [alice, bob],
                "total": 2,
                "page_info": {"page": 1, "per_page": 10, "has_next": False},
            },
            "errors": [],
        }

        stored = ToolCallOutput(value=api_response).value
        assert stored == api_response
        assert stored["data"]["users"][0]["name"] == "Alice"
        assert stored["data"]["page_info"]["has_next"] is False

    def test_error_and_logs_with_value(self):
        """Check an output carrying logs next to a dict value, and an error-only output."""
        # Dict value together with two log entries.
        log_entries = [
            ToolCallLog(message="Processing started", level="info"),
            ToolCallLog(message="Deprecation warning", level="warning", subtype="deprecation"),
        ]
        with_logs = ToolCallOutput(value={"result": "success"}, logs=log_entries)
        assert with_logs.value == {"result": "success"}
        assert len(with_logs.logs) == 2

        # Error only: no value is supplied, and value is expected to read back as None.
        failure = ToolCallError(
            message="Partial failure",
            developer_message="Some items failed to process",
            can_retry=True,
            kind=ErrorKind.TOOL_RUNTIME_RETRY,
        )
        with_error = ToolCallOutput(error=failure)
        assert with_error.error.message == "Partial failure"
        assert with_error.value is None

    def test_unsupported_types_still_fail(self):
        """Check that an arbitrary object instance is rejected with ``ValidationError``."""

        # Custom object (not dict, list, or basic type).
        class CustomClass:
            def __init__(self):
                self.data = "test"

        unsupported = CustomClass()

        # This targets Pydantic validation on the model itself; per the
        # original author's note, the output factory would reject such a
        # value earlier in the real call path.
        with pytest.raises(ValidationError):
            ToolCallOutput(value=unsupported)

    def test_very_large_structures(self):
        """Check a 1000-element list of dicts and a five-level nested dict."""
        # Large list of dicts.
        records = []
        for i in range(1000):
            records.append({"id": i, "value": f"item_{i}"})
        stored_records = ToolCallOutput(value=records).value
        assert len(stored_records) == 1000
        assert stored_records[500]["id"] == 500

        # Deeply nested structure, built from the innermost level outwards.
        nested = "deep_value"
        for level in ("level5", "level4", "level3", "level2", "level1"):
            nested = {level: nested}
        stored_nested = ToolCallOutput(value=nested).value
        assert stored_nested["level1"]["level2"]["level3"]["level4"]["level5"] == "deep_value"

    def test_json_serializable(self):
        """Check that dumped outputs survive a ``json.dumps`` / ``json.loads`` round trip."""
        import json

        samples = [
            {"type": "string"},
            ["list", "of", "strings"],
            [{"id": 1}, {"id": 2}],
            {"nested": {"data": [1, 2, 3]}},
            123,
            45.6,
            True,
            None,
        ]
        for sample in samples:
            dumped = ToolCallOutput(value=sample).model_dump()
            # Encoding must not raise, and decoding must give the value back.
            round_tripped = json.loads(json.dumps(dumped))
            assert round_tripped["value"] == sample