## Summary

- Improve tool call error messages across 4 libraries (arcade-core, arcade-tdk, arcade-mcp-server, arcade-serve) so agents can self-correct and Datadog can facet on structured fields
- Guard empty error messages, enrich input validation errors with field-level detail, fix `@tool` decorator fallback formatting, surface `additional_prompt_content` in MCP responses, and add structured log extras for Datadog
- Addresses the 3 worst error patterns: generic "Error in tool input deserialization", bare `KeyError` values, and empty `FatalToolError` messages

**Linear:** TOO-627
**Plan:** `docs/plans/2026-04-08-improve-error-messages-handoff.md`

## Tasks

- [ ] Task 1: Guard empty error messages (arcade-core)
- [ ] Task 2: Enrich input validation error messages (arcade-core)
- [ ] Task 3: Improve `@tool` decorator error fallback (arcade-tdk)
- [ ] Task 4: Fix MCP agent-facing error response (arcade-mcp-server)
- [ ] Task 5: Add structured log extras in BaseWorker (arcade-serve)
- [ ] Task 6: Add structured log extras in MCP server (arcade-mcp-server)

## Test plan

- [ ] Each task has dedicated unit tests verifying the new behavior
- [ ] `make test` passes after all tasks
- [ ] `make check` (ruff + mypy) passes
- [ ] Verify the 3 worst error patterns now produce actionable messages

🤖 Generated with [Claude Code](https://claude.com/claude-code)

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Medium Risk**
> Touches cross-library error formatting and logging behavior used in production tool execution paths; while mostly additive/guardrails, it changes agent-visible messages and Datadog log facets, which could impact client expectations and alerting.
>
> **Overview**
> Improves tool-call error handling across core/runtime, MCP transport, worker transport, and the TDK to make agent-visible failures more actionable while *reducing sensitive-data leakage*.
>
> In `arcade-core`, empty error messages now get placeholders, `ToolOutputFactory.fail*` defaults blank messages, and input validation errors are rewritten as field-level summaries that intentionally omit rejected values (avoiding Pydantic echo of secrets). The `@tool` fallback in `arcade-tdk` no longer surfaces `str(exception)` to agents; it returns exception *type-only* in `message` while preserving full detail in `developer_message`.
>
> Adds a shared `build_tool_error_log_extra` helper and updates `arcade-serve` + `arcade-mcp-server` to emit consistent structured WARNING logs (`error_*`, `tool_name`, optional toolkit/version) for Datadog, while MCP error responses now append `additional_prompt_content` and force `structuredContent=None` on failures per spec. Includes extensive new tests and bumps package versions (`arcade-core` 4.6.2, `arcade-tdk` 3.6.1, `arcade-mcp-server` 1.19.3, `arcade-serve` 3.2.3).
>
> <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit e5c7ebcaf56176cfbd8e6d1f2b6295352abd0ec0. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup>

<!-- /CURSOR_SUMMARY -->

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
162 lines
5.2 KiB
Python
162 lines
5.2 KiB
Python
from typing import Any
|
|
|
|
import pytest
|
|
from arcade_core.output import ToolOutputFactory
|
|
from pydantic import BaseModel
|
|
|
|
|
|
@pytest.fixture
def output_factory():
    """Provide a freshly constructed ``ToolOutputFactory`` to each test."""
    factory = ToolOutputFactory()
    return factory
|
|
|
|
|
|
class SampleOutputModel(BaseModel):
    # Minimal wrapper model used by the tests in this module to hand a value
    # to ``ToolOutputFactory.success`` through a single ``result`` field.

    # Typed as ``Any`` so scalars, dicts and lists can all be carried.
    result: Any
|
|
|
|
|
|
@pytest.mark.parametrize(
    "data, expected_value",
    [
        (None, ""),
        ("success", "success"),
        ("", ""),
        (123, 123),
        (0, 0),
        (123.45, 123.45),
        (True, True),
        (False, False),
    ],
)
def test_success(output_factory, data, expected_value):
    """Check that ``success`` exposes scalar results unchanged and sets no error.

    The ``None`` case is passed to the factory as ``data=None`` (no wrapper
    model) and is expected to yield an empty-string value; every other case is
    wrapped in ``SampleOutputModel`` first. Falsy inputs (``""``, ``0``,
    ``False``) are included alongside the ``None`` case so the two are
    exercised separately.
    """
    # Only wrap real values; ``None`` is forwarded as-is to exercise the
    # "no data" path of the factory.
    data_obj = SampleOutputModel(result=data) if data is not None else None
    output = output_factory.success(data=data_obj)
    assert output.value == expected_value
    assert output.error is None
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("data", "expected_value"),
    [
        # Dict types (simulating TypedDict at runtime)
        ({"name": "test", "value": 123}, {"name": "test", "value": 123}),
        ({}, {}),
        ({"nested": {"key": "value"}}, {"nested": {"key": "value"}}),
        # List types
        (["a", "b", "c"], ["a", "b", "c"]),
        ([1, 2, 3], [1, 2, 3]),
        ([], []),
        # List of dicts (simulating list[TypedDict])
        (
            [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}],
            [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}],
        ),
        ([{}], [{}]),
        # Mixed lists
        ([1, "two", 3.0, True], [1, "two", 3.0, True]),
    ],
)
def test_success_complex_types(output_factory, data, expected_value):
    """Dict and list results wrapped in a model come back as equal values."""
    wrapped = SampleOutputModel(result=data)
    result = output_factory.success(data=wrapped)

    assert result.value == expected_value
    assert result.error is None
|
|
|
|
|
|
def test_success_with_basemodel_direct(output_factory):
    """A model without a ``result`` field is surfaced as a dict of its fields."""

    class TestModel(BaseModel):
        name: str
        value: int

    expected = {"name": "test", "value": 42}
    result = output_factory.success(data=TestModel(name="test", value=42))

    assert result.value == expected
    assert result.error is None
|
|
|
|
|
|
def test_success_raw_dict(output_factory):
    """A plain dict passed directly (no wrapper model) is returned as the value."""
    payload = {"key": "value", "number": 123}

    result = output_factory.success(data=payload)

    assert result.value == payload
    assert result.error is None
|
|
|
|
|
|
def test_success_raw_list(output_factory):
    """A plain list passed directly (no wrapper model) is returned as the value."""
    items = [{"id": 1}, {"id": 2}, {"id": 3}]

    result = output_factory.success(data=items)

    assert result.value == items
    assert result.error is None
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("message", "developer_message"),
    [
        ("Error occurred", None),
        ("Error occurred", "Detailed error message"),
    ],
)
def test_fail(output_factory, message, developer_message):
    """``fail`` records both messages and marks the error as non-retryable."""
    result = output_factory.fail(
        message=message,
        developer_message=developer_message,
    )

    assert result.error is not None
    error = result.error
    assert error.message == message
    assert error.developer_message == developer_message
    assert error.can_retry is False
|
|
|
|
|
|
def test_fail_empty_message_gets_default(output_factory):
    """An empty message passed to ``fail`` is replaced by the default text."""
    expected = "Unspecified error during tool execution"

    result = output_factory.fail(message="")

    assert result.error is not None
    assert result.error.message == expected
|
|
|
|
|
|
def test_fail_whitespace_message_gets_default(output_factory):
    """A whitespace-only message passed to ``fail`` gets the default text."""
    expected = "Unspecified error during tool execution"

    result = output_factory.fail(message=" ")

    assert result.error is not None
    assert result.error.message == expected
|
|
|
|
|
|
def test_fail_nonempty_message_unchanged(output_factory):
    """A non-blank message passed to ``fail`` is kept verbatim."""
    supplied = "real error"

    result = output_factory.fail(message=supplied)

    assert result.error is not None
    assert result.error.message == "real error"
|
|
|
|
|
|
def test_fail_retry_empty_message_gets_default(output_factory):
    """An empty message passed to ``fail_retry`` is replaced by the default text."""
    expected = "Unspecified error during tool execution"

    result = output_factory.fail_retry(message="")

    assert result.error is not None
    assert result.error.message == expected
|
|
|
|
|
|
def test_fail_retry_whitespace_message_gets_default(output_factory):
    """A whitespace-only message passed to ``fail_retry`` gets the default text."""
    expected = "Unspecified error during tool execution"

    result = output_factory.fail_retry(message=" ")

    assert result.error is not None
    assert result.error.message == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("message", "developer_message", "additional_prompt_content", "retry_after_ms"),
    [
        ("Retry error", None, None, None),
        ("Retry error", "Retrying", "Please try again with this additional data: foobar", 1000),
    ],
)
def test_fail_retry(
    output_factory, message, developer_message, additional_prompt_content, retry_after_ms
):
    """``fail_retry`` stores every supplied field and marks the error retryable."""
    retry_kwargs = {
        "message": message,
        "developer_message": developer_message,
        "additional_prompt_content": additional_prompt_content,
        "retry_after_ms": retry_after_ms,
    }
    result = output_factory.fail_retry(**retry_kwargs)

    assert result.error is not None
    error = result.error
    assert error.message == message
    assert error.developer_message == developer_message
    assert error.can_retry is True
    assert error.additional_prompt_content == additional_prompt_content
    assert error.retry_after_ms == retry_after_ms
|