## Summary - Improve tool call error messages across 4 libraries (arcade-core, arcade-tdk, arcade-mcp-server, arcade-serve) so agents can self-correct and Datadog can facet on structured fields - Guard empty error messages, enrich input validation errors with field-level detail, fix `@tool` decorator fallback formatting, surface `additional_prompt_content` in MCP responses, and add structured log extras for Datadog - Addresses the 3 worst error patterns: generic "Error in tool input deserialization", bare `KeyError` values, and empty `FatalToolError` messages **Linear:** TOO-627 **Plan:** `docs/plans/2026-04-08-improve-error-messages-handoff.md` ## Tasks - [ ] Task 1: Guard empty error messages (arcade-core) - [ ] Task 2: Enrich input validation error messages (arcade-core) - [ ] Task 3: Improve `@tool` decorator error fallback (arcade-tdk) - [ ] Task 4: Fix MCP agent-facing error response (arcade-mcp-server) - [ ] Task 5: Add structured log extras in BaseWorker (arcade-serve) - [ ] Task 6: Add structured log extras in MCP server (arcade-mcp-server) ## Test plan - [ ] Each task has dedicated unit tests verifying the new behavior - [ ] `make test` passes after all tasks - [ ] `make check` (ruff + mypy) passes - [ ] Verify the 3 worst error patterns now produce actionable messages 🤖 Generated with [Claude Code](https://claude.com/claude-code) <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Touches cross-library error formatting and logging behavior used in production tool execution paths; while mostly additive/guardrails, it changes agent-visible messages and Datadog log facets, which could impact client expectations and alerting. > > **Overview** > Improves tool-call error handling across core/runtime, MCP transport, worker transport, and the TDK to make agent-visible failures more actionable while *reducing sensitive-data leakage*. > > In `arcade-core`, empty error messages now get placeholders, `ToolOutputFactory.fail*` defaults blank messages, and input validation errors are rewritten as field-level summaries that intentionally omit rejected values (avoiding Pydantic echo of secrets). The `@tool` fallback in `arcade-tdk` no longer surfaces `str(exception)` to agents; it returns exception *type-only* in `message` while preserving full detail in `developer_message`. > > Adds a shared `build_tool_error_log_extra` helper and updates `arcade-serve` + `arcade-mcp-server` to emit consistent structured WARNING logs (`error_*`, `tool_name`, optional toolkit/version) for Datadog, while MCP error responses now append `additional_prompt_content` and force `structuredContent=None` on failures per spec. Includes extensive new tests and bumps package versions (`arcade-core` 4.6.2, `arcade-tdk` 3.6.1, `arcade-mcp-server` 1.19.3, `arcade-serve` 3.2.3). > > <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit e5c7ebcaf56176cfbd8e6d1f2b6295352abd0ec0. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
141 lines
5.5 KiB
Python
141 lines
5.5 KiB
Python
from typing import Annotated
|
|
|
|
import pytest
|
|
from arcade_core.catalog import ToolCatalog
|
|
from arcade_core.executor import ToolExecutor
|
|
from arcade_core.schema import ToolContext
|
|
from arcade_tdk import tool
|
|
|
|
|
|
catalog = ToolCatalog()
|
|
|
|
|
|
@tool
|
|
def keyerror_tool() -> Annotated[str, "output"]:
|
|
"""Tool that raises a bare KeyError"""
|
|
raise KeyError("x")
|
|
|
|
|
|
@tool
|
|
def empty_exception_tool() -> Annotated[str, "output"]:
|
|
"""Tool that raises an exception with no message"""
|
|
raise Exception()
|
|
|
|
|
|
@tool
|
|
def fallback_with_secret_in_exception_tool() -> Annotated[str, "output"]:
|
|
"""Tool that raises an exception whose ``str()`` contains a fake secret.
|
|
|
|
Used to verify the strict data-leak policy: the secret must NEVER appear
|
|
in the agent-facing ``message``. It is allowed in ``developer_message``,
|
|
which is server-side only (Datadog facet, never returned to the MCP client).
|
|
"""
|
|
raise RuntimeError("Bad credentials for alice: password=hunter2_SECRET_PII")
|
|
|
|
|
|
catalog.add_tool(keyerror_tool, "ErrorFallbackToolkit")
|
|
catalog.add_tool(empty_exception_tool, "ErrorFallbackToolkit")
|
|
catalog.add_tool(fallback_with_secret_in_exception_tool, "ErrorFallbackToolkit")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_fallback_message_contains_type_but_not_exception_string():
|
|
"""The agent-facing ``message`` must include the exception **type**
|
|
(so agents know what class of failure occurred) but must NOT include
|
|
``str(exception)`` content — that's what the data-leak policy reserves
|
|
for ``developer_message``."""
|
|
tool_definition = catalog.find_tool_by_func(keyerror_tool)
|
|
full_tool = catalog.get_tool(tool_definition.get_fully_qualified_name())
|
|
|
|
output = await ToolExecutor.run(
|
|
func=keyerror_tool,
|
|
definition=tool_definition,
|
|
input_model=full_tool.input_model,
|
|
output_model=full_tool.output_model,
|
|
context=ToolContext(),
|
|
)
|
|
|
|
assert output.error is not None
|
|
# Type IS present (safe: class names are source-defined).
|
|
assert "KeyError" in output.error.message
|
|
# The hint to use FatalToolError is present.
|
|
assert "FatalToolError" in output.error.message
|
|
# The exception string content (``str(KeyError('x'))`` == ``"'x'"``) is NOT.
|
|
# Stricter: no quoted argument representation should appear.
|
|
assert "'x'" not in output.error.message
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_fallback_message_never_leaks_exception_str_content():
|
|
"""Strict data-leak policy: even if the tool author embeds secrets in
|
|
``str(exception)``, the agent-facing ``message`` MUST NOT contain them.
|
|
The full content is preserved in ``developer_message`` (server-side
|
|
logs only) so on-call engineers retain debugging context."""
|
|
tool_definition = catalog.find_tool_by_func(fallback_with_secret_in_exception_tool)
|
|
full_tool = catalog.get_tool(tool_definition.get_fully_qualified_name())
|
|
|
|
output = await ToolExecutor.run(
|
|
func=fallback_with_secret_in_exception_tool,
|
|
definition=tool_definition,
|
|
input_model=full_tool.input_model,
|
|
output_model=full_tool.output_model,
|
|
context=ToolContext(),
|
|
)
|
|
|
|
assert output.error is not None
|
|
|
|
# ── Agent-facing channel: nothing from str(exception) leaks ──
|
|
assert "hunter2_SECRET_PII" not in output.error.message
|
|
assert "alice" not in output.error.message
|
|
assert "password=" not in output.error.message
|
|
assert "Bad credentials" not in output.error.message
|
|
# Type still present so the agent knows what failed.
|
|
assert "RuntimeError" in output.error.message
|
|
|
|
# ── Server-side log channel: full content preserved for debugging ──
|
|
assert output.error.developer_message is not None
|
|
assert "hunter2_SECRET_PII" in output.error.developer_message
|
|
assert "RuntimeError" in output.error.developer_message
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_fallback_developer_message_carries_full_exception_content():
|
|
"""``developer_message`` (server-side log only) is where verbose exception
|
|
content lives — type + str(exception) — so engineers can debug."""
|
|
tool_definition = catalog.find_tool_by_func(keyerror_tool)
|
|
full_tool = catalog.get_tool(tool_definition.get_fully_qualified_name())
|
|
|
|
output = await ToolExecutor.run(
|
|
func=keyerror_tool,
|
|
definition=tool_definition,
|
|
input_model=full_tool.input_model,
|
|
output_model=full_tool.output_model,
|
|
context=ToolContext(),
|
|
)
|
|
|
|
assert output.error is not None
|
|
assert output.error.developer_message is not None
|
|
assert "KeyError" in output.error.developer_message
|
|
# The KeyError argument IS present in developer_message (vs absent in message).
|
|
assert "'x'" in output.error.developer_message
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_fallback_empty_exception_developer_message_marks_no_details():
|
|
"""Empty exceptions (``raise Exception()``) get a ``(no exception message)``
|
|
marker in ``developer_message`` so on-call engineers can distinguish
|
|
'nothing to log' from 'log was lost'."""
|
|
tool_definition = catalog.find_tool_by_func(empty_exception_tool)
|
|
full_tool = catalog.get_tool(tool_definition.get_fully_qualified_name())
|
|
|
|
output = await ToolExecutor.run(
|
|
func=empty_exception_tool,
|
|
definition=tool_definition,
|
|
input_model=full_tool.input_model,
|
|
output_model=full_tool.output_model,
|
|
context=ToolContext(),
|
|
)
|
|
|
|
assert output.error is not None
|
|
assert output.error.developer_message is not None
|
|
assert "Exception (no exception message)" in output.error.developer_message
|