arcade-mcp/libs/arcade-serve/arcade_serve/core/components.py
Francisco Or Something dc4607daa4
feat(telemetry): add developer messages to tool error spans (#831)
## Summary
- Add shared span attributes for tool error diagnostics, including
developer-facing messages when present.
- Wire those attributes through MCP server, worker RunTool, and HTTP
CallTool spans while keeping default MCP response content public-only.
- Cover no-leak response behavior, non-recording spans, outputless
worker responses, and the shared attribute contract.

## Verification
- `uv run ruff format ...`
- `uv run ruff check ...`
- `uv run pytest -W ignore
libs/tests/arcade_mcp_server/test_debug_exposure_integration.py
libs/tests/core/test_log_extras.py
libs/tests/worker/test_worker_base.py`

Made with [Cursor](https://cursor.com)

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Medium Risk**
> Adds new telemetry attributes that propagate tool error messages
(including optional developer_message) into active spans across MCP
server and worker execution paths; risk is mainly around potential
leakage of sensitive developer messages into tracing backends and
changes to observability contracts.
> 
> **Overview**
> Adds a shared
`arcade_core.log_extras.build_tool_error_span_attributes()` helper and
wires it into tool error paths so the current OpenTelemetry span is
annotated with stable `tool_error_*` attributes (including
`developer_message` when present).
> 
> MCP tool calls now record these span attributes on failure while
keeping default MCP response content sanitized, and `arcade-serve`
records the same attributes on both `RunTool` and HTTP `CallTool` spans
(handling `output=None`). Versions and dependency constraints are bumped
to consume the new core helper, with tests added/updated to lock the
span-attribute contract and verify behavior for non-recording spans and
no-leak responses.
> 
> <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit
33a53991d72140a662152f508dc53e9b769b9f07. Bugbot is set up for automated
code reviews on this repo. Configure
[here](https://www.cursor.com/dashboard/bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
2026-04-29 20:41:07 -03:00

111 lines
3.5 KiB
Python

from arcade_core.log_extras import build_tool_error_span_attributes
from arcade_core.schema import (
ToolCallRequest,
ToolCallResponse,
)
from opentelemetry import trace
from arcade_serve.core.common import (
CatalogResponse,
HealthCheckResponse,
RequestData,
Router,
Worker,
WorkerComponent,
)
class CatalogComponent(WorkerComponent):
    """Worker component that exposes the worker's tool catalog on ``GET tools``."""

    def __init__(self, worker: Worker) -> None:
        self.worker = worker

    def register(self, router: Router) -> None:
        """
        Attach this component to ``router`` as the handler of the ``tools`` GET route.
        """
        route_options = {
            "method": "GET",
            "response_type": CatalogResponse,
            "operation_id": "get_catalog",
            "description": "Get the catalog of tools",
            "summary": "Get the catalog of tools",
            "tags": ["Arcade"],
        }
        router.add_route("tools", self, **route_options)

    async def __call__(self, request: RequestData) -> CatalogResponse:
        """
        Return the worker's catalog; the lookup runs inside a "Catalog" span.
        """
        catalog_tracer = trace.get_tracer(__name__)
        with catalog_tracer.start_as_current_span("Catalog"):
            catalog = self.worker.get_catalog()
        return catalog
class CallToolComponent(WorkerComponent):
    """Worker component that invokes a tool on ``POST tools/invoke``."""

    def __init__(self, worker: Worker) -> None:
        self.worker = worker

    def register(self, router: Router) -> None:
        """
        Attach this component to ``router`` as the handler of the ``tools/invoke`` POST route.
        """
        route_options = {
            "method": "POST",
            "response_type": ToolCallResponse,
            "operation_id": "call_tool",
            "description": "Call a tool",
            "summary": "Call a tool",
            "tags": ["Arcade"],
        }
        router.add_route("tools/invoke", self, **route_options)

    async def __call__(self, request: RequestData) -> ToolCallResponse:
        """
        Validate the request body as a ToolCallRequest, run it on the worker,
        and return the worker's response.

        The work happens inside a "CallTool" span that is tagged with the tool
        identity, the worker environment (when the worker has one), and, if the
        response carries an error, the tool error span attributes.
        """
        call_tracer = trace.get_tracer(__name__)
        with call_tracer.start_as_current_span("CallTool") as span:
            tool_request = ToolCallRequest.model_validate(request.body_json)

            # Tag the span with the identity of the tool being invoked.
            tool_ref = tool_request.tool
            identity_attributes = (
                ("tool_name", tool_ref.name),
                ("toolkit_version", tool_ref.version),
                ("toolkit_name", tool_ref.toolkit),
            )
            for attr_name, attr_value in identity_attributes:
                span.set_attribute(attr_name, str(attr_value))

            # Not every worker defines an environment, so probe before reading it.
            if hasattr(self.worker, "environment"):
                span.set_attribute("environment", self.worker.environment)

            result = await self.worker.call_tool(tool_request)

            # The response may have no output at all; only a present, truthy
            # error contributes diagnostics to the span.
            tool_error = result.output.error if result.output else None
            if tool_error:
                error_attributes = build_tool_error_span_attributes(tool_error)
                for attr_name, attr_value in error_attributes.items():
                    span.set_attribute(attr_name, attr_value)

            return result
class HealthCheckComponent(WorkerComponent):
    """Worker component that reports worker health on ``GET health``."""

    def __init__(self, worker: Worker) -> None:
        self.worker = worker

    def register(self, router: Router) -> None:
        """
        Attach this component to ``router`` as the handler of the ``health`` GET route.

        The route is registered with ``require_auth=False``.
        """
        route_options = {
            "method": "GET",
            "response_type": HealthCheckResponse,
            "operation_id": "health_check",
            "description": "Health check",
            "summary": "Health check",
            "tags": ["Arcade"],
            "require_auth": False,
        }
        router.add_route("health", self, **route_options)

    async def __call__(self, request: RequestData) -> HealthCheckResponse:
        """
        Return the result of the worker's own health check.
        """
        health_status = self.worker.health_check()
        return health_status