diff --git a/libs/arcade-core/arcade_core/log_extras.py b/libs/arcade-core/arcade_core/log_extras.py index 8ab40bf9..dc248578 100644 --- a/libs/arcade-core/arcade_core/log_extras.py +++ b/libs/arcade-core/arcade_core/log_extras.py @@ -58,3 +58,15 @@ def build_tool_error_log_extra( continue extra[k] = v return extra + + +def build_tool_error_span_attributes(error: ToolCallError) -> dict[str, str]: + """Build stable span attributes for failed tool-call diagnostics.""" + kind_value = error.kind.value if hasattr(error.kind, "value") else str(error.kind) + attrs = { + "tool_error_kind": kind_value, + "tool_error_message": error.message, + } + if error.developer_message: + attrs["tool_error_developer_message"] = error.developer_message + return attrs diff --git a/libs/arcade-core/pyproject.toml b/libs/arcade-core/pyproject.toml index b5ee335f..79053d29 100644 --- a/libs/arcade-core/pyproject.toml +++ b/libs/arcade-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-core" -version = "4.7.0" +version = "4.7.1" description = "Arcade Core - Core library for Arcade platform" readme = "README.md" license = { text = "MIT" } diff --git a/libs/arcade-mcp-server/arcade_mcp_server/server.py b/libs/arcade-mcp-server/arcade_mcp_server/server.py index e5430151..b25b3e94 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/server.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/server.py @@ -22,12 +22,13 @@ from typing import Any, Callable, cast from arcade_core.auth_tokens import get_valid_access_token from arcade_core.catalog import MaterializedTool, ToolCatalog from arcade_core.executor import ToolExecutor -from arcade_core.log_extras import build_tool_error_log_extra +from arcade_core.log_extras import build_tool_error_log_extra, build_tool_error_span_attributes from arcade_core.network.org_transport import build_org_scoped_async_http_client from arcade_core.schema import ToolAuthorizationContext, ToolCallError, ToolContext from arcade_core.schema import ToolAuthRequirement as CoreToolAuthRequirement from arcadepy import ArcadeError, AsyncArcade from arcadepy.types.auth_authorize_params import AuthRequirement, AuthRequirementOauth2 +from opentelemetry import trace from arcade_mcp_server._debug_exposure import augment_error_message_for_debug from arcade_mcp_server.context import Context, get_current_model_context, set_current_model_context @@ -934,6 +935,7 @@ class MCPServer: error_text = error.message if error.additional_prompt_content: error_text += f"\n\n{error.additional_prompt_content}" + self._record_tool_error_span_attributes(error) error_text = augment_error_message_for_debug( error_text, error.developer_message, @@ -1005,6 +1007,15 @@ class MCPServer: extra=build_tool_error_log_extra(error, tool_name=tool_name), ) + def _record_tool_error_span_attributes(self, error: ToolCallError) -> None: + """Attach tool error details to the active telemetry span when present.""" + span = trace.get_current_span() + if not span or not span.is_recording(): + return + + for key, value in build_tool_error_span_attributes(error).items(): + span.set_attribute(key, value) + def _create_error_response( self, message: CallToolRequest, tool_response: dict[str, Any] ) -> JSONRPCResponse[CallToolResult]: diff --git a/libs/arcade-mcp-server/pyproject.toml b/libs/arcade-mcp-server/pyproject.toml index 42d09559..f74ce5ae 100644 --- a/libs/arcade-mcp-server/pyproject.toml +++ b/libs/arcade-mcp-server/pyproject.toml @@ -21,8 +21,8 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "arcade-core>=4.7.0,<5.0.0", - "arcade-serve>=3.2.0,<4.0.0", + "arcade-core>=4.7.1,<5.0.0", + "arcade-serve>=3.2.4,<4.0.0", "arcade-tdk>=3.7.0,<4.0.0", "arcadepy>=1.5.0", "pydantic>=2.0.0", diff --git a/libs/arcade-serve/arcade_serve/core/base.py b/libs/arcade-serve/arcade_serve/core/base.py index 40b5dae5..35b892bd 100644 --- a/libs/arcade-serve/arcade_serve/core/base.py +++ b/libs/arcade-serve/arcade_serve/core/base.py @@ -6,7 +6,7 @@ from typing import Any, Callable, ClassVar from arcade_core.catalog import ToolCatalog, Toolkit from arcade_core.executor import ToolExecutor -from arcade_core.log_extras import build_tool_error_log_extra +from arcade_core.log_extras import build_tool_error_log_extra, build_tool_error_span_attributes from arcade_core.schema import ( ToolCallRequest, ToolCallResponse, @@ -149,6 +149,9 @@ class BaseWorker(Worker): context=tool_request.context, **tool_request.inputs or {}, ) + if output.error: + for key, value in build_tool_error_span_attributes(output.error).items(): + current_span.set_attribute(key, value) end_time = time.time() # End time in seconds duration_ms = (end_time - start_time) * 1000 # Convert to milliseconds diff --git a/libs/arcade-serve/arcade_serve/core/components.py b/libs/arcade-serve/arcade_serve/core/components.py index 25465008..ca62dfc0 100644 --- a/libs/arcade-serve/arcade_serve/core/components.py +++ b/libs/arcade-serve/arcade_serve/core/components.py @@ -1,3 +1,4 @@ +from arcade_core.log_extras import build_tool_error_span_attributes from arcade_core.schema import ( ToolCallRequest, ToolCallResponse, @@ -76,7 +77,11 @@ class CallToolComponent(WorkerComponent): if hasattr(self.worker, "environment"): current_span.set_attribute("environment", self.worker.environment) - return await self.worker.call_tool(call_tool_request) + response = await self.worker.call_tool(call_tool_request) + if response.output and response.output.error: + for key, value in build_tool_error_span_attributes(response.output.error).items(): + current_span.set_attribute(key, value) + return response class HealthCheckComponent(WorkerComponent): diff --git a/libs/arcade-serve/pyproject.toml b/libs/arcade-serve/pyproject.toml index b4082daa..41ed75de 100644 --- a/libs/arcade-serve/pyproject.toml +++ b/libs/arcade-serve/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-serve" -version = "3.2.3" +version = "3.2.4" description = "Arcade Serve - Serving infrastructure for Arcade tools and workers" readme = "README.md" license = {text = "MIT"} @@ -19,7 +19,7 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "arcade-core>=4.0.0,<5.0.0", + "arcade-core>=4.7.1,<5.0.0", "fastapi>=0.115.3", "uvicorn>=0.30.0", "watchfiles>=1.0.5", diff --git a/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py b/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py index b7931847..0e67976a 100644 --- a/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py +++ b/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py @@ -164,6 +164,66 @@ async def test_integration_baseline_no_leak(erroring_server): assert "query='ping'" not in text +@pytest.mark.asyncio +async def test_integration_tool_error_records_developer_message_on_current_span( + erroring_server, monkeypatch +): + """Developer message stays out of MCP content but is attached to telemetry.""" + + class FakeSpan: + def __init__(self): + self.attributes = {} + + def is_recording(self): + return True + + def set_attribute(self, key, value): + self.attributes[key] = value + + span = FakeSpan() + monkeypatch.setattr("arcade_mcp_server.server.trace.get_current_span", lambda: span) + + result = await _call(erroring_server, "raises_fatal_tool_error") + text = result.content[0].text + + assert "Failed to fetch results" in text + assert "HTTP 503" not in text + assert span.attributes["tool_error_kind"] == "TOOL_RUNTIME_FATAL" + assert span.attributes["tool_error_message"].startswith("[TOOL_RUNTIME_FATAL] FatalToolError") + assert ( + span.attributes["tool_error_developer_message"] + == "[TOOL_RUNTIME_FATAL] FatalToolError during execution of tool " + "'raises_fatal_tool_error': HTTP 503 on upstream endpoint for query='ping'" + ) + + +@pytest.mark.asyncio +async def test_integration_tool_error_skips_span_attributes_when_span_not_recording( + erroring_server, monkeypatch +): + """A no-op/non-recording span should not be mutated.""" + + class FakeSpan: + def __init__(self): + self.attributes = {} + + def is_recording(self): + return False + + def set_attribute(self, key, value): + self.attributes[key] = value + + span = FakeSpan() + monkeypatch.setattr("arcade_mcp_server.server.trace.get_current_span", lambda: span) + + result = await _call(erroring_server, "raises_fatal_tool_error") + text = result.content[0].text + + assert "Failed to fetch results" in text + assert "HTTP 503" not in text + assert span.attributes == {} + + @pytest.mark.asyncio async def test_integration_boolean_rejected_no_leak(erroring_server, monkeypatch, caplog): """Boolean-looking values are rejected by the MCP boundary too.""" @@ -176,15 +236,11 @@ async def test_integration_boolean_rejected_no_leak(erroring_server, monkeypatch assert "Failed to fetch results" in text assert "[DEBUG]" not in text assert "HTTP 503" not in text - assert any( - "set to a truthy value but not to the required" in r.message for r in caplog.records - ) + assert any("set to a truthy value but not to the required" in r.message for r in caplog.records) @pytest.mark.asyncio -async def test_integration_developer_message_flag_leaks_through_mcp( - erroring_server, monkeypatch -): +async def test_integration_developer_message_flag_leaks_through_mcp(erroring_server, monkeypatch): """When the flag is set to the magic value, the MCP response `content` carries `developer_message` alongside the sanitized message.""" monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) diff --git a/libs/tests/core/test_log_extras.py b/libs/tests/core/test_log_extras.py index 5b81c513..7e5714ef 100644 --- a/libs/tests/core/test_log_extras.py +++ b/libs/tests/core/test_log_extras.py @@ -6,7 +6,7 @@ Field names are load-bearing for dashboards. import pytest from arcade_core.errors import ErrorKind -from arcade_core.log_extras import build_tool_error_log_extra +from arcade_core.log_extras import build_tool_error_log_extra, build_tool_error_span_attributes from arcade_core.schema import ToolCallError @@ -114,6 +114,23 @@ def test_developer_message_none_propagates(): assert extra["error_developer_message"] is None +def test_span_attributes_include_developer_message_when_present(): + attrs = build_tool_error_span_attributes(_err(developer_message="dev: x")) + assert attrs == { + "tool_error_kind": "TOOL_RUNTIME_FATAL", + "tool_error_message": "Spreadsheet not found", + "tool_error_developer_message": "dev: x", + } + + +def test_span_attributes_omit_empty_developer_message(): + attrs = build_tool_error_span_attributes(_err(developer_message="")) + assert attrs == { + "tool_error_kind": "TOOL_RUNTIME_FATAL", + "tool_error_message": "Spreadsheet not found", + } + + def test_status_code_none_propagates(): extra = build_tool_error_log_extra(_err(status_code=None), tool_name="t") assert "error_status_code" in extra diff --git a/libs/tests/worker/test_worker_base.py b/libs/tests/worker/test_worker_base.py index fafe738c..14befac0 100644 --- a/libs/tests/worker/test_worker_base.py +++ b/libs/tests/worker/test_worker_base.py @@ -2,13 +2,17 @@ from typing import Annotated from unittest.mock import MagicMock import pytest -from arcade_core.errors import ToolDefinitionError +from arcade_core.errors import ErrorKind, ToolDefinitionError from arcade_core.schema import ( + ToolCallError, + ToolCallOutput, ToolCallRequest, ToolCallResponse, ToolContext, ToolReference, ) +from arcade_serve.core import base as base_module +from arcade_serve.core import components as components_module from arcade_serve.core.base import BaseWorker from arcade_serve.core.common import RequestData, Router from arcade_serve.core.components import ( @@ -34,6 +38,31 @@ def error_tool(context: ToolContext) -> int: raise ValueError("Something went wrong") +class FakeSpan: + def __init__(self, name: str): + self.name = name + self.attributes: dict[str, object] = {} + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def set_attribute(self, key: str, value: object) -> None: + self.attributes[key] = value + + +class FakeTracer: + def __init__(self): + self.spans: list[FakeSpan] = [] + + def start_as_current_span(self, name: str) -> FakeSpan: + span = FakeSpan(name) + self.spans.append(span) + return span + + @pytest.fixture def mock_router(): router = MagicMock(spec=Router) @@ -55,6 +84,11 @@ def base_worker_no_auth(): return BaseWorker(disable_auth=True) +@pytest.fixture +def fake_tracer(): + return FakeTracer() + + # --- BaseWorker Tests --- @@ -153,10 +187,14 @@ async def test_call_tool_success_and_error_logs_use_same_tool_identifiers( await base_worker_no_auth.call_tool(error_req) success_line = next( - r for r in caplog.records if "exec_consistency_ok" in r.getMessage() and "success" in r.getMessage() + r + for r in caplog.records + if "exec_consistency_ok" in r.getMessage() and "success" in r.getMessage() ) error_line = next( - r for r in caplog.records if "exec_consistency_err" in r.getMessage() and "failed:" in r.getMessage() + r + for r in caplog.records + if "exec_consistency_err" in r.getMessage() and "failed:" in r.getMessage() ) # Both must use the bare tool name (".name"), NOT the full ``Toolkit.Tool`` fqname. assert "Tool SampleTool " in success_line.getMessage() @@ -193,6 +231,30 @@ async def test_call_tool_execution_error(base_worker_no_auth): assert response.output.error is not None +@pytest.mark.asyncio +async def test_call_tool_error_records_run_tool_span_attributes( + base_worker_no_auth, fake_tracer, monkeypatch +): + monkeypatch.setattr(base_module.trace, "get_tracer", lambda name: fake_tracer) + base_worker_no_auth.register_tool(error_tool, toolkit_name="error_kit") + tool_request = ToolCallRequest( + execution_id="exec_span_attrs", + tool=ToolReference(toolkit="ErrorKit", name="ErrorTool"), + inputs={}, + ) + + response = await base_worker_no_auth.call_tool(tool_request) + + assert response.success is False + run_tool_span = next(span for span in fake_tracer.spans if span.name == "RunTool") + assert run_tool_span.attributes["tool_error_kind"] == "TOOL_RUNTIME_FATAL" + assert run_tool_span.attributes["tool_error_message"].startswith( + "[TOOL_RUNTIME_FATAL] FatalToolError" + ) + assert "ValueError" in run_tool_span.attributes["tool_error_developer_message"] + assert "Something went wrong" in run_tool_span.attributes["tool_error_developer_message"] + + @pytest.mark.asyncio async def test_call_tool_error_log_text_matches_structured_extras(base_worker_no_auth, caplog): """The primary failure warning's f-string must use the same resolved @@ -212,7 +274,9 @@ async def test_call_tool_error_log_text_matches_structured_extras(base_worker_no await base_worker_no_auth.call_tool(tool_request) primary = next( - r for r in caplog.records if "exec_log_check" in r.getMessage() and "failed:" in r.getMessage() + r + for r in caplog.records + if "exec_log_check" in r.getMessage() and "failed:" in r.getMessage() ) # Text and structured extra must agree on name + version. assert "Tool ErrorTool " in primary.getMessage() @@ -243,7 +307,8 @@ async def test_call_tool_error_secondary_log_carries_full_exception_content( await base_worker_no_auth.call_tool(tool_request) secondary = [ - r for r in caplog.records + r + for r in caplog.records if "exec_dev_msg" in r.getMessage() and "Developer message:" in r.getMessage() ] assert len(secondary) == 1, "secondary 'Developer message:' log should fire once" @@ -324,6 +389,75 @@ async def test_call_tool_component_call(base_worker_no_auth): assert response.execution_id == "comp_test_exec" +@pytest.mark.asyncio +async def test_call_tool_component_allows_missing_output(): + class OutputlessWorker: + async def call_tool(self, call_tool_request): + return ToolCallResponse( + execution_id="comp_outputless_exec", + duration=1, + finished_at="2026-01-01T00:00:00", + success=False, + output=None, + ) + + component = CallToolComponent(OutputlessWorker()) + mock_request = MagicMock(spec=RequestData) + mock_request.body_json = { + "execution_id": "comp_outputless_exec", + "tool": ToolReference(toolkit="TestKit", name="SampleTool").model_dump(), + "inputs": {}, + } + + response = await component(mock_request) + + assert response.success is False + assert response.output is None + + +@pytest.mark.asyncio +async def test_call_tool_component_error_records_call_tool_span_attributes( + fake_tracer, monkeypatch +): + monkeypatch.setattr(components_module.trace, "get_tracer", lambda name: fake_tracer) + + class ErrorWorker: + environment = "test" + + async def call_tool(self, call_tool_request): + return ToolCallResponse( + execution_id="comp_error_exec", + duration=1, + finished_at="2026-01-01T00:00:00", + success=False, + output=ToolCallOutput( + error=ToolCallError( + kind=ErrorKind.TOOL_RUNTIME_FATAL, + message="public component failure", + developer_message="component developer details", + ), + ), + ) + + component = CallToolComponent(ErrorWorker()) + mock_request = MagicMock(spec=RequestData) + mock_request.body_json = { + "execution_id": "comp_error_exec", + "tool": ToolReference(toolkit="TestKit", name="SampleTool").model_dump(), + "inputs": {}, + } + + response = await component(mock_request) + + assert response.success is False + call_tool_span = next(span for span in fake_tracer.spans if span.name == "CallTool") + assert call_tool_span.attributes["tool_error_kind"] == "TOOL_RUNTIME_FATAL" + assert call_tool_span.attributes["tool_error_message"] == "public component failure" + assert ( + call_tool_span.attributes["tool_error_developer_message"] == "component developer details" + ) + + @pytest.mark.asyncio async def test_health_check_component_call(base_worker_no_auth): component = HealthCheckComponent(base_worker_no_auth) diff --git a/pyproject.toml b/pyproject.toml index 9eef8a8d..267517a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,8 +19,8 @@ requires-python = ">=3.10" dependencies = [ # CLI dependencies - "arcade-mcp-server>=1.17.4,<2.0.0", - "arcade-core>=4.4.2,<5.0.0", + "arcade-mcp-server>=1.21.1,<2.0.0", + "arcade-core>=4.7.1,<5.0.0", "typer==0.10.0", "rich>=14.0.0,<15.0.0", "Jinja2==3.1.6",