arcade-mcp/libs/tests/arcade_mcp_server/test_telemetry_middleware.py
Sankara R. Avula 78c8e6fb99
feat: Add TelemetryPassbackMiddleware for serverExecutionTelemetry capability (#797)
**Implements**: [SEP-2448: server execution telemetry]
(https://github.com/modelcontextprotocol/modelcontextprotocol/pull/2448)



**Description:**

**The Observability Gap (The Problem)**

MCP clients propagate trace context to servers, but server-side
execution remains a black box. The client sees a single tools/call or
resources/read span; everything the server does (auth checks, policy
evaluation, API calls, sub-tool invocations) is invisible. In
cross-organization deployments, clients and servers use separate
observability backends with no shared collector access, making
traditional span export useless.

<img width="1015" height="450" alt="Screenshot 2026-03-23 at 3 43 21 PM"
src="https://github.com/user-attachments/assets/58c817b5-fee6-46a3-9877-d523a25368ad"
/>


**Server Execution Telemetry (The Solution)**

Servers advertise serverExecutionTelemetry and return a curated slice of
their execution spans directly in _meta.otel of the response. Clients
ingest these verbatim OTLP spans into their own collector, stitching
server-side execution into their distributed trace; no shared
infrastructure required. The black box becomes transparent.

<img width="945" height="574" alt="Screenshot 2026-03-23 at 3 43 44 PM"
src="https://github.com/user-attachments/assets/38d97c94-aa73-4e62-9b4e-3264600e5ed0"
/>

.
**Summary:**

Implement MCP serverExecutionTelemetry capability that enables
cross-organization distributed tracing by returning server-side
OpenTelemetry spans to clients inline via _meta.otel.traces.

  Server-side (middleware):
  - TelemetryPassbackMiddleware intercepts tools/call and resources/read
- ContextVarSpanCollector isolates span collection per-request via
ContextVar
- Propagates traceparent from client request for distributed trace
stitching
- Serializes collected spans to verbatim OTLP JSON (resourceSpans
format), directly POSTable to /v1/traces
- Top-level span filtering by default; full span tree via detailed
opt-in
- Middleware advertises capabilities via get_capabilities() on the
Middleware base class
  - Provisional API: FutureWarning emitted until SEP-2448 is ratified

  Client-side (reference agent):
- LangChain ReAct agent connects to MCP server via
streamable_http_client with OAuth 2.1
  - Detects serverExecutionTelemetry capability at initialization
- Dynamically wraps discovered MCP tools with traceparent propagation
and _meta.otel span request
- Ingests returned server spans into Jaeger (OTLP JSON) and Galileo
(OTLP protobuf)
- Two-act demo: --no-passback (black box) vs default (full server-side
visibility)

  Dependencies:
  - opentelemetry-api and opentelemetry-sdk added to arcade-mcp-server

  Bump arcade-mcp-server version to 1.18.0.
2026-03-25 15:57:50 -07:00

529 lines
17 KiB
Python

"""Tests for Telemetry Passback Middleware."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.trace import SpanKind, StatusCode
from arcade_mcp_server.middleware.base import MiddlewareContext
from arcade_mcp_server.middleware.telemetry import (
ContextVarSpanCollector,
TelemetryPassbackMiddleware,
_attrs_to_kv,
_ns,
_request_spans,
filter_top_level_spans,
spans_to_otlp_json,
start_collecting,
stop_collecting,
)
from arcade_mcp_server.types import (
CallToolResult,
JSONRPCResponse,
ReadResourceResult,
TextContent,
TextResourceContents,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_span(
*,
name: str = "test-span",
trace_id: int = 0xABCD,
span_id: int = 0x1234,
parent_span_id: int | None = None,
kind: SpanKind = SpanKind.INTERNAL,
start_time: int = 1000,
end_time: int = 2000,
status_code: StatusCode = StatusCode.UNSET,
attributes: dict | None = None,
events: list | None = None,
) -> ReadableSpan:
"""Build a lightweight mock ReadableSpan."""
span = MagicMock(spec=ReadableSpan)
ctx = MagicMock()
ctx.trace_id = trace_id
ctx.span_id = span_id
span.context = ctx
span.name = name
span.kind = kind
span.start_time = start_time
span.end_time = end_time
status = MagicMock()
status.status_code = status_code
span.status = status
if parent_span_id is not None:
parent = MagicMock()
parent.span_id = parent_span_id
span.parent = parent
else:
span.parent = None
span.attributes = attributes
span.events = events or []
return span
def _make_tool_response(*, text: str = "hello") -> JSONRPCResponse[CallToolResult]:
"""Build a JSONRPCResponse wrapping a CallToolResult."""
return JSONRPCResponse(
id="1",
result=CallToolResult(
content=[TextContent(type="text", text=text)],
isError=False,
),
)
def _make_resource_response(*, text: str = "hello") -> JSONRPCResponse[ReadResourceResult]:
"""Build a JSONRPCResponse wrapping a ReadResourceResult."""
return JSONRPCResponse(
id="1",
result=ReadResourceResult(
contents=[TextResourceContents(uri="file:///test", text=text)],
),
)
# ===========================================================================
# ContextVarSpanCollector
# ===========================================================================
class TestContextVarSpanCollector:
"""Test ContextVarSpanCollector class."""
@pytest.fixture(autouse=True)
def _reset_contextvar(self):
"""Ensure the ContextVar is clean before and after each test."""
_request_spans.set(None)
yield
_request_spans.set(None)
def test_spans_collected_when_bucket_active(self):
collector = ContextVarSpanCollector()
bucket = start_collecting()
span = _make_span()
collector.on_end(span)
assert span in bucket
def test_spans_not_collected_when_bucket_inactive(self):
collector = ContextVarSpanCollector()
span = _make_span()
collector.on_end(span)
# No active bucket; span is silently dropped
assert _request_spans.get() is None
def test_on_start_is_noop(self):
collector = ContextVarSpanCollector()
collector.on_start(MagicMock())
def test_shutdown_is_noop(self):
collector = ContextVarSpanCollector()
collector.shutdown()
def test_force_flush_returns_true(self):
collector = ContextVarSpanCollector()
assert collector.force_flush() is True
# ===========================================================================
# start_collecting / stop_collecting
# ===========================================================================
class TestCollectingLifecycle:
"""Test start_collecting / stop_collecting."""
@pytest.fixture(autouse=True)
def _reset_contextvar(self):
_request_spans.set(None)
yield
_request_spans.set(None)
def test_lifecycle(self):
bucket = start_collecting()
assert bucket == []
assert _request_spans.get() is bucket
span = _make_span()
bucket.append(span)
result = stop_collecting()
assert span in result
assert _request_spans.get() is None
def test_idempotent_stop(self):
start_collecting()
stop_collecting()
result = stop_collecting()
assert result == []
def test_isolation_between_calls(self):
bucket1 = start_collecting()
bucket1.append(_make_span(name="a"))
stop_collecting()
bucket2 = start_collecting()
assert bucket2 == []
# ===========================================================================
# filter_top_level_spans
# ===========================================================================
class TestFilterTopLevelSpans:
def test_root_and_children_kept_grandchildren_dropped(self):
root = _make_span(name="root", span_id=1, parent_span_id=999)
child = _make_span(name="child", span_id=2, parent_span_id=1)
grandchild = _make_span(name="grandchild", span_id=3, parent_span_id=2)
result = filter_top_level_spans([root, child, grandchild])
names = {s.name for s in result}
assert names == {"root", "child"}
def test_empty_list(self):
assert filter_top_level_spans([]) == []
def test_single_span_no_parent(self):
span = _make_span(name="only", span_id=1)
result = filter_top_level_spans([span])
assert result == [span]
def test_single_span_with_external_parent(self):
span = _make_span(name="only", span_id=1, parent_span_id=999)
result = filter_top_level_spans([span])
assert len(result) == 1
assert result[0].name == "only"
# ===========================================================================
# _attrs_to_kv
# ===========================================================================
class TestAttrsToKv:
def test_bool_value(self):
result = _attrs_to_kv({"flag": True})
assert result == [{"key": "flag", "value": {"boolValue": True}}]
def test_int_value(self):
result = _attrs_to_kv({"count": 42})
assert result == [{"key": "count", "value": {"intValue": "42"}}]
def test_float_value(self):
result = _attrs_to_kv({"rate": 3.14})
assert result == [{"key": "rate", "value": {"doubleValue": 3.14}}]
def test_str_value(self):
result = _attrs_to_kv({"name": "foo"})
assert result == [{"key": "name", "value": {"stringValue": "foo"}}]
def test_list_value(self):
result = _attrs_to_kv({"tags": ["a", "b"]})
assert result == [
{"key": "tags", "value": {"arrayValue": {"values": [{"stringValue": "a"}, {"stringValue": "b"}]}}}
]
def test_fallback_to_string(self):
result = _attrs_to_kv({"obj": object()})
assert len(result) == 1
assert "stringValue" in result[0]["value"]
def test_empty_attrs(self):
assert _attrs_to_kv({}) == []
def test_none_attrs(self):
assert _attrs_to_kv(None) == []
# ===========================================================================
# spans_to_otlp_json
# ===========================================================================
class TestSpansToOtlpJson:
def test_full_serialization(self):
event = MagicMock()
event.timestamp = 1500
event.name = "log"
event.attributes = {"msg": "hi"}
span = _make_span(
name="op",
trace_id=0xABCDEF,
span_id=0x123456,
parent_span_id=0x111,
kind=SpanKind.SERVER,
start_time=1000,
end_time=2000,
status_code=StatusCode.OK,
attributes={"k": "v"},
events=[event],
)
result = spans_to_otlp_json([span], "test-service")
assert "resourceSpans" in result
rs = result["resourceSpans"][0]
assert rs["resource"]["attributes"][0]["value"]["stringValue"] == "test-service"
otlp_span = rs["scopeSpans"][0]["spans"][0]
assert otlp_span["name"] == "op"
assert otlp_span["kind"] == 2 # SERVER
assert otlp_span["status"]["code"] == 1 # OK
assert "parentSpanId" in otlp_span
assert len(otlp_span["events"]) == 1
def test_span_without_parent(self):
span = _make_span(name="root", span_id=1)
result = spans_to_otlp_json([span], "svc")
otlp_span = result["resourceSpans"][0]["scopeSpans"][0]["spans"][0]
assert "parentSpanId" not in otlp_span
def test_span_with_no_context_skipped(self):
span = _make_span()
span.context = None
result = spans_to_otlp_json([span], "svc")
assert result["resourceSpans"][0]["scopeSpans"][0]["spans"] == []
def test_status_error(self):
span = _make_span(status_code=StatusCode.ERROR)
result = spans_to_otlp_json([span], "svc")
otlp_span = result["resourceSpans"][0]["scopeSpans"][0]["spans"][0]
assert otlp_span["status"]["code"] == 2
# ===========================================================================
# _ns
# ===========================================================================
class TestNs:
def test_zero(self):
assert _ns(0) == "0"
def test_none(self):
assert _ns(None) == "0"
def test_positive(self):
assert _ns(123456) == "123456"
# ===========================================================================
# TelemetryPassbackMiddleware
# ===========================================================================
def _make_otel_context(*, request: bool = True, detailed: bool = False, traceparent: str | None = None):
"""Build a MiddlewareContext with otel meta configured."""
session = MagicMock()
meta = MagicMock()
meta.traceparent = traceparent
meta.otel = {"traces": {"request": request, "detailed": detailed}}
session._request_meta = meta
mcp_ctx = MagicMock()
mcp_ctx._session = session
return mcp_ctx
@pytest.mark.filterwarnings("ignore::FutureWarning")
class TestTelemetryPassbackMiddleware:
"""Test TelemetryPassbackMiddleware class."""
@pytest.fixture(autouse=True)
def _reset_contextvar(self):
_request_spans.set(None)
yield
_request_spans.set(None)
@pytest.fixture
def tracer_provider(self):
return TracerProvider()
@pytest.fixture
def middleware(self, tracer_provider):
return TelemetryPassbackMiddleware(
service_name="test-svc",
tracer_provider=tracer_provider,
)
@pytest.fixture
def tool_call_context(self):
"""Context for a tools/call with otel request=True."""
return MiddlewareContext(
message={"method": "tools/call", "params": {"name": "my_tool", "arguments": {"x": 1}}},
mcp_context=_make_otel_context(request=True, detailed=False),
method="tools/call",
request_id="req-1",
)
@pytest.fixture
def tool_call_context_no_request(self):
"""Context for a tools/call with otel request=False."""
return MiddlewareContext(
message={"method": "tools/call", "params": {"name": "my_tool", "arguments": {}}},
mcp_context=_make_otel_context(request=False),
method="tools/call",
request_id="req-2",
)
@pytest.fixture
def resource_read_context(self):
"""Context for resources/read with otel request=True."""
return MiddlewareContext(
message={"method": "resources/read", "params": {"uri": "file:///foo.txt"}},
mcp_context=_make_otel_context(request=True, detailed=False),
method="resources/read",
request_id="req-3",
)
# --- get_capabilities ---
def test_get_capabilities(self, middleware):
caps = middleware.get_capabilities()
assert "serverExecutionTelemetry" in caps
assert caps["serverExecutionTelemetry"]["signals"]["traces"]["supported"] is True
# --- on_call_tool: request=False passthrough ---
@pytest.mark.asyncio
async def test_on_call_tool_no_request_passthrough(self, middleware, tool_call_context_no_request):
sentinel = object()
async def handler(ctx):
return sentinel
result = await middleware.on_call_tool(tool_call_context_no_request, handler)
assert result is sentinel
# --- on_call_tool: request=True attaches spans ---
@pytest.mark.asyncio
async def test_on_call_tool_with_request_attaches_spans(self, middleware, tool_call_context):
resp = _make_tool_response(text="tool output")
async def handler(ctx):
return resp
result = await middleware.on_call_tool(tool_call_context, handler)
meta = result.result.meta
assert meta is not None
assert "otel" in meta
assert "traces" in meta["otel"]
assert "resourceSpans" in meta["otel"]["traces"]
# --- on_read_resource ---
@pytest.mark.asyncio
async def test_on_read_resource_attaches_spans(self, middleware, resource_read_context):
resp = _make_resource_response(text="resource content")
async def handler(ctx):
return resp
result = await middleware.on_read_resource(resource_read_context, handler)
meta = result.result.meta
assert meta is not None
assert "otel" in meta
assert "traces" in meta["otel"]
# --- detailed=True vs detailed=False ---
@pytest.mark.asyncio
async def test_detailed_true_returns_all_spans(self, tracer_provider):
mw = TelemetryPassbackMiddleware(service_name="svc", tracer_provider=tracer_provider)
context = MiddlewareContext(
message={"method": "tools/call", "params": {"name": "t", "arguments": {}}},
mcp_context=_make_otel_context(request=True, detailed=True),
method="tools/call",
)
resp = _make_tool_response()
async def handler(ctx):
return resp
result = await mw.on_call_tool(context, handler)
otel = result.result.meta["otel"]["traces"]
assert otel["truncated"] is False
assert otel["droppedSpanCount"] == 0
# --- traceparent propagation ---
@pytest.mark.asyncio
async def test_traceparent_propagation(self, tracer_provider):
mw = TelemetryPassbackMiddleware(service_name="svc", tracer_provider=tracer_provider)
context = MiddlewareContext(
message={"method": "tools/call", "params": {"name": "t", "arguments": {}}},
mcp_context=_make_otel_context(
request=True,
detailed=True,
traceparent="00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
),
method="tools/call",
)
resp = _make_tool_response()
async def handler(ctx):
return resp
result = await mw.on_call_tool(context, handler)
assert result.result.meta is not None
assert "otel" in result.result.meta
# --- error in call_next — spans still cleaned up ---
@pytest.mark.asyncio
async def test_error_in_call_next_cleans_up_spans(self, middleware, tool_call_context):
async def handler(ctx):
raise RuntimeError("boom")
with pytest.raises(RuntimeError, match="boom"):
await middleware.on_call_tool(tool_call_context, handler)
assert _request_spans.get() is None
# --- _extract_response_text ---
def test_extract_response_text_with_content(self, middleware):
resp = _make_tool_response(text="hello")
assert middleware._extract_response_text(resp) == "hello"
def test_extract_response_text_with_contents(self, middleware):
resp = _make_resource_response(text="world")
assert middleware._extract_response_text(resp) == "world"
def test_extract_response_text_no_text(self, middleware):
# Empty text is falsy, filtered out by _extract_response_text
resp = _make_tool_response(text="")
assert middleware._extract_response_text(resp) is None
def test_extract_response_text_non_jsonrpc(self, middleware):
assert middleware._extract_response_text({"not": "a response"}) is None
def test_extract_response_text_none_result(self, middleware):
resp = JSONRPCResponse(id="1", result={})
assert middleware._extract_response_text(resp) is None
# ===========================================================================
# Middleware base class default get_capabilities
# ===========================================================================
class TestMiddlewareBaseGetCapabilities:
def test_default_returns_empty_dict(self):
from arcade_mcp_server.middleware.base import Middleware
mw = Middleware()
assert mw.get_capabilities() == {}