diff --git a/.github/workflows/check-debug-leak-flags.yml b/.github/workflows/check-debug-leak-flags.yml new file mode 100644 index 00000000..d0ecb9e6 --- /dev/null +++ b/.github/workflows/check-debug-leak-flags.yml @@ -0,0 +1,30 @@ +name: Debug Leak Flag Guard + +# Ensures the debug-exposure flags in arcade_mcp_server/_debug_exposure.py cannot be +# activated by anything shipped in committed files. The flags only activate +# when the env var is set to one specific acknowledgement string, so we just +# need to guarantee that string never appears outside its allowlist. See +# scripts/check_debug_leak_flags_off.py for details. + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +jobs: + guard: + name: Debug leak flag guard + runs-on: ubuntu-latest + steps: + - name: Check out + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Verify debug-leak flags stay off + run: python scripts/check_debug_leak_flags_off.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e9a74942..76c3efad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,3 +20,16 @@ repos: exclude: "(.*/templates/.*|libs/tests/.*)" - id: ruff-format exclude: "(.*/templates/.*|libs/tests/.*)" + + - repo: local + hooks: + - id: check-debug-leak-flags + name: "Guard: unsafe debug-leak flags must stay off" + description: >- + Fails if the activation acknowledgement string for the unsafe + debug-leak flags in arcade_core/output.py appears in any tracked + file outside its small allowlist. + entry: python scripts/check_debug_leak_flags_off.py + language: python + pass_filenames: false + always_run: true diff --git a/CLAUDE.md b/CLAUDE.md index db20c97a..3d5b9785 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -253,6 +253,17 @@ The `arcade` CLI (`libs/arcade-cli/arcade_cli/main.py`) is typer-based. Key comm | `ARCADE_DISABLE_AUTOUPDATE=1` | Disable CLI auto-update checks | | Any non-`MCP_`/`_` prefixed var | Automatically available as a tool secret via `context.get_secret()` | +### Debug-only flags: expose error internals in tool error responses (toolkit authors) + +When set, these flags append `developer_message` and/or the tool stacktrace to the `message` field of the MCP tool error response — useful while debugging a toolkit, because most MCP clients render only `message` and drop `developer_message`. Use ONLY for local debugging. Both require the exact value `yes-i-accept-leaking-internals-to-the-agent` (nothing else is accepted — `true`, `1`, etc. are rejected and log a warning). Each logs a loud WARNING on first activation. Implemented in `libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py`. + +| Env var | Effect when set to the magic ack value | +|---------|-----------------------------------------| +| `ARCADE_DEBUG_EXPOSE_DEVELOPER_MESSAGE_IN_TOOL_ERROR_RESPONSES` | Appends `developer_message` to the error response `message` field | +| `ARCADE_DEBUG_EXPOSE_STACKTRACE_IN_TOOL_ERROR_RESPONSES` | Appends the tool stacktrace to the error response `message` field | + +**Never enable in production.** The `message` field is returned verbatim to whoever called the tool — LLMs, transcripts, end-user UIs, and anything else downstream. + ## Project Layout - `libs/arcade-*/` — Core libraries, each with own `pyproject.toml` (except cli/evals → root) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py b/libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py new file mode 100644 index 00000000..b4939bac --- /dev/null +++ b/libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py @@ -0,0 +1,84 @@ +""" +Debug-only escape hatch for MCP tool error responses. + +MCP clients typically render only the ``message`` field of a tool error +response, dropping ``developer_message`` and ``stacktrace``. That makes +server-side iteration painful when a tool is failing. The flags in this +module let a toolkit author opt in to appending those internals to the +``message`` field while debugging. + +DEBUG-ONLY. Activating these flags can leak paths, tokens, or PII to +callers. Don't add more flags of this shape — put debug info in logs +instead. +""" + +from __future__ import annotations + +import logging +import os + +_logger = logging.getLogger(__name__) + +# Acknowledgement string a developer must set as the env value. Picked to be +# impossible to set by mistake — no sane config management or CI will ever +# emit this string. +_DEBUG_LEAK_MAGIC = "yes-i-accept-leaking-internals-to-the-agent" + +_ENV_EXPOSE_DEVELOPER_MESSAGE = "ARCADE_DEBUG_EXPOSE_DEVELOPER_MESSAGE_IN_TOOL_ERROR_RESPONSES" +_ENV_EXPOSE_STACKTRACE = "ARCADE_DEBUG_EXPOSE_STACKTRACE_IN_TOOL_ERROR_RESPONSES" + +# One-shot warning state per flag. The rejection warning (truthy but not the +# magic string) and the activation warning (magic string set) are tracked in +# *separate* sets so that fixing a misconfigured flag within the same process +# still fires the critical activation warning. +_warned_rejected: set[str] = set() +_warned_activated: set[str] = set() + + +def _leak_enabled(env_var: str) -> bool: + raw = os.environ.get(env_var) + if raw is None: + return False + if raw.strip() != _DEBUG_LEAK_MAGIC: + # A value is set but it isn't the magic ack. Treat as off and, if it + # looks like someone tried a boolean, nudge them via a log so the + # silence isn't confusing. + if raw.strip().lower() in {"1", "true", "yes", "on"} and env_var not in _warned_rejected: + _warned_rejected.add(env_var) + _logger.warning( + "%s is set to a truthy value but not to the required " + "acknowledgement string. Flag remains OFF. " + "See arcade_mcp_server/_debug_exposure.py.", + env_var, + ) + return False + if env_var not in _warned_activated: + _warned_activated.add(env_var) + _logger.warning( + "%s is ENABLED. Tool error internals will be appended to the " + "`message` field of MCP tool error responses. This can leak paths, " + "tokens, or PII to callers. DO NOT USE IN PRODUCTION.", + env_var, + ) + return True + + +def augment_error_message_for_debug( + message: str, + developer_message: str | None, + stacktrace: str | None, +) -> str: + """Append debug internals to ``message`` when the corresponding env flags are set. + + This is a no-op in the default case (both flags off), and also a no-op when + the flags are set to anything other than the activation ack string. See + module docstring for the full rationale. + """ + extras: list[str] = [] + if developer_message and _leak_enabled(_ENV_EXPOSE_DEVELOPER_MESSAGE): + extras.append(f"developer_message: {developer_message}") + if stacktrace and _leak_enabled(_ENV_EXPOSE_STACKTRACE): + extras.append(f"stacktrace:\n{stacktrace}") + if not extras: + return message + return f"{message}\n\n[DEBUG] " + "\n\n[DEBUG] ".join(extras) diff --git a/libs/arcade-mcp-server/arcade_mcp_server/server.py b/libs/arcade-mcp-server/arcade_mcp_server/server.py index 22c63028..e5430151 100644 --- a/libs/arcade-mcp-server/arcade_mcp_server/server.py +++ b/libs/arcade-mcp-server/arcade_mcp_server/server.py @@ -29,6 +29,7 @@ from arcade_core.schema import ToolAuthRequirement as CoreToolAuthRequirement from arcadepy import ArcadeError, AsyncArcade from arcadepy.types.auth_authorize_params import AuthRequirement, AuthRequirementOauth2 +from arcade_mcp_server._debug_exposure import augment_error_message_for_debug from arcade_mcp_server.context import Context, get_current_model_context, set_current_model_context from arcade_mcp_server.convert import convert_content_to_structured_content, convert_to_mcp_content from arcade_mcp_server.exceptions import NotFoundError, ToolRuntimeError @@ -933,6 +934,11 @@ class MCPServer: error_text = error.message if error.additional_prompt_content: error_text += f"\n\n{error.additional_prompt_content}" + error_text = augment_error_message_for_debug( + error_text, + error.developer_message, + error.stacktrace, + ) content = convert_to_mcp_content(error_text) self._log_tool_call_error(tool_name, error) else: diff --git a/libs/arcade-mcp-server/pyproject.toml b/libs/arcade-mcp-server/pyproject.toml index 3e83b237..aae72aa3 100644 --- a/libs/arcade-mcp-server/pyproject.toml +++ b/libs/arcade-mcp-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "arcade-mcp-server" -version = "1.20.0" +version = "1.21.0" description = "Model Context Protocol (MCP) server framework for Arcade.dev" readme = "README.md" authors = [{ name = "Arcade.dev" }] diff --git a/libs/tests/arcade_mcp_server/test_debug_exposure.py b/libs/tests/arcade_mcp_server/test_debug_exposure.py new file mode 100644 index 00000000..bdb751b4 --- /dev/null +++ b/libs/tests/arcade_mcp_server/test_debug_exposure.py @@ -0,0 +1,162 @@ +"""Tests for the debug-exposure escape hatch in ``arcade_mcp_server/_debug_exposure.py``.""" + +import logging + +import pytest +from arcade_mcp_server import _debug_exposure as debug_exposure +from arcade_mcp_server._debug_exposure import augment_error_message_for_debug + +_LEAK_MAGIC = "yes-i-accept-leaking-internals-to-the-agent" +_ENV_DEV_MSG = "ARCADE_DEBUG_EXPOSE_DEVELOPER_MESSAGE_IN_TOOL_ERROR_RESPONSES" +_ENV_STACKTRACE = "ARCADE_DEBUG_EXPOSE_STACKTRACE_IN_TOOL_ERROR_RESPONSES" + + +@pytest.fixture(autouse=True) +def _reset_leak_warn_state(monkeypatch): + """Clear the per-process one-shot warning state so each test starts clean. + + Both flags emit loud warnings (rejection and activation) one-shot per flag. + Without a reset, later tests would silently lose coverage of those branches + because the module-level tracking sets are already populated from earlier + tests. + """ + monkeypatch.delenv(_ENV_DEV_MSG, raising=False) + monkeypatch.delenv(_ENV_STACKTRACE, raising=False) + debug_exposure._warned_rejected.clear() + debug_exposure._warned_activated.clear() + yield + debug_exposure._warned_rejected.clear() + debug_exposure._warned_activated.clear() + + +def test_no_leak_by_default(): + """With both flags unset, message must not be augmented.""" + out = augment_error_message_for_debug( + "public error", + developer_message="secret internals", + stacktrace="Traceback...\n line", + ) + assert out == "public error" + + +@pytest.mark.parametrize("bad_value", ["true", "1", "yes", "on", "TRUE", "True"]) +def test_rejects_boolean_activation(monkeypatch, caplog, bad_value): + """Any truthy-looking value that isn't the magic string must be rejected.""" + monkeypatch.setenv(_ENV_DEV_MSG, bad_value) + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + out = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace=None + ) + assert out == "public error" + assert any( + "set to a truthy value but not to the required" in rec.message for rec in caplog.records + ) + + +def test_rejects_random_non_magic_value(monkeypatch, caplog): + """A non-boolean-looking value that isn't the magic string is silently off.""" + monkeypatch.setenv(_ENV_DEV_MSG, "debug-please") + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + out = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace=None + ) + assert out == "public error" + assert not any( + "set to a truthy value but not to the required" in rec.message for rec in caplog.records + ) + + +def test_developer_message_flag_enabled(monkeypatch, caplog): + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + out = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace="trace" + ) + assert "public error" in out + assert "[DEBUG] developer_message: secret internals" in out + # Stacktrace flag is off → stacktrace must NOT be in the augmented text. + assert "trace" not in out.replace("public error", "") + assert any("is ENABLED" in rec.message for rec in caplog.records) + + +def test_stacktrace_flag_enabled(monkeypatch): + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + out = augment_error_message_for_debug( + "public error", + developer_message="secret internals", + stacktrace="Traceback (most recent call last):\n File ...", + ) + assert "public error" in out + assert "[DEBUG] stacktrace:" in out + assert "File ..." in out + # Developer-message flag off → dev message must NOT leak. + assert "secret internals" not in out + + +def test_both_flags_enabled(monkeypatch): + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + out = augment_error_message_for_debug( + "public error", developer_message="dev info", stacktrace="trace info" + ) + assert "[DEBUG] developer_message: dev info" in out + assert "[DEBUG] stacktrace:\ntrace info" in out + + +def test_flag_enabled_but_no_content_to_leak(monkeypatch): + """Flag on but developer_message/stacktrace are None → message unchanged.""" + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + out = augment_error_message_for_debug("public error", None, None) + assert out == "public error" + + +def test_activation_warning_emitted_once_per_process(monkeypatch, caplog): + """Second call with the flag on must NOT emit another activation warning.""" + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + augment_error_message_for_debug("a", developer_message="dev", stacktrace=None) + first_count = sum("is ENABLED" in r.message for r in caplog.records) + augment_error_message_for_debug("b", developer_message="dev", stacktrace=None) + second_count = sum("is ENABLED" in r.message for r in caplog.records) + assert first_count == 1 + assert second_count == 1 # one-shot per process + + +def test_rejection_does_not_suppress_later_activation_warning(monkeypatch, caplog): + """Regression: once a truthy-but-non-magic value has been rejected for a + flag, correcting the value to the magic string within the same process + must still emit the critical "ENABLED ... DO NOT USE IN PRODUCTION" + warning. Previously both paths shared one state set, so the activation + warning was silently swallowed in this scenario. + """ + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + monkeypatch.setenv(_ENV_DEV_MSG, "true") + out_rejected = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace=None + ) + assert "[DEBUG]" not in out_rejected + rejection_count = sum( + "set to a truthy value but not to the required" in r.message for r in caplog.records + ) + assert rejection_count == 1 + + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + out_activated = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace=None + ) + assert "[DEBUG] developer_message: secret internals" in out_activated + activation_count = sum("is ENABLED" in r.message for r in caplog.records) + assert activation_count == 1, ( + "activation warning must fire even after the rejection warning " + "has already been emitted for the same flag in this process" + ) + + +def test_magic_value_ignores_surrounding_whitespace(monkeypatch): + """Leading/trailing whitespace around the magic string still activates the flag.""" + monkeypatch.setenv(_ENV_DEV_MSG, f" {_LEAK_MAGIC} ") + out = augment_error_message_for_debug( + "public error", developer_message="secret internals", stacktrace=None + ) + assert "[DEBUG] developer_message: secret internals" in out diff --git a/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py b/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py new file mode 100644 index 00000000..b7931847 --- /dev/null +++ b/libs/tests/arcade_mcp_server/test_debug_exposure_integration.py @@ -0,0 +1,263 @@ +"""End-to-end integration tests for the MCP debug-exposure escape hatch. + +These complement the pure-function unit tests in ``test_debug_exposure.py`` by +exercising the full MCP tool-call path: + + tool raises -> ToolExecutor.run -> ToolOutputFactory.fail -> + MCPServer._call_tool -> augment_error_message_for_debug -> + CallToolResult.content[0].text + +This is the path every real MCP client hits, so it's where regressions in the +wire-up (wrong call site, wrong argument order, missing import, etc.) would +actually surface. The unit tests can't catch those because they call the pure +function directly. +""" + +from typing import Annotated + +import pytest +import pytest_asyncio +from arcade_core.catalog import MaterializedTool, ToolCatalog, ToolMeta, create_func_models +from arcade_core.errors import FatalToolError +from arcade_core.schema import ( + InputParameter, + ToolDefinition, + ToolInput, + ToolkitDefinition, + ToolOutput, + ToolRequirements, + ValueSchema, +) +from arcade_mcp_server import _debug_exposure as debug_exposure +from arcade_mcp_server import tool +from arcade_mcp_server.server import MCPServer +from arcade_mcp_server.settings import MCPSettings +from arcade_mcp_server.types import CallToolRequest, CallToolResult, JSONRPCResponse + +_LEAK_MAGIC = "yes-i-accept-leaking-internals-to-the-agent" +_ENV_DEV_MSG = "ARCADE_DEBUG_EXPOSE_DEVELOPER_MESSAGE_IN_TOOL_ERROR_RESPONSES" +_ENV_STACKTRACE = "ARCADE_DEBUG_EXPOSE_STACKTRACE_IN_TOOL_ERROR_RESPONSES" + + +@pytest.fixture(autouse=True) +def _reset_leak_state(monkeypatch): + monkeypatch.delenv(_ENV_DEV_MSG, raising=False) + monkeypatch.delenv(_ENV_STACKTRACE, raising=False) + debug_exposure._warned_rejected.clear() + debug_exposure._warned_activated.clear() + yield + debug_exposure._warned_rejected.clear() + debug_exposure._warned_activated.clear() + + +# ---- Tool definitions used by the integration tests ------------------------- + + +@tool +def raises_fatal_tool_error( + query: Annotated[str, "A query"], +) -> Annotated[str, "Result"]: + """Simulates a toolkit author's tool failing with a rich error.""" + raise FatalToolError( + message="Failed to fetch results", + developer_message=f"HTTP 503 on upstream endpoint for query={query!r}", + ) + + +@tool +def raises_unhandled_exception( + query: Annotated[str, "A query"], +) -> Annotated[str, "Result"]: + """Simulates a toolkit author's tool crashing with an unexpected exception. + + The executor's generic `except Exception` branch populates the stacktrace + via `traceback.format_exc()`, which is what the stacktrace flag leaks. + """ + raise ValueError(f"unexpected crash for query={query!r}") + + +def _materialized(func, name: str) -> MaterializedTool: + definition = ToolDefinition( + name=name, + fully_qualified_name=f"TestToolkit.{name}", + description=f"{name} integration fixture", + toolkit=ToolkitDefinition(name="TestToolkit", description="", version="1.0.0"), + input=ToolInput( + parameters=[ + InputParameter( + name="query", + required=True, + description="A query", + value_schema=ValueSchema(val_type="string"), + ), + ] + ), + output=ToolOutput( + description="Result", + value_schema=ValueSchema(val_type="string"), + ), + requirements=ToolRequirements(), + ) + input_model, output_model = create_func_models(func) + return MaterializedTool( + tool=func, + definition=definition, + meta=ToolMeta(module=func.__module__, toolkit="TestToolkit"), + input_model=input_model, + output_model=output_model, + ) + + +@pytest.fixture +def erroring_catalog() -> ToolCatalog: + catalog = ToolCatalog() + mt1 = _materialized(raises_fatal_tool_error, "raises_fatal_tool_error") + mt2 = _materialized(raises_unhandled_exception, "raises_unhandled_exception") + catalog._tools[mt1.definition.get_fully_qualified_name()] = mt1 + catalog._tools[mt2.definition.get_fully_qualified_name()] = mt2 + return catalog + + +@pytest_asyncio.fixture +async def erroring_server(erroring_catalog) -> MCPServer: + settings = MCPSettings() + settings.middleware.mask_error_details = False + server = MCPServer( + catalog=erroring_catalog, + name="Integration Debug Exposure Server", + version="0.0.0", + settings=settings, + ) + await server.start() + try: + yield server + finally: + await server.stop() + + +async def _call(erroring_server: MCPServer, tool_name: str) -> CallToolResult: + message = CallToolRequest( + jsonrpc="2.0", + id=1, + method="tools/call", + params={"name": f"TestToolkit.{tool_name}", "arguments": {"query": "ping"}}, + ) + response = await erroring_server._handle_call_tool(message) + assert isinstance(response, JSONRPCResponse) + assert isinstance(response.result, CallToolResult) + assert response.result.isError is True + assert response.result.structuredContent is None + return response.result + + +# ---- Integration tests ------------------------------------------------------ + + +@pytest.mark.asyncio +async def test_integration_baseline_no_leak(erroring_server): + """Default state: the agent sees ONLY the sanitized message.""" + result = await _call(erroring_server, "raises_fatal_tool_error") + text = result.content[0].text + assert "Failed to fetch results" in text + assert "[DEBUG]" not in text + assert "HTTP 503" not in text + assert "query='ping'" not in text + + +@pytest.mark.asyncio +async def test_integration_boolean_rejected_no_leak(erroring_server, monkeypatch, caplog): + """Boolean-looking values are rejected by the MCP boundary too.""" + monkeypatch.setenv(_ENV_DEV_MSG, "true") + import logging + + with caplog.at_level(logging.WARNING, logger="arcade_mcp_server._debug_exposure"): + result = await _call(erroring_server, "raises_fatal_tool_error") + text = result.content[0].text + assert "Failed to fetch results" in text + assert "[DEBUG]" not in text + assert "HTTP 503" not in text + assert any( + "set to a truthy value but not to the required" in r.message for r in caplog.records + ) + + +@pytest.mark.asyncio +async def test_integration_developer_message_flag_leaks_through_mcp( + erroring_server, monkeypatch +): + """When the flag is set to the magic value, the MCP response `content` + carries `developer_message` alongside the sanitized message.""" + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + result = await _call(erroring_server, "raises_fatal_tool_error") + text = result.content[0].text + assert "Failed to fetch results" in text + assert "[DEBUG] developer_message:" in text + assert "HTTP 503 on upstream endpoint for query='ping'" in text + # Stacktrace flag is off — stacktrace must NOT leak. + assert "[DEBUG] stacktrace:" not in text + + +@pytest.mark.asyncio +async def test_integration_stacktrace_flag_leaks_traceback_through_mcp( + erroring_server, monkeypatch +): + """Unhandled exceptions go through the executor's generic except branch, + which populates a real stacktrace. With the flag on, that stacktrace must + appear in the MCP response content.""" + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + result = await _call(erroring_server, "raises_unhandled_exception") + text = result.content[0].text + # The generic-exception branch wraps the message with the tool name. + assert "raises_unhandled_exception" in text + assert "[DEBUG] stacktrace:" in text + assert "Traceback" in text + assert "ValueError" in text + assert "unexpected crash for query='ping'" in text + + +@pytest.mark.asyncio +async def test_integration_both_flags_leak_through_mcp(erroring_server, monkeypatch): + """Both flags together on an unhandled exception: developer_message (from + `str(e)` in the executor) AND the stacktrace both reach the MCP content.""" + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + result = await _call(erroring_server, "raises_unhandled_exception") + text = result.content[0].text + assert "[DEBUG] developer_message:" in text + assert "unexpected crash for query='ping'" in text + assert "[DEBUG] stacktrace:" in text + assert "Traceback" in text + + +@pytest.mark.asyncio +async def test_integration_success_path_unaffected_by_flags( + tool_catalog, mcp_settings, monkeypatch +): + """Sanity check: even with both flags on, SUCCESSFUL tool responses are + not touched. The augmentation only runs on the error branch.""" + monkeypatch.setenv(_ENV_DEV_MSG, _LEAK_MAGIC) + monkeypatch.setenv(_ENV_STACKTRACE, _LEAK_MAGIC) + server = MCPServer( + catalog=tool_catalog, + name="Success Path Server", + version="0.0.0", + settings=mcp_settings, + ) + await server.start() + try: + response = await server._handle_call_tool( + CallToolRequest( + jsonrpc="2.0", + id=1, + method="tools/call", + params={"name": "TestToolkit.test_tool", "arguments": {"text": "hi"}}, + ) + ) + finally: + await server.stop() + assert isinstance(response, JSONRPCResponse) + assert isinstance(response.result, CallToolResult) + assert response.result.isError is False + assert response.result.structuredContent is not None + for item in response.result.content: + assert "[DEBUG]" not in getattr(item, "text", "") diff --git a/scripts/check_debug_leak_flags_off.py b/scripts/check_debug_leak_flags_off.py new file mode 100755 index 00000000..da06d5ed --- /dev/null +++ b/scripts/check_debug_leak_flags_off.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# ruff: noqa: S603, S607 +# +# This script shells out to `git` via PATH on purpose: it runs inside +# pre-commit and GitHub Actions, both of which guarantee git on PATH, and +# hard-coding an absolute path would break portability. The subprocess +# invocations here pass only constant argv lists, so S603/S607 don't apply. +""" +Guard: the debug-exposure flags in ``arcade_mcp_server/_debug_exposure.py`` +must never ship in the "on" state through committed files. + +The two env vars + ARCADE_DEBUG_EXPOSE_DEVELOPER_MESSAGE_IN_TOOL_ERROR_RESPONSES + ARCADE_DEBUG_EXPOSE_STACKTRACE_IN_TOOL_ERROR_RESPONSES +only activate when set to one specific acknowledgement string. Therefore we +only need to guarantee that string never appears in the tree outside a tiny +allowlist of files (the source that defines it, the tests that exercise it, +the developer doc, and this guard itself). + +This script is run both as a pre-commit hook and as a dedicated CI workflow. + +Exit codes: + 0 OK — flags cannot be activated by anything in the tree. + 1 FAIL — the magic string was found in a non-allowlisted file. + 2 Infrastructure error (e.g. ``git ls-files`` unavailable). +""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +# The activation ack string. Kept as the sole constant so updating it in one +# place (arcade_mcp_server/_debug_exposure.py) also updates the guard. +MAGIC = "yes-i-accept-leaking-internals-to-the-agent" + +# Files that are *allowed* to mention the magic string. Everything else is a +# hard fail. Paths are relative to the repository root and use forward slashes. +ALLOWLIST: frozenset[str] = frozenset({ + # The source of truth for the flags. + "libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py", + # Unit tests for the pure augmentation function. + "libs/tests/arcade_mcp_server/test_debug_exposure.py", + # Integration tests for the MCP-boundary wire-up. + "libs/tests/arcade_mcp_server/test_debug_exposure_integration.py", + # Developer documentation for the flags. + "CLAUDE.md", + # This guard itself. + "scripts/check_debug_leak_flags_off.py", +}) + + +def _repo_root() -> Path: + try: + out = subprocess.check_output( + ["git", "rev-parse", "--show-toplevel"], + text=True, + stderr=subprocess.DEVNULL, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + print("check_debug_leak_flags_off: not a git checkout", file=sys.stderr) + raise SystemExit(2) from None + return Path(out.strip()) + + +def _tracked_files(root: Path) -> list[str]: + try: + out = subprocess.check_output( + ["git", "-C", str(root), "ls-files"], + text=True, + stderr=subprocess.DEVNULL, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + print("check_debug_leak_flags_off: git ls-files failed", file=sys.stderr) + raise SystemExit(2) from None + return [line for line in out.splitlines() if line] + + +def main() -> int: + root = _repo_root() + failures: list[str] = [] + + for rel in _tracked_files(root): + if rel in ALLOWLIST: + continue + path = root / rel + if not path.is_file(): + continue + try: + text = path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + if MAGIC in text: + failures.append(rel) + + if failures: + print("Debug-leak flag guard: FAIL", file=sys.stderr) + print("", file=sys.stderr) + print( + "The activation acknowledgement string for the unsafe debug-leak " + "flags was found in files that must never contain it:", + file=sys.stderr, + ) + for f in failures: + print(f" - {f}", file=sys.stderr) + print("", file=sys.stderr) + print( + "These env vars must stay off by default everywhere the repo ships. " + "If you need to iterate locally, export the magic value in your " + "shell only — never commit it.", + file=sys.stderr, + ) + print( + "See libs/arcade-mcp-server/arcade_mcp_server/_debug_exposure.py " + "for the full rationale.", + file=sys.stderr, + ) + return 1 + + print("Debug-leak flag guard: OK") + return 0 + + +if __name__ == "__main__": + sys.exit(main())