From a4160dd9fe15d78e152f0688f1a90cc2ee55c320 Mon Sep 17 00:00:00 2001 From: Eric Gustin <34000337+EricGustin@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:12:06 -0800 Subject: [PATCH] Four bug fixes (#754) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Resolves [TOO-363](https://linear.app/arcadedev/issue/TOO-363/arcade-deploy-fails-when-additional-deps-are-added-to-the-server). 2. Resolves [TOO-364](https://linear.app/arcadedev/issue/TOO-364/arcade-cores-tool-skip-logic-is-missing-case-for-direct-execution). 3. Resolves [TOO-358](https://linear.app/arcadedev/issue/TOO-358/missing-evals-error-message-shows-wrong-command). 4. Resolves [TOO-365](https://linear.app/arcadedev/issue/TOO-365/arcade-evals-unit-tests-are-hanging). --- > [!NOTE] > **Medium Risk** > Medium risk because it changes how `arcade deploy` spawns the server process and adjusts toolkit discovery skip logic, which can affect deployments and tool discovery; however, the changes are small and covered by new unit/integration tests. > > **Overview** > `arcade deploy` now starts the validation server using the project’s `.venv` interpreter (via `find_python_interpreter`) instead of the CLI’s own `sys.executable`, preventing missing dependency failures when the CLI is installed in an isolated env. > > `arcade-core`’s `Toolkit.tools_from_directory` skip logic is hardened to also skip the currently executing entrypoint by module name (`__main__.__spec__.name`) when file paths don’t match (e.g., bundled execution). CLI error printing now escapes plain messages to avoid rich markup issues, and `arcade-evals` lock acquisition accepts an optional timeout default. > > Adds unit tests for the new toolkit skip behavior and an integration test that boots the MCP server via direct Python invocation to mirror deployment behavior, and bumps `arcade-core`, `arcade-mcp-server`, and root dependency versions accordingly. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit e7785634c231c059f2e0bd1bc73a56bd7470a494. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- libs/arcade-cli/arcade_cli/configure.py | 11 +- libs/arcade-cli/arcade_cli/deploy.py | 10 +- libs/arcade-cli/arcade_cli/utils.py | 2 +- libs/arcade-core/arcade_core/toolkit.py | 46 ++++-- libs/arcade-core/pyproject.toml | 2 +- libs/arcade-evals/arcade_evals/loaders.py | 6 +- libs/arcade-mcp-server/pyproject.toml | 4 +- .../integration/test_end_to_end.py | 102 ++++++++++++ libs/tests/core/test_toolkit.py | 151 ++++++++++++++++++ pyproject.toml | 6 +- 10 files changed, 313 insertions(+), 27 deletions(-) diff --git a/libs/arcade-cli/arcade_cli/configure.py b/libs/arcade-cli/arcade_cli/configure.py index 50ea47c9..a6599850 100644 --- a/libs/arcade-cli/arcade_cli/configure.py +++ b/libs/arcade-cli/arcade_cli/configure.py @@ -146,7 +146,16 @@ def get_tool_secrets() -> dict: def find_python_interpreter() -> Path: - # Find the Python interpreter in the virtual environment + """ + Find the Python interpreter in the virtual environment. + + NOTE: This function assumes it is called from the project root directory (where .venv lives). + Currently, callers like `arcade deploy` enforce this by requiring pyproject.toml in cwd. + If this requirement is relaxed in the future, this function should be updated to: + 1. Accept a project_root parameter, OR + 2. Honor VIRTUAL_ENV / UV_PROJECT_ENVIRONMENT env vars, OR + 3. Search upward from cwd to find pyproject.toml and resolve .venv relative to that + """ venv_python = None # Check for .venv first (uv default) if (Path.cwd() / ".venv").exists(): diff --git a/libs/arcade-cli/arcade_cli/deploy.py b/libs/arcade-cli/arcade_cli/deploy.py index 82a64ff0..7b8308d5 100644 --- a/libs/arcade-cli/arcade_cli/deploy.py +++ b/libs/arcade-cli/arcade_cli/deploy.py @@ -4,7 +4,6 @@ import io import os import random import subprocess -import sys import tarfile import time from collections import deque @@ -22,6 +21,7 @@ from rich.spinner import Spinner from rich.text import Text from typing_extensions import Literal +from arcade_cli.configure import find_python_interpreter from arcade_cli.secret import load_env_file from arcade_cli.utils import ( compute_base_url, @@ -388,7 +388,13 @@ def start_server_process(entrypoint: str, debug: bool = False) -> tuple[subproce "ARCADE_WORKER_SECRET": "temp-validation-secret", } - cmd = [sys.executable, entrypoint] + # Use the project's Python environment, not the CLI's isolated environment. + # find_python_interpreter() looks for .venv/bin/python in cwd, falling back to sys.executable. + # This ensures the server runs in the project's environment even when the CLI is installed + # in an isolated environment (e.g., via 'uv tool install arcade-mcp'). + project_python = find_python_interpreter() + cmd = [str(project_python), entrypoint] + process = subprocess.Popen( cmd, stdout=subprocess.PIPE, diff --git a/libs/arcade-cli/arcade_cli/utils.py b/libs/arcade-cli/arcade_cli/utils.py index c5818069..47914fc1 100644 --- a/libs/arcade-cli/arcade_cli/utils.py +++ b/libs/arcade-cli/arcade_cli/utils.py @@ -394,7 +394,7 @@ def handle_cli_error( elif error: console.print(f"❌ {message}: {escape(str(error))}", style="bold red") else: - console.print(f"❌ {message}", style="bold red") + console.print(f"❌ {escape(message)}", style="bold red") if should_exit: raise CLIError(message, error) diff --git a/libs/arcade-core/arcade_core/toolkit.py b/libs/arcade-core/arcade_core/toolkit.py index 6460a0a3..426f84e1 100644 --- a/libs/arcade-core/arcade_core/toolkit.py +++ b/libs/arcade-core/arcade_core/toolkit.py @@ -299,32 +299,50 @@ class Toolkit(BaseModel): # Skipping this file is necessary because tools are discovered via AST parsing, but those tools # aren't in the module's namespace yet since the file is still executing. current_file = None + current_module_name = None main_module = sys.modules.get("__main__") - if main_module and hasattr(main_module, "__file__") and main_module.__file__: - with contextlib.suppress(Exception): - current_file = Path(main_module.__file__).resolve() + if main_module: + if hasattr(main_module, "__file__") and main_module.__file__: + with contextlib.suppress(Exception): + current_file = Path(main_module.__file__).resolve() + # Get module name from __spec__ if available (used when paths don't match, + # e.g., script runs from bundle but package is in site-packages) + main_spec = getattr(main_module, "__spec__", None) + if main_spec and main_spec.name: + current_module_name = main_spec.name tools: dict[str, list[str]] = {} for module_path in modules: - # Skip adding tools from the currently executing file - if current_file: - try: - module_path_resolved = module_path.resolve() - if module_path_resolved == current_file: - continue - except Exception: # noqa: S110 - pass - + # Build import path first (needed for module name comparison in skip logic) relative_path = module_path.relative_to(package_dir) - cls.validate_file(module_path) - # Build import path and avoid duplicating the package prefix if it already exists relative_parts = relative_path.with_suffix("").parts import_path = ".".join(relative_parts) if relative_parts and relative_parts[0] == package_name: full_import_path = import_path else: full_import_path = f"{package_name}.{import_path}" if import_path else package_name + + # Skip logic: check by file path OR by module name + # This handles cases where the script is run from a different location than + # where the package is installed (e.g., deployment scenarios) + should_skip = False + if current_file: + try: + module_path_resolved = module_path.resolve() + if module_path_resolved == current_file: + should_skip = True + except Exception: # noqa: S110 + pass + + # Secondary check: compare module names when paths don't match + if not should_skip and current_module_name and full_import_path == current_module_name: + should_skip = True + + if should_skip: + continue + + cls.validate_file(module_path) tools[full_import_path] = get_tools_from_file(str(module_path)) if not tools: diff --git a/libs/arcade-core/pyproject.toml b/libs/arcade-core/pyproject.toml index b98736cb..867ea6e8 100644 --- a/libs/arcade-core/pyproject.toml +++ b/libs/arcade-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-core" -version = "4.2.1" +version = "4.2.2" description = "Arcade Core - Core library for Arcade platform" readme = "README.md" license = { text = "MIT" } diff --git a/libs/arcade-evals/arcade_evals/loaders.py b/libs/arcade-evals/arcade_evals/loaders.py index 882023d3..58150fb0 100644 --- a/libs/arcade-evals/arcade_evals/loaders.py +++ b/libs/arcade-evals/arcade_evals/loaders.py @@ -98,10 +98,10 @@ async def _get_cache_lock(cache_key: str) -> asyncio.Lock: return _cache_locks[cache_key] -async def _acquire_lock_with_timeout( - lock: asyncio.Lock, timeout: float = LOCK_TIMEOUT_SECONDS -) -> bool: +async def _acquire_lock_with_timeout(lock: asyncio.Lock, timeout: float | None = None) -> bool: """Acquire a lock with timeout. Returns True if acquired, False on timeout.""" + if timeout is None: + timeout = LOCK_TIMEOUT_SECONDS try: await asyncio.wait_for(lock.acquire(), timeout=timeout) except asyncio.TimeoutError: diff --git a/libs/arcade-mcp-server/pyproject.toml b/libs/arcade-mcp-server/pyproject.toml index 67be5f96..854158b3 100644 --- a/libs/arcade-mcp-server/pyproject.toml +++ b/libs/arcade-mcp-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "arcade-mcp-server" -version = "1.15.0" +version = "1.15.1" description = "Model Context Protocol (MCP) server framework for Arcade.dev" readme = "README.md" authors = [{ name = "Arcade.dev" }] @@ -21,7 +21,7 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "arcade-core>=4.2.1,<5.0.0", + "arcade-core>=4.2.2,<5.0.0", "arcade-serve>=3.2.0,<4.0.0", "arcade-tdk>=3.4.0,<4.0.0", "arcadepy>=1.5.0", diff --git a/libs/tests/arcade_mcp_server/integration/test_end_to_end.py b/libs/tests/arcade_mcp_server/integration/test_end_to_end.py index d18e21fa..be461c88 100644 --- a/libs/tests/arcade_mcp_server/integration/test_end_to_end.py +++ b/libs/tests/arcade_mcp_server/integration/test_end_to_end.py @@ -82,6 +82,51 @@ def start_mcp_server( raise ValueError(f"Invalid transport: {transport}") +def start_mcp_server_direct_python( + transport: str, port: int | None = None +) -> tuple[subprocess.Popen, int | None]: + """ + Start MCP server with direct Python invocation (simulates what happens in the Engine during deployment). + + Args: + transport: Transport type ("stdio" or "http") + port: Port for HTTP transport (optional, will be random if not provided) + + Returns: + Tuple of (process, port). Port is None for stdio transport. + """ + entrypoint_path = get_entrypoint_path() + package_path = Path(__file__).parent / "server" + + # Find Python in the server's venv + venv_python = package_path / ".venv" / "bin" / "python" + if not venv_python.exists(): + pytest.skip("Server venv not found - run 'uv sync' in integration/server first") + + if port is None: + port = random.randint(8000, 9000) # noqa: S311 + + env = { + **os.environ, + "ARCADE_SERVER_HOST": "127.0.0.1", + "ARCADE_SERVER_PORT": str(port), + "ARCADE_SERVER_TRANSPORT": transport, + "ARCADE_AUTH_DISABLED": "true", + "ARCADE_WORKER_SECRET": "test-secret-direct", + } + + cmd = [str(venv_python), entrypoint_path, transport] + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + env=env, + cwd=str(package_path), + ) + return process, port if transport == "http" else None + + def wait_for_http_server_ready(port: int, timeout: int = 30) -> None: """ Wait for HTTP server to become healthy. @@ -913,3 +958,60 @@ async def test_http_mixed_route_concurrent_execution(): except subprocess.TimeoutExpired: process.kill() process.wait() + + +@pytest.mark.asyncio +async def test_http_direct_python_invocation(): + """Test server starts correctly with direct Python (simulates what happens in the Engine during deployment)""" + process, port = start_mcp_server_direct_python("http") + assert port is not None + + try: + wait_for_http_server_ready(port, timeout=10) + + # Verify server is healthy and tools are discoverable + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + client = httpx.Client(base_url=f"http://127.0.0.1:{port}", timeout=10.0, headers=headers) + + # Initialize + init_response = client.post( + "/mcp", + json=build_jsonrpc_request( + "initialize", + { + "protocolVersion": "2025-06-18", + "capabilities": {}, + "clientInfo": {"name": "test", "version": "1.0"}, + }, + request_id=1, + ), + ) + assert init_response.status_code == 200 + + session_id = init_response.headers.get("mcp-session-id") + client.headers.update({"Mcp-Session-Id": session_id}) + + # Send initialized notification + init_notif = build_jsonrpc_request( + "notifications/initialized", params=None, request_id=None + ) + client.post("/mcp", json=init_notif) + + # List tools - should have 9 tools (including hello_world from entrypoint.py) + list_response = client.post("/mcp", json=build_jsonrpc_request("tools/list", request_id=2)) + assert list_response.status_code == 200 + tools = list_response.json()["result"]["tools"] + assert len(tools) == 9 + + client.close() + + finally: + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + process.wait() diff --git a/libs/tests/core/test_toolkit.py b/libs/tests/core/test_toolkit.py index 8cea12c5..3c362791 100644 --- a/libs/tests/core/test_toolkit.py +++ b/libs/tests/core/test_toolkit.py @@ -500,3 +500,154 @@ class TestValidPath: def test_invalid_paths(self, path_input): """Test that invalid paths are rejected.""" assert Validate.path(path_input) is False + + +class TestToolsFromDirectory: + """Test the tools_from_directory skip logic for entrypoint files. + + It has two conditions: + 1. File path match: module_path.resolve() == current_file + 2. Module name match: full_import_path == current_module_name + """ + + @pytest.fixture + def temp_package(self, tmp_path): + """Create a temporary package with Python files containing tools.""" + package_dir = tmp_path / "mypackage" + package_dir.mkdir() + + (package_dir / "__init__.py").write_text("") + (package_dir / "entrypoint.py").write_text( + ''' +from arcade_mcp_server import tool + +@tool +def my_tool(): + """A tool.""" + pass +''' + ) + tools_dir = package_dir / "tools" + tools_dir.mkdir() + (tools_dir / "__init__.py").write_text("") + (tools_dir / "helper.py").write_text( + ''' +from arcade_mcp_server import tool + +@tool +def helper_tool(): + """A helper tool.""" + pass +''' + ) + + return package_dir + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_skips_file_when_path_matches(self, mock_get_tools, temp_package): + """Verify skip when __main__.__file__ matches module path (original behavior).""" + mock_get_tools.return_value = ["some_tool"] + entrypoint_path = temp_package / "entrypoint.py" + + # Create mock __main__ module with matching file path + mock_main = MagicMock() + mock_main.__file__ = str(entrypoint_path) + mock_main.__spec__ = None + + with patch.dict("sys.modules", {"__main__": mock_main}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # entrypoint.py should be skipped, only tools/helper.py should be included + assert "mypackage.entrypoint" not in result + assert "mypackage.tools.helper" in result + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_skips_file_when_module_name_matches(self, mock_get_tools, temp_package): + """Verify skip when __main__.__spec__.name matches even if paths differ. + + This simulates deployment scenarios where the script runs from a bundle + but the package is installed in site-packages (different paths). + """ + mock_get_tools.return_value = ["some_tool"] + + # Create mock __main__ module with different file path but matching module name + mock_main = MagicMock() + mock_main.__file__ = "/some/other/path/entrypoint.py" # Different path + mock_spec = MagicMock() + mock_spec.name = "mypackage.entrypoint" # but, matches the module being scanned + mock_main.__spec__ = mock_spec + + with patch.dict("sys.modules", {"__main__": mock_main}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # entrypoint.py should be skipped due to module name match + assert "mypackage.entrypoint" not in result + assert "mypackage.tools.helper" in result + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_no_skip_when_different_module(self, mock_get_tools, temp_package): + """Verify unrelated modules are not skipped.""" + mock_get_tools.return_value = ["some_tool"] + + # Create mock __main__ module with completely different identity + mock_main = MagicMock() + mock_main.__file__ = "/some/other/path/other_script.py" + mock_spec = MagicMock() + mock_spec.name = "some_other_package.script" + mock_main.__spec__ = mock_spec + + with patch.dict("sys.modules", {"__main__": mock_main}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # Both files should be included since neither matches __main__ + assert "mypackage.entrypoint" in result + assert "mypackage.tools.helper" in result + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_no_skip_when_no_main_module(self, mock_get_tools, temp_package): + """Handle case where __main__ is not in sys.modules.""" + mock_get_tools.return_value = ["some_tool"] + + # Remove __main__ from sys.modules + with patch.dict("sys.modules", {"__main__": None}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # Both files should be included + assert "mypackage.entrypoint" in result + assert "mypackage.tools.helper" in result + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_no_skip_when_no_spec(self, mock_get_tools, temp_package): + """Handle case where __main__ has no __spec__ attribute.""" + mock_get_tools.return_value = ["some_tool"] + + # Create mock __main__ module with different file path and no __spec__ + mock_main = MagicMock() + mock_main.__file__ = "/some/other/path/script.py" + mock_main.__spec__ = None + + with patch.dict("sys.modules", {"__main__": mock_main}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # Both files should be included since path doesn't match and no spec + assert "mypackage.entrypoint" in result + assert "mypackage.tools.helper" in result + + @patch("arcade_core.toolkit.get_tools_from_file") + def test_no_skip_when_spec_has_no_name(self, mock_get_tools, temp_package): + """Handle case where __main__.__spec__ exists but has no name.""" + mock_get_tools.return_value = ["some_tool"] + + # Create mock __main__ module with __spec__ but no name + mock_main = MagicMock() + mock_main.__file__ = "/some/other/path/script.py" + mock_spec = MagicMock() + mock_spec.name = None + mock_main.__spec__ = mock_spec + + with patch.dict("sys.modules", {"__main__": mock_main}): + result = Toolkit.tools_from_directory(temp_package, "mypackage") + + # Both files should be included + assert "mypackage.entrypoint" in result + assert "mypackage.tools.helper" in result diff --git a/pyproject.toml b/pyproject.toml index a89e33dd..af9fc6e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "arcade-mcp" -version = "1.8.2" +version = "1.8.3" description = "Arcade.dev - Tool Calling platform for Agents" readme = "README.md" license = { file = "LICENSE" } @@ -19,8 +19,8 @@ requires-python = ">=3.10" dependencies = [ # CLI dependencies - "arcade-mcp-server>=1.14.2,<2.0.0", - "arcade-core>=4.2.1,<5.0.0", + "arcade-mcp-server>=1.15.1,<2.0.0", + "arcade-core>=4.2.2,<5.0.0", "typer==0.10.0", "rich>=14.0.0,<15.0.0", "Jinja2==3.1.6",