Four bug fixes (#754)

1. Resolves
[TOO-363](https://linear.app/arcadedev/issue/TOO-363/arcade-deploy-fails-when-additional-deps-are-added-to-the-server).
2. Resolves
[TOO-364](https://linear.app/arcadedev/issue/TOO-364/arcade-cores-tool-skip-logic-is-missing-case-for-direct-execution).
3. Resolves
[TOO-358](https://linear.app/arcadedev/issue/TOO-358/missing-evals-error-message-shows-wrong-command).
4. Resolves
[TOO-365](https://linear.app/arcadedev/issue/TOO-365/arcade-evals-unit-tests-are-hanging).

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Medium Risk**
> Medium risk because it changes how `arcade deploy` spawns the server
process and adjusts toolkit discovery skip logic, which can affect
deployments and tool discovery; however, the changes are small and
covered by new unit/integration tests.
> 
> **Overview**
> `arcade deploy` now starts the validation server using the project’s
`.venv` interpreter (via `find_python_interpreter`) instead of the CLI’s
own `sys.executable`, preventing missing dependency failures when the
CLI is installed in an isolated env.
> 
> `arcade-core`’s `Toolkit.tools_from_directory` skip logic is hardened
to also skip the currently executing entrypoint by module name
(`__main__.__spec__.name`) when file paths don’t match (e.g., bundled
execution). CLI error printing now escapes plain messages to avoid rich
markup issues, and `arcade-evals` lock acquisition accepts an optional
timeout default.
> 
> Adds unit tests for the new toolkit skip behavior and an integration
test that boots the MCP server via direct Python invocation to mirror
deployment behavior, and bumps `arcade-core`, `arcade-mcp-server`, and
root dependency versions accordingly.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
e7785634c231c059f2e0bd1bc73a56bd7470a494. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
This commit is contained in:
Eric Gustin 2026-01-29 15:12:06 -08:00 committed by GitHub
parent ab47565540
commit a4160dd9fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 313 additions and 27 deletions

View file

@ -146,7 +146,16 @@ def get_tool_secrets() -> dict:
def find_python_interpreter() -> Path:
# Find the Python interpreter in the virtual environment
"""
Find the Python interpreter in the virtual environment.
NOTE: This function assumes it is called from the project root directory (where .venv lives).
Currently, callers like `arcade deploy` enforce this by requiring pyproject.toml in cwd.
If this requirement is relaxed in the future, this function should be updated to:
1. Accept a project_root parameter, OR
2. Honor VIRTUAL_ENV / UV_PROJECT_ENVIRONMENT env vars, OR
3. Search upward from cwd to find pyproject.toml and resolve .venv relative to that
"""
venv_python = None
# Check for .venv first (uv default)
if (Path.cwd() / ".venv").exists():

View file

@ -4,7 +4,6 @@ import io
import os
import random
import subprocess
import sys
import tarfile
import time
from collections import deque
@ -22,6 +21,7 @@ from rich.spinner import Spinner
from rich.text import Text
from typing_extensions import Literal
from arcade_cli.configure import find_python_interpreter
from arcade_cli.secret import load_env_file
from arcade_cli.utils import (
compute_base_url,
@ -388,7 +388,13 @@ def start_server_process(entrypoint: str, debug: bool = False) -> tuple[subproce
"ARCADE_WORKER_SECRET": "temp-validation-secret",
}
cmd = [sys.executable, entrypoint]
# Use the project's Python environment, not the CLI's isolated environment.
# find_python_interpreter() looks for .venv/bin/python in cwd, falling back to sys.executable.
# This ensures the server runs in the project's environment even when the CLI is installed
# in an isolated environment (e.g., via 'uv tool install arcade-mcp').
project_python = find_python_interpreter()
cmd = [str(project_python), entrypoint]
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,

View file

@ -394,7 +394,7 @@ def handle_cli_error(
elif error:
console.print(f"{message}: {escape(str(error))}", style="bold red")
else:
console.print(f"{message}", style="bold red")
console.print(f"{escape(message)}", style="bold red")
if should_exit:
raise CLIError(message, error)

View file

@ -299,32 +299,50 @@ class Toolkit(BaseModel):
# Skipping this file is necessary because tools are discovered via AST parsing, but those tools
# aren't in the module's namespace yet since the file is still executing.
current_file = None
current_module_name = None
main_module = sys.modules.get("__main__")
if main_module and hasattr(main_module, "__file__") and main_module.__file__:
with contextlib.suppress(Exception):
current_file = Path(main_module.__file__).resolve()
if main_module:
if hasattr(main_module, "__file__") and main_module.__file__:
with contextlib.suppress(Exception):
current_file = Path(main_module.__file__).resolve()
# Get module name from __spec__ if available (used when paths don't match,
# e.g., script runs from bundle but package is in site-packages)
main_spec = getattr(main_module, "__spec__", None)
if main_spec and main_spec.name:
current_module_name = main_spec.name
tools: dict[str, list[str]] = {}
for module_path in modules:
# Skip adding tools from the currently executing file
if current_file:
try:
module_path_resolved = module_path.resolve()
if module_path_resolved == current_file:
continue
except Exception: # noqa: S110
pass
# Build import path first (needed for module name comparison in skip logic)
relative_path = module_path.relative_to(package_dir)
cls.validate_file(module_path)
# Build import path and avoid duplicating the package prefix if it already exists
relative_parts = relative_path.with_suffix("").parts
import_path = ".".join(relative_parts)
if relative_parts and relative_parts[0] == package_name:
full_import_path = import_path
else:
full_import_path = f"{package_name}.{import_path}" if import_path else package_name
# Skip logic: check by file path OR by module name
# This handles cases where the script is run from a different location than
# where the package is installed (e.g., deployment scenarios)
should_skip = False
if current_file:
try:
module_path_resolved = module_path.resolve()
if module_path_resolved == current_file:
should_skip = True
except Exception: # noqa: S110
pass
# Secondary check: compare module names when paths don't match
if not should_skip and current_module_name and full_import_path == current_module_name:
should_skip = True
if should_skip:
continue
cls.validate_file(module_path)
tools[full_import_path] = get_tools_from_file(str(module_path))
if not tools:

View file

@ -1,6 +1,6 @@
[project]
name = "arcade-core"
version = "4.2.1"
version = "4.2.2"
description = "Arcade Core - Core library for Arcade platform"
readme = "README.md"
license = { text = "MIT" }

View file

@ -98,10 +98,10 @@ async def _get_cache_lock(cache_key: str) -> asyncio.Lock:
return _cache_locks[cache_key]
async def _acquire_lock_with_timeout(
lock: asyncio.Lock, timeout: float = LOCK_TIMEOUT_SECONDS
) -> bool:
async def _acquire_lock_with_timeout(lock: asyncio.Lock, timeout: float | None = None) -> bool:
"""Acquire a lock with timeout. Returns True if acquired, False on timeout."""
if timeout is None:
timeout = LOCK_TIMEOUT_SECONDS
try:
await asyncio.wait_for(lock.acquire(), timeout=timeout)
except asyncio.TimeoutError:

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "arcade-mcp-server"
version = "1.15.0"
version = "1.15.1"
description = "Model Context Protocol (MCP) server framework for Arcade.dev"
readme = "README.md"
authors = [{ name = "Arcade.dev" }]
@ -21,7 +21,7 @@ classifiers = [
]
requires-python = ">=3.10"
dependencies = [
"arcade-core>=4.2.1,<5.0.0",
"arcade-core>=4.2.2,<5.0.0",
"arcade-serve>=3.2.0,<4.0.0",
"arcade-tdk>=3.4.0,<4.0.0",
"arcadepy>=1.5.0",

View file

@ -82,6 +82,51 @@ def start_mcp_server(
raise ValueError(f"Invalid transport: {transport}")
def start_mcp_server_direct_python(
transport: str, port: int | None = None
) -> tuple[subprocess.Popen, int | None]:
"""
Start MCP server with direct Python invocation (simulates what happens in the Engine during deployment).
Args:
transport: Transport type ("stdio" or "http")
port: Port for HTTP transport (optional, will be random if not provided)
Returns:
Tuple of (process, port). Port is None for stdio transport.
"""
entrypoint_path = get_entrypoint_path()
package_path = Path(__file__).parent / "server"
# Find Python in the server's venv
venv_python = package_path / ".venv" / "bin" / "python"
if not venv_python.exists():
pytest.skip("Server venv not found - run 'uv sync' in integration/server first")
if port is None:
port = random.randint(8000, 9000) # noqa: S311
env = {
**os.environ,
"ARCADE_SERVER_HOST": "127.0.0.1",
"ARCADE_SERVER_PORT": str(port),
"ARCADE_SERVER_TRANSPORT": transport,
"ARCADE_AUTH_DISABLED": "true",
"ARCADE_WORKER_SECRET": "test-secret-direct",
}
cmd = [str(venv_python), entrypoint_path, transport]
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
env=env,
cwd=str(package_path),
)
return process, port if transport == "http" else None
def wait_for_http_server_ready(port: int, timeout: int = 30) -> None:
"""
Wait for HTTP server to become healthy.
@ -913,3 +958,60 @@ async def test_http_mixed_route_concurrent_execution():
except subprocess.TimeoutExpired:
process.kill()
process.wait()
@pytest.mark.asyncio
async def test_http_direct_python_invocation():
"""Test server starts correctly with direct Python (simulates what happens in the Engine during deployment)"""
process, port = start_mcp_server_direct_python("http")
assert port is not None
try:
wait_for_http_server_ready(port, timeout=10)
# Verify server is healthy and tools are discoverable
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
}
client = httpx.Client(base_url=f"http://127.0.0.1:{port}", timeout=10.0, headers=headers)
# Initialize
init_response = client.post(
"/mcp",
json=build_jsonrpc_request(
"initialize",
{
"protocolVersion": "2025-06-18",
"capabilities": {},
"clientInfo": {"name": "test", "version": "1.0"},
},
request_id=1,
),
)
assert init_response.status_code == 200
session_id = init_response.headers.get("mcp-session-id")
client.headers.update({"Mcp-Session-Id": session_id})
# Send initialized notification
init_notif = build_jsonrpc_request(
"notifications/initialized", params=None, request_id=None
)
client.post("/mcp", json=init_notif)
# List tools - should have 9 tools (including hello_world from entrypoint.py)
list_response = client.post("/mcp", json=build_jsonrpc_request("tools/list", request_id=2))
assert list_response.status_code == 200
tools = list_response.json()["result"]["tools"]
assert len(tools) == 9
client.close()
finally:
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
process.wait()

View file

@ -500,3 +500,154 @@ class TestValidPath:
def test_invalid_paths(self, path_input):
"""Test that invalid paths are rejected."""
assert Validate.path(path_input) is False
class TestToolsFromDirectory:
"""Test the tools_from_directory skip logic for entrypoint files.
It has two conditions:
1. File path match: module_path.resolve() == current_file
2. Module name match: full_import_path == current_module_name
"""
@pytest.fixture
def temp_package(self, tmp_path):
"""Create a temporary package with Python files containing tools."""
package_dir = tmp_path / "mypackage"
package_dir.mkdir()
(package_dir / "__init__.py").write_text("")
(package_dir / "entrypoint.py").write_text(
'''
from arcade_mcp_server import tool
@tool
def my_tool():
"""A tool."""
pass
'''
)
tools_dir = package_dir / "tools"
tools_dir.mkdir()
(tools_dir / "__init__.py").write_text("")
(tools_dir / "helper.py").write_text(
'''
from arcade_mcp_server import tool
@tool
def helper_tool():
"""A helper tool."""
pass
'''
)
return package_dir
@patch("arcade_core.toolkit.get_tools_from_file")
def test_skips_file_when_path_matches(self, mock_get_tools, temp_package):
"""Verify skip when __main__.__file__ matches module path (original behavior)."""
mock_get_tools.return_value = ["some_tool"]
entrypoint_path = temp_package / "entrypoint.py"
# Create mock __main__ module with matching file path
mock_main = MagicMock()
mock_main.__file__ = str(entrypoint_path)
mock_main.__spec__ = None
with patch.dict("sys.modules", {"__main__": mock_main}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# entrypoint.py should be skipped, only tools/helper.py should be included
assert "mypackage.entrypoint" not in result
assert "mypackage.tools.helper" in result
@patch("arcade_core.toolkit.get_tools_from_file")
def test_skips_file_when_module_name_matches(self, mock_get_tools, temp_package):
"""Verify skip when __main__.__spec__.name matches even if paths differ.
This simulates deployment scenarios where the script runs from a bundle
but the package is installed in site-packages (different paths).
"""
mock_get_tools.return_value = ["some_tool"]
# Create mock __main__ module with different file path but matching module name
mock_main = MagicMock()
mock_main.__file__ = "/some/other/path/entrypoint.py" # Different path
mock_spec = MagicMock()
mock_spec.name = "mypackage.entrypoint" # but, matches the module being scanned
mock_main.__spec__ = mock_spec
with patch.dict("sys.modules", {"__main__": mock_main}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# entrypoint.py should be skipped due to module name match
assert "mypackage.entrypoint" not in result
assert "mypackage.tools.helper" in result
@patch("arcade_core.toolkit.get_tools_from_file")
def test_no_skip_when_different_module(self, mock_get_tools, temp_package):
"""Verify unrelated modules are not skipped."""
mock_get_tools.return_value = ["some_tool"]
# Create mock __main__ module with completely different identity
mock_main = MagicMock()
mock_main.__file__ = "/some/other/path/other_script.py"
mock_spec = MagicMock()
mock_spec.name = "some_other_package.script"
mock_main.__spec__ = mock_spec
with patch.dict("sys.modules", {"__main__": mock_main}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# Both files should be included since neither matches __main__
assert "mypackage.entrypoint" in result
assert "mypackage.tools.helper" in result
@patch("arcade_core.toolkit.get_tools_from_file")
def test_no_skip_when_no_main_module(self, mock_get_tools, temp_package):
"""Handle case where __main__ is not in sys.modules."""
mock_get_tools.return_value = ["some_tool"]
# Remove __main__ from sys.modules
with patch.dict("sys.modules", {"__main__": None}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# Both files should be included
assert "mypackage.entrypoint" in result
assert "mypackage.tools.helper" in result
@patch("arcade_core.toolkit.get_tools_from_file")
def test_no_skip_when_no_spec(self, mock_get_tools, temp_package):
"""Handle case where __main__ has no __spec__ attribute."""
mock_get_tools.return_value = ["some_tool"]
# Create mock __main__ module with different file path and no __spec__
mock_main = MagicMock()
mock_main.__file__ = "/some/other/path/script.py"
mock_main.__spec__ = None
with patch.dict("sys.modules", {"__main__": mock_main}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# Both files should be included since path doesn't match and no spec
assert "mypackage.entrypoint" in result
assert "mypackage.tools.helper" in result
@patch("arcade_core.toolkit.get_tools_from_file")
def test_no_skip_when_spec_has_no_name(self, mock_get_tools, temp_package):
"""Handle case where __main__.__spec__ exists but has no name."""
mock_get_tools.return_value = ["some_tool"]
# Create mock __main__ module with __spec__ but no name
mock_main = MagicMock()
mock_main.__file__ = "/some/other/path/script.py"
mock_spec = MagicMock()
mock_spec.name = None
mock_main.__spec__ = mock_spec
with patch.dict("sys.modules", {"__main__": mock_main}):
result = Toolkit.tools_from_directory(temp_package, "mypackage")
# Both files should be included
assert "mypackage.entrypoint" in result
assert "mypackage.tools.helper" in result

View file

@ -1,6 +1,6 @@
[project]
name = "arcade-mcp"
version = "1.8.2"
version = "1.8.3"
description = "Arcade.dev - Tool Calling platform for Agents"
readme = "README.md"
license = { file = "LICENSE" }
@ -19,8 +19,8 @@ requires-python = ">=3.10"
dependencies = [
# CLI dependencies
"arcade-mcp-server>=1.14.2,<2.0.0",
"arcade-core>=4.2.1,<5.0.0",
"arcade-mcp-server>=1.15.1,<2.0.0",
"arcade-core>=4.2.2,<5.0.0",
"typer==0.10.0",
"rich>=14.0.0,<15.0.0",
"Jinja2==3.1.6",