arcade-mcp/toolkits/clickhouse/arcade_clickhouse/database_engine.py
Eric Gustin c50699d5e6
Migrate OSS toolkits to MCPApp (#782)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> **Medium Risk**
> Touches multiple toolkits’ runtime entrypoints and context/error/auth
plumbing, so breakage risk is mainly around invocation/packaging and
tool execution wiring rather than business logic.
> 
> **Overview**
> Migrates the BrightData, ClickHouse, LinkedIn, Math, MongoDB,
Postgres, and Zendesk OSS toolkits from `arcade-tdk` to
`arcade-mcp-server` APIs by updating tool decorators, `Context` types,
auth classes, and exception imports.
> 
> Adds per-toolkit `__main__.py` files that construct an `MCPApp`,
register module tools, and run via configurable transport/host/port;
corresponding `pyproject.toml` updates bump versions, drop
`arcade-tdk`/`arcade-serve` deps, and add `project.scripts` console
entrypoints.
> 
> Updates tests and eval suites to use `arcade_mcp_server.Context`
(mocked) and switches eval `ToolCatalog` imports to `arcade_core`.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
9b3e31acb4b35e1d72efd47e2d279c5b19e3ecb0. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
2026-02-25 14:29:18 -08:00

209 lines
7.2 KiB
Python

import contextlib
from typing import Any, ClassVar
from urllib.parse import urlparse
import clickhouse_connect
from arcade_mcp_server.exceptions import RetryableToolError
MAX_ROWS_RETURNED = 1000
TEST_QUERY = "SELECT 1"
class DatabaseEngine:
_instance: ClassVar[None] = None
_clients: ClassVar[dict[str, Any]] = {}
@classmethod
async def get_instance(cls, connection_string: str) -> Any:
parsed_url = urlparse(connection_string)
# Extract connection parameters from the URL
host = parsed_url.hostname or "localhost"
port = parsed_url.port
database = parsed_url.path.lstrip("/") or "default"
username = parsed_url.username
password = parsed_url.password
# Handle different ClickHouse protocols
# clickhouse-connect only supports HTTP and HTTPS interfaces
if parsed_url.scheme in ["clickhouse+native"]:
# Convert native protocol to HTTP for clickhouse-connect compatibility
# Convert native port 9000 to HTTP port 8123
port = 8123 if port == 9000 else port or 8123
interface = "http"
elif parsed_url.scheme in ["clickhouse+https"]:
# For HTTPS protocol
port = port or 8443
interface = "https"
else:
# For HTTP or unspecified, use port 8123 by default
port = port or 8123
interface = "http"
key = f"{interface}://{host}:{port}/{database}"
if key not in cls._clients:
try:
# Create ClickHouse client
client_args: dict[str, Any] = {
"host": host,
"port": port,
"database": database,
"interface": interface,
}
if username:
client_args["username"] = username
if password:
client_args["password"] = password
client = clickhouse_connect.get_client(**client_args)
cls._clients[key] = client
# Test the connection
client.command(TEST_QUERY)
except Exception as e:
# Remove failed client from cache
cls._clients.pop(key, None)
raise RetryableToolError(
f"Connection failed: {e}",
developer_message="Connection to ClickHouse failed.",
additional_prompt_content="Check the connection string and try again.",
) from e
return cls._clients[key]
@classmethod
async def get_engine(cls, connection_string: str) -> Any:
client = await cls.get_instance(connection_string)
class ConnectionContextManager:
def __init__(self, client: Any) -> None:
self.client = client
async def __aenter__(self) -> Any:
return self.client
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
# Connection cleanup is handled by clickhouse-connect
pass
return ConnectionContextManager(client)
@classmethod
async def cleanup(cls) -> None:
"""Clean up all cached clients. Call this when shutting down."""
for client in cls._clients.values():
with contextlib.suppress(Exception):
client.close()
cls._clients.clear()
@classmethod
def clear_cache(cls) -> None:
"""Clear the client cache without disposing clients. Use with caution."""
cls._clients.clear()
@classmethod
def sanitize_query( # noqa: C901
cls,
select_clause: str,
from_clause: str,
limit: int,
offset: int,
join_clause: str | None,
where_clause: str | None,
having_clause: str | None,
group_by_clause: str | None,
order_by_clause: str | None,
with_clause: str | None,
) -> tuple[str, dict[str, Any]]:
# Remove the leading keywords from the clauses if they are present
if select_clause.strip().split(" ")[0].upper() == "SELECT":
select_clause = select_clause.strip()[6:]
if from_clause.strip().split(" ")[0].upper() == "FROM":
from_clause = from_clause.strip()[4:]
if join_clause and join_clause.strip().split(" ")[0].upper() == "JOIN":
join_clause = join_clause.strip()[4:]
if where_clause and where_clause.strip().split(" ")[0].upper() == "WHERE":
where_clause = where_clause.strip()[5:]
if group_by_clause and group_by_clause.strip().split(" ")[0].upper() == "GROUP BY":
group_by_clause = group_by_clause.strip()[8:]
if order_by_clause and order_by_clause.strip().split(" ")[0].upper() == "ORDER BY":
order_by_clause = order_by_clause.strip()[8:]
if having_clause and having_clause.strip().split(" ")[0].upper() == "HAVING":
having_clause = having_clause.strip()[6:]
first_select_word = select_clause.strip().split(" ")[0].upper()
if first_select_word in [
"INSERT",
"UPDATE",
"DELETE",
"CREATE",
"ALTER",
"DROP",
"TRUNCATE",
"REINDEX",
"VACUUM",
"ANALYZE",
"COMMENT",
"OPTIMIZE", # ClickHouse-specific
"SYSTEM", # ClickHouse-specific
]:
raise RetryableToolError(
"Only SELECT queries are allowed.",
)
if select_clause.strip() == "*":
raise RetryableToolError(
"Do not use * in the select clause. Use a comma separated list of columns you wish to return.",
)
if limit > MAX_ROWS_RETURNED:
raise RetryableToolError(
f"Limit is too high. Maximum is {MAX_ROWS_RETURNED}.",
)
if offset < 0:
raise RetryableToolError(
"Offset must be greater than or equal to 0.",
developer_message="Offset must be greater than or equal to 0.",
)
if limit <= 0:
raise RetryableToolError(
"Limit must be greater than 0.",
developer_message="Limit must be greater than 0.",
)
# Build query with identifiers directly interpolated, but use parameters for values
parts = []
if with_clause:
parts.append(f"WITH {with_clause}")
parts.append(f"SELECT {select_clause} FROM {from_clause}") # noqa: S608
if join_clause:
parts.append(f"JOIN {join_clause}")
if where_clause:
parts.append(f"WHERE {where_clause}")
if group_by_clause:
parts.append(f"GROUP BY {group_by_clause}")
if having_clause:
parts.append(f"HAVING {having_clause}")
if order_by_clause:
parts.append(f"ORDER BY {order_by_clause}")
parts.append("LIMIT :limit OFFSET :offset")
query = " ".join(parts)
# Only use parameters for values, not identifiers
parameters = {
"limit": limit,
"offset": offset,
}
return query, parameters