arcade-mcp/toolkits/mongodb/arcade_mongodb/tools/utils.py
Eric Gustin c50699d5e6
Migrate OSS toolkits to MCPApp (#782)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> **Medium Risk**
> Touches multiple toolkits’ runtime entrypoints and context/error/auth
plumbing, so breakage risk is mainly around invocation/packaging and
tool execution wiring rather than business logic.
> 
> **Overview**
> Migrates the BrightData, ClickHouse, LinkedIn, Math, MongoDB,
Postgres, and Zendesk OSS toolkits from `arcade-tdk` to
`arcade-mcp-server` APIs by updating tool decorators, `Context` types,
auth classes, and exception imports.
> 
> Adds per-toolkit `__main__.py` files that construct an `MCPApp`,
register module tools, and run via configurable transport/host/port;
corresponding `pyproject.toml` updates bump versions, drop
`arcade-tdk`/`arcade-serve` deps, and add `project.scripts` console
entrypoints.
> 
> Updates tests and eval suites to use `arcade_mcp_server.Context`
(mocked) and switches eval `ToolCatalog` imports to `arcade_core`.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
9b3e31acb4b35e1d72efd47e2d279c5b19e3ecb0. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
2026-02-25 14:29:18 -08:00

281 lines
11 KiB
Python

import json
from datetime import datetime
from typing import Any
from arcade_mcp_server.exceptions import RetryableToolError
from bson import ObjectId
def _validate_no_write_operations(obj: Any, parameter_name: str, path: str = "") -> None:
"""
Recursively validate that an object doesn't contain MongoDB write operations.
Args:
obj: The object to validate
parameter_name: Name of the parameter for error messages
path: Current path in the object (for nested validation)
Raises:
RetryableToolError: If write operations are detected
"""
# MongoDB write/update operators that should be blocked
WRITE_OPERATORS = {
# Update operators
"$set",
"$unset",
"$inc",
"$mul",
"$rename",
"$min",
"$max",
"$currentDate",
"$addToSet",
"$pop",
"$pull",
"$push",
"$pullAll",
"$each",
"$slice",
"$sort",
"$position",
"$bit",
"$isolated",
# Array update operators
"$",
"$[]",
"$[<identifier>]",
# Pipeline update operators
"$addFields",
"$replaceRoot",
"$replaceWith",
# Aggregation stages that can modify (in case they're misused)
"$out",
"$merge",
# Other potentially dangerous operators
"$where", # Can execute JavaScript
}
if isinstance(obj, dict):
for key, value in obj.items():
current_path = f"{path}.{key}" if path else key
# Special check for $where operator which can execute JavaScript (check this first)
if key == "$where":
raise RetryableToolError(
f"JavaScript execution operator '$where' not allowed in {parameter_name}",
developer_message=f"Found '$where' operator at path '{current_path}' in parameter '{parameter_name}'. JavaScript execution is not allowed for security reasons.",
additional_prompt_content=f"The {parameter_name} parameter cannot use the $where operator. Use other query operators instead.",
)
# Check if this key is a write operator
if key in WRITE_OPERATORS:
raise RetryableToolError(
f"Write operation '{key}' not allowed in {parameter_name}",
developer_message=f"Found write operation '{key}' at path '{current_path}' in parameter '{parameter_name}'. Only read operations are allowed.",
additional_prompt_content=f"The {parameter_name} parameter cannot contain write operations like '{key}'. Use only query/read operations such as $match, $gte, $lte, $in, $regex, etc.",
)
# Recursively validate nested objects
_validate_no_write_operations(value, parameter_name, current_path)
elif isinstance(obj, list):
for i, item in enumerate(obj):
current_path = f"{path}[{i}]" if path else f"[{i}]"
_validate_no_write_operations(item, parameter_name, current_path)
def _parse_json_parameter(
json_string: str | None, parameter_name: str, validate_read_only: bool = True
) -> Any | None:
"""
Parse a JSON string parameter with proper error handling and optional write operation validation.
Args:
json_string: The JSON string to parse (can be None)
parameter_name: Name of the parameter for error messages
validate_read_only: Whether to validate that no write operations are present
Returns:
Parsed JSON object or None if json_string is None
Raises:
RetryableToolError: If JSON parsing fails or write operations are detected
"""
if json_string is None:
return None
try:
parsed_obj = json.loads(json_string)
# Validate that no write operations are present
if validate_read_only and parsed_obj is not None:
_validate_no_write_operations(parsed_obj, parameter_name)
except json.JSONDecodeError as e:
raise RetryableToolError(
f"Invalid JSON in {parameter_name}: {e}",
developer_message=f"Failed to parse JSON string for parameter '{parameter_name}': {json_string}. Error: {e}",
additional_prompt_content=f"Please provide valid JSON for the {parameter_name} parameter. Check for proper escaping of quotes and valid JSON syntax.",
) from e
else:
return parsed_obj
def _validate_aggregation_pipeline(pipeline: list[Any], parameter_name: str) -> None:
"""
Validate that an aggregation pipeline only contains read operations.
Args:
pipeline: The aggregation pipeline to validate
parameter_name: Name of the parameter for error messages
Raises:
RetryableToolError: If write operations are detected in the pipeline
"""
# MongoDB aggregation stages that can modify data
WRITE_STAGES = {
"$out",
"$merge", # These stages write to collections
}
# Aggregation stages that are potentially dangerous
DANGEROUS_STAGES = {
"$where", # Can execute JavaScript
}
for i, stage in enumerate(pipeline):
if isinstance(stage, dict):
for stage_name in stage:
if stage_name in WRITE_STAGES:
raise RetryableToolError(
f"Write stage '{stage_name}' not allowed in {parameter_name}",
developer_message=f"Found write stage '{stage_name}' at pipeline index {i} in parameter '{parameter_name}'. Only read operations are allowed.",
additional_prompt_content=f"The {parameter_name} parameter cannot contain write stages like '{stage_name}'. Use only read stages such as $match, $group, $project, $sort, $limit, etc.",
)
if stage_name in DANGEROUS_STAGES:
raise RetryableToolError(
f"Dangerous stage '{stage_name}' not allowed in {parameter_name}",
developer_message=f"Found dangerous stage '{stage_name}' at pipeline index {i} in parameter '{parameter_name}'. JavaScript execution is not allowed for security reasons.",
additional_prompt_content=f"The {parameter_name} parameter cannot use the {stage_name} stage. Use other aggregation stages instead.",
)
# Also validate the stage content for write operations
_validate_no_write_operations(
stage[stage_name], f"{parameter_name}[{i}].{stage_name}"
)
def _parse_json_list_parameter(
json_strings: list[str] | None, parameter_name: str, validate_read_only: bool = True
) -> list[Any] | None:
"""
Parse a list of JSON strings with proper error handling and optional write operation validation.
Args:
json_strings: List of JSON strings to parse (can be None)
parameter_name: Name of the parameter for error messages
validate_read_only: Whether to validate that no write operations are present
Returns:
List of parsed JSON objects or None if json_strings is None
Raises:
RetryableToolError: If JSON parsing fails for any string or write operations are detected
"""
if json_strings is None:
return None
try:
parsed_list = [json.loads(json_str) for json_str in json_strings]
# Validate that no write operations are present
if validate_read_only and parsed_list is not None:
# Special handling for pipeline parameters
if parameter_name == "pipeline":
_validate_aggregation_pipeline(parsed_list, parameter_name)
else:
# For non-pipeline lists, validate each item
for i, item in enumerate(parsed_list):
_validate_no_write_operations(item, f"{parameter_name}[{i}]")
except json.JSONDecodeError as e:
raise RetryableToolError(
f"Invalid JSON in {parameter_name}: {e}",
developer_message=f"Failed to parse JSON string list for parameter '{parameter_name}': {json_strings}. Error: {e}",
additional_prompt_content=f"Please provide valid JSON strings for the {parameter_name} parameter. Each string must be valid JSON with proper escaping of quotes.",
) from e
else:
return parsed_list
def _infer_schema_from_docs(docs: list[dict[str, Any]]) -> dict[str, Any]:
"""Infer schema structure from a list of documents."""
schema: dict[str, Any] = {}
for doc in docs:
_update_schema_with_doc(schema, doc)
# Convert sets to lists for serialization
for key in schema:
if isinstance(schema[key]["types"], set):
schema[key]["types"] = list(schema[key]["types"])
return schema
def _update_schema_with_doc(schema: dict[str, Any], doc: dict[str, Any], prefix: str = "") -> None:
"""Recursively update schema with document structure."""
for key, value in doc.items():
full_key = f"{prefix}.{key}" if prefix else key
if full_key not in schema:
schema[full_key] = {
"types": set(),
"sample_values": [],
"null_count": 0,
"total_count": 0,
}
schema[full_key]["total_count"] += 1
if value is None:
schema[full_key]["null_count"] += 1
schema[full_key]["types"].add("null")
else:
value_type = type(value).__name__
schema[full_key]["types"].add(value_type)
# Store sample values (limit to 3 unique samples)
if (
len(schema[full_key]["sample_values"]) < 3
and value not in schema[full_key]["sample_values"]
):
schema[full_key]["sample_values"].append(value)
# Handle nested objects
if isinstance(value, dict):
_update_schema_with_doc(schema, value, full_key)
elif isinstance(value, list) and value and isinstance(value[0], dict):
# Handle arrays of objects by sampling the first few
for i, item in enumerate(value[:3]): # Sample first 3 array items
if isinstance(item, dict):
_update_schema_with_doc(schema, item, f"{full_key}[{i}]")
def _serialize_document(doc: dict[str, Any]) -> dict[str, Any]:
"""Convert MongoDB document to JSON-serializable format."""
if isinstance(doc, dict):
result = {}
for key, value in doc.items():
result[key] = _serialize_document(value)
return result
elif isinstance(doc, list):
return [_serialize_document(item) for item in doc]
elif isinstance(doc, ObjectId):
return str(doc)
elif isinstance(doc, datetime):
return doc.isoformat()
else:
return doc