arcade-mcp/libs/tests/tool/test_tool_metadata.py
Eric Gustin 25267ab6ee
JSON-safety validation for ToolMetadata.extras (#773)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> **Medium Risk**
> Tightens validation on tool metadata, which may break existing tools
that relied on non-JSON-serializable `extras` values or keys; changes
are localized and well-covered by tests.
> 
> **Overview**
> Adds **JSON-safety enforcement** for `ToolMetadata.extras`: top-level
keys must be strings at model construction, and `validate_for_tool()`
now recursively rejects non-JSON-native values (including non-finite
floats) with path-rich `ToolDefinitionError` messages.
> 
> Expands tests to cover valid/invalid nested `extras` cases and
error-message quality, and bumps `arcade-core` version to `4.5.0`.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
2bab0db3c17f0ddb97868764d10494da543b39e5. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
2026-02-25 09:48:04 -08:00

369 lines
15 KiB
Python

import datetime
import pytest
from arcade_core.catalog import ToolCatalog
from arcade_core.errors import ToolDefinitionError
from arcade_core.metadata import (
_INDETERMINATE_OPERATIONS,
_MUTATING_OPERATIONS,
_READ_ONLY_OPERATIONS,
Behavior,
Classification,
Operation,
ServiceDomain,
ToolMetadata,
)
from arcade_tdk import tool
class TestEnumCoverage:
"""
Tests to ensure all enum values are accounted for in validation helper sets.
These tests will fail if new enum values are added without updating the
corresponding helper sets, ensuring future maintainers don't forget to
categorize new values.
"""
def test_all_operations_are_categorized(self):
"""Every Operation must be in _READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, or _INDETERMINATE_OPERATIONS."""
all_operations = set(Operation)
categorized_operations = (
_READ_ONLY_OPERATIONS | _MUTATING_OPERATIONS | _INDETERMINATE_OPERATIONS
)
# Check that every operation is categorized
uncategorized = all_operations - categorized_operations
assert not uncategorized, (
f"The following Operation values are not categorized in _READ_ONLY_OPERATIONS, "
f"_MUTATING_OPERATIONS, or _INDETERMINATE_OPERATIONS: {uncategorized}. "
f"Please add them to the appropriate set in arcade_core/metadata.py"
)
# Check that there are no extra operations in the sets that don't exist in the enum
extra = categorized_operations - all_operations
assert not extra, (
f"The following values are in _READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, or "
f"_INDETERMINATE_OPERATIONS but don't exist in the Operation enum: {extra}"
)
def test_operation_categories_are_disjoint(self):
"""_READ_ONLY_OPERATIONS, _MUTATING_OPERATIONS, and _INDETERMINATE_OPERATIONS should not overlap."""
ro_mut = _READ_ONLY_OPERATIONS & _MUTATING_OPERATIONS
assert not ro_mut, (
f"The following Operation values appear in both _READ_ONLY_OPERATIONS and "
f"_MUTATING_OPERATIONS: {ro_mut}. An operation should be in exactly one category."
)
ro_ind = _READ_ONLY_OPERATIONS & _INDETERMINATE_OPERATIONS
assert not ro_ind, (
f"The following Operation values appear in both _READ_ONLY_OPERATIONS and "
f"_INDETERMINATE_OPERATIONS: {ro_ind}. An operation should be in exactly one category."
)
mut_ind = _MUTATING_OPERATIONS & _INDETERMINATE_OPERATIONS
assert not mut_ind, (
f"The following Operation values appear in both _MUTATING_OPERATIONS and "
f"_INDETERMINATE_OPERATIONS: {mut_ind}. An operation should be in exactly one category."
)
class TestToolMetadataValidation:
"""Test strict mode validation rules for ToolMetadata."""
def test_valid_metadata_passes(self):
"""Valid metadata with consistent values should not raise."""
metadata = ToolMetadata(
classification=Classification(
service_domains=[ServiceDomain.EMAIL],
),
behavior=Behavior(
operations=[Operation.CREATE],
read_only=False,
destructive=False,
open_world=True,
),
)
assert metadata is not None
def test_mutating_operation_with_read_only_raises(self):
"""Mutating operations with read_only=True should raise when validated."""
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.CREATE], read_only=True),
)
with pytest.raises(
ToolDefinitionError, match="mutating operation.*but is marked read_only=True"
):
metadata.validate_for_tool()
def test_opaque_with_read_only_raises(self):
"""OPAQUE operation with read_only=True should raise when validated."""
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.OPAQUE], read_only=True),
)
with pytest.raises(
ToolDefinitionError, match="OPAQUE operation but is marked read_only=True"
):
metadata.validate_for_tool()
def test_delete_without_destructive_raises(self):
"""DELETE operation without destructive=True should raise when validated."""
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.DELETE], destructive=False),
)
with pytest.raises(
ToolDefinitionError, match="'DELETE' operation.*but is not marked destructive=True"
):
metadata.validate_for_tool()
def test_service_domain_with_open_world_false_raises(self):
"""ServiceDomain present with open_world=False should raise when validated."""
metadata = ToolMetadata(
classification=Classification(service_domains=[ServiceDomain.EMAIL]),
behavior=Behavior(open_world=False),
)
with pytest.raises(
ToolDefinitionError, match="ServiceDomain.*but is marked open_world=False"
):
metadata.validate_for_tool()
def test_strict_false_bypasses_validation(self):
"""Setting strict=False should bypass all validation rules."""
# This would normally raise due to contradiction
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.CREATE], read_only=True),
strict=False,
)
# No error should be raised when validate_for_tool is called
metadata.validate_for_tool() # Should not raise
assert metadata is not None
def test_error_message_includes_operation_name(self):
"""Error messages should include the operation name for debugging."""
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.CREATE], read_only=True),
)
with pytest.raises(ToolDefinitionError, match="Tool has the mutating operation"):
metadata.validate_for_tool()
def test_read_only_operation_with_read_only_true_passes(self):
"""READ operation with read_only=True should pass validation."""
metadata = ToolMetadata(
behavior=Behavior(operations=[Operation.READ], read_only=True),
)
assert metadata is not None
assert metadata.behavior.read_only is True
def test_multiple_service_domains_allowed(self):
"""Tools can have multiple service domains."""
metadata = ToolMetadata(
classification=Classification(
service_domains=[ServiceDomain.CLOUD_STORAGE, ServiceDomain.DOCUMENTS],
),
behavior=Behavior(operations=[Operation.READ], read_only=True, open_world=True),
)
assert len(metadata.classification.service_domains) == 2
def test_extras_accepts_json_native_values(self):
"""Extras field accepts JSON-native key/value pairs."""
metadata = ToolMetadata(
extras={"idp": "entraID", "requires_mfa": True, "max_requests": 100},
)
assert metadata.extras["idp"] == "entraID"
assert metadata.extras["requires_mfa"] is True
assert metadata.extras["max_requests"] == 100
class TestExtrasJsonSafety:
"""Test that ToolMetadata.extras enforces JSON-native types at all depths.
JSON-native types: str, int, float, bool, None, dict (str keys), list.
Top-level non-string keys are caught at construction time (field_validator).
Nested keys and non-JSON-native values are caught at registration time
(validate_for_tool) where the tool name is available for error context.
"""
@pytest.mark.parametrize(
"extras",
[
pytest.param(None, id="none"),
pytest.param({}, id="empty_dict"),
pytest.param(
{"string": "hello", "int": 42, "float": 3.14, "bool": True, "null": None},
id="flat_json_native_values",
),
pytest.param({"config": {"api_key": "abc", "retries": 3}}, id="nested_dict"),
pytest.param({"tags": ["a", "b"], "counts": [1, 2, 3]}, id="lists"),
pytest.param(
{"l1": {"l2": [{"l3": [1, "two", None, True, 3.0]}]}},
id="deeply_nested",
),
pytest.param(
{"empty_dict": {}, "empty_list": [], "nested": {"also_empty": []}},
id="empty_nested_structures",
),
],
)
def test_valid_json_safe_extras(self, extras: dict | None):
metadata = ToolMetadata(extras=extras)
assert metadata.extras == extras
metadata.validate_for_tool()
# --- Top-level non-string keys: caught at construction time ---
@pytest.mark.parametrize(
"extras",
[
pytest.param({3: "three"}, id="int_key"),
pytest.param({True: "yes"}, id="bool_key"),
pytest.param({None: "null key"}, id="none_key"),
],
)
def test_non_string_top_level_key_rejected_at_construction(self, extras: dict):
with pytest.raises(ToolDefinitionError, match="must be strings"):
ToolMetadata(extras=extras)
# --- Nested non-string keys + non-JSON values: caught by validate_for_tool ---
@pytest.mark.parametrize(
"extras, match",
[
# Non-string keys nested in dicts/lists
pytest.param({"o": {42: "v"}}, "must be strings", id="int_key_nested"),
pytest.param({"o": {True: "v"}}, "must be strings", id="bool_key_nested"),
pytest.param({"o": {(1, 2): "v"}}, "must be strings", id="tuple_key_nested"),
pytest.param({"a": {"b": {42: "v"}}}, "must be strings", id="int_key_deep"),
pytest.param({"items": [{True: "v"}]}, "must be strings", id="bool_key_in_list"),
# Non-JSON-native values at top level
pytest.param({"v": datetime.datetime(2023, 1, 1)}, "JSON-safe", id="datetime_value"),
pytest.param({"v": datetime.date(2023, 1, 1)}, "JSON-safe", id="date_value"),
pytest.param({"v": datetime.timedelta(seconds=60)}, "JSON-safe", id="timedelta_value"),
pytest.param({"v": {1, 2, 3}}, "JSON-safe", id="set_value"),
pytest.param({"v": frozenset([1, 2])}, "JSON-safe", id="frozenset_value"),
pytest.param({"v": (1, 2)}, "JSON-safe", id="tuple_value"),
pytest.param({"v": b"hello"}, "JSON-safe", id="bytes_value"),
# Non-finite floats (not valid JSON per RFC 8259)
pytest.param({"v": float("nan")}, "JSON-safe", id="float_nan"),
pytest.param({"v": float("inf")}, "JSON-safe", id="float_inf"),
pytest.param({"v": float("-inf")}, "JSON-safe", id="float_neg_inf"),
# Non-JSON-native values nested
pytest.param(
{"o": {"i": datetime.datetime(2023, 1, 1)}},
"JSON-safe",
id="datetime_nested_in_dict",
),
pytest.param({"items": [1, "ok", {3, 4}]}, "JSON-safe", id="set_nested_in_list"),
pytest.param(
{"a": [{"b": [datetime.date(2023, 1, 1)]}]},
"JSON-safe",
id="date_deeply_nested",
),
],
)
def test_rejects_non_json_safe_extras_at_validation(self, extras: dict, match: str):
metadata = ToolMetadata(extras=extras)
with pytest.raises(ToolDefinitionError, match=match):
metadata.validate_for_tool()
# --- Error message quality ---
def test_error_includes_path_for_nested_violations(self):
metadata = ToolMetadata(extras={"outer": {42: "bad"}})
with pytest.raises(ToolDefinitionError, match=r"extras\['outer'\]"):
metadata.validate_for_tool()
metadata = ToolMetadata(extras={"outer": datetime.datetime(2023, 1, 1)})
with pytest.raises(ToolDefinitionError, match=r"extras\['outer'\]"):
metadata.validate_for_tool()
def test_error_includes_type_name(self):
metadata = ToolMetadata(extras={"ts": datetime.datetime(2023, 1, 1)})
with pytest.raises(ToolDefinitionError, match="datetime"):
metadata.validate_for_tool()
def test_error_reports_all_violations(self):
metadata = ToolMetadata(extras={"ok_key": {True: "bool key"}, "bad": (1, 2)})
with pytest.raises(ToolDefinitionError) as exc_info:
metadata.validate_for_tool()
msg = str(exc_info.value)
assert "True" in msg
assert "tuple" in msg
class TestToolDecoratorWithMetadata:
"""Test @tool decorator with metadata parameter."""
def test_decorator_accepts_metadata(self):
"""Decorator should store metadata as __tool_metadata__ attribute."""
@tool(
desc="Test tool",
metadata=ToolMetadata(
classification=Classification(service_domains=[ServiceDomain.MESSAGING]),
behavior=Behavior(operations=[Operation.CREATE], open_world=True),
),
)
def my_tool() -> str:
return "test"
assert hasattr(my_tool, "__tool_metadata__")
assert my_tool.__tool_metadata__.classification.service_domains == [ServiceDomain.MESSAGING]
def test_decorator_without_metadata_is_backward_compatible(self):
"""Decorator should work without metadata (existing tools unchanged)."""
@tool(desc="Test tool")
def my_tool() -> str:
return "test"
assert getattr(my_tool, "__tool_metadata__", None) is None
class TestToolDefinitionWithMetadata:
"""Test ToolDefinition includes metadata from decorator."""
def test_tool_definition_includes_metadata(self):
"""ToolDefinition.metadata should be populated from decorator."""
@tool(
desc="Send a message",
metadata=ToolMetadata(
classification=Classification(
service_domains=[ServiceDomain.MESSAGING],
),
behavior=Behavior(
operations=[Operation.CREATE],
read_only=False,
destructive=False,
open_world=True,
),
extras={"idp": "entraID"},
),
)
def send_message() -> str:
"""Send a message."""
return "sent"
definition = ToolCatalog.create_tool_definition(
send_message, toolkit_name="TestToolkit", toolkit_version="1.0.0"
)
assert definition.metadata is not None
assert definition.metadata.classification.service_domains == [ServiceDomain.MESSAGING]
assert definition.metadata.behavior.operations == [Operation.CREATE]
assert definition.metadata.extras == {"idp": "entraID"}
def test_tool_definition_without_metadata_is_none(self):
"""ToolDefinition.metadata should be None when not provided."""
@tool(desc="Simple tool")
def simple_tool() -> str:
"""A simple tool."""
return "done"
definition = ToolCatalog.create_tool_definition(
simple_tool, toolkit_name="TestToolkit", toolkit_version="1.0.0"
)
assert definition.metadata is None